當前位置: 首頁>>代碼示例>>Python>>正文


Python parser.unescape方法代碼示例

本文整理匯總了Python中html.parser.unescape方法的典型用法代碼示例。如果您正苦於以下問題:Python parser.unescape方法的具體用法?Python parser.unescape怎麽用?Python parser.unescape使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在html.parser的用法示例。注意:HTMLParser.unescape() 方法自 Python 3.4 起已棄用,並在 Python 3.9 中被移除;新代碼應改用 html.unescape()。


在下文中一共展示了parser.unescape方法的9個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: mtranslate_google

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import unescape [as 別名]
def mtranslate_google(word):
    """Translate *word* by scraping the mobile Google Translate page.

    The language pair is read from the module-level names ``lang_to`` and
    ``lang_from``, which are defined outside this function.

    Args:
        word: Text to translate.

    Returns:
        A 2-tuple ``([[word, translation]], ['', ''])``. ``translation`` is
        an empty string when the response contains no ``class="t0"`` element.
    """
    # Function-scope imports, as in the original; ``re`` is imported here as
    # well so the function does not depend on a module-level import.
    import html
    import re
    import urllib.parse
    import urllib.request

    agent = {'User-Agent':
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36"}

    def translate(to_translate, to_language="auto", from_language="auto"):
        """Request the translation page and extract the first result."""
        base_link = "http://translate.google.com/m?hl=%s&sl=%s&q=%s"

        to_translate = urllib.parse.quote(to_translate)
        link = base_link % (to_language, from_language, to_translate)
        request = urllib.request.Request(link, headers=agent)

        # The context manager closes the HTTP response even if read() fails.
        with urllib.request.urlopen(request) as response:
            data = response.read().decode("utf-8")

        match = re.search(r'class="t0">(.*?)<', data)
        if match is None:
            return ""

        # HTMLParser().unescape() was removed in Python 3.9; html.unescape()
        # is the supported replacement.
        return html.unescape(match.group(1))

    return [[word, translate(word, lang_to, lang_from)]], ['', '']

# reverso.net 
開發者ID:oltodosel,項目名稱:interSubs,代碼行數:35,代碼來源:interSubs.py

示例2: getNews

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import unescape [as 別名]
def getNews(symbol):
    """Fetch news entries for *symbol*.

    Downloads the URL produced by ``buildNewsUrl(symbol)``, decodes the body
    as UTF-8, parses it with ``demjson.decode`` and collects the entries
    stored under the ``'a'`` key of every item in ``'clusters'``.

    Args:
        symbol: Value passed through to ``buildNewsUrl``.

    Returns:
        A list of two-element lists: the entry's ``'t'`` value HTML-unescaped
        and stripped, followed by its ``'u'`` value (presumably title and
        URL -- confirm against the feed format).
    """
    url = buildNewsUrl(symbol)

    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(url) as response:
        content = response.read().decode('utf-8')

    content_json = demjson.decode(content)

    article_json = []
    for cluster in content_json['clusters']:
        # Only clusters that carry an 'a' entry contribute articles.
        if 'a' in cluster:
            article_json.extend(cluster['a'])

    return [[unescape(art['t']).strip(), art['u']] for art in article_json]
開發者ID:aseylys,項目名稱:KStock,代碼行數:17,代碼來源:gfc.py

示例3: format_text

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import unescape [as 別名]
def format_text(text):
    """Unescape *text* via ``parser.unescape`` and strip surrounding whitespace."""
    unescaped = parser.unescape(text)
    return unescaped.strip()
開發者ID:fitnr,項目名稱:twitter_bot_utils,代碼行數:4,代碼來源:helpers.py

示例4: find_news

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import unescape [as 別名]
def find_news():  # I'm adventuring with regular expressions for parsing!
    """Find news for tweeting, along with their links.

    Fetches the NYTimes technology section page, isolates the part matched
    by ``latest_expr`` and extracts the individual items with
    ``news_block_expr``.

    Returns:
        A list of ``(text, img_src)`` tuples, where ``text`` is the stripped
        headline followed by a space and the shortened article URL. The list
        is empty when ``latest_expr`` does not match the page.
    """
    prefix = 'Daily Report: '

    nyTech = requests.get('https://nytimes.com/section/technology')
    latest = latest_expr.search(nyTech.text)
    if latest is None:
        # Page layout not recognised: report no news instead of failing on
        # ``None.group``.
        return []

    news = []
    for block in news_block_expr.findall(latest.group(1)):
        headline = block[1].strip()
        # The prefix belongs to the headline. The previous code tested the
        # image src and then sliced the 2-tuple itself, which appended an
        # empty tuple.
        if headline.startswith(prefix):
            headline = headline[len(prefix):]
        news.append((
            headline + ' ' + shorten_url(block[0]),
            block[2].strip(),  # This is img src.
        ))
    return news
開發者ID:schedutron,項目名稱:chirps,代碼行數:28,代碼來源:useful_twitter.py

示例5: _unescape

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import unescape [as 別名]
def _unescape(data):
    """Return *data* with HTML entities unescaped, e.g. ``&gt;`` -> ``>``.

    Delegates to ``htmlparser.unescape``. This is best-effort: if the call
    raises (for example because *data* is not a string), *data* is returned
    unchanged.
    """
    try:
        return htmlparser.unescape(data)
    except Exception:
        # Deliberate fallback; ``Exception`` rather than a bare ``except`` so
        # KeyboardInterrupt and SystemExit still propagate.
        return data
開發者ID:ibmresilient,項目名稱:resilient-community-apps,代碼行數:8,代碼來源:create_zoom_meeting.py

示例6: clean_html

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import unescape [as 別名]
def clean_html(htmlFragment):
    """
    Reduce an HTML fragment to plain text.

    The fragment is unescaped, parsed with BeautifulSoup's "html.parser"
    backend, and its text nodes are joined with single spaces.

    :param htmlFragment: value to clean; falsy values and values that are not
        instances of ``string_types`` are returned unchanged
    :return: the space-joined text content, or the input itself when it was
        not cleaned
    """
    # Short-circuits on falsy input before the type check is evaluated.
    is_text = htmlFragment and isinstance(htmlFragment, string_types)
    if not is_text:
        return htmlFragment

    soup = BeautifulSoup(unescape(htmlFragment), "html.parser")
    text_nodes = soup.strings
    return ' '.join(text_nodes)
開發者ID:ibmresilient,項目名稱:resilient-community-apps,代碼行數:16,代碼來源:resilient_common.py

示例7: unescape

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import unescape [as 別名]
def unescape(data):
    """Return *data* with HTML entities unescaped, e.g. ``&gt;`` -> ``>``.

    Delegates to ``htmlparser.unescape``. This is best-effort: if the call
    raises (for example because *data* is not a string), *data* is returned
    unchanged.
    """
    try:
        return htmlparser.unescape(data)
    except Exception:
        # Deliberate fallback; ``Exception`` rather than a bare ``except`` so
        # KeyboardInterrupt and SystemExit still propagate.
        return data
開發者ID:ibmresilient,項目名稱:resilient-community-apps,代碼行數:8,代碼來源:resilient_common.py

示例8: clean_html

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import unescape [as 別名]
def clean_html(htmlFragment):
    """
    Reduce an HTML fragment to its text content.

    The fragment is unescaped, parsed with BeautifulSoup's "html.parser"
    backend, and the parsed document's ``.text`` is returned.

    :param htmlFragment: value to clean; falsy values and values that are not
        instances of ``string_types`` are returned unchanged
    :return: the text content, or the input itself when it was not cleaned
    """
    # Guard clauses: nothing to clean for empty or non-string input.
    if not htmlFragment:
        return htmlFragment
    if not isinstance(htmlFragment, string_types):
        return htmlFragment

    unescaped = unescape(htmlFragment)
    return BeautifulSoup(unescaped, "html.parser").text
開發者ID:ibmresilient,項目名稱:resilient-community-apps,代碼行數:14,代碼來源:resilient_common.py

示例9: run

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import unescape [as 別名]
def run(self):
    """
    Checks through the submissions and archives and posts comments.

    For every submission returned by ``self.reddit.front.new`` that passes
    ``should_notify``, builds a list of ``ArchiveContainer`` objects: one for
    the submission URL and, for self posts with HTML text, one per distinct
    non-skipped link found in the post body. It then sends a
    ``Notification`` for the submission and commits ``db``.

    Raises:
        Exception: if ``self._setup`` is falsy.
    """
    if not self._setup:
        raise Exception("Snapshill not ready yet!")

    submissions = self.reddit.front.new(limit=self.limit)

    for submission in submissions:
        debugTime = time.time()
        warned = False

        log.debug("Found submission.\n" + submission.permalink)

        if not should_notify(submission):
            log.debug("Skipping.")
            continue

        archives = [ArchiveContainer(fix_url(submission.url), submission.title)]

        if submission.is_self and submission.selftext_html is not None:
            log.debug("Found text post...")

            links = BeautifulSoup(unescape(submission.selftext_html)).find_all("a")

            finishedURLs = []

            for anchor in links:
                # Warn at most once per submission when processing is slow.
                if time.time() > debugTime + WARN_TIME and not warned:
                    log.warn(
                        "Spent over {} seconds on post (ID: {})".format(
                            WARN_TIME, submission.name
                        )
                    )

                    warned = True

                log.debug("Found link in text post...")

                # An <a> tag without an href cannot be archived; skip it
                # instead of letting a KeyError abort the whole run.
                href = anchor.get("href")
                if href is None:
                    continue

                url = fix_url(href)

                if skip_url(url):
                    continue

                if url in finishedURLs:
                    continue  # skip for sanity

                # An anchor with no children would make ``contents[0]`` raise
                # IndexError.
                # NOTE(review): falling back to the URL as the label is an
                # assumption -- confirm how ArchiveContainer uses it.
                label = anchor.contents[0] if anchor.contents else url

                archives.append(ArchiveContainer(url, label))
                finishedURLs.append(url)
                ratelimit(url)

        Notification(
            self.reddit,
            submission,
            self._get_header(submission.subreddit),
            archives,
        ).notify()
        db.commit()


注:本文中的html.parser.unescape方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。