

Python parser.unescape method code examples

This article collects typical usage examples of the Python html.parser.unescape method. If you have been wondering what parser.unescape does, how to use it, or what real calls look like in practice, the curated code examples below may help. You can also explore further usage examples from the html.parser module, where this method lives.


The following presents 9 code examples of the parser.unescape method, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
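
Before diving into the examples, here is a minimal sketch of the method itself (a quick orientation, not taken from the collected projects). Note that HTMLParser.unescape was deprecated in Python 3.4 and removed in Python 3.9; html.unescape is the modern replacement.

from html.parser import HTMLParser

# Works on Python 3.8 and earlier; the method is gone in 3.9+.
parser = HTMLParser()
print(parser.unescape("&lt;p&gt;Fish &amp; Chips&lt;/p&gt;"))  # <p>Fish & Chips</p>

# Modern replacement (Python 3.4+):
from html import unescape
print(unescape("&lt;p&gt;Fish &amp; Chips&lt;/p&gt;"))  # <p>Fish & Chips</p>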

Example 1: mtranslate_google

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
def mtranslate_google(word):
	import html.parser
	import re  # used by translate() below; missing in the original snippet
	import urllib.request
	import urllib.parse

	agent = {'User-Agent':
	"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36"}

	def unescape(text):
		parser = html.parser.HTMLParser()
		return (parser.unescape(text))

	def translate(to_translate, to_language="auto", from_language="auto"):
		base_link = "http://translate.google.com/m?hl=%s&sl=%s&q=%s"

		to_translate = urllib.parse.quote(to_translate)
		link = base_link % (to_language, from_language, to_translate)
		request = urllib.request.Request(link, headers=agent)
		raw_data = urllib.request.urlopen(request).read()

		data = raw_data.decode("utf-8")
		expr = r'class="t0">(.*?)<'
		re_result = re.findall(expr, data)

		if (len(re_result) == 0):
			result = ""
		else:
			result = unescape(re_result[0])
		return (result)

	# lang_to and lang_from are module-level globals set elsewhere in interSubs.py
	return [[word, translate(word, lang_to, lang_from)]], ['', '']

# reverso.net 
Author: oltodosel, Project: interSubs, Lines of code: 35, Source file: interSubs.py
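
A hedged usage sketch for the snippet above: lang_to and lang_from are module-level globals in interSubs.py, so they are stubbed here purely for illustration.

# Hypothetical values; in interSubs.py these globals come from its config.
lang_to = 'en'
lang_from = 'auto'

pairs, _ = mtranslate_google('katze')
print(pairs[0])  # ['katze', 'cat'], if Google's mobile page still serves class="t0"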

Example 2: getNews

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
# Imports needed to run this standalone (an assumption; the original file's
# import block is not shown): demjson is a third-party JSON decoder, and
# buildNewsUrl is a helper defined elsewhere in gfc.py.
from html import unescape
from urllib.request import urlopen

import demjson


def getNews(symbol):
    url = buildNewsUrl(symbol)

    content = urlopen(url).read().decode('utf-8')
 
    content_json = demjson.decode(content)
 
    article_json = []
    news_json = content_json['clusters']
    for cluster in news_json:
        for article in cluster:
            if article == 'a':
                article_json.extend(cluster[article])
 
    return [[unescape(art['t']).strip(), art['u']] for art in article_json] 
Author: aseylys, Project: KStock, Lines of code: 17, Source file: gfc.py

Example 3: format_text

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
def format_text(text):
    # 'parser' is the html.parser module (bound via 'from html import parser');
    # parser.unescape is effectively html.unescape, re-exported there.
    return parser.unescape(text).strip()
Author: fitnr, Project: twitter_bot_utils, Lines of code: 4, Source file: helpers.py
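
A quick usage sketch, assuming the module binds parser via "from html import parser" as the header comment suggests:

from html import parser

print(format_text("  Fish &amp; Chips \n"))  # Fish & Chips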

Example 4: find_news

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
def find_news():  # I'm adventuring with regular expressions for parsing!
    """Finds news for tweeting, along with their links."""
    # requests, latest_expr, news_block_expr and shorten_url are imported or
    # defined elsewhere in useful_twitter.py.
    nyTech = requests.get('https://nytimes.com/section/technology')
    latest = latest_expr.search(nyTech.text)
    news_blocks = news_block_expr.findall(latest.group(1))
    news = []
    for i in range(len(news_blocks)):
        item = (
            news_blocks[i][1].strip() + ' ' + shorten_url(news_blocks[i][0]),
            news_blocks[i][2].strip())  # This is img src.
        if item[0].startswith('Daily Report: '):
            # The original checked item[1] and did `item = item[14:]`, which
            # slices the tuple itself and yields (); presumably the intent was
            # to strip the 'Daily Report: ' prefix from the headline.
            item = (item[0][len('Daily Report: '):], item[1])
        news.append(item)

    '''tv = requests.get('https://theverge.com', headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Cafari/537.36'})
    feed_patt = r'(?s)<div class="c-compact-river">(.*?)<div class="l-col__sidebar"'
    bunches = re.findall(feed_patt, tv.text)
    verge_news = []
    for cluster in bunches:
        snippets = re.findall(r'<h2.*?><a.*>(.*?)</a></h2>', cluster)
        verge_news.extend(snippets)
    for item in verge_news:
        news.append(parser.unescape(item))
    random.shuffle(news) #to bring a feel of randomness'''
    return news 
Author: schedutron, Project: chirps, Lines of code: 28, Source file: useful_twitter.py

Example 5: _unescape

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
def _unescape(data):
    """ Return unescaped data, e.g. &gt; -> >, &quot; -> ", etc. """
    # htmlparser is presumably an HTMLParser instance created elsewhere in
    # create_zoom_meeting.py.
    try:
        return htmlparser.unescape(data)
    except Exception:
        return data
Author: ibmresilient, Project: resilient-community-apps, Lines of code: 8, Source file: create_zoom_meeting.py

Example 6: clean_html

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
def clean_html(htmlFragment):
    """
    Resilient textarea fields return HTML fragments. This routine strips the
    markup and joins the text content of the fragment (such as the text inside
    <div></div> blocks) with spaces.
    :param htmlFragment:
    :return: cleaned-up text
    """
    # BeautifulSoup (bs4), string_types (six) and unescape are imported
    # elsewhere in resilient_common.py.
    if not htmlFragment or not isinstance(htmlFragment, string_types):
        return htmlFragment

    s = BeautifulSoup(unescape(htmlFragment), "html.parser")

    return ' '.join(s.strings)
Author: ibmresilient, Project: resilient-community-apps, Lines of code: 16, Source file: resilient_common.py
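
A hedged usage sketch for clean_html above, with the imports it relies on spelled out (an assumption; the original module's import block is not shown):

from html import unescape

from bs4 import BeautifulSoup
from six import string_types

print(clean_html("<div>Status:</div><div>5 &gt; 3</div>"))  # Status: 5 > 3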

Example 7: unescape

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
def unescape(data):
    """ Return unescaped data, e.g. &gt; -> >, &quot; -> ", etc. """
    # htmlparser is presumably an HTMLParser instance created elsewhere in
    # resilient_common.py.
    try:
        return htmlparser.unescape(data)
    except Exception:
        return data
Author: ibmresilient, Project: resilient-community-apps, Lines of code: 8, Source file: resilient_common.py

Example 8: clean_html

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
def clean_html(htmlFragment):
    """
    Resilient textarea fields return HTML fragments. This routine strips the
    markup and returns the concatenated text content of the fragment.
    :param htmlFragment:
    :return: cleaned-up text
    """
    # BeautifulSoup (bs4), string_types (six) and unescape are imported
    # elsewhere in resilient_common.py.
    if not htmlFragment or not isinstance(htmlFragment, string_types):
        return htmlFragment

    return BeautifulSoup(unescape(htmlFragment), "html.parser").text
Author: ibmresilient, Project: resilient-community-apps, Lines of code: 14, Source file: resilient_common.py

Example 9: run

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
def run(self):
        """
        Checks through the submissions and archives and posts comments.
        """
        if not self._setup:
            raise Exception("Snapshill not ready yet!")

        submissions = self.reddit.front.new(limit=self.limit)

        for submission in submissions:
            debugTime = time.time()
            warned = False

            log.debug("Found submission.\n" + submission.permalink)

            if not should_notify(submission):
                log.debug("Skipping.")
                continue

            archives = [ArchiveContainer(fix_url(submission.url), submission.title)]

            if submission.is_self and submission.selftext_html is not None:
                log.debug("Found text post...")

                # unescape is html.parser's unescape, imported at module level
                # in snapshill.py; passing an explicit parser silences
                # BeautifulSoup's default-parser warning.
                links = BeautifulSoup(
                    unescape(submission.selftext_html), "html.parser"
                ).find_all("a")

                finishedURLs = []

                for anchor in links:
                    if time.time() > debugTime + WARN_TIME and not warned:
                        log.warning(
                            "Spent over {} seconds on post (ID: {})".format(
                                WARN_TIME, submission.name
                            )
                        )

                        warned = True

                    log.debug("Found link in text post...")

                    url = fix_url(anchor["href"])

                    if skip_url(url):
                        continue

                    if url in finishedURLs:
                        continue  # skip for sanity

                    archives.append(ArchiveContainer(url, anchor.contents[0]))
                    finishedURLs.append(url)
                    ratelimit(url)

            Notification(
                self.reddit,
                submission,
                self._get_header(submission.subreddit),
                archives,
            ).notify()
            db.commit() 
Author: justcool393, Project: SnapshillBot, Lines of code: 61, Source file: snapshill.py
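
The heart of the snippet above, unescaping selftext_html and collecting its anchors, can be tried in isolation. A hedged sketch follows; the Reddit, archiving and database plumbing is specific to SnapshillBot, and html.unescape stands in for the html.parser-based unescape used there.

from html import unescape

from bs4 import BeautifulSoup

selftext_html = '&lt;div&gt;&lt;a href="https://example.com"&gt;a link&lt;/a&gt;&lt;/div&gt;'
links = BeautifulSoup(unescape(selftext_html), "html.parser").find_all("a")
for anchor in links:
    print(anchor["href"], anchor.contents[0])  # https://example.com a link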


Note: The html.parser.unescape method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets are selected from open-source projects contributed by various developers, and copyright remains with the original authors; for distribution and use, please refer to the corresponding project's license. Do not repost without permission.