

Python parser.unescape method code examples

This article collects typical usage examples of the Python html.parser.unescape method. If you have been wondering what parser.unescape does, how to use it, or what real calls look like in practice, the curated code examples below may help. You can also explore further usage examples from the html.parser module, where this method lives.


The following presents 9 code examples of the parser.unescape method, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
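
Before diving into the examples, here is a minimal sketch of the method itself (a quick orientation, not taken from the collected projects). Note that HTMLParser.unescape was deprecated in Python 3.4 and removed in Python 3.9; html.unescape is the modern replacement.

from html.parser import HTMLParser

# Works on Python 3.8 and earlier; the method is gone in 3.9+.
parser = HTMLParser()
print(parser.unescape("&lt;p&gt;Fish &amp; Chips&lt;/p&gt;"))  # <p>Fish & Chips</p>

# Modern replacement (Python 3.4+):
from html import unescape
print(unescape("&lt;p&gt;Fish &amp; Chips&lt;/p&gt;"))  # <p>Fish & Chips</p>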

Example 1: mtranslate_google

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
def mtranslate_google(word):
	import html.parser
	import re  # used by translate() below; missing in the original snippet
	import urllib.request
	import urllib.parse

	agent = {'User-Agent':
	"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36"}

	def unescape(text):
		parser = html.parser.HTMLParser()
		return (parser.unescape(text))

	def translate(to_translate, to_language="auto", from_language="auto"):
		base_link = "http://translate.google.com/m?hl=%s&sl=%s&q=%s"

		to_translate = urllib.parse.quote(to_translate)
		link = base_link % (to_language, from_language, to_translate)
		request = urllib.request.Request(link, headers=agent)
		raw_data = urllib.request.urlopen(request).read()

		data = raw_data.decode("utf-8")
		expr = r'class="t0">(.*?)<'
		re_result = re.findall(expr, data)

		if (len(re_result) == 0):
			result = ""
		else:
			result = unescape(re_result[0])
		return (result)

	# lang_to and lang_from are module-level globals set elsewhere in interSubs.py
	return [[word, translate(word, lang_to, lang_from)]], ['', '']

# reverso.net 
Author: oltodosel, Project: interSubs, Lines of code: 35, Source file: interSubs.py
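
A hedged usage sketch for the snippet above: lang_to and lang_from are module-level globals in interSubs.py, so they are stubbed here purely for illustration.

# Hypothetical values; in interSubs.py these globals come from its config.
lang_to = 'en'
lang_from = 'auto'

pairs, _ = mtranslate_google('katze')
print(pairs[0])  # ['katze', 'cat'], if Google's mobile page still serves class="t0"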

Example 2: getNews

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
# Imports needed to run this standalone (an assumption; the original file's
# import block is not shown): demjson is a third-party JSON decoder, and
# buildNewsUrl is a helper defined elsewhere in gfc.py.
from html import unescape
from urllib.request import urlopen

import demjson


def getNews(symbol):
    url = buildNewsUrl(symbol)

    content = urlopen(url).read().decode('utf-8')
 
    content_json = demjson.decode(content)
 
    article_json = []
    news_json = content_json['clusters']
    for cluster in news_json:
        for article in cluster:
            if article == 'a':
                article_json.extend(cluster[article])
 
    return [[unescape(art['t']).strip(), art['u']] for art in article_json] 
Author: aseylys, Project: KStock, Lines of code: 17, Source file: gfc.py

Example 3: format_text

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
def format_text(text):
    # 'parser' is the html.parser module (bound via 'from html import parser');
    # parser.unescape is effectively html.unescape, re-exported there.
    return parser.unescape(text).strip()
Author: fitnr, Project: twitter_bot_utils, Lines of code: 4, Source file: helpers.py
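
A quick usage sketch, assuming the module binds parser via "from html import parser" as the header comment suggests:

from html import parser

print(format_text("  Fish &amp; Chips \n"))  # Fish & Chips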

Example 4: find_news

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
def find_news():  # I'm adventuring with regular expressions for parsing!
    """Finds news for tweeting, along with their links."""
    # requests, latest_expr, news_block_expr and shorten_url are imported or
    # defined elsewhere in useful_twitter.py.
    nyTech = requests.get('https://nytimes.com/section/technology')
    latest = latest_expr.search(nyTech.text)
    news_blocks = news_block_expr.findall(latest.group(1))
    news = []
    for i in range(len(news_blocks)):
        item = (
            news_blocks[i][1].strip() + ' ' + shorten_url(news_blocks[i][0]),
            news_blocks[i][2].strip())  # This is img src.
        if item[0].startswith('Daily Report: '):
            # The original checked item[1] and did `item = item[14:]`, which
            # slices the tuple itself and yields (); presumably the intent was
            # to strip the 'Daily Report: ' prefix from the headline.
            item = (item[0][len('Daily Report: '):], item[1])
        news.append(item)

    '''tv = requests.get('https://theverge.com', headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Cafari/537.36'})
    feed_patt = r'(?s)<div class="c-compact-river">(.*?)<div class="l-col__sidebar"'
    bunches = re.findall(feed_patt, tv.text)
    verge_news = []
    for cluster in bunches:
        snippets = re.findall(r'<h2.*?><a.*>(.*?)</a></h2>', cluster)
        verge_news.extend(snippets)
    for item in verge_news:
        news.append(parser.unescape(item))
    random.shuffle(news) #to bring a feel of randomness'''
    return news 
Author: schedutron, Project: chirps, Lines of code: 28, Source file: useful_twitter.py

Example 5: _unescape

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
def _unescape(data):
    """ Return unescaped data, e.g. &gt; -> >, &quot; -> ", etc. """
    # htmlparser is presumably an HTMLParser instance created elsewhere in
    # create_zoom_meeting.py.
    try:
        return htmlparser.unescape(data)
    except Exception:
        return data
Author: ibmresilient, Project: resilient-community-apps, Lines of code: 8, Source file: create_zoom_meeting.py

Example 6: clean_html

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
def clean_html(htmlFragment):
    """
    Resilient textarea fields return HTML fragments. This routine strips the
    markup and joins the text content of the fragment (such as the text inside
    <div></div> blocks) with spaces.
    :param htmlFragment:
    :return: cleaned-up text
    """
    # BeautifulSoup (bs4), string_types (six) and unescape are imported
    # elsewhere in resilient_common.py.
    if not htmlFragment or not isinstance(htmlFragment, string_types):
        return htmlFragment

    s = BeautifulSoup(unescape(htmlFragment), "html.parser")

    return ' '.join(s.strings)
Author: ibmresilient, Project: resilient-community-apps, Lines of code: 16, Source file: resilient_common.py
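
A hedged usage sketch for clean_html above, with the imports it relies on spelled out (an assumption; the original module's import block is not shown):

from html import unescape

from bs4 import BeautifulSoup
from six import string_types

print(clean_html("<div>Status:</div><div>5 &gt; 3</div>"))  # Status: 5 > 3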

Example 7: unescape

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
def unescape(data):
    """ Return unescaped data, e.g. &gt; -> >, &quot; -> ", etc. """
    # htmlparser is presumably an HTMLParser instance created elsewhere in
    # resilient_common.py.
    try:
        return htmlparser.unescape(data)
    except Exception:
        return data
Author: ibmresilient, Project: resilient-community-apps, Lines of code: 8, Source file: resilient_common.py

Example 8: clean_html

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
def clean_html(htmlFragment):
    """
    Resilient textarea fields return HTML fragments. This routine strips the
    markup and returns the concatenated text content of the fragment.
    :param htmlFragment:
    :return: cleaned-up text
    """
    # BeautifulSoup (bs4), string_types (six) and unescape are imported
    # elsewhere in resilient_common.py.
    if not htmlFragment or not isinstance(htmlFragment, string_types):
        return htmlFragment

    return BeautifulSoup(unescape(htmlFragment), "html.parser").text
Author: ibmresilient, Project: resilient-community-apps, Lines of code: 14, Source file: resilient_common.py

Example 9: run

# Required import: from html import parser [as alias]
# Or: from html.parser import unescape [as alias]
def run(self):
        """
        Checks through the submissions and archives and posts comments.
        """
        if not self._setup:
            raise Exception("Snapshill not ready yet!")

        submissions = self.reddit.front.new(limit=self.limit)

        for submission in submissions:
            debugTime = time.time()
            warned = False

            log.debug("Found submission.\n" + submission.permalink)

            if not should_notify(submission):
                log.debug("Skipping.")
                continue

            archives = [ArchiveContainer(fix_url(submission.url), submission.title)]

            if submission.is_self and submission.selftext_html is not None:
                log.debug("Found text post...")

                # unescape is html.parser's unescape, imported at module level
                # in snapshill.py; passing an explicit parser silences
                # BeautifulSoup's default-parser warning.
                links = BeautifulSoup(
                    unescape(submission.selftext_html), "html.parser"
                ).find_all("a")

                finishedURLs = []

                for anchor in links:
                    if time.time() > debugTime + WARN_TIME and not warned:
                        log.warning(
                            "Spent over {} seconds on post (ID: {})".format(
                                WARN_TIME, submission.name
                            )
                        )

                        warned = True

                    log.debug("Found link in text post...")

                    url = fix_url(anchor["href"])

                    if skip_url(url):
                        continue

                    if url in finishedURLs:
                        continue  # skip for sanity

                    archives.append(ArchiveContainer(url, anchor.contents[0]))
                    finishedURLs.append(url)
                    ratelimit(url)

            Notification(
                self.reddit,
                submission,
                self._get_header(submission.subreddit),
                archives,
            ).notify()
            db.commit() 
Author: justcool393, Project: SnapshillBot, Lines of code: 61, Source file: snapshill.py
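
The heart of the snippet above, unescaping selftext_html and collecting its anchors, can be tried in isolation. A hedged sketch follows; the Reddit, archiving and database plumbing is specific to SnapshillBot, and html.unescape stands in for the html.parser-based unescape used there.

from html import unescape

from bs4 import BeautifulSoup

selftext_html = '&lt;div&gt;&lt;a href="https://example.com"&gt;a link&lt;/a&gt;&lt;/div&gt;'
links = BeautifulSoup(unescape(selftext_html), "html.parser").find_all("a")
for anchor in links:
    print(anchor["href"], anchor.contents[0])  # https://example.com a link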


Note: The html.parser.unescape method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets are selected from open-source projects contributed by various developers, and copyright remains with the original authors; for distribution and use, please refer to the corresponding project's license. Do not repost without permission.