当前位置: 首页>>代码示例>>Python>>正文


Python html.unescape方法代码示例

本文整理汇总了Python中html.unescape方法的典型用法代码示例。如果您正苦于以下问题:Python html.unescape方法的具体用法?Python html.unescape怎么用?Python html.unescape使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在html的用法示例。


在下文中一共展示了html.unescape方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: write_voiceactivity

# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def write_voiceactivity(path, subs):
    """Write a voice-activity annotation (XML header plus data file) for subtitles.

    The data rows are written to ``<path>~`` and the XML header to ``<path>``;
    the ``.annotation`` suffix is appended to ``path`` when it is missing.
    Each non-empty subtitle produces one ``start;end;0;1.0`` row unless its
    caption begins with a ``*...*`` or ``[...]`` span.

    Args:
        path: Target file name, with or without the ``.annotation`` suffix.
        subs: Iterable of subtitle objects providing ``format_start()``,
            ``format_end()`` and ``get_text()``; falsy entries are skipped.
    """
    if not path.endswith('.annotation'):
        path += '.annotation'

    # Raw strings so the regex escapes are not parsed as (invalid) string escapes.
    starred = re.compile(r'\s*\*.*\*\s*')
    bracketed = re.compile(r'\s*\[.*\]\s*')

    count = 0

    with open(path + '~', 'w', encoding='latin-1') as fp:
        for sub in subs:
            if not sub:
                continue
            start = convert_timestamp_to_s(sub.format_start())
            end = convert_timestamp_to_s(sub.format_end())
            cap = html.unescape(sub.get_text().replace('\n', ' ').replace(';', ','))
            if starred.match(cap) or bracketed.match(cap):
                continue
            fp.write('{};{};0;1.0\n'.format(start, end))
            # Count only the rows actually written so that the header's
            # size attribute agrees with the data file.
            count += 1

    with open(path, 'w', encoding='latin-1') as fp:
        fp.write('<?xml version="1.0" ?>\n<annotation ssi-v="3">\n\t<info ftype="ASCII" size="{}" />\n\t<meta role="subtitles" annotator="system" />\n\t<scheme name="voiceactivity" type="DISCRETE" color="#FFDDD9C3">\n\t\t<item name="VOICE" id="0" color="#FF494429" />\n\t</scheme>\n</annotation>\n'.format(count))
开发者ID:hcmlab,项目名称:vadnet,代码行数:25,代码来源:annotation.py

示例2: handle_data

# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def handle_data(self, raw):
    """Append character data to the current text line and record the line's kind.

    Nothing happens when ``raw`` is empty or ``self.ishidden`` is set.
    Leading whitespace is stripped when the current line is still empty, and
    runs of whitespace are collapsed to one space unless ``self.ispref`` is
    set (presumably preformatted text -- confirm against the parser class).
    HTML character references are decoded before the text is appended.
    """
    if not raw or self.ishidden:
        return

    chunk = raw.lstrip() if self.text[-1] == "" else raw
    if not self.ispref:
        chunk = re.sub(r"\s+", " ", chunk)
    self.text[-1] += unescape(chunk)

    # Index of the line that just received text; at most one index set gets it.
    current = len(self.text) - 1
    if self.ishead:
        self.idhead.add(current)
    elif self.isbull:
        self.idbull.add(current)
    elif self.isinde:
        self.idinde.add(current)
    elif self.ispref:
        self.idpref.add(current)
开发者ID:wustho,项目名称:epr,代码行数:21,代码来源:epr.py

示例3: backtracking_id

# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def backtracking_id(site):
    """Scan a range of torrent ids on a site and record every torrent found.

    For each id from ``site['start_torrent']`` up to and including
    ``site['end_torrent'] + 1`` the torrent page is fetched. When
    ``site['search_ptn']`` matches, the unescaped and SQL-escaped title and
    the publish timestamp are passed to ``wrap_insert``; otherwise a
    "No torrent" line is printed. The loop sleeps two seconds after every id.
    """
    jar = cookies_raw2jar(site['cookies'])
    id_range = range(site['start_torrent'], site['end_torrent'] + 2)

    for torrent_id in id_range:
        started = time.time()

        page_url = site['torrent_url'].format(torrent_id)
        page = requests.get(page_url, cookies=jar, headers=headers)
        found = re.search(site['search_ptn'], page.text)

        if not found:
            print("ID: {}, Cost: {:.5f} s, No torrent.".format(torrent_id, time.time() - started))
        else:
            title = pymysql.escape_string(unescape(found.group("title")))
            # The publish time follows the "发布于" label in the page markup.
            published = re.search("发布于(.+?)<", page.text).group(1)
            stamp = time.mktime(time.strptime(published, "%Y-%m-%d %H:%M:%S"))

            wrap_insert(site=site['name'], sid=torrent_id, title=title, link=page_url, pubdate=stamp, t=started)

        # Pause between requests.
        time.sleep(2)
开发者ID:Rhilip,项目名称:PT-help,代码行数:21,代码来源:backtracking.py

示例4: write_transcription

# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def write_transcription(path, subs):
    """Write a transcription annotation (XML header plus data file) for subtitles.

    The data rows are written to ``<path>~`` and the XML header to ``<path>``;
    the ``.annotation`` suffix is appended to ``path`` when it is missing.
    Each non-empty subtitle produces one ``start;end;caption;1.0`` row, where
    the caption has newlines replaced by spaces, semicolons replaced by
    commas, and HTML character references decoded.

    Args:
        path: Target file name, with or without the ``.annotation`` suffix.
        subs: Iterable of subtitle objects providing ``format_start()``,
            ``format_end()`` and ``get_text()``; falsy entries are skipped.
    """
    if not path.endswith('.annotation'):
        path += '.annotation'

    count = 0

    # Decoded captions can contain characters outside Latin-1 (curly quotes,
    # dashes, ...). errors='replace' writes '?' for those instead of aborting
    # half-way through the file with UnicodeEncodeError.
    with open(path + '~', 'w', encoding='latin-1', errors='replace') as fp:
        for sub in subs:
            if not sub:
                continue
            count += 1
            start = convert_timestamp_to_s(sub.format_start())
            end = convert_timestamp_to_s(sub.format_end())
            # ';' is the field separator of the data file, so it must not
            # appear inside the caption.
            cap = html.unescape(sub.get_text().replace('\n', ' ').replace(';', ','))
            fp.write('{};{};{};1.0\n'.format(start, end, cap))

    with open(path, 'w', encoding='latin-1') as fp:
        fp.write('<?xml version="1.0" ?>\n<annotation ssi-v="3">\n\t<info ftype="ASCII" size="{}" />\n\t<meta role="youtube" annotator="system" />\n\t<scheme name="transcription" type="FREE"/>\n</annotation>\n'.format(count))
开发者ID:hcmlab,项目名称:vadnet,代码行数:23,代码来源:annotation.py

示例5: get_labels

# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def get_labels(key, fa_subs, kb_subs):
    """Turn a key's newline-separated HTML legends into plain-text labels.

    Returns ``(srcs, True)`` as soon as a legend contains an ``<img>`` with a
    ``src`` attribute, where ``srcs`` is the XPath result for that image.
    Otherwise returns ``(labels, False)`` with one plain-text label per legend.

    Args:
        key: String of legends separated by newlines; each is parsed as an
            HTML fragment.
        fa_subs: Mapping from ``fa-*`` class names to hexadecimal code points.
        kb_subs: Mapping from ``kb-*`` class names to hexadecimal code points.
    """
    labels = key.split('\n')
    icon_families = (
        ('fa', r'fa-\S+', fa_subs),
        ('kb', r'kb-\S+', kb_subs),
    )

    for index, markup in enumerate(labels):
        fragment = lxml.html.fragment_fromstring(markup, create_parent=True)

        # An image legend short-circuits everything else.
        image_srcs = fragment.xpath('//img[1]/@src')
        if image_srcs:
            return (image_srcs, True)

        # Swap icon elements for the unicode character they stand for.
        for css_class, pattern, substitutions in icon_families:
            for icon in fragment.find_class(css_class):
                found = re.search(pattern, icon.get('class'))
                if not found:
                    continue
                icon_name = found.group(0)
                if icon_name in substitutions:
                    icon.text = chr(int(substitutions[icon_name], 16))

        # Line breaks become newlines, then entities are decoded.
        for line_break in fragment.xpath('//br'):
            line_break.text = '\n'
        labels[index] = html.unescape(fragment.text_content())

    return (labels, False)
开发者ID:CQCumbers,项目名称:kle_render,代码行数:25,代码来源:keyboard.py

示例6: test_views_lti_video_post_error

# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def test_views_lti_video_post_error(self, mock_verify, mock_logger):
    """Check the page returned by the LTI video view when an LTI error is logged."""
    payload = {
        "resource_link_id": "123",
        "roles": random.choice(["instructor", "student"]),
        "context_id": "abc",
    }
    url = "/lti/videos/{!s}".format(uuid.uuid4())

    response = self.client.post(url, payload)
    self.assertEqual(response.status_code, 200)
    self.assertContains(response, "<html>")
    page = response.content.decode("utf-8")

    mock_logger.assert_called_once_with("lti error")

    # The frontend state is HTML-escaped JSON held in a data attribute.
    found = re.search('<div id="marsha-frontend-data" data-context="(.*)">', page)
    frontend_state = json.loads(unescape(found.group(1)))

    self.assertEqual(frontend_state.get("state"), "error")
    self.assertIsNone(frontend_state.get("resource"))
    self.assertEqual(frontend_state.get("modelName"), "videos")
开发者ID:openfun,项目名称:marsha,代码行数:21,代码来源:test_views_lti_video.py

示例7: test_views_lti_document_post_error

# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def test_views_lti_document_post_error(self, mock_verify, mock_logger):
    """Check the page returned by the LTI document view when an LTI error is logged."""
    chosen_role = random.choice(["instructor", "student"])
    form = dict(resource_link_id="123", roles=chosen_role, context_id="abc")
    target = "/lti/documents/{!s}".format(uuid.uuid4())

    response = self.client.post(target, form)
    self.assertEqual(response.status_code, 200)
    self.assertContains(response, "<html>")
    markup = response.content.decode("utf-8")

    mock_logger.assert_called_once_with("lti error")

    # Pull the HTML-escaped JSON out of the data attribute and decode it.
    hit = re.search(
        '<div id="marsha-frontend-data" data-context="(.*)">', markup
    )
    escaped_json = hit.group(1)
    decoded = json.loads(html.unescape(escaped_json))

    self.assertEqual(decoded.get("state"), "error")
    self.assertIsNone(decoded.get("resource"))
开发者ID:openfun,项目名称:marsha,代码行数:20,代码来源:test_views_lti_document.py

示例8: extract_toot

# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def extract_toot(toot):
	"""Convert the HTML content of a toot into plain text.

	Line breaks and paragraphs become newlines, hashtag links are reduced to
	their text, other links are replaced by their target URL, and profile
	URLs are rewritten as ``@user@instance`` mentions.

	Args:
		toot: HTML string of the toot's content.

	Returns:
		The plain-text form of the toot without trailing newlines.
	"""
	# NOTE(review): unescaping before parsing means escaped markup such as
	# "&lt;b&gt;" is parsed as a real tag -- confirm this is intended.
	toot = html.unescape(toot) # convert HTML escape codes to text
	soup = BeautifulSoup(toot, "html.parser")
	for lb in soup.select("br"): # replace <br> with linebreak
		lb.replace_with("\n")

	for p in soup.select("p"): # end each <p> with a linebreak
		# replace_with() would discard the paragraph's contents along with the
		# tag, so add the newline after it and unwrap the tag instead
		p.insert_after("\n")
		p.unwrap()

	for ht in soup.select("a.hashtag"): # convert hashtags from links to text
		ht.unwrap()

	for link in soup.select("a"): # convert <a href='https://example.com'>example.com</a> to just https://example.com
		# `'href' in link` tests the tag's child nodes, not its attributes,
		# so attribute presence has to be checked with has_attr()
		if link.has_attr("href"):
			# not every <a> tag carries a href
			link.replace_with(link["href"])

	text = soup.get_text()
	text = re.sub(r"https://([^/]+)/(@[^\s]+)", r"\2@\1", text) # put mastodon-style mentions back in
	text = re.sub(r"https://([^/]+)/users/([^\s/]+)", r"@\2@\1", text) # put pleroma-style mentions back in
	text = text.rstrip("\n") # remove trailing newline(s)
	return text
开发者ID:Lynnesbian,项目名称:mstdn-ebooks,代码行数:24,代码来源:functions.py

示例9: unescape

# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def unescape(s):
    """Return ``s`` with HTML character references replaced by their characters.

    ``html.unescape`` is used when it is available. The parser-based fallback
    is kept for older interpreters: ``HTMLParser.unescape`` no longer exists
    on Python 3.9+, and the ``HTMLParser`` module itself exists only on
    Python 2.
    """
    try:
        from html import unescape as _html_unescape
    except ImportError:
        try:
            from html.parser import HTMLParser as _HTMLParser  # Python 3.0-3.3
        except ImportError:
            from HTMLParser import HTMLParser as _HTMLParser  # Python 2
        return _HTMLParser().unescape(s)
    return _html_unescape(s)

# Return the addrlist sequence in random order; this can help the create_connection function
开发者ID:ForgQi,项目名称:bilibiliupload,代码行数:7,代码来源:compact.py

示例10: _get_links

# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def _get_links(self, name: str) -> List[Dict[str, str]]:
    """Yield one record per archive link on the simple-index page of ``name``.

    This is a generator. Cached records are replayed when the cache has them;
    otherwise the index page is fetched and parsed, each archive link is
    yielded as a dict with ``url``, ``name``, ``python`` and ``digest`` keys,
    and the full list is written to the cache once iteration finishes.

    Raises:
        PackageNotFoundError: when the index answers 404 for the package.
    """
    cache = JSONCache(
        'warehouse-simple', urlparse(self.url).hostname, 'links', name,
        ttl=config['cache']['ttl'],
    )
    cached = cache.load()
    if cached is not None:
        yield from cached
        return

    dep_url = posixpath.join(self.url, quote(name)) + '/'
    with requests_session() as session:
        logger.debug('getting dep info from simple repo', extra={'url': dep_url})
        response = session.get(dep_url, auth=self.auth)
    if response.status_code == 404:
        raise PackageNotFoundError(package=name, url=dep_url)
    response.raise_for_status()
    document = html5lib.parse(response.text, namespaceHTMLElements=False)

    collected = []
    for anchor in document.findall('.//a'):
        href = anchor.get('href')
        if not href:
            continue
        target = urlparse(href)
        # Only links that point at an archive file are of interest.
        if not target.path.endswith(ARCHIVE_EXTENSIONS):
            continue

        requires = anchor.get('data-requires-python')
        hashes = parse_qs(target.fragment)
        record = {
            'url': urljoin(dep_url, href),
            'name': target.path.strip('/').split('/')[-1],
            'python': html.unescape(requires) if requires else '*',
            'digest': hashes.get('sha256', [None])[0],
        }
        collected.append(record)
        yield record

    # Reached only when the caller consumed every record.
    cache.dump(collected)
    return collected
开发者ID:dephell,项目名称:dephell,代码行数:43,代码来源:_simple.py

示例11: escape

# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def escape(s):
    """Decode HTML entities in ``s`` and replace or drop special characters.

    The path separator becomes '、'; ':', ' ', '/', quotes and '*' become
    '_'; '?' becomes '.'; tab, '<', '>', '#', ';' and '|' are removed.
    Presumably this makes the result usable as a file name -- confirm with
    the callers.
    """
    replacements = {
        ':': '_', ' ': '_', '\t': '', '?': '.', '/': '_', '\'': '_',
        '<': '', '>': '', '#': '', ';': '', '*': '_', "\"": '_', '|': '',
    }
    # The path separator rule has priority over the '/' rule above.
    replacements[os.path.sep] = '、'
    return html.unescape(s).translate(str.maketrans(replacements))
开发者ID:Trinkle23897,项目名称:learn2018-autodown,代码行数:4,代码来源:learn.py

示例12: sanitize

# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def sanitize(s: str) -> str:
    """Try and corral the given string-ish thing into a unicode string.

    Expects input from files in arbitrary encodings and with bits of HTML in
    them. Useful for Lim-Dûl and similar.

    Text that was UTF-8 but got decoded as Latin-1 is repaired by reversing
    that step. When the round trip is not possible the text is left as it is.
    HTML character references are decoded in either case.
    """
    try:
        s = s.encode('latin-1').decode('utf-8')
    except UnicodeError:
        # UnicodeDecodeError: the bytes are not UTF-8, so the text was not
        # mis-decoded. UnicodeEncodeError: the text holds characters outside
        # Latin-1, so it cannot be mis-decoded Latin-1 either.
        pass
    return html.unescape(s)
开发者ID:PennyDreadfulMTG,项目名称:Penny-Dreadful-Tools,代码行数:9,代码来源:text.py

示例13: parse_cardhoarder_prices

# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def parse_cardhoarder_prices(s: str) -> PriceListType:
    """Parse a tab-separated cardhoarder price list into price tuples.

    The first two lines (date and column header) are skipped. Every other
    line must hold exactly eight tab-separated fields. Rows with no stock,
    rows from ``CH-*``, ``VAN`` or ``EVENT`` sets, and sealed-product names
    are ignored, as are names that ``name_lookup`` cannot resolve.

    Raises:
        InvalidDataException: when a line does not contain exactly seven tabs.
    """
    kept = []
    for row in s.splitlines()[2:]:
        if row.count('\t') != 7:
            raise InvalidDataException('Bad line (cardhoarder): {line}'.format(line=row))
        _, mtgo_set, _, _, raw_name, price, quantity, _ = row.split('\t')
        card = html.unescape(raw_name.strip())

        if int(quantity) <= 0:
            continue
        if mtgo_set.startswith('CH-') or mtgo_set in ('VAN', 'EVENT'):
            continue
        # Sealed products and similar non-card items are not priced.
        if re.search(r'(Booster|Commander Deck|Commander:|Theme Deck|Draft Pack|Duel Decks|Reward Pack|Intro Pack|Tournament Pack|Premium Deck Series:|From the Vault)', card):
            continue
        kept.append((card, price, mtgo_set))

    prices = []
    for card, price, mtgo_set in kept:
        if name_lookup(card) is None:
            continue
        prices.append((name_lookup(card), html.unescape(price.strip()), mtgo_set))
    return prices
开发者ID:PennyDreadfulMTG,项目名称:Penny-Dreadful-Tools,代码行数:13,代码来源:parser.py

示例14: to_xml

# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def to_xml(cls, doc):
    """Serialize ``doc`` with ``tostring`` and decode its HTML character references.

    The serialized bytes are decoded as UTF-8 before unescaping.
    """
    serialized = tostring(doc)
    markup = serialized.decode("utf8")
    return html.unescape(markup)
开发者ID:joeyism,项目名称:py-edgar,代码行数:4,代码来源:txtml.py

示例15: print_rt

# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def print_rt(self, tweet):
    """Format a retweet as ``@retweeter RT @author: text``.

    HTML character references in the retweeted text are decoded first.
    Despite the name, the formatted string is returned, not printed.
    """
    retweeted = tweet.retweeted_status
    decoded = html.unescape(retweeted.text)
    template = "@{user.screen_name} RT @{rt.user.screen_name}: {text}"
    return template.format(user=tweet.user, rt=retweeted, text=decoded)
开发者ID:odrling,项目名称:peony-twitter,代码行数:7,代码来源:stream.py


注:本文中的html.unescape方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。