本文整理汇总了Python中html.unescape方法的典型用法代码示例。如果您正苦于以下问题:Python html.unescape方法的具体用法?Python html.unescape怎么用?Python html.unescape使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块html的用法示例。
在下文中一共展示了html.unescape方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: write_voiceactivity
# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def write_voiceactivity(path, subs):
    """Write a voice-activity annotation (``ssi-v="3"``) for a subtitle track.

    Two files are produced: ``<path>~`` receives one ``start;end;0;1.0`` row
    per kept subtitle, and ``<path>`` receives the XML header whose ``size``
    attribute is the number of rows actually written.

    Args:
        path: Output path; ``.annotation`` is appended when it is missing.
        subs: Iterable of subtitle objects exposing ``format_start()``,
            ``format_end()`` and ``get_text()``. Falsy entries are skipped.
    """
    if not path.endswith('.annotation'):
        path += '.annotation'
    count = 0
    with open(path + '~', 'w', encoding='latin-1') as fp:
        for sub in subs:
            if not sub:
                continue
            start = convert_timestamp_to_s(sub.format_start())
            end = convert_timestamp_to_s(sub.format_end())
            cap = html.unescape(sub.get_text().replace('\n', ' ').replace(';', ','))
            # Captions wrapped in *...* or [...] are left out (presumably
            # non-speech cues). Raw strings keep the regex escapes valid.
            if re.match(r'\s*\*.*\*\s*', cap) or re.match(r'\s*\[.*\]\s*', cap):
                continue
            fp.write('{};{};0;1.0\n'.format(start, end))
            # Count only rows that were written so the header size matches
            # the data file.
            count += 1
    with open(path, 'w', encoding='latin-1') as fp:
        fp.write('<?xml version="1.0" ?>\n<annotation ssi-v="3">\n\t<info ftype="ASCII" size="{}" />\n\t<meta role="subtitles" annotator="system" />\n\t<scheme name="voiceactivity" type="DISCRETE" color="#FFDDD9C3">\n\t\t<item name="VOICE" id="0" color="#FF494429" />\n\t</scheme>\n</annotation>\n'.format(count))
示例2: handle_data
# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def handle_data(self, raw):
    """Append a chunk of character data to the line currently being built.

    Nothing happens for empty data or while ``self.ishidden`` is set.
    Leading whitespace is dropped when the current line is still empty, and
    whitespace runs are collapsed to one space unless ``self.ispref`` is set.
    The index of the current line is then recorded in the first matching
    set among heading, bullet, indent and preformatted.
    """
    if not raw or self.ishidden:
        return

    chunk = raw.lstrip() if self.text[-1] == "" else raw
    if not self.ispref:
        chunk = re.sub(r"\s+", " ", chunk)
    self.text[-1] += unescape(chunk)

    current = len(self.text) - 1
    if self.ishead:
        self.idhead.add(current)
    elif self.isbull:
        self.idbull.add(current)
    elif self.isinde:
        self.idinde.add(current)
    elif self.ispref:
        self.idpref.add(current)
示例3: backtracking_id
# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def backtracking_id(site):
    """Walk a range of torrent ids on one site and record every title found.

    ``site`` is a mapping that supplies ``cookies``, ``start_torrent``,
    ``end_torrent``, ``torrent_url`` (a format string taking the id),
    ``search_ptn`` (a regex with a ``title`` group) and ``name``.
    """
    jar = cookies_raw2jar(site['cookies'])
    for torrent_id in range(site['start_torrent'], site['end_torrent'] + 2):
        started = time.time()
        page_url = site['torrent_url'].format(torrent_id)
        page = requests.get(page_url, cookies=jar, headers=headers)
        found = re.search(site['search_ptn'], page.text)
        if found is None:
            print("ID: {}, Cost: {:.5f} s, No torrent.".format(torrent_id, time.time() - started))
        else:
            safe_title = pymysql.escape_string(unescape(found.group("title")))
            published = re.search("发布于(.+?)<", page.text).group(1)
            published_ts = time.mktime(time.strptime(published, "%Y-%m-%d %H:%M:%S"))
            wrap_insert(site=site['name'], sid=torrent_id, title=safe_title, link=page_url, pubdate=published_ts, t=started)
        # NOTE(review): the pause is assumed to apply to every iteration,
        # whether or not a torrent was found - confirm against the original.
        time.sleep(2)
示例4: write_transcription
# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def write_transcription(path, subs):
    """Write a free-text transcription annotation (``ssi-v="3"``).

    ``<path>~`` receives one ``start;end;caption;1.0`` row per non-empty
    subtitle; ``<path>`` receives the XML header carrying the row count.
    ``.annotation`` is appended to ``path`` when it is missing.
    """
    target = path if path.endswith('.annotation') else path + '.annotation'
    written = 0
    with open(target + '~', 'w', encoding='latin-1') as data_file:
        for entry in subs:
            if entry:
                written += 1
                begin = convert_timestamp_to_s(entry.format_start())
                finish = convert_timestamp_to_s(entry.format_end())
                # Newlines and ';' would break the one-row-per-caption,
                # ';'-separated layout, so they are replaced first.
                flattened = entry.get_text().replace('\n', ' ').replace(';', ',')
                caption = html.unescape(flattened)
                data_file.write('{};{};{};1.0\n'.format(begin, finish, caption))
    with open(target, 'w', encoding='latin-1') as header_file:
        header_file.write('<?xml version="1.0" ?>\n<annotation ssi-v="3">\n\t<info ftype="ASCII" size="{}" />\n\t<meta role="youtube" annotator="system" />\n\t<scheme name="transcription" type="FREE"/>\n</annotation>\n'.format(written))
示例5: get_labels
# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def get_labels(key, fa_subs, kb_subs):
    """Turn a key's newline-separated HTML legends into plain-text labels.

    Returns ``(image_sources, True)`` as soon as any legend contains an
    ``<img>`` with a ``src``; otherwise returns ``(labels, False)``, where
    each label has its icon elements replaced by the characters given in
    ``fa_subs`` / ``kb_subs`` (class name -> hex code point) and its HTML
    entities unescaped.
    """
    labels = key.split('\n')
    icon_sets = (
        ('fa', r'fa-\S+', fa_subs),
        ('kb', r'kb-\S+', kb_subs),
    )
    for index, legend in enumerate(labels):
        tree = lxml.html.fragment_fromstring(legend, create_parent=True)

        # An image legend short-circuits everything else.
        image_sources = tree.xpath('//img[1]/@src')
        if image_sources:
            return (image_sources, True)

        # Swap icon elements for their unicode characters.
        for css_class, pattern, substitutions in icon_sets:
            for icon in tree.find_class(css_class):
                matched = re.search(pattern, icon.get('class'))
                if matched and matched.group(0) in substitutions:
                    icon.text = chr(int(substitutions[matched.group(0)], 16))

        # Line breaks become newlines; entities are decoded afterwards.
        for line_break in tree.xpath('//br'):
            line_break.text = '\n'
        labels[index] = html.unescape(tree.text_content())
    return (labels, False)
示例6: test_views_lti_video_post_error
# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def test_views_lti_video_post_error(self, mock_verify, mock_logger):
    """Check the page rendered for a video when the LTI request fails."""
    payload = {
        "resource_link_id": "123",
        "roles": random.choice(["instructor", "student"]),
        "context_id": "abc",
    }
    response = self.client.post("/lti/videos/{!s}".format(uuid.uuid4()), payload)

    self.assertEqual(response.status_code, 200)
    self.assertContains(response, "<html>")
    body = response.content.decode("utf-8")
    mock_logger.assert_called_once_with("lti error")

    # The frontend context is embedded as HTML-escaped JSON in a data attribute.
    found = re.search(
        '<div id="marsha-frontend-data" data-context="(.*)">', body
    )
    frontend_context = json.loads(unescape(found.group(1)))
    self.assertEqual(frontend_context.get("state"), "error")
    self.assertIsNone(frontend_context.get("resource"))
    self.assertEqual(frontend_context.get("modelName"), "videos")
示例7: test_views_lti_document_post_error
# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def test_views_lti_document_post_error(self, mock_verify, mock_logger):
    """Check the page rendered for a document when the LTI request fails."""
    payload = {
        "resource_link_id": "123",
        "roles": random.choice(["instructor", "student"]),
        "context_id": "abc",
    }
    response = self.client.post("/lti/documents/{!s}".format(uuid.uuid4()), payload)

    self.assertEqual(response.status_code, 200)
    self.assertContains(response, "<html>")
    body = response.content.decode("utf-8")
    mock_logger.assert_called_once_with("lti error")

    # The frontend context is embedded as HTML-escaped JSON in a data attribute.
    found = re.search(
        '<div id="marsha-frontend-data" data-context="(.*)">', body
    )
    frontend_context = json.loads(html.unescape(found.group(1)))
    self.assertEqual(frontend_context.get("state"), "error")
    self.assertIsNone(frontend_context.get("resource"))
示例8: extract_toot
# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def extract_toot(toot):
    """Reduce a toot's HTML content to plain text.

    Entities are unescaped, ``<br>`` and ``<p>`` elements become newlines,
    hashtag links are unwrapped to their text, other links are replaced by
    their ``href``, and profile URLs are rewritten back into ``@user@host``
    mentions. Trailing newlines are stripped from the result.

    Args:
        toot: HTML content of the toot as a string.

    Returns:
        The plain-text rendering of the toot.
    """
    toot = html.unescape(toot)  # convert HTML escape codes to text
    soup = BeautifulSoup(toot, "html.parser")
    for lb in soup.select("br"):  # replace <br> with linebreak
        lb.replace_with("\n")
    # NOTE(review): replace_with() swaps out the whole <p> element, so any
    # text inside the paragraph is discarded too - confirm this is intended.
    for p in soup.select("p"):  # ditto for <p>
        p.replace_with("\n")
    for ht in soup.select("a.hashtag"):  # convert hashtags from links to text
        ht.unwrap()
    # convert <a href='https://example.com'>example.com</a> to just https://example.com
    for link in soup.select("a"):
        # Not every <a> tag carries an href. `'href' in link` would test the
        # tag's children rather than its attributes, so ask the tag directly.
        if link.has_attr("href"):
            link.replace_with(link["href"])
    text = soup.get_text()
    text = re.sub(r"https://([^/]+)/(@[^\s]+)", r"\2@\1", text)  # put mastodon-style mentions back in
    text = re.sub(r"https://([^/]+)/users/([^\s/]+)", r"@\2@\1", text)  # put pleroma-style mentions back in
    text = text.rstrip("\n")  # remove trailing newline(s)
    return text
示例9: unescape
# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def unescape(s):
    """Convert HTML character references in ``s`` back to plain characters.

    Uses the standard ``html.unescape`` when it is importable. The legacy
    ``HTMLParser().unescape`` method is kept only as a fallback for old
    interpreters, because it was removed in Python 3.9.

    Args:
        s: Text that may contain entities such as ``&amp;`` or ``&#39;``.

    Returns:
        The text with all character references replaced.
    """
    try:
        from html import unescape as _html_unescape
    except ImportError:
        # No usable ``html`` module: fall back to the parser-based helper
        # the file already imports.
        return HTMLParser.HTMLParser().unescape(s)
    return _html_unescape(s)
# Return addrlist sequence at random, it can help create_connection function
示例10: _get_links
# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def _get_links(self, name: str) -> List[Dict[str, str]]:
    """Yield download-link records for package ``name`` from a simple index.

    Records come from the JSON cache when it has them. Otherwise the index
    page is fetched and every ``<a>`` whose path ends with an archive
    extension is turned into a dict with ``url``, ``name``, ``python`` and
    ``digest`` keys; the collected records are then stored in the cache.

    Raises:
        PackageNotFoundError: when the index answers 404 for the package.
    """
    cache = JSONCache(
        'warehouse-simple', urlparse(self.url).hostname, 'links', name,
        ttl=config['cache']['ttl'],
    )
    cached = cache.load()
    if cached is not None:
        yield from cached
        return

    dep_url = posixpath.join(self.url, quote(name)) + '/'
    # NOTE(review): the session is assumed to wrap only the request itself.
    with requests_session() as session:
        logger.debug('getting dep info from simple repo', extra=dict(url=dep_url))
        response = session.get(dep_url, auth=self.auth)
    if response.status_code == 404:
        raise PackageNotFoundError(package=name, url=dep_url)
    response.raise_for_status()
    document = html5lib.parse(response.text, namespaceHTMLElements=False)

    collected = []
    for anchor in document.findall('.//a'):
        href = anchor.get('href')
        if not href:
            continue
        parts = urlparse(href)
        if not parts.path.endswith(ARCHIVE_EXTENSIONS):
            continue
        requires_python = anchor.get('data-requires-python')
        # The URL fragment carries the hash, e.g. "#sha256=<hex>".
        hashes = parse_qs(parts.fragment)
        record = {
            'url': urljoin(dep_url, href),
            'name': parts.path.strip('/').split('/')[-1],
            'python': html.unescape(requires_python) if requires_python else '*',
            'digest': hashes['sha256'][0] if 'sha256' in hashes else None,
        }
        collected.append(record)
        yield record
    cache.dump(collected)
    return collected
示例11: escape
# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def escape(s):
    """Unescape HTML entities in ``s`` and neutralise awkward characters.

    The platform path separator becomes '、'; colon, space, slash, quotes
    and asterisk become '_'; '?' becomes '.'; tab, angle brackets, '#',
    ';' and '|' are removed.
    """
    to_underscore = ': /\'*"'
    to_remove = '\t<>#;|'
    table = str.maketrans(to_underscore, '_' * len(to_underscore), to_remove)
    table[ord('?')] = '.'
    # Set last so the path separator wins over the generic '/' rule.
    table[ord(os.path.sep)] = '、'
    return html.unescape(s).translate(table)
示例12: sanitize
# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def sanitize(s: str) -> str:
    """Try to corral the given string-ish thing into a unicode string.

    Expects input from files in arbitrary encodings and with bits of HTML in
    them. Useful for Lim-Dûl and similar.

    The text is first treated as UTF-8 that was mis-decoded as Latin-1 and
    repaired if that round trip succeeds; otherwise it is left as it is.
    HTML entities are unescaped in either case.

    Args:
        s: Text to clean up.

    Returns:
        The repaired, entity-free text.
    """
    try:
        s = s.encode('latin-1').decode('utf-8')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Encode fails for characters outside Latin-1 (text that is already
        # proper unicode); decode fails when the bytes are not UTF-8. Either
        # way the input was not mojibake, so keep it untouched.
        pass
    return html.unescape(s)
示例13: parse_cardhoarder_prices
# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def parse_cardhoarder_prices(s: str) -> PriceListType:
    """Parse a tab-separated cardhoarder price dump into price tuples.

    The first two lines (date and column header) are skipped. Every other
    line must contain exactly eight tab-separated fields. Rows with no
    stock, rows from the 'CH-*', 'VAN' and 'EVENT' sets, and sealed-product
    style names are dropped, as are names that ``name_lookup`` cannot
    resolve.

    Raises:
        InvalidDataException: when a line does not have seven tabs.
    """
    product_pattern = r'(Booster|Commander Deck|Commander:|Theme Deck|Draft Pack|Duel Decks|Reward Pack|Intro Pack|Tournament Pack|Premium Deck Series:|From the Vault)'
    details = []
    rows = s.splitlines()[2:]  # Skipping date and header line.
    for row in rows:
        if row.count('\t') != 7:
            raise InvalidDataException('Bad line (cardhoarder): {line}'.format(line=row))
        _mtgo_id, mtgo_set, _mtgjson_set, set_number, name, p, quantity, _mtgjson_uuid = row.split('\t')  # pylint: disable=unused-variable
        name = html.unescape(name.strip())
        if int(quantity) <= 0:
            continue
        if mtgo_set.startswith('CH-') or mtgo_set in ('VAN', 'EVENT'):
            continue
        if re.search(product_pattern, name):
            continue
        details.append((name, p, mtgo_set))

    prices = []
    for name, p, mtgo_set in details:
        if name_lookup(name) is not None:
            prices.append((name_lookup(name), html.unescape(p.strip()), mtgo_set))
    return prices
示例14: to_xml
# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def to_xml(cls, doc):
    """Serialise ``doc`` and return the markup with HTML entities unescaped."""
    serialized = tostring(doc)
    markup = serialized.decode("utf8")
    return html.unescape(markup)
示例15: print_rt
# 需要导入模块: import html [as 别名]
# 或者: from html import unescape [as 别名]
def print_rt(self, tweet):
    """Format a retweet as ``@retweeter RT @author: text``.

    The retweeted status text has its HTML entities unescaped first.
    """
    original = tweet.retweeted_status
    plain_text = html.unescape(original.text)
    template = "@{user.screen_name} RT @{rt.user.screen_name}: {text}"
    return template.format(user=tweet.user, rt=original, text=plain_text)