本文整理汇总了Python中html.entities.html5方法的典型用法代码示例。如果您正苦于以下问题:Python entities.html5方法的具体用法?Python entities.html5怎么用?Python entities.html5使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类html.entities的用法示例。
在下文中一共展示了entities.html5方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse_html_declaration
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import html5 [as 别名]
def parse_html_declaration(self, i):
    """Dispatch the ``<!...`` construct that starts at ``self.rawdata[i]``.

    Comments and marked sections are delegated to ``parse_comment`` and
    ``parse_marked_section``.  A doctype (matched case-insensitively) is
    reported through ``handle_decl`` with the text between ``<!`` and the
    closing ``>``.  Anything else goes to ``parse_bogus_comment``.

    Returns the delegate's result, or the index just past the doctype's
    ``>``, or -1 when the doctype has no closing ``>`` yet.
    """
    rawdata = self.rawdata
    assert rawdata[i:i+2] == '<!', ('unexpected call to '
                                    'parse_html_declaration()')

    # Comments are normally intercepted by the caller already; kept here
    # so the method is complete on its own.
    if rawdata[i:i+4] == '<!--':
        return self.parse_comment(i)

    if rawdata[i:i+3] == '<![':
        return self.parse_marked_section(i)

    if rawdata[i:i+9].lower() != '<!doctype':
        return self.parse_bogus_comment(i)

    # Doctype: locate the terminating '>'.
    end = rawdata.find('>', i+9)
    if end == -1:
        return -1
    self.handle_decl(rawdata[i+2:end])
    return end + 1
# Internal -- parse bogus comment, return length or -1 if not terminated
# see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
示例2: _replace_charref
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import html5 [as 别名]
def _replace_charref(s):
    """Return the replacement text for one matched character reference.

    *s* is a match object; its first group is the reference text without
    the leading ``&`` (the ``&`` is re-added when nothing matches).
    """
    ref = s.group(1)

    if ref[0] != '#':
        # Named reference: exact hit first.
        if ref in _html5:
            return _html5[ref]
        # Otherwise try ever shorter prefixes so the longest known name
        # wins; the unmatched tail is passed through unchanged.
        for end in range(len(ref) - 1, 1, -1):
            prefix = ref[:end]
            if prefix in _html5:
                return _html5[prefix] + ref[end:]
        return '&' + ref

    # Numeric reference: hexadecimal after 'x'/'X', decimal otherwise.
    is_hex = ref[1] in 'xX'
    num = int(ref[2:].rstrip(';'), 16) if is_hex else int(ref[1:].rstrip(';'))

    if num in _invalid_charrefs:
        return _invalid_charrefs[num]
    # Surrogates and values past the last code point become U+FFFD.
    if num > 0x10FFFF or 0xD800 <= num <= 0xDFFF:
        return '\uFFFD'
    if num in _invalid_codepoints:
        return ''
    return chr(num)
示例3: write_items
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import html5 [as 别名]
def write_items(entities, file=sys.stdout):
    """Print *entities* to *file* as the source text of an ``html5`` dict.

    Entries are ordered case-insensitively by name.  Names that differ
    only in case are emitted with the uppercase spelling first, so the
    result looks like: 'Aacute', 'aacute', 'Aacute;', 'aacute;', ...
    """
    # Sorting once on (lowercased name, name) yields the same order as a
    # case-sensitive sort followed by a stable case-insensitive sort:
    # ties on the lowercased name fall back to the raw name, where the
    # uppercase characters come first.
    ordered = sorted(entities, key=lambda name: (name.lower(), name))
    render = ' {!r}: {!a},'.format

    print('html5 = {', file=file)
    for name in ordered:
        print(render(name, entities[name]), file=file)
    print('}', file=file)
示例4: _replace_charref
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import html5 [as 别名]
def _replace_charref(s):
    """Return the replacement text for one matched character reference.

    *s* is a match object; its first group is the reference text without
    the leading ``&`` (the ``&`` is re-added when nothing matches).

    Numeric references are converted with ``unichr`` when that name is
    available (Python 2, or a compatibility alias defined by the file)
    and with ``chr`` otherwise, so the function no longer raises
    ``NameError`` on Python 3.
    """
    s = s.group(1)
    if s[0] == '#':
        # numeric charref: hexadecimal after 'x'/'X', decimal otherwise
        if s[1] in 'xX':
            num = int(s[2:].rstrip(';'), 16)
        else:
            num = int(s[1:].rstrip(';'))
        if num in _invalid_charrefs:
            return _invalid_charrefs[num]
        # surrogates and values past the last code point become U+FFFD
        if 0xD800 <= num <= 0xDFFF or num > 0x10FFFF:
            return '\uFFFD'
        if num in _invalid_codepoints:
            return ''
        try:
            to_char = unichr  # noqa: F821 -- only exists on Python 2
        except NameError:
            to_char = chr
        return to_char(num)

    # named charref
    if s in _html5:
        return _html5[s]
    # find the longest matching name (as defined by the standard); the
    # unmatched tail is passed through unchanged
    for x in range(len(s) - 1, 1, -1):
        if s[:x] in _html5:
            return _html5[s[:x]] + s[x:]
    return '&' + s
示例5: unescape
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import html5 [as 别名]
def unescape(self, s):
    """Return *s* with character references replaced by their characters.

    Numeric references (``&#65;``, ``&#x41;``) are converted with
    ``chr``.  Named references are looked up in
    ``html.entities.html5``; when the full name is unknown and does not
    end in ``;``, the shortest known prefix of at least two characters
    is replaced and the rest is kept.  References that cannot be
    converted -- malformed digits, code points out of range, or numbers
    too large for ``chr`` -- are left in the text unchanged.
    """
    if '&' not in s:
        return s

    # Imported once per call instead of once per matched reference.
    from html.entities import html5

    def replaceEntities(match):
        ref = match.group(1)
        if ref[0] == "#":
            digits = ref[1:]
            try:
                if digits[0] in 'xX':
                    code = int(digits[1:].rstrip(';'), 16)
                else:
                    code = int(digits.rstrip(';'))
                return chr(code)
            except (ValueError, OverflowError):
                # ValueError: bad digits or code point >= 0x110000.
                # OverflowError: number too large for chr() to accept.
                return '&#' + digits

        if ref in html5:
            return html5[ref]
        if ref.endswith(';'):
            return '&' + ref
        for x in range(2, len(ref)):
            if ref[:x] in html5:
                return html5[ref[:x]] + ref[x:]
        return '&' + ref

    return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+;|\w{1,32};?))",
                  replaceEntities, s, flags=re.ASCII)
示例6: create_dict
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import html5 [as 别名]
def create_dict(entities):
    """Build the html5 dict from the decoded JSON object.

    Each key of *entities* has its leading ``&`` characters stripped and
    is mapped to the ``'characters'`` item of the corresponding value.
    """
    return {
        reference.lstrip('&'): details['characters']
        for reference, details in entities.items()
    }
示例7: _mapEntity
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import html5 [as 别名]
def _mapEntity(m):
    """Return the replacement text for the entity matched by *m*.

    The entity name is obtained from ``_extract_entity_name(m)``.  Names
    starting with ``#`` are handed to ``_sharp2uni``; other names are
    looked up in ``_entities``, and an unknown name is returned with its
    ``&`` prefix restored.
    """
    entity = _extract_entity_name(m)
    if not entity.startswith('#'):
        try:
            replacement = _entities[entity]
        except KeyError:
            replacement = '&' + entity
        return replacement
    return _sharp2uni(entity)
示例8: parse_endtag
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import html5 [as 别名]
def parse_endtag(self, i):
    """Process the end tag that starts at ``self.rawdata[i]``.

    Returns the index at which parsing should resume, or -1 when
    ``endendtag`` finds no terminator after *i* (presumably the closing
    ``>`` has not arrived yet -- confirm against the caller).
    """
    rawdata = self.rawdata
    assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"

    terminator = endendtag.search(rawdata, i+1)  # >
    if not terminator:
        return -1
    gtpos = terminator.end()

    wellformed = endtagfind.match(rawdata, i)  # </ + tag + >
    if wellformed:
        elem = wellformed.group(1).lower()  # script or style
        # Inside a CDATA-like element only its own end tag counts;
        # any other end tag is reported as plain data.
        if self.cdata_elem is not None and elem != self.cdata_elem:
            self.handle_data(rawdata[i:gtpos])
            return gtpos
        self.handle_endtag(elem)
        self.clear_cdata_mode()
        return gtpos

    # From here on the end tag is malformed.
    if self.cdata_elem is not None:
        self.handle_data(rawdata[i:gtpos])
        return gtpos
    if self.strict:
        self.error("bad end tag: %r" % (rawdata[i:gtpos],))

    # find the name: w3.org/TR/html5/tokenization.html#tag-name-state
    namematch = tagfind_tolerant.match(rawdata, i+2)
    if namematch:
        tagname = namematch.group().lower()
        # Skip whatever sits between the name and the next '>'.  This is
        # not fully correct for input such as </tag attr=">">, but
        # searching for '>' after the name covers most cases and is
        # much simpler.
        gtpos = rawdata.find('>', namematch.end())
        self.handle_endtag(tagname)
        return gtpos + 1

    # w3.org/TR/html5/tokenization.html#end-tag-open-state
    if rawdata[i:i+3] == '</>':
        return i + 3
    return self.parse_bogus_comment(i)
# Overridable -- finish processing of start+end tag: <tag.../>