本文整理汇总了Python中htmlentitydefs.name2codepoint的典型用法代码示例。如果您正苦于以下问题：Python htmlentitydefs.name2codepoint的具体用法？Python htmlentitydefs.name2codepoint怎么用？Python htmlentitydefs.name2codepoint使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解name2codepoint所在模块htmlentitydefs的用法示例。
在下文中一共展示了htmlentitydefs.name2codepoint方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: handle_entityref
# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def handle_entityref(self, name):
    """Append the text for the entity reference ``&name;`` to ``self.buf``.

    Nothing happens unless ``self.in_par`` is truthy.  A few entity names
    are given fixed replacements; any other name is looked up in
    ``name2codepoint``.  A name found in neither place is logged as a
    warning and contributes an empty string.

    :param name: entity name without the leading ``&`` and trailing ``;``.
    """
    if not self.in_par:
        return

    # Entity names with hard-wired replacements, checked before the
    # name2codepoint table.
    fixed_replacements = {
        'star': u'*',
        'bquot': u'"',
        'equot': u'"',
        'lowbar': u'_',
        'parole.tax': u'',
    }

    if name in fixed_replacements:
        c = fixed_replacements[name]
    elif name in name2codepoint:
        c = unichr(name2codepoint[name])
    else:
        # Pass the argument separately so formatting is left to logging.
        logging.warning("unknown entityref: %s", name)
        c = u''
    self.buf += c
示例2: _convertEntities
# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def _convertEntities(self, match):
"""Used in a call to re.sub to replace HTML, XML, and numeric
entities with the appropriate Unicode characters. If HTML
entities are being converted, any unrecognized entities are
escaped."""
x = match.group(1)
if self.convertHTMLEntities and x in name2codepoint:
return unichr(name2codepoint[x])
elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS:
if self.convertXMLEntities:
return self.XML_ENTITIES_TO_SPECIAL_CHARS[x]
else:
return u'&%s;' % x
elif len(x) > 0 and x[0] == '#':
# Handle numeric entities
if len(x) > 1 and x[1] == 'x':
return unichr(int(x[2:], 16))
else:
return unichr(int(x[1:]))
elif self.escapeUnrecognizedEntities:
return u'&%s;' % x
else:
return u'&%s;' % x
示例3: unescape
# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def unescape(text):
    """Replace HTML entities and numeric character references in *text*.

    Handles named entities (``&amp;``), decimal references (``&#38;``) and
    hexadecimal references (``&#x26;`` / ``&#X26;``).  A reference that
    cannot be resolved (unknown name, bad digits, code point rejected by
    ``unichr``) is left in the output exactly as it appeared.

    :param text: the string to process.
    :return: *text* with every resolvable reference replaced.
    """
    def fixup(m):
        whole = m.group(0)
        code = m.group(1)
        try:
            if whole[1] == "#":  # character reference
                if whole[2] in "xX":
                    return unichr(int(code[1:], 16))
                return unichr(int(code))
            return unichr(name2codepoint[code])  # named entity
        except (KeyError, ValueError, OverflowError):
            # Only expected lookup/conversion failures are swallowed, so
            # real programming errors are not hidden.
            return whole  # leave as is

    return re.sub(r"&#?(\w+);", fixup, text)
# Match HTML comments
# The buggy template {{Template:T}} has a comment terminating with just "->"
示例4: handle_entityref
# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def handle_entityref(self, ref):
    """Append the text for entity reference ``&ref;`` to the current element.

    The text is appended to ``self.elementstack[-1][2]``; nothing is done
    when ``self.elementstack`` is empty.

    * ``lt``, ``gt``, ``quot``, ``amp`` and ``apos`` are kept as entity
      references (``'&lt;'`` etc.).
    * A name present in ``self.entities`` is replaced by its mapped value.
    * Any other name is looked up in ``name2codepoint`` and appended as
      UTF-8 encoded text; an unknown name is kept as ``'&ref;'``.

    :param ref: entity name without the leading ``&`` and trailing ``;``.
    """
    # called for each entity reference, e.g. for '&copy;', ref will be 'copy'
    if not self.elementstack:
        return
    if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
        text = '&%s;' % ref
    elif ref in self.entities:
        text = self.entities[ref]
        if text.startswith('&#') and text.endswith(';'):
            # NOTE(review): this passes the whole '&#...;' string back in as
            # an entity *name*; unless that string is itself a known name it
            # ends up appended as '&&#...;;'.  Presumably a numeric
            # character-reference handler was intended -- confirm before
            # changing, behavior is kept as-is here.
            return self.handle_entityref(text)
    else:
        try:
            codepoint = name2codepoint[ref]
        except KeyError:
            text = '&%s;' % ref
        else:
            text = unichr(codepoint).encode('utf-8')
    self.elementstack[-1][2].append(text)
示例5: replace_entity
# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def replace_entity(text):
    """Replace HTML entities and numeric character references in *text*.

    Named entities are resolved through ``htmlentitydefs.name2codepoint``;
    numeric references may be decimal (``&#65;``) or hexadecimal
    (``&#x41;`` / ``&#X41;``).  Anything that cannot be resolved -- an
    unknown name, invalid digits, or a number ``unichr`` rejects -- is left
    unchanged.

    :param text: the string to process.
    :return: *text* with every resolvable reference replaced.
    """
    def fixup(m):
        ref = m.group(0)
        if ref.startswith("&#"):
            # character reference
            digits = ref[2:-1]
            try:
                if digits[:1] in ("x", "X"):
                    return unichr(int(digits[1:], 16))
                return unichr(int(digits))
            except (ValueError, OverflowError):
                # OverflowError: absurdly large numbers must not crash the
                # whole substitution.
                return ref  # leave as is
        # named entity
        try:
            return unichr(htmlentitydefs.name2codepoint[ref[1:-1]])
        except KeyError:
            return ref  # leave as is

    return re.sub(r"&#?\w+;", fixup, text)
示例6: decodeHtmlentities
# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def decodeHtmlentities(string):
    """Decode HTML entities in *string* to UTF-8 encoded text.

    *string* is first passed through ``entitiesfix``.  Then every
    ``&name;`` found in ``name2codepoint`` and every numeric reference
    (decimal ``&#NNN;`` or hexadecimal ``&#xHH;``) is replaced by the
    UTF-8 encoding of the referenced character.  References that cannot
    be decoded are left unchanged.

    :param string: text that may contain HTML entities.
    :return: the text with decodable entities replaced.
    """
    string = entitiesfix(string)
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    def substitute_entity(match):
        from htmlentitydefs import name2codepoint as n2cp
        ent = match.group(2)
        if match.group(1) == "#":
            # The \w alternative lets things like "#x41" or "#abc" through,
            # so hex must be handled and bad digits tolerated here.
            try:
                if ent[:1] in ("x", "X"):
                    cp = int(ent[1:], 16)
                else:
                    cp = int(ent)
                return unichr(cp).encode('utf-8')
            except (ValueError, OverflowError):
                return match.group()
        cp = n2cp.get(ent)
        if cp:
            return unichr(cp).encode('utf-8')
        return match.group()

    return entity_re.sub(substitute_entity, string)
示例7: get_entitydefs
# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def get_entitydefs():
    """Return a dict mapping ``"&name;"`` to the character it stands for.

    When ``htmlentitydefs`` provides ``name2codepoint`` the table is built
    from it directly.  Otherwise it is derived from
    ``htmlentitydefs.entitydefs``: each value is decoded as Latin-1 and
    values of the form ``&#...;`` are resolved with ``unescape_charref``.
    """
    import htmlentitydefs
    from codecs import latin_1_decode

    table = {}
    if hasattr(htmlentitydefs, "name2codepoint"):
        for entity, cp in htmlentitydefs.name2codepoint.items():
            table["&%s;" % entity] = unichr(cp)
        return table

    # No name2codepoint available: fall back to the entitydefs table.
    for entity, raw in htmlentitydefs.entitydefs.items():
        decoded = latin_1_decode(raw)[0]
        is_charref = decoded.startswith("&#") and decoded.endswith(";")
        if is_charref:
            decoded = unescape_charref(decoded[2:-1], None)
        table["&%s;" % entity] = decoded
    return table
示例8: decodeHtmlentities
# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def decodeHtmlentities(string):
    """Decode HTML entities in *string*.

    *string* is first passed through ``entitiesfix``.  Then every
    ``&name;`` found in ``name2codepoint`` and every numeric reference
    (decimal ``&#NNN;`` or hexadecimal ``&#xHH;``) is replaced by the
    referenced character.  When ``PY3`` is false the replacement is UTF-8
    encoded, as before; when ``PY3`` is true it is a plain ``str``, since
    substituting bytes into a ``str`` raises ``TypeError``.  References
    that cannot be decoded are left unchanged.

    :param string: text that may contain HTML entities.
    :return: the text with decodable entities replaced.
    """
    string = entitiesfix(string)
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    def to_text(cp):
        # Python 3 has no unichr(), and re.sub() needs str replacements there.
        if PY3:
            return chr(cp)
        return unichr(cp).encode('utf-8')

    def substitute_entity(match):
        if PY3:
            from html.entities import name2codepoint as n2cp
        else:
            from htmlentitydefs import name2codepoint as n2cp
        ent = match.group(2)
        if match.group(1) == "#":
            # The \w alternative lets things like "#x41" or "#abc" through,
            # so hex must be handled and bad digits tolerated here.
            try:
                if ent[:1] in ("x", "X"):
                    return to_text(int(ent[1:], 16))
                return to_text(int(ent))
            except (ValueError, OverflowError):
                return match.group()
        cp = n2cp.get(ent)
        if cp:
            return to_text(cp)
        return match.group()

    return entity_re.sub(substitute_entity, string)
示例9: handle_entityref
# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def handle_entityref(self, ref):
    """Append the text for entity reference ``&ref;`` to the current element.

    The text is appended to ``self.elementstack[-1][2]``; nothing is done
    when ``self.elementstack`` is empty.  ``lt``, ``gt``, ``quot``, ``amp``
    and ``apos`` are kept as entity references; any other known name is
    appended as the UTF-8 encoding of its character, and an unknown name is
    kept as ``'&ref;'``.

    :param ref: entity name without the leading ``&`` and trailing ``;``.
    """
    # called for each entity reference, e.g. for '&copy;', ref will be 'copy'
    if not self.elementstack:
        return
    if _debug:
        sys.stderr.write('entering handle_entityref with %s\n' % ref)
    if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
        text = '&%s;' % ref
    else:
        # entity resolution graciously donated by Aaron Swartz
        def name2cp(k):
            import htmlentitydefs
            if hasattr(htmlentitydefs, 'name2codepoint'):  # requires Python 2.3
                return htmlentitydefs.name2codepoint[k]
            k = htmlentitydefs.entitydefs[k]
            if k.startswith('&#') and k.endswith(';'):
                return int(k[2:-1])  # not in latin-1
            return ord(k)

        # Resolve once and reuse the result instead of looking it up twice.
        try:
            codepoint = name2cp(ref)
        except KeyError:
            text = '&%s;' % ref
        else:
            text = unichr(codepoint).encode('utf-8')
    self.elementstack[-1][2].append(text)
示例10: unescape
# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def unescape(text):
    """
    Replaces HTML or XML character references and entities in a text
    string with the characters they stand for.

    Decimal (``&#65;``) and hexadecimal (``&#x41;`` / ``&#X41;``)
    references are converted with ``unichr``; named entities are resolved
    through ``htmlentitydefs.name2codepoint``.  A reference that cannot be
    resolved is left in place unchanged.
    """
    def fixup(m):
        original = m.group(0)
        inner = original[1:-1]
        if not inner.startswith("#"):
            # named entity
            codepoint = htmlentitydefs.name2codepoint.get(inner)
            if codepoint is None:
                return original  # leave as is
            return unichr(codepoint)
        # character reference
        number = inner[1:]
        base = 10
        if number[:1] in ("x", "X"):
            number = number[1:]
            base = 16
        try:
            return unichr(int(number, base))
        except (ValueError, OverflowError):
            # Invalid digits, or a value too large for unichr().
            return original  # leave as is

    return re.sub(r"&#?\w+;", fixup, text)
示例11: decode_entities
# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def decode_entities(x):
    """Return the character for the entity captured by match object *x*.

    ``x.group(1)`` is taken as the reference body.  A body starting with
    ``#`` is tried as a hexadecimal reference (``#x..``) and then as a
    decimal one; otherwise, or if both fail, it is looked up in
    ``name2codepoint``.  When nothing resolves, ``x.group(1)`` itself is
    returned.

    NOTE(review): the fallback returns group(1), not group(0); whether
    that drops the surrounding ``&``/``;`` depends on the caller's regex,
    which is not visible here.
    """
    ref = x.group(1)
    if ref.startswith('#'):
        digits = ref[1:]
        if digits.startswith('x'):
            try:
                return unichr(int(digits[1:], 16))
            except (ValueError, OverflowError):
                pass  # not usable as hex; try decimal next
        try:
            return unichr(int(digits))
        except (ValueError, OverflowError):
            pass  # not a usable number; try it as a name
    try:
        return unichr(name2codepoint[ref])
    except (KeyError, ValueError, OverflowError):
        return ref
示例12: unescape
# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def unescape(text):
    """Replace XML character references with the referenced characters"""
    def fixup(m):
        ref = m.group(0)
        body = ref[1:-1]
        if body[0] == '#':
            # Character reference: "#x.." is hexadecimal, otherwise decimal
            if body[1] == 'x':
                code = int(body[2:], 16)
            else:
                code = int(body[1:])
        else:
            # Named entity
            table = htmlentitydefs.name2codepoint
            if body not in table:
                return ref  # leave unchanged
            code = table[body]
        try:
            if code < 256:
                return chr(code)
            return unichr(code)
        except (ValueError, OverflowError):
            return ref  # leave unchanged

    pattern = "&(?:[0-9A-Za-z]+|#(?:[0-9]+|x[0-9A-Fa-f]+));"
    return re.sub(pattern, fixup, text)