本文整理汇总了Python中html.entities.name2codepoint方法的典型用法代码示例。如果您正苦于以下问题:Python entities.name2codepoint方法的具体用法?Python entities.name2codepoint怎么用?Python entities.name2codepoint使用的例子?那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类html.entities的用法示例。
在下文中一共展示了entities.name2codepoint方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: decodeHtmlentities
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def decodeHtmlentities(string):
    """Replace decimal and named HTML entities in ``string`` with characters.

    The input is first passed through ``entitiesfix``. Every match of
    ``&name;`` or ``&#digits;`` is then replaced by the referenced character.
    Unknown names and references that cannot be parsed as a decimal number
    (for example ``&#x41;``) are left untouched.

    On Python 3 the replacement is a ``str`` character; on Python 2 it is the
    UTF-8 encoded byte string, as before.

    :param string: text that may contain HTML entities.
    :return: the text with the recognised entities decoded.
    """
    string = entitiesfix(string)
    # Raw string: "\d" and "\w" are regex escapes, not string escapes.
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    # Resolve the version-specific pieces once instead of on every match.
    if PY3:
        from html.entities import name2codepoint as n2cp

        def to_text(codepoint):
            # re.subn on a str requires the callback to return str, not bytes.
            return chr(codepoint)
    else:
        from htmlentitydefs import name2codepoint as n2cp

        def to_text(codepoint):
            return unichr(codepoint).encode('utf-8')

    def substitute_entity(match):
        ent = match.group(2)
        if match.group(1) == "#":
            try:
                return to_text(int(ent))
            except ValueError:
                # "&#" followed by something that is not a usable decimal
                # code point: keep the original text.
                return match.group()
        cp = n2cp.get(ent)
        if cp:
            return to_text(cp)
        return match.group()

    return entity_re.subn(substitute_entity, string)[0]
示例2: unescape
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def unescape(text):
    """Replace XML character references with the referenced characters.

    Handles decimal (``&#65;``) and hexadecimal (``&#x41;``) character
    references as well as named entities (``&amp;``). Unknown entity names
    and code points that cannot be converted to a character are left
    unchanged.

    :param text: string that may contain character references.
    :return: the string with the recognised references replaced.
    """
    # Resolve the entity table and the wide-character constructor locally so
    # the function runs on Python 3 as well as Python 2.
    try:
        from html.entities import name2codepoint
    except ImportError:  # Python 2
        from htmlentitydefs import name2codepoint
    try:
        wide_chr = unichr  # Python 2: chr() only covers 0-255
    except NameError:  # Python 3: chr() covers every code point
        wide_chr = chr

    def fixup(m):
        text = m.group(0)
        if text[1] == '#':
            # Character reference
            if text[2] == 'x':
                code = int(text[3:-1], 16)
            else:
                code = int(text[2:-1])
        else:
            # Named entity
            try:
                code = name2codepoint[text[1:-1]]
            except KeyError:
                return text  # leave unchanged
        try:
            return chr(code) if code < 256 else wide_chr(code)
        except (ValueError, OverflowError):
            return text  # leave unchanged

    return re.sub("&(?:[0-9A-Za-z]+|#(?:[0-9]+|x[0-9A-Fa-f]+));", fixup, text)
示例3: handle_entityref
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def handle_entityref(self, ref):
    """Append the text for a named entity reference to the current element.

    Called for each entity reference, e.g. for '&copy;', ``ref`` is 'copy'.
    Nothing happens when ``self.elementstack`` is empty.

    * The five XML predefined names are appended still escaped.
    * Names found in ``self.entities`` use that mapping's value; a value
      shaped like ``&#...;`` is handed back to ``handle_entityref``.
    * Names known to ``name2codepoint`` are appended as the UTF-8 encoded
      character.
    * Anything else is appended unchanged as ``&ref;``.
    """
    if not self.elementstack:
        return

    if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
        replacement = '&%s;' % ref
    elif ref in self.entities:
        replacement = self.entities[ref]
        if replacement.startswith('&#') and replacement.endswith(';'):
            return self.handle_entityref(replacement)
    elif ref in name2codepoint:
        replacement = chr(name2codepoint[ref]).encode('utf-8')
    else:
        replacement = '&%s;' % ref

    # The third slot of the innermost stack entry collects the text pieces.
    self.elementstack[-1][2].append(replacement)
示例4: unescape
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def unescape(text):
    """Replace XML character references in a string with the referenced
    characters.

    Decimal references (``&#65;``), hexadecimal references (``&#x41;``) and
    named entities (``&amp;``) are decoded. A reference whose name is unknown
    or whose code point cannot be turned into a character is kept as-is.

    :param text: string that may contain character references.
    :return: the string with the recognised references replaced.
    """
    # Look the helpers up inside the function so it does not depend on the
    # Python 2-only module name ``htmlentitydefs`` or builtin ``unichr``.
    try:
        from html.entities import name2codepoint as known_entities
    except ImportError:  # Python 2
        from htmlentitydefs import name2codepoint as known_entities
    try:
        to_wide_char = unichr  # Python 2
    except NameError:  # Python 3
        to_wide_char = chr

    def fixup(m):
        text = m.group(0)
        if text[1] != '#':
            # Named entity
            code = known_entities.get(text[1:-1])
            if code is None:
                return text  # leave unchanged
        elif text[2] == 'x':
            # Hexadecimal character reference
            code = int(text[3:-1], 16)
        else:
            # Decimal character reference
            code = int(text[2:-1])
        try:
            return chr(code) if code < 256 else to_wide_char(code)
        except (ValueError, OverflowError):
            return text  # leave unchanged

    return re.sub("&(?:[0-9A-Za-z]+|#(?:[0-9]+|x[0-9A-Fa-f]+));", fixup, text)
示例5: handle_entityref
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def handle_entityref(self, name):
    """Emit the character for entity ``name`` through ``self.output_char``.

    ``output_char`` receives the decoded character (or ``None`` when the
    name is not in ``name2codepoint``) together with the raw ``&name;``
    text. ``self.last`` is set to ``"ref"`` afterwards.
    """
    codepoint = name2codepoint.get(name)
    decoded = None if codepoint is None else chr(codepoint)
    raw_reference = '&' + name + ';'
    self.output_char(decoded, raw_reference)
    self.last = "ref"
示例6: unescape
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def unescape(text):
    """
    Removes HTML or XML character references and entities from a text string.

    Decimal (``&#65;``) and hexadecimal (``&#x41;``) character references and
    named entities (``&amp;``) are replaced by the character they denote.
    References that cannot be resolved are left as they are.

    :param text The HTML (or XML) source text.
    :return The plain text, as a Unicode string, if necessary.
    """
    def fixup(m):
        text = m.group(0)
        code = m.group(1)
        try:
            if text[1] == "#":  # character reference
                if text[2] == "x":
                    return chr(int(code[1:], 16))
                return chr(int(code))
            # named entity
            return chr(name2codepoint[code])
        except (KeyError, ValueError, OverflowError):
            # Unknown name, malformed number, or code point out of range.
            # Deliberately narrower than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            return text  # leave as is

    # Raw string: "\w" is a regex escape, not a string escape.
    return re.sub(r"&#?(\w+);", fixup, text)
# Match HTML comments
# The buggy template {{Template:T}} has a comment terminating with just "->"
示例7: name2cp
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def name2cp(k):
    """Return the code point (an int) for the HTML entity name ``k``.

    ``'apos'`` is answered directly. Other names are looked up in
    ``htmlentitydefs.name2codepoint`` when that table exists; otherwise the
    value is derived from ``htmlentitydefs.entitydefs``.
    """
    if k == 'apos':
        return ord("'")

    if not hasattr(htmlentitydefs, "name2codepoint"):
        # Fallback for interpreters without name2codepoint (before Python 2.3)
        definition = htmlentitydefs.entitydefs[k]
        if definition.startswith("&#") and definition.endswith(";"):
            # not in latin-1: stored as a numeric character reference
            return int(definition[2:-1])
        return ord(codecs.latin_1_decode(definition)[0])

    return htmlentitydefs.name2codepoint[k]
示例8: entity2text
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def entity2text(entitydef):
    """Convert an HTML entity reference into unicode.

    Accepts hexadecimal (``&#x41;``), decimal (``&#65;``) and named
    (``&amp;``) references. Input that does not start with ``&`` is logged
    at debug level and returned unchanged, as is a reference that resolves
    to a falsy code point.

    http://stackoverflow.com/a/58125/408556
    """
    if not entitydef.startswith('&'):
        logger.debug(entitydef)
        return entitydef

    if entitydef.startswith('&#x'):
        codepoint = int(entitydef[3:-1], 16)
    elif entitydef.startswith('&#'):
        codepoint = int(entitydef[2:-1])
    else:
        codepoint = name2codepoint[entitydef[1:-1]]

    if not codepoint:
        return entitydef
    return chr(codepoint)
示例9: unescape
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def unescape(text):
    """Replace HTML or XML character references and named entities in a
    text string with the characters they denote.

    ``text`` is converted with ``str()`` first. Decimal and hexadecimal
    character references and named entities are decoded; references that
    cannot be resolved are left as they are and an error is logged.

    On Python 3 each replacement is a ``str`` character; on Python 2 it is
    the UTF-8 encoded byte string.

    from Fredrik Lundh
    http://effbot.org/zone/re-sub.htm#unescape-html
    """
    # Resolve the version-specific pieces once, not on every match.
    if PY3:
        import html.entities as htmlentitydefs

        def to_text(codepoint):
            # re.sub on a str needs str replacements; encoding to bytes here
            # would make the substitution fail.
            return chr(codepoint)
    else:
        import htmlentitydefs

        def to_text(codepoint):
            return unichr(codepoint).encode("utf-8")

    def fixup(m):
        text = m.group(0)
        if text[:2] == "&#":
            # character reference
            try:
                if text[:3] == "&#x":
                    return to_text(int(text[3:-1], 16))
                return to_text(int(text[2:-1]))
            except (ValueError, OverflowError):
                logger.error("error de valor")
        else:
            # named entity
            try:
                text = to_text(htmlentitydefs.name2codepoint[text[1:-1]])
            except KeyError:
                logger.error("keyerror")
            except (ValueError, OverflowError):
                # Code point not representable: keep the reference as-is.
                pass
        return text  # leave as is

    # Raw string: "\w" is a regex escape, not a string escape.
    return re.sub(r"&#?\w+;", fixup, str(text))
# Converts HTML entity codes such as "&ntilde;" into the corresponding "ñ" unicode character (UTF-8)
示例10: handle_entityref
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def handle_entityref(self, name):
    """Forward the character for entity ``name`` to ``self.handle_data``.

    Names missing from ``entities.name2codepoint`` are ignored.
    """
    codepoint = entities.name2codepoint.get(name)
    if codepoint is None:
        return
    self.handle_data(chr(codepoint))
示例11: handle_entityref
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def handle_entityref(self, name):
    """Append the character for entity ``name`` to ``self.__text``.

    Nothing is appended when the name is not in ``name2codepoint`` or when
    ``self.hide_output`` is true.
    """
    if name not in name2codepoint or self.hide_output:
        return
    self.__text.append(chr(name2codepoint[name]))
示例12: handle_entityref
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def handle_entityref(self, name):
    """Append the character for entity ``name`` to ``self._buf``.

    The reference is skipped when the name is not in ``name2codepoint`` or
    when ``self.hide_output`` is true.
    """
    codepoint = name2codepoint.get(name)
    if codepoint is None or self.hide_output:
        return
    self._buf.append(chr(codepoint))
示例13: handle_entityref
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def handle_entityref(self, name):
    """Pass the character for entity ``name`` to ``self.add_str_node``.

    A name that is not in ``name2codepoint`` raises ``KeyError``.
    """
    codepoint = name2codepoint[name]
    character = chr(codepoint)
    self.add_str_node(character)
示例14: handle_entityref
# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def handle_entityref(self, name):
    """Resolve entity ``name`` to its character and discard the result.

    Nothing is stored or returned; a name that is not in
    ``name2codepoint`` raises ``KeyError``.
    """
    codepoint = name2codepoint[name]
    chr(codepoint)