当前位置: 首页>>代码示例>>Python>>正文


Python entities.name2codepoint方法代码示例

本文整理汇总了Python中html.entities.name2codepoint的典型用法代码示例(注意:name2codepoint并不是方法,而是一个将HTML实体名称映射到Unicode码点的字典)。如果您正苦于以下问题:Python entities.name2codepoint的具体用法?Python entities.name2codepoint怎么用?Python entities.name2codepoint使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解其所在模块html.entities的用法示例。


在下文中一共展示了entities.name2codepoint方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: decodeHtmlentities

# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def decodeHtmlentities(string):
    """Decode HTML character references and named entities in ``string``.

    The input is first passed through ``entitiesfix``; afterwards every
    ``&name;`` or ``&#NNN;`` reference matched by the pattern below is
    replaced by the character it denotes.  References that cannot be
    resolved (unknown name, non-numeric or out-of-range code) are left
    exactly as they appear instead of raising.

    When ``PY3`` is false the replacement is UTF-8 encoded, as before.  When
    ``PY3`` is true the character is returned as ``str``, because ``re``
    requires the replacement to have the same type as the pattern.

    :param string: text that may contain HTML entities.
    :return: the text with resolvable entities replaced.
    """
    string = entitiesfix(string)
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    # Resolve the entity table once per call rather than once per match.
    if PY3:
        from html.entities import name2codepoint as n2cp
    else:
        from htmlentitydefs import name2codepoint as n2cp

    def substitute_entity(match):
        ent = match.group(2)
        if match.group(1) == "#":
            # The pattern also lets "&#abc;" through; int() rejects it.
            try:
                char = unichr(int(ent))
            except (ValueError, OverflowError):
                return match.group()
        else:
            cp = n2cp.get(ent)
            if not cp:
                return match.group()
            char = unichr(cp)
        return char if PY3 else char.encode('utf-8')

    return entity_re.subn(substitute_entity, string)[0]
开发者ID:alfa-addon,项目名称:addon,代码行数:23,代码来源:scrapertools.py

示例2: unescape

# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def unescape(text):
    """Expand XML character references and HTML named entities in *text*.

    Numeric references (``&#65;``, ``&#x41;``) and known entity names are
    replaced by the character they denote; anything that cannot be resolved
    is left exactly as it appears.
    """
    def fixup(m):
        ref = m.group(0)
        body = ref[1:-1]  # drop the leading '&' and the trailing ';'
        if not body.startswith('#'):
            # Named entity: look it up in the HTML entity table.
            try:
                code = htmlentitydefs.name2codepoint[body]
            except KeyError:
                return ref  # unknown name, keep the reference verbatim
        elif body[1] == 'x':
            # Hexadecimal character reference.
            code = int(body[2:], 16)
        else:
            # Decimal character reference.
            code = int(body[1:])
        try:
            if code < 256:
                return chr(code)
            return unichr(code)
        except (ValueError, OverflowError):
            return ref  # code point out of range, keep the reference

    pattern = "&(?:[0-9A-Za-z]+|#(?:[0-9]+|x[0-9A-Fa-f]+));"
    return re.sub(pattern, fixup, text)
开发者ID:gtonkinhill,项目名称:panaroo,代码行数:24,代码来源:isvalid.py

示例3: handle_entityref

# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def handle_entityref(self, ref):
        """Handle a named entity reference; for '&copy;', ref is 'copy'.

        Nothing happens when the element stack is empty.  Otherwise one text
        piece is appended to the third slot of the top stack entry:

        * the five XML built-ins are kept as the literal reference;
        * names found in ``self.entities`` use the mapped text, except that
          a mapped value shaped like ``&#...;`` is passed back through this
          handler and that call's result is returned;
        * names known to ``name2codepoint`` become the UTF-8 encoded
          character;
        * anything else is kept as the literal reference.
        """
        if not self.elementstack:
            return
        if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
            piece = '&%s;' % ref
        elif ref in self.entities:
            piece = self.entities[ref]
            looks_like_charref = piece.startswith('&#') and piece.endswith(';')
            if looks_like_charref:
                return self.handle_entityref(piece)
        elif ref in name2codepoint:
            piece = chr(name2codepoint[ref]).encode('utf-8')
        else:
            piece = '&%s;' % ref
        current_pieces = self.elementstack[-1][2]
        current_pieces.append(piece)
开发者ID:luoliyan,项目名称:incremental-reading,代码行数:20,代码来源:feedparser.py

示例4: unescape

# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def unescape(text):
    """Return *text* with XML character references and known HTML entity
    names replaced by the characters they stand for.

    References that cannot be resolved are passed through untouched.
    """
    def fixup(m):
        original = m.group(0)
        is_numeric = original[1] == '#'
        if is_numeric and original[2] == 'x':
            # Hexadecimal character reference
            code = int(original[3:-1], 16)
        elif is_numeric:
            # Decimal character reference
            code = int(original[2:-1])
        else:
            # Named entity
            try:
                code = htmlentitydefs.name2codepoint[original[1:-1]]
            except KeyError:
                return original  # leave unchanged
        # Small code points go through chr, everything else through unichr.
        to_char = chr if code < 256 else unichr
        try:
            return to_char(code)
        except (ValueError, OverflowError):
            return original  # leave unchanged

    reference_re = "&(?:[0-9A-Za-z]+|#(?:[0-9]+|x[0-9A-Fa-f]+));"
    return re.sub(reference_re, fixup, text)
开发者ID:SpaceGroupUCL,项目名称:qgisSpaceSyntaxToolkit,代码行数:26,代码来源:gml.py

示例5: handle_entityref

# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def handle_entityref(self, name):
        """Forward a named entity reference to ``self.output_char``.

        The first argument is the resolved character, or ``None`` when the
        name is not in ``name2codepoint``; the second is the literal
        ``&name;`` text.  ``self.last`` is then set to ``"ref"``.
        """
        literal = '&' + name + ';'
        resolved = chr(name2codepoint[name]) if name in name2codepoint else None
        self.output_char(resolved, literal)
        self.last = "ref"
开发者ID:frostming,项目名称:marko,代码行数:9,代码来源:normalize.py

示例6: unescape

# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def unescape(text):
    """
    Replaces HTML or XML character references and entities in a text string.

    Decimal (``&#65;``) and hexadecimal (``&#x41;`` / ``&#X41;``) character
    references and named entities known to ``name2codepoint`` are replaced
    by the character they denote.  Anything that cannot be resolved
    (unknown name, malformed or out-of-range number) is left as is.

    :param text The HTML (or XML) source text.
    :return The text with all resolvable references replaced.
    """

    def fixup(m):
        ref = m.group(0)
        code = m.group(1)
        try:
            if ref[1] == "#":  # character reference
                if code[:1] in ("x", "X"):
                    return chr(int(code[1:], 16))
                return chr(int(code))
            # named entity
            return chr(name2codepoint[code])
        except (KeyError, ValueError, OverflowError):
            # KeyError: unknown entity name; ValueError: malformed number or
            # code point out of range; OverflowError: absurdly large number.
            return ref  # leave as is

    return re.sub(r"&#?(\w+);", fixup, text)


# Match HTML comments
# The buggy template {{Template:T}} has a comment terminating with just "->" 
开发者ID:husseinmozannar,项目名称:SOQAL,代码行数:29,代码来源:WikiExtractor.py

示例7: name2cp

# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def name2cp(k):
    """Return the integer code point for the HTML entity name ``k``.

    ``'apos'`` is answered directly.  Other names are looked up in
    ``htmlentitydefs.name2codepoint`` when that table exists; otherwise the
    value is derived from ``htmlentitydefs.entitydefs``.
    """
    if k == 'apos':
        return ord("'")
    if not hasattr(htmlentitydefs, "name2codepoint"):
        # name2codepoint requires Python 2.3; fall back to entitydefs.
        definition = htmlentitydefs.entitydefs[k]
        if definition.startswith("&#") and definition.endswith(";"):
            # not in latin-1: the definition is itself a numeric reference
            return int(definition[2:-1])
        decoded = codecs.latin_1_decode(definition)[0]
        return ord(decoded)
    return htmlentitydefs.name2codepoint[k]
开发者ID:schollz,项目名称:extract_recipe,代码行数:10,代码来源:extract_recipe.py

示例8: entity2text

# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def entity2text(entitydef):
    """Convert an HTML entity reference into unicode.
    http://stackoverflow.com/a/58125/408556

    Accepts hexadecimal (``&#x41;`` / ``&#X41;``), decimal (``&#65;``) and
    named (``&amp;``) references.  Input that does not start with ``&`` is
    logged at debug level and returned unchanged.  References that cannot
    be resolved (unknown name, malformed or out-of-range number, code
    point 0) are also returned unchanged rather than raising.

    :param entitydef: the complete reference, including ``&`` and ``;``.
    :return: the referenced character, or ``entitydef`` itself.
    """
    if not entitydef.startswith('&'):
        logger.debug(entitydef)
        return entitydef

    try:
        if entitydef.startswith(('&#x', '&#X')):
            cp = int(entitydef[3:-1], 16)
        elif entitydef.startswith('&#'):
            cp = int(entitydef[2:-1])
        else:
            # .get() so an unknown name uses the same fallback as below.
            cp = name2codepoint.get(entitydef[1:-1])
        return chr(cp) if cp else entitydef
    except (ValueError, OverflowError):
        # Malformed digits, or a number chr() cannot represent.
        return entitydef
开发者ID:nerevu,项目名称:riko,代码行数:17,代码来源:parsers.py

示例9: unescape

# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def unescape(text):
    """Replace HTML or XML character references and entities in a text string.

    Adapted from Fredrik Lundh
    http://effbot.org/zone/re-sub.htm#unescape-html

    Every reference matched by the pattern below is decoded, including
    ``&amp;``, ``&gt;`` and ``&lt;`` (they are present in
    ``name2codepoint`` and are not special-cased here).  References that
    cannot be resolved are left as is.

    When ``PY3`` is false the replacement is UTF-8 encoded, as before.  When
    ``PY3`` is true the character is returned as ``str``, because ``re.sub``
    requires the replacement to have the same type as the pattern.

    :param text: value to unescape; it is converted with ``str()`` first.
    :return: the text with resolvable references replaced.
    """
    if PY3:
        import html.entities as htmlentitydefs
    else:
        import htmlentitydefs

    def to_output(char):
        # Keep the replacement type compatible with the str pattern on PY3.
        return char if PY3 else char.encode("utf-8")

    def fixup(m):
        ref = m.group(0)
        if ref[:2] == "&#":
            # character reference
            try:
                if ref[:3] == "&#x":
                    return to_output(unichr(int(ref[3:-1], 16)))
                return to_output(unichr(int(ref[2:-1])))
            except (ValueError, OverflowError):
                logger.error("error de valor")
        else:
            # named entity
            try:
                return to_output(unichr(htmlentitydefs.name2codepoint[ref[1:-1]]))
            except KeyError:
                logger.error("keyerror")
            except (ValueError, OverflowError):
                # Code point not representable; fall through to the original.
                pass
        return ref  # leave as is

    return re.sub(r"&#?\w+;", fixup, str(text))

    # Convierte los codigos html "&ntilde;" y lo reemplaza por "ñ" caracter unicode utf-8 
开发者ID:alfa-addon,项目名称:addon,代码行数:41,代码来源:scrapertools.py

示例10: handle_entityref

# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def handle_entityref(self, name):
        """Pass the character for a known entity name to ``self.handle_data``.

        Names missing from ``entities.name2codepoint`` are ignored.
        """
        codepoint = entities.name2codepoint.get(name)
        if codepoint is None:
            return
        self.handle_data(chr(codepoint))
开发者ID:pyglet,项目名称:pyglet,代码行数:5,代码来源:html.py

示例11: handle_entityref

# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def handle_entityref(self, name):
        """Append the character for a known entity name to ``self.__text``.

        Nothing is appended when the name is not in ``name2codepoint`` or
        when ``self.hide_output`` is set.
        """
        # Guard clause; hide_output is only consulted for known names.
        if name not in name2codepoint or self.hide_output:
            return
        self.__text.append(chr(name2codepoint[name]))
开发者ID:bkerler,项目名称:MR,代码行数:6,代码来源:unhtml.py

示例12: handle_entityref

# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def handle_entityref(self, name):
        """Append the character for a known entity name to ``self._buf``.

        Unknown names are skipped, and nothing is appended while
        ``self.hide_output`` is true.
        """
        if name in name2codepoint:
            if not self.hide_output:
                resolved = chr(name2codepoint[name])
                self._buf.append(resolved)
开发者ID:GiveMeAllYourCats,项目名称:cats-blender-plugin,代码行数:6,代码来源:common.py

示例13: handle_entityref

# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def handle_entityref(self, name):
        """Add the text for a named entity reference via ``self.add_str_node``.

        Known names (per ``name2codepoint``) are added as the character they
        denote.  Unknown names are added as the literal ``&name;`` text
        instead of raising ``KeyError``.

        :param name: entity name without the surrounding ``&`` and ``;``.
        """
        codepoint = name2codepoint.get(name)
        if codepoint is None:
            text = '&%s;' % name
        else:
            text = chr(codepoint)
        self.add_str_node(text)
开发者ID:python273,项目名称:telegraph,代码行数:4,代码来源:utils.py

示例14: handle_entityref

# 需要导入模块: from html import entities [as 别名]
# 或者: from html.entities import name2codepoint [as 别名]
def handle_entityref(self, name):
        """Resolve a named entity reference and discard the result.

        The name is looked up in ``name2codepoint`` and converted to a
        character, but the character is not used.  An unknown name raises
        ``KeyError``.
        """
        codepoint = name2codepoint[name]
        chr(codepoint)  # computed but intentionally unused, as before
        return None
开发者ID:codex-team,项目名称:html-slacker,代码行数:5,代码来源:htmlslacker.py


注:本文中的html.entities.name2codepoint方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。