當前位置: 首頁>>代碼示例>>Python>>正文


Python entities.name2codepoint方法代碼示例

本文整理匯總了Python中html.entities.name2codepoint方法的典型用法代碼示例。如果您正苦於以下問題:Python entities.name2codepoint方法的具體用法?Python entities.name2codepoint怎麼用?Python entities.name2codepoint使用的例子?那麼, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在html.entities的用法示例。


在下文中一共展示了entities.name2codepoint方法的14個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: decodeHtmlentities

# 需要導入模塊: from html import entities [as 別名]
# 或者: from html.entities import name2codepoint [as 別名]
def decodeHtmlentities(string):
    """Decode named HTML entities and decimal character references.

    The input is first passed through ``entitiesfix`` (defined elsewhere in
    this module).  Every ``&name;`` whose name is found in
    ``name2codepoint`` and every ``&#NNN;`` reference is then replaced by
    the referenced character.  References that cannot be resolved (unknown
    name, non-numeric or out-of-range code) are left unchanged.

    :param string: text that may contain HTML entities.
    :return: the text with resolvable entities replaced.  Under Python 2 the
        replacements are UTF-8 encoded byte strings; under Python 3 they are
        ``str``, because ``re`` rejects ``bytes`` replacements for a ``str``
        pattern.
    """
    # Resolve the entity table once per call instead of once per match.
    if PY3:
        from html.entities import name2codepoint as n2cp
    else:
        from htmlentitydefs import name2codepoint as n2cp

    string = entitiesfix(string)
    # Raw string: "\d" / "\w" are invalid escapes in a normal string literal.
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    def to_text(codepoint):
        # Only Python 2 needs the UTF-8 byte string; a str pattern on
        # Python 3 requires the replacement to be str as well.
        char = unichr(codepoint)
        return char if PY3 else char.encode('utf-8')

    def substitute_entity(match):
        ent = match.group(2)
        if match.group(1) == "#":
            # "\w{1,8}" also lets non-numeric text such as "&#abc;" through,
            # so int() may fail; unichr() may reject the code point.
            try:
                return to_text(int(ent))
            except (ValueError, OverflowError):
                return match.group()
        cp = n2cp.get(ent)
        if cp:
            return to_text(cp)
        return match.group()

    return entity_re.subn(substitute_entity, string)[0]
開發者ID:alfa-addon,項目名稱:addon,代碼行數:23,代碼來源:scrapertools.py

示例2: unescape

# 需要導入模塊: from html import entities [as 別名]
# 或者: from html.entities import name2codepoint [as 別名]
def unescape(text):
    """Expand XML/HTML character references found in ``text``.

    Handles decimal (``&#65;``), hexadecimal (``&#x41;``) and named
    (``&amp;``) references.  Anything that cannot be resolved is returned
    exactly as it appeared in the input.
    """
    def fixup(m):
        reference = m.group(0)
        body = reference[1:-1]  # strip the leading '&' and trailing ';'
        if body[0] != '#':
            # Named entity
            try:
                code = htmlentitydefs.name2codepoint[body]
            except KeyError:
                return reference
        elif body[1] == 'x':
            # Hexadecimal character reference
            code = int(body[2:], 16)
        else:
            # Decimal character reference
            code = int(body[1:])
        try:
            if code < 256:
                return chr(code)
            return unichr(code)
        except (ValueError, OverflowError):
            return reference

    pattern = "&(?:[0-9A-Za-z]+|#(?:[0-9]+|x[0-9A-Fa-f]+));"
    return re.sub(pattern, fixup, text)
開發者ID:gtonkinhill,項目名稱:panaroo,代碼行數:24,代碼來源:isvalid.py

示例3: handle_entityref

# 需要導入模塊: from html import entities [as 別名]
# 或者: from html.entities import name2codepoint [as 別名]
def handle_entityref(self, ref):
    """Handle a named entity reference (for '&copy;', ``ref`` is 'copy').

    Does nothing when the element stack is empty.  Otherwise one piece of
    text is appended to the third item of the top element-stack entry:

    * the five XML built-ins are kept in escaped form (``&amp;`` etc.);
    * names in ``self.entities`` use their mapped value; a value shaped
      like ``&#...;`` is handed back to this handler instead;
    * names in ``name2codepoint`` become the UTF-8 encoded character;
    * any other name is kept as the literal ``&name;``.
    """
    if not self.elementstack:
        return
    if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
        piece = '&%s;' % ref
    elif ref in self.entities:
        piece = self.entities[ref]
        if piece.startswith('&#') and piece.endswith(';'):
            return self.handle_entityref(piece)
    else:
        codepoint = name2codepoint.get(ref)
        if codepoint is None:
            piece = '&%s;' % ref
        else:
            piece = chr(codepoint).encode('utf-8')
    self.elementstack[-1][2].append(piece)
開發者ID:luoliyan,項目名稱:incremental-reading,代碼行數:20,代碼來源:feedparser.py

示例4: unescape

# 需要導入模塊: from html import entities [as 別名]
# 或者: from html.entities import name2codepoint [as 別名]
def unescape(text):
    """Return ``text`` with XML character references replaced by the
    characters they refer to.

    Decimal, hexadecimal and named references are recognised; references
    that cannot be resolved are left as they are.
    """
    def fixup(m):
        matched = m.group(0)
        inner = matched[1:-1]  # the part between '&' and ';'
        if inner.startswith('#x'):
            # Hexadecimal character reference
            code = int(inner[2:], 16)
        elif inner.startswith('#'):
            # Decimal character reference
            code = int(inner[1:])
        else:
            # Named entity
            code = htmlentitydefs.name2codepoint.get(inner)
            if code is None:
                return matched
        make_char = chr if code < 256 else unichr
        try:
            return make_char(code)
        except (ValueError, OverflowError):
            return matched

    return re.sub(
        "&(?:[0-9A-Za-z]+|#(?:[0-9]+|x[0-9A-Fa-f]+));", fixup, text
    )
開發者ID:SpaceGroupUCL,項目名稱:qgisSpaceSyntaxToolkit,代碼行數:26,代碼來源:gml.py

示例5: handle_entityref

# 需要導入模塊: from html import entities [as 別名]
# 或者: from html.entities import name2codepoint [as 別名]
def handle_entityref(self, name):
    """Handle the named entity reference ``&name;``.

    Passes the resolved character (or ``None`` when ``name`` is not in
    ``name2codepoint``) together with the original reference text to
    ``self.output_char``, then records "ref" in ``self.last``.
    """
    codepoint = name2codepoint.get(name)
    char = None if codepoint is None else chr(codepoint)
    reference = '&' + name + ';'
    self.output_char(char, reference)
    self.last = "ref"
開發者ID:frostming,項目名稱:marko,代碼行數:9,代碼來源:normalize.py

示例6: unescape

# 需要導入模塊: from html import entities [as 別名]
# 或者: from html.entities import name2codepoint [as 別名]
def unescape(text):
    """
    Replace HTML or XML character references and entities in a text string.

    Decimal (``&#65;``), hexadecimal (``&#x41;``) and named (``&amp;``)
    references are replaced by the character they denote.  References that
    cannot be resolved (unknown name, malformed number, code point out of
    range) are left unchanged.

    :param text: The HTML (or XML) source text.
    :return: The text with all resolvable references replaced.
    """

    def fixup(m):
        text = m.group(0)
        code = m.group(1)
        try:
            if text[1] == "#":  # character reference
                if text[2] == "x":
                    return chr(int(code[1:], 16))
                return chr(int(code))
            # named entity
            return chr(name2codepoint[code])
        except (KeyError, ValueError, OverflowError):
            # KeyError: unknown entity name; ValueError: malformed number or
            # invalid code point; OverflowError: number too large for chr().
            return text  # leave as is

    # Raw string: "\w" is an invalid escape in a normal string literal.
    return re.sub(r"&#?(\w+);", fixup, text)


# Match HTML comments
# The buggy template {{Template:T}} has a comment terminating with just "->" 
開發者ID:husseinmozannar,項目名稱:SOQAL,代碼行數:29,代碼來源:WikiExtractor.py

示例7: name2cp

# 需要導入模塊: from html import entities [as 別名]
# 或者: from html.entities import name2codepoint [as 別名]
def name2cp(k):
    """Return the code point for the HTML entity name ``k``.

    'apos' is answered directly.  Other names are looked up in
    ``htmlentitydefs.name2codepoint`` when that table exists; otherwise the
    code point is derived from ``htmlentitydefs.entitydefs``.  A missing
    name raises KeyError from the table lookup.
    """
    if k == 'apos':
        return ord("'")
    if not hasattr(htmlentitydefs, "name2codepoint"):
        # Fallback for interpreters without name2codepoint
        # (the original comment notes it requires Python 2.3).
        definition = htmlentitydefs.entitydefs[k]
        if definition.startswith("&#") and definition.endswith(";"):
            # Stored as a numeric reference: not representable in latin-1.
            return int(definition[2:-1])
        decoded = codecs.latin_1_decode(definition)[0]
        return ord(decoded)
    return htmlentitydefs.name2codepoint[k]
開發者ID:schollz,項目名稱:extract_recipe,代碼行數:10,代碼來源:extract_recipe.py

示例8: entity2text

# 需要導入模塊: from html import entities [as 別名]
# 或者: from html.entities import name2codepoint [as 別名]
def entity2text(entitydef):
    """Convert an HTML entity reference into unicode.
    http://stackoverflow.com/a/58125/408556

    Accepts hexadecimal (``&#xA9;``), decimal (``&#169;``) and named
    (``&copy;``) references.

    :param entitydef: the full reference text, including ``&`` and ``;``.
    :return: the referenced character, or ``entitydef`` unchanged when it
        cannot be resolved (unknown name, malformed or out-of-range number,
        code point 0, or text that does not start with ``&``).
    """
    if entitydef.startswith('&#'):
        is_hex = entitydef.startswith('&#x')
        digits = entitydef[3:-1] if is_hex else entitydef[2:-1]
        try:
            cp = int(digits, 16 if is_hex else 10)
        except ValueError:
            # Malformed number such as '&#xZZ;': leave the text as it is.
            return entitydef
    elif entitydef.startswith('&'):
        # .get() so an unknown name takes the "return unchanged" path below
        # instead of raising KeyError.
        cp = name2codepoint.get(entitydef[1:-1])
    else:
        logger.debug(entitydef)
        cp = None

    if not cp:
        return entitydef
    try:
        return chr(cp)
    except (ValueError, OverflowError):
        # Code point outside the range chr() accepts.
        return entitydef
開發者ID:nerevu,項目名稱:riko,代碼行數:17,代碼來源:parsers.py

示例9: unescape

# 需要導入模塊: from html import entities [as 別名]
# 或者: from html.entities import name2codepoint [as 別名]
def unescape(text):
    """Replace HTML or XML character references and named entities.

    Based on Fredrik Lundh's recipe:
    http://effbot.org/zone/re-sub.htm#unescape-html

    Every ``&#NNN;``, ``&#xHH;`` and ``&name;`` found in ``str(text)`` is
    replaced by the referenced character.  All names in ``name2codepoint``
    are decoded, including ``amp``, ``gt`` and ``lt``.  References that
    cannot be resolved are logged and left unchanged.

    :param text: value to unescape; it is converted with ``str()`` first.
    :return: the unescaped text.  Under Python 2 the replacements are UTF-8
        encoded byte strings; under Python 3 they are ``str``, because ``re``
        rejects ``bytes`` replacements for a ``str`` pattern.
    """
    # Resolve the entity table once per call instead of once per match.
    if PY3:
        import html.entities as htmlentitydefs
    else:
        import htmlentitydefs

    def to_text(codepoint):
        char = unichr(codepoint)
        return char if PY3 else char.encode("utf-8")

    def fixup(m):
        text = m.group(0)
        if text[:2] == "&#":
            # character reference
            try:
                if text[:3] == "&#x":
                    return to_text(int(text[3:-1], 16))
                return to_text(int(text[2:-1]))
            except (ValueError, OverflowError):
                # Malformed number or code point unichr() cannot represent.
                logger.error("error de valor")
        else:
            # named entity
            try:
                return to_text(htmlentitydefs.name2codepoint[text[1:-1]])
            except KeyError:
                logger.error("keyerror")
            except (ValueError, OverflowError):
                # Code point not representable; keep the reference silently,
                # as the original catch-all did.
                pass
        return text  # leave as is

    # Raw string: "\w" is an invalid escape in a normal string literal.
    return re.sub(r"&#?\w+;", fixup, str(text))

    # Convierte los codigos html "&ntilde;" y lo reemplaza por "ñ" caracter unicode utf-8 
開發者ID:alfa-addon,項目名稱:addon,代碼行數:41,代碼來源:scrapertools.py

示例10: handle_entityref

# 需要導入模塊: from html import entities [as 別名]
# 或者: from html.entities import name2codepoint [as 別名]
def handle_entityref(self, name):
    """Forward the character for ``&name;`` to ``self.handle_data``.

    Names missing from ``entities.name2codepoint`` are ignored.
    """
    codepoint = entities.name2codepoint.get(name)
    if codepoint is None:
        return
    self.handle_data(chr(codepoint))
開發者ID:pyglet,項目名稱:pyglet,代碼行數:5,代碼來源:html.py

示例11: handle_entityref

# 需要導入模塊: from html import entities [as 別名]
# 或者: from html.entities import name2codepoint [as 別名]
def handle_entityref(self, name):
    """Append the character for ``&name;`` to the ``__text`` list.

    Nothing is appended when ``name`` is not in ``name2codepoint`` or when
    ``self.hide_output`` is set.
    """
    # The table is checked first, so hide_output is only read for known names.
    if name not in name2codepoint or self.hide_output:
        return
    self.__text.append(chr(name2codepoint[name]))
開發者ID:bkerler,項目名稱:MR,代碼行數:6,代碼來源:unhtml.py

示例12: handle_entityref

# 需要導入模塊: from html import entities [as 別名]
# 或者: from html.entities import name2codepoint [as 別名]
def handle_entityref(self, name):
    """Buffer the character denoted by the named entity ``&name;``.

    The character is appended to ``self._buf`` only when ``name`` is in
    ``name2codepoint`` and ``self.hide_output`` is false.
    """
    codepoint = name2codepoint.get(name)
    # hide_output is consulted only after the name is known to exist.
    if codepoint is not None and not self.hide_output:
        self._buf.append(chr(codepoint))
開發者ID:GiveMeAllYourCats,項目名稱:cats-blender-plugin,代碼行數:6,代碼來源:common.py

示例13: handle_entityref

# 需要導入模塊: from html import entities [as 別名]
# 或者: from html.entities import name2codepoint [as 別名]
def handle_entityref(self, name):
    """Add the character for the named entity ``&name;`` as a string node.

    :param name: entity name without the surrounding ``&`` and ``;``.

    A name that is not in ``name2codepoint`` is added as the literal
    reference text ``&name;`` instead of raising KeyError.
    """
    codepoint = name2codepoint.get(name)
    if codepoint is None:
        # Unknown entity: keep the reference text rather than crashing.
        text = '&%s;' % name
    else:
        text = chr(codepoint)
    self.add_str_node(text)
開發者ID:python273,項目名稱:telegraph,代碼行數:4,代碼來源:utils.py

示例14: handle_entityref

# 需要導入模塊: from html import entities [as 別名]
# 或者: from html.entities import name2codepoint [as 別名]
def handle_entityref(self, name):
    """Ignore the named entity reference ``&name;``.

    :param name: entity name without the surrounding ``&`` and ``;``.
    :return: always ``None``.

    The previous body resolved the character into an unused local and then
    did nothing with it, so its only observable effect was a KeyError for
    names missing from ``name2codepoint``.  The lookup is dropped so unknown
    entities are ignored like known ones.
    """
    return None
開發者ID:codex-team,項目名稱:html-slacker,代碼行數:5,代碼來源:htmlslacker.py


注:本文中的html.entities.name2codepoint方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。