Python htmlentitydefs.name2codepoint方法代码示例

本文整理汇总了Python中htmlentitydefs.name2codepoint的典型用法代码示例。如果您正苦于以下问题：Python htmlentitydefs.name2codepoint的具体用法？Python htmlentitydefs.name2codepoint怎么用？Python htmlentitydefs.name2codepoint使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。需要说明的是，name2codepoint并不是方法，而是htmlentitydefs模块（Python 3中为html.entities）里的一个字典，用于把HTML实体名称映射到对应的Unicode码点。您也可以进一步了解其所在模块htmlentitydefs的用法示例。

在下文中一共展示了htmlentitydefs.name2codepoint方法的12个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: handle_entityref

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def handle_entityref(self, name):
        """Append the text for the named entity reference to ``self.buf``.

        Nothing happens unless ``self.in_par`` is truthy.  A few names are
        mapped to fixed replacement text; any other name is looked up in
        ``name2codepoint``.  An unknown name is logged as a warning and
        contributes an empty string.
        """
        if not self.in_par:
            return

        # Names with fixed replacement text; these are checked before the
        # standard entity table.
        fixed_text = {
            'star': u'*',
            'bquot': u'"',
            'equot': u'"',
            'lowbar': u'_',
            'parole.tax': u'',
        }

        if name in fixed_text:
            piece = fixed_text[name]
        elif name in name2codepoint:
            piece = unichr(name2codepoint[name])
        else:
            logging.warning("unknown entityref: %s" % name)
            piece = ''
        self.buf += piece

开发者ID:gooofy，项目名称:zamia-speech，代码行数:23，代码来源:parole.py

示例2: _convertEntities

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def _convertEntities(self, match):
        """Return the replacement for one entity match (a ``re.sub`` callback).

        ``match.group(1)`` is the entity body without ``&`` and ``;``.
        Checks are made in this order:

        * a name in ``name2codepoint`` becomes its Unicode character when
          ``self.convertHTMLEntities`` is set;
        * a name in ``self.XML_ENTITIES_TO_SPECIAL_CHARS`` becomes the mapped
          text when ``self.convertXMLEntities`` is set, otherwise it is
          re-emitted as ``&name;``;
        * a body starting with ``#`` is decoded as a decimal (or ``#x``
          hexadecimal) character reference;
        * anything else is re-emitted, with its ampersand escaped as
          ``&amp;`` when ``self.escapeUnrecognizedEntities`` is set.
        """
        entity = match.group(1)

        if self.convertHTMLEntities and entity in name2codepoint:
            return unichr(name2codepoint[entity])

        if entity in self.XML_ENTITIES_TO_SPECIAL_CHARS:
            if not self.convertXMLEntities:
                return u'&%s;' % entity
            return self.XML_ENTITIES_TO_SPECIAL_CHARS[entity]

        if entity[:1] == '#':
            # Handle numeric entities: "#x..." is hexadecimal, else decimal.
            if entity[1:2] == 'x':
                digits, base = entity[2:], 16
            else:
                digits, base = entity[1:], 10
            return unichr(int(digits, base))

        if self.escapeUnrecognizedEntities:
            template = u'&amp;%s;'
        else:
            template = u'&%s;'
        return template % entity

开发者ID:Autodesk，项目名称:arnold-usd，代码行数:26，代码来源:__init__.py

示例3: unescape

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def unescape(text):
    """Replace HTML character references and named entities in *text*.

    Decimal references (``&#65;``), hexadecimal references (``&#x41;`` or
    ``&#X41;``) and names found in ``name2codepoint`` are replaced by the
    corresponding character.  Anything that cannot be decoded is left
    exactly as it appeared.
    """
    def fixup(m):
        text = m.group(0)
        code = m.group(1)
        try:
            if text[1] == "#":  # character reference
                if text[2] in "xX":
                    # code is "x41" / "X41"; drop the radix marker.
                    return unichr(int(code[1:], 16))
                return unichr(int(code))
            # named entity
            return unichr(name2codepoint[code])
        except (KeyError, ValueError, OverflowError):
            # Unknown name, non-numeric digits, or a code point that
            # unichr() rejects: leave as is.
            return text

    return re.sub(r"&#?(\w+);", fixup, text)

# Match HTML comments
# The buggy template {{Template:T}} has a comment terminating with just "->"

开发者ID:motazsaad，项目名称:comparable-text-miner，代码行数:21，代码来源:WikiExtractor.py

示例4: handle_entityref

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def handle_entityref(self, ref):
        """Handle one named entity reference and append its text.

        Called for each entity reference, e.g. for '&copy;', ref will be
        'copy'.  Does nothing when ``self.elementstack`` is empty.  Otherwise
        the resulting text is appended to the list at index 2 of the last
        ``self.elementstack`` entry:

        * the five XML entities stay in escaped ``&name;`` form;
        * a name in ``self.entities`` yields its mapped text, unless that
          text looks like ``&#...;``, in which case it is passed back to
          ``handle_entityref`` and that call's result is returned;
        * a name in ``name2codepoint`` yields the UTF-8 encoded character;
        * any other name is re-emitted as ``&name;``.
        """
        if not self.elementstack:
            return

        if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
            piece = '&%s;' % ref
        elif ref in self.entities:
            piece = self.entities[ref]
            if piece.startswith('&#') and piece.endswith(';'):
                return self.handle_entityref(piece)
        else:
            try:
                codepoint = name2codepoint[ref]
            except KeyError:
                piece = '&%s;' % ref
            else:
                piece = unichr(codepoint).encode('utf-8')

        self.elementstack[-1][2].append(piece)

开发者ID:liantian-cn，项目名称:RSSNewsGAE，代码行数:20，代码来源:feedparser.py

示例5: replace_entity

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def replace_entity(text):
  """Replace HTML character references and named entities in *text*.

  Decimal (``&#65;``) and hexadecimal (``&#x41;``) character references and
  names found in ``htmlentitydefs.name2codepoint`` are replaced by the
  corresponding character.  Anything that cannot be decoded is left as is.
  """
  def fixup(m):
    text = m.group(0)
    if text[:2] == "&#":
      # character reference
      try:
        if text[:3] == "&#x":
          return unichr(int(text[3:-1], 16))
        return unichr(int(text[2:-1]))
      except (ValueError, OverflowError):
        pass
    else:
      # named entity
      try:
        text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
      except KeyError:
        pass
    # Shared fall-through for both branches: without it a malformed
    # character reference makes this callback return None, which re.sub
    # does not turn back into the matched text.
    return text # leave as is
  return re.sub(r"&#?\w+;", fixup, text)

开发者ID:blissland，项目名称:blissflixx，代码行数:22，代码来源:chanutils.py

示例6: decodeHtmlentities

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def decodeHtmlentities(string):
    """Decode HTML entities in *string* into UTF-8 encoded characters.

    The input is first passed through ``entitiesfix`` (defined elsewhere in
    this module).  Decimal and hexadecimal character references and names
    found in ``name2codepoint`` are then replaced by the UTF-8 encoding of
    the referenced character.  References that cannot be decoded are left
    unchanged.
    """
    string = entitiesfix(string)
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    def substitute_entity(match):
        from htmlentitydefs import name2codepoint as n2cp
        ent = match.group(2)
        if match.group(1) == "#":
            # The pattern's \w alternative also lets "x41" or arbitrary
            # words through after "#", so the conversion must tolerate
            # hexadecimal and malformed references.
            try:
                if ent[:1] in ("x", "X"):
                    codepoint = int(ent[1:], 16)
                else:
                    codepoint = int(ent)
                return unichr(codepoint).encode('utf-8')
            except (ValueError, OverflowError):
                return match.group()

        cp = n2cp.get(ent)
        if cp:
            return unichr(cp).encode('utf-8')
        return match.group()

    return entity_re.subn(substitute_entity, string)[0]

开发者ID:tvalacarta，项目名称:tvalacarta，代码行数:20，代码来源:scrapertools.py

示例7: get_entitydefs

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def get_entitydefs():
    """Build a dict mapping ``"&name;"`` strings to their unicode text.

    When ``htmlentitydefs`` provides ``name2codepoint`` the table is built
    directly from its code points.  Otherwise it is derived from
    ``htmlentitydefs.entitydefs``: each value is decoded as Latin-1, and a
    value of the form ``&#...;`` is resolved through ``unescape_charref``.
    """
    import htmlentitydefs
    from codecs import latin_1_decode

    table = {}
    if hasattr(htmlentitydefs, "name2codepoint"):
        for entity, number in htmlentitydefs.name2codepoint.items():
            table["&%s;" % entity] = unichr(number)
        return table

    # Fallback for an htmlentitydefs module without name2codepoint.
    for entity, raw in htmlentitydefs.entitydefs.items():
        decoded = latin_1_decode(raw)[0]
        is_charref = decoded.startswith("&#") and decoded.endswith(";")
        if is_charref:
            decoded = unescape_charref(decoded[2:-1], None)
        table["&%s;" % entity] = decoded
    return table

开发者ID:krintoxi，项目名称:NoobSec-Toolkit，代码行数:19，代码来源:clientform.py

示例8: decodeHtmlentities

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def decodeHtmlentities(string):
    """Decode HTML entities in *string* into UTF-8 encoded characters.

    The input is first passed through ``entitiesfix`` (defined elsewhere in
    this module).  Decimal and hexadecimal character references and names
    found in ``name2codepoint`` are then replaced by the UTF-8 encoding of
    the referenced character.  References that cannot be decoded are left
    unchanged.
    """
    string = entitiesfix(string)
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    def substitute_entity(match):
        # The entity table lives in a different module on Python 3.
        if PY3:
            from html.entities import name2codepoint as n2cp
        else:
            from htmlentitydefs import name2codepoint as n2cp
        ent = match.group(2)
        if match.group(1) == "#":
            # The pattern's \w alternative also lets "x41" or arbitrary
            # words through after "#", so the conversion must tolerate
            # hexadecimal and malformed references.
            try:
                if ent[:1] in ("x", "X"):
                    codepoint = int(ent[1:], 16)
                else:
                    codepoint = int(ent)
                return unichr(codepoint).encode('utf-8')
            except (ValueError, OverflowError):
                return match.group()

        cp = n2cp.get(ent)
        if cp:
            return unichr(cp).encode('utf-8')
        return match.group()

    return entity_re.subn(substitute_entity, string)[0]

开发者ID:alfa-addon，项目名称:addon，代码行数:23，代码来源:scrapertools.py

示例9: handle_entityref

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def handle_entityref(self, ref):
        """Handle one named entity reference and append its text.

        Called for each entity reference, e.g. for '&copy;', ref will be
        'copy'.  Does nothing when ``self.elementstack`` is empty.  The five
        XML entities stay in escaped ``&name;`` form; any other name is
        resolved to a code point and appended as a UTF-8 encoded character,
        or re-emitted as ``&name;`` when the lookup raises KeyError.  The
        text is appended to the list at index 2 of the last
        ``self.elementstack`` entry.
        """
        if not self.elementstack:
            return
        if _debug:
            sys.stderr.write('entering handle_entityref with %s\n' % ref)

        if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
            piece = '&%s;' % ref
        else:
            # entity resolution graciously donated by Aaron Swartz
            def lookup_codepoint(key):
                import htmlentitydefs
                if hasattr(htmlentitydefs, 'name2codepoint'): # requires Python 2.3
                    return htmlentitydefs.name2codepoint[key]
                definition = htmlentitydefs.entitydefs[key]
                if definition.startswith('&#') and definition.endswith(';'):
                    return int(definition[2:-1]) # not in latin-1
                return ord(definition)

            try:
                codepoint = lookup_codepoint(ref)
            except KeyError:
                piece = '&%s;' % ref
            else:
                piece = unichr(codepoint).encode('utf-8')

        self.elementstack[-1][2].append(piece)

开发者ID:MyRobotLab，项目名称:pyrobotlab，代码行数:22，代码来源:feedparser.py

示例10: unescape

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def unescape(text):
    """
    Replace HTML or XML character references and entities in a text string
    with the characters they stand for.

    References that cannot be decoded are left as they appear.
    """
    def fixup(m):
        entity = m.group(0)

        if entity.startswith("&#"):
            # character reference, hexadecimal when written as "&#x...;"
            is_hex = entity.startswith("&#x")
            try:
                if is_hex:
                    number = int(entity[3:-1], 16)
                else:
                    number = int(entity[2:-1])
                return unichr(number)
            except ValueError:
                return entity # leave as is

        # named entity
        try:
            return unichr(htmlentitydefs.name2codepoint[entity[1:-1]])
        except KeyError:
            return entity # leave as is

    return re.sub("&#?\w+;", fixup, text)

开发者ID:blackye，项目名称:luscan-devel，代码行数:26，代码来源:util.py

示例11: decode_entities

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def decode_entities(x):
    """Return the character for the entity captured in group 1 of match *x*.

    A body starting with ``#`` is tried as a hexadecimal reference (when it
    continues with ``x``) and then as a decimal reference.  If neither
    works, or the body is a name, it is looked up in ``name2codepoint``.
    When every attempt fails the captured group itself is returned.
    """
    entity = x.group(1)
    if entity.startswith('#'):
        number = entity[1:]
        if number.startswith('x'):
            try:
                return unichr(int(number[1:], 16))
            except (ValueError, OverflowError):
                pass
        try:
            return unichr(int(number))
        except (ValueError, OverflowError):
            pass
    try:
        return unichr(name2codepoint[entity])
    except (KeyError, ValueError, OverflowError):
        # NOTE(review): this returns group(1), not the whole match, so the
        # surrounding delimiters depend on the caller's pattern; confirm
        # that is intended.
        return entity

开发者ID:medialab，项目名称:gazouilloire，代码行数:18，代码来源:tweets.py

示例12: unescape

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def unescape(text):
    """Substitute XML character references and named entities in *text*
    with the characters they refer to; undecodable ones stay unchanged."""
    def fixup(m):
        entity = m.group(0)

        if entity[1] != '#':
            # Named entity
            name = entity[1:-1]
            if name not in htmlentitydefs.name2codepoint:
                return entity  # leave unchanged
            code = htmlentitydefs.name2codepoint[name]
        elif entity[2] == 'x':
            # Hexadecimal character reference
            code = int(entity[3:-1], 16)
        else:
            # Decimal character reference
            code = int(entity[2:-1])

        try:
            if code < 256:
                return chr(code)
            return unichr(code)
        except (ValueError, OverflowError):
            return entity  # leave unchanged

    return re.sub("&(?:[0-9A-Za-z]+|#(?:[0-9]+|x[0-9A-Fa-f]+));", fixup, text)

开发者ID:gtonkinhill，项目名称:panaroo，代码行数:24，代码来源:isvalid.py

注：本文中的htmlentitydefs.name2codepoint方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。