当前位置: 首页>>代码示例>>Python>>正文


Python htmlentitydefs.name2codepoint方法代码示例

本文整理汇总了Python中htmlentitydefs.name2codepoint方法的典型用法代码示例。如果您正苦于以下问题:Python htmlentitydefs.name2codepoint方法的具体用法?Python htmlentitydefs.name2codepoint怎么用?Python htmlentitydefs.name2codepoint使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在htmlentitydefs的用法示例。


在下文中一共展示了htmlentitydefs.name2codepoint方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: handle_entityref

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def handle_entityref(self, name):
        """Append the character for the named entity *name* to ``self.buf``.

        Nothing happens unless ``self.in_par`` is truthy.  A handful of
        names are mapped explicitly; any other name is resolved through
        ``name2codepoint``.  A name found in neither place is logged as a
        warning and contributes an empty string.
        """
        if not self.in_par:
            return

        # Names with a hard-wired replacement; these take precedence over
        # the name2codepoint table.
        explicit = {
            'star': u'*',
            'bquot': u'"',
            'equot': u'"',
            'lowbar': u'_',
            'parole.tax': u'',
        }

        if name in explicit:
            char = explicit[name]
        elif name in name2codepoint:
            char = unichr(name2codepoint[name])
        else:
            logging.warning("unknown entityref: %s" % name)
            char = ''

        self.buf += char
开发者ID:gooofy,项目名称:zamia-speech,代码行数:23,代码来源:parole.py

示例2: _convertEntities

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def _convertEntities(self, match):
        """Used in a call to re.sub to replace HTML, XML, and numeric
        entities with the appropriate Unicode characters. If HTML
        entities are being converted, any unrecognized entities are
        escaped.

        ``match`` is a regex match object whose group 1 is the entity
        text found between ``&`` and ``;``.  Returns the replacement
        string for that entity.  Resolution order:

        1. a name in ``name2codepoint`` (only if ``self.convertHTMLEntities``);
        2. a name in ``self.XML_ENTITIES_TO_SPECIAL_CHARS`` (converted only
           if ``self.convertXMLEntities``, otherwise re-emitted as is);
        3. a numeric reference (``#NNN`` decimal or ``#xHHH`` hex);
        4. anything else: re-emitted, with the leading ampersand escaped
           as ``&amp;`` when ``self.escapeUnrecognizedEntities`` is set.
        """
        x = match.group(1)
        if self.convertHTMLEntities and x in name2codepoint:
            return unichr(name2codepoint[x])
        elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS:
            if self.convertXMLEntities:
                return self.XML_ENTITIES_TO_SPECIAL_CHARS[x]
            else:
                return u'&%s;' % x
        elif len(x) > 0 and x[0] == '#':
            # Handle numeric entities
            if len(x) > 1 and x[1] == 'x':
                return unichr(int(x[2:], 16))
            else:
                return unichr(int(x[1:]))

        elif self.escapeUnrecognizedEntities:
            # Escape the ampersand so the unknown entity survives as
            # literal text instead of being re-interpreted as markup.
            return u'&amp;%s;' % x
        else:
            return u'&%s;' % x
开发者ID:Autodesk,项目名称:arnold-usd,代码行数:26,代码来源:__init__.py

示例3: unescape

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def unescape(text):
    """Replace character references and named entities in *text*.

    Handles decimal references (``&#65;``), hexadecimal references
    (``&#x41;``) and names found in ``name2codepoint`` (``&amp;``).
    Anything that cannot be converted -- an unknown name, malformed
    digits, or a code point outside the valid range -- is left in the
    text unchanged.

    Returns the text with all convertible references replaced.
    """
    def fixup(m):
        text = m.group(0)   # the whole reference, e.g. "&#x41;"
        code = m.group(1)   # the part after "&" / "&#", e.g. "x41"
        try:
            if text[1] == "#":  # character reference
                if text[2] == "x":
                    return unichr(int(code[1:], 16))
                else:
                    return unichr(int(code))
            else:               # named entity
                return unichr(name2codepoint[code])
        except (KeyError, ValueError, OverflowError):
            # KeyError: unknown entity name; ValueError: bad digits or an
            # out-of-range code point; OverflowError: absurdly large number.
            return text  # leave as is

    return re.sub(r"&#?(\w+);", fixup, text)

# Match HTML comments
# The buggy template {{Template:T}} has a comment terminating with just "->" 
开发者ID:motazsaad,项目名称:comparable-text-miner,代码行数:21,代码来源:WikiExtractor.py

示例4: handle_entityref

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def handle_entityref(self, ref):
        # Called for each entity reference, e.g. for '&copy;', ref will be 'copy'.
        # The resolved text is appended to the third item of the entry on
        # top of self.elementstack; nothing is done when the stack is empty.
        stack = self.elementstack
        if not stack:
            return

        if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
            # The five XML built-ins are passed through still escaped.
            piece = '&%s;' % ref
        elif ref in self.entities:
            piece = self.entities[ref]
            if piece.startswith('&#') and piece.endswith(';'):
                # The stored value is itself a reference: hand it back to
                # this handler and return whatever that call returns.
                return self.handle_entityref(piece)
        else:
            codepoint = name2codepoint.get(ref)
            if codepoint is None:
                # Unknown name: re-emit the reference untouched.
                piece = '&%s;' % ref
            else:
                piece = unichr(codepoint).encode('utf-8')

        stack[-1][2].append(piece)
开发者ID:liantian-cn,项目名称:RSSNewsGAE,代码行数:20,代码来源:feedparser.py

示例5: replace_entity

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def replace_entity(text):
  """Replace HTML character references and named entities in *text*.

  Decimal (``&#65;``) and hexadecimal (``&#x41;``) references and names
  found in ``htmlentitydefs.name2codepoint`` are replaced by the
  corresponding character.  A reference that cannot be converted is
  left in the text unchanged.
  """
  def fixup(m):
    text = m.group(0)
    if text[:2] == "&#":
      # character reference
      try:
        if text[:3] == "&#x":
          return unichr(int(text[3:-1], 16))
        else:
          return unichr(int(text[2:-1]))
      except (ValueError, OverflowError):
        # malformed digits or a code point outside the valid range
        pass
    else:
      # named entity
      try:
        text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
      except KeyError:
        pass
    # Must sit outside both branches: returning None from a failed
    # character reference would make re.sub drop the reference entirely.
    return text # leave as is
  return re.sub(r"&#?\w+;", fixup, text)
开发者ID:blissland,项目名称:blissflixx,代码行数:22,代码来源:chanutils.py

示例6: decodeHtmlentities

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def decodeHtmlentities(string):
    """Decode HTML entities in *string* into UTF-8 encoded characters.

    The input is first passed through ``entitiesfix``.  Numeric
    references (decimal ``&#233;`` or hexadecimal ``&#xE9;``) and names
    found in ``htmlentitydefs.name2codepoint`` are then replaced by the
    UTF-8 encoding of the character.  References that cannot be decoded
    are left in the text unchanged.

    Returns the resulting string.
    """
    string = entitiesfix(string)
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    def substitute_entity(match):
        from htmlentitydefs import name2codepoint as n2cp
        ent = match.group(2)
        if match.group(1) == "#":
            # The \w alternative of the pattern also lets hex ("x41") and
            # junk ("abc") through here, so int() must be guarded.
            try:
                if ent[:1] in ("x", "X"):
                    codepoint = int(ent[1:], 16)
                else:
                    codepoint = int(ent)
                return unichr(codepoint).encode('utf-8')
            except (ValueError, OverflowError):
                return match.group()  # not decodable: leave as is
        else:
            cp = n2cp.get(ent)

            if cp:
                return unichr(cp).encode('utf-8')
            else:
                return match.group()

    return entity_re.subn(substitute_entity, string)[0]
开发者ID:tvalacarta,项目名称:tvalacarta,代码行数:20,代码来源:scrapertools.py

示例7: get_entitydefs

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def get_entitydefs():
    """Return a dict mapping ``"&name;"`` strings to Unicode characters.

    When ``htmlentitydefs`` provides ``name2codepoint`` the table is
    built from it directly.  Otherwise it is derived from
    ``htmlentitydefs.entitydefs``: each value is decoded as Latin-1 and,
    if it is written as a ``&#...;`` reference, resolved through
    ``unescape_charref``.
    """
    import htmlentitydefs
    from codecs import latin_1_decode

    table = {}
    if hasattr(htmlentitydefs, "name2codepoint"):
        for name, codepoint in htmlentitydefs.name2codepoint.items():
            table["&%s;" % name] = unichr(codepoint)
        return table

    # Fallback for htmlentitydefs modules without name2codepoint.
    for name, char in htmlentitydefs.entitydefs.items():
        decoded = latin_1_decode(char)[0]
        if decoded.startswith("&#") and decoded.endswith(";"):
            decoded = unescape_charref(decoded[2:-1], None)
        table["&%s;" % name] = decoded
    return table
开发者ID:krintoxi,项目名称:NoobSec-Toolkit,代码行数:19,代码来源:clientform.py

示例8: decodeHtmlentities

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def decodeHtmlentities(string):
    """Decode HTML entities in *string* into UTF-8 encoded characters.

    The input is first passed through ``entitiesfix``.  Numeric
    references (decimal ``&#233;`` or hexadecimal ``&#xE9;``) and names
    found in ``name2codepoint`` are then replaced by the UTF-8 encoding
    of the character.  References that cannot be decoded are left in
    the text unchanged.

    Returns the resulting string.
    """
    string = entitiesfix(string)
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    def substitute_entity(match):
        # The entity table lives in a differently named module on
        # Python 3 than on Python 2.
        if PY3:
            from html.entities import name2codepoint as n2cp
        else:
            from htmlentitydefs import name2codepoint as n2cp
        ent = match.group(2)
        # NOTE(review): .encode('utf-8') yields bytes on Python 3; confirm
        # that the PY3 code path really wants a bytes replacement here.
        if match.group(1) == "#":
            # The \w alternative of the pattern also lets hex ("x41") and
            # junk ("abc") through here, so int() must be guarded.
            try:
                if ent[:1] in ("x", "X"):
                    codepoint = int(ent[1:], 16)
                else:
                    codepoint = int(ent)
                return unichr(codepoint).encode('utf-8')
            except (ValueError, OverflowError):
                return match.group()  # not decodable: leave as is
        else:
            cp = n2cp.get(ent)

            if cp:
                return unichr(cp).encode('utf-8')
            else:
                return match.group()

    return entity_re.subn(substitute_entity, string)[0]
开发者ID:alfa-addon,项目名称:addon,代码行数:23,代码来源:scrapertools.py

示例9: handle_entityref

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def handle_entityref(self, ref):
        # Called for each entity reference, e.g. for '&copy;', ref will be 'copy'.
        # The resolved text is appended to the third item of the entry on
        # top of self.elementstack; nothing is done when the stack is empty.
        if not self.elementstack:
            return
        if _debug:
            sys.stderr.write('entering handle_entityref with %s\n' % ref)

        if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
            # The five XML built-ins are passed through still escaped.
            text = '&%s;' % ref
        else:
            # entity resolution graciously donated by Aaron Swartz
            def name2cp(k):
                import htmlentitydefs
                if hasattr(htmlentitydefs, 'name2codepoint'):  # requires Python 2.3
                    return htmlentitydefs.name2codepoint[k]
                # Older htmlentitydefs: derive the code point from entitydefs.
                k = htmlentitydefs.entitydefs[k]
                if k.startswith('&#') and k.endswith(';'):
                    return int(k[2:-1])  # not in latin-1
                return ord(k)

            try:
                codepoint = name2cp(ref)
            except KeyError:
                # Unknown name: re-emit the reference untouched.
                text = '&%s;' % ref
            else:
                text = unichr(codepoint).encode('utf-8')

        self.elementstack[-1][2].append(text)
开发者ID:MyRobotLab,项目名称:pyrobotlab,代码行数:22,代码来源:feedparser.py

示例10: unescape

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def unescape(text):
    """
    Replace HTML or XML character references and named entities in *text*
    with the characters they stand for.

    References that cannot be resolved are kept exactly as written.
    """
    def fixup(m):
        entity = m.group(0)
        inner = entity[1:-1]  # strip the leading "&" and trailing ";"

        if inner[:1] == "#":
            # numeric character reference, hexadecimal when prefixed by "x"
            try:
                if inner[1:2] == "x":
                    return unichr(int(inner[2:], 16))
                return unichr(int(inner[1:]))
            except ValueError:
                return entity

        # named entity
        try:
            return unichr(htmlentitydefs.name2codepoint[inner])
        except KeyError:
            return entity

    return re.sub("&#?\w+;", fixup, text)
开发者ID:blackye,项目名称:luscan-devel,代码行数:26,代码来源:util.py

示例11: decode_entities

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def decode_entities(x):
    """Return the character for the entity captured in group 1 of match *x*.

    Group 1 is tried, in order, as a hexadecimal reference (``#x41``),
    a decimal reference (``#65``) and a name in ``name2codepoint``.
    If none of these succeeds, group 1 itself is returned.

    NOTE(review): the fallback returns group 1, not the whole match, so
    any delimiters the pattern matches outside the group are dropped
    from the output -- confirm this is intended by the caller.
    """
    entity = x.group(1)
    if entity.startswith('#'):
        if entity[1:].startswith('x'):
            try:
                return unichr(int(entity[2:], 16))
            except (ValueError, OverflowError):
                # not valid hex / out of range: fall through to decimal
                pass
        try:
            return unichr(int(entity[1:]))
        except (ValueError, OverflowError):
            # not a valid number either: fall through to the name lookup
            pass
    try:
        return unichr(name2codepoint[entity])
    except KeyError:
        return entity
开发者ID:medialab,项目名称:gazouilloire,代码行数:18,代码来源:tweets.py

示例12: unescape

# 需要导入模块: import htmlentitydefs [as 别名]
# 或者: from htmlentitydefs import name2codepoint [as 别名]
def unescape(text):
    """Replace XML character references with the referenced characters.

    Decimal and hexadecimal references and names found in
    ``htmlentitydefs.name2codepoint`` are converted; unknown names and
    code points that cannot be turned into a character are left as is.
    """
    def fixup(m):
        reference = m.group(0)
        inner = reference[1:-1]  # strip the leading "&" and trailing ";"

        if inner.startswith('#'):
            # Character reference; the pattern guarantees well-formed digits.
            digits = inner[1:]
            if digits.startswith('x'):
                code = int(digits[1:], 16)
            else:
                code = int(digits)
        else:
            # Named entity
            try:
                code = htmlentitydefs.name2codepoint[inner]
            except KeyError:
                return reference  # leave unchanged

        try:
            if code < 256:
                return chr(code)
            return unichr(code)
        except (ValueError, OverflowError):
            return reference  # leave unchanged

    return re.sub("&(?:[0-9A-Za-z]+|#(?:[0-9]+|x[0-9A-Fa-f]+));", fixup, text)
开发者ID:gtonkinhill,项目名称:panaroo,代码行数:24,代码来源:isvalid.py


注:本文中的htmlentitydefs.name2codepoint方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。