Python name2codepoint.get方法代码示例

本文整理汇总了Python中html.entities.name2codepoint.get方法的典型用法代码示例。如果您正苦于以下问题：Python name2codepoint.get方法的具体用法？Python name2codepoint.get怎么用？Python name2codepoint.get使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类html.entities.name2codepoint的用法示例。

在下文中一共展示了name2codepoint.get方法的8个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: decodeHtmlentities

# 需要导入模块: from html.entities import name2codepoint [as 别名]
# 注意: name2codepoint 是字典而非模块，不能用 import 导入 get；请以 name2codepoint.get(name) 的形式调用
def decodeHtmlentities(string):
    """Replace HTML character references in *string* with their characters.

    The input is first passed through ``entitiesfix`` (defined elsewhere in
    this module). Decimal (``&#233;``), hexadecimal (``&#xE9;``) and named
    (``&eacute;``) references are then decoded. References that cannot be
    decoded (unknown name, malformed or out-of-range number) are left in the
    text untouched instead of raising.

    On Python 2 the replacement is UTF-8 encoded, as before; on Python 3 it
    is returned as text so it can be substituted into a ``str``.

    :param string: text possibly containing HTML character references.
    :return: the text with every decodable reference replaced.
    """
    # Resolve the entity table once per call rather than once per match.
    if PY3:
        from html.entities import name2codepoint as n2cp
    else:
        from htmlentitydefs import name2codepoint as n2cp

    string = entitiesfix(string)
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    def to_output(codepoint):
        # A str pattern applied to a str subject requires a str replacement
        # on Python 3; only Python 2 byte strings want the UTF-8 encoding.
        char = unichr(codepoint)
        return char if PY3 else char.encode('utf-8')

    def substitute_entity(match):
        ent = match.group(2)
        if match.group(1) != "#":
            cp = n2cp.get(ent)
            return to_output(cp) if cp else match.group()
        try:
            if ent[:1] in ("x", "X"):
                return to_output(int(ent[1:], 16))
            return to_output(int(ent))
        except (ValueError, OverflowError):
            # Not a number, or not a valid code point: keep the original text.
            return match.group()

    return entity_re.subn(substitute_entity, string)[0]

开发者ID:alfa-addon，项目名称:addon，代码行数:23，代码来源:scrapertools.py

示例2: get

# 需要导入模块: from html.entities import name2codepoint [as 别名]
# 注意: name2codepoint 是字典而非模块，不能用 import 导入 get；请以 name2codepoint.get(name) 的形式调用
def get(self, val, default=None):
        """Return ``str(val)`` if ``0 <= val < self.num_terms``, else ``default``."""
        in_range = 0 <= val < self.num_terms
        return str(val) if in_range else default

开发者ID:loretoparisi，项目名称:word2vec-twitter，代码行数:6，代码来源:word2vecReaderUtils.py

示例3: __init__

# 需要导入模块: from html.entities import name2codepoint [as 别名]
# 注意: name2codepoint 是字典而非模块，不能用 import 导入 get；请以 name2codepoint.get(name) 的形式调用
def __init__(self, corpus, reps):
        """
        Remember `corpus` and the target length `reps` for a repeated corpus.

        The resulting object is meant to present `corpus` as a corpus of
        exactly `reps` documents, cycling through the original documents as
        often as needed. This constructor only stores the two arguments; the
        repetition itself is performed elsewhere, lazily.

        >>> corpus = [[(1, 0.5)], []] # 2 documents
        >>> list(RepeatCorpus(corpus, 5)) # repeat 2.5 times to get 5 documents
        [[(1, 0.5)], [], [(1, 0.5)], [], [(1, 0.5)]]

        """
        self.corpus, self.reps = corpus, reps

开发者ID:loretoparisi，项目名称:word2vec-twitter，代码行数:16，代码来源:word2vecReaderUtils.py

示例4: decode_htmlentities

# 需要导入模块: from html.entities import name2codepoint [as 别名]
# 注意: name2codepoint 是字典而非模块，不能用 import 导入 get；请以 name2codepoint.get(name) 的形式调用
def decode_htmlentities(text):
    """
    Decode HTML entities in text, coded as hex, decimal or named.

    Entities that cannot be decoded (unknown name, malformed or out-of-range
    number, unrecognised radix marker) are left in the text unchanged, so one
    bad entity no longer prevents the others from being decoded. If the
    substitution as a whole fails, `text` is returned as is.

    Adapted from http://github.com/sku/python-twitter-ircbot/blob/321d94e0e40d0acc92f5bf57d126b57369da70de/html_decode.py

    >>> u = u'E tu vivrai nel terrore - L&#x27;aldil&#xE0; (1981)'
    >>> print(decode_htmlentities(u).encode('UTF-8'))
    E tu vivrai nel terrore - L'aldilà (1981)
    >>> print(decode_htmlentities("l&#39;eau"))
    l'eau
    >>> print(decode_htmlentities("foo &lt; bar"))
    foo < bar

    """
    def substitute_entity(match):
        ent = match.group(3)
        try:
            if match.group(1) == "#":
                # decoding by number
                radix_marker = match.group(2)
                if radix_marker == '':
                    # number is in decimal
                    return unichr(int(ent))
                if radix_marker in ('x', 'X'):
                    # number is in hex
                    return unichr(int(ent, 16))
            else:
                # they were using a name
                cp = n2cp.get(ent)
                if cp:
                    return unichr(cp)
        except (ValueError, OverflowError):
            # e.g., ValueError: unichr() arg not in range(0x10000) (narrow Python build)
            return match.group()
        # Always hand back a string: a None result would make re.sub()
        # silently drop the entity from the output.
        return match.group()

    try:
        return RE_HTML_ENTITY.sub(substitute_entity, text)
    except Exception:
        # Best effort: on any other failure return the input untouched, but
        # let KeyboardInterrupt / SystemExit propagate.
        return text

开发者ID:loretoparisi，项目名称:word2vec-twitter，代码行数:41，代码来源:word2vecReaderUtils.py

示例5: chunkize

# 需要导入模块: from html.entities import name2codepoint [as 别名]
# 注意: name2codepoint 是字典而非模块，不能用 import 导入 get；请以 name2codepoint.get(name) 的形式调用
def chunkize(corpus, chunksize, maxsize=0, as_numpy=False):
        """
        Yield the values of `corpus` grouped into chunks.

        Chunks are produced either serially or through a background worker:

        * `maxsize > 0`: a daemon `InputQueue` worker is started and feeds a
          `multiprocessing.Queue` holding at most `maxsize` items; chunks are
          yielded as they are taken off the queue, and a `None` item ends the
          stream. The intent is to prepare upcoming chunks in advance, hiding
          I/O latency when `corpus` is read from a slow medium.
        * otherwise: chunks are simply relayed from `chunkize_serial()`,
          without any parallelism.

        `chunksize` must be positive. Every chunk has `chunksize` values except
        possibly the last one, and a once-only stream (a generator) is an
        acceptable `corpus`.

        >>> for chunk in chunkize(range(10), 4): print(chunk)
        [0, 1, 2, 3]
        [4, 5, 6, 7]
        [8, 9]

        """
        assert chunksize > 0

        if not maxsize > 0:
            # Serial path: relay the chunks produced by chunkize_serial().
            for piece in chunkize_serial(corpus, chunksize, as_numpy=as_numpy):
                yield piece
            return

        queue = multiprocessing.Queue(maxsize=maxsize)
        producer = InputQueue(queue, corpus, chunksize, maxsize=maxsize, as_numpy=as_numpy)
        producer.daemon = True
        producer.start()
        while True:
            # The item lives in a one-element list and is pop()ped on yield,
            # presumably so this frame keeps no reference to a chunk that has
            # already been handed to the consumer.
            slot = [queue.get(block=True)]
            if slot[0] is None:
                break
            yield slot.pop()

开发者ID:loretoparisi，项目名称:word2vec-twitter，代码行数:39，代码来源:word2vecReaderUtils.py

示例6: substitute

# 需要导入模块: from html.entities import name2codepoint [as 别名]
# 注意: name2codepoint 是字典而非模块，不能用 import 导入 get；请以 name2codepoint.get(name) 的形式调用
def substitute(match):
    """Return the replacement text for one matched HTML entity.

    Group 1 of `match` is ``"#"`` for a numeric reference, and group 2 holds
    either the decimal number or the entity name. Named entities missing from
    ``n2cp`` are returned exactly as they were matched.
    """
    body = match.group(2)
    numeric = match.group(1) == "#"
    if numeric:
        return chr(int(body))
    codepoint = n2cp.get(body)
    return chr(codepoint) if codepoint else match.group()

开发者ID:gramps-project，项目名称:addons-source，代码行数:12，代码来源:HeadlineNewsGramplet.py

示例7: get_header_from_response

# 需要导入模块: from html.entities import name2codepoint [as 别名]
# 注意: name2codepoint 是字典而非模块，不能用 import 导入 get；请以 name2codepoint.get(name) 的形式调用
def get_header_from_response(url, header_to_get="", post=None, headers=None):
    """Request `url` with ``only_headers=True`` and return one response header.

    `header_to_get` is lower-cased before the lookup. `post` and `headers`
    are forwarded unchanged to ``httptools.downloadpage``. The result is
    whatever ``response.headers.get`` returns for that name (presumably
    ``None`` when the header is absent -- confirm against ``httptools``).
    """
    wanted = header_to_get.lower()
    reply = httptools.downloadpage(
        url,
        post=post,
        headers=headers,
        only_headers=True,
    )
    return reply.headers.get(wanted)

开发者ID:alfa-addon，项目名称:addon，代码行数:6，代码来源:scrapertools.py

示例8: decode_htmlentities

# 需要导入模块: from html.entities import name2codepoint [as 别名]
# 注意: name2codepoint 是字典而非模块，不能用 import 导入 get；请以 name2codepoint.get(name) 的形式调用
def decode_htmlentities(text):
    """
    Decode HTML entities in text, coded as hex, decimal or named.

    Entities that cannot be decoded (unknown name, malformed or out-of-range
    number, unrecognised radix marker) are left in the text unchanged, so one
    bad entity no longer prevents the others from being decoded. If the
    substitution as a whole fails, `text` is returned as is.

    Adapted from http://github.com/sku/python-twitter-ircbot/blob/321d94e0e40d0acc92f5bf57d126b57369da70de/html_decode.py

    >>> u = u'E tu vivrai nel terrore - L&#x27;aldil&#xE0; (1981)'
    >>> print(decode_htmlentities(u).encode('UTF-8'))
    E tu vivrai nel terrore - L'aldilà (1981)
    >>> print(decode_htmlentities("l&#39;eau"))
    l'eau
    >>> print(decode_htmlentities("foo &lt; bar"))
    foo < bar

    """

    def substitute_entity(match):
        ent = match.group(3)
        try:
            if match.group(1) == "#":
                # decoding by number
                radix_marker = match.group(2)
                if radix_marker == '':
                    # number is in decimal
                    return unichr(int(ent))
                if radix_marker in ('x', 'X'):
                    # number is in hex
                    return unichr(int(ent, 16))
            else:
                # they were using a name
                cp = n2cp.get(ent)
                if cp:
                    return unichr(cp)
        except (ValueError, OverflowError):
            # e.g., ValueError: unichr() arg not in range(0x10000) (narrow Python build)
            return match.group()
        # Always hand back a string: a None result would make re.sub()
        # silently drop the entity from the output.
        return match.group()

    try:
        return RE_HTML_ENTITY.sub(substitute_entity, text)
    except Exception:
        # Best effort: on any other failure return the input untouched, but
        # let KeyboardInterrupt / SystemExit propagate.
        return text

开发者ID:masr，项目名称:pynlpini，代码行数:42，代码来源:utils.py

注：本文中的html.entities.name2codepoint.get方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。