當前位置: 首頁>>代碼示例>>Python>>正文


Python name2codepoint.get方法代碼示例

本文整理匯總了Python中html.entities.name2codepoint.get方法的典型用法代碼示例。如果您正苦於以下問題:Python name2codepoint.get方法的具體用法?Python name2codepoint.get怎麽用?Python name2codepoint.get使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在html.entities.name2codepoint的用法示例。


在下文中一共展示了name2codepoint.get方法的8個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: decodeHtmlentities

# 需要導入模塊: from html.entities import name2codepoint [as 別名]
# 或者: from html.entities.name2codepoint import get [as 別名]
def decodeHtmlentities(string):
    """Replace decimal and named HTML entities in ``string`` with characters.

    The input is first passed through ``entitiesfix`` (defined elsewhere in
    this module). Entities of the form ``&#NNN;`` are decoded by code point
    and ``&name;`` entities are looked up in ``name2codepoint``. Anything
    that cannot be decoded (unknown name, non-decimal number, code point
    out of range) is left in the text unchanged.

    On Python 2 the replacement is a UTF-8 encoded byte string, as before.
    On Python 3 the replacement is a text string, because ``re`` refuses a
    bytes replacement when the subject is ``str``.

    :param string: text that may contain HTML entities.
    :return: the text with every decodable entity replaced.
    """
    string = entitiesfix(string)
    # Raw string: "\d" / "\w" in a plain literal are invalid escape sequences.
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    # Resolve the entity table once per call rather than once per match.
    if PY3:
        from html.entities import name2codepoint as n2cp
    else:
        from htmlentitydefs import name2codepoint as n2cp

    def to_native(char):
        # Python 3 needs text back from the callback; Python 2 keeps the
        # historical UTF-8 byte-string result.
        return char if PY3 else char.encode('utf-8')

    def substitute_entity(match):
        ent = match.group(2)
        if match.group(1) == "#":
            try:
                return to_native(unichr(int(ent)))
            except ValueError:
                # e.g. "&#x27;" (not decimal) or a code point unichr rejects:
                # keep the original entity instead of aborting the whole call.
                return match.group()

        cp = n2cp.get(ent)
        if cp:
            return to_native(unichr(cp))
        return match.group()

    return entity_re.subn(substitute_entity, string)[0]
開發者ID:alfa-addon,項目名稱:addon,代碼行數:23,代碼來源:scrapertools.py

示例2: get

# 需要導入模塊: from html.entities import name2codepoint [as 別名]
# 或者: from html.entities.name2codepoint import get [as 別名]
def get(self, val, default=None):
    """Return ``str(val)`` when ``val`` is a valid term index, else ``default``.

    A value is valid when ``0 <= val < self.num_terms``.
    """
    is_known_term = 0 <= val < self.num_terms
    return str(val) if is_known_term else default
開發者ID:loretoparisi,項目名稱:word2vec-twitter,代碼行數:6,代碼來源:word2vecReaderUtils.py

示例3: __init__

# 需要導入模塊: from html.entities import name2codepoint [as 別名]
# 或者: from html.entities.name2codepoint import get [as 別名]
def __init__(self, corpus, reps):
    """
    Store the wrapped `corpus` and the requested output length `reps`.

    The wrapper presents `corpus` as a corpus of exactly `reps` documents by
    cycling through the original documents until that length is reached; the
    repetition itself is performed lazily elsewhere, this constructor only
    records the two settings.

    >>> corpus = [[(1, 0.5)], []] # 2 documents
    >>> list(RepeatCorpus(corpus, 5)) # repeat 2.5 times to get 5 documents
    [[(1, 0.5)], [], [(1, 0.5)], [], [(1, 0.5)]]

    """
    self.corpus, self.reps = corpus, reps
開發者ID:loretoparisi,項目名稱:word2vec-twitter,代碼行數:16,代碼來源:word2vecReaderUtils.py

示例4: decode_htmlentities

# 需要導入模塊: from html.entities import name2codepoint [as 別名]
# 或者: from html.entities.name2codepoint import get [as 別名]
def decode_htmlentities(text):
    """
    Decode HTML entities in text, coded as hex, decimal or named.

    Matching is delegated to the module-level ``RE_HTML_ENTITY`` pattern; the
    callback below reads group 1 as the ``#`` marker of a numeric reference,
    group 2 as the radix marker (empty for decimal, ``x``/``X`` for hex) and
    group 3 as the number or entity name. Entities that cannot be decoded
    (unknown name, unrecognised radix marker) are left in place. If decoding
    raises, the input text is returned unchanged.

    Adapted from http://github.com/sku/python-twitter-ircbot/blob/321d94e0e40d0acc92f5bf57d126b57369da70de/html_decode.py

    >>> u = u'E tu vivrai nel terrore - L&#x27;aldil&#xE0; (1981)'
    >>> print(decode_htmlentities(u).encode('UTF-8'))
    E tu vivrai nel terrore - L'aldilà (1981)
    >>> print(decode_htmlentities("l&#39;eau"))
    l'eau
    >>> print(decode_htmlentities("foo &lt; bar"))
    foo < bar

    """
    def substitute_entity(match):
        ent = match.group(3)
        if match.group(1) == "#":
            # decoding by number
            radix_marker = match.group(2)
            if radix_marker == '':
                # number is in decimal
                return unichr(int(ent))
            if radix_marker in ('x', 'X'):
                # number is in hex (accept either case of the marker)
                return unichr(int(ent, 16))
            # Unknown marker: keep the entity as-is rather than returning
            # None, which would abort the whole substitution.
            return match.group()

        # they were using a name
        cp = n2cp.get(ent)
        if cp:
            return unichr(cp)
        return match.group()

    try:
        return RE_HTML_ENTITY.sub(substitute_entity, text)
    except Exception:
        # Best effort: in case of errors, return input
        # e.g., ValueError: unichr() arg not in range(0x10000) (narrow Python build)
        # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit propagate.
        return text
開發者ID:loretoparisi,項目名稱:word2vec-twitter,代碼行數:41,代碼來源:word2vecReaderUtils.py

示例5: chunkize

# 需要導入模塊: from html.entities import name2codepoint [as 別名]
# 或者: from html.entities.name2codepoint import get [as 別名]
def chunkize(corpus, chunksize, maxsize=0, as_numpy=False):
    """
    Yield the items of `corpus` grouped into consecutive chunks.

    Every chunk holds `chunksize` items except possibly the final one, which
    may be shorter. `corpus` may be a one-shot stream such as a generator.

    With `maxsize > 0`, an `InputQueue` worker is started as a daemon and
    fills a `multiprocessing.Queue` of at most `maxsize` pending chunks, so
    that chunks are prepared ahead of the consumer; this is meant to hide
    I/O latency when `corpus` is read from a slow medium. A `None` taken
    from the queue marks the end of the stream.

    With `maxsize` not greater than 0, no worker is started and chunks come
    straight from `chunkize_serial()`.

    >>> for chunk in chunkize(range(10), 4): print(chunk)
    [0, 1, 2, 3]
    [4, 5, 6, 7]
    [8, 9]

    """
    assert chunksize > 0

    if not (maxsize > 0):
        # Serial path: no queue, no worker.
        for piece in chunkize_serial(corpus, chunksize, as_numpy=as_numpy):
            yield piece
        return

    pending = multiprocessing.Queue(maxsize=maxsize)
    producer = InputQueue(pending, corpus, chunksize, maxsize=maxsize, as_numpy=as_numpy)
    producer.daemon = True
    producer.start()
    while True:
        # One-element list + pop(): presumably so this frame keeps no extra
        # reference to the chunk once it has been yielded.
        slot = [pending.get(block=True)]
        if slot[0] is None:
            break
        yield slot.pop()
開發者ID:loretoparisi,項目名稱:word2vec-twitter,代碼行數:39,代碼來源:word2vecReaderUtils.py

示例6: substitute

# 需要導入模塊: from html.entities import name2codepoint [as 別名]
# 或者: from html.entities.name2codepoint import get [as 別名]
def substitute(match):
    """Regex substitution callback that decodes one HTML entity.

    Group 1 of `match` is ``"#"`` for a numeric reference and group 2 is the
    number or the entity name. Numeric references are converted with
    ``chr(int(...))``; names are looked up in ``n2cp``. When the name has no
    (truthy) code point, the matched text is returned unchanged.
    """
    marker, payload = match.group(1), match.group(2)
    if marker == "#":
        return chr(int(payload))

    codepoint = n2cp.get(payload)
    return chr(codepoint) if codepoint else match.group()
開發者ID:gramps-project,項目名稱:addons-source,代碼行數:12,代碼來源:HeadlineNewsGramplet.py

示例7: get_header_from_response

# 需要導入模塊: from html.entities import name2codepoint [as 別名]
# 或者: from html.entities.name2codepoint import get [as 別名]
def get_header_from_response(url, header_to_get="", post=None, headers=None):
    """Fetch only the headers of `url` and return one of them.

    The header name is lower-cased before the lookup. The request is made
    through ``httptools.downloadpage`` with ``only_headers=True``; `post`
    and `headers` are forwarded to it unchanged.

    :return: whatever ``response.headers.get`` yields for the lower-cased name.
    """
    wanted = header_to_get.lower()
    reply = httptools.downloadpage(url, post=post, headers=headers, only_headers=True)
    return reply.headers.get(wanted)
開發者ID:alfa-addon,項目名稱:addon,代碼行數:6,代碼來源:scrapertools.py

示例8: decode_htmlentities

# 需要導入模塊: from html.entities import name2codepoint [as 別名]
# 或者: from html.entities.name2codepoint import get [as 別名]
def decode_htmlentities(text):
    """
    Decode HTML entities in text, coded as hex, decimal or named.

    Matching is delegated to the module-level ``RE_HTML_ENTITY`` pattern; the
    callback below reads group 1 as the ``#`` marker of a numeric reference,
    group 2 as the radix marker (empty for decimal, ``x``/``X`` for hex) and
    group 3 as the number or entity name. Entities that cannot be decoded
    (unknown name, unrecognised radix marker) are left in place. If decoding
    raises, the input text is returned unchanged.

    Adapted from http://github.com/sku/python-twitter-ircbot/blob/321d94e0e40d0acc92f5bf57d126b57369da70de/html_decode.py

    >>> u = u'E tu vivrai nel terrore - L&#x27;aldil&#xE0; (1981)'
    >>> print(decode_htmlentities(u).encode('UTF-8'))
    E tu vivrai nel terrore - L'aldilà (1981)
    >>> print(decode_htmlentities("l&#39;eau"))
    l'eau
    >>> print(decode_htmlentities("foo &lt; bar"))
    foo < bar

    """

    def substitute_entity(match):
        ent = match.group(3)
        if match.group(1) == "#":
            # decoding by number
            radix_marker = match.group(2)
            if radix_marker == '':
                # number is in decimal
                return unichr(int(ent))
            if radix_marker in ('x', 'X'):
                # number is in hex (accept either case of the marker)
                return unichr(int(ent, 16))
            # Unknown marker: keep the entity as-is rather than returning
            # None, which would abort the whole substitution.
            return match.group()

        # they were using a name
        cp = n2cp.get(ent)
        if cp:
            return unichr(cp)
        return match.group()

    try:
        return RE_HTML_ENTITY.sub(substitute_entity, text)
    except Exception:
        # Best effort: in case of errors, return input
        # e.g., ValueError: unichr() arg not in range(0x10000) (narrow Python build)
        # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit propagate.
        return text
開發者ID:masr,項目名稱:pynlpini,代碼行數:42,代碼來源:utils.py


注:本文中的html.entities.name2codepoint.get方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。