Python name2codepoint.get方法代码示例

本文整理汇总了Python中htmlentitydefs.name2codepoint.get方法的典型用法代码示例。如果您正苦于以下问题：Python name2codepoint.get方法的具体用法？Python name2codepoint.get怎么用？Python name2codepoint.get使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在的字典对象htmlentitydefs.name2codepoint的用法示例。

在下文中一共展示了name2codepoint.get方法的13个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: decodeHtmlentities

# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def decodeHtmlentities(string):
    """Replace HTML character references in *string* with their characters.

    The input is first passed through ``entitiesfix``.  Then every
    ``&name;``, decimal ``&#NNN;`` and hexadecimal ``&#xHH;`` reference that
    can be resolved is replaced by the UTF-8 encoded character.  References
    that cannot be resolved (unknown name, malformed or out-of-range number)
    are left in the text exactly as they were.

    Returns the text with the substitutions applied.
    """
    string = entitiesfix(string)
    # Raw string so the \d and \w escapes reach the regex engine untouched.
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    def substitute_entity(match):
        from htmlentitydefs import name2codepoint as n2cp
        ent = match.group(2)

        if match.group(1) == "#":
            # The pattern also lets \w{1,8} match after '#', so `ent` may be
            # a hex reference ("xE0") or garbage ("12ab").  A plain int(ent)
            # raised ValueError on those and aborted the whole substitution.
            try:
                if ent[:1] in ("x", "X"):
                    code = int(ent[1:], 16)
                else:
                    code = int(ent)
                return unichr(code).encode('utf-8')
            except (ValueError, OverflowError):
                # Not a number, or outside what unichr() accepts on this
                # build: keep the original reference.
                return match.group()

        cp = n2cp.get(ent)
        if cp:
            return unichr(cp).encode('utf-8')
        return match.group()

    return entity_re.sub(substitute_entity, string)

开发者ID:tvalacarta，项目名称:tvalacarta，代码行数:20，代码来源:scrapertools.py

示例2: decodeHtmlentities

# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def decodeHtmlentities(string):
    """Replace HTML character references in *string* with their characters.

    The input is first passed through ``entitiesfix``.  Then every
    ``&name;``, decimal ``&#NNN;`` and hexadecimal ``&#xHH;`` reference that
    can be resolved is replaced by the corresponding character: as text when
    ``PY3`` is true, UTF-8 encoded otherwise.  References that cannot be
    resolved (unknown name, malformed or out-of-range number) are left in
    the text exactly as they were.

    Returns the text with the substitutions applied.
    """
    string = entitiesfix(string)
    # Raw string so the \d and \w escapes reach the regex engine untouched.
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    def to_replacement(code):
        # NOTE(review): relies on the module providing `unichr` on Python 3
        # as well (the original code already did) -- confirm the alias exists.
        char = unichr(code)
        # A str pattern only accepts str replacements on Python 3; returning
        # bytes there makes re raise TypeError, so encode on Python 2 only.
        return char if PY3 else char.encode('utf-8')

    def substitute_entity(match):
        if PY3:
            from html.entities import name2codepoint as n2cp
        else:
            from htmlentitydefs import name2codepoint as n2cp
        ent = match.group(2)

        if match.group(1) == "#":
            # The pattern also lets \w{1,8} match after '#', so `ent` may be
            # a hex reference ("xE0") or garbage ("12ab").  A plain int(ent)
            # raised ValueError on those and aborted the whole substitution.
            try:
                if ent[:1] in ("x", "X"):
                    code = int(ent[1:], 16)
                else:
                    code = int(ent)
                return to_replacement(code)
            except (ValueError, OverflowError):
                return match.group()

        cp = n2cp.get(ent)
        if cp:
            return to_replacement(cp)
        return match.group()

    return entity_re.sub(substitute_entity, string)

开发者ID:alfa-addon，项目名称:addon，代码行数:23，代码来源:scrapertools.py

示例3: decodeHtmlentities

# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def decodeHtmlentities(string):
    """Decode named and numeric HTML entities found in *string*.

    ``entitiesfix`` is applied to the input first.  Afterwards each
    ``&name;``, ``&#NNN;`` (decimal) or ``&#xHH;`` (hexadecimal) reference
    is swapped for its UTF-8 encoded character.  Anything that cannot be
    decoded -- an unknown entity name, a malformed number, a code point
    ``unichr`` rejects -- stays in the output untouched.

    Returns the processed text.
    """
    string = entitiesfix(string)
    # Raw string: keeps \d and \w as regex escapes, not string escapes.
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    def substitute_entity(match):
        from htmlentitydefs import name2codepoint as n2cp
        ent = match.group(2)

        if match.group(1) != "#":
            # Named entity: look it up, fall back to the raw text.
            cp = n2cp.get(ent)
            if cp:
                return unichr(cp).encode('utf-8')
            return match.group()

        # Numeric reference.  Because \w{1,8} may follow '#', `ent` is not
        # guaranteed to be decimal: "&#xE0;" yields "xE0", which used to
        # make int(ent) raise ValueError and abort the whole call.
        try:
            if ent[:1] in ("x", "X"):
                number = int(ent[1:], 16)
            else:
                number = int(ent)
            return unichr(number).encode('utf-8')
        except (ValueError, OverflowError):
            return match.group()

    return entity_re.sub(substitute_entity, string)

开发者ID:pelisalacarta-ce，项目名称:pelisalacarta-ce，代码行数:20，代码来源:scrapertools.py

示例4: exportXML

# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def exportXML(self, filename, encoding="UTF-8"):
        """Export the urls and the forms found in an XML file.

        The document has a single ``<items>`` root.  Each entry of
        ``self.browsed`` becomes a ``<get url="...">`` element (taken from
        the entry's ``url`` attribute).  Each entry of ``self.forms`` becomes
        a ``<post url="..." referer="...">`` element built from indexes 0 and
        2 of the entry, with one ``<var name="..." value="...">`` child per
        key/value pair of the mapping at index 1.

        filename -- path of the file to (over)write
        encoding -- encoding name passed on to ``writexml``
        """
        xml = minidom.Document()
        items = xml.createElement("items")
        xml.appendChild(items)

        for lien in self.browsed:
            get = xml.createElement("get")
            get.setAttribute("url", lien.url)
            items.appendChild(get)

        for form in self.forms:
            post = xml.createElement("post")
            post.setAttribute("url", form[0])
            post.setAttribute("referer", form[2])

            for k, v in form[1].items():
                var = xml.createElement("var")
                var.setAttribute("name", k)
                var.setAttribute("value", v)
                post.appendChild(var)
            items.appendChild(post)

        # The context manager closes the file even when serialisation fails;
        # the explicit open()/close() pair leaked the handle in that case.
        with open(filename, "w") as fd:
            xml.writexml(fd, "    ", "    ", "\n", encoding)

开发者ID:flipkart-incubator，项目名称:watchdog，代码行数:28，代码来源:lswww.py

示例5: __init__

# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def __init__(self, url=""):
        """Set up the parser state for a page located at `url`.

        `url` is stored both as the page url and as the initial form url.
        All collectors start empty, both state counters start at 0 and the
        form method starts as "get".
        """
        HTMLParser.HTMLParser.__init__(self)

        # Location of the page; also the initial value of the form url.
        self.url = url
        self.current_form_url = url
        self.current_form_method = "get"

        # Collectors, filled elsewhere in the class.
        self.liens = []
        self.forms = []
        self.form_values = []
        self.uploads = []

        # Integer state flags.
        self.inform = 0
        self.inscript = 0

        # Placeholder value for each input type listed below.
        self.__defaults = {
            'checkbox': 'default',
            'color': '%23adeadb',
            'date': '2011-06-08',
            'datetime': '2011-06-09T20:35:34.32',
            'datetime-local': '2011-06-09T22:41',
            'file': ['pix.gif', 'GIF89a'],
            'hidden': 'default',
            'email': 'wapiti%40mailinator.com',
            'month': '2011-06',
            'number': '1337',
            'password': 'letmein',
            'radio': 'beton',
            'range': '37',
            'search': 'default',
            'submit': 'submit',
            'tel': '0606060606',
            'text': 'default',
            'time': '13:37',
            'url': 'http://wapiti.sf.net/',
            'week': '2011-W24',
        }

        # This is ugly but let's keep it while there is not a js parser
        self.common_js_strings = [
            "Msxml2.XMLHTTP",
            "application/x-www-form-urlencoded",
            ".php",
            "text/xml",
            "about:blank",
            "Microsoft.XMLHTTP",
            "text/plain",
            "text/javascript",
            "application/x-shockwave-flash",
        ]

开发者ID:flipkart-incubator，项目名称:watchdog，代码行数:38，代码来源:lswww.py

示例6: __substitute_entity

# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def __substitute_entity(self, match):
        """Return the replacement text for one matched entity reference.

        Group 1 of `match` is "#" for a numeric reference; group 2 holds the
        number or the entity name.  An unknown name yields the matched text
        unchanged.
        """
        reference = match.group(2)

        if match.group(1) == "#":
            # Numeric reference: group 2 is parsed as a decimal code point.
            return unichr(int(reference))

        codepoint = n2cp.get(reference)
        return unichr(codepoint) if codepoint else match.group()

开发者ID:flipkart-incubator，项目名称:watchdog，代码行数:13，代码来源:lswww.py

示例7: reset

# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def reset(self):
        """Put the form/link collecting state back to its empty defaults."""
        # Each collector gets its own fresh list.
        self.liens, self.forms, self.form_values = [], [], []
        self.inform = 0
        self.current_form_url = ""
        self.uploads = list()
        self.current_form_method = "get"

开发者ID:flipkart-incubator，项目名称:watchdog，代码行数:10，代码来源:lswww.py

示例8: get

# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def get(self, val, default=None):
        """Return `str(val)` when 0 <= val < self.num_terms, else `default`."""
        is_known_id = 0 <= val < self.num_terms
        return str(val) if is_known_id else default

开发者ID:loretoparisi，项目名称:word2vec-twitter，代码行数:6，代码来源:word2vecReaderUtils.py

示例9: __init__

# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def __init__(self, corpus, reps):
        """
        Remember `corpus` and the requested total length `reps`.

        The resulting object presents `corpus` as a corpus of exactly `reps`
        documents, cycling through the original documents as often as
        needed. The constructor itself only stores both arguments; the
        repetition is performed lazily elsewhere in the class (via
        `itertools`).

        >>> corpus = [[(1, 0.5)], []] # 2 documents
        >>> list(RepeatCorpus(corpus, 5)) # repeat 2.5 times to get 5 documents
        [[(1, 0.5)], [], [(1, 0.5)], [], [(1, 0.5)]]

        """
        self.corpus, self.reps = corpus, reps

开发者ID:loretoparisi，项目名称:word2vec-twitter，代码行数:16，代码来源:word2vecReaderUtils.py

示例10: decode_htmlentities

# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def decode_htmlentities(text):
    """
    Decode HTML entities in text, coded as hex, decimal or named.

    Entities that cannot be decoded (unknown name, malformed number, code
    point rejected by `unichr`) are left in the output unchanged. If the
    substitution fails for any other reason, the input is returned as is.

    Adapted from http://github.com/sku/python-twitter-ircbot/blob/321d94e0e40d0acc92f5bf57d126b57369da70de/html_decode.py

    >>> u = u'E tu vivrai nel terrore - L&#x27;aldil&#xE0; (1981)'
    >>> print(decode_htmlentities(u).encode('UTF-8'))
    E tu vivrai nel terrore - L'aldilà (1981)
    >>> print(decode_htmlentities("l&#39;eau"))
    l'eau
    >>> print(decode_htmlentities("foo &lt; bar"))
    foo < bar

    """
    def substitute_entity(match):
        ent = match.group(3)
        try:
            if match.group(1) == "#":
                # decoding by number
                base_marker = match.group(2)
                if base_marker == '':
                    # number is in decimal
                    return unichr(int(ent))
                if base_marker in ('x', 'X'):
                    # number is in hex (either case of the marker)
                    return unichr(int('0x' + ent, 16))
                # Any other marker used to fall through and return None,
                # which re.sub treats as an empty replacement, silently
                # deleting the entity. Keep the original text instead.
                return match.group()

            # they were using a name
            cp = n2cp.get(ent)
            if cp:
                return unichr(cp)
            return match.group()
        except (ValueError, OverflowError):
            # e.g., ValueError: unichr() arg not in range(0x10000) (narrow Python build)
            # Only this entity is left undecoded; the others are still processed.
            return match.group()

    try:
        return RE_HTML_ENTITY.sub(substitute_entity, text)
    except Exception:
        # Best effort: on any other error return the input. `Exception`
        # (rather than a bare except) lets KeyboardInterrupt/SystemExit through.
        return text

开发者ID:loretoparisi，项目名称:word2vec-twitter，代码行数:41，代码来源:word2vecReaderUtils.py

示例11: chunkize

# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def chunkize(corpus, chunksize, maxsize=0, as_numpy=False):
        """
        Yield successive chunks of `corpus`, each holding `chunksize` values
        (the final chunk may be shorter). One-pass inputs such as generators
        are fine.

        With `maxsize > 0` the chunks are prepared ahead of time by an
        `InputQueue` worker that feeds a `multiprocessing.Queue` holding at
        most `maxsize` chunks, so the consumer does not wait on a slow
        `corpus` (e.g. one read from harddisk) between yields.

        With the default `maxsize == 0` (or any non-positive value) there is
        no parallelism: chunks come straight from `chunkize_serial()`.

        >>> for chunk in chunkize(range(10), 4): print(chunk)
        [0, 1, 2, 3]
        [4, 5, 6, 7]
        [8, 9]

        """
        assert chunksize > 0

        if not maxsize > 0:
            # Serial mode: simply relay what chunkize_serial() produces.
            for piece in chunkize_serial(corpus, chunksize, as_numpy=as_numpy):
                yield piece
            return

        queue = multiprocessing.Queue(maxsize=maxsize)
        producer = InputQueue(queue, corpus, chunksize, maxsize=maxsize, as_numpy=as_numpy)
        producer.daemon = True
        producer.start()

        while True:
            # The chunk lives in a one-element list and is popped at the
            # yield, so this frame holds no reference to it while the
            # consumer is working on it.
            holder = [queue.get(block=True)]
            if holder[0] is None:
                # A None item ends the stream.
                break
            yield holder.pop()

开发者ID:loretoparisi，项目名称:word2vec-twitter，代码行数:39，代码来源:word2vecReaderUtils.py

示例12: substitute

# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def substitute(match):
    """Return the replacement text for one matched entity reference.

    Group 1 of `match` is "#" for a numeric reference; group 2 carries the
    decimal number or the entity name.  Names missing from `n2cp` give back
    the matched text unchanged.
    """
    reference = match.group(2)

    if match.group(1) == "#":
        # Numeric reference: parse group 2 as a decimal code point.
        return chr(int(reference))

    codepoint = n2cp.get(reference)
    return chr(codepoint) if codepoint else match.group()

开发者ID:gramps-project，项目名称:addons-source，代码行数:12，代码来源:HeadlineNewsGramplet.py

示例13: get_header_from_response

# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def get_header_from_response(url, header_to_get="", post=None, headers=None):
    """Request only the headers of `url` and return one of them.

    `header_to_get` is lower-cased before the lookup.  `post` and `headers`
    are forwarded to `httptools.downloadpage`.  The result is whatever
    `response.headers.get` returns for the lower-cased name.
    """
    wanted = header_to_get.lower()
    page = httptools.downloadpage(url, post=post, headers=headers, only_headers=True)
    return page.headers.get(wanted)

开发者ID:alfa-addon，项目名称:addon，代码行数:6，代码来源:scrapertools.py

注：本文中的htmlentitydefs.name2codepoint.get方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。