本文整理汇总了 Python 中 htmlentitydefs.name2codepoint.get 方法的典型用法代码示例。如果您正苦于以下问题：Python name2codepoint.get 方法的具体用法？Python name2codepoint.get 怎么用？Python name2codepoint.get 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所属的字典对象 htmlentitydefs.name2codepoint 的用法示例。
在下文中一共展示了 name2codepoint.get 方法的 13 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: decodeHtmlentities
# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 注意: name2codepoint 是一个字典而不是模块，get 是它的方法，无法单独导入；请直接调用 name2codepoint.get(...)
def decodeHtmlentities(string):
    """Replace HTML entities in ``string`` with UTF-8 encoded characters.

    The input is first passed through ``entitiesfix`` (defined elsewhere in
    this file). Three kinds of reference are then handled:

    * decimal numeric references such as ``&#233;``
    * hexadecimal numeric references such as ``&#xE9;``
    * named references such as ``&eacute;``

    References that cannot be decoded (unknown name, malformed number, code
    point out of range) are left in the text unchanged.

    :param string: text possibly containing HTML entities.
    :return: the text with every decodable entity replaced.
    """
    string = entitiesfix(string)
    # Raw string so that \d and \w are regex escapes, not string escapes.
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    def to_utf8(codepoint):
        return unichr(codepoint).encode('utf-8')

    def substitute_entity(match):
        from htmlentitydefs import name2codepoint as n2cp
        ent = match.group(2)
        if match.group(1) == "#":
            # The \w{1,8} alternative also matches hex references ("x27") and
            # junk ("abc"); int(ent) used to raise ValueError on those and
            # abort the whole substitution.
            try:
                if ent[:1] in ("x", "X"):
                    return to_utf8(int(ent[1:], 16))
                return to_utf8(int(ent))
            except (ValueError, OverflowError):
                return match.group()
        cp = n2cp.get(ent)
        if cp:
            return to_utf8(cp)
        return match.group()

    return entity_re.subn(substitute_entity, string)[0]
示例2: decodeHtmlentities
# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def decodeHtmlentities(string):
    """Replace HTML entities in ``string`` with the characters they denote.

    The input is first passed through ``entitiesfix`` (defined elsewhere in
    this file). Decimal (``&#233;``), hexadecimal (``&#xE9;``) and named
    (``&eacute;``) references are decoded; anything that cannot be decoded
    is left in the text unchanged.

    On Python 2 each replacement is a UTF-8 encoded byte string, as before.
    On Python 3 (``PY3`` true) the replacement is a ``str``.

    :param string: text possibly containing HTML entities.
    :return: the text with every decodable entity replaced.
    """
    string = entitiesfix(string)
    # Raw string so that \d and \w are regex escapes, not string escapes.
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    def to_char(codepoint):
        if PY3:
            # A str pattern requires str replacements on Python 3; returning
            # the encoded bytes made re.subn raise TypeError.
            return chr(codepoint)
        return unichr(codepoint).encode('utf-8')

    def substitute_entity(match):
        if PY3:
            from html.entities import name2codepoint as n2cp
        else:
            from htmlentitydefs import name2codepoint as n2cp
        ent = match.group(2)
        if match.group(1) == "#":
            # The \w{1,8} alternative also matches hex references ("x27") and
            # junk ("abc"); int(ent) used to raise ValueError on those.
            try:
                if ent[:1] in ("x", "X"):
                    return to_char(int(ent[1:], 16))
                return to_char(int(ent))
            except (ValueError, OverflowError):
                return match.group()
        cp = n2cp.get(ent)
        if cp:
            return to_char(cp)
        return match.group()

    return entity_re.subn(substitute_entity, string)[0]
示例3: decodeHtmlentities
# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def decodeHtmlentities(string):
    """Decode HTML entities found in ``string`` into UTF-8 encoded text.

    ``entitiesfix`` (defined elsewhere in this file) is applied first. Then
    decimal references (``&#38;``), hexadecimal references (``&#x26;``) and
    named references (``&amp;``) are substituted. A reference whose name is
    unknown, whose number is malformed, or whose code point is out of range
    is kept as it appears in the input.

    :param string: text possibly containing HTML entities.
    :return: the text with every decodable entity replaced.
    """
    string = entitiesfix(string)
    # Raw string so that \d and \w are regex escapes, not string escapes.
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

    def encode_codepoint(codepoint):
        return unichr(codepoint).encode('utf-8')

    def substitute_entity(match):
        from htmlentitydefs import name2codepoint as n2cp
        ent = match.group(2)
        if match.group(1) == "#":
            # "&#x27;" and "&#foo;" both match the \w{1,8} alternative; a
            # plain int(ent) raised ValueError on them and aborted decoding.
            try:
                if ent[:1] in ("x", "X"):
                    return encode_codepoint(int(ent[1:], 16))
                return encode_codepoint(int(ent))
            except (ValueError, OverflowError):
                return match.group()
        cp = n2cp.get(ent)
        if cp:
            return encode_codepoint(cp)
        return match.group()

    return entity_re.subn(substitute_entity, string)[0]
示例4: exportXML
# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def exportXML(self, filename, encoding="UTF-8"):
    """Export the urls and the forms found in an XML file.

    The document has a single ``<items>`` root. Every entry of
    ``self.browsed`` becomes a ``<get url="...">`` element. Every entry of
    ``self.forms`` becomes a ``<post url="..." referer="...">`` element
    holding one ``<var name="..." value="...">`` child per form field.

    :param filename: path of the file to write (opened in text mode "w").
    :param encoding: encoding name passed to ``writexml`` for the XML
        declaration.
    """
    xml = minidom.Document()
    items = xml.createElement("items")
    xml.appendChild(items)

    for lien in self.browsed:
        get = xml.createElement("get")
        get.setAttribute("url", lien.url)
        items.appendChild(get)

    for form in self.forms:
        # form is indexed as: [0] target url, [1] field mapping, [2] referer
        post = xml.createElement("post")
        post.setAttribute("url", form[0])
        post.setAttribute("referer", form[2])
        for k, v in form[1].items():
            var = xml.createElement("var")
            var.setAttribute("name", k)
            var.setAttribute("value", v)
            post.appendChild(var)
        items.appendChild(post)

    # Context manager so the handle is closed even if writexml raises.
    with open(filename, "w") as fd:
        xml.writexml(fd, " ", " ", "\n", encoding)
示例5: __init__
# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def __init__(self, url=""):
    """Set up the parser state for a page located at ``url``.

    :param url: address of the page being parsed; stored as ``self.url``
        and used as the initial ``current_form_url``.
    """
    HTMLParser.HTMLParser.__init__(self)

    # Page / current-form bookkeeping.
    self.url = url
    self.current_form_url = url
    self.current_form_method = "get"

    # Collected results.
    self.liens = []
    self.forms = []
    self.form_values = []
    self.uploads = []

    # Parsing-state flags, both starting at 0.
    self.inform = 0
    self.inscript = 0

    # Placeholder value per input type. The 'file' entry is a two-item
    # list -- presumably [file name, file content]; confirm against the
    # code that reads it.
    placeholder_values = {
        'checkbox': 'default',
        'color': '%23adeadb',
        'date': '2011-06-08',
        'datetime': '2011-06-09T20:35:34.32',
        'datetime-local': '2011-06-09T22:41',
        'file': ['pix.gif', 'GIF89a'],
        'hidden': 'default',
        'email': 'wapiti%40mailinator.com',
        'month': '2011-06',
        'number': '1337',
        'password': 'letmein',
        'radio': 'beton',
        'range': '37',
        'search': 'default',
        'submit': 'submit',
        'tel': '0606060606',
        'text': 'default',
        'time': '13:37',
        'url': 'http://wapiti.sf.net/',
        'week': '2011-W24',
    }
    self.__defaults = placeholder_values

    # This is ugly but let's keep it while there is not a js parser
    self.common_js_strings = [
        "Msxml2.XMLHTTP",
        "application/x-www-form-urlencoded",
        ".php",
        "text/xml",
        "about:blank",
        "Microsoft.XMLHTTP",
        "text/plain",
        "text/javascript",
        "application/x-shockwave-flash",
    ]
示例6: __substitute_entity
# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def __substitute_entity(self, match):
    """Return the replacement text for one matched HTML entity.

    Group 1 of ``match`` is ``"#"`` for a numeric reference and group 2 is
    the number or the entity name. Names missing from ``n2cp`` are returned
    exactly as matched.
    """
    body = match.group(2)
    numeric = match.group(1) == "#"
    if numeric:
        return unichr(int(body))

    codepoint = n2cp.get(body)
    return unichr(codepoint) if codepoint else match.group()
示例7: reset
# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def reset(self):
    """Return the collected links/forms and the form-tracking state to their initial values."""
    # Each collection gets its own fresh list object.
    for attr in ("liens", "forms", "form_values", "uploads"):
        setattr(self, attr, [])

    self.inform = 0
    self.current_form_url = ""
    self.current_form_method = "get"
示例8: get
# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def get(self, val, default=None):
    """Return ``str(val)`` when ``0 <= val < self.num_terms``, otherwise ``default``."""
    known_id = 0 <= val < self.num_terms
    return str(val) if known_id else default
示例9: __init__
# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def __init__(self, corpus, reps):
    """
    Present `corpus` as a corpus containing exactly `reps` documents.

    Documents of `corpus` are repeated in order, cycling back to the start
    as often as needed, until `len(result)==reps` documents have been
    produced. The repetition happens lazily through `itertools`.

    >>> corpus = [[(1, 0.5)], []] # 2 documents
    >>> list(RepeatCorpus(corpus, 5)) # repeat 2.5 times to get 5 documents
    [[(1, 0.5)], [], [(1, 0.5)], [], [(1, 0.5)]]

    """
    # The constructor only records its arguments.
    self.corpus, self.reps = corpus, reps
示例10: decode_htmlentities
# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def decode_htmlentities(text):
    """
    Decode HTML entities in text, coded as hex, decimal or named.

    Entities whose name is not in the entity table are left untouched. If
    decoding raises for any reason (e.g. `ValueError: unichr() arg not in
    range(0x10000)` on a narrow Python build), the original `text` is
    returned unchanged.

    Adapted from http://github.com/sku/python-twitter-ircbot/blob/321d94e0e40d0acc92f5bf57d126b57369da70de/html_decode.py

    >>> u = u'E tu vivrai nel terrore - L&#x27;aldil&#xE0; (1981)'
    >>> print(decode_htmlentities(u).encode('UTF-8'))
    E tu vivrai nel terrore - L'aldilà (1981)
    >>> print(decode_htmlentities("l&#39;eau"))
    l'eau
    >>> print(decode_htmlentities("foo &lt; bar"))
    foo < bar

    """
    def substitute_entity(match):
        ent = match.group(3)
        if match.group(1) == "#":
            # decoding by number
            radix_marker = match.group(2)
            if radix_marker == '':
                # number is in decimal
                return unichr(int(ent))
            if radix_marker in ('x', 'X'):
                # number is in hex (either case of the marker)
                return unichr(int('0x' + ent, 16))
            # Unrecognised marker: keep the entity as written. This path
            # used to fall off the end and return None.
            return match.group()

        # they were using a name
        cp = n2cp.get(ent)
        if cp:
            return unichr(cp)
        return match.group()

    try:
        return RE_HTML_ENTITY.sub(substitute_entity, text)
    except Exception:
        # Best effort: on any decoding error return the input as-is.
        # `Exception` rather than a bare `except` so KeyboardInterrupt and
        # SystemExit still propagate.
        return text
示例11: chunkize
# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def chunkize(corpus, chunksize, maxsize=0, as_numpy=False):
    """
    Yield the items of `corpus` grouped into successive chunks.

    With `maxsize` not greater than zero, the chunks come straight from
    `chunkize_serial()` with no prefetching.

    With `maxsize > 0`, an `InputQueue` worker is started as a daemon and
    fills a `multiprocessing.Queue` holding at most `maxsize` chunks; this
    generator then yields chunks taken from that queue until it receives
    `None`. The intent is to overlap reading `corpus` with consuming the
    chunks, which helps when `corpus` comes from a slow medium.

    Every chunk has `chunksize` items except possibly the last one. A
    once-only input stream (e.g. a generator) is acceptable.

    >>> for chunk in chunkize(range(10), 4): print(chunk)
    [0, 1, 2, 3]
    [4, 5, 6, 7]
    [8, 9]

    """
    assert chunksize > 0

    if not maxsize > 0:
        # No parallelism requested: delegate to the serial chunker.
        for piece in chunkize_serial(corpus, chunksize, as_numpy=as_numpy):
            yield piece
        return

    pending = multiprocessing.Queue(maxsize=maxsize)
    producer = InputQueue(pending, corpus, chunksize, maxsize=maxsize, as_numpy=as_numpy)
    producer.daemon = True
    producer.start()

    while True:
        # Hold the chunk in a one-item list and pop() it at yield time, so
        # this frame keeps no reference to the chunk while suspended.
        slot = [pending.get(block=True)]
        if slot[0] is None:
            # None is the end-of-stream marker.
            break
        yield slot.pop()
示例12: substitute
# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def substitute(match):
    """Return the replacement for one matched HTML entity.

    Group 1 of ``match`` is ``"#"`` for a numeric reference and group 2 is
    the number or the entity name. A name missing from ``n2cp`` yields the
    matched text unchanged.
    """
    token = match.group(2)
    if match.group(1) == "#":
        return chr(int(token))

    codepoint = n2cp.get(token)
    return chr(codepoint) if codepoint else match.group()
示例13: get_header_from_response
# 需要导入模块: from htmlentitydefs import name2codepoint [as 别名]
# 或者: from htmlentitydefs.name2codepoint import get [as 别名]
def get_header_from_response(url, header_to_get="", post=None, headers=None):
    """Request only the headers of ``url`` and return one of them.

    :param url: address passed to ``httptools.downloadpage``.
    :param header_to_get: header name; lower-cased before the lookup.
    :param post: forwarded to ``httptools.downloadpage`` as ``post``.
    :param headers: forwarded to ``httptools.downloadpage`` as ``headers``.
    :return: result of ``response.headers.get`` for the lower-cased name.
    """
    wanted = header_to_get.lower()
    reply = httptools.downloadpage(url, post=post, headers=headers, only_headers=True)
    received = reply.headers
    return received.get(wanted)