This article collects typical usage examples of the Python function module.utils.html_unescape. If you are unsure what html_unescape does, how to call it, or what real-world usage looks like, the hand-picked examples below should help.
A total of 15 code examples of html_unescape are shown, ordered by popularity by default.
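Before the examples, here is a minimal sketch of what an HTML-unescape helper is expected to do: turn entities such as &amp; and &#39; back into literal characters. The stand-in below uses Python 2's HTMLParser.unescape as an assumption; the exact entity coverage of module.utils.html_unescape may differ.
# A minimal stand-in (assumption: html_unescape behaves like a generic
# HTML entity decoder; here Python 2's HTMLParser.unescape is used).
from HTMLParser import HTMLParser

def html_unescape(text):
    # Decode entities such as &amp;, &quot; and &#39; back to characters.
    return HTMLParser().unescape(text)

print(html_unescape("Tom &amp; Jerry &#39;S01&#39;"))  # Tom & Jerry 'S01'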
Example 1: handleWebLinks
def handleWebLinks(self):
    self.logDebug("Search for Web links")
    package_links = []
    pattern = r'<form action="http://linkcrypt.ws/out.html"[^>]*?>.*?<input[^>]*?value="([^"]*?)"[^>]*?name="file"'
    ids = re.findall(pattern, self.html, re.I | re.S)
    self.logDebug("Decrypting %d Web links" % len(ids))
    for idx, weblink_id in enumerate(ids):
        try:
            self.logDebug("Decrypting Web link %d, %s" % (idx + 1, weblink_id))
            res = self.load("http://linkcrypt.ws/out.html", post={'file': weblink_id})
            indexs = res.find("window.location =") + 19
            indexe = res.find('"', indexs)
            link2 = res[indexs:indexe]
            self.logDebug(link2)
            link2 = html_unescape(link2)
            package_links.append(link2)
        except Exception, detail:
            self.logDebug("Error decrypting Web link %s, %s" % (weblink_id, detail))
Example 2: downloadFile
def downloadFile(self, pyfile):
    url = pyfile.url
    for i in range(5):
        header = self.load(url, just_header=True)
        # self.load does not raise a BadHeader on 404 responses, do it here
        if 'code' in header and header['code'] == 404:
            raise BadHeader(404)
        if 'location' in header:
            self.logDebug("Location: " + header['location'])
            url = unquote(header['location'])
        else:
            break
    name = html_unescape(unquote(urlparse(url).path.split("/")[-1]))
    if 'content-disposition' in header:
        self.logDebug("Content-Disposition: " + header['content-disposition'])
        m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)", header['content-disposition'])
        if m:
            disp = m.groupdict()
            self.logDebug(disp)
            if not disp['enc']:
                disp['enc'] = 'utf-8'
            name = remove_chars(disp['name'], "\"';").strip()
            name = unicode(unquote(name), disp['enc'])
    if not name:
        name = url
    pyfile.name = name
    self.logDebug("Filename: %s" % pyfile.name)
    self.download(url, disposition=True)
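The filename regex above accepts both the plain filename= form and the RFC 5987 filename*=charset''value form of Content-Disposition. The header values below are invented solely to illustrate which groups get captured:
from re import search

for value in ('attachment; filename="report.pdf"',
              "attachment; filename*=UTF-8''na%C3%AFve.txt"):
    m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)", value)
    print("enc=%s name=%s" % (m.group('enc'), m.group('name')))
# enc=None name="report.pdf"
# enc=UTF-8 name=na%C3%AFve.txt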
Example 3: handleCaptcha
def handleCaptcha(self, inputs):
    found = re.search(self.RECAPTCHA_URL_PATTERN, self.html)
    if found:
        recaptcha_key = unquote(found.group(1))
        self.logDebug("RECAPTCHA KEY: %s" % recaptcha_key)
        recaptcha = ReCaptcha(self)
        inputs["recaptcha_challenge_field"], inputs["recaptcha_response_field"] = recaptcha.challenge(recaptcha_key)
        return 1
    else:
        found = re.search(self.CAPTCHA_URL_PATTERN, self.html)
        if found:
            captcha_url = found.group(1)
            inputs["code"] = self.decryptCaptcha(captcha_url)
            return 2
        else:
            found = re.search(self.CAPTCHA_DIV_PATTERN, self.html, re.S)
            if found:
                captcha_div = found.group(1)
                self.logDebug(captcha_div)
                numerals = re.findall(
                    r"<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>", html_unescape(captcha_div)
                )
                inputs["code"] = "".join([a[1] for a in sorted(numerals, key=lambda num: int(num[0]))])
                self.logDebug("CAPTCHA", inputs["code"], numerals)
                return 3
            else:
                found = re.search(self.SOLVEMEDIA_PATTERN, self.html)
                if found:
                    captcha_key = found.group(1)
                    captcha = SolveMedia(self)
                    inputs["adcopy_challenge"], inputs["adcopy_response"] = captcha.challenge(captcha_key)
                    return 4
    return 0
Example 4: process
def process(self, pyfile):
    self.prepare()
    if not re.match(self.__pattern__, self.pyfile.url):
        if self.premium:
            self.handleOverriden()
        else:
            self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
    else:
        try:
            self.html = self.load(pyfile.url, cookies=False, decode=True)
            self.file_info = self.getFileInfo()
        except PluginParseError:
            self.file_info = None
        self.location = self.getDirectDownloadLink()
        if not self.file_info:
            pyfile.name = html_unescape(unquote(urlparse(self.location if self.location else pyfile.url).path.split("/")[-1]))
        if self.location:
            self.startDownload(self.location)
        elif self.premium:
            self.handlePremium()
        else:
            self.handleFree()
Example 5: handlePremium
def handlePremium(self):
    found = re.search(self.PREMIUM_URL_PATTERN, self.html)
    if not found: self.parseError("Premium URL")
    url = html_unescape(found.group(1))
    self.logDebug("Premium URL: " + url)
    if not url.startswith("http://"): self.resetAccount()
    self.download(url)
Example 6: process
def process(self, pyfile):
    self.prepare()
    if not re.match(self.__pattern__, self.pyfile.url):
        if self.premium:
            self.handleOverriden()
        else:
            self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
    else:
        try:
            # Due to a 0.4.9 core bug self.load would use cookies even if
            # cookies=False. Workaround using getURL to avoid cookies.
            # Can be reverted in 0.5 as the cookies bug has been fixed.
            self.html = getURL(pyfile.url, decode=True)
            self.file_info = self.getFileInfo()
        except PluginParseError:
            self.file_info = None
        self.location = self.getDirectDownloadLink()
        if not self.file_info:
            pyfile.name = html_unescape(
                unquote(urlparse(self.location if self.location else pyfile.url).path.split("/")[-1])
            )
        if self.location:
            self.startDownload(self.location)
        elif self.premium:
            self.handlePremium()
        else:
            self.handleFree()
Example 7: getFileInfo
def getFileInfo(self):
    self.logDebug("URL: %s" % self.pyfile.url)
    if hasattr(self, "TEMP_OFFLINE_PATTERN") and re.search(self.TEMP_OFFLINE_PATTERN, self.html):
        self.tempOffline()
    name, size, status = parseFileInfo(self)[:3]
    if status == 1:
        self.offline()
    elif status != 2:
        self.logDebug(self.file_info)
        self.parseError('File info')
    if name:
        self.pyfile.name = name
    else:
        self.pyfile.name = html_unescape(urlparse(self.pyfile.url).path.split("/")[-1])
    if size:
        self.pyfile.size = size
    else:
        self.logError("File size not parsed")
    self.logDebug("FILE NAME: %s FILE SIZE: %s" % (self.pyfile.name, self.pyfile.size))
    return self.file_info
Example 8: handleCaptcha
def handleCaptcha(self, inputs):
    captcha_div = re.search(r'<b>Enter code.*?<div.*?>(.*?)</div>', self.html, re.S).group(1)
    self.logDebug(captcha_div)
    numerals = re.findall(r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>', html_unescape(captcha_div))
    inputs['code'] = "".join([a[1] for a in sorted(numerals, key=lambda num: int(num[0]))])
    self.logDebug("CAPTCHA", inputs['code'], numerals)
    return 3
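The trick in Examples 3 and 8 is that the digits appear out of order in the markup and only their CSS padding-left values give the visual, left-to-right order. A worked illustration with a made-up captcha_div:
import re

captcha_div = ('<span style="padding-left:30px">7</span>'
               '<span style="padding-left:10px">4</span>'
               '<span style="padding-left:20px">1</span>')  # hypothetical markup
numerals = re.findall(r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>', captcha_div)
# numerals == [('30', '7'), ('10', '4'), ('20', '1')]
code = "".join(a[1] for a in sorted(numerals, key=lambda num: int(num[0])))
print(code)  # 417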
Example 9: proceed
def proceed(self, url, location):
    url = self.parent.url
    self.html = self.load(url)
    link_ids = re.findall(r"<a id=\"DownloadLink_(\d*)\" href=\"http://1kh.de/", self.html)
    for id in link_ids:
        new_link = html_unescape(
            re.search("width=\"100%\" src=\"(.*)\"></iframe>", self.load("http://1kh.de/l/" + id)).group(1))
        self.urls.append(new_link)
Example 10: get_file_name
def get_file_name(self):
    try:
        name = self.api["name"]
    except KeyError:
        file_name_pattern = 'id="downloadlink"><a href="(.*)" onclick="'
        name = re.search(file_name_pattern, self.html[1]).group(1).split("/")[-1]
    return html_unescape(name)
Example 11: load
def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=True, multipart=False, req=None):
    """
    Load content at url and return it

    :param url:
    :param get:
    :param post:
    :param ref:
    :param cookies:
    :param just_header: If True only the header will be retrieved and returned as dict
    :param decode: Whether to decode the output according to http header, should be True in most cases
    :return: Loaded content
    """
    if hasattr(self, 'pyfile') and self.pyfile.abort:
        self.abort()

    url = fixurl(url)

    if not url or not isinstance(url, basestring):
        self.fail(_("No url given"))

    if self.pyload.debug:
        self.log_debug("LOAD URL " + url,
                       *["%s=%s" % (key, val) for key, val in locals().items() if key not in ("self", "url")])

    if req is None:
        req = self.req or self.pyload.requestFactory.getRequest(self.__name__)

    #@TODO: Move to network in 0.4.10
    if hasattr(self, 'COOKIES') and isinstance(self.COOKIES, list):
        set_cookies(req.cj, self.COOKIES)

    res = req.load(url, get, post, ref, bool(cookies), just_header, multipart, decode is True)  #@TODO: Fix network multipart in 0.4.10

    #@TODO: Move to network in 0.4.10
    if decode:
        res = html_unescape(res)

    #@TODO: Move to network in 0.4.10
    if isinstance(decode, basestring):
        res = decode(res, decode)

    if self.pyload.debug:
        frame = inspect.currentframe()
        framefile = fs_join("tmp", self.__name__, "%s_line%s.dump.html" % (frame.f_back.f_code.co_name, frame.f_back.f_lineno))
        try:
            if not exists(os.path.join("tmp", self.__name__)):
                os.makedirs(os.path.join("tmp", self.__name__))

            with open(framefile, "wb") as f:
                del frame  #: Delete the frame or it won't be cleaned
                f.write(encode(res))
        except IOError, e:
            self.log_error(e)

    return res
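For context, a typical call from a plugin subclass might look like the following; the URL is a placeholder and self is assumed to be a plugin instance exposing this load method:
# Hypothetical usage inside a plugin method (URL is a placeholder):
html = self.load("http://example.com/file/abc123")                      # decoded and HTML-unescaped text
header = self.load("http://example.com/file/abc123", just_header=True)  # dict of response headers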
Example 12: getPackageNameAndFolder
def getPackageNameAndFolder(self):
    if hasattr(self, 'TITLE_PATTERN'):
        m = re.search(self.TITLE_PATTERN, self.html)
        if m:
            name = folder = html_unescape(m.group('title').strip())
            self.logDebug("Found name [%s] and folder [%s] in package info" % (name, folder))
            return name, folder
    name = self.pyfile.package().name
    folder = self.pyfile.package().folder
    self.logDebug("Package info not found, defaulting to pyfile name [%s] and folder [%s]" % (name, folder))
    return name, folder
Example 13: handle_free
def handle_free(self, pyfile):
    fileid = re.search(self.FILEID_PATTERN, self.html).group(1)
    self.log_debug("FileID: " + fileid)
    token = re.search(self.TOKEN_PATTERN, self.html).group(1)
    self.log_debug("Token: " + token)
    self.html = self.load("http://lolabits.es/action/License/Download",
                          post={'fileId': fileid,
                                '__RequestVerificationToken': token},
                          decode="unicode-escape")
    self.link = html_unescape(re.search(self.LINK_PATTERN, self.html).group(1))
Example 14: _translateAPIFileInfo
def _translateAPIFileInfo(apiFileId, apiFileDataMap, apiHosterMap):
    # Translate
    fileInfo = {}
    try:
        fileInfo['status'] = MegauploadCom.API_STATUS_MAPPING[apiFileDataMap[apiFileId]]
        fileInfo['name'] = html_unescape(apiFileDataMap['n'])
        fileInfo['size'] = int(apiFileDataMap['s'])
        fileInfo['hoster'] = apiHosterMap[apiFileDataMap['d']]
    except:
        pass
    return fileInfo
Example 15: getInfo
def getInfo(urls):
    for chunk in chunks(urls, 80):
        result = []
        api = getAPIData(chunk)
        for data in api.itervalues():
            if data[0] == "online":
                result.append((html_unescape(data[2]), data[1], 2, data[4]))
            elif data[0] == "offline":
                result.append((data[4], 0, 1, data[4]))
        yield result