This article collects typical usage examples of the Python function module.utils.html_unescape. If you are unsure what html_unescape does, how to call it, or what real-world usage looks like, the hand-picked examples below should help.
A total of 15 code examples of html_unescape are shown, ordered by popularity by default.
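Before the examples, here is a minimal sketch of what an HTML-unescape helper is expected to do: turn entities such as &amp; and &#39; back into literal characters. The stand-in below uses Python 2's HTMLParser.unescape as an assumption; the exact entity coverage of module.utils.html_unescape may differ.
# A minimal stand-in (assumption: html_unescape behaves like a generic
# HTML entity decoder; here Python 2's HTMLParser.unescape is used).
from HTMLParser import HTMLParser

def html_unescape(text):
    # Decode entities such as &amp;, &quot; and &#39; back to characters.
    return HTMLParser().unescape(text)

print(html_unescape("Tom &amp; Jerry &#39;S01&#39;"))  # Tom & Jerry 'S01'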
Example 1: handleWebLinks
def handleWebLinks(self):
    self.logDebug("Search for Web links")
    package_links = []
    pattern = r'<form action="http://linkcrypt.ws/out.html"[^>]*?>.*?<input[^>]*?value="([^"]*?)"[^>]*?name="file"'
    ids = re.findall(pattern, self.html, re.I | re.S)
    self.logDebug("Decrypting %d Web links" % len(ids))
    for idx, weblink_id in enumerate(ids):
        try:
            self.logDebug("Decrypting Web link %d, %s" % (idx + 1, weblink_id))
            res = self.load("http://linkcrypt.ws/out.html", post={'file': weblink_id})
            indexs = res.find("window.location =") + 19
            indexe = res.find('"', indexs)
            link2 = res[indexs:indexe]
            self.logDebug(link2)
            link2 = html_unescape(link2)
            package_links.append(link2)
        except Exception, detail:
            self.logDebug("Error decrypting Web link %s, %s" % (weblink_id, detail))
Example 2: downloadFile
def downloadFile(self, pyfile):
    url = pyfile.url
    for i in range(5):
        header = self.load(url, just_header=True)
        # self.load does not raise a BadHeader on 404 responses, do it here
        if 'code' in header and header['code'] == 404:
            raise BadHeader(404)
        if 'location' in header:
            self.logDebug("Location: " + header['location'])
            url = unquote(header['location'])
        else:
            break
    name = html_unescape(unquote(urlparse(url).path.split("/")[-1]))
    if 'content-disposition' in header:
        self.logDebug("Content-Disposition: " + header['content-disposition'])
        m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)", header['content-disposition'])
        if m:
            disp = m.groupdict()
            self.logDebug(disp)
            if not disp['enc']:
                disp['enc'] = 'utf-8'
            name = remove_chars(disp['name'], "\"';").strip()
            name = unicode(unquote(name), disp['enc'])
    if not name:
        name = url
    pyfile.name = name
    self.logDebug("Filename: %s" % pyfile.name)
    self.download(url, disposition=True)
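The filename regex above accepts both the plain filename= form and the RFC 5987 filename*=charset''value form of Content-Disposition. The header values below are invented solely to illustrate which groups get captured:
from re import search

for value in ('attachment; filename="report.pdf"',
              "attachment; filename*=UTF-8''na%C3%AFve.txt"):
    m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)", value)
    print("enc=%s name=%s" % (m.group('enc'), m.group('name')))
# enc=None name="report.pdf"
# enc=UTF-8 name=na%C3%AFve.txt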
Example 3: handleCaptcha
def handleCaptcha(self, inputs):
    found = re.search(self.RECAPTCHA_URL_PATTERN, self.html)
    if found:
        recaptcha_key = unquote(found.group(1))
        self.logDebug("RECAPTCHA KEY: %s" % recaptcha_key)
        recaptcha = ReCaptcha(self)
        inputs["recaptcha_challenge_field"], inputs["recaptcha_response_field"] = recaptcha.challenge(recaptcha_key)
        return 1
    else:
        found = re.search(self.CAPTCHA_URL_PATTERN, self.html)
        if found:
            captcha_url = found.group(1)
            inputs["code"] = self.decryptCaptcha(captcha_url)
            return 2
        else:
            found = re.search(self.CAPTCHA_DIV_PATTERN, self.html, re.S)
            if found:
                captcha_div = found.group(1)
                self.logDebug(captcha_div)
                numerals = re.findall(
                    r"<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>", html_unescape(captcha_div)
                )
                inputs["code"] = "".join([a[1] for a in sorted(numerals, key=lambda num: int(num[0]))])
                self.logDebug("CAPTCHA", inputs["code"], numerals)
                return 3
            else:
                found = re.search(self.SOLVEMEDIA_PATTERN, self.html)
                if found:
                    captcha_key = found.group(1)
                    captcha = SolveMedia(self)
                    inputs["adcopy_challenge"], inputs["adcopy_response"] = captcha.challenge(captcha_key)
                    return 4
    return 0
Example 4: process
def process(self, pyfile):
    self.prepare()
    if not re.match(self.__pattern__, self.pyfile.url):
        if self.premium:
            self.handleOverriden()
        else:
            self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
    else:
        try:
            self.html = self.load(pyfile.url, cookies=False, decode=True)
            self.file_info = self.getFileInfo()
        except PluginParseError:
            self.file_info = None
        self.location = self.getDirectDownloadLink()
        if not self.file_info:
            pyfile.name = html_unescape(unquote(urlparse(self.location if self.location else pyfile.url).path.split("/")[-1]))
        if self.location:
            self.startDownload(self.location)
        elif self.premium:
            self.handlePremium()
        else:
            self.handleFree()
Example 5: handlePremium
def handlePremium(self):
    found = re.search(self.PREMIUM_URL_PATTERN, self.html)
    if not found: self.parseError("Premium URL")
    url = html_unescape(found.group(1))
    self.logDebug("Premium URL: " + url)
    if not url.startswith("http://"): self.resetAccount()
    self.download(url)
Example 6: process
def process(self, pyfile):
    self.prepare()
    if not re.match(self.__pattern__, self.pyfile.url):
        if self.premium:
            self.handleOverriden()
        else:
            self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
    else:
        try:
            # Due to a 0.4.9 core bug self.load would use cookies even if
            # cookies=False. Workaround using getURL to avoid cookies.
            # Can be reverted in 0.5 as the cookies bug has been fixed.
            self.html = getURL(pyfile.url, decode=True)
            self.file_info = self.getFileInfo()
        except PluginParseError:
            self.file_info = None
        self.location = self.getDirectDownloadLink()
        if not self.file_info:
            pyfile.name = html_unescape(
                unquote(urlparse(self.location if self.location else pyfile.url).path.split("/")[-1])
            )
        if self.location:
            self.startDownload(self.location)
        elif self.premium:
            self.handlePremium()
        else:
            self.handleFree()
Example 7: getFileInfo
def getFileInfo(self):
    self.logDebug("URL: %s" % self.pyfile.url)
    if hasattr(self, "TEMP_OFFLINE_PATTERN") and re.search(self.TEMP_OFFLINE_PATTERN, self.html):
        self.tempOffline()
    name, size, status = parseFileInfo(self)[:3]
    if status == 1:
        self.offline()
    elif status != 2:
        self.logDebug(self.file_info)
        self.parseError('File info')
    if name:
        self.pyfile.name = name
    else:
        self.pyfile.name = html_unescape(urlparse(self.pyfile.url).path.split("/")[-1])
    if size:
        self.pyfile.size = size
    else:
        self.logError("File size not parsed")
    self.logDebug("FILE NAME: %s FILE SIZE: %s" % (self.pyfile.name, self.pyfile.size))
    return self.file_info
Example 8: handleCaptcha
def handleCaptcha(self, inputs):
    captcha_div = re.search(r'<b>Enter code.*?<div.*?>(.*?)</div>', self.html, re.S).group(1)
    self.logDebug(captcha_div)
    numerals = re.findall(r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>', html_unescape(captcha_div))
    inputs['code'] = "".join([a[1] for a in sorted(numerals, key=lambda num: int(num[0]))])
    self.logDebug("CAPTCHA", inputs['code'], numerals)
    return 3
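The trick in Examples 3 and 8 is that the digits appear out of order in the markup and only their CSS padding-left values give the visual, left-to-right order. A worked illustration with a made-up captcha_div:
import re

captcha_div = ('<span style="padding-left:30px">7</span>'
               '<span style="padding-left:10px">4</span>'
               '<span style="padding-left:20px">1</span>')  # hypothetical markup
numerals = re.findall(r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>', captcha_div)
# numerals == [('30', '7'), ('10', '4'), ('20', '1')]
code = "".join(a[1] for a in sorted(numerals, key=lambda num: int(num[0])))
print(code)  # 417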
Example 9: proceed
def proceed(self, url, location):
    url = self.parent.url
    self.html = self.load(url)
    link_ids = re.findall(r"<a id=\"DownloadLink_(\d*)\" href=\"http://1kh.de/", self.html)
    for id in link_ids:
        new_link = html_unescape(
            re.search("width=\"100%\" src=\"(.*)\"></iframe>", self.load("http://1kh.de/l/" + id)).group(1))
        self.urls.append(new_link)
Example 10: get_file_name
def get_file_name(self):
    try:
        name = self.api["name"]
    except KeyError:
        file_name_pattern = 'id="downloadlink"><a href="(.*)" onclick="'
        name = re.search(file_name_pattern, self.html[1]).group(1).split("/")[-1]
    return html_unescape(name)
Example 11: load
def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=True, multipart=False, req=None):
    """
    Load content at url and return it

    :param url:
    :param get:
    :param post:
    :param ref:
    :param cookies:
    :param just_header: If True only the header will be retrieved and returned as dict
    :param decode: Whether to decode the output according to http header, should be True in most cases
    :return: Loaded content
    """
    if hasattr(self, 'pyfile') and self.pyfile.abort:
        self.abort()

    url = fixurl(url)

    if not url or not isinstance(url, basestring):
        self.fail(_("No url given"))

    if self.pyload.debug:
        self.log_debug("LOAD URL " + url,
                       *["%s=%s" % (key, val) for key, val in locals().items() if key not in ("self", "url")])

    if req is None:
        req = self.req or self.pyload.requestFactory.getRequest(self.__name__)

    #@TODO: Move to network in 0.4.10
    if hasattr(self, 'COOKIES') and isinstance(self.COOKIES, list):
        set_cookies(req.cj, self.COOKIES)

    res = req.load(url, get, post, ref, bool(cookies), just_header, multipart, decode is True)  #@TODO: Fix network multipart in 0.4.10

    #@TODO: Move to network in 0.4.10
    if decode:
        res = html_unescape(res)

    #@TODO: Move to network in 0.4.10
    if isinstance(decode, basestring):
        res = decode(res, decode)

    if self.pyload.debug:
        frame = inspect.currentframe()
        framefile = fs_join("tmp", self.__name__, "%s_line%s.dump.html" % (frame.f_back.f_code.co_name, frame.f_back.f_lineno))
        try:
            if not exists(os.path.join("tmp", self.__name__)):
                os.makedirs(os.path.join("tmp", self.__name__))

            with open(framefile, "wb") as f:
                del frame  #: Delete the frame or it won't be cleaned
                f.write(encode(res))
        except IOError, e:
            self.log_error(e)

    return res
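For context, a typical call from a plugin subclass might look like the following; the URL is a placeholder and self is assumed to be a plugin instance exposing this load method:
# Hypothetical usage inside a plugin method (URL is a placeholder):
html = self.load("http://example.com/file/abc123")                      # decoded and HTML-unescaped text
header = self.load("http://example.com/file/abc123", just_header=True)  # dict of response headers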
Example 12: getPackageNameAndFolder
def getPackageNameAndFolder(self):
    if hasattr(self, 'TITLE_PATTERN'):
        m = re.search(self.TITLE_PATTERN, self.html)
        if m:
            name = folder = html_unescape(m.group('title').strip())
            self.logDebug("Found name [%s] and folder [%s] in package info" % (name, folder))
            return name, folder
    name = self.pyfile.package().name
    folder = self.pyfile.package().folder
    self.logDebug("Package info not found, defaulting to pyfile name [%s] and folder [%s]" % (name, folder))
    return name, folder
Example 13: handle_free
def handle_free(self, pyfile):
    fileid = re.search(self.FILEID_PATTERN, self.html).group(1)
    self.log_debug("FileID: " + fileid)
    token = re.search(self.TOKEN_PATTERN, self.html).group(1)
    self.log_debug("Token: " + token)
    self.html = self.load("http://lolabits.es/action/License/Download",
                          post={'fileId': fileid,
                                '__RequestVerificationToken': token},
                          decode="unicode-escape")
    self.link = html_unescape(re.search(self.LINK_PATTERN, self.html).group(1))
Example 14: _translateAPIFileInfo
def _translateAPIFileInfo(apiFileId, apiFileDataMap, apiHosterMap):
    # Translate
    fileInfo = {}
    try:
        fileInfo['status'] = MegauploadCom.API_STATUS_MAPPING[apiFileDataMap[apiFileId]]
        fileInfo['name'] = html_unescape(apiFileDataMap['n'])
        fileInfo['size'] = int(apiFileDataMap['s'])
        fileInfo['hoster'] = apiHosterMap[apiFileDataMap['d']]
    except:
        pass
    return fileInfo
Example 15: getInfo
def getInfo(urls):
    for chunk in chunks(urls, 80):
        result = []
        api = getAPIData(chunk)
        for data in api.itervalues():
            if data[0] == "online":
                result.append((html_unescape(data[2]), data[1], 2, data[4]))
            elif data[0] == "offline":
                result.append((data[4], 0, 1, data[4]))
        yield result