当前位置: 首页>>代码示例>>Python>>正文


Python compat.urljoin函数代码示例

本文整理汇总了Python中setuptools.compat.urljoin函数的典型用法代码示例。如果您正苦于以下问题:Python urljoin函数的具体用法?Python urljoin怎么用?Python urljoin使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了urljoin函数的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: process_url

    def process_url(self, url, retrieve=False):
        """Evaluate a URL as a possible download, and maybe retrieve it"""
        if os.getenv("CONDA_BUILD"):
            raise RuntimeError("Setuptools downloading is disabled in conda build. "
                               "Be sure to add all dependencies in the meta.yaml  url=%sr" % url)

        if url in self.scanned_urls and not retrieve:
            return
        self.scanned_urls[url] = True
        if not URL_SCHEME(url):
            self.process_filename(url)
            return
        else:
            dists = list(distros_for_url(url))
            if dists:
                if not self.url_ok(url):
                    return
                self.debug("Found link: %s", url)

        if dists or not retrieve or url in self.fetched_urls:
            list(map(self.add, dists))
            return  # don't need the actual page

        if not self.url_ok(url):
            self.fetched_urls[url] = True
            return

        self.info("Reading %s", url)
        self.fetched_urls[url] = True   # prevent multiple fetch attempts
        f = self.open_url(url, "Download error on %s: %%s -- Some packages may not be found!" % url)
        if f is None: return
        self.fetched_urls[f.url] = True
        if 'html' not in f.headers.get('content-type', '').lower():
            f.close()   # not html, we can't process it
            return

        base = f.url     # handle redirects
        page = f.read()
        if not isinstance(page, str): # We are in Python 3 and got bytes. We want str.
            if isinstance(f, HTTPError):
                # Errors have no charset, assume latin1:
                charset = 'latin-1'
            else:
                charset = f.headers.get_param('charset') or 'latin-1'
            page = page.decode(charset, "ignore")
        f.close()
        for match in HREF.finditer(page):
            link = urljoin(base, htmldecode(match.group(1)))
            self.process_url(link)
        if url.startswith(self.index_url) and getattr(f,'code',None)!=404:
            page = self.process_index(url, page)
开发者ID:zyh199552,项目名称:sysu-spider,代码行数:51,代码来源:package_index.py

示例2: process_index

    def process_index(self, url, page):
        """Process the contents of a PyPI page"""

        def scan(link):
            # Process a URL to see if it's for a package page
            if link.startswith(self.index_url):
                parts = list(map(
                    unquote, link[len(self.index_url):].split('/')
                ))
                if len(parts) == 2 and '#' not in parts[1]:
                    # it's a package page, sanitize and index it
                    pkg = safe_name(parts[0])
                    ver = safe_version(parts[1])
                    self.package_pages.setdefault(pkg.lower(), {})[link] = True
                    return to_filename(pkg), to_filename(ver)
            return None, None

        # process an index page into the package-page index
        for match in HREF.finditer(page):
            try:
                scan(urljoin(url, htmldecode(match.group(1))))
            except ValueError:
                pass

        pkg, ver = scan(url)  # ensure this page is in the page index
        if pkg:
            # process individual package page
            for new_url in find_external_links(url, page):
                # Process the found URL
                base, frag = egg_info_for_url(new_url)
                if base.endswith('.py') and not frag:
                    if ver:
                        new_url += '#egg=%s-%s' % (pkg, ver)
                    else:
                        self.need_version_info(url)
                self.scan_url(new_url)

            return PYPI_MD5.sub(
                lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
            )
        else:
            return ""  # no sense double-scanning non-package pages
开发者ID:carlosaherrera,项目名称:colorkeep_repository,代码行数:42,代码来源:package_index.py

示例3: find_external_links

    return wrapper

REL = re.compile("""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
# this line is here to fix emacs' cruddy broken syntax highlighting

@unique_values
def find_external_links(url, page):
    """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""

    for match in REL.finditer(page):
        tag, rel = match.groups()
        rels = set(map(str.strip, rel.lower().split(',')))
        if 'homepage' in rels or 'download' in rels:
            for match in HREF.finditer(tag):
<<<<<<< HEAD
                yield urljoin(url, htmldecode(match.group(1)))
=======
                yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
>>>>>>> 54eef0be98b1b67c8507db91f4cfa90b64991027

    for tag in ("<th>Home Page", "<th>Download URL"):
        pos = page.find(tag)
        if pos!=-1:
            match = HREF.search(page,pos)
            if match:
<<<<<<< HEAD
                yield urljoin(url, htmldecode(match.group(1)))
=======
                yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
>>>>>>> 54eef0be98b1b67c8507db91f4cfa90b64991027
开发者ID:Yankur,项目名称:Web-application,代码行数:30,代码来源:package_index.py


注:本文中的setuptools.compat.urljoin函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。