本文整理匯總了Python中pip.backwardcompat.urlparse.urljoin方法的典型用法代碼示例。如果您正苦於以下問題:Python urlparse.urljoin方法的具體用法?Python urlparse.urljoin怎麽用?Python urlparse.urljoin使用的例子?那麽,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類pip.backwardcompat.urlparse的用法示例。
在下文中一共展示了urlparse.urljoin方法的3個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: links
# 需要導入模塊: from pip.backwardcompat import urlparse [as 別名]
# 或者: from pip.backwardcompat.urlparse import urljoin [as 別名]
def links(self):
    """Yield a ``Link`` for every ``<a>`` element that has a non-empty href.

    Each href is resolved against ``self.base_url`` with
    ``urlparse.urljoin`` and the result is passed through
    ``self.clean_link`` before being wrapped in a ``Link``.

    The ``internal`` keyword given to ``Link`` is:

    * ``None`` when ``self.api_version`` is falsy or below 2, i.e. when no
      internal/external distinction is made;
    * ``True`` or ``False`` otherwise, depending on whether the anchor's
      ``rel`` attribute contains the whitespace-separated token
      ``"internal"``.
    """
    for anchor in self.parsed.findall(".//a"):
        href = anchor.get("href")
        if not href:
            # Anchors without an href (or with an empty one) are not links.
            continue

        url = self.clean_link(urlparse.urljoin(self.base_url, href))

        # Determine if this link is internal. If that distinction
        # doesn't make sense in this context, then we don't make
        # any distinction.
        internal = None
        if self.api_version and self.api_version >= 2:
            # Only api_versions >= 2 have a distinction between
            # external and internal links
            rel = anchor.get("rel")
            internal = bool(rel and "internal" in rel.split())

        yield Link(url, self, internal=internal)
示例2: explicit_rel_links
# 需要導入模塊: from pip.backwardcompat import urlparse [as 別名]
# 或者: from pip.backwardcompat.urlparse import urljoin [as 別名]
def explicit_rel_links(self, rels=('homepage', 'download')):
    """Yield a ``Link`` for every anchor whose ``rel`` matches one of *rels*.

    An ``<a>`` element is considered only if it has both a non-empty
    ``rel`` and a non-empty ``href``. Its ``rel`` value is split on
    whitespace, and the anchor is kept when at least one token is in
    *rels*.

    The href is resolved against ``self.base_url`` with
    ``urlparse.urljoin``, passed through ``self.clean_link`` and yielded
    as ``Link(url, self, trusted=False)``.

    :param rels: iterable of relation names to look for.
    """
    wanted = set(rels)
    for anchor in self.parsed.findall(".//a"):
        rel = anchor.get("rel")
        href = anchor.get("href")
        if not (rel and href):
            continue

        # Keep the anchor only if the relations found on it overlap with
        # the relations being looked for.
        if wanted.isdisjoint(rel.split()):
            continue

        url = self.clean_link(urlparse.urljoin(self.base_url, href))
        yield Link(url, self, trusted=False)
示例3: scraped_rel_links
# 需要導入模塊: from pip.backwardcompat import urlparse [as 別名]
# 或者: from pip.backwardcompat.urlparse import urljoin [as 別名]
def scraped_rel_links(self):
    """Yield links found by regex-scraping ``self.content``.

    For each of ``self._homepage_re`` and ``self._download_re`` (in that
    order) the first match in ``self.content`` is located, then
    ``self._href_re`` is searched starting from the end of that match.
    The first non-empty value among capture groups 1, 2 and 3 of the
    href match is taken as the URL. If any step finds nothing, that
    regex contributes no link.

    The URL is resolved against ``self.base_url`` with
    ``urlparse.urljoin``, passed through ``self.clean_link`` and yielded
    as ``Link(url, self, trusted=False, _deprecated_regex=True)``.

    NOTE(review): ``_href_re`` must define at least three capture groups;
    presumably they cover alternative quoting styles of the href value.
    Confirm against the pattern's definition.
    """
    # Can we get rid of this horrible horrible method?
    for regex in (self._homepage_re, self._download_re):
        match = regex.search(self.content)
        if not match:
            continue

        # Only look for an href that appears after the matched marker.
        href_match = self._href_re.search(self.content, pos=match.end())
        if not href_match:
            continue

        url = (
            href_match.group(1)
            or href_match.group(2)
            or href_match.group(3)
        )
        if not url:
            continue

        url = self.clean_link(urlparse.urljoin(self.base_url, url))
        yield Link(url, self, trusted=False, _deprecated_regex=True)