This article collects typical usage examples of the Python method pip.download.PipSession.get. If you are unsure what PipSession.get does or how to call it, the curated code examples below may help. You can also look further into the usage of the containing class, pip.download.PipSession.
Two code examples of the PipSession.get method are shown below.
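Before the full examples, here is a minimal sketch of calling PipSession.get directly. PipSession comes from the old internal pip.download module (pip < 6) and subclasses requests.Session, so .get accepts the usual requests keyword arguments; the URL and header below are purely illustrative.

# Minimal sketch (illustrative, not taken from the examples below):
# PipSession subclasses requests.Session, so .get behaves like
# requests.Session.get and returns a requests.Response.
from pip.download import PipSession

session = PipSession()
resp = session.get("https://pypi.org/simple/pip/", headers={"Accept": "text/html"})
resp.raise_for_status()
print(resp.status_code, resp.headers.get("Content-Type"))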
Example 1: get_page
# Required import: from pip.download import PipSession [as alias]
# Or: from pip.download.PipSession import get [as alias]
def get_page(cls, link, req, cache=None, skip_archives=True, session=None):
    if session is None:
        session = PipSession()

    url = link.url
    url = url.split('#', 1)[0]
    if cache.too_many_failures(url):
        return None

    # Check for VCS schemes that do not support lookup as web pages.
    from pip.vcs import VcsSupport
    for scheme in VcsSupport.schemes:
        if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
            logger.debug(
                'Cannot look at %(scheme)s URL %(link)s' % locals()
            )
            return None

    if cache is not None:
        inst = cache.get_page(url)
        if inst is not None:
            return inst
    try:
        if skip_archives:
            if cache is not None:
                if cache.is_archive(url):
                    return None
            filename = link.filename
            for bad_ext in ['.tar', '.tar.gz', '.tar.bz2', '.tgz', '.zip']:
                if filename.endswith(bad_ext):
                    content_type = cls._get_content_type(
                        url, session=session,
                    )
                    if content_type.lower().startswith('text/html'):
                        break
                    else:
                        logger.debug(
                            'Skipping page %s because of Content-Type: '
                            '%s' % (link, content_type)
                        )
                        if cache is not None:
                            cache.set_is_archive(url)
                        return None

        logger.debug('Getting page %s' % url)

        # Tack index.html onto file:// URLs that point to directories
        (scheme, netloc, path, params, query, fragment) = \
            urlparse.urlparse(url)
        if scheme == 'file' and os.path.isdir(url2pathname(path)):
            # add trailing slash if not present so urljoin doesn't trim
            # final segment
            if not url.endswith('/'):
                url += '/'
            url = urlparse.urljoin(url, 'index.html')
            logger.debug(' file: URL is directory, getting %s' % url)

        resp = session.get(url, headers={"Accept": "text/html"})
        resp.raise_for_status()

        # The check for archives above only works if the url ends with
        # something that looks like an archive. However that is not a
        # requirement of an url. Unless we issue a HEAD request on every
        # url we cannot know ahead of time for sure if something is HTML
        # or not. However we can check after we've downloaded it.
        content_type = resp.headers.get('Content-Type', 'unknown')
        if not content_type.lower().startswith("text/html"):
            logger.debug(
                'Skipping page %s because of Content-Type: %s' %
                (link, content_type)
            )
            if cache is not None:
                cache.set_is_archive(url)
            return None

        inst = cls(resp.text, resp.url, resp.headers, trusted=link.trusted)
    except requests.HTTPError as exc:
        level = 2 if exc.response.status_code == 404 else 1
        cls._handle_fail(req, link, exc, url, cache=cache, level=level)
    except requests.ConnectionError as exc:
        cls._handle_fail(
            req, link, "connection error: %s" % exc, url,
            cache=cache,
        )
    except requests.Timeout:
        cls._handle_fail(req, link, "timed out", url, cache=cache)
    except SSLError as exc:
        reason = ("There was a problem confirming the ssl certificate: "
                  "%s" % exc)
        cls._handle_fail(
            req, link, reason, url,
            cache=cache,
            level=2,
            meth=logger.notify,
        )
    else:
        if cache is not None:
            cache.add_page([url, resp.url], inst)
        return inst
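For context, here is a hedged sketch of how a snippet like Example 1 might be invoked. The HTMLPage and Link names are assumptions based on pip's internal index machinery of that era, and _FakeCache is an illustrative stub: the method calls cache.too_many_failures() without guarding against cache being None, so some cache object has to be supplied.

# Hypothetical call site (HTMLPage and Link assumed to come from pip.index;
# _FakeCache stands in for pip's internal page cache, which the snippet above
# requires because cache.too_many_failures() is called unconditionally).
from pip.download import PipSession
from pip.index import HTMLPage, Link

class _FakeCache(object):
    def too_many_failures(self, url): return False
    def get_page(self, url): return None
    def is_archive(self, url): return False
    def set_is_archive(self, url): pass
    def add_page(self, urls, page): pass

page = HTMLPage.get_page(Link("https://pypi.org/simple/"), req=None,
                         cache=_FakeCache(), session=PipSession())
if page is not None:
    print(page.url)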
Example 2: get_page
# Required import: from pip.download import PipSession [as alias]
# Or: from pip.download.PipSession import get [as alias]
def get_page(cls, link, req, cache=None, skip_archives=True, session=None):
    if session is None:
        session = PipSession()

    url = link.url
    url = url.split("#", 1)[0]
    if cache.too_many_failures(url):
        return None

    # Check for VCS schemes that do not support lookup as web pages.
    from pip.vcs import VcsSupport
    for scheme in VcsSupport.schemes:
        if url.lower().startswith(scheme) and url[len(scheme)] in "+:":
            logger.debug("Cannot look at %(scheme)s URL %(link)s" % locals())
            return None

    if cache is not None:
        inst = cache.get_page(url)
        if inst is not None:
            return inst
    try:
        if skip_archives:
            if cache is not None:
                if cache.is_archive(url):
                    return None
            filename = link.filename
            for bad_ext in [".tar", ".tar.gz", ".tar.bz2", ".tgz", ".zip"]:
                if filename.endswith(bad_ext):
                    content_type = cls._get_content_type(url, session=session)
                    if content_type.lower().startswith("text/html"):
                        break
                    else:
                        logger.debug(
                            "Skipping page %s because of Content-Type: %s"
                            % (link, content_type)
                        )
                        if cache is not None:
                            cache.set_is_archive(url)
                        return None

        logger.debug("Getting page %s" % url)

        # Tack index.html onto file:// URLs that point to directories
        (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
        if scheme == "file" and os.path.isdir(url2pathname(path)):
            # add trailing slash if not present so urljoin doesn't trim
            # final segment
            if not url.endswith("/"):
                url += "/"
            url = urlparse.urljoin(url, "index.html")
            logger.debug(" file: URL is directory, getting %s" % url)

        resp = session.get(url)
        resp.raise_for_status()

        # The check for archives above only works if the url ends with
        # something that looks like an archive. However that is not a
        # requirement. For instance
        # http://sourceforge.net/projects/docutils/files/docutils/0.8.1/docutils-0.8.1.tar.gz/download
        # redirects to
        # http://superb-dca3.dl.sourceforge.net/project/docutils/docutils/0.8.1/docutils-0.8.1.tar.gz
        # Unless we issue a HEAD request on every url we cannot know
        # ahead of time for sure if something is HTML or not. However we
        # can check after we've downloaded it.
        content_type = resp.headers.get("Content-Type", "unknown")
        if not content_type.lower().startswith("text/html"):
            logger.debug(
                "Skipping page %s because of Content-Type: %s"
                % (link, content_type)
            )
            if cache is not None:
                cache.set_is_archive(url)
            return None

        inst = cls(resp.text, resp.url, resp.headers, trusted=link.trusted)
    except requests.HTTPError as exc:
        level = 2 if exc.response.status_code == 404 else 1
        cls._handle_fail(req, link, exc, url, cache=cache, level=level)
    except requests.Timeout:
        cls._handle_fail(req, link, "timed out", url, cache=cache)
    except SSLError as exc:
        reason = "There was a problem confirming the ssl certificate: %s" % exc
        cls._handle_fail(req, link, reason, url, cache=cache, level=2, meth=logger.notify)
    else:
        if cache is not None:
            cache.add_page([url, resp.url], inst)
        return inst
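The two examples differ mainly in that Example 1 sends an explicit Accept: text/html header on session.get and also handles requests.ConnectionError, while Example 2 does neither. Both rely on a cls._get_content_type helper to probe archive-looking URLs before downloading them; the sketch below shows what such a probe might look like (the function name and behaviour are assumptions for illustration, not the examples' own code).

# Hedged sketch of a content-type probe similar to the _get_content_type
# helper the examples call: issue a HEAD request through the same PipSession
# and read the Content-Type response header.
from pip.download import PipSession

def get_content_type(url, session=None):
    if session is None:
        session = PipSession()
    resp = session.request("HEAD", url, allow_redirects=True)
    resp.raise_for_status()
    return resp.headers.get("Content-Type", "")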