本文整理汇总了Python中qutebrowser.utils.urlutils.encoded_url函数的典型用法代码示例。如果您正苦于以下问题：Python encoded_url函数的具体用法？Python encoded_url怎么用？Python encoded_url使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了encoded_url函数的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _fetch_url
def _fetch_url(self, url):
    """Start downloading one asset and track it as a pending download.

    URLs with a scheme other than http/https and URLs which were already
    requested are ignored. Host-blocked URLs are not downloaded; an empty
    stub file is added to the writer for them instead.

    Args:
        url: The asset to download as QUrl.
    """
    if url.scheme() not in ("http", "https") or url in self.loaded_urls:
        # Either an unsupported scheme, or the asset was requested before.
        return

    self.loaded_urls.add(url)
    log.downloads.debug("loading asset at {}".format(url))

    # Handing host-blocked urls to the download manager might crash qute,
    # see the comments/discussion on
    # https://github.com/The-Compiler/qutebrowser/pull/962#discussion_r40256987
    # and https://github.com/The-Compiler/qutebrowser/issues/1053
    if objreg.get("host-blocker").is_blocked(url):
        log.downloads.debug("Skipping {}, host-blocked".format(url))
        # An empty file is still required in the output: QWebView can be
        # pretty picky about displaying a file correctly when not all
        # assets are at least referenced in the mhtml file.
        self.writer.add_file(urlutils.encoded_url(url), b"")
        return

    manager = objreg.get("download-manager", scope="window",
                         window=self._win_id)
    download = manager.get(url, fileobj=_NoCloseBytesIO(), auto_remove=True)
    self.pending_downloads.add((url, download))

    # A cancelled download is handled exactly like a failed one.
    callbacks = (
        ("finished", self._finished),
        ("error", self._error),
        ("cancelled", self._error),
    )
    for signal_name, callback in callbacks:
        signal = getattr(download, signal_name)
        signal.connect(functools.partial(callback, url, download))
示例2: _finished
def _finished(self, url, item):
    """Callback when a single asset is downloaded.

    Adds the downloaded data to the MHTML writer. For CSS assets, the
    stylesheet is additionally scanned for imports, which are fetched as
    well. Once no downloads are pending anymore, the file is finished.

    Args:
        url: The original url of the asset as QUrl.
        item: The DownloadItem given by the DownloadManager
    """
    self.pending_downloads.remove((url, item))
    mime = item.raw_headers.get(b'Content-Type', b'')

    # Note that this decoding always works and doesn't produce errors
    # RFC 7230 (https://tools.ietf.org/html/rfc7230) states:
    # Historically, HTTP has allowed field content with text in the
    # ISO-8859-1 charset [ISO-8859-1], supporting other charsets only
    # through use of [RFC2047] encoding.  In practice, most HTTP header
    # field values use only a subset of the US-ASCII charset [USASCII].
    # Newly defined header fields SHOULD limit their field values to
    # US-ASCII octets.  A recipient SHOULD treat other octets in field
    # content (obs-text) as opaque data.
    mime = mime.decode('iso-8859-1')

    # A Content-Type value may carry parameters and is case-insensitive,
    # e.g. "Text/CSS; charset=utf-8". Only the bare, lower-cased media type
    # is used for the decisions below; the header written to the file keeps
    # its parameters.
    media_type = mime.split(';', 1)[0].strip().lower()

    # The raw payload, read once and used for both the CSS scan and the
    # file written to the MHTML output.
    data = item.fileobj.getvalue()

    if media_type == 'text/css' or url.fileName().endswith('.css'):
        # We can't always assume that CSS files are UTF-8, but CSS files
        # shouldn't contain many non-ASCII characters anyway (in most
        # cases). Using "ignore" lets us decode the file even if it's
        # invalid UTF-8 data.
        # The file written to the MHTML file won't be modified by this
        # decoding, since there we're taking the original bytestream.
        try:
            css_string = data.decode('utf-8')
        except UnicodeDecodeError:
            log.downloads.warning("Invalid UTF-8 data in {}".format(url))
            css_string = data.decode('utf-8', 'ignore')
        for import_url in _get_css_imports(css_string):
            # Imports may be relative to the stylesheet's own location.
            self._fetch_url(url.resolved(QUrl(import_url)))

    encode = E_QUOPRI if media_type.startswith('text/') else E_BASE64

    # Our MHTML handler refuses non-ASCII headers. This will replace every
    # non-ASCII char with '?'. This is probably okay, as official Content-
    # Type headers contain ASCII only anyway. Anything else is madness.
    mime = utils.force_encoding(mime, 'ascii')

    self.writer.add_file(urlutils.encoded_url(url), data, mime, encode)
    item.fileobj.actual_close()

    if not self.pending_downloads:
        self._finish_file()
示例3: _error
def _error(self, url, item, *_args):
    """Handle a failed download of a single asset.

    The download is dropped from the pending set, its file object is
    closed and an empty stub file is added to the writer in its place.
    Once nothing is pending anymore, the file is finished.

    Args:
        url: The original url of the asset as QUrl.
        item: The DownloadItem given by the DownloadManager.
    """
    pending_entry = (url, item)
    try:
        self.pending_downloads.remove(pending_entry)
    except KeyError:
        # This might happen if .collect_zombies() calls .finished() and the
        # error handler will be called after .collect_zombies
        log.downloads.debug("Oops! Download already gone: {}".format(item))
        return

    item.fileobj.actual_close()
    # Stand-in for the asset which could not be downloaded: an empty file
    # under the asset's url.
    self.writer.add_file(urlutils.encoded_url(url), b'')

    if not self.pending_downloads:
        self._finish_file()
示例4: _fetch_url
def _fetch_url(self, url):
    """Start downloading one asset and track it as a pending download.

    URLs with a scheme other than http/https and URLs which were already
    requested are ignored. URLs which the request interceptors mark as
    blocked are not downloaded; an empty stub file is added to the writer
    for them instead.

    Args:
        url: The asset to download as QUrl.
    """
    if url.scheme() not in ('http', 'https') or url in self.loaded_urls:
        # Either an unsupported scheme, or the asset was requested before.
        return

    self.loaded_urls.add(url)
    log.downloads.debug("loading asset at {}".format(url))

    # Handing host-blocked urls to the download manager might crash qute,
    # see the comments/discussion on
    # https://github.com/qutebrowser/qutebrowser/pull/962#discussion_r40256987
    # and https://github.com/qutebrowser/qutebrowser/issues/1053
    intercepted = interceptors.Request(first_party_url=None, request_url=url)
    interceptors.run(intercepted)
    if intercepted.is_blocked:
        log.downloads.debug("Skipping {}, host-blocked".format(url))
        # An empty file is still required in the output: QWebView can be
        # pretty picky about displaying a file correctly when not all
        # assets are at least referenced in the mhtml file.
        self.writer.add_file(urlutils.encoded_url(url), b'')
        return

    manager = objreg.get('qtnetwork-download-manager')
    buffer_target = downloads.FileObjDownloadTarget(_NoCloseBytesIO())
    download = manager.get(url, target=buffer_target, auto_remove=True)
    self.pending_downloads.add((url, download))

    on_finished = functools.partial(self._finished, url, download)
    download.finished.connect(on_finished)
    on_error = functools.partial(self._error, url, download)
    download.error.connect(on_error)
    on_cancelled = functools.partial(self._cancelled, url, download)
    download.cancelled.connect(on_cancelled)
示例5: test_encoded_url
def test_encoded_url(url, expected):
    """Check that encoded_url returns the expected value for a url string."""
    actual = urlutils.encoded_url(QUrl(url))
    assert actual == expected
示例6: run
def run(self):
    """Download and save the page.

    Creates the MHTML writer from the page's HTML and then requests every
    asset referenced by <link>, <script> and <img> elements, by <style>
    elements and by inline style attributes.

    The object must not be reused, you should create a new one if
    you want to download another page.

    Raises:
        ValueError: If this downloader was already used.
    """
    if self._used:
        raise ValueError("Downloader already used")
    self._used = True

    page_url = self.web_view.url()
    frame = self.web_view.page().mainFrame()

    self.writer = MHTMLWriter(
        frame.toHtml().encode('utf-8'),
        content_location=urlutils.encoded_url(page_url),
        # There seems to be no way of getting the content type of a
        # QWebView, but as the data comes from .toHtml, it's probably safe
        # to say that the content-type is HTML.
        content_type='text/html; charset="UTF-8"',
    )

    # Currently only <link> (stylesheets), <script> (javascript) and <img>
    # (image) elements are downloaded.
    for raw_element in frame.findAllElements('link, script, img'):
        element = webelem.WebElementWrapper(raw_element)
        # Websites are free to set whatever rel=... attribute they want.
        # We just care about stylesheets and icons.
        if not _check_rel(element):
            continue
        if 'src' in element:
            attribute = 'src'
        elif 'href' in element:
            attribute = 'href'
        else:
            # Might be a local <script> tag or something else
            continue
        self._fetch_url(page_url.resolved(QUrl(element[attribute])))

    for raw_style in frame.findAllElements('style'):
        style = webelem.WebElementWrapper(raw_style)
        # The Mozilla Developer Network says:
        # type: This attribute defines the styling language as a MIME type
        # (charset should not be specified). This attribute is optional and
        # default to text/css if it's missing.
        # https://developer.mozilla.org/en/docs/Web/HTML/Element/style
        if 'type' not in style or style['type'] == 'text/css':
            for import_url in _get_css_imports(str(style)):
                self._fetch_url(page_url.resolved(QUrl(import_url)))

    # Search for references in inline styles
    for raw_element in frame.findAllElements('[style]'):
        inline_css = webelem.WebElementWrapper(raw_element)['style']
        for import_url in _get_css_imports(inline_css, inline=True):
            self._fetch_url(page_url.resolved(QUrl(import_url)))

    # Shortcut if no assets need to be downloaded, otherwise the file would
    # never be saved. Also might happen if the downloads are fast enough to
    # complete before connecting their finished signal.
    self._collect_zombies()
    if not (self.pending_downloads or self._finished_file):
        self._finish_file()