本文整理匯總了 Python 中 scrapy.utils.python.to_bytes 方法的典型用法代碼示例。如果您正苦於以下問題:Python python.to_bytes 方法的具體用法?Python python.to_bytes 怎麼用?Python python.to_bytes 使用的例子?那麼,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊 scrapy.utils.python 的用法示例。
在下文中一共展示了 python.to_bytes 方法的 15 個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的 Python 代碼示例。
示例1: process_request
# 需要導入模塊: from scrapy.utils import python [as 別名]
# 或者: from scrapy.utils.python import to_bytes [as 別名]
def process_request(self, request, spider):
    """Fetch the request through ``self.browser`` when the spider enables it.

    Called for each request that goes through the downloader middleware.
    Must either:
      - return None: continue processing this request
      - or return a Response object
      - or return a Request object
      - or raise IgnoreRequest: process_exception() methods of
        installed downloader middleware will be called

    When ``spider.browser`` is falsy this returns None. Otherwise the URL is
    loaded in ``self.browser`` and, after a fixed pause, the page source is
    returned wrapped in an ``HtmlResponse``.
    """
    if not spider.browser:
        return None

    driver = self.browser
    # Stored in meta so the driver can be accessed from the response.
    request.meta['browser'] = driver
    driver.get(request.url)
    # Fixed wait for the page's JavaScript to be evaluated.
    time.sleep(15)
    # The response body must be of type bytes.
    page_bytes = to_bytes(driver.page_source)
    return HtmlResponse(driver.current_url, body=page_bytes, encoding='utf-8', request=request)
示例2: open_in_browser
# 需要導入模塊: from scrapy.utils import python [as 別名]
# 或者: from scrapy.utils.python import to_bytes [as 別名]
def open_in_browser(response, _openfunc=webbrowser.open):
    """Open the given response in a local web browser, populating the <base>
    tag for external links to work.

    The body is written to a temporary file (``.html`` for an
    ``HtmlResponse``, ``.txt`` for any other ``TextResponse``) and the
    file's ``file://`` URL is handed to ``_openfunc``; whatever
    ``_openfunc`` returns is returned.

    Raises:
        TypeError: if ``response`` is neither an ``HtmlResponse`` nor a
            ``TextResponse``.
    """
    from scrapy.http import HtmlResponse, TextResponse
    # XXX: this implementation is a bit dirty and could be improved
    body = response.body
    if isinstance(response, HtmlResponse):
        if b'<base' not in body:
            repl = '<head><base href="%s">' % response.url
            # Patch only the first <head>: a later literal "<head>" (for
            # example inside a script or a comment) must stay untouched.
            body = body.replace(b'<head>', to_bytes(repl), 1)
        ext = '.html'
    elif isinstance(response, TextResponse):
        ext = '.txt'
    else:
        raise TypeError("Unsupported response type: %s" %
                        response.__class__.__name__)
    fd, fname = tempfile.mkstemp(ext)
    # Wrap the descriptor in a file object: the whole body gets written
    # (a bare os.write may write only part of it) and the descriptor is
    # closed even when the write raises.
    with os.fdopen(fd, 'wb') as f:
        f.write(body)
    return _openfunc("file://%s" % fname)
示例3: store_response
# 需要導入模塊: from scrapy.utils import python [as 別名]
# 或者: from scrapy.utils.python import to_bytes [as 別名]
def store_response(self, spider, request, response):
    """Store the given response in the cache.

    Creates the request's cache directory if it does not exist and writes
    six files into it, in this order: ``meta``, ``pickled_meta``,
    ``response_headers``, ``response_body``, ``request_headers`` and
    ``request_body``.
    """
    cache_dir = self._get_request_path(spider, request)
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    metadata = {
        'url': request.url,
        'method': request.method,
        'status': response.status,
        'response_url': response.url,
        'timestamp': time(),
    }

    # (file name, writer) pairs; each writer runs only once its file is
    # open, so payloads are computed in the same order as the files.
    writers = (
        ('meta', lambda fh: fh.write(to_bytes(repr(metadata)))),
        ('pickled_meta', lambda fh: pickle.dump(metadata, fh, protocol=2)),
        ('response_headers', lambda fh: fh.write(headers_dict_to_raw(response.headers))),
        ('response_body', lambda fh: fh.write(response.body)),
        ('request_headers', lambda fh: fh.write(headers_dict_to_raw(request.headers))),
        ('request_body', lambda fh: fh.write(request.body)),
    )
    for file_name, emit in writers:
        with self._open(os.path.join(cache_dir, file_name), 'wb') as fh:
            emit(fh)
示例4: tunnel_request_data
# 需要導入模塊: from scrapy.utils import python [as 別名]
# 或者: from scrapy.utils.python import to_bytes [as 別名]
def tunnel_request_data(host, port, proxy_auth_header=None):
    r"""
    Return binary content of a CONNECT request.

    The request consists of a ``CONNECT host:port HTTP/1.1`` line, a
    ``Host`` header and, when ``proxy_auth_header`` is truthy, a
    ``Proxy-Authorization`` header, terminated by an empty line.

    >>> from scrapy.utils.python import to_native_str as s
    >>> s(tunnel_request_data("example.com", 8080))
    'CONNECT example.com:8080 HTTP/1.1\r\nHost: example.com:8080\r\n\r\n'
    >>> s(tunnel_request_data("example.com", 8080, b"123"))
    'CONNECT example.com:8080 HTTP/1.1\r\nHost: example.com:8080\r\nProxy-Authorization: 123\r\n\r\n'
    >>> s(tunnel_request_data(b"example.com", "8090"))
    'CONNECT example.com:8090 HTTP/1.1\r\nHost: example.com:8090\r\n\r\n'
    """
    authority = b':'.join((to_bytes(host, encoding='ascii'), to_bytes(str(port))))
    lines = [
        b'CONNECT ' + authority + b' HTTP/1.1',
        b'Host: ' + authority,
    ]
    if proxy_auth_header:
        lines.append(b'Proxy-Authorization: ' + proxy_auth_header)
    # Every line ends in CRLF, followed by the blank line closing the head.
    return b'\r\n'.join(lines) + b'\r\n\r\n'
示例5: _get_agent
# 需要導入模塊: from scrapy.utils import python [as 別名]
# 或者: from scrapy.utils.python import to_bytes [as 別名]
def _get_agent(self, request, timeout):
    """Build the agent used to perform ``request``.

    Without a ``proxy`` entry in ``request.meta`` a plain ``self._Agent`` is
    returned. With a proxy, an https request gets a ``self._TunnelingAgent``
    unless the proxy parameters contain ``noconnect``; every other proxied
    request gets a ``self._ProxyAgent``.
    """
    bindaddress = request.meta.get('bindaddress') or self._bindAddress
    proxy = request.meta.get('proxy')
    if not proxy:
        return self._Agent(reactor, contextFactory=self._contextFactory,
                           connectTimeout=timeout, bindAddress=bindaddress, pool=self._pool)

    _, _, proxyHost, proxyPort, proxyParams = _parse(proxy)
    scheme = _parse(request.url)[0]
    proxyHost = to_unicode(proxyHost)
    tunnel_disabled = b'noconnect' in proxyParams

    if scheme != b'https' or tunnel_disabled:
        return self._ProxyAgent(reactor, proxyURI=to_bytes(proxy, encoding='ascii'),
                                connectTimeout=timeout, bindAddress=bindaddress, pool=self._pool)

    proxy_auth = request.headers.get(b'Proxy-Authorization', None)
    proxyConf = (proxyHost, proxyPort, proxy_auth)
    return self._TunnelingAgent(reactor, proxyConf,
                                contextFactory=self._contextFactory, connectTimeout=timeout,
                                bindAddress=bindaddress, pool=self._pool)
示例6: binary_check
# 需要導入模塊: from scrapy.utils import python [as 別名]
# 或者: from scrapy.utils.python import to_bytes [as 別名]
def binary_check(fx_obj, cb_obj, encoding):
    """Return ``fx_obj`` with values converted to bytes where ``cb_obj`` has them.

    ``cb_obj`` acts as the template: dicts/Items and lists are walked
    recursively, a ``Request`` template causes the ``headers`` and ``body``
    entries of ``fx_obj`` to be converted in place, and a binary template
    causes ``fx_obj`` itself to be encoded with ``encoding``.
    """
    if isinstance(cb_obj, (dict, Item)):
        converted = {}
        for key, value in fx_obj.items():
            converted[key] = binary_check(value, cb_obj[key], encoding)
        fx_obj = converted

    if isinstance(cb_obj, list):
        items = []
        for fx_item, cb_item in zip(fx_obj, cb_obj):
            items.append(binary_check(fx_item, cb_item, encoding))
        fx_obj = items

    if isinstance(cb_obj, Request):
        fx_obj['headers'] = {
            to_bytes(name, encoding): [to_bytes(v, encoding) for v in values]
            for name, values in fx_obj['headers'].items()
        }
        fx_obj['body'] = to_bytes(fx_obj['body'], encoding)

    if isinstance(cb_obj, six.binary_type):
        fx_obj = fx_obj.encode(encoding)

    return fx_obj
示例7: file_path
# 需要導入模塊: from scrapy.utils import python [as 別名]
# 或者: from scrapy.utils.python import to_bytes [as 別名]
def file_path(self, request, response=None, info=None):
    """Build the relative storage path for a downloaded image.

    The path has the form ``images/<Y.m.d-H.M>/<folder>/<sha1>.jpg`` where
    the timestamp is the current local time, ``<folder>`` is
    ``request.meta['folder']`` and ``<sha1>`` is the hex SHA-1 digest of
    ``request.url``. ``response`` and ``info`` are accepted but unused.

    Raises:
        KeyError: if ``request.meta`` has no ``'folder'`` entry.
    """
    import datetime
    import hashlib

    from scrapy.utils.python import to_bytes

    folder = request.meta['folder']
    image_guid = hashlib.sha1(to_bytes(request.url)).hexdigest()
    # Run strftime on the timestamp pattern only. The folder name is added
    # afterwards so a '%' inside it is never parsed as a strftime directive.
    stamp = datetime.datetime.now().strftime('%Y.%m.%d-%H.%M')
    return 'images/{}/{}/{}.jpg'.format(stamp, folder, image_guid)
# --- it runs without project and saves in `output.csv` ---
示例8: _request_key
# 需要導入模塊: from scrapy.utils import python [as 別名]
# 或者: from scrapy.utils.python import to_bytes [as 別名]
def _request_key(self, request):
    """Return the fingerprint of ``request`` converted to bytes."""
    fingerprint = request_fingerprint(request)
    return to_bytes(fingerprint)
示例9: render_GET
# 需要導入模塊: from scrapy.utils import python [as 別名]
# 或者: from scrapy.utils.python import to_bytes [as 別名]
def render_GET(self, request):
    """Answer a GET with the configured headers, status code and HTML.

    Sets ``content-type`` from ``self.content_type``, then every entry of
    ``self.extra_headers``, then the response code from
    ``self.status_code``, and returns ``self.html`` as bytes.
    """
    set_header = request.setHeader
    set_header(b'content-type', to_bytes(self.content_type))
    for header_name, header_value in self.extra_headers.items():
        set_header(to_bytes(header_name), to_bytes(header_value))
    request.setResponseCode(self.status_code)
    payload = to_bytes(self.html)
    return payload
示例10: file_path
# 需要導入模塊: from scrapy.utils import python [as 別名]
# 或者: from scrapy.utils.python import to_bytes [as 別名]
def file_path(self, request, response=None, info=None):
    """Return the storage path for the image behind ``request``.

    ``request`` may be a ``Request`` or, for deprecated callers, a bare
    URL. If ``file_key`` or ``image_key`` lacks the ``_base`` marker, the
    call is delegated to that method with the URL. Otherwise the path is
    built from the hex SHA-1 digest of the URL: characters 9 and 19 form the
    first directory, characters 29 and 39 the second, and the full digest
    plus ``.jpg`` the file name.
    """
    # start of deprecation warning block (can be removed in the future)
    def _warn():
        from scrapy.exceptions import ScrapyDeprecationWarning
        import warnings
        warnings.warn('ImagesPipeline.image_key(url) and file_key(url) methods are deprecated, '
                      'please use file_path(request, response=None, info=None) instead',
                      category=ScrapyDeprecationWarning, stacklevel=1)

    # check if called from image_key or file_key with url as first argument
    if isinstance(request, Request):
        url = request.url
    else:
        _warn()
        url = request

    # detect if file_key() or image_key() methods have been overridden
    if not hasattr(self.file_key, '_base'):
        _warn()
        return self.file_key(url)
    if not hasattr(self.image_key, '_base'):
        _warn()
        return self.image_key(url)
    # end of deprecation warning block

    image_guid = hashlib.sha1(to_bytes(url)).hexdigest()
    path_parts = (image_guid[9], image_guid[19], image_guid[29], image_guid[39], image_guid)
    return '%s%s/%s%s/%s.jpg' % path_parts
示例11: request_httprepr
# 需要導入模塊: from scrapy.utils import python [as 別名]
# 或者: from scrapy.utils.python import to_bytes [as 別名]
def request_httprepr(request):
    """Return the raw HTTP representation (as bytes) of the given request.

    This is provided only for reference since it's not the actual stream of
    bytes that will be send when performing the request (that's controlled
    by Twisted).
    """
    parsed = urlparse_cached(request)
    path = urlunparse(('', '', parsed.path or '/', parsed.params, parsed.query, ''))

    # Request line followed by the Host header.
    chunks = [
        to_bytes(request.method), b" ", to_bytes(path), b" HTTP/1.1\r\n",
        b"Host: ", to_bytes(parsed.hostname or b''), b"\r\n",
    ]
    if request.headers:
        chunks.append(request.headers.to_string())
        chunks.append(b"\r\n")
    # Blank line separating the head from the body.
    chunks.append(b"\r\n")
    chunks.append(request.body)
    return b"".join(chunks)
示例12: response_httprepr
# 需要導入模塊: from scrapy.utils import python [as 別名]
# 或者: from scrapy.utils.python import to_bytes [as 別名]
def response_httprepr(response):
    """Return raw HTTP representation (as bytes) of the given response. This
    is provided only for reference, since it's not the exact stream of bytes
    that was received (that's not exposed by Twisted).
    """
    status_code = to_bytes(str(response.status))
    # Reason phrase looked up by status code; empty when the code is unknown.
    reason = to_bytes(http.RESPONSES.get(response.status, b''))
    status_line = b" ".join((b"HTTP/1.1", status_code, reason)) + b"\r\n"

    if response.headers:
        header_block = response.headers.to_string() + b"\r\n"
    else:
        header_block = b""

    return status_line + header_block + b"\r\n" + response.body
示例13: _response_from_text
# 需要導入模塊: from scrapy.utils import python [as 別名]
# 或者: from scrapy.utils.python import to_bytes [as 別名]
def _response_from_text(text, st):
    """Wrap ``text`` in a response object with URL ``about:blank``.

    An ``XmlResponse`` is built when ``st`` equals ``'xml'``, an
    ``HtmlResponse`` otherwise; the body is ``text`` as UTF-8 bytes.
    """
    if st == 'xml':
        response_cls = XmlResponse
    else:
        response_cls = HtmlResponse
    body = to_bytes(text, 'utf-8')
    return response_cls(url='about:blank', encoding='utf-8', body=body)
示例14: _urlencode
# 需要導入模塊: from scrapy.utils import python [as 別名]
# 或者: from scrapy.utils.python import to_bytes [as 別名]
def _urlencode(seq, enc):
    """URL-encode ``seq``, an iterable of ``(key, value-or-values)`` pairs.

    A list-like value contributes one pair per element. Keys and values are
    converted to bytes with encoding ``enc`` before being passed to
    ``urlencode``.
    """
    pairs = []
    for key, raw in seq:
        candidates = raw if is_listlike(raw) else [raw]
        for item in candidates:
            pairs.append((to_bytes(key, enc), to_bytes(item, enc)))
    return urlencode(pairs, doseq=1)
示例15: __init__
# 需要導入模塊: from scrapy.utils import python [as 別名]
# 或者: from scrapy.utils.python import to_bytes [as 別名]
def __init__(self, url, text='', fragment='', nofollow=False):
    """Store the link's url, text, fragment and nofollow flag.

    A ``url`` that is not a ``str`` is rejected with ``TypeError``, except
    when ``six.PY2`` is true: there a warning is issued and the url is
    converted with ``to_bytes`` using utf8.
    """
    if not isinstance(url, str):
        if not six.PY2:
            got = url.__class__.__name__
            raise TypeError("Link urls must be str objects, got %s" % got)
        warnings.warn("Link urls must be str objects. "
                      "Assuming utf-8 encoding (which could be wrong)")
        url = to_bytes(url, encoding='utf8')

    self.url = url
    self.text = text
    self.fragment = fragment
    self.nofollow = nofollow