This article collects typical usage examples of the Python method scrapy.utils.python.to_bytes. If you have been wondering what python.to_bytes does, how it is used, or where to find working examples of it, the curated code samples below may help. You can also explore further usage of the module it belongs to, scrapy.utils.python.
The following presents 15 code examples of python.to_bytes, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
Example 1: process_request
# Required import: from scrapy.utils import python [as alias]
# Or: from scrapy.utils.python import to_bytes [as alias]
def process_request(self, request, spider):
    # Called for each request that goes through the downloader middleware.
    # Must either:
    # - return None: continue processing this request
    # - or return a Response object
    # - or return a Request object
    # - or raise IgnoreRequest: process_exception() methods of
    #   installed downloader middleware will be called
    if spider.browser:
        request.meta['browser'] = self.browser  # to access the driver from the response
        self.browser.get(request.url)
        # wait for JavaScript to finish evaluating
        time.sleep(15)
        body = to_bytes(self.browser.page_source)  # body must be of type bytes
        return HtmlResponse(self.browser.current_url, body=body,
                            encoding='utf-8', request=request)
    else:
        return None
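Since process_request returns an HtmlResponse built from the rendered page, this middleware only takes effect once it is registered in the project settings. A minimal sketch follows; the module path and class name (myproject.middlewares.SeleniumMiddleware) are hypothetical placeholders for wherever this method is defined.

# settings.py -- module path and class name are assumptions, adjust to your project
DOWNLOADER_MIDDLEWARES = {
    'myproject.middlewares.SeleniumMiddleware': 543,
}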
Example 2: open_in_browser
# Required import: from scrapy.utils import python [as alias]
# Or: from scrapy.utils.python import to_bytes [as alias]
def open_in_browser(response, _openfunc=webbrowser.open):
    """Open the given response in a local web browser, populating the <base>
    tag for external links to work.
    """
    from scrapy.http import HtmlResponse, TextResponse
    # XXX: this implementation is a bit dirty and could be improved
    body = response.body
    if isinstance(response, HtmlResponse):
        if b'<base' not in body:
            repl = '<head><base href="%s">' % response.url
            body = body.replace(b'<head>', to_bytes(repl))
        ext = '.html'
    elif isinstance(response, TextResponse):
        ext = '.txt'
    else:
        raise TypeError("Unsupported response type: %s" %
                        response.__class__.__name__)
    fd, fname = tempfile.mkstemp(ext)
    os.write(fd, body)
    os.close(fd)
    return _openfunc("file://%s" % fname)
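A quick way to exercise this helper is to hand it a synthetic HtmlResponse; the sketch below (URL and body invented for illustration) writes a temporary .html file with an injected <base> tag and opens it in the default browser.

from scrapy.http import HtmlResponse

response = HtmlResponse(
    url='http://www.example.com/',
    body=b'<html><head></head><body>hello</body></html>',
    encoding='utf-8',
)
open_in_browser(response)  # injects <base href="http://www.example.com/"> and opens the temp file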
Example 3: store_response
# Required import: from scrapy.utils import python [as alias]
# Or: from scrapy.utils.python import to_bytes [as alias]
def store_response(self, spider, request, response):
    """Store the given response in the cache."""
    rpath = self._get_request_path(spider, request)
    if not os.path.exists(rpath):
        os.makedirs(rpath)
    metadata = {
        'url': request.url,
        'method': request.method,
        'status': response.status,
        'response_url': response.url,
        'timestamp': time(),
    }
    with self._open(os.path.join(rpath, 'meta'), 'wb') as f:
        f.write(to_bytes(repr(metadata)))
    with self._open(os.path.join(rpath, 'pickled_meta'), 'wb') as f:
        pickle.dump(metadata, f, protocol=2)
    with self._open(os.path.join(rpath, 'response_headers'), 'wb') as f:
        f.write(headers_dict_to_raw(response.headers))
    with self._open(os.path.join(rpath, 'response_body'), 'wb') as f:
        f.write(response.body)
    with self._open(os.path.join(rpath, 'request_headers'), 'wb') as f:
        f.write(headers_dict_to_raw(request.headers))
    with self._open(os.path.join(rpath, 'request_body'), 'wb') as f:
        f.write(request.body)
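For a sanity check, the pickled copy of the metadata can be read back from the same cache directory. A minimal sketch, assuming rpath points at the directory written above and that self._open is the plain built-in open (Scrapy's filesystem cache can also gzip these files, in which case gzip.open would be needed):

import os
import pickle

# rpath is assumed to be the per-request cache directory used above
with open(os.path.join(rpath, 'pickled_meta'), 'rb') as f:
    metadata = pickle.load(f)
print(metadata['url'], metadata['status'])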
Example 4: tunnel_request_data
# Required import: from scrapy.utils import python [as alias]
# Or: from scrapy.utils.python import to_bytes [as alias]
def tunnel_request_data(host, port, proxy_auth_header=None):
    r"""
    Return the binary content of a CONNECT request.

    >>> from scrapy.utils.python import to_native_str as s
    >>> s(tunnel_request_data("example.com", 8080))
    'CONNECT example.com:8080 HTTP/1.1\r\nHost: example.com:8080\r\n\r\n'
    >>> s(tunnel_request_data("example.com", 8080, b"123"))
    'CONNECT example.com:8080 HTTP/1.1\r\nHost: example.com:8080\r\nProxy-Authorization: 123\r\n\r\n'
    >>> s(tunnel_request_data(b"example.com", "8090"))
    'CONNECT example.com:8090 HTTP/1.1\r\nHost: example.com:8090\r\n\r\n'
    """
    host_value = to_bytes(host, encoding='ascii') + b':' + to_bytes(str(port))
    tunnel_req = b'CONNECT ' + host_value + b' HTTP/1.1\r\n'
    tunnel_req += b'Host: ' + host_value + b'\r\n'
    if proxy_auth_header:
        tunnel_req += b'Proxy-Authorization: ' + proxy_auth_header + b'\r\n'
    tunnel_req += b'\r\n'
    return tunnel_req
Example 5: _get_agent
# Required import: from scrapy.utils import python [as alias]
# Or: from scrapy.utils.python import to_bytes [as alias]
def _get_agent(self, request, timeout):
    bindaddress = request.meta.get('bindaddress') or self._bindAddress
    proxy = request.meta.get('proxy')
    if proxy:
        _, _, proxyHost, proxyPort, proxyParams = _parse(proxy)
        scheme = _parse(request.url)[0]
        proxyHost = to_unicode(proxyHost)
        omitConnectTunnel = b'noconnect' in proxyParams
        if scheme == b'https' and not omitConnectTunnel:
            proxyConf = (proxyHost, proxyPort,
                         request.headers.get(b'Proxy-Authorization', None))
            return self._TunnelingAgent(reactor, proxyConf,
                contextFactory=self._contextFactory, connectTimeout=timeout,
                bindAddress=bindaddress, pool=self._pool)
        else:
            return self._ProxyAgent(reactor, proxyURI=to_bytes(proxy, encoding='ascii'),
                connectTimeout=timeout, bindAddress=bindaddress, pool=self._pool)
    return self._Agent(reactor, contextFactory=self._contextFactory,
        connectTimeout=timeout, bindAddress=bindaddress, pool=self._pool)
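From a spider's point of view, the proxy branch above is driven entirely by request.meta. A hedged sketch (proxy address and spider name invented) of a request that would go through the CONNECT tunnel, and one that opts out via the noconnect marker this code checks for:

import scrapy

class ProxySpider(scrapy.Spider):  # hypothetical spider, for illustration only
    name = 'proxy_demo'

    def start_requests(self):
        # https + no 'noconnect' -> routed through _TunnelingAgent:
        yield scrapy.Request('https://example.com/page',
                             meta={'proxy': 'http://127.0.0.1:8888'})
        # 'noconnect' in the proxy URL skips the tunnel -> _ProxyAgent:
        yield scrapy.Request('https://example.com/page',
                             meta={'proxy': 'http://127.0.0.1:8888/?noconnect'})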
Example 6: binary_check
# Required import: from scrapy.utils import python [as alias]
# Or: from scrapy.utils.python import to_bytes [as alias]
def binary_check(fx_obj, cb_obj, encoding):
    if isinstance(cb_obj, (dict, Item)):
        fx_obj = {
            key: binary_check(value, cb_obj[key], encoding)
            for key, value in fx_obj.items()
        }
    if isinstance(cb_obj, list):
        fx_obj = [
            binary_check(fxitem, cbitem, encoding)
            for fxitem, cbitem in zip(fx_obj, cb_obj)
        ]
    if isinstance(cb_obj, Request):
        headers = {}
        for key, value in fx_obj['headers'].items():
            key = to_bytes(key, encoding)
            headers[key] = [to_bytes(v, encoding) for v in value]
        fx_obj['headers'] = headers
        fx_obj['body'] = to_bytes(fx_obj['body'], encoding)
    if isinstance(cb_obj, six.binary_type):
        fx_obj = fx_obj.encode(encoding)
    return fx_obj
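In words: fx_obj mirrors the structure of cb_obj, and every text value in fx_obj gets encoded wherever cb_obj holds bytes. A small illustrative run (data invented; assumes the function above with its Item/Request/six imports in scope):

expected = {'name': b'caf\xc3\xa9', 'tags': [b'a', b'b']}
fixture = {'name': 'café', 'tags': ['a', 'b']}
assert binary_check(fixture, expected, 'utf-8') == expected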
Example 7: file_path
# Required import: from scrapy.utils import python [as alias]
# Or: from scrapy.utils.python import to_bytes [as alias]
def file_path(self, request, response=None, info=None):
    import hashlib
    import datetime
    from scrapy.utils.python import to_bytes
    folder = request.meta['folder']
    image_guid = hashlib.sha1(to_bytes(request.url)).hexdigest()
    # YEAR = 2018
    # filename = 'realty-sc/%s/%s/%s/%s.jpg' % (YEAR, image_guid[:2], image_guid[2:4], image_guid)
    filename = datetime.datetime.now().strftime('images/%Y.%m.%d-%H.%M/{}/{}.jpg'.format(folder, image_guid))
    return filename
# --- this runs without a project and saves results in `output.csv` ---
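A hedged sketch of the path this yields for a single download (URL and folder name invented; the timestamp segment depends on when the pipeline runs):

import datetime
import hashlib

from scrapy import Request
from scrapy.utils.python import to_bytes

req = Request('http://example.com/img.jpg', meta={'folder': 'listing42'})
guid = hashlib.sha1(to_bytes(req.url)).hexdigest()
path = datetime.datetime.now().strftime(
    'images/%Y.%m.%d-%H.%M/{}/{}.jpg'.format('listing42', guid))
# e.g. 'images/2020.05.01-12.30/listing42/<40-char sha1 digest>.jpg'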
Example 8: _request_key
# Required import: from scrapy.utils import python [as alias]
# Or: from scrapy.utils.python import to_bytes [as alias]
def _request_key(self, request):
    return to_bytes(request_fingerprint(request))
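request_fingerprint (from scrapy.utils.request) returns a hex text string; wrapping it in to_bytes gives a key usable in byte-oriented stores such as DBM or LevelDB cache backends. For example:

from scrapy import Request
from scrapy.utils.request import request_fingerprint
from scrapy.utils.python import to_bytes

key = to_bytes(request_fingerprint(Request('http://example.com/')))
# b'<40-char hex digest>'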
Example 9: render_GET
# Required import: from scrapy.utils import python [as alias]
# Or: from scrapy.utils.python import to_bytes [as alias]
def render_GET(self, request):
    request.setHeader(b'content-type', to_bytes(self.content_type))
    for name, value in self.extra_headers.items():
        request.setHeader(to_bytes(name), to_bytes(value))
    request.setResponseCode(self.status_code)
    return to_bytes(self.html)
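This method evidently belongs to a twisted.web Resource used as a test fixture. A hedged sketch of the surrounding class (the attribute names mirror those referenced above; the class name and defaults are assumptions):

from twisted.web.resource import Resource

class MockServerResource(Resource):  # hypothetical class name
    isLeaf = True

    def __init__(self, html='', status_code=200,
                 content_type='text/html', extra_headers=None):
        Resource.__init__(self)
        self.html = html
        self.status_code = status_code
        self.content_type = content_type
        self.extra_headers = extra_headers or {}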
Example 10: file_path
# Required import: from scrapy.utils import python [as alias]
# Or: from scrapy.utils.python import to_bytes [as alias]
def file_path(self, request, response=None, info=None):
    # start of deprecation warning block (can be removed in the future)
    def _warn():
        from scrapy.exceptions import ScrapyDeprecationWarning
        import warnings
        warnings.warn('ImagesPipeline.image_key(url) and file_key(url) methods are deprecated, '
                      'please use file_path(request, response=None, info=None) instead',
                      category=ScrapyDeprecationWarning, stacklevel=1)

    # check if called from image_key or file_key with url as first argument
    if not isinstance(request, Request):
        _warn()
        url = request
    else:
        url = request.url

    # detect if file_key() or image_key() methods have been overridden
    if not hasattr(self.file_key, '_base'):
        _warn()
        return self.file_key(url)
    elif not hasattr(self.image_key, '_base'):
        _warn()
        return self.image_key(url)
    # end of deprecation warning block

    image_guid = hashlib.sha1(to_bytes(url)).hexdigest()
    return '%s%s/%s%s/%s.jpg' % (image_guid[9], image_guid[19], image_guid[29], image_guid[39], image_guid)
Example 11: request_httprepr
# Required import: from scrapy.utils import python [as alias]
# Or: from scrapy.utils.python import to_bytes [as alias]
def request_httprepr(request):
    """Return the raw HTTP representation (as bytes) of the given request.
    This is provided only for reference, since it's not the actual stream of
    bytes that will be sent when performing the request (that's controlled
    by Twisted).
    """
    parsed = urlparse_cached(request)
    path = urlunparse(('', '', parsed.path or '/', parsed.params, parsed.query, ''))
    s = to_bytes(request.method) + b" " + to_bytes(path) + b" HTTP/1.1\r\n"
    s += b"Host: " + to_bytes(parsed.hostname or b'') + b"\r\n"
    if request.headers:
        s += request.headers.to_string() + b"\r\n"
    s += b"\r\n"
    s += request.body
    return s
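This helper ships with Scrapy as scrapy.utils.request.request_httprepr; a quick demonstration of its output for a bare GET request (a fresh Request has no headers, so the headers branch is skipped):

from scrapy import Request
from scrapy.utils.request import request_httprepr

print(request_httprepr(Request('http://example.com/page?q=1')))
# b'GET /page?q=1 HTTP/1.1\r\nHost: example.com\r\n\r\n'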
Example 12: response_httprepr
# Required import: from scrapy.utils import python [as alias]
# Or: from scrapy.utils.python import to_bytes [as alias]
def response_httprepr(response):
    """Return the raw HTTP representation (as bytes) of the given response. This
    is provided only for reference, since it's not the exact stream of bytes
    that was received (that's not exposed by Twisted).
    """
    s = b"HTTP/1.1 " + to_bytes(str(response.status)) + b" " + \
        to_bytes(http.RESPONSES.get(response.status, b'')) + b"\r\n"
    if response.headers:
        s += response.headers.to_string() + b"\r\n"
    s += b"\r\n"
    s += response.body
    return s
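The response-side counterpart lives in scrapy.utils.response in the Scrapy versions this page covers; for instance:

from scrapy.http import Response
from scrapy.utils.response import response_httprepr

print(response_httprepr(Response('http://example.com/', status=404, body=b'gone')))
# b'HTTP/1.1 404 Not Found\r\n\r\ngone'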
Example 13: _response_from_text
# Required import: from scrapy.utils import python [as alias]
# Or: from scrapy.utils.python import to_bytes [as alias]
def _response_from_text(text, st):
    rt = XmlResponse if st == 'xml' else HtmlResponse
    return rt(url='about:blank', encoding='utf-8',
              body=to_bytes(text, 'utf-8'))
Example 14: _urlencode
# Required import: from scrapy.utils import python [as alias]
# Or: from scrapy.utils.python import to_bytes [as alias]
def _urlencode(seq, enc):
    values = [(to_bytes(k, enc), to_bytes(v, enc))
              for k, vs in seq
              for v in (vs if is_listlike(vs) else [vs])]
    return urlencode(values, doseq=1)
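List-like values expand into repeated parameters thanks to the flattening comprehension plus doseq=1. For example:

_urlencode([('a', '1'), ('b', ['2', '3'])], 'utf-8')
# returns 'a=1&b=2&b=3'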
Example 15: __init__
# Required import: from scrapy.utils import python [as alias]
# Or: from scrapy.utils.python import to_bytes [as alias]
def __init__(self, url, text='', fragment='', nofollow=False):
    if not isinstance(url, str):
        if six.PY2:
            warnings.warn("Link urls must be str objects. "
                          "Assuming utf-8 encoding (which could be wrong)")
            url = to_bytes(url, encoding='utf8')
        else:
            got = url.__class__.__name__
            raise TypeError("Link urls must be str objects, got %s" % got)
    self.url = url
    self.text = text
    self.fragment = fragment
    self.nofollow = nofollow
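On Python 3 this makes the str contract strict, while Python 2 merely warns and re-encodes. A short illustration (URL invented):

link = Link('http://example.com/a', text='Example', nofollow=True)  # fine

Link(b'http://example.com/a')
# on Python 3: TypeError: Link urls must be str objects, got bytes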