本文整理汇总了Python中scrapy.http.Response方法的典型用法代码示例。如果您正苦于以下问题:Python http.Response方法的具体用法?Python http.Response怎么用?Python http.Response使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类scrapy.http
的用法示例。
在下文中一共展示了http.Response方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: retry_middleware_response
# 需要导入模块: from scrapy import http [as 别名]
# 或者: from scrapy.http import Response [as 别名]
def retry_middleware_response(request):
    """
    Fixture that drives one request/response pair through
    RetryUserAgentMiddleware.process_response() on a freshly
    built crawler, yielding whatever the middleware returns.
    """
    middleware_settings, response_status = request.param
    crawler = get_crawler(Spider, settings_dict=middleware_settings)
    spider = crawler._create_spider('foo')
    middleware = RetryUserAgentMiddleware.from_crawler(crawler)
    outgoing = Request('http://www.scrapytest.org/')
    incoming = Response(outgoing.url, body=b'', status=response_status)
    yield middleware.process_response(outgoing, incoming, spider)
示例2: _vnu_callback
# 需要导入模块: from scrapy import http [as 别名]
# 或者: from scrapy.http import Response [as 别名]
def _vnu_callback(self, url: str) -> Callable[[Response], None]:
    """Build a callback that logs v.Nu validator messages for *url*,
    skipping any message matched by VNU_IGNORE."""
    def callback(response: Response) -> None:
        report = json.loads(response.text)
        for message in report['messages']:
            if VNU_IGNORE.fullmatch(message['message']):
                continue
            # firstLine/firstColumn are optional; v.Nu omits them for
            # single-position messages, so fall back to the last ones.
            first_line = message.get('firstLine', message['lastLine'])
            first_column = message.get('firstColumn', message['lastColumn'])
            self.logger.error(
                '"%s":%d.%d-%d.%d: %s: %s',
                url,
                first_line,
                first_column,
                message['lastLine'],
                message['lastColumn'],
                message['type'],
                message['message'],
            )
    return callback
示例3: parse
# 需要导入模块: from scrapy import http [as 别名]
# 或者: from scrapy.http import Response [as 别名]
def parse(self, response: Response) -> Iterator[Request]:
    """Log the response, optionally POST its body to the local v.Nu
    HTML validator, then follow every link extracted from the page."""
    self.log(response)
    if getattr(self, 'validate_html', False):
        yield Request(
            'http://127.0.0.1:9988/?out=json',
            method='POST',
            headers={'Content-Type': response.headers['Content-Type']},
            body=response.body,
            callback=self._vnu_callback(response.url),
            errback=self.error_callback,
        )
    extractor = LxmlLinkExtractor(
        deny_domains=self.deny_domains,
        deny_extensions=['doc'],
        tags=self.tags,
        attrs=self.attrs,
        deny=self.deny,
        canonicalize=False,
    )
    for link in extractor.extract_links(response):
        yield from self._make_requests(link.url)
示例4: process_request
# 需要导入模块: from scrapy import http [as 别名]
# 或者: from scrapy.http import Response [as 别名]
def process_request(self, request: Request, spider: Spider):
    """Skip the download when the callback will never look at the real
    ``Response``.

    If ``utils.is_response_going_to_be_used`` reports that the callback
    (or a Page Input) needs the response, return ``None`` so the
    download proceeds normally. Otherwise return a
    ``utils.DummyResponse`` tied to *request*, saving an unnecessary
    network round trip — e.g. when the callback actually relies on an
    external API such as Scrapinghub's AutoExtract.
    """
    if utils.is_response_going_to_be_used(request, spider):
        return None
    spider.logger.debug(f'Skipping download of {request}')
    return utils.DummyResponse(url=request.url, request=request)
示例5: default
# 需要导入模块: from scrapy import http [as 别名]
# 或者: from scrapy.http import Response [as 别名]
def default(self, o):
    """Serialize objects the stock JSONEncoder cannot handle:
    sets, date/time types, Decimal, Deferred, and Scrapy
    items/requests/responses (the latter as short repr strings)."""
    if isinstance(o, set):
        return list(o)
    # datetime must be tested before date: datetime is a date subclass.
    if isinstance(o, datetime.datetime):
        return o.strftime(f"{self.DATE_FORMAT} {self.TIME_FORMAT}")
    if isinstance(o, datetime.date):
        return o.strftime(self.DATE_FORMAT)
    if isinstance(o, datetime.time):
        return o.strftime(self.TIME_FORMAT)
    if isinstance(o, decimal.Decimal):
        return str(o)
    if isinstance(o, defer.Deferred):
        return str(o)
    if isinstance(o, BaseItem):
        return dict(o)
    if isinstance(o, Request):
        return "<%s %s %s>" % (type(o).__name__, o.method, o.url)
    if isinstance(o, Response):
        return "<%s %s %s>" % (type(o).__name__, o.status, o.url)
    return super(ScrapyJSONEncoder, self).default(o)
示例6: xmliter
# 需要导入模块: from scrapy import http [as 别名]
# 或者: from scrapy.http import Response [as 别名]
def xmliter(obj, nodename):
    """Yield one Selector per <nodename> node of an XML document,
    re-attaching the document's header and footer around each node so
    every yielded fragment parses standalone. Useful for XML feeds.

    obj can be:
    - a Response object
    - a unicode string
    - a string encoded as utf-8
    """
    escaped = re.escape(nodename)
    start_re = re.compile(r'^(.*?)<\s*%s(?:\s|>)' % escaped, re.S)
    end_re = re.compile(r'<\s*/%s\s*>' % escaped, re.S)
    node_re = re.compile(r'<%(np)s[\s>].*?</%(np)s>' % {'np': escaped}, re.DOTALL)

    text = _body_or_str(obj)

    # Everything before the first opening tag is the reusable header.
    start_match = re.search(start_re, text)
    header = start_match.group(1).strip() if start_match else ''
    # Everything after the last closing tag is the reusable footer.
    end_match = re_rsearch(end_re, text)
    footer = text[end_match[1]:].strip() if end_match else ''

    for match in node_re.finditer(text):
        fragment = header + match.group() + footer
        yield Selector(text=fragment, type='xml').xpath('//' + nodename)[0]
示例7: _body_or_str
# 需要导入模块: from scrapy import http [as 别名]
# 或者: from scrapy.http import Response [as 别名]
def _body_or_str(obj, unicode=True):
expected_types = (Response, six.text_type, six.binary_type)
assert isinstance(obj, expected_types), \
"obj must be %s, not %s" % (
" or ".join(t.__name__ for t in expected_types),
type(obj).__name__)
if isinstance(obj, Response):
if not unicode:
return obj.body
elif isinstance(obj, TextResponse):
return obj.text
else:
return obj.body.decode('utf-8')
elif isinstance(obj, six.text_type):
return obj if unicode else obj.encode('utf-8')
else:
return obj.decode('utf-8') if unicode else obj
示例8: process_response
# 需要导入模块: from scrapy import http [as 别名]
# 或者: from scrapy.http import Response [as 别名]
def process_response(self, request, response, spider):
    """Transparently decompress a response according to its
    Content-Encoding header. HEAD responses and non-Response objects
    are passed through untouched."""
    if request.method == 'HEAD':
        return response
    if not isinstance(response, Response):
        return response
    content_encoding = response.headers.getlist('Content-Encoding')
    if content_encoding:
        encoding = content_encoding.pop()
        decoded_body = self._decode(response.body, encoding.lower())
        respcls = responsetypes.from_args(
            headers=response.headers, url=response.url, body=decoded_body)
        kwargs = {'cls': respcls, 'body': decoded_body}
        if issubclass(respcls, TextResponse):
            # force recalculating the encoding until we make sure the
            # responsetypes guessing is reliable
            kwargs['encoding'] = None
        response = response.replace(**kwargs)
        # all encodings consumed -> header no longer describes the body
        if not content_encoding:
            del response.headers['Content-Encoding']
    return response
示例9: _download
# 需要导入模块: from scrapy import http [as 别名]
# 或者: from scrapy.http import Response [as 别名]
def _download(self, request, spider):
slot = self.slot
slot.add_request(request)
def _on_success(response):
assert isinstance(response, (Response, Request))
if isinstance(response, Response):
response.request = request # tie request to response received
logkws = self.logformatter.crawled(request, response, spider)
logger.log(*logformatter_adapter(logkws), extra={'spider': spider})
self.signals.send_catch_log(signal=signals.response_received, \
response=response, request=request, spider=spider)
return response
def _on_complete(_):
slot.nextcall.schedule()
return _
dwld = self.downloader.fetch(request, spider)
dwld.addCallbacks(_on_success)
dwld.addBoth(_on_complete)
return dwld
示例10: policy
# 需要导入模块: from scrapy import http [as 别名]
# 或者: from scrapy.http import Response [as 别名]
def policy(self, resp_or_url, request):
    """Determine the Referrer-Policy to use from a parent Response
    (or URL) and the Request about to be sent.

    Precedence:
    - a valid policy name in Request meta is used;
    - a wrong name in meta (e.g. a typo) falls back to settings;
    - otherwise a valid Referrer-Policy header on the parent
      response is used;
    - otherwise the policy from settings applies.
    """
    policy_name = request.meta.get('referrer_policy')
    if policy_name is None and isinstance(resp_or_url, Response):
        policy_header = resp_or_url.headers.get('Referrer-Policy')
        if policy_header is not None:
            policy_name = to_native_str(policy_header.decode('latin1'))
    if policy_name is None:
        return self.default_policy()
    cls = _load_policy_class(policy_name, warning_only=True)
    return cls() if cls else self.default_policy()
示例11: process_response
# 需要导入模块: from scrapy import http [as 别名]
# 或者: from scrapy.http import Response [as 别名]
def process_response(self, request, response, spider):
    """Post-process Wayback Machine traffic.

    CDX listing responses are expanded into scheduled snapshot requests
    (an empty listing is surfaced as a synthetic 404 for the original
    URL) and the CDX response itself is aborted. Snapshot responses get
    their URL rewritten back to the originally requested one.
    """
    meta = request.meta
    # parse CDX requests and schedule future snapshot requests
    if meta.get('wayback_machine_cdx_request'):
        snapshot_requests = self.build_snapshot_requests(response, meta)
        original_url = meta['wayback_machine_original_request'].url
        # treat empty listings as 404s
        if not snapshot_requests:
            return Response(original_url, status=404)
        # schedule all of the snapshots
        for snapshot_request in snapshot_requests:
            self.crawler.engine.schedule(snapshot_request, spider)
        # abort this request
        raise UnhandledIgnoreRequest
    # clean up snapshot responses
    if meta.get('wayback_machine_url'):
        return response.replace(url=meta['wayback_machine_original_request'].url)
    return response
示例12: _assert_enabled
# 需要导入模块: from scrapy import http [as 别名]
# 或者: from scrapy.http import Response [as 别名]
def _assert_enabled(spider,
                    settings=None,
                    url='http://quotes.toscrape.com',
                    api_url='autoextract.scrapinghub.com',
                    api_auth=basic_auth_header('apikey', '')):
    """Drive a request/response pair through the AutoExtract middleware
    and assert the request was rewritten to the API endpoint and the
    API reply was parsed back into the original request's meta."""
    middleware = _mock_mw(spider, settings)
    original_request = Request(url, meta=AUTOX_META)
    api_request = middleware.process_request(original_request, spider)
    assert api_url in api_request.url
    assert api_request.meta['autoextract'].get('enabled')
    assert api_request.headers.get('Authorization') == api_auth
    assert 'User-Agent' in api_request.headers
    api_response = Response(api_request.url, request=api_request, body=b'[{}]')
    processed = middleware.process_response(api_request, api_response, spider)
    assert processed.meta['autoextract'].get('original_url') == url
    assert isinstance(processed.meta['autoextract'].get('article'), dict)
示例13: check_existing
# 需要导入模块: from scrapy import http [as 别名]
# 或者: from scrapy.http import Response [as 别名]
def check_existing(self, response: Response) -> None:
    """An external page only needs to exist; just record the response."""
    self.log(response)
示例14: check_fragment
# 需要导入模块: from scrapy import http [as 别名]
# 或者: from scrapy.http import Response [as 别名]
def check_fragment(self, response: Response) -> None:
    """Verify that the fragment of the requested URL exists on the
    response page as an element id or name; log an error otherwise."""
    self.log(response)
    requested_url = response.request.url
    m = re.match(r".+\#(?P<fragment>.*)$", requested_url)  # Get fragment value.
    if not m:
        return
    fragment = m.group('fragment')
    # Check fragment existing on response page.
    selector = "//*[@id='{fragment}' or @name='{fragment}']".format(fragment=fragment)
    if not response.selector.xpath(selector):
        self.logger.error(
            "Fragment #%s is not found on page %s", fragment, requested_url)
示例15: _make_requests
# 需要导入模块: from scrapy import http [as 别名]
# 或者: from scrapy.http import Response [as 别名]
def _make_requests(self, url: str) -> Iterator[Request]:
    """Yield a Request for *url* with the method/callback suited to it:
    HEAD + check_existing for external URLs, check_fragment (unfiltered)
    for fragment URLs, and plain parse otherwise. External links are
    suppressed entirely when skip_external is set."""
    callback: Callable[[Response], Optional[Iterator[Request]]] = self.parse
    dont_filter = False
    method = 'GET'
    if self._is_external_url(url):
        # external pages only need an existence check
        method = 'HEAD'
        callback = self.check_existing
    elif '#' in url:
        # bypass dupe-filtering so every distinct fragment gets checked
        dont_filter = True
        callback = self.check_fragment
    if getattr(self, 'skip_external', False) and self._is_external_link(url):
        return
    yield Request(url, method=method, callback=callback, dont_filter=dont_filter,
                  errback=self.error_callback)