當前位置: 首頁>>代碼示例>>Python>>正文


Python http.Response方法代碼示例

本文整理匯總了Python中scrapy.http.Response方法的典型用法代碼示例。如果您正苦於以下問題:Python http.Response方法的具體用法?Python http.Response怎麼用?Python http.Response使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在scrapy.http的用法示例。


在下文中一共展示了http.Response方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: retry_middleware_response

# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import Response [as 別名]
def retry_middleware_response(request):
    """
    Fixture that runs one response through ``RetryUserAgentMiddleware``.

    ``request.param`` must unpack to ``(settings, status)``: the settings
    dict used to build the crawler and the HTTP status given to the
    response. The fixture yields whatever the middleware's
    ``process_response()`` returns for that response.
    """
    crawler_settings, response_status = request.param

    test_crawler = get_crawler(Spider, settings_dict=crawler_settings)
    test_spider = test_crawler._create_spider('foo')
    middleware = RetryUserAgentMiddleware.from_crawler(test_crawler)

    outgoing = Request('http://www.scrapytest.org/')
    incoming = Response(outgoing.url, body=b'', status=response_status)

    yield middleware.process_response(outgoing, incoming, test_spider)
開發者ID:alecxe,項目名稱:scrapy-fake-useragent,代碼行數:20,代碼來源:test_retry_middleware.py

示例2: _vnu_callback

# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import Response [as 別名]
def _vnu_callback(self, url: str) -> Callable[[Response], None]:
        """Build a response callback that logs validator messages for *url*.

        The returned callback parses the response text as JSON, walks its
        ``messages`` list and logs every entry whose text is not fully
        matched by ``VNU_IGNORE`` as an error, tagged with *url*.
        """
        def callback(response: Response) -> None:
            report = json.loads(response.text)
            for entry in report['messages']:
                if VNU_IGNORE.fullmatch(entry['message']):
                    # Message is on the ignore list; nothing to report.
                    continue
                # The start position falls back to the end position when
                # the entry does not carry 'firstLine' / 'firstColumn'.
                last_line = entry['lastLine']
                first_line = entry.get('firstLine', last_line)
                last_column = entry['lastColumn']
                first_column = entry.get('firstColumn', last_column)
                self.logger.error(
                    '"%s":%d.%d-%d.%d: %s: %s',
                    url,
                    first_line,
                    first_column,
                    last_line,
                    last_column,
                    entry['type'],
                    entry['message'],
                )

        return callback
開發者ID:zulip,項目名稱:zulip,代碼行數:19,代碼來源:spiders.py

示例3: parse

# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import Response [as 別名]
def parse(self, response: Response) -> Iterator[Request]:
        """Log *response*, optionally validate it, then follow its links.

        When the instance has a truthy ``validate_html`` attribute, the
        response body is first POSTed to the validator endpoint at
        ``127.0.0.1:9988`` with the original ``Content-Type`` header.
        Afterwards every link extracted from the response is turned into
        requests through ``_make_requests``.
        """
        self.log(response)

        should_validate = getattr(self, 'validate_html', False)
        if should_validate:
            content_type = response.headers['Content-Type']
            yield Request(
                'http://127.0.0.1:9988/?out=json',
                method='POST',
                headers={'Content-Type': content_type},
                body=response.body,
                callback=self._vnu_callback(response.url),
                errback=self.error_callback,
            )

        link_extractor = LxmlLinkExtractor(
            deny_domains=self.deny_domains,
            deny_extensions=['doc'],
            tags=self.tags,
            attrs=self.attrs,
            deny=self.deny,
            canonicalize=False,
        )
        for extracted in link_extractor.extract_links(response):
            yield from self._make_requests(extracted.url)
開發者ID:zulip,項目名稱:zulip,代碼行數:19,代碼來源:spiders.py

示例4: process_request

# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import Response [as 別名]
def process_request(self, request: Request, spider: Spider):
        """Skip the download when the response would not be used.

        ``utils.is_response_going_to_be_used`` decides whether the callback
        or a Page Input actually needs the ``Response``. If it does, this
        method returns ``None`` and the request proceeds as usual.

        Otherwise a ``utils.DummyResponse`` bound to the original
        ``Request`` is returned in its place, which avoids an unnecessary
        download -- for example when the callback relies on another data
        source such as an external API (e.g. Scrapinghub's Auto Extract).
        """
        if not utils.is_response_going_to_be_used(request, spider):
            spider.logger.debug(f'Skipping download of {request}')
            return utils.DummyResponse(url=request.url, request=request)

        return None
開發者ID:scrapinghub,項目名稱:scrapy-poet,代碼行數:21,代碼來源:middleware.py

示例5: default

# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import Response [as 別名]
def default(self, o):
        """Convert *o* into something the JSON encoder can serialize.

        Sets become lists; datetimes, dates and times are formatted with
        ``self.DATE_FORMAT`` / ``self.TIME_FORMAT``; decimals and deferreds
        become strings; items become dicts; requests and responses become
        short ``<Type ... url>`` summaries. Anything else is delegated to
        the parent encoder.
        """
        if isinstance(o, set):
            return list(o)

        # datetime must be tested before date: datetime is a date subclass.
        if isinstance(o, datetime.datetime):
            combined_format = "%s %s" % (self.DATE_FORMAT, self.TIME_FORMAT)
            return o.strftime(combined_format)
        if isinstance(o, datetime.date):
            return o.strftime(self.DATE_FORMAT)
        if isinstance(o, datetime.time):
            return o.strftime(self.TIME_FORMAT)

        if isinstance(o, decimal.Decimal):
            return str(o)
        if isinstance(o, defer.Deferred):
            return str(o)

        if isinstance(o, BaseItem):
            return dict(o)

        if isinstance(o, Request):
            return "<%s %s %s>" % (type(o).__name__, o.method, o.url)
        if isinstance(o, Response):
            return "<%s %s %s>" % (type(o).__name__, o.status, o.url)

        return super(ScrapyJSONEncoder, self).default(o)
開發者ID:wistbean,項目名稱:learn_python3_spider,代碼行數:23,代碼來源:serialize.py

示例6: xmliter

# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import Response [as 別名]
def xmliter(obj, nodename):
    """Iterate over the nodes named *nodename* in an XML document.

    Yields one Selector per matching node, which is useful for parsing
    XML feeds.

    obj can be:
    - a Response object
    - a unicode string
    - a string encoded as utf-8
    """
    escaped_name = re.escape(nodename)

    header_start_re = re.compile(r'^(.*?)<\s*%s(?:\s|>)' % escaped_name, re.S)
    header_end_re = re.compile(r'<\s*/%s\s*>' % escaped_name, re.S)
    document = _body_or_str(obj)

    # Text before the first node and after the last closing tag is wrapped
    # around every extracted node before it is handed to Selector.
    start_match = header_start_re.search(document)
    prefix = start_match.group(1).strip() if start_match else ''
    end_span = re_rsearch(header_end_re, document)
    suffix = document[end_span[1]:].strip() if end_span else ''

    node_re = re.compile(r'<%(np)s[\s>].*?</%(np)s>' % {'np': escaped_name}, re.DOTALL)
    node_xpath = '//' + nodename
    for node_match in node_re.finditer(document):
        wrapped_node = prefix + node_match.group() + suffix
        yield Selector(text=wrapped_node, type='xml').xpath(node_xpath)[0]
開發者ID:wistbean,項目名稱:learn_python3_spider,代碼行數:26,代碼來源:iterators.py

示例7: _body_or_str

# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import Response [as 別名]
def _body_or_str(obj, unicode=True):
    """Return the content of *obj* as text or as bytes.

    *obj* must be a Response, a text string or a byte string; anything
    else fails the type assertion. With ``unicode=True`` (the default)
    the result is text, decoding bytes as UTF-8 where needed; with
    ``unicode=False`` the result is bytes, encoding text as UTF-8 where
    needed.
    """
    expected_types = (Response, six.text_type, six.binary_type)
    assert isinstance(obj, expected_types), (
        "obj must be %s, not %s" % (
            " or ".join(t.__name__ for t in expected_types),
            type(obj).__name__)
    )

    if isinstance(obj, Response):
        if not unicode:
            return obj.body
        if isinstance(obj, TextResponse):
            return obj.text
        return obj.body.decode('utf-8')

    if isinstance(obj, six.text_type):
        if unicode:
            return obj
        return obj.encode('utf-8')

    # Remaining case: a byte string.
    if unicode:
        return obj.decode('utf-8')
    return obj
開發者ID:wistbean,項目名稱:learn_python3_spider,代碼行數:19,代碼來源:iterators.py

示例8: process_response

# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import Response [as 別名]
def process_response(self, request, response, spider):
        """Decode the body of a response that carries a Content-Encoding.

        Responses to HEAD requests, objects that are not ``Response``
        instances, and responses without a ``Content-Encoding`` header are
        returned untouched. Otherwise the last listed encoding is decoded
        via ``self._decode`` and the response is rebuilt with the decoded
        body and a response class chosen by ``responsetypes.from_args``.
        """
        if request.method == 'HEAD' or not isinstance(response, Response):
            return response

        encodings = response.headers.getlist('Content-Encoding')
        if not encodings:
            return response

        last_encoding = encodings.pop()
        decoded_body = self._decode(response.body, last_encoding.lower())
        response_class = responsetypes.from_args(
            headers=response.headers, url=response.url, body=decoded_body)
        replacements = {'cls': response_class, 'body': decoded_body}
        if issubclass(response_class, TextResponse):
            # force recalculating the encoding until we make sure the
            # responsetypes guessing is reliable
            replacements['encoding'] = None
        response = response.replace(**replacements)

        # The header is dropped only when the decoded encoding was the
        # sole one listed.
        if not encodings:
            del response.headers['Content-Encoding']

        return response
開發者ID:wistbean,項目名稱:learn_python3_spider,代碼行數:23,代碼來源:httpcompression.py

示例9: _download

# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import Response [as 別名]
def _download(self, request, spider):
        """Fetch *request* through the downloader and return its deferred.

        The request is registered on ``self.slot`` before fetching. On
        success, a ``Response`` result is tied to the request, logged and
        announced through the ``response_received`` signal; a ``Request``
        result is passed through unchanged. Whatever the outcome, the
        slot's ``nextcall`` is scheduled afterwards.
        """
        slot = self.slot
        slot.add_request(request)

        def _on_success(result):
            assert isinstance(result, (Response, Request))
            if not isinstance(result, Response):
                return result
            result.request = request  # tie request to response received
            logkws = self.logformatter.crawled(request, result, spider)
            logger.log(*logformatter_adapter(logkws), extra={'spider': spider})
            self.signals.send_catch_log(
                signal=signals.response_received,
                response=result,
                request=request,
                spider=spider,
            )
            return result

        def _on_complete(outcome):
            slot.nextcall.schedule()
            return outcome

        dwld = self.downloader.fetch(request, spider)
        dwld.addCallbacks(_on_success)
        dwld.addBoth(_on_complete)
        return dwld
開發者ID:wistbean,項目名稱:learn_python3_spider,代碼行數:23,代碼來源:engine.py

示例10: policy

# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import Response [as 別名]
def policy(self, resp_or_url, request):
        """
        Pick the Referrer-Policy for *request*, given its parent Response
        (or URL).

        Resolution order:

        - a valid policy named in the Request meta wins;
        - a policy named in meta that cannot be loaded (e.g. a typo)
          falls back to the policy from settings;
        - with nothing in meta, a valid Referrer-Policy header on the
          parent response is used;
        - in every other case the policy from settings is used.
        """
        policy_name = request.meta.get('referrer_policy')

        # Only consult the parent's header when meta did not name a policy.
        if policy_name is None and isinstance(resp_or_url, Response):
            header_value = resp_or_url.headers.get('Referrer-Policy')
            if header_value is not None:
                policy_name = to_native_str(header_value.decode('latin1'))

        if policy_name is None:
            return self.default_policy()

        policy_class = _load_policy_class(policy_name, warning_only=True)
        if policy_class:
            return policy_class()
        return self.default_policy()
開發者ID:wistbean,項目名稱:learn_python3_spider,代碼行數:26,代碼來源:referer.py

示例11: process_response

# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import Response [as 別名]
def process_response(self, request, response, spider):
        """Handle CDX listing responses and tidy up snapshot responses.

        - For a CDX request (``wayback_machine_cdx_request`` in meta), the
          snapshot requests built from the response are scheduled on the
          engine and ``UnhandledIgnoreRequest`` is raised to abort the
          current request; an empty listing yields a 404 ``Response`` for
          the original URL instead.
        - For a snapshot response (``wayback_machine_url`` in meta), the
          response is returned with its URL replaced by the original
          request's URL.
        - Any other response is returned unchanged.
        """
        meta = request.meta
        original_key = 'wayback_machine_original_request'

        if not meta.get('wayback_machine_cdx_request'):
            # clean up snapshot responses
            if meta.get('wayback_machine_url'):
                return response.replace(url=meta[original_key].url)
            return response

        # parse CDX requests and schedule future snapshot requests
        snapshot_requests = self.build_snapshot_requests(response, meta)

        # treat empty listings as 404s
        if not snapshot_requests:
            return Response(meta[original_key].url, status=404)

        # schedule all of the snapshots
        for snapshot in snapshot_requests:
            self.crawler.engine.schedule(snapshot, spider)

        # abort this request
        raise UnhandledIgnoreRequest
開發者ID:sangaline,項目名稱:scrapy-wayback-machine,代碼行數:25,代碼來源:__init__.py

示例12: _assert_enabled

# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import Response [as 別名]
def _assert_enabled(spider,
                    settings=None,
                    url='http://quotes.toscrape.com',
                    api_url='autoextract.scrapinghub.com',
                    api_auth=basic_auth_header('apikey', '')):
    """Assert that the mocked middleware handles *url* as enabled.

    Checks that ``process_request`` produces a request aimed at *api_url*
    with the ``autoextract`` meta enabled, the expected ``Authorization``
    header and a ``User-Agent`` header. Then checks that
    ``process_response`` on a ``[{}]`` body yields meta carrying the
    original URL and a dict under ``article``.
    """
    middleware = _mock_mw(spider, settings)

    original_request = Request(url, meta=AUTOX_META)
    api_request = middleware.process_request(original_request, spider)
    assert api_url in api_request.url
    assert api_request.meta['autoextract'].get('enabled')
    assert api_request.headers.get('Authorization') == api_auth
    assert 'User-Agent' in api_request.headers

    api_response = Response(api_request.url, request=api_request, body=b'[{}]')
    processed = middleware.process_response(api_request, api_response, spider)
    autoextract_meta = processed.meta['autoextract']
    assert autoextract_meta.get('original_url') == url
    assert isinstance(autoextract_meta.get('article'), dict)
開發者ID:scrapinghub,項目名稱:scrapy-autoextract,代碼行數:20,代碼來源:test_autoextract.py

示例13: check_existing

# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import Response [as 別名]
def check_existing(self, response: Response) -> None:
        """Callback that only passes *response* to ``self.log``.

        No content is inspected here; the response is simply logged.
        """
        self.log(response)
開發者ID:zulip,項目名稱:zulip,代碼行數:4,代碼來源:spiders.py

示例14: check_fragment

# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import Response [as 別名]
def check_fragment(self, response: Response) -> None:
        """Log an error when the URL fragment has no target on the page.

        The fragment is taken from the URL of the request that produced
        *response*. Nothing is checked when that URL has no ``#`` part.
        Otherwise the page must contain an element whose ``id`` or
        ``name`` equals the fragment; if none is found, an error is
        logged.
        """
        self.log(response)

        request_url = response.request.url
        # Get fragment value.
        fragment_match = re.match(r".+\#(?P<fragment>.*)$", request_url)
        if fragment_match is None:
            return

        fragment = fragment_match.group('fragment')
        # Check fragment existing on response page.
        target_xpath = "//*[@id='{fragment}' or @name='{fragment}']".format(fragment=fragment)
        if response.selector.xpath(target_xpath):
            return

        self.logger.error(
            "Fragment #%s is not found on page %s", fragment, request_url)
開發者ID:zulip,項目名稱:zulip,代碼行數:13,代碼來源:spiders.py

示例15: _make_requests

# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import Response [as 別名]
def _make_requests(self, url: str) -> Iterator[Request]:
        """Yield the request (if any) that should be issued for *url*.

        - URLs for which ``_is_external_url`` is true get a ``HEAD``
          request handled by ``check_existing``.
        - Other URLs containing ``#`` get an unfiltered ``GET`` request
          handled by ``check_fragment``.
        - Everything else gets a ``GET`` request handled by ``parse``.

        Nothing is yielded when the instance has a truthy
        ``skip_external`` attribute and ``_is_external_link`` is true for
        the URL.
        """
        handler: Callable[[Response], Optional[Iterator[Request]]] = self.parse
        http_method = 'GET'
        bypass_dupefilter = False

        if self._is_external_url(url):
            handler, http_method = self.check_existing, 'HEAD'
        elif '#' in url:
            handler, bypass_dupefilter = self.check_fragment, True

        skip_external = getattr(self, 'skip_external', False)
        if skip_external and self._is_external_link(url):
            return

        yield Request(
            url,
            method=http_method,
            callback=handler,
            dont_filter=bypass_dupefilter,
            errback=self.error_callback,
        )
開發者ID:zulip,項目名稱:zulip,代碼行數:16,代碼來源:spiders.py


注:本文中的scrapy.http.Response方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。