本文整理汇总了Python中scrapy.spidermiddlewares.httperror.HttpError类的典型用法代码示例。如果您正苦于以下问题：Python httperror.HttpError类的具体用法？Python httperror.HttpError怎么用？Python httperror.HttpError使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块scrapy.spidermiddlewares.httperror的用法示例。
在下文中一共展示了httperror.HttpError类的12个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: errback_httpbin
# 需要导入模块: from scrapy.spidermiddlewares import httperror [as 别名]
# 或者: from scrapy.spidermiddlewares.httperror import HttpError [as 别名]
def errback_httpbin(self, failure):
    """Log a failed request, with an extra line for recognised error types.

    Every failure is logged via ``repr``. HTTP errors additionally log the
    URL of the response attached to the exception; DNS lookup failures and
    timeouts additionally log the URL of the originating request. Any other
    failure type gets only the generic log line.

    Args:
        failure: The failure object handed to the errback; it must expose
            ``check()``, ``value`` and ``request``.
    """
    # Always record the raw failure first.
    self.logger.error(repr(failure))

    if failure.check(HttpError):
        # The exception raised by the HttpError spider middleware carries
        # the offending (non-200) response.
        bad_response = failure.value.response
        self.logger.error('HttpError on %s', bad_response.url)
        return

    # The remaining known cases only have the original request to report.
    if failure.check(DNSLookupError):
        message = 'DNSLookupError on %s'
    elif failure.check(TimeoutError, TCPTimedOutError):
        message = 'TimeoutError on %s'
    else:
        return

    origin = failure.request
    self.logger.error(message, origin.url)
示例2: error_handler
# 需要导入模块: from scrapy.spidermiddlewares import httperror [as 别名]
# 或者: from scrapy.spidermiddlewares.httperror import HttpError [as 别名]
def error_handler(self, failure):
    """Account for a finished spider request and log why it failed.

    Decrements ``self.n_live_spider``. For HTTP errors, the response status
    is stored on the database request object found in the request's
    ``meta['db_request']`` and the object is saved; a 599 status also clears
    its ``is_pending`` flag. Any other failure is only logged.

    NOTE(review): the source lost its indentation; ``save()`` is assumed to
    run for every HTTP error (not only for 599) — confirm against upstream.

    Args:
        failure: The failure object handed to the errback.
    """
    self.n_live_spider -= 1

    if not failure.check(HttpError):
        self.logger.error(
            '[Live|{}] Error: {}'.format(self.n_live_spider, failure))
        return

    response = failure.value.response
    status = response.status
    self.logger.error('[Live|{}] HttpError on {}[{}]'.format(
        self.n_live_spider, response.url, status))

    # Persist the outcome on the DB-side record that travelled in meta.
    db_request = response.request.meta['db_request']
    db_request.last_status = status
    if status == 599:
        db_request.is_pending = False
    db_request.save()
示例3: errback_httpbin
# 需要导入模块: from scrapy.spidermiddlewares import httperror [as 别名]
# 或者: from scrapy.spidermiddlewares.httperror import HttpError [as 别名]
def errback_httpbin(self, failure):
    """Log a failed request and report recognised error types in detail.

    The failure is always logged via ``repr``. For an HTTP error a marker
    line is printed to stdout and the response URL is logged; for DNS lookup
    failures and timeouts the originating request URL is logged. Other
    failure types produce only the generic log line.

    Args:
        failure: The failure object handed to the errback; it must expose
            ``check()``, ``value`` and ``request``.
    """
    log_error = self.logger.error
    log_error(repr(failure))

    if failure.check(HttpError):
        print("HttpError出错了")
        # The HttpError exception carries the non-200 response itself.
        log_error('HttpError on %s', failure.value.response.url)
    elif failure.check(DNSLookupError):
        # Only the original request is available for DNS failures.
        log_error('DNSLookupError on %s', failure.request.url)
    elif failure.check(TimeoutError, TCPTimedOutError):
        log_error('TimeoutError on %s', failure.request.url)
示例4: check_ip_failed
# 需要导入模块: from scrapy.spidermiddlewares import httperror [as 别名]
# 或者: from scrapy.spidermiddlewares.httperror import HttpError [as 别名]
def check_ip_failed(self, failure):
    """Record a failed check for the item attached to the request and log it.

    The item stored in the request's ``meta['_item_obj']`` is turned into a
    key with ``build_key`` and reported to ``self.srv.add_failure``. After
    that, HTTP errors, DNS lookup failures and timeouts each get a dedicated
    log line; other failure types get only the generic ``repr`` line.

    Args:
        failure: The failure object handed to the errback.
    """
    self.logger.error(repr(failure))

    # Register the failure before classifying it, whatever its type.
    failed_item = failure.request.meta['_item_obj']
    self.srv.add_failure(build_key(failed_item))

    if failure.check(HttpError):
        self.logger.error('HttpError on %s', failure.value.response.url)
        return

    if failure.check(DNSLookupError):
        message = 'DNSLookupError on %s'
    elif failure.check(TimeoutError, TCPTimedOutError):
        message = 'TimeoutError on %s'
    else:
        return
    self.logger.error(message, failure.request.url)
示例5: error_callback
# 需要导入模块: from scrapy.spidermiddlewares import httperror [as 别名]
# 或者: from scrapy.spidermiddlewares.httperror import HttpError [as 别名]
def error_callback(self, failure: Failure) -> Optional[Union[Failure, Iterator[Request]]]:
    """Decide what to do with a failed link check.

    Non-HTTP failures are returned unchanged. For HTTP errors:

    * if ``self.exclude_error`` accepts the response URL, the error is
      swallowed (``None`` is returned);
    * if a ``HEAD`` request was rejected with 405, the result of
      ``self.retry_request_with_get`` for that request is returned;
    * otherwise the response is logged and the failure is returned.
    """
    if not failure.check(HttpError):
        return failure

    response = failure.value.response
    if self.exclude_error(response.url):
        return None

    head_rejected = response.status == 405 and response.request.method == 'HEAD'
    if head_rejected:
        # The server does not allow HEAD; try the same request again as GET.
        return self.retry_request_with_get(response.request)

    self.logger.error("Please check link: %s", response)
    return failure
示例6: process_spider_exception
# 需要导入模块: from scrapy.spidermiddlewares import httperror [as 别名]
# 或者: from scrapy.spidermiddlewares.httperror import HttpError [as 别名]
def process_spider_exception(self, response, exception, spider):
    """Stop the spider when an HTTP error with status 456 is seen.

    Any other exception or status falls through and ``None`` is returned
    implicitly.

    Raises:
        CloseSpider: If ``exception`` is an ``HttpError`` and the response
            status is 456.
    """
    # Status 456 is treated as "blocked": close the spider rather than
    # flagging the response and carrying on.
    if isinstance(exception, HttpError) and response.status == 456:
        raise CloseSpider('catch forbidden,close for a while')
# downloader middleware
示例7: error_handler
# 需要导入模块: from scrapy.spidermiddlewares import httperror [as 别名]
# 或者: from scrapy.spidermiddlewares.httperror import HttpError [as 别名]
def error_handler(self, failure):
    """Log a request failure, including URL and status for HTTP errors.

    Args:
        failure: The failure object handed to the errback.
    """
    if not failure.check(HttpError):
        # Nothing more specific is known; log the failure itself.
        self.logger.error('Error: {}'.format(failure))
        return

    http_response = failure.value.response
    self.logger.error(
        'HttpError on {}[{}]'.format(http_response.url, http_response.status))
示例8: process_spider_exception
# 需要导入模块: from scrapy.spidermiddlewares import httperror [as 别名]
# 或者: from scrapy.spidermiddlewares.httperror import HttpError [as 别名]
def process_spider_exception(self, response, exception, spider):
    """Log and swallow ``HttpError`` exceptions raised for a response.

    Responses with status 500 or above are logged at INFO level, all others
    at WARNING level. For an ``HttpError`` an empty list is returned; for
    any other exception the method returns ``None`` implicitly.
    """
    if not isinstance(exception, HttpError):
        return None

    # 5xx responses are usually transient (overloaded sites, updates, short
    # downtimes), so they are reported at the quieter level.
    emit = logger.info if response.status >= 500 else logger.warning
    emit(
        "Ignoring response %(response)r: HTTP status code is not "
        "handled or not allowed",
        {"response": response},
        extra={"spider": spider},
    )
    return []
示例9: errback_request
# 需要导入模块: from scrapy.spidermiddlewares import httperror [as 别名]
# 或者: from scrapy.spidermiddlewares.httperror import HttpError [as 别名]
def errback_request(self, failure):
    """Turn a failed request into a ``UrlItem`` describing the failure.

    The item's ``id`` and ``raw`` come from the request's ``meta``
    (``url_id`` and ``raw``). For HTTP errors the response status and URL
    are stored as ``status_code`` and ``expanded``; for every other failure
    ``expanded`` falls back to ``raw`` and ``status_code`` is set to
    ``U_HTML_ERROR_NONHTTP``. The outcome is logged and the item is yielded.

    NOTE: per the original author's note, this errback runs after the
    download and spider middlewares, and an HTTP-error response has passed
    through all download middlewares by the time it arrives here.

    Yields:
        The populated ``UrlItem``.
    """
    request = failure.request
    item = UrlItem()
    meta = request.meta
    item['id'] = meta['url_id']
    item['raw'] = meta['raw']

    if not failure.check(HttpError):
        item['expanded'] = item['raw']
        item['status_code'] = U_HTML_ERROR_NONHTTP
        logger.error('NON-HTTP error when fetching url %r: %s', item['raw'],
                     repr(failure))
    else:
        # The HttpError exception carries the non-200 response.
        http_response = failure.value.response
        item['status_code'] = http_response.status
        item['expanded'] = http_response.url
        logger.error('HTTP ERROR % r when fetching % r',
                     item['status_code'], item['raw'])

    yield item
示例10: errorback
# 需要导入模块: from scrapy.spidermiddlewares import httperror [as 别名]
# 或者: from scrapy.spidermiddlewares.httperror import HttpError [as 别名]
def errorback(self, failure):
    """Yield a placeholder ``Collector`` item describing a failed request.

    The item carries empty content fields plus an ``error`` marker: the HTTP
    status for ``HttpError`` failures, ``"DNS"`` for DNS lookup failures,
    ``"Timeout"`` for timeouts and ``"other"`` for anything else. The
    download slot and ``ID`` are read from the ``meta`` of the request that
    failed.

    The four failure types previously had four copy-pasted branches that
    differed only in the request source and the error marker; they now share
    one population path.

    Args:
        failure: The failure object handed to the errback.

    Yields:
        The loaded item.

    Raises:
        KeyError: If the request's ``meta`` has no ``"ID"`` entry.
    """
    loader = ItemLoader(item=Collector())

    if failure.check(HttpError):
        # For HTTP errors the request is reached through the response that
        # the exception carries.
        response = failure.value.response
        request = response.request
        error = response.status
    else:
        if failure.check(DNSLookupError):
            error = "DNS"
        elif failure.check(TimeoutError, TCPTimedOutError):
            error = "Timeout"
        else:
            error = "other"
        request = failure.request

    loader.add_value("dl_slot", request.meta.get('download_slot'))
    loader.add_value("start_page", "")
    loader.add_value("scraped_urls", "")
    loader.add_value("redirect", [None])
    loader.add_value("scraped_text", "")
    loader.add_value("error", error)
    loader.add_value("ID", request.meta["ID"])
    yield loader.load_item()
##################################################################
# MAIN PARSE
##################################################################
示例11: errorback
# 需要导入模块: from scrapy.spidermiddlewares import httperror [as 别名]
# 或者: from scrapy.spidermiddlewares.httperror import HttpError [as 别名]
def errorback(self, failure):
    """Yield a placeholder ``LinkCollector`` item describing a failed request.

    The item carries empty content fields (including ``links`` and
    ``alias``) plus an ``error`` marker: the HTTP status for ``HttpError``
    failures, ``"DNS"`` for DNS lookup failures, ``"Timeout"`` for timeouts
    and ``"other"`` for anything else. The download slot and ``ID`` are read
    from the ``meta`` of the request that failed.

    The four failure types previously had four copy-pasted branches that
    differed only in the request source and the error marker; they now share
    one population path.

    Args:
        failure: The failure object handed to the errback.

    Yields:
        The loaded item.

    Raises:
        KeyError: If the request's ``meta`` has no ``"ID"`` entry.
    """
    loader = ItemLoader(item=LinkCollector())

    if failure.check(HttpError):
        # For HTTP errors the request is reached through the response that
        # the exception carries.
        response = failure.value.response
        request = response.request
        error = response.status
    else:
        if failure.check(DNSLookupError):
            error = "DNS"
        elif failure.check(TimeoutError, TCPTimedOutError):
            error = "Timeout"
        else:
            error = "other"
        request = failure.request

    loader.add_value("dl_slot", request.meta.get('download_slot'))
    loader.add_value("start_page", "")
    loader.add_value("scraped_urls", "")
    loader.add_value("redirect", [None])
    loader.add_value("scraped_text", "")
    loader.add_value("error", error)
    loader.add_value("ID", request.meta["ID"])
    loader.add_value("links", "")
    loader.add_value("alias", "")
    yield loader.load_item()
##################################################################
# MAIN PARSE
##################################################################
示例12: errorback
# 需要导入模块: from scrapy.spidermiddlewares import httperror [as 别名]
# 或者: from scrapy.spidermiddlewares.httperror import HttpError [as 别名]
def errorback(self, failure):
    """Yield a placeholder ``Collector`` item describing a failed request.

    The item carries empty content fields (including ``title``,
    ``description`` and ``keywords``) plus an ``error`` marker: the HTTP
    status for ``HttpError`` failures, ``"DNS"`` for DNS lookup failures,
    ``"Timeout"`` for timeouts and ``"other"`` for anything else. The
    download slot and ``ID`` are read from the ``meta`` of the request that
    failed.

    The four failure types previously had four copy-pasted branches that
    differed only in the request source and the error marker; they now share
    one population path.

    Args:
        failure: The failure object handed to the errback.

    Yields:
        The loaded item.

    Raises:
        KeyError: If the request's ``meta`` has no ``"ID"`` entry.
    """
    loader = ItemLoader(item=Collector())

    if failure.check(HttpError):
        # For HTTP errors the request is reached through the response that
        # the exception carries.
        response = failure.value.response
        request = response.request
        error = response.status
    else:
        if failure.check(DNSLookupError):
            error = "DNS"
        elif failure.check(TimeoutError, TCPTimedOutError):
            error = "Timeout"
        else:
            error = "other"
        request = failure.request

    loader.add_value("dl_slot", request.meta.get('download_slot'))
    loader.add_value("start_page", "")
    loader.add_value("scraped_urls", "")
    loader.add_value("redirect", [None])
    loader.add_value("scraped_text", "")
    loader.add_value("title", "")
    loader.add_value("description", "")
    loader.add_value("keywords", "")
    loader.add_value("error", error)
    loader.add_value("ID", request.meta["ID"])
    yield loader.load_item()
##################################################################
# MAIN PARSE
##################################################################