本文整理汇总了Python中scrapy.http.Response.headers方法的典型用法代码示例。如果您正苦于以下问题:Python Response.headers方法的具体用法?Python Response.headers怎么用?Python Response.headers使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类scrapy.http.Response
的用法示例。
在下文中一共展示了Response.headers方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_useragents_spider
# 需要导入模块: from scrapy.http import Response [as 别名]
# 或者: from scrapy.http.Response import headers [as 别名]
def test_useragents_spider(spider, scrape_request, html_headers,
                           mock_html_nolinks):
    """A link-free page yields one Request per configured batch user agent."""
    firefox_agent = factories.BatchUserAgentFactory.build(
        ua_string='Firefox / 11.0')
    chrome_agent = factories.BatchUserAgentFactory.build(
        ua_string='Chrome / 20.0')
    spider.batch_user_agents = [firefox_agent, chrome_agent]

    # Build a fake response carrying the link-free HTML fixture
    mock_response = Response('http://test:12345', body=mock_html_nolinks)
    mock_response.request = scrape_request
    mock_response.headers = html_headers
    mock_response.status = 200
    mock_response.encoding = u'utf-8'
    mock_response.flags = []

    # Run the spider over the mock response and gather the UA string of
    # every yielded Request; anything other than a Request is a failure.
    collected_uas = set()
    for emitted in spider.parse(mock_response):
        # We're not expecting anything other than Requests
        assert isinstance(emitted, Request)
        collected_uas.add(emitted.meta['user_agent'].ua_string)

    assert collected_uas == set([u'Firefox / 11.0', u'Chrome / 20.0'])
示例2: test_spider_crawls_links
# 需要导入模块: from scrapy.http import Response [as 别名]
# 或者: from scrapy.http.Response import headers [as 别名]
def test_spider_crawls_links(spider, scrape_request, html_headers,
                             mock_html_twolinks):
    """The spider emits follow-up Requests for each HTML link on the page."""
    # A single user agent keeps the expected request count simple
    agent = factories.BatchUserAgentFactory.build(ua_string='Firefox / 11.0')
    spider.batch_user_agents = [agent]

    # Fake response whose body fixture contains exactly two links
    mock_response = Response('http://test:12345', body=mock_html_twolinks)
    mock_response.request = scrape_request
    mock_response.headers = html_headers
    mock_response.meta['user_agent'] = agent
    mock_response.status = 200
    mock_response.encoding = u'utf-8'
    mock_response.flags = []

    # Collect the URL of every Request the spider yields; other output
    # types are ignored here.
    found_urls = set()
    for emitted in spider.parse(mock_response):
        if isinstance(emitted, Request):
            found_urls.add(emitted.url)

    expected_urls = set([
        mock_response.url + '/link1.html',
        mock_response.url + '/link2.html',
    ])
    assert expected_urls == found_urls
示例3: test_css_item_emission
# 需要导入模块: from scrapy.http import Response [as 别名]
# 或者: from scrapy.http.Response import headers [as 别名]
def test_css_item_emission(spider, linked_css_request, css_headers, mock_css):
    """Parsing a CSS response emits exactly one correctly-populated MarkupItem."""
    # Single user agent keeps item counting trivial.
    # NOTE(review): this test sets `spider.user_agents` while the sibling
    # tests set `spider.batch_user_agents` — confirm against the fixture
    # which attribute the spider actually reads.
    agent = factories.BatchUserAgentFactory(ua_string='Firefox / 11.0')
    spider.user_agents = [agent]

    # Fake response carrying the CSS fixture body
    css_url = 'http://test:12345/default.css'
    mock_response = Response(css_url, body=mock_css)
    mock_response.request = linked_css_request
    mock_response.headers = css_headers
    mock_response.status = 200
    mock_response.encoding = u'ascii'
    mock_response.flags = []

    # Persist a fake URLScan so the emitted item can reference it
    mock_urlscan = model.URLScan.objects.create(
        site_scan=linked_css_request.meta['sitescan'],
        page_url_hash=sha256("http://test:12345/").hexdigest(),
        page_url=mock_response.url,
        timestamp=spider.get_now_time())

    # The item we expect the spider to emit for this response
    expected = MarkupItem()
    expected['content_type'] = spider.get_content_type(css_headers)
    expected['filename'] = os.path.basename(urlparse(css_url).path)
    expected['headers'] = unicode(css_headers)
    expected['meta'] = mock_response.meta
    expected['raw_content'] = mock_response.body
    expected['sitescan'] = linked_css_request.meta['sitescan']
    expected['urlscan'] = mock_urlscan
    expected['url'] = mock_response.url
    expected['user_agent'] = mock_response.meta['user_agent']

    # The pipeline should produce MarkupItems and nothing else
    collected = None
    for emitted in spider.parse(mock_response):
        assert isinstance(emitted, MarkupItem)
        collected = emitted

    assert expected == collected
示例4: test_js_item_emission
# 需要导入模块: from scrapy.http import Response [as 别名]
# 或者: from scrapy.http.Response import headers [as 别名]
def test_js_item_emission(spider, linked_js_request, js_headers, mock_js):
    """Parsing a JS response emits exactly one correctly-populated MarkupItem."""
    # Fake response carrying the JavaScript fixture body
    js_url = 'http://test:12345/default.js'
    mock_response = Response(js_url, body=mock_js)
    mock_response.request = linked_js_request
    mock_response.headers = js_headers
    mock_response.status = 200
    mock_response.encoding = u'ascii'
    mock_response.flags = []

    # Persist a fake URLScan so the emitted item can reference it
    mock_urlscan = model.URLScan.objects.create(
        site_scan=linked_js_request.meta['sitescan'],
        page_url_hash=sha256("http://test:12345/").hexdigest(),
        page_url=mock_response.url,
        timestamp=spider.get_now_time())

    # The single item we expect the spider to emit for this response
    expected = MarkupItem()
    expected['content_type'] = spider.get_content_type(js_headers)
    expected['filename'] = os.path.basename(urlparse(js_url).path)
    expected['headers'] = unicode(js_headers)
    expected['meta'] = mock_response.meta
    expected['raw_content'] = mock_response.body
    expected['sitescan'] = linked_js_request.meta['sitescan']
    expected['urlscan'] = mock_urlscan
    expected['url'] = mock_response.url
    expected['user_agent'] = mock_response.meta['user_agent']
    expected['redirected_from'] = ''

    assert list(spider.parse(mock_response)) == [expected]