This article collects typical usage examples of the Python method scrapy.http.Headers.setdefault. If you are wondering what exactly Headers.setdefault does and how to use it, the curated code samples below should help. You can also explore the containing class, scrapy.http.Headers, for further usage.
The following presents 5 code examples of Headers.setdefault, ordered by popularity.
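Before diving into the examples, here is a minimal sketch of the basic semantics of Headers.setdefault: the default is only stored when the header is absent, and an existing value wins otherwise. (The header names here are arbitrary; on Python 3 versions of Scrapy, stored values are normalized to bytes.)

from scrapy.http import Headers

h = Headers({'Accept': 'text/html'})
h.setdefault('Accept', 'application/json')   # key present: kept as-is
h.setdefault('Accept-Language', 'en')        # key absent: default stored
print(h.get('Accept'))           # b'text/html'
print(h.get('Accept-Language'))  # b'en'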
Example 1: test_none_value
# Required import: from scrapy.http import Headers [as alias]
# Or: from scrapy.http.Headers import setdefault [as alias]
def test_none_value(self):
    h1 = Headers()
    h1['foo'] = 'bar'
    h1['foo'] = None
    h1.setdefault('foo', 'bar')
    self.assertEqual(h1.get('foo'), None)
    self.assertEqual(h1.getlist('foo'), [])
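What the assertions pin down: assigning None to a header keeps the key but drops its values, and because the key still exists, a later setdefault does not restore a value. A minimal sketch of the same behaviour (the header name is an arbitrary choice, not from the test):

from scrapy.http import Headers

h = Headers()
h['Cache-Control'] = None                  # key exists, but holds no values
h.setdefault('Cache-Control', 'no-cache')  # no effect: the key is present
print(h.get('Cache-Control'))      # None
print(h.getlist('Cache-Control'))  # []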
Example 2: ScrapyHTTPClientFactory
# Required import: from scrapy.http import Headers [as alias]
# Or: from scrapy.http.Headers import setdefault [as alias]
class ScrapyHTTPClientFactory(HTTPClientFactory):
    """Scrapy implementation of the HTTPClientFactory, overriding the
    setURL method to make use of our Url object, which caches the parse
    result.
    """

    protocol = ScrapyHTTPPageGetter
    waiting = 1
    noisy = False
    followRedirect = False
    afterFoundGet = False

    def __init__(self, request, timeout=180):
        self.url = urldefrag(request.url)[0]
        self.method = request.method
        self.body = request.body or None
        self.headers = Headers(request.headers)
        self.response_headers = None
        self.timeout = request.meta.get('download_timeout') or timeout
        self.start_time = time()
        self.deferred = defer.Deferred().addCallback(self._build_response, request)

        # Fixes Twisted 11.1.0+ support, as HTTPClientFactory is expected
        # to have _disconnectedDeferred. See Twisted r32329.
        # Since Scrapy implements its own redirect handling, there is no
        # need to add the _waitForDisconnect callback.
        # Specifically, this avoids an AttributeError when the
        # clientConnectionFailed method is called.
        self._disconnectedDeferred = defer.Deferred()

        self._set_connection_attributes(request)

        # set the Host header based on the url
        self.headers.setdefault('Host', self.netloc)

        # set Content-Length based on the length of the body
        if self.body is not None:
            self.headers['Content-Length'] = len(self.body)
        # just in case a broken http/1.1 peer decides to keep the connection alive
        self.headers.setdefault("Connection", "close")

    def _build_response(self, body, request):
        request.meta['download_latency'] = self.headers_time - self.start_time
        status = int(self.status)
        headers = Headers(self.response_headers)
        respcls = responsetypes.from_args(headers=headers, url=self.url)
        return respcls(url=self.url, status=status, headers=headers, body=body)

    def _set_connection_attributes(self, request):
        parsed = urlparse_cached(request)
        self.scheme, self.netloc, self.host, self.port, self.path = _parsed_url_args(parsed)
        self.bind_address = request.meta.get("bind_address")
        proxy = request.meta.get('proxy')
        if proxy:
            # when going through a proxy, connect to the proxy and request
            # the absolute URL instead of the path
            self.scheme, _, self.host, self.port, _ = _parse(proxy)
            self.path = self.url

    def gotHeaders(self, headers):
        self.headers_time = time()
        self.response_headers = headers
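The two setdefault calls in __init__ are the part relevant here: they install Host and Connection defaults without clobbering headers the request already carries. A minimal sketch of that effect (host names are made up for illustration):

from scrapy.http import Headers

headers = Headers({'Host': 'example.com:8080'})  # supplied by the request
headers.setdefault('Host', 'example.com')        # ignored: Host is present
headers.setdefault('Connection', 'close')        # applied: key was absent
print(headers.get('Host'))        # b'example.com:8080'
print(headers.get('Connection'))  # b'close'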
Example 3: ScrapyHTTPClientFactory
# Required import: from scrapy.http import Headers [as alias]
# Or: from scrapy.http.Headers import setdefault [as alias]
class ScrapyHTTPClientFactory(HTTPClientFactory):
    """Scrapy implementation of the HTTPClientFactory, overriding the
    setURL method to make use of our Url object, which caches the parse
    result.
    """

    protocol = ScrapyHTTPPageGetter
    waiting = 1
    noisy = False
    followRedirect = False
    afterFoundGet = False

    def __init__(self, request, timeout=180):
        self.url = urldefrag(request.url)[0]
        self.method = request.method
        self.body = request.body or None
        self.headers = Headers(request.headers)
        self.response_headers = None
        self.timeout = request.meta.get('download_timeout') or timeout
        self.start_time = time()
        self.deferred = defer.Deferred().addCallback(self._build_response, request)
        self._set_connection_attributes(request)

        # set the Host header based on the url
        self.headers.setdefault('Host', self.netloc)

        # set Content-Length based on the length of the body
        if self.body is not None:
            self.headers['Content-Length'] = len(self.body)
        # just in case a broken http/1.1 peer decides to keep the connection alive
        self.headers.setdefault("Connection", "close")

    def _build_response(self, body, request):
        request.meta['download_latency'] = self.headers_time - self.start_time
        status = int(self.status)
        headers = Headers(self.response_headers)
        respcls = responsetypes.from_args(headers=headers, url=self.url)
        return respcls(url=self.url, status=status, headers=headers, body=body)

    def _set_connection_attributes(self, request):
        parsed = urlparse_cached(request)
        self.scheme, self.netloc, self.host, self.port, self.path = _parsed_url_args(parsed)
        self.use_tunnel = False
        proxy = request.meta.get('proxy')
        if proxy:
            old_scheme, old_host, old_port = self.scheme, self.host, self.port
            self.scheme, _, self.host, self.port, _ = _parse(proxy)
            self.path = self.url
            if old_scheme == "https":
                # https through a proxy needs a CONNECT tunnel, so remember
                # the original endpoint before switching to the proxy
                self.headers['Proxy-Connection'] = 'keep-alive'
                self.use_tunnel = True
                self.tunnel_to_host = old_host
                self.tunnel_to_port = old_port

    def gotHeaders(self, headers):
        self.headers_time = time()
        self.response_headers = headers
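Compared with Example 2, the extra branch in _set_connection_attributes handles HTTPS through a proxy: the factory connects to the proxy but remembers the original endpoint so a CONNECT tunnel can be opened later. The sketch below isolates that decision with a hypothetical helper (choose_route is not part of Scrapy, just an illustration of the branch above):

def choose_route(scheme, host, port, proxy_host, proxy_port):
    """Hypothetical illustration of the proxy branch, not Scrapy API."""
    if scheme == 'https':
        # connect to the proxy, then open a CONNECT tunnel to the origin
        return ('tunnel', proxy_host, proxy_port, host, port)
    # plain http: send the absolute URL straight to the proxy
    return ('proxy', proxy_host, proxy_port, None, None)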
Example 4: test_int_value
# Required import: from scrapy.http import Headers [as alias]
# Or: from scrapy.http.Headers import setdefault [as alias]
def test_int_value(self):
    h1 = Headers({'hey': 5})
    h1['foo'] = 1
    h1.setdefault('bar', 2)
    h1.setlist('buz', [1, 'dos', 3])
    self.assertEqual(h1.getlist('foo'), [b'1'])
    self.assertEqual(h1.getlist('bar'), [b'2'])
    self.assertEqual(h1.getlist('buz'), [b'1', b'dos', b'3'])
    self.assertEqual(h1.getlist('hey'), [b'5'])
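The point of Example 4, per its assertions: non-string values passed through __setitem__, setdefault, or setlist are coerced to bytes on storage. A quick check (the header name is arbitrary):

from scrapy.http import Headers

h = Headers()
h['Retry-After'] = 120           # int is coerced when stored
print(h.getlist('Retry-After'))  # [b'120']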
Example 5: test_setdefault
# Required import: from scrapy.http import Headers [as alias]
# Or: from scrapy.http.Headers import setdefault [as alias]
def test_setdefault(self):
    h = Headers()
    hlist = ['ip1', 'ip2']
    olist = h.setdefault('X-Forwarded-For', hlist)
    assert h.getlist('X-Forwarded-For') is not hlist
    assert h.getlist('X-Forwarded-For') is olist

    h = Headers()
    olist = h.setdefault('X-Forwarded-For', 'ip1')
    self.assertEqual(h.getlist('X-Forwarded-For'), ['ip1'])
    assert h.getlist('X-Forwarded-For') is olist
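A reading of the identity checks in Example 5: setdefault returns the exact list object that Headers stores internally, which is a normalized copy of the caller's list, never the caller's object itself. Note that on Python 3 versions of Scrapy values are stored as bytes, so the equivalent of the equality assertion above would compare against [b'ip1']. A minimal sketch:

from scrapy.http import Headers

h = Headers()
hlist = ['ip1', 'ip2']
stored = h.setdefault('X-Forwarded-For', hlist)
print(stored is h.getlist('X-Forwarded-For'))  # True: the same stored list
print(stored is hlist)                         # False: a normalized copy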