This article collects typical usage examples of the Python scrapy.http.Headers class. If you have been asking yourself what exactly the Headers class does, how to use it, or what real-world usage looks like, the curated class code examples below may help.
The following presents 15 code examples of the Headers class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.

Example 1: test_none_value

def test_none_value(self):
    h1 = Headers()
    h1['foo'] = 'bar'
    h1['foo'] = None
    h1.setdefault('foo', 'bar')
    self.assertEqual(h1.get('foo'), None)
    self.assertEqual(h1.getlist('foo'), [])
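
As a quick standalone illustration of what the assertions above pin down, here is a minimal sketch (assuming scrapy.http.Headers is importable): assigning None unsets a header, and setdefault() will not overwrite that state.

from scrapy.http import Headers

h = Headers()
h['foo'] = 'bar'
h['foo'] = None             # unsets the stored value
h.setdefault('foo', 'bar')  # does not resurrect it
print(h.get('foo'))         # -> None
print(h.getlist('foo'))     # -> []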

Example 2: test_multivalue

def test_multivalue(self):
    h = Headers()
    h['X-Forwarded-For'] = hlist = ['ip1', 'ip2']
    self.assertEqual(h['X-Forwarded-For'], b'ip2')
    self.assertEqual(h.get('X-Forwarded-For'), b'ip2')
    self.assertEqual(h.getlist('X-Forwarded-For'), [b'ip1', b'ip2'])
    assert h.getlist('X-Forwarded-For') is not hlist

Example 3: test_copy

def test_copy(self):
    h1 = Headers({'header1': ['value1', 'value2']})
    h2 = copy.copy(h1)
    self.assertEqual(h1, h2)
    self.assertEqual(h1.getlist('header1'), h2.getlist('header1'))
    assert h1.getlist('header1') is not h2.getlist('header1')
    assert isinstance(h2, Headers)
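
Building on the identity assertion above, a minimal sketch showing that the copy is fully independent of the original (the byte values assume a recent, Python 3 Scrapy):

import copy
from scrapy.http import Headers

h1 = Headers({'header1': ['value1', 'value2']})
h2 = copy.copy(h1)
h2.appendlist('header1', 'value3')  # only the copy grows
print(h1.getlist('header1'))        # -> [b'value1', b'value2']
print(h2.getlist('header1'))        # -> [b'value1', b'value2', b'value3']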

Example 4: ScrapyHTTPClientFactory

class ScrapyHTTPClientFactory(HTTPClientFactory):
    """Scrapy implementation of the HTTPClientFactory, overriding the
    setURL method to make use of our Url object, which caches the parse
    result.
    """

    protocol = ScrapyHTTPPageGetter
    waiting = 1
    noisy = False
    followRedirect = False
    afterFoundGet = False

    def __init__(self, request, timeout=180):
        self.url = urldefrag(request.url)[0]
        self.method = request.method
        self.body = request.body or None
        self.headers = Headers(request.headers)
        self.response_headers = None
        self.timeout = request.meta.get('download_timeout') or timeout
        self.start_time = time()
        self.deferred = defer.Deferred().addCallback(self._build_response, request)
        # Fixes Twisted 11.1.0+ support, as HTTPClientFactory is expected
        # to have _disconnectedDeferred. See Twisted r32329.
        # As Scrapy implements its own redirect handling, there is no need
        # to add the _waitForDisconnect callback.
        # Specifically, this avoids an AttributeError when the
        # clientConnectionFailed method is called.
        self._disconnectedDeferred = defer.Deferred()
        self._set_connection_attributes(request)
        # set the Host header based on the url
        self.headers.setdefault('Host', self.netloc)
        # set Content-Length based on the length of the body
        if self.body is not None:
            self.headers['Content-Length'] = len(self.body)
        # just in case a broken http/1.1 server decides to keep the connection alive
        self.headers.setdefault("Connection", "close")

    def _build_response(self, body, request):
        request.meta['download_latency'] = self.headers_time - self.start_time
        status = int(self.status)
        headers = Headers(self.response_headers)
        respcls = responsetypes.from_args(headers=headers, url=self.url)
        return respcls(url=self.url, status=status, headers=headers, body=body)

    def _set_connection_attributes(self, request):
        parsed = urlparse_cached(request)
        self.scheme, self.netloc, self.host, self.port, self.path = _parsed_url_args(parsed)
        self.bind_address = request.meta.get("bind_address")
        proxy = request.meta.get('proxy')
        if proxy:
            self.scheme, _, self.host, self.port, _ = _parse(proxy)
            self.path = self.url

    def gotHeaders(self, headers):
        self.headers_time = time()
        self.response_headers = headers
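
For context, a hedged sketch of how such a factory was typically driven by Scrapy's old Twisted-based HTTP 1.0 downloader; the reactor wiring below is an assumption for illustration, not code from this example:

# Hypothetical wiring: build the factory from a Request, hand it to the
# reactor, and let factory.deferred fire with the finished Response.
from twisted.internet import reactor
from scrapy.http import Request

request = Request('http://example.com/', meta={'download_timeout': 30})
factory = ScrapyHTTPClientFactory(request)
factory.deferred.addCallback(lambda response: print(response.status))
reactor.connectTCP(factory.host, factory.port, factory)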

Example 5: ScrapyHTTPClientFactory

class ScrapyHTTPClientFactory(HTTPClientFactory):
    """Scrapy implementation of the HTTPClientFactory, overriding the
    setURL method to make use of our Url object, which caches the parse
    result.
    """

    protocol = ScrapyHTTPPageGetter
    waiting = 1
    noisy = False
    followRedirect = False
    afterFoundGet = False

    def __init__(self, request, timeout=180):
        self.url = urldefrag(request.url)[0]
        self.method = request.method
        self.body = request.body or None
        self.headers = Headers(request.headers)
        self.response_headers = None
        self.timeout = request.meta.get('download_timeout') or timeout
        self.start_time = time()
        self.deferred = defer.Deferred().addCallback(self._build_response, request)
        self._set_connection_attributes(request)
        # set the Host header based on the url
        self.headers.setdefault('Host', self.netloc)
        # set Content-Length based on the length of the body
        if self.body is not None:
            self.headers['Content-Length'] = len(self.body)
        # just in case a broken http/1.1 server decides to keep the connection alive
        self.headers.setdefault("Connection", "close")

    def _build_response(self, body, request):
        request.meta['download_latency'] = self.headers_time - self.start_time
        status = int(self.status)
        headers = Headers(self.response_headers)
        respcls = responsetypes.from_args(headers=headers, url=self.url)
        return respcls(url=self.url, status=status, headers=headers, body=body)

    def _set_connection_attributes(self, request):
        parsed = urlparse_cached(request)
        self.scheme, self.netloc, self.host, self.port, self.path = _parsed_url_args(parsed)
        self.use_tunnel = False
        proxy = request.meta.get('proxy')
        if proxy:
            old_scheme, old_host, old_port = self.scheme, self.host, self.port
            self.scheme, _, self.host, self.port, _ = _parse(proxy)
            self.path = self.url
            if old_scheme == "https":
                # HTTPS through a proxy needs a CONNECT tunnel to the origin
                self.headers['Proxy-Connection'] = 'keep-alive'
                self.use_tunnel = True
                self.tunnel_to_host = old_host
                self.tunnel_to_port = old_port

    def gotHeaders(self, headers):
        self.headers_time = time()
        self.response_headers = headers

Example 6: ScrapyHTTPPageGetter

class ScrapyHTTPPageGetter(HTTPClient):

    delimiter = '\n'

    def connectionMade(self):
        self.headers = Headers()  # bucket for response headers
        # Method command
        self.sendCommand(self.factory.method, self.factory.path)
        # Headers
        for key, values in self.factory.headers.items():
            for value in values:
                self.sendHeader(key, value)
        self.endHeaders()
        # Body
        if self.factory.body is not None:
            self.transport.write(self.factory.body)

    def extractHeader(self, header):
        key, val = header.split(':', 1)
        val = val.lstrip()
        self.handleHeader(key, val)
        if key.lower() == 'content-length':
            self.length = int(val)

    def lineReceived(self, line):
        try:
            HTTPClient.lineReceived(self, line.rstrip())
        except Exception:
            self.factory.add_invalid_header(line)

    def handleHeader(self, key, value):
        self.headers.appendlist(key, value)

    def handleStatus(self, version, status, message):
        self.factory.gotStatus(version, status, message)

    def handleEndHeaders(self):
        self.factory.gotHeaders(self.headers)

    def connectionLost(self, reason):
        HTTPClient.connectionLost(self, reason)
        self.factory.noPage(reason)

    def handleResponse(self, response):
        if self.factory.method.upper() == 'HEAD':
            self.factory.page('')
        else:
            self.factory.page(response)
        self.transport.loseConnection()

    def timeout(self):
        self.transport.loseConnection()
        self.factory.noPage(
            defer.TimeoutError("Getting %s took longer than %s seconds."
                               % (self.factory.url, self.factory.timeout)))

Example 7: test_iterables

def test_iterables(self):
    idict = {'Content-Type': 'text/html', 'X-Forwarded-For': ['ip1', 'ip2']}
    h = Headers(idict)
    self.assertEqual(dict(h), {'Content-Type': ['text/html'], 'X-Forwarded-For': ['ip1', 'ip2']})
    self.assertEqual(h.keys(), ['X-Forwarded-For', 'Content-Type'])
    self.assertEqual(h.items(), [('X-Forwarded-For', ['ip1', 'ip2']), ('Content-Type', ['text/html'])])
    self.assertEqual(list(h.iteritems()),
                     [('X-Forwarded-For', ['ip1', 'ip2']), ('Content-Type', ['text/html'])])
    self.assertEqual(h.values(), ['ip2', 'text/html'])
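
Note that this variant of test_iterables predates Scrapy's Python 3 port: keys and values come back as native str objects. Compare Example 11 below, where the same test expects everything normalized to bytes.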

Example 8: ScrapyHTTPPageGetter

class ScrapyHTTPPageGetter(HTTPClient):

    delimiter = b'\n'

    def connectionMade(self):
        self.headers = Headers()  # bucket for response headers
        # Method command
        self.sendCommand(self.factory.method, self.factory.path)
        # Headers
        for key, values in self.factory.headers.items():
            for value in values:
                self.sendHeader(key, value)
        self.endHeaders()
        # Body
        if self.factory.body is not None:
            self.transport.write(self.factory.body)

    def lineReceived(self, line):
        return HTTPClient.lineReceived(self, line.rstrip())

    def handleHeader(self, key, value):
        self.headers.appendlist(key, value)

    def handleStatus(self, version, status, message):
        self.factory.gotStatus(version, status, message)

    def handleEndHeaders(self):
        self.factory.gotHeaders(self.headers)

    def connectionLost(self, reason):
        self._connection_lost_reason = reason
        HTTPClient.connectionLost(self, reason)
        self.factory.noPage(reason)

    def handleResponse(self, response):
        if self.factory.method.upper() == b'HEAD':
            self.factory.page(b'')
        elif self.length is not None and self.length > 0:
            self.factory.noPage(self._connection_lost_reason)
        else:
            self.factory.page(response)
        self.transport.loseConnection()

    def timeout(self):
        self.transport.loseConnection()
        # transport cleanup needed for HTTPS connections
        if self.factory.url.startswith(b'https'):
            self.transport.stopProducing()
        self.factory.noPage(
            defer.TimeoutError("Getting %s took longer than %s seconds."
                               % (self.factory.url, self.factory.timeout)))

Example 9: ScrapyHTTPPageGetter

class ScrapyHTTPPageGetter(HTTPClient):

    delimiter = '\n'

    def connectionMade(self):
        self.headers = Headers()  # bucket for response headers
        # Method command
        self.sendCommand(self.factory.method, self.factory.path)
        # Headers
        for key, values in self.factory.headers.items():
            for value in values:
                self.sendHeader(key, value)
        self.endHeaders()
        # Body
        if self.factory.body is not None:
            self.transport.write(self.factory.body)

    def lineReceived(self, line):
        return HTTPClient.lineReceived(self, line.rstrip())

    def handleHeader(self, key, value):
        self.headers.appendlist(key, value)

    def handleStatus(self, version, status, message):
        self.factory.gotStatus(version, status, message)

    def handleEndHeaders(self):
        self.factory.gotHeaders(self.headers)

    def connectionLost(self, reason):
        HTTPClient.connectionLost(self, reason)
        self.factory.noPage(reason)

    def handleResponse(self, response):
        if self.factory.method.upper() == 'HEAD':
            self.factory.page('')
        elif self.length is not None and self.length != 0:
            self.factory.noPage(failure.Failure(
                PartialDownloadError(self.factory.status, None, response)))
        else:
            self.factory.page(response)
        self.transport.loseConnection()

    def timeout(self):
        self.transport.loseConnection()
        self.factory.noPage(
            defer.TimeoutError("Getting %s took longer than %s seconds."
                               % (self.factory.url, self.factory.timeout)))

Example 10: __init__

def __init__(self, request, timeout=180):
    self.url = urldefrag(request.url)[0]
    self.method = request.method
    self.body = request.body or None
    self.headers = Headers(request.headers)
    self.response_headers = None
    self.timeout = request.meta.get('download_timeout') or timeout
    self.start_time = time()
    self.deferred = defer.Deferred().addCallback(self._build_response, request)
    # Fixes Twisted 11.1.0+ support, as HTTPClientFactory is expected
    # to have _disconnectedDeferred. See Twisted r32329.
    # As Scrapy implements its own redirect handling, there is no need
    # to add the _waitForDisconnect callback.
    # Specifically, this avoids an AttributeError when the
    # clientConnectionFailed method is called.
    self._disconnectedDeferred = defer.Deferred()
    self._set_connection_attributes(request)
    # set the Host header based on the url
    self.headers.setdefault('Host', self.netloc)
    # set Content-Length based on the length of the body
    if self.body is not None:
        self.headers['Content-Length'] = len(self.body)
    # just in case a broken http/1.1 server decides to keep the connection alive
    self.headers.setdefault("Connection", "close")

Example 11: test_iterables

def test_iterables(self):
    idict = {'Content-Type': 'text/html', 'X-Forwarded-For': ['ip1', 'ip2']}
    h = Headers(idict)
    self.assertDictEqual(dict(h),
                         {b'Content-Type': [b'text/html'],
                          b'X-Forwarded-For': [b'ip1', b'ip2']})
    self.assertSortedEqual(h.keys(),
                           [b'X-Forwarded-For', b'Content-Type'])
    self.assertSortedEqual(h.items(),
                           [(b'X-Forwarded-For', [b'ip1', b'ip2']),
                            (b'Content-Type', [b'text/html'])])
    self.assertSortedEqual(h.iteritems(),
                           [(b'X-Forwarded-For', [b'ip1', b'ip2']),
                            (b'Content-Type', [b'text/html'])])
    self.assertSortedEqual(h.values(), [b'ip2', b'text/html'])

Example 12: test_appendlist

def test_appendlist(self):
    h1 = Headers({'header1': 'value1'})
    h1.appendlist('header1', 'value3')
    self.assertEqual(h1.getlist('header1'), ['value1', 'value3'])

    h1 = Headers()
    h1.appendlist('header1', 'value1')
    h1.appendlist('header1', 'value3')
    self.assertEqual(h1.getlist('header1'), ['value1', 'value3'])

Example 13: test_netscape_example_2

def test_netscape_example_2(self):
    # Second Example transaction sequence:
    #
    # Assume all mappings from above have been cleared.
    #
    # Client receives:
    #
    #   Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/
    #
    # When client requests a URL in path "/" on this server, it sends:
    #
    #   Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001
    #
    # Client receives:
    #
    #   Set-Cookie: PART_NUMBER=RIDING_ROCKET_0023; path=/ammo
    #
    # When client requests a URL in path "/ammo" on this server, it sends:
    #
    #   Cookie: PART_NUMBER=RIDING_ROCKET_0023; PART_NUMBER=ROCKET_LAUNCHER_0001
    #
    # NOTE: There are two name/value pairs named "PART_NUMBER" due to
    # the inheritance of the "/" mapping in addition to the "/ammo" mapping.
    c = CookieJar()
    headers = Headers({'Set-Cookie': 'PART_NUMBER=ROCKET_LAUNCHER_0001; path=/'})
    req = Request("http://www.acme.com/")
    res = Response("http://www.acme.com/", headers=headers)
    c.extract_cookies(res, req)

    req = Request("http://www.acme.com/")
    c.add_cookie_header(req)
    self.assertEqual(req.headers.get("Cookie"), "PART_NUMBER=ROCKET_LAUNCHER_0001")

    headers.appendlist("Set-Cookie", "PART_NUMBER=RIDING_ROCKET_0023; path=/ammo")
    res = Response("http://www.acme.com/", headers=headers)
    c.extract_cookies(res, req)

    req = Request("http://www.acme.com/ammo")
    c.add_cookie_header(req)
    self.assertTrue(re.search(r"PART_NUMBER=RIDING_ROCKET_0023;\s*"
                              r"PART_NUMBER=ROCKET_LAUNCHER_0001",
                              req.headers.get("Cookie")))

Example 14: test_headers

def test_headers(self):
    # Different ways of setting the headers attribute
    url = 'http://www.scrapy.org'
    headers = {'Accept': 'gzip', 'Custom-Header': 'nothing to tell you'}
    r = self.request_class(url=url, headers=headers)
    p = self.request_class(url=url, headers=r.headers)

    self.assertEqual(r.headers, p.headers)
    self.assertFalse(r.headers is headers)
    self.assertFalse(p.headers is r.headers)

    # headers must not be unicode
    h = Headers({'key1': u'val1', u'key2': 'val2'})
    h[u'newkey'] = u'newval'
    for k, v in h.iteritems():
        self.assertTrue(isinstance(k, str))
        for s in v:
            self.assertTrue(isinstance(s, str))

Example 15: test_int_value

def test_int_value(self):
    h1 = Headers({'hey': 5})
    h1['foo'] = 1
    h1.setdefault('bar', 2)
    h1.setlist('buz', [1, 'dos', 3])
    self.assertEqual(h1.getlist('foo'), [b'1'])
    self.assertEqual(h1.getlist('bar'), [b'2'])
    self.assertEqual(h1.getlist('buz'), [b'1', b'dos', b'3'])
    self.assertEqual(h1.getlist('hey'), [b'5'])
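
To make the coercion explicit: on assignment, Headers encodes every value, including ints, to bytes. A minimal sketch (assuming a recent Scrapy):

from scrapy.http import Headers

h = Headers({'hey': 5})
h['foo'] = 1
print(h.getlist('hey'))  # -> [b'5']
print(h['foo'])          # -> b'1' (scalar access returns the last value)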