本文整理汇总了Python中scrapy.http.Headers.appendlist方法的典型用法代码示例。如果您正苦于以下问题:Python Headers.appendlist方法的具体用法?Python Headers.appendlist怎么用?Python Headers.appendlist使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类scrapy.http.Headers
的用法示例。
在下文中一共展示了Headers.appendlist方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_appendlist
# 需要导入模块: from scrapy.http import Headers [as 别名]
# 或者: from scrapy.http.Headers import appendlist [as 别名]
def test_appendlist(self):
    """appendlist() accumulates values whether or not the key pre-exists."""
    # Key seeded via the constructor, then extended.
    hdrs = Headers({'header1': 'value1'})
    hdrs.appendlist('header1', 'value3')
    self.assertEqual(hdrs.getlist('header1'), ['value1', 'value3'])
    # Key created purely through appendlist() calls.
    hdrs = Headers()
    for val in ('value1', 'value3'):
        hdrs.appendlist('header1', val)
    self.assertEqual(hdrs.getlist('header1'), ['value1', 'value3'])
示例2: ScrapyHTTPPageGetter
# 需要导入模块: from scrapy.http import Headers [as 别名]
# 或者: from scrapy.http.Headers import appendlist [as 别名]
class ScrapyHTTPPageGetter(HTTPClient):
    """Twisted HTTP client protocol that reports results back to its factory.

    NOTE(review): relies on attributes supplied by the factory (method,
    path, headers, body, url, timeout) and on HTTPClient's line parsing.
    """

    delimiter = '\n'

    def connectionMade(self):
        """Write the entire request as soon as the connection is up."""
        self.headers = Headers()  # bucket for response headers
        # Method command
        self.sendCommand(self.factory.method, self.factory.path)
        # Headers
        for key, values in self.factory.headers.items():
            for value in values:
                self.sendHeader(key, value)
        self.endHeaders()
        # Body
        if self.factory.body is not None:
            self.transport.write(self.factory.body)

    def extractHeader(self, header):
        """Split a raw "Key: value" line, record it, and track Content-Length."""
        key, val = header.split(':', 1)
        val = val.lstrip()
        self.handleHeader(key, val)
        if key.lower() == 'content-length':
            self.length = int(val)

    def lineReceived(self, line):
        try:
            HTTPClient.lineReceived(self, line.rstrip())
        except Exception:
            # Was a bare ``except:``, which also swallowed SystemExit and
            # KeyboardInterrupt; narrowed to Exception. A malformed header
            # line is reported to the factory instead of killing the parse.
            self.factory.add_invalid_header(line)

    def handleHeader(self, key, value):
        # appendlist preserves every occurrence of a repeated header.
        self.headers.appendlist(key, value)

    def handleStatus(self, version, status, message):
        self.factory.gotStatus(version, status, message)

    def handleEndHeaders(self):
        self.factory.gotHeaders(self.headers)

    def connectionLost(self, reason):
        HTTPClient.connectionLost(self, reason)
        self.factory.noPage(reason)

    def handleResponse(self, response):
        if self.factory.method.upper() == 'HEAD':
            # HEAD responses carry no body by definition.
            self.factory.page('')
        else:
            self.factory.page(response)
        self.transport.loseConnection()

    def timeout(self):
        self.transport.loseConnection()
        self.factory.noPage(
            defer.TimeoutError("Getting %s took longer than %s seconds." %
                               (self.factory.url, self.factory.timeout)))
示例3: ScrapyHTTPPageGetter
# 需要导入模块: from scrapy.http import Headers [as 别名]
# 或者: from scrapy.http.Headers import appendlist [as 别名]
class ScrapyHTTPPageGetter(HTTPClient):
    """Bytes-oriented Twisted HTTP client protocol driven by a request factory.

    NOTE(review): relies on factory attributes (method, path, headers, body,
    url, timeout) and on HTTPClient's parsing machinery -- confirm against
    the factory implementation.
    """

    delimiter = b'\n'  # HTTPClient splits the byte stream on bare LF

    def connectionMade(self):
        # Emit the whole request (command line, headers, optional body) at once.
        self.headers = Headers() # bucket for response headers
        # Method command
        self.sendCommand(self.factory.method, self.factory.path)
        # Headers
        for key, values in self.factory.headers.items():
            for value in values:
                self.sendHeader(key, value)
        self.endHeaders()
        # Body
        if self.factory.body is not None:
            self.transport.write(self.factory.body)

    def lineReceived(self, line):
        # Strip trailing CR so CRLF-terminated lines parse like LF ones.
        return HTTPClient.lineReceived(self, line.rstrip())

    def handleHeader(self, key, value):
        # appendlist preserves every occurrence of a repeated header.
        self.headers.appendlist(key, value)

    def handleStatus(self, version, status, message):
        self.factory.gotStatus(version, status, message)

    def handleEndHeaders(self):
        self.factory.gotHeaders(self.headers)

    def connectionLost(self, reason):
        # Remember why the connection closed: handleResponse consults it to
        # distinguish a complete body from a truncated one.
        self._connection_lost_reason = reason
        HTTPClient.connectionLost(self, reason)
        self.factory.noPage(reason)

    def handleResponse(self, response):
        if self.factory.method.upper() == b'HEAD':
            self.factory.page(b'')
        elif self.length is not None and self.length > 0:
            # self.length is presumably the count of body bytes still
            # expected (HTTPClient bookkeeping -- confirm); nonzero means
            # the download was cut short, so report the close reason.
            self.factory.noPage(self._connection_lost_reason)
        else:
            self.factory.page(response)
        self.transport.loseConnection()

    def timeout(self):
        self.transport.loseConnection()
        # transport cleanup needed for HTTPS connections
        if self.factory.url.startswith(b'https'):
            self.transport.stopProducing()
        self.factory.noPage(\
            defer.TimeoutError("Getting %s took longer than %s seconds." % \
                (self.factory.url, self.factory.timeout)))
示例4: ScrapyHTTPPageGetter
# 需要导入模块: from scrapy.http import Headers [as 别名]
# 或者: from scrapy.http.Headers import appendlist [as 别名]
class ScrapyHTTPPageGetter(HTTPClient):
    """Twisted HTTP client protocol that reports results back to its factory.

    A response whose body is shorter than the declared Content-Length is
    reported to the factory as a PartialDownloadError failure.
    """

    delimiter = '\n'

    def connectionMade(self):
        """Write the entire request as soon as the connection is up."""
        self.headers = Headers()  # bucket for response headers
        # Method command
        self.sendCommand(self.factory.method, self.factory.path)
        # Headers
        for key, values in self.factory.headers.items():
            for value in values:
                self.sendHeader(key, value)
        self.endHeaders()
        # Body
        if self.factory.body is not None:
            self.transport.write(self.factory.body)

    def lineReceived(self, line):
        # Strip trailing CR so CRLF-terminated lines parse like LF ones.
        return HTTPClient.lineReceived(self, line.rstrip())

    def handleHeader(self, key, value):
        # appendlist preserves every occurrence of a repeated header.
        self.headers.appendlist(key, value)

    def handleStatus(self, version, status, message):
        self.factory.gotStatus(version, status, message)

    def handleEndHeaders(self):
        self.factory.gotHeaders(self.headers)

    def connectionLost(self, reason):
        HTTPClient.connectionLost(self, reason)
        self.factory.noPage(reason)

    def handleResponse(self, response):
        if self.factory.method.upper() == 'HEAD':
            # HEAD responses carry no body by definition.
            self.factory.page('')
        elif self.length is not None and self.length != 0:
            # Bytes still outstanding: the body was cut short.
            # (was ``self.length != None`` -- identity, not equality, is the
            # correct comparison against None, per PEP 8)
            self.factory.noPage(failure.Failure(
                PartialDownloadError(self.factory.status, None, response)))
        else:
            self.factory.page(response)
        self.transport.loseConnection()

    def timeout(self):
        self.transport.loseConnection()
        self.factory.noPage(
            defer.TimeoutError("Getting %s took longer than %s seconds." %
                               (self.factory.url, self.factory.timeout)))
示例5: test_netscape_example_2
# 需要导入模块: from scrapy.http import Headers [as 别名]
# 或者: from scrapy.http.Headers import appendlist [as 别名]
def test_netscape_example_2(self):
    """Second Netscape-spec example: two PART_NUMBER cookies, different paths.

    A cookie set for path "/" and a later one for path "/ammo" must both
    be sent on an "/ammo" request, most-specific path first -- hence two
    name/value pairs named "PART_NUMBER" in the final Cookie header.
    """
    c = CookieJar()
    headers = Headers({'Set-Cookie': 'PART_NUMBER=ROCKET_LAUNCHER_0001; path=/'})
    req = Request("http://www.acme.com/")
    res = Response("http://www.acme.com/", headers=headers)
    c.extract_cookies(res, req)
    # A request to "/" carries the path "/" cookie.
    req = Request("http://www.acme.com/")
    c.add_cookie_header(req)
    # assertEquals is a deprecated unittest alias (removed in Python 3.12).
    self.assertEqual(req.headers.get("Cookie"), "PART_NUMBER=ROCKET_LAUNCHER_0001")
    # Second, more specific cookie scoped to "/ammo".
    headers.appendlist("Set-Cookie", "PART_NUMBER=RIDING_ROCKET_0023; path=/ammo")
    res = Response("http://www.acme.com/", headers=headers)
    c.extract_cookies(res, req)
    req = Request("http://www.acme.com/ammo")
    c.add_cookie_header(req)
    # Both pairs present: the "/" mapping is inherited alongside "/ammo",
    # with the more specific path first. (assert_ is likewise deprecated.)
    self.assertTrue(re.search(r"PART_NUMBER=RIDING_ROCKET_0023;\s*"
                              "PART_NUMBER=ROCKET_LAUNCHER_0001",
                              req.headers.get("Cookie")))
示例6: test_session_cookies
# 需要导入模块: from scrapy.http import Headers [as 别名]
# 或者: from scrapy.http.Headers import appendlist [as 别名]
def test_session_cookies(self):
    """CookieJar.clear_session_cookies removes session cookies only."""
    year_plus_one = time.localtime()[0] + 1
    # Check session cookies are deleted properly by
    # CookieJar.clear_session_cookies method
    req = Request('http://www.perlmeister.com/scripts')
    headers = Headers()
    headers.appendlist("Set-Cookie", "s1=session;Path=/scripts")
    headers.appendlist("Set-Cookie", "p1=perm; Domain=.perlmeister.com;Path=/;expires=Fri, 02-Feb-%d 23:24:20 GMT" % year_plus_one)
    headers.appendlist("Set-Cookie", "p2=perm;Path=/;expires=Fri, 02-Feb-%d 23:24:20 GMT" % year_plus_one)
    headers.appendlist("Set-Cookie", "s2=session;Path=/scripts;" "Domain=.perlmeister.com")
    headers.appendlist('Set-Cookie2', 's3=session;Version=1;Discard;Path="/"')
    res = Response('http://www.perlmeister.com/scripts', headers=headers)
    c = CookieJar()
    c.extract_cookies(res, req)
    # Count session/permanent cookies before and after clearing; each
    # cookie's value ("session"/"perm") selects the counter bucket.
    counter = {"session_after": 0,
               "perm_after": 0,
               "session_before": 0,
               "perm_before": 0}
    for cookie in c:
        counter["%s_before" % cookie.value] += 1
    c.clear_session_cookies()
    # How many now?
    for cookie in c:
        counter["%s_after" % cookie.value] += 1
    # assert_ is a deprecated unittest alias (removed in Python 3.12).
    self.assertTrue(not (
        # a permanent cookie got lost accidentally
        counter["perm_after"] != counter["perm_before"] or
        # a session cookie hasn't been cleared
        counter["session_after"] != 0 or
        # we didn't have session cookies in the first place
        counter["session_before"] == 0))
示例7: test_netscape_misc
# 需要导入模块: from scrapy.http import Headers [as 别名]
# 或者: from scrapy.http.Headers import appendlist [as 别名]
def test_netscape_misc(self):
    """Additional Netscape cookie tests: dotted domains and odd value chars."""
    c = CookieJar()
    headers = Headers()
    req = Request("http://foo.bar.acme.com/foo")
    # Netscape allows a host part that contains dots
    headers.appendlist("Set-Cookie", "Customer=WILE_E_COYOTE; domain=.acme.com")
    res = Response("http://www.acme.com/foo", headers=headers)
    c.extract_cookies(res, req)
    # and that the domain is the same as the host without adding a leading
    # dot to the domain. Should not quote even if strange chars are used
    # in the cookie value.
    headers.appendlist("Set-Cookie", "PART_NUMBER=3,4; domain=foo.bar.acme.com")
    res = Response("http://www.acme.com/foo", headers=headers)
    c.extract_cookies(res, req)
    req = Request("http://foo.bar.acme.com/foo")
    c.add_cookie_header(req)
    # assert_ is a deprecated unittest alias (removed in Python 3.12).
    self.assertTrue(
        "PART_NUMBER=3,4" in req.headers.get("Cookie") and
        "Customer=WILE_E_COYOTE" in req.headers.get("Cookie"))
示例8: ScrapyHTTPPageGetter
# 需要导入模块: from scrapy.http import Headers [as 别名]
# 或者: from scrapy.http.Headers import appendlist [as 别名]
class ScrapyHTTPPageGetter(HTTPClient):
    """HTTP client protocol with optional CONNECT-tunnel (proxy + TLS) support.

    When ``factory.use_tunnel`` is set, a CONNECT request is issued to the
    proxy first; once the proxy replies, the transport is upgraded to TLS
    and the real request is replayed through the tunnel.
    """

    delimiter = '\n'

    def connectionMade(self):
        self.headers = Headers() # bucket for response headers
        if self.factory.use_tunnel:
            # Establish the proxy tunnel first; the real request is sent
            # later, from startTunnel().
            log.msg("Sending CONNECT", log.DEBUG)
            self.tunnel_started = False
            self.sendCommand("CONNECT", "%s:%s"
                % (self.factory.tunnel_to_host, self.factory.tunnel_to_port))
            self.sendHeaders(only=['Host','Proxy-Connection', 'User-Agent'])
            del self.factory.headers['Proxy-Connection']
        else:
            self.sendEverything()

    def sendCommand(self, command, path):
        # HTTP/1.1 for the CONNECT handshake, HTTP/1.0 for the real request.
        if self.factory.use_tunnel and not self.tunnel_started:
            http_version = "1.1"
        else:
            http_version = "1.0"
        self.transport.write('%s %s HTTP/%s\r\n' % (command, path, http_version))

    def sendEverything(self):
        """Send command line, headers and body of the real request."""
        self.sendMethod()
        self.sendHeaders()
        self.sendBody()

    def sendMethod(self):
        # Method command
        self.sendCommand(self.factory.method, self.factory.path)

    def sendHeaders(self, only=None):
        """Send request headers; ``only`` restricts output to the listed keys."""
        # Note: it's a Headers object, not a dict
        keys = only if only is not None else self.factory.headers.keys()
        for key in keys:
            for value in self.factory.headers.getlist(key):
                self.sendHeader(key, value)
        self.endHeaders()

    def sendBody(self):
        # Body
        if self.factory.body is not None:
            self.transport.write(self.factory.body)

    def lineReceived(self, line):
        if self.factory.use_tunnel and not self.tunnel_started: log.msg("LINE: %s" % line)
        if self.factory.use_tunnel and not self.tunnel_started and not line.rstrip():
            # End of headers from the proxy in response to our CONNECT request.
            # Skip the call to HTTPClient.lineReceived for now, since otherwise
            # it would switch to raw mode.
            self.startTunnel()
        else:
            return HTTPClient.lineReceived(self, line.rstrip())

    def startTunnel(self):
        """Upgrade the proxied connection to TLS and replay the real request."""
        log.msg("starting Tunnel")
        # We'll get a new batch of headers through the tunnel. This sets us
        # up to capture them.
        self.firstLine = True
        self.tunnel_started = True
        # Switch to SSL
        ctx = ClientContextFactory()
        self.transport.startTLS(ctx, self.factory)
        # And send the normal request:
        self.sendEverything()

    def handleHeader(self, key, value):
        if self.factory.use_tunnel and not self.tunnel_started:
            pass # maybe log headers for CONNECT request?
        else:
            # appendlist preserves every occurrence of a repeated header.
            self.headers.appendlist(key, value)

    def handleStatus(self, version, status, message):
        if self.factory.use_tunnel and not self.tunnel_started:
            # Keep the proxy's CONNECT status separate from the real one.
            self.tunnel_status = status
        else:
            self.factory.gotStatus(version, status, message)

    def handleEndHeaders(self):
        self.factory.gotHeaders(self.headers)

    def connectionLost(self, reason):
        HTTPClient.connectionLost(self, reason)
        self.factory.noPage(reason)

    def handleResponse(self, response):
        # HEAD responses carry no body by definition.
        if self.factory.method.upper() == 'HEAD':
            self.factory.page('')
        else:
            self.factory.page(response)
        self.transport.loseConnection()
def timeout(self):
#.........这里部分代码省略.........
示例9: test_netscape_example_1
# 需要导入模块: from scrapy.http import Headers [as 别名]
# 或者: from scrapy.http.Headers import appendlist [as 别名]
def test_netscape_example_1(self):
    """First example from the original Netscape cookie specification.

    http://www.netscape.com/newsref/std/cookie_spec.html

    Accumulates CUSTOMER (path "/"), PART_NUMBER (path "/") and SHIPPING
    (path "/foo") cookies, then checks which are sent for "/", "/foo/bar"
    and "/foo/" requests. Both specs require the most specific cookie
    first, so the final header must start with SHIPPING=FEDEX.
    """
    year_plus_one = time.localtime()[0] + 1
    c = CookieJar(DefaultCookiePolicy(rfc2965=True))
    req = Request("http://www.acme.com:80/", headers={"Host": "www.acme.com:80"})
    headers = Headers()
    headers['Set-Cookie'] = 'CUSTOMER=WILE_E_COYOTE; path=/ ; expires=Wednesday, 09-Nov-%d 23:12:40 GMT' % year_plus_one
    res = Response("http://www.acme.com/", headers=headers)
    c.extract_cookies(res, req)
    req = Request("http://www.acme.com/")
    c.add_cookie_header(req)
    self.assertEqual(req.headers.get("Cookie"), "CUSTOMER=WILE_E_COYOTE")
    self.assertEqual(req.headers.get("Cookie2"), '$Version="1"')
    # Second cookie, also scoped to path "/".
    headers.appendlist("Set-Cookie", "PART_NUMBER=ROCKET_LAUNCHER_0001; path=/")
    res = Response("http://www.acme.com/", headers=headers)
    c.extract_cookies(res, req)
    req = Request("http://www.acme.com/foo/bar")
    c.add_cookie_header(req)
    h = req.headers.get("Cookie")
    # assert_ is a deprecated unittest alias (removed in Python 3.12);
    # use assertTrue.
    self.assertTrue("PART_NUMBER=ROCKET_LAUNCHER_0001" in h and
                    "CUSTOMER=WILE_E_COYOTE" in h)
    # Third cookie, restricted to path "/foo": absent from "/" requests...
    headers.appendlist('Set-Cookie', 'SHIPPING=FEDEX; path=/foo')
    res = Response("http://www.acme.com", headers=headers)
    c.extract_cookies(res, req)
    req = Request("http://www.acme.com/")
    c.add_cookie_header(req)
    h = req.headers.get("Cookie")
    self.assertTrue("PART_NUMBER=ROCKET_LAUNCHER_0001" in h and
                    "CUSTOMER=WILE_E_COYOTE" in h and
                    "SHIPPING=FEDEX" not in h)
    # ...but present -- and first, being most path-specific -- for "/foo/".
    req = Request("http://www.acme.com/foo/")
    c.add_cookie_header(req)
    h = req.headers.get("Cookie")
    self.assertTrue("PART_NUMBER=ROCKET_LAUNCHER_0001" in h and
                    "CUSTOMER=WILE_E_COYOTE" in h and
                    h.startswith("SHIPPING=FEDEX;"))