本文整理匯總了Python中scrapy.http.TextResponse類的典型用法代碼示例。如果您正苦於以下問題:Python http.TextResponse類的具體用法?Python http.TextResponse怎麼用?Python http.TextResponse使用的例子?那麼,這裡精選的代碼示例或許可以為您提供幫助。您也可以進一步了解該類所在模塊scrapy.http的用法示例。
在下文中一共展示了http.TextResponse類的14個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: _body_or_str
# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import TextResponse [as 別名]
def _body_or_str(obj, unicode=True):
    """Return the content of ``obj`` as text or as bytes.

    ``obj`` must be a ``Response``, a text string or a byte string; anything
    else trips the assertion below.

    With ``unicode=True`` the result is text: ``obj.text`` for a
    ``TextResponse``, the UTF-8 decoded body for any other ``Response``,
    and the (UTF-8 decoded, if needed) string otherwise.  With
    ``unicode=False`` the result is bytes: the raw response body, or the
    string encoded as UTF-8 when it was text.
    """
    accepted = (Response, six.text_type, six.binary_type)
    assert isinstance(obj, accepted), "obj must be %s, not %s" % (
        " or ".join(kind.__name__ for kind in accepted),
        type(obj).__name__)

    if isinstance(obj, Response):
        if not unicode:
            return obj.body
        if isinstance(obj, TextResponse):
            # TextResponse knows its own encoding, so let it do the decoding.
            return obj.text
        return obj.body.decode('utf-8')

    if isinstance(obj, six.text_type):
        if unicode:
            return obj
        return obj.encode('utf-8')

    # Only byte strings are left at this point.
    if unicode:
        return obj.decode('utf-8')
    return obj
示例2: open_in_browser
# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import TextResponse [as 別名]
def open_in_browser(response, _openfunc=webbrowser.open):
    """Open the given response in a local web browser, populating the <base>
    tag for external links to work.

    The response body is written to a temporary file (``.html`` for an
    ``HtmlResponse``, ``.txt`` for any other ``TextResponse``) and the
    ``file://`` URL of that file is handed to ``_openfunc``.

    :param response: the response whose body should be displayed.
    :param _openfunc: callable receiving the ``file://`` URL; defaults to
        ``webbrowser.open``.
    :returns: whatever ``_openfunc`` returns.
    :raises TypeError: if ``response`` is not a ``TextResponse``.
    """
    from scrapy.http import HtmlResponse, TextResponse
    # XXX: this implementation is a bit dirty and could be improved
    body = response.body
    if isinstance(response, HtmlResponse):
        if b'<base' not in body:
            # Inject a <base> tag so relative links resolve against the
            # original URL instead of the temporary file location.
            repl = '<head><base href="%s">' % response.url
            body = body.replace(b'<head>', to_bytes(repl))
        ext = '.html'
    elif isinstance(response, TextResponse):
        ext = '.txt'
    else:
        raise TypeError("Unsupported response type: %s" %
                        response.__class__.__name__)
    fd, fname = tempfile.mkstemp(ext)
    # Wrap the descriptor in a file object: the ``with`` block closes it even
    # if writing fails, and ``write`` keeps going until the whole body is
    # stored (a bare ``os.write`` may write only part of the buffer).
    with os.fdopen(fd, 'wb') as tmp_file:
        tmp_file.write(body)
    return _openfunc("file://%s" % fname)
示例3: process_response
# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import TextResponse [as 別名]
def process_response(self, request, response, spider):
    """Decode the response body according to its ``Content-Encoding`` header.

    Responses to ``HEAD`` requests, objects that are not ``Response``
    instances, and responses without a ``Content-Encoding`` header are
    returned unchanged.  Otherwise the last listed encoding is decoded via
    ``self._decode`` and a replacement response (of the class guessed from
    the decoded body) is returned.
    """
    if request.method == 'HEAD' or not isinstance(response, Response):
        return response

    content_encoding = response.headers.getlist('Content-Encoding')
    if not content_encoding:
        return response

    # Encodings are undone starting from the last one listed.
    encoding = content_encoding.pop()
    decoded_body = self._decode(response.body, encoding.lower())
    respcls = responsetypes.from_args(
        headers=response.headers, url=response.url, body=decoded_body)
    kwargs = {'cls': respcls, 'body': decoded_body}
    if issubclass(respcls, TextResponse):
        # force recalculating the encoding until we make sure the
        # responsetypes guessing is reliable
        kwargs['encoding'] = None
    response = response.replace(**kwargs)
    if not content_encoding:
        # That was the only encoding listed, so the header no longer applies.
        del response.headers['Content-Encoding']
    return response
示例4: test_save_response_with_trim
# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import TextResponse [as 別名]
def test_save_response_with_trim(self):
    """Check that ``trim_html`` lets an otherwise oversized page be saved.

    The body is padded with whitespace and NUL characters: saved as-is it
    is expected to be rejected with a warning, while with ``trim_html``
    enabled the stripped markup is expected to be written.
    """
    self.instance._writer.maxitemsize = 26
    self.instance.hsref.job.key = '123/45/67'
    padded_body = '\r\n\r\n<html><body></body></html>\r\n \0\0\0\0\0'
    resp = TextResponse(
        'http://resp', request=Request('http://req'), encoding='cp1251',
        body=padded_body)

    # First attempt, without trimming: only a warning is expected.
    with mock.patch.object(Spider, 'logger') as log:
        spider = Spider('default')
        self.instance.save_response(resp, self.spider)
    log.warning.assert_called_with(
        "Page not saved, body too large: <http://resp>")

    # Second attempt, with trimming: the stripped body must be written.
    self.instance.trim_html = True
    self.instance.save_response(resp, spider)
    expected_payload = {
        u'body': u'<html><body></body></html>', u'_encoding': u'cp1251',
        u'_type': u'_pageitem',
        u'_key': u'9b4bed7e56103ddf63455ed39145f61f53b3c702',
        u'url': u'http://resp', '_jobid': '123/45/67'}
    self.instance._writer.write.assert_called_with(expected_payload)
示例5: __init__
# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import TextResponse [as 別名]
def __init__(self, response: TextResponse):
    """Keep a reference to ``response`` on the instance as ``self.response``."""
    self.response = response
示例6: parse11
# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import TextResponse [as 別名]
def parse11(self, response: TextResponse):
    """Do nothing; stub whose ``response`` is annotated as ``TextResponse``."""
示例7: parse12
# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import TextResponse [as 別名]
def parse12(self, response: TextResponse, book_page: DummyProductPage):
    """Do nothing; stub taking a ``TextResponse`` and a ``DummyProductPage``."""
示例8: test_is_provider_using_response
# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import TextResponse [as 別名]
def test_is_provider_using_response():
    """Check ``is_provider_using_response`` against each known provider."""
    expectations = (
        (PageObjectInputProvider, False),
        (ResponseDataProvider, True),
        # TextProductProvider wrongly annotates response dependency as
        # TextResponse, instead of using the Response type.
        (TextProductProvider, False),
        (DummyProductProvider, False),
        (FakeProductProvider, False),
        (StringProductProvider, False),
    )
    for provider_cls, expected in expectations:
        assert is_provider_using_response(provider_cls) is expected
示例9: download_request
# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import TextResponse [as 別名]
def download_request(self, request, spider):
    """Build a response object from the data URI found in ``request.url``.

    The response class is picked from the URI's media type.  The
    ``charset`` media-type parameter is passed on as ``encoding`` only
    when that class is a ``TextResponse`` subclass and the media type is
    ``text/*``.
    """
    uri = parse_data_uri(request.url)
    respcls = responsetypes.from_mimetype(uri.media_type)

    is_textual = (issubclass(respcls, TextResponse)
                  and uri.media_type.split('/')[0] == 'text')
    resp_kwargs = (
        {'encoding': uri.media_type_parameters.get('charset')}
        if is_textual else {}
    )
    return respcls(url=request.url, body=uri.data, **resp_kwargs)
示例10: process_exception
# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import TextResponse [as 別名]
def process_exception(self, request, exception, spider):
    """Answer exceptions listed in ``self.DONT_RETRY_ERRORS`` with a response.

    For such exceptions a ``TextResponse`` whose URL is
    ``request.meta['proxy']`` is returned; for any other exception the
    method returns ``None``.
    """
    if not isinstance(exception, self.DONT_RETRY_ERRORS):
        return None
    return TextResponse(url=request.meta['proxy'])
示例11: save_response
# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import TextResponse [as 別名]
def save_response(self, response, spider):
    """Store a text response as a ``_pageitem`` record via ``self._writer``.

    Responses that are not ``TextResponse`` instances are ignored.  The
    record holds the request fingerprint, job key, encoding, URL, cookies
    (added by ``self._set_cookies``), the POST data for POST requests and
    the decoded body, optionally stripped when ``self.trim_html`` is set.

    Nothing is written, and a warning is logged on ``spider.logger``, when
    the body is longer than ``self._writer.maxitemsize`` or the writer
    raises ``ValueTooLarge``.
    """
    if not isinstance(response, TextResponse):
        return

    payload = {
        "_key": request_fingerprint(response.request),
        "_jobid": self.hsref.job.key,
        "_type": "_pageitem",
        "_encoding": response.encoding,
        "url": response.url,
    }
    self._set_cookies(payload, response)

    if response.request.method == 'POST':
        payload["postdata"] = dict(parse_qsl(response.request.body.decode()))

    # ``response.text`` replaces the deprecated ``body_as_unicode()``.
    body = response.text
    if self.trim_html:
        body = body.strip(' \r\n\0')
    payload["body"] = body

    if len(body) > self._writer.maxitemsize:
        spider.logger.warning("Page not saved, body too large: <%s>" %
                              response.url)
        return

    try:
        self._writer.write(payload)
    except ValueTooLarge as exc:
        spider.logger.warning("Page not saved, %s: <%s>" %
                              (exc, response.url))
示例12: test_save_response
# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import TextResponse [as 別名]
def test_save_response(self):
    """Exercise ``save_response`` with three kinds of responses.

    A plain ``Response`` and a ``TextResponse`` whose body exceeds the
    writer limit must not reach the writer; a small ``TextResponse`` must
    be written with the expected payload.
    """
    writer = mock.MagicMock()
    writer.maxitemsize = 10
    self.instance._writer = writer

    # wrong response type
    plain_response = Response('http://resp', request=Request('http://req'))
    self.instance.save_response(plain_response, self.spider)
    assert not self.instance._writer.write.called

    # get request with large body
    resp1 = TextResponse('http://resp1',
                         request=Request('http://req1'),
                         body='looong loong body',
                         encoding='cp1251')
    self.instance.save_response(resp1, self.spider)
    assert not self.instance._writer.write.called

    # get request with ok-body
    self.instance.hsref = mock.Mock()
    self.instance.hsref.job.key = '123/45/67'
    cookie_headers = {'Set-Cookie': [b'coo1=test;abc=1',
                                     b'coo2=tes1;cbd=2']}
    resp2 = TextResponse('http://resp2', request=Request('http://req2'),
                         body='body', encoding='cp1251',
                         headers=cookie_headers)
    self.instance.save_response(resp2, self.spider)
    expected_payload = {
        'body': u'body', '_encoding': 'cp1251', '_type': '_pageitem',
        '_key': 'bad42100b1d34e29973a79e512aabb4db885b712',
        'cookies': ['coo1=test', 'coo2=tes1'],
        'url': 'http://resp2', '_jobid': '123/45/67'}
    self.instance._writer.write.assert_called_with(expected_payload)
示例13: collect
# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import TextResponse [as 別名]
def collect(conf, conn):
    """Collect ICD-XX-CM conditions.

    Downloads the code-tables archive, reads ``Tabular.xml`` out of it and
    writes one record per leaf ``<diag>`` element (a ``<diag>`` with no
    nested ``<diag>`` children) through ``Record.create(...).write``.

    :param conf: passed through unchanged to ``record.write``.
    :param conn: passed through unchanged to ``record.write``.
    """
    # For more information see:
    # https://www.cms.gov/Medicare/Coding/ICD10/2016-ICD-10-CM-and-GEMs.html
    URL = 'https://www.cms.gov/Medicare/Coding/ICD10/Downloads/2016-CM-Code-Tables-and-Index.zip'
    FILE = 'Tabular.xml'
    VERSION = 'ICD-10-CM'
    LAST_UPDATED = '2015-10-01'

    # Prepare xml
    archive_bytes = requests.get(URL).content
    # The context manager closes the archive once the member has been read.
    with zipfile.ZipFile(io.BytesIO(archive_bytes)) as archive:
        xml = archive.read(FILE)
    res = TextResponse(url=URL, body=xml, encoding='utf-8')

    count = 0
    for diag in res.xpath('//diag'):

        # We need only leafs: skip every <diag> that still contains nested
        # <diag> elements.  (The previous check was inverted and skipped
        # the leafs instead.)
        if diag.xpath('./diag'):
            continue

        # Get data
        data = {
            'name': diag.xpath('./name/text()').extract_first(),
            'desc': diag.xpath('./desc/text()').extract_first(),
            'terms': diag.xpath('.//note/text()').extract(),
            'version': VERSION,
            'last_updated': LAST_UPDATED,
        }

        # Create record
        record = Record.create(URL, data)

        # Write record
        record.write(conf, conn)

        # Log info
        count += 1
        if not count % 100:
            logger.info('Collected %s "%s" conditions', count, record.table)
示例14: csviter
# 需要導入模塊: from scrapy import http [as 別名]
# 或者: from scrapy.http import TextResponse [as 別名]
def csviter(obj, delimiter=None, headers=None, encoding=None, quotechar=None):
    """Yield one dictionary per data row of the given CSV object.

    ``obj`` can be:
    - a Response object
    - a unicode string
    - a string encoded as utf-8

    ``delimiter`` is the character separating fields in ``obj``.

    ``headers`` is an iterable providing the keys of the yielded
    dictionaries; when it is not given, the first row is used instead.

    ``quotechar`` is the character used to enclose fields in ``obj``.

    ``encoding`` is used to decode fields and defaults to utf-8; for a
    ``TextResponse`` the response's own encoding always takes precedence.

    Rows whose length differs from the number of headers are skipped with
    a warning.
    """
    if isinstance(obj, TextResponse):
        encoding = obj.encoding
    else:
        encoding = encoding or 'utf-8'

    def _decode_row(raw_row):
        return [to_unicode(cell, encoding) for cell in raw_row]

    # Python 3 csv reader input object needs to return strings
    lines = (StringIO(_body_or_str(obj, unicode=True)) if six.PY3
             else BytesIO(_body_or_str(obj, unicode=False)))

    # Only forward the options the caller actually supplied.
    reader_options = {
        option: value
        for option, value in (("delimiter", delimiter),
                              ("quotechar", quotechar))
        if value
    }
    reader = csv.reader(lines, **reader_options)

    if not headers:
        try:
            first_row = next(reader)
        except StopIteration:
            # Empty input: nothing to yield.
            return
        headers = _decode_row(first_row)

    for raw_row in reader:
        fields = _decode_row(raw_row)
        if len(fields) == len(headers):
            yield dict(zip(headers, fields))
            continue
        logger.warning("ignoring row %(csvlnum)d (length: %(csvrow)d, "
                       "should be: %(csvheader)d)",
                       {'csvlnum': reader.line_num, 'csvrow': len(fields),
                        'csvheader': len(headers)})