本文整理汇总了 Python 中 wpull.http.request.Response.body 方法的典型用法代码示例。如果您正苦于以下问题：Python Response.body 方法的具体用法？Python Response.body 怎么用？Python Response.body 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 wpull.http.request.Response 的用法示例。
在下文中一共展示了Response.body方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_to_dict_body
# 需要导入模块: from wpull.http.request import Response [as 别名]
# 或者: from wpull.http.request.Response import body [as 别名]
def test_to_dict_body(self):
    """Check how ``to_dict()`` reports the ``'body'`` entry of a message.

    For ``Request`` and then ``Response``: a message carrying a ``Body``
    must yield a truthy ``'body'`` value, and a message whose body is
    ``NotImplemented`` must yield a falsy one.
    """
    for message_class in (Request, Response):
        with_body = message_class()
        with_body.body = Body()
        try:
            self.assertTrue(with_body.to_dict()['body'])
        finally:
            # Close the body even when to_dict() or the assertion fails,
            # so a failing run does not leave the Body open.
            with_body.body.close()

        without_body = message_class()
        without_body.body = NotImplemented
        self.assertFalse(without_body.to_dict()['body'])
示例2: test_sitemap_scraper_xml
# 需要导入模块: from wpull.http.request import Response [as 别名]
# 或者: from wpull.http.request.Response import body [as 别名]
def test_sitemap_scraper_xml(self):
    """Scrape a minimal XML sitemap and check the links that come back.

    The single ``<loc>`` URL of the sitemap is expected in
    ``linked_links`` and ``inline_links`` is expected to be empty.
    """
    sitemap_document = b'''<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>http://www.example.com/</loc>
<lastmod>2005-01-01</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
</urlset>
'''
    sitemap_scraper = SitemapScraper(self.get_html_parser())
    sitemap_request = Request('http://example.com/sitemap.xml')
    sitemap_response = Response(200, 'OK')
    sitemap_response.body = Body()

    # NOTE(review): reset_file_offset presumably restores the body's
    # offset on exit so the scraper reads from the start - confirm.
    with wpull.util.reset_file_offset(sitemap_response.body):
        sitemap_response.body.write(sitemap_document)

    result = sitemap_scraper.scrape(sitemap_request, sitemap_response)
    inline, linked = result.inline_links, result.linked_links

    self.assertEqual({'http://www.example.com/'}, linked)
    self.assertFalse(inline)
示例3: test_xhtml_invalid
# 需要导入模块: from wpull.http.request import Response [as 别名]
# 或者: from wpull.http.request.Response import body [as 别名]
def test_xhtml_invalid(self):
    """Scrape the ``xhtml_invalid.html`` sample and check its links.

    Expects exactly two inline links (an image and a script) and exactly
    one linked URL, all under ``http://example.com/``.
    """
    walker = ElementWalker(
        css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
    html_scraper = HTMLScraper(self.get_html_parser(), walker)
    page_request = Request('http://example.com/')
    page_response = Response(200, '')
    page_response.body = Body()

    sample_path = os.path.join(
        ROOT_PATH, 'testing', 'samples', 'xhtml_invalid.html')

    # NOTE(review): reset_file_offset presumably rewinds the body to its
    # prior offset on exit so the scraper reads the copied data - confirm.
    with wpull.util.reset_file_offset(page_response.body), \
            open(sample_path, 'rb') as sample_file:
        shutil.copyfileobj(sample_file, page_response.body)

    result = html_scraper.scrape(page_request, page_response)
    inline, linked = result.inline_links, result.linked_links

    expected_inline = {
        'http://example.com/image.png',
        'http://example.com/script.js',
    }
    expected_linked = {'http://example.com/link'}

    self.assertEqual(expected_inline, inline)
    self.assertEqual(expected_linked, linked)
示例4: test_html_soup
# 需要导入模块: from wpull.http.request import Response [as 别名]
# 或者: from wpull.http.request.Response import body [as 别名]
def test_html_soup(self):
    """Scrape the ``soup.html`` sample served with a ``Refresh: yes`` field.

    Expects one inline link (``ABOUTM~1.JPG``) and two linked URLs, one
    of which contains a space in its path.
    """
    walker = ElementWalker(
        css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
    html_scraper = HTMLScraper(self.get_html_parser(), walker)
    page_request = Request('http://example.com/')
    page_response = Response(200, '')
    page_response.body = Body()
    page_response.fields['Refresh'] = 'yes'

    sample_path = os.path.join(ROOT_PATH, 'testing', 'samples', 'soup.html')

    # NOTE(review): reset_file_offset presumably rewinds the body to its
    # prior offset on exit so the scraper reads the copied data - confirm.
    with wpull.util.reset_file_offset(page_response.body), \
            open(sample_path, 'rb') as sample_file:
        shutil.copyfileobj(sample_file, page_response.body)

    result = html_scraper.scrape(page_request, page_response)
    inline, linked = result.inline_links, result.linked_links

    expected_linked = {
        'http://example.com/BLOG',
        'http://example.com/web ring/Join.htm',
    }

    self.assertEqual({'http://example.com/ABOUTM~1.JPG'}, inline)
    self.assertEqual(expected_linked, linked)
示例5: test_html_not_quite_charset
# 需要导入模块: from wpull.http.request import Response [as 别名]
# 或者: from wpull.http.request.Response import body [as 别名]
def test_html_not_quite_charset(self):
    """Scrape the ``videogame_top.htm`` sample and spot-check two links.

    Only membership is checked: one known GIF must be among the inline
    links and one known geocities URL among the linked ones.
    """
    walker = ElementWalker(
        css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
    html_scraper = HTMLScraper(self.get_html_parser(), walker)
    page_request = Request('http://example.com/')
    page_response = Response(200, '')
    page_response.body = Body()

    sample_path = os.path.join(
        ROOT_PATH, 'testing', 'samples', 'videogame_top.htm')

    # NOTE(review): reset_file_offset presumably rewinds the body to its
    # prior offset on exit so the scraper reads the copied data - confirm.
    with wpull.util.reset_file_offset(page_response.body), \
            open(sample_path, 'rb') as sample_file:
        shutil.copyfileobj(sample_file, page_response.body)

    result = html_scraper.scrape(page_request, page_response)
    inline, linked = result.inline_links, result.linked_links

    expected_inline_member = \
        'http://example.com/copyright_2001_2006_rtype.gif'
    expected_linked_member = \
        'http://www.geocities.jp/gamehouse_grindcrusher/'

    self.assertIn(expected_inline_member, inline)
    self.assertIn(expected_linked_member, linked)
示例6: test_rss_as_html
# 需要导入模块: from wpull.http.request import Response [as 别名]
# 或者: from wpull.http.request.Response import body [as 别名]
def test_rss_as_html(self):
    """Feed an RSS document to ``HTMLScraper`` and check the result.

    The response declares ``application/rss+xml``. The scrape result
    must be truthy, carry no inline links, and carry exactly the two
    expected linked URLs.
    """
    walker = ElementWalker(
        css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
    html_scraper = HTMLScraper(self.get_html_parser(), walker)
    feed_request = Request('http://example.com/')
    feed_response = Response(200, '')
    feed_response.body = Body()
    feed_response.fields['content-type'] = 'application/rss+xml'

    sample_path = os.path.join(ROOT_PATH, 'testing', 'samples', 'rss.xml')

    # NOTE(review): reset_file_offset presumably rewinds the body to its
    # prior offset on exit so the scraper reads the copied data - confirm.
    with wpull.util.reset_file_offset(feed_response.body), \
            open(sample_path, 'rb') as sample_file:
        shutil.copyfileobj(sample_file, feed_response.body)

    result = html_scraper.scrape(feed_request, feed_response)
    self.assertTrue(result)

    inline, linked = result.inline_links, result.linked_links
    expected_linked = {
        'http://www.someexamplerssdomain.com/main.html',
        'http://www.wikipedia.org/'
    }

    self.assertFalse(inline)
    self.assertEqual(expected_linked, linked)
示例7: test_sitemap_scraper_xml_index
# 需要导入模块: from wpull.http.request import Response [as 别名]
# 或者: from wpull.http.request.Response import body [as 别名]
def test_sitemap_scraper_xml_index(self):
    """Scrape a minimal sitemap index and check the links that come back.

    The single ``<loc>`` URL of the index is expected in ``linked_links``
    and ``inline_links`` is expected to be empty.
    """
    index_document = b'''<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<sitemap>
<loc>http://www.example.com/sitemap1.xml.gz</loc>
<lastmod>2004-10-01T18:23:17+00:00</lastmod>
</sitemap>
</sitemapindex>
'''
    sitemap_scraper = SitemapScraper(self.get_html_parser())
    index_request = Request('http://example.com/sitemap.xml')
    index_response = Response(200, 'OK')
    index_response.body = Body()

    # NOTE(review): reset_file_offset presumably restores the body's
    # offset on exit so the scraper reads from the start - confirm.
    with wpull.util.reset_file_offset(index_response.body):
        index_response.body.write(index_document)

    result = sitemap_scraper.scrape(index_request, index_response)
    inline, linked = result.inline_links, result.linked_links

    self.assertEqual({'http://www.example.com/sitemap1.xml.gz'}, linked)
    self.assertFalse(inline)
示例8: test_javascript_heavy_inline_monstrosity
# 需要导入模块: from wpull.http.request import Response [as 别名]
# 或者: from wpull.http.request.Response import body [as 别名]
def test_javascript_heavy_inline_monstrosity(self):
    """Scrape a large inline-script sample with ``JavaScriptScraper``.

    The body is filled from ``twitchplayspokemonfirered.html`` starting
    at byte offset 0x147. One known image URL must be among the inline
    links and one known redirect URL among the linked ones.
    """
    scraper = JavaScriptScraper()
    request = Request('http://example.com/test.js')
    response = Response(200, 'OK')
    response.body = Body()

    sample_path = os.path.join(
        ROOT_PATH, 'testing', 'samples', 'twitchplayspokemonfirered.html')

    # NOTE(review): reset_file_offset presumably rewinds the body to its
    # prior offset on exit so the scraper reads the copied data - confirm.
    with wpull.util.reset_file_offset(response.body), \
            open(sample_path, 'rb') as sample_file:
        # Skip the first 0x147 bytes of the sample; presumably the HTML
        # preamble ahead of the script content - TODO confirm.
        sample_file.seek(0x147)
        shutil.copyfileobj(sample_file, response.body)

    scrape_result = scraper.scrape(request, response)
    inline_urls = scrape_result.inline_links
    linked_urls = scrape_result.linked_links

    self.assertIn(
        'http://cdn.bulbagarden.net/upload/archive/a/a4/'
        '20090718115357%21195Quagsire.png',
        inline_urls
    )
    self.assertIn(
        'http://www.google.com/url?q=http%3A%2F%2Fwww.reddit.com%2F'
        'user%2FGoldenSandslash15&sa=D&sntz=1&'
        'usg=AFQjCNElFBxZYdNm5mWoRSncf5tbdIJQ-A',
        linked_urls
    )
示例9: test_html_krokozyabry
# 需要导入模块: from wpull.http.request import Response [as 别名]
# 或者: from wpull.http.request.Response import body [as 别名]
def test_html_krokozyabry(self):
    """Scrape a sample declared as KOI8-R and check encoding and links.

    The response's content type is ``text/html; charset=KOI8-R``. The
    result must report encoding ``koi8-r``, no inline links, and a
    single linked URL whose path is Cyrillic text.
    """
    walker = ElementWalker(
        css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
    html_scraper = HTMLScraper(self.get_html_parser(), walker)
    page_request = Request('http://example.com/')
    page_response = Response(200, '')
    page_response.body = Body()
    page_response.fields['content-type'] = 'text/html; charset=KOI8-R'

    sample_path = os.path.join(
        ROOT_PATH, 'testing', 'samples', 'krokozyabry.html')

    # NOTE(review): reset_file_offset presumably rewinds the body to its
    # prior offset on exit so the scraper reads the copied data - confirm.
    with wpull.util.reset_file_offset(page_response.body), \
            open(sample_path, 'rb') as sample_file:
        shutil.copyfileobj(sample_file, page_response.body)

    result = html_scraper.scrape(page_request, page_response)
    inline, linked = result.inline_links, result.linked_links

    self.assertEqual('koi8-r', result.encoding)
    self.assertEqual(set(), inline)
    self.assertEqual({'http://example.com/Кракозябры'}, linked)
示例10: test_warc_recorder_rollback
# 需要导入模块: from wpull.http.request import Response [as 别名]
# 或者: from wpull.http.request.Response import body [as 别名]
def test_warc_recorder_rollback(self):
    """Check that a failed record write leaves the WARC file size unchanged.

    ``asdf.warc`` is pre-seeded with ten bytes. The session's request
    record is swapped for a ``BadRecord`` whose iteration yields 1000
    chunks and then raises ``OSError``. ``session.request()`` must raise,
    the file size must equal its size before the call, and no
    ``asdf.warc-wpullinc`` file may remain.
    """
    warc_filename = 'asdf.warc'
    warc_prefix = 'asdf'

    with open(warc_filename, 'wb') as seed_file:
        seed_file.write(b'a' * 10)

    recorder_params = WARCRecorderParams(
        compress=False,
    )
    warc_recorder = WARCRecorder(warc_prefix, params=recorder_params)

    request = HTTPRequest('http://example.com/')
    request.address = ('0.0.0.0', 80)
    response = HTTPResponse(200, 'OK')
    response.body = Body()

    with wpull.util.reset_file_offset(response.body):
        response.body.write(b'KITTEH DOGE')

    with warc_recorder.session() as session:
        session.pre_request(request)
        session.request_data(request.to_bytes())

        class BadRecord(WARCRecord):
            """Record that borrows another record's data but fails mid-iteration."""

            def __init__(self, original_record):
                super().__init__()
                self.block_file = original_record.block_file
                self.fields = original_record.fields

            def __iter__(self):
                for _ in range(1000):
                    yield b"where's my elephant?"

                raise OSError('Oops')

        # Reaches into private session state to inject the failing record.
        child_session = session._child_session
        child_session._request_record = BadRecord(
            child_session._request_record)

        original_offset = os.path.getsize(warc_filename)

        with self.assertRaises((OSError, IOError)):
            session.request(request)

        new_offset = os.path.getsize(warc_filename)

        self.assertEqual(new_offset, original_offset)
        self.assertFalse(os.path.exists(warc_filename + '-wpullinc'))

        _logger.debug('original offset {0}'.format(original_offset))
示例11: test_css_scraper_reject_type
# 需要导入模块: from wpull.http.request import Response [as 别名]
# 或者: from wpull.http.request.Response import body [as 别名]
def test_css_scraper_reject_type(self):
    """Check that ``CSSScraper`` yields a falsy result for ``LinkType.html``.

    The body holds the ``styles.css`` sample, but the scrape is requested
    with ``link_type=LinkType.html``.
    """
    css_scraper = CSSScraper()
    css_request = Request('http://example.com/styles.css')
    css_response = Response(200, 'OK')
    css_response.body = Body()

    sample_path = os.path.join(
        ROOT_PATH, 'testing', 'samples', 'styles.css')

    # NOTE(review): reset_file_offset presumably rewinds the body to its
    # prior offset on exit so the scraper reads the copied data - confirm.
    with wpull.util.reset_file_offset(css_response.body), \
            open(sample_path, 'rb') as sample_file:
        shutil.copyfileobj(sample_file, css_response.body)

    result = css_scraper.scrape(
        css_request, css_response, link_type=LinkType.html)

    self.assertFalse(result)
示例12: test_warc_max_size_and_append
# 需要导入模块: from wpull.http.request import Response [as 别名]
# 或者: from wpull.http.request.Response import body [as 别名]
def test_warc_max_size_and_append(self):
    """Record one exchange with ``max_size=1`` and ``appending=True``.

    Two empty WARC files (``asdf-00000.warc``, ``asdf-00001.warc``) are
    created before recording. After the recorder is closed, those two
    must still exist and be empty, while ``asdf-00002.warc``,
    ``asdf-00003.warc`` and ``asdf-meta.warc`` must exist and be
    non-empty.
    """
    file_prefix = 'asdf'
    preexisting_files = ('asdf-00000.warc', 'asdf-00001.warc')
    written_files = ('asdf-00002.warc', 'asdf-00003.warc', 'asdf-meta.warc')

    # Create the pre-existing files as empty files.
    for file_name in preexisting_files:
        with open(file_name, 'w'):
            pass

    recorder_params = WARCRecorderParams(
        compress=False,
        max_size=1,
        appending=True
    )
    warc_recorder = WARCRecorder(file_prefix, params=recorder_params)

    request = HTTPRequest('http://example.com/1')
    request.address = ('0.0.0.0', 80)
    response = HTTPResponse(200, 'OK')
    response.body = Body()

    with wpull.util.reset_file_offset(response.body):
        response.body.write(b'BLAH')

    with warc_recorder.session() as session:
        session.pre_request(request)
        session.request_data(request.to_bytes())
        session.request(request)
        session.pre_response(response)
        session.response_data(response.to_bytes())
        session.response_data(response.body.content())
        session.response(response)

    warc_recorder.close()

    for file_name in preexisting_files + written_files:
        self.assertTrue(os.path.exists(file_name))

    for file_name in preexisting_files:
        self.assertEqual(0, os.path.getsize(file_name))

    for file_name in written_files:
        self.assertNotEqual(0, os.path.getsize(file_name))
示例13: test_sitemap_scraper_invalid_robots
# 需要导入模块: from wpull.http.request import Response [as 别名]
# 或者: from wpull.http.request.Response import body [as 别名]
def test_sitemap_scraper_invalid_robots(self):
    """Scrape a garbage ``robots.txt`` body and expect no links at all.

    The body is a line of junk text. Both ``linked_links`` and
    ``inline_links`` of the result must be falsy.
    """
    junk_robots = b'dsfju3wrji kjasSItemapsdmjfkl wekie;er :Ads fkj3m /Dk'

    sitemap_scraper = SitemapScraper(self.get_html_parser())
    robots_request = Request('http://example.com/robots.txt')
    robots_response = Response(200, 'OK')
    robots_response.body = Body()

    # NOTE(review): reset_file_offset presumably restores the body's
    # offset on exit so the scraper reads from the start - confirm.
    with wpull.util.reset_file_offset(robots_response.body):
        robots_response.body.write(junk_robots)

    result = sitemap_scraper.scrape(robots_request, robots_response)
    inline, linked = result.inline_links, result.linked_links

    self.assertFalse(linked)
    self.assertFalse(inline)
示例14: test_warc_recorder_journal
# 需要导入模块: from wpull.http.request import Response [as 别名]
# 或者: from wpull.http.request.Response import body [as 别名]
def test_warc_recorder_journal(self):
    """Check when the ``asdf.warc-wpullinc`` file exists around a write.

    The session's request record is swapped for a ``MockRecord`` whose
    iteration asserts that ``asdf.warc-wpullinc`` exists before yielding
    1000 chunks. After ``session.request()`` returns, that file must no
    longer exist.
    """
    warc_filename = 'asdf.warc'
    warc_prefix = 'asdf'
    recorder_params = WARCRecorderParams(
        compress=False,
    )
    warc_recorder = WARCRecorder(warc_prefix, params=recorder_params)

    request = HTTPRequest('http://example.com/')
    request.address = ('0.0.0.0', 80)
    response = HTTPResponse(200, 'OK')
    response.body = Body()

    with wpull.util.reset_file_offset(response.body):
        response.body.write(b'KITTEH DOGE')

    with warc_recorder.session() as session:
        session.pre_request(request)
        session.request_data(request.to_bytes())

        # Inside MockRecord, ``self`` is the record, so keep a handle on
        # the test case for the assertion made during iteration.
        test_instance = self

        class MockRecord(WARCRecord):
            """Record that borrows another record's data and checks the journal."""

            def __init__(self, original_record):
                super().__init__()
                self.block_file = original_record.block_file
                self.fields = original_record.fields

            def __iter__(self):
                # Directory dump printed ahead of the assertion as a
                # diagnostic aid.
                print(list(os.walk('.')))
                test_instance.assertTrue(
                    os.path.exists(warc_filename + '-wpullinc')
                )

                for _ in range(1000):
                    yield b"where's my elephant?"

        # Reaches into private session state to inject the mock record.
        child_session = session._child_session
        child_session._request_record = MockRecord(
            child_session._request_record)

        session.request(request)

        self.assertFalse(os.path.exists(warc_filename + '-wpullinc'))
示例15: test_html_scraper_reject_type
# 需要导入模块: from wpull.http.request import Response [as 别名]
# 或者: from wpull.http.request.Response import body [as 别名]
def test_html_scraper_reject_type(self):
    """Check that ``HTMLScraper`` yields a falsy result for ``LinkType.css``.

    The body holds the ``many_urls.html`` sample, but the scrape is
    requested with ``link_type=LinkType.css``.
    """
    walker = ElementWalker(
        css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
    html_scraper = HTMLScraper(self.get_html_parser(), walker)
    page_request = Request('http://example.com/')
    page_response = Response(200, 'OK')
    page_response.body = Body()

    sample_path = os.path.join(
        ROOT_PATH, 'testing', 'samples', 'many_urls.html')

    # NOTE(review): reset_file_offset presumably rewinds the body to its
    # prior offset on exit so the scraper reads the copied data - confirm.
    with wpull.util.reset_file_offset(page_response.body), \
            open(sample_path, 'rb') as sample_file:
        shutil.copyfileobj(sample_file, page_response.body)

    result = html_scraper.scrape(
        page_request, page_response, link_type=LinkType.css)

    self.assertFalse(result)