This article collects typical usage examples of the Python method wpull.http.request.Response.fields['content-type']. If you have been wondering what exactly Response.fields['content-type'] does and how to use it, the hand-picked examples below should help. You can also read more about the enclosing class, wpull.http.request.Response.
The following shows 10 code examples of Response.fields['content-type'], sorted by popularity by default. You can upvote the examples you find useful; your votes help the system recommend better Python code samples.
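Before the examples, a minimal sketch of the pattern they all share may help: build a Response, set its Content-Type through the `fields` header mapping, and hand the response to a scraper. The sketch below is not taken from the article's examples; it assumes the older wpull API used in Examples 1, 3, 5, 6 and 7, where the constructor takes an HTTP version string and `fields` behaves like a dict of header names to values.

# A minimal sketch, assuming the older wpull API; `fields` is treated
# here as a dict-like header mapping, which is what every example below
# relies on when it assigns response.fields['content-type'].
from wpull.http.request import Request, Response

request = Request.new('http://example.com/')
response = Response('HTTP/1.0', 200, '')

# The assignment tells downstream scrapers how to interpret the body.
response.fields['content-type'] = 'text/html; charset=utf-8'

# Reading the field back should return the value that was set.
print(response.fields['content-type'])  # text/html; charset=utf-8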
Example 1: test_rss_as_html
# Required import: from wpull.http.request import Response [as alias]
# Or: from wpull.http.request.Response import fields['content-type'] [as alias]
def test_rss_as_html(self):
    scraper = HTMLScraper()
    request = Request.new('http://example.com/')
    response = Response('HTTP/1.0', 200, '')
    response.fields['content-type'] = 'application/rss+xml'

    with wpull.util.reset_file_offset(response.body.content_file):
        html_file_path = os.path.join(
            os.path.dirname(__file__), 'testing', 'samples', 'rss.xml')
        with open(html_file_path, 'rb') as in_file:
            shutil.copyfileobj(in_file, response.body.content_file)

    scrape_info = scraper.scrape(request, response)
    self.assertTrue(scrape_info)

    inline_urls = scrape_info['inline_urls']
    linked_urls = scrape_info['linked_urls']

    self.assertFalse(inline_urls)
    self.assertEqual(
        {
            'http://www.someexamplerssdomain.com/main.html',
            'http://www.wikipedia.org/'
        },
        linked_urls
    )
Example 2: test_rss_as_html
# Required import: from wpull.http.request import Response [as alias]
# Or: from wpull.http.request.Response import fields['content-type'] [as alias]
def test_rss_as_html(self):
    element_walker = ElementWalker(
        css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
    scraper = HTMLScraper(self.get_html_parser(), element_walker)
    request = Request('http://example.com/')
    response = Response(200, '')
    response.body = Body()
    response.fields['content-type'] = 'application/rss+xml'

    with wpull.util.reset_file_offset(response.body):
        html_file_path = os.path.join(
            ROOT_PATH, 'testing', 'samples', 'rss.xml')
        with open(html_file_path, 'rb') as in_file:
            shutil.copyfileobj(in_file, response.body)

    scrape_result = scraper.scrape(request, response)
    self.assertTrue(scrape_result)

    inline_urls = scrape_result.inline_links
    linked_urls = scrape_result.linked_links

    self.assertFalse(inline_urls)
    self.assertEqual(
        {
            'http://www.someexamplerssdomain.com/main.html',
            'http://www.wikipedia.org/'
        },
        linked_urls
    )
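Note the API drift between Examples 1 and 2, which run the same test against what appear to be two different wpull generations. In the older API the constructor takes an HTTP version string, `Response('HTTP/1.0', 200, '')`, the body is created implicitly and exposed as `response.body.content_file`, and `scrape()` returns a dict keyed by `'inline_urls'`, `'linked_urls'` and `'encoding'`. In the newer API the constructor is `Response(200, '')`, a body must be attached explicitly via `response.body = Body()`, and the scrape result is addressed through attributes such as `inline_links` and `linked_links` (Example 8 shows it still accepts dict-style access for `'encoding'`). The content-type assignment itself is identical in both.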
Example 3: test_html_krokozyabry
# Required import: from wpull.http.request import Response [as alias]
# Or: from wpull.http.request.Response import fields['content-type'] [as alias]
def test_html_krokozyabry(self):
    scraper = HTMLScraper()
    request = Request.new('http://example.com/')
    response = Response('HTTP/1.0', 200, '')
    response.fields['content-type'] = 'text/html; charset=KOI8-R'

    with wpull.util.reset_file_offset(response.body.content_file):
        html_file_path = os.path.join(
            os.path.dirname(__file__), 'testing', 'samples',
            'krokozyabry.html')
        with open(html_file_path, 'rb') as in_file:
            shutil.copyfileobj(in_file, response.body.content_file)

    scrape_info = scraper.scrape(request, response)

    inline_urls = scrape_info['inline_urls']
    linked_urls = scrape_info['linked_urls']

    self.assertEqual('koi8-r', scrape_info['encoding'])
    self.assertEqual(set(), inline_urls)
    self.assertEqual(
        {'http://example.com/Кракозябры'},
        linked_urls
    )
Example 4: test_html_krokozyabry
# Required import: from wpull.http.request import Response [as alias]
# Or: from wpull.http.request.Response import fields['content-type'] [as alias]
def test_html_krokozyabry(self):
    element_walker = ElementWalker(
        css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
    scraper = HTMLScraper(self.get_html_parser(), element_walker)
    request = Request('http://example.com/')
    response = Response(200, '')
    response.body = Body()
    response.fields['content-type'] = 'text/html; charset=KOI8-R'

    with wpull.util.reset_file_offset(response.body):
        html_file_path = os.path.join(
            ROOT_PATH, 'testing', 'samples', 'krokozyabry.html')
        with open(html_file_path, 'rb') as in_file:
            shutil.copyfileobj(in_file, response.body)

    scrape_result = scraper.scrape(request, response)

    inline_urls = scrape_result.inline_links
    linked_urls = scrape_result.linked_links

    self.assertEqual('koi8-r', scrape_result.encoding)
    self.assertEqual(set(), inline_urls)
    self.assertEqual(
        {'http://example.com/Кракозябры'},
        linked_urls
    )
Example 5: test_html_serious_bad_encoding
# Required import: from wpull.http.request import Response [as alias]
# Or: from wpull.http.request.Response import fields['content-type'] [as alias]
def test_html_serious_bad_encoding(self):
    scraper = HTMLScraper(encoding_override='utf8')
    request = Request.new('http://example.com/')
    response = Response('HTTP/1.0', 200, '')
    response.fields['content-type'] = 'text/html; charset=utf8'

    with wpull.util.reset_file_offset(response.body.content_file):
        html_file_path = os.path.join(
            os.path.dirname(__file__), 'testing', 'samples',
            'xkcd_1_evil.html')
        with open(html_file_path, 'rb') as in_file:
            shutil.copyfileobj(in_file, response.body.content_file)

    scrape_info = scraper.scrape(request, response)
    self.assertTrue(scrape_info)
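Here both the header and `encoding_override` claim UTF-8 while the sample file (`xkcd_1_evil.html`, presumably a deliberately corrupted document, given the test name) does not decode cleanly; the test asserts only that `scrape()` still returns a truthy result rather than raising.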
Example 6: test_html_encoding_lxml_name_mismatch
# Required import: from wpull.http.request import Response [as alias]
# Or: from wpull.http.request.Response import fields['content-type'] [as alias]
def test_html_encoding_lxml_name_mismatch(self):
    '''It should accept encoding names with underscore.'''
    scraper = HTMLScraper()
    request = Request.new('http://example.com/')
    response = Response('HTTP/1.0', 200, '')
    response.fields['content-type'] = 'text/html; charset=EUC_KR'

    with wpull.util.reset_file_offset(response.body.content_file):
        response.body.content_file.write('힖'.encode('euc_kr'))

    scrape_info = scraper.scrape(request, response)

    self.assertTrue(scrape_info)
    self.assertEqual('euc_kr', scrape_info['encoding'])
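The mismatch this test's docstring alludes to is easy to reproduce with the standard library alone: Python's codec registry canonicalizes this encoding as 'euc_kr' (underscore), while charset labels in HTTP headers are conventionally written with a hyphen, and the two spellings must resolve to the same codec. A standalone illustration (plain stdlib, not wpull code):

import codecs

# Python normalizes codec lookups, so hyphen and underscore spellings
# resolve to the same codec, whose canonical name uses an underscore.
print(codecs.lookup('EUC_KR').name)  # euc_kr
print(codecs.lookup('euc-kr').name)  # euc_kr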
Example 7: test_html_garbage
# Required import: from wpull.http.request import Response [as alias]
# Or: from wpull.http.request.Response import fields['content-type'] [as alias]
def test_html_garbage(self):
    scraper = HTMLScraper()
    request = Request.new('http://example.com/')
    response = Response('HTTP/1.0', 200, '')
    response.fields['content-type'] = 'text/html'

    with wpull.util.reset_file_offset(response.body.content_file):
        response.body.content_file.write(
            b'\x01\x00\x01\x00l~Z\xff\x0f`y\x80\x00p<\x7f'
            b'\xffndo\xff\xff-\x83{d\xec</\xfe\x80\x00\xb4Bo'
            b'\x7f\xff\xff\xffV\xc1\xff\x7f\xff7'
        )

    scrape_info = scraper.scrape(request, response)
    self.assertTrue(scrape_info)
Example 8: test_html_encoding_lxml_name_mismatch
# Required import: from wpull.http.request import Response [as alias]
# Or: from wpull.http.request.Response import fields['content-type'] [as alias]
def test_html_encoding_lxml_name_mismatch(self):
    '''It should accept encoding names with underscore.'''
    element_walker = ElementWalker(
        css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
    scraper = HTMLScraper(self.get_html_parser(), element_walker)
    request = Request('http://example.com/')
    response = Response(200, '')
    response.body = Body()
    response.fields['content-type'] = 'text/html; charset=EUC_KR'

    with wpull.util.reset_file_offset(response.body):
        response.body.write('힖'.encode('euc_kr'))

    scrape_info = scraper.scrape(request, response)

    self.assertTrue(scrape_info)
    self.assertEqual('euc_kr', scrape_info['encoding'])
Example 9: test_html_garbage
# Required import: from wpull.http.request import Response [as alias]
# Or: from wpull.http.request.Response import fields['content-type'] [as alias]
def test_html_garbage(self):
    element_walker = ElementWalker(
        css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
    scraper = HTMLScraper(self.get_html_parser(), element_walker)
    request = Request('http://example.com/')
    response = Response(200, '')
    response.body = Body()
    response.fields['content-type'] = 'text/html'

    with wpull.util.reset_file_offset(response.body):
        response.body.write(
            b'\x01\x00\x01\x00l~Z\xff\x0f`y\x80\x00p<\x7f'
            b'\xffndo\xff\xff-\x83{d\xec</\xfe\x80\x00\xb4Bo'
            b'\x7f\xff\xff\xffV\xc1\xff\x7f\xff7'
        )

    scrape_info = scraper.scrape(request, response)
    self.assertTrue(scrape_info)
Example 10: test_html_serious_bad_encoding
# Required import: from wpull.http.request import Response [as alias]
# Or: from wpull.http.request.Response import fields['content-type'] [as alias]
def test_html_serious_bad_encoding(self):
    element_walker = ElementWalker(
        css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
    scraper = HTMLScraper(self.get_html_parser(), element_walker,
                          encoding_override='utf8')
    request = Request('http://example.com/')
    response = Response(200, '')
    response.body = Body()
    response.fields['content-type'] = 'text/html; charset=utf8'

    with wpull.util.reset_file_offset(response.body):
        html_file_path = os.path.join(
            ROOT_PATH, 'testing', 'samples', 'xkcd_1_evil.html')
        with open(html_file_path, 'rb') as in_file:
            shutil.copyfileobj(in_file, response.body)

    scrape_info = scraper.scrape(request, response)
    self.assertTrue(scrape_info)