本文整理汇总了Python中tests.get_testdata函数的典型用法代码示例。如果您正苦于以下问题：Python get_testdata函数的具体用法？Python get_testdata怎么用？Python get_testdata使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了get_testdata函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_extraction_encoding
def test_extraction_encoding(self):
    """Check extracted links for three differently-encoded responses.

    The ``linkextractor_noenc.html`` fixture is wrapped in two responses:
    one carrying an explicit ``charset=utf-8`` Content-Type header and one
    with no headers at all.  The ``linkextractor_latin1.html`` fixture is
    wrapped in a third response, also without headers.  Each response is
    passed to ``BaseSgmlLinkExtractor.extract_links`` and the result is
    compared with a hard-coded list of ``Link`` objects.
    """
    body = get_testdata('link_extractor', 'linkextractor_noenc.html')
    response_utf8 = HtmlResponse(
        url='http://example.com/utf8',
        body=body,
        headers={'Content-Type': ['text/html; charset=utf-8']},
    )
    response_noenc = HtmlResponse(url='http://example.com/noenc', body=body)
    body = get_testdata('link_extractor', 'linkextractor_latin1.html')
    response_latin1 = HtmlResponse(url='http://example.com/latin1', body=body)
    lx = BaseSgmlLinkExtractor()

    # Expected texts are written as unicode literals instead of
    # ``'...'.decode(...)``: ``str`` has no ``decode`` method on Python 3,
    # while ``u'...'`` gives the same value on Python 2 and Python 3.3+.
    # u'\u20ac' is the euro sign, i.e. b'\xe2\x82\xac' decoded as UTF-8.
    expected_noenc_links = [
        Link(url='http://example.com/sample_%C3%B1.html', text=''),
        Link(url='http://example.com/sample_%E2%82%AC.html', text=u'sample \u20ac text'),
    ]
    # The same body is expected to yield the same links with or without
    # the explicit UTF-8 header.
    self.assertEqual(lx.extract_links(response_utf8), expected_noenc_links)
    self.assertEqual(lx.extract_links(response_noenc), expected_noenc_links)

    # document encoding does not affect URL path component, only query part
    # >>> u'sample_ñ.html'.encode('utf8')
    # b'sample_\xc3\xb1.html'
    # >>> u"sample_á.html".encode('utf8')
    # b'sample_\xc3\xa1.html'
    # >>> u"sample_ö.html".encode('utf8')
    # b'sample_\xc3\xb6.html'
    # >>> u"£32".encode('latin1')
    # b'\xa332'
    # >>> u"µ".encode('latin1')
    # b'\xb5'
    self.assertEqual(lx.extract_links(response_latin1), [
        Link(url='http://example.com/sample_%C3%B1.html', text=''),
        # u'\xe1' is b'\xe1' decoded as Latin-1.
        Link(url='http://example.com/sample_%C3%A1.html', text=u'sample \xe1 text'),
        Link(url='http://example.com/sample_%C3%B6.html?price=%A332&%B5=unit', text=''),
    ])
示例2: test_w3c_5_5
def test_w3c_5_5(self):
    """Strict microdata extraction of ``microdata.5.5.html`` matches its JSON fixture."""
    html = get_testdata("w3c", "microdata.5.5.html")
    expected_json = get_testdata("w3c", "microdata.5.5.json").decode("UTF-8")
    expected = json.loads(expected_json)

    extractor = MicrodataExtractor(strict=True)
    self.assertDictEqual(extractor.extract(html), expected)
示例3: test_w3c_7_1
def test_w3c_7_1(self):
    """Strict microdata extraction of ``microdata.7.1.html`` matches its JSON fixture.

    The extractor is additionally given the page URL
    ``http://blog.example.com/progress-report`` as its second argument.
    """
    html = get_testdata("w3c", "microdata.7.1.html")
    expected_json = get_testdata("w3c", "microdata.7.1.json").decode("UTF-8")
    expected = json.loads(expected_json)

    extractor = MicrodataExtractor(strict=True)
    extracted = extractor.extract(html, "http://blog.example.com/progress-report")
    self.assertDictEqual(extracted, expected)
示例4: test_extraction_encoding
def test_extraction_encoding(self):
    """Check extracted links for three differently-encoded responses.

    ``linkextractor_noenc.html`` is wrapped both with an explicit
    ``charset=utf-8`` Content-Type header and with no headers;
    ``linkextractor_latin1.html`` is wrapped with no headers.  The links
    returned by ``BaseSgmlLinkExtractor.extract_links`` for each response
    are compared with hard-coded ``Link`` lists.
    """
    body = get_testdata("link_extractor", "linkextractor_noenc.html")
    response_utf8 = HtmlResponse(
        url="http://example.com/utf8", body=body, headers={"Content-Type": ["text/html; charset=utf-8"]}
    )
    response_noenc = HtmlResponse(url="http://example.com/noenc", body=body)
    body = get_testdata("link_extractor", "linkextractor_latin1.html")
    response_latin1 = HtmlResponse(url="http://example.com/latin1", body=body)
    lx = BaseSgmlLinkExtractor()

    # Expected texts are unicode literals rather than ``"...".decode(...)``:
    # ``str`` has no ``decode`` method on Python 3, while ``u"..."`` gives
    # the same value on Python 2 and Python 3.3+.
    # u"\u20ac" is the euro sign, i.e. b"\xe2\x82\xac" decoded as UTF-8.
    expected_noenc_links = [
        Link(url="http://example.com/sample_%C3%B1.html", text=""),
        Link(url="http://example.com/sample_%E2%82%AC.html", text=u"sample \u20ac text"),
    ]
    # The same body is expected to yield the same links with or without
    # the explicit UTF-8 header.
    self.assertEqual(lx.extract_links(response_utf8), expected_noenc_links)
    self.assertEqual(lx.extract_links(response_noenc), expected_noenc_links)

    self.assertEqual(
        lx.extract_links(response_latin1),
        [
            Link(url="http://example.com/sample_%F1.html", text=""),
            # u"\xe1" is b"\xe1" decoded as Latin-1.
            Link(url="http://example.com/sample_%E1.html", text=u"sample \xe1 text"),
        ],
    )
示例5: test_w3c_object_element
def test_w3c_object_element(self):
    """Strict microdata extraction of ``microdata.object.html`` matches its JSON fixture.

    The extractor is additionally given the page URL
    ``http://www.example.com/microdata/test`` as its second argument.
    """
    html = get_testdata('w3c', 'microdata.object.html')
    expected_json = get_testdata('w3c', 'microdata.object.json').decode('UTF-8')
    expected = json.loads(expected_json)

    extractor = MicrodataExtractor(strict=True)
    extracted = extractor.extract(html, 'http://www.example.com/microdata/test')
    self.assertDictEqual(extracted, expected)
示例6: _test_data
def _test_data(formats):
    """Load the uncompressed sample feed plus one ``Response`` per format.

    For every entry ``fmt`` of *formats*, the fixture named
    ``'feed-sample1.' + fmt`` is loaded and wrapped in a ``Response`` for
    ``http://foo.com/bar``.

    Returns a ``(uncompressed_body, test_responses)`` tuple, where
    ``uncompressed_body`` is the ``feed-sample1.xml`` fixture and
    ``test_responses`` maps each format to its ``Response``.
    """
    plain_feed = get_testdata('compressed', 'feed-sample1.xml')
    responses_by_format = {
        fmt: Response(
            'http://foo.com/bar',
            body=get_testdata('compressed', 'feed-sample1.' + fmt),
        )
        for fmt in formats
    }
    return plain_feed, responses_by_format
示例7: test_w3c_data_element
def test_w3c_data_element(self):
    """Strict microdata extraction of ``microdata.4.2.data.html`` matches its JSON fixture."""
    html = get_testdata('w3c', 'microdata.4.2.data.html')
    expected_json = get_testdata('w3c', 'microdata.4.2.data.json').decode('UTF-8')
    expected = json.loads(expected_json)

    extractor = MicrodataExtractor(strict=True)
    self.assertDictEqual(extractor.extract(html), expected)
示例8: test_w3c_5_2
def test_w3c_5_2(self):
    """Extraction of ``microdata.5.2.html`` with ``add_text_content=True``.

    The result is compared with the ``microdata.5.2.withtext.json`` fixture.
    """
    html = get_testdata('w3c', 'microdata.5.2.html')
    expected_json = get_testdata('w3c', 'microdata.5.2.withtext.json').decode('UTF-8')
    expected = json.loads(expected_json)

    extractor = MicrodataExtractor(add_text_content=True)
    self.assertDictEqual(extractor.extract(html), expected)
示例9: test_schemaorg_Event
def test_schemaorg_Event(self):
    """Compare microdata extracted from the schema.org Event samples with their JSON fixtures.

    Samples 1, 2, 3, 4 and 8 are checked; file names use a zero-padded
    three-digit index (``Event.001.html`` / ``Event.001.json``, ...).
    """
    for index in (1, 2, 3, 4, 8):
        html_name = "Event.{:03d}.html".format(index)
        json_name = "Event.{:03d}.json".format(index)

        html = get_testdata("schema.org", html_name)
        expected = json.loads(get_testdata("schema.org", json_name).decode("UTF-8"))

        # A fresh extractor is created for every sample.
        extractor = MicrodataExtractor()
        self.assertDictEqual(extractor.extract(html), expected)
示例10: test_schemaorg_MusicRecording
def test_schemaorg_MusicRecording(self):
    """Compare microdata extracted from the schema.org MusicRecording sample with its JSON fixture.

    Only sample 1 is checked; file names use a zero-padded three-digit index.
    """
    for index in (1,):
        html_name = 'MusicRecording.{:03d}.html'.format(index)
        json_name = 'MusicRecording.{:03d}.json'.format(index)

        html = get_testdata('schema.org', html_name)
        expected = json.loads(get_testdata('schema.org', json_name).decode('UTF-8'))

        # A fresh extractor is created for every sample.
        extractor = MicrodataExtractor()
        self.assertDictEqual(extractor.extract(html), expected)
示例11: test_schemaorg_CreativeWork
def test_schemaorg_CreativeWork(self):
    """Compare JSON-LD extracted from the schema.org CreativeWork sample with its ``.jsonld`` fixture.

    Only sample 1 is checked; file names use a zero-padded three-digit index.
    """
    for index in (1,):
        html_name = 'CreativeWork.{:03d}.html'.format(index)
        jsonld_name = 'CreativeWork.{:03d}.jsonld'.format(index)

        html = get_testdata('schema.org', html_name)
        expected = json.loads(get_testdata('schema.org', jsonld_name).decode('UTF-8'))

        # A fresh extractor is created for every sample.
        extractor = JsonLdExtractor()
        self.assertDictEqual(extractor.extract(html), expected)
示例12: test_w3c_rdf11primer
def test_w3c_rdf11primer(self):
    """Compare RDFa extracted from W3C RDF 1.1 primer example 14 with its expanded JSON fixture.

    Unlike the microdata tests, the HTML fixture is decoded from UTF-8
    before being handed to the extractor.
    """
    for index in (14,):
        prefix = 'w3c.rdf11primer.example{:03d}'.format(index)

        html_text = get_testdata('w3crdfa', prefix + '.html').decode('UTF-8')
        expected_json = get_testdata('w3crdfa', prefix + '.expanded.json').decode('UTF-8')
        expected = json.loads(expected_json)

        extractor = RDFaExtractor()
        # NOTE(review): the host is spelled 'exaple' in the original test; it
        # is kept verbatim because the expected fixture may depend on it.
        extracted = extractor.extract(html_text, url='http://www.exaple.com/index.html')
        self.assertJsonLDEqual(extracted, expected)
示例13: test_wikipedia_xhtml_rdfa
def test_wikipedia_xhtml_rdfa(self):
    """Compare RDFa extracted from the wikipedia ``xhtml+rdfa`` sample with its expanded JSON fixture.

    The HTML fixture is decoded from UTF-8 before being handed to the
    extractor.
    """
    prefix = 'xhtml+rdfa'

    html_text = get_testdata('wikipedia', prefix + '.html').decode('UTF-8')
    expected_json = get_testdata('wikipedia', prefix + '.expanded.json').decode('UTF-8')
    expected = json.loads(expected_json)

    extractor = RDFaExtractor()
    # NOTE(review): the host is spelled 'exaple' in the original test; it is
    # kept verbatim because the expected fixture may depend on it.
    extracted = extractor.extract(html_text, url='http://www.exaple.com/index.html')
    self.assertJsonLDEqual(extracted, expected)
示例14: test_songkick
def test_songkick(self):
    """Compare JSON-LD extracted from saved songkick pages with their ``.jsonld`` fixtures.

    Fixture files are named after the page title, with ``.html`` and
    ``.jsonld`` suffixes.
    """
    page_titles = [
        "Elysian Fields Brooklyn Tickets, The Owl Music Parlor, 31 Oct 2015",
        # Disabled in the original test (reason not recorded here):
        #"Maxïmo Park Gigography, Tour History & Past Concerts",
        #"Years & Years Tickets, Tour Dates 2015 & Concerts",
    ]
    for title in page_titles:
        html = get_testdata('songkick', '{}.html'.format(title))
        expected_json = get_testdata('songkick', '{}.jsonld'.format(title)).decode('UTF-8')
        expected = json.loads(expected_json)

        # A fresh extractor is created for every page.
        extractor = JsonLdExtractor()
        self.assertDictEqual(extractor.extract(html), expected)
示例15: test_csviter_encoding
def test_csviter_encoding(self):
    """Check the rows ``csviter`` yields for responses with non-UTF-8 encodings.

    ``feed-sample4.csv`` is wrapped in a ``TextResponse`` declared as
    ``latin1`` and ``feed-sample5.csv`` in one declared as ``cp852``; each
    is expected to produce two rows of unicode keys and values.
    """
    latin1_body = get_testdata('feeds', 'feed-sample4.csv')
    cp852_body = get_testdata('feeds', 'feed-sample5.csv')

    latin1_response = TextResponse(url="http://example.com/", body=latin1_body, encoding='latin1')
    latin1_rows = list(csviter(latin1_response))
    self.assertEqual(
        latin1_rows,
        [
            {u'id': u'1', u'name': u'latin1', u'value': u'test'},
            {u'id': u'2', u'name': u'something', u'value': u'\xf1\xe1\xe9\xf3'},
        ],
    )

    cp852_response = TextResponse(url="http://example.com/", body=cp852_body, encoding='cp852')
    cp852_rows = list(csviter(cp852_response))
    self.assertEqual(
        cp852_rows,
        [
            {u'id': u'1', u'name': u'cp852', u'value': u'test'},
            {u'id': u'2', u'name': u'something', u'value': u'\u255a\u2569\u2569\u2569\u2550\u2550\u2557'},
        ],
    )