本文整理汇总了 Python 中 wpull.url.URLInfo.parse 方法的典型用法代码示例。如果您正苦于以下问题:Python URLInfo.parse 方法的具体用法?Python URLInfo.parse 怎么用?Python URLInfo.parse 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 wpull.url.URLInfo 的用法示例。
在下文中一共展示了URLInfo.parse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_directory_filter
# 需要导入模块: from wpull.url import URLInfo [as 别名]
# 或者: from wpull.url.URLInfo import parse [as 别名]
def test_directory_filter(self):
    '''Check DirectoryFilter with no rules, an accept list and a reject list.

    Every candidate URL is tested against the same mock record, whose
    ``url`` is ``http://example.com/blog/``.
    '''
    record = MockURLTableRecord()
    record.url = 'http://example.com/blog/'

    def passes(directory_filter, url):
        # Parse the candidate and run it through the given filter.
        return directory_filter.test(URLInfo.parse(url), record)

    # Filter constructed without any accepted/rejected directories.
    unrestricted = DirectoryFilter()
    self.assertTrue(passes(unrestricted, 'http://example.com'))

    # Only '/blog' is accepted.
    only_blog = DirectoryFilter(accepted=['/blog'])
    self.assertFalse(passes(only_blog, 'http://example.com'))
    self.assertTrue(passes(only_blog, 'http://example.com/blog/'))

    # '/cgi-bin/' is rejected.
    no_cgi = DirectoryFilter(rejected=['/cgi-bin/'])
    self.assertTrue(passes(no_cgi, 'http://example.com/blog/'))
    self.assertFalse(passes(no_cgi, 'http://example.com/cgi-bin'))
示例2: test_url_info_ipv6
# 需要导入模块: from wpull.url import URLInfo [as 别名]
# 或者: from wpull.url.URLInfo import parse [as 别名]
def test_url_info_ipv6(self):
    '''Check ``url`` and ``hostname_with_port`` for bracketed IPv6 hosts.'''
    # With an explicit port: the URL is expected to come back unchanged.
    https_with_port = (
        'https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:8080/ipv6')
    self.assertEqual(https_with_port, URLInfo.parse(https_with_port).url)

    http_with_port = URLInfo.parse(
        'http://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:8080/ipv6')
    self.assertEqual(
        '[2001:db8:85a3:8d3:1319:8a2e:370:7348]:8080',
        http_with_port.hostname_with_port
    )

    # Without a port: same expectations, minus the ':8080' suffix.
    http_no_port = 'http://[2001:db8:85a3:8d3:1319:8a2e:370:7348]/ipv6'
    self.assertEqual(http_no_port, URLInfo.parse(http_no_port).url)
    self.assertEqual(
        '[2001:db8:85a3:8d3:1319:8a2e:370:7348]',
        URLInfo.parse(http_no_port).hostname_with_port
    )
示例3: test_url_info_invalids
# 需要导入模块: from wpull.url import URLInfo [as 别名]
# 或者: from wpull.url.URLInfo import parse [as 别名]
def test_url_info_invalids(self):
    '''Check that URLInfo.parse raises ValueError for malformed URLs.

    Between the two groups of invalid URLs, the hostname parsed from
    ``http://[a]`` is checked to contain no square brackets.
    '''
    first_group = (
        '',
        '#',
        'http://',
        'example....com',
        'http://example....com',
        'http://example…com',
        'http://[34.4kf]::4',
        'http://[34.4kf::4',
        'http://dmn3]:3a:45',
        ':38/3',
        'http://][a:@1]',
        'http://[[aa]]:4:]6',
    )
    for bad_url in first_group:
        with self.assertRaises(ValueError):
            URLInfo.parse(bad_url)

    # Iterating the string '[]' yields '[' and then ']'.
    for bracket in '[]':
        self.assertNotIn(bracket, URLInfo.parse('http://[a]').hostname)

    second_group = (
        'http://[[a]',
        'http://[[a]]a]',
        'http://[[a:a]]',
        'http:///',
        'http:///horse',
        'http://?what?',
        'http://#egg=wpull',
        # NOTE(review): the '[email protected]' fragment looks like the output
        # of an e-mail obfuscation filter rather than the intended test
        # input -- confirm against the upstream test suite.
        'http://:@example.com:[email protected]/',
        'http://\x00/',
        'http:/a',
        'http://@@example.com/@',
        'http://fat32defragmenter.internets::80',
        'http://fat32defragmenter.internets:80/',
        'http:// /spaaaace',
        # A single very long host name, split over adjacent literals.
        'http://a-long-long-time-ago-the-earth-was-ruled-by-dinosaurs-'
        'they-were-big-so-not-a-lot-of-people-went-around-hassling-them-'
        'actually-no-people-went-around-hassling-them-'
        'because-there-weren-t-any-people-yet-'
        'just-the-first-tiny-mammals-'
        'basically-life-was-good-'
        'lou-it-just-dont-get-no-better-than-this-'
        'yeah-'
        'then-something-happened-'
        'a-giant-meteorite-struck-the-earth-'
        'goodbye-dinosaurs-'
        'but-what-if-the-dinosaurs-werent-all-destroyed-'
        'what-if-the-impact-of-that-meteorite-created-a-parallel-dimension-'
        'where-the-dinosaurs-continue-to-thrive-'
        'and-evolved-into-intelligent-vicious-aggressive-beings-'
        'just-like-us-'
        'and-hey-what-if-they-found-their-way-back.movie',
        'http://[...]/python.xml%22',
        'http://[…]/python.xml%22',
        'http://[.]/python.xml%22',
    )
    for bad_url in second_group:
        with self.assertRaises(ValueError):
            URLInfo.parse(bad_url)
示例4: test_backward_filename_filter
# 需要导入模块: from wpull.url import URLInfo [as 别名]
# 或者: from wpull.url.URLInfo import parse [as 别名]
def test_backward_filename_filter(self):
    '''Check BackwardFilenameFilter against accepted and rejected patterns.'''
    filename_filter = BackwardFilenameFilter(
        accepted=['html', 'image.*.png'],
        rejected=['bmp', 'jp[eg]', 'image.123.png']
    )
    record = MockURLTableRecord()
    record.url = 'http://example.com/'

    def passes(url):
        # Parse the candidate and test it against the shared mock record.
        return filename_filter.test(URLInfo.parse(url), record)

    # URLs the filter is expected to let through.
    self.assertTrue(passes('http://example/index.html'))
    self.assertTrue(passes('http://example/myimage.1003.png'))

    # URLs the filter is expected to block.
    self.assertFalse(passes('http://example/myimage.123.png'))
    self.assertFalse(passes('http://example/blah.png'))
    self.assertFalse(passes('http://example/image.1003.png.bmp'))
示例5: test_regex_filter
# 需要导入模块: from wpull.url import URLInfo [as 别名]
# 或者: from wpull.url.URLInfo import parse [as 别名]
def test_regex_filter(self):
    '''Check RegexFilter with no pattern, an accept pattern and a reject pattern.'''
    record = MockURLTableRecord()
    record.url = 'http://example.com/blog/'

    def passes(regex_filter, url):
        # Parse the candidate and run it through the given filter.
        return regex_filter.test(URLInfo.parse(url), record)

    # Filter constructed without any pattern.
    no_pattern = RegexFilter()
    self.assertTrue(passes(no_pattern, 'http://example.net'))

    accept_blo = RegexFilter(accepted=r'blo[a-z]/$')
    self.assertTrue(passes(accept_blo, 'http://example.net/blob/'))
    self.assertFalse(passes(accept_blo, 'http://example.net/blob/123'))

    reject_gif = RegexFilter(rejected=r'\.gif$')
    self.assertTrue(passes(reject_gif, 'http://example.net/blob/'))
    self.assertFalse(passes(reject_gif, 'http://example.net/blob/123.gif'))
示例6: test_parent_filter
# 需要导入模块: from wpull.url import URLInfo [as 别名]
# 或者: from wpull.url.URLInfo import parse [as 别名]
def test_parent_filter(self):
    '''Check ParentFilter against different ``top_url`` and ``inline`` values.'''
    record = MockURLTableRecord()
    record.inline = False
    parent_filter = ParentFilter()

    def passes(url):
        # The record is mutated between checks; the closure reads it live.
        return parent_filter.test(URLInfo.parse(url), record)

    record.top_url = 'http://example.com/blog/topic2/'
    self.assertTrue(passes('http://example.com/blog/topic2/'))

    record.top_url = 'http://example.com/blog/topic1/'
    self.assertTrue(passes('http://example.com/blog/topic1/blah.html'))
    self.assertFalse(passes('http://example.com/blog/'))

    # Same top_url as above, but the record is now marked inline.
    record.inline = True
    self.assertTrue(passes('http://example.com/styles.css'))
示例7: test_url_info_trailing_dot
# 需要导入模块: from wpull.url import URLInfo [as 别名]
# 或者: from wpull.url.URLInfo import parse [as 别名]
def test_url_info_trailing_dot(self):
    '''Check that a trailing dot on the host name is kept in ``url``.'''
    unchanged_urls = (
        'http://example.com./',
        'http://example.com.:81/',
    )
    for unchanged in unchanged_urls:
        # Input and expected output are the same string in both cases.
        self.assertEqual(unchanged, URLInfo.parse(unchanged).url)
示例8: test_url_info_round_trip
# 需要导入模块: from wpull.url import URLInfo [as 别名]
# 或者: from wpull.url.URLInfo import parse [as 别名]
def test_url_info_round_trip(self):
    '''Parse each sample URL, then parse the ``url`` string it produced.

    There are no explicit assertions here; the test only fails if one of
    the two parse calls raises.
    '''
    samples = (
        'http://example.com/blah%20blah/',
        'example.com:81?blah=%c3%B0',
        'http://example.com/a/../../b/style.css',
        # The next two adjacent literals form a single URL.
        'http://example.com/'
        '?blah=http%3A%2F%2Fexample.com%2F%3Ffail%3Dtrue',
        'http://example.com/??blah=blah[0:]=bl%61h?blah"&d%26_',
        'http://[2001:db8:85a3:8d3:1319:8a2e:370:7348]/ipv6',
    )
    for sample in samples:
        first_pass = URLInfo.parse(sample)
        URLInfo.parse(first_pass.url)
示例9: test_to_dir_path_url
# 需要导入模块: from wpull.url import URLInfo [as 别名]
# 或者: from wpull.url.URLInfo import parse [as 别名]
def test_to_dir_path_url(self):
    '''Check to_dir_path_url on a root URL and on one- and two-segment paths.'''
    # (expected result, input URL)
    cases = (
        ('ftp://putfile.com/', 'ftp://putfile.com/'),
        ('ftp://putfile.com/', 'ftp://putfile.com/asdf'),
        ('ftp://putfile.com/asdf/', 'ftp://putfile.com/asdf/qwer'),
    )
    for expected, source_url in cases:
        parsed = URLInfo.parse(source_url)
        self.assertEqual(expected, to_dir_path_url(parsed))
示例10: test_url_info_misleading_parts
# 需要导入模块: from wpull.url import URLInfo [as 别名]
# 或者: from wpull.url.URLInfo import parse [as 别名]
def test_url_info_misleading_parts(self):
    '''Check the ``url`` produced for inputs with oddly placed delimiters.

    The inputs put ``?``, ``#``, ``:`` and ``@`` in unusual positions.
    '''
    # (expected ``url``, input)
    cases = (
        ('http://example.com/?a', 'http://example.com?a'),
        ('http://example.com/?a?', 'http://example.com?a?'),
        ('http://example.com/', 'http://example.com#a'),
        ('http://example.com/', 'http://example.com#a?'),
        ('http://example.com/?a', 'http://example.com?a#'),
        ('http://example.com/:10', 'http://example.com/:10'),
        # NOTE(review): both literals in the next pair contain
        # '[email protected]', which looks like the output of an e-mail
        # obfuscation filter rather than the intended test data -- confirm
        # against the upstream test suite.
        ('http://example.com/[email protected]/', 'http://:@[email protected]/'),
        ('http://example.com/http:/example.com',
         'http://:@example.com/http://example.com'),
    )
    for expected, raw_url in cases:
        self.assertEqual(expected, URLInfo.parse(raw_url).url)
示例11: test_ip_address_normalization
# 需要导入模块: from wpull.url import URLInfo [as 别名]
# 或者: from wpull.url.URLInfo import parse [as 别名]
def test_ip_address_normalization(self):
    '''Check that alternate IP address spellings are normalized in ``url``.

    The IPv4 inputs all use the ``https`` scheme, so the expected URL is
    ``https://192.0.2.235/``. The IPv6 cases in this test likewise expect
    the input scheme to appear unchanged in the result.
    '''
    ipv4_spellings = (
        'https://0xC0.0x00.0x02.0xEB',
        # NOTE(review): '1680' contains the digit 8, so it is not a valid
        # octal component -- confirm this input is what was intended.
        'https://0301.1680.0002.0353',
        'https://0xC00002EB/',
        'https://3221226219/',
        'https://030000001353/',
    )
    for spelling in ipv4_spellings:
        self.assertEqual(
            'https://192.0.2.235/',
            URLInfo.parse(spelling).url
        )

    # (expected ``url``, input) for bracketed IPv6 hosts.
    ipv6_cases = (
        ('https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:8080/ipv6',
         'https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:8080/ipv6'),
        ('https://[::1]/', 'https://[0:0:0:0:0:0:0:1]'),
        ('https://[::ffff:192.0.2.128]/', 'https://[::ffff:c000:0280]'),
    )
    for expected, spelling in ipv6_cases:
        self.assertEqual(expected, URLInfo.parse(spelling).url)
示例12: parse_url
# 需要导入模块: from wpull.url import URLInfo [as 别名]
# 或者: from wpull.url.URLInfo import parse [as 别名]
def parse_url(cls, url, encoding):
    '''Parse ``url`` with the given encoding and return a URLInfo.

    If parsing raises ValueError, a warning is logged and None is
    returned instead of propagating the error.
    '''
    try:
        parsed = URLInfo.parse(url, encoding=encoding)
        # FIXME: workaround detection of bad URL unsplit. See issue #132.
        # Re-parsing the produced URL makes a bad round trip surface as a
        # ValueError here.
        URLInfo.parse(parsed.url, encoding=encoding)
    except ValueError as error:
        message = __(
            _('Discarding malformed URL ‘{url}’: {error}.'),
            url=url, error=error)
        _logger.warning(message)
        return None

    return parsed
示例13: test_url_info_reserved_char_is_ok
# 需要导入模块: from wpull.url import URLInfo [as 别名]
# 或者: from wpull.url.URLInfo import parse [as 别名]
def test_url_info_reserved_char_is_ok(self):
    '''Check ``url`` for paths containing ``@``, ``$``, ``;`` and ``%``.

    The second case also expects the non-ASCII ``é`` to come back
    percent-encoded as ``%C3%A9``.
    '''
    # (expected ``url``, input)
    cases = (
        ('http://example.com/@49IMG.DLL/$SESSION$/image.png;large',
         'http://example.com/@49IMG.DLL/$SESSION$/image.png;large'),
        ('http://example.com/@49IMG.DLL/$SESSION$/imag%C3%A9.png;large',
         'http://example.com/@49IMG.DLL/$SESSION$/imagé.png;large'),
        ('http://example.com/$c/%system.exe/',
         'http://example.com/$c/%system.exe/'),
    )
    for expected, raw_url in cases:
        self.assertEqual(expected, URLInfo.parse(raw_url).url)
示例14: test_url_info_parts
# 需要导入模块: from wpull.url import URLInfo [as 别名]
# 或者: from wpull.url.URLInfo import parse [as 别名]
def test_url_info_parts(self):
    '''Check every URLInfo attribute for an HTTP URL and an FTP URL.

    The userinfo part of each literal is written out in full
    (``user:password@host``) so that the input, ``authority``, ``url``
    and ``raw`` expectations agree with the ``username``, ``password``
    and ``host`` assertions made on the same object.
    '''
    # Mixed-case scheme, percent-encoded ':' in the password, IPv6 host
    # with a port, non-ASCII path, query and fragment.
    http_raw = (
        'HTTP://userName:pass%3Aword@[A::1]:81/ásdF\u200C/ghjK?a=b=c&D#/?')
    url_info = URLInfo.parse(http_raw)
    self.assertEqual(
        'http://userName:pass:word@[a::1]:81/'
        '%C3%A1sdF%E2%80%8C/ghjK?a=b=c&D',
        url_info.url
    )
    self.assertEqual('http', url_info.scheme)
    self.assertEqual('userName:pass%3Aword@[A::1]:81',
                     url_info.authority)
    self.assertEqual('/ásdF\u200C/ghjK?a=b=c&D#/?', url_info.resource)
    self.assertEqual('userName', url_info.username)
    self.assertEqual('pass:word', url_info.password)
    self.assertEqual('[A::1]:81', url_info.host)
    self.assertEqual('[a::1]:81', url_info.hostname_with_port)
    self.assertEqual('a::1', url_info.hostname)
    self.assertEqual(81, url_info.port)
    self.assertEqual('/%C3%A1sdF%E2%80%8C/ghjK', url_info.path)
    self.assertEqual('a=b=c&D', url_info.query)
    self.assertEqual('/?', url_info.fragment)
    self.assertEqual('utf-8', url_info.encoding)
    # ``raw`` is expected to be the exact string passed to parse().
    self.assertEqual(http_raw, url_info.raw)
    self.assertEqual(('/%C3%A1sdF%E2%80%8C', 'ghjK'), url_info.split_path())

    # FTP URL with credentials, a mixed-case host and no explicit port.
    ftp_raw = 'Ftp://N00B:hunter2@LocalHost.Example/mydocs/'
    url_info = URLInfo.parse(ftp_raw)
    self.assertEqual('ftp', url_info.scheme)
    self.assertEqual('N00B:hunter2@LocalHost.Example',
                     url_info.authority)
    self.assertEqual('/mydocs/', url_info.resource)
    self.assertEqual('N00B', url_info.username)
    self.assertEqual('hunter2', url_info.password)
    self.assertEqual('LocalHost.Example', url_info.host)
    self.assertEqual('localhost.example', url_info.hostname_with_port)
    self.assertEqual('localhost.example', url_info.hostname)
    # No port appears in the input; 21 is the value expected here.
    self.assertEqual(21, url_info.port)
    self.assertEqual('/mydocs/', url_info.path)
    self.assertFalse(url_info.query)
    self.assertFalse(url_info.fragment)
    self.assertEqual('utf-8', url_info.encoding)
    self.assertEqual(ftp_raw, url_info.raw)
    self.assertEqual(('/mydocs', ''), url_info.split_path())
示例15: test_append_slash_to_path_url
# 需要导入模块: from wpull.url import URLInfo [as 别名]
# 或者: from wpull.url.URLInfo import parse [as 别名]
def test_append_slash_to_path_url(self):
    '''Check that append_slash_to_path_url adds a trailing slash to the path.'''
    parsed = URLInfo.parse('ftp://putfile.com/example')
    with_slash = append_slash_to_path_url(parsed)
    self.assertEqual('ftp://putfile.com/example/', with_slash)