本文整理汇总了 Python 中 urllib.request.Request.get_selector 方法的典型用法代码示例。如果您正苦于以下问题:Python Request.get_selector 方法的具体用法?Python Request.get_selector 怎么用?Python Request.get_selector 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 urllib.request.Request 的用法示例。
在下文中一共展示了 Request.get_selector 方法的 6 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。注意:get_selector() 自 Python 3.3 起已被弃用,并在 Python 3.4 中移除;在新版本中请改用 Request.selector 属性。
示例1: getURLInfo
# 需要导入模块: from urllib.request import Request [as 别名]
# 说明: get_selector 是 Request 的实例方法,不能单独导入,需通过 Request 实例调用,例如 Request(url).get_selector()
def getURLInfo(self, url=None):
    '''
    @see: IURLInfoService.getURLInfo

    Open ``url`` and build a ``URLInfo`` for it.

    The URL is percent-decoded before it is opened. ``URL`` and ``Date`` are
    always set on the result. When the response is not ``text/html`` only
    ``Title`` is filled in: the last path segment of the URL, or the host
    name when the path is empty. For ``text/html`` responses ``ContentType``
    is set and the decoded body is fed to ``HTMLInfoExtractor``, whose
    ``urlInfo`` is returned.

    @param url: the address to inspect; must be a non-empty ``str``.
    @return: the populated ``URLInfo``.
    @raise InputError: if ``url`` is empty, or opening it fails with
        ``URLError`` or ``ValueError``.
    '''
    if not url: raise InputError('Invalid URL %s' % url)
    assert isinstance(url, str), 'Invalid URL %s' % url
    url = unquote(url)

    try:
        with urlopen(url) as conn:
            urlInfo = URLInfo()
            urlInfo.URL = url
            urlInfo.Date = datetime.now()

            # Header names are case-insensitive, so use the message's own
            # case-insensitive lookup instead of comparing raw names.
            rawContentType = conn.info().get('Content-Type')
            contentType = None
            if rawContentType:
                contentType = rawContentType.split(';')[0].strip().lower()

            if contentType != 'text/html':
                # Request.get_selector()/get_host() were removed in
                # Python 3.4; the attributes carry the same values.
                req = Request(url)
                selector = req.selector.strip('/')
                if selector:
                    urlInfo.Title = selector.split('/')[-1]
                else:
                    urlInfo.Title = req.host
                return urlInfo

            urlInfo.ContentType = contentType
            extr = HTMLInfoExtractor(urlInfo)
            # Best effort: a page that cannot be decoded or parsed still
            # yields whatever the extractor collected so far.
            try: extr.feed(conn.read().decode())
            except (AssertionError, HTMLParseError, UnicodeDecodeError): pass
            return extr.urlInfo
    except (URLError, ValueError): raise InputError('Invalid URL %s' % url)
示例2: RequestTests
# 需要导入模块: from urllib.request import Request [as 别名]
# 说明: get_selector 是 Request 的实例方法,不能单独导入,需通过 Request 实例调用,例如 Request(url).get_selector()
class RequestTests(unittest.TestCase):
    """Checks on how ``urllib.request.Request`` parses and exposes a URL.

    The tests read the ``data``, ``selector``, ``type``, ``host`` and
    ``origin_req_host`` attributes. The accessor methods that used to wrap
    them (``has_data``, ``add_data``, ``get_selector``, ``get_type``,
    ``get_host``, ``get_origin_req_host``) were removed in Python 3.4.
    """

    def setUp(self):
        # One request without a body (GET) and one with a body (POST).
        self.get = Request("http://www.python.org/~jeremy/")
        self.post = Request("http://www.python.org/~jeremy/",
                            b"data",
                            headers={"X-Test": "test"})

    def test_method(self):
        self.assertEqual("POST", self.post.get_method())
        self.assertEqual("GET", self.get.get_method())

    def test_add_data(self):
        # Attaching a body turns a GET request into a POST request.
        self.assertIsNone(self.get.data)
        self.assertEqual("GET", self.get.get_method())
        self.get.data = b"spam"
        self.assertEqual(b"spam", self.get.data)
        self.assertEqual("POST", self.get.get_method())

    def test_get_full_url(self):
        self.assertEqual("http://www.python.org/~jeremy/",
                         self.get.get_full_url())

    def test_selector(self):
        self.assertEqual("/~jeremy/", self.get.selector)
        req = Request("http://www.python.org/")
        self.assertEqual("/", req.selector)

    def test_get_type(self):
        self.assertEqual("http", self.get.type)

    def test_get_host(self):
        self.assertEqual("www.python.org", self.get.host)

    def test_get_host_unquote(self):
        # Percent-escapes in the host part are decoded.
        req = Request("http://www.%70ython.org/")
        self.assertEqual("www.python.org", req.host)

    def test_proxy(self):
        # After set_proxy() the host is the proxy, while origin_req_host
        # still names the host of the original request.
        self.assertFalse(self.get.has_proxy())
        self.get.set_proxy("www.perl.org", "http")
        self.assertTrue(self.get.has_proxy())
        self.assertEqual("www.python.org", self.get.origin_req_host)
        self.assertEqual("www.perl.org", self.get.host)

    def test_wrapped_url(self):
        # The "<URL:...>" wrapper is stripped before parsing.
        req = Request("<URL:http://www.python.org>")
        self.assertEqual("www.python.org", req.host)

    def test_urlwith_fragment(self):
        # The fragment is not part of the selector.
        req = Request("http://www.python.org/?qs=query#fragment=true")
        self.assertEqual("/?qs=query", req.selector)
        req = Request("http://www.python.org/#fun=true")
        self.assertEqual("/", req.selector)
示例3: getURLInfo
# 需要导入模块: from urllib.request import Request [as 别名]
# 说明: get_selector 是 Request 的实例方法,不能单独导入,需通过 Request 实例调用,例如 Request(url).get_selector()
def getURLInfo(self, url=None):
    '''
    @see: IURLInfoService.getURLInfo

    Open ``url`` and build a ``URLInfo`` for it.

    The URL is percent-decoded before it is opened. ``URL`` and ``Date`` are
    always set on the result. When the response is not ``text/html`` only
    ``Title`` is filled in: the last path segment of the URL, or the host
    name when the path is empty. For ``text/html`` responses ``ContentType``
    is set, the body is decoded with the charset announced in the
    ``Content-Type`` header (``utf_8`` when none is announced or the
    announced one is unknown), every ``from``/``to`` pair in
    ``self.html_fixes`` is applied with ``re.sub``, and the result is fed to
    ``HTMLInfoExtractor``, whose ``urlInfo`` is returned.

    @param url: the address to inspect; must be a non-empty ``str``.
    @return: the populated ``URLInfo``.
    @raise InputError: if ``url`` is empty, or opening it fails with
        ``URLError`` or ``ValueError``.
    '''
    if not url: raise InputError('Invalid URL %s' % url)
    assert isinstance(url, str), 'Invalid URL %s' % url
    url = unquote(url)

    try:
        with urlopen(url) as conn:
            urlInfo = URLInfo()
            urlInfo.URL = url
            urlInfo.Date = datetime.now()

            # Header names are case-insensitive, so use the message's own
            # case-insensitive lookup instead of comparing raw names.
            headers = conn.info()
            rawContentType = headers.get('Content-Type')
            contentType = None
            if rawContentType:
                contentType = rawContentType.split(';')[0].strip().lower()
            # get_content_charset() copes with quoting, extra parameters
            # and a parameter without '=', which manual splitting did not.
            charset = headers.get_content_charset() or 'utf_8'

            if contentType != 'text/html':
                # Request.get_selector()/get_host() were removed in
                # Python 3.4; the attributes carry the same values.
                req = Request(url)
                selector = req.selector.strip('/')
                if selector:
                    urlInfo.Title = selector.split('/')[-1]
                else:
                    urlInfo.Title = req.host
                return urlInfo

            urlInfo.ContentType = contentType
            extr = HTMLInfoExtractor(urlInfo)
            # Best effort: a page that cannot be parsed still yields
            # whatever the extractor collected so far.
            try:
                readData = conn.read()
                try:
                    decodedData = readData.decode(charset, 'ignore')
                except (LookupError, ValueError):
                    # Unknown or malformed charset name: fall back to UTF-8.
                    decodedData = readData.decode('utf_8', 'ignore')
                for onePair in self.html_fixes:
                    decodedData = re.sub(onePair['from'], onePair['to'], decodedData)
                extr.feed(decodedData)
            except (AssertionError, HTMLParseError, UnicodeDecodeError): pass
            return extr.urlInfo
    except (URLError, ValueError): raise InputError('Invalid URL %s' % url)
示例4: test_urlwith_fragment
# 需要导入模块: from urllib.request import Request [as 别名]
# 说明: get_selector 是 Request 的实例方法,不能单独导入,需通过 Request 实例调用,例如 Request(url).get_selector()
def test_urlwith_fragment(self):
req = Request("http://www.python.org/?qs=query#fragment=true")
self.assertEqual("/?qs=query", req.get_selector())
req = Request("http://www.python.org/#fun=true")
self.assertEqual("/", req.get_selector())
示例5: test_selector
# 需要导入模块: from urllib.request import Request [as 别名]
# 说明: get_selector 是 Request 的实例方法,不能单独导入,需通过 Request 实例调用,例如 Request(url).get_selector()
def test_selector(self):
self.assertEqual("/~jeremy/", self.get.get_selector())
req = Request("http://www.python.org/")
self.assertEqual("/", req.get_selector())
示例6: _open_tag_file
# 需要导入模块: from urllib.request import Request [as 别名]
# 说明: get_selector 是 Request 的实例方法,不能单独导入,需通过 Request 实例调用,例如 Request(url).get_selector()
def _open_tag_file(mirror, suffix):
    """Download an apt tag file if needed, then open it.

    The URL is ``<mirror>/dists/<dist>/<component>/<ftppath><suffix>``.
    ``dist``, ``component``, ``ftppath``, ``tagfile_type``, ``dirname`` and
    ``_progress`` are not defined in this function; they are presumably
    supplied by the enclosing scope (verify against the caller).

    The file is stored under ``dirname``. A copy that already exists there
    is reused, except for ``file:`` URLs, which are always fetched again.
    With a non-empty ``suffix`` the download is decompressed and the
    compressed copy removed afterwards.

    :param mirror: base URL of the archive; a trailing "/" is added if
        missing.
    :param suffix: "", ".gz" or ".bz2".
    :return: the local file, opened for reading in text mode.
    :raises RuntimeError: if ``suffix`` is non-empty and is neither ".gz"
        nor ".bz2" (raised after the download).
    """
    import shutil

    if not mirror.endswith('/'):
        mirror += '/'
    url = (mirror + "dists/" + dist + "/" + component + "/" + ftppath +
           suffix)
    req = Request(url)
    # Request.get_type()/get_selector() were removed in Python 3.4; the
    # ``type`` and ``selector`` attributes carry the same values.
    is_local = req.type == "file"

    if is_local:
        # Make a more or less dummy filename for local URLs.
        filename = os.path.split(req.selector)[0].replace(os.sep, "_")
    else:
        filename = "%s_%s_%s_%s" % (quote(mirror, safe=""), dist,
                                    component, tagfile_type)

    fullname = os.path.join(dirname, filename)
    if is_local:
        # Always refresh.  TODO: we should use If-Modified-Since for
        # remote HTTP tag files.
        try:
            os.unlink(fullname)
        except OSError:
            pass

    if not os.path.exists(fullname):
        _progress("Downloading %s file ...", req.get_full_url())

        # With an empty suffix this is the same path as fullname, so the
        # download lands directly in its final place.
        compressed = os.path.join(dirname, filename + suffix)
        try:
            # Stream to disk instead of holding the whole file in memory.
            with urlopen(req) as url_f, \
                    open(compressed, "wb") as compressed_f:
                shutil.copyfileobj(url_f, compressed_f)

            # apt_pkg is weird and won't accept GzipFile
            if suffix:
                _progress("Decompressing %s file ...",
                          req.get_full_url())

                if suffix == ".gz":
                    import gzip
                    opener = gzip.GzipFile
                elif suffix == ".bz2":
                    import bz2
                    opener = bz2.BZ2File
                else:
                    raise RuntimeError("Unknown suffix '%s'" % suffix)

                with opener(compressed) as compressed_f, \
                        open(fullname, "wb") as f:
                    shutil.copyfileobj(compressed_f, f)
        finally:
            if suffix:
                # Best-effort removal of the compressed download.
                try:
                    os.unlink(compressed)
                except OSError:
                    pass

    return open(fullname, "r")