当前位置: 首页>>代码示例>>Python>>正文


Python Request.get_selector方法代码示例

本文整理汇总了Python中urllib.request.Request.get_selector方法的典型用法代码示例。如果您正苦于以下问题：Python Request.get_selector方法的具体用法？Python Request.get_selector怎么用？Python Request.get_selector使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。请注意：get_selector()自Python 3.3起已被弃用，并在Python 3.4中被移除，新代码应改用Request.selector属性。您也可以进一步了解该方法所在类urllib.request.Request的用法示例。


在下文中一共展示了Request.get_selector方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: getURLInfo

# 需要导入模块: from urllib.request import Request [as 别名]
# 或者: from urllib.request.Request import get_selector [as 别名]
    def getURLInfo(self, url=None):
        '''
        @see: IURLInfoService.getURLInfo

        Open the given URL and describe it in a URLInfo object.

        URL and Date are always set. When the response is not served as
        'text/html' the Title is derived from the URL itself (last path
        segment, or the host when the path is empty) and the object is
        returned right away. For HTML responses ContentType is set, the body
        is fed to an HTMLInfoExtractor and that extractor's urlInfo is
        returned.

        @param url: string
            The (possibly percent-quoted) URL to inspect.
        @return: URLInfo
            The information gathered for the URL.
        @raise InputError:
            If the URL is empty, or if opening it raises URLError or
            ValueError.
        '''
        if not url: raise InputError('Invalid URL %s' % url)
        assert isinstance(url, str), 'Invalid URL %s' % url
        url = unquote(url)

        try:
            with urlopen(url) as conn:
                urlInfo = URLInfo()
                urlInfo.URL = url
                urlInfo.Date = datetime.now()
                # HTTP header names are case-insensitive; the message object's
                # get() honours that, unlike comparing the raw header names.
                contentType = conn.info().get('Content-Type')
                if contentType: contentType = contentType.split(';')[0].strip().lower()
                if contentType != 'text/html':
                    # Not an HTML page (or no content type at all): the only
                    # title available is whatever the URL itself offers.
                    # The selector/host attributes replace get_selector() and
                    # get_host(), which were removed in Python 3.4.
                    req = Request(url)
                    selector = req.selector.strip('/')
                    urlInfo.Title = selector.split('/')[-1] if selector else req.host
                    return urlInfo
                urlInfo.ContentType = contentType
                extr = HTMLInfoExtractor(urlInfo)
                # Best effort: a page that cannot be decoded or parsed still
                # yields whatever the extractor has collected so far.
                try: extr.feed(conn.read().decode())
                except (AssertionError, HTMLParseError, UnicodeDecodeError): pass
                return extr.urlInfo
        except (URLError, ValueError): raise InputError('Invalid URL %s' % url)
开发者ID:Halfnhav4,项目名称:Superdesk,代码行数:33,代码来源:url_info.py

示例2: RequestTests

# 需要导入模块: from urllib.request import Request [as 别名]
# 或者: from urllib.request.Request import get_selector [as 别名]
class RequestTests(unittest.TestCase):
    """Check the URL-derived properties of ``urllib.request.Request``.

    The accessor methods ``has_data``, ``add_data``, ``get_selector``,
    ``get_type``, ``get_host`` and ``get_origin_req_host`` were deprecated in
    Python 3.3 and removed in 3.4, so the tests read the equivalent public
    attributes (``data``, ``selector``, ``type``, ``host``,
    ``origin_req_host``) instead.
    """

    def setUp(self):
        # One request without a body (GET) and one with a body (POST).
        self.get = Request("http://www.python.org/~jeremy/")
        self.post = Request("http://www.python.org/~jeremy/",
                            "data",
                            headers={"X-Test": "test"})

    def test_method(self):
        """The HTTP method follows from the presence of a request body."""
        self.assertEqual("POST", self.post.get_method())
        self.assertEqual("GET", self.get.get_method())

    def test_add_data(self):
        """Attaching a body to a GET request turns it into a POST."""
        self.assertIsNone(self.get.data)
        self.assertEqual("GET", self.get.get_method())
        self.get.data = "spam"
        self.assertIsNotNone(self.get.data)
        self.assertEqual("POST", self.get.get_method())

    def test_get_full_url(self):
        """The full URL is returned exactly as it was given."""
        self.assertEqual("http://www.python.org/~jeremy/",
                         self.get.get_full_url())

    def test_selector(self):
        """The selector is the path part of the URL."""
        self.assertEqual("/~jeremy/", self.get.selector)
        req = Request("http://www.python.org/")
        self.assertEqual("/", req.selector)

    def test_get_type(self):
        """The type is the URL scheme."""
        self.assertEqual("http", self.get.type)

    def test_get_host(self):
        """The host is the network location of the URL."""
        self.assertEqual("www.python.org", self.get.host)

    def test_get_host_unquote(self):
        """Percent-escapes in the host are decoded."""
        req = Request("http://www.%70ython.org/")
        self.assertEqual("www.python.org", req.host)

    def test_proxy(self):
        """Setting a proxy changes the host but not the origin host."""
        self.assertFalse(self.get.has_proxy())
        self.get.set_proxy("www.perl.org", "http")
        self.assertTrue(self.get.has_proxy())
        self.assertEqual("www.python.org", self.get.origin_req_host)
        self.assertEqual("www.perl.org", self.get.host)

    def test_wrapped_url(self):
        """A URL wrapped as ``<URL:...>`` is unwrapped before parsing."""
        req = Request("<URL:http://www.python.org>")
        self.assertEqual("www.python.org", req.host)

    def test_urlwith_fragment(self):
        """The fragment is not part of the selector; the query string is."""
        req = Request("http://www.python.org/?qs=query#fragment=true")
        self.assertEqual("/?qs=query", req.selector)
        req = Request("http://www.python.org/#fun=true")
        self.assertEqual("/", req.selector)
开发者ID:pogigroo,项目名称:py3k-__format__,代码行数:56,代码来源:test_urllib2.py

示例3: getURLInfo

# 需要导入模块: from urllib.request import Request [as 别名]
# 或者: from urllib.request.Request import get_selector [as 别名]
    def getURLInfo(self, url=None):
        '''
        @see: IURLInfoService.getURLInfo

        Open the given URL and describe it in a URLInfo object.

        URL and Date are always set. When the response is not served as
        'text/html' the Title is derived from the URL itself (last path
        segment, or the host when the path is empty) and the object is
        returned right away. For HTML responses ContentType is set, the body
        is decoded with the charset announced in the Content-Type header
        (falling back to UTF-8), every substitution in self.html_fixes is
        applied, and the result is fed to an HTMLInfoExtractor whose urlInfo
        is returned.

        @param url: string
            The (possibly percent-quoted) URL to inspect.
        @return: URLInfo
            The information gathered for the URL.
        @raise InputError:
            If the URL is empty, or if opening it raises URLError or
            ValueError.
        '''
        if not url: raise InputError('Invalid URL %s' % url)
        assert isinstance(url, str), 'Invalid URL %s' % url
        url = unquote(url)

        try:
            with urlopen(url) as conn:
                urlInfo = URLInfo()
                urlInfo.URL = url
                urlInfo.Date = datetime.now()
                headers = conn.info()
                # HTTP header names are case-insensitive; the message object's
                # get() honours that, unlike comparing the raw header names.
                contentType = headers.get('Content-Type')
                if contentType: contentType = contentType.split(';')[0].strip().lower()
                # Let the message object pick out the charset parameter: it
                # copes with extra parameters, quoting and a missing '=',
                # where manual splitting could pick the wrong value or raise
                # IndexError.
                charset = headers.get_content_charset() or 'utf_8'
                if contentType != 'text/html':
                    # Not an HTML page (or no content type at all): the only
                    # title available is whatever the URL itself offers.
                    # The selector/host attributes replace get_selector() and
                    # get_host(), which were removed in Python 3.4.
                    req = Request(url)
                    selector = req.selector.strip('/')
                    urlInfo.Title = selector.split('/')[-1] if selector else req.host
                    return urlInfo
                urlInfo.ContentType = contentType
                extr = HTMLInfoExtractor(urlInfo)
                # Best effort: a page that cannot be parsed still yields
                # whatever the extractor has collected so far.
                try:
                    readData = conn.read()
                    try:
                        decodedData = readData.decode(charset, 'ignore')
                    except Exception:
                        # The charset name comes from the remote server and may
                        # be unknown or unusable; fall back to UTF-8.
                        decodedData = readData.decode('utf_8', 'ignore')
                    # Each fix is a mapping with a 'from' pattern and a 'to'
                    # replacement, applied in order before parsing.
                    for onePair in self.html_fixes:
                        decodedData = re.sub(onePair['from'], onePair['to'], decodedData)
                    extr.feed(decodedData)
                except (AssertionError, HTMLParseError, UnicodeDecodeError): pass
                return extr.urlInfo
        except (URLError, ValueError): raise InputError('Invalid URL %s' % url)
开发者ID:AtomLaw,项目名称:Superdesk,代码行数:48,代码来源:url_info.py

示例4: test_urlwith_fragment

# 需要导入模块: from urllib.request import Request [as 别名]
# 或者: from urllib.request.Request import get_selector [as 别名]
 def test_urlwith_fragment(self):
     req = Request("http://www.python.org/?qs=query#fragment=true")
     self.assertEqual("/?qs=query", req.get_selector())
     req = Request("http://www.python.org/#fun=true")
     self.assertEqual("/", req.get_selector())
开发者ID:pogigroo,项目名称:py3k-__format__,代码行数:7,代码来源:test_urllib2.py

示例5: test_selector

# 需要导入模块: from urllib.request import Request [as 别名]
# 或者: from urllib.request.Request import get_selector [as 别名]
 def test_selector(self):
     self.assertEqual("/~jeremy/", self.get.get_selector())
     req = Request("http://www.python.org/")
     self.assertEqual("/", req.get_selector())
开发者ID:pogigroo,项目名称:py3k-__format__,代码行数:6,代码来源:test_urllib2.py

示例6: _open_tag_file

# 需要导入模块: from urllib.request import Request [as 别名]
# 或者: from urllib.request.Request import get_selector [as 别名]
        def _open_tag_file(mirror, suffix):
            """Download an apt tag file if needed, then open it.

            ``mirror`` is the base URL of the archive; ``suffix`` is the
            compression extension appended to the remote file name ("", ".gz"
            or ".bz2").  The file is fetched from
            ``<mirror>/dists/<dist>/<component>/<ftppath><suffix>`` and stored,
            decompressed, under ``dirname``.  An existing copy is reused,
            except for ``file:`` URLs, which are always fetched again.

            Returns the local copy opened for reading in text mode.  Raises
            RuntimeError for a non-empty suffix other than ".gz" or ".bz2";
            errors from opening the URL or writing the files propagate.
            """
            if not mirror.endswith('/'):
                mirror += '/'
            url = (mirror + "dists/" + dist + "/" + component + "/" + ftppath +
                   suffix)
            req = Request(url)

            def _url_part(name):
                # Request.get_type()/get_selector() were removed in Python
                # 3.4 in favour of plain attributes.  Prefer the accessor
                # when it exists, since older implementations may only fill
                # in the attribute once the accessor has been called.
                getter = getattr(req, "get_" + name, None)
                if getter is not None:
                    return getter()
                return getattr(req, name)

            is_local = _url_part("type") == "file"

            if not is_local:
                filename = "%s_%s_%s_%s" % (quote(mirror, safe=""), dist,
                                            component, tagfile_type)
            else:
                # Make a more or less dummy filename for local URLs.
                filename = os.path.split(
                    _url_part("selector"))[0].replace(os.sep, "_")

            fullname = os.path.join(dirname, filename)
            if is_local:
                # Always refresh.  TODO: we should use If-Modified-Since for
                # remote HTTP tag files.
                try:
                    os.unlink(fullname)
                except OSError:
                    pass
            if not os.path.exists(fullname):
                _progress("Downloading %s file ...", req.get_full_url())

                # With an empty suffix this is the same path as fullname, so
                # the download itself is the final file.
                compressed = os.path.join(dirname, filename + suffix)
                try:
                    url_f = urlopen(req)
                    try:
                        with open(compressed, "wb") as compressed_f:
                            compressed_f.write(url_f.read())
                    finally:
                        url_f.close()

                    # apt_pkg is weird and won't accept GzipFile
                    if suffix:
                        _progress("Decompressing %s file ...",
                                  req.get_full_url())

                        if suffix == ".gz":
                            import gzip
                            compressed_f = gzip.GzipFile(compressed)
                        elif suffix == ".bz2":
                            import bz2
                            compressed_f = bz2.BZ2File(compressed)
                        else:
                            raise RuntimeError("Unknown suffix '%s'" % suffix)

                        # This can be simplified once we can require Python
                        # 2.7, where gzip.GzipFile and bz2.BZ2File are
                        # context managers.
                        try:
                            with open(fullname, "wb") as f:
                                f.write(compressed_f.read())
                                f.flush()
                        finally:
                            compressed_f.close()
                finally:
                    # Only remove the intermediate download when it is a
                    # separate file from the final one.
                    if suffix:
                        try:
                            os.unlink(compressed)
                        except OSError:
                            pass

            return open(fullname, "r")
开发者ID:luisgg,项目名称:germinate,代码行数:69,代码来源:archive.py


注:本文中的urllib.request.Request.get_selector方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。