當前位置: 首頁>>代碼示例>>Python>>正文


Python URLopener.info方法代碼示例

本文整理匯總了Python中urllib.URLopener.info方法的典型用法代碼示例。如果您正苦於以下問題:Python URLopener.info方法的具體用法?Python URLopener.info怎麽用?Python URLopener.info使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在urllib.URLopener的用法示例。


在下文中一共展示了URLopener.info方法的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: read_component_sitemap

# 需要導入模塊: from urllib import URLopener [as 別名]
# 或者: from urllib.URLopener import info [as 別名]
    def read_component_sitemap(self, sitemapindex_uri, sitemap_uri, sitemap, sitemapindex_is_file):
        """Read a component sitemap of a Resource List with index

        Each component must be a sitemap with the 
        """
        if (sitemapindex_is_file):
            if (not self.is_file_uri(sitemap_uri)):
                # Attempt to map URI to local file
                remote_uri = sitemap_uri
                sitemap_uri = self.mapper.src_to_dst(remote_uri)
                self.logger.info("Mapped %s to local file %s" % (remote_uri, sitemap_uri))
            else:
                # The individual sitemaps should be at a URL (scheme/server/path)
                # that the sitemapindex URL can speak authoritatively about
                if (self.check_url_authority and
                    not UrlAuthority(sitemapindex_uri).has_authority_over(sitemap_uri)):
                    raise ListBaseIndexError("The sitemapindex (%s) refers to sitemap at a location it does not have authority over (%s)" % (sitemapindex_uri,sitemap_uri))
        try:
            fh = URLopener().open(sitemap_uri)
            self.num_files += 1
        except IOError as e:
            raise ListBaseIndexError("Failed to load sitemap from %s listed in sitemap index %s (%s)" % (sitemap_uri,sitemapindex_uri,str(e)))
        # Get the Content-Length if we can (works fine for local files)
        try:
            self.content_length = int(fh.info()['Content-Length'])
            self.bytes_read += self.content_length
        except KeyError:
            # If we don't get a length then c'est la vie
            pass
        self.logger.info( "Reading sitemap from %s (%d bytes)" % (sitemap_uri,self.content_length) )
        component = sitemap.parse_xml( fh=fh, sitemapindex=False )
        # Copy resources into self, check any metadata
        for r in component:
            self.resources.add(r)
開發者ID:EHRI,項目名稱:resync,代碼行數:36,代碼來源:list_base_with_index.py

示例2: read

# 需要導入模塊: from urllib import URLopener [as 別名]
# 或者: from urllib.URLopener import info [as 別名]
    def read(self, uri=None, resources=None, index_only=False):
        """Read sitemap from a URI including handling sitemapindexes

        If index_only is True then individual sitemaps references in a sitemapindex
        will not be read. This will result in no resources being returned and is
        useful only to read the metadata and links listed in the sitemapindex.

        Includes the subtlety that if the input URI is a local file and is a 
        sitemapindex which contains URIs for the individual sitemaps, then these
        are mapped to the filesystem also.
        """
        try:
            fh = URLopener().open(uri)
            self.num_files += 1
        except IOError as e:
            raise IOError("Failed to load sitemap/sitemapindex from %s (%s)" % (uri,str(e)))
        # Get the Content-Length if we can (works fine for local files)
        try:
            self.content_length = int(fh.info()['Content-Length'])
            self.bytes_read += self.content_length
            self.logger.debug( "Read %d bytes from %s" % (self.content_length,uri) )
        except KeyError:
            # If we don't get a length then c'est la vie
            self.logger.debug( "Read ????? bytes from %s" % (uri) )
            pass
        self.logger.info( "Read sitemap/sitemapindex from %s" % (uri) )
        s = self.new_sitemap()
        s.parse_xml(fh=fh,resources=self,capability=self.capability_name)
        # what did we read? sitemap or sitemapindex?
        if (s.parsed_index):
            # sitemapindex
            if (not self.allow_multifile):
                raise ListBaseIndexError("Got sitemapindex from %s but support for sitemapindex disabled" % (uri))
            self.logger.info( "Parsed as sitemapindex, %d sitemaps" % (len(self.resources)) )
            sitemapindex_is_file = self.is_file_uri(uri)
            if (index_only):
                # don't read the component sitemaps
                self.sitemapindex = True
                return
            # now loop over all entries to read each sitemap and add to resources
            sitemaps = self.resources
            self.resources = self.resources_class()
            self.logger.info( "Now reading %d sitemaps" % len(sitemaps.uris()) )
            for sitemap_uri in sorted(sitemaps.uris()):
                self.read_component_sitemap(uri,sitemap_uri,s,sitemapindex_is_file)
        else:
            # sitemap
            self.logger.info( "Parsed as sitemap, %d resources" % (len(self.resources)) )
開發者ID:EHRI,項目名稱:resync,代碼行數:50,代碼來源:list_base_with_index.py

示例3: read

# 需要導入模塊: from urllib import URLopener [as 別名]
# 或者: from urllib.URLopener import info [as 別名]
    def read(self, uri=None, resources=None):
        """Read sitemap from a URI including handling sitemapindexes

        Returns the inventory or changeset. If resources is not specified then
        it is assumed that an Inventory is to be read, pass in a ChangeSet object
        to read a changeset.

        Includes the subtlety that if the input URI is a local file and the 
        """
        try:
            fh = URLopener().open(uri)
        except IOError as e:
            raise Exception("Failed to load sitemap/sitemapindex from %s (%s)" % (uri,str(e)))
        # Get the Content-Length if we can (works fine for local files)
        try:
            self.content_length = int(fh.info()['Content-Length'])
            self.bytes_read += self.content_length
        except KeyError:
            # If we don't get a length then c'est la vie
            pass
        self.logger.info( "Read sitemap/sitemapindex from %s" % (uri) )
        etree = parse(fh)
        # check root element: urlset (for sitemap), sitemapindex or bad
        self.sitemaps_created=0
        root = etree.getroot()
        # assume inventory but look to see whether this is a changeset 
        # as indicated with rs:type="changeset" on the root
        resources_class = self.inventory_class
        sitemap_xml_parser = self.inventory_parse_xml
        self.changeset_read = False
        root_type = root.attrib.get('{'+RS_NS+'}type',None)
        if (root_type is not None):
            if (root_type == 'changeset'):
                resources_class = self.changeset_class
                sitemap_xml_parser = self.changeset_parse_xml
                self.changeset_read = True
            else:
                self.logger.info("Bad value of rs:type on root element (%s), ignoring" % (root_type))
        # now have make sure we have a place to put the data we read
        if (resources is None):
            resources=resources_class()
        # sitemap or sitemapindex?
        if (root.tag == '{'+SITEMAP_NS+"}urlset"):
            self.logger.info( "Parsing as sitemap" )
            sitemap_xml_parser(etree=etree, resources=resources)
            self.sitemaps_created+=1
        elif (root.tag == '{'+SITEMAP_NS+"}sitemapindex"):
            if (not self.allow_multifile):
                raise Exception("Got sitemapindex from %s but support for sitemapindex disabled" % (uri))
            self.logger.info( "Parsing as sitemapindex" )
            sitemaps=self.sitemapindex_parse_xml(etree=etree)
            sitemapindex_is_file = self.is_file_uri(uri)
            # now loop over all entries to read each sitemap and add to resources
            self.logger.info( "Now reading %d sitemaps" % len(sitemaps) )
            for sitemap_uri in sorted(sitemaps.resources.keys()):
                if (sitemapindex_is_file):
                    if (not self.is_file_uri(sitemap_uri)):
                        # Attempt to map URI to local file
                        remote_uri = sitemap_uri
                        sitemap_uri = self.mapper.src_to_dst(remote_uri)
                else:
                    # The individual sitemaps should be at a URL (scheme/server/path)
                    # that the sitemapindex URL can speak authoritatively about
                    if (not UrlAuthority(uri).has_authority_over(sitemap_uri)):
                        raise Exception("The sitemapindex (%s) refers to sitemap at a location it does not have authority over (%s)" % (uri,sitemap_uri))
                try:
                    fh = URLopener().open(sitemap_uri)
                except IOError as e:
                    raise Exception("Failed to load sitemap from %s listed in sitemap index %s (%s)" % (sitemap_uri,uri,str(e)))
                # Get the Content-Length if we can (works fine for local files)
                try:
                    self.content_length = int(fh.info()['Content-Length'])
                    self.bytes_read += self.content_length
                except KeyError:
                    # If we don't get a length then c'est la vie
                    pass
                self.logger.info( "Read sitemap from %s (%d)" % (sitemap_uri,self.content_length) )
                sitemap_xml_parser( fh=fh, resources=resources )
                self.sitemaps_created+=1
        else:
            raise ValueError("XML read from %s is not a sitemap or sitemapindex" % (sitemap_uri))
        return(resources)
開發者ID:semantalytics,項目名稱:simulator,代碼行數:84,代碼來源:sitemap.py

示例4: read

# 需要導入模塊: from urllib import URLopener [as 別名]
# 或者: from urllib.URLopener import info [as 別名]
    def read(self, uri=None, resources=None, changelist=None, index_only=False):
        """Read sitemap from a URI including handling sitemapindexes

        Returns the resourcelist or changelist. If changelist is not specified (None)
        then it is assumed that an ResourceList is to be read, unless the XML
        indicates a Changelist.

        If changelist is True then a Changelist if expected; if changelist if False
        then an ResourceList is expected.

        If index_only is True then individual sitemaps references in a sitemapindex
        will not be read. This will result in no resources being returned and is
        useful only to read the capabilities and metadata listed in the sitemapindex.

        Will set self.read_type to a string value sitemap/sitemapindex/changelist/changelistindex
        depleding on the type of the file expected/read.

        Includes the subtlety that if the input URI is a local file and is a 
        sitemapindex which contains URIs for the individual sitemaps, then these
        are mapped to the filesystem also.
        """
        try:
            fh = URLopener().open(uri)
        except IOError as e:
            raise Exception("Failed to load sitemap/sitemapindex from %s (%s)" % (uri,str(e)))
        # Get the Content-Length if we can (works fine for local files)
        try:
            self.content_length = int(fh.info()['Content-Length'])
            self.bytes_read += self.content_length
            self.logger.debug( "Read %d bytes from %s" % (self.content_length,uri) )
        except KeyError:
            # If we don't get a length then c'est la vie
            self.logger.debug( "Read ????? bytes from %s" % (uri) )
            pass
        self.logger.info( "Read sitemap/sitemapindex from %s" % (uri) )
        etree = parse(fh)
        # check root element: urlset (for sitemap), sitemapindex or bad
        self.sitemaps_created=0
        root = etree.getroot()
        # assume resourcelist but look to see whether this is a changelist 
        # as indicated with rs:type="changelist" on the root
        resources_class = self.resourcelist_class
        sitemap_xml_parser = self.resourcelist_parse_xml
        self.changelist_read = False
        self.read_type = 'sitemap'
        root_type = root.attrib.get('{'+RS_NS+'}type',None)
        if (root_type is not None):
            if (root_type == 'changelist'):
                self.changelist_read = True
            else:
                self.logger.info("Bad value of rs:type on root element (%s), ignoring" % (root_type))
        elif (changelist is True):
            self.changelist_read = True
        if (self.changelist_read):
            self.read_type = 'changelist'
            resources_class = self.changelist_class
            sitemap_xml_parser = self.changelist_parse_xml
        # now have make sure we have a place to put the data we read
        if (resources is None):
            resources=resources_class()
        # sitemap or sitemapindex?
        if (root.tag == '{'+SITEMAP_NS+"}urlset"):
            self.logger.info( "Parsing as sitemap" )
            sitemap_xml_parser(etree=etree, resources=resources)
            self.sitemaps_created+=1
        elif (root.tag == '{'+SITEMAP_NS+"}sitemapindex"):
            self.read_type += 'index'
            if (not self.allow_multifile):
                raise Exception("Got sitemapindex from %s but support for sitemapindex disabled" % (uri))
            self.logger.info( "Parsing as sitemapindex" )
            sitemaps=self.sitemapindex_parse_xml(etree=etree)
            sitemapindex_is_file = self.is_file_uri(uri)
            if (index_only):
                return(resources)
            # now loop over all entries to read each sitemap and add to resources
            self.logger.info( "Now reading %d sitemaps" % len(sitemaps) )
            for sitemap_uri in sorted(sitemaps.resources.keys()):
                if (sitemapindex_is_file):
                    if (not self.is_file_uri(sitemap_uri)):
                        # Attempt to map URI to local file
                        remote_uri = sitemap_uri
                        sitemap_uri = self.mapper.src_to_dst(remote_uri)
                else:
                    # The individual sitemaps should be at a URL (scheme/server/path)
                    # that the sitemapindex URL can speak authoritatively about
                    if (not UrlAuthority(uri).has_authority_over(sitemap_uri)):
                        raise Exception("The sitemapindex (%s) refers to sitemap at a location it does not have authority over (%s)" % (uri,sitemap_uri))
                try:
                    fh = URLopener().open(sitemap_uri)
                except IOError as e:
                    raise Exception("Failed to load sitemap from %s listed in sitemap index %s (%s)" % (sitemap_uri,uri,str(e)))
                # Get the Content-Length if we can (works fine for local files)
                try:
                    self.content_length = int(fh.info()['Content-Length'])
                    self.bytes_read += self.content_length
                except KeyError:
                    # If we don't get a length then c'est la vie
                    pass
                self.logger.info( "Read sitemap from %s (%d)" % (sitemap_uri,self.content_length) )
                sitemap_xml_parser( fh=fh, resources=resources )
#.........這裏部分代碼省略.........
開發者ID:JordanReiter,項目名稱:resync,代碼行數:103,代碼來源:sitemap.py


注:本文中的urllib.URLopener.info方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。