当前位置: 首页>>代码示例>>Python>>正文


Python sitemap.Sitemap类代码示例

本文整理汇总了Python中resync.sitemap.Sitemap的典型用法代码示例。如果您正苦于以下问题:Python Sitemap类的具体用法?Python Sitemap怎么用?Python Sitemap使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Sitemap类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_state_published

    def get_state_published(self):
        """
        See if publish_dir has a zip end file. If so, return the path of the zip end file and the resourcelist
        (with local paths) of resources published in the zip end file.

        The resourcelist is parsed from the ``.xml`` file that shares its base name with the zip end file.

        :return:    - the path to the zip end file or None if there is no zip end file.
                    - the resourcelist of resources published in zip end file or an empty ResourceList if there
                      is no zip end file.
        :raises RuntimeError: if publish_dir contains more than one zip end file.
        """
        rl_end_old = ResourceList()

        zip_end_files = glob(os.path.join(self.publish_dir, PREFIX_END_PART + "*.zip"))
        if len(zip_end_files) > 1:
            raise RuntimeError(
                "Found more than one %s*.zip files. Inconsistent structure of %s." % (PREFIX_END_PART, self.publish_dir)
            )
        if not zip_end_files:
            # Nothing published yet: no path and an empty resourcelist.
            return None, rl_end_old

        path_zip_end_old = zip_end_files[0]
        # The context manager closes the file even when parse_xml raises;
        # the previous explicit close() was skipped on errors.
        with open(os.path.splitext(path_zip_end_old)[0] + ".xml", "r") as rl_file:
            sm = Sitemap()
            sm.parse_xml(rl_file, resources=rl_end_old)

        return path_zip_end_old, rl_end_old
开发者ID:CLARIAH,项目名称:virtuoso-quad-log,代码行数:25,代码来源:zipsynchronizer.py

示例2: test_20_parse_sitemapindex_empty

 def test_20_parse_sitemapindex_empty(self):
     """An empty <sitemapindex> document yields no sitemaps and no resources."""
     parser = Sitemap()
     empty_index = StringIO.StringIO(
         '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> </sitemapindex>'
     )
     parsed = parser.sitemapindex_parse_xml(fh=empty_index)
     # Check both the parser's counter and the returned container.
     self.assertEqual(parser.sitemaps_created, 0, "0 sitemaps in sitemapindex")
     self.assertEqual(len(parsed.resources), 0, "0 sitemaps")
开发者ID:pedak,项目名称:resdbp,代码行数:7,代码来源:test_sitemap.py

示例3: test3_with_md5

 def test3_with_md5(self):
     """Inventory built from disk with do_md5=True serializes size and md5 for each file."""
     ib = InventoryBuilder(do_md5=True)
     i = ib.from_disk('resync/test/testdata/dir1','http://example.org/t')
     s = Sitemap()
     xml = s.inventory_as_xml(i)
     # Patterns are raw string literals so that \w, \: and \- reach the regex
     # engine untouched instead of being invalid string escapes; the pattern
     # text itself is unchanged.
     self.assertNotEqual( None, re.search(r'<loc>http://example.org/t/file_a</loc><lastmod>[\w\:\-]+</lastmod><rs:size>20</rs:size><rs:md5>6bf26fd66601b528d2e0b47eaa87edfd</rs:md5>',xml), 'size/checksum for file_a')
     self.assertNotEqual( None, re.search(r'<loc>http://example.org/t/file_b</loc><lastmod>[\w\:\-]+</lastmod><rs:size>45</rs:size><rs:md5>452e54bdae1626ac5d6e7be81b39de21</rs:md5>',xml), 'size/checksum for file_b' )
开发者ID:edsu,项目名称:resync-simulator,代码行数:7,代码来源:test_inventory_builder.py

示例4: test2_pretty_output

 def test2_pretty_output(self):
     """With pretty_xml set, resources_as_xml puts each <url> element on its own line."""
     builder = InventoryBuilder()
     builder.mapper = Mapper(['http://example.org/t','resync/test/testdata/dir1'])
     inventory = builder.from_disk()
     sitemap = Sitemap()
     sitemap.pretty_xml = True
     # Expected document, split at the line breaks of the pretty output.
     expected = (
         '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">\n'
         '<url><loc>http://example.org/t/file_a</loc><lastmod>2012-07-25T17:13:46Z</lastmod><rs:size>20</rs:size></url>\n'
         '<url><loc>http://example.org/t/file_b</loc><lastmod>2001-09-09T01:46:40Z</lastmod><rs:size>45</rs:size></url>\n'
         '</urlset>'
     )
     self.assertEqual(sitemap.resources_as_xml(inventory), expected)
开发者ID:pedak,项目名称:sync-oai,代码行数:7,代码来源:test_inventory_builder.py

示例5: parse_document

 def parse_document(self):
     """Parse a ResourceSync document and print a summary of it

     Reads the document at self.sitemap, prints its capability and the
     number of entries and, when self.verbose is set, lists entries up
     to self.max_sitemap_entries (100 when that is not set).

     Raises ClientFatalError if the document cannot be read.
     """
     parser = Sitemap()
     self.logger.info("Reading sitemap(s) from %s ..." % (self.sitemap))
     try:
         document = parser.parse_xml(urllib.urlopen(self.sitemap))
     except IOError as e:
         raise ClientFatalError("Cannot read document (%s)" % str(e))
     num_entries = len(document.resources)
     if 'capability' in document.md:
         capability = document.md['capability']
     else:
         capability = '(unknown capability)'
     print("Parsed %s document with %d entries" % (capability, num_entries))
     if not self.verbose:
         return
     # An explicit limit replaces the default and silences the hint about it.
     if self.max_sitemap_entries:
         to_show = self.max_sitemap_entries
         override_str = ''
     else:
         to_show = 100
         override_str = ' (override with --max-sitemap-entries)'
     if num_entries > to_show:
         print("Showing first %d entries sorted by URI%s..." % (to_show, override_str))
     for shown, resource in enumerate(document, 1):
         # Entries are labelled from 0; `shown` counts how many were printed.
         print('[%d] %s' % (shown - 1, str(resource)))
         if shown >= to_show:
             break
开发者ID:semantalytics,项目名称:resync,代码行数:31,代码来源:client.py

示例6: read_reference_sitemap

    def read_reference_sitemap(self, ref_sitemap, name="reference"):
        """Load the sitemap at ref_sitemap and return the resulting inventory

        name is only used in log messages to label the kind of sitemap
        being read. When self.verbose is set the entries are also
        printed, up to self.max_sitemap_entries (100 when that is not set).
        """
        reader = Sitemap(allow_multifile=self.allow_multifile, mapper=self.mapper)
        self.logger.info("Reading %s sitemap(s) from %s ..." % (name, ref_sitemap))
        inventory = reader.read(ref_sitemap)
        num_entries = len(inventory)
        self.logger.warning(
            "Read %s sitemap with %d entries in %d sitemaps" % (name, num_entries, reader.sitemaps_created)
        )
        if not self.verbose:
            return inventory
        # An explicit limit replaces the default and silences the hint about it.
        if self.max_sitemap_entries:
            to_show = self.max_sitemap_entries
            override_str = ""
        else:
            to_show = 100
            override_str = " (override with --max-sitemap-entries)"
        if num_entries > to_show:
            print("Showing first %d entries sorted by URI%s..." % (to_show, override_str))
        for shown, entry in enumerate(inventory, 1):
            print(entry)
            if shown >= to_show:
                break
        return inventory
开发者ID:pedak,项目名称:sync-oai,代码行数:28,代码来源:client.py

示例7: write_static_inventory

 def write_static_inventory(self):
     """Write the generated inventory to the filesystem as sitemap file(s).

     The sitemap is first written under Source.TEMP_FILE_PATH, then the
     sitemap files in Source.STATIC_FILE_PATH are removed and replaced by
     the new ones, and the temp directory is deleted. Afterwards the
     elapsed time is logged and the source's observers are notified with
     an "UPDATED" ResourceChange.
     """
     # Generate sitemap in temp directory
     then = time.time()  # start of the timed section, also used as event timestamp
     self.ensure_temp_dir(Source.TEMP_FILE_PATH)
     inventory = self.generate()
     basename = Source.TEMP_FILE_PATH + "/sitemap.xml"
     s=Sitemap()
     s.max_sitemap_entries=self.config['max_sitemap_entries']
     # Map the source base URI onto the temp directory for this write.
     s.mapper=Mapper([self.source.base_uri, Source.TEMP_FILE_PATH])
     s.write(inventory, basename)
     # Delete old sitemap files; move the new ones; delete the temp dir
     self.rm_sitemap_files(Source.STATIC_FILE_PATH)
     self.mv_sitemap_files(Source.TEMP_FILE_PATH, Source.STATIC_FILE_PATH)
     shutil.rmtree(Source.TEMP_FILE_PATH)
     now = time.time()  # end of the timed section (size computation below is not timed)
     # Log the completed write and notify observers about it
     sitemap_size = self.compute_sitemap_size(Source.STATIC_FILE_PATH)
     log_data = {'time': (now-then), 
                 'no_resources': self.source.resource_count}
     self.logger.info("Wrote static sitemap inventory. %s" % log_data)
     # NOTE(review): the inner object is also built with ResourceChange rather
     # than a plain resource type - confirm this nesting is intended.
     sm_write_end = ResourceChange(
             resource = ResourceChange(self.uri, 
                             size=sitemap_size,
                             timestamp=then),
                             changetype = "UPDATED")
     self.source.notify_observers(sm_write_end)
开发者ID:pedak,项目名称:sync-oai,代码行数:27,代码来源:source.py

示例8: test2_pretty_output

 def test2_pretty_output(self):
     """With pretty_xml set, inventory_as_xml puts each <url> element on its own line."""
     builder = InventoryBuilder()
     builder.mapper = Mapper(['http://example.org/t','resync/test/testdata/dir1'])
     inventory = builder.from_disk()
     sitemap = Sitemap()
     sitemap.pretty_xml = True
     # Expected document, split at the line breaks of the pretty output.
     expected = (
         '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://resourcesync.org/change/0.1">\n'
         '<url><loc>http://example.org/t/file_a</loc><lastmod>2012-03-14T17:46:04</lastmod><rs:size>20</rs:size></url>\n'
         '<url><loc>http://example.org/t/file_b</loc><lastmod>2012-03-14T17:46:25</lastmod><rs:size>45</rs:size></url>\n'
         '</urlset>'
     )
     self.assertEqual(sitemap.inventory_as_xml(inventory), expected)
开发者ID:pedak,项目名称:resdbp,代码行数:7,代码来源:test_inventory_builder.py

示例9: explore_uri

    def explore_uri(self, uri, caps):
        """Interactive exploration of document at uri

        Reads and parses the document at uri, prints a numbered list of
        its entries (all of them up to 21 entries, otherwise the first
        20) and prompts until the user enters a listed number or 'q'.

        Will flag warnings if the document is not of type listed in caps
        (caps may be None to skip that check).

        Returns a tuple (uri, caps, inp):
          - on quit: ('', '', 'q')
          - otherwise: the uri of the chosen entry, the list of
            capabilities expected for it (None when nothing is known
            about the entry's capability) and the raw user input.

        Raises ClientFatalError if the document cannot be read.
        """
        s = Sitemap()
        print("Reading %s" % (uri))
        try:
            # Named `document` so the builtin `list` is not shadowed.
            document = s.parse_xml(urllib.urlopen(uri))
        except IOError as e:
            raise ClientFatalError("Cannot read %s (%s)" % (uri, str(e)))
        num_entries = len(document.resources)
        capability = '(unknown capability)'
        if 'capability' in document.md:
            capability = document.md['capability']
        if s.parsed_index:
            capability += 'index'
        print("Parsed %s document with %d entries:" % (capability, num_entries))
        if caps is not None and capability not in caps:
            print("WARNING - expected a %s document" % (','.join(caps)))
        to_show = num_entries
        if num_entries > 21:
            to_show = 20
        # What entries are allowed?
        # FIXME - not complete
        if capability == 'capabilitylistindex':
            entry_caps = ['capabilitylist']
        elif capability == 'capabilitylist':
            entry_caps = ['resourcelist', 'changelist', 'resourcedump', 'changedump', 'changelistindex']
        elif capability == 'changelistindex':
            entry_caps = ['changelist']
        else:
            # Unknown document type: nothing to check entries against.
            # (Previously entry_caps was left undefined -> NameError below.)
            entry_caps = []
        n = 0
        options = {}
        for r in document.resources:
            if n >= to_show:
                print("(not showing remaining %d entries)" % (num_entries - n))
                # Was a bare `last` (a Perl-ism), which raised NameError.
                break
            n += 1
            options[str(n)] = r
            print("[%d] %s" % (n, r.uri))
            if r.capability is not None:
                warning = ''
                # Only flag a mismatch when the allowed set is actually known.
                if entry_caps and r.capability not in entry_caps:
                    warning = " (EXPECTED %s)" % (' or '.join(entry_caps))
                print("  %s%s" % (r.capability, warning))
            elif len(entry_caps) == 1:
                # Only one possibility, so fill in the missing capability.
                r.capability = entry_caps[0]
                print("  capability not specified, should be %s" % (r.capability))
        while True:
            inp = raw_input("Follow [number or q(uit)]?")
            if inp in options:
                break
            if inp == 'q':
                return ('', '', inp)
        selected = options[inp]
        if selected.capability is not None:
            caps = [selected.capability]
        elif capability == 'capabilitylistindex':
            # all links should be to capabilitylist documents
            caps = ['capabilitylist']
        else:
            # Nothing known about the target: return None rather than
            # [None], which would break ','.join(caps) on the next call.
            caps = None
        return (selected.uri, caps, inp)
开发者ID:semantalytics,项目名称:resync,代码行数:60,代码来源:client.py

示例10: test3_with_md5

 def test3_with_md5(self):
     """Resource list built with do_md5=True serializes size and an md5 fixity value for each file."""
     ib = InventoryBuilder(do_md5=True)
     ib.mapper = Mapper(['http://example.org/t','resync/test/testdata/dir1'])
     i = ib.from_disk()
     s = Sitemap()
     xml = s.resources_as_xml(i)
     # Patterns are raw string literals so that \w, \:, \- and \+ reach the
     # regex engine untouched instead of being invalid string escapes; the
     # pattern text itself is unchanged.
     self.assertNotEqual( None, re.search(r'<loc>http://example.org/t/file_a</loc><lastmod>[\w\:\-]+Z</lastmod><rs:size>20</rs:size><rs:fixity type="md5">a/Jv1mYBtSjS4LR\+qoft/Q==</rs:fixity>',xml) ) #must escape + in md5
     self.assertNotEqual( None, re.search(r'<loc>http://example.org/t/file_b</loc><lastmod>[\w\:\-]+Z</lastmod><rs:size>45</rs:size><rs:fixity type="md5">RS5Uva4WJqxdbnvoGzneIQ==</rs:fixity>',xml) )
开发者ID:pedak,项目名称:sync-oai,代码行数:8,代码来源:test_inventory_builder.py

示例11: publish_metadata

    def publish_metadata(self, new_zips, exluded_zip=None):
        """
        Rewrite the published resource dump so that it also lists new_zips, and create the capability list if it
        does not exist yet. An excluded zip is dropped from the previously published resource dump.

        :param new_zips: a resourcelist with newly created zip resources
        :param exluded_zip: local path to zip file that will be removed from previously published metadata.
        :raises RuntimeError: if exluded_zip is given but its URL is not listed in the resource dump.
        """
        dump_url = self.publish_url + RS_RESOURCE_DUMP_XML
        dump_path = os.path.join(self.publish_dir, RS_RESOURCE_DUMP_XML)
        capa_url = self.publish_url + RS_CAPABILITY_LIST_XML
        capa_path = os.path.join(self.publish_dir, RS_CAPABILITY_LIST_XML)

        dump = ResourceDump()

        if not os.path.isfile(dump_path):
            # First publication: record the start time and link up to the capability list.
            dump.md_at = w3cdt.datetime_to_str(no_fractions=True)
            dump.link_set(rel="up", href=capa_url)
        else:
            # Continue from what was published before.
            with open(dump_path, "r") as existing_dump:
                parser = Sitemap()
                parser.parse_xml(existing_dump, resources=dump)

        # Drop the excluded zip, if any
        if exluded_zip:
            excluded_url = self.publish_url + os.path.basename(exluded_zip)
            if excluded_url not in dump.resources:
                raise RuntimeError("Could not find %s in %s" % (excluded_url, dump_path))
            del dump.resources[excluded_url]

        # Register the new zips
        for zip_resource in new_zips:
            dump.add(zip_resource)

        # Persist resource-dump.xml
        dump.md_completed = w3cdt.datetime_to_str(no_fractions=True)
        with open(dump_path, "w") as dump_out:
            dump_out.write(dump.as_xml())

        # The directory name is decoded as url-safe base64 to obtain the graph iri.
        encoded_iri = os.path.basename(self.publish_dir)
        iri = base64.urlsafe_b64decode(encoded_iri).rstrip("\n")

        print("New %s for graph %s" % (RS_RESOURCE_DUMP_XML, iri))
        print("See %s" % dump_url)

        # capability-list.xml is only written once
        if os.path.isfile(capa_path):
            return
        capa_list = CapabilityList()
        capa_list.link_set(rel="up", href=self.src_desc_url)
        capa_list.add_capability(dump, dump_url)
        with open(capa_path, "w") as capa_out:
            capa_out.write(capa_list.as_xml())

        print("New %s. See %s" % (RS_CAPABILITY_LIST_XML, capa_url))
开发者ID:CLARIAH,项目名称:virtuoso-quad-log,代码行数:58,代码来源:zipsynchronizer.py

示例12: test_11_parse_2

    def test_11_parse_2(self):
        """Parsing a urlset with two <url> entries creates two resources."""
        # Adjacent literals are concatenated; only the XML declaration ends in a newline.
        document = (
            "<?xml version='1.0' encoding='UTF-8'?>\n"
            "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:rs=\"http://resourcesync.org/change/0.1\">"
            "<url><loc>/tmp/rs_test/src/file_a</loc><lastmod>2012-03-14T18:37:36</lastmod><rs:size>12</rs:size></url>"
            "<url><loc>/tmp/rs_test/src/file_b</loc><lastmod>2012-03-14T18:37:36</lastmod><rs:size>32</rs:size></url>"
            "</urlset>"
        )
        parser = Sitemap()
        # Only the parser's counter is checked; the parsed inventory is not needed.
        parser.inventory_parse_xml(fh=StringIO.StringIO(document))
        self.assertEqual(parser.resources_created, 2, "got 2 resources")
开发者ID:pedak,项目名称:resdbp,代码行数:9,代码来源:test_sitemap.py

示例13: test_ex2_1

 def test_ex2_1(self):
     """ex2_1 is a simple resourcelist with 2 resources, no metadata"""
     s=Sitemap()
     # Use a context manager so the fixture file is closed even if parsing
     # fails; it was previously opened and never closed.
     with open('resync/test/testdata/examples_from_spec/ex2_1.xml') as fh:
         si = s.resourcelist_parse_xml( fh=fh )
     self.assertEqual( len(si.resources), 2, '2 resources')
     sms = sorted(si.resources.keys())
     self.assertEqual( sms, ['http://example.com/res1','http://example.com/res2'] )
     self.assertEqual( si.resources['http://example.com/res1'].lastmod, None )
开发者ID:JordanReiter,项目名称:resync,代码行数:9,代码来源:test_examples_from_spec.py

示例14: test_11_parse_2

    def test_11_parse_2(self):
        """Parsing a resource list with two <url> entries creates two resources."""
        # Adjacent literals are concatenated; only the XML declaration ends in a newline.
        document = (
            '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
            '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
            '<url><loc>/tmp/rs_test/src/file_a</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:size>12</rs:size></url>'
            '<url><loc>/tmp/rs_test/src/file_b</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:size>32</rs:size></url>'
            '</urlset>'
        )
        parser = Sitemap()
        # Only the parser's counter is checked; the parsed list is not needed.
        parser.resourcelist_parse_xml(fh=StringIO.StringIO(document))
        self.assertEqual(parser.resources_created, 2, 'got 2 resources')
开发者ID:JordanReiter,项目名称:resync,代码行数:9,代码来源:test_sitemap.py

示例15: test_22_parse_sitemapindex_file

 def test_22_parse_sitemapindex_file(self):
     """Parsing the sitemapindex1 fixture yields its three sitemap entries with lastmod values."""
     s=Sitemap()
     # Use a context manager so the fixture file is closed even if parsing
     # fails; it was previously opened and never closed.
     with open('resync/test/testdata/sitemapindex1/sitemap.xml') as fh:
         si = s.sitemapindex_parse_xml( fh=fh )
     self.assertEqual( s.sitemaps_created, 3, '3 sitemaps in sitemapindex')
     self.assertEqual( len(si.resources), 3, '3 sitemaps')
     sms = sorted(si.resources.keys())
     self.assertEqual( sms, ['http://localhost:8888/sitemap00000.xml','http://localhost:8888/sitemap00001.xml','http://localhost:8888/sitemap00002.xml'] )
     self.assertEqual( si.resources['http://localhost:8888/sitemap00000.xml'].lastmod, '2012-06-13T18:09:13Z' )
开发者ID:JordanReiter,项目名称:resync,代码行数:9,代码来源:test_sitemap.py


注:本文中的resync.sitemap.Sitemap类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。