本文整理汇总了Python中resync.sitemap.Sitemap类的典型用法代码示例。如果您正苦于以下问题：Python Sitemap类的具体用法？Python Sitemap怎么用？Python Sitemap使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Sitemap类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_state_published
def get_state_published(self):
    """
    Look for a zip end file in publish_dir and load the resourcelist that belongs to it.

    The resourcelist is parsed from the .xml file that has the same path as the zip end file,
    with the extension replaced.

    :return: - the path to the zip end file or None if there is no zip end file.
             - the resourcelist parsed from that .xml file, or an empty ResourceList if there is
               no zip end file.
    :raises RuntimeError: if publish_dir holds more than one zip end file.
    """
    path_zip_end_old = None
    rl_end_old = ResourceList()

    zip_end_files = glob(os.path.join(self.publish_dir, PREFIX_END_PART + "*.zip"))
    if len(zip_end_files) > 1:
        raise RuntimeError(
            "Found more than one %s*.zip files. Inconsistent structure of %s." % (PREFIX_END_PART, self.publish_dir)
        )

    if zip_end_files:
        path_zip_end_old = zip_end_files[0]
        rl_path = os.path.splitext(path_zip_end_old)[0] + ".xml"
        # The with-block guarantees the file is closed even when parsing raises.
        with open(rl_path, "r") as rl_file:
            sm = Sitemap()
            sm.parse_xml(rl_file, resources=rl_end_old)

    return path_zip_end_old, rl_end_old
示例2: test_20_parse_sitemapindex_empty
def test_20_parse_sitemapindex_empty(self):
    """Parsing a sitemapindex with no entries creates no sitemaps and no resources."""
    empty_index = '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> </sitemapindex>'
    parser = Sitemap()
    parsed = parser.sitemapindex_parse_xml(fh=StringIO.StringIO(empty_index))
    self.assertEqual(parser.sitemaps_created, 0, "0 sitemaps in sitemapindex")
    self.assertEqual(len(parsed.resources), 0, "0 sitemaps")
示例3: test3_with_md5
def test3_with_md5(self):
    """Check that an inventory built with do_md5=True serializes size and md5 per file."""
    ib = InventoryBuilder(do_md5=True)
    i = ib.from_disk('resync/test/testdata/dir1', 'http://example.org/t')
    s = Sitemap()
    xml = s.inventory_as_xml(i)
    # Raw strings keep the regex escapes (\w, \:, \-) out of Python's own
    # string-escape processing; the resulting patterns are unchanged.
    self.assertIsNotNone(
        re.search(
            r'<loc>http://example.org/t/file_a</loc><lastmod>[\w\:\-]+</lastmod><rs:size>20</rs:size><rs:md5>6bf26fd66601b528d2e0b47eaa87edfd</rs:md5>',
            xml,
        ),
        'size/checksum for file_a',
    )
    self.assertIsNotNone(
        re.search(
            r'<loc>http://example.org/t/file_b</loc><lastmod>[\w\:\-]+</lastmod><rs:size>45</rs:size><rs:md5>452e54bdae1626ac5d6e7be81b39de21</rs:md5>',
            xml,
        ),
        'size/checksum for file_b',
    )
示例4: test2_pretty_output
def test2_pretty_output(self):
    """With pretty_xml enabled, resources_as_xml emits one element per line."""
    builder = InventoryBuilder()
    builder.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
    inventory = builder.from_disk()
    writer = Sitemap()
    writer.pretty_xml = True
    # Expected document, split at its embedded newlines for readability.
    expected = (
        '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">\n'
        '<url><loc>http://example.org/t/file_a</loc><lastmod>2012-07-25T17:13:46Z</lastmod><rs:size>20</rs:size></url>\n'
        '<url><loc>http://example.org/t/file_b</loc><lastmod>2001-09-09T01:46:40Z</lastmod><rs:size>45</rs:size></url>\n'
        '</urlset>'
    )
    self.assertEqual(writer.resources_as_xml(inventory), expected)
示例5: parse_document
def parse_document(self):
    """Parse any ResourceSync document and show information

    Reads the document at the URI held in self.sitemap, prints its
    capability and number of entries and, when self.verbose is set,
    prints the entries up to a limit of self.max_sitemap_entries
    (100 when that attribute is not set).

    Raises ClientFatalError if the document cannot be read.
    """
    s = Sitemap()
    self.logger.info("Reading sitemap(s) from %s ..." % (self.sitemap))
    try:
        fh = urllib.urlopen(self.sitemap)
        try:
            # Named `doc` rather than `list` so the builtin is not shadowed.
            doc = s.parse_xml(fh)
        finally:
            # Release the connection whether or not parsing succeeded.
            fh.close()
    except IOError as e:
        raise ClientFatalError("Cannot read document (%s)" % str(e))
    num_entries = len(doc.resources)
    capability = '(unknown capability)'
    if 'capability' in doc.md:
        capability = doc.md['capability']
    print("Parsed %s document with %d entries" % (capability, num_entries))
    if self.verbose:
        to_show = 100
        override_str = ' (override with --max-sitemap-entries)'
        if self.max_sitemap_entries:
            to_show = self.max_sitemap_entries
            override_str = ''
        if num_entries > to_show:
            print("Showing first %d entries sorted by URI%s..." % (to_show, override_str))
        for n, resource in enumerate(doc):
            print('[%d] %s' % (n, str(resource)))
            # Stop once to_show entries have been printed.
            if n + 1 >= to_show:
                break
示例6: read_reference_sitemap
def read_reference_sitemap(self, ref_sitemap, name="reference"):
    """Read reference sitemap and return the inventory

    The name parameter is only used in output messages, to say what
    type of sitemap is being read. When self.verbose is set the entries
    are also printed, up to self.max_sitemap_entries (100 when unset).
    """
    reader = Sitemap(allow_multifile=self.allow_multifile, mapper=self.mapper)
    self.logger.info("Reading %s sitemap(s) from %s ..." % (name, ref_sitemap))
    inventory = reader.read(ref_sitemap)
    total = len(inventory)
    self.logger.warning(
        "Read %s sitemap with %d entries in %d sitemaps" % (name, total, reader.sitemaps_created)
    )
    if not self.verbose:
        return inventory

    # Pick the display limit and the matching hint text together.
    if self.max_sitemap_entries:
        limit, hint = self.max_sitemap_entries, ""
    else:
        limit, hint = 100, " (override with --max-sitemap-entries)"
    if total > limit:
        print("Showing first %d entries sorted by URI%s..." % (limit, hint))
    for shown, entry in enumerate(inventory, 1):
        print(entry)
        if shown >= limit:
            break
    return inventory
示例7: write_static_inventory
def write_static_inventory(self):
    """Writes the inventory to the filesystem

    The sitemap is first written under Source.TEMP_FILE_PATH, then the
    sitemap files under Source.STATIC_FILE_PATH are replaced by the new
    ones and the temp directory is removed. Finally the elapsed time is
    logged and the source's observers are notified with an "UPDATED"
    ResourceChange.
    """
    # Generate sitemap in temp directory
    started = time.time()
    self.ensure_temp_dir(Source.TEMP_FILE_PATH)
    inventory = self.generate()
    target = Source.TEMP_FILE_PATH + "/sitemap.xml"
    writer = Sitemap()
    writer.max_sitemap_entries = self.config['max_sitemap_entries']
    writer.mapper = Mapper([self.source.base_uri, Source.TEMP_FILE_PATH])
    writer.write(inventory, target)

    # Delete old sitemap files; move the new ones; delete the temp dir
    self.rm_sitemap_files(Source.STATIC_FILE_PATH)
    self.mv_sitemap_files(Source.TEMP_FILE_PATH, Source.STATIC_FILE_PATH)
    shutil.rmtree(Source.TEMP_FILE_PATH)
    finished = time.time()

    # Log the completed sitemap write and tell the observers about it
    sitemap_size = self.compute_sitemap_size(Source.STATIC_FILE_PATH)
    self.logger.info(
        "Wrote static sitemap inventory. %s"
        % {'time': (finished - started), 'no_resources': self.source.resource_count}
    )
    written = ResourceChange(self.uri, size=sitemap_size, timestamp=started)
    sm_write_end = ResourceChange(resource=written, changetype="UPDATED")
    self.source.notify_observers(sm_write_end)
示例8: test2_pretty_output
def test2_pretty_output(self):
    """With pretty_xml enabled, inventory_as_xml emits one element per line."""
    builder = InventoryBuilder()
    builder.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
    inventory = builder.from_disk()
    writer = Sitemap()
    writer.pretty_xml = True
    # Expected document, split at its embedded newlines for readability.
    expected = (
        '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://resourcesync.org/change/0.1">\n'
        '<url><loc>http://example.org/t/file_a</loc><lastmod>2012-03-14T17:46:04</lastmod><rs:size>20</rs:size></url>\n'
        '<url><loc>http://example.org/t/file_b</loc><lastmod>2012-03-14T17:46:25</lastmod><rs:size>45</rs:size></url>\n'
        '</urlset>'
    )
    self.assertEqual(writer.inventory_as_xml(inventory), expected)
示例9: explore_uri
def explore_uri(self, uri, caps):
    """Interactive exploration of document at uri

    Will flag warnings if the document is not of type listed in caps
    (caps may be None to skip that check).

    Prints up to 20 entries of the document (all of them when there are
    at most 21) and prompts the user to pick one by number or to quit.

    Returns a tuple (uri, caps, inp): the URI of the chosen entry, a
    one-element list with that entry's capability, and the raw user
    input. Returns ('', '', 'q') when the user quits.

    Raises ClientFatalError if the document cannot be read.
    """
    s = Sitemap()
    print("Reading %s" % (uri))
    try:
        fh = urllib.urlopen(uri)
        try:
            # Named `doc` rather than `list` so the builtin is not shadowed.
            doc = s.parse_xml(fh)
        finally:
            # Release the connection whether or not parsing succeeded.
            fh.close()
    except IOError as e:
        raise ClientFatalError("Cannot read %s (%s)" % (uri, str(e)))
    num_entries = len(doc.resources)
    capability = '(unknown capability)'
    if 'capability' in doc.md:
        capability = doc.md['capability']
    if s.parsed_index:
        capability += 'index'
    print("Parsed %s document with %d entries:" % (capability, num_entries))
    if caps is not None and capability not in caps:
        print("WARNING - expected a %s document" % (','.join(caps)))
    to_show = num_entries
    if num_entries > 21:
        to_show = 20
    # What entries are allowed?
    # FIXME - not complete
    # Empty list means "no expectation known"; it keeps the checks below
    # from hitting an unbound name for other document types.
    entry_caps = []
    if capability == 'capabilitylistindex':
        entry_caps = ['capabilitylist']
    elif capability == 'capabilitylist':
        entry_caps = ['resourcelist', 'changelist', 'resourcedump', 'changedump', 'changelistindex']
    elif capability == 'changelistindex':
        entry_caps = ['changelist']
    n = 0
    options = {}
    for r in doc.resources:
        if n >= to_show:
            print("(not showing remaining %d entries)" % (num_entries - n))
            # The original had a bare `last` here (Perl), which raises NameError.
            break
        n += 1
        options[str(n)] = r
        print("[%d] %s" % (n, r.uri))
        if r.capability is not None:
            warning = ''
            # Only warn when we actually know which capabilities to expect.
            if entry_caps and r.capability not in entry_caps:
                warning = " (EXPECTED %s)" % (' or '.join(entry_caps))
            print(" %s%s" % (r.capability, warning))
        elif len(entry_caps) == 1:
            r.capability = entry_caps[0]
            print(" capability not specified, should be %s" % (r.capability))
    while True:
        inp = raw_input("Follow [number or q(uit)]?")
        if inp in options:
            break
        if inp == 'q':
            return ('', '', inp)
    selected = options[inp]
    caps = [selected.capability]
    if capability == 'capabilitylistindex':
        # all links should be to capabilitylist documents
        # (the original tested `caps is None`, which is never true for a list)
        if selected.capability is None:
            caps = ['capabilitylist']
    return (selected.uri, caps, inp)
示例10: test3_with_md5
def test3_with_md5(self):
    """Check that resources built with do_md5=True serialize size and an md5 fixity element."""
    ib = InventoryBuilder(do_md5=True)
    ib.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
    i = ib.from_disk()
    s = Sitemap()
    xml = s.resources_as_xml(i)
    # Raw strings keep the regex escapes (\w, \:, \-, \+) out of Python's own
    # string-escape processing; the resulting patterns are unchanged.
    self.assertIsNotNone(
        re.search(
            r'<loc>http://example.org/t/file_a</loc><lastmod>[\w\:\-]+Z</lastmod><rs:size>20</rs:size><rs:fixity type="md5">a/Jv1mYBtSjS4LR\+qoft/Q==</rs:fixity>',
            xml,
        )
    )  # must escape + in md5
    self.assertIsNotNone(
        re.search(
            r'<loc>http://example.org/t/file_b</loc><lastmod>[\w\:\-]+Z</lastmod><rs:size>45</rs:size><rs:fixity type="md5">RS5Uva4WJqxdbnvoGzneIQ==</rs:fixity>',
            xml,
        )
    )
示例11: publish_metadata
def publish_metadata(self, new_zips, exluded_zip=None):
    """
    (Re)publish metadata, adding new_zips to the resource dump. An excluded zip is removed from the
    previously published metadata. A capability list is written only if none exists yet.

    :param new_zips: a resourcelist with newly created zip resources
    :param exluded_zip: local path to zip file that will be removed from previously published metadata.
    :raises RuntimeError: if exluded_zip is given but not present in the existing resource dump.
    """
    dump_url = self.publish_url + RS_RESOURCE_DUMP_XML
    dump_path = os.path.join(self.publish_dir, RS_RESOURCE_DUMP_XML)
    capalist_url = self.publish_url + RS_CAPABILITY_LIST_XML
    capalist_path = os.path.join(self.publish_dir, RS_CAPABILITY_LIST_XML)

    dump = ResourceDump()

    # Start a fresh dump (set start time and up-link) unless one was published before.
    if not os.path.isfile(dump_path):
        dump.md_at = w3cdt.datetime_to_str(no_fractions=True)
        dump.link_set(rel="up", href=capalist_url)
    else:
        with open(dump_path, "r") as existing:
            Sitemap().parse_xml(existing, resources=dump)

    # Remove excluded zip, if any
    if exluded_zip:
        loc = self.publish_url + os.path.basename(exluded_zip)
        if loc not in dump.resources:
            raise RuntimeError("Could not find %s in %s" % (loc, dump_path))
        del dump.resources[loc]

    # Add new zips
    for zip_resource in new_zips:
        dump.add(zip_resource)

    # Write resource-dump.xml
    dump.md_completed = w3cdt.datetime_to_str(no_fractions=True)
    with open(dump_path, "w") as out:
        out.write(dump.as_xml())

    # The directory name is decoded with the URL-safe base64 alphabet to get the graph iri.
    dir_name = os.path.basename(self.publish_dir)
    iri = base64.urlsafe_b64decode(dir_name).rstrip("\n")
    print("New %s for graph %s" % (RS_RESOURCE_DUMP_XML, iri))
    print("See %s" % dump_url)

    # Write capability-list.xml
    if not os.path.isfile(capalist_path):
        capalist = CapabilityList()
        capalist.link_set(rel="up", href=self.src_desc_url)
        capalist.add_capability(dump, dump_url)
        with open(capalist_path, "w") as out:
            out.write(capalist.as_xml())
        print("New %s. See %s" % (RS_CAPABILITY_LIST_XML, capalist_url))
示例12: test_11_parse_2
def test_11_parse_2(self):
    """Parsing an inventory with two <url> entries creates two resources."""
    # Adjacent literals are concatenated; only the first piece ends in a newline.
    document = (
        "<?xml version='1.0' encoding='UTF-8'?>\n"
        "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:rs=\"http://resourcesync.org/change/0.1\">"
        "<url><loc>/tmp/rs_test/src/file_a</loc><lastmod>2012-03-14T18:37:36</lastmod><rs:size>12</rs:size></url>"
        "<url><loc>/tmp/rs_test/src/file_b</loc><lastmod>2012-03-14T18:37:36</lastmod><rs:size>32</rs:size></url>"
        "</urlset>"
    )
    parser = Sitemap()
    parser.inventory_parse_xml(fh=StringIO.StringIO(document))
    self.assertEqual(parser.resources_created, 2, "got 2 resources")
示例13: test_ex2_1
def test_ex2_1(self):
    """ex2_1 is a simple resourcelist with 2 resources, no metadata"""
    s = Sitemap()
    # The with-block closes the fixture file; the original never closed it.
    with open('resync/test/testdata/examples_from_spec/ex2_1.xml') as fh:
        si = s.resourcelist_parse_xml(fh=fh)
    self.assertEqual(len(si.resources), 2, '2 resources')
    sms = sorted(si.resources.keys())
    self.assertEqual(sms, ['http://example.com/res1', 'http://example.com/res2'])
    self.assertEqual(si.resources['http://example.com/res1'].lastmod, None)
示例14: test_11_parse_2
def test_11_parse_2(self):
    """Parsing a resourcelist with two <url> entries creates two resources."""
    # Adjacent literals are concatenated; only the first piece ends in a newline.
    document = (
        '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
        '<url><loc>/tmp/rs_test/src/file_a</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:size>12</rs:size></url>'
        '<url><loc>/tmp/rs_test/src/file_b</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:size>32</rs:size></url>'
        '</urlset>'
    )
    parser = Sitemap()
    parser.resourcelist_parse_xml(fh=StringIO.StringIO(document))
    self.assertEqual(parser.resources_created, 2, 'got 2 resources')
示例15: test_22_parse_sitemapindex_file
def test_22_parse_sitemapindex_file(self):
    """Parse a sitemapindex fixture file and check the three sitemaps it lists."""
    s = Sitemap()
    # The with-block closes the fixture file; the original never closed it.
    with open('resync/test/testdata/sitemapindex1/sitemap.xml') as fh:
        si = s.sitemapindex_parse_xml(fh=fh)
    self.assertEqual(s.sitemaps_created, 3, '3 sitemaps in sitemapindex')
    self.assertEqual(len(si.resources), 3, '3 sitemaps')
    sms = sorted(si.resources.keys())
    self.assertEqual(
        sms,
        [
            'http://localhost:8888/sitemap00000.xml',
            'http://localhost:8888/sitemap00001.xml',
            'http://localhost:8888/sitemap00002.xml',
        ],
    )
    self.assertEqual(si.resources['http://localhost:8888/sitemap00000.xml'].lastmod, '2012-06-13T18:09:13Z')