本文整理匯總了 Python 中 oaipmh.client.Client.handleVerb 方法的典型用法代碼示例。如果您正苦於以下問題:Python Client.handleVerb 方法的具體用法?Python Client.handleVerb 怎麼用?Python Client.handleVerb 使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類 oaipmh.client.Client 的用法示例。
在下文中一共展示了 Client.handleVerb 方法的 1 個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的 Python 代碼示例。
示例1: OAIPMHScraper
# 需要導入模塊: from oaipmh.client import Client [as 別名]
# 或者: from oaipmh.client.Client import handleVerb [as 別名]
class OAIPMHScraper(object):
"""Class to scrape the metadata from an OAIPMH target (given a metadataPrefix) and store the resultant sections in a Pairtree store as XML files. To get more than this, it is expected to subclass this class and overwrite the 'process(...)' method through which all the sections pass through."""
def __init__(self, storage_dir, base_oai_url=None, identifier_uri_prefix=None):
    """Open the record store and harvester state, then set up the OAI-PMH client.

    storage_dir           -- location handed to both Silo and PersistentState
    base_oai_url          -- handed to Silo as both ``uri_base`` and ``base_oai_url``
    identifier_uri_prefix -- handed to Silo unchanged
    """
    silo_options = {
        'uri_base': base_oai_url,
        'base_oai_url': base_oai_url,
        'identifier_uri_prefix': identifier_uri_prefix,
    }
    self.store = Silo(storage_dir, **silo_options)
    self.state = PersistentState(storage_dir, "oaipmh_harvester.json")
    # Must come last: _init_clients reads self.store.state and, through
    # identify(), self.state.
    self._init_clients()
def logactivity(self, **kw):
    """Serialise the keyword arguments to JSON, log them at debug level and return the string.

    Returns the JSON text on success. When the arguments cannot be
    serialised, the failure is logged at info level and None is returned;
    the error is deliberately not propagated (best-effort logging).
    """
    try:
        jsonmsg = simplejson.dumps(kw)
    except Exception:
        # The previous handler formatted an undefined name (``msg``), so a
        # serialisation failure surfaced as a NameError instead of this log
        # line. Report the offending payload itself.
        logger.info("Failed to serialise as JSON using simplejson: %s", kw)
        return None
    logger.debug(jsonmsg)
    return jsonmsg
def _init_clients(self):
    """Create the OAI-PMH client for the store's base URL and run identify().

    The client is kept on ``self._c``. If building the client or calling
    identify() raises OSError, the failure is logged at error level and a
    plain Exception with the same message is raised in its place.
    """
    failure = "Cannot make OAIPMH client"
    try:
        endpoint = self.store.state['base_oai_url']
        self._c = OaipmhClient(endpoint)
        self.identify()
    except OSError:
        logger.error(failure)
        raise Exception(failure)
def identify(self, refresh_cache=False):
    """Return the repository's Identify details as a dict.

    A copy already held in ``self.state['identify']`` is returned as-is
    unless ``refresh_cache`` is true. Otherwise the "Identify" verb is
    issued, the response's fields are copied into a dict stored under
    ``self.state['identify']``, ``self.state['lastidentified']`` is set to
    the current time in ISO format, and the state is synced.
    """
    have_cached = self.state.has_key("identify")
    if have_cached and not refresh_cache:
        return self.state['identify']

    response = self._c.handleVerb("Identify", {})
    # Values are read from the response object's underscore-prefixed attributes.
    self.state['identify'] = {
        'repositoryName': response._repositoryName,
        'baseURL': response._baseURL,
        'protocolVersion': response._protocolVersion,
        'adminEmails': response._adminEmails,
        # Stored as a string rather than as the original object.
        'earliestDatestamp': str(response._earliestDatestamp),
        'deletedRecord': response._deletedRecord,
        'granularity': response._granularity,
        'compression': response._compression,
        'descriptions': response._descriptions,
    }
    self.state['lastidentified'] = datetime.now().isoformat()
    self.state.sync()
    return self.state['identify']
def getSets(self, refresh_cache=False):
    """Return the repository's sets as a dict.

    A copy already held in ``self.state["sets"]`` is returned unless
    ``refresh_cache`` is true. Otherwise the "ListSets" verb is issued and
    each returned item is stored keyed by its first element, with the
    remaining elements as the value (presumably setSpec -> (name,
    description); confirm against the client library). The result is
    written to the state, synced, and returned.
    """
    have_cached = self.state.has_key("sets")
    if have_cached and not refresh_cache:
        return self.state["sets"]

    listing = self._c.handleVerb("ListSets", {})
    sets_by_key = dict((entry[0], entry[1:]) for entry in listing)
    self.state["sets"] = sets_by_key
    self.state.sync()
    return sets_by_key
def getMetadataPrefixes(self, refresh_cache=False):
    """Return the repository's metadata formats as a dict keyed by prefix.

    A copy already held in ``self.state['metadataPrefixes']`` is returned
    unless ``refresh_cache`` is true. Otherwise the "ListMetadataFormats"
    verb is issued; each returned 3-tuple is stored as
    ``first -> (second, third)`` (presumably prefix -> (schema, namespace);
    confirm against the client library). The time of the check is recorded
    in ``self.state['lastcheckmetadataformats']`` and the state is synced.
    """
    have_cached = self.state.has_key("metadataPrefixes")
    if have_cached and not refresh_cache:
        return self.state['metadataPrefixes']

    formats = {}
    for prefix, second, third in self._c.handleVerb("ListMetadataFormats", {}):
        formats[prefix] = (second, third)
    self.state['metadataPrefixes'] = formats
    checked_at = datetime.now().isoformat()
    # Keep only the first 19 characters: YYYY-mm-DDTHH:MM:ss
    self.state['lastcheckmetadataformats'] = checked_at[:19]
    self.state.sync()
    return self.state['metadataPrefixes']
def getIdentifiers(self, update=False, set_id=None):
# Generator: issues the "ListIdentifiers" verb and yields a (pid, date) tuple
# for the headers it returns, recording header details on the matching
# store item along the way.
#
# update -- not referenced anywhere in this method body.
# set_id -- when truthy, saved to self.state['set']; whatever set is held in
#           the state (new or from an earlier call) is sent as the 'set'
#           argument of the request.
#
# NOTE(review): indentation is missing from this listing, so the exact
# nesting of the statements after each `if` cannot be read off the text;
# confirm against the original project before restructuring.
# The metadata prefix used for the request is fixed to 'oai_dc'.
args = {'metadataPrefix':'oai_dc'}
# Make sure the harvest history list exists before appending to it.
if not self.state.has_key('harvests'):
self.state['harvests'] = []
if set_id:
self.state['set'] = set_id
# Record when this harvest was started, truncated to whole seconds.
self.state['harvests'].append(datetime.now().isoformat()[:19]) # YYYY-mm-DDTHH:MM:ss
self.state.sync()
if self.state.has_key("set"):
args['set'] = self.state['set']
for header in self._c.handleVerb("ListIdentifiers", args):
pid = header.identifier()
# The header datestamp is carried around as an ISO-format string.
date=header.datestamp().isoformat()
logger.info("Found identifier %s - adding header metadata to harvested record" % pid)
# Look up the store item for this identifier/date pair.
obj = self.store.get_item(pid, date)
obj.metadata['identifier'] = pid
# NOTE(review): 'firstSeen' is assigned on every call, not only the first
# time an identifier is encountered - confirm this is intended.
obj.metadata['firstSeen'] = date
obj.metadata['setSpec'] = header.setSpec()
# Headers flagged as deleted also get the item's current version and the
# header date recorded.
if header.isDeleted():
obj.metadata['deleted_at_version'] = obj.currentversion
obj.metadata['deleted_at_date'] = date
logger.info("Object with identifier: %s has an isDeleted flag." % (pid))
# NOTE(review): no explicit save/sync of obj.metadata is visible in this
# method - presumably the store item persists it; verify.
yield (pid, date)
@mdprefixcheck
def getRecords(self, metadataPrefix=None, update=False, set_id=None, _from=None, _until=None):
if metadataPrefix in self.state['metadataPrefixes']:
# if not global_metadata_registry.hasReader(metadataPrefix):
class DumbReader(object):
def __call__(self, element):
return element
global_metadata_registry.registerReader(metadataPrefix, DumbReader())
args = {'metadataPrefix':metadataPrefix}
if _from:
#.........這裏部分代碼省略.........