本文整理匯總了Python中oaipmh.client.Client類的典型用法代碼示例。如果您正苦於以下問題:Python Client類的具體用法?Python Client怎麼用?Python Client使用的例子?那麼, 這裏精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了Client類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: arxiv_oai_scraper
def arxiv_oai_scraper(subject, start, end, sleep_time=0):
    """Harvest ``oai_dc`` records from the arXiv OAI endpoint as plain dicts.

    Args:
        subject: Value formatted into the ``set`` argument of ``listRecords``.
        start: Passed to ``listRecords`` as ``from_``.
        end: Passed to ``listRecords`` as ``until``.
        sleep_time: Seconds to sleep after each record that is iterated.

    Returns:
        A list with one dict per record that has metadata; keys are
        ``title``, ``abstract``, ``date``, ``subject``, ``url`` and
        ``authors``.
    """
    # Output key -> metadata field read for it.
    field_map = (
        ("title", "title"),
        ("abstract", "description"),
        ("date", "date"),
        ("subject", "subject"),
        ("url", "identifier"),
        ("authors", "creator"),
    )

    harvested = list()
    reader_registry = MetadataRegistry()
    reader_registry.registerReader('oai_dc', oai_dc_reader)
    oai_client = Client("http://export.arxiv.org/oai2", reader_registry)
    oai_client.updateGranularity()

    record_iter = oai_client.listRecords(
        metadataPrefix='oai_dc',
        set="{}".format(subject),
        from_=start,
        until=end,
    )
    for _header, metadata, _about in record_iter:
        # Some records (the original note mentions a 2010 case) come back
        # without metadata; those are skipped.
        if metadata is not None:
            harvested.append(
                {out_key: metadata[field] for out_key, field in field_map}
            )
        time.sleep(sleep_time)
    return harvested
示例2: __init__
def __init__(
    self, base_url, metadata_registry=None, applicationContext=None, dayGranularity=False, credentials=None
):
    """Initialise both base classes and compile the regexes used while harvesting.

    Args:
        base_url: Forwarded to ``Client.__init__``.
        metadata_registry: Forwarded to ``Client.__init__``.
        applicationContext: Forwarded to
            ``SwissbibPreImportProcessor.__init__``.
        dayGranularity: Stored as ``self._day_granularity``.
        credentials: Forwarded to ``Client.__init__``.
    """
    Client.__init__(self, base_url, metadata_registry, credentials)
    SwissbibPreImportProcessor.__init__(self, applicationContext)
    self._day_granularity = dayGranularity

    # self.writeContext = writeContext
    # What is different here compared to Aleph!
    regex_flags = re.UNICODE | re.DOTALL | re.IGNORECASE

    # NOTE(review): self.context is presumably set by
    # SwissbibPreImportProcessor.__init__ -- confirm.
    iterator_structure = self.context.getConfiguration().getIteratorOAIStructure()
    if iterator_structure is not None:
        self.pIterSingleRecord = re.compile(iterator_structure, regex_flags)
    else:
        self.pIterSingleRecord = re.compile(r"<record>.*?</record>", regex_flags)
    # GH: 16.10.2015 this works for Nebis because we are looking for the outer
    # 'shell' of all <record>...</record> not qualified with additional
    # namespaces. We can use this for deleted as well as for full records.
    # Compare example in
    # exampleContentStructures/alma/deletedAndUpdatedRecords.xml
    # With Aleph this isn't as easy.
    # self.pIterSingleRecordNebis = re.compile('<record>.*?</record>',re.UNICODE | re.DOTALL | re.IGNORECASE)

    self.pResumptionToken = re.compile(
        r"<resumptionToken.*?>(.{1,}?)</resumptionToken>", regex_flags
    )
    # Raw string: "\d" in a normal string literal is an invalid escape
    # sequence that newer Python versions warn about; the pattern is unchanged.
    self.harvestingErrorPattern = re.compile(
        r"(<error.*?>.*?</error>|<html>.*?HTTP.*?Status.*?4\d\d)", regex_flags
    )
示例3: list_oai_collections
def list_oai_collections(self, community):
    """Retrieve the header data for each record in the current community repo.

    Every set spec starting with ``'col'`` that appears on a listed header is
    appended to ``self.collections`` as an ``[identifier, name]`` pair.

    Args:
        community: Object providing ``repository.base_url`` and
            ``identifier``.

    Returns:
        None. If setting up the client or issuing the request raises, the
        method returns early without touching ``self.collections``.
    """
    try:
        registry = MetadataRegistry()
        registry.registerReader('oai_dc', oai_dc_reader)
        client = Client(community.repository.base_url, registry)
        records = client.listIdentifiers(
            metadataPrefix='oai_dc', set=community.identifier)
    except Exception:
        # Best effort: an unreachable or misbehaving repository yields no
        # collections. Narrowed from a bare ``except`` so KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        return

    # Filter records to build the set of collections in the community set.
    community_collections = set()
    for header in records:
        for set_spec in header.setSpec():
            if set_spec.startswith('col'):
                community_collections.add(set_spec)
    print(len(community_collections))

    # Build collection entries (identifier, human readable name).
    for identifier in community_collections:
        self.collections.append([identifier, 'Collection: %s' % identifier])
示例4: insertAll
def insertAll(time, time2):
    """Harvest ``arXivRaw`` records in a date window and pass each to ``insert``.

    Args:
        time: Passed to ``listRecords`` as ``from_``.
        time2: Passed to ``listRecords`` as ``until``.

    Records that fail to convert or insert are printed and counted; the
    total number of failures is printed at the end.
    """
    registry = MetadataRegistry()
    registry.registerReader('arXivRaw', arXivRaw_reader)
    client = Client(URL, registry)
    client.updateGranularity()
    # Named ``records`` rather than ``list`` so the builtin is not shadowed.
    records = client.listRecords(metadataPrefix='arXivRaw', from_=time, until=time2)
    errors = 0
    for record in records:
        try:
            metadata = record[1]
            title = '\n'.join(metadata['title'])
            # Category names get '-' replaced by '_' before being split.
            sr2 = str(' '.join(metadata['categories']).replace('-', '_')).split(' ')
            abstract = '\n'.join(metadata['abstract'])
            url = 'http://arxiv.org/abs/' + metadata['id'][0]
            date = datetime.strptime(metadata['created'][0], '%a, %d %b %Y %H:%M:%S %Z')
            authors = metadata['authors'][0]  # '; '.join(metadata['keynames'])
            abstract = abstract + '\nBy: ' + authors + '\nIn: ' + ', '.join(sr2)
            print(title)
            print(sr2)
            print(abstract)
            print(url)
            print(date)
            print(authors)
            insert(title + ' (' + authors + ')', str("fullarxiv"), url, abstract, date=date, cross_srs=sr2)
        except Exception:
            # Keep going on a bad record. Narrowed from a bare ``except`` so
            # Ctrl-C can still interrupt a long harvest.
            print('ERROR')
            print(record)
            errors = errors + 1
    print('Completed with %s errors' % errors)
示例5: scrape
def scrape(self):
    """Unfinished scraper: always raises before doing any work.

    The draft below the ``raise`` is unreachable and kept for reference. It
    would list the repository's sets, harvest ``oai_dc`` records filtered by
    the ``set``/``from``/``until`` settings and pickle them to the configured
    data file.

    Raises:
        NotImplementedError: Always, with the message ``"not finished"``.
            It is a subclass of ``Exception``, so callers catching
            ``Exception`` still work.
    """
    raise NotImplementedError("not finished")

    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    url = self.setting('pmh-endpoint')
    client = Client(url, registry)

    print(" OAI Repository %s" % url)
    print(" Available sets:")
    for s in client.listSets():
        # Wrapped in a 1-tuple so a tuple-valued ``s`` formats as one value.
        print("  %s" % (s,))

    oai_set = self.setting('set')
    oai_from = self.setting('from')
    oai_until = self.setting('until')

    kwargs = {}
    if oai_set:
        kwargs['set'] = oai_set
    if oai_from is not None:
        # The split/int conversion expects dash-separated integers.
        date_args = [int(arg) for arg in oai_from.split("-")]
        kwargs['from_'] = datetime.datetime(*date_args)
    if oai_until is not None:
        date_args = [int(arg) for arg in oai_until.split("-")]
        kwargs['until'] = datetime.datetime(*date_args)

    records = list(client.listRecords(metadataPrefix='oai_dc', **kwargs))

    data_filepath = os.path.join(self.work_dir(), self.setting('data-file'))
    with open(data_filepath, 'wb') as f:
        print(" picking %d records" % len(records))
        pickle.dump(records, f)
示例6: getClient
def getClient(self):
    """Return the client object to harvest with for this source.

    Returns:
        A ``FakeOaiClientForCrossref`` when ``self.format`` is
        ``'citeproc'``; otherwise a ``Client`` for ``self.url`` whose
        ``get_method`` and ``_day_granularity`` are copied from this object.
    """
    if self.format != 'citeproc':
        empty_registry = MetadataRegistry()
        oai_client = Client(self.url, empty_registry)
        oai_client.get_method = self.get_method
        oai_client._day_granularity = self.day_granularity
        return oai_client
    return FakeOaiClientForCrossref()
示例7: list_sets
def list_sets(target):
    """List the sets of an OAI target, converted with ``convert_setspec``.

    Args:
        target: Mapping with a ``'url'`` entry, or ``None``.

    Returns:
        A list of converted set specs. The list is empty when ``target`` is
        ``None`` or the client returns ``None``.
    """
    if target is None:
        return []
    client = Client(target['url'], registry)
    setspecs = client.listSets()
    if setspecs is None:
        return []
    return [convert_setspec(setspec) for setspec in setspecs]
示例8: list_identifiers
def list_identifiers(target, date_from, date_until, setspec):
    """List record headers of an OAI target, converted with ``convert_header``.

    Args:
        target: Mapping with ``'url'`` and ``'metadata_prefix'`` entries, or
            ``None``.
        date_from: Optional value for the ``from_`` filter.
        date_until: Optional value for the ``until`` filter.
        setspec: Optional value for the ``set`` filter.

    Returns:
        A list of converted headers. The list is empty when ``target`` is
        ``None`` or the client returns ``None``.
    """
    if target is None:
        return []
    client = Client(target['url'], registry)
    # Only forward filters that were supplied, so the client never receives
    # an explicit None for from_/until/set.
    filters = (('from_', date_from), ('until', date_until), ('set', setspec))
    kwargs = {name: value for name, value in filters if value is not None}
    headers = client.listIdentifiers(metadataPrefix=target['metadata_prefix'], **kwargs)
    if headers is None:
        return []
    return [convert_header(header) for header in headers]
示例9: _listRecords
def _listRecords(self, baseUrl, metadataPrefix="oai_dc", **kwargs):
    """Yield records from ``baseUrl`` in the given ``metadataPrefix``.

    Args:
        baseUrl: Passed to ``Client`` as the repository URL.
        metadataPrefix: Added to the ``listRecords`` arguments.
        **kwargs: Further arguments forwarded to ``listRecords``.

    Yields:
        Each item produced by ``client.listRecords``.
    """
    # Merge the prefix into the request arguments.
    request_args = dict(kwargs, metadataPrefix=metadataPrefix)
    oai_client = Client(baseUrl, metadata_registry)
    # Check server timestamp granularity support.
    oai_client.updateGranularity()
    record_iter = oai_client.listRecords(**request_args)
    for item in record_iter:
        yield item
示例10: list_metadata_formats
def list_metadata_formats(target, identifier):
    """List metadata formats of an OAI target for one identifier.

    Args:
        target: Mapping with a ``'url'`` entry, or ``None``.
        identifier: Passed to ``listMetadataFormats`` as ``identifier``.

    Returns:
        A list of formats converted with ``convert_metadata_formats``. The
        list is empty when ``target`` is ``None`` or the client returns
        ``None``.
    """
    if target is None:
        return []
    client = Client(target['url'], registry)
    metadata_formats = client.listMetadataFormats(identifier=identifier)
    if metadata_formats is None:
        return []
    return [convert_metadata_formats(metadata_format) for metadata_format in metadata_formats]
示例11: list_records
def list_records(target, date_from, date_until, setspec):
    """List records of an OAI target, converted with ``convert_record``.

    Args:
        target: Mapping with ``'url'``, ``'metadata_prefix'`` and ``'title'``
            entries, or ``None``.
        date_from: Optional value for the ``from_`` filter.
        date_until: Optional value for the ``until`` filter.
        setspec: Optional value for the ``set`` filter.

    Returns:
        A list of converted records. The list is empty when ``target`` is
        ``None`` or the client returns ``None``.
    """
    if target is None:
        return []
    client = Client(target['url'], registry)
    # Forward only the filters that are not None. This replaces the former
    # eight-way if/elif chain, which enumerated every None/not-None
    # combination to achieve the same thing.
    filters = (('from_', date_from), ('until', date_until), ('set', setspec))
    kwargs = {name: value for name, value in filters if value is not None}
    records = client.listRecords(metadataPrefix=target['metadata_prefix'], **kwargs)
    if records is None:
        return []
    return [
        convert_record(record, target['metadata_prefix'], target['title'])
        for record in records
    ]
示例12: list_records
def list_records(target, date_from, date_until, setspec):
    """Yield records of an OAI target, converted with ``convert_record``.

    This is a generator: nothing (including the debug log line) runs until
    iteration starts.

    Args:
        target: Mapping with ``'url'``, ``'metadata_prefix'`` and ``'title'``
            entries, or ``None``.
        date_from: Optional value for the ``from_`` filter.
        date_until: Optional value for the ``until`` filter.
        setspec: Optional value for the ``set`` filter.

    Yields:
        One converted record per harvested record. Nothing is yielded when
        ``target`` is ``None`` or the client returns ``None``.
    """
    logging.debug("list_records")
    if target is None:
        return
    client = Client(target['url'], registry)
    # Forward only the filters that are not None. This replaces the former
    # eight-way if/elif chain, which enumerated every None/not-None
    # combination to achieve the same thing.
    filters = (('from_', date_from), ('until', date_until), ('set', setspec))
    kwargs = {name: value for name, value in filters if value is not None}
    records = client.listRecords(metadataPrefix=target['metadata_prefix'], **kwargs)
    if records is None:
        return
    for record in records:
        yield convert_record(record, target['metadata_prefix'], target['title'])
示例13: pull_data
def pull_data(source):
    """Harvest public experiments from ``source`` and return them as JSON strings.

    Each harvested ``oai_dc`` record is combined with the profile of its
    creator (fetched with ``_get_user``) into one dict that is serialised
    with ``json.dumps``.

    Args:
        source: Base URL of the source; the OAI-PMH path and the experiment
            path are appended to it.

    Returns:
        A list of JSON strings, one per harvested record, or ``None`` when
        the source reports that no records match.

    Raises:
        OAIPMHError: If an ``AttributeError`` occurs while reading records.
    """
    # Get list of public experiments at the source.
    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(source
                    + "/apps/oaipmh/?verb=ListRecords&metadataPrefix=oai_dc", registry)

    # (datestamp, metadata) pairs kept together, so no separate counter is
    # needed to re-associate them later.
    harvested = []
    try:
        for (header, meta, extra) in client.listRecords(metadataPrefix='oai_dc'):
            harvested.append((str(header._datestamp), meta))
            logger.debug('Date=%s', header._datestamp)
    except AttributeError as e:
        msg = "Error reading experiment %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except error.NoRecordsMatchError as e:
        msg = "no public records found on source %s" % e
        logger.warning(msg)
        return

    list_of_records = []
    for exp_date, exp_metadata in harvested:
        user_id = exp_metadata.getField('creator')[0]
        user_profile = json.loads(_get_user(source, user_id))
        data_tobe_indexed = dict(user_profile)
        data_tobe_indexed['user_id'] = user_id
        exp_id = exp_metadata.getField('identifier')[0]
        description = exp_metadata.getField('description')[0]
        title = exp_metadata.getField('title')[0]

        # NOTE(review): this rewrites the shared settings value in place, as
        # the original did; a leading '/' would make os.path.join discard
        # ``source``. Consider normalising once where the setting is defined.
        if settings.EXPERIMENT_PATH[0] == '/':
            settings.EXPERIMENT_PATH = settings.EXPERIMENT_PATH[1:]
        experiment_url = os.path.join(source,
                                      settings.EXPERIMENT_PATH % exp_id)

        data_tobe_indexed['experiment_id'] = exp_id
        data_tobe_indexed['experiment_title'] = title
        data_tobe_indexed['experiment_description'] = description
        data_tobe_indexed['experiment_url'] = experiment_url
        data_tobe_indexed['id'] = experiment_url
        data_tobe_indexed['experiment_date'] = exp_date

        for k, v in data_tobe_indexed.items():
            logger.debug('%s = %s', k, v)
        logger.debug('')
        list_of_records.append(json.dumps(data_tobe_indexed))
    return list_of_records
示例14: test
def test(request):
    """Identify a fixed OAI repository and respond with its name.

    Args:
        request: The incoming request; not used by the body.

    Returns:
        An ``HttpResponse`` whose content is the ``repositoryName()`` of the
        identify response.
    """
    endpoint = 'http://www.kulturarv.dk/ffrepox/OAIHandler'
    reader_registry = MetadataRegistry()
    reader_registry.registerReader('oai_dc', oai_dc_reader)
    oai_client = Client(endpoint, reader_registry)
    identity = oai_client.identify()
    # Debug aid: dump the attributes available on the identify response.
    print(dir(identity))
    repository_name = identity.repositoryName()
    return HttpResponse(repository_name)
示例15: harvest_oai_collection_records
def harvest_oai_collection_records(self, collection):
    """Request the ``oai_dc`` records of one collection.

    Args:
        collection: Object providing ``community.repository.base_url`` and
            ``identifier`` (used as the ``set`` filter).

    Returns:
        Whatever ``client.listRecords`` returns, or ``None`` if setting up
        the client or issuing the request raises.
    """
    try:
        registry = MetadataRegistry()
        registry.registerReader('oai_dc', oai_dc_reader)
        client = Client(collection.community.repository.base_url, registry)
        return client.listRecords(
            metadataPrefix='oai_dc', set=collection.identifier)
    except Exception:
        # Best effort: signal failure with None. Narrowed from a bare
        # ``except`` so KeyboardInterrupt and SystemExit are not swallowed.
        return None