This article collects typical usage examples of the Python method oaipmh.client.Client.identify. If you are wondering what Client.identify does, how to call it, or where to find working examples, the curated code samples below may help. You can also read more about the containing class, oaipmh.client.Client.
Eleven code examples of Client.identify are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
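Before the individual examples, here is a minimal, self-contained sketch of the pattern they all share: register a metadata reader, construct a Client, and call identify(). This is only an illustrative sketch and not one of the 11 collected examples; the base URL below is a hypothetical placeholder, while the accessor methods shown (repositoryName, protocolVersion, earliestDatestamp) are the same ones used in the examples that follow.

from oaipmh.client import Client
from oaipmh.metadata import MetadataRegistry, oai_dc_reader

BASE_URL = 'http://example.org/oai'  # hypothetical endpoint; replace with a real OAI-PMH base URL

# Register a reader for the oai_dc metadata format, then build the client
registry = MetadataRegistry()
registry.registerReader('oai_dc', oai_dc_reader)
client = Client(BASE_URL, registry)

# identify() issues an OAI-PMH Identify request and returns the server's self-description
identity = client.identify()
print(identity.repositoryName())     # human-readable repository name
print(identity.protocolVersion())    # usually '2.0'
print(identity.earliestDatestamp())  # datetime of the oldest record in the repository

When the URL is not an OAI-PMH endpoint, identify() typically fails with a parsing or HTTP error, which is why several of the examples below wrap the call in a try/except block.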
Example 1: test
# Required import: from oaipmh.client import Client [as alias]
# Or: from oaipmh.client.Client import identify [as alias]
def test(request):
    URL = 'http://www.kulturarv.dk/ffrepox/OAIHandler'
    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(URL, registry)
    identifyResponse = client.identify()
    print dir(identifyResponse)
    #for record in client.listRecords(metadataPrefix='oai_dc'):
    #    result += record
    return HttpResponse(identifyResponse.repositoryName())
Example 2: clean
# Required import: from oaipmh.client import Client [as alias]
# Or: from oaipmh.client.Client import identify [as alias]
def clean(self):
    cleaned_data = super(CreateRepositoryForm, self).clean()
    try:
        registry = MetadataRegistry()
        registry.registerReader('oai_dc', oai_dc_reader)
        client = Client(cleaned_data.get('base_url'), registry)
        server = client.identify()
        # Set the repository name to apply to the model instance when it is saved.
        cleaned_data['name'] = server.repositoryName()
    except:
        raise ValidationError('Repository base url is invalid.')
    return cleaned_data
Example 3: _listRecords
# Required import: from oaipmh.client import Client [as alias]
# Or: from oaipmh.client.Client import identify [as alias]
def _listRecords(self, baseUrl, metadataPrefix="oai_dc", **kwargs):
    # Generator to yield records from baseUrl in the given metadataPrefix
    # Add metadataPrefix to args
    kwargs['metadataPrefix'] = metadataPrefix
    client = Client(baseUrl, self._mdRegistry)
    # Check that baseUrl actually represents an OAI-PMH target
    try:
        client.identify()
    except IndexError:
        raise NotOAIPMHBaseURLException(
            "{0} does not appear to be an OAI-PMH compatible base URL"
            "".format(baseUrl)
        )
    # Check server timestamp granularity support
    client.updateGranularity()
    for record in client.listRecords(**kwargs):
        # Unit test hotfix
        header, metadata, about = record
        # Fix pyoai returning a "b'...'" string for py3k
        if isinstance(metadata, str) and metadata.startswith("b'"):
            metadata = ast.literal_eval(metadata).decode("utf-8")
        yield (header, metadata, about)
Example 4: MetadataRegistry
# Required import: from oaipmh.client import Client [as alias]
# Or: from oaipmh.client.Client import identify [as alias]
URL = "http://citeseerx.ist.psu.edu/oai2"
registry = MetadataRegistry()
registry.registerReader('oai_dc', oai_dc_reader)
client = Client(URL, registry)
client.updateGranularity()
store = Store()

if len(sys.argv) > 1:
    start = datetime.strptime(sys.argv[1], '%Y-%m-%d')  # 2011-10-27, for instance
elif store.last():
    start = store.last()
else:
    start = client.identify().earliestDatestamp()

# Try this and see if it works; if it does resumption tokens right, this should work fine.
chunk = timedelta(days=1)
oneday = timedelta(days=1)

# TODO: clearly they don't do this whole "ordered" thing. Grab records by month or year or something instead of all at once.
# TODO: luckily, once we've done a full slurp, we only need to remember when the last full slurp was and start since then. But if interrupted, we need to start back from where the last *full* slurp was, due to the ordering problem.
# TODO: structure this better, with the try effectively moved much further above. Really, move a lot more into functions.
try:
    current = start  # TODO: make a nice little generator so I can use a for loop
    while current <= datetime.now():
        print >>sys.stderr, "fetching records @", now(), "starting with", current.strftime('%Y-%m-%d')
Example 5: run
# Required import: from oaipmh.client import Client [as alias]
# Or: from oaipmh.client.Client import identify [as alias]
def run(self):
    # Check that ElasticSearch is alive
    self.check_index()
    # If the user specified the --REBUILD flag, recreate the index
    if self.options['rebuild']:
        self.rebuild_index()
    # Connect to the repository
    registry = MetadataRegistry()
    registry.registerReader(self.settings["metadata_format"], self.settings["metadata_reader"])
    client = Client(self.settings["uri"], registry)
    identity = client.identify()
    print "Connected to repository: %s" % identity.repositoryName()
    # Got to update granularity or we barf with:
    # oaipmh.error.BadArgumentError: Max granularity is YYYY-MM-DD:2003-04-10T00:00:00Z
    client.updateGranularity()
    # Initialise some variables
    batcher = Batch.Batch()
    total_records = 0
    start = time.time()
    # Now do the synchronisation
    # If the user specified an identifier, then synchronise this record
    if (self.options['identifier'] is not None):
        total_records += self.synchronise_record(client, batcher, self.options['identifier'])
    else:
        # Else, synchronise using the date-range provided by the user, or failing that,
        # the date-range based on the last sync
        # Get the synchronisation config record
        synchronisation_config = self.get_synchronisation_config()
        if self.options["from_date"] is not None:
            # If the user specified a from-date argument, use it
            from_date = self.options["from_date"]  # already a date (not a datetime)
        elif synchronisation_config is not None and "to_date" in synchronisation_config:
            # Else read the last synchronised to_date from the config, and add on a day
            from_date = dateutil.parser.parse(synchronisation_config["to_date"]).date() + timedelta(days=1)
        else:
            # Else use the default_from_date in the config
            from_date = dateutil.parser.parse(self.settings['default_from_date']).date()
        if self.options["to_date"] is not None:
            to_date = self.options["to_date"]  # already a date (not a datetime)
        else:
            to_date = (date.today() - timedelta(days=1))
        # Force the from_date to use time 00:00:00
        from_date = datetime.combine(from_date, _time(hour=0, minute=0, second=0, microsecond=0))
        # Force the to_date to use time 23:59:59
        to_date = datetime.combine(to_date, _time(hour=23, minute=59, second=59, microsecond=0))
        print "Synchronising from %s - %s" % (from_date, to_date)
        while from_date < to_date:
            next_date = datetime.combine(from_date.date() + timedelta(days=(self.settings['delta_days'] - 1)), _time(hour=23, minute=59, second=59, microsecond=0))
            number_of_records = self.synchronise_period(client, batcher, from_date, next_date)
            batcher.clear()  # Store the records in elasticsearch
            self.put_synchronisation_config(from_date, next_date, number_of_records)
            from_date += timedelta(days=(self.settings['delta_days']))
            total_records += number_of_records
            # Pause so as not to get banned.
            to = 20
            print "Sleeping for %i seconds so as not to get banned." % to
            time.sleep(to)
    # Store the records in the index
    batcher.clear()
    # Print out some statistics
    time_spent = time.time() - start
    print 'Total time spent: %d seconds' % (time_spent)
    if time_spent > 0.001:  # careful, as it's not an integer
        print 'Total records synchronised: %i records (%d records/second)' % (total_records, (total_records / time_spent))
    else:
        print 'Total records synchronised: %i records' % (total_records)
    return total_records
    sys.exit()
Example 6: add_provider
# Required import: from oaipmh.client import Client [as alias]
# Or: from oaipmh.client.Client import identify [as alias]
def add_provider(cxn, args):
    """Add a new provider to the registry database.

    Process ``args`` to add a new provider to the registry database. Return 0
    for success, 1 for failure (error message should be logged).

    ``cxn`` => instance of ``sqlite3.Connection``
    ``args`` => instance of ``argparse.Namespace``
    """
    global logger, MAX_NAME_LENGTH
    addlogger = logger.getChild('add')
    # Validate name
    if len(args.name) > MAX_NAME_LENGTH:
        addlogger.critical('Short name for new provider must be no more than '
                           '{0} characters long'.format(MAX_NAME_LENGTH))
        return 1
    elif args.name.startswith(('http://', 'https://')) or args.name == 'all':
        addlogger.critical('Short name for new provider may not be "all" nor '
                           'may it begin "http://" or "https://"')
        return 1
    # Try to create the row now to avoid unnecessary validation if it is a duplicate
    try:
        cxn.execute("INSERT INTO providers(name, lastHarvest) values "
                    "(?, ?)",
                    (args.name, datetime.fromtimestamp(0))
                    )
    except sqlite3.IntegrityError:
        addlogger.critical('Unable to add provider "{0}"; '
                           'provider with this name already exists'
                           ''.format(args.name)
                           )
        return 1
    else:
        addlogger.info('Adding provider "{0}"'.format(args.name))
    # Get any missing information
    # Base URL
    if args.url is None:
        args.url = raw_input('Base URL:'.ljust(20))
    if not args.url:
        addlogger.critical('Base URL for new provider not supplied')
        return 1
    # Set up an OAI-PMH client for validating providers
    md_registry = MetadataRegistry()
    md_registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(args.url, md_registry)
    # Validate Base URL by fetching Identify
    try:
        client.identify()
    except (XMLSyntaxError, HTTPError):
        addlogger.critical('Base URL for new provider does not return a valid '
                           'response to an `Identify` request')
        return 1
    # Destination
    if args.dest is None:
        args.dest = raw_input('Destination directory: '.ljust(20))
    if args.dest:
        # Expand user dir
        args.dest = os.path.expanduser(args.dest)
    else:
        addlogger.info('Destination for data for new provider not supplied; '
                       'using default `pwd`: {0}'.format(os.getcwd())
                       )
        args.dest = os.getcwd()
    # metadataPrefix
    # Check that the selected metadataPrefix is available from the provider
    # Fetch list of available formats
    mdps = dict((mdpinfo[0], mdpinfo[1:])
                for mdpinfo in
                client.listMetadataFormats())
    while args.metadataPrefix not in mdps:
        print "Available metadataPrefix values:"
        # List available formats
        for mdp in mdps:
            print mdp, '-', mdps[mdp][1]
        args.metadataPrefix = raw_input('metadataPrefix [oai_dc]:'.ljust(20))
        if not args.metadataPrefix:
            addlogger.info('metadataPrefix for new provider not supplied; '
                           'using default: oai_dc')
            args.metadataPrefix = 'oai_dc'
    cxn.execute("UPDATE providers SET "
                "url=?, "
                "destination=?, "
                "metadataPrefix=? "
                "WHERE name=?",
                (args.url,
                 args.dest,
                 args.metadataPrefix,
                 args.name
                 )
                )
    addlogger.info('URL for next harvest: {0}?verb=ListRecords'
                   '&metadataPrefix={1}'
                   '&from={2:%Y-%m-%dT%H:%M:%SZ%z}'
                   ''.format(args.url,
                             args.metadataPrefix,
                             datetime.fromtimestamp(0)
                             )
                   )
    # All done, commit database
    cxn.commit()
#......... (the rest of this example is omitted) .........
Example 7: identifiy
# Required import: from oaipmh.client import Client [as alias]
# Or: from oaipmh.client.Client import identify [as alias]
def identifiy(target):
    if target is not None:
        client = Client(target['url'], registry)
        identify = client.identify()
        return convert_identifiy(identify)
Example 8: MetadataRegistry
# Required import: from oaipmh.client import Client [as alias]
# Or: from oaipmh.client.Client import identify [as alias]
        'oai_oi': 'http://www.openbeelden.nl/feeds/oai/', #'http://www.openarchives.org/OAI/2.0/oai_oi/',
        'oi': 'http://www.openbeelden.nl/oai/'}
)

#URL = 'http://www.openbeelden.nl/oip-test/feeds/oai/'
URL = 'http://www.openbeelden.nl/feeds/oai/'
#URL = 'http://oai.tuxic.nl/oai/'

# Initialise the OAI client
registry = MetadataRegistry()
registry.registerReader('oai_oi', oai_oi_reader)
client = Client(URL, registry)
x = client.updateGranularity()

# Check that the OAI service identifies itself correctly
x = client.identify()
print 'identity %s' % x.repositoryName()
print 'identity %s' % x.protocolVersion()
print 'identity %s' % x.baseURL()

OUTPUT_DIR = '/Users/jblom/temp'
print 'Firing up the openSKOSHandler'
osh = OpenSKOSHandler()

def processOpenbeelden():
    i = 0
    iarecs = []
    #for y in client.listRecords(metadataPrefix='oai_oi', from_=parse('2011-01-01'), until=parse('2011-11-01')):
    extent = None
Example 9: MARCXMLReader
# Required import: from oaipmh.client import Client [as alias]
# Or: from oaipmh.client.Client import identify [as alias]
marcxml_reader = MARCXMLReader()

# Define the metadata readers in the registry
from oaipmh import metadata

registry = metadata.MetadataRegistry()
registry.registerReader('oai_dc', metadata.oai_dc_reader)
registry.registerReader('marc21', marcxml_reader)

#### OAI-PMH Client processing
oai = Client('http://snape.mzk.cz/OAI-script', registry)

id = oai.identify()
print id.repositoryName()
print id.adminEmails()
print id.baseURL()

formats = oai.listMetadataFormats()
pprint(formats)
# 'marc21'

sets = oai.listSets()
for s in sets:
    print s
# 'MZK03'
Example 10: transfer_experiment
# Required import: from oaipmh.client import Client [as alias]
# Or: from oaipmh.client.Client import identify [as alias]
def transfer_experiment(source):
    """
    Pull public experiments from source into current mytardis.
    """
    #TODO: Clean up error messages
    #TODO: does not transfer licences, as they are not part of the METS format.
    #NOTE: As this is a pull, we trust the data from the other tardis

    # Check identity of the feed
    from oaipmh.client import Client
    from oaipmh import error
    from oaipmh.metadata import MetadataRegistry, oai_dc_reader

    from django.core.cache import cache
    from django.utils.hashcompat import md5_constructor as md5

    # The cache key consists of the task name and the MD5 digest
    # of the feed URL.
    cache_key = md5("token").hexdigest()
    lock_id = "%s-lock-%s" % ("consume_experiment", cache_key)
    LOCK_EXPIRE = 60 * 5
    # cache.add fails if the key already exists
    acquire_lock = lambda: cache.add(lock_id, "true", LOCK_EXPIRE)
    # memcache delete is very slow, but we have to use it to take
    # advantage of using add() for atomic locking
    release_lock = lambda: cache.delete(lock_id)

    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    source_url = "%s/apps/oaipmh/?verb=Identify" % source
    client = Client(source_url, registry)
    try:
        identify = client.identify()
    except AttributeError as e:
        msg = "Error reading repos identity: %s:%s" % (source, e)
        logger.error(msg)
        raise ReposReadError(msg)
    except error.ErrorBase as e:
        msg = "OAIPMH error: %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except URLError as e:
        logger.error(e)
        raise

    repos = identify.baseURL()
    import urlparse
    repos_url = urlparse.urlparse(repos)
    dest_name = "%s://%s" % (repos_url.scheme, repos_url.netloc)
    if dest_name != source:
        msg = "Source directory reports incorrect name: %s" % dest_name
        logger.error(msg)
        raise BadAccessError(msg)

    # Get list of public experiments at source
    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(source
                    + "/apps/oaipmh/?verb=ListRecords&metadataPrefix=oai_dc", registry)
    try:
        exps_metadata = [meta
                         for (header, meta, extra)
                         in client.listRecords(metadataPrefix='oai_dc')]
    except AttributeError as e:
        msg = "Error reading experiment %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except error.NoRecordsMatchError as e:
        msg = "no public records found on source %s" % e
        logger.warn(msg)
        return

    local_ids = []
    for exp_metadata in exps_metadata:
        exp_id = exp_metadata.getField('identifier')[0]
        user = exp_metadata.getField('creator')[0]
        found_user = _get_or_create_user(source, user)

        # Make sure the experiment is public-ish
        try:
            xmldata = getURL("%s/apps/reposproducer/expstate/%s/"
                             % (source, exp_id))
        except HTTPError as e:
            msg = "cannot get public state of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        try:
            exp_state = json.loads(xmldata)
        except ValueError as e:
            msg = "cannot parse public state of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if not exp_state in [Experiment.PUBLIC_ACCESS_FULL,
                             Experiment.PUBLIC_ACCESS_METADATA]:
            msg = 'cannot ingest private experiment %s.' % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        # Get the usernames of isOwner django_user ACLs for the experiment
        try:
#......... (the rest of this example is omitted) .........
Example 11: OpenBeeldenDataLoader
# Required import: from oaipmh.client import Client [as alias]
# Or: from oaipmh.client.Client import identify [as alias]
#......... (preceding part of this example omitted) .........
                'description': ('textList', 'oai_oi:oi/oi:description/text()'),
                'abstract': ('textList', 'oai_oi:oi/oi:abstract/text()'),
                'publisher': ('textList', 'oai_oi:oi/oi:publisher/text()'),
                'contributor': ('textList', 'oai_oi:oi/oi:contributor/text()'),
                'date': ('textList', 'oai_oi:oi/oi:date/text()'),
                'type': ('textList', 'oai_oi:oi/oi:type/text()'),
                'extent': ('textList', 'oai_oi:oi/oi:extent/text()'),
                'medium': ('textList', 'oai_oi:oi/oi:medium/text()'),
                'identifier': ('textList', 'oai_oi:oi/oi:identifier/text()'),
                'source': ('textList', 'oai_oi:oi/oi:source/text()'),
                'language': ('textList', 'oai_oi:oi/oi:language/text()'),
                'references': ('textList', 'oai_oi:oi/oi:references/text()'),
                'spatial': ('textList', 'oai_oi:oi/oi:spatial/text()'),
                'attributionName': ('textList', 'oai_oi:oi/oi:attributionName/text()'),
                'attributionURL': ('textList', 'oai_oi:oi/oi:attributionURL/text()'),
                'license': ('textList', 'oai_oi:oi/oi:license/text()')
            },
            namespaces={
                'oai_oi': 'http://www.openbeelden.nl/feeds/oai/', #'http://www.openarchives.org/OAI/2.0/oai_oi/',
                'oi': 'http://www.openbeelden.nl/oai/'
            }
        )

        URL = 'http://www.openbeelden.nl/feeds/oai/'

        # Initialize the OAI client
        self.registry = MetadataRegistry()
        self.registry.registerReader('oai_oi', oai_oi_reader)
        self.client = Client(URL, self.registry)

        # Test whether the connection to the OAI-PMH provider works
        x = self.client.updateGranularity()
        x = self.client.identify()
        print 'identity %s' % x.repositoryName()
        print 'identity %s' % x.protocolVersion()
        print 'identity %s' % x.baseURL()
        """
        for s in client.listSets():
            print s
        """
        # Initialize the OpenSKOSHandler
        self.openSKOSHandler = OpenSKOSHandler()

    def reindex(self, provider=None):
        setupOAIPMHConnection()
        i = 0
        extent = None
        item = None
        identifier = None
        for rec in self.client.listRecords(metadataPrefix=u'oai_oi', set=u'beeldengeluid'):  # stichting_natuurbeelden, beeldengeluid
            header, metadata, about = rec
            extent = metadata.getField('extent')[0]
            item = {
                'id': header.identifier(),
                'identifier': self.getFieldData(metadata, 'identifier'),
                'title': self.getFieldData(metadata, 'title'),
                'alternative': self.getFieldData(metadata, 'alternative'),
                'creator': self.getFieldData(metadata, 'creator'),
                'subject': self.getFieldData(metadata, 'subject'),
                'description': self.getFieldData(metadata, 'description'),
                'abstract': self.getFieldData(metadata, 'abstract'),
                'publisher': self.getFieldData(metadata, 'publisher'),