当前位置: 首页>>代码示例>>Python>>正文


Python Client.identify方法代码示例

本文整理汇总了Python中oaipmh.client.Client.identify方法的典型用法代码示例。如果您正苦于以下问题:Python Client.identify方法的具体用法?Python Client.identify怎么用?Python Client.identify使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在oaipmh.client.Client的用法示例。


在下文中一共展示了Client.identify方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import identify [as 别名]
def test(request):
	URL = 'http://www.kulturarv.dk/ffrepox/OAIHandler'
	registry = MetadataRegistry()
	registry.registerReader('oai_dc', oai_dc_reader)
	client = Client(URL, registry)
	identifyResponse = client.identify()

	print dir(identifyResponse)
	#for record in client.listRecords(metadataPrefix='oai_dc'):
	#	result += record
	return HttpResponse(identifyResponse.repositoryName())
开发者ID:troels,项目名称:datafest,代码行数:13,代码来源:views.py

示例2: clean

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import identify [as 别名]
    def clean(self):
        cleaned_data = super(CreateRepositoryForm, self).clean()
        try:
            registry = MetadataRegistry()
            registry.registerReader('oai_dc', oai_dc_reader)
            client = Client(cleaned_data.get('base_url'), registry)
            server = client.identify()
            # set the repository name apply to model instance when saved.
            cleaned_data['name'] = server.repositoryName()
        except:
            raise ValidationError('Repository base url is invalid.')

        return cleaned_data
开发者ID:llcit,项目名称:uh-dla-dev-py,代码行数:15,代码来源:forms.py

示例3: _listRecords

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import identify [as 别名]
 def _listRecords(self, baseUrl, metadataPrefix="oai_dc", **kwargs):
     # Generator to yield records from baseUrl in the given metadataPrefix
     # Add metatdataPrefix to args
     kwargs['metadataPrefix'] = metadataPrefix
     client = Client(baseUrl, self._mdRegistry)
     # Check that baseUrl actually represents an OAI-PMH target
     try:
         client.identify()
     except IndexError:
         raise NotOAIPMHBaseURLException(
             "{0} does not appear to be an OAI-PMH compatible base URL"
             "".format(baseUrl)
         )
     # Check server timestamp granularity support
     client.updateGranularity()
     for record in client.listRecords(**kwargs):
         # Unit test hotfix
         header, metadata, about = record
         # Fix pyoai returning a "b'...'" string for py3k
         if isinstance(metadata, str) and metadata.startswith("b'"):
             metadata = ast.literal_eval(metadata).decode("utf-8")
         yield (header, metadata, about)
开发者ID:bloomonkey,项目名称:oai-harvest,代码行数:24,代码来源:harvest.py

示例4: MetadataRegistry

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import identify [as 别名]
URL = "http://citeseerx.ist.psu.edu/oai2"

registry = MetadataRegistry()
registry.registerReader('oai_dc', oai_dc_reader)

client = Client(URL, registry)
client.updateGranularity()

store = Store()

if len(sys.argv) > 1:
    start = datetime.strptime(sys.argv[1], '%Y-%m-%d') #2011-10-27, for instance
elif store.last():
    start = store.last()
else:
    start = client.identify().earliestDatestamp()

#try this and see if it works; if it does resumption tokens right, this should work fine.


chunk = timedelta(days=1)
oneday = timedelta(days=1)

#TODO: clearly they don't do this whole "ordered" thing. Grab records by month or year or something instead of all at once.
#TODO: luckily, once we've done a full slurp, we only need to remember when the last full slurp was and start since then. But if interrupted, we need to start back from where the last *full* slurp was, due to the ordering problem.

#TODO: structure this better, with the try effectively moved much further above. Really, move a lot more into functions
try:
    current = start #TODO: make a nice little generator so I can use a for loop
    while current <= datetime.now():
        print >>sys.stderr, "fetching records @", now(), "starting with", current.strftime('%Y-%m-%d')
开发者ID:fugu13,项目名称:citerec,代码行数:33,代码来源:harvest.py

示例5: run

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import identify [as 别名]
    def run(self):
        # Check that ElasticSearch is alive
        self.check_index()

        # If the user specified the --REBUILD flag, recreate the index
        if self.options['rebuild']:
            self.rebuild_index()

        # Connect to the repository
        registry = MetadataRegistry()
        registry.registerReader(self.settings["metadata_format"], self.settings["metadata_reader"])

        client = Client(self.settings["uri"], registry)
        identity = client.identify()

        print "Connected to repository: %s" % identity.repositoryName()

        # got to update granularity or we barf with: 
        # oaipmh.error.BadArgumentError: Max granularity is YYYY-MM-DD:2003-04-10T00:00:00Z
        client.updateGranularity()

        # Initialise some variables
        batcher = Batch.Batch()
        total_records = 0
        start = time.time()
        
        # Now do the synchonisation
        
        # If the user specified an identifier, then synchronise this record
        if (self.options['identifier'] is not None):
            total_records += self.synchronise_record(client, batcher, self.options['identifier'])
        else:
            # Else, synchronise using the date-range provided by the user, or failing that, 
            # the date-range based on the last sync

            # Get the synchronisation config record
            synchronisation_config = self.get_synchronisation_config()

            
            if self.options["from_date"] is not None:
                # If the user specified a from-date argument, use it
                from_date = self.options["from_date"] # already a date (not a datetime)
            elif synchronisation_config is not None and "to_date" in synchronisation_config:
                # Else read the last synchronised to_date from the config, and add on a day
                from_date = dateutil.parser.parse(synchronisation_config["to_date"]).date() + timedelta(days=1)
            else:
                # Else use the default_from_date in the config
                from_date = dateutil.parser.parse(self.settings['default_from_date']).date()

            if self.options["to_date"] is not None:
                to_date = self.options["to_date"] # already a date (not a datetime)
            else:
                to_date = (date.today() - timedelta(days=1))
            
            # Force the from_date to use time 00:00:00
            from_date = datetime.combine(from_date, _time(hour=0, minute=0, second=0, microsecond=0))

            # Force the to_date to use time 23:59:59
            to_date = datetime.combine(to_date, _time(hour=23, minute=59, second=59, microsecond=0))


            print "Synchronising from %s - %s" % (from_date, to_date)

            while from_date < to_date:
                next_date = datetime.combine(from_date.date() + timedelta(days=(self.settings['delta_days'] - 1)), _time(hour=23, minute=59, second=59, microsecond=0))
                number_of_records = self.synchronise_period(client, batcher, from_date, next_date)
                batcher.clear() #Store the records in elasticsearch
                self.put_synchronisation_config(from_date, next_date, number_of_records)
                from_date += timedelta(days=(self.settings['delta_days']))
                total_records += number_of_records

                # Pause so as not to get banned.
                to = 20
                print "Sleeping for %i seconds so as not to get banned." % to
                time.sleep(to)

            
        # Store the records in the index
        batcher.clear()
        
        # Print out some statistics
        time_spent = time.time() - start
        print 'Total time spent: %d seconds' % (time_spent)

        if time_spent > 0.001: # careful as its not an integer
            print 'Total records synchronised: %i records (%d records/second)' % (total_records, (total_records/time_spent))
        else:
            print 'Total records synchronised: %i records' % (total_records)
        return total_records

        sys.exit()
开发者ID:HeinrichHartmann,项目名称:OpenCitationsCorpus,代码行数:93,代码来源:OpenCitationsImportLibrary.py

示例6: add_provider

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import identify [as 别名]
def add_provider(cxn, args):
    """Add a new provider to the registry database.
    
    Process ``args`` to add a new provider to the registry database. Return 0
    for success, 1 for failure (error message should be logged).
    
    ``cxn`` => instance of ``sqlite3.Connection``
    ``args`` => instance of ``argparse.Namespace``
    """
    global logger, MAX_NAME_LENGTH
    addlogger = logger.getChild('add')
    # Validate name
    if len(args.name) > MAX_NAME_LENGTH:
        addlogger.critical('Short name for new provider must be no more than '
                           '{0} characters long'.format(MAX_NAME_LENGTH))
        return 1
    elif args.name.startswith(('http://', 'https://')) or args.name == 'all':
        addlogger.critical('Short name for new provider may not be "all" nor '
                           'may it begin "http://" or "https://"')
        return 1
    # Try to create row now to avoid unnecessary validation if duplicate
    try:
        cxn.execute("INSERT INTO providers(name, lastHarvest) values "
                         "(?, ?)",
                         (args.name, datetime.fromtimestamp(0))
        )
    except sqlite3.IntegrityError:
        addlogger.critical('Unable to add provider "{0}"; '
                           'provider with this name already exists'
                           ''.format(args.name)
                           )
        return 1
    else:
        addlogger.info('Adding provider "{0}"'.format(args.name))
    # Get any missing information
    # Base URL
    if args.url is None:
        args.url = raw_input('Base URL:'.ljust(20))
        if not args.url:
            addlogger.critical('Base URL for new provider not supplied')
            return 1
    # Set up an OAI-PMH client for validating providers
    md_registry = MetadataRegistry()
    md_registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(args.url, md_registry)
    # Validate Base URL by fetching Identify
    try:
        client.identify()
    except (XMLSyntaxError, HTTPError):
        addlogger.critical('Base URL for new provider does not return a valid '
                           'response to an `Identify` request')
        return 1
    # Destination
    if args.dest is None:
        args.dest = raw_input('Destination directory: '.ljust(20))
        if args.dest:
            # Expand user dir
            args.dest = os.path.expanduser(args.dest)
        else:
            addlogger.info('Destination for data for new provider not supplied'
                           ' using default `pwd`: {0}'.format(os.getcwd())
                           )
            args.dest = os.getcwd()
    # metadataPrefix
    # Check that selected metadataPrefix is available from provider
    # Fetch list of available formats
    mdps = dict((mdpinfo[0], mdpinfo[1:])
                    for mdpinfo in
                    client.listMetadataFormats())
    while args.metadataPrefix not in mdps:
        print "Available metadataPrefix values:"
        # List available formats
        for mdp in mdps:
            print mdp, '-', mdps[mdp][1]
        args.metadataPrefix = raw_input('metadataPrefix [oai_dc]:'.ljust(20))
        if not args.metadataPrefix:
            addlogger.info('metadataPrefix for new provider not supplied. '
                           'using default: oai_dc')
            args.metadataPrefix = 'oai_dc'
    cxn.execute("UPDATE providers SET "
                     "url=?, "
                     "destination=?, "
                     "metadataPrefix=? "
                     "WHERE name=?",
                     (args.url,
                      args.dest,
                      args.metadataPrefix,
                      args.name
                      )
    )
    addlogger.info('URL for next harvest: {0}?verb=ListRecords'
                   '&metadataPrefix={1}'
                   '&from={2:%Y-%m-%dT%H:%M:%SZ%z}'
                   ''.format(args.url,
                             args.metadataPrefix,
                             datetime.fromtimestamp(0)
                             )
                   )
    # All done, commit database
    cxn.commit()
#.........这里部分代码省略.........
开发者ID:atomotic,项目名称:oai-harvest,代码行数:103,代码来源:registry.py

示例7: identifiy

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import identify [as 别名]
def identifiy(target):
    if target is not None:
        client = Client(target['url'], registry)
        identify = client.identify()
        return convert_identifiy(identify)
开发者ID:cogfor,项目名称:oai2es,代码行数:7,代码来源:oaipmh_harvester.py

示例8: MetadataRegistry

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import identify [as 别名]
    'oai_oi': 'http://www.openbeelden.nl/feeds/oai/', #'http://www.openarchives.org/OAI/2.0/oai_oi/',
    'oi': 'http://www.openbeelden.nl/oai/'}
    )

#URL = 'http://www.openbeelden.nl/oip-test/feeds/oai/'
URL = 'http://www.openbeelden.nl/feeds/oai/'
#URL = 'http://oai.tuxic.nl/oai/'

#Initieer de OAI client
registry = MetadataRegistry()
registry.registerReader('oai_oi', oai_oi_reader)
client = Client(URL, registry)
x = client.updateGranularity()

#Controleer of de OAI service goed geidentificeerd kan worden
x = client.identify()
print 'identity %s' % x.repositoryName()
print 'identity %s' % x.protocolVersion()
print 'identity %s' % x.baseURL()

OUTPUT_DIR = '/Users/jblom/temp'


print 'Firing up the openSKOSHandler'
osh = OpenSKOSHandler()

def processOpenbeelden():
	i=0
	iarecs = []
	#for y in client.listRecords(metadataPrefix='oai_oi', from_=parse('2011-01-01'), until=parse('2011-11-01')):
	extent = None
开发者ID:DivePlus,项目名称:dive,代码行数:33,代码来源:harvest_beng_set.py

示例9: MARCXMLReader

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import identify [as 别名]
marcxml_reader = MARCXMLReader()

# Defining of metadata Readers in the Registry

from oaipmh import metadata

registry = metadata.MetadataRegistry()
registry.registerReader('oai_dc', metadata.oai_dc_reader)
registry.registerReader('marc21', marcxml_reader)


#### OAI-PMH Client processing 

oai = Client('http://snape.mzk.cz/OAI-script', registry)

id = oai.identify()
print id.repositoryName()
print id.adminEmails()
print id.baseURL()

formats = oai.listMetadataFormats()
pprint formats

# 'marc21'

sets = oai.listSets()
for s in sets:
	print s

# 'MZK03'
开发者ID:klokan,项目名称:oldmapsonline,代码行数:32,代码来源:oaipmh-client.py

示例10: transfer_experiment

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import identify [as 别名]
def transfer_experiment(source):
    """
    Pull public experiments from source into current mytardis.
    """

    #TODO: Cleanup error messages
    #TODO: does not transfer liences as not part of METS format.
    #NOTE: As this is a pull we trust the data from the other tardis
    # Check identity of the feed
    from oaipmh.client import Client
    from oaipmh import error
    from oaipmh.metadata import MetadataRegistry, oai_dc_reader

    from django.core.cache import cache
    from django.utils.hashcompat import md5_constructor as md5

    # The cache key consists of the task name and the MD5 digest
    # of the feed URL.
    cache_key = md5("token").hexdigest()
    lock_id = "%s-lock-%s" % ("consume_experiment", cache_key)
    LOCK_EXPIRE = 60 * 5
    # cache.add fails if if the key already exists
    acquire_lock = lambda: cache.add(lock_id, "true", LOCK_EXPIRE)
    # memcache delete is very slow, but we have to use it to take
    # advantage of using add() for atomic locking
    release_lock = lambda: cache.delete(lock_id)

    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    source_url = "%s/apps/oaipmh/?verb=Identify" % source

    client = Client(source_url, registry)
    try:
        identify = client.identify()
    except AttributeError as e:
        msg = "Error reading repos identity: %s:%s" % (source, e)
        logger.error(msg)
        raise ReposReadError(msg)
    except error.ErrorBase as e:
        msg = "OAIPMH error: %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except URLError as e:
        logger.error(e)
        raise
    repos = identify.baseURL()
    import urlparse
    repos_url = urlparse.urlparse(repos)
    dest_name = "%s://%s" % (repos_url.scheme, repos_url.netloc)
    if dest_name != source:
        msg = "Source directory reports incorrect name: %s" % dest_name
        logger.error(msg)
        raise BadAccessError(msg)
    # Get list of public experiments at sources
    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(source
        + "/apps/oaipmh/?verb=ListRecords&metadataPrefix=oai_dc", registry)
    try:
        exps_metadata = [meta
            for (header, meta, extra)
            in client.listRecords(metadataPrefix='oai_dc')]
    except AttributeError as e:
        msg = "Error reading experiment %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except error.NoRecordsMatchError as e:
        msg = "no public records found on source %s" % e
        logger.warn(msg)
        return

    local_ids = []
    for exp_metadata in exps_metadata:
        exp_id = exp_metadata.getField('identifier')[0]
        user = exp_metadata.getField('creator')[0]

        found_user = _get_or_create_user(source, user)

        #make sure experiment is publicish
        try:
            xmldata = getURL("%s/apps/reposproducer/expstate/%s/"
            % (source, exp_id))
        except HTTPError as e:
            msg = "cannot get public state of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        try:
            exp_state = json.loads(xmldata)
        except ValueError as e:
            msg = "cannot parse public state of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if not exp_state in [Experiment.PUBLIC_ACCESS_FULL,
                              Experiment.PUBLIC_ACCESS_METADATA]:
            msg = 'cannot ingest private experiments.' % exp_id
            logger.error(msg)
            raise BadAccessError(msg)

        # Get the usernames of isOwner django_user ACLs for the experiment
        try:
#.........这里部分代码省略.........
开发者ID:ianedwardthomas,项目名称:mytardis-app-repos-consumer,代码行数:103,代码来源:tasks.py

示例11: OpenBeeldenDataLoader

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import identify [as 别名]

#.........这里部分代码省略.........
		    'description': ('textList', 'oai_oi:oi/oi:description/text()'),
		    'abstract': ('textList', 'oai_oi:oi/oi:abstract/text()'),
		    'publisher':   ('textList', 'oai_oi:oi/oi:publisher/text()'),
		    'contributor': ('textList', 'oai_oi:oi/oi:contributor/text()'),
		    'date':        ('textList', 'oai_oi:oi/oi:date/text()'),
		    'type':        ('textList', 'oai_oi:oi/oi:type/text()'),
		    'extent':        ('textList', 'oai_oi:oi/oi:extent/text()'),
		    'medium':        ('textList', 'oai_oi:oi/oi:medium/text()'),
		    'identifier':  ('textList', 'oai_oi:oi/oi:identifier/text()'),
		    'source':      ('textList', 'oai_oi:oi/oi:source/text()'),
		    'language':    ('textList', 'oai_oi:oi/oi:language/text()'),
		    'references':    ('textList', 'oai_oi:oi/oi:references/text()'),
		    'spatial':    ('textList', 'oai_oi:oi/oi:spatial/text()'),
		    'attributionName':    ('textList', 'oai_oi:oi/oi:attributionName/text()'),
		    'attributionURL':    ('textList', 'oai_oi:oi/oi:attributionURL/text()'),
		    'license':      ('textList', 'oai_oi:oi/oi:license/text()')
		    },

		    namespaces={
		    	'oai_oi': 'http://www.openbeelden.nl/feeds/oai/', #'http://www.openarchives.org/OAI/2.0/oai_oi/',
		    	'oi': 'http://www.openbeelden.nl/oai/'
		    }
		)

		URL = 'http://www.openbeelden.nl/feeds/oai/'

		#Initialize the OAI client
		self.registry = MetadataRegistry()
		self.registry.registerReader('oai_oi', oai_oi_reader)
		self.client = Client(URL, self.registry)

		#Test if the connection to the OAI-PMH provider works
		x = self.client.updateGranularity()
		x = self.client.identify()
		print 'identity %s' % x.repositoryName()
		print 'identity %s' % x.protocolVersion()
		print 'identity %s' % x.baseURL()

		"""
		for s in client.listSets():
			print s
		"""

		#initialize the OpenSKOSHandler
		self.openSKOSHandler = OpenSKOSHandler()

	def reindex(self, provider = None):
		setupOAIPMHConnection()
		i = 0
		extent = None
		item = None
		identifier = None
		for rec in self.client.listRecords(metadataPrefix=u'oai_oi', set=u'beeldengeluid'):#stichting_natuurbeelden, beeldengeluid
			header, metadata, about = rec

			extent = metadata.getField('extent')[0]
			item = {
				'id' : header.identifier(),
				'identifier' : self.getFieldData(metadata, 'identifier'),
				'title' : self.getFieldData(metadata, 'title'),
				'alternative' : self.getFieldData(metadata, 'alternative'),
				'creator' : self.getFieldData(metadata, 'creator'),
				'subject' : self.getFieldData(metadata, 'subject'),
				'description' : self.getFieldData(metadata, 'description'),
				'abstract' : self.getFieldData(metadata, 'abstract'),
				'publisher' : self.getFieldData(metadata, 'publisher'),
开发者ID:beeldengeluid,项目名称:linkedtv-editortool,代码行数:70,代码来源:OpenBeeldenDataLoader.py


注:本文中的oaipmh.client.Client.identify方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。