当前位置: 首页>>代码示例>>Python>>正文


Python metadata.MetadataRegistry类代码示例

本文整理汇总了Python中oaipmh.metadata.MetadataRegistry的典型用法代码示例。如果您正苦于以下问题：Python MetadataRegistry类的具体用法？Python MetadataRegistry怎么用？Python MetadataRegistry使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了MetadataRegistry类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: gather_stage

 def gather_stage(self, harvest_job):
     """Pick the harvester matching the source and delegate gathering to it.

     An OAI-PMH ``Identify`` request is sent to the source URL.  When the
     response raises ``XMLSyntaxError`` the DDI harvester is selected;
     otherwise, if no harvester is set on the instance, the OAI-PMH
     harvester is selected.  The chosen harvester is serialized with
     ``jsonpickle`` and stored under the ``'harv'`` key of every gathered
     object's JSON content.

     :param harvest_job: job whose ``source.url`` is probed and harvested.
     :returns: list of ids of the gathered harvest objects, or ``None``
         when the source cannot be reached or the delegate harvester
         itself returned ``None``.
     """
     url = harvest_job.source.url
     # Test whether we should use OAI-PMH or DDI.
     metadata_registry = MetadataRegistry()
     metadata_registry.registerReader('oai_dc', oai_dc_reader)
     client = oaipmh.client.Client(url, metadata_registry)
     try:
         client.identify()
     except XMLSyntaxError:
         self.harvester = DDIHarvester()
     except urllib2.URLError:
         self._save_gather_error('Could not identify source!', harvest_job)
         return None
     # NOTE(review): a harvester already present on the instance (e.g. from
     # an earlier call) is reused as-is -- confirm this is intended.
     if not self.harvester:
         self.harvester = OAIPMHHarvester()
     object_ids = self.harvester.gather_stage(harvest_job)
     if object_ids is None:
         # The delegate reported a failure; propagate it instead of
         # crashing while iterating over None.
         return None
     # The harvester does not change inside the loop, so pickle it once.
     pickled_harvester = jsonpickle.encode(self.harvester)
     ret = []
     for object_id in object_ids:
         obj = HarvestObject.get(object_id)
         content = json.loads(obj.content)
         content['harv'] = pickled_harvester
         obj.content = json.dumps(content)
         obj.save()
         ret.append(obj.id)
     return ret
开发者ID:kata-csc,项目名称:ckanext-metadata,代码行数:26,代码来源:harvester.py

示例2: scrape

    def scrape(self):
        raise Exception("not finished")
        registry = MetadataRegistry()
        registry.registerReader('oai_dc', oai_dc_reader)
        url = self.setting('pmh-endpoint')
        client = Client(url, registry)

        print "  OAI Repository", url
        print "  Available sets:"
        for s in client.listSets():
            print "   ", s

        oai_set = self.setting('set')
        oai_from = self.setting('from')
        oai_until = self.setting('until')

        kwargs = {}

        if oai_set:
            kwargs['set'] = oai_set

        if oai_from is not None:
            date_args = [int(arg) for arg in oai_from.split("-")]
            kwargs['from_'] = datetime.datetime(*date_args)

        if oai_until is not None:
            date_args = [int(arg) for arg in oai_until.split("-")]
            kwargs['until'] = datetime.datetime(*date_args)

        records = [r for r in client.listRecords(metadataPrefix='oai_dc', **kwargs)]

        data_filepath = os.path.join(self.work_dir(), self.setting('data-file'))
        with open(data_filepath, 'wb') as f:
            print "  picking", len(records), "records"
            pickle.dump(records, f)
开发者ID:cameronneylon,项目名称:oacensus,代码行数:35,代码来源:oai.py

示例3: insertAll

def insertAll(time, time2):
    """Fetch arXivRaw records for a time window and insert each one.

    Every record returned by ``listRecords`` is converted into a title,
    category list, abstract, URL, creation date and author string, echoed
    to stdout and handed to ``insert``.  A record that cannot be processed
    is printed after an ``ERROR`` line and counted; processing continues
    with the next record.

    :param time: lower bound, passed to ``listRecords`` as ``from_``.
    :param time2: upper bound, passed to ``listRecords`` as ``until``.
    """
    registry = MetadataRegistry()
    registry.registerReader('arXivRaw', arXivRaw_reader)
    client = Client(URL, registry)
    client.updateGranularity()
    records = client.listRecords(metadataPrefix='arXivRaw', from_=time, until=time2)
    errors = 0
    for record in records:
        try:
            # Index 1 of the record holds the metadata fields.
            fields = record[1]
            title = '\n'.join(fields['title'])
            sr2 = str(' '.join(fields['categories']).replace('-', '_')).split(' ')
            abstract = '\n'.join(fields['abstract'])
            url = 'http://arxiv.org/abs/' + fields['id'][0]
            date = datetime.strptime(fields['created'][0], '%a, %d %b %Y %H:%M:%S %Z')
            authors = fields['authors'][0]
            abstract = abstract + '\nBy: ' + authors + '\nIn: ' + ', '.join(sr2)
            print(title)
            print(sr2)
            print(abstract)
            print(url)
            print(date)
            print(authors)
            insert(title + ' (' + authors + ')', str("fullarxiv"), url, abstract, date=date, cross_srs=sr2)
        except Exception:
            # Best effort per record, but do not swallow KeyboardInterrupt
            # or SystemExit the way a bare ``except:`` would.
            print('ERROR')
            print(record)
            errors += 1
    print('Completed with %s errors' % errors)
开发者ID:rfurman,项目名称:arxaliv,代码行数:29,代码来源:autoinsert.py

示例4: arxiv_oai_scraper

def arxiv_oai_scraper(subject, start, end, sleep_time=0):
    """Collect Dublin Core metadata of arXiv records for one set and period.

    :param subject: set name, formatted into the ``set`` request argument.
    :param start: lower bound, passed to ``listRecords`` as ``from_``.
    :param end: upper bound, passed to ``listRecords`` as ``until``.
    :param sleep_time: seconds to sleep after every record (default 0).
    :returns: list of dicts with the keys ``title``, ``abstract``, ``date``,
        ``subject``, ``url`` and ``authors``; records without metadata are
        left out.
    """
    # (output key, oai_dc field) pairs, in the order the fields are read.
    field_map = (
        ("title", "title"),
        ("abstract", "description"),
        ("date", "date"),
        ("subject", "subject"),
        ("url", "identifier"),
        ("authors", "creator"),
    )

    readers = MetadataRegistry()
    readers.registerReader('oai_dc', oai_dc_reader)
    oai_client = Client("http://export.arxiv.org/oai2", readers)
    oai_client.updateGranularity()

    harvested = oai_client.listRecords(
        metadataPrefix='oai_dc',
        set="{}".format(subject),
        from_=start,
        until=end,
    )

    output = []
    for _header, record_md, _about in harvested:
        # Some records (seen in 2010 data) come without any metadata.
        if record_md is not None:
            output.append({key: record_md[field] for key, field in field_map})
        # The pause applies to every record, including skipped ones.
        time.sleep(sleep_time)

    return output
开发者ID:jaimeps,项目名称:distributed-computing-arxiv,代码行数:30,代码来源:arxiv_scraper.py

示例5: __init__

 def __init__(self, url):
     """Initialize the client with a registry that reads the ``oaf`` format.

     :param url: passed through to the parent client's constructor.
     """
     registry = MetadataRegistry()
     registry.registerReader('oaf', self.oaf_reader)
     # ``__init__`` must not return a value, so call the parent
     # constructor as a plain statement.
     super(OpenAireClient, self).__init__(url, metadata_registry=registry)
开发者ID:LibrarPotter,项目名称:zenodo,代码行数:7,代码来源:openaire.py

示例6: list_oai_collections

    def list_oai_collections(self, community):
        """Append the collection sets found in a community to ``self.collections``.

        The record headers of the community set are listed from the
        community's repository; every set spec starting with ``'col'`` is
        treated as a collection.  Each distinct collection is appended to
        ``self.collections`` as ``[identifier, 'Collection: <identifier>']``.
        The number of collections found is printed to stdout.

        :param community: object providing ``repository.base_url`` and
            ``identifier``.
        :returns: ``None``.  When the request cannot be set up the method
            returns early without touching ``self.collections``.
        """
        try:
            registry = MetadataRegistry()
            registry.registerReader('oai_dc', oai_dc_reader)
            client = Client(community.repository.base_url, registry)
            headers = client.listIdentifiers(
                metadataPrefix='oai_dc', set=community.identifier)
        except Exception:
            # Best effort: an unreachable or misbehaving repository simply
            # yields no collections.  KeyboardInterrupt/SystemExit still
            # propagate, unlike with a bare ``except:``.
            return

        # NOTE(review): if listIdentifiers is lazy, request errors may only
        # surface in the loop below, outside the try block -- confirm.

        # Filter the headers to build the set of collections in the community.
        community_collections = set()
        for header in headers:
            for set_spec in header.setSpec():
                if set_spec.startswith('col'):
                    community_collections.add(set_spec)

        print(len(community_collections))

        # Build the [identifier, human readable name] entries.
        for identifier in community_collections:
            self.collections.append([identifier, 'Collection: %s' % identifier])
开发者ID:llcit,项目名称:uh-dla-dev-py,代码行数:32,代码来源:utils.py

示例7: _get_client_identifier

 def _get_client_identifier(self, url, harvest_job=None):
     """Build an OAI-PMH client for ``url`` and try to identify the repository.

     :param url: base URL handed to the OAI-PMH client.
     :param harvest_job: optional job; when given, URL and socket errors
         are recorded on it through ``_save_gather_error``.
     :returns: tuple ``(client, identifier)``.  ``identifier`` is the
         result of ``client.identify()``, or ``None`` when the request
         failed for any reason.
     """
     registry = MetadataRegistry()
     registry.registerReader(self.metadata_prefix_value, oai_dc_reader)
     client = oaipmh.client.Client(url, registry)
     try:
         identifier = client.identify()
     except (urllib2.URLError, urllib2.HTTPError):
         if harvest_job:
             self._save_gather_error(
                 'Could not gather from %s!' % harvest_job.source.url,
                 harvest_job)
         return client, None
     except socket.error:
         if harvest_job:
             # exc_info() yields (exception type, exception value, traceback).
             exc_type, exc_value = sys.exc_info()[:2]
             self._save_gather_error(
                 'Socket error OAI-PMH %s, details:\n%s' % (exc_type, exc_value),
                 harvest_job)
         return client, None
     except ValueError:
         # We have no source URL when importing via UI.
         return client, None
     except Exception:
         # Guard against miscellaneous stuff. Probably plain bugs.
         # format_exc() formats the exception currently being handled; its
         # only positional argument is a frame limit, not the exception.
         log.debug(traceback.format_exc())
         return client, None
     return client, identifier
开发者ID:florenthemmi,项目名称:ckanext-oaipmh,代码行数:27,代码来源:harvester.py

示例8: gather_stage

    def gather_stage(self, harvest_job):
        '''
        The gather stage will receive a HarvestJob object and will be
        responsible for:
            - gathering all the necessary objects to fetch on a later
              stage (e.g. for a CSW server, perform a GetRecords request)
            - creating the necessary HarvestObjects in the database, specifying
              the guid and a reference to its source and job.
            - creating and storing any suitable HarvestGatherErrors that may
              occur.
            - returning a list with all the ids of the created HarvestObjects.

        This implementation creates one HarvestObject per OAI-PMH set of the
        source.  When the configuration has a ``query`` entry, only sets
        whose name contains that value are kept.  A repository without set
        hierarchy gets a single ``('1', 'Default')`` set and a gather error.

        :param harvest_job: HarvestJob object
        :returns: A list of HarvestObject ids, or None when the source
                  cannot be reached
        '''
        self._set_config(harvest_job.source.config)
        registry = MetadataRegistry()
        registry.registerReader('oai_dc', oai_dc_reader)
        client = oaipmh.client.Client(harvest_job.source.url, registry)
        try:
            repository = client.identify()
        except urllib2.URLError:
            self._save_gather_error('Could not gather anything from %s!' %
                                    harvest_job.source.url, harvest_job)
            return None
        domain = repository.repositoryName()
        group = Group.by_name(domain)
        if not group:
            # NOTE(review): the new group is not referenced again in this
            # method; presumably creating it registers it for the commit
            # below -- confirm.
            group = Group(name=domain, description=domain)

        filter_by_query = 'query' in self.config
        query = self.config['query'] if filter_by_query else ''
        sets = []
        try:
            for set_id, set_name, _ in client.listSets():
                if not filter_by_query or query in set_name:
                    sets.append((set_id, set_name))
        except NoSetHierarchyError:
            sets.append(('1', 'Default'))
            self._save_gather_error('Could not fetch sets!', harvest_job)

        harvest_objs = []
        for set_id, set_name in sets:
            harvest_obj = HarvestObject(job=harvest_job)
            harvest_obj.content = json.dumps({
                'set': set_id,
                'set_name': set_name,
                'domain': domain,
            })
            harvest_obj.save()
            harvest_objs.append(harvest_obj.id)
        model.repo.commit()
        return harvest_objs
开发者ID:ilrt,项目名称:ckanext-oaipmh,代码行数:57,代码来源:harvester.py

示例9: __init__

    def __init__(self, configuration_file):
        """Load the configuration file and create the OAI-PMH client.

        :param configuration_file: path handed to the config parser's
            ``read`` method.
        """
        parser = ConfigParser.SafeConfigParser()
        parser.read(configuration_file)
        self.oai_config = parser
        # Name of the configuration currently in use.
        self.current_config = 'ToulouseBis'

        readers = MetadataRegistry()
        readers.registerReader('oai_dc', oai_dc_reader)
        endpoint = self._get_config_value('url')
        self.client = Client(endpoint, readers)
开发者ID:JeanFred,项目名称:BibRose,代码行数:9,代码来源:OaiClient.py

示例10: test

def test(request):
	URL = 'http://www.kulturarv.dk/ffrepox/OAIHandler'
	registry = MetadataRegistry()
	registry.registerReader('oai_dc', oai_dc_reader)
	client = Client(URL, registry)
	identifyResponse = client.identify()

	print dir(identifyResponse)
	#for record in client.listRecords(metadataPrefix='oai_dc'):
	#	result += record
	return HttpResponse(identifyResponse.repositoryName())
开发者ID:troels,项目名称:datafest,代码行数:11,代码来源:views.py

示例11: test_get_record

 def test_get_record(self):
     """GetRecord for the first listed identifier validates and has the expected content.

     The ListIdentifiers response of the application is fed to the OAI-PMH
     client through a mocked ``urllib2.urlopen``.  The mock is removed
     again when the test ends, so it cannot leak into other tests.
     """
     metadata_reg = MetadataRegistry()
     metadata_reg.registerReader('oai_dc', oai_dc_reader)
     client = Client(config.get('ckan.site_url') + self.base_url, metadata_reg)
     res = self._oai_get_method_and_validate('?verb=ListIdentifiers&metadataPrefix=oai_dc&set=roger')
     original_urlopen = urllib2.urlopen
     urllib2.urlopen = mock.Mock(return_value=StringIO(res))
     try:
         ids = client.listIdentifiers(metadataPrefix='oai_dc')
         offset = self.base_url + '?verb=GetRecord&identifier=%s&metadataPrefix=oai_dc' % next(ids).identifier()
         res = self.app.get(offset)
         self.assertTrue(oaischema.validate(etree.fromstring(res.body)))
         self.assertTrue("abraham" in res.body)
     finally:
         # Undo the module-level monkeypatch regardless of the outcome.
         urllib2.urlopen = original_urlopen
开发者ID:florenthemmi,项目名称:ckanext-oaipmh,代码行数:11,代码来源:test_oai.py

示例12: harvest_oai_collection_records

    def harvest_oai_collection_records(self, collection):
        """Request the ``oai_dc`` records of a collection from its repository.

        :param collection: object providing ``identifier`` and
            ``community.repository.base_url``.
        :returns: the result of ``client.listRecords`` for the collection
            set, or ``None`` when the request could not be set up.
        """
        try:
            registry = MetadataRegistry()
            registry.registerReader('oai_dc', oai_dc_reader)
            client = Client(collection.community.repository.base_url, registry)
            return client.listRecords(
                metadataPrefix='oai_dc', set=collection.identifier)
        except Exception:
            # Best effort: signal failure with None, but let
            # KeyboardInterrupt/SystemExit propagate (a bare ``except:``
            # would swallow them).
            return None
开发者ID:llcit,项目名称:uh-dla-dev-py,代码行数:12,代码来源:utils.py

示例13: clean

    def clean(self):
        """Validate the repository base URL by sending an Identify request.

        On success the repository name reported by the server is stored in
        ``cleaned_data['name']``.

        :returns: the cleaned data dict.
        :raises ValidationError: when the Identify request fails for any
            reason.
        """
        cleaned_data = super(CreateRepositoryForm, self).clean()
        try:
            registry = MetadataRegistry()
            registry.registerReader('oai_dc', oai_dc_reader)
            client = Client(cleaned_data.get('base_url'), registry)
            server = client.identify()
            # Set the repository name; it is applied to the model instance
            # when saved.
            cleaned_data['name'] = server.repositoryName()
        except Exception:
            # Any failure is reported as an invalid URL, but
            # KeyboardInterrupt/SystemExit are no longer converted into a
            # validation error as they were by the bare ``except:``.
            raise ValidationError('Repository base url is invalid.')

        return cleaned_data
开发者ID:llcit,项目名称:uh-dla-dev-py,代码行数:13,代码来源:forms.py

示例14: test_resumption_identifiers

 def test_resumption_identifiers(self):
     """Every header listed through the batching server is truthy."""
     reader_registry = MetadataRegistry()
     reader_registry.registerReader('oai_dc', oai_dc_reader)
     # Put the real urlopen back in place before talking to the server.
     urllib2.urlopen = realopen
     ckan_server = CKANServer()
     server_registry = metadata.MetadataRegistry()
     server_registry.registerReader('oai_dc', oai_dc_reader)
     server_registry.registerWriter('oai_dc', oai_dc_writer)
     batching_server = BatchingServer(ckan_server, metadata_registry=server_registry)
     server_client = ServerClient(batching_server, reader_registry)
     for header in server_client.listIdentifiers(metadataPrefix='oai_dc'):
         self.assert_(header)
开发者ID:florenthemmi,项目名称:ckanext-oaipmh,代码行数:13,代码来源:test_oai.py

示例15: get_client

def get_client(url, transforms):
    transforms = fix_transforms(transforms)
    registry = MetadataRegistry()
    c = Client(url, registry)
    metadata = c.listMetadataFormats()
    metadata[0] = [
        'fbb', 'http://www.kulturarv.dk/fbb/fbb.xsd', 'http://www.kulturarv.dk/fbb']
    namespaces = dict((x[0], x[2]) for x in metadata)
    fields = dict((transform['field'], ('textList', transform['path']))
                  for transform in transforms)
    namespace = metadata[0][0]
    print namespaces,fields
    registry.registerReader(namespace, MetadataReader(fields=fields, namespaces=namespaces))
    return c, namespace
开发者ID:kraenhansen,项目名称:datafest,代码行数:14,代码来源:transformer.py


注:本文中的oaipmh.metadata.MetadataRegistry类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。