当前位置: 首页>>代码示例>>Python>>正文


Python Client.listRecords方法代码示例

本文整理汇总了Python中oaipmh.client.Client.listRecords方法的典型用法代码示例。如果您正苦于以下问题:Python Client.listRecords方法的具体用法?Python Client.listRecords怎么用?Python Client.listRecords使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在oaipmh.client.Client的用法示例。


在下文中一共展示了Client.listRecords方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: list_records

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import listRecords [as 别名]
def list_records(target, date_from, date_until, setspec):
    if target is not None:
        client = Client(target['url'], registry)
        # todo : clean this, find simplified cases
        if date_from is not None and date_until is not None and setspec is not None:
            records = client.listRecords(metadataPrefix=target['metadata_prefix'], from_=date_from, until=date_until, set=setspec)
        elif date_from is not None and date_until is not None and setspec is None:
            records = client.listRecords(metadataPrefix=target['metadata_prefix'], from_=date_from, until=date_until)
        elif date_from is not None and date_until is None and setspec is not None:
            records = client.listRecords(metadataPrefix=target['metadata_prefix'], from_=date_from, set=setspec)
        elif date_from is None and date_until is not None and setspec is not None:
            records = client.listRecords(metadataPrefix=target['metadata_prefix'], until=date_until, set=setspec)
        elif date_from is not None and date_until is None and setspec is None:
            records = client.listRecords(metadataPrefix=target['metadata_prefix'], from_=date_from)
        elif date_from is None and date_until is not None and setspec is None:
            records = client.listRecords(metadataPrefix=target['metadata_prefix'], until=date_until)
        elif date_from is None and date_until is None and setspec is not None:
            records = client.listRecords(metadataPrefix=target['metadata_prefix'], set=setspec)
        elif date_from is None and date_until is None and setspec is None:
            records = client.listRecords(metadataPrefix=target['metadata_prefix'])

        results = []
        if records is not None:
            results = []
            for record in records:
                results.append(convert_record(record, target['metadata_prefix'], target['title']))
        return results
开发者ID:kant,项目名称:reference_manager,代码行数:29,代码来源:oaipmh_harvester.py

示例2: list_records

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import listRecords [as 别名]
def list_records(target, date_from, date_until, setspec):
    logging.debug("list_records")
    if target is not None:
        client = Client(target['url'], registry)
        # todo : clean this, find simplified cases
        if date_from is not None and date_until is not None and setspec is not None:
            records = client.listRecords(metadataPrefix=target['metadata_prefix'], from_=date_from, until=date_until, set=setspec)
        elif date_from is not None and date_until is not None and setspec is None:
            records = client.listRecords(metadataPrefix=target['metadata_prefix'], from_=date_from, until=date_until)
        elif date_from is not None and date_until is None and setspec is not None:
            records = client.listRecords(metadataPrefix=target['metadata_prefix'], from_=date_from, set=setspec)
        elif date_from is None and date_until is not None and setspec is not None:
            records = client.listRecords(metadataPrefix=target['metadata_prefix'], until=date_until, set=setspec)
        elif date_from is not None and date_until is None and setspec is None:
            records = client.listRecords(metadataPrefix=target['metadata_prefix'], from_=date_from)
        elif date_from is None and date_until is not None and setspec is None:
            records = client.listRecords(metadataPrefix=target['metadata_prefix'], until=date_until)
        elif date_from is None and date_until is None and setspec is not None:
            records = client.listRecords(metadataPrefix=target['metadata_prefix'], set=setspec)
        elif date_from is None and date_until is None and setspec is None:
            records = client.listRecords(metadataPrefix=target['metadata_prefix'])

        if records is not None:
            for record in records:
                yield convert_record(record, target['metadata_prefix'], target['title'])
开发者ID:cogfor,项目名称:oai2es,代码行数:27,代码来源:oaipmh_harvester.py

示例3: scrape

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import listRecords [as 别名]
    def scrape(self):
        raise Exception("not finished")
        registry = MetadataRegistry()
        registry.registerReader('oai_dc', oai_dc_reader)
        url = self.setting('pmh-endpoint')
        client = Client(url, registry)

        print "  OAI Repository", url
        print "  Available sets:"
        for s in client.listSets():
            print "   ", s

        oai_set = self.setting('set')
        oai_from = self.setting('from')
        oai_until = self.setting('until')

        kwargs = {}

        if oai_set:
            kwargs['set'] = oai_set

        if oai_from is not None:
            date_args = [int(arg) for arg in oai_from.split("-")]
            kwargs['from_'] = datetime.datetime(*date_args)

        if oai_until is not None:
            date_args = [int(arg) for arg in oai_until.split("-")]
            kwargs['until'] = datetime.datetime(*date_args)

        records = [r for r in client.listRecords(metadataPrefix='oai_dc', **kwargs)]

        data_filepath = os.path.join(self.work_dir(), self.setting('data-file'))
        with open(data_filepath, 'wb') as f:
            print "  picking", len(records), "records"
            pickle.dump(records, f)
开发者ID:cameronneylon,项目名称:oacensus,代码行数:37,代码来源:oai.py

示例4: insertAll

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import listRecords [as 别名]
def insertAll(time, time2):
    registry = MetadataRegistry()
    registry.registerReader('arXivRaw', arXivRaw_reader)
    client = Client(URL, registry)
    client.updateGranularity()
    list = client.listRecords(metadataPrefix='arXivRaw', from_=time, until=time2)
    errors = 0
    for a in list:
        #a = list.next()
        try:
            title = '\n'.join(a[1]['title'])
            sr2 = str(' '.join(a[1]['categories']).replace('-','_')).split(' ')
            abstract = '\n'.join(a[1]['abstract'])
            url = 'http://arxiv.org/abs/' + a[1]['id'][0]
            date = datetime.strptime(a[1]['created'][0], '%a, %d %b %Y %H:%M:%S %Z')
            authors = a[1]['authors'][0]# '; '.join(a[1]['keynames'])
            abstract = abstract + '\nBy: ' + authors + '\nIn: ' + ', '.join(sr2)
            print title
            print sr2
            print abstract
            print url
            print date
            print authors
            insert(title + ' (' + authors + ')', str("fullarxiv"), url, abstract, date=date, cross_srs=sr2)
        except:
            print 'ERROR'
            print a
            errors = errors+1
    print 'Completed with %s errors' % errors
开发者ID:rfurman,项目名称:arxaliv,代码行数:31,代码来源:autoinsert.py

示例5: arxiv_oai_scraper

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import listRecords [as 别名]
def arxiv_oai_scraper(subject, start, end, sleep_time=0):

    base_url = "http://export.arxiv.org/oai2"
    output = list()

    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(base_url, registry)
    client.updateGranularity()

    records = client.listRecords(metadataPrefix='oai_dc', set="{}".format(subject), from_=start, until=end)

    for _, md, _ in records:

        # print md.getField("title")
        # checks for the case in 2010 when there is no title for something
        if md is not None:

            txt_dict = {"title": md["title"],
                    "abstract": md["description"],
                    "date": md["date"],
                    "subject": md["subject"],
                    "url": md["identifier"],
                    "authors": md['creator']}

            output.append(txt_dict)

        time.sleep(sleep_time)

    return output
开发者ID:jaimeps,项目名称:distributed-computing-arxiv,代码行数:32,代码来源:arxiv_scraper.py

示例6: _listRecords

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import listRecords [as 别名]
 def _listRecords(self, baseUrl, metadataPrefix="oai_dc", **kwargs):
     # Generator to yield records from baseUrl in the given metadataPrefix
     # Add metatdataPrefix to args
     kwargs['metadataPrefix'] = metadataPrefix
     client = Client(baseUrl, metadata_registry)
     # Check server timestamp granularity support
     client.updateGranularity()
     for record in client.listRecords(**kwargs):
         yield record
开发者ID:atomotic,项目名称:oai-harvest,代码行数:11,代码来源:harvest.py

示例7: pull_data

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import listRecords [as 别名]
def pull_data(source):
    list_of_records = []
    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    # Get list of public experiments at sources
    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(source
                    + "/apps/oaipmh/?verb=ListRecords&metadataPrefix=oai_dc", registry)
    try:
        exps_date = []
        exps_metadata = []
        for (header, meta, extra) in client.listRecords(metadataPrefix='oai_dc'):
            exps_date.append(str(header._datestamp))
            exps_metadata.append(meta)
            logger.debug('Date=%s' % header._datestamp)

    except AttributeError as e:
        msg = "Error reading experiment %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except error.NoRecordsMatchError as e:
        msg = "no public records found on source %s" % e
        logger.warn(msg)
        return

    exp_counter = 0
    for exp_metadata in exps_metadata:
        user_id = exp_metadata.getField('creator')[0]
        user_profile = json.loads(_get_user(source, user_id))
        data_tobe_indexed = dict(user_profile)
        data_tobe_indexed['user_id'] = user_id

        exp_id = exp_metadata.getField('identifier')[0]
        description = exp_metadata.getField('description')[0]
        title = exp_metadata.getField('title')[0]
        if settings.EXPERIMENT_PATH[0] == '/':
            settings.EXPERIMENT_PATH = settings.EXPERIMENT_PATH[1:]
        experiment_url = os.path.join(source,
                                      settings.EXPERIMENT_PATH % exp_id)

        data_tobe_indexed['experiment_id'] = exp_id
        data_tobe_indexed['experiment_title'] = title
        data_tobe_indexed['experiment_description'] = description
        data_tobe_indexed['experiment_url'] = experiment_url
        data_tobe_indexed['id'] = experiment_url
        data_tobe_indexed['experiment_date'] = exps_date[exp_counter]
        exp_counter += 1
        for k, v in data_tobe_indexed.items():
            logger.debug('%s = %s' % (k, v))
        logger.debug('')
        list_of_records.append(json.dumps(data_tobe_indexed))

    return list_of_records
开发者ID:bioscience-data-platform,项目名称:bdpindex,代码行数:56,代码来源:search.py

示例8: harvest_oai_collection_records

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import listRecords [as 别名]
    def harvest_oai_collection_records(self, collection):
        records = []
        try:
            registry = MetadataRegistry()
            registry.registerReader('oai_dc', oai_dc_reader)
            client = Client(collection.community.repository.base_url, registry)
            records = client.listRecords(
                metadataPrefix='oai_dc', set=collection.identifier)
        except:
            return

        return records
开发者ID:llcit,项目名称:uh-dla-dev-py,代码行数:14,代码来源:utils.py

示例9: read_base_records

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import listRecords [as 别名]
 def read_base_records(self):
     registry = MetadataRegistry()
     registry.registerReader('base_dc', base_dc_reader)
     client = Client('http://doai.io/oai', registry)
     for header, record, _ in client.listRecords(metadataPrefix='base_dc'):
         # only process records for which base was unsure
         if '2' not in record['oa']:
             continue
         # extract splash_url
         for link in record['identifier']:
             metadata = {'base_oa':''.join(record['oa']),
                     'splash_url':link,
                     'from_identifier':header.identifier()}
             yield self.filter_url(link,metadata, looking_for='any')
开发者ID:dissemin,项目名称:croawl,代码行数:16,代码来源:base_spider.py

示例10: index_documents

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import listRecords [as 别名]
def index_documents(main_url, database_name, url, reader, prefix, format):
    registry = MetadataRegistry()
    registry.registerReader(prefix, reader)
    client = Client(url, registry)
    return_stuff = []
    for record in client.listRecords(metadataPrefix=prefix):
        r = record[1]
        value = format(r,record[0].identifier())
        if value != None:
            return_stuff.append(value)
        if len(return_stuff) >= 10000:
            sync_files(main_url, database_name, return_stuff)     
            return_stuff = []
    sync_files(main_url, database_name, return_stuff)                 
开发者ID:LearningRegistry,项目名称:LearningRegistry,代码行数:16,代码来源:util.py

示例11: scrape

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import listRecords [as 别名]
def scrape(start=START, end=END, set=SET_THESIS, type='Thesis'):
    """
    Create an OAI-PMH client, gather metadata and output it.

    """    
    total = num = 0
    msg = "Fetching records between " + str(start) + " and " + str(end)
    sys.stderr.write(msg + "\n")

    #
    # Set up metadata readers
    #
    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    registry.registerReader('qdc', qdc_reader)
    # registry.registerReader('rdf', rdf_reader)   # no reader yet
    # registry.registerReader('ore', ore_reader)   # no reader yet
    # registry.registerReader('mets', mets_reader) # no reader yet

    client = Client(URL, registry)
    records = client.listRecords(metadataPrefix='qdc',
                                 from_=start, until=end, set=set)
    for (h, m, a) in records:
        print h, m, a
        if not m:
            sys.stderr.write("o")
            continue
        total = total + 1
        
        handle = m.getField('identifier')
        if not handle:
            sys.stderr.write("Record without a handle.\n")
            continue

        r = dict({ 'handle' : handle[0] })
        for key in qdc_reader._fields.keys():
           r[key] = m.getField(key)
        RECORDS.append(r)

        sys.stderr.write('.')
        sys.stderr.flush()
        num = num + 1
    msg = "\nCollected " + str(num) + " records, out of " + str(total)
    sys.stderr.write('\n' + msg + '\n');

    if options.store:
        pickle.dump(RECORDS, open(options.store, "wb"))
开发者ID:gondree,项目名称:nps-calhoun-reports,代码行数:49,代码来源:calhoun_scrape.py

示例12: update

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import listRecords [as 别名]
    def update(self, from_date=None):
        self._log.info('Harvesting oai server: %s' % self._url)
        registry = MetadataRegistry()
        registry.registerReader(self._prefix, lambda el: el)

        client = Client(self._url, registry)
        try:
            for header, element, about in client.listRecords(
                metadataPrefix = self._prefix,
                from_ = from_date):
                added = self._process_record(header, element)
                if added:
                    yield self._get_id(header)
        except NoRecordsMatchError:
            pass

        super(OAIBasedContentProvider, self).update()
开发者ID:Alchemy-Meister,项目名称:OAI-PMH,代码行数:19,代码来源:oai.py

示例13: processItems

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import listRecords [as 别名]
def processItems():
    oai_oi_reader = MetadataReader(
        fields={
        'title':       ('textList', 'oai_oi:oi/oi:title/text()'),
        'alternative': ('textList', 'oai_oi:oi/oi:alternative/text()'),
        'creator':     ('textList', 'oai_oi:oi/oi:creator/text()'),
        'subject':     ('textList', 'oai_oi:oi/oi:subject/text()'),
        'description': ('textList', 'oai_oi:oi/oi:description/text()'),
        'abstract':       ('textList', 'oai_oi:oi/oi:abstract/text()'),
        'publisher':   ('textList', 'oai_oi:oi/oi:publisher/text()'),
        'contributor': ('textList', 'oai_oi:oi/oi:contributor/text()'),
        'date':        ('textList', 'oai_oi:oi/oi:date/text()'),
        'type':        ('textList', 'oai_oi:oi/oi:type/text()'),
        'extent':      ('textList', 'oai_oi:oi/oi:extend/text()'),
        'medium':       ('textList', 'oai_oi:oi/oi:medium/text()'),
        'identifier':  ('textList', 'oai_oi:oi/oi:identifier/text()'),
        'source':      ('textList', 'oai_oi:oi/oi:source/text()'),
        'language':    ('textList', 'oai_oi:oi/oi:language/text()'),
        'references':  ('textList', 'oai_oi:oi/oi:references/text()'),
        'spatial':  ('textList', 'oai_oi:oi/oi:spatial/text()'),
        'attributionName':       ('textList', 'oai_oi:oi/oi:attributionName/text()'),
        'attributionURL':       ('textList', 'oai_oi:oi/oi:attributionURL/text()'),
        'license':       ('textList', 'oai_oi:oi/oi:license/text()'),
        #Zitten er niet in
        #'rights':      ('textList', 'oai_oi:oi/oi:rights/text()'),
        #'relation':    ('textList', 'oai_oi:oi/oi:relation/text()'),
        #'coverage':    ('textList', 'oai_oi:oi/oi:coverage/text()'),
        #'format':      ('textList', 'oai_oi:oi/oi:format/text()'),        
        },
        namespaces={
            'oi' : 'http://www.openbeelden.nl/oai/',
            'oai_oi' : 'http://www.openarchives.org/OAI/2.0/oai_dc/',
            'dc' : 'http://purl.org/dc/elements/1.1/',
            'dcterms' : 'http://purl.org/dc/terms',
        }
    )
    url = u'http://www.openbeelden.nl/feeds/oai/'

    registry = MetadataRegistry()
    registry.registerReader('oai_oi', oai_oi_reader)
    client = Client(url, registry)
    
    for record in client.listRecords(metadataPrefix='oai_oi'):
        processItem(record)
开发者ID:bymerej,项目名称:ts-multichill-bot,代码行数:46,代码来源:openbeelden_uploader.py

示例14: iter_items

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import listRecords [as 别名]
    def iter_items(self, partition):
        """ Partition is an OAI-PMH endpoint """

        # source = "oai:%s" % partition

        registry = MetadataRegistry()
        registry.registerReader('oai_dc', oai_dc_reader)
        client = Client(partition, registry)

        for record in client.listRecords(metadataPrefix='oai_dc'):
            header, metadata, _ = record

            if header.isDeleted():
                continue

            # _id = header.identifier()
            # date = header.datestamp()

            meta = metadata.getMap()

            # TODO: there are much validation and heuristics to be done here!

            # format0 = (meta.get("format") or [None])[0]
            # if not format0:
            #     continue

            # if format0 not in ("application/pdf", ):
            #     continue

            url0 = (meta.get("identifier") or [None])[0]

            if not url0:
                continue

            title0 = (meta.get("title") or [""])[0].encode("utf-8")
            desc0 = (meta.get("description") or [""])[0].encode("utf-8")

            # TODO: validate that the url0 is not on another domain?!
            yield url0, {}, "html", 2, """
                <html><head><title>%s</title></head><body>%s</body></html>
            """ % (title0, desc0)
开发者ID:bakztfuture,项目名称:cosr-back,代码行数:43,代码来源:oai.py

示例15: _listRecords

# 需要导入模块: from oaipmh.client import Client [as 别名]
# 或者: from oaipmh.client.Client import listRecords [as 别名]
 def _listRecords(self, baseUrl, metadataPrefix="oai_dc", **kwargs):
     # Generator to yield records from baseUrl in the given metadataPrefix
     # Add metatdataPrefix to args
     kwargs['metadataPrefix'] = metadataPrefix
     client = Client(baseUrl, self._mdRegistry)
     # Check that baseUrl actually represents an OAI-PMH target
     try:
         client.identify()
     except IndexError:
         raise NotOAIPMHBaseURLException(
             "{0} does not appear to be an OAI-PMH compatible base URL"
             "".format(baseUrl)
         )
     # Check server timestamp granularity support
     client.updateGranularity()
     for record in client.listRecords(**kwargs):
         # Unit test hotfix
         header, metadata, about = record
         # Fix pyoai returning a "b'...'" string for py3k
         if isinstance(metadata, str) and metadata.startswith("b'"):
             metadata = ast.literal_eval(metadata).decode("utf-8")
         yield (header, metadata, about)
开发者ID:bloomonkey,项目名称:oai-harvest,代码行数:24,代码来源:harvest.py


注:本文中的oaipmh.client.Client.listRecords方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。