当前位置: 首页>>代码示例>>Python>>正文


Python Client.updateGranularity方法代码示例

本文整理汇总了Python中oaipmh.client.Client.updateGranularity方法的典型用法代码示例。如果您正苦于以下问题:Python Client.updateGranularity方法的具体用法?Python Client.updateGranularity怎么用?Python Client.updateGranularity使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在oaipmh.client.Client的用法示例。


在下文中一共展示了Client.updateGranularity方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: insertAll

# Required import: from oaipmh.client import Client [as alias]
# Note: updateGranularity is a method of Client and cannot be imported by itself; call it on a Client instance.
def insertAll(time, time2):
    """Harvest arXivRaw records in a datestamp window and insert each one.

    Records are listed from ``URL`` with ``from_=time`` and ``until=time2``.
    For every record the title, categories, abstract, URL, creation date and
    authors are read from the record's metadata (element 1 of the record),
    echoed to stdout and passed to ``insert``. A record that fails any of
    these steps is printed after an ``ERROR`` line and counted; the error
    count is reported at the end.

    Args:
        time: Lower bound passed to ``listRecords`` as ``from_``.
        time2: Upper bound passed to ``listRecords`` as ``until``.
    """
    registry = MetadataRegistry()
    registry.registerReader('arXivRaw', arXivRaw_reader)
    client = Client(URL, registry)
    client.updateGranularity()
    # Named ``records`` rather than ``list`` so the builtin is not shadowed.
    records = client.listRecords(metadataPrefix='arXivRaw', from_=time, until=time2)
    errors = 0
    for record in records:
        try:
            fields = record[1]
            title = '\n'.join(fields['title'])
            # Category names have '-' replaced by '_' before being split.
            sr2 = str(' '.join(fields['categories']).replace('-', '_')).split(' ')
            abstract = '\n'.join(fields['abstract'])
            url = 'http://arxiv.org/abs/' + fields['id'][0]
            date = datetime.strptime(fields['created'][0], '%a, %d %b %Y %H:%M:%S %Z')
            authors = fields['authors'][0]
            abstract = abstract + '\nBy: ' + authors + '\nIn: ' + ', '.join(sr2)
            print(title)
            print(sr2)
            print(abstract)
            print(url)
            print(date)
            print(authors)
            insert(title + ' (' + authors + ')', str("fullarxiv"), url, abstract, date=date, cross_srs=sr2)
        except Exception:
            # Best effort: report the offending record and carry on.
            # ``Exception`` (not a bare except) lets Ctrl-C and SystemExit
            # still stop the harvest.
            print('ERROR')
            print(record)
            errors += 1
    print('Completed with %s errors' % errors)
开发者ID:rfurman,项目名称:arxaliv,代码行数:31,代码来源:autoinsert.py

示例2: arxiv_oai_scraper

# Required import: from oaipmh.client import Client [as alias]
# Note: updateGranularity is a method of Client and cannot be imported by itself; call it on a Client instance.
def arxiv_oai_scraper(subject, start, end, sleep_time=0):
    """Collect oai_dc metadata for one arXiv set between two datestamps.

    Args:
        subject: Set name passed to ``listRecords`` as ``set``.
        start: Lower bound passed as ``from_``.
        end: Upper bound passed as ``until``.
        sleep_time: Value handed to ``time.sleep`` after every record.

    Returns:
        A list with one dict per record that has metadata, keyed by
        ``title``, ``abstract``, ``date``, ``subject``, ``url`` and
        ``authors``.
    """
    # (output key, oai_dc field) pairs, read in this order for each record.
    field_map = (
        ("title", "title"),
        ("abstract", "description"),
        ("date", "date"),
        ("subject", "subject"),
        ("url", "identifier"),
        ("authors", "creator"),
    )

    base_url = "http://export.arxiv.org/oai2"
    output = []

    reader_registry = MetadataRegistry()
    reader_registry.registerReader('oai_dc', oai_dc_reader)
    oai_client = Client(base_url, reader_registry)
    oai_client.updateGranularity()

    harvested = oai_client.listRecords(
        metadataPrefix='oai_dc',
        set="{}".format(subject),
        from_=start,
        until=end,
    )

    for _header, md, _about in harvested:
        # Some records carry no metadata at all; those are left out.
        if md is not None:
            output.append({out_key: md[dc_key] for out_key, dc_key in field_map})

        # The pause applies to every record, including skipped ones.
        time.sleep(sleep_time)

    return output
开发者ID:jaimeps,项目名称:distributed-computing-arxiv,代码行数:32,代码来源:arxiv_scraper.py

示例3: _listRecords

# Required import: from oaipmh.client import Client [as alias]
# Note: updateGranularity is a method of Client and cannot be imported by itself; call it on a Client instance.
 def _listRecords(self, baseUrl, metadataPrefix="oai_dc", **kwargs):
     """Generate the records served by ``baseUrl`` in ``metadataPrefix``.

     Any extra keyword arguments are forwarded to ``Client.listRecords``
     together with the metadata prefix.
     """
     oai_client = Client(baseUrl, metadata_registry)
     # Check server timestamp granularity support
     oai_client.updateGranularity()
     harvested = oai_client.listRecords(metadataPrefix=metadataPrefix, **kwargs)
     for item in harvested:
         yield item
开发者ID:atomotic,项目名称:oai-harvest,代码行数:11,代码来源:harvest.py

示例4: _listRecords

# Required import: from oaipmh.client import Client [as alias]
# Note: updateGranularity is a method of Client and cannot be imported by itself; call it on a Client instance.
 def _listRecords(self, baseUrl, metadataPrefix="oai_dc", **kwargs):
     """Generate ``(header, metadata, about)`` tuples harvested from ``baseUrl``.

     Extra keyword arguments are forwarded to ``Client.listRecords`` along
     with the metadata prefix.

     Raises:
         NotOAIPMHBaseURLException: if ``identify()`` on the target raises
             ``IndexError``.
     """
     oai_client = Client(baseUrl, self._mdRegistry)
     # Probe the target first: make sure baseUrl is an OAI-PMH endpoint.
     try:
         oai_client.identify()
     except IndexError:
         raise NotOAIPMHBaseURLException(
             "{0} does not appear to be an OAI-PMH compatible base URL"
             "".format(baseUrl)
         )
     # Check server timestamp granularity support
     oai_client.updateGranularity()
     harvested = oai_client.listRecords(metadataPrefix=metadataPrefix, **kwargs)
     for header, metadata, about in harvested:
         # pyoai can hand back the text "b'...'" on py3k; turn that back
         # into the decoded string it stands for.
         looks_like_bytes_repr = isinstance(metadata, str) and metadata.startswith("b'")
         if looks_like_bytes_repr:
             metadata = ast.literal_eval(metadata).decode("utf-8")
         yield (header, metadata, about)
开发者ID:bloomonkey,项目名称:oai-harvest,代码行数:24,代码来源:harvest.py

示例5: now

# Required import: from oaipmh.client import Client [as alias]
# Note: updateGranularity is a method of Client and cannot be imported by itself; call it on a Client instance.
def now():
    """Return the current local time as a ``ctime``-formatted string."""
    current = datetime.now()
    return current.ctime()

# Announce on stderr when the harvest run begins.
print >>sys.stderr, "beginning @", now()


    

# URL handed to the OAI-PMH client below.
URL = "http://citeseerx.ist.psu.edu/oai2"

# Register the reader used for the 'oai_dc' metadata prefix.
registry = MetadataRegistry()
registry.registerReader('oai_dc', oai_dc_reader)

client = Client(URL, registry)
# NOTE(review): presumably aligns the client's datestamp granularity with
# what the server supports - confirm against the pyoai documentation.
client.updateGranularity()

store = Store()

# Pick the start datestamp: a command-line argument wins, then whatever
# store.last() returns (if truthy), otherwise the repository's earliest
# datestamp as reported by identify().
if len(sys.argv) > 1:
    start = datetime.strptime(sys.argv[1], '%Y-%m-%d') #2011-10-27, for instance
elif store.last():
    start = store.last()
else:
    start = client.identify().earliestDatestamp()

#try this and see if it works; if it does resumption tokens right, this should work fine.


# Both step sizes are one day; the code that uses them lies outside this excerpt.
chunk = timedelta(days=1)
oneday = timedelta(days=1)
开发者ID:fugu13,项目名称:citerec,代码行数:32,代码来源:harvest.py

示例6: run

# Required import: from oaipmh.client import Client [as alias]
# Note: updateGranularity is a method of Client and cannot be imported by itself; call it on a Client instance.
    def run(self):
        """Synchronise records from the configured OAI-PMH repository.

        If ``self.options['identifier']`` is set, only that record is
        synchronised. Otherwise a date range is resolved and walked in
        windows of ``self.settings['delta_days']`` days:

        * start: ``options['from_date']``, else the day after the ``to_date``
          stored in the synchronisation config, else
          ``settings['default_from_date']``;
        * end: ``options['to_date']``, else yesterday.

        After each window the batch is flushed, the synchronisation config is
        updated and the method sleeps for 20 seconds.

        Returns:
            The total number of records synchronised.
        """
        # Check that ElasticSearch is alive
        self.check_index()

        # If the user specified the --REBUILD flag, recreate the index
        if self.options['rebuild']:
            self.rebuild_index()

        # Connect to the repository
        registry = MetadataRegistry()
        registry.registerReader(self.settings["metadata_format"], self.settings["metadata_reader"])

        client = Client(self.settings["uri"], registry)
        identity = client.identify()

        print("Connected to repository: %s" % identity.repositoryName())

        # Granularity has to be updated first, otherwise requests fail with:
        # oaipmh.error.BadArgumentError: Max granularity is YYYY-MM-DD:2003-04-10T00:00:00Z
        client.updateGranularity()

        # Initialise some variables
        batcher = Batch.Batch()
        total_records = 0
        start = time.time()

        if self.options['identifier'] is not None:
            # The user asked for one specific record.
            total_records += self.synchronise_record(client, batcher, self.options['identifier'])
        else:
            # Synchronise by date range: the one given by the user or,
            # failing that, one derived from the last synchronisation.
            synchronisation_config = self.get_synchronisation_config()

            if self.options["from_date"] is not None:
                # If the user specified a from-date argument, use it
                from_date = self.options["from_date"]  # already a date (not a datetime)
            elif synchronisation_config is not None and "to_date" in synchronisation_config:
                # Else read the last synchronised to_date from the config, and add on a day
                from_date = dateutil.parser.parse(synchronisation_config["to_date"]).date() + timedelta(days=1)
            else:
                # Else use the default_from_date in the config
                from_date = dateutil.parser.parse(self.settings['default_from_date']).date()

            if self.options["to_date"] is not None:
                to_date = self.options["to_date"]  # already a date (not a datetime)
            else:
                to_date = (date.today() - timedelta(days=1))

            start_of_day = _time(hour=0, minute=0, second=0, microsecond=0)
            end_of_day = _time(hour=23, minute=59, second=59, microsecond=0)

            # Force the from_date to use time 00:00:00
            from_date = datetime.combine(from_date, start_of_day)

            # Force the to_date to use time 23:59:59
            to_date = datetime.combine(to_date, end_of_day)

            print("Synchronising from %s - %s" % (from_date, to_date))

            delta_days = self.settings['delta_days']
            pause_seconds = 20
            while from_date < to_date:
                next_date = datetime.combine(from_date.date() + timedelta(days=(delta_days - 1)), end_of_day)
                # With delta_days > 1 the last window could overshoot the
                # requested end; clamp it so nothing beyond to_date is
                # harvested or recorded as synchronised.
                next_date = min(next_date, to_date)
                number_of_records = self.synchronise_period(client, batcher, from_date, next_date)
                batcher.clear()  # Store the records in elasticsearch
                self.put_synchronisation_config(from_date, next_date, number_of_records)
                from_date += timedelta(days=delta_days)
                total_records += number_of_records

                # Pause so as not to get banned.
                print("Sleeping for %i seconds so as not to get banned." % pause_seconds)
                time.sleep(pause_seconds)

        # Store the records in the index
        batcher.clear()

        # Print out some statistics
        time_spent = time.time() - start
        print('Total time spent: %d seconds' % (time_spent))

        if time_spent > 0.001:  # careful as its not an integer
            print('Total records synchronised: %i records (%d records/second)' % (total_records, (total_records/time_spent)))
        else:
            print('Total records synchronised: %i records' % (total_records))
        return total_records
开发者ID:HeinrichHartmann,项目名称:OpenCitationsCorpus,代码行数:93,代码来源:OpenCitationsImportLibrary.py

示例7: oaiSpider

# Required import: from oaipmh.client import Client [as alias]
# Note: updateGranularity is a method of Client and cannot be imported by itself; call it on a Client instance.
def oaiSpider(subject="hep-ex", section="physics", start=None, end=None, sleep_time = 0):
    '''
    Pull articles using the Open Archives Initiative protocol

    subject    - String defining the subset of the main section
    section    - String defining the main section (typically physics or nothing)
    start      - A datetime.datetime object restricting the starting date of returned articles
    end        - A datetime.datetime object restricting the ending date of the returned articles
    sleep_time - A number specifying how many seconds to wait between the record
                 queries (it is passed straight to time.sleep)

    Examples

       oaiSpider("hep-ex", "physics")
       ==> returns all HEP experiment articles

       oaiSpider("cs", "", datetime(2011, 6, 24))
       ==> returns all computer science articles submitted after June 24th, 2011

       oaiSpider("hep-ph", "physics", None, datetime(2011, 6, 24))
       ==> returns all HEP phenomenology articles submitted before June 24th, 2011

    Returns a list of dictionaries containing the article metadata
    (keys: title, abstract, date, subject, url, authors). The running record
    count and each dictionary are printed as they are collected.
    '''

    from oaipmh.client import Client
    from oaipmh.metadata import MetadataRegistry, oai_dc_reader

    base_url = "http://export.arxiv.org/oai2"
    output = []

    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(base_url, registry)
    client.updateGranularity()

    # The set name is "<section>:<subject>"; an empty section gives just the subject.
    if section is None:
        section = ""
    if section and not section.endswith(":"):
        section += ":"

    # NOTE: the original author reported errors from the OAI-PMH module when
    # the from_ or until keys are used.
    records = client.listRecords(
        metadataPrefix='oai_dc',
        set='%s%s' % (section, subject),
        from_=start,
        until=end,
    )

    for (header, metadata, aux) in records:

        # Records without metadata cannot be indexed below; skip them
        # instead of crashing, but keep the pause between records.
        # NOTE(review): presumably these are deleted records - confirm against pyoai.
        if metadata is None:
            time.sleep(sleep_time)
            continue

        # Number of articles collected so far.
        print(len(output))

        output.append({"title"    : cleanText(metadata["title"][0]),
                       "abstract" : cleanText(metadata["description"][0]),
                       "date"     : convertDate(max(metadata["date"])),
                       "subject"  : subject,
                       "url"      : metadata["identifier"][0],
                       "authors"  : "; ".join(metadata['creator']),
                       })

        print(output[-1])

        time.sleep(sleep_time)

    return output
开发者ID:kinnskogr,项目名称:buzzArxiv,代码行数:81,代码来源:pullFeed.py

示例8: OpenBeeldenDataLoader

# Required import: from oaipmh.client import Client [as alias]
# Note: updateGranularity is a method of Client and cannot be imported by itself; call it on a Client instance.

#.........这里部分代码省略.........
		    'subject':     ('textList', 'oai_oi:oi/oi:subject/text()'),
		    'description': ('textList', 'oai_oi:oi/oi:description/text()'),
		    'abstract': ('textList', 'oai_oi:oi/oi:abstract/text()'),
		    'publisher':   ('textList', 'oai_oi:oi/oi:publisher/text()'),
		    'contributor': ('textList', 'oai_oi:oi/oi:contributor/text()'),
		    'date':        ('textList', 'oai_oi:oi/oi:date/text()'),
		    'type':        ('textList', 'oai_oi:oi/oi:type/text()'),
		    'extent':        ('textList', 'oai_oi:oi/oi:extent/text()'),
		    'medium':        ('textList', 'oai_oi:oi/oi:medium/text()'),
		    'identifier':  ('textList', 'oai_oi:oi/oi:identifier/text()'),
		    'source':      ('textList', 'oai_oi:oi/oi:source/text()'),
		    'language':    ('textList', 'oai_oi:oi/oi:language/text()'),
		    'references':    ('textList', 'oai_oi:oi/oi:references/text()'),
		    'spatial':    ('textList', 'oai_oi:oi/oi:spatial/text()'),
		    'attributionName':    ('textList', 'oai_oi:oi/oi:attributionName/text()'),
		    'attributionURL':    ('textList', 'oai_oi:oi/oi:attributionURL/text()'),
		    'license':      ('textList', 'oai_oi:oi/oi:license/text()')
		    },

		    namespaces={
		    	'oai_oi': 'http://www.openbeelden.nl/feeds/oai/', #'http://www.openarchives.org/OAI/2.0/oai_oi/',
		    	'oi': 'http://www.openbeelden.nl/oai/'
		    }
		)

		URL = 'http://www.openbeelden.nl/feeds/oai/'

		#Initialize the OAI client
		self.registry = MetadataRegistry()
		self.registry.registerReader('oai_oi', oai_oi_reader)
		self.client = Client(URL, self.registry)

		#Test if the connection to the OAI-PMH provider works
		x = self.client.updateGranularity()
		x = self.client.identify()
		print 'identity %s' % x.repositoryName()
		print 'identity %s' % x.protocolVersion()
		print 'identity %s' % x.baseURL()

		"""
		for s in client.listSets():
			print s
		"""

		#initialize the OpenSKOSHandler
		self.openSKOSHandler = OpenSKOSHandler()

	def reindex(self, provider = None):
		setupOAIPMHConnection()
		i = 0
		extent = None
		item = None
		identifier = None
		for rec in self.client.listRecords(metadataPrefix=u'oai_oi', set=u'beeldengeluid'):#stichting_natuurbeelden, beeldengeluid
			header, metadata, about = rec

			extent = metadata.getField('extent')[0]
			item = {
				'id' : header.identifier(),
				'identifier' : self.getFieldData(metadata, 'identifier'),
				'title' : self.getFieldData(metadata, 'title'),
				'alternative' : self.getFieldData(metadata, 'alternative'),
				'creator' : self.getFieldData(metadata, 'creator'),
				'subject' : self.getFieldData(metadata, 'subject'),
				'description' : self.getFieldData(metadata, 'description'),
				'abstract' : self.getFieldData(metadata, 'abstract'),
开发者ID:beeldengeluid,项目名称:linkedtv-editortool,代码行数:70,代码来源:OpenBeeldenDataLoader.py


注:本文中的oaipmh.client.Client.updateGranularity方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。