本文整理汇总了 Python 中 ckanext.harvest.model.HarvestJob.source 方法的典型用法代码示例。如果您正苦于以下问题：Python HarvestJob.source 方法的具体用法？Python HarvestJob.source 怎么用？Python HarvestJob.source 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 ckanext.harvest.model.HarvestJob 的用法示例。
在下文中一共展示了HarvestJob.source方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_zaincremental_harvester
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import source [as 别名]
def test_zaincremental_harvester(self):
    """Gather and fetch with ``OAIPMHHarvester`` using an incremental config.

    A harvest job whose source config is ``{"incremental":"True"}`` is
    gathered, the first gathered harvest object is fetched, and its JSON
    content must contain a truthy ``records`` entry.
    """
    # Replace the OAI-PMH client class with a mock that always hands back a
    # client bound to the local BatchingServer built here.
    # NOTE(review): the replacement is never undone, so it stays in effect
    # for whatever runs after this test -- confirm that is intended.
    client = CKANServer()
    metadata_registry = metadata.MetadataRegistry()
    metadata_registry.registerReader('oai_dc', oai_dc_reader)
    metadata_registry.registerWriter('oai_dc', oai_dc_writer)
    serv = BatchingServer(client, metadata_registry=metadata_registry)
    oaipmh.client.Client = mock.Mock(return_value=ServerClient(serv, metadata_registry))

    harv = OAIPMHHarvester()
    harvest_job = HarvestJob()
    harvest_job.source = HarvestSource()
    harvest_job.source.title = "Test"
    harvest_job.source.url = "http://helda.helsinki.fi/oai/request"
    # The gather start is placed one day in the future ...
    harvest_job.gather_started = datetime.now() + timedelta(days=1)
    harvest_job.source.config = '{"incremental":"True"}'
    harvest_job.source.type = "OAI-PMH"
    Session.add(harvest_job)

    # ... and the package revision two days in the future, i.e. later than
    # the gather start.
    rev = model.repo.new_revision()
    rev.timestamp = datetime.now() + timedelta(days=2)
    pkg = Package(name='footest', revision=rev)
    Session.add(pkg)
    pkg.save()

    # Attach the package to the existing 'roger' group.
    roger = Group.get('roger')
    roger.add_package_by_name('footest')
    Session.add(roger)
    roger.save()

    gathered = harv.gather_stage(harvest_job)
    harvest_object = HarvestObject.get(gathered[0])
    harv.fetch_stage(harvest_object)
    harvobj = json.loads(harvest_object.content)
    # assertTrue replaces the deprecated assert_ alias.
    self.assertTrue(harvobj['records'])
示例2: harvest_job_create
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import source [as 别名]
def harvest_job_create(context, data_dict):
    """Create and save a harvest job for the source named in ``data_dict``.

    :param context: passed on to ``check_access``,
        ``_check_for_existing_jobs`` and ``harvest_job_dictize``
    :param data_dict: must contain the key ``source_id``
    :returns: the result of ``harvest_job_dictize`` for the saved job
    :raises NotFound: if ``HarvestSource.get`` finds no source
    :raises Exception: if the source is not active
    :raises HarvestJobExists: if ``_check_for_existing_jobs`` reports a job
    """
    log.info('Harvest job create: %r', data_dict)
    check_access('harvest_job_create', context, data_dict)

    source_id = data_dict['source_id']
    harvest_source = HarvestSource.get(source_id)

    # Guard 1: the source has to exist.
    if not harvest_source:
        log.warn('Harvest source %s does not exist', source_id)
        raise NotFound('Harvest source %s does not exist' % source_id)

    # Guard 2: the source has to be active.
    if not harvest_source.active:
        log.warn('Harvest job cannot be created for inactive source %s', source_id)
        raise Exception('Can not create jobs on inactive sources')

    # Guard 3: no unrun or currently running job may exist for the source.
    pending_job = _check_for_existing_jobs(context, source_id)
    if pending_job:
        log.warn('There is already an unrun job %r for this source %s', pending_job, source_id)
        raise HarvestJobExists('There already is an unrun job for this source')

    new_job = HarvestJob()
    new_job.source = harvest_source
    new_job.save()
    log.info('Harvest job saved %s', new_job.id)

    return harvest_job_dictize(new_job, context)
示例3: harvest_job_create
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import source [as 别名]
def harvest_job_create(context, data_dict):
    """Create and save a harvest job for the source named in ``data_dict``.

    :param context: passed on to ``check_access``, ``harvest_job_list`` and
        ``harvest_job_dictize``
    :param data_dict: must contain the key ``source_id``
    :returns: the result of ``harvest_job_dictize`` for the saved job
    :raises NotFound: if ``HarvestSource.get`` finds no source
    :raises HarvestError: if the source is not active, or if
        ``harvest_job_list`` returns any job with status ``u'New'`` for it
    """
    log.info('Harvest job create: %r', data_dict)
    check_access('harvest_job_create', context, data_dict)

    source_id = data_dict['source_id']

    # Check if source exists
    source = HarvestSource.get(source_id)
    if not source:
        log.warn('Harvest source %s does not exist', source_id)
        raise NotFound('Harvest source %s does not exist' % source_id)

    # Check if the source is active
    if not source.active:
        log.warn('Harvest job cannot be created for inactive source %s', source_id)
        raise HarvestError('Can not create jobs on inactive sources')

    # Check if there already is an unrun job for this source.  The lookup
    # uses its own dict so the ``data_dict`` argument is not rebound.
    unrun_jobs_query = {
        'source_id': source_id,
        'status': u'New',
    }
    exists = harvest_job_list(context, unrun_jobs_query)
    if exists:
        log.warn('There is already an unrun job %r for this source %s', exists, source_id)
        raise HarvestError('There already is an unrun job for this source')

    job = HarvestJob()
    job.source = source
    job.save()
    log.info('Harvest job saved %s', job.id)

    return harvest_job_dictize(job, context)
示例4: test_0harvester_url_error
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import source [as 别名]
def test_0harvester_url_error(self):
    """``MetadataHarvester.gather_stage`` must return ``None`` for this source.

    The source URL is ``http://foo`` and ``urllib2.urlopen`` is set to
    ``realopen`` before gathering.
    """
    self.harv = MetadataHarvester()
    self.harv.config = "{}"

    harvest_job = HarvestJob()
    harvest_job.source = HarvestSource()
    harvest_job.source.title = "Test"
    harvest_job.source.url = "http://foo"
    harvest_job.source.type = "Metadata"

    # NOTE(review): realopen is presumably the saved, unpatched urlopen --
    # confirm against the module header.
    urllib2.urlopen = realopen

    # Identity test instead of ``== None``; assertTrue replaces the
    # deprecated assert_ alias.
    self.assertTrue(self.harv.gather_stage(harvest_job) is None)
示例5: _create_harvester_info
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import source [as 别名]
def _create_harvester_info(self, config=True):
    """Build an OAI-PMH harvest job and harvester and add the job to Session.

    :param config: when truthy, the source config is set to
        ``'{"query": ""}'``; otherwise the config attribute is not assigned
    :returns: tuple ``(harvest_job, harvester)``
    """
    # Called for its effect only; the returned revision is not used here.
    model.repo.new_revision()

    harvester = OAIPMHHarvester()
    job = HarvestJob()

    source = HarvestSource()
    job.source = source
    source.title = "Test"
    source.url = "http://helda.helsinki.fi/oai/request"
    if config:
        source.config = '{"query": ""}'
    source.type = "OAI-PMH"

    Session.add(job)
    return job, harvester
示例6: test_harvester_1gather_ddi
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import source [as 别名]
def test_harvester_1gather_ddi(self):
    """Gathering DDI data must give one object and select ``DDIHarvester``.

    ``urllib2.urlopen`` is replaced by a mock whose side effect is
    ``self._side_effect_ddi_datas``.  The harvester and the gathered ids are
    stored on ``self`` as ``harv`` and ``gathered``.
    """
    self.harv = MetadataHarvester()
    self.harv.config = "{}"

    harvest_job = HarvestJob()
    harvest_job.source = HarvestSource()
    harvest_job.source.title = "Test"
    harvest_job.source.url = "http://foo"
    harvest_job.source.type = "Metadata"

    urllib2.urlopen = mock.Mock(side_effect=self._side_effect_ddi_datas)
    self.gathered = self.harv.gather_stage(harvest_job)

    # assertEqual reports both values on failure; assertTrue replaces the
    # deprecated assert_ alias.
    self.assertEqual(len(self.gathered), 1)
    self.assertTrue(isinstance(self.harv.harvester, DDIHarvester))
示例7: _create_harvester
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import source [as 别名]
def _create_harvester(self, config=True):
    """Build a DDI harvester and a harvest job and add the job to Session.

    :param config: when truthy, the source config is the empty string;
        otherwise it is ``None``
    :returns: tuple ``(harvester, harvest_job)``
    """
    harvester = DDIHarvester()
    harvester.config = "{}"

    job = HarvestJob()
    source = HarvestSource()
    job.source = source
    source.title = "Test"
    source.url = "http://foo"
    source.config = '' if config else None
    source.type = "DDI"

    Session.add(job)
    return harvester, job
示例8: run_job_synchronously
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import source [as 别名]
def run_job_synchronously(self):
    """Create a harvest job for one source and run all of its stages here.

    The source id is read from ``self.args[1]``.  The job is gathered by the
    first ``IHarvester`` plugin whose ``info()['name']`` equals the source
    type, every gathered object is passed to ``fetch_and_import_stages``,
    and the source package is re-indexed afterwards.

    The job is saved with status ``"Done"`` once all objects are processed,
    or ``"Error"`` if an exception is raised before that point; the
    exception itself still propagates.

    Prints a message and returns without creating a job when the source
    does not exist or when no harvester handles its type.
    """
    import datetime
    from ckan import model
    from ckan.plugins import PluginImplementations
    from ckanext.harvest.interfaces import IHarvester
    from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject
    from ckanext.harvest.queue import fetch_and_import_stages
    from ckan.lib.search.index import PackageSearchIndex

    package_index = PackageSearchIndex()

    source_id = unicode(self.args[1])
    source = HarvestSource.get(source_id)
    if not source:
        # Without this guard a missing source fails on ``source.type`` below.
        print("Harvest source %s does not exist" % source_id)
        return

    # for/else: the else branch only runs when no plugin matched (no break).
    for harvester in PluginImplementations(IHarvester):
        if harvester.info()['name'] == source.type:
            break
    else:
        print("No harvester found to handle the job.")
        return

    job = HarvestJob()
    job.source = source
    job.status = "Running"
    job.gather_started = datetime.datetime.utcnow()
    job.save()

    try:
        harvest_object_ids = harvester.gather_stage(job)
        job.gather_finished = datetime.datetime.utcnow()
        job.save()

        for obj_id in harvest_object_ids:
            obj = HarvestObject.get(obj_id)
            obj.retry_times += 1
            obj.save()
            fetch_and_import_stages(harvester, obj)

        job.finished = datetime.datetime.utcnow()
        job.status = "Done"
        job.save()

        # And reindex the harvest source so it gets its counts right.
        # Must call update on a data_dict as returned by package_show, not
        # the class object.
        package_index.index_package(
            get_action('package_show')(
                {'validate': False, 'ignore_auth': True},
                {'id': source.id}))
    finally:
        # Always stamp and save the job; anything that did not reach "Done"
        # is recorded as an error.
        job.finished = datetime.datetime.utcnow()
        if job.status != "Done":
            job.status = "Error"
        job.save()
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import source [as 别名]
def harvest_job_create(context, data_dict):
    '''
    Create a Harvest Job for a Harvest Source and, unless told otherwise,
    run it by sending it to the gather queue.

    :param source_id: id of the harvest source to create a job for
    :type source_id: string
    :param run: whether to also run it or not (default: True)
    :type run: bool
    '''
    log.info('Harvest job create: %r', data_dict)
    check_access('harvest_job_create', context, data_dict)

    source_id = data_dict['source_id']
    should_run = data_dict.get('run', True)

    harvest_source = HarvestSource.get(source_id)

    # The source has to exist ...
    if not harvest_source:
        log.warn('Harvest source %s does not exist', source_id)
        raise toolkit.NotFound('Harvest source %s does not exist' % source_id)

    # ... it has to be active ...
    if not harvest_source.active:
        log.warn('Harvest job cannot be created for inactive source %s',
                 source_id)
        raise HarvestSourceInactiveError('Can not create jobs on inactive sources')

    # ... and it must not already have an unrun or currently running job.
    pending_job = _check_for_existing_jobs(context, source_id)
    if pending_job:
        log.warn('There is already an unrun job %r for this source %s',
                 pending_job, source_id)
        raise HarvestJobExists('There already is an unrun job for this source')

    new_job = HarvestJob()
    new_job.source = harvest_source
    new_job.save()
    log.info('Harvest job saved %s', new_job.id)

    if should_run:
        send_to_gather_queue = toolkit.get_action('harvest_send_job_to_gather_queue')
        send_to_gather_queue(context, {'id': new_job.id})

    return harvest_job_dictize(new_job, context)
示例10: test_harvester_4gather_oaipmh
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import source [as 别名]
def test_harvester_4gather_oaipmh(self):
    """Gathering must give several objects and select ``OAIPMHHarvester``.

    ``oaipmh.client.Client`` is replaced by a mock that returns a client
    bound to a local ``BatchingServer``.  The harvester and the gathered ids
    are stored on ``self`` as ``harv`` and ``gathered``.
    """
    self.harv = MetadataHarvester()
    self.harv.config = "{}"

    harvest_job = HarvestJob()
    harvest_job.source = HarvestSource()
    harvest_job.source.title = "Test"
    harvest_job.source.url = "http://foo"
    harvest_job.source.type = "Metadata"

    client = CKANServer()
    metadata_registry = metadata.MetadataRegistry()
    metadata_registry.registerReader('oai_dc', oai_dc_reader)
    metadata_registry.registerWriter('oai_dc', oai_dc_writer)
    serv = BatchingServer(client, metadata_registry=metadata_registry)
    oaipmh.client.Client = mock.Mock(return_value=ServerClient(serv, metadata_registry))

    self.gathered = self.harv.gather_stage(harvest_job)

    # assertTrue replaces the deprecated assert_ alias.
    self.assertTrue(len(self.gathered) > 1)
    self.assertTrue(isinstance(self.harv.harvester, OAIPMHHarvester))
示例11: test_zzcomplete
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import source [as 别名]
def test_zzcomplete(self):
    """Time a full DDI gather/fetch/import run; always skipped.

    :raises SkipTest: unconditionally, as the first statement
    """
    raise SkipTest('Takes ages, do not run')

    # Everything below is unreachable while the skip above is in place.
    urllib2.urlopen = realopen
    harv = DDIHarvester()
    harv.config = "{}"

    harvest_job = HarvestJob()
    harvest_job.source = HarvestSource()
    harvest_job.source.title = "Test"
    harvest_job.source.url = "http://www.fsd.uta.fi/fi/aineistot/luettelo/fsd-ddi-records-uris-fi.txt"
    harvest_job.source.config = ''
    harvest_job.source.type = "DDI"
    Session.add(harvest_job)

    gathered = harv.gather_stage(harvest_job)
    diffs = []
    for gath in gathered:
        harvest_object = HarvestObject.get(gath)
        print(json.loads(harvest_object.content)['url'])
        before = datetime.now()
        harv.fetch_stage(harvest_object)
        harv.import_stage(harvest_object)
        diff = datetime.now() - before
        print(diff)
        diffs.append(diff)

    # The start value must be a timedelta *instance*; passing the class
    # itself makes sum() fail on the first addition.
    print(sum(diffs, timedelta()))
示例12: Exception
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import source [as 别名]
if not source_pkg:
log.error('Harvest source %s does not exist', source_name)
return
source_id = source_pkg.id
source = HarvestSource.get(source_id)
if not source:
log.error('Harvest source %s does not exist', source_id)
return
# Check if the source is active
if not source.active:
log.warn('Harvest job cannot be created for inactive source %s', source_id)
raise Exception('Can not create jobs on inactive sources')
job = HarvestJob()
job.source = source
job.save()
context['harvest_job'] = job
print str(datetime.datetime.now()) + ' Start to import doi datasets.'
print 'Datasets found on remote doi server: ' + str(len(collected_ids)) + ', on local: ' + str(len(existing_ids)) + '.'
ids_to_add = collected_ids - existing_ids
print 'Datasets to be added as new: ' + str(len(ids_to_add)) + '.'
for num, doi_id in enumerate(ids_to_add):
context.pop('package', None)
context.pop('group', None)
try:
new_package = self.get_doi_package(url_dataset + doi_id)
new_harvestobj = self.get_doi_harvestobj(url_harvestobj + to_import[doi_id])
except Exception, e: