本文整理汇总了Python中ckanext.harvest.model.HarvestJob类的典型用法代码示例。如果您正苦于以下问题：Python HarvestJob类的具体用法？Python HarvestJob怎么用？Python HarvestJob使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了HarvestJob类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_gather
def test_gather(self):
    '''
    Run the gather stage for a freshly saved "cmdi" source and job.

    The harvester's client is swapped for ``_FakeClient`` before gathering.
    The test makes no assertions of its own: it passes as long as
    ``gather_stage`` does not raise.
    '''
    cmdi_source = HarvestSource(url="http://localhost/test_cmdi", type="cmdi")
    cmdi_source.save()

    gather_job = HarvestJob(source=cmdi_source)
    gather_job.save()

    self.harvester.client = _FakeClient()
    self.harvester.gather_stage(gather_job)
示例2: test_zaincremental_harvester
def test_zaincremental_harvester(self):
    '''
    Gather and fetch with an OAI-PMH source configured as incremental.

    The job's ``gather_started`` is set one day in the future and the
    package's revision timestamp two days in the future. The test then
    checks that the first gathered harvest object, once fetched, has
    non-empty ``records`` in its JSON content.
    '''
    client = CKANServer()
    metadata_registry = metadata.MetadataRegistry()
    metadata_registry.registerReader('oai_dc', oai_dc_reader)
    metadata_registry.registerWriter('oai_dc', oai_dc_writer)
    serv = BatchingServer(client, metadata_registry=metadata_registry)
    # NOTE: this rebinds the client class on the module and is not restored
    # by this test.
    oaipmh.client.Client = mock.Mock(
        return_value=ServerClient(serv, metadata_registry))

    harv = OAIPMHHarvester()
    harvest_job = HarvestJob()
    harvest_job.source = HarvestSource()
    harvest_job.source.title = "Test"
    harvest_job.source.url = "http://helda.helsinki.fi/oai/request"
    harvest_job.gather_started = datetime.now() + timedelta(days=1)
    harvest_job.source.config = '{"incremental":"True"}'
    harvest_job.source.type = "OAI-PMH"
    Session.add(harvest_job)

    rev = model.repo.new_revision()
    rev.timestamp = datetime.now() + timedelta(days=2)
    pkg = Package(name='footest', revision=rev)
    Session.add(pkg)
    pkg.save()

    # presumably the 'roger' group is created by a fixture elsewhere --
    # TODO confirm
    roger = Group.get('roger')
    roger.add_package_by_name('footest')
    Session.add(roger)
    roger.save()

    gathered = harv.gather_stage(harvest_job)
    harvest_object = HarvestObject.get(gathered[0])
    harv.fetch_stage(harvest_object)
    harvobj = json.loads(harvest_object.content)
    # assertTrue replaces the deprecated ``assert_`` alias.
    self.assertTrue(harvobj['records'])
示例3: harvest_job_create
def harvest_job_create(context, data_dict):
    '''
    Create a new harvest job for an existing, active harvest source.

    :param source_id: id of the harvest source to create a job for
        (read from ``data_dict``)
    :returns: the new job, as produced by ``harvest_job_dictize``
    :raises NotFound: if the harvest source does not exist
    :raises Exception: if the harvest source is inactive
    :raises HarvestJobExists: if ``_check_for_existing_jobs`` reports a job
        for this source
    '''
    log.info('Harvest job create: %r', data_dict)
    check_access('harvest_job_create', context, data_dict)

    source_id = data_dict['source_id']

    # Check if source exists
    source = HarvestSource.get(source_id)
    if not source:
        # ``Logger.warn`` is a deprecated alias; use ``warning``.
        log.warning('Harvest source %s does not exist', source_id)
        raise NotFound('Harvest source %s does not exist' % source_id)

    # Check if the source is active
    if not source.active:
        log.warning('Harvest job cannot be created for inactive source %s',
                    source_id)
        raise Exception('Can not create jobs on inactive sources')

    # Check if there already is an unrun or currently running job for this
    # source
    exists = _check_for_existing_jobs(context, source_id)
    if exists:
        log.warning('There is already an unrun job %r for this source %s',
                    exists, source_id)
        raise HarvestJobExists('There already is an unrun job for this source')

    job = HarvestJob()
    job.source = source
    job.save()
    log.info('Harvest job saved %s', job.id)
    return harvest_job_dictize(job, context)
示例4: harvest_job_create
def harvest_job_create(context, data_dict):
    '''
    Create a new harvest job for an existing, active harvest source.

    :param source_id: id of the harvest source to create a job for
        (read from ``data_dict``)
    :returns: the new job, as produced by ``harvest_job_dictize``
    :raises NotFound: if the harvest source does not exist
    :raises HarvestError: if the source is inactive, or if a job with
        status "New" already exists for it
    '''
    log.info('Harvest job create: %r', data_dict)
    check_access('harvest_job_create', context, data_dict)

    source_id = data_dict['source_id']

    # Check if source exists
    source = HarvestSource.get(source_id)
    if not source:
        # ``Logger.warn`` is a deprecated alias; use ``warning``.
        log.warning('Harvest source %s does not exist', source_id)
        raise NotFound('Harvest source %s does not exist' % source_id)

    # Check if the source is active
    if not source.active:
        log.warning('Harvest job cannot be created for inactive source %s',
                    source_id)
        raise HarvestError('Can not create jobs on inactive sources')

    # Check if there already is an unrun job for this source. A separate
    # filter dict is used so the caller's ``data_dict`` name is not rebound.
    unrun_filter = {
        'source_id': source_id,
        'status': u'New',
    }
    exists = harvest_job_list(context, unrun_filter)
    if exists:
        log.warning('There is already an unrun job %r for this source %s',
                    exists, source_id)
        raise HarvestError('There already is an unrun job for this source')

    job = HarvestJob()
    job.source = source
    job.save()
    log.info('Harvest job saved %s', job.id)
    return harvest_job_dictize(job, context)
示例5: harvest_job_abort
def harvest_job_abort(context, data_dict):
    '''
    Abort the most recent harvest job of a harvest source.

    Given a harvest source_id, the latest job of that source is looked up
    and, unless it is already "Finished", its status is set to "Finished".
    Every harvest object of that job whose state is neither "COMPLETE" nor
    "ERROR" is then set to "ERROR", so any left in limbo are cleaned up.
    Does not actually stop any queued or running harvest fetches/objects.

    :param source_id: the name or id of the harvest source with a job to abort
    :type source_id: string
    :returns: the job, as produced by ``harvest_job_dictize``
    :raises NotFound: if the source has no jobs
    '''
    check_access('harvest_job_abort', context, data_dict)

    model = context['model']

    source = harvest_source_show(context, {'id': data_dict.get('source_id')})

    # Set the HarvestJob status to 'Finished'.
    # Do not use harvest_job_list since it can use a lot of memory.
    latest_job_query = model.Session.query(HarvestJob)
    latest_job_query = latest_job_query.filter_by(source_id=source['id'])
    latest_job_query = latest_job_query.order_by(HarvestJob.created.desc())
    last_job = latest_job_query.first()
    if not last_job:
        raise NotFound('Error: source has no jobs')

    show_job = get_action('harvest_job_show')
    job = show_job(context, {'id': last_job.id})

    if job['status'] == 'Finished':
        log.info('Harvest job unchanged. Source %s status is: "%s"',
                 job['id'], job['status'])
    else:
        # i.e. New or Running
        finished = 'Finished'
        unfinished_job = HarvestJob.get(job['id'])
        unfinished_job.status = finished
        model.repo.commit_and_remove()
        log.info('Harvest job changed status from "%s" to "%s"',
                 job['status'], finished)

    # Set unfinished HarvestObjects to ERROR. The job is fetched again
    # because the session was removed by the commit above.
    job_obj = HarvestJob.get(job['id'])
    for harvest_object in job_obj.objects:
        previous_state = harvest_object.state
        if previous_state in ('COMPLETE', 'ERROR'):
            log.info('Harvest object not changed from "%s": %s',
                     harvest_object.state, harvest_object.id)
            continue
        harvest_object.state = 'ERROR'
        log.info('Harvest object changed state from "%s" to "%s": %s',
                 previous_state, harvest_object.state, harvest_object.id)

    model.repo.commit_and_remove()

    job_obj = HarvestJob.get(job['id'])
    return harvest_job_dictize(job_obj, context)
示例6: test_0harvester_url_error
def test_0harvester_url_error(self):
    '''
    The gather stage must return None for the source URL "http://foo".

    ``urllib2.urlopen`` is rebound to ``realopen`` first (presumably the
    unpatched function saved elsewhere in the test module -- TODO confirm).
    '''
    self.harv = MetadataHarvester()
    self.harv.config = "{}"

    harvest_job = HarvestJob()
    harvest_job.source = HarvestSource()
    harvest_job.source.title = "Test"
    harvest_job.source.url = "http://foo"
    harvest_job.source.type = "Metadata"

    urllib2.urlopen = realopen

    # Identity check instead of ``== None``, and assertTrue instead of the
    # deprecated ``assert_`` alias.
    self.assertTrue(self.harv.gather_stage(harvest_job) is None)
示例7: test_import
def test_import(self):
    '''
    Import two CMDI fixtures and then delete the second imported package.

    Checks the package fields produced from "cmdi_1.xml" and
    "cmdi_2.xml", then runs the import stage on a harvest object whose
    ``report_status`` is "deleted" and checks the package state becomes
    'deleted'.
    '''
    source = HarvestSource(url="http://localhost/test_cmdi", type="cmdi")
    source.save()
    job = HarvestJob(source=source)
    job.save()

    # First fixture. assertEqual replaces the deprecated ``assertEquals``
    # alias throughout this test.
    harvest_object = self._run_import("cmdi_1.xml", job)
    self.assertEqual(
        len(harvest_object.errors), 0,
        u"\n".join(unicode(error.message)
                   for error in (harvest_object.errors or [])))

    package = get_action('package_show')(
        {'user': 'harvest'}, {'id': 'urn-nbn-fi-lb-20140730180'})

    self.assertEqual(package.get('id', None),
                     'http://urn.fi/urn:nbn:fi:lb-20140730180')
    self.assertEqual(package.get('name', None), 'urn-nbn-fi-lb-20140730180')
    self.assertEqual(package.get('notes', None),
                     u'{"eng": "Test description"}')
    self.assertEqual(package.get('version', None), '2012-09-07')
    self.assertEqual(package.get('title', []), '{"eng": "Longi Corpus"}')
    self.assertEqual(package.get('license_id', None), 'undernegotiation')

    provider = config['ckan.site_url']
    expected_pid = {u'id': u'http://islrn.org/resources/248-895-085-557-0',
                    u'provider': provider,
                    u'type': u'metadata'}
    self.assertTrue(expected_pid in package.get('pids'))

    model.Session.flush()

    # Second fixture
    harvest_object = self._run_import("cmdi_2.xml", job)
    self.assertEqual(
        len(harvest_object.errors), 0,
        u"\n".join(unicode(error.message)
                   for error in (harvest_object.errors or [])))

    package = get_action('package_show')(
        {'user': 'harvest'}, {'id': 'urn-nbn-fi-lb-20140730186'})

    self.assertEqual(package['temporal_coverage_begin'], '1880')
    self.assertEqual(package['temporal_coverage_end'], '1939')
    self.assertEqual(package.get('license_id', None), 'other')

    # Delete package
    harvest_object = HarvestObject()
    harvest_object.content = None
    harvest_object.id = "test-cmdi-delete"
    harvest_object.guid = "test-cmdi-delete"
    harvest_object.source = job.source
    harvest_object.harvest_source_id = None
    harvest_object.job = job
    harvest_object.package_id = package.get('id')
    harvest_object.report_status = "deleted"
    harvest_object.save()

    self.harvester.import_stage(harvest_object)

    model.Session.flush()
    self.assertEqual(model.Package.get(package['id']).state, 'deleted')
示例8: _create_harvester_info
def _create_harvester_info(self, config=True):
    '''
    Build an OAI-PMH harvester and a harvest job added to the session.

    :param config: when truthy, the source config is set to
        ``'{"query": ""}'``; otherwise the config is left untouched
    :returns: a ``(harvest_job, harvester)`` tuple
    '''
    # The returned revision is not used here; the call is kept as in the
    # original setup.
    revision = model.repo.new_revision()
    harvester = OAIPMHHarvester()

    job = HarvestJob()
    job.source = HarvestSource()
    oai_source = job.source
    oai_source.title = "Test"
    oai_source.url = "http://helda.helsinki.fi/oai/request"
    if config:
        oai_source.config = '{"query": ""}'
    oai_source.type = "OAI-PMH"

    Session.add(job)
    return job, harvester
示例9: test_harvester_1gather_ddi
def test_harvester_1gather_ddi(self):
    '''
    Gathering with ``urlopen`` mocked by ``_side_effect_ddi_datas`` must
    yield exactly one item and select a ``DDIHarvester``.
    '''
    self.harv = MetadataHarvester()
    self.harv.config = "{}"

    harvest_job = HarvestJob()
    harvest_job.source = HarvestSource()
    harvest_job.source.title = "Test"
    harvest_job.source.url = "http://foo"
    harvest_job.source.type = "Metadata"

    # NOTE: rebinds urlopen on the module; it is not restored by this test.
    urllib2.urlopen = mock.Mock(side_effect=self._side_effect_ddi_datas)

    self.gathered = self.harv.gather_stage(harvest_job)
    # assertTrue replaces the deprecated ``assert_`` alias.
    self.assertTrue(len(self.gathered) == 1)
    self.assertTrue(isinstance(self.harv.harvester, DDIHarvester))
示例10: _create_harvester
def _create_harvester(self, config=True):
    '''
    Build a DDI harvester and a harvest job added to the session.

    :param config: when truthy the source config is the empty string,
        otherwise it is ``None``
    :returns: a ``(harvester, harvest_job)`` tuple
    '''
    harvester = DDIHarvester()
    harvester.config = "{}"

    job = HarvestJob()
    job.source = HarvestSource()
    ddi_source = job.source
    ddi_source.title = "Test"
    ddi_source.url = "http://foo"
    ddi_source.config = '' if config else None
    ddi_source.type = "DDI"

    Session.add(job)
    return harvester, job
示例11: harvest_send_job_to_gather_queue
def harvest_send_job_to_gather_queue(context, data_dict):
    '''
    Sends a harvest job to the gather queue.

    The job's status is set to "Running" and saved before its id is
    published on the gather queue.

    :param id: the id of the harvest job
    :type id: string
    :returns: the job, as produced by ``harvest_job_dictize``
    :raises toolkit.ValidationError: if the job's source is not active
    '''
    log.info('Send job to gather queue: %r', data_dict)

    requested_id = logic.get_or_bust(data_dict, 'id')
    show_job = toolkit.get_action('harvest_job_show')
    job = show_job(context, {'id': requested_id})

    check_access('harvest_send_job_to_gather_queue', context, job)

    # The gather queue publisher is obtained before the source is checked.
    gather_publisher = get_gather_publisher()

    # Refuse to queue jobs whose source is not active
    source = harvest_source_show(context, {'id': job['source_id']})
    if not source['active']:
        raise toolkit.ValidationError('Source is not active')

    running = u'Running'
    job_obj = HarvestJob.get(job['id'])
    job_obj.status = running
    job['status'] = running
    job_obj.save()

    gather_publisher.send({'harvest_job_id': job['id']})
    log.info('Sent job %s to the gather queue', job['id'])

    return harvest_job_dictize(job_obj, context)
示例12: harvest_job_exists
def harvest_job_exists(value, context):
    '''
    Return the harvest job model whose id is ``value``.

    :raises Invalid: if ``HarvestJob.get`` finds no job for ``value``
    '''
    job = HarvestJob.get(value)
    if job:
        return job
    raise Invalid('Harvest Job with id %r does not exist.' % str(value))
示例13: harvest_job_report
def harvest_job_report(context, data_dict):
    '''
    Build an error report for a single harvest job.

    Access is checked with the ``harvest_job_show`` auth function.

    :param id: the id of the harvest job (read from ``data_dict``)
    :returns: a dict with two keys: ``gather_errors``, a list of
        ``{'message': ...}`` dicts, and ``object_errors``, a dict keyed by
        harvest object id whose values hold ``guid``, ``errors`` and, when
        the harvester supplies one, ``original_url``
    :raises NotFound: if no job with that id exists
    '''
    check_access('harvest_job_show', context, data_dict)

    model = context['model']

    job = HarvestJob.get(data_dict.get('id'))
    if not job:
        raise NotFound

    gather_errors = []
    object_errors = {}
    report = {
        'gather_errors': gather_errors,
        'object_errors': object_errors
    }

    # Gather errors, most recent first
    gather_error_cls = harvest_model.HarvestGatherError
    gather_query = (
        model.Session.query(gather_error_cls)
        .join(harvest_model.HarvestJob)
        .filter(gather_error_cls.harvest_job_id == job.id)
        .order_by(gather_error_cls.created.desc())
    )
    gather_errors.extend(
        {'message': gather_error.message}
        for gather_error in gather_query.all())

    # Object errors

    # See whether the harvester for this job's source type can return the
    # URL to the original document. There is deliberately no ``break``: if
    # several plugins match, the last one wins.
    original_url_builder = None
    for harvester in PluginImplementations(IHarvester):
        if (harvester.info()['name'] == job.source.type
                and hasattr(harvester, 'get_original_url')):
            original_url_builder = harvester.get_original_url

    object_cls = harvest_model.HarvestObject
    object_error_cls = harvest_model.HarvestObjectError
    object_query = (
        model.Session.query(object_error_cls, object_cls.guid)
        .join(object_cls)
        .filter(object_cls.harvest_job_id == job.id)
        .order_by(object_error_cls.harvest_object_id)
    )

    for error, guid in object_query.all():
        object_id = error.harvest_object_id
        if object_id not in object_errors:
            # First error seen for this object: create its entry and look
            # up the original URL once.
            entry = {
                'guid': guid,
                'errors': []
            }
            object_errors[object_id] = entry
            if original_url_builder:
                url = original_url_builder(object_id)
                if url:
                    entry['original_url'] = url

        object_errors[object_id]['errors'].append({
            'message': error.message,
            'line': error.line,
            'type': error.stage
        })

    return report
示例14: harvest_job_create
def harvest_job_create(context, data_dict):
    '''
    Creates a Harvest Job for a Harvest Source and runs it (by putting it on
    the gather queue)

    :param source_id: id of the harvest source to create a job for
    :type source_id: string
    :param run: whether to also run it or not (default: True)
    :type run: bool
    :returns: the new job, as produced by ``harvest_job_dictize``
    :raises toolkit.NotFound: if the harvest source does not exist
    :raises HarvestSourceInactiveError: if the harvest source is inactive
    :raises HarvestJobExists: if ``_check_for_existing_jobs`` reports a job
        for this source
    '''
    log.info('Harvest job create: %r', data_dict)
    check_access('harvest_job_create', context, data_dict)

    source_id = data_dict['source_id']
    run_it = data_dict.get('run', True)

    # Check if source exists
    source = HarvestSource.get(source_id)
    if not source:
        # ``Logger.warn`` is a deprecated alias; use ``warning``.
        log.warning('Harvest source %s does not exist', source_id)
        raise toolkit.NotFound('Harvest source %s does not exist' % source_id)

    # Check if the source is active
    if not source.active:
        log.warning('Harvest job cannot be created for inactive source %s',
                    source_id)
        raise HarvestSourceInactiveError(
            'Can not create jobs on inactive sources')

    # Check if there already is an unrun or currently running job for this
    # source
    exists = _check_for_existing_jobs(context, source_id)
    if exists:
        log.warning('There is already an unrun job %r for this source %s',
                    exists, source_id)
        raise HarvestJobExists('There already is an unrun job for this source')

    job = HarvestJob()
    job.source = source
    job.save()
    log.info('Harvest job saved %s', job.id)

    if run_it:
        toolkit.get_action('harvest_send_job_to_gather_queue')(
            context, {'id': job.id})

    return harvest_job_dictize(job, context)
示例15: test_harvester_4gather_oaipmh
def test_harvester_4gather_oaipmh(self):
    '''
    Gathering against a mocked OAI-PMH client must yield more than one
    item and select an ``OAIPMHHarvester``.
    '''
    self.harv = MetadataHarvester()
    self.harv.config = "{}"

    harvest_job = HarvestJob()
    harvest_job.source = HarvestSource()
    harvest_job.source.title = "Test"
    harvest_job.source.url = "http://foo"
    harvest_job.source.type = "Metadata"

    client = CKANServer()
    metadata_registry = metadata.MetadataRegistry()
    metadata_registry.registerReader('oai_dc', oai_dc_reader)
    metadata_registry.registerWriter('oai_dc', oai_dc_writer)
    serv = BatchingServer(client, metadata_registry=metadata_registry)
    # NOTE: this rebinds the client class on the module and is not restored
    # by this test.
    oaipmh.client.Client = mock.Mock(
        return_value=ServerClient(serv, metadata_registry))

    self.gathered = self.harv.gather_stage(harvest_job)
    # assertTrue replaces the deprecated ``assert_`` alias.
    self.assertTrue(len(self.gathered) > 1)
    self.assertTrue(isinstance(self.harv.harvester, OAIPMHHarvester))