本文整理汇总了Python中ckanext.harvest.model.HarvestJob.get方法的典型用法代码示例。如果您正苦于以下问题：Python HarvestJob.get方法的具体用法？Python HarvestJob.get怎么用？Python HarvestJob.get使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类ckanext.harvest.model.HarvestJob的用法示例。
在下文中一共展示了HarvestJob.get方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: harvest_job_abort
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import get [as 别名]
def harvest_job_abort(context, data_dict):
    '''
    Abort the most recent harvest job of a harvest source.

    The newest job (by creation date) of the given source is looked up. If
    it is not already "Finished" it is marked as "Finished". Every harvest
    object of that job whose state is neither "COMPLETE" nor "ERROR" is then
    set to "ERROR", so nothing is left in limbo. This does not actually stop
    any harvest fetches/objects that are already queued or running.

    :param source_id: the name or id of the harvest source with a job to abort
    :type source_id: string
    :returns: the dictized job, looked up again after the final commit
    :raises NotFound: if the source has no jobs
    '''
    check_access('harvest_job_abort', context, data_dict)

    model = context['model']
    source = harvest_source_show(context, {'id': data_dict.get('source_id')})

    # Query only the newest job directly; do not use harvest_job_list since
    # it can use a lot of memory.
    newest_job = (
        model.Session.query(HarvestJob)
        .filter_by(source_id=source['id'])
        .order_by(HarvestJob.created.desc())
        .first()
    )
    if not newest_job:
        raise NotFound('Error: source has no jobs')

    job = get_action('harvest_job_show')(context, {'id': newest_job.id})

    # Step 1: set the HarvestJob status to 'Finished'
    if job['status'] == 'Finished':
        log.info('Harvest job unchanged. Source %s status is: "%s"',
                 job['id'], job['status'])
    else:
        # i.e. New or Running
        new_status = 'Finished'
        HarvestJob.get(job['id']).status = new_status
        model.repo.commit_and_remove()
        log.info('Harvest job changed status from "%s" to "%s"',
                 job['status'], new_status)

    # Step 2: set unfinished HarvestObjects to ERROR.
    # The job is fetched again after each commit_and_remove() call
    # (presumably because that call detaches previously loaded objects).
    for harvest_object in HarvestJob.get(job['id']).objects:
        if harvest_object.state in ('COMPLETE', 'ERROR'):
            log.info('Harvest object not changed from "%s": %s',
                     harvest_object.state, harvest_object.id)
            continue
        previous_state = harvest_object.state
        harvest_object.state = 'ERROR'
        log.info('Harvest object changed state from "%s" to "%s": %s',
                 previous_state, harvest_object.state, harvest_object.id)
    model.repo.commit_and_remove()

    return harvest_job_dictize(HarvestJob.get(job['id']), context)
示例2: harvest_job_report
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import get [as 别名]
def harvest_job_report(context, data_dict):
    '''
    Build an error report for a single harvest job.

    The result has two keys: ``gather_errors``, a list of
    ``{'message': ...}`` dicts ordered by creation date (newest first), and
    ``object_errors``, a dict keyed by harvest object id whose values hold
    the object's ``guid``, its list of ``errors`` (message, line, type) and,
    when the matching harvester can provide one, an ``original_url``.

    :param id: the id passed to ``HarvestJob.get``
    :returns: the report dict described above
    :raises NotFound: if ``HarvestJob.get`` returns nothing for the id
    '''
    check_access('harvest_job_show', context, data_dict)

    model = context['model']
    job = HarvestJob.get(data_dict.get('id'))
    if not job:
        raise NotFound

    # Gather errors
    gather_query = (
        model.Session.query(harvest_model.HarvestGatherError)
        .join(harvest_model.HarvestJob)
        .filter(harvest_model.HarvestGatherError.harvest_job_id == job.id)
        .order_by(harvest_model.HarvestGatherError.created.desc())
    )
    gather_errors = [{'message': row.message} for row in gather_query.all()]

    # Object errors
    # Check if the harvester for this job's source has a method for returning
    # the URL to the original document. No early exit: if several harvesters
    # match, the last one wins.
    original_url_builder = None
    for harvester in PluginImplementations(IHarvester):
        if (harvester.info()['name'] == job.source.type
                and hasattr(harvester, 'get_original_url')):
            original_url_builder = harvester.get_original_url

    object_query = (
        model.Session.query(harvest_model.HarvestObjectError,
                            harvest_model.HarvestObject.guid)
        .join(harvest_model.HarvestObject)
        .filter(harvest_model.HarvestObject.harvest_job_id == job.id)
        .order_by(harvest_model.HarvestObjectError.harvest_object_id)
    )

    object_errors = {}
    for object_error, guid in object_query.all():
        object_id = object_error.harvest_object_id
        entry = object_errors.get(object_id)
        if entry is None:
            # First error seen for this object: create its entry and resolve
            # the original document URL once.
            entry = {'guid': guid, 'errors': []}
            object_errors[object_id] = entry
            if original_url_builder:
                url = original_url_builder(object_id)
                if url:
                    entry['original_url'] = url
        entry['errors'].append({
            'message': object_error.message,
            'line': object_error.line,
            'type': object_error.stage,
        })

    return {
        'gather_errors': gather_errors,
        'object_errors': object_errors,
    }
示例3: harvest_send_job_to_gather_queue
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import get [as 别名]
def harvest_send_job_to_gather_queue(context, data_dict):
    '''
    Sends a harvest job to the gather queue and marks it as "Running".

    :param id: the id of the harvest job
    :type id: string
    :returns: the dictized job after its status was updated
    :raises toolkit.ValidationError: if the job's harvest source is not active
    '''
    log.info('Send job to gather queue: %r', data_dict)

    job_id = logic.get_or_bust(data_dict, 'id')
    job = toolkit.get_action('harvest_job_show')(
        context, {'id': job_id})

    check_access('harvest_send_job_to_gather_queue', context, job)

    # Check the source is active *before* opening a queue connection, so a
    # rejected request does not leave a publisher behind.
    source = harvest_source_show(context, {'id': job['source_id']})
    if not source['active']:
        raise toolkit.ValidationError('Source is not active')

    # gather queue
    publisher = get_gather_publisher()
    try:
        job_obj = HarvestJob.get(job['id'])
        job_obj.status = job['status'] = u'Running'
        job_obj.save()
        publisher.send({'harvest_job_id': job['id']})
    finally:
        # Always release the queue connection, even if saving or sending
        # fails.
        publisher.close()
    log.info('Sent job %s to the gather queue', job['id'])

    return harvest_job_dictize(job_obj, context)
示例4: harvest_job_exists
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import get [as 别名]
def harvest_job_exists(value, context):
    '''Return the harvest job for ``value``; raise ``Invalid`` if none is found.'''
    harvest_job = HarvestJob.get(value)
    if harvest_job:
        return harvest_job
    raise Invalid('Harvest Job with id %r does not exist.' % str(value))
示例5: _create_job
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import get [as 别名]
def _create_job(self, source_id):
    """Create a harvest job for ``source_id`` as user "harvest" and return its model object."""
    action_context = {"model": model, "session": Session, "user": u"harvest"}
    created = get_action("harvest_job_create")(
        action_context, {"source_id": source_id}
    )

    # Look the job up again to hand back the model object, not the dict.
    harvest_job = HarvestJob.get(created["id"])
    assert harvest_job
    return harvest_job
示例6: setup
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import get [as 别名]
def setup(self):
    """
    Prepare the fixtures used by each test method.

    Creates a sysadmin user named "harvest", a harvest source, a harvest job
    for that source and one harvest object for that job, storing the model
    objects on ``self`` (``harvestUser``, ``oHarvestSource``, ``oHarvestJob``,
    ``oHarvestObject``). Finally stores the action context used by the tests
    in ``self.context``.
    """
    print("")
    print("TestUM:setup() before each test method")

    # Add sysadmin user
    sysadmin = model.User(name=u'harvest', password=u'test', sysadmin=True)
    self.harvestUser = sysadmin
    model.Session.add(sysadmin)
    model.Session.commit()

    source_fixture = {
        'title': 'Test Source',
        'name': 'test-source',
        'url': u'xml/sample.xml',
        'source_type': u'ngds',
    }
    user_context = {
        'model': model,
        'session': model.Session,
        'user': u'harvest',
    }

    # With the "publisher" auth profile a source needs a publisher id.
    if config.get('ckan.harvest.auth.profile') == u'publisher':
        if 'publisher_id' not in source_fixture:
            source_fixture['publisher_id'] = self.publisher.id

    created_source = get_action('harvest_source_create')(
        user_context, source_fixture)
    self.oHarvestSource = HarvestSource.get(created_source['id'])

    created_job = get_action('harvest_job_create')(
        user_context, {'source_id': self.oHarvestSource.id})
    self.oHarvestJob = HarvestJob.get(created_job['id'])

    # The harvest object is created with authorization checks disabled.
    no_auth_context = {
        'model': model,
        'session': model.Session,
        'ignore_auth': True,
    }
    object_fixture = {
        'guid': 'guid',
        'content': self.contentDataset,
        'job_id': self.oHarvestJob.id,
        'extras': {'a key': 'a value'},
    }
    created_object = toolkit.get_action('harvest_object_create')(
        no_auth_context, object_fixture)
    self.oHarvestObject = HarvestObject.get(created_object['id'])

    update_schema = default_update_package_schema()
    self.context = {
        'model': model,
        'session': model.Session,
        'user': u'harvest',
        'schema': update_schema,
        'api_version': '2',
    }
示例7: _create_job
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import get [as 别名]
def _create_job(self, source_id):
    '''Create a harvest job for the given source and return the HarvestJob object.'''
    context = dict(model=model, session=Session, user=u'harvest')
    payload = {'source_id': source_id}

    # Create a job through the action API, then load its model object.
    new_job_id = get_action('harvest_job_create')(context, payload)['id']
    job_object = HarvestJob.get(new_job_id)
    assert job_object
    return job_object
示例8: harvest_job_report
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import get [as 别名]
def harvest_job_report(context, data_dict):
    """
    Return the gather errors and per-object errors recorded for a harvest job.

    :param id: the id passed to ``HarvestJob.get``
    :returns: ``{"gather_errors": [...], "object_errors": {...}}`` where
        ``object_errors`` is keyed by harvest object id
    :raises NotFound: if ``HarvestJob.get`` returns nothing for the id
    """
    check_access("harvest_job_show", context, data_dict)

    model = context["model"]
    job_id = data_dict.get("id")
    job = HarvestJob.get(job_id)
    if not job:
        raise NotFound

    report = {"gather_errors": [], "object_errors": {}}
    gather_messages = report["gather_errors"]
    errors_by_object = report["object_errors"]

    # Gather errors, newest first
    gather_error_cls = harvest_model.HarvestGatherError
    gather_rows = (
        model.Session.query(gather_error_cls)
        .join(harvest_model.HarvestJob)
        .filter(gather_error_cls.harvest_job_id == job.id)
        .order_by(gather_error_cls.created.desc())
        .all()
    )
    for gather_error in gather_rows:
        gather_messages.append({"message": gather_error.message})

    # Object errors
    # Check if the harvester for this job's source has a method for returning
    # the URL to the original document
    original_url_builder = None
    for harvester in PluginImplementations(IHarvester):
        if harvester.info()["name"] != job.source.type:
            continue
        if hasattr(harvester, "get_original_url"):
            original_url_builder = harvester.get_original_url

    object_rows = (
        model.Session.query(
            harvest_model.HarvestObjectError, harvest_model.HarvestObject.guid
        )
        .join(harvest_model.HarvestObject)
        .filter(harvest_model.HarvestObject.harvest_job_id == job.id)
        .order_by(harvest_model.HarvestObjectError.harvest_object_id)
        .all()
    )
    for object_error, guid in object_rows:
        object_id = object_error.harvest_object_id
        if object_id not in errors_by_object:
            # First error for this object: register it and, if possible,
            # attach the URL of the original document.
            errors_by_object[object_id] = {"guid": guid, "errors": []}
            if original_url_builder:
                original_url = original_url_builder(object_id)
                if original_url:
                    errors_by_object[object_id]["original_url"] = original_url
        details = {
            "message": object_error.message,
            "line": object_error.line,
            "type": object_error.stage,
        }
        errors_by_object[object_id]["errors"].append(details)

    return report
示例9: get_job_object
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import get [as 别名]
def get_job_object(context, data_dict=None):
    """
    Return the harvest job an action should operate on.

    A job already present in ``context['job']`` takes precedence; otherwise
    the job is looked up with ``HarvestJob.get`` using ``data_dict['id']``.

    :param context: action context; may carry a ready-made ``'job'`` entry
    :param data_dict: optional dict whose ``'id'`` identifies the job
    :returns: the job from the context, or the one found by id
    :raises NotFound: if no job is in the context and the lookup finds nothing
    """
    if 'job' in context:
        return context['job']

    # ``None`` replaces the former shared mutable ``{}`` default.
    job_id = (data_dict or {}).get('id')
    job = HarvestJob.get(job_id)
    if not job:
        raise NotFound
    return job
示例10: harvest_job_show
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import get [as 别名]
def harvest_job_show(context, data_dict):
    """
    Return the dictized harvest job matching ``data_dict["id"]``.

    The optional ``data_dict["attr"]`` is forwarded to ``HarvestJob.get`` as
    its ``attr`` argument.

    :raises NotFound: if ``HarvestJob.get`` returns nothing
    """
    check_access("harvest_job_show", context, data_dict)

    job = HarvestJob.get(data_dict.get("id"), attr=data_dict.get("attr"))
    if job:
        return harvest_job_dictize(job, context)
    raise NotFound
示例11: harvest_job_show
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import get [as 别名]
def harvest_job_show(context, data_dict):
    '''
    Look up a harvest job and return it dictized.

    ``data_dict['id']`` is the lookup value and the optional
    ``data_dict['attr']`` is passed to ``HarvestJob.get`` as ``attr``.

    :raises NotFound: if ``HarvestJob.get`` returns nothing
    '''
    p.toolkit.check_access('harvest_job_show', context, data_dict)

    lookup_value = data_dict.get('id')
    lookup_attr = data_dict.get('attr', None)
    harvest_job = HarvestJob.get(lookup_value, attr=lookup_attr)
    if not harvest_job:
        raise NotFound

    return harvest_job_dictize(harvest_job, context)
示例12: gather_callback
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import get [as 别名]
def gather_callback(message_data, message):
    '''
    Handle a message from the gather queue.

    Reads ``message_data['harvest_job_id']``, runs the gather stage of every
    harvester whose ``info()['name']`` matches the job's source type, and
    sends each returned harvest object id to the fetch queue. If no harvester
    matches, a ``HarvestGatherError`` is saved. The job is then marked
    "Finished". The fetch publisher is always closed and the message is
    always acknowledged.

    :param message_data: dict expected to contain ``'harvest_job_id'``
    :param message: the queue message; ``ack()`` is called on it in all cases
    '''
    try:
        job_id = message_data['harvest_job_id']
        log.debug('Received harvest job id: %s', job_id)

        # Get a publisher for the fetch queue
        publisher = get_fetch_publisher()

        try:
            # Treat both a raising and a falsy-returning ``get`` as
            # "job does not exist".
            try:
                job = HarvestJob.get(job_id)
            except Exception:
                job = None
            if not job:
                log.error('Harvest job does not exist: %s', job_id)
                return

            # Send the harvest job to the plugins that implement
            # the Harvester interface, only if the source type
            # matches
            harvester_found = False
            for harvester in PluginImplementations(IHarvester):
                if harvester.info()['name'] != job.source.type:
                    continue
                harvester_found = True

                # Get a list of harvest object ids from the plugin
                job.gather_started = datetime.datetime.now()
                harvest_object_ids = harvester.gather_stage(job)
                job.gather_finished = datetime.datetime.now()
                job.save()
                # Logger-side formatting: "%r" % value would fail if a
                # harvester returned a tuple.
                log.debug("Received from plugin's gather_stage: %r",
                          harvest_object_ids)

                for harvest_object_id in harvest_object_ids or []:
                    # Send the id to the fetch queue
                    publisher.send({'harvest_object_id': harvest_object_id})
                    log.debug('Sent object %s to the fetch queue',
                              harvest_object_id)

            if not harvester_found:
                msg = ('No harvester could be found for source type %s'
                       % job.source.type)
                err = HarvestGatherError(message=msg, job=job)
                err.save()
                log.error(msg)

            job.status = u'Finished'
            job.save()
        finally:
            publisher.close()

    except KeyError:
        # Kept broad on purpose: any KeyError raised above is reported this
        # way, as before, so the consumer never crashes on one.
        log.error('No harvest job id received')
    finally:
        message.ack()
示例13: run_test_harvest
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import get [as 别名]
def run_test_harvest(self):
    '''
    Run a harvest for one source synchronously.

    The source id or name is taken from ``self.args[1]``. A new job is
    created for it; if ``HarvestJobExists`` is raised, a "Running" job is
    aborted after interactive confirmation, otherwise the single existing
    "New" job is reused. The chosen job is then run with the harvester
    registered for the source's type.
    '''
    from ckanext.harvest import queue
    from ckanext.harvest.tests import lib
    from ckanext.harvest.logic import HarvestJobExists
    from ckanext.harvest.model import HarvestJob

    # Determine the source
    if len(self.args) < 2:
        print('Please provide a source id')
        sys.exit(1)
    source_id_or_name = unicode(self.args[1])

    context = {
        'model': model,
        'session': model.Session,
        'user': self.admin_user['name'],
    }
    source = get_action('harvest_source_show')(
        context, {'id': source_id_or_name})

    # Determine the job
    try:
        job_dict = get_action('harvest_job_create')(
            context, {'source_id': source['id']})
    except HarvestJobExists:
        running_jobs = get_action('harvest_job_list')(
            context, {'source_id': source['id'], 'status': 'Running'})
        if running_jobs:
            source_label = source.get('name') or source['id']
            print('\nSource "%s" apparently has a "Running" job:\n%r'
                  % (source_label, running_jobs))
            answer = raw_input('Abort it? (y/n)')
            if not answer.lower().startswith('y'):
                sys.exit(1)
            job_dict = get_action('harvest_job_abort')(
                context, {'source_id': source['id']})
        else:
            print('Reusing existing harvest job')
            new_jobs = get_action('harvest_job_list')(
                context, {'source_id': source['id'], 'status': 'New'})
            assert len(new_jobs) == 1, \
                'Multiple "New" jobs for this source! %r' % new_jobs
            job_dict = new_jobs[0]

    job_obj = HarvestJob.get(job_dict['id'])

    harvester = queue.get_harvester(source['source_type'])
    assert harvester, \
        'No harvester found for type: %s' % source['source_type']
    lib.run_harvest_job(job_obj, harvester)
示例14: test_error_mail_sent
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import get [as 别名]
def test_error_mail_sent(self, mock_mailer_mail_recipient):
    """Check that a job with a gather error counts one error and triggers a mail."""
    context, harvest_source, job = \
        self._create_harvest_source_and_job_if_not_existing()

    # create a HarvestGatherError
    job_model = HarvestJob.get(job['id'])
    error_message = (
        'System error - No harvester could be found for source type %s'
        % job_model.source.type
    )
    HarvestGatherError(message=error_message, job=job_model).save()

    show_status = toolkit.get_action('harvest_source_show_status')
    status = show_status(context, {'id': harvest_source['id']})

    send_error_mail(context, harvest_source['id'], status)

    assert_equal(1, status['last_job']['stats']['errored'])
    assert mock_mailer_mail_recipient.called
示例15: run_test_harvest
# 需要导入模块: from ckanext.harvest.model import HarvestJob [as 别名]
# 或者: from ckanext.harvest.model.HarvestJob import get [as 别名]
def run_test_harvest(self):
    """
    Create (or reuse) a harvest job for a source and run it immediately.

    ``self.args[1]`` names the source. When job creation raises
    ``HarvestJobExists``, either the "Running" job is aborted (after asking
    on the console) or the one existing "New" job is reused. The job is then
    passed to ``lib.run_harvest_job`` together with the harvester for the
    source's type.
    """
    from ckanext.harvest import queue
    from ckanext.harvest.tests import lib
    from ckanext.harvest.logic import HarvestJobExists
    from ckanext.harvest.model import HarvestJob

    # Determine the source
    if not len(self.args) >= 2:
        print("Please provide a source id")
        sys.exit(1)
    else:
        source_id_or_name = unicode(self.args[1])

    context = {"model": model, "session": model.Session, "user": self.admin_user["name"]}
    source = get_action("harvest_source_show")(context, {"id": source_id_or_name})

    # Determine the job
    try:
        job_dict = get_action("harvest_job_create")(context, {"source_id": source["id"]})
    except HarvestJobExists:
        running = get_action("harvest_job_list")(
            context, {"source_id": source["id"], "status": "Running"}
        )
        if not running:
            print("Reusing existing harvest job")
            candidates = get_action("harvest_job_list")(
                context, {"source_id": source["id"], "status": "New"}
            )
            assert len(candidates) == 1, 'Multiple "New" jobs for this source! %r' % candidates
            job_dict = candidates[0]
        else:
            banner = '\nSource "%s" apparently has a "Running" job:\n%r' % (
                source.get("name") or source["id"],
                running,
            )
            print(banner)
            reply = raw_input("Abort it? (y/n)")
            if not reply.lower().startswith("y"):
                sys.exit(1)
            job_dict = get_action("harvest_job_abort")(context, {"source_id": source["id"]})

    job_obj = HarvestJob.get(job_dict["id"])
    harvester = queue.get_harvester(source["source_type"])
    assert harvester, "No harvester found for type: %s" % source["source_type"]
    lib.run_harvest_job(job_obj, harvester)