本文整理汇总了 Python 中 ckanext.harvest.model.HarvestSource 类的典型用法代码示例。如果您正苦于以下问题：Python HarvestSource 类的具体用法？Python HarvestSource 怎么用？Python HarvestSource 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 HarvestSource 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_gather
def test_gather(self):
    """Run the gather stage for a freshly saved CMDI source and job.

    The harvester's client is replaced with ``_FakeClient()`` before the
    stage runs. The test makes no assertions of its own: it passes as long
    as ``gather_stage`` returns without raising.
    """
    cmdi_source = HarvestSource(type="cmdi", url="http://localhost/test_cmdi")
    cmdi_source.save()

    gather_job = HarvestJob(source=cmdi_source)
    gather_job.save()

    # Install the fake client first so gather_stage runs against it.
    self.harvester.client = _FakeClient()
    self.harvester.gather_stage(gather_job)
示例2: test_import
def test_import(self):
    """Import two CMDI fixtures, check the packages, then delete one.

    Steps:

    1. ``cmdi_1.xml`` is imported and the resulting package's id, name,
       notes, version, title, license and pids are checked.
    2. ``cmdi_2.xml`` is imported and its temporal coverage and license
       are checked.
    3. A harvest object with ``report_status = "deleted"`` pointing at
       the second package is run through ``import_stage`` and the package
       is expected to end up in state ``'deleted'``.
    """
    def _error_text(obj):
        # Joined error messages; shown as the failure message when an
        # import unexpectedly produced errors.
        return u"\n".join(unicode(error.message) for error in (obj.errors or []))

    source = HarvestSource(url="http://localhost/test_cmdi", type="cmdi")
    source.save()
    job = HarvestJob(source=source)
    job.save()

    # --- first fixture -------------------------------------------------
    harvest_object = self._run_import("cmdi_1.xml", job)
    self.assertEqual(len(harvest_object.errors), 0, _error_text(harvest_object))

    package = get_action('package_show')({'user': 'harvest'}, {'id': 'urn-nbn-fi-lb-20140730180'})
    self.assertEqual(package.get('id', None), 'http://urn.fi/urn:nbn:fi:lb-20140730180')
    self.assertEqual(package.get('name', None), 'urn-nbn-fi-lb-20140730180')
    self.assertEqual(package.get('notes', None), u'{"eng": "Test description"}')
    self.assertEqual(package.get('version', None), '2012-09-07')
    self.assertEqual(package.get('title', []), '{"eng": "Longi Corpus"}')
    self.assertEqual(package.get('license_id', None), 'undernegotiation')

    provider = config['ckan.site_url']
    expected_pid = {u'id': u'http://islrn.org/resources/248-895-085-557-0',
                    u'provider': provider,
                    u'type': u'metadata'}
    self.assertTrue(expected_pid in package.get('pids'))

    model.Session.flush()

    # --- second fixture ------------------------------------------------
    harvest_object = self._run_import("cmdi_2.xml", job)
    self.assertEqual(len(harvest_object.errors), 0, _error_text(harvest_object))

    package = get_action('package_show')({'user': 'harvest'}, {'id': 'urn-nbn-fi-lb-20140730186'})
    self.assertEqual(package['temporal_coverage_begin'], '1880')
    self.assertEqual(package['temporal_coverage_end'], '1939')
    self.assertEqual(package.get('license_id', None), 'other')

    # --- delete the second package --------------------------------------
    # A content-less harvest object flagged as "deleted" and pointing at
    # the package imported above.
    deletion_object = HarvestObject()
    deletion_object.content = None
    deletion_object.id = "test-cmdi-delete"
    deletion_object.guid = "test-cmdi-delete"
    deletion_object.source = job.source
    deletion_object.harvest_source_id = None
    deletion_object.job = job
    deletion_object.package_id = package.get('id')
    deletion_object.report_status = "deleted"
    deletion_object.save()

    self.harvester.import_stage(deletion_object)
    model.Session.flush()
    self.assertEqual(model.Package.get(package['id']).state, 'deleted')
示例3: harvest_source_create
def harvest_source_create(context, data_dict):
    """Validate ``data_dict`` and save a new ``HarvestSource``.

    :param context: must contain ``'session'`` (rolled back when validation
        fails); an optional ``'schema'`` entry replaces
        ``default_harvest_source_schema()``
    :param data_dict: the source fields. ``url`` and ``type`` are always
        read from the validated data; ``active``, ``title``,
        ``description``, ``user_id``, ``publisher_id`` and ``config`` are
        copied only when present and not ``None``
    :returns: ``harvest_source_dictize(source, context)`` for the saved
        source
    :raises ValidationError: if validation reports any errors
    """
    log.info('Creating harvest source: %r', data_dict)
    check_access('harvest_source_create', context, data_dict)

    session = context['session']
    schema = context.get('schema') or default_harvest_source_schema()

    data, errors = validate(data_dict, schema)
    if errors:
        session.rollback()
        log.warning('Harvest source does not validate: %r', errors)
        raise ValidationError(errors, _error_summary(errors))

    source = HarvestSource()
    source.url = data['url'].strip()
    source.type = data['type']

    # Optional attributes: copied only when validation kept a real value.
    optional_fields = ('active', 'title', 'description', 'user_id',
                       'publisher_id', 'config')
    for field in optional_fields:
        if field in data and data[field] is not None:
            setattr(source, field, data[field])

    # An 'active' key supplied by the caller is applied even when its
    # validated value is None, which the loop above would have skipped.
    if 'active' in data_dict:
        source.active = data['active']

    source.save()
    log.info('Harvest source created: %s', source.id)

    return harvest_source_dictize(source, context)
示例4: harvest_source_id_exists
def harvest_source_id_exists(value, context):
    """Return ``value`` unchanged when ``HarvestSource.get(value)`` finds a source.

    ``context`` is accepted but not used.

    :raises Invalid: if the lookup returns nothing
    """
    if HarvestSource.get(value):
        return value
    raise Invalid('Harvest Source with id %r does not exist.' % str(value))
示例5: after_show
def after_show(self, context, data_dict):
    """Add harvest information to a dataset dict and return it.

    * Datasets whose ``type`` equals ``DATASET_TYPE_NAME`` are harvest
      sources: the result of ``harvest_source_show_status`` is stored
      under ``'status'``. If no ``HarvestSource`` exists for the id, an
      error is logged and the dict is returned untouched.
    * Any other dataset is looked up among the current harvest objects.
      When one is found and ``context['validate']`` is false, the harvest
      object id, source id and source title are added via ``_add_extra``.
    """
    is_harvest_source = (
        'type' in data_dict and data_dict['type'] == DATASET_TYPE_NAME
    )

    if is_harvest_source:
        # Harvest source dataset: add the status from the HarvestSource.
        source = HarvestSource.get(data_dict['id'])
        if not source:
            log.error('Harvest source not found for dataset {0}'.format(data_dict['id']))
            return data_dict

        data_dict['status'] = harvest_logic.action.get.harvest_source_show_status(context, {'id': source.id})
        return data_dict

    # Normal dataset: check whether it was harvested.
    # NOTE(review): `current == True` is presumably a SQLAlchemy column
    # comparison, so it must stay `==` rather than `is` -- confirm.
    harvest_object = (
        model.Session.query(HarvestObject)
        .filter(HarvestObject.package_id == data_dict['id'])
        .filter(HarvestObject.current == True)
        .first()
    )

    # validate=False is passed only on indexing.
    if harvest_object and not context.get('validate', True):
        harvest_extras = (
            ('harvest_object_id', harvest_object.id),
            ('harvest_source_id', harvest_object.source.id),
            ('harvest_source_title', harvest_object.source.title),
        )
        for extra_key, extra_value in harvest_extras:
            _add_extra(data_dict, extra_key, extra_value)

    return data_dict
示例6: harvest_job_list
def harvest_job_list(context, data_dict):
    """Authorization check for listing harvest jobs.

    Rules, in order:

    * no user in ``context``: denied;
    * sysadmin: allowed;
    * user without any ``publisher`` group: denied;
    * no ``source_id`` in ``data_dict``: denied (only sysadmins may list
      all jobs);
    * the source's ``publisher_id`` is not one of the user's publisher
      groups: denied;
    * otherwise: allowed.

    :param context: read for the ``'user'`` entry
    :param data_dict: read for the optional ``'source_id'`` entry
    :returns: ``{'success': True}`` or
        ``{'success': False, 'msg': <translated reason>}``
    :raises NotFound: if ``source_id`` does not match a harvest source
    """
    user = context.get('user')

    # Check user is logged in
    if not user:
        return {'success': False, 'msg': _('Only logged users are authorized to see their sources')}

    user_obj = User.get(user)

    # Sysadmins need no further checks.
    if Authorizer().is_sysadmin(user):
        return {'success': True}

    # Fetch the publisher groups once; they are needed for two checks.
    publisher_groups = user_obj.get_groups(u'publisher') if user_obj else []
    if not publisher_groups:
        return {'success': False, 'msg': _('User %s must belong to a publisher to list harvest jobs') % str(user)}

    source_id = data_dict.get('source_id', False)
    if not source_id:
        # This message has no placeholder, so it must not be %-formatted:
        # doing so raises TypeError instead of returning the denial.
        return {'success': False, 'msg': _('Only sysadmins can list all harvest jobs')}

    source = HarvestSource.get(source_id)
    if not source:
        raise NotFound

    if source.publisher_id not in [group.id for group in publisher_groups]:
        return {'success': False, 'msg': _('User %s not authorized to list jobs from source %s') % (str(user), source.id)}

    return {'success': True}
示例7: harvest_job_create
def harvest_job_create(context, data_dict):
    """Create a harvest job for an existing, active harvest source.

    :param context: passed on to ``check_access``, ``harvest_job_list``
        and ``harvest_job_dictize``
    :param data_dict: must contain ``'source_id'``
    :returns: ``harvest_job_dictize(job, context)`` for the saved job
    :raises NotFound: if ``source_id`` does not match a harvest source
    :raises HarvestError: if the source is inactive, or if
        ``harvest_job_list`` reports a job with status ``u'New'`` for it
    """
    log.info('Harvest job create: %r', data_dict)
    check_access('harvest_job_create', context, data_dict)

    source_id = data_dict['source_id']

    # Check if source exists
    source = HarvestSource.get(source_id)
    if not source:
        log.warning('Harvest source %s does not exist', source_id)
        raise NotFound('Harvest source %s does not exist' % source_id)

    # Check if the source is active
    if not source.active:
        log.warning('Harvest job cannot be created for inactive source %s', source_id)
        raise HarvestError('Can not create jobs on inactive sources')

    # Check if there already is an unrun job for this source. The query
    # gets its own dict so the caller's data_dict is not rebound.
    pending_query = {
        'source_id': source_id,
        'status': u'New',
    }
    pending_jobs = harvest_job_list(context, pending_query)
    if pending_jobs:
        log.warning('There is already an unrun job %r for this source %s', pending_jobs, source_id)
        raise HarvestError('There already is an unrun job for this source')

    job = HarvestJob()
    job.source = source
    job.save()
    log.info('Harvest job saved %s', job.id)

    return harvest_job_dictize(job, context)
示例8: harvest_source_index_clear
def harvest_source_index_clear(context, data_dict):
    '''
    Removes the entries that belong to a harvest source from the search
    index.

    Only the index is touched: a delete query matching the source's
    ``harvest_source_id`` and the configured ``ckan.site_id`` is sent over
    the connection returned by ``make_connection()``. The deletion is
    committed unless ``ckan.search.solr_commit`` is configured to a false
    value.

    :param id: the id of the harvest source to clear
    :type id: string
    :raises NotFound: if no harvest source is found for ``id``
    :raises SearchIndexError: if the delete query or the commit fails
    '''
    # Authorization is checked under the 'harvest_source_clear' name.
    check_access('harvest_source_clear', context, data_dict)
    harvest_source_id = data_dict.get('id')

    source = HarvestSource.get(harvest_source_id)
    if not source:
        log.error('Harvest source %s does not exist', harvest_source_id)
        raise NotFound('Harvest source %s does not exist' % harvest_source_id)

    # Use the id stored on the source object from here on.
    harvest_source_id = source.id

    # NOTE(review): the connection is never closed in this block --
    # confirm whether make_connection() results need an explicit close.
    conn = make_connection()
    query = ''' +%s:"%s" +site_id:"%s" ''' % (
        'harvest_source_id', harvest_source_id, config.get('ckan.site_id'))
    try:
        conn.delete_query(query)
        if asbool(config.get('ckan.search.solr_commit', 'true')):
            conn.commit()
    except Exception as e:
        # Log the traceback, then surface the failure as an index error.
        log.exception(e)
        raise SearchIndexError(e)
示例9: harvest_job_create
def harvest_job_create(context, data_dict):
    """Create a harvest job for an existing, active harvest source.

    :param context: passed on to ``check_access``,
        ``_check_for_existing_jobs`` and ``harvest_job_dictize``
    :param data_dict: must contain ``'source_id'``
    :returns: ``harvest_job_dictize(job, context)`` for the saved job
    :raises NotFound: if ``source_id`` does not match a harvest source
    :raises Exception: if the source is inactive
    :raises HarvestJobExists: if ``_check_for_existing_jobs`` reports an
        existing job for the source
    """
    log.info('Harvest job create: %r', data_dict)
    check_access('harvest_job_create', context, data_dict)

    source_id = data_dict['source_id']

    # Check if source exists
    source = HarvestSource.get(source_id)
    if not source:
        log.warning('Harvest source %s does not exist', source_id)
        raise NotFound('Harvest source %s does not exist' % source_id)

    # Check if the source is active
    if not source.active:
        log.warning('Harvest job cannot be created for inactive source %s', source_id)
        # A plain Exception is kept on purpose: callers may rely on it.
        raise Exception('Can not create jobs on inactive sources')

    # Check if there already is an unrun or currently running job for
    # this source
    existing_jobs = _check_for_existing_jobs(context, source_id)
    if existing_jobs:
        log.warning('There is already an unrun job %r for this source %s', existing_jobs, source_id)
        raise HarvestJobExists('There already is an unrun job for this source')

    job = HarvestJob()
    job.source = source
    job.save()
    log.info('Harvest job saved %s', job.id)

    return harvest_job_dictize(job, context)
示例10: harvest_source_show
def harvest_source_show(context, data_dict):
    '''
    Returns the metadata of a harvest source

    The source is looked up with ``HarvestSource.get`` and returned in
    dictized form. As side effects on ``context``, the looked-up source is
    stored under ``'source'`` and ``'include_status'`` is set to ``True``
    unless the caller already provided a value.

    :param id: the id or name of the harvest source
    :type id: string
    :param attr: optional, passed to ``HarvestSource.get`` as ``attr``

    :returns: harvest source metadata
    :rtype: dictionary
    :raises NotFound: if the lookup returns nothing
    '''
    check_access('harvest_source_show', context, data_dict)

    source = HarvestSource.get(data_dict.get('id'),
                               attr=data_dict.get('attr', None))

    # Stored before the existence check, so the entry is written even
    # when nothing was found.
    context['source'] = source
    if not source:
        raise NotFound

    context.setdefault('include_status', True)

    return harvest_source_dictize(source, context)
示例11: harvest_source_clear
def harvest_source_clear(context, data_dict):
    """
    Clears all datasets, jobs and objects related to a harvest source, but
    keeps the source itself. Useful to wipe the history of a long running
    harvest source and start again fresh.

    Steps: clear the source's entries from the search index, run one raw
    SQL batch that removes the related rows, then re-index the source's
    own dataset.

    :param id: the id of the harvest source to clear
    :type id: string
    :returns: ``{"id": <id of the harvest source>}``
    :raises NotFound: if no harvest source is found for ``id``
    """
    check_access("harvest_source_clear", context, data_dict)

    requested_id = data_dict.get("id")
    source = HarvestSource.get(requested_id)
    if not source:
        log.error("Harvest source %s does not exist", requested_id)
        raise NotFound("Harvest source %s does not exist" % requested_id)

    # From here on, work with the id stored on the source object.
    harvest_source_id = source.id

    # Clear all datasets from this source from the index
    harvest_source_index_clear(context, data_dict)

    # One transaction: flag this source's packages as 'to_delete', drop
    # the source's harvest objects, errors, extras and jobs, then delete
    # everything attached to packages in state 'to_delete'.
    # NOTE: the later statements match *every* package in that state, not
    # only those flagged by the first statement.
    # NOTE: the id is interpolated straight into the SQL text; it is the
    # value read back from the source object, not the raw caller input.
    sql_template = """begin; update package set state = 'to_delete' where id in (select package_id from harvest_object where harvest_source_id = '{harvest_source_id}');
delete from harvest_object_error where harvest_object_id in (select id from harvest_object where harvest_source_id = '{harvest_source_id}');
delete from harvest_object_extra where harvest_object_id in (select id from harvest_object where harvest_source_id = '{harvest_source_id}');
delete from harvest_object where harvest_source_id = '{harvest_source_id}';
delete from harvest_gather_error where harvest_job_id in (select id from harvest_job where source_id = '{harvest_source_id}');
delete from harvest_job where source_id = '{harvest_source_id}';
delete from package_role where package_id in (select id from package where state = 'to_delete' );
delete from user_object_role where id not in (select user_object_role_id from package_role) and context = 'Package';
delete from resource_revision where resource_group_id in (select id from resource_group where package_id in (select id from package where state = 'to_delete'));
delete from resource_group_revision where package_id in (select id from package where state = 'to_delete');
delete from package_tag_revision where package_id in (select id from package where state = 'to_delete');
delete from member_revision where table_id in (select id from package where state = 'to_delete');
delete from package_extra_revision where package_id in (select id from package where state = 'to_delete');
delete from package_revision where id in (select id from package where state = 'to_delete');
delete from package_tag where package_id in (select id from package where state = 'to_delete');
delete from resource where resource_group_id in (select id from resource_group where package_id in (select id from package where state = 'to_delete'));
delete from package_extra where package_id in (select id from package where state = 'to_delete');
delete from member where table_id in (select id from package where state = 'to_delete');
delete from resource_group where package_id in (select id from package where state = 'to_delete');
delete from package where id in (select id from package where state = 'to_delete'); commit;"""
    purge_sql = sql_template.format(harvest_source_id=harvest_source_id)

    context["model"].Session.execute(purge_sql)

    # Refresh the index for this source to update the status object
    context.update({"validate": False, "ignore_auth": True})
    source_package = logic.get_action("package_show")(context, {"id": harvest_source_id})
    if source_package:
        PackageSearchIndex().index_package(source_package)

    return {"id": harvest_source_id}
示例12: test_form_validate_new_object_and_sync
def test_form_validate_new_object_and_sync(self):
    """Bind form data for a new source, validate it and sync it.

    Before the form is used, no source with the test URL may exist; after
    ``sync()`` and a commit, a lookup by URL must return a source that has
    an id.
    """
    source_url = u'http://localhost/'
    assert not HarvestSource.get(source_url, None, 'url')

    form_data = {
        'HarvestSource--url': source_url,
        'HarvestSource--type': u'Gemini',
        'HarvestSource--description': u'My source',
    }
    # Bind against the class itself, i.e. for a new object.
    fieldset = form.get_harvest_source_fieldset().bind(
        HarvestSource, data=form_data, session=model.Session)

    # Test bound_fields.validate().
    fieldset.validate()
    assert not fieldset.errors

    # Test bound_fields.sync().
    fieldset.sync()
    model.Session.commit()

    saved_source = HarvestSource.get(source_url, None, 'url')
    assert saved_source.id
示例13: setup
def setup(self):
    """Build the fixtures used by each test method.

    Creates a sysadmin user named ``harvest``, then a harvest source, a
    job for it and a harvest object holding ``self.contentDataset``, all
    through the corresponding actions. The resulting model objects are
    stored on ``self`` (``oHarvestSource``, ``oHarvestJob``,
    ``oHarvestObject``) together with ``self.context``, a context dict
    carrying the default update-package schema.
    """
    print("")
    print("TestUM:setup() before each test method")

    # Add sysadmin user
    self.harvestUser = model.User(name=u'harvest', password=u'test', sysadmin=True)
    model.Session.add(self.harvestUser)
    model.Session.commit()

    source_fixture = {
        'title': 'Test Source',
        'name': 'test-source',
        'url': u'xml/sample.xml',
        'source_type': u'ngds',
    }
    user_context = {
        'model': model,
        'session': model.Session,
        'user': u'harvest',
    }

    # Under the publisher auth profile, the source gets a publisher id.
    publisher_profile = config.get('ckan.harvest.auth.profile') == u'publisher'
    if publisher_profile and 'publisher_id' not in source_fixture:
        source_fixture['publisher_id'] = self.publisher.id

    source_dict = get_action('harvest_source_create')(user_context, source_fixture)
    self.oHarvestSource = HarvestSource.get(source_dict['id'])

    job_dict = get_action('harvest_job_create')(
        user_context, {'source_id': self.oHarvestSource.id})
    self.oHarvestJob = HarvestJob.get(job_dict['id'])

    # The harvest object is created with auth checks switched off.
    no_auth_context = {
        'model': model,
        'session': model.Session,
        'ignore_auth': True,
    }
    object_fixture = {
        'guid': 'guid',
        'content': self.contentDataset,
        'job_id': self.oHarvestJob.id,
        'extras': {'a key': 'a value'},
    }
    object_dict = toolkit.get_action('harvest_object_create')(no_auth_context, object_fixture)
    self.oHarvestObject = HarvestObject.get(object_dict['id'])

    self.context = {
        'model': model,
        'session': model.Session,
        'user': u'harvest',
        'schema': default_update_package_schema(),
        'api_version': '2',
    }
示例14: _update_harvest_source_object
def _update_harvest_source_object(context, data_dict):
    '''
    Updates an actual HarvestSource object with the data dict
    of the harvest_source dataset. All validation and authorization
    checks should be done by now, so this function is not to be used
    directly to update harvest sources.

    Side effects: ``data_dict['url']`` is replaced in place by its
    stripped value, and when the source ends up inactive, each job
    returned by ``HarvestJob.filter(source=source, status=u'New')`` is
    set to ``u'Aborted'``. Nothing is committed here.

    :param context: not read by this function
    :param data_dict: A standard package data_dict

    :returns: The updated HarvestSource object
    :rtype: HarvestSource object
    :raises logic.NotFound: if no harvest source is found for
        ``data_dict['id']``
    '''
    source_id = data_dict.get('id')

    log.info('Harvest source %s update: %r', source_id, data_dict)
    source = HarvestSource.get(source_id)
    if not source:
        log.error('Harvest source %s does not exist', source_id)
        raise logic.NotFound('Harvest source %s does not exist' % source_id)

    # Copy these fields over only when they carry a real value.
    fields = ('url', 'title', 'description', 'user_id',
              'publisher_id', 'frequency')
    for field in fields:
        if field in data_dict and data_dict[field] is not None:
            if field == 'url':
                data_dict[field] = data_dict[field].strip()
            setattr(source, field, data_dict[field])

    # Avoids clashes with the dataset type
    if 'source_type' in data_dict:
        source.type = data_dict['source_type']

    if 'config' in data_dict:
        source.config = data_dict['config']

    # Don't change state unless explicitly set in the dict
    if 'state' in data_dict:
        source.active = data_dict.get('state') == 'active'

    # Don't commit yet, let package_create do it
    source.add()

    # Abort any pending jobs
    if not source.active:
        jobs = HarvestJob.filter(source=source, status=u'New')
        log.info('Harvest source %s not active, so aborting %i outstanding jobs', source_id, jobs.count())
        # Iterating an empty result is a no-op, so no extra guard is needed.
        for job in jobs:
            job.status = u'Aborted'
            job.add()

    return source
示例15: get_source_object
def get_source_object(context, data_dict=None):
    """Return the harvest source for this request.

    A source already stored under ``context['source']`` is returned as is.
    Otherwise the source is looked up with ``HarvestSource.get`` using
    ``data_dict['id']`` (``None`` when the key or the dict is missing).

    :param context: dict that may carry a ``'source'`` entry
    :param data_dict: optional dict that may carry an ``'id'`` entry
    :returns: the harvest source object
    :raises NotFound: if the lookup returns nothing
    """
    if 'source' in context:
        return context['source']

    # None instead of a shared mutable ``{}`` default.
    source_id = (data_dict or {}).get('id', None)
    source = HarvestSource.get(source_id)
    if not source:
        raise NotFound
    return source