本文整理汇总了Python中ckan.lib.search.index.PackageSearchIndex类的典型用法代码示例。如果您正苦于以下问题：Python PackageSearchIndex类的具体用法？Python PackageSearchIndex怎么用？Python PackageSearchIndex使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了PackageSearchIndex类的12个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: harvest_sources_reindex
def harvest_sources_reindex(context, data_dict):
    """
    Rebuild the search index entry of every active harvest source.

    Authorisation is checked with ``check_access`` first. Every active
    package of type ``DATASET_TYPE_NAME`` is then passed to the
    ``harvest_source_reindex`` action with ``defer_commit`` set in the
    context, and the package index is committed once after the loop.

    Always returns ``True``.
    """
    log.info("Reindexing all harvest sources")
    check_access("harvest_sources_reindex", context, data_dict)

    model = context["model"]
    source_query = model.Session.query(model.Package)
    source_query = source_query.filter(model.Package.type == DATASET_TYPE_NAME)
    source_query = source_query.filter(model.Package.state == u"active")
    harvest_sources = source_query.all()

    package_index = PackageSearchIndex()

    # The same context object is handed to every per-source call; it only
    # carries the defer_commit flag.
    deferred_context = {"defer_commit": True}
    for harvest_source in harvest_sources:
        reindex_one = get_action("harvest_source_reindex")
        reindex_one(deferred_context, {"id": harvest_source.id})

    # One commit for the whole batch.
    package_index.commit()
    return True
示例2: harvest_sources_reindex
def harvest_sources_reindex(context, data_dict):
    """
    Refresh the search index for every active harvest source dataset.

    After the ``check_access`` authorisation check, each active package of
    type ``DATASET_TYPE_NAME`` is loaded through ``package_show`` (with
    ``validate`` set to False and ``ignore_auth`` set to True in
    ``context``) and handed to the package index with the commit deferred.
    A single commit follows the loop.
    """
    log.info("Reindexing all harvest sources")
    check_access("harvest_sources_reindex", context, data_dict)

    model = context["model"]
    source_query = model.Session.query(model.Package)
    source_query = source_query.filter(model.Package.type == DATASET_TYPE_NAME)
    source_query = source_query.filter(model.Package.state == u"active")
    harvest_sources = source_query.all()

    package_index = PackageSearchIndex()
    for harvest_source in harvest_sources:
        # Remove the key, if present, before each package_show call.
        context.pop("extras_as_string", None)
        context.update({"validate": False, "ignore_auth": True})

        show_package = logic.get_action("package_show")
        source_dict = show_package(context, {"id": harvest_source.id})

        log.debug("Updating search index for harvest source {0}".format(harvest_source.id))
        package_index.index_package(source_dict, defer_commit=True)

    # One commit for the whole batch.
    package_index.commit()
    log.info("Updated search index for {0} harvest sources".format(len(harvest_sources)))
示例3: harvest_sources_reindex
def harvest_sources_reindex(context, data_dict):
    '''
    Refresh the search index for every active harvest source dataset.

    After the ``check_access`` authorisation check, each active package of
    type ``DATASET_TYPE_NAME`` is loaded through ``harvest_source_show``
    (with ``ignore_auth`` set to True in ``context``) and handed to the
    package index with the commit deferred. A single commit follows the
    loop.
    '''
    log.info('Reindexing all harvest sources')
    check_access('harvest_sources_reindex', context, data_dict)

    model = context['model']
    source_query = model.Session.query(model.Package)
    source_query = source_query.filter(model.Package.type == DATASET_TYPE_NAME)
    source_query = source_query.filter(model.Package.state == u'active')
    harvest_sources = source_query.all()

    package_index = PackageSearchIndex()
    for harvest_source in harvest_sources:
        # Remove the key, if present, before each harvest_source_show call.
        context.pop('extras_as_string', None)
        context.update({'ignore_auth': True})

        show_source = logic.get_action('harvest_source_show')
        source_dict = show_source(context, {'id': harvest_source.id})

        log.debug('Updating search index for harvest source {0}'.format(harvest_source.id))
        package_index.index_package(source_dict, defer_commit=True)

    # One commit for the whole batch.
    package_index.commit()
    log.info('Updated search index for {0} harvest sources'.format(len(harvest_sources)))
示例4: harvest_source_reindex
def harvest_source_reindex(context, data_dict):
    '''Reindex a single harvest source.

    The source is loaded through ``harvest_source_show`` and sent to the
    package search index. Top-level keys whose names also appear in the
    source's JSON ``config`` are left out of the indexed dict.

    :param context: action context. ``defer_commit`` (default ``False``) is
        forwarded to ``index_package``. The context is modified in place:
        ``extras_as_string`` is removed and ``ignore_auth`` is set to True.
    :param data_dict: must contain the ``id`` of the harvest source.
    :returns: ``True``
    '''
    harvest_source_id = logic.get_or_bust(data_dict, 'id')
    defer_commit = context.get('defer_commit', False)

    context.pop('extras_as_string', None)
    context.update({'ignore_auth': True})
    package_dict = logic.get_action('harvest_source_show')(
        context, {'id': harvest_source_id})
    log.debug('Updating search index for harvest source: %s',
              package_dict.get('name') or harvest_source_id)

    # Remove configuration values.
    # A source without any config has nothing to strip, but it must still
    # be indexed with all of its fields; previously it ended up being
    # indexed as an empty dict.
    config = {}
    if package_dict.get('config'):
        # "or {}" covers a config that parses to JSON null.
        config = json.loads(package_dict['config']) or {}
    new_dict = dict(
        (key, value) for key, value in package_dict.items()
        if key not in config)

    package_index = PackageSearchIndex()
    package_index.index_package(new_dict, defer_commit=defer_commit)
    return True
示例5: harvest_source_clear
def harvest_source_clear(context, data_dict):
    """
    Remove every dataset, job and harvest object that belongs to a harvest
    source while leaving the source itself in place.

    Handy for wiping the history of a long-running harvest source so that it
    can start again from scratch.

    :param id: the id of the harvest source to clear
    :type id: string
    :returns: a dict holding the ``id`` of the cleared source
    :raises NotFound: when ``HarvestSource.get`` finds no such source
    """
    check_access("harvest_source_clear", context, data_dict)

    requested_id = data_dict.get("id", None)
    source = HarvestSource.get(requested_id)
    if not source:
        log.error("Harvest source %s does not exist", requested_id)
        raise NotFound("Harvest source %s does not exist" % requested_id)

    # From here on use the id stored on the source object itself.
    harvest_source_id = source.id

    # Clear all datasets from this source from the index
    harvest_source_index_clear(context, data_dict)

    # One multi-statement script wrapped in its own begin/commit: packages of
    # the source are first flagged 'to_delete', then the harvest bookkeeping
    # rows and everything hanging off the flagged packages are deleted.
    sql_template = """begin; update package set state = 'to_delete' where id in (select package_id from harvest_object where harvest_source_id = '{harvest_source_id}');
delete from harvest_object_error where harvest_object_id in (select id from harvest_object where harvest_source_id = '{harvest_source_id}');
delete from harvest_object_extra where harvest_object_id in (select id from harvest_object where harvest_source_id = '{harvest_source_id}');
delete from harvest_object where harvest_source_id = '{harvest_source_id}';
delete from harvest_gather_error where harvest_job_id in (select id from harvest_job where source_id = '{harvest_source_id}');
delete from harvest_job where source_id = '{harvest_source_id}';
delete from package_role where package_id in (select id from package where state = 'to_delete' );
delete from user_object_role where id not in (select user_object_role_id from package_role) and context = 'Package';
delete from resource_revision where resource_group_id in (select id from resource_group where package_id in (select id from package where state = 'to_delete'));
delete from resource_group_revision where package_id in (select id from package where state = 'to_delete');
delete from package_tag_revision where package_id in (select id from package where state = 'to_delete');
delete from member_revision where table_id in (select id from package where state = 'to_delete');
delete from package_extra_revision where package_id in (select id from package where state = 'to_delete');
delete from package_revision where id in (select id from package where state = 'to_delete');
delete from package_tag where package_id in (select id from package where state = 'to_delete');
delete from resource where resource_group_id in (select id from resource_group where package_id in (select id from package where state = 'to_delete'));
delete from package_extra where package_id in (select id from package where state = 'to_delete');
delete from member where table_id in (select id from package where state = 'to_delete');
delete from resource_group where package_id in (select id from package where state = 'to_delete');
delete from package where id in (select id from package where state = 'to_delete'); commit;"""
    sql = sql_template.format(harvest_source_id=harvest_source_id)

    model = context["model"]
    model.Session.execute(sql)

    # Refresh the index for this source to update the status object
    context.update({"validate": False, "ignore_auth": True})
    show_package = logic.get_action("package_show")
    source_dict = show_package(context, {"id": harvest_source_id})
    if source_dict:
        PackageSearchIndex().index_package(source_dict)

    return {"id": harvest_source_id}
示例6: _update_search_index
def _update_search_index(package_id, log):
    '''
    Ask CKAN to re-index one package.

    The package is fetched with ``package_show`` using a context that sets
    ``ignore_auth`` and clears ``use_cache`` and ``validate``, then passed to
    the package index with ``defer_commit=False``. The package name is
    finally written to ``log`` at info level.
    '''
    from ckan import model
    from ckan.lib.search.index import PackageSearchIndex

    package_index = PackageSearchIndex()
    show_context = dict(
        model=model,
        ignore_auth=True,
        session=model.Session,
        use_cache=False,
        validate=False
    )
    show_package = toolkit.get_action('package_show')
    pkg_dict = show_package(show_context, {'id': package_id})

    package_index.index_package(pkg_dict, defer_commit=False)
    log.info('Search indexed %s', pkg_dict['name'])
示例7: _update_search_index
def _update_search_index(package_id, log):
    """
    Ask CKAN to re-index one package.

    The package is fetched with ``package_show`` using a context that sets
    ``ignore_auth`` and clears ``use_cache`` and ``validate``, then passed to
    the package index with ``defer_commit=False``. The package name is
    finally written to ``log`` at info level.
    """
    from ckan import model
    from ckan.lib.search.index import PackageSearchIndex

    package_index = PackageSearchIndex()

    show_context = {}
    show_context["model"] = model
    show_context["ignore_auth"] = True
    show_context["session"] = model.Session
    show_context["use_cache"] = False
    show_context["validate"] = False

    lookup = {"id": package_id}
    pkg_dict = toolkit.get_action("package_show")(show_context, lookup)

    package_index.index_package(pkg_dict, defer_commit=False)
    log.info("Search indexed %s", pkg_dict["name"])
示例8: run_job_synchronously
def run_job_synchronously(self):
    """
    Run one harvest job for a source from start to finish in this process.

    The source id is read from ``self.args[1]``. A new ``HarvestJob`` is
    created, the gather stage of the harvester whose ``info()['name']``
    equals the source type is run, and every gathered object is passed to
    ``fetch_and_import_stages``. On success the harvest source is
    re-indexed. In every case the job is saved with a finish time, and with
    status "Error" unless it reached "Done".
    """
    import datetime
    from ckan import model
    from ckan.plugins import PluginImplementations
    from ckanext.harvest.interfaces import IHarvester
    from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject
    from ckanext.harvest.queue import fetch_and_import_stages
    from ckan.lib.search.index import PackageSearchIndex

    package_index = PackageSearchIndex()

    source_id = unicode(self.args[1])
    source = HarvestSource.get(source_id)

    # Take the first harvester plugin whose declared name matches the
    # source type; give up when none does.
    harvester = None
    for candidate in PluginImplementations(IHarvester):
        if candidate.info()['name'] == source.type:
            harvester = candidate
            break
    if harvester is None:
        print("No harvester found to handle the job.")
        return

    job = HarvestJob()
    job.source = source
    job.status = "Running"
    job.gather_started = datetime.datetime.utcnow()
    job.save()

    try:
        harvest_object_ids = harvester.gather_stage(job)
        job.gather_finished = datetime.datetime.utcnow()
        job.save()

        for object_id in harvest_object_ids:
            harvest_object = HarvestObject.get(object_id)
            harvest_object.retry_times += 1
            harvest_object.save()
            fetch_and_import_stages(harvester, harvest_object)

        job.finished = datetime.datetime.utcnow()
        job.status = "Done"
        job.save()

        # And reindex the harvest source so it gets its counts right.
        # Must call update on a data_dict as returned by package_show, not the class object.
        show_context = {'validate': False, 'ignore_auth': True}
        source_dict = get_action('package_show')(show_context, {'id': source.id})
        package_index.index_package(source_dict)
    finally:
        # Runs on success and on failure alike: stamp the finish time and
        # mark the job as failed unless it got as far as "Done".
        job.finished = datetime.datetime.utcnow()
        if job.status != "Done":
            job.status = "Error"
        job.save()
示例9: harvest_source_reindex
def harvest_source_reindex(context, data_dict):
    """Reindex a single harvest source.

    The source is loaded through ``harvest_source_show`` and sent to the
    package search index. Top-level keys whose names also appear in the
    source's JSON ``config`` are left out of the indexed dict.

    :param context: action context. ``defer_commit`` (default ``False``) is
        forwarded to ``index_package``. The context is modified in place:
        ``extras_as_string`` is removed and ``ignore_auth`` is set to True.
    :param data_dict: must contain the ``id`` of the harvest source.
    :returns: ``True``
    """
    harvest_source_id = logic.get_or_bust(data_dict, "id")
    defer_commit = context.get("defer_commit", False)

    context.pop("extras_as_string", None)
    context.update({"ignore_auth": True})
    package_dict = logic.get_action("harvest_source_show")(context, {"id": harvest_source_id})
    log.debug("Updating search index for harvest source {0}".format(harvest_source_id))

    # Remove configuration values.
    # A source without any config has nothing to strip, but it must still
    # be indexed with all of its fields; previously it ended up being
    # indexed as an empty dict.
    config = {}
    if package_dict.get("config"):
        # "or {}" covers a config that parses to JSON null.
        config = json.loads(package_dict["config"]) or {}
    new_dict = dict(
        (key, value) for key, value in package_dict.items()
        if key not in config)

    package_index = PackageSearchIndex()
    package_index.index_package(new_dict, defer_commit=defer_commit)
    return True
示例10: harvest_sources_reindex
def harvest_sources_reindex(context, data_dict):
    '''
    Rebuild the search index entry of every active harvest source.

    Authorisation is checked with ``check_access`` first. Every active
    package of type ``DATASET_TYPE_NAME`` is then passed to the
    ``harvest_source_reindex`` action with ``defer_commit`` set in the
    context, and the package index is committed once after the loop.

    Always returns ``True``.
    '''
    log.info('Reindexing all harvest sources')
    check_access('harvest_sources_reindex', context, data_dict)

    model = context['model']
    source_query = model.Session.query(model.Package)
    source_query = source_query.filter(model.Package.type == DATASET_TYPE_NAME)
    source_query = source_query.filter(model.Package.state == u'active')
    harvest_sources = source_query.all()

    package_index = PackageSearchIndex()

    # The same context object is handed to every per-source call; it only
    # carries the defer_commit flag.
    deferred_context = {'defer_commit': True}
    for harvest_source in harvest_sources:
        reindex_one = get_action('harvest_source_reindex')
        reindex_one(deferred_context, {'id': harvest_source.id})

    # One commit for the whole batch.
    package_index.commit()
    return True
示例11: harvest_jobs_run
def harvest_jobs_run(context, data_dict):
    """
    Close finished harvest jobs and send pending ones to the gather queue.

    Steps, in order:

    1. When ``data_dict`` has no ``source_id``, ``_make_scheduled_jobs`` is
       called.
    2. Every "Running" job whose gather stage has finished and that has no
       harvest object left in a state other than COMPLETE or ERROR is
       marked "Finished", and its harvest source is re-indexed.
    3. ``resubmit_jobs`` is called.
    4. Every "New" job whose source is active is set to "Running" and
       published to the gather queue.

    :param context: action context; must contain ``session``. It is
        modified in place (``return_objects``, ``detailed``, ``validate``,
        ``ignore_auth``, ``extras_as_string``).
    :param data_dict: may contain ``source_id`` to restrict the run to one
        harvest source.
    :returns: the list of job dicts that were sent to the gather queue
    :raises Exception: when there is no job with status "New"
    """
    log.info("Harvest job run: %r", data_dict)
    check_access("harvest_jobs_run", context, data_dict)

    session = context["session"]
    source_id = data_dict.get("source_id", None)

    if not source_id:
        _make_scheduled_jobs(context, data_dict)

    context["return_objects"] = False

    # Flag finished jobs as such
    jobs = harvest_job_list(context, {"source_id": source_id, "status": u"Running"})
    if jobs:
        package_index = PackageSearchIndex()
        for job in jobs:
            if not job["gather_finished"]:
                continue

            # Objects of this job that are neither COMPLETE nor ERROR.
            objects = (
                session.query(HarvestObject.id)
                .filter(HarvestObject.harvest_job_id == job["id"])
                .filter(and_((HarvestObject.state != u"COMPLETE"), (HarvestObject.state != u"ERROR")))
                .order_by(HarvestObject.import_finished.desc())
            )
            if objects.count() != 0:
                continue

            job_obj = HarvestJob.get(job["id"])
            job_obj.status = u"Finished"

            # "!= None" is intentional: SQLAlchemy renders it as
            # IS NOT NULL, which "is not None" would not do.
            last_object = (
                session.query(HarvestObject)
                .filter(HarvestObject.harvest_job_id == job["id"])
                .filter(HarvestObject.import_finished != None)  # noqa: E711
                .order_by(HarvestObject.import_finished.desc())
                .first()
            )
            if last_object:
                job_obj.finished = last_object.import_finished
            job_obj.save()

            # Reindex the harvest source dataset so it has the latest
            # status
            context.pop("extras_as_string", None)
            context.update({"validate": False, "ignore_auth": True})
            package_dict = logic.get_action("package_show")(context, {"id": job_obj.source.id})
            if package_dict:
                package_index.index_package(package_dict)

    # resubmit old redis tasks
    resubmit_jobs()

    # Check if there are pending harvest jobs
    jobs = harvest_job_list(context, {"source_id": source_id, "status": u"New"})
    if not jobs:
        log.info("No new harvest jobs.")
        raise Exception("There are no new harvesting jobs")

    # Send each job to the gather queue
    publisher = get_gather_publisher()
    sent_jobs = []
    try:
        for job in jobs:
            context["detailed"] = False
            source = harvest_source_show(context, {"id": job["source_id"]})
            if source["active"]:
                job_obj = HarvestJob.get(job["id"])
                job_obj.status = job["status"] = u"Running"
                job_obj.save()
                publisher.send({"harvest_job_id": job["id"]})
                log.info("Sent job %s to the gather queue", job["id"])
                sent_jobs.append(job)
    finally:
        # Close the publisher even if a lookup, save or send fails part-way,
        # so it is not leaked.
        publisher.close()

    return sent_jobs
示例12: harvest_jobs_run
def harvest_jobs_run(context, data_dict):
    '''
    Close finished harvest jobs and send pending ones to the gather queue.

    Steps, in order:

    1. When ``data_dict`` has no ``source_id``, ``_make_scheduled_jobs`` is
       called.
    2. Every "Running" job whose gather stage has finished and that has no
       harvest object left in a state other than COMPLETE or ERROR is
       marked "Finished", and its harvest source is re-indexed.
    3. ``resubmit_jobs`` is called.
    4. Every "New" job whose source is active is set to "Running" and
       published to the gather queue.

    :param context: action context; must contain ``session``. It is
        modified in place (``return_objects``, ``detailed``, ``validate``,
        ``ignore_auth``, ``extras_as_string``).
    :param data_dict: may contain ``source_id`` to restrict the run to one
        harvest source.
    :returns: the list of job dicts that were sent to the gather queue
    :raises Exception: when there is no job with status "New"
    '''
    log.info('Harvest job run: %r', data_dict)
    check_access('harvest_jobs_run', context, data_dict)

    session = context['session']
    source_id = data_dict.get('source_id', None)

    if not source_id:
        _make_scheduled_jobs(context, data_dict)

    context['return_objects'] = False

    # Flag finished jobs as such
    jobs = harvest_job_list(context, {'source_id': source_id, 'status': u'Running'})
    if jobs:
        package_index = PackageSearchIndex()
        for job in jobs:
            if not job['gather_finished']:
                continue

            # Objects of this job that are neither COMPLETE nor ERROR.
            objects = session.query(HarvestObject.id) \
                .filter(HarvestObject.harvest_job_id == job['id']) \
                .filter(and_((HarvestObject.state != u'COMPLETE'),
                             (HarvestObject.state != u'ERROR'))) \
                .order_by(HarvestObject.import_finished.desc())
            if objects.count() != 0:
                continue

            job_obj = HarvestJob.get(job['id'])
            job_obj.status = u'Finished'

            # "!= None" is intentional: SQLAlchemy renders it as
            # IS NOT NULL, which "is not None" would not do.
            last_object = session.query(HarvestObject) \
                .filter(HarvestObject.harvest_job_id == job['id']) \
                .filter(HarvestObject.import_finished != None) \
                .order_by(HarvestObject.import_finished.desc()) \
                .first()
            if last_object:
                job_obj.finished = last_object.import_finished
            job_obj.save()

            # Reindex the harvest source dataset so it has the latest
            # status
            context.pop('extras_as_string', None)
            context.update({'validate': False, 'ignore_auth': True})
            package_dict = logic.get_action('package_show')(
                context, {'id': job_obj.source.id})
            if package_dict:
                package_index.index_package(package_dict)

    # resubmit old redis tasks
    resubmit_jobs()

    # Check if there are pending harvest jobs
    jobs = harvest_job_list(context, {'source_id': source_id, 'status': u'New'})
    if not jobs:
        log.info('No new harvest jobs.')
        raise Exception('There are no new harvesting jobs')

    # Send each job to the gather queue
    publisher = get_gather_publisher()
    sent_jobs = []
    try:
        for job in jobs:
            context['detailed'] = False
            source = harvest_source_show(context, {'id': job['source_id']})
            if source['active']:
                job_obj = HarvestJob.get(job['id'])
                job_obj.status = job['status'] = u'Running'
                job_obj.save()
                publisher.send({'harvest_job_id': job['id']})
                log.info('Sent job %s to the gather queue', job['id'])
                sent_jobs.append(job)
    finally:
        # Close the publisher even if a lookup, save or send fails part-way,
        # so it is not leaked.
        publisher.close()

    return sent_jobs