This article collects typical usage examples of the Python class invenio_indexer.api.RecordIndexer. If you are wondering what RecordIndexer does, or how to use it in practice, the curated examples below should help. Fifteen code examples of the RecordIndexer class are shown, ordered by popularity by default.
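Before the examples, here is a minimal sketch of the two most common calls. It is not taken from any of the projects below; it assumes a Flask application context and the UUID of a record that has already been committed to the database:

from invenio_indexer.api import RecordIndexer

indexer = RecordIndexer()
# Fetch the record from the database and index it in the search engine.
indexer.index_by_id(record_uuid)
# Remove the corresponding document from the search index.
indexer.delete_by_id(record_uuid)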
Example 1: index_after_commit
def index_after_commit(sender, changes):
    """Index a record in ES after it was committed to the DB.

    This cannot happen in an ``after_record_commit`` receiver from
    Invenio-Records because, despite the name, at that point we are not
    yet sure whether the record has been really committed to the DB.
    """
    indexer = RecordIndexer()
    for model_instance, change in changes:
        if isinstance(model_instance, RecordMetadata):
            if change in ('insert', 'update') and not model_instance.json.get("deleted"):
                if hasattr(model_instance, '_enhanced_record'):
                    record = model_instance._enhanced_record
                else:
                    record = model_instance.json
                indexer.index(InspireRecord(record, model_instance))
            else:
                try:
                    indexer.delete(InspireRecord(
                        model_instance.json, model_instance))
                except NotFoundError:
                    # Record not found in ES
                    LOGGER.debug('Record %s not found in ES',
                                 model_instance.json.get("id"))
            pid_type = get_pid_type_from_schema(model_instance.json['$schema'])
            pid_value = model_instance.json['control_number']
            db_version = model_instance.version_id
            index_modified_citations_from_record.delay(pid_type, pid_value, db_version)
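The (sender, changes) signature above matches Flask-SQLAlchemy's models_committed signal, which is presumably how this receiver is registered. A minimal sketch of that wiring, assuming index_after_commit is importable:

from flask_sqlalchemy import models_committed

# Invoke the receiver after every successful DB commit; `changes` is a
# list of (model_instance, operation) pairs, with operation being
# 'insert', 'update' or 'delete'.
models_committed.connect(index_after_commit)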
Example 2: continuous_migration
def continuous_migration():
    """Task to continuously migrate what is pushed up by Legacy."""
    indexer = RecordIndexer()
    redis_url = current_app.config.get('CACHE_REDIS_URL')
    r = StrictRedis.from_url(redis_url)
    try:
        while r.llen('legacy_records'):
            raw_record = r.lpop('legacy_records')
            if raw_record:
                # FIXME use migrate_and_insert_record(raw_record)
                # The record might be None, in case a parallel
                # continuous_migration task has already consumed the queue.
                raw_record = zlib.decompress(raw_record)
                record = marc_create_record(raw_record, keep_singletons=False)
                recid = int(record['001'][0])
                prod_record = InspireProdRecords(recid=recid)
                prod_record.marcxml = raw_record
                json_record = create_record(record)
                with db.session.begin_nested():
                    try:
                        record = record_upsert(json_record)
                    except ValidationError as e:
                        # Invalid record, will not get indexed
                        errors = "ValidationError: Record {0}: {1}".format(
                            recid, e
                        )
                        prod_record.valid = False
                        prod_record.errors = errors
                        db.session.merge(prod_record)
                        continue
                indexer.index_by_id(record.id)
    finally:
        db.session.commit()
        db.session.close()
Example 3: store_record
def store_record(obj, *args, **kwargs):
    """Create and index new record in main record space."""
    assert "$schema" in obj.data, "No $schema attribute found!"

    # Create record
    # FIXME: Do some preprocessing of obj.data before creating a record so that
    # we're sure that the schema will be validated without touching the full
    # holdingpen stack.
    record = Record.create(obj.data, id_=None)

    # Create persistent identifier.
    pid = inspire_recid_minter(str(record.id), record)

    # Commit any changes to record
    record.commit()

    # Dump any changes to record
    obj.data = record.dumps()

    # Commit to DB before indexing
    db.session.commit()

    # Index record
    indexer = RecordIndexer()
    indexer.index_by_id(pid.object_uuid)
Example 4: test_reindex
def test_reindex(app, script_info):
    """Test reindex."""
    # load records
    with app.test_request_context():
        runner = CliRunner()
        rec_uuid = uuid.uuid4()
        data = {'title': 'Test0'}
        record = Record.create(data, id_=rec_uuid)
        db.session.commit()

        # Initialize queue
        res = runner.invoke(cli.queue, ['init', 'purge'],
                            obj=script_info)
        assert 0 == res.exit_code

        res = runner.invoke(cli.reindex, ['--yes-i-know'], obj=script_info)
        assert 0 == res.exit_code
        res = runner.invoke(cli.run, [], obj=script_info)
        assert 0 == res.exit_code
        sleep(5)

        indexer = RecordIndexer()
        index, doc_type = indexer.record_to_index(record)
        res = current_search_client.get(index=index, doc_type=doc_type,
                                        id=rec_uuid)
        assert res['found']

        # Destroy queue
        res = runner.invoke(cli.queue, ['delete'],
                            obj=script_info)
        assert 0 == res.exit_code
Example 5: load_records
def load_records(app, filename, schema, tries=5):
    """Try to index records."""
    indexer = RecordIndexer()
    records = []
    with app.app_context():
        with mock.patch('invenio_records.api.Record.validate',
                        return_value=None):
            data_filename = pkg_resources.resource_filename(
                'invenio_records', filename)
            records_data = load(data_filename)
            with db.session.begin_nested():
                for item in records_data:
                    record_id = uuid.uuid4()
                    item_dict = dict(marc21.do(item))
                    item_dict['$schema'] = schema
                    recid_minter(record_id, item_dict)
                    oaiid_minter(record_id, item_dict)
                    record = Record.create(item_dict, id_=record_id)
                    indexer.index(record)
                    records.append(record.id)
            db.session.commit()

        # Wait for indexer to finish
        for i in range(tries):
            response = current_search_client.search()
            if response['hits']['total'] >= len(records):
                break
            current_search.flush_and_refresh('_all')
    return records
Example 6: glossary_terms
def glossary_terms():
    """Load demo terms records."""
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.termid import \
        cernopendata_termid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/glossary-term-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    glossary_terms_json = glob.glob(os.path.join(data, 'terms', '*.json'))

    for filename in glossary_terms_json:
        click.echo('Loading glossary-terms from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):
                if "collections" not in data and \
                        not isinstance(data.get("collections", None),
                                       basestring):
                    data["collections"] = []
                    data["collections"].append({"primary": "Terms"})
                id = uuid.uuid4()
                cernopendata_termid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example 7: records
def records():
    """Load records."""
    import pkg_resources
    import uuid
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob
    from invenio_pidstore import current_pidstore
    from invenio_records.api import Record

    # pkg resources the demodata
    data_path = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml'
    )
    with open(data_path) as source:
        indexer = RecordIndexer()
        with db.session.begin_nested():
            for index, data in enumerate(split_blob(source.read()), start=1):
                # create uuid
                rec_uuid = uuid.uuid4()
                # do translate
                record = marc21.do(create_record(data))
                # create PID
                current_pidstore.minters['recid_minter'](
                    rec_uuid, record
                )
                # create record
                indexer.index(Record.create(record, id_=rec_uuid))
        db.session.commit()
Example 8: test_indexer_bulk_index
def test_indexer_bulk_index(app, queue):
    """Test delay indexing."""
    with app.app_context():
        with establish_connection() as c:
            indexer = RecordIndexer()
            id1 = uuid.uuid4()
            id2 = uuid.uuid4()
            indexer.bulk_index([id1, id2])
            indexer.bulk_delete([id1, id2])

            consumer = Consumer(
                connection=c,
                queue=indexer.mq_queue.name,
                exchange=indexer.mq_exchange.name,
                routing_key=indexer.mq_routing_key)

            messages = list(consumer.iterqueue())
            [m.ack() for m in messages]

            assert len(messages) == 4
            data0 = messages[0].decode()
            assert data0['id'] == str(id1)
            assert data0['op'] == 'index'
            data2 = messages[2].decode()
            assert data2['id'] == str(id1)
            assert data2['op'] == 'delete'
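The four messages produced above stay on the indexer's queue until a consumer drains them. A minimal sketch of that step, assuming the same application context as in the test:

# Consume the queued 'index'/'delete' operations and execute them
# against the search cluster in a single bulk request.
RecordIndexer().process_bulk_queue()

In production this usually runs asynchronously, e.g. via the process_bulk_queue Celery task shipped with invenio-indexer, rather than inline.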
Example 9: test_basic_search
def test_basic_search(app, db, es):
    """Test basic search functionality."""
    # The index should be empty
    assert len(ItemSearch().execute()) == 0

    # Create item1, search for everything
    item1 = Item.create({})
    item1.commit()
    record_indexer = RecordIndexer()
    record_indexer.index(item1)
    current_search.flush_and_refresh('_all')
    assert len(ItemSearch().execute()) == 1

    # Create item2, search for everything again
    item2 = Item.create({'foo': 'bar'})
    item2.commit()
    record_indexer.index(item2)
    current_search.flush_and_refresh('_all')
    assert len(ItemSearch().execute()) == 2

    # Search for item2
    assert len(ItemSearch().query('match', foo='bar').execute()) == 1

    # Search for nonsense
    assert len(ItemSearch().query('match', foo='banana').execute()) == 0
Example 10: migrate_chunk
def migrate_chunk(chunk, broken_output=None, dry_run=False):
    from invenio_indexer.api import RecordIndexer
    from ..pidstore.minters import inspire_recid_minter

    indexer = RecordIndexer()
    index_queue = []
    for raw_record in chunk:
        record = marc_create_record(raw_record, keep_singletons=False)
        json_record = create_record(record)
        if '$schema' in json_record:
            json_record['$schema'] = url_for(
                'invenio_jsonschemas.get_schema',
                schema_path="records/{0}".format(json_record['$schema'])
            )
        rec_uuid = str(Record.create(json_record, id_=None).id)
        # Create persistent identifier.
        pid = inspire_recid_minter(rec_uuid, json_record)
        index_queue.append(pid.object_uuid)
    db.session.commit()

    # Request record indexing
    for i in index_queue:
        indexer.index_by_id(i)

    # Send task to migrate files.
    return rec_uuid
Example 11: records
def records():
    """Load records."""
    import pkg_resources
    import uuid
    from flask_login import login_user, logout_user
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob
    from invenio_accounts.models import User
    from invenio_deposit.api import Deposit

    users = User.query.all()

    # pkg resources the demodata
    data_path = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml'
    )
    with open(data_path) as source:
        with current_app.test_request_context():
            indexer = RecordIndexer()
            with db.session.begin_nested():
                for index, data in enumerate(split_blob(source.read()),
                                             start=1):
                    login_user(users[index % len(users)])
                    # do translate
                    record = marc21.do(create_record(data))
                    # create record
                    indexer.index(Deposit.create(record))
                    logout_user()
            db.session.commit()
Example 12: records
def records():
    """Load test data fixture."""
    import uuid
    from invenio_records.api import Record
    from invenio_pidstore.models import PersistentIdentifier, PIDStatus

    create_test_user()

    indexer = RecordIndexer()

    # Record 1 - Live record
    with db.session.begin_nested():
        rec_uuid = uuid.uuid4()
        pid1 = PersistentIdentifier.create(
            'recid', '1', object_type='rec', object_uuid=rec_uuid,
            status=PIDStatus.REGISTERED)
        Record.create({
            'title': 'Registered',
            'description': 'This is an awesome description',
            'control_number': '1',
            'access_right': 'restricted',
            'access_conditions': 'fuu',
            'owners': [1, 2],
            'recid': 1
        }, id_=rec_uuid)
        indexer.index_by_id(pid1.object_uuid)
    db.session.commit()
    sleep(3)
Example 13: record_not_yet_deleted
def record_not_yet_deleted(app):
    snippet = (
        '<record>'
        '  <controlfield tag="001">333</controlfield>'
        '  <controlfield tag="005">20160913214552.0</controlfield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">HEP</subfield>'
        '  </datafield>'
        '</record>'
    )

    with app.app_context():
        json_record = hep.do(create_record(snippet))
        json_record['$schema'] = 'http://localhost:5000/schemas/records/hep.json'
        with db.session.begin_nested():
            record = record_upsert(json_record)
            if record:
                ri = RecordIndexer()
                ri.index(record)
        db.session.commit()

    yield

    with app.app_context():
        _delete_record_from_everywhere('literature', 333)
Example 14: update_expired_embargoes
def update_expired_embargoes():
    """Release expired embargoes every midnight."""
    logger = current_app.logger
    base_url = urlunsplit((
        current_app.config.get('PREFERRED_URL_SCHEME', 'http'),
        current_app.config['JSONSCHEMAS_HOST'],
        current_app.config.get('APPLICATION_ROOT') or '', '', ''
    ))
    # The task needs to run in a request context as JSON Schema validation
    # will use url_for.
    with current_app.test_request_context('/', base_url=base_url):
        s = B2ShareRecordsSearch(
            using=current_search_client,
            index='records'
        ).query(
            'query_string',
            query='open_access:false AND embargo_date:{{* TO {0}}}'.format(
                datetime.now(timezone.utc).isoformat()
            ),
            allow_leading_wildcard=False
        ).fields([])
        record_ids = [hit.meta.id for hit in s.scan()]
        if record_ids:
            logger.info('Changing access of {} embargoed publications'
                        ' to public.'.format(len(record_ids)))
            for record in Record.get_records(record_ids):
                logger.debug('Making embargoed publication {} public'.format(
                    record.id))
                record['open_access'] = True
                record.commit()
            db.session.commit()
            indexer = RecordIndexer()
            indexer.bulk_index(record_ids)
            indexer.process_bulk_queue()
Example 15: data_policies
def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record
    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:
        click.echo('Loading data-policies from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])
                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)
                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket,
                        filename,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()