本文整理汇总了Python中aleph.model.Document类的典型用法代码示例。如果您正苦于以下问题：Python Document类的具体用法？Python Document怎么用？Python Document使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Document类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_load_pdf_file
def test_load_pdf_file(self):
    """Ingest the ``demo.pdf`` fixture and expect exactly one document."""
    fixture = self.get_fixture_path('demo.pdf')
    doc = Document.by_keys(
        collection_id=self.collection.id,
        foreign_id='demo.pdf',
    )
    # Commit and reload the instance from the session before ingesting it.
    session = db.session
    session.commit()
    session.refresh(doc)
    ingest_document(doc, fixture)
    # The assertion message reports the count that was actually found.
    assert Document.all().count() == 1, Document.all().count()
示例2: test_load_sample_directory
def test_load_sample_directory(self):
    """Ingest the ``samples`` fixture path and expect five documents."""
    fixture = self.get_fixture_path('samples')
    doc = Document.by_keys(
        collection_id=self.collection.id,
        foreign_id='samples',
    )
    # Commit and reload the instance from the session before ingesting it.
    session = db.session
    session.commit()
    session.refresh(doc)
    ingest_document(doc, fixture)
    # The assertion message reports the count that was actually found.
    assert Document.all().count() == 5, Document.all().count()
示例3: index
def index():
    """Return a paged JSON listing of documents, restricted by source.

    The source ids come from ``match_ids('sources', ...)`` applied to the
    sources the current ``authz`` grants ``READ`` on. Any ``content_hash``
    request arguments further restrict the listing to those hashes.
    """
    allowed_sources = match_ids('sources', authz.sources(authz.READ))
    query = Document.all().filter(Document.source_id.in_(allowed_sources))
    wanted_hashes = request.args.getlist('content_hash')
    # Only narrow by hash when at least one hash was supplied.
    if wanted_hashes:
        query = query.filter(Document.content_hash.in_(wanted_hashes))
    return jsonify(Pager(query))
示例4: index_document
def index_document(document_id):
    """Index a document, then its pages or records, via the ``es`` client.

    Logs and returns early when no document has the given id. Errors raised
    while bulk-indexing the children are logged and not re-raised.
    """
    clear_session()
    document = Document.by_id(document_id)
    if document is None:
        log.info("Could not find document: %r", document_id)
        return

    log.info("Index document: %r", document)
    payload = document.to_dict()
    payload['entities'] = generate_entities(document)
    payload['title_latin'] = latinize_text(payload.get('title'))
    payload['summary_latin'] = latinize_text(payload.get('summary'))
    es.index(
        index=es_index,
        doc_type=TYPE_DOCUMENT,
        body=payload,
        id=document.id,
    )
    clear_children(document)

    try:
        # Each document type has its own generator of child index actions.
        child_generators = (
            (Document.TYPE_TEXT, generate_pages),
            (Document.TYPE_TABULAR, generate_records),
        )
        for doc_type, generate in child_generators:
            if document.type == doc_type:
                bulk(es, generate(document), stats_only=True,
                     chunk_size=2000, request_timeout=60.0)
    except Exception as ex:
        log.exception(ex)
示例5: foreign_id_exists
def foreign_id_exists(self, source, foreign_id):
    """Return whether ``source`` already has a document with ``foreign_id``.

    A positive result is also reported on the info log.
    """
    query = (
        Document.all_ids()
        .filter(Document.source_id == source.id)
        .filter(Document.foreign_id == foreign_id)
    )
    if query.first() is None:
        return False
    log.info("Foreign ID exists (%s): %s", source, foreign_id)
    return True
示例6: get_document
def get_document(document_id):
    """Fetch a document by id, enforcing read access via its collections.

    Raises ``NotFound`` when no document has the given id. The number of
    the document's collections for which ``authz.collection_read`` is truthy
    is passed to ``authz.require``.
    """
    document = Document.by_id(document_id)
    if document is None:
        raise NotFound()

    readable = []
    for collection_id in document.collection_ids:
        if authz.collection_read(collection_id):
            readable.append(collection_id)
    # Zero readable collections is passed to require() as a falsy value.
    authz.require(len(readable))
    return document
示例7: index_document
def index_document(document_id):
    """Index a document, then its pages or records, via ``get_es()``.

    Logs and returns early when no document has the given id. Any error
    raised during indexing is logged and reported through
    ``process.exception``; it is not re-raised.
    """
    document = Document.by_id(document_id)
    if document is None:
        log.info("Could not find document: %r", document_id)
        return

    try:
        log.info("Index document: %r", document)
        payload = document.to_index_dict()
        payload['entities'] = generate_entities(document)
        payload['title_latin'] = latinize_text(payload.get('title'))
        payload['summary_latin'] = latinize_text(payload.get('summary'))
        get_es().index(
            index=get_es_index(),
            doc_type=TYPE_DOCUMENT,
            body=payload,
            id=document.id,
        )
        clear_children(document)

        # Each document type has its own generator of child index actions.
        child_generators = (
            (Document.TYPE_TEXT, generate_pages),
            (Document.TYPE_TABULAR, generate_records),
        )
        for doc_type, generate in child_generators:
            if document.type == doc_type:
                bulk(get_es(), generate(document), stats_only=True,
                     chunk_size=2000, request_timeout=60.0)
    except Exception as ex:
        log.exception(ex)
        process.exception(
            process.INDEX,
            component=__name__,
            document_id=document.id,
            meta=document.meta,
            source_id=document.source_id,
            exception=ex,
        )
示例8: ingest_upload
def ingest_upload(collection_id):
    """Store uploaded files for a collection and ingest them as a document.

    Write access to ``collection_id`` is required. Uploaded files are saved
    into a temporary directory which is removed once ingest finishes or
    fails. When the request carries no file, the content hash stays ``None``
    and the document gets ``Document.SCHEMA_FOLDER``.

    Returns a JSON response (status 201) carrying the document id.
    """
    require(request.authz.can(collection_id, request.authz.WRITE))
    sync = get_flag('sync')
    meta, foreign_id = _load_metadata()
    parent_id = _load_parent(collection_id, meta)
    staging_dir = mkdtemp(prefix='aleph.upload.')
    try:
        file_path = None
        content_hash = None
        # With several uploaded files, path and hash of the last one win.
        for upload in request.files.values():
            file_name = safe_filename(upload.filename, default='upload')
            file_path = os.path.join(staging_dir, file_name)
            upload.save(file_path)
            content_hash = checksum(file_path)

        document = Document.by_keys(
            collection_id=collection_id,
            parent_id=parent_id,
            foreign_id=foreign_id,
            content_hash=content_hash,
        )
        document.update(meta)
        document.schema = Document.SCHEMA
        if content_hash is None:
            document.schema = Document.SCHEMA_FOLDER
        ingest_document(
            document,
            file_path,
            role_id=request.authz.id,
            content_hash=content_hash,
        )
    finally:
        shutil.rmtree(staging_dir)

    if document.collection.casefile:
        # Make sure collection counts are always accurate.
        update_document(document, sync=sync)

    response_body = {
        'status': 'ok',
        'id': stringify(document.id)
    }
    return jsonify(response_body, status=201)
示例9: load_fixtures
def load_fixtures(self, file_name, process_documents=True):
    """Load a fixture file from ``FIXTURES`` into the database.

    After committing, every document id is passed to ``analyze_document``
    unless ``process_documents`` is false; ``optimize_search`` runs last.
    """
    fixture_path = os.path.abspath(os.path.join(FIXTURES, file_name))
    # NOTE(review): presumably the module-level fixture loader, not this
    # method -- confirm against the enclosing file.
    load_fixtures(db, loaders.load(fixture_path))
    db.session.commit()
    if process_documents:
        # all_ids() yields one-element rows; unpack the id from each.
        for (document_id,) in Document.all_ids():
            analyze_document(document_id)
    optimize_search()
示例10: load_fixtures
def load_fixtures(self, file_name, process_documents=True):
    """Load a fixture file, located via ``get_fixture_path``, into the database.

    After committing, every document id is passed to ``analyze_document``
    unless ``process_documents`` is false; ``optimize_search`` runs last.
    """
    fixture_path = self.get_fixture_path(file_name)
    # NOTE(review): presumably the module-level fixture loader, not this
    # method -- confirm against the enclosing file.
    load_fixtures(db, loaders.load(fixture_path))
    db.session.commit()
    if process_documents:
        # all_ids() yields one-element rows; unpack the id from each.
        for (document_id,) in Document.all_ids():
            analyze_document(document_id)
    optimize_search()
示例11: create_document
def create_document(self, meta, type=None):
    """Find or create the document described by ``meta`` in this source.

    When ``meta`` has a content hash, an existing document of
    ``self.source_id`` is looked up by foreign id if one is set, otherwise
    by content hash. When nothing matches, or ``meta`` has no content hash,
    a new ``Document`` is created for the source.

    :param meta: metadata object; ``content_hash`` and ``foreign_id`` are
        read from it and it is stored on the document.
    :param type: document type; falls back to ``self.DOCUMENT_TYPE``. The
        name shadows the builtin but is kept for caller compatibility.
    :returns: the document, added to the session and flushed.
    """
    # Start from "no match" so metadata without a content hash falls
    # through to creation instead of raising UnboundLocalError.
    document = None
    if meta.content_hash:
        q = Document.all()
        if meta.foreign_id:
            q = q.filter(Document.foreign_id == meta.foreign_id)
        else:
            q = q.filter(Document.content_hash == meta.content_hash)
        q = q.filter(Document.source_id == self.source_id)
        document = q.first()
    if document is None:
        document = Document()
        document.source_id = self.source_id
    document.meta = meta
    document.type = type or self.DOCUMENT_TYPE
    db.session.add(document)
    db.session.flush()
    return document
示例12: load_documents
def load_documents():
    """Load every document into the graph, committing in batches.

    The open transaction is committed and replaced whenever the zero-based
    document index is a positive multiple of 1000; whatever remains is
    committed at the end.
    """
    graph = get_graph()
    transaction = graph.begin()
    for position, document in enumerate(Document.all()):
        load_document(transaction, document)
        at_batch_boundary = position > 0 and position % 1000 == 0
        if at_batch_boundary:
            transaction.commit()
            transaction = graph.begin()
    transaction.commit()
示例13: index
def index():
    """Return a paged JSON listing of documents, restricted by collection.

    The collection ids come from ``match_ids('collection', ...)`` applied to
    the collections the current ``authz`` grants ``READ`` on. Any
    ``content_hash`` request arguments further restrict the listing.
    """
    allowed_collections = match_ids('collection',
                                    authz.collections(authz.READ))
    query = Document.all()
    in_allowed = Collection.id.in_(allowed_collections)
    query = query.filter(Document.collections.any(in_allowed))
    wanted_hashes = request.args.getlist('content_hash')
    # Only narrow by hash when at least one hash was supplied.
    if wanted_hashes:
        query = query.filter(Document.content_hash.in_(wanted_hashes))
    return jsonify(Pager(query))
示例14: create_document
def create_document(self, meta, type=None):
    """Find or create the document described by ``meta`` in this collection.

    When ``meta`` has a content hash, an existing document linked to
    ``self.collection_id`` is looked up by foreign id if one is set,
    otherwise by content hash. When nothing matches, or ``meta`` has no
    content hash, a new ``Document`` is created and attached to the
    collection.

    :param meta: metadata object; ``content_hash`` and ``foreign_id`` are
        read from it and it is stored on the document.
    :param type: document type; falls back to ``self.DOCUMENT_TYPE``. The
        name shadows the builtin but is kept for caller compatibility.
    :returns: the document, added to the session and flushed.
    """
    # Start from "no match" so metadata without a content hash falls
    # through to creation instead of raising UnboundLocalError.
    document = None
    if meta.content_hash:
        q = Document.all()
        if meta.foreign_id:
            q = q.filter(Document.foreign_id == meta.foreign_id)
        else:
            q = q.filter(Document.content_hash == meta.content_hash)
        clause = Collection.id == self.collection_id
        q = q.filter(Document.collections.any(clause))
        document = q.first()
    if document is None:
        document = Document()
        document.collections = [Collection.by_id(self.collection_id)]
    document.meta = meta
    document.type = type or self.DOCUMENT_TYPE
    db.session.add(document)
    db.session.flush()
    return document
示例15: load_documents
def load_documents():
    """Load every document into the graph, logging each and batching commits.

    The open transaction is committed and replaced whenever the zero-based
    document index is a positive multiple of 1000; whatever remains is
    committed at the end.
    """
    graph = get_graph()
    transaction = graph.begin()
    for position, document in enumerate(Document.all()):
        log.info("Load doc [%s]: %r", document.id, document.meta)
        load_document(transaction, document)
        at_batch_boundary = position > 0 and position % 1000 == 0
        if at_batch_boundary:
            transaction.commit()
            transaction = graph.begin()
    transaction.commit()