本文整理汇总了Python中aleph.model.Document.by_id方法的典型用法代码示例。如果您正苦于以下问题:Python Document.by_id方法的具体用法?Python Document.by_id怎么用?Python Document.by_id使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类aleph.model.Document的用法示例。
在下文中一共展示了Document.by_id方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: index_document
# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def index_document(document_id):
    """Index the document with the given id, plus its pages or records.

    The document is looked up via ``Document.by_id``; a missing document
    is logged and the function returns without indexing anything. Any
    exception raised during indexing is logged and handed to
    ``process.exception`` instead of being propagated to the caller.
    """
    doc = Document.by_id(document_id)
    if doc is None:
        log.info("Could not find document: %r", document_id)
        return

    # Shared keyword arguments for both bulk submissions below.
    bulk_options = dict(stats_only=True, chunk_size=2000,
                        request_timeout=60.0)
    try:
        log.info("Index document: %r", doc)
        body = doc.to_index_dict()
        body['entities'] = generate_entities(doc)
        body['title_latin'] = latinize_text(body.get('title'))
        body['summary_latin'] = latinize_text(body.get('summary'))
        client = get_es()
        client.index(index=get_es_index(), doc_type=TYPE_DOCUMENT,
                     body=body, id=doc.id)

        # Remove previously indexed children before re-submitting them.
        clear_children(doc)
        if doc.type == Document.TYPE_TEXT:
            bulk(get_es(), generate_pages(doc), **bulk_options)
        if doc.type == Document.TYPE_TABULAR:
            bulk(get_es(), generate_records(doc), **bulk_options)
    except Exception as exc:
        log.exception(exc)
        process.exception(process.INDEX,
                          component=__name__,
                          document_id=doc.id,
                          meta=doc.meta,
                          source_id=doc.source_id,
                          exception=exc)
示例2: get_document
# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def get_document(document_id):
    """Return the document with the given id after an access check.

    Raises ``NotFound`` when ``Document.by_id`` yields nothing. Otherwise
    ``authz.require`` is called with the number of the document's
    collections for which ``authz.collection_read`` is truthy, and the
    document is returned.
    """
    doc = Document.by_id(document_id)
    if doc is None:
        raise NotFound()

    # Count the collections the caller may read; zero is falsy.
    readable_count = 0
    for collection_id in doc.collection_ids:
        if authz.collection_read(collection_id):
            readable_count += 1
    authz.require(readable_count)
    return doc
示例3: index_document
# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def index_document(document_id):
    """Index the document with the given id, plus its pages or records.

    ``clear_session`` is called first, then the document is loaded via
    ``Document.by_id``; a missing document is logged and nothing is
    indexed. The main document body is indexed outside any error
    handling, whereas exceptions from the bulk submission of pages or
    records are only logged.
    """
    clear_session()
    doc = Document.by_id(document_id)
    if doc is None:
        log.info("Could not find document: %r", document_id)
        return

    log.info("Index document: %r", doc)
    body = doc.to_dict()
    body['entities'] = generate_entities(doc)
    body['title_latin'] = latinize_text(body.get('title'))
    body['summary_latin'] = latinize_text(body.get('summary'))
    es.index(index=es_index, doc_type=TYPE_DOCUMENT, body=body, id=doc.id)

    # Remove previously indexed children before re-submitting them.
    clear_children(doc)

    # Shared keyword arguments for both bulk submissions below.
    bulk_options = dict(stats_only=True, chunk_size=2000,
                        request_timeout=60.0)
    try:
        if doc.type == Document.TYPE_TEXT:
            bulk(es, generate_pages(doc), **bulk_options)
        if doc.type == Document.TYPE_TABULAR:
            bulk(es, generate_records(doc), **bulk_options)
    except Exception as exc:
        log.exception(exc)
示例4: analyze_document
# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def analyze_document(document_id):
    """Run every analyzer over the document, then re-index it.

    ``clear_session`` is called first. A document that cannot be found
    is logged and skipped. Analyzer exceptions are not caught here, so a
    failing analyzer prevents the later ones and the re-index from
    running.
    """
    clear_session()
    doc = Document.by_id(document_id)
    if doc is None:
        log.info("Could not find document: %r", document_id)
        return

    log.info("Analyze document: %r", doc)
    for analyzer_cls in get_analyzers():
        analyzer = analyzer_cls()
        analyzer.analyze(doc, doc.meta)
    index_document(document_id)
示例5: analyze_document
# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def analyze_document(document_id):
    """Run every analyzer over the document, then re-index it.

    A document that cannot be found is logged and skipped. The analyzer
    loop sits inside a single ``try``: the first analyzer that raises is
    logged and ends the loop, after which the document is still
    re-indexed.
    """
    doc = Document.by_id(document_id)
    if doc is None:
        log.info("Could not find document: %r", document_id)
        return

    log.info("Analyze document: %r", doc)
    try:
        for analyzer_cls in get_analyzers():
            analyzer = analyzer_cls()
            analyzer.analyze(doc, doc.meta)
    except Exception as exc:
        log.exception(exc)

    # Re-index regardless of whether analysis completed.
    index_document(document_id)
示例6: _load_parent
# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def _load_parent(collection_id, meta):
    """Validate the parent reference of a document about to be ingested.

    Reads ``parent_id`` from ``meta``. Returns ``None`` when it is
    absent; otherwise checks that ``Document.by_id`` finds that document
    within ``collection_id`` and returns the parent's id (not the
    document object). Raises ``BadRequest`` with a JSON error response
    when the parent cannot be loaded.
    """
    parent_id = meta.get('parent_id')
    if parent_id is not None:
        found = Document.by_id(parent_id, collection_id=collection_id)
        if found is None:
            payload = {
                'status': 'error',
                'message': 'Cannot load parent document'
            }
            raise BadRequest(response=jsonify(payload, status=400))
        return parent_id
    return None
示例7: index_document
# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def index_document(document_id):
    """Index the document with the given id together with its records.

    A document that cannot be found is logged and nothing is indexed.
    Otherwise the document body is indexed, ``clear_records`` is called
    for it, and the output of ``generate_records`` is passed to
    ``bulk_op``. No exceptions are handled here.
    """
    doc = Document.by_id(document_id)
    if doc is None:
        log.info("Could not find document: %r", document_id)
        return

    log.info("Index document: %r", doc)
    body = doc.to_index_dict()
    body['entities'] = generate_entities(doc)
    body['title_latin'] = latinize_text(body.get('title'))
    body['summary_latin'] = latinize_text(body.get('summary'))
    client = get_es()
    client.index(index=get_es_index(), doc_type=TYPE_DOCUMENT, body=body,
                 id=doc.id)

    clear_records(doc)
    record_actions = generate_records(doc)
    bulk_op(record_actions)
示例8: analyze_document
# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def analyze_document(document_id):
    """Run every analyzer over the document, then re-index it.

    A document that cannot be found is logged and skipped. Each analyzer
    runs in its own ``try``: a failure is logged and reported through
    ``process.exception`` under the analyzer class name, and the
    remaining analyzers still run. The document is re-indexed at the end.
    """
    doc = Document.by_id(document_id)
    if doc is None:
        log.info("Could not find document: %r", document_id)
        return

    log.info("Analyze document: %r", doc)
    for analyzer_cls in get_analyzers():
        try:
            analyzer = analyzer_cls()
            analyzer.analyze(doc, doc.meta)
        except Exception as exc:
            log.exception(exc)
            process.exception(process.ANALYZE,
                              component=analyzer_cls.__name__,
                              document_id=doc.id,
                              meta=doc.meta,
                              source_id=doc.source_id,
                              exception=exc)
    index_document(document_id)
示例9: index_document
# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def index_document(document_id):
    """Index the document with the given id together with its records.

    A document that cannot be found is logged and nothing is indexed.
    Otherwise the document body is indexed, ``clear_records`` is called
    for it, and the output of ``generate_records`` is submitted with
    ``bulk``. No exceptions are handled here.
    """
    doc = Document.by_id(document_id)
    if doc is None:
        log.info("Could not find document: %r", document_id)
        return

    log.info("Index document: %r", doc)
    body = doc.to_index_dict()
    body['entities'] = generate_entities(doc)
    body['title_latin'] = latinize_text(body.get('title'))
    body['summary_latin'] = latinize_text(body.get('summary'))
    client = get_es()
    client.index(index=get_es_index(), doc_type=TYPE_DOCUMENT, body=body,
                 id=doc.id)

    clear_records(doc)
    bulk_options = dict(stats_only=True, chunk_size=2000,
                        request_timeout=60.0)
    bulk(get_es(), generate_records(doc), **bulk_options)
示例10: generate_entity_references
# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def generate_entity_references(entity):
    """Rebuild the ``'regex'``-origin ``Reference`` rows for ``entity``.

    Does nothing unless the entity is in ``Entity.STATE_ACTIVE``. The
    texts yielded by ``scan_entity_mentions`` are normalized and matched
    against a pattern built from ``entity.regex_terms``; the number of
    matches per document becomes the reference weight. Existing regex
    references for the entity are deleted, new ones are added for every
    matched document that still exists, the session is committed, and
    finally ``delete_entity_references`` and ``update_entity_references``
    are called with the entity id.
    """
    if entity.state != Entity.STATE_ACTIVE:
        return

    # This is all a bit hacky: we regenerate every entity reference for
    # the given entity by effectively re-implementing the
    # RegexEntityAnalyzer. The alternative was to search for potentially
    # matching documents, re-analyze them and re-index them, which
    # proved to be too slow in practice.
    log.info("Updating document references: %r", entity)
    alternatives = '|'.join(entity.regex_terms)
    # NOTE(review): the surrounding space is consumed by each match, so
    # two mentions separated by a single space count only once.
    pattern = re.compile('( |^)(%s)( |$)' % alternatives)

    match_counts = defaultdict(int)
    try:
        for document_id, text in scan_entity_mentions(entity):
            text = normalize_strong(text)
            if text is not None and len(text) > 2:
                for _ in pattern.finditer(text):
                    match_counts[document_id] += 1
    except Exception:
        # Best effort: keep whatever was counted before the failure.
        log.exception('Failed to fully scan documents for entity refresh.')

    stale = (db.session.query(Reference)
             .filter(Reference.entity_id == entity.id)
             .filter(Reference.origin == 'regex'))
    stale.delete(synchronize_session='fetch')

    log.info("Re-matching %r gave %r documents.", entity,
             len(match_counts))

    for document_id, weight in match_counts.items():
        # Skip ids whose document can no longer be loaded.
        if Document.by_id(document_id) is None:
            continue
        reference = Reference()
        reference.document_id = document_id
        reference.entity_id = entity.id
        reference.origin = 'regex'
        reference.weight = weight
        db.session.add(reference)

    db.session.commit()
    delete_entity_references(entity.id)
    update_entity_references(entity.id)
示例11: generate_entity_references
# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def generate_entity_references(entity):
    """Rebuild the ``'regex'``-origin ``Reference`` rows for ``entity``.

    Does nothing unless the entity is in ``Entity.STATE_ACTIVE``. The
    texts yielded by ``scan_entity_mentions`` are normalized and matched
    against a pattern built from ``entity.regex_terms``; the number of
    matches per document becomes the reference weight. Existing regex
    references for the entity are deleted, new ones are added for every
    matched document that still exists, and the session is committed.
    Afterwards ``delete_entity_references`` is called and every document
    that has a reference to the entity is re-indexed with
    ``index_records=False``.
    """
    if entity.state != Entity.STATE_ACTIVE:
        return

    alternatives = '|'.join(entity.regex_terms)
    pattern = re.compile('( |^)(%s)( |$)' % alternatives)

    match_counts = defaultdict(int)
    try:
        for document_id, text in scan_entity_mentions(entity):
            text = normalize_strong(text)
            if text is not None and len(text) > 2:
                for _ in pattern.finditer(text):
                    match_counts[document_id] += 1
    except Exception:
        # Best effort: keep whatever was counted before the failure.
        log.exception('Failed to fully scan documents for entity refresh.')

    stale = (db.session.query(Reference)
             .filter(Reference.entity_id == entity.id)
             .filter(Reference.origin == 'regex'))
    stale.delete(synchronize_session='fetch')

    log.info("Re-matching %r gave %r documents.", entity,
             len(match_counts))

    for document_id, weight in match_counts.items():
        # Skip ids whose document can no longer be loaded.
        if Document.by_id(document_id) is None:
            continue
        reference = Reference()
        reference.document_id = document_id
        reference.entity_id = entity.id
        reference.origin = 'regex'
        reference.weight = weight
        db.session.add(reference)

    db.session.commit()
    delete_entity_references(entity.id)

    # Re-index each distinct document that references this entity.
    referenced = db.session.query(func.distinct(Reference.document_id))
    referenced = referenced.filter(Reference.entity_id == entity.id)
    for (document_id,) in referenced:
        index_document(document_id, index_records=False)
示例12: analyze_document
# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def analyze_document(document_id):
    """Stream the document's content through all analyzers and re-index.

    A document that cannot be found is logged and skipped. Each analyzer
    class is instantiated with the document and its meta and prepared;
    analyzers failing at that stage are logged and left out. Text
    documents feed every page (``on_page``) and tabular documents every
    record (``on_record``) to the analyzers, followed by each of the
    item's text parts (``on_text``). Analyzers are then finalized (with
    failures logged), the meta is written back, the document is
    committed and finally re-indexed.
    """
    doc = Document.by_id(document_id)
    if doc is None:
        log.info("Could not find document: %r", document_id)
        return

    log.info("Analyze document: %r", doc)
    meta = doc.meta
    active = []
    for analyzer_cls in get_analyzers():
        try:
            analyzer = analyzer_cls(doc, meta)
            analyzer.prepare()
        except Exception as exc:
            log.exception(exc)
        else:
            active.append(analyzer)

    def feed_text_parts(item):
        # Hand every text part of a page/record to every analyzer.
        for text in item.text_parts():
            for analyzer in active:
                analyzer.on_text(text)

    if doc.type == Document.TYPE_TEXT:
        for page in doc.pages:
            for analyzer in active:
                analyzer.on_page(page)
            feed_text_parts(page)

    if doc.type == Document.TYPE_TABULAR:
        for record in doc.records:
            for analyzer in active:
                analyzer.on_record(record)
            feed_text_parts(record)

    for analyzer in active:
        try:
            analyzer.finalize()
        except Exception as exc:
            log.exception(exc)

    doc.meta = meta
    db.session.add(doc)
    db.session.commit()
    index_document(document_id)
示例13: get_document
# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def get_document(document_id):
    """Return the document with the given id after an access check.

    Raises ``NotFound`` when ``Document.by_id`` yields nothing. Otherwise
    the result of ``authz.source_read`` for the document's source is
    passed to ``authz.require`` before the document is returned.
    """
    doc = Document.by_id(document_id)
    if doc is None:
        raise NotFound()
    may_read = authz.source_read(doc.source_id)
    authz.require(may_read)
    return doc
示例14: analyze_document_id
# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def analyze_document_id(document_id):
    """Load a document by id and pass it to ``analyze_document``.

    A document that cannot be found is logged and nothing is analyzed.
    """
    doc = Document.by_id(document_id)
    if doc is not None:
        analyze_document(doc)
        return
    log.info("Could not find document: %r", document_id)
示例15: index_document_id
# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def index_document_id(document_id, index_records=True):
    """Load a document by id and pass it to ``index_document``.

    A document that cannot be found is logged and nothing is indexed.
    """
    # NOTE(review): ``index_records`` is accepted but never used in this
    # body -- confirm whether it should be forwarded to index_document.
    doc = Document.by_id(document_id)
    if doc is not None:
        index_document(doc)
        return
    log.info("Could not find document: %r", document_id)