当前位置: 首页>>代码示例>>Python>>正文


Python Document.by_id方法代码示例

本文整理汇总了Python中aleph.model.Document.by_id方法的典型用法代码示例。如果您正苦于以下问题:Python Document.by_id方法的具体用法?Python Document.by_id怎么用?Python Document.by_id使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在aleph.model.Document的用法示例。


在下文中一共展示了Document.by_id方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: index_document

# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def index_document(document_id):
    """Index a document and its pages or records into Elasticsearch.

    The document is looked up by ``document_id``; when it does not exist
    this is logged and nothing else happens. Otherwise the index body is
    built from ``to_index_dict()``, enriched with entities and latinized
    title/summary, and written under ``TYPE_DOCUMENT``. Previously
    indexed children are cleared, then pages (text documents) or records
    (tabular documents) are bulk-indexed.

    Any exception raised while indexing is logged and reported through
    ``process.exception``; it is not re-raised.
    """
    document = Document.by_id(document_id)
    if document is None:
        log.info("Could not find document: %r", document_id)
        return

    # Shared keyword arguments for both bulk calls below.
    bulk_options = {
        'stats_only': True,
        'chunk_size': 2000,
        'request_timeout': 60.0,
    }
    try:
        log.info("Index document: %r", document)
        body = document.to_index_dict()
        body['entities'] = generate_entities(document)
        body['title_latin'] = latinize_text(body.get('title'))
        body['summary_latin'] = latinize_text(body.get('summary'))
        client = get_es()
        client.index(index=get_es_index(), doc_type=TYPE_DOCUMENT,
                     body=body, id=document.id)

        clear_children(document)
        if document.type == Document.TYPE_TEXT:
            bulk(get_es(), generate_pages(document), **bulk_options)

        if document.type == Document.TYPE_TABULAR:
            bulk(get_es(), generate_records(document), **bulk_options)
    except Exception as exc:
        log.exception(exc)
        process.exception(process.INDEX, component=__name__,
                          document_id=document.id, meta=document.meta,
                          source_id=document.source_id, exception=exc)
开发者ID:01-,项目名称:aleph,代码行数:29,代码来源:__init__.py

示例2: get_document

# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def get_document(document_id):
    """Return the document with the given id, enforcing read access.

    Raises ``NotFound`` when no such document exists. The number of the
    document's collections for which ``authz.collection_read`` is truthy
    is handed to ``authz.require``.
    """
    document = Document.by_id(document_id)
    if document is None:
        raise NotFound()
    visible = list(filter(authz.collection_read, document.collection_ids))
    authz.require(len(visible))
    return document
开发者ID:CodeForAfrica,项目名称:aleph,代码行数:9,代码来源:util.py

示例3: index_document

# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def index_document(document_id):
    """Index a document and its pages or records into Elasticsearch.

    The session is cleared first, then the document is looked up by
    ``document_id``; a missing document is logged and skipped. The
    document body (``to_dict()`` plus entities and latinized
    title/summary) is indexed under ``TYPE_DOCUMENT`` and previously
    indexed children are cleared.

    Only the bulk indexing of pages/records is guarded: exceptions from
    that step are logged and swallowed, while errors from the steps
    before it propagate to the caller.
    """
    clear_session()
    document = Document.by_id(document_id)
    if document is None:
        log.info("Could not find document: %r", document_id)
        return

    log.info("Index document: %r", document)
    body = document.to_dict()
    body['entities'] = generate_entities(document)
    body['title_latin'] = latinize_text(body.get('title'))
    body['summary_latin'] = latinize_text(body.get('summary'))
    es.index(index=es_index, doc_type=TYPE_DOCUMENT, body=body,
             id=document.id)
    clear_children(document)

    # Shared keyword arguments for both bulk calls below.
    bulk_options = {
        'stats_only': True,
        'chunk_size': 2000,
        'request_timeout': 60.0,
    }
    try:
        if document.type == Document.TYPE_TEXT:
            bulk(es, generate_pages(document), **bulk_options)

        if document.type == Document.TYPE_TABULAR:
            bulk(es, generate_records(document), **bulk_options)
    except Exception as exc:
        log.exception(exc)
开发者ID:DavidLemayian,项目名称:aleph,代码行数:27,代码来源:__init__.py

示例4: analyze_document

# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def analyze_document(document_id):
    """Run every registered analyzer on a document, then re-index it.

    The session is cleared before the lookup. A missing document is
    logged and skipped. Analyzer exceptions are not caught here, so a
    failing analyzer aborts the run before indexing.
    """
    clear_session()
    document = Document.by_id(document_id)
    if document is None:
        log.info("Could not find document: %r", document_id)
        return

    log.info("Analyze document: %r", document)
    for analyzer_cls in get_analyzers():
        analyzer = analyzer_cls()
        analyzer.analyze(document, document.meta)
    index_document(document_id)
开发者ID:DavidLemayian,项目名称:aleph,代码行数:12,代码来源:__init__.py

示例5: analyze_document

# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def analyze_document(document_id):
    """Run every registered analyzer on a document, then re-index it.

    A missing document is logged and skipped. The analyzer loop as a
    whole is guarded: the first analyzer that raises is logged and the
    remaining analyzers are skipped, but the document is still indexed
    afterwards.
    """
    document = Document.by_id(document_id)
    if document is None:
        log.info("Could not find document: %r", document_id)
        return

    log.info("Analyze document: %r", document)
    try:
        for analyzer_cls in get_analyzers():
            analyzer = analyzer_cls()
            analyzer.analyze(document, document.meta)
    except Exception as exc:
        log.exception(exc)
    index_document(document_id)
开发者ID:backgroundcheck,项目名称:aleph,代码行数:14,代码来源:__init__.py

示例6: _load_parent

# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def _load_parent(collection_id, meta):
    """Validate the parent reference of a document about to be ingested.

    Reads ``parent_id`` from ``meta``. Returns ``None`` when no parent
    is given. Otherwise the parent is looked up within ``collection_id``
    and, if found, its id (not the document object) is returned.

    Raises ``BadRequest`` carrying a JSON error response with status 400
    when the referenced parent cannot be loaded.
    """
    parent_id = meta.get('parent_id')
    if parent_id is None:
        return

    parent = Document.by_id(parent_id, collection_id=collection_id)
    if parent is not None:
        return parent_id

    error = {
        'status': 'error',
        'message': 'Cannot load parent document'
    }
    raise BadRequest(response=jsonify(error, status=400))
开发者ID:pudo,项目名称:aleph,代码行数:15,代码来源:ingest_api.py

示例7: index_document

# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def index_document(document_id):
    """Index a document and its records into Elasticsearch.

    A missing document is logged and skipped. Otherwise the body from
    ``to_index_dict()`` is enriched with entities and latinized
    title/summary and indexed under ``TYPE_DOCUMENT``; the document's
    existing records are then cleared and regenerated through
    ``bulk_op``. No exceptions are handled here.
    """
    document = Document.by_id(document_id)
    if document is None:
        log.info("Could not find document: %r", document_id)
        return

    log.info("Index document: %r", document)
    body = document.to_index_dict()
    body['entities'] = generate_entities(document)
    body['title_latin'] = latinize_text(body.get('title'))
    body['summary_latin'] = latinize_text(body.get('summary'))
    client = get_es()
    client.index(index=get_es_index(), doc_type=TYPE_DOCUMENT, body=body,
                 id=document.id)

    clear_records(document)
    record_actions = generate_records(document)
    bulk_op(record_actions)
开发者ID:andkamau,项目名称:aleph,代码行数:17,代码来源:__init__.py

示例8: analyze_document

# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def analyze_document(document_id):
    """Run every registered analyzer on a document, then re-index it.

    A missing document is logged and skipped. Each analyzer is guarded
    individually: a failure is logged and reported through
    ``process.exception`` (tagged with the analyzer class name) and the
    remaining analyzers still run. The document is indexed afterwards
    regardless of analyzer failures.
    """
    document = Document.by_id(document_id)
    if document is None:
        log.info("Could not find document: %r", document_id)
        return

    log.info("Analyze document: %r", document)
    for analyzer_cls in get_analyzers():
        try:
            analyzer = analyzer_cls()
            analyzer.analyze(document, document.meta)
        except Exception as exc:
            log.exception(exc)
            process.exception(process.ANALYZE,
                              component=analyzer_cls.__name__,
                              document_id=document.id,
                              meta=document.meta,
                              source_id=document.source_id,
                              exception=exc)
    index_document(document_id)
开发者ID:stefanw,项目名称:aleph,代码行数:17,代码来源:__init__.py

示例9: index_document

# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def index_document(document_id):
    """Index a document and its records into Elasticsearch.

    A missing document is logged and skipped. Otherwise the body from
    ``to_index_dict()`` is enriched with entities and latinized
    title/summary and indexed under ``TYPE_DOCUMENT``; the document's
    existing records are then cleared and the regenerated records are
    sent with ``bulk``. No exceptions are handled here.
    """
    document = Document.by_id(document_id)
    if document is None:
        log.info("Could not find document: %r", document_id)
        return

    log.info("Index document: %r", document)
    body = document.to_index_dict()
    body['entities'] = generate_entities(document)
    body['title_latin'] = latinize_text(body.get('title'))
    body['summary_latin'] = latinize_text(body.get('summary'))
    client = get_es()
    client.index(index=get_es_index(), doc_type=TYPE_DOCUMENT, body=body,
                 id=document.id)

    clear_records(document)
    bulk_options = {
        'stats_only': True,
        'chunk_size': 2000,
        'request_timeout': 60.0,
    }
    bulk(get_es(), generate_records(document), **bulk_options)
开发者ID:stefanw,项目名称:aleph,代码行数:18,代码来源:__init__.py

示例10: generate_entity_references

# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def generate_entity_references(entity):
    """Rebuild the regex-origin document references of an entity.

    Does nothing unless the entity is in ``Entity.STATE_ACTIVE``.
    Texts yielded by ``scan_entity_mentions`` are normalized and matched
    against a pattern built from ``entity.regex_terms``; the number of
    matches per document becomes the reference weight. Existing
    references of origin ``'regex'`` for the entity are deleted and
    replaced, the session is committed, and the entity's references are
    then deleted and updated via ``delete_entity_references`` /
    ``update_entity_references``.

    A failure while scanning is logged and the references are rebuilt
    from whatever was counted up to that point.
    """
    if entity.state != Entity.STATE_ACTIVE:
        return
    # This is admittedly hacky: all references for the entity are
    # re-generated by effectively re-implementing the
    # RegexEntityAnalyzer. The alternative (search for candidate
    # documents, re-analyze and re-index them) proved too slow.

    log.info("Updating document references: %r", entity)
    pattern = re.compile('( |^)(%s)( |$)' % '|'.join(entity.regex_terms))

    weights = defaultdict(int)
    try:
        for document_id, raw_text in scan_entity_mentions(entity):
            normalized = normalize_strong(raw_text)
            if normalized is not None and len(normalized) > 2:
                hits = sum(1 for _ in pattern.finditer(normalized))
                # Only touch the mapping on a hit, so documents without
                # matches never get a zero-weight entry.
                if hits:
                    weights[document_id] += hits
    except Exception:
        log.exception('Failed to fully scan documents for entity refresh.')

    # Drop the previous regex-derived references before re-adding them.
    stale = db.session.query(Reference) \
        .filter(Reference.entity_id == entity.id) \
        .filter(Reference.origin == 'regex')
    stale.delete(synchronize_session='fetch')

    log.info("Re-matching %r gave %r documents.", entity,
             len(weights))

    for document_id, weight in weights.items():
        # Skip documents that no longer exist in the database.
        if Document.by_id(document_id) is None:
            continue
        reference = Reference()
        reference.document_id = document_id
        reference.entity_id = entity.id
        reference.origin = 'regex'
        reference.weight = weight
        db.session.add(reference)

    db.session.commit()
    delete_entity_references(entity.id)
    update_entity_references(entity.id)
开发者ID:andkamau,项目名称:aleph,代码行数:48,代码来源:__init__.py

示例11: generate_entity_references

# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def generate_entity_references(entity):
    """Rebuild the regex-origin document references of an entity.

    Does nothing unless the entity is in ``Entity.STATE_ACTIVE``.
    Texts yielded by ``scan_entity_mentions`` are normalized and matched
    against a pattern built from ``entity.regex_terms``; the number of
    matches per document becomes the reference weight. Existing
    references of origin ``'regex'`` for the entity are deleted and
    replaced, the session is committed, ``delete_entity_references`` is
    called, and every document that references the entity is re-indexed
    with ``index_records=False``.

    A failure while scanning is logged and the references are rebuilt
    from whatever was counted up to that point.
    """
    if entity.state != Entity.STATE_ACTIVE:
        return

    alternatives = '|'.join(entity.regex_terms)
    pattern = re.compile('( |^)(%s)( |$)' % alternatives)

    weights = defaultdict(int)
    try:
        for document_id, raw_text in scan_entity_mentions(entity):
            normalized = normalize_strong(raw_text)
            if normalized is None or len(normalized) <= 2:
                continue
            # findall returns one item per non-overlapping match, i.e.
            # the same count a finditer loop would produce.
            hits = len(pattern.findall(normalized))
            if hits:
                weights[document_id] += hits
    except Exception:
        log.exception('Failed to fully scan documents for entity refresh.')

    # Drop the previous regex-derived references before re-adding them.
    stale = db.session.query(Reference)
    stale = stale.filter(Reference.entity_id == entity.id,
                         Reference.origin == 'regex')
    stale.delete(synchronize_session='fetch')

    log.info("Re-matching %r gave %r documents.", entity,
             len(weights))

    for document_id, weight in weights.items():
        # Skip documents that no longer exist in the database.
        if Document.by_id(document_id) is None:
            continue
        reference = Reference()
        reference.document_id = document_id
        reference.entity_id = entity.id
        reference.origin = 'regex'
        reference.weight = weight
        db.session.add(reference)

    db.session.commit()
    delete_entity_references(entity.id)

    # Re-index every document that references this entity.
    referenced = db.session.query(func.distinct(Reference.document_id))
    referenced = referenced.filter(Reference.entity_id == entity.id)
    for (document_id,) in referenced:
        index_document(document_id, index_records=False)
开发者ID:backgroundcheck,项目名称:aleph,代码行数:45,代码来源:__init__.py

示例12: analyze_document

# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def analyze_document(document_id):
    """Stream a document's content through all analyzers and re-index it.

    A missing document is logged and skipped. Each analyzer class is
    instantiated with the document and its meta and prepared; analyzers
    that fail during construction or ``prepare()`` are logged and left
    out. Text documents feed every page (``on_page``) and tabular
    documents every record (``on_record``) to the analyzers, followed by
    each of that item's text parts (``on_text``). Finally every analyzer
    is finalized (failures are logged), the meta is written back, the
    session is committed and the document is indexed.

    Exceptions raised by ``on_page`` / ``on_record`` / ``on_text`` are
    not handled here.
    """
    document = Document.by_id(document_id)
    if document is None:
        log.info("Could not find document: %r", document_id)
        return

    log.info("Analyze document: %r", document)
    active = []
    meta = document.meta
    for analyzer_cls in get_analyzers():
        try:
            instance = analyzer_cls(document, meta)
            instance.prepare()
            active.append(instance)
        except Exception as exc:
            log.exception(exc)

    def feed_text_parts(item):
        # Hand each text part of a page/record to every analyzer.
        for text in item.text_parts():
            for analyzer in active:
                analyzer.on_text(text)

    if document.type == Document.TYPE_TEXT:
        for page in document.pages:
            for analyzer in active:
                analyzer.on_page(page)
            feed_text_parts(page)

    if document.type == Document.TYPE_TABULAR:
        for record in document.records:
            for analyzer in active:
                analyzer.on_record(record)
            feed_text_parts(record)

    for analyzer in active:
        try:
            analyzer.finalize()
        except Exception as exc:
            log.exception(exc)

    document.meta = meta
    db.session.add(document)
    db.session.commit()
    index_document(document_id)
开发者ID:andkamau,项目名称:aleph,代码行数:43,代码来源:__init__.py

示例13: get_document

# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def get_document(document_id):
    """Return the document with the given id, enforcing read access.

    Raises ``NotFound`` when no such document exists. The result of
    ``authz.source_read`` for the document's source is handed to
    ``authz.require``.
    """
    document = Document.by_id(document_id)
    if document is None:
        raise NotFound()
    may_read = authz.source_read(document.source_id)
    authz.require(may_read)
    return document
开发者ID:01-,项目名称:aleph,代码行数:8,代码来源:util.py

示例14: analyze_document_id

# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def analyze_document_id(document_id):
    """Load a document by id and pass it to ``analyze_document``.

    When no document with that id exists, this is logged and nothing
    else happens.
    """
    document = Document.by_id(document_id)
    if document is not None:
        analyze_document(document)
        return
    log.info("Could not find document: %r", document_id)
开发者ID:nivertech,项目名称:aleph,代码行数:8,代码来源:__init__.py

示例15: index_document_id

# 需要导入模块: from aleph.model import Document [as 别名]
# 或者: from aleph.model.Document import by_id [as 别名]
def index_document_id(document_id, index_records=True):
    """Load a document by id and pass it to ``index_document``.

    When no document with that id exists, this is logged and nothing
    else happens.

    NOTE(review): ``index_records`` is accepted but never read in this
    function, so it is not forwarded to ``index_document`` -- confirm
    whether that is intentional.
    """
    document = Document.by_id(document_id)
    if document is not None:
        index_document(document)
        return
    log.info("Could not find document: %r", document_id)
开发者ID:CodeForAfrica,项目名称:aleph,代码行数:8,代码来源:documents.py


注:本文中的aleph.model.Document.by_id方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。