当前位置: 首页>>代码示例>>Python>>正文


Python model.Document类代码示例

本文整理汇总了Python中aleph.model.Document的典型用法代码示例。如果您正苦于以下问题:Python Document类的具体用法?Python Document怎么用?Python Document使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Document类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_load_pdf_file

 def test_load_pdf_file(self):
     """Ingest the ``demo.pdf`` fixture and expect exactly one document."""
     fixture = self.get_fixture_path('demo.pdf')
     doc = Document.by_keys(collection_id=self.collection.id,
                            foreign_id='demo.pdf')
     # Commit the session and refresh the instance before ingesting it.
     db.session.commit()
     db.session.refresh(doc)
     ingest_document(doc, fixture)
     total = Document.all().count()
     assert total == 1, total
开发者ID:pudo,项目名称:aleph,代码行数:8,代码来源:test_ingest.py

示例2: test_load_sample_directory

 def test_load_sample_directory(self):
     """Ingest the ``samples`` fixture directory and expect five documents."""
     directory = self.get_fixture_path('samples')
     root = Document.by_keys(collection_id=self.collection.id,
                             foreign_id='samples')
     # Commit the session and refresh the instance before ingesting it.
     db.session.commit()
     db.session.refresh(root)
     ingest_document(root, directory)
     total = Document.all().count()
     assert total == 5, total
开发者ID:pudo,项目名称:aleph,代码行数:8,代码来源:test_ingest.py

示例3: index

def index():
    """Return a JSON pager over documents filtered by source.

    Only documents whose ``source_id`` is among the ids returned by
    ``match_ids('sources', ...)`` are listed. If the request carries one
    or more ``content_hash`` arguments, the listing is further restricted
    to documents with one of those hashes.
    """
    allowed_sources = match_ids('sources', authz.sources(authz.READ))
    query = Document.all().filter(Document.source_id.in_(allowed_sources))
    content_hashes = request.args.getlist('content_hash')
    if content_hashes:
        query = query.filter(Document.content_hash.in_(content_hashes))
    return jsonify(Pager(query))
开发者ID:01-,项目名称:aleph,代码行数:7,代码来源:documents_api.py

示例4: index_document

def index_document(document_id):
    """Index a document, then bulk-index its pages or records.

    The session is cleared first. If no document with ``document_id``
    exists, the miss is logged and nothing else happens. Otherwise the
    document's dict form (plus entities and latinized title/summary) is
    sent to ``es.index`` and ``clear_children`` is called for the
    document. Errors raised while bulk-indexing the pages (text
    documents) or records (tabular documents) are logged and not
    re-raised.
    """
    clear_session()
    document = Document.by_id(document_id)
    if document is None:
        log.info("Could not find document: %r", document_id)
        return
    log.info("Index document: %r", document)

    body = document.to_dict()
    body['entities'] = generate_entities(document)
    body['title_latin'] = latinize_text(body.get('title'))
    body['summary_latin'] = latinize_text(body.get('summary'))
    es.index(index=es_index, doc_type=TYPE_DOCUMENT, body=body,
             id=document.id)
    clear_children(document)

    # Both bulk calls share the same settings.
    bulk_options = {
        'stats_only': True,
        'chunk_size': 2000,
        'request_timeout': 60.0,
    }
    try:
        if document.type == Document.TYPE_TEXT:
            bulk(es, generate_pages(document), **bulk_options)

        if document.type == Document.TYPE_TABULAR:
            bulk(es, generate_records(document), **bulk_options)
    except Exception as ex:
        log.exception(ex)
开发者ID:DavidLemayian,项目名称:aleph,代码行数:25,代码来源:__init__.py

示例5: foreign_id_exists

 def foreign_id_exists(self, source, foreign_id):
     """Report whether ``source`` already has a document with ``foreign_id``.

     A hit is logged at info level. Returns ``True`` or ``False``.
     """
     match = Document.all_ids().filter(
         Document.source_id == source.id
     ).filter(
         Document.foreign_id == foreign_id
     ).first()
     if match is None:
         return False
     log.info("Foreign ID exists (%s): %s", source, foreign_id)
     return True
开发者ID:01-,项目名称:aleph,代码行数:7,代码来源:crawler.py

示例6: get_document

def get_document(document_id):
    """Fetch a document by id, enforcing a collection read check.

    Raises ``NotFound`` when no such document exists. The number of the
    document's collections for which ``authz.collection_read`` is truthy
    is passed to ``authz.require``; the document is returned afterwards.
    """
    document = Document.by_id(document_id)
    if document is None:
        raise NotFound()
    permitted = list(filter(authz.collection_read, document.collection_ids))
    authz.require(len(permitted))
    return document
开发者ID:CodeForAfrica,项目名称:aleph,代码行数:7,代码来源:util.py

示例7: index_document

def index_document(document_id):
    """Index a document and bulk-index its pages or records.

    If no document with ``document_id`` exists, the miss is logged and
    nothing else happens. Any exception raised while indexing is logged
    and reported through ``process.exception`` (with the document id,
    meta and source id) rather than re-raised.
    """
    document = Document.by_id(document_id)
    if document is None:
        log.info("Could not find document: %r", document_id)
        return

    # Both bulk calls share the same settings.
    bulk_options = {
        'stats_only': True,
        'chunk_size': 2000,
        'request_timeout': 60.0,
    }
    try:
        log.info("Index document: %r", document)
        body = document.to_index_dict()
        body['entities'] = generate_entities(document)
        body['title_latin'] = latinize_text(body.get('title'))
        body['summary_latin'] = latinize_text(body.get('summary'))
        get_es().index(index=get_es_index(), doc_type=TYPE_DOCUMENT,
                       body=body, id=document.id)

        clear_children(document)
        if document.type == Document.TYPE_TEXT:
            bulk(get_es(), generate_pages(document), **bulk_options)

        if document.type == Document.TYPE_TABULAR:
            bulk(get_es(), generate_records(document), **bulk_options)
    except Exception as ex:
        log.exception(ex)
        process.exception(process.INDEX, component=__name__,
                          document_id=document.id, meta=document.meta,
                          source_id=document.source_id, exception=ex)
开发者ID:01-,项目名称:aleph,代码行数:27,代码来源:__init__.py

示例8: ingest_upload

def ingest_upload(collection_id):
    """Handle an upload into a collection and ingest the resulting document.

    The request must pass the ``WRITE`` check for ``collection_id``.
    Every file in ``request.files`` is saved into a temporary directory,
    which is removed again in the ``finally`` clause. The path and
    checksum of the last file saved are the ones used for the document.
    When no file was uploaded, ``content_hash`` stays ``None`` and the
    document is given ``Document.SCHEMA_FOLDER`` instead of
    ``Document.SCHEMA``.

    Returns a JSON response with status 201 carrying the document id.
    """
    require(request.authz.can(collection_id, request.authz.WRITE))
    sync = get_flag('sync')
    meta, foreign_id = _load_metadata()
    parent_id = _load_parent(collection_id, meta)
    # Scratch directory for the uploaded files; cleaned up in ``finally``.
    upload_dir = mkdtemp(prefix='aleph.upload.')
    try:
        # Both stay None when the request contains no files.
        path = None
        content_hash = None
        for storage in request.files.values():
            # Sanitise the client-supplied name before joining it to the
            # scratch directory.
            path = safe_filename(storage.filename, default='upload')
            path = os.path.join(upload_dir, path)
            storage.save(path)
            content_hash = checksum(path)
        document = Document.by_keys(collection_id=collection_id,
                                    parent_id=parent_id,
                                    foreign_id=foreign_id,
                                    content_hash=content_hash)
        document.update(meta)
        document.schema = Document.SCHEMA
        # No uploaded file means no hash: use the folder schema instead.
        if content_hash is None:
            document.schema = Document.SCHEMA_FOLDER
        ingest_document(document, path,
                        role_id=request.authz.id,
                        content_hash=content_hash)
    finally:
        # Always remove the scratch directory, even if ingest failed.
        shutil.rmtree(upload_dir)

    if document.collection.casefile:
        # Make sure collection counts are always accurate.
        update_document(document, sync=sync)
    return jsonify({
        'status': 'ok',
        'id': stringify(document.id)
    }, status=201)
开发者ID:pudo,项目名称:aleph,代码行数:35,代码来源:ingest_api.py

示例9: load_fixtures

 def load_fixtures(self, file_name, process_documents=True):
     """Load a fixture file from ``FIXTURES`` and commit it.

     When ``process_documents`` is true, every document id is passed to
     ``analyze_document`` and ``optimize_search`` is called afterwards.
     """
     fixture_path = os.path.abspath(os.path.join(FIXTURES, file_name))
     load_fixtures(db, loaders.load(fixture_path))
     db.session.commit()
     if not process_documents:
         return
     # ``all_ids`` yields one-element rows; unpack the id from each.
     for row in Document.all_ids():
         (doc_id,) = row
         analyze_document(doc_id)
     optimize_search()
开发者ID:01-,项目名称:aleph,代码行数:8,代码来源:util.py

示例10: load_fixtures

 def load_fixtures(self, file_name, process_documents=True):
     """Load the named fixture file and commit it.

     The path is resolved through ``self.get_fixture_path``. When
     ``process_documents`` is true, every document id is passed to
     ``analyze_document`` and ``optimize_search`` is called afterwards.
     """
     fixture_path = self.get_fixture_path(file_name)
     load_fixtures(db, loaders.load(fixture_path))
     db.session.commit()
     if not process_documents:
         return
     # ``all_ids`` yields one-element rows; unpack the id from each.
     for row in Document.all_ids():
         (doc_id,) = row
         analyze_document(doc_id)
     optimize_search()
开发者ID:stefanw,项目名称:aleph,代码行数:8,代码来源:util.py

示例11: create_document

 def create_document(self, meta, type=None):
     """Find or create the document described by ``meta`` in this source.

     When ``meta.content_hash`` is set, an existing document in
     ``self.source_id`` is looked up by ``meta.foreign_id`` if present,
     otherwise by the content hash. If no lookup is performed or nothing
     matches, a new ``Document`` is created for the source.

     The document's ``meta`` and ``type`` (``type`` argument, falling
     back to ``self.DOCUMENT_TYPE``) are then set, and the document is
     added to the session and flushed.

     Returns the existing or newly created document.
     """
     # Must be bound even when no lookup runs: previously a falsy
     # ``meta.content_hash`` left ``document`` undefined and the check
     # below raised UnboundLocalError.
     document = None
     if meta.content_hash:
         q = Document.all()
         if meta.foreign_id:
             q = q.filter(Document.foreign_id == meta.foreign_id)
         else:
             q = q.filter(Document.content_hash == meta.content_hash)
         q = q.filter(Document.source_id == self.source_id)
         document = q.first()
     if document is None:
         document = Document()
         document.source_id = self.source_id
     document.meta = meta
     document.type = type or self.DOCUMENT_TYPE
     db.session.add(document)
     db.session.flush()
     return document
开发者ID:stefanw,项目名称:aleph,代码行数:17,代码来源:ingestor.py

示例12: load_documents

def load_documents():
    """Load every document into the graph, committing in batches.

    A transaction is committed and a new one begun whenever the
    zero-based position of the document just loaded is a non-zero
    multiple of 1000; the last open transaction is committed at the end.
    """
    graph = get_graph()
    tx = graph.begin()
    for position, document in enumerate(Document.all()):
        load_document(tx, document)
        if position and not position % 1000:
            tx.commit()
            tx = graph.begin()
    tx.commit()
开发者ID:nivertech,项目名称:aleph,代码行数:9,代码来源:documents.py

示例13: index

def index():
    """Return a JSON pager over documents filtered by collection.

    Only documents with at least one collection whose id is among the
    ids returned by ``match_ids('collection', ...)`` are listed. If the
    request carries one or more ``content_hash`` arguments, the listing
    is further restricted to documents with one of those hashes.
    """
    allowed_collections = match_ids('collection',
                                    authz.collections(authz.READ))
    query = Document.all().filter(
        Document.collections.any(Collection.id.in_(allowed_collections))
    )
    content_hashes = request.args.getlist('content_hash')
    if content_hashes:
        query = query.filter(Document.content_hash.in_(content_hashes))
    return jsonify(Pager(query))
开发者ID:adamchainz,项目名称:aleph,代码行数:9,代码来源:documents_api.py

示例14: create_document

 def create_document(self, meta, type=None):
     """Find or create the document described by ``meta`` in this collection.

     When ``meta.content_hash`` is set, an existing document belonging to
     the collection ``self.collection_id`` is looked up by
     ``meta.foreign_id`` if present, otherwise by the content hash. If no
     lookup is performed or nothing matches, a new ``Document`` is
     created and attached to that collection.

     The document's ``meta`` and ``type`` (``type`` argument, falling
     back to ``self.DOCUMENT_TYPE``) are then set, and the document is
     added to the session and flushed.

     Returns the existing or newly created document.
     """
     # Must be bound even when no lookup runs: previously a falsy
     # ``meta.content_hash`` left ``document`` undefined and the check
     # below raised UnboundLocalError.
     document = None
     if meta.content_hash:
         q = Document.all()
         if meta.foreign_id:
             q = q.filter(Document.foreign_id == meta.foreign_id)
         else:
             q = q.filter(Document.content_hash == meta.content_hash)
         clause = Collection.id == self.collection_id
         q = q.filter(Document.collections.any(clause))
         document = q.first()
     if document is None:
         document = Document()
         document.collections = [Collection.by_id(self.collection_id)]
     document.meta = meta
     document.type = type or self.DOCUMENT_TYPE
     db.session.add(document)
     db.session.flush()
     return document
开发者ID:adamchainz,项目名称:aleph,代码行数:18,代码来源:ingestor.py

示例15: load_documents

def load_documents():
    """Load every document into the graph, logging each one.

    A transaction is committed and a new one begun whenever the
    zero-based position of the document just loaded is a non-zero
    multiple of 1000; the last open transaction is committed at the end.
    """
    graph = get_graph()
    tx = graph.begin()
    for position, document in enumerate(Document.all()):
        log.info("Load doc [%s]: %r", document.id, document.meta)
        load_document(tx, document)
        if position and not position % 1000:
            tx.commit()
            tx = graph.begin()
    tx.commit()
开发者ID:rlugojr,项目名称:aleph,代码行数:10,代码来源:documents.py


注:本文中的aleph.model.Document类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。