当前位置: 首页>>代码示例>>Python>>正文


Python Collection.by_foreign_id方法代码示例

本文整理汇总了Python中aleph.model.Collection.by_foreign_id方法的典型用法代码示例。如果您正苦于以下问题：Python Collection.by_foreign_id方法的具体用法？Python Collection.by_foreign_id怎么用？Python Collection.by_foreign_id使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类aleph.model.Collection的用法示例。


在下文中一共展示了Collection.by_foreign_id方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: find_collection

# 需要导入模块: from aleph.model import Collection [as 别名]
# 注意: by_foreign_id 是定义在 Collection 类上的方法, 不能单独导入; 导入 Collection 后通过 Collection.by_foreign_id(...) 调用
 def find_collection(self, foreign_id, data):
     """Fetch a collection by foreign ID and reset its entity-cache slot.

     ``foreign_id`` and ``data`` are passed unchanged to
     ``Collection.by_foreign_id``. The instance's ``entity_cache`` dict is
     created on first use, the entry keyed by the collection's ``id`` is
     set to a new empty list, the database session is flushed, and the
     collection is returned.
     """
     found = Collection.by_foreign_id(foreign_id, data)

     # The cache attribute is created lazily, on the first lookup.
     cache_missing = not hasattr(self, 'entity_cache')
     if cache_missing:
         self.entity_cache = {}

     # NOTE(review): ``found.id`` is read before the flush below; presumably
     # by_foreign_id already assigned it -- confirm against the model.
     self.entity_cache[found.id] = []
     db.session.flush()
     return found
开发者ID:adamchainz,项目名称:aleph,代码行数:9,代码来源:crawler.py

示例2: load_collection

# 需要导入模块: from aleph.model import Collection [as 别名]
# 注意: by_foreign_id 是定义在 Collection 类上的方法, 不能单独导入; 导入 Collection 后通过 Collection.by_foreign_id(...) 调用
 def load_collection(self):
     """Return the ``'polyglot:ner'`` collection, looking it up only once.

     The value returned by ``Collection.by_foreign_id`` is stored on the
     instance as ``_collection`` and handed back on every later call.
     """
     # Fast path: a previous call already stored the collection.
     if hasattr(self, '_collection'):
         return self._collection

     settings = {
         'label': 'Automatically Extracted Persons and Companies',
         'public': True
     }
     self._collection = Collection.by_foreign_id('polyglot:ner', settings)
     return self._collection
开发者ID:andkamau,项目名称:aleph,代码行数:9,代码来源:polyglot_entity.py

示例3: crawl

# 需要导入模块: from aleph.model import Collection [as 别名]
# 注意: by_foreign_id 是定义在 Collection 类上的方法, 不能单独导入; 导入 Collection 后通过 Collection.by_foreign_id(...) 调用
    def crawl(self):
        """Mirror Investigative Dashboard tickets into one collection.

        Looks up the collection keyed by the ``all_closed`` ticket URL and
        calls ``Permission.grant_foreign`` for the
        ``'idashboard:occrp_staff'`` role (flags ``True, False``). It then
        fetches the ``all_closed`` and ``all_open`` endpoints; each ticket
        whose ``ticket_type`` has a category in ``REQUEST_TYPES`` is passed
        to ``Entity.by_foreign_id``. Entities of the collection that were
        not produced by this run are deleted, and the collection is emitted
        together with the gathered terms.
        """
        url = urljoin(self.host, '/ticket/all_closed/?format=json')
        collection = Collection.by_foreign_id(url, {
            'label': 'Investigative Dashboard Requests'
        })
        Permission.grant_foreign(collection, 'idashboard:occrp_staff',
                                 True, False)
        # A set keeps the membership test in the cleanup pass cheap.
        existing_entities = set()
        terms = set()
        db.session.flush()
        for endpoint in ['all_closed', 'all_open']:
            url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
            data = self.session.get(url).json()
            # 'paginator' or 'object_list' may be missing or null in the
            # response; treat that as "no tickets" instead of iterating None.
            paginator = data.get('paginator') or {}
            tickets = paginator.get('object_list') or []
            for req in tickets:
                category = REQUEST_TYPES.get(req.get('ticket_type'))
                if category is None:
                    # Ticket types without a mapped category are ignored.
                    continue
                ent = Entity.by_foreign_id(str(req.get('id')), collection, {
                    'name': req.get('name'),
                    'category': category,
                    'data': req,
                    'selectors': [req.get('name')]
                })
                terms.update(ent.terms)
                existing_entities.add(ent.id)
                log.info("  # %s (%s)", ent.name, ent.category)

        # Drop entities that no longer correspond to a fetched ticket.
        for entity in collection.entities:
            if entity.id not in existing_entities:
                entity.delete()
        self.emit_collection(collection, terms)
开发者ID:01-,项目名称:aleph,代码行数:33,代码来源:idashboard.py

示例4: crawl

# 需要导入模块: from aleph.model import Collection [as 别名]
# 注意: by_foreign_id 是定义在 Collection 类上的方法, 不能单独导入; 导入 Collection 后通过 Collection.by_foreign_id(...) 调用
    def crawl(self):
        """Mirror Investigative Dashboard tickets into one collection.

        Looks up the collection keyed by the ``all_closed`` ticket URL and
        calls ``Permission.grant_foreign`` for the
        ``'idashboard:occrp_staff'`` role (flags ``True, False``). It then
        fetches the ``all_closed`` and ``all_open`` endpoints and hands each
        ticket to ``self.update_entity``; tickets for which that returns
        ``None`` are skipped. Entities of the collection that were not
        produced by this run are deleted, and the collection is emitted
        together with the gathered terms.
        """
        url = urljoin(self.host, '/ticket/all_closed/?format=json')
        collection = Collection.by_foreign_id(url, {
            'label': 'Investigative Dashboard Requests'
        })
        Permission.grant_foreign(collection, 'idashboard:occrp_staff',
                                 True, False)
        # A set keeps the membership test in the cleanup pass cheap.
        existing_entities = set()
        terms = set()
        db.session.flush()
        for endpoint in ['all_closed', 'all_open']:
            url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
            data = self.session.get(url).json()
            # A leftover debug ``print``/``continue`` used to sit here. It
            # skipped the ingestion loop entirely, so the cleanup pass below
            # deleted every entity in the collection.
            paginator = data.get('paginator') or {}
            # A missing or null 'object_list' means "no tickets".
            tickets = paginator.get('object_list') or []
            for req in tickets:
                ent = self.update_entity(req, collection)
                if ent is not None:
                    terms.update(ent.terms)
                    existing_entities.add(ent.id)
                    log.info("  # %s", ent.name)

        # Drop entities that no longer correspond to a fetched ticket.
        for entity in collection.entities:
            if entity.id not in existing_entities:
                entity.delete()
        self.emit_collection(collection, terms)
开发者ID:stefanw,项目名称:aleph,代码行数:29,代码来源:idashboard.py

示例5: crawl_collection

# 需要导入模块: from aleph.model import Collection [as 别名]
# 注意: by_foreign_id 是定义在 Collection 类上的方法, 不能单独导入; 导入 Collection 后通过 Collection.by_foreign_id(...) 调用
    def crawl_collection(self, collection):
        """Import one remote Spindle collection record and its entities.

        ``collection`` is the remote record, read with ``.get``. Records
        with an empty ``subjects`` list are skipped. Otherwise the matching
        local collection is looked up by its API URL, the remote
        permissions are replayed through ``Permission.grant_foreign``, and
        every named remote entity is passed to ``Entity.by_foreign_id``.
        Local entities not produced by this run are deleted before the
        collection is emitted with the gathered terms.
        """
        if len(collection.get('subjects', [])) == 0:
            return

        url = urljoin(self.URL, '/api/collections/%s' % collection.get('id'))
        remote_title = collection.get('title')
        local = Collection.by_foreign_id(url, {'label': remote_title})

        perm_response = requests.get('%s/permissions' % url,
                                     headers=self.HEADERS)
        for perm in perm_response.json().get('results', []):
            role = perm.get('role')
            Permission.grant_foreign(local, role,
                                     perm.get('read'), perm.get('write'))

        log.info(" > Spindle collection: %s", local.label)
        entity_response = requests.get('%s/entities' % url,
                                       headers=self.HEADERS)
        terms = set()
        seen_ids = []
        for record in entity_response.json().get('results', []):
            # Unnamed remote entities are not imported.
            if record.get('name') is None:
                continue
            other_names = record.get('other_names', [])
            aliases = [other.get('alias') for other in other_names]
            payload = {
                'name': record.get('name'),
                'category': SCHEMATA.get(record.get('$schema'), OTHER),
                'data': record,
                'selectors': aliases
            }
            ent = Entity.by_foreign_id(record.get('id'), local, payload)
            terms.update(ent.terms)
            seen_ids.append(ent.id)
            log.info("  # %s (%s)", ent.name, ent.category)

        # Anything still attached to the collection but not seen is stale.
        for current in local.entities:
            if current.id in seen_ids:
                continue
            current.delete()
        self.emit_collection(local, terms)
开发者ID:01-,项目名称:aleph,代码行数:36,代码来源:spindle.py

示例6: load_collection

# 需要导入模块: from aleph.model import Collection [as 别名]
# 注意: by_foreign_id 是定义在 Collection 类上的方法, 不能单独导入; 导入 Collection 后通过 Collection.by_foreign_id(...) 调用
 def load_collection(self, data):
     """Return the collection for ``data['foreign_id']``, creating it if absent.

     When ``Collection.by_foreign_id`` finds nothing, a collection is
     created from ``data``, the session is committed, and
     ``update_collection`` is called on the new object before it is
     returned.
     """
     existing = Collection.by_foreign_id(data.get('foreign_id'))
     if existing is not None:
         return existing

     created = Collection.create(data)
     db.session.commit()
     update_collection(created)
     return created
开发者ID:CodeForAfrica,项目名称:aleph,代码行数:10,代码来源:crawler.py

示例7: analyze

# 需要导入模块: from aleph.model import Collection [as 别名]
# 注意: by_foreign_id 是定义在 Collection 类上的方法, 不能单独导入; 导入 Collection 后通过 Collection.by_foreign_id(...) 调用
def analyze(foreign_id=None):
    """Queue re-analysis for one collection, or for every collection.

    With a truthy ``foreign_id`` the matching collection's id is sent to
    ``analyze_collection.delay``; a ``ValueError`` is raised when no such
    collection exists. Without one, every collection from
    ``Collection.all()`` is queued.
    """
    if not foreign_id:
        for each in Collection.all():
            analyze_collection.delay(each.id)
        return

    target = Collection.by_foreign_id(foreign_id)
    if target is None:
        raise ValueError("No such collection: %r" % foreign_id)
    analyze_collection.delay(target.id)
开发者ID:rlugojr,项目名称:aleph,代码行数:12,代码来源:manage.py

示例8: crawl_collection

# 需要导入模块: from aleph.model import Collection [as 别名]
# 注意: by_foreign_id 是定义在 Collection 类上的方法, 不能单独导入; 导入 Collection 后通过 Collection.by_foreign_id(...) 调用
    def crawl_collection(self, collection):
        """Import one remote Spindle collection record and its entities.

        ``collection`` is the remote record, read with ``.get``. Records
        with an empty ``subjects`` list are skipped. Otherwise the matching
        local collection is looked up by its API URL and the remote
        permissions are replayed through ``Permission.grant_foreign``.
        Each named remote entity is reshaped in place (schema mapped,
        relation fields dropped, dates and aliases rewritten, the remote
        ``id`` moved into ``identifiers``) and stored with ``Entity.save``.
        Local entities not produced by this run are deleted before the
        collection is emitted with the gathered terms.
        """
        if len(collection.get('subjects', [])) == 0:
            return

        url = urljoin(self.URL, '/api/collections/%s' % collection.get('id'))
        remote_title = collection.get('title')
        local = Collection.by_foreign_id(url, {'label': remote_title})

        perm_response = requests.get('%s/permissions' % url,
                                     headers=self.HEADERS)
        for perm in perm_response.json().get('results', []):
            role = perm.get('role')
            Permission.grant_foreign(local, role,
                                     perm.get('read'), perm.get('write'))

        log.info(" > Spindle collection: %s", local.label)
        entity_response = requests.get('%s/entities' % url,
                                       headers=self.HEADERS)
        terms = set()
        seen_ids = []
        # Fields removed from every remote entity before it is saved.
        dropped_keys = ('members', 'memberships', 'assets', 'owners',
                        'family_first', 'family_second',
                        'social_first', 'social_second')
        for record in entity_response.json().get('results', []):
            # Unnamed remote entities are not imported.
            if record.get('name') is None:
                continue

            record['$schema'] = SCHEMATA.get(record.get('$schema'), OTHER)
            if 'jurisdiction_code' in record:
                code = record['jurisdiction_code']
                record['jurisdiction_code'] = code.lower()
            for key in dropped_keys:
                record.pop(key, None)

            # Keep only the part before 'T' of timestamp-style dates.
            for date_field in ['birth_date']:
                if date_field not in record:
                    continue
                if 'T' in record[date_field]:
                    record[date_field] = record[date_field].split('T', 1)[0]

            # Remote aliases are stored under 'alias'; rename to 'name'.
            for other in record.get('other_names', []):
                alias = other.pop('alias', None)
                if alias is None:
                    continue
                other['name'] = alias

            remote_id = record.pop('id', None)
            record['identifiers'] = [{
                'scheme': 'spindle',
                'identifier': remote_id
            }]
            ent = Entity.save(record, collection_id=local.id, merge=True)
            db.session.flush()
            terms.update(ent.terms)
            seen_ids.append(ent.id)
            log.info("  # %s", ent.name)

        # Anything still attached to the collection but not seen is stale.
        for current in local.entities:
            if current.id in seen_ids:
                continue
            current.delete()
        self.emit_collection(local, terms)
开发者ID:stefanw,项目名称:aleph,代码行数:57,代码来源:spindle.py

示例9: index

# 需要导入模块: from aleph.model import Collection [as 别名]
# 注意: by_foreign_id 是定义在 Collection 类上的方法, 不能单独导入; 导入 Collection 后通过 Collection.by_foreign_id(...) 调用
def index(foreign_id=None):
    """Queue indexing of documents in one collection, or of all documents.

    With a truthy ``foreign_id`` the document-id query is restricted to
    documents belonging to that collection; a ``ValueError`` is raised
    when no such collection exists. Every resulting id is sent to
    ``index_document_id.delay``. When ``foreign_id`` is ``None``,
    ``reindex_entities`` is called afterwards.
    """
    doc_ids = Document.all_ids()
    if foreign_id:
        target = Collection.by_foreign_id(foreign_id)
        if target is None:
            raise ValueError("No such collection: %r" % foreign_id)
        in_target = Document.collections.any(Collection.id == target.id)
        doc_ids = doc_ids.filter(in_target)

    # Each row is a one-element tuple holding the document id.
    for row in doc_ids:
        (doc_id,) = row
        index_document_id.delay(doc_id)

    # NOTE(review): the filter above uses truthiness while this check uses
    # ``is None``, so an empty-string foreign_id indexes everything but
    # skips the entity reindex -- confirm whether that is intended.
    if foreign_id is None:
        reindex_entities()
开发者ID:CodeForAfrica,项目名称:aleph,代码行数:15,代码来源:manage.py

示例10: test_crawler_execute

# 需要导入模块: from aleph.model import Collection [as 别名]
# 注意: by_foreign_id 是定义在 Collection 类上的方法, 不能单独导入; 导入 Collection 后通过 Collection.by_foreign_id(...) 调用
    def test_crawler_execute(self):
        """Run ``TDocumentCrawler`` and check the state and collection it leaves.

        Expects no crawler states beforehand, exactly two afterwards (the
        second describing the 'kitty' ``demo.pdf`` document), and a
        ``'test'`` collection holding exactly one document.
        """
        crawler = TDocumentCrawler()
        initial_states = CrawlerState.all().count()
        assert initial_states == 0, initial_states

        crawler.execute()
        states = CrawlerState.all().all()
        assert len(states) == 2, len(states)
        second = states[1]
        assert 'kitty' in second.meta['title'], second.meta
        assert 'demo.pdf' in second.meta['source_path'], second.meta

        test_collection = Collection.by_foreign_id('test')
        assert test_collection is not None, test_collection
        assert len(list(test_collection.documents)) == 1, \
            list(test_collection.documents)
开发者ID:CodeForAfrica,项目名称:aleph,代码行数:16,代码来源:test_crawler.py

示例11: crawl_source

# 需要导入模块: from aleph.model import Collection [as 别名]
# 注意: by_foreign_id 是定义在 Collection 类上的方法, 不能单独导入; 导入 Collection 后通过 Collection.by_foreign_id(...) 调用
    def crawl_source(self, source):
        """Import one OpenNames source into its own collection.

        Sources whose ``source_id`` is listed in ``IGNORE_SOURCES`` are
        skipped. Otherwise the collection keyed by the source's JSON URL is
        looked up, ``Permission.grant_foreign`` is called for
        ``Role.SYSTEM_GUEST`` (flags ``True, False``), and every entity in
        the downloaded JSON is converted and stored with ``Entity.save``.
        Entities of the collection not produced by this run are deleted
        before the collection is emitted with the gathered terms.
        """
        if source.get('source_id') in IGNORE_SOURCES:
            return

        json_file = source.get('data', {}).get('json')
        url = urljoin(JSON_PATH, json_file)
        source_name = source.get('source') or source.get('source_id')
        label = '%s - %s' % (source.get('publisher'), source_name)
        collection = Collection.by_foreign_id(url, {'label': label})
        Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
        log.info(" > OpenNames collection: %s", collection.label)

        terms = set()
        seen_ids = []
        db.session.flush()
        payload = requests.get(url).json()
        for record in payload.get('entities', []):
            identifier = {
                'scheme': 'opennames:%s' % source.get('source_id'),
                'identifier': record.get('uid')
            }
            data = {
                'identifiers': [identifier],
                'other_names': [],
                'name': record.get('name'),
                '$schema': SCHEMA.get(record.get('type'),
                                      '/entity/entity.json#')
            }
            # The source stores aliases under 'other_name'; rename the key
            # in place and carry the whole alias dict over.
            for other in record.get('other_names', []):
                other['name'] = other.pop('other_name', None)
                data['other_names'].append(other)

            ent = Entity.save(data, collection_id=collection.id, merge=True)
            db.session.flush()
            terms.update(ent.terms)
            seen_ids.append(ent.id)
            log.info("  # %s", ent.name)

        # Anything still attached to the collection but not seen is stale.
        for current in collection.entities:
            if current.id in seen_ids:
                continue
            current.delete()

        self.emit_collection(collection, terms)
开发者ID:stefanw,项目名称:aleph,代码行数:45,代码来源:opennames.py

示例12: crawl_source

# 需要导入模块: from aleph.model import Collection [as 别名]
# 注意: by_foreign_id 是定义在 Collection 类上的方法, 不能单独导入; 导入 Collection 后通过 Collection.by_foreign_id(...) 调用
    def crawl_source(self, source):
        """Import one OpenNames source into its own collection.

        Sources whose ``source_id`` is listed in ``IGNORE_SOURCES`` are
        skipped. Otherwise the collection keyed by the source's JSON URL is
        looked up, ``Permission.grant_foreign`` is called for
        ``Role.SYSTEM_GUEST`` (flags ``True, False``), and every named
        entity in the downloaded JSON is passed to
        ``Entity.by_foreign_id`` with its other names and identity numbers
        as selectors. Entities of the collection not produced by this run
        are deleted before the collection is emitted with the gathered
        terms.
        """
        if source.get('source_id') in IGNORE_SOURCES:
            return

        json_file = source.get('data', {}).get('json')
        url = urljoin(JSON_PATH, json_file)
        source_name = source.get('source') or source.get('source_id')
        label = '%s - %s' % (source.get('publisher'), source_name)
        collection = Collection.by_foreign_id(url, {'label': label})
        Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
        log.info(" > OpenNames collection: %s", collection.label)

        terms = set()
        seen_ids = []
        db.session.flush()
        payload = requests.get(url).json()
        for record in payload.get('entities', []):
            # Unnamed entities are not imported.
            if record.get('name') is None:
                continue

            # Every other name is kept as-is; identity numbers only when
            # they are truthy.
            selectors = [other.get('other_name')
                         for other in record.get('other_names', [])]
            selectors.extend(
                iden.get('number')
                for iden in record.get('identities', [])
                if iden.get('number')
            )

            details = {
                'name': record.get('name'),
                'category': CATEGORIES.get(record.get('type'), OTHER),
                'data': record,
                'selectors': selectors
            }
            ent = Entity.by_foreign_id(record.get('uid'), collection, details)
            terms.update(ent.terms)
            seen_ids.append(ent.id)
            log.info("  # %s (%s)", ent.name, ent.category)

        # Anything still attached to the collection but not seen is stale.
        for current in collection.entities:
            if current.id in seen_ids:
                continue
            current.delete()
        self.emit_collection(collection, terms)
开发者ID:01-,项目名称:aleph,代码行数:44,代码来源:opennames.py

示例13: test_load_csv

# 需要导入模块: from aleph.model import Collection [as 别名]
# 注意: by_foreign_id 是定义在 Collection 类上的方法, 不能单独导入; 导入 Collection 后通过 Collection.by_foreign_id(...) 调用
    def test_load_csv(self):
        """Bulk-load the ``experts`` CSV fixture and check what it produced.

        Expects no collections beforehand; afterwards a single ``experts``
        collection with category ``'scrape'``, and exactly one hit when the
        entities API is searched for "Greenfield" as an admin.
        """
        before = Collection.all().count()
        assert 0 == before, before

        # The bulk config reads the CSV location from the environment.
        csv_uri = 'file://' + self.get_fixture_path('experts.csv')
        os.environ['ALEPH_TEST_BULK_CSV'] = csv_uri
        config_path = self.get_fixture_path('experts.yml')
        bulk_load(load_config_file(config_path))

        experts = Collection.by_foreign_id('experts')
        assert experts.category == 'scrape', experts.category

        _, headers = self.login(is_admin=True)
        after = Collection.all().count()
        assert 1 == after, after

        search_url = '/api/2/entities?filter:schemata=Thing&q=Greenfield'
        res = self.client.get(search_url, headers=headers)
        assert res.status_code == 200, res
        assert res.json['total'] == 1, res.json
开发者ID:pudo,项目名称:aleph,代码行数:23,代码来源:test_bulk.py

示例14: test_load_sqlite

# 需要导入模块: from aleph.model import Collection [as 别名]
# 注意: by_foreign_id 是定义在 Collection 类上的方法, 不能单独导入; 导入 Collection 后通过 Collection.by_foreign_id(...) 调用
    def test_load_sqlite(self):
        """Bulk-load the ``kek`` SQLite fixture and check what it produced.

        Expects no collections beforehand; afterwards a single ``kek``
        collection with category ``'scrape'``, and exactly one hit for
        "friede springer" whose id starts with a known key.
        """
        before = Collection.all().count()
        assert 0 == before, before

        # The bulk config reads the database URI from the environment.
        sqlite_uri = 'sqlite:///' + self.get_fixture_path('kek.sqlite')
        os.environ['ALEPH_TEST_BULK_DATABASE_URI'] = sqlite_uri
        config_path = self.get_fixture_path('kek.yml')
        bulk_load(load_config_file(config_path))

        after = Collection.all().count()
        assert 1 == after, after

        kek = Collection.by_foreign_id('kek')
        assert kek.category == 'scrape', kek.category

        _, headers = self.login(is_admin=True)
        search_url = '/api/2/entities?filter:schemata=Thing&q=friede+springer'
        res = self.client.get(search_url, headers=headers)
        assert res.status_code == 200, res
        assert res.json['total'] == 1, res.json
        first_hit = res.json['results'][0]
        expected_prefix = '9895ccc1b3d6444ccc6371ae239a7d55c748a714'
        assert first_hit['id'].startswith(expected_prefix), first_hit
开发者ID:pudo,项目名称:aleph,代码行数:26,代码来源:test_bulk.py

示例15: flush

# 需要导入模块: from aleph.model import Collection [as 别名]
# 注意: by_foreign_id 是定义在 Collection 类上的方法, 不能单独导入; 导入 Collection 后通过 Collection.by_foreign_id(...) 调用
def flush(foreign_id):
    """Look up a collection by foreign ID and hand it to ``delete_collection``.

    Raises ``ValueError`` when no collection matches ``foreign_id``.
    """
    target = Collection.by_foreign_id(foreign_id)
    if target is not None:
        delete_collection(target.id)
        return
    raise ValueError("No such collection: %r" % foreign_id)
开发者ID:CodeForAfrica,项目名称:aleph,代码行数:8,代码来源:manage.py


注:本文中的aleph.model.Collection.by_foreign_id方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。