This article collects typical usage examples of the Python method aleph.model.Entity.by_foreign_id. If you are wondering what Entity.by_foreign_id does, how to call it, or are looking for sample code, the curated examples below may help. You can also read further about the class the method belongs to, aleph.model.Entity.
The following shows 6 code examples of the Entity.by_foreign_id method, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
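Before diving into the examples, here is a minimal sketch of the call pattern they all share: by_foreign_id looks up (or creates) a record keyed by an external identifier inside a parent collection or watchlist, taking a dict with fields such as name, category, data and selectors. The db import path and all values below are illustrative assumptions drawn from the examples, not a definitive description of the aleph API.

from aleph.core import db          # assumption: location of the shared SQLAlchemy session
from aleph.model import Entity, Collection

# Get or create a parent collection keyed by an external URL (placeholder values).
collection = Collection.by_foreign_id('http://example.com/source.json', {
    'label': 'Example Source'
})

# Get or create an entity in that collection, keyed by the remote record's id.
ent = Entity.by_foreign_id('example-entity-1', collection, {
    'name': 'Example Entity',
    'category': 'other',            # placeholder; the examples map source types to categories
    'data': {'source': 'example'},
    'selectors': ['Example Entity']
})
db.session.commit()                 # the examples persist changes via the shared db session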
Example 1: crawl
# Required import: from aleph.model import Entity [as alias]
# Alternatively: from aleph.model.Entity import by_foreign_id [as alias]
def crawl(self):
    url = urljoin(self.host, '/ticket/all_closed/?format=json')
    watchlist = Watchlist.by_foreign_id(url, {
        'label': 'Investigative Dashboard Requests'
    })
    Permission.grant_foreign(watchlist, 'idashboard:occrp_staff',
                             True, False)
    existing_entities = []
    previous_terms = watchlist.terms
    updated_terms = set()
    db.session.flush()
    for endpoint in ['all_closed', 'all_open']:
        url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
        data = self.session.get(url).json()
        for req in data.get('paginator', {}).get('object_list'):
            category = REQUEST_TYPES.get(req.get('ticket_type'))
            if category is None:
                continue
            ent = Entity.by_foreign_id(str(req.get('id')), watchlist, {
                'name': req.get('name'),
                'category': category,
                'data': req,
                'selectors': [req.get('name')]
            })
            updated_terms.update(ent.terms)
            existing_entities.append(ent.id)
            log.info(" # %s (%s)", ent.name, ent.category)
    watchlist.delete_entities(spare=existing_entities)
    terms = previous_terms.symmetric_difference(updated_terms)
    self.emit_watchlist(watchlist, terms)
Example 2: crawl_collection
# Required import: from aleph.model import Entity [as alias]
# Alternatively: from aleph.model.Entity import by_foreign_id [as alias]
def crawl_collection(self, collection):
    if not len(collection.get('subjects', [])):
        return
    url = urljoin(self.URL, '/api/collections/%s' % collection.get('id'))
    watchlist = Watchlist.by_foreign_id(url, {
        'label': collection.get('title')
    })
    res = requests.get('%s/permissions' % url, headers=self.HEADERS)
    for perm in res.json().get('results', []):
        Permission.grant_foreign(watchlist, perm.get('role'),
                                 perm.get('read'), perm.get('write'))
    log.info(" > Spindle collection: %s", watchlist.label)
    res = requests.get('%s/entities' % url, headers=self.HEADERS)
    previous_terms = watchlist.terms
    updated_terms = set()
    existing_entities = []
    for entity in res.json().get('results', []):
        if entity.get('name') is None:
            continue
        aliases = [on.get('alias') for on in entity.get('other_names', [])]
        ent = Entity.by_foreign_id(entity.get('id'), watchlist, {
            'name': entity.get('name'),
            'category': SCHEMATA.get(entity.get('$schema'), OTHER),
            'data': entity,
            'selectors': aliases
        })
        updated_terms.update(ent.terms)
        existing_entities.append(ent.id)
        log.info(" # %s (%s)", ent.name, ent.category)
    watchlist.delete_entities(spare=existing_entities)
    terms = previous_terms.symmetric_difference(updated_terms)
    self.emit_watchlist(watchlist, terms)
Example 3: crawl
# Required import: from aleph.model import Entity [as alias]
# Alternatively: from aleph.model.Entity import by_foreign_id [as alias]
def crawl(self):
    url = urljoin(self.host, '/ticket/all_closed/?format=json')
    collection = Collection.by_foreign_id(url, {
        'label': 'Investigative Dashboard Requests'
    })
    Permission.grant_foreign(collection, 'idashboard:occrp_staff',
                             True, False)
    existing_entities = []
    terms = set()
    db.session.flush()
    for endpoint in ['all_closed', 'all_open']:
        url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
        data = self.session.get(url).json()
        for req in data.get('paginator', {}).get('object_list'):
            category = REQUEST_TYPES.get(req.get('ticket_type'))
            if category is None:
                continue
            ent = Entity.by_foreign_id(str(req.get('id')), collection, {
                'name': req.get('name'),
                'category': category,
                'data': req,
                'selectors': [req.get('name')]
            })
            terms.update(ent.terms)
            existing_entities.append(ent.id)
            log.info(" # %s (%s)", ent.name, ent.category)
    for entity in collection.entities:
        if entity.id not in existing_entities:
            entity.delete()
    self.emit_collection(collection, terms)
Example 4: crawl_collection
# Required import: from aleph.model import Entity [as alias]
# Alternatively: from aleph.model.Entity import by_foreign_id [as alias]
def crawl_collection(self, collection):
    if not len(collection.get('subjects', [])):
        return
    url = urljoin(self.URL, '/api/collections/%s' % collection.get('id'))
    collection = Collection.by_foreign_id(url, {
        'label': collection.get('title')
    })
    res = requests.get('%s/permissions' % url, headers=self.HEADERS)
    for perm in res.json().get('results', []):
        Permission.grant_foreign(collection, perm.get('role'),
                                 perm.get('read'), perm.get('write'))
    log.info(" > Spindle collection: %s", collection.label)
    res = requests.get('%s/entities' % url, headers=self.HEADERS)
    terms = set()
    existing_entities = []
    for entity in res.json().get('results', []):
        if entity.get('name') is None:
            continue
        aliases = [on.get('alias') for on in entity.get('other_names', [])]
        ent = Entity.by_foreign_id(entity.get('id'), collection, {
            'name': entity.get('name'),
            'category': SCHEMATA.get(entity.get('$schema'), OTHER),
            'data': entity,
            'selectors': aliases
        })
        terms.update(ent.terms)
        existing_entities.append(ent.id)
        log.info(" # %s (%s)", ent.name, ent.category)
    for entity in collection.entities:
        if entity.id not in existing_entities:
            entity.delete()
    self.emit_collection(collection, terms)
Example 5: crawl_source
# Required import: from aleph.model import Entity [as alias]
# Alternatively: from aleph.model.Entity import by_foreign_id [as alias]
def crawl_source(self, source):
    if source.get('source_id') in IGNORE_SOURCES:
        return
    json_file = source.get('data', {}).get('json')
    url = urljoin(JSON_PATH, json_file)
    source_name = source.get('source') or source.get('source_id')
    label = '%s - %s' % (source.get('publisher'), source_name)
    collection = Collection.by_foreign_id(url, {
        'label': label
    })
    Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
    log.info(" > OpenNames collection: %s", collection.label)
    terms = set()
    existing_entities = []
    db.session.flush()
    entities = requests.get(url).json().get('entities', [])
    for entity in entities:
        if entity.get('name') is None:
            continue
        selectors = []
        for on in entity.get('other_names', []):
            selectors.append(on.get('other_name'))
        for iden in entity.get('identities', []):
            if iden.get('number'):
                selectors.append(iden.get('number'))
        ent = Entity.by_foreign_id(entity.get('uid'), collection, {
            'name': entity.get('name'),
            'category': CATEGORIES.get(entity.get('type'), OTHER),
            'data': entity,
            'selectors': selectors
        })
        terms.update(ent.terms)
        existing_entities.append(ent.id)
        log.info(" # %s (%s)", ent.name, ent.category)
    for entity in collection.entities:
        if entity.id not in existing_entities:
            entity.delete()
    self.emit_collection(collection, terms)
Example 6: crawl_source
# Required import: from aleph.model import Entity [as alias]
# Alternatively: from aleph.model.Entity import by_foreign_id [as alias]
def crawl_source(self, source):
    if source.get('source_id') in IGNORE_SOURCES:
        return
    json_file = source.get('data', {}).get('json')
    url = urljoin(JSON_PATH, json_file)
    watchlist = Watchlist.by_foreign_id(url, {
        'label': source.get('source_id')
    })
    Permission.grant_foreign(watchlist, Role.SYSTEM_GUEST, True, False)
    log.info(" > OpenNames collection: %s", watchlist.label)
    previous_terms = watchlist.terms
    updated_terms = set()
    existing_entities = []
    db.session.flush()
    entities = requests.get(url).json().get('entities', [])
    for entity in entities:
        if entity.get('name') is None:
            continue
        selectors = []
        for on in entity.get('other_names', []):
            selectors.append(on.get('other_name'))
        for iden in entity.get('identities', []):
            if iden.get('number'):
                selectors.append(iden.get('number'))
        ent = Entity.by_foreign_id(entity.get('uid'), watchlist, {
            'name': entity.get('name'),
            'category': CATEGORIES.get(entity.get('type'), OTHER),
            'data': entity,
            'selectors': selectors
        })
        updated_terms.update(ent.terms)
        existing_entities.append(ent.id)
        log.info(" # %s (%s)", ent.name, ent.category)
    watchlist.delete_entities(spare=existing_entities)
    terms = previous_terms.symmetric_difference(updated_terms)
    self.emit_watchlist(watchlist, terms)