This article collects typical usage examples of the Python method arches.app.search.search_engine_factory.SearchEngineFactory.bulk_index. If you have been wondering what SearchEngineFactory.bulk_index does, how to call it, or what real-world code that uses it looks like, the curated examples below should help. You can also explore further usage examples of the class it belongs to, arches.app.search.search_engine_factory.SearchEngineFactory.
Three code examples of the SearchEngineFactory.bulk_index method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
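Before the full examples, here is a minimal sketch of the pattern they all follow: wrap each document with create_bulk_item so it carries the action metadata the Elasticsearch bulk API expects, collect the wrapped items in a list, and submit the list with a single bulk_index call. The index name, document fields, and single-argument bulk_index calling convention are taken from Example 1; older Arches versions (Examples 2 and 3) pass the index, type, and item list as separate arguments, and the sample document below is only a placeholder.

from arches.app.search.search_engine_factory import SearchEngineFactory

se = SearchEngineFactory().create()

# placeholder document; real documents come from Resource.get_documents_to_index()
doc = {'resourceinstanceid': '00000000-0000-0000-0000-000000000001', 'graph_id': 'example-graph'}

documents = []
# create_bulk_item pairs the document with the action/metadata header the bulk API needs
documents.append(se.create_bulk_item(index='resource', doc_type=doc['graph_id'],
                                     id=doc['resourceinstanceid'], data=doc))

# one bulk request instead of one HTTP round trip per document
se.bulk_index(documents)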
Example 1: bulk_save
# Required import: from arches.app.search.search_engine_factory import SearchEngineFactory [as alias]
# Or: from arches.app.search.search_engine_factory.SearchEngineFactory import bulk_index [as alias]
# This excerpt also relies on DataTypeFactory, the Arches models module, and the Resource and TileModel model classes being imported in the enclosing module.
def bulk_save(resources):
    """
    Saves and indexes a list of resources

    Arguments:
    resources -- a list of resource models
    """
    se = SearchEngineFactory().create()
    datatype_factory = DataTypeFactory()
    node_datatypes = {str(nodeid): datatype for nodeid, datatype in models.Node.objects.values_list('nodeid', 'datatype')}
    tiles = []
    documents = []
    term_list = []

    # flatten out the nested tiles into a single array
    for resource in resources:
        for parent_tile in resource.tiles:
            for child_tile in parent_tile.tiles.values():
                if len(child_tile) > 0:
                    resource.tiles.extend(child_tile)
            parent_tile.tiles = {}
        tiles.extend(resource.tiles)

    # need to save the models first before getting the documents for indexing
    Resource.objects.bulk_create(resources)
    TileModel.objects.bulk_create(tiles)

    for resource in resources:
        resource.save_edit(edit_type='create')
        document, terms = resource.get_documents_to_index(fetchTiles=False, datatype_factory=datatype_factory, node_datatypes=node_datatypes)
        document['root_ontology_class'] = resource.get_root_ontology()
        documents.append(se.create_bulk_item(index='resource', doc_type=document['graph_id'], id=document['resourceinstanceid'], data=document))
        for term in terms:
            term_list.append(se.create_bulk_item(index='strings', doc_type='term', id=term['_id'], data=term['_source']))

    for tile in tiles:
        tile.save_edit(edit_type='tile create', new_value=tile.data)

    # bulk index the resources, tiles and terms
    se.bulk_index(documents)
    se.bulk_index(term_list)
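The point of this example is to avoid per-record round trips on both sides of the pipeline: Resource.objects.bulk_create and TileModel.objects.bulk_create write all rows with a single query each (Django's bulk_create skips the models' save() methods, which is presumably why edit-log entries are recorded explicitly with save_edit afterwards), and the two bulk_index calls at the end ship all resource documents and all term documents to Elasticsearch in single bulk requests instead of one index call per document.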
Example 2: index
# Required import: from arches.app.search.search_engine_factory import SearchEngineFactory [as alias]
# Or: from arches.app.search.search_engine_factory.SearchEngineFactory import bulk_index [as alias]
# This excerpt also relies on the standard-library sys module being imported in the enclosing module.
def index(self, documents, index, type, idfield, processdoc=None, getid=None, bulk=False):
    detail = ''
    bulkitems = []
    errorlist = []
    se = SearchEngineFactory().create()
    if not isinstance(documents, list):
        documents = [documents]
    for document in documents:
        # print('inserting document: %s' % document)
        sys.stdout.write('.')
        if processdoc is None:
            data = document
        else:
            data = processdoc(document)
        id = None
        if getid is not None:
            id = getid(document, data)
        try:
            if bulk:
                bulkitem = se.create_bulk_item(index, type, id, data)
                bulkitems.append(bulkitem[0])
                bulkitems.append(bulkitem[1])
            else:
                se.index_data(index, type, data, idfield=idfield, id=id)
        except Exception as e:
            detail = e
            errorlist.append(id)
    if bulk:
        try:
            se.bulk_index(index, type, bulkitems)
        except Exception as e:
            detail = e
            errorlist = bulkitems
            print('bulk insert failed')
    if detail != '':
        print("\n\nException detail: %s " % (detail))
        print("There was a problem indexing the following items:")
        print(errorlist)
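A hedged sketch of how this helper might be called; 'indexer' stands for an instance of whatever class the method belongs to, and the document list, index name, and get_id callable are illustrative placeholders rather than anything from the Arches source:

def get_id(document, data):
    # derive the Elasticsearch document id from the raw document
    return document['conceptid']

indexer.index(
    documents=[{'conceptid': 'c1', 'label': 'Building'}],
    index='concept',
    type='concept',
    idfield='conceptid',
    getid=get_id,
    bulk=True,  # queue items with create_bulk_item and submit them in one bulk_index call
)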
Example 3: index
# Required import: from arches.app.search.search_engine_factory import SearchEngineFactory [as alias]
# Or: from arches.app.search.search_engine_factory.SearchEngineFactory import bulk_index [as alias]
# This excerpt also relies on the standard-library sys and uuid modules and on the project's Django settings (referenced as settings) being imported in the enclosing module.
def index(documents, index, type, idfield, processdoc=None, getid=None, bulk=False):
    print('index_concepts.index')
    detail = ''
    bulkitems = []
    errorlist = []
    se = SearchEngineFactory().create()
    if not isinstance(documents, list):
        documents = [documents]
    for document in documents:
        sys.stdout.write('.')
        if processdoc is None:
            data = document
        else:
            data = processdoc(document)
        id = None
        if getid is not None:
            id = getid(document, data)
        try:
            if bulk:
                bulkitem = se.create_bulk_item(index, type, id, data)
                bulkitems.append(bulkitem[0])
                bulkitems.append(bulkitem[1])
            else:
                se.index_data(index, type, data, idfield=idfield, id=id)
                # se.index_data('concept_labels', '00000000-0000-0000-0000-000000000005', data, 'id')
                for concept in data['labels']:
                    # se.index_term(concept['label'], concept['labelid'], '00000000-0000-0000-0000-000000000005', settings.PUBLISHED_LABEL, {'conceptid': data['conceptid']})
                    if concept['label'].strip(' \t\n\r') != '':
                        already_indexed = False
                        count = 1
                        ids = [id]
                        try:
                            # derive a deterministic id from the label and concept id so repeated
                            # labels update the existing term document instead of duplicating it
                            _id = uuid.uuid3(uuid.NAMESPACE_DNS, '%s%s' % (hash(concept['label']), hash(data['conceptid'])))
                            result = se.es.get(index='term', doc_type='value', id=_id, ignore=404)
                            # print('result: %s' % result)
                            if result['found']:
                                ids = result['_source']['ids']
                                if id not in ids:
                                    ids.append(id)
                            else:
                                ids = [id]
                            if data['context'] != '00000000-0000-0000-0000-000000000003' and data['context'] != '00000000-0000-0000-0000-000000000004':
                                se.index_data('term', 'value', {'term': concept['label'], 'context': data['context'], 'ewstatus': settings.PUBLISHED_LABEL, 'options': {'conceptid': data['conceptid']}, 'count': len(ids), 'ids': ids}, id=_id)
                        except Exception:
                            # re-raise so the outer handler records the error
                            raise
        except Exception as e:
            detail = e
            print(detail)
            errorlist.append(id)
    if bulk:
        try:
            se.bulk_index(index, type, bulkitems)
        except Exception as e:
            detail = e
            errorlist = bulkitems
            print('bulk insert failed')
    if detail != '':
        print("\n\nException detail: %s " % (detail))
        print("There was a problem indexing the following items:")
        print(errorlist)
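A side note on the _id construction above: uuid.uuid3 is deterministic, so the same label/concept pair always maps to the same term document, which is what lets the code fetch and extend the existing 'ids' list instead of writing duplicates. One caveat when running this under Python 3: hash() on strings is randomized per process unless PYTHONHASHSEED is fixed, so ids built from hash() are only stable within a single run. The snippet below only illustrates the determinism of uuid3 itself; the label and concept id are placeholders.

import uuid

label, conceptid = 'Building', '11111111-1111-1111-1111-111111111111'  # placeholders

a = uuid.uuid3(uuid.NAMESPACE_DNS, '%s%s' % (hash(label), hash(conceptid)))
b = uuid.uuid3(uuid.NAMESPACE_DNS, '%s%s' % (hash(label), hash(conceptid)))

# within one process the two ids are identical
assert a == b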