This article collects typical usage examples of the Python method elasticsearch.helpers.streaming_bulk. If you are unsure exactly what helpers.streaming_bulk does or how to call it, the curated code samples below may help. You can also explore further usage of the containing module, elasticsearch.helpers.
Ten code examples of helpers.streaming_bulk are shown below, sorted by popularity by default.
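Before the project-specific examples, here is a minimal sketch of the basic calling pattern. The client, index name, and document shape are illustrative assumptions and do not come from any example below:

from elasticsearch import Elasticsearch
from elasticsearch.helpers import streaming_bulk

es = Elasticsearch()  # hypothetical local client, for illustration only

def generate_actions():
    # Each yielded dict is one bulk action; keys starting with "_" are
    # metadata, everything else becomes the document source.
    for i in range(3):
        yield {'_index': 'demo-index', '_id': str(i), 'value': i}

# streaming_bulk is a generator that yields an (ok, item) tuple per document
# as each chunk is sent, so errors can be handled without buffering everything.
for ok, item in streaming_bulk(es, generate_actions(), chunk_size=500,
                               raise_on_error=False):
    if not ok:
        print('Failed to index document:', item)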
Example 1: index_model
# Required imports: from elasticsearch import helpers [as alias]
# or: from elasticsearch.helpers import streaming_bulk [as alias]
def index_model(index_name, adapter, timeout=None):
    '''Index all objects for a given model'''
    model = adapter.model
    log.info('Indexing %s objects', model.__name__)
    qs = model.objects
    if hasattr(model.objects, 'visible'):
        qs = qs.visible()
    if adapter.exclude_fields:
        qs = qs.exclude(*adapter.exclude_fields)
    docs = iter_qs(qs, adapter)
    docs = iter_for_index(docs, index_name)
    for ok, info in streaming_bulk(es.client, docs, raise_on_error=False,
                                   request_timeout=timeout):
        if not ok:
            log.error('Unable to index %s "%s": %s', model.__name__,
                      info['index']['_id'], info['index']['error'])
Example 2: verbose_run
# Required imports: from elasticsearch import helpers [as alias]
# or: from elasticsearch.helpers import streaming_bulk [as alias]
def verbose_run(self, model, report_every=100):
    name = model._meta.verbose_name
    print('Indexing %s: ' % name, end='')
    start = time.time()
    cnt = 0
    for _ in streaming_bulk(
            self.es,
            (m.to_search().to_dict(True) for m in model.objects.all().iterator()),
            index=settings.ES_INDEX,
            doc_type=name.lower(),
    ):
        cnt += 1
        # print a progress dot once every `report_every` documents
        if cnt % report_every == 0:
            print('.', end='')
            sys.stdout.flush()
    print('DONE\nIndexed %d %s in %.2f seconds' % (
        cnt, name, time.time() - start
    ))
Example 3: version_compatible_streaming_bulk
# Required imports: from elasticsearch import helpers [as alias]
# or: from elasticsearch.helpers import streaming_bulk [as alias]
def version_compatible_streaming_bulk(
        es_client, docs, index, chunk_size, raise_on_error, doc_type
):
    # Elasticsearch 7 removed mapping types, so doc_type is only passed
    # for older clusters.
    if is_es_version_7(es_client):
        return streaming_bulk(
            es_client,
            docs,
            index=index,
            chunk_size=chunk_size,
            raise_on_error=raise_on_error,
        )
    else:
        return streaming_bulk(
            es_client,
            docs,
            index=index,
            doc_type=doc_type,
            chunk_size=chunk_size,
            raise_on_error=raise_on_error,
        )
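Since streaming_bulk returns a generator, a caller of this helper still has to iterate over the results. A minimal sketch of a call site, where the client, document generator, and index name are illustrative assumptions:

# Hypothetical call site for the helper defined in Example 3.
for ok, info in version_compatible_streaming_bulk(
    es_client=es,
    docs=doc_generator(),
    index='my-index',
    chunk_size=500,
    raise_on_error=False,
    doc_type='_doc',
):
    if not ok:
        print('Bulk operation failed:', info)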
Example 4: insert_multiple_documents
# Required imports: from elasticsearch import helpers [as alias]
# or: from elasticsearch.helpers import streaming_bulk [as alias]
def insert_multiple_documents(self, list_of_documents):
    actions = [{"_source": document, "_index": self.index, "_type": self.doc_type}
               for document in list_of_documents]
    for success, response in elastic_parallelbulk(client=self.es, actions=actions):
        self.logger.info("BulkAPI response: {0}".format(str(response)))
        if not success:
            self.logger.error(str(response))
            raise ValueError(str(response))
Example 5: _insert_documents
# Required imports: from elasticsearch import helpers [as alias]
# or: from elasticsearch.helpers import streaming_bulk [as alias]
def _insert_documents(self, es, index, docs, count, verbose=False):
    """Inserts documents into an index.

    Args:
        es (Elasticsearch): Elasticsearch client.
        index (str): Index name.
        docs (Iterator[dict]): Iterator of documents to insert.
        count (int): Number of documents to insert.
        verbose (bool): Print additional progress information.

    """
    operations = self._bulk_update_generator(docs, index)
    creations, errors = 0, 0

    logger.info('Inserting documents...')

    iterator = helpers.streaming_bulk(es, operations, raise_on_error=False,
                                      request_timeout=ES_TIMEOUT)
    if verbose:
        iterator = tqdm.tqdm(iterator, total=count, file=sys.stderr)

    for ok, response in iterator:
        if ok and response['create']['result'] == 'created':
            creations += 1
        else:
            errors += 1
            identifier = response['create']['_id']
            error = response['create']['error']
            logger.warning(
                'Error processing document ID {}:'.format(identifier))
            logger.warning(json.dumps(error, indent=4, ensure_ascii=False))
            logger.warning('')

    logger.info('Summary:')
    logger.info(' + Documents processed: {}'.format(count))
    logger.info(' + Documents created: {}'.format(creations))
    logger.info(' + Errors: {}'.format(errors))
    logger.info('')
Example 6: load_repo
# Required imports: from elasticsearch import helpers [as alias]
# or: from elasticsearch.helpers import streaming_bulk [as alias]
def load_repo(client, path=None, index='git'):
    """
    Parse a git repository with all its commits and load it into elasticsearch
    using `client`. If the index doesn't exist it will be created.
    """
    path = dirname(dirname(abspath(__file__))) if path is None else path
    repo_name = basename(path)
    repo = git.Repo(path)

    create_git_index(client, index)

    # create the parent document in case it doesn't exist
    client.create(
        index=index,
        doc_type='repos',
        id=repo_name,
        body={},
        ignore=409  # 409 - conflict - would be returned if the document is already there
    )

    # we let the streaming bulk continuously process the commits as they come
    # in - since the `parse_commits` function is a generator this will avoid
    # loading all the commits into memory
    for ok, result in streaming_bulk(
            client,
            parse_commits(repo.refs.master.commit, repo_name),
            index=index,
            doc_type='commits',
            chunk_size=50  # keep the batch sizes small for appearances only
    ):
        action, result = result.popitem()
        doc_id = '/%s/commits/%s' % (index, result['_id'])
        # process the information from ES whether the document has been
        # successfully indexed
        if not ok:
            print('Failed to %s document %s: %r' % (action, doc_id, result))
        else:
            print(doc_id)
Example 7: index
# Required imports: from elasticsearch import helpers [as alias]
# or: from elasticsearch.helpers import streaming_bulk [as alias]
def index(self) -> bool:
    if not self.get_available_fields().count():
        self.task.info(self.task, "There are no series to index in this catalog")
        return False

    index_ok = False
    for success, info in streaming_bulk(self.elastic, self.generate_actions()):
        if not success:
            self.task.info(self.task, 'Error while indexing: {}'.format(info))
        else:
            index_ok = True
    return index_ok
Example 8: handle_command
# Required imports: from elasticsearch import helpers [as alias]
# or: from elasticsearch.helpers import streaming_bulk [as alias]
def handle_command(self, doc, namespace, timestamp):
    # Flush buffer before handling the command
    self.commit()
    db = namespace.split(".", 1)[0]

    if doc.get("dropDatabase"):
        dbs = self.command_helper.map_db(db)
        for _db in dbs:
            self.elastic.indices.delete(index=_db.lower())

    if doc.get("renameCollection"):
        raise errors.OperationFailed(
            "elastic_doc_manager does not support renaming a mapping."
        )

    if doc.get("create"):
        db, coll = self.command_helper.map_collection(db, doc["create"])
        if db and coll:
            self.elastic.indices.put_mapping(
                index=db.lower(), doc_type=coll, body={"_source": {"enabled": True}}
            )

    if doc.get("drop"):
        db, coll = self.command_helper.map_collection(db, doc["drop"])
        if db and coll:
            # This will delete the items in coll, but not get rid of the
            # mapping.
            warnings.warn(
                "Deleting all documents of type %s on index %s. "
                "The mapping definition will persist and must be "
                "removed manually." % (coll, db)
            )
            responses = streaming_bulk(
                self.elastic,
                (
                    dict(result, _op_type="delete")
                    for result in scan(
                        self.elastic, index=db.lower(), doc_type=coll
                    )
                ),
            )
            for ok, resp in responses:
                if not ok:
                    LOG.error(
                        "Error occurred while deleting ElasticSearch document "
                        "during handling of 'drop' command: %r" % resp
                    )
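Example 8 above embeds a pattern worth noting on its own: feeding helpers.scan results back into streaming_bulk with _op_type='delete' to remove every matching document. A minimal sketch of that pattern in isolation, assuming an illustrative client, index, and type name:

from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan, streaming_bulk

es = Elasticsearch()  # hypothetical client

# scan() yields raw hits that already carry '_index', '_type' and '_id',
# so adding '_op_type': 'delete' turns each hit into a delete action.
delete_actions = (
    dict(hit, _op_type='delete')
    for hit in scan(es, index='my-index', doc_type='my-type')
)

for ok, resp in streaming_bulk(es, delete_actions, raise_on_error=False):
    if not ok:
        print('Delete failed:', resp)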
Example 9: bulk_upsert
# Required imports: from elasticsearch import helpers [as alias]
# or: from elasticsearch.helpers import streaming_bulk [as alias]
def bulk_upsert(self, docs, namespace, timestamp):
    """Insert multiple documents into Elasticsearch."""
    def docs_to_upsert():
        doc = None
        for doc in docs:
            # Remove metadata and redundant _id
            index, doc_type = self._index_and_mapping(namespace)
            doc_id = str(doc.pop("_id"))
            document_action = {
                "_index": index,
                "_type": doc_type,
                "_id": doc_id,
                "_source": self._formatter.format_document(doc),
            }
            document_meta = {
                "_index": self.meta_index_name,
                "_type": self.meta_type,
                "_id": doc_id,
                "_source": {"ns": namespace, "_ts": timestamp},
            }
            yield document_action
            yield document_meta
        if doc is None:
            raise errors.EmptyDocsError(
                "Cannot upsert an empty sequence of "
                "documents into Elastic Search"
            )

    try:
        kw = {}
        if self.chunk_size > 0:
            kw["chunk_size"] = self.chunk_size

        responses = streaming_bulk(
            client=self.elastic, actions=docs_to_upsert(), **kw
        )

        for ok, resp in responses:
            if not ok:
                LOG.error(
                    "Could not bulk-upsert document "
                    "into ElasticSearch: %r" % resp
                )
        if self.auto_commit_interval == 0:
            self.commit()
    except errors.EmptyDocsError:
        # This can happen when mongo-connector starts up, there is no
        # config file, but nothing to dump
        pass
Example 10: bulk
# Required imports: from elasticsearch import helpers [as alias]
# or: from elasticsearch.helpers import streaming_bulk [as alias]
def bulk(request, domain):
    assert domain
    try:
        documents = json.loads(request.body.decode('utf-8'))['documents']
    except KeyError:
        return http.JsonResponse({'error': "Missing 'documents'"}, status=400)

    def iterator():
        for document in documents:
            url = document.get('url', '').strip()
            if not url:
                continue
            title = document.get('title', '').strip()
            if not title:
                continue
            yield TitleDoc(
                meta={'id': make_id(domain.name, url)},
                **{
                    'domain': domain.name,
                    'url': url,
                    'title': title,
                    'group': document.get('group', '').strip(),
                    'popularity': float(document.get('popularity', 0.0)),
                }
            ).to_dict(True)

    count = failures = 0
    t0 = time.time()
    for success, doc in streaming_bulk(
        connections.get_connection(),
        iterator(),
        index=settings.ES_INDEX,
        doc_type='title_doc',
    ):
        if not success:
            print("NOT SUCCESS!", doc)
            failures += 1
        count += 1
    t1 = time.time()
    return http.JsonResponse({
        'message': 'OK',
        'count': count,
        'failures': failures,
        'took': t1 - t0,
    }, status=201)