This article collects typical usage examples of the elasticsearch.helpers.parallel_bulk method in Python. If you have been wondering what helpers.parallel_bulk does, how to call it, or where to find working examples, the curated snippets below should help. You can also explore further usage of the containing module, elasticsearch.helpers.
The following presents 7 code examples of helpers.parallel_bulk, sorted by popularity by default.
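Before those examples, here is a minimal, self-contained sketch of a typical parallel_bulk call; the host URL, index name, and documents are illustrative assumptions, not taken from any of the snippets below.

from elasticsearch import Elasticsearch
from elasticsearch.helpers import parallel_bulk

es = Elasticsearch("http://localhost:9200")  # hypothetical local cluster

def generate_actions():
    # Any iterable of action dicts works; a generator keeps memory flat.
    for i in range(1000):
        yield {"_index": "demo-index", "_id": i, "_source": {"value": i}}

# parallel_bulk splits the stream into chunks and indexes them from a thread
# pool. It returns a lazy generator of (ok, item) tuples, so it must be
# iterated before any request is actually sent.
for ok, item in parallel_bulk(es, generate_actions(),
                              thread_count=4, chunk_size=500):
    if not ok:
        print("Failed action:", item)

Because parallel_bulk is lazy, the loop above is what actually triggers the requests; every example below consumes the returned generator for the same reason.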
Example 1: bulk_update
# Required import: from elasticsearch import helpers [as alias]
# Or: from elasticsearch.helpers import parallel_bulk [as alias]
def bulk_update(cls, items, chunk_size=5000, op_type="update", **kwargs):
    index = cls._index._name
    _type = cls._doc_type.name
    obj = [
        {
            "_op_type": op_type,
            "_id": f"{doc.id}",
            "_index": index,
            "_type": _type,
            "_source": get_item_data(doc),
        }
        for doc in items
    ]
    client = cls.get_es()
    rs = list(parallel_bulk(client, obj, chunk_size=chunk_size, **kwargs))
    return rs
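For context, invoking this classmethod might look roughly like the sketch below; MyDoc, items, and the surrounding setup are assumptions, not part of the original snippet.

# Hypothetical call to Example 1's bulk_update; MyDoc stands in for an
# elasticsearch-dsl Document subclass and `items` for objects exposing the
# `id` attribute that get_item_data() expects.
results = MyDoc.bulk_update(items, chunk_size=1000, op_type="update")
for ok, info in results:  # each entry is a (success, details) tuple from parallel_bulk
    if not ok:
        print("update failed:", info)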
Example 2: run
# Required import: from elasticsearch import helpers [as alias]
# Or: from elasticsearch.helpers import parallel_bulk [as alias]
def run(self, distribution):
    actions = self.generate_es_actions(distribution)
    if not actions:
        return

    for success, info in parallel_bulk(self.elastic, actions):
        if not success:
            logger.warning(strings.BULK_REQUEST_ERROR, info)

    self.update_distribution_indexation_metadata(distribution)
Example 3: index_data
# Required import: from elasticsearch import helpers [as alias]
# Or: from elasticsearch.helpers import parallel_bulk [as alias]
def index_data(self):
    """Indexes the data read from the data file"""
    with open(DATA_FILE_PATH) as f:
        self.elastic.indices.create(settings.TS_INDEX,
                                    body=INDEX_CREATION_BODY)

        actions = [json.loads(row) for row in f.readlines()]

        for success, info in parallel_bulk(self.elastic, actions):
            if not success:
                print("ERROR:", info)

    segments = FORCE_MERGE_SEGMENTS
    self.elastic.indices.forcemerge(index=settings.TS_INDEX,
                                    max_num_segments=segments)
Example 4: index
# Required import: from elasticsearch import helpers [as alias]
# Or: from elasticsearch.helpers import parallel_bulk [as alias]
def index(self, queryset: QuerySet):
    self._init_index()

    for success, info in parallel_bulk(self.es_connection, generate_es_query(queryset)):
        if not success:
            raise RuntimeError(f"Error indexando query a ES: {info}")
Example 5: processFiles
# Required import: from elasticsearch import helpers [as alias]
# Or: from elasticsearch.helpers import parallel_bulk [as alias]
def processFiles(f):
    # list for bulk documents
    documents = []
    for log_line in f:
        # Create the body and sanitize
        source = {"message": log_line.strip('\n')}
        body = {"_index": options.index_name, "_type": options.index_name, "pipeline": options.index_name, "_source": source}
        # append record to list before bulk send to ES
        documents.append(body)
        options.totalDocCount += 1
        if len(documents) >= options.bulk_limit:
            # bulk send all our entries
            status = helpers.parallel_bulk(es, documents)
            # look through each result for status
            for i in status:
                if i[0] == False:
                    print("There was an error importing a record. Error: ", i[1])
            # Using this to have the doc count stay on one line and continually be updated
            sys.stdout.write("Total Documents sent to Elasticsearch: " + str(options.totalDocCount) + "\r")
            sys.stdout.flush()
            # now clean out the document list
            documents[:] = []
    # If we've made it here, then the file ended, and it's possible we still have documents in the documents list. Need to send what we have
    if len(documents) > 0:
        # bulk send all our entries
        status = helpers.parallel_bulk(es, documents)
        # look through each result for status
        for i in status:
            if i[0] == False:
                print("There was an error importing a record. Error: ", i[1])
        # Using this to have the doc count stay on one line and continually be updated
        sys.stdout.write("Total Documents sent to Elasticsearch: " + str(options.totalDocCount) + "\r")
        sys.stdout.flush()
        # now clean out the document list
        documents[:] = []
    # print the final doc count before moving out of the function
    sys.stdout.write("Total Documents sent to Elasticsearch: " + str(options.totalDocCount) + "\r")
Example 6: bulk_import
# Required import: from elasticsearch import helpers [as alias]
# Or: from elasticsearch.helpers import parallel_bulk [as alias]
def bulk_import(**kwargs) -> Tuple[int, int, int]:
    """Bulk import data to elasticsearch.

    Tracks bulk import response metrics, reporting both externally to
    prometheus and to the caller.
    """
    log.info('Starting bulk import: {}'.format(str(kwargs)))
    good, missing, errors = 0, 0, 0
    for ok, result in parallel_bulk(raise_on_exception=False, raise_on_error=False, **kwargs):
        action, result = result.popitem()
        status_code = result.get('status', 500)
        if ok:
            good += 1
            try:
                Metric.ACTION_RESULTS[result['result']].inc()
            except KeyError:
                Metric.OK_UNKNOWN.inc()
        elif status_code == 'TIMEOUT':
            Metric.TIMEOUT.inc()
            errors += 1
        elif not isinstance(status_code, int):
            # Previously found TIMEOUT status_code here
            Metric.FAILED.inc()
            log.warning(
                'Failed bulk %s request with invalid status_code %s: %s',
                action, str(status_code), str(result)[:1024])
            errors += 1
        elif status_code == 404:
            # 404 are quite common so we log them separately. The analytics
            # side doesn't know the namespace mappings and attempts to send all
            # updates to <wiki>_content, letting the docs that don't exist fail
            missing += 1
            Metric.MISSING.inc()
        elif status_code >= 400 and status_code < 500:
            # Bulk contained invalid records, can't do much beyond logging
            Metric.FAILED.inc()
            log.warning('Failed bulk %s request: %s', action, str(result)[:1024])
            errors += 1
        elif status_code >= 500 and status_code < 600:
            # primary not available, etc. Internal elasticsearch errors. Should be retryable
            raise Exception(
                "Internal elasticsearch error on {}, status code {}: {}".format(action, status_code, str(result)))
        else:
            raise Exception(
                "Unexpected response on {}, status code {}: {}".format(action, status_code, str(result)))
    log.info('Completed import with %d success %d missing and %d errors', good, missing, errors)
    return good, missing, errors
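Example 6 forwards the client and actions through **kwargs and disables raise_on_exception/raise_on_error so that per-document failures come back as (ok, result) tuples rather than exceptions. A possible invocation is sketched below; the es client, the updates iterable, and the field names are assumptions for illustration only.

# Hypothetical call to Example 6's bulk_import. parallel_bulk receives
# `client` and `actions` via **kwargs next to the hard-coded
# raise_on_exception=False / raise_on_error=False flags.
actions = ({"_op_type": "update",
            "_index": "enwiki_content",  # example name following the <wiki>_content pattern noted above
            "_id": page_id,
            "doc": fields}
           for page_id, fields in updates)  # `updates` is assumed caller input
good, missing, errors = bulk_import(client=es, actions=actions, chunk_size=1000)
log.info('Imported %d docs, %d missing, %d errors', good, missing, errors)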
Example 7: flush_cache
# Required import: from elasticsearch import helpers [as alias]
# Or: from elasticsearch.helpers import parallel_bulk [as alias]
def flush_cache(self):
    if len(self.cache) == 0:
        return True
    retry = 2
    for i in range(retry):
        try:
            to_upload = helpers.parallel_bulk(
                self.es, self.cache_insertable_iterable())
            counter = 0
            num_items = len(self.cache)
            for item in to_upload:
                self.logger.debug(
                    "{} of {} Elastic objects uploaded".format(
                        counter, num_items))
                counter = counter + 1
            output = "Pushed {} items to Elasticsearch to index {}".format(
                num_items, self.index)
            output += " and browbeat UUID {}".format(str(browbeat_uuid))
            self.logger.info(output)
            self.cache = deque()
            self.last_upload = datetime.datetime.utcnow()
            return True
        except Exception as Err:
            self.logger.error(
                "Error pushing data to Elasticsearch, going to retry"
                " in 10 seconds")
            self.logger.error("Exception: {}".format(Err))
            time.sleep(10)
            if i == (retry - 1):
                self.logger.error(
                    "Pushing Data to Elasticsearch failed in spite of retry,"
                    " dumping JSON for {} cached items".format(
                        len(self.cache)))
                for item in self.cache:
                    filename = item['test_name'] + '-' + item['identifier']
                    filename += '-elastic' + '.' + 'json'
                    elastic_file = os.path.join(item['result_dir'],
                                                filename)
                    with open(elastic_file, 'w') as result_file:
                        json.dump(item['result'],
                                  result_file,
                                  indent=4,
                                  sort_keys=True)
                    self.logger.info(
                        "Saved Elasticsearch consumable result JSON to {}".format(
                            elastic_file))
                self.cache = deque()
                self.last_upload = datetime.datetime.utcnow()
                return False