本文整理汇总了Python中elasticsearch.helpers.bulk方法的典型用法代码示例。如果您正苦于以下问题：Python helpers.bulk方法的具体用法？Python helpers.bulk怎么用？Python helpers.bulk使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块elasticsearch.helpers的用法示例。
在下文中一共展示了helpers.bulk方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: push
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def push(self):
    """Send every queued document to Elasticsearch in one bulk request.

    The connection is refreshed and ``create_mapping()`` is called on every
    invocation, even when the queue turns out to be empty.  Once the bulk
    call returns normally, ``self.push_queue`` is reset to an empty list.
    """
    self._refresh_connection()
    self.create_mapping()

    queued = self.push_queue
    if not queued:
        logger.debug("No documents to push, skipping push.")
        return

    logger.debug("Found %s documents to push to Elasticsearch.", len(queued))
    connection = connections.get_connection()
    # Documents are serialised lazily while bulk() consumes the generator;
    # ``refresh=True`` is handed through to bulk() unchanged.
    payload = (doc.to_dict(True) for doc in queued)
    bulk(connection, payload, refresh=True)

    # Only reached when bulk() did not raise.
    self.push_queue = []
    logger.debug("Finished pushing builded documents to Elasticsearch server.")
示例2: run
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def run(self):
    """Load the 'Emotion' input as JSON and bulk-index it batch by batch.

    Each record becomes one action in index ``self.index`` with type
    ``'text'``.  Its id is ``"<position>-<self.summary>"``, where position is
    the record's offset in the full loaded sequence, so ids stay unique
    across batches.
    """
    with self.input()['Emotion'].open('r') as handle:
        emotions = json.load(handle)

    client = Elasticsearch()
    for offset in range(0, len(emotions), self.batch_size):
        # Slicing clamps at the end of the sequence, so the final batch may
        # simply be shorter than ``self.batch_size``.
        chunk = emotions[offset:offset + self.batch_size]
        actions = []
        for position, record in enumerate(chunk, start=offset):
            actions.append(
                {
                    '_index': self.index,
                    '_type': 'text',
                    '_id': '%d-%s' % (position, self.summary),
                    '_source': record,
                }
            )
        helpers.bulk(client, actions)
示例3: pull_to_elastic
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def pull_to_elastic(**kwargs):
    """Pull the 'sentiment' XCom value and bulk-index it into ``test_index``.

    Expects ``kwargs['ti']`` to provide ``xcom_pull``; the value pushed by
    task ``push_sentiment`` under key ``sentiment`` is indexed in batches of
    the module-level ``batch_size``.  Document ids are ``"<position>-text"``,
    where position is the record's offset in the full pulled sequence.
    """
    task_instance = kwargs['ti']
    sentiments = task_instance.xcom_pull(task_ids='push_sentiment', key='sentiment')

    client = Elasticsearch()
    for offset in range(0, len(sentiments), batch_size):
        # Slicing clamps at the end, so the last batch may be shorter.
        chunk = sentiments[offset:offset + batch_size]
        actions = []
        for position, record in enumerate(chunk, start=offset):
            actions.append(
                {
                    '_index': 'test_index',
                    '_type': 'text',
                    '_id': '%d-text' % position,
                    '_source': record,
                }
            )
        helpers.bulk(client, actions)
示例4: add_remove_outlier_bulk_action
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def add_remove_outlier_bulk_action(self, document):
    """Queue a bulk "update" action that strips outlier data from a document.

    The action carries a painless script which removes the "outliers" entry
    from the document source and, when a ``tags`` list exists and contains
    "outlier", removes that tag as well.

    :param document: mapping providing the "_index", "_type" and "_id" of the
        document whose outlier information should be removed
    """
    script_source = (
        'ctx._source.remove("outliers"); '
        'if (ctx._source.tags != null && ctx._source.tags.indexOf("outlier") > -1) { '
        'ctx._source.tags.remove(ctx._source.tags.indexOf("outlier")); '
        '}'
    )
    update_body = {"script": {"source": script_source, "lang": "painless"}}

    self.add_bulk_action(
        {
            '_op_type': 'update',
            '_index': document["_index"],
            '_type': document["_type"],
            '_id': document["_id"],
            'retry_on_conflict': 10,
            '_source': update_body,
        }
    )
示例5: save_outlier
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def save_outlier(self, outlier=None, extract_derived_fields=False):
    """Queue an outlier for saving to Elasticsearch through a bulk update.

    :param outlier: the outlier to save; despite the ``None`` default, the
        body reads ``outlier.doc`` when ``extract_derived_fields`` is true, so
        a real object is required in that case
    :param extract_derived_fields: when true, copy each derived field into
        ``outlier.outlier_dict`` under a ``"derived_"`` prefix and delete it
        from the document source before saving
    """
    if extract_derived_fields:
        derived = self.extract_derived_fields(outlier.doc["_source"])
        for field_name, field_value in derived.items():
            # Record the derived field as an outlier observation ...
            outlier.outlier_dict["derived_" + field_name] = field_value
            # ... and remove the temporary copy from the document source.
            # NOTE(review): assumes every derived field is present in
            # ``_source``; a missing key raises KeyError here.
            del outlier.doc["_source"][field_name]

    updated_document = add_outlier_to_document(outlier)
    self.add_update_bulk_action(updated_document)
示例6: _do_bulk_operation
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def _do_bulk_operation(self):
    """Drain up to ``self._threshold`` queued requests and bulk-send them.

    Does nothing when the request queue reports no items.  Otherwise the
    drained requests (if any) are passed to ``helpers.bulk`` together with
    ``self.base_object``, an optional log line is written, and
    ``self._current_time`` is updated.
    """
    pending = self._to_be_request_queue
    if pending.qsize() <= 0:
        return

    t_start = time.time()
    request_list = []
    for _ in range(self._threshold):
        try:
            request_list.append(pending.get_nowait())
        except Empty:
            # The queue ran dry before the threshold was reached.
            break
    count = len(request_list)

    if request_list:
        helpers.bulk(self.base_object, request_list)
    if self._is_print_log:
        self.logger.info(f'【{self.base_object}】 批量插入的任务数量是 {count} 消耗的时间是 {round(time.time() - t_start, 6)}')
    self._current_time = time.time()
示例7: _do_bulk_op
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def _do_bulk_op(self):
    """Loop forever, sending queued tasks to Elasticsearch between 1 s sleeps.

    On each pass the current content of ``self._task_queue`` is snapshotted,
    ``__clear_bulk_task()`` is called, and the snapshot is sent with
    ``helpers.bulk``.  If more than 10000 tasks are queued, the backlog is
    discarded via ``__clear_bulk_task()`` and the loop ends.  Any exception
    is printed and the loop carries on.
    """
    while True:
        try:
            if self._task_queue.qsize() > 10000:
                very_nb_print('防止意外日志积累太多了,不插入es了。')
                self.__clear_bulk_task()
                # ``finally`` still runs, so this return is preceded by the sleep.
                return
            # Snapshot first, then clear.
            # NOTE(review): presumably __clear_bulk_task empties the queue;
            # tasks added between these two calls may be lost -- confirm.
            # noinspection PyUnresolvedReferences
            pending = list(self._task_queue.queue)
            self.__clear_bulk_task()
            helpers.bulk(self._es_client, pending)
            self._last_es_op_time = time.time()
        except Exception as exc:
            very_nb_print(exc)
        finally:
            time.sleep(1)
示例8: genAddToES
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def genAddToES(self, msgs, component):
    """Build a menu callback that bulk-imports ``msgs`` into Elasticsearch.

    The returned function shows a progress monitor on ``component`` while it
    converts messages to documents, sends them in one bulk call, and finally
    reports the success/failure counts in a dialog.  When
    ``Burp_onlyResponses`` is set, messages without a response are skipped
    (they still advance the progress bar).
    """
    def menuAddToES(e):
        progress = ProgressMonitor(component, "Feeding ElasticSearch", "", 0, len(msgs))
        docs = []
        for processed, msg in enumerate(msgs, 1):
            if not Burp_onlyResponses or msg.getResponse():
                docs.append(self.genESDoc(msg, timeStampFromResponse=True).to_dict(True))
            progress.setProgress(processed)
        # NOTE(review): the positional True is presumably bulk()'s stats_only
        # flag, which makes both return values counts -- confirm against the
        # installed elasticsearch client.
        success, failed = bulk(self.es, docs, True, raise_on_error=False)
        progress.close()
        JOptionPane.showMessageDialog(self.panel, "<html><p style='width: 300px'>Successful imported %d messages, %d messages failed.</p></html>" % (success, failed), "Finished", JOptionPane.INFORMATION_MESSAGE)
    return menuAddToES
### Interface to ElasticSearch ###
示例9: _bulk_body
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def _bulk_body(documents_actions, request):
    """Execute ``documents_actions`` through ``helpers.bulk``.

    :param documents_actions: actions passed to ``helpers.bulk`` as ``actions``.
    :param request: request object or ``None``.  When given, its
        ``params.mixed()`` are inspected for ``_refresh_index``; the value is
        forwarded as the ``refresh`` argument only if the
        ``enable_refresh_query`` setting is on.
    :raises Exception: when ``helpers.bulk`` returns a non-empty error
        collection; the message lists the individual errors.
    """
    kwargs = {
        'client': ES.api,
        'actions': documents_actions,
    }

    query_params = dictset({} if request is None else request.params.mixed())
    refresh_enabled = ES.settings.asbool('enable_refresh_query')
    if '_refresh_index' in query_params and refresh_enabled:
        kwargs['refresh'] = query_params.asbool('_refresh_index')

    executed_num, errors = helpers.bulk(**kwargs)
    log.info('Successfully executed {} Elasticsearch action(s)'.format(
        executed_num))
    if errors:
        # The original format string had no placeholder, so the error details
        # never reached the message.  Items are stringified because they are
        # not guaranteed to be plain strings.
        details = '; '.join(str(error) for error in errors)
        raise Exception('Errors happened when executing Elasticsearch '
                        'actions: {}'.format(details))
示例10: process_chunks
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def process_chunks(self, documents, operation):
    """Apply ``operation`` to consecutive chunks of ``documents``.

    Chunks hold at most ``self.chunk_size`` items; the last one may be
    shorter.  Each chunk is passed as the keyword argument
    ``documents_actions``.

    :param documents: sized, sliceable sequence of documents.
    :param operation: callable accepting ``documents_actions=<chunk>``.
    :raises ValueError: if there are documents to process but
        ``self.chunk_size`` is smaller than 1 (the previous implementation
        looped forever in that situation).
    """
    chunk_size = self.chunk_size
    total = len(documents)
    if not total:
        return
    if chunk_size < 1:
        raise ValueError(
            'chunk_size must be a positive integer, got %r' % (chunk_size,))

    for start in range(0, total, chunk_size):
        # Slicing clamps at the end of the sequence, so the tail chunk needs
        # no special handling.
        operation(documents_actions=documents[start:start + chunk_size])
示例11: bulk_index_relations
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def bulk_index_relations(cls, items, request=None, **kwargs):
    """Index, in bulk, the documents related to ``items``.

    Related documents are first grouped into a map of
    ``{model_name: {related1, related2, ...}}`` -- skipping models whose
    ``_index_enabled`` attribute is missing or falsy -- and each group is then
    indexed through ``cls(model_name).index(...)``.

    :param items: Iterable of objects exposing ``get_related_documents``;
        ``kwargs`` are forwarded to that method.
    :param request: Forwarded unchanged to ``index`` (originally documented
        as a Pyramid Request instance).
    """
    related_by_model = defaultdict(set)
    for item in items:
        for model_cls, related_items in item.get_related_documents(**kwargs):
            if not getattr(model_cls, '_index_enabled', False):
                continue
            if related_items:
                related_by_model[model_cls.__name__].update(related_items)

    for model_name, instances in related_by_model.items():
        cls(model_name).index(to_dicts(instances), request=request)
示例12: writeDataToIndex
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def writeDataToIndex(es, es_index, entries, es_doc_type='_doc'):
    """Bulk-index ``entries`` into ``es_index``, printing any failure.

    Every entry is wrapped in an action carrying the target index, the
    document type and the entry itself as ``_source``.  The bulk call is
    made with ``refresh=True`` and ``request_timeout=60``.  Exceptions are
    deliberately not propagated; their text is printed instead.
    """
    es_entries = [
        {"_index": es_index, "_type": es_doc_type, "_source": doc}
        for doc in entries
    ]
    try:
        helpers.bulk(es, es_entries, refresh=True, request_timeout=60)
    except Exception as err:
        # Best effort: this can happen if the server is restarted or the
        # connection becomes unavailable.
        print(str(err))
# ------------------- Bluetooth routines ------------------------------------
示例13: send_buffered_operations
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def send_buffered_operations(self):
    """Flush the buffered bulk actions to Elasticsearch while holding the lock.

    Nothing is sent when the buffer is empty.  Errors returned by the bulk
    call are logged, and an ``ElasticsearchException`` is logged rather than
    propagated.  (The original documentation states that this method is
    called periodically by the AutoCommitThread.)
    """
    with self.lock:
        try:
            pending_actions = self.BulkBuffer.get_buffer()
            if not pending_actions:
                return
            successes, errors = bulk(self.elastic, pending_actions)
            LOG.debug(
                "Bulk request finished, successfully sent %d operations",
                successes,
            )
            if errors:
                LOG.error("Bulk request finished with errors: %r", errors)
        except es_exceptions.ElasticsearchException:
            LOG.exception("Bulk request failed with exception")
示例14: __init__
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def __init__(self, docman):
    """Create empty buffers and bookkeeping structures bound to ``docman``."""
    # Parent object that owns this instance.
    self.docman = docman

    # Actions waiting to be sent in the next bulk request.
    self.action_buffer = []

    # Documents whose source must first be retrieved from Elasticsearch
    # before apply_update can be performed on them.
    # Entry format: (doc, update_spec, action_buffer_index, get_from_ES)
    self.doc_to_update = []

    # Ids of documents that need to be retrieved from Elasticsearch, kept so
    # the same document is not fetched more than once.
    # Documented format: {"_index": {"_type": {"_id": True}}}
    self.doc_to_get = {}

    # Retrieved sources.
    # Format: {"_index": {"_type": {"_id": {"_source": actual_source}}}}
    self.sources = {}
示例15: should_get_id
# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def should_get_id(self, action):
    """Mark the action's document as needing its source from Elasticsearch.

    Ids are tracked in ``self.doc_to_get`` per index and per type; missing
    levels are created on demand.

    Returns:
        True  - if the document is marked for the first time
        False - if the document had already been marked
    """
    ids_by_type = self.doc_to_get.setdefault(action["_index"], {})
    marked_ids = ids_by_type.setdefault(action["_type"], set())

    doc_id = action["_id"]
    first_time = doc_id not in marked_ids
    if first_time:
        marked_ids.add(doc_id)
    return first_time