本文整理匯總了 Python 中 elasticsearch.helpers.bulk 方法的典型用法代碼示例。如果您正苦於以下問題：Python helpers.bulk 方法的具體用法？Python helpers.bulk 怎麼用？Python helpers.bulk 使用的例子？那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊 elasticsearch.helpers 的用法示例。
在下文中一共展示了 helpers.bulk 方法的 15 個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的 Python 代碼示例。
示例1: push
# 需要導入模塊: from elasticsearch import helpers [as 別名]
# 或者: from elasticsearch.helpers import bulk [as 別名]
def push(self):
    """Send every queued document to Elasticsearch with one bulk request.

    The connection is refreshed and the mapping is created before the queue
    is inspected. An empty queue only produces a debug message. Otherwise
    each queued document is serialised with ``to_dict(True)``, sent through
    ``bulk`` with ``refresh=True``, and the queue is reset to an empty list.
    """
    self._refresh_connection()
    self.create_mapping()

    if self.push_queue:
        logger.debug(
            "Found %s documents to push to Elasticsearch.", len(self.push_queue)
        )
        connection = connections.get_connection()
        # Lazy generator: documents are serialised while bulk() consumes them.
        payload = (doc.to_dict(True) for doc in self.push_queue)
        bulk(connection, payload, refresh=True)

        self.push_queue = []
        logger.debug("Finished pushing builded documents to Elasticsearch server.")
    else:
        logger.debug("No documents to push, skipping push.")
示例2: run
# 需要導入模塊: from elasticsearch import helpers [as 別名]
# 或者: from elasticsearch.helpers import bulk [as 別名]
def run(self):
    """Load the 'Emotion' input as JSON and bulk-index it batch by batch.

    Records are sent with ``helpers.bulk`` in consecutive batches of
    ``self.batch_size``. Every action targets ``self.index`` with type
    ``'text'``; its id is ``'<position>-<self.summary>'`` where ``position``
    is the record's offset in the loaded sequence.
    """
    with self.input()['Emotion'].open('r') as source:
        emotions = json.load(source)

    es = Elasticsearch()
    for offset in range(0, len(emotions), self.batch_size):
        # Slicing past the end is clamped, so the last batch may be shorter.
        batch = emotions[offset:offset + self.batch_size]
        actions = [
            {
                '_index': self.index,
                '_type': 'text',
                '_id': '%d-%s' % (position, self.summary),
                '_source': record,
            }
            for position, record in enumerate(batch, offset)
        ]
        helpers.bulk(es, actions)
示例3: pull_to_elastic
# 需要導入模塊: from elasticsearch import helpers [as 別名]
# 或者: from elasticsearch.helpers import bulk [as 別名]
def pull_to_elastic(**kwargs):
    """Pull the 'sentiment' XCom value and bulk-index it into 'test_index'.

    The task instance is read from ``kwargs['ti']``; the value pushed by the
    ``push_sentiment`` task under the key ``sentiment`` is indexed in batches
    of the module-level ``batch_size``. Each action has type ``'text'`` and
    the id ``'<position>-text'``, where ``position`` is the record's offset
    in the pulled sequence.
    """
    task_instance = kwargs['ti']
    sentiments = task_instance.xcom_pull(task_ids = 'push_sentiment', key = 'sentiment')

    es = Elasticsearch()
    total = len(sentiments)
    for start in range(0, total, batch_size):
        stop = min(start + batch_size, total)
        actions = []
        for position in range(start, stop):
            actions.append(
                {
                    '_index': 'test_index',
                    '_type': 'text',
                    '_id': '%d-text' % position,
                    '_source': sentiments[position],
                }
            )
        helpers.bulk(es, actions)
示例4: add_remove_outlier_bulk_action
# 需要導入模塊: from elasticsearch import helpers [as 別名]
# 或者: from elasticsearch.helpers import bulk [as 別名]
def add_remove_outlier_bulk_action(self, document):
    """
    Queue a bulk ``update`` action that strips outlier data from a document.

    The action carries a Painless script that removes the "outliers" entry
    from the document source and, when the "tags" list contains "outlier",
    removes that tag as well. The action is handed to ``add_bulk_action``.

    :param document: the document to clean; its "_index", "_type" and "_id"
        keys are used to address the update
    """
    # Adjacent literals are concatenated at compile time into one script.
    painless_source = (
        'ctx._source.remove("outliers"); '
        'if (ctx._source.tags != null && ctx._source.tags.indexOf("outlier") > -1) { '
        'ctx._source.tags.remove(ctx._source.tags.indexOf("outlier")); '
        '}'
    )
    self.add_bulk_action({
        '_op_type': 'update',
        '_index': document["_index"],
        '_type': document["_type"],
        '_id': document["_id"],
        'retry_on_conflict': 10,
        '_source': {
            "script": {"source": painless_source, "lang": "painless"},
        },
    })
示例5: save_outlier
# 需要導入模塊: from elasticsearch import helpers [as 別名]
# 或者: from elasticsearch.helpers import bulk [as 別名]
def save_outlier(self, outlier=None, extract_derived_fields=False):
    """
    Turn an outlier into an update bulk action, optionally with derived fields.

    When ``extract_derived_fields`` is true, every field returned by
    ``self.extract_derived_fields`` for the document source is copied into
    ``outlier.outlier_dict`` under the key ``"derived_<field>"`` and then
    deleted from the source again. The outlier is then merged into its
    document and passed to ``add_update_bulk_action``.

    :param outlier: the outlier to save (NOTE(review): defaults to None but is
        dereferenced unconditionally further down - confirm callers always pass one)
    :param extract_derived_fields: True to record derived fields as outlier observations
    """
    if extract_derived_fields:
        source = outlier.doc["_source"]
        for field_name, field_value in self.extract_derived_fields(source).items():
            outlier.outlier_dict["derived_" + field_name] = field_value
            # The derived field is temporary: drop it from the source again.
            del source[field_name]

    self.add_update_bulk_action(add_outlier_to_document(outlier))
示例6: _do_bulk_operation
# 需要導入模塊: from elasticsearch import helpers [as 別名]
# 或者: from elasticsearch.helpers import bulk [as 別名]
def _do_bulk_operation(self):
    """Drain up to ``self._threshold`` queued requests and send them in bulk.

    Nothing happens when the queue reports no items. Otherwise requests are
    taken without blocking until the threshold is reached or the queue runs
    empty, a non-empty batch is passed to ``helpers.bulk``, an optional log
    line reports the batch size and elapsed time, and ``self._current_time``
    is updated.
    """
    if self._to_be_request_queue.qsize() <= 0:
        return

    t_start = time.time()
    count = 0
    request_list = []
    for _ in range(self._threshold):
        try:
            request_list.append(self._to_be_request_queue.get_nowait())
        except Empty:
            # Queue drained before the threshold was reached.
            break
        count += 1

    if request_list:
        helpers.bulk(self.base_object, request_list)
    if self._is_print_log:
        self.logger.info(f'【{self.base_object}】 批量插入的任務數量是 {count} 消耗的時間是 {round(time.time() - t_start, 6)}')
    self._current_time = time.time()
示例7: _do_bulk_op
# 需要導入模塊: from elasticsearch import helpers [as 別名]
# 或者: from elasticsearch.helpers import bulk [as 別名]
def _do_bulk_op(self):
    """Loop forever, flushing the task queue to Elasticsearch about once a second.

    Each pass snapshots the queued tasks, clears the queue and sends the
    snapshot with ``helpers.bulk``, then records the time of the operation.
    If more than 10000 tasks have piled up, the queue is cleared without
    being sent and the loop ends. Any exception is printed and the loop
    carries on; the one-second sleep runs after every pass, including the
    final one.
    """
    while True:
        try:
            if self._task_queue.qsize() > 10000:
                very_nb_print('防止意外日誌積累太多了,不插入es了。')
                self.__clear_bulk_task()
                return
            # noinspection PyUnresolvedReferences
            pending_tasks = list(self._task_queue.queue)
            self.__clear_bulk_task()
            helpers.bulk(self._es_client, pending_tasks)
            self._last_es_op_time = time.time()
        except Exception as exc:
            very_nb_print(exc)
        finally:
            time.sleep(1)
示例8: genAddToES
# 需要導入模塊: from elasticsearch import helpers [as 別名]
# 或者: from elasticsearch.helpers import bulk [as 別名]
def genAddToES(self, msgs, component):
    """Return a menu callback that bulk-imports ``msgs`` into ElasticSearch.

    The returned callable shows a progress monitor on ``component``, converts
    each message with ``genESDoc`` (messages without a response are skipped
    when ``Burp_onlyResponses`` is set), sends all documents with one ``bulk``
    call that does not raise on errors, and reports the success/failure
    counts in a dialog.
    """
    def menuAddToES(e):
        progress = ProgressMonitor(component, "Feeding ElasticSearch", "", 0, len(msgs))
        docs = []
        for processed, msg in enumerate(msgs, 1):
            if not Burp_onlyResponses or msg.getResponse():
                docs.append(self.genESDoc(msg, timeStampFromResponse=True).to_dict(True))
            # Progress advances for skipped messages too.
            progress.setProgress(processed)
        success, failed = bulk(self.es, docs, True, raise_on_error=False)
        progress.close()
        JOptionPane.showMessageDialog(self.panel, "<html><p style='width: 300px'>Successful imported %d messages, %d messages failed.</p></html>" % (success, failed), "Finished", JOptionPane.INFORMATION_MESSAGE)

    return menuAddToES
### Interface to ElasticSearch ###
示例9: _bulk_body
# 需要導入模塊: from elasticsearch import helpers [as 別名]
# 或者: from elasticsearch.helpers import bulk [as 別名]
def _bulk_body(documents_actions, request):
    """Execute ``documents_actions`` through ``helpers.bulk``.

    When ``request`` is given, its query params are inspected: if they
    contain ``_refresh_index`` and the ``enable_refresh_query`` setting is
    on, the boolean value of that param is forwarded to the bulk call as
    ``refresh``.

    :param documents_actions: Bulk actions to execute.
    :param request: Request object exposing ``params.mixed()``, or None.
    :raises Exception: If the bulk helper reports errors; the message lists
        every reported error.
    """
    kwargs = {
        'client': ES.api,
        'actions': documents_actions,
    }

    raw_params = {} if request is None else request.params.mixed()
    query_params = dictset(raw_params)
    refresh_enabled = ES.settings.asbool('enable_refresh_query')
    if '_refresh_index' in query_params and refresh_enabled:
        kwargs['refresh'] = query_params.asbool('_refresh_index')

    executed_num, errors = helpers.bulk(**kwargs)
    log.info('Successfully executed {} Elasticsearch action(s)'.format(
        executed_num))
    if errors:
        # The template needs a placeholder, otherwise the details are
        # silently dropped. Error entries are not guaranteed to be strings,
        # so each one is converted before joining.
        raise Exception(
            'Errors happened when executing Elasticsearch actions: {}'.format(
                '; '.join(str(error) for error in errors)))
示例10: process_chunks
# 需要導入模塊: from elasticsearch import helpers [as 別名]
# 或者: from elasticsearch.helpers import bulk [as 別名]
def process_chunks(self, documents, operation):
    """Apply `operation` to consecutive chunks of `documents`.

    Chunks hold `self.chunk_size` items each; the last one may be shorter.
    `operation` is called once per chunk with the chunk passed as the
    keyword argument `documents_actions`. Nothing is called for empty
    `documents`.

    :param documents: Sliceable sequence of documents.
    :param operation: Callable accepting a `documents_actions` keyword.
    :raises ValueError: If there are documents to process but
        `self.chunk_size` is not positive (this used to loop forever).
    """
    total = len(documents)
    if not total:
        return

    chunk_size = self.chunk_size
    if chunk_size <= 0:
        raise ValueError(
            'chunk_size must be a positive integer, got {!r}'.format(chunk_size))

    for start in range(0, total, chunk_size):
        operation(documents_actions=documents[start:start + chunk_size])
示例11: bulk_index_relations
# 需要導入模塊: from elasticsearch import helpers [as 別名]
# 或者: from elasticsearch.helpers import bulk [as 別名]
def bulk_index_relations(cls, items, request=None, **kwargs):
    """ Index, in bulk, the documents related to `items`.

    Related documents are collected per model name into a map
    {model_name: {item1, item2, ...}}; models whose `_index_enabled`
    attribute is missing or falsy are skipped. Each group is then
    converted with `to_dicts` and indexed through `cls(model_name).index`.

    :param items: Sequence of DB objects whose related objects should be
        indexed.
    :param request: Pyramid Request instance.
    :param kwargs: Forwarded to each item's `get_related_documents`.
    """
    grouped = defaultdict(set)
    for item in items:
        for model_cls, related_items in item.get_related_documents(**kwargs):
            if not getattr(model_cls, '_index_enabled', False):
                continue
            if not related_items:
                continue
            grouped[model_cls.__name__].update(related_items)

    for model_name, instances in grouped.items():
        cls(model_name).index(to_dicts(instances), request=request)
示例12: writeDataToIndex
# 需要導入模塊: from elasticsearch import helpers [as 別名]
# 或者: from elasticsearch.helpers import bulk [as 別名]
def writeDataToIndex(es, es_index, entries, es_doc_type='_doc'):
    """Bulk-index ``entries`` into ``es_index``, printing any failure.

    Each entry becomes one action with ``_index``, ``_type`` and ``_source``
    set. The bulk call uses ``refresh=True`` and a 60 second request timeout.
    Exceptions are not propagated; their text is printed instead.
    """
    es_entries = [
        {"_index": es_index, "_type": es_doc_type, "_source": doc}
        for doc in entries
    ]
    try:
        helpers.bulk(es, es_entries, refresh=True, request_timeout=60)
    except Exception as e:
        # Can happen if the server is restarted or the connection becomes unavailable.
        print(str(e))
# ------------------- Bluetooth routines ------------------------------------
示例13: send_buffered_operations
# 需要導入模塊: from elasticsearch import helpers [as 別名]
# 或者: from elasticsearch.helpers import bulk [as 別名]
def send_buffered_operations(self):
    """Flush the buffered actions to Elasticsearch.

    Per the original note, the AutoCommitThread calls this periodically.
    The whole operation runs while holding ``self.lock``. An empty buffer is
    a no-op. Errors returned by ``bulk`` are logged, and an
    ``ElasticsearchException`` is logged with its traceback instead of being
    propagated.
    """
    with self.lock:
        try:
            pending = self.BulkBuffer.get_buffer()
            if not pending:
                return
            sent, failures = bulk(self.elastic, pending)
            LOG.debug(
                "Bulk request finished, successfully sent %d operations", sent
            )
            if failures:
                LOG.error("Bulk request finished with errors: %r", failures)
        except es_exceptions.ElasticsearchException:
            LOG.exception("Bulk request failed with exception")
示例14: __init__
# 需要導入模塊: from elasticsearch import helpers [as 別名]
# 或者: from elasticsearch.helpers import bulk [as 別名]
def __init__(self, docman):
    """Create an empty buffer attached to the parent object ``docman``."""
    # Parent object that owns this buffer.
    self.docman = docman

    # Actions waiting to be bulk indexed.
    self.action_buffer = []

    # Cached document sources.
    # Format: {"_index": {"_type": {"_id": {"_source": actual_source}}}}
    self.sources = {}

    # Ids of the documents whose source must be fetched from Elasticsearch,
    # grouped by index and then by type, so that the same document is not
    # fetched more than once.
    self.doc_to_get = {}

    # Documents whose source has to be retrieved from Elasticsearch before
    # apply_update can be performed on them.
    # Format: [ (doc, update_spec, action_buffer_index, get_from_ES) ]
    self.doc_to_update = []
示例15: should_get_id
# 需要導入模塊: from elasticsearch import helpers [as 別名]
# 或者: from elasticsearch.helpers import bulk [as 別名]
def should_get_id(self, action):
    """
    Record that the source of the action's document must be fetched.

    The document id is stored in ``self.doc_to_get`` under the action's
    "_index" and "_type".

    Returns:
        True - if the id is recorded for the first time
        False - if the id had already been recorded
    """
    ids_by_type = self.doc_to_get.setdefault(action["_index"], {})
    known_ids = ids_by_type.setdefault(action["_type"], set())

    doc_id = action["_id"]
    is_new = doc_id not in known_ids
    if is_new:
        known_ids.add(doc_id)
    return is_new