当前位置: 首页>>代码示例>>Python>>正文


Python helpers.bulk方法代码示例

本文整理汇总了 Python 中 elasticsearch.helpers.bulk 方法的典型用法代码示例。如果您正苦于以下问题:Python helpers.bulk 方法的具体用法?Python helpers.bulk 怎么用?Python helpers.bulk 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 elasticsearch.helpers 的用法示例。


在下文中一共展示了helpers.bulk方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: push

# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def push(self):
        """Send every queued document to Elasticsearch in one bulk request.

        The connection is refreshed and the mapping is created first. When
        the queue is empty nothing is sent; otherwise the queue is replaced
        by an empty list once the bulk call has returned.
        """
        self._refresh_connection()
        self.create_mapping()

        pending = self.push_queue
        if not pending:
            logger.debug("No documents to push, skipping push.")
            return

        logger.debug(
            "Found %s documents to push to Elasticsearch.", len(pending)
        )

        # Kept as a generator so documents are serialised only as bulk()
        # consumes them.
        payload = (document.to_dict(True) for document in pending)
        bulk(connections.get_connection(), payload, refresh=True)
        self.push_queue = []

        logger.debug("Finished pushing builded documents to Elasticsearch server.")
开发者ID:genialis,项目名称:resolwe,代码行数:23,代码来源:indices.py

示例2: run

# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def run(self):
        """Load the 'Emotion' input as JSON and bulk-index it in batches.

        Records are sent ``self.batch_size`` at a time to ``self.index``.
        Each record's id is ``"<position>-<self.summary>"``, where position
        is the record's offset in the loaded sequence.
        """
        with self.input()['Emotion'].open('r') as handle:
            emotions = json.load(handle)
        es = Elasticsearch()
        total = len(emotions)
        for start in range(0, total, self.batch_size):
            stop = min(start + self.batch_size, total)
            # enumerate() starts at the batch offset so ids stay global.
            actions = [
                {
                    '_index': self.index,
                    '_type': 'text',
                    '_id': '%d-%s' % (position, self.summary),
                    '_source': record,
                }
                for position, record in enumerate(emotions[start:stop], start)
            ]
            helpers.bulk(es, actions)
开发者ID:huseinzol05,项目名称:Gather-Deployment,代码行数:18,代码来源:function.py

示例3: pull_to_elastic

# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def pull_to_elastic(**kwargs):
    """Pull the 'sentiment' XCom value and bulk-index it into 'test_index'.

    The value is read from the task instance passed as ``kwargs['ti']``
    (task id 'push_sentiment') and sent ``batch_size`` records at a time.
    Each record's id is ``"<position>-text"``, where position is the
    record's offset in the pulled sequence.
    """
    task_instance = kwargs['ti']
    sentiments = task_instance.xcom_pull(task_ids='push_sentiment', key='sentiment')
    es = Elasticsearch()
    total = len(sentiments)
    for start in range(0, total, batch_size):
        stop = min(start + batch_size, total)
        # enumerate() starts at the batch offset so ids stay global.
        actions = [
            {
                '_index': 'test_index',
                '_type': 'text',
                '_id': '%d-text' % position,
                '_source': record,
            }
            for position, record in enumerate(sentiments[start:stop], start)
        ]
        helpers.bulk(es, actions)
开发者ID:huseinzol05,项目名称:Gather-Deployment,代码行数:18,代码来源:sentiment_to_elastic.py

示例4: add_remove_outlier_bulk_action

# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def add_remove_outlier_bulk_action(self, document):
        """
        Queue a bulk "update" action that strips the outlier information from one document.

        The action carries a painless script that removes the "outliers" field and, when the document has a
        "tags" list containing "outlier", removes that tag as well.
        :param document: the document to clean; its "_index", "_type" and "_id" keys address the update
        """
        # Adjacent literals are concatenated at compile time into one script string.
        cleanup_script = (
            'ctx._source.remove("outliers"); '
            'if (ctx._source.tags != null && ctx._source.tags.indexOf("outlier") > -1) { '
            'ctx._source.tags.remove(ctx._source.tags.indexOf("outlier")); '
            '}'
        )
        update_body = {
            "script": {
                "source": cleanup_script,
                "lang": "painless"
            }
        }
        self.add_bulk_action({
            '_op_type': 'update',
            '_index': document["_index"],
            '_type': document["_type"],
            '_id': document["_id"],
            'retry_on_conflict': 10,
            '_source': update_body
        })
开发者ID:NVISO-BE,项目名称:ee-outliers,代码行数:25,代码来源:es.py

示例5: save_outlier

# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def save_outlier(self, outlier=None, extract_derived_fields=False):
        """
        Queue an outlier for saving to Elasticsearch through an update bulk action.

        :param outlier: the outlier to save
        :param extract_derived_fields: when True, each derived field of the document source is copied into the
            outlier dictionary under a "derived_" prefix and then removed from the source
        """
        if extract_derived_fields:
            source = outlier.doc["_source"]
            for field_name, field_value in self.extract_derived_fields(source).items():
                # Record the derived value as an outlier observation...
                outlier.outlier_dict["derived_" + field_name] = field_value
                # ...and drop the temporary field from the document itself.
                del source[field_name]

        self.add_update_bulk_action(add_outlier_to_document(outlier))
开发者ID:NVISO-BE,项目名称:ee-outliers,代码行数:19,代码来源:es.py

示例6: _do_bulk_operation

# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def _do_bulk_operation(self):
        """Drain up to ``self._threshold`` queued requests and send them in one bulk call.

        Does nothing when the queue reports no items. Otherwise the drained
        requests are sent through ``helpers.bulk``, the batch is optionally
        logged, and ``self._current_time`` is set to the current time.
        """
        if self._to_be_request_queue.qsize() <= 0:
            return

        t_start = time.time()
        request_list = []
        for _ in range(self._threshold):
            try:
                request_list.append(self._to_be_request_queue.get_nowait())
            except Empty:
                # The queue ran dry before the threshold was reached.
                break
        count = len(request_list)

        if request_list:
            helpers.bulk(self.base_object, request_list)
        if self._is_print_log:
            self.logger.info(f'【{self.base_object}】  批量插入的任务数量是 {count} 消耗的时间是 {round(time.time() - t_start, 6)}')
        self._current_time = time.time()
开发者ID:ydf0509,项目名称:distributed_framework,代码行数:21,代码来源:bulk_operation.py

示例7: _do_bulk_op

# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def _do_bulk_op(self):
        """Repeatedly send the queued tasks to Elasticsearch, pausing one second per pass.

        Each pass copies the pending tasks into a list, calls
        ``__clear_bulk_task`` and sends the copy with ``helpers.bulk``. On
        success ``_last_es_op_time`` is set to the current time. Any
        exception is printed and the loop carries on. The loop only returns
        when more than 10000 tasks are pending; that backlog is not sent.
        """
        while 1:
            try:
                # Safety valve: with more than 10000 queued tasks, print a
                # notice, clear the backlog and stop instead of sending it.
                if self._task_queue.qsize() > 10000:
                    very_nb_print('防止意外日志积累太多了,不插入es了。')
                    self.__clear_bulk_task()
                    return
                # Snapshot the queue's underlying container before clearing.
                # NOTE(review): tasks enqueued between this snapshot and the
                # clear below would presumably be lost; confirm what
                # __clear_bulk_task does and whether a lock is held.
                # noinspection PyUnresolvedReferences
                tasks = list(self._task_queue.queue)
                self.__clear_bulk_task()
                helpers.bulk(self._es_client, tasks)

                # Reached only when the bulk call did not raise.
                self._last_es_op_time = time.time()
            except Exception as e:
                very_nb_print(e)
            finally:
                # Runs on every pass, including the early return above.
                time.sleep(1)
开发者ID:ydf0509,项目名称:distributed_framework,代码行数:19,代码来源:log_manager000.py

示例8: genAddToES

# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def genAddToES(self, msgs, component):
        """Build a menu callback that bulk-imports ``msgs`` into ElasticSearch.

        :param msgs: messages to import; each is converted with ``self.genESDoc``
        :param component: parent component for the progress monitor
        :return: a one-argument callable (the event argument is ignored) that
            performs the import and shows a summary dialog
        """
        def menuAddToES(e):
            progress = ProgressMonitor(component, "Feeding ElasticSearch", "", 0, len(msgs))
            try:
                docs = []
                for processed, msg in enumerate(msgs, 1):
                    # Skip messages without a response when only responses are wanted.
                    if not Burp_onlyResponses or msg.getResponse():
                        docs.append(self.genESDoc(msg, timeStampFromResponse=True).to_dict(True))
                    progress.setProgress(processed)
                # Third positional argument is stats_only: return counts, not error details.
                success, failed = bulk(self.es, docs, True, raise_on_error=False)
            finally:
                # Close the monitor even when document generation or the bulk
                # request raises, so the progress dialog is not left open.
                progress.close()
            JOptionPane.showMessageDialog(self.panel, "<html><p style='width: 300px'>Successful imported %d messages, %d messages failed.</p></html>" % (success, failed), "Finished", JOptionPane.INFORMATION_MESSAGE)
        return menuAddToES

    ### Interface to ElasticSearch ### 
开发者ID:thomaspatzke,项目名称:WASE,代码行数:18,代码来源:ElasticBurp.py

示例9: _bulk_body

# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def _bulk_body(documents_actions, request):
    """Execute ``documents_actions`` through ``helpers.bulk``.

    :param documents_actions: bulk actions to send to Elasticsearch.
    :param request: request whose query params may carry ``_refresh_index``,
        or None. The flag is only honoured when the 'enable_refresh_query'
        setting is on.
    :raises Exception: when ``helpers.bulk`` reports errors; the message
        now lists them.
    """
    kwargs = {
        'client': ES.api,
        'actions': documents_actions,
    }

    query_params = dictset({} if request is None else request.params.mixed())
    refresh_enabled = ES.settings.asbool('enable_refresh_query')
    if '_refresh_index' in query_params and refresh_enabled:
        kwargs['refresh'] = query_params.asbool('_refresh_index')

    executed_num, errors = helpers.bulk(**kwargs)
    log.info('Successfully executed {} Elasticsearch action(s)'.format(
        executed_num))
    if errors:
        # The original message had no placeholder, so the error details were
        # silently dropped. Items are stringified because they may not be str.
        raise Exception(
            'Errors happened when executing Elasticsearch '
            'actions: {}'.format('; '.join(str(error) for error in errors)))
开发者ID:ramses-tech,项目名称:nefertari,代码行数:23,代码来源:elasticsearch.py

示例10: process_chunks

# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def process_chunks(self, documents, operation):
        """ Apply `operation` to chunks of `documents` of size
        `self.chunk_size`.

        `operation` is called once per chunk with the chunk passed as the
        `documents_actions` keyword argument; the last chunk may be shorter.
        Nothing is called when `documents` is empty.

        :raises ValueError: if there are documents to process but
            `self.chunk_size` is not positive (this used to loop forever).
        """
        count = len(documents)
        if not count:
            return

        chunk_size = self.chunk_size
        if chunk_size <= 0:
            raise ValueError(
                'chunk_size must be positive, got {!r}'.format(chunk_size))

        # Slicing clamps at the end of the sequence, so the final chunk is
        # shortened automatically.
        for start in range(0, count, chunk_size):
            operation(documents_actions=documents[start:start + chunk_size])
开发者ID:ramses-tech,项目名称:nefertari,代码行数:21,代码来源:elasticsearch.py

示例11: bulk_index_relations

# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def bulk_index_relations(cls, items, request=None, **kwargs):
        """ Index the documents related to `items` in bulk.

        Related documents are first grouped by model name into
        {model_name: {doc1, doc2, ...}}; each group is then indexed with
        one call. Models whose `_index_enabled` attribute is missing or
        falsy are skipped.

        :param items: Sequence of DB objects whose related documents
            should be indexed.
        :param request: Pyramid Request instance.
        :param kwargs: Passed through to each item's
            `get_related_documents`.
        """
        grouped = defaultdict(set)
        for item in items:
            for model_cls, related_items in item.get_related_documents(**kwargs):
                if not getattr(model_cls, '_index_enabled', False):
                    continue
                if not related_items:
                    continue
                grouped[model_cls.__name__].update(related_items)

        for model_name, instances in grouped.items():
            indexer = cls(model_name)
            indexer.index(to_dicts(instances), request=request)
开发者ID:ramses-tech,项目名称:nefertari,代码行数:22,代码来源:elasticsearch.py

示例12: writeDataToIndex

# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def writeDataToIndex(es, es_index, entries, es_doc_type='_doc'):
    """Bulk-index ``entries`` into ``es_index``, printing any failure.

    Each entry becomes the ``_source`` of one action of type
    ``es_doc_type``. The bulk call uses ``refresh=True`` and a 60 second
    request timeout. Exceptions from the bulk call are printed, not raised.
    """
    es_entries = [
        {"_index": es_index, "_type": es_doc_type, "_source": doc}
        for doc in entries
    ]

    try:
        helpers.bulk(es, es_entries, refresh=True, request_timeout=60)
    except Exception as err:
        # Best effort: this can happen if the server is restarted or the
        # connection becomes unavailable.
        print(str(err))

# ------------------- Bluetooth routines ------------------------------------ 
开发者ID:ghostop14,项目名称:sparrow-wifi,代码行数:18,代码来源:sparrow-elastic.py

示例13: send_buffered_operations

# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def send_buffered_operations(self):
        """Flush the buffered bulk actions to Elasticsearch.

        Called periodically by the AutoCommitThread. The whole flush runs
        under ``self.lock``. An empty buffer is a no-op, and Elasticsearch
        exceptions are logged rather than propagated.
        """
        with self.lock:
            try:
                pending = self.BulkBuffer.get_buffer()
                if not pending:
                    return
                sent, failures = bulk(self.elastic, pending)
                LOG.debug(
                    "Bulk request finished, successfully sent %d operations",
                    sent,
                )
                if failures:
                    LOG.error("Bulk request finished with errors: %r", failures)
            except es_exceptions.ElasticsearchException:
                LOG.exception("Bulk request failed with exception")
开发者ID:yougov,项目名称:elastic2-doc-manager,代码行数:20,代码来源:elastic2_doc_manager.py

示例14: __init__

# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def __init__(self, docman):
        """Create an empty bulk buffer owned by ``docman``."""

        # Owning document manager.
        self.docman = docman

        # Cache of document sources.
        # Format: {"_index": {"_type": {"_id": {"_source": actual_source}}}}
        self.sources = {}

        # Ids of documents whose source still has to be fetched from
        # Elasticsearch, kept per index and type so that the same document
        # is not fetched more than once.
        # NOTE(review): originally documented as
        # {"_index": {"_type": {"_id": True}}}, but should_get_id stores a
        # set of ids per type; confirm the intended format.
        self.doc_to_get = {}

        # Pending updates: the source must first be retrieved from
        # Elasticsearch, then apply_update is performed on it.
        # Format: [ (doc, update_spec, action_buffer_index, get_from_ES) ]
        self.doc_to_update = []

        # Actions waiting to be sent in the next bulk request.
        self.action_buffer = []
开发者ID:yougov,项目名称:elastic2-doc-manager,代码行数:26,代码来源:elastic2_doc_manager.py

示例15: should_get_id

# 需要导入模块: from elasticsearch import helpers [as 别名]
# 或者: from elasticsearch.helpers import bulk [as 别名]
def should_get_id(self, action):
        """
        Record that the source of the action's document must be fetched
        from Elasticsearch.

        Ids are tracked in ``self.doc_to_get`` per "_index" and "_type".
        Returns:
            True - if the document is marked for the first time in this bulk
            False - if the document has already been marked
        """
        ids_by_type = self.doc_to_get.setdefault(action["_index"], {})
        marked_ids = ids_by_type.setdefault(action["_type"], set())
        doc_id = action["_id"]

        first_time = doc_id not in marked_ids
        if first_time:
            marked_ids.add(doc_id)
        return first_time
开发者ID:yougov,项目名称:elastic2-doc-manager,代码行数:18,代码来源:elastic2_doc_manager.py


注:本文中的elasticsearch.helpers.bulk方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。