

Python helpers.parallel_bulk method code examples

This article collects typical usage examples of the elasticsearch.helpers.parallel_bulk method in Python. If you have been wondering how exactly to use helpers.parallel_bulk, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples from its parent module, elasticsearch.helpers.


Seven code examples of helpers.parallel_bulk are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
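
Before the individual examples, a minimal sketch of the helper's contract may be useful (the index name and documents here are placeholders, not taken from any project below): parallel_bulk(client, actions, ...) returns a lazy generator of (ok, info) tuples, one per action, so nothing is sent to Elasticsearch until the generator is consumed.

from elasticsearch import Elasticsearch
from elasticsearch.helpers import parallel_bulk

es = Elasticsearch()  # assumes a reachable cluster on the default host/port

# Illustrative actions only; "my-index" and the payloads are placeholders
actions = ({"_index": "my-index", "_source": {"value": i}} for i in range(1000))

# parallel_bulk is lazy: chunks are sent only while the generator is iterated
for ok, info in parallel_bulk(es, actions, thread_count=4, chunk_size=500):
    if not ok:
        print("Failed action:", info)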

Example 1: bulk_update

# Required module: from elasticsearch import helpers [as alias]
# Or: from elasticsearch.helpers import parallel_bulk [as alias]
@classmethod
def bulk_update(cls, items, chunk_size=5000, op_type="update", **kwargs):
        index = cls._index._name
        _type = cls._doc_type.name
        obj = [
            {
                "_op_type": op_type,
                "_id": f"{doc.id}",
                "_index": index,
                "_type": _type,
                "_source": get_item_data(doc),
            }
            for doc in items
        ]
        client = cls.get_es()
        rs = list(parallel_bulk(client, obj, chunk_size=chunk_size, **kwargs))
        return rs 
Author: hjlarry, Project: flask-shop, Lines of code: 18, Source: search.py
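
As a usage note, a hypothetical call site for this classmethod might look as follows (Product and the item query are illustrative assumptions, not code from flask-shop):

# Assumes Product is a Document subclass that defines bulk_update
items = Product.query.all()
results = Product.bulk_update(items, chunk_size=1000, op_type="index")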

Example 2: run

# Required module: from elasticsearch import helpers [as alias]
# Or: from elasticsearch.helpers import parallel_bulk [as alias]
def run(self, distribution):
        actions = self.generate_es_actions(distribution)

        if not actions:
            return

        for success, info in parallel_bulk(self.elastic, actions):
            if not success:
                logger.warning(strings.BULK_REQUEST_ERROR, info)

        self.update_distribution_indexation_metadata(distribution) 
Author: datosgobar, Project: series-tiempo-ar-api, Lines of code: 13, Source: distribution_indexer.py

Example 3: index_data

# Required module: from elasticsearch import helpers [as alias]
# Or: from elasticsearch.helpers import parallel_bulk [as alias]
def index_data(self):
        """Indexa la data leía desde el archivo de datos"""
        with open(DATA_FILE_PATH) as f:
            self.elastic.indices.create(settings.TS_INDEX,
                                        body=INDEX_CREATION_BODY)

            actions = [json.loads(row) for row in f.readlines()]
            for success, info in parallel_bulk(self.elastic, actions):
                if not success:
                    print("ERROR:", info)

            segments = FORCE_MERGE_SEGMENTS
            self.elastic.indices.forcemerge(index=settings.TS_INDEX,
                                            max_num_segments=segments) 
Author: datosgobar, Project: series-tiempo-ar-api, Lines of code: 16, Source: generate_data.py

Example 4: index

# Required module: from elasticsearch import helpers [as alias]
# Or: from elasticsearch.helpers import parallel_bulk [as alias]
def index(self, queryset: QuerySet):
        self._init_index()

        for success, info in parallel_bulk(self.es_connection, generate_es_query(queryset)):
            if not success:
                raise RuntimeError(f"Error indexing query to ES: {info}")
Author: datosgobar, Project: series-tiempo-ar-api, Lines of code: 8, Source: index.py

Example 5: processFiles

# Required module: from elasticsearch import helpers [as alias]
# Or: from elasticsearch.helpers import parallel_bulk [as alias]
def processFiles(f):
    # list for bulk documents
    documents = []

    for log_line in f:
        # Create the body and sanitize
        source = {"message": log_line.strip('\n') }
        body = {"_index": options.index_name, "_type": options.index_name, "pipeline": options.index_name, "_source": source }

        # append record to list before bulk send to ES
        documents.append(body)
        options.totalDocCount += 1

        if len(documents) >= options.bulk_limit:
            # bulk send all our entries
            status = helpers.parallel_bulk(es, documents)

            # look through each result for status
            for ok, info in status:
                if not ok:
                    print("There was an error importing a record. Error:", info)

            # Using this to have the doc count stay on one line and continually be updated
            sys.stdout.write("Total Documents sent to Elasticsearch: " + str(options.totalDocCount) + "\r")
            sys.stdout.flush()

            # now clean out the document list
            documents[:] = []

    # If we've made it here, then the file ended, and it's possible we still have documents in documents list.  Need to send what we have
    if len(documents) > 0:
        # bulk send all our entries
        status = helpers.parallel_bulk(es, documents)

        # look through each result for status
        for ok, info in status:
            if not ok:
                print("There was an error importing a record. Error:", info)

        # Using this to have the doc count stay on one line and continually be updated
        sys.stdout.write("Total Documents sent to Elasticsearch: " + str(options.totalDocCount) + "\r")
        sys.stdout.flush()

        # now clean out the document list
        documents[:] = []

    # print the final doc count before moving out of the function
    sys.stdout.write("Total Documents sent to Elasticsearch: " + str(options.totalDocCount) + "\r") 
Author: mike-mosher, Project: aws-la, Lines of code: 50, Source: importLogs.py
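
A design note on this example: parallel_bulk already splits its action iterable into chunks internally (the chunk_size parameter, 500 by default), so the manual bulk_limit batching above mainly bounds the script's memory use and enables progress reporting. A sketch of the streaming alternative, under the same assumed es and options objects:

def stream_actions(f):
    # Generator form lets parallel_bulk chunk the file lazily
    for log_line in f:
        yield {"_index": options.index_name, "pipeline": options.index_name,
               "_source": {"message": log_line.strip('\n')}}

for ok, info in helpers.parallel_bulk(es, stream_actions(f)):
    if not ok:
        print("There was an error importing a record. Error:", info)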

Example 6: bulk_import

# Required module: from elasticsearch import helpers [as alias]
# Or: from elasticsearch.helpers import parallel_bulk [as alias]
def bulk_import(**kwargs) -> Tuple[int, int, int]:
    """Bulk import data to elasticsearch.

    Tracks bulk import response metrics, reporting both externally to
    prometheus and to the caller.
    """
    log.info('Starting bulk import: {}'.format(str(kwargs)))
    good, missing, errors = 0, 0, 0
    for ok, result in parallel_bulk(raise_on_exception=False, raise_on_error=False, **kwargs):
        action, result = result.popitem()
        status_code = result.get('status', 500)
        if ok:
            good += 1
            try:
                Metric.ACTION_RESULTS[result['result']].inc()
            except KeyError:
                Metric.OK_UNKNOWN.inc()
        elif status_code == 'TIMEOUT':
            Metric.TIMEOUT.inc()
            errors += 1
        elif not isinstance(status_code, int):
            # Previously found TIMEOUT status_code here
            Metric.FAILED.inc()
            log.warning(
                'Failed bulk %s request with invalid status_code %s: %s',
                action, str(status_code), str(result)[:1024])
            errors += 1
        elif status_code == 404:
            # 404 are quite common so we log them separately. The analytics
            # side doesn't know the namespace mappings and attempts to send all
            # updates to <wiki>_content, letting the docs that don't exist fail
            missing += 1
            Metric.MISSING.inc()
        elif status_code >= 400 and status_code < 500:
            # Bulk contained invalid records, can't do much beyond logging
            Metric.FAILED.inc()
            log.warning('Failed bulk %s request: %s', action, str(result)[:1024])
            errors += 1
        elif status_code >= 500 and status_code < 600:
            # primary not available, etc. Internal elasticsearch errors. Should be retryable
            raise Exception(
                "Internal elasticsearch error on {}, status code {}: {}".format(action, status_code, str(result)))
        else:
            raise Exception(
                "Unexpected response on {}, status code {}: {}".format(action, status_code, str(result)))

    log.info('Completed import with %d success %d missing and %d errors', good, missing, errors)
    return good, missing, errors 
Author: wikimedia, Project: search-MjoLniR, Lines of code: 50, Source: bulk_daemon.py
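
A sketch of how this helper might be invoked (the client, actions iterable, and chunk size are assumptions; all keyword arguments are forwarded to parallel_bulk):

# Hypothetical call; `es` and `actions` are placeholders
good, missing, errors = bulk_import(client=es, actions=actions, chunk_size=1000)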

Example 7: flush_cache

# Required module: from elasticsearch import helpers [as alias]
# Or: from elasticsearch.helpers import parallel_bulk [as alias]
def flush_cache(self):
        if len(self.cache) == 0:
            return True
        retry = 2
        for i in range(retry):
            try:
                to_upload = helpers.parallel_bulk(
                    self.es, self.cache_insertable_iterable())
                counter = 0
                num_items = len(self.cache)
                for item in to_upload:
                    counter += 1
                    self.logger.debug(
                        "{} of {} Elastic objects uploaded".format(
                            counter, num_items))
                output = "Pushed {} items to Elasticsearch to index {}".format(
                    num_items, self.index)
                output += " and browbeat UUID {}".format(str(browbeat_uuid))
                self.logger.info(output)
                self.cache = deque()
                self.last_upload = datetime.datetime.utcnow()
                return True
            except Exception as Err:
                self.logger.error(
                    "Error pushing data to Elasticsearch, going to retry"
                    " in 10 seconds")
                self.logger.error("Exception: {}".format(Err))
                time.sleep(10)
                if i == (retry - 1):
                    self.logger.error(
                        "Pushing Data to Elasticsearch failed in spite of retry,"
                        " dumping JSON for {} cached items".format(
                            len(
                                self.cache)))
                    for item in self.cache:
                        filename = item['test_name'] + '-' + item['identifier']
                        filename += '-elastic.json'
                        elastic_file = os.path.join(item['result_dir'],
                                                    filename)

                        with open(elastic_file, 'w') as result_file:
                            json.dump(item['result'],
                                      result_file,
                                      indent=4,
                                      sort_keys=True)

                            self.logger.info(
                                "Saved Elasticsearch consumable result JSON to {}". format(
                                    elastic_file))
                    self.cache = deque()
                    self.last_upload = datetime.datetime.utcnow()
                    return False 
Author: cloud-bulldozer, Project: browbeat, Lines of code: 54, Source: elastic.py


Note: The elasticsearch.helpers.parallel_bulk examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are selected from open-source projects contributed by many developers, and the copyright of the source code belongs to the original authors; refer to the corresponding project's License before distributing or using it. Do not republish without permission.