本文整理匯總了Python中elasticsearch.helpers方法的典型用法代碼示例。如果您正苦於以下問題:Python elasticsearch.helpers方法的具體用法?Python elasticsearch.helpers怎麽用?Python elasticsearch.helpers使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類elasticsearch
的用法示例。
在下文中一共展示了elasticsearch.helpers方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: sentences
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def sentences(self, exclude_ids=None, query=None, return_estnltk_object=True, **kwargs):
    """Yield sentence documents from the index via an Elasticsearch scan.

    :param exclude_ids: not supported yet; any non-None value raises
        NotImplementedError.
    :param query: optional Elasticsearch query body (dict). When
        ``return_estnltk_object`` is True and the query has no ``fields``
        entry, ``['estnltk_text_object']`` is added automatically.
    :param return_estnltk_object: when True, yield ``Text`` objects rebuilt
        from the stored ``estnltk_text_object`` field; when False, yield the
        raw JSON-decoded documents.
    :param kwargs: forwarded to ``elasticsearch.helpers.scan``.
    :raises AssertionError: if ``query['fields']`` is set but does not
        include ``estnltk_text_object`` while estnltk objects were requested.
    :raises NotImplementedError: if ``exclude_ids`` is given.
    """
    if query is None:
        query = {}
    if return_estnltk_object:
        fields = query.get('fields', None)
        if fields is None:
            query['fields'] = ['estnltk_text_object']
        elif 'estnltk_text_object' not in fields:
            # BUG FIX: the two concatenated literals previously ran together
            # ("argumentConsider") and "response" was misspelled "respose".
            raise AssertionError(
                'Query contained the "fields" parameter without the "estnltk_text_object" argument. '
                'Consider setting the "return_estnltk_object" parameter to False to disable response handling')
    if exclude_ids is not None:
        raise NotImplementedError('ID exclusion is not implemented')
    for document in elasticsearch.helpers.scan(self.client, query=query, doc_type='sentence', **kwargs):
        if return_estnltk_object:
            yield Text(json.loads(document['fields']['estnltk_text_object'][0]))
        else:
            yield json.loads(document)
示例2: next_bulk_action_batch
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def next_bulk_action_batch(self, document_iterator):
    """
    Pull up to ``self.batch_size`` documents off the iterator and expand each
    into its bulk-index action form.

    Elasticsearch's bulk API expects every document as (up to) two entries:
    an action header describing what to do, optionally followed by the
    document payload itself.
    See the `Cheaper in Bulk <https://www.elastic.co/guide/en/elasticsearch/guide/1.x/bulk.html>`_ guide.

    Arguments:
        document_iterator (iterator of dicts):

    Returns: A list of dicts that can be transmitted to elasticsearch using the "bulk" request.
    """
    batch = []
    for raw_document in islice(document_iterator, self.batch_size):
        action, payload = elasticsearch.helpers.expand_action(raw_document)
        batch.append(action)
        # Delete actions carry no payload line; skip the None in that case.
        if payload is not None:
            batch.append(payload)
    return batch
示例3: store_in_elasticsearch
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def store_in_elasticsearch(so_it, dry_run, es, index, workers_write, queue_write):
    """Bulk-index search objects into Elasticsearch.

    Uses a parallel bulk writer when ``workers_write > 0``, otherwise a
    streaming one. Raises RuntimeError if any document fails to index.
    With ``dry_run`` set, the actions are generated but nothing is written.
    """
    chunk_size = 1000  # TODO make configurable
    actions = elasticsearch_actions(so_it, dry_run, index)
    if dry_run:
        return
    if workers_write > 0:
        bulk_results = elasticsearch.helpers.parallel_bulk(
            es, actions,
            thread_count=workers_write,
            queue_size=queue_write,
            chunk_size=chunk_size)
    else:
        bulk_results = elasticsearch.helpers.streaming_bulk(
            es, actions,
            chunk_size=chunk_size)
    failcount = sum(1 for ok, _ in bulk_results if not ok)
    if failcount:
        raise RuntimeError("%s relations failed to index" % failcount)
示例4: store_in_elasticsearch
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def store_in_elasticsearch(results, es, dry_run, workers_write, queue_write, index):
    """Bulk-index the given results into Elasticsearch.

    Chooses a parallel or streaming bulk writer depending on
    ``workers_write``; raises RuntimeError when any item fails to index.
    ``dry_run`` generates the actions without writing anything.
    """
    chunk_size = 1000  # TODO make configurable
    actions = elasticsearch_actions(results, dry_run, index)
    if dry_run:
        return
    # Use a distinct name for the bulk responses so the `results`
    # parameter is not shadowed.
    if workers_write > 0:
        bulk_results = elasticsearch.helpers.parallel_bulk(
            es, actions,
            thread_count=workers_write,
            queue_size=queue_write,
            chunk_size=chunk_size)
    else:
        bulk_results = elasticsearch.helpers.streaming_bulk(
            es, actions,
            chunk_size=chunk_size)
    failcount = sum(1 for ok, _ in bulk_results if not ok)
    if failcount:
        raise RuntimeError("%s relations failed to index" % failcount)
示例5: get_target_labels
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def get_target_labels(ids, es, index):
    """Return a mapping of target document id -> approved_symbol.

    Scans the given index for the supplied ids, fetching only the
    ``approved_symbol`` field of each hit.
    """
    body = {
        "query": {
            "ids": {
                "values": ids,
            }
        },
        '_source': 'approved_symbol',
        'size': 1,
    }
    hits = elasticsearch.helpers.scan(client=es, query=body, index=index)
    return {hit['_id']: hit['_source']['approved_symbol'] for hit in hits}
示例6: get_nulldata_transactions
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def get_nulldata_transactions(self, index):
    """Scan the given index for transactions with a nulldata output.

    Fetches only the fields needed downstream (hash/height/txid, input
    txids and the scriptPubKey details of each output).
    """
    wanted_fields = [
        "hash",
        "height",
        "txid",
        "vin.txid",
        "vout.scriptPubKey.asm",
        "vout.scriptPubKey.type",
        "vout.n",
    ]
    nulldata_filter = {"term": {"vout.scriptPubKey.type": "nulldata"}}
    query = {
        "_source": wanted_fields,
        "query": {"bool": {"must": [nulldata_filter]}},
    }
    return elasticsearch.helpers.scan(self.es, index=index, query=query, scroll='5m')
示例7: get_opreturn_data
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def get_opreturn_data(self, bottom=None, top=None):
    """Scan the btc-opreturn index, optionally limited to a height range.

    When both ``bottom`` and ``top`` are given, only documents whose
    ``height`` lies in [bottom, top] are returned; otherwise everything
    matches.
    """
    wanted_fields = [
        "tx",
        "height",
        "n",
        "txid",
        "vin.txid",
    ]
    if bottom is None or top is None:
        selector = {"match_all": {}}
    else:
        selector = {"range": {"height": {"gte": bottom, "lte": top}}}
    query = {"_source": wanted_fields, "query": selector}
    return elasticsearch.helpers.scan(self.es, index="btc-opreturn", query=query, size=100, scroll='1m')
示例8: get_elasticsearch_helper
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def get_elasticsearch_helper(self):
    """Expose the ``elasticsearch.helpers`` package.

    Callers use this to bulk-index documents without importing the
    package themselves.

    :returns: package ``elasticsearch.helpers``
    """
    return helpers
示例9: bulk_index
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def bulk_index(self, index, doc_type, items):
    """Bulk-index ``items`` into ``index`` through the guarded client call.

    Clusters at major version 7+ reject ``doc_type``, so it is only
    passed for older clusters.
    """
    # TODO #653: Remove version-specific support for metrics stores before 7.0.0.
    import elasticsearch.helpers
    bulk_kwargs = {"index": index, "chunk_size": 5000}
    if self._cluster_version[0] <= 6:
        bulk_kwargs["doc_type"] = doc_type
    self.guarded(elasticsearch.helpers.bulk, self._client, items, **bulk_kwargs)
示例10: main
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def main():
    """Load an NVD JSON feed given on the command line and bulk-index its
    CVE entries into the 'cve-index' Elasticsearch index, creating the
    index (with its mapping) on first run.

    Exits with status 1 when no input file is supplied. Failed bulk items
    are reported on stdout.
    """
    if len(sys.argv) > 1:
        input_file = sys.argv[1]
    else:
        print("Usage: %s <nvd-xml-file>" % (sys.argv[0]))
        sys.exit(1)
    # First let's see if the index exists
    if not es.indices.exists('cve-index'):
        # We have to create it and add a mapping.
        # BUG FIX: the file handle was previously leaked (never closed).
        with open('cve-index-json-mapping.json') as fh:
            mapping = json.load(fh)
        es.indices.create('cve-index', body=mapping)
    with open(input_file) as fh:
        json_data = json.load(fh)
    the_cves = CVE()
    for entry in json_data['CVE_Items']:
        # entry holds one item, e.g. entry['cve']['CVE_data_meta']['ID']
        the_cves.add(entry)
    for ok, item in elasticsearch.helpers.streaming_bulk(es, the_cves, max_retries=2):
        if not ok:
            print("ERROR:")
            print(item)
示例11: validation_on_start
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def validation_on_start(eco_scores_uri, schema_uri, excluded_biotypes,
        datasources_to_datatypes, es_hosts, es_index_gene, es_index_eco, es_index_efo,
        cache_target, cache_target_u2e, cache_target_contains,
        cache_eco, cache_efo, cache_efo_contains):
    """Build the per-worker validation context: a logger, a schema validator,
    the lookup caches (gene/ECO/EFO) and an EvidenceManager.

    Returns the tuple (logger, validator, lookup_data,
    datasources_to_datatypes, evidence_manager).
    """
    logger = logging.getLogger(__name__)

    validator = opentargets_validator.helpers.generate_validator_from_schema(schema_uri)

    lookup_data = LookUpDataRetriever(new_es_client(es_hosts),
        gene_index=es_index_gene,
        gene_cache_size=cache_target,
        gene_cache_u2e_size=cache_target_u2e,
        gene_cache_contains_size=cache_target_contains,
        eco_index=es_index_eco,
        # BUG FIX: previously cache_efo_contains was passed here, leaving the
        # cache_eco parameter unused; the ECO cache gets the ECO cache size.
        eco_cache_size=cache_eco,
        efo_index=es_index_efo,
        efo_cache_size=cache_efo,
        efo_cache_contains_size=cache_efo_contains
        ).lookup

    evidence_manager = EvidenceManager(lookup_data, eco_scores_uri,
        excluded_biotypes, datasources_to_datatypes)

    return logger, validator, lookup_data, datasources_to_datatypes, evidence_manager
示例12: store
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def store(self, es, dry_run, data):
    """Create the drug index (mappings + settings) and bulk-write ``data``
    into it.

    Writes go through a parallel or streaming bulk helper depending on
    ``self.workers_write``; a ``dry_run`` builds the actions without
    touching Elasticsearch. Raises RuntimeError on any failed item.
    """
    self.logger.info("Starting drug storage")

    with URLZSource(self.es_mappings).open() as mappings_file:
        mappings = json.load(mappings_file)
    with URLZSource(self.es_settings).open() as settings_file:
        settings = json.load(settings_file)

    with ElasticsearchBulkIndexManager(es, self.es_index, settings, mappings):
        # write into elasticsearch
        chunk_size = 1000  # TODO make configurable
        actions = elasticsearch_actions(list(data.items()), self.es_index)
        failcount = 0
        if not dry_run:
            if self.workers_write > 0:
                bulk_results = elasticsearch.helpers.parallel_bulk(
                    es, actions,
                    thread_count=self.workers_write,
                    queue_size=self.queue_write,
                    chunk_size=chunk_size)
            else:
                bulk_results = elasticsearch.helpers.streaming_bulk(
                    es, actions,
                    chunk_size=chunk_size)
            failcount = sum(1 for ok, _ in bulk_results if not ok)
            if failcount:
                raise RuntimeError("%s relations failed to index" % failcount)

    self.logger.debug("Completed storage")
示例13: _store_eco
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def _store_eco(self, dry_run):
    """Create the ECO index (mappings + settings) and bulk-write
    ``self.ecos`` into it.

    Uses parallel or streaming bulk depending on ``self.workers_write``;
    ``dry_run`` skips all writes. Raises RuntimeError on failed items.
    """
    with URLZSource(self.es_mappings).open() as mappings_file:
        mappings = json.load(mappings_file)
    with URLZSource(self.es_settings).open() as settings_file:
        settings = json.load(settings_file)

    es = new_es_client(self.es_hosts)
    with ElasticsearchBulkIndexManager(es, self.es_index, settings, mappings):
        # write into elasticsearch
        chunk_size = 1000  # TODO make configurable
        actions = elasticsearch_actions(list(self.ecos.items()), self.es_index)
        failcount = 0
        if not dry_run:
            if self.workers_write > 0:
                bulk_results = elasticsearch.helpers.parallel_bulk(
                    es, actions,
                    thread_count=self.workers_write,
                    queue_size=self.queue_write,
                    chunk_size=chunk_size)
            else:
                bulk_results = elasticsearch.helpers.streaming_bulk(
                    es, actions,
                    chunk_size=chunk_size)
            failcount = sum(1 for ok, _ in bulk_results if not ok)
            if failcount:
                raise RuntimeError("%s relations failed to index" % failcount)
示例14: store_data
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def store_data(self, dry_run):
    """Create the expression index (mappings + settings) and bulk-write
    the merged HPA table into it.

    Uses parallel or streaming bulk depending on ``self.workers_write``;
    ``dry_run`` builds the actions but writes nothing. Raises
    RuntimeError when any item fails to index.
    """
    self.logger.info('store_data called')

    self.logger.debug('calling to create new expression index')

    with URLZSource(self.es_mappings).open() as mappings_file:
        mappings = json.load(mappings_file)
    with URLZSource(self.es_settings).open() as settings_file:
        settings = json.load(settings_file)

    es = new_es_client(self.es_hosts)
    with ElasticsearchBulkIndexManager(es, self.es_index, settings, mappings):
        # write into elasticsearch
        chunk_size = 1000  # TODO make configurable
        actions = elasticsearch_actions(self.hpa_merged_table, dry_run, self.es_index)
        failcount = 0
        if not dry_run:
            results = None
            if self.workers_write > 0:
                results = elasticsearch.helpers.parallel_bulk(es, actions,
                    thread_count=self.workers_write,
                    queue_size=self.queue_write,
                    chunk_size=chunk_size)
            else:
                results = elasticsearch.helpers.streaming_bulk(es, actions,
                    chunk_size=chunk_size)
            for success, details in results:
                if not success:
                    failcount += 1
            # BUG FIX: a second, unreachable "if failcount: raise" with a
            # slightly different message followed this one; it could never
            # fire because this raise triggers first, so it was removed.
            if failcount:
                raise RuntimeError("%s relations failed to index" % failcount)

    self.logger.info('missing tissues %s', str(_missing_tissues))
示例15: get_disease_labels
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def get_disease_labels(ids, es, index):
    """Return a mapping of disease document id -> label.

    Scans the given index for the supplied ids, fetching only the
    ``label`` field of each hit.
    """
    body = {
        "query": {
            "ids": {
                "values": ids,
            }
        },
        '_source': 'label',
        'size': 1,
    }
    hits = elasticsearch.helpers.scan(client=es, query=body, index=index)
    return {hit['_id']: hit['_source']['label'] for hit in hits}