本文整理匯總了Python中elasticsearch.helpers方法的典型用法代碼示例。如果您正苦於以下問題:Python elasticsearch.helpers方法的具體用法?Python elasticsearch.helpers怎麽用?Python elasticsearch.helpers使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類elasticsearch
的用法示例。
在下文中一共展示了elasticsearch.helpers方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: sentences
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def sentences(self, exclude_ids=None, query=None, return_estnltk_object=True, **kwargs):
    """Yield sentence documents from the index via an Elasticsearch scan.

    :param exclude_ids: not supported yet; any non-None value raises
        NotImplementedError.
    :param query: optional Elasticsearch query body (dict). When
        ``return_estnltk_object`` is True and the query has no ``fields``
        entry, ``['estnltk_text_object']`` is added automatically.
    :param return_estnltk_object: when True, yield ``Text`` objects rebuilt
        from the stored ``estnltk_text_object`` field; when False, yield the
        raw JSON-decoded documents.
    :param kwargs: forwarded to ``elasticsearch.helpers.scan``.
    :raises AssertionError: if ``query['fields']`` is set but does not
        include ``estnltk_text_object`` while estnltk objects were requested.
    :raises NotImplementedError: if ``exclude_ids`` is given.
    """
    if query is None:
        query = {}
    if return_estnltk_object:
        fields = query.get('fields', None)
        if fields is None:
            query['fields'] = ['estnltk_text_object']
        elif 'estnltk_text_object' not in fields:
            # BUG FIX: the two concatenated literals previously ran together
            # ("argumentConsider") and "response" was misspelled "respose".
            raise AssertionError(
                'Query contained the "fields" parameter without the "estnltk_text_object" argument. '
                'Consider setting the "return_estnltk_object" parameter to False to disable response handling')
    if exclude_ids is not None:
        raise NotImplementedError('ID exclusion is not implemented')
    for document in elasticsearch.helpers.scan(self.client, query=query, doc_type='sentence', **kwargs):
        if return_estnltk_object:
            yield Text(json.loads(document['fields']['estnltk_text_object'][0]))
        else:
            yield json.loads(document)
示例2: next_bulk_action_batch
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def next_bulk_action_batch(self, document_iterator):
    """
    Pull up to ``self.batch_size`` documents off the iterator and expand each
    into its bulk-index action form.

    Elasticsearch's bulk API expects every document as (up to) two entries:
    an action header describing what to do, optionally followed by the
    document payload itself.
    See the `Cheaper in Bulk <https://www.elastic.co/guide/en/elasticsearch/guide/1.x/bulk.html>`_ guide.

    Arguments:
        document_iterator (iterator of dicts):

    Returns: A list of dicts that can be transmitted to elasticsearch using the "bulk" request.
    """
    batch = []
    for raw_document in islice(document_iterator, self.batch_size):
        action, payload = elasticsearch.helpers.expand_action(raw_document)
        batch.append(action)
        # Delete actions carry no payload line; skip the None in that case.
        if payload is not None:
            batch.append(payload)
    return batch
示例3: store_in_elasticsearch
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def store_in_elasticsearch(so_it, dry_run, es, index, workers_write, queue_write):
    """Bulk-index search objects into Elasticsearch.

    Uses a parallel bulk writer when ``workers_write > 0``, otherwise a
    streaming one. Raises RuntimeError if any document fails to index.
    With ``dry_run`` set, the actions are generated but nothing is written.
    """
    chunk_size = 1000  # TODO make configurable
    actions = elasticsearch_actions(so_it, dry_run, index)
    if dry_run:
        return
    if workers_write > 0:
        bulk_results = elasticsearch.helpers.parallel_bulk(
            es, actions,
            thread_count=workers_write,
            queue_size=queue_write,
            chunk_size=chunk_size)
    else:
        bulk_results = elasticsearch.helpers.streaming_bulk(
            es, actions,
            chunk_size=chunk_size)
    failcount = sum(1 for ok, _ in bulk_results if not ok)
    if failcount:
        raise RuntimeError("%s relations failed to index" % failcount)
示例4: store_in_elasticsearch
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def store_in_elasticsearch(results, es, dry_run, workers_write, queue_write, index):
    """Bulk-index the given results into Elasticsearch.

    Chooses a parallel or streaming bulk writer depending on
    ``workers_write``; raises RuntimeError when any item fails to index.
    ``dry_run`` generates the actions without writing anything.
    """
    chunk_size = 1000  # TODO make configurable
    actions = elasticsearch_actions(results, dry_run, index)
    if dry_run:
        return
    # Use a distinct name for the bulk responses so the `results`
    # parameter is not shadowed.
    if workers_write > 0:
        bulk_results = elasticsearch.helpers.parallel_bulk(
            es, actions,
            thread_count=workers_write,
            queue_size=queue_write,
            chunk_size=chunk_size)
    else:
        bulk_results = elasticsearch.helpers.streaming_bulk(
            es, actions,
            chunk_size=chunk_size)
    failcount = sum(1 for ok, _ in bulk_results if not ok)
    if failcount:
        raise RuntimeError("%s relations failed to index" % failcount)
示例5: get_target_labels
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def get_target_labels(ids, es, index):
    """Return a mapping of target document id -> approved_symbol.

    Scans the given index for the supplied ids, fetching only the
    ``approved_symbol`` field of each hit.
    """
    body = {
        "query": {
            "ids": {
                "values": ids,
            }
        },
        '_source': 'approved_symbol',
        'size': 1,
    }
    hits = elasticsearch.helpers.scan(client=es, query=body, index=index)
    return {hit['_id']: hit['_source']['approved_symbol'] for hit in hits}
示例6: get_nulldata_transactions
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def get_nulldata_transactions(self, index):
    """Scan the given index for transactions with a nulldata output.

    Fetches only the fields needed downstream (hash/height/txid, input
    txids and the scriptPubKey details of each output).
    """
    wanted_fields = [
        "hash",
        "height",
        "txid",
        "vin.txid",
        "vout.scriptPubKey.asm",
        "vout.scriptPubKey.type",
        "vout.n",
    ]
    nulldata_filter = {"term": {"vout.scriptPubKey.type": "nulldata"}}
    query = {
        "_source": wanted_fields,
        "query": {"bool": {"must": [nulldata_filter]}},
    }
    return elasticsearch.helpers.scan(self.es, index=index, query=query, scroll='5m')
示例7: get_opreturn_data
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def get_opreturn_data(self, bottom=None, top=None):
    """Scan the btc-opreturn index, optionally limited to a height range.

    When both ``bottom`` and ``top`` are given, only documents whose
    ``height`` lies in [bottom, top] are returned; otherwise everything
    matches.
    """
    wanted_fields = [
        "tx",
        "height",
        "n",
        "txid",
        "vin.txid",
    ]
    if bottom is None or top is None:
        selector = {"match_all": {}}
    else:
        selector = {"range": {"height": {"gte": bottom, "lte": top}}}
    query = {"_source": wanted_fields, "query": selector}
    return elasticsearch.helpers.scan(self.es, index="btc-opreturn", query=query, size=100, scroll='1m')
示例8: get_elasticsearch_helper
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def get_elasticsearch_helper(self):
    """Expose the ``elasticsearch.helpers`` package.

    Callers use this to bulk-index documents without importing the
    package themselves.

    :returns: package ``elasticsearch.helpers``
    """
    return helpers
示例9: bulk_index
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def bulk_index(self, index, doc_type, items):
    """Bulk-index ``items`` into ``index`` through the guarded client call.

    Clusters at major version 7+ reject ``doc_type``, so it is only
    passed for older clusters.
    """
    # TODO #653: Remove version-specific support for metrics stores before 7.0.0.
    import elasticsearch.helpers
    bulk_kwargs = {"index": index, "chunk_size": 5000}
    if self._cluster_version[0] <= 6:
        bulk_kwargs["doc_type"] = doc_type
    self.guarded(elasticsearch.helpers.bulk, self._client, items, **bulk_kwargs)
示例10: main
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def main():
    """Load an NVD JSON feed given on the command line and bulk-index its
    CVE entries into the 'cve-index' Elasticsearch index, creating the
    index (with its mapping) on first run.

    Exits with status 1 when no input file is supplied. Failed bulk items
    are reported on stdout.
    """
    if len(sys.argv) > 1:
        input_file = sys.argv[1]
    else:
        print("Usage: %s <nvd-xml-file>" % (sys.argv[0]))
        sys.exit(1)
    # First let's see if the index exists
    if not es.indices.exists('cve-index'):
        # We have to create it and add a mapping.
        # BUG FIX: the file handle was previously leaked (never closed).
        with open('cve-index-json-mapping.json') as fh:
            mapping = json.load(fh)
        es.indices.create('cve-index', body=mapping)
    with open(input_file) as fh:
        json_data = json.load(fh)
    the_cves = CVE()
    for entry in json_data['CVE_Items']:
        # entry holds one item, e.g. entry['cve']['CVE_data_meta']['ID']
        the_cves.add(entry)
    for ok, item in elasticsearch.helpers.streaming_bulk(es, the_cves, max_retries=2):
        if not ok:
            print("ERROR:")
            print(item)
示例11: validation_on_start
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def validation_on_start(eco_scores_uri, schema_uri, excluded_biotypes,
        datasources_to_datatypes, es_hosts, es_index_gene, es_index_eco, es_index_efo,
        cache_target, cache_target_u2e, cache_target_contains,
        cache_eco, cache_efo, cache_efo_contains):
    """Build the per-worker validation context: a logger, a schema validator,
    the lookup caches (gene/ECO/EFO) and an EvidenceManager.

    Returns the tuple (logger, validator, lookup_data,
    datasources_to_datatypes, evidence_manager).
    """
    logger = logging.getLogger(__name__)

    validator = opentargets_validator.helpers.generate_validator_from_schema(schema_uri)

    lookup_data = LookUpDataRetriever(new_es_client(es_hosts),
        gene_index=es_index_gene,
        gene_cache_size=cache_target,
        gene_cache_u2e_size=cache_target_u2e,
        gene_cache_contains_size=cache_target_contains,
        eco_index=es_index_eco,
        # BUG FIX: previously cache_efo_contains was passed here, leaving the
        # cache_eco parameter unused; the ECO cache gets the ECO cache size.
        eco_cache_size=cache_eco,
        efo_index=es_index_efo,
        efo_cache_size=cache_efo,
        efo_cache_contains_size=cache_efo_contains
        ).lookup

    evidence_manager = EvidenceManager(lookup_data, eco_scores_uri,
        excluded_biotypes, datasources_to_datatypes)

    return logger, validator, lookup_data, datasources_to_datatypes, evidence_manager
示例12: store
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def store(self, es, dry_run, data):
    """Create the drug index (mappings + settings) and bulk-write ``data``
    into it.

    Writes go through a parallel or streaming bulk helper depending on
    ``self.workers_write``; a ``dry_run`` builds the actions without
    touching Elasticsearch. Raises RuntimeError on any failed item.
    """
    self.logger.info("Starting drug storage")

    with URLZSource(self.es_mappings).open() as mappings_file:
        mappings = json.load(mappings_file)
    with URLZSource(self.es_settings).open() as settings_file:
        settings = json.load(settings_file)

    with ElasticsearchBulkIndexManager(es, self.es_index, settings, mappings):
        # write into elasticsearch
        chunk_size = 1000  # TODO make configurable
        actions = elasticsearch_actions(list(data.items()), self.es_index)
        failcount = 0
        if not dry_run:
            if self.workers_write > 0:
                bulk_results = elasticsearch.helpers.parallel_bulk(
                    es, actions,
                    thread_count=self.workers_write,
                    queue_size=self.queue_write,
                    chunk_size=chunk_size)
            else:
                bulk_results = elasticsearch.helpers.streaming_bulk(
                    es, actions,
                    chunk_size=chunk_size)
            failcount = sum(1 for ok, _ in bulk_results if not ok)
            if failcount:
                raise RuntimeError("%s relations failed to index" % failcount)

    self.logger.debug("Completed storage")
示例13: _store_eco
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def _store_eco(self, dry_run):
    """Create the ECO index (mappings + settings) and bulk-write
    ``self.ecos`` into it.

    Uses parallel or streaming bulk depending on ``self.workers_write``;
    ``dry_run`` skips all writes. Raises RuntimeError on failed items.
    """
    with URLZSource(self.es_mappings).open() as mappings_file:
        mappings = json.load(mappings_file)
    with URLZSource(self.es_settings).open() as settings_file:
        settings = json.load(settings_file)

    es = new_es_client(self.es_hosts)
    with ElasticsearchBulkIndexManager(es, self.es_index, settings, mappings):
        # write into elasticsearch
        chunk_size = 1000  # TODO make configurable
        actions = elasticsearch_actions(list(self.ecos.items()), self.es_index)
        failcount = 0
        if not dry_run:
            if self.workers_write > 0:
                bulk_results = elasticsearch.helpers.parallel_bulk(
                    es, actions,
                    thread_count=self.workers_write,
                    queue_size=self.queue_write,
                    chunk_size=chunk_size)
            else:
                bulk_results = elasticsearch.helpers.streaming_bulk(
                    es, actions,
                    chunk_size=chunk_size)
            failcount = sum(1 for ok, _ in bulk_results if not ok)
            if failcount:
                raise RuntimeError("%s relations failed to index" % failcount)
示例14: store_data
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def store_data(self, dry_run):
    """Create the expression index (mappings + settings) and bulk-write
    the merged HPA table into it.

    Uses parallel or streaming bulk depending on ``self.workers_write``;
    ``dry_run`` builds the actions but writes nothing. Raises
    RuntimeError when any item fails to index.
    """
    self.logger.info('store_data called')

    self.logger.debug('calling to create new expression index')

    with URLZSource(self.es_mappings).open() as mappings_file:
        mappings = json.load(mappings_file)
    with URLZSource(self.es_settings).open() as settings_file:
        settings = json.load(settings_file)

    es = new_es_client(self.es_hosts)
    with ElasticsearchBulkIndexManager(es, self.es_index, settings, mappings):
        # write into elasticsearch
        chunk_size = 1000  # TODO make configurable
        actions = elasticsearch_actions(self.hpa_merged_table, dry_run, self.es_index)
        failcount = 0
        if not dry_run:
            results = None
            if self.workers_write > 0:
                results = elasticsearch.helpers.parallel_bulk(es, actions,
                    thread_count=self.workers_write,
                    queue_size=self.queue_write,
                    chunk_size=chunk_size)
            else:
                results = elasticsearch.helpers.streaming_bulk(es, actions,
                    chunk_size=chunk_size)
            for success, details in results:
                if not success:
                    failcount += 1
            # BUG FIX: a second, unreachable "if failcount: raise" with a
            # slightly different message followed this one; it could never
            # fire because this raise triggers first, so it was removed.
            if failcount:
                raise RuntimeError("%s relations failed to index" % failcount)

    self.logger.info('missing tissues %s', str(_missing_tissues))
示例15: get_disease_labels
# 需要導入模塊: import elasticsearch [as 別名]
# 或者: from elasticsearch import helpers [as 別名]
def get_disease_labels(ids, es, index):
    """Return a mapping of disease document id -> label.

    Scans the given index for the supplied ids, fetching only the
    ``label`` field of each hit.
    """
    body = {
        "query": {
            "ids": {
                "values": ids,
            }
        },
        '_source': 'label',
        'size': 1,
    }
    hits = elasticsearch.helpers.scan(client=es, query=body, index=index)
    return {hit['_id']: hit['_source']['label'] for hit in hits}