本文整理汇总了Python中elasticsearch.client.IndicesClient.optimize方法的典型用法代码示例。如果您正苦于以下问题:Python IndicesClient.optimize方法的具体用法?Python IndicesClient.optimize怎么用?Python IndicesClient.optimize使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类elasticsearch.client.IndicesClient
的用法示例。
在下文中一共展示了IndicesClient.optimize方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from elasticsearch.client import IndicesClient [as 别名]
# 或者: from elasticsearch.client.IndicesClient import optimize [as 别名]
def main(argv):
    """Create the 'user_topics' index and bulk-load random user/topic docs.

    argv: [script, n_users, n_topics, n_topics_per_user] (all integers).
    Requires an Elasticsearch node on localhost:9200 and a module-level
    ``rand`` sampling helper defined elsewhere in this file.  After indexing,
    the index is force-merged ("optimize") down to 3 segments.
    """
    index = 'user_topics'
    client = Elasticsearch('localhost:9200')
    index_client = IndicesClient(client)
    # Recreate the index from scratch: 4 shards, no replicas; doc_values on
    # the integer fields so they can be aggregated without heap fielddata.
    if index_client.exists(index):
        index_client.delete(index)
    index_client.create(index=index, body={
        'settings': {
            'number_of_shards': 4,
            'number_of_replicas': 0
        },
        'mappings': {
            'user': {
                'properties': {
                    'topics': {
                        'type': 'integer',
                        'doc_values': True
                    },
                    'n_topics': {
                        'type': 'integer',
                        'doc_values': True
                    }
                }
            }
        }
    })
    n_users = int(argv[1])
    # NOTE(review): the 0.15 / 4.2 factors make these floats which are then
    # passed to rand() below -- confirm rand() accepts non-integer bounds.
    n_topics = int(argv[2]) * 0.15
    n_topics_per_user = int(argv[3]) * 4.2
    docs_per_chunk = int(2e4)
    n_chunks = int(ceil(n_users / docs_per_chunk))
    start_time = time.time()
    for i_chunk in range(1, n_chunks + 1):
        # Build one bulk request body: alternating action and document lines.
        docs = []
        for i in range(docs_per_chunk):
            n_user_topics = rand(n_topics_per_user)[0]
            topics = list(set(rand(n_topics, n_user_topics)))
            doc_id = str(random.getrandbits(63))
            # Bug fix: the action line used to end with '}})', which is not
            # valid JSON and makes Elasticsearch reject every bulk item.
            docs.append('{"index":{"_index": "%s", "_type": "user", "_id": "%s"}}'
                        % (index, doc_id))
            docs.append(json.dumps({
                'topics': topics,
                'n_topics': len(topics)
            }))
        try:
            client.bulk(body='\n'.join(docs))
        except Exception:
            # Even when an exception is thrown the documents were typically
            # stored in ES anyway, so just back off and keep going.
            sleep_seconds = 10
            print('\rHTTP timed out, sleeping %d seconds...' % sleep_seconds)
            time.sleep(sleep_seconds)
        print('\rChunk %5d/%d, %5.2f%%'
              % (i_chunk, n_chunks, i_chunk * 100.0 / n_chunks), end='')
    index_time = time.time()
    print('\nCalling optimize, indexing took %.1f s...' % (index_time - start_time))
    sys.stdout.flush()
    # Force-merge to a few segments; the huge request_timeout is needed
    # because optimize blocks until the merge completes.
    index_client.optimize(index=index, max_num_segments=3, request_timeout=1e6)
    print('Optimization done in %.1f s' % (time.time() - index_time))
示例2: main
# 需要导入模块: from elasticsearch.client import IndicesClient [as 别名]
# 或者: from elasticsearch.client.IndicesClient import optimize [as 别名]
def main(index_num):
    """Create index 'image_hashes_NN' and bulk-load 10M random LSH hash docs.

    Relies on module-level names defined elsewhere in this file
    (``get_sampler``, ``n_samples``, ``b_p_sample``, ``proj``, ``dim_in``,
    ``dim_out``) and an Elasticsearch node on localhost:9200.  Finishes by
    force-merging ("optimize") the index down to 3 segments.
    """
    n_out = int(10e6)   # total number of documents to index
    n_batch = int(4e3)  # documents per bulk request
    n_batches = n_out // n_batch
    index = 'image_hashes_%02d' % index_num
    client = Elasticsearch('localhost:9200')
    index_client = IndicesClient(client)
    if index_client.exists(index):
        # Refuse to clobber an existing index.
        print('Not deleting %s!' % index)
        return
    # Bug fix: the original unconditionally called index_client.delete(index)
    # right after this guard; that line could only execute when the index did
    # NOT exist, so it always raised a not-found error and the function could
    # never create a fresh index.  (An unreachable sys.exit(1) after the
    # return above was removed as well.)
    es_short = {
        'type': 'short',
    }
    field_name = lambda i: '%x' % i
    # One 16-bit sampled sub-hash field per sample, plus the raw 64-bit hash
    # stored as a not-analyzed string with doc_values; _source is disabled to
    # keep the index small.
    fields = {field_name(i): es_short for i in range(n_samples)}
    fields['raw'] = {
        'type': 'string',
        'store': True,
        'index': 'not_analyzed',
        'doc_values': True
    }
    index_client.create(index=index, body={
        'settings': {
            'number_of_shards': 4,
            'number_of_replicas': 0
        },
        'mappings': {
            'images': {
                '_source': {'enabled': False},
                'properties': fields
            }
        }
    })
    sampler, pow2 = get_sampler(n_samples, b_p_sample)
    start_time = time.time()
    for i_batch in range(1, n_batches + 1):
        # Random projections -> sign bits -> one 64-bit integer hash per doc.
        # (Renamed from 'hash', which shadowed the builtin.)
        data = np.random.randn(n_batch, dim_in)
        hash_bits = (data.dot(proj) > 0).astype(np.uint64)
        hash_int = hash_bits.dot(2 ** np.arange(dim_out).astype(np.uint64))
        # Bug fix: np.vstack requires a sequence, not a generator (generator
        # input is deprecated/removed in modern NumPy), so build a list.
        sampled = np.vstack([
            hash_bits.dot(sampler[:, :, j]).dot(pow2)
            for j in range(n_samples)
        ]).astype(np.int16).T.tolist()
        docs = []
        for i in range(n_batch):
            doc = {field_name(j): sampled[i][j] for j in range(n_samples)}
            doc['raw'] = '{0:064b}'.format(hash_int[i])
            doc_id = random.getrandbits(63)
            # Bug fix: the action line used to end with '}})', which is not
            # valid JSON and makes Elasticsearch reject every bulk item.
            docs.append('{"index":{"_index": "%s", "_type": "images", "_id": "%d"}}'
                        % (index, doc_id))
            docs.append(json.dumps(doc))
        try:
            client.bulk(body='\n'.join(docs))
        except Exception:
            # Even when an exception is thrown the documents were typically
            # stored in ES anyway, so just back off and keep going.
            sleep_seconds = 10
            print('\rHTTP timed out, sleeping %d seconds...' % sleep_seconds)
            time.sleep(sleep_seconds)
        print('\rChunk %5d/%d, %5.2f%%'
              % (i_batch, n_batches, i_batch * 100.0 / n_batches), end='')
    index_time = time.time()
    print('\nCalling optimize, indexing took %.1f s...' % (index_time - start_time))
    sys.stdout.flush()
    # Force-merge; the huge request_timeout is needed because optimize blocks
    # until the merge completes.
    index_client.optimize(index=index, max_num_segments=3, request_timeout=1e6)
    print('Optimization done in %.1f s' % (time.time() - index_time))