当前位置: 首页>>代码示例>>Python>>正文


Python IndicesClient.optimize方法代码示例

本文整理汇总了Python中elasticsearch.client.IndicesClient.optimize方法的典型用法代码示例。如果您正苦于以下问题：Python IndicesClient.optimize方法的具体用法？Python IndicesClient.optimize怎么用？Python IndicesClient.optimize使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类elasticsearch.client.IndicesClient的用法示例。


在下文中一共展示了IndicesClient.optimize方法的2个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# Required import: from elasticsearch.client import IndicesClient [as alias]
# Note: optimize is a method of IndicesClient and cannot be imported on its own; call it on an instance, e.g. IndicesClient(client).optimize(...)
def main(argv):
    """Recreate the ``user_topics`` index, bulk-load random users, then optimize.

    The index is dropped if it already exists, created with an explicit
    mapping, filled through the bulk API in chunks of 20 000 documents, and
    finally merged down to at most three segments.

    Args:
        argv: Command-line style sequence. ``argv[1]`` is the number of user
            documents to index. ``argv[2]`` and ``argv[3]`` are integers
            that are scaled and passed on to ``rand`` (see the review note
            below).

    Relies on ``Elasticsearch``, ``IndicesClient`` and ``rand`` being
    available at module level; none of them is defined in this block.
    """
    index = 'user_topics'
    client = Elasticsearch('localhost:9200')
    index_client = IndicesClient(client)

    # Start from a clean slate: an existing index of this name is dropped.
    if index_client.exists(index):
        index_client.delete(index)

    index_client.create(index=index, body={
        'settings': {
            'number_of_shards':   4,
            'number_of_replicas': 0
        },
        'mappings': {
            'user': {
                'properties': {
                    'topics': {
                        'type': 'integer',
                        'doc_values': True
                    },
                    'n_topics': {
                        'type': 'integer',
                        'doc_values': True
                    }
                }
            }
        }
    })

    n_users = int(argv[1])
    # NOTE(review): the 0.15 / 4.2 factors turn both values into floats
    # before they reach ``rand``; ``rand`` is defined outside this block, so
    # whether that is intended cannot be confirmed here. Kept as-is.
    n_topics = int(argv[2]) * 0.15
    n_topics_per_user = int(argv[3]) * 4.2

    docs_per_chunk = int(2e4)
    # Integer ceiling division: independent of true-division semantics and
    # free of float rounding for very large counts.
    n_chunks = (n_users + docs_per_chunk - 1) // docs_per_chunk

    start_time = time.time()

    for i_chunk in range(1, n_chunks + 1):
        # The last chunk only carries the remainder, so exactly ``n_users``
        # documents are sent in total instead of a multiple of the chunk size.
        already_sent = (i_chunk - 1) * docs_per_chunk
        chunk_size = min(docs_per_chunk, n_users - already_sent)

        lines = []
        for _ in range(chunk_size):
            n_user_topics = rand(n_topics_per_user)[0]
            topics = list(set(rand(n_topics, n_user_topics)))
            doc_id = str(random.getrandbits(63))

            # Bulk format: one action line followed by one source line.
            # Serialising the action with json.dumps guarantees valid JSON.
            lines.append(json.dumps({
                'index': {'_index': index, '_type': 'user', '_id': doc_id}
            }))
            lines.append(json.dumps({
                'topics':   topics,
                'n_topics': len(topics)
            }))

        try:
            # The bulk API expects the payload to end with a newline.
            response = client.bulk(body='\n'.join(lines) + '\n')
        except Exception as exc:
            # Best effort: even when an exception is thrown the documents
            # were typically stored in ES, so back off and carry on.
            # ``Exception`` (not a bare except) keeps Ctrl-C working.
            sleep_seconds = 10
            print('\rBulk request failed (%r), sleeping %d seconds...'
                  % (exc, sleep_seconds))
            time.sleep(sleep_seconds)
        else:
            # A successful HTTP round trip can still carry per-item failures.
            if response.get('errors'):
                print('\rChunk %d: Elasticsearch reported failed items'
                      % i_chunk)

        print('\rChunk %5d/%d, %5.2f%%'
              % (i_chunk, n_chunks, i_chunk * 100.0 / n_chunks), end='')
        # The progress line has no newline, so flush to make it visible.
        sys.stdout.flush()

    index_time = time.time()
    print('\nCalling optimize, indexing took %.1f s...'
          % (index_time - start_time))
    sys.stdout.flush()

    # The huge request timeout keeps the client waiting for the merge.
    index_client.optimize(index=index, max_num_segments=3, request_timeout=1e6)
    print('Optimization done in %.1f s' % (time.time() - index_time))
开发者ID:VisBlank,项目名称:stackoverflow-scripts,代码行数:78,代码来源:generate.py

示例2: main

# Required import: from elasticsearch.client import IndicesClient [as alias]
# Note: optimize is a method of IndicesClient and cannot be imported on its own; call it on an instance, e.g. IndicesClient(client).optimize(...)
def main(index_num):
    """Create one ``image_hashes_NN`` index, bulk-load random hashes, optimize.

    Random Gaussian vectors are projected and thresholded into bit vectors.
    Each bit vector is stored as a 64-character binary string (``raw``)
    plus ``n_samples`` short-integer fields derived through the sampler.
    After all batches are sent the index is merged down to at most three
    segments.

    Args:
        index_num: Integer used to build the index name
            (``'image_hashes_%02d' % index_num``).

    Relies on module-level names that are not defined in this block:
    ``Elasticsearch``, ``IndicesClient``, ``get_sampler``, ``proj``,
    ``n_samples``, ``b_p_sample``, ``dim_in`` and ``dim_out``.
    """
    n_out = int(10e6)
    n_batch = int(4e3)
    n_batches = n_out // n_batch
    index = 'image_hashes_%02d' % index_num

    client = Elasticsearch('localhost:9200')
    index_client = IndicesClient(client)

    if index_client.exists(index):
        # Deliberate safety guard: never overwrite an existing index.
        print('Not deleting %s!' % index)
        return

    es_short = {
        'type': 'short',
    }

    # Field names are the sample positions in lower-case hexadecimal.
    field_names = ['%x' % i for i in range(n_samples)]
    fields = {name: es_short for name in field_names}
    fields['raw'] = {
        'type': 'string',
        'store': True,
        'index': 'not_analyzed',
        'doc_values': True
    }

    index_client.create(index=index, body={
        'settings': {
            'number_of_shards':   4,
            'number_of_replicas': 0
        },
        'mappings': {
            'images': {
                '_source': {'enabled': False},
                'properties': fields
            }
        }
    })

    sampler, pow2 = get_sampler(n_samples, b_p_sample)
    start_time = time.time()

    for i_batch in range(1, n_batches + 1):
        data = np.random.randn(n_batch, dim_in)
        # One row of 0/1 values per document (named ``bits`` so the builtin
        # ``hash`` is not shadowed).
        bits = (data.dot(proj) > 0).astype(np.uint64)
        # Pack each row of bits into a single unsigned integer.
        bits_int = bits.dot(2**np.arange(dim_out).astype(np.uint64))

        # np.vstack needs a real sequence; generators are deprecated and
        # rejected by newer NumPy releases.
        sampled = np.vstack([
            bits.dot(sampler[:, :, j]).dot(pow2)
            for j in range(n_samples)
        ]).astype(np.int16).T.tolist()

        lines = []
        # ``tolist()`` yields plain Python ints, so the binary formatting
        # below does not depend on NumPy scalar ``__format__`` behaviour.
        for row, raw_int in zip(sampled, bits_int.tolist()):
            doc = dict(zip(field_names, row))
            doc['raw'] = '{0:064b}'.format(raw_int)
            doc_id = random.getrandbits(63)

            # Bulk format: one action line followed by one source line.
            # Serialising the action with json.dumps guarantees valid JSON.
            lines.append(json.dumps({
                'index': {
                    '_index': index, '_type': 'images', '_id': str(doc_id)
                }
            }))
            lines.append(json.dumps(doc))

        try:
            # The bulk API expects the payload to end with a newline.
            response = client.bulk(body='\n'.join(lines) + '\n')
        except Exception as exc:
            # Best effort: even when an exception is thrown the documents
            # were typically stored in ES, so back off and carry on.
            # ``Exception`` (not a bare except) keeps Ctrl-C working.
            sleep_seconds = 10
            print('\rBulk request failed (%r), sleeping %d seconds...'
                  % (exc, sleep_seconds))
            time.sleep(sleep_seconds)
        else:
            # A successful HTTP round trip can still carry per-item failures.
            if response.get('errors'):
                print('\rChunk %d: Elasticsearch reported failed items'
                      % i_batch)

        print('\rChunk %5d/%d, %5.2f%%'
              % (i_batch, n_batches, i_batch * 100.0 / n_batches), end='')
        # The progress line has no newline, so flush to make it visible.
        sys.stdout.flush()

    index_time = time.time()
    print('\nCalling optimize, indexing took %.1f s...'
          % (index_time - start_time))
    sys.stdout.flush()

    # The huge request timeout keeps the client waiting for the merge.
    index_client.optimize(index=index, max_num_segments=3, request_timeout=1e6)
    print('Optimization done in %.1f s' % (time.time() - index_time))
开发者ID:VisBlank,项目名称:stackoverflow-scripts,代码行数:88,代码来源:generate.py


注:本文中的elasticsearch.client.IndicesClient.optimize方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。