本文整理汇总了Python中pyes.ES.optimize方法的典型用法代码示例。如果您正苦于以下问题:Python ES.optimize方法的具体用法?Python ES.optimize怎么用?Python ES.optimize使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pyes.ES
的用法示例。
在下文中一共展示了ES.optimize方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: ProcessSpiderData
# 需要导入模块: from pyes import ES [as 别名]
# 或者: from pyes.ES import optimize [as 别名]
class ProcessSpiderData(Task):
def run(self, spider_name):
    """Rebuild the Elasticsearch index for *spider_name* from its scraped data.

    Steps: create a fresh timestamped index, bulk-index every processed
    item, atomically repoint the alias, delete the old indices, optimize,
    and finally store a bz2-compressed hex backup of the processed items.

    Returns True on completion (Celery task convention).
    """
    cities = []
    backup_source = []
    backup_created_date = None

    self.elastic = ES(settings.SEARCH_HOSTS, timeout=22.0, bulk_size=1500)
    java = JavaInterface()
    self.extractor = java.ArticleSentencesExtractor.INSTANCE
    self.logger = ProcessSpiderData.get_logger()

    spider = Data.objects.get(name=spider_name)
    source = spider.source
    if spider and len(source):
        backup_created_date = spider.created_date
        # timestamped name so the alias can be swapped atomically later
        index_new = '%s_%d' % (spider.name, int(time.time()))
        # create new index (not connected to alias)
        self.elastic.create_index(index_new)
        self.elastic.put_mapping('job', {'job': {'properties': mapping}}, index_new)
        for item in source:
            item = self._process_content(item)
            item = self._get_location(item)
            # `in` instead of deprecated dict.has_key() (removed in Python 3)
            if 'city' in item:
                cities.append(item['city'])
            self._create_index(index_new, item)
            backup_source.append(item)
        # save new index (in bulk)
        self.elastic.force_bulk()
        # repoint the alias at the new index, then drop the old indices
        indices_old = self.elastic.get_alias(spider.name)
        self.elastic.set_alias(spider.name, [index_new])
        for index in indices_old:
            self.elastic.delete_index_if_exists(index)
        # optimize segments; refresh makes the new docs searchable at once
        self.elastic.optimize(index_new, refresh=True)
    # save backup (currently processed data); guard ensures we only back up
    # when something was actually processed
    if len(backup_source) and backup_created_date:
        self._process_cities(set(cities), spider_name)
        cache.clear()
        obj = DataBackup.objects.get_or_create(
            name=spider_name,
            created_date=backup_created_date
        )
        # bz2 + hexlify keeps the JSON blob compact and text-column safe
        obj[0].source = binascii.hexlify(bz2.compress(
            JSONEncoder().encode(backup_source)
        ))
        obj[0].save()
    # force java & ES garbage collection
    self.elastic.connection.close()
    del self.extractor
    del java
    return True
def _process_content(self, item):
    """Run the Java sentence extractor over a non-empty 'content' field."""
    content = item['content']
    if len(content):
        # jpype bridges the Python string into the JVM for the extractor
        item['content'] = self.extractor.getText(jpype.JString(content))
    return item
def _get_location(self, item):
    """Geocode item['city'] via GeoNames and attach coordinates as item['pin'].

    Best effort: any geocoding failure (network, parse, no match) leaves
    the item unchanged. Items without a 'city' key are returned as-is.
    """
    # `in` instead of deprecated dict.has_key() (removed in Python 3)
    if 'city' not in item:
        return item
    try:
        geo = geocoders.GeoNames()
        places = geo.geocode(item['city'].encode('utf-8'), exactly_one=False)
        if places:
            # geocode() may return a single (place, (lat, lon)) pair or a list
            place, (lat, lon) = places[0] if isinstance(places, list) else places
            if place:
                item['pin'] = {
                    'location': {'lat': lat, 'lon': lon}
                }
    except Exception:
        # Best-effort lookup: swallow geocoder errors so one bad item does
        # not abort the whole run. Narrowed from bare `except:` so that
        # KeyboardInterrupt/SystemExit still propagate.
        pass
    return item
def _create_index(self, index, item):
id = item['id']
del item['id']
try:
self.elastic.get(index, 'job', id)
except ElasticSearchException:
self.elastic.index(
dumps(item, cls=DjangoJSONEncoder),
#.........这里部分代码省略.........
示例2: ESIndexerBase
# 需要导入模块: from pyes import ES [as 别名]
# 或者: from pyes.ES import optimize [as 别名]
class ESIndexerBase(object):
    """Thin wrapper around a pyes ES connection for creating, populating,
    maintaining and querying the gene index.

    NOTE: Python 2 code (print statements, reload()); output formatting of
    the interleaved prints is part of the observable behavior.
    """
    ES_HOST = ES_HOST              # pulled in from module-level config
    ES_INDEX_NAME = ES_INDEX_NAME  # pulled in from module-level config
    ES_INDEX_TYPE = 'gene'

    def __init__(self):
        # default_indexes makes ES_INDEX_NAME the implicit target of calls
        self.conn = ES(self.ES_HOST, default_indexes=[self.ES_INDEX_NAME],
                       timeout=10.0)
        # batch size for callers that index in chunks
        self.step = 10000

    def create_index(self):
        """Open the index, creating it first if it does not exist yet."""
        try:
            print self.conn.open_index(self.ES_INDEX_NAME)
        except IndexMissingException:
            print self.conn.create_index(self.ES_INDEX_NAME)

    def delete_index_type(self, index_type):
        '''Delete all indexes for a given index_type.'''
        index_name = self.ES_INDEX_NAME
        # index_type = self.ES_INDEX_TYPE
        #Check if index_type exists
        mapping = self.conn.get_mapping(index_type, index_name)
        if index_name not in mapping or index_type not in mapping[index_name]:
            print 'Error: index type "%s" does not exist in index "%s".' % (index_type, index_name)
            return
        path = '/%s/%s' % (index_name, index_type)
        # interactive confirmation before destroying data
        if ask('Confirm to delete all data under "%s":' % path) == 'Y':
            return self.conn.delete_mapping(index_name, index_type)

    def index(self, doc, index_type, id=None):
        '''add a doc to the index. If id is not None, the existing doc will be
        updated.
        '''
        # index_type = self.ES_INDEX_TYPE
        return self.conn.index(doc, self.ES_INDEX_NAME, index_type, id=id)

    def delete_index(self, index_type, id):
        '''delete a doc from the index based on passed id.'''
        # index_type = self.ES_INDEX_TYPE
        return self.conn.delete(self.ES_INDEX_NAME, index_type, id)

    def optimize(self):
        """Merge index segments; wait_for_merge blocks until the merge ends."""
        return self.conn.optimize(self.ES_INDEX_NAME, wait_for_merge=True)

    def get_field_mapping(self):
        """Build the field mapping from the registered dataload sources.

        reload() picks up edits to the dataload module without restarting
        the process (Python 2 builtin).
        """
        import dataload
        reload(dataload)
        dataload.register_sources()
        return dataload.get_mapping()

    def build_index(self, doc_d, update_mapping=False, bulk=True):
        """Index every (doc_id, doc) pair from *doc_d*.

        Optionally (re)installs the type mapping first. Returns -1 when the
        target index does not exist; otherwise prints progress and timing.
        """
        index_name = self.ES_INDEX_NAME
        index_type = self.ES_INDEX_TYPE
        #Test if index exists
        try:
            print "Opening index...", self.conn.open_index(index_name)
        except NotFoundException:
            print 'Error: index "%s" does not exist. Create it first.' % index_name
            return -1
        try:
            # NOTE(review): cur_mapping is only probed for existence, never read
            cur_mapping = self.conn.get_mapping(index_type, index_name)
            empty_mapping = False
        except ElasticSearchException:
            #if no existing mapping available for index_type
            #force update_mapping to True
            empty_mapping = True
            update_mapping = True
        # empty_mapping = not cur_mapping[index_name].get(index_type, {})
        # if empty_mapping:
        #     #if no existing mapping available for index_type
        #     #force update_mapping to True
        #     update_mapping = True
        if update_mapping:
            print "Updating mapping...",
            if not empty_mapping:
                # an existing mapping must be removed before it can be replaced
                print "\n\tRemoving existing mapping...",
                print self.conn.delete_mapping(index_name, index_type)
            _mapping = self.get_field_mapping()
            print self.conn.put_mapping(index_type,
                                        _mapping,
                                        [index_name])
        print "Building index..."
        t0 = time.time()
        for doc_id, doc in doc_d.items():
            self.conn.index(doc, index_name, index_type, doc_id, bulk=bulk)
        # flush pending bulk operations, then refresh so docs are searchable
        print self.conn.flush()
        print self.conn.refresh()
        print "Done[%s]" % timesofar(t0)

    def query(self, qs, fields='symbol,name', **kwargs):
        """Run a query-string search against the index; returns pyes results."""
        _q = StringQuery(qs)
        res = self.conn.search(_q, fields=fields, **kwargs)
        return res