本文整理汇总了Python中pyes.ES.create_index方法的典型用法代码示例。如果您正苦于以下问题:Python ES.create_index方法的具体用法?Python ES.create_index怎么用?Python ES.create_index使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pyes.ES
的用法示例。
在下文中一共展示了ES.create_index方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: setUp
# 需要导入模块: from pyes import ES [as 别名]
# 或者: from pyes.ES import create_index [as 别名]
def setUp(self):
    """Prepare a fresh ElasticSearch 'default' index and start replication.

    Reads ``es_host``/``es_port`` from the test input params.  The index is
    only torn down and recreated when cleanup is requested or this is the
    first test case of the run.
    """
    self.es_host = None
    self.es_cluster_name = None
    self._state = []
    super(ElasticSearchSupport, self).setUp()
    self.es_host = self.input.param("es_host", "127.0.0.1")
    self.es_port = self.input.param("es_port", 9091)
    # NOTE(review): the connection hard-codes port 9200 (ES HTTP) and ignores
    # self.es_port (default 9091) — confirm whether es_port is only consumed
    # by the replication helpers below or should be used here.
    conn = ES(self.es_host + ":9200")
    if not self.input.param("skip_cleanup", True) or self.case_number == 1:
        conn.delete_index_if_exists("default")
        conn.create_index("default")
        self.log.warning("waiting for ES index to be ready to use")
        # give the cluster time to allocate shards before replication starts
        time.sleep(30)
    self._link_es_cluster()
    self._start_es_replication()
    self.log.warning("after setUp es")
示例2: DatabaseWrapper
# 需要导入模块: from pyes import ES [as 别名]
# 或者: from pyes.ES import create_index [as 别名]
class DatabaseWrapper(NonrelDatabaseWrapper):
    """Django-nonrel database wrapper backed by a pyes ``ES`` connection.

    The connection is created lazily on first use; the index named by the
    ``NAME`` setting is auto-created (best effort) at connect time.
    """

    def __init__(self, *args, **kwds):
        super(DatabaseWrapper, self).__init__(*args, **kwds)
        self.features = DatabaseFeatures(self)
        self.ops = DatabaseOperations(self)
        self.client = DatabaseClient(self)
        self.creation = DatabaseCreation(self)
        self.validation = DatabaseValidation(self)
        self.introspection = DatabaseIntrospection(self)
        self._is_connected = False

    def _cursor(self):
        self._ensure_is_connected()
        return self._connection

    @property
    def db_connection(self):
        """The lazily-created ES connection (same object as the cursor)."""
        self._ensure_is_connected()
        return self._db_connection

    def _ensure_is_connected(self):
        """Open the ES connection on first call; no-op afterwards.

        Raises ImproperlyConfigured if the ``PORT`` setting is not an integer.
        """
        if not self._is_connected:
            try:
                port = int(self.settings_dict["PORT"])
            except ValueError:
                raise ImproperlyConfigured("PORT must be an integer")
            self.db_name = self.settings_dict["NAME"]
            self._connection = ES(
                "%s:%s" % (self.settings_dict["HOST"], port),
                decoder=Decoder,
                encoder=Encoder,
                autorefresh=True,
                default_indices=[self.db_name],
            )
            self._db_connection = self._connection
            # Best-effort auto index creation (the index usually already
            # exists).  Narrowed from a bare `except:` so KeyboardInterrupt /
            # SystemExit are no longer swallowed.
            try:
                self._connection.create_index(self.db_name)
            except Exception:
                pass
            # We're done!
            self._is_connected = True
示例3: index
# 需要导入模块: from pyes import ES [as 别名]
# 或者: from pyes.ES import create_index [as 别名]
def index(fname, index_name, keys_to_tag):
fptr = open(fname, 'rb')
line_count = 0
conn = ES(["localhost:9200"])
if not conn.exists_index(index_name):
conn.create_index(index_name)
start = time.clock()
numb_exceptions = 0
for line in fptr:
if ((line_count % 10000) == 0):
end = time.clock()
minutes = (end - start) / 60.0
print 'File: %s Done with %d took %f min. ' %(fname, line_count, minutes)
print 'number of exceptions ', numb_exceptions
line_count += 1
data = json.loads(line)
if not data.get('tags'):
continue
post_id = int(data['post_id'])
found_content = False
for k in keys_to_tag:
if data.get(k):
found_content = True
if not found_content:
continue
index_data = dict()
for k in keys_to_tag:
value = data.get(k)
if (value and (k == 'content')):
try:
stripped_value = utils.strip_tags(value)
except Exception:
stripped_value = value
index_data[k] = stripped_value
if post_id and data:
try:
conn.index(index_data, index_name, "test-type", post_id)
except Exception:
numb_exceptions += 1
continue
print 'number of exceptions ', numb_exceptions
示例4: init
# 需要导入模块: from pyes import ES [as 别名]
# 或者: from pyes.ES import create_index [as 别名]
def init():
    """(Re)build the 'zhihu' ES index from scratch and index all answers.

    Drops any existing index, recreates it with a mapping for the 'answer'
    doc type, indexes every item from Data().getData(), then redirects to
    the listing page.
    """
    conn = ES('127.0.0.1:9200')
    # Best effort: the index may not exist yet on first run.  Narrowed from
    # a bare `except:` so KeyboardInterrupt/SystemExit still propagate.
    try:
        conn.delete_index("zhihu")
    except Exception:
        pass
    conn.create_index("zhihu")
    mapping = {
        u'id': {'store': 'yes',
                'type': u'integer'},
        u'link': {'store': 'yes',
                  'type': u'string'},
        u'title': {'boost': 1.0,
                   'index': 'analyzed',
                   'store': 'yes',
                   'type': u'string'},
    }
    conn.put_mapping("answer", {'properties': mapping}, ["zhihu"])
    for item in Data().getData():
        conn.index(item, "zhihu", "answer", item['id'])
    conn.refresh(["zhihu"])
    return redirect('/list')
示例5: ext_process
# 需要导入模块: from pyes import ES [as 别名]
# 或者: from pyes.ES import create_index [as 别名]
def ext_process(listname, hostname, url, filepath, msg):
    """Here's where you put your code to deal with the just archived message.

    Arguments here are the list name, the host name, the URL to the just
    archived message, the file system path to the just archived message and
    the message object.
    These can be replaced or augmented as needed.
    """
    from pyes import ES
    from pyes.exceptions import ClusterBlockException, NoServerAvailable
    import datetime
    # CHANGE this settings to reflect your configuration
    _ES_SERVERS = ['127.0.0.1:9500']  # I prefer thrift
    _indexname = "mailman"
    _doctype = "mail"
    date = datetime.datetime.today()
    try:
        iconn = ES(_ES_SERVERS)
        status = None
        try:
            status = iconn.status(_indexname)
            logger.debug("Indexer status:%s" % status)
        except Exception:
            # Narrowed from a bare `except:`: any failure reading the status
            # is treated as "index missing" and the index is created.
            iconn.create_index(_indexname)
            time.sleep(1)
            status = iconn.status(_indexname)
        # put_mapping is idempotent, so (re)applying it per message is safe.
        mappings = {u'text': {'boost': 1.0,
                              'index': 'analyzed',
                              'store': 'yes',
                              'type': u'string',
                              "term_vector": "with_positions_offsets"},
                    u'url': {'boost': 1.0,
                             'index': 'not_analyzed',
                             'store': 'yes',
                             'type': u'string',
                             "term_vector": "no"},
                    u'title': {'boost': 1.0,
                               'index': 'analyzed',
                               'store': 'yes',
                               'type': u'string',
                               "term_vector": "with_positions_offsets"},
                    u'date': {'store': 'yes',
                              'type': u'date'}}
        time.sleep(1)
        status = iconn.put_mapping(_doctype, mappings, _indexname)
        data = dict(url=url,
                    title=msg.get('subject'),
                    date=date,
                    text=str(msg)
                    )
        iconn.index(data, _indexname, _doctype)
        syslog('debug', 'listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
               listname, hostname, url, filepath, msg)
    except ClusterBlockException:
        # Fixed typo in the log message: "revocery" -> "recovery".
        syslog('error', 'Cluster in recovery state: listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
               listname, hostname, url, filepath, msg)
    except NoServerAvailable:
        syslog('error', 'No server available: listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
               listname, hostname, url, filepath, msg)
    except Exception:
        # Last-resort logging; narrowed from bare `except:` so Ctrl-C and
        # SystemExit still propagate.
        import traceback
        syslog('error', 'Unknown: listname: %s, hostname: %s, url: %s, path: %s, msg: %s\nstacktrace: %s',
               listname, hostname, url, filepath, msg, repr(traceback.format_exc()))
    return
示例6:
# 需要导入模块: from pyes import ES [as 别名]
# 或者: from pyes.ES import create_index [as 别名]
dataset = shelve.open("samples.shelve")
mapping = { u'description': {'boost': 1.0,
'index': 'analyzed',
'store': 'yes',
'type': u'string',
"term_vector" : "with_positions_offsets"
},
u'name': {'boost': 1.0,
'index': 'analyzed',
'store': 'yes',
'type': u'string',
"term_vector" : "with_positions_offsets"
},
u'age': {'store': 'yes',
'type': u'integer'},
}
conn.create_index("test-index")
conn.put_mapping("test-type", {'properties':mapping}, ["test-index"])
start = datetime.now()
for k, userdata in dataset.items():
# conn.index(userdata, "test-index", "test-type", k)
conn.index(userdata, "test-index", "test-type", k, bulk=True)
conn.force_bulk()
end = datetime.now()
print "time:", end-start
dataset.close()
示例7: ProcessSpiderData
# 需要导入模块: from pyes import ES [as 别名]
# 或者: from pyes.ES import create_index [as 别名]
class ProcessSpiderData(Task):
def run(self, spider_name):
    """Re-index one spider's scraped jobs into a fresh, timestamped ES index.

    Builds a brand-new index, fills it in bulk, repoints the spider-name
    alias to it, drops the old indices, and finally persists a compressed
    JSON backup of the processed source data.
    """
    cities = []
    backup_source = []
    backup_created_date = None
    self.elastic = ES(settings.SEARCH_HOSTS, timeout=22.0, bulk_size=1500)
    java = JavaInterface()
    self.extractor = java.ArticleSentencesExtractor.INSTANCE
    self.logger = ProcessSpiderData.get_logger()
    spider = Data.objects.get(name=spider_name)
    source = spider.source
    if spider and len(source):
        backup_created_date = spider.created_date
        # Unique timestamped name so the alias swap below is effectively atomic.
        index_new = '%s_%d' % (spider.name, int(time.time()))
        # create new index (not connected to alias)
        self.elastic.create_index(index_new)
        self.elastic.put_mapping('job', {'job': {'properties': mapping}}, index_new)
        for item in source:
            item = self._process_content(item)
            item = self._get_location(item)
            if item.has_key('city'):
                cities.append(item['city'])
            self._create_index(index_new, item)
            backup_source.append(item)
        # save new index (in bulk)
        self.elastic.force_bulk()
        # create alias
        indices_old = self.elastic.get_alias(spider.name)
        self.elastic.set_alias(spider.name, [index_new])
        # delete all indices previously behind the alias
        for index in indices_old:
            self.elastic.delete_index_if_exists(index)
        # optimize
        self.elastic.optimize(index_new, refresh=True)
    # save backup (currently processed data); only reached with data when the
    # branch above actually processed items
    if len(backup_source) and backup_created_date:
        self._process_cities(set(cities), spider_name)
        cache.clear()
        obj = DataBackup.objects.get_or_create(
            name=spider_name,
            created_date=backup_created_date
        )
        # store the backup as hex-encoded bz2-compressed JSON
        obj[0].source = binascii.hexlify(bz2.compress(
            JSONEncoder().encode(backup_source)
        ))
        obj[0].save()
    # force java & ES garbage collection
    self.elastic.connection.close()
    del self.extractor
    del java
    return True
def _process_content(self, item):
    """Replace raw item['content'] with sentences extracted by the Java helper.

    Empty content is left untouched; the item is returned either way.
    """
    raw = item['content']
    if len(raw):
        item['content'] = self.extractor.getText(jpype.JString(raw))
    return item
def _get_location(self, item):
    """Geocode item['city'] via GeoNames and attach a 'pin' lat/lon.

    Best effort: geocoding failures (network errors, unknown city) leave the
    item unchanged.  Items without a 'city' key are returned as-is.
    """
    # `in` replaces Py2-only dict.has_key (works on both Python 2 and 3).
    if 'city' not in item:
        return item
    try:
        geo = geocoders.GeoNames()
        places = geo.geocode(item['city'].encode('utf-8'), exactly_one=False)
        if places:
            place, (lat, lon) = places[0] if isinstance(places, list) else places
            if place:
                item['pin'] = {
                    'location': {'lat': lat, 'lon': lon}
                }
    except Exception:
        # Narrowed from bare `except: pass`; geocoding stays optional.
        pass
    return item
def _create_index(self, index, item):
id = item['id']
del item['id']
try:
self.elastic.get(index, 'job', id)
except ElasticSearchException:
self.elastic.index(
dumps(item, cls=DjangoJSONEncoder),
#.........这里部分代码省略.........
示例8: ESIndexerBase
# 需要导入模块: from pyes import ES [as 别名]
# 或者: from pyes.ES import create_index [as 别名]
class ESIndexerBase(object):
    """Base helper for building and querying a pyes-backed gene index."""
    ES_HOST = ES_HOST
    ES_INDEX_NAME = ES_INDEX_NAME
    ES_INDEX_TYPE = 'gene'

    def __init__(self):
        # single shared connection; default_indexes scopes all calls below
        self.conn = ES(self.ES_HOST, default_indexes=[self.ES_INDEX_NAME],
                       timeout=10.0)
        # batch size used by subclasses when iterating docs
        self.step = 10000

    def create_index(self):
        """Open the index, creating it first if it does not exist yet."""
        try:
            print self.conn.open_index(self.ES_INDEX_NAME)
        except IndexMissingException:
            print self.conn.create_index(self.ES_INDEX_NAME)

    def delete_index_type(self, index_type):
        '''Delete all indexes for a given index_type.'''
        index_name = self.ES_INDEX_NAME
        # index_type = self.ES_INDEX_TYPE
        #Check if index_type exists
        mapping = self.conn.get_mapping(index_type, index_name)
        if index_name not in mapping or index_type not in mapping[index_name]:
            print 'Error: index type "%s" does not exist in index "%s".' % (index_type, index_name)
            return
        path = '/%s/%s' % (index_name, index_type)
        # interactive confirmation guards against accidental bulk data loss
        if ask('Confirm to delete all data under "%s":' % path) == 'Y':
            return self.conn.delete_mapping(index_name, index_type)

    def index(self, doc, index_type, id=None):
        '''add a doc to the index. If id is not None, the existing doc will be
        updated.
        '''
        # index_type = self.ES_INDEX_TYPE
        return self.conn.index(doc, self.ES_INDEX_NAME, index_type, id=id)

    def delete_index(self, index_type, id):
        '''delete a doc from the index based on passed id.'''
        # index_type = self.ES_INDEX_TYPE
        return self.conn.delete(self.ES_INDEX_NAME, index_type, id)

    def optimize(self):
        # block until segment merging finishes so the index is fully compacted
        return self.conn.optimize(self.ES_INDEX_NAME, wait_for_merge=True)

    def get_field_mapping(self):
        """Return the field mapping assembled by the dataload module."""
        import dataload
        # reload so mapping edits are picked up without restarting the process
        reload(dataload)
        dataload.register_sources()
        return dataload.get_mapping()

    def build_index(self, doc_d, update_mapping=False, bulk=True):
        """Index every doc in *doc_d*; optionally refresh the type mapping first.

        Returns -1 when the target index does not exist (create it first).
        """
        index_name = self.ES_INDEX_NAME
        index_type = self.ES_INDEX_TYPE
        #Test if index exists
        try:
            print "Opening index...", self.conn.open_index(index_name)
        except NotFoundException:
            print 'Error: index "%s" does not exist. Create it first.' % index_name
            return -1
        try:
            cur_mapping = self.conn.get_mapping(index_type, index_name)
            empty_mapping = False
        except ElasticSearchException:
            #if no existing mapping available for index_type
            #force update_mapping to True
            empty_mapping = True
            update_mapping = True
        # empty_mapping = not cur_mapping[index_name].get(index_type, {})
        # if empty_mapping:
        #     #if no existing mapping available for index_type
        #     #force update_mapping to True
        #     update_mapping = True
        if update_mapping:
            print "Updating mapping...",
            if not empty_mapping:
                # an existing mapping must be dropped before it can be replaced
                print "\n\tRemoving existing mapping...",
                print self.conn.delete_mapping(index_name, index_type)
            _mapping = self.get_field_mapping()
            print self.conn.put_mapping(index_type,
                                        _mapping,
                                        [index_name])
        print "Building index..."
        t0 = time.time()
        for doc_id, doc in doc_d.items():
            self.conn.index(doc, index_name, index_type, doc_id, bulk=bulk)
        print self.conn.flush()
        print self.conn.refresh()
        print "Done[%s]" % timesofar(t0)

    def query(self, qs, fields='symbol,name', **kwargs):
        """Full-text StringQuery over the index; *fields* limits returned fields."""
        _q = StringQuery(qs)
        res = self.conn.search(_q, fields=fields, **kwargs)
        return res
示例9: ElasticCatalog
# 需要导入模块: from pyes import ES [as 别名]
# 或者: from pyes.ES import create_index [as 别名]
class ElasticCatalog(object):
default_indexes = {
'zelastic_doc_id': {
'type': 'string',
'index': 'not_analyzed'
}
}
def __init__(self, connection_string, elastic_name, storage, bulk=False,
             bulk_size=400):
    """Wire the catalog to an ES cluster and a zelastic storage backend."""
    self.name = elastic_name
    self.storage = storage
    self.bulk = bulk
    self.bulk_size = bulk_size
    self.conn = ES(connection_string, bulk_size=bulk_size)
def update_mapping(self, name):
    """(Re)build the ES field mapping for container *name* from its stored meta.

    The index is created on demand; zelastic index types are translated to
    ES field definitions, and unknown types are silently skipped.
    """
    meta = self.storage.meta(name)
    indexes = meta['indexes']
    properties = self.default_indexes.copy()
    try:
        self.conn.create_index(self.name)
    except IndexAlreadyExistsException:
        pass
    # zelastic index type -> ES field definition
    es_field_types = {
        'str': {'type': 'string', 'index': 'not_analyzed'},
        'full': {'type': 'string', 'index': 'analyzed'},
        'bool': {'type': 'boolean'},
        'int': {'type': 'integer'},
        'datetime': {'type': 'date'},
        'date': {'type': 'date'},
        'float': {'type': 'float'},
    }
    for field_name, field_type in indexes.items():
        definition = es_field_types.get(field_type)
        if definition is not None:
            # copy so a later tweak to `properties` can't alias the table
            properties[field_name] = dict(definition)
    self.conn.indices.put_mapping(
        doc_type=name,
        mapping={
            'ignore_conflicts': True,
            'properties': properties
        },
        indices=[self.name])
def id(self, container_name, key):
return '%s-%s' % (container_name, key)
def index(self, container_name, doc, key):
    """Index *doc* under its container, tagging it with its zelastic key."""
    # zelastic_doc_id is searchable but never persisted back to storage
    payload = {
        'zelastic_doc_id': key
    }
    meta = self.storage.meta(container_name)
    for field in meta['indexes'].keys():
        if field in doc:
            payload[field] = doc[field]
    self.conn.index(
        payload,
        self.name,
        container_name,
        self.id(container_name, key),
        bulk=self.bulk)
def delete(self, container_name, key):
    """Remove one document from the index (honours bulk mode)."""
    doc_id = self.id(container_name, key)
    self.conn.delete(self.name, container_name, doc_id, bulk=self.bulk)
def delete_all(self, container_name):
    """Drop the whole mapping — and thus every document — for *container_name*."""
    self.conn.delete_mapping(self.name, container_name)
def search(self, container_name, query, **kwargs):
    """Run *query* against this catalog, scoped to one container's doc type."""
    return self.conn.search(query,
                            indexes=[self.name],
                            doc_types=[container_name],
                            **kwargs)
#.........这里部分代码省略.........
示例10: ES
# 需要导入模块: from pyes import ES [as 别名]
# 或者: from pyes.ES import create_index [as 别名]
from __future__ import unicode_literals

from pyes import ES

if __name__ == "__main__":
    # Make sure both indices exist before any documents are indexed.
    conn = ES(["localhost:9200"])
    for index_name in ("content_index", "title_index"):
        if not conn.exists_index(index_name):
            conn.create_index(index_name)