This article collects typical usage examples of the Python method pyelasticsearch.ElasticSearch.create_index. If you are wondering what exactly ElasticSearch.create_index does or how to use it, the hand-picked code examples below should help. You can also explore the containing class, pyelasticsearch.ElasticSearch, for more context.
The following 15 code examples of ElasticSearch.create_index are sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python examples.
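Before the examples, here is a minimal sketch of the method itself. It assumes an Elasticsearch node at http://localhost:9200; the index name and settings are illustrative:

from pyelasticsearch import ElasticSearch
from pyelasticsearch.exceptions import IndexAlreadyExistsError

es = ElasticSearch('http://localhost:9200/')
try:
    # The optional settings argument becomes the body of the create request.
    es.create_index('my_index', settings={'number_of_shards': 1})
except IndexAlreadyExistsError:
    pass  # already created on an earlier run; fine for this sketch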
Example 1: ElasticSearchBackend
# Module to import: from pyelasticsearch import ElasticSearch [as alias]
# Or: from pyelasticsearch.ElasticSearch import create_index [as alias]
class ElasticSearchBackend(BaseBackend):
    def __init__(self, es_url='http://localhost:9200/', batch_size=10, **kwargs):
        """
        Do what is necessary to create/open the index.
        """
        self.batch_size = batch_size
        self.batch_count = 0
        self.es_url = es_url
        self.fast = kwargs.get('fast', False)
        if kwargs.get('noisy', False):
            from logging import getLogger, StreamHandler, DEBUG
            import sys
            logger = getLogger('pyelasticsearch')
            logger.setLevel(DEBUG)
            logger.addHandler(StreamHandler(sys.stdout))
        self.es = ElasticSearch(self.es_url)
        try:
            self.es.count('*')
        except ConnectionError:
            print("Error connecting to ElasticSearch server!")
            raise
        self.urls = defaultdict(set)  # track urls to be deleted before committing new content
        self.batches = defaultdict(list)  # site: [list of docs]

    def create_index(self, name):
        name = name.lower()  # Elasticsearch index names must be lowercase
        try:
            self.es.create_index(name)
            self.update_mapping(name)
        except Exception as e:
            print(e)
            return
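The name.lower() call above is not cosmetic: Elasticsearch rejects index names containing upper-case letters. A tiny hypothetical demonstration (server URL assumed):

es = ElasticSearch('http://localhost:9200/')
es.create_index('MyIndex'.lower())  # created as 'myindex'
# es.create_index('MyIndex') would fail with an invalid-index-name error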
Example 2: main
# Module to import: from pyelasticsearch import ElasticSearch [as alias]
# Or: from pyelasticsearch.ElasticSearch import create_index [as alias]
def main():
    """
    Kick things off: set up workers, then scrape and store asynchronously.
    """
    # Setup workers
    pool = Pool(processes=CPU_COUNT)
    # Prepare URLs
    urls = []
    for url in CRAWL_URLS:
        urls.append(str(BASE_URL + url))
    if USE_ES:
        # Create connection
        es = ElasticSearch(ES_URL)
        try:
            # Delete the existing index
            es.delete_index(ES_INDEX)
        except Exception:
            # In case the index does not exist
            pass
        # Create the index to use
        es.create_index(ES_INDEX)
    else:
        # Setup the database tables, connect
        init_db()
    # Scrape and store async
    pool.map(scrape, urls)
Example 3: analyze_post
# Module to import: from pyelasticsearch import ElasticSearch [as alias]
# Or: from pyelasticsearch.ElasticSearch import create_index [as alias]
def analyze_post(token, text):
    response = {
        'post_now': False,
        'hours_to_wait': 1,
        'total_score': 0,
        'time_score': 0,
        'text_score': 0,
        'hint': "Building index",
    }
    try:
        data = Newsfeed.filter_only_posts_by_people(token)
    except Exception:
        es = ElasticSearch('http://localhost:9200/')
        try:
            es.create_index(token.lower())
            Newsfeed.newsfeed(token, [], 0, None, 1)
            t = threading.Thread(target=Newsfeed.newsfeed, args=(token, [], 0, None, 1500))
            t.setDaemon(True)
            t.start()
        except Exception as e:
            print(e)
Example 4: update_process_datetime
# Module to import: from pyelasticsearch import ElasticSearch [as alias]
# Or: from pyelasticsearch.ElasticSearch import create_index [as alias]
def update_process_datetime(doc_id, timestamp):
    ''' Updates the last_update_date for the document id passed into the function.
        The document id passed in will be the name of another index in the cluster.
    '''
    connection_string = 'http://localhost:9200'
    process_index = 'openfdametadata'
    _type = 'last_run'
    _map = {
        _type: {
            'properties': {
                'last_update_date': {
                    'type': 'date',
                    'format': 'dateOptionalTime'
                }
            }
        }
    }

    es = ElasticSearch(connection_string)
    try:
        es.create_index(process_index)
        logging.info('Creating index %s', process_index)
    except exceptions.IndexAlreadyExistsError:
        logging.info('%s already exists', process_index)

    try:
        es.put_mapping(process_index, doc_type=_type, mapping=_map)
        logging.info('Successfully created mapping')
    except Exception:
        logging.fatal('Could not create the mapping')

    new_doc = {'last_update_date': timestamp}
    es.index(process_index,
             doc_type=_type,
             id=doc_id,
             doc=new_doc,
             overwrite_existing=True)
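A companion sketch for reading the stored timestamp back, assuming the index and document written by update_process_datetime above (the document id is hypothetical):

es = ElasticSearch('http://localhost:9200')
doc = es.get('openfdametadata', 'last_run', 'some_index_name')
print(doc['_source']['last_update_date'])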
Example 5: cli
# Module to import: from pyelasticsearch import ElasticSearch [as alias]
# Or: from pyelasticsearch.ElasticSearch import create_index [as alias]
def cli(index_name, delete_index, mapping_file, settings_file, doc_type,
        import_file, delimiter, tab, host, docs_per_chunk, bytes_per_chunk,
        parallel, quiet):
    """
    Bulk import a delimited file into a target Elasticsearch instance. Common
    delimited files include things like CSV and TSV.

    \b
    Load a CSV file:
      csv2es --index-name potatoes --doc-type potato --import-file potatoes.csv

    \b
    For a TSV file, note the tab delimiter option:
      csv2es --index-name tomatoes --doc-type tomato \
             --import-file tomatoes.tsv --tab

    \b
    For a nifty pipe-delimited file (delimiters must be one character):
      csv2es --index-name pipes --doc-type pipe --import-file pipes.psv \
             --delimiter '|'
    """
    echo('Using host: ' + host, quiet)
    es = ElasticSearch(host)

    if delete_index:
        try:
            es.delete_index(index_name)
            echo('Deleted: ' + index_name, quiet)
        except ElasticHttpNotFoundError:
            echo('Index ' + index_name + ' not found, nothing to delete',
                 quiet)

    try:
        if settings_file:
            echo('Applying settings from: ' + settings_file, quiet)
            with open(settings_file) as f:
                settings = json.loads(f.read())
            es.create_index(index_name, settings)
        else:
            es.create_index(index_name)
        echo('Created new index: ' + index_name, quiet)
    except ElasticHttpError as e:
        if e.error['type'] == 'index_already_exists_exception':
            echo('Index ' + index_name + ' already exists', quiet)
        else:
            raise

    echo('Using document type: ' + doc_type, quiet)
    if mapping_file:
        echo('Applying mapping from: ' + mapping_file, quiet)
        with open(mapping_file) as f:
            mapping = json.loads(f.read())
        es.put_mapping(index_name, doc_type, mapping)

    target_delimiter = sanitize_delimiter(delimiter, tab)
    documents = documents_from_file(es, import_file, target_delimiter, quiet)
    perform_bulk_index(host, index_name, doc_type, documents, docs_per_chunk,
                       bytes_per_chunk, parallel)
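For reference, the file passed via --settings-file holds plain JSON that json.loads turns into the dict handed to create_index. One way to produce such a file (the values are illustrative):

import json

settings = {'number_of_shards': 1, 'number_of_replicas': 0}
with open('potato_settings.json', 'w') as f:
    json.dump(settings, f, indent=2)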
Example 6: import_json_into_es
# Module to import: from pyelasticsearch import ElasticSearch [as alias]
# Or: from pyelasticsearch.ElasticSearch import create_index [as alias]
def import_json_into_es(types, inputfolder, logger):
    """
    Imports entities from the *name.json.bz2* files (one entity per line) into a local Elasticsearch.
    :param types: dict mapping type names to Wikidata URIs, e.g. {'person': 'http://www.wikidata.org/entity/Q5'}
    :param inputfolder:
    :param logger:
    :return:
    """
    es = ElasticSearch(config.ELASTICSEARCH_URL)
    try:
        es.delete_index('wikidata')
        es.create_index('wikidata')
        logger.info('rebuilt index [wikidata]')
    except Exception:
        logger.warning("can't delete wikidata index")

    # convert type dictionary
    wd_types = dict()
    for key in types.keys():
        value = int(types[key].split('/')[-1][1:])
        wd_types[value] = {'type': key,
                           'filename': path.join(inputfolder, '{}.json.bz2'.format(key))}

    # import each given type
    for key in wd_types:
        logger.info(wd_types[key])
        done = 0
        items = []
        for line in BZ2File(wd_types[key]['filename'], 'rb'):
            line = line.strip()
            item = loads(line)
            item['uri'] = 'http://wikidata.org/wiki/' + item['id']
            items.append(item)
            done += 1
            if done % 5000 == 0:
                es.bulk_index('wikidata', wd_types[key]['type'], items, id_field='id')
                items = []
            # if done % len(wd_types) / 10 == 0:  # log 10% steps
            #     logger.info('imported {}: {:,d} ({:,d})'.format(wd_types[key]['type'], done, 100*len(wd_types)/done))
            if done % 10000 == 0:
                logger.info('imported {}: {}'.format(wd_types[key]['type'], format(done, ',d')))
        if len(items) > 0:
            es.bulk_index('wikidata', wd_types[key]['type'], items, id_field='id')
        logger.info('imported {}: {}'.format(wd_types[key]['type'], format(done, ',d')))
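Note the manual batching: items is flushed to bulk_index every 5,000 documents, with a final flush for the remainder. pyelasticsearch also ships a bulk_chunks helper that does this bookkeeping for you, as Example 9 below demonstrates.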
Example 7: setUp
# Module to import: from pyelasticsearch import ElasticSearch [as alias]
# Or: from pyelasticsearch.ElasticSearch import create_index [as alias]
def setUp(self):
    es_connection = ElasticSearch('http://localhost:9200')
    try:
        es_connection.delete_index('unit_tests')
    except Exception:
        pass
    es_connection.create_index('unit_tests')

    class TestModel(SearchModel):
        index_name = 'unit_tests'

    self.model = TestModel
Example 8: es_indexer
# Module to import: from pyelasticsearch import ElasticSearch [as alias]
# Or: from pyelasticsearch.ElasticSearch import create_index [as alias]
def es_indexer():
    es = ElasticSearch('http://localhost:9200/')
    # Note: the constructor does not probe the cluster, so this check is always truthy;
    # a connection error would only surface on the first request.
    if es:
        # Delete the index sentiment_analysis if it already exists
        try:
            es.delete_index("sentiment_analysis")
            print("Deleted index sentiment_analysis because it already existed.")
        except ElasticHttpNotFoundError:
            # Nothing to delete; the index did not exist yet.
            pass
        finally:
            print("Creating index sentiment_analysis ....")
            es.create_index("sentiment_analysis", {
                'settings': {
                    'index': {
                        'store': {
                            'type': "default"
                        },
                        'number_of_shards': 1,
                        'number_of_replicas': 1
                    },
                    'analysis': {
                        'analyzer': {
                            'default_english': {
                                'type': 'english'
                            }
                        }
                    }
                },
                "mappings": {
                    "document": {
                        "properties": {
                            "text": {
                                "type": "string",
                                "store": True,
                                "index": "analyzed",
                                "term_vector": "with_positions_offsets_payloads",
                                "analyzer": "default_english"
                            },
                            "sentiment": {
                                "type": "string",
                                "store": True,
                                "index": "analyzed",
                                "analyzer": "default_english"
                            }
                        }
                    }
                }
            })
            print("Created index 'sentiment_analysis' with type 'document' and an analyzed field 'text'.")
    else:
        print("ElasticSearch is not running or the default cluster is down.")
Example 9: Indexer
# Module to import: from pyelasticsearch import ElasticSearch [as alias]
# Or: from pyelasticsearch.ElasticSearch import create_index [as alias]
class Indexer(object):
    def __init__(self, input):
        self.input = input
        self.es = ElasticSearch()
        self.index_name = "psim"
        self.doc_type = 'book'

    def delete_index(self):
        # Delete the index if one already exists
        try:
            self.es.delete_index(index=self.index_name)
        except Exception:
            pass

    def create_index(self):
        self.es.create_index(index=self.index_name, settings=self.get_index_settings())

    def get_index_settings(self):
        settings = {
            "mappings": {
                "book": {
                    "_all": {"enabled": "false"},
                    "properties": {
                        "codes": {"type": "string",
                                  "term_vector": "yes",
                                  "store": "true"},
                        "pid": {"type": "string"},
                        "embedding": {"type": "float",
                                      "store": "true"},
                        "magnitude": {"type": "float", "store": "true"}
                    }
                }
            }
        }
        return settings

    def documents(self):
        with open(self.input) as input_file:
            for line in input_file:
                json_doc = json.loads(line)
                yield self.es.index_op(json_doc, doc_type=self.doc_type)

    def index(self):
        self.delete_index()
        self.create_index()
        for chunk in bulk_chunks(self.documents(), docs_per_chunk=1000):
            self.es.bulk(chunk, index=self.index_name, doc_type=self.doc_type)
        self.es.refresh(self.index_name)
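A hypothetical invocation of this class (the file name is assumed; the file should contain one JSON document per line):

indexer = Indexer('books.jsonl')
indexer.index()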
Example 10: feed
# Module to import: from pyelasticsearch import ElasticSearch [as alias]
# Or: from pyelasticsearch.ElasticSearch import create_index [as alias]
def feed(index='monolith', type='downloads', es_port=9200):
    client = ElasticSearch('http://0.0.0.0:%d/' % es_port)
    platforms = ['Mac OS X', 'Windows 8', 'Ubuntu']

    # indexing a year of data (2012)
    first_day = datetime.datetime(2012, 1, 1)
    last_day = datetime.datetime(2012, 12, 31)
    day_range = last_day - first_day

    for month in range(1, 13):
        name = 'time_2012-%.2d' % month
        try:
            client.delete_index(name)
        except Exception:
            pass
        client.create_index(name, settings={
            'number_of_shards': 1,
            'number_of_replicas': 0,
            'analysis': {'analyzer': {'default': {
                'type': 'custom', 'tokenizer': 'keyword'
            }}},
            'store': {'compress': {'stored': 'true'}},
        })

    # indexing 100 apps
    for add_on in range(100):
        docs = defaultdict(list)
        for delta in range(day_range.days):
            date = first_day + datetime.timedelta(days=delta)
            data = {'date': date,
                    'os': random.choice(platforms),
                    'downloads_count': random.randint(1000, 1500),
                    'users_count': random.randint(10000, 15000),
                    'add_on': add_on + 1}
            docs[date.month].append(data)
        for month, values in docs.items():
            client.bulk_index('time_2012-%.2d' % month, type, values)
        sys.stdout.write('.')
        sys.stdout.flush()

    client.optimize('time_*', max_num_segments=1, wait_for_merge=True)
    client.flush()
    sys.stdout.write('\nDone!\n')
Example 11: test_cluster_size_3
# Module to import: from pyelasticsearch import ElasticSearch [as alias]
# Or: from pyelasticsearch.ElasticSearch import create_index [as alias]
def test_cluster_size_3(self):
    cluster = self._make_one(size=3)
    cluster.start()
    self.assertEqual(len(cluster), 3)
    self.assertEqual(len(cluster.hosts), 3)
    self.assertEqual(len(os.listdir(cluster.working_path)), 3)
    self.assertEqual(len(cluster.urls), 3)
    client = ElasticSearch(cluster.urls, max_retries=2)
    self.assertEqual(client.health()['number_of_nodes'], 3)
    # test if routing works and data is actually distributed across nodes
    client.create_index('test_shards', settings={
        'number_of_shards': 1,
        'number_of_replicas': 2,
    })
    client.index('test_shards', 'spam', {'eggs': 'bacon'})
    client.refresh('test_shards')
    shard_info = client.status()['indices']['test_shards']['shards']['0']
    nodes = set(s['routing']['node'] for s in shard_info)
    self.assertTrue(len(nodes) > 1)
Example 12: init_schema
# Module to import: from pyelasticsearch import ElasticSearch [as alias]
# Or: from pyelasticsearch.ElasticSearch import create_index [as alias]
def init_schema():
    """Should be called at application startup. Makes sure the mappings and
    index exist."""
    es = ElasticSearch(settings.ELASTIC_SEARCH_URLS)
    try:
        es.create_index(settings.ELASTIC_SEARCH_INDEX)
    except IndexAlreadyExistsError:
        pass

    # Does not replace if the exact mapping already exists
    es.put_mapping(settings.ELASTIC_SEARCH_INDEX, 'reg_tree', {
        'reg_tree': {'properties': NODE_SEARCH_SCHEMA}
    })
    es.put_mapping(settings.ELASTIC_SEARCH_INDEX, 'layer', {
        'layer': {'properties': LAYER_SCHEMA}
    })
    es.put_mapping(settings.ELASTIC_SEARCH_INDEX, 'notice', {
        'notice': {'properties': LAYER_SCHEMA}
    })
    es.put_mapping(settings.ELASTIC_SEARCH_INDEX, 'diff', {
        'diff': {'properties': DIFF_SCHEMA}
    })
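IndexAlreadyExistsError here is the same exception Example 4 catches via pyelasticsearch.exceptions; catching it lets init_schema stay idempotent without an explicit existence check.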
Example 13: ElasticSearchProvider
# Module to import: from pyelasticsearch import ElasticSearch [as alias]
# Or: from pyelasticsearch.ElasticSearch import create_index [as alias]
# ......... part of the code omitted here .........
                    'type': 'integer'
                },
                'violation_count': {
                    'type': 'float'
                },
                'page_id': {
                    'type': 'integer'
                },
                'page_uuid': {
                    'type': 'string',
                    'index': 'not_analyzed'
                },
                'page_url': {
                    'type': 'string',
                    'index': 'not_analyzed'
                },
                'page_last_review_date': {
                    'type': 'integer'
                },
                'domain_id': {
                    'type': 'integer'
                },
                'domain_name': {
                    'type': 'string',
                    'index': 'not_analyzed'
                }
            }
        }
    }

    def setup_index(self):
        try:
            settings = self.get_index_settings()
            self.syncES.create_index(index=self.index, settings=settings)
            mapping = self.get_index_mapping()
            self.syncES.put_mapping(index=self.index, doc_type='review', mapping=mapping)
            logging.info('Index %s created.' % self.index)
        except Exception as e:
            raise e

    def delete_index(self):
        try:
            self.syncES.delete_index(index=self.index)
            logging.info('Index %s deleted.' % self.index)
        except Exception as e:
            raise e

    def _get_max_page_id_from_index(self, must_have_domain_name=False):
        if must_have_domain_name:
            inner_query = {
                'constant_score': {
                    'filter': {
                        'not': {
                            'missing': {
                                'field': 'domain_name'
                            }
                        }
                    }
                }
            }
        else:
            inner_query = {
                'match_all': {}
            }

        query = {
Example 14: ElasticSearch
# Module to import: from pyelasticsearch import ElasticSearch [as alias]
# Or: from pyelasticsearch.ElasticSearch import create_index [as alias]
import time

IGNORED_GENRES = ("9", "15", "19")  # We only care about stations that play music.

import settings

es = ElasticSearch(settings.ES_URL)
INDEX_NAME = settings.ES_INDEX

try:
    es.delete_index(INDEX_NAME)
except ElasticHttpNotFoundError:
    pass

try:
    es.create_index(INDEX_NAME)
except IndexAlreadyExistsError:
    pass

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36'
}

failures = 0
pk = 0
while failures < 200:
    pk += 1
    r = requests.get("http://www.iheart.com/a/live/station/%d/" % pk, headers=headers)
    if r.status_code != 200:
        if r.status_code > 500:
Example 15: len
# Module to import: from pyelasticsearch import ElasticSearch [as alias]
# Or: from pyelasticsearch.ElasticSearch import create_index [as alias]
input = len(sys.argv)
if input < 2:
    usage()
    sys.exit(1)
else:
    qname = sys.argv[1]

from pyelasticsearch import ElasticSearch
es = ElasticSearch(elasticsearch)

try:
    s = es.status('oplog')
except Exception:
    print("Creating index: oplog")
    try:
        s = es.create_index('oplog')
        print("sleeping for 5 to ensure index exists")
        time.sleep(5)
    except Exception:
        print("ERROR: index creation failed!")
        sys.exit()

print("Creating queue: %s" % qname)
try:
    es.put_mapping('oplog', qname, {
        "properties": {
            "from": {"type": "string", "null_value": "na"},
            "sent": {"type": "string", "null_value": "na"},
            "submitted": {"type": "date"},
            "subject": {"type": "string", "null_value": "na"},
            "message": {"type": "string", "null_value": "na"}
        }
    })
    print("Created queue with mapping:")
    print(es.get_mapping('oplog', qname))
except Exception:
    print("ERROR: queue creation failed!")