This article collects typical usage examples of the Python pyes.ES class, drawn from real-world projects. If you are unsure what the ES class does or how to use it, the examples below should help.

The following 15 code examples of the ES class are shown, ordered by popularity.
Example 1: term_facet

def term_facet(host='localhost:9200',
               terms=['bibleverse'],
               _type='habakkuk',
               date_filter=[],
               size=10):
    ret = []
    conn = ES(host)
    q = MatchAllQuery()
    if date_filter:
        start, end = date_filter
        q = FilteredQuery(q, RangeFilter(qrange=ESRange('created_at_date',
                                                        start, end,
                                                        include_upper=False)))
    q = q.search(size=0)
    for term in terms:
        q.facet.add_term_facet(term, order='count', size=size)
    print json.dumps(json.loads(q.to_search_json()), indent=2)
    resultset = conn.search(query=q, indices=_type + '-*', doc_types=[_type])
    for facet in resultset.facets:
        print "Total", facet, resultset.facets[facet]['total']
        for row in resultset.facets[facet]['terms']:
            print "\t", row['term'], row['count']
            ret.append((facet, row['term']))
    return ret
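
A minimal way to exercise this helper, assuming an Elasticsearch node on localhost:9200 with habakkuk-* indices and a created_at_date date field; the dates and size below are made up:

from datetime import date

# Hypothetical call: top 5 'bibleverse' terms for March 2013.
pairs = term_facet(terms=['bibleverse'],
                   date_filter=[date(2013, 3, 1), date(2013, 4, 1)],
                   size=5)
for facet_name, term in pairs:
    print facet_name, term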
Example 2: StashHandler

class StashHandler(logging.Handler):
    def __init__(self, constr, whitelist=None, blacklist=None):
        logging.Handler.__init__(self)
        self.conn = ES(constr)
        if blacklist is None:
            blacklist = set()
        self.whitelist = whitelist
        self.blacklist = blacklist
        self.record_type = 'record'

    @property
    def index_name(self):
        return 'logstash-' + datetime.date.today().strftime('%Y.%m.%d')

    def emit(self, record):
        if self.whitelist is None:
            d = {k: record.__dict__[k] for k in record.__dict__
                 if k not in self.blacklist}
        else:
            d = {k: record.__dict__[k] for k in record.__dict__
                 if k in self.whitelist and k not in self.blacklist}
        entry = {
            "@fields": d,
            "@message": record.msg,
            "@source": "gelf://localhost",
            "@source_host": "gelf://localhost",
            "@source_path": "/",
            "@tags": [],
            "@timestamp": datetime.datetime.utcnow().isoformat(),
            "@type": self.record_type}
        self.conn.index(entry, self.index_name, self.record_type)
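
Wiring the handler into the standard logging tree is the usual pattern. A sketch, assuming a node on localhost:9200; the logger name and blacklist are illustrative:

import logging

handler = StashHandler('localhost:9200', blacklist={'args'})
logger = logging.getLogger('myapp')  # hypothetical logger name
logger.addHandler(handler)
logger.setLevel(logging.WARNING)
logger.warning('disk usage at %d%%', 91)  # indexed into logstash-YYYY.MM.DD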
Example 3: handle

def handle(self, *args, **kwargs):
    elastic = ES(settings.SEARCH_HOSTS)
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(elastic.get_indices())
    elastic.connection.close()
Example 4: ElasticSearchPipeline

class ElasticSearchPipeline(object):
    def __init__(self):
        self.settings = get_project_settings()
        basic_auth = {'username': self.settings['ELASTICSEARCH_USERNAME'],
                      'password': self.settings['ELASTICSEARCH_PASSWORD']}
        if self.settings['ELASTICSEARCH_PORT']:
            uri = "%s:%d" % (self.settings['ELASTICSEARCH_SERVER'],
                             self.settings['ELASTICSEARCH_PORT'])
        else:
            uri = "%s" % (self.settings['ELASTICSEARCH_SERVER'])
        self.es = ES([uri], basic_auth=basic_auth)

    def process_item(self, item, spider):
        if self.__get_uniq_key() is None:
            log.msg("ELASTICSEARCH_UNIQ_KEY is NONE")
            self.es.index(dict(item),
                          self.settings['ELASTICSEARCH_INDEX'],
                          self.settings['ELASTICSEARCH_TYPE'],
                          id=item['id'], op_type='create')
        else:
            log.msg("Generation SHA1")
            self.es.index(dict(item),
                          self.settings['ELASTICSEARCH_INDEX'],
                          self.settings['ELASTICSEARCH_TYPE'],
                          hashlib.sha1(item[self.__get_uniq_key()]).hexdigest())
        log.msg("Item send to Elastic Search %s" %
                (self.settings['ELASTICSEARCH_INDEX']),
                level=log.DEBUG, spider=spider)
        return item

    def __get_uniq_key(self):
        if not self.settings['ELASTICSEARCH_UNIQ_KEY'] or \
                self.settings['ELASTICSEARCH_UNIQ_KEY'] == "":
            return None
        return self.settings['ELASTICSEARCH_UNIQ_KEY']
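
For a pipeline like this to run, the Scrapy project settings must register it and define the ELASTICSEARCH_* keys it reads. A sketch, with a hypothetical module path and values:

# settings.py
ITEM_PIPELINES = {'myproject.pipelines.ElasticSearchPipeline': 300}

ELASTICSEARCH_SERVER = 'localhost'
ELASTICSEARCH_PORT = 9200
ELASTICSEARCH_USERNAME = ''
ELASTICSEARCH_PASSWORD = ''
ELASTICSEARCH_INDEX = 'scrapy'
ELASTICSEARCH_TYPE = 'items'
ELASTICSEARCH_UNIQ_KEY = 'url'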
Example 5: index

def index():
    """Index a file of JSON lines (one document per line) into test-index."""
    import time
    fptr = open(sys.argv[1], 'rb')
    line_count = 0
    conn = ES(["localhost:9200"])
    #conn.create_index('test-index')
    start = time.clock()
    numb_exceptions = 0
    for line in fptr:
        if (line_count % 10000) == 0:
            end = time.clock()
            minutes = (end - start) / 60.0
            print 'Done with %d took %f min. ' % (line_count, minutes)
            print 'number of exceptions ', numb_exceptions
        line_count += 1
        data = json.loads(line)
        post_id = int(data['post_id'])
        if post_id and data:
            try:
                conn.index(data, "test-index", "test-type", post_id)
            except Exception:
                numb_exceptions += 1
                continue
    print 'number of exceptions ', numb_exceptions
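
The commented-out create_index call suggests the target index must exist before the loop runs. A one-time setup sketch, assuming a local node; create_index_if_missing is the idempotent pyes variant:

from pyes import ES

conn = ES(["localhost:9200"])
conn.create_index_if_missing("test-index")  # no-op if the index already exists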
Example 6: facets

def facets(host='localhost:9200',
           facet_terms=['bibleverse'],
           _type='habakkuk',
           date_filter=[],
           size=10):
    ret = {}
    conn = ES(host)
    q = MatchAllQuery()
    if date_filter:
        start, end = date_filter
        q = FilteredQuery(q, RangeFilter(qrange=ESRange('created_at_date',
                                                        start.isoformat(),
                                                        end.isoformat(),
                                                        include_upper=False)))
    q = q.search(size=0)
    for term in facet_terms:
        q.facet.add_term_facet(term, order='count', size=size)
    es_logger.info(q.serialize())
    resultset = conn.search(query=q, indices=_type + '-*', doc_types=[_type])
    for facet in resultset.facets:
        ret[facet] = []
        for row in resultset.facets[facet]['terms']:
            ret[facet].append({"value": row['term'], "count": row['count']})
    logger.debug("facets return|'%s'" % json.dumps(ret))
    return ret
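
Unlike term_facet in Example 1, this variant calls .isoformat() on the filter bounds, so date_filter must hold date or datetime objects. A hypothetical call:

from datetime import date

result = facets(facet_terms=['bibleverse'],
                date_filter=[date(2013, 3, 1), date(2013, 4, 1)])
# e.g. {'bibleverse': [{'value': 'john 3:16', 'count': 42}, ...]}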
Example 7: ElasticSearchPipeline

class ElasticSearchPipeline(object):
    def __init__(self):
        self.settings = get_project_settings()
        basic_auth = {'username': self.settings['ELASTICSEARCH_USERNAME'],
                      'password': self.settings['ELASTICSEARCH_PASSWORD']}
        if self.settings['ELASTICSEARCH_PORT']:
            uri = "%s:%d" % (self.settings['ELASTICSEARCH_SERVER'],
                             self.settings['ELASTICSEARCH_PORT'])
        else:
            uri = "%s" % (self.settings['ELASTICSEARCH_SERVER'])
        self.es = ES([uri], basic_auth=basic_auth)

    def index_item(self, item):
        uniq_key = self.settings['ELASTICSEARCH_UNIQ_KEY']
        if uniq_key:
            local_id = hashlib.sha1(item[uniq_key]).hexdigest()
            log.msg("Generated unique key %s" % local_id,
                    level=self.settings['ELASTICSEARCH_LOG_LEVEL'])
            op_type = 'none'
        else:
            op_type = 'create'
            local_id = item['id']
        self.es.index(dict(item),
                      self.settings['ELASTICSEARCH_INDEX'],
                      self.settings['ELASTICSEARCH_TYPE'],
                      id=local_id,
                      op_type=op_type)
Example 8: get_related_videos

def get_related_videos(video):
    related_videos = []
    conn = ES(['127.0.0.1:9200'])
    conn.default_indices = VIDEO_INDEX
    conn.refresh(VIDEO_INDEX)
    q = {
        "query": {
            "bool": {
                "should": [
                    {"term": {"uid": video.uid}},
                    {"terms": {"category": [video.category]}},
                    {"terms": {"topic": [video.topic]}},
                    {"terms": {"language": [video.language]}}
                ],
                "minimum_should_match": 1
            }
        }
    }
    try:
        query = json.dumps(q)
        url = "http://localhost:9200/%s/_search" % VIDEO_INDEX
        response = urllib2.urlopen(url, query)
        result = json.loads(response.read())
        for res in result['hits']['hits']:
            related_videos.append(res['_source'])
    except Exception:
        pass
    return related_videos
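
Note that the snippet opens a pyes connection only to refresh the index, then issues the actual search over raw urllib2. The function expects an object exposing uid, category, topic, and language attributes; a hypothetical stand-in makes it easy to try:

from collections import namedtuple

Video = namedtuple('Video', 'uid category topic language')  # hypothetical model
sample = Video(uid='abc123', category='science', topic='physics', language='en')
for doc in get_related_videos(sample):
    print doc.get('title')  # assumes indexed documents carry a title field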
Example 9: es_index

def es_index(self):
    conn = ES(settings.ES_SERVERS, basic_auth=settings.ES_AUTH)
    conn.index(
        doc=self.get_search_kwargs(),
        index=self.tenant.slug,
        doc_type=self.Meta.document_type,
        id=unicode(self.id)
    )
Example 10: get_es

def get_es(**overrides):
    """Return one pyes.es.ES object.

    :arg overrides: Allows you to override defaults to create the ES.
        Things you can override:

        * default_indexes
        * timeout
        * dump_curl

    Values for these correspond with the arguments to pyes.es.ES.

    For example, if you wanted to create an ES for indexing with a
    timeout of 30 seconds, you'd do:

    >>> es = get_es(timeout=30)

    If you wanted to create an ES for debugging that dumps curl
    commands to stdout, you could do:

    >>> class CurlDumper(object):
    ...     def write(self, s):
    ...         print s
    ...
    >>> es = get_es(dump_curl=CurlDumper())
    """
    if overrides or not hasattr(_local, 'es'):
        defaults = {
            'default_indexes': DEFAULT_INDEXES,
            'timeout': DEFAULT_TIMEOUT,
            'dump_curl': DEFAULT_DUMP_CURL,
        }
        defaults.update(overrides)
        if (not thrift_enable and
                not settings.ES_HOSTS[0].split(':')[1].startswith('92')):
            raise ValueError('ES_HOSTS is not set to a valid port starting '
                             'with 9200-9299 range. Other ports are valid '
                             'if using pythrift.')
        es = ES(settings.ES_HOSTS, **defaults)

        # pyes 0.15 does this lame thing where it ignores dump_curl in
        # the ES constructor and always sets it to None. So what we do
        # is set it manually after the ES has been created, if
        # defaults['dump_curl'] is truthy. This might not work for all
        # values of dump_curl.
        if VERSION[0:2] == (0, 15):
            es.dump_curl = (defaults['dump_curl']
                            if defaults['dump_curl'] else None)

        # Cache the es if there weren't any overrides.
        if not overrides:
            _local.es = es
    else:
        es = _local.es
    return es
Example 11: get_es

def get_es(hosts=None, default_indexes=None, timeout=None, dump_curl=None,
           **settings):
    """Create an ES object and return it.

    :arg hosts: list of uris; ES hosts to connect to, defaults to
        ``['localhost:9200']``
    :arg default_indexes: list of strings; the default indexes to use,
        defaults to 'default'
    :arg timeout: int; the timeout in seconds, defaults to 5
    :arg dump_curl: function or None; function that dumps curl output,
        see docs, defaults to None
    :arg settings: other settings to pass into `pyes.es.ES`

    Examples:

    >>> es = get_es()
    >>> es = get_es(hosts=['localhost:9200'])
    >>> es = get_es(timeout=30)  # good for indexing
    >>> es = get_es(default_indexes=['sumo_prod_20120627'])
    >>> class CurlDumper(object):
    ...     def write(self, text):
    ...         print text
    ...
    >>> es = get_es(dump_curl=CurlDumper())
    """
    # Cheap way of de-None-ifying things
    hosts = hosts or DEFAULT_HOSTS
    default_indexes = default_indexes or DEFAULT_INDEXES
    timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
    dump_curl = dump_curl or DEFAULT_DUMP_CURL

    if not isinstance(default_indexes, list):
        default_indexes = [default_indexes]

    es = ES(hosts,
            default_indexes=default_indexes,
            timeout=timeout,
            dump_curl=dump_curl,
            **settings)

    # pyes 0.15 does this lame thing where it ignores dump_curl in the
    # ES constructor and always sets it to None. So what we do is set
    # it manually after the ES has been created, if dump_curl is not
    # None. This might not work for all values of dump_curl.
    if PYES_VERSION[0:2] == (0, 15) and dump_curl is not None:
        es.dump_curl = dump_curl

    return es
Example 12: tearDown

def tearDown(self):
    self.log.warning("before tearDown es")
    self._unlink_es_cluster()
    self._stop_es_replication()
    if self.es_host is not None:
        conn = ES(self.es_host + ":9200")
        conn.delete_index_if_exists("default")
    super(ElasticSearchSupport, self).tearDown()
    self.log.warning("after tearDown es")
Example 13: BaseElasticSearchClient

class BaseElasticSearchClient(BaseClient):
    def __init__(self, servers, index):
        """
        @param servers: Make sure to include the port with the server address
        @param index: Document index
        """
        super(BaseElasticSearchClient, self).__init__()
        self.connection = None
        self.servers = servers
        self.index = index if type(index) is list else [index]

    def connect(self, connection_pool=1):
        update_connection_pool(connection_pool)
        try:
            self.connection = ES(self.servers)
        except NoServerAvailable:
            self._log.error('Failed to connect to elastic search server')
            return False
        return True

    def close(self):
        self.connection = None

    def _create_term_query(self, must_list):
        # TODO: add remaining conditional list functionality.
        query = BoolQuery()
        for term in must_list:
            query.add_must(term)
        return query

    def find_term(self, name, value, size=10):
        if not self.connection:
            return
        query = TermQuery(name, value)
        return self.connection.search(query=Search(query, size=size),
                                      indices=self.index)

    def find(self, filter_terms, size=10, doc_types=None):
        if not self.connection:
            return
        query = self._create_term_query(must_list=filter_terms)
        return self.connection.search(query=Search(query, size=size),
                                      indices=self.index,
                                      doc_types=doc_types)

    def find_one(self, filter_terms, size=10, doc_types=None):
        if not self.connection:
            return
        results = self.find(filter_terms=filter_terms, size=size,
                            doc_types=doc_types)
        return results[0] if len(results) > 0 else None
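
A usage sketch, assuming the BaseClient base class and the update_connection_pool helper from the same project are importable and a node is reachable; the index name and query values are illustrative:

from pyes.query import TermQuery

client = BaseElasticSearchClient(servers=['localhost:9200'], index='logs')
if client.connect():
    results = client.find(filter_terms=[TermQuery('status', 'error')], size=5)
    first = client.find_one(filter_terms=[TermQuery('status', 'error')])
    client.close()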
Example 14: es_deindex

def es_deindex(self):
    conn = ES(settings.ES_SERVERS, basic_auth=settings.ES_AUTH)
    try:
        conn.delete(
            index=self.tenant.slug,
            doc_type=self.Meta.document_type,
            id=unicode(self.id)
        )
    except Exception:
        pass
Example 15: ElasticSearchPipeline

class ElasticSearchPipeline(object):
    def __init__(self):
        self.conn = ES('localhost:9200')
        # self.file = open('urls.csv', 'wb')
        # self.file.write('spider,url' + '\n')

    def process_item(self, item, spider):
        # self.file.write(spider.name + ',' + spider.start_urls[0] + '\n')
        self.conn.index(dict(item), "qrator", spider.name)
        return item