本文整理汇总了 Python 中 elastic.search.Search.search 方法的典型用法代码示例。如果您正苦于以下问题：Python Search.search 方法的具体用法？Python Search.search 怎么用？Python Search.search 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 elastic.search.Search 的用法示例。
在下文中一共展示了Search.search方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_pubs_disease_tags
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import search [as 别名]
def test_pubs_disease_tags(self):
    ''' Check, for every disease, that the number of publications tagged with
    it (tags.disease) in the PUBLICATION index equals the number of PMIDs
    returned by self._get_pmids, and print a per-disease report listing any
    extra or missing PMIDs. '''
    all_counts_match = True
    report = ''
    for disease in DiseasePublicationTest.DISEASES:
        expected_pmids = self._get_pmids(disease)
        code = disease.lower()
        tag_filter = Filter(Query.term('tags.disease', code))
        tag_query = ElasticQuery(BoolQuery(b_filter=tag_filter), sources=['pmid'])
        # request twice the expected number of hits so surplus documents are visible
        searcher = Search(search_query=tag_query, idx=ElasticSettings.idx('PUBLICATION'),
                          size=len(expected_pmids)*2)
        counted = searcher.get_count()
        report += code+'\tINDEX: '+str(counted['count'])+'\tNCBI: '+str(len(expected_pmids))
        if counted['count'] != len(expected_pmids):
            all_counts_match = False
            indexed_pmids = [getattr(hit_doc, 'pmid') for hit_doc in searcher.search().docs]
            only_in_index = [p for p in indexed_pmids if p not in expected_pmids]
            only_expected = [p for p in expected_pmids if p not in indexed_pmids]
            if only_in_index:
                report += '\textra PMIDs: '+str(only_in_index)
            if only_expected:
                report += '\tmissing PMIDs: '+str(only_expected)
        report += '\n'
    print(report)
    self.assertTrue(all_counts_match, 'Count for disease tags')
示例2: get_disease_tags
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import search [as 别名]
def get_disease_tags(cls, feature_id, idx=None, idx_type=None):
    ''' Get the aggregated list of disease tags for a given feature id,
    aggregated over the "disease_tags" field of all documents whose "qid"
    matches the feature.
    @type  feature_id: string
    @param feature_id: Id of the feature (gene => gene_id, region=>region_id)
    @type  idx: string
    @param idx: name of the index
    @type  idx_type: string
    @param idx_type: name of the idx type, each criteria is an index type
    @return: the core diseases followed by the other diseases as returned by
    Disease.get_site_diseases; None when no tags were found; an empty list
    when the search or the aggregation lookup failed.
    '''
    query = ElasticQuery(Query.term("qid", feature_id))
    aggs = Aggs(Agg("criteria_disease_tags", "terms", {"field": "disease_tags", "size": 0}))
    if idx_type:
        search = Search(query, aggs=aggs, idx=idx, idx_type=idx_type)
    else:
        search = Search(query, aggs=aggs, idx=idx)

    try:
        buckets = search.search().aggs['criteria_disease_tags'].get_buckets()
        disease_tags = [bucket['key'].lower() for bucket in buckets]
    except Exception:
        # Deliberate best-effort: any search/aggregation failure yields no tags.
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.
        return []

    if not disease_tags:
        return None

    # get disease docs
    (core, other) = Disease.get_site_diseases(dis_list=disease_tags)
    diseases = list(core)
    diseases.extend(other)
    return diseases
示例3: _get_pub_docs_by_pmid
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import search [as 别名]
def _get_pub_docs_by_pmid(pmids, sources=None):
    """ Get the publication documents for the given array of PMIDs by
    running an ids query against the PUBLICATION index.
    A dictionary is returned with the key being the document id and the
    value the publication document. """
    pub_query = ElasticQuery(Query.ids(pmids), sources=sources)
    pub_search = Search(pub_query, idx=ElasticSettings.idx("PUBLICATION"), size=len(pmids))
    docs_by_id = {}
    for pub_doc in pub_search.search().docs:
        docs_by_id[pub_doc.doc_id()] = pub_doc
    return docs_by_id
示例4: filter_queryset
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import search [as 别名]
def filter_queryset(self, request, queryset, view):
    ''' Override this method to request just the documents required from Rserve.

    Reads the 'marker' (default 'rs2476601') and 'dataset' (default 'EUR')
    request parameters allowed by view.filter_fields, looks up the marker's
    seqid in the MARKER index and asks Rserve (get_pop) for the population
    data. Returns a single-element list holding an ElasticObject with the
    keys 'populations' and 'marker'; 'populations' is None when the lookup
    fails with TypeError, ValueError, IndexError or ConnectionError. '''
    # Bound before the try so the error handler can always reference it,
    # even when the failure happens before the marker id has been read.
    mid1 = None
    try:
        filterable = getattr(view, 'filter_fields', [])
        filters = {k: v for k, v in request.GET.items() if k in filterable}
        mid1 = filters.get('marker', 'rs2476601')
        dataset = filters.get('dataset', 'EUR').replace('-', '')
        query = ElasticQuery(BoolQuery(must_arr=[Query.term("id", mid1)]), sources=['seqid', 'start'])
        elastic = Search(search_query=query, idx=ElasticSettings.idx('MARKER', 'MARKER'), size=1)
        # IndexError here (marker not found) is handled below
        doc = elastic.search().docs[0]
        seqid = getattr(doc, 'seqid')

        rserve = getattr(settings, 'RSERVE')
        conn = pyRserve.connect(host=rserve.get('HOST'), port=rserve.get('PORT'))
        try:
            pop_str = conn.r.get_pop(dataset, seqid, mid1)
            pops = json.loads(str(pop_str))
            populations = []
            for pop in pops:
                # record the population name inside each population entry
                pops[pop]['population'] = pop
                populations.append(pops[pop])
        finally:
            # always release the Rserve connection, also when the call or parsing fails
            conn.close()
        return [ElasticObject(initial={'populations': populations, 'marker': mid1})]
    except (TypeError, ValueError, IndexError, ConnectionError):
        return [ElasticObject(initial={'populations': None, 'marker': mid1})]
示例5: get_gene_docs_by_ensembl_id
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import search [as 别名]
def get_gene_docs_by_ensembl_id(cls, ens_ids, sources=None):
    ''' Get the gene documents for the given array of ensembl IDs by
    running an ids query against the GENE index.
    A dictionary is returned with the key being the document id and the
    value the gene document. '''
    gene_idx = ElasticSettings.idx('GENE', idx_type='GENE')
    gene_search = Search(ElasticQuery(Query.ids(ens_ids), sources=sources),
                         idx=gene_idx, size=len(ens_ids))
    gene_docs = gene_search.search().docs
    return dict((gene_doc.doc_id(), gene_doc) for gene_doc in gene_docs)
示例6: check_hits
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import search [as 别名]
def check_hits(resp_json):
    ''' Compare the position of every hit in resp_json with the MARKER
    document that has the same id and log an error for each SNV whose
    positions differ by more than one.

    NOTE(review): 'self' is not a parameter; this function presumably is
    defined inside a method and picks 'self' up from the enclosing scope. '''
    ic_docs_by_rsid = {}
    for raw_hit in resp_json['hits']['hits']:
        candidate = Document(raw_hit)
        candidate_id = getattr(candidate, "id")
        if candidate_id is not None:
            ic_docs_by_rsid[candidate_id] = candidate

    rsid_list = list(ic_docs_by_rsid)
    id_filter = TermsFilter.get_terms_filter("id", rsid_list)
    marker_query = ElasticQuery.filtered(Query.match_all(), id_filter)
    marker_search = Search(marker_query, idx=ElasticSettings.idx('MARKER', 'MARKER'),
                           size=len(rsid_list))

    for marker_doc in marker_search.search().docs:
        # only markers annotated as SNV are checked
        if 'VC=SNV' not in getattr(marker_doc, "info"):
            continue
        rsid = getattr(marker_doc, "id")
        ic_doc = ic_docs_by_rsid[rsid]
        marker_pos = getattr(marker_doc, "start")
        ic_pos = self._get_highest_build(ic_doc)['position']
        if abs(int(marker_pos) - int(ic_pos)) <= 1:
            continue

        # skip hits flagged is_par and those whose allele_a is 'D' or 'I'
        is_par = getattr(ic_doc, 'is_par')
        allele_a = getattr(ic_doc, 'allele_a')
        if is_par is not None or allele_a in ('D', 'I'):
            continue

        msg = ("CHECK IC/DBSNP POSITIONS:: "+getattr(ic_doc, 'name') +
               ' '+str(ic_pos)+" "+rsid+' '+str(marker_pos))

        # markers found on the same seqid at the hit's position
        same_pos_query = ElasticQuery.filtered(Query.term("seqid", getattr(marker_doc, 'seqid')),
                                               Filter(Query.term("start", ic_pos)))
        same_pos_search = Search(same_pos_query, idx=ElasticSettings.idx('MARKER', 'MARKER'))
        for pos_doc in same_pos_search.search().docs:
            msg += " ("+getattr(pos_doc, "id")+":"+str(getattr(pos_doc, "start"))+")"

        # HISTORY documents whose rslow equals this rsid
        history_query = ElasticQuery.filtered(Query.match_all(), Filter(Query.term("rslow", rsid)))
        history_search = Search(history_query, idx=ElasticSettings.idx('MARKER', 'HISTORY'))
        for history_doc in history_search.search().docs:
            msg += " (rshigh:"+str(getattr(history_doc, "rshigh")) + \
                " build_id:"+str(getattr(history_doc, "build_id"))+")"

        logger.error(msg)
示例7: fetch_overlapping_features
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import search [as 别名]
def fetch_overlapping_features(cls, build, seqid, start, end, idx=None, idx_type=None, disease_id=None):
    ''' Fetch the features overlapping a given stretch of region. The build
    info is stored as a nested document, so a nested query is built.
    @type  build: string
    @param build: build info eg: 'GRCh38'
    @type  seqid: string
    @param seqid: chromosome number
    @type  start: string
    @param start: region start
    @type  end: string
    @param end: region end
    @type  idx: string
    @param idx: name of the index
    @type  idx_type: string
    @param idx_type: name of the idx type, each criteria is an index type
    @type  disease_id: string
    @param disease_id: disease code
    @return: the docs of the search result
    '''
    source_fields = ['build_info.*',
                     'disease_locus',
                     'disease',
                     'chr_band',
                     'species']

    # feature starts at or before the region start and ends at or after the region end
    spans_region = BoolQuery()
    spans_region.must(RangeQuery("build_info.start", lte=start)) \
                .must(RangeQuery("build_info.end", gte=end))

    # feature start or feature end lies within the region, or the feature spans it
    overlap_filter = OrFilter(RangeQuery("build_info.start", gte=start, lte=end))
    overlap_filter.extend(RangeQuery("build_info.end", gte=start, lte=end)) \
                  .extend(spans_region)

    if disease_id:
        # NOTE(review): the nested build_info query wraps an empty BoolQuery here,
        # so build, seqid and the overlap filter are not applied when a
        # disease_id is given - confirm this is intended.
        nested_build_info = Query.nested('build_info', BoolQuery())
        disease_query = BoolQuery()
        disease_query.must(Query.term("disease", disease_id.lower())).must(nested_build_info)
        search_query = ElasticQuery(disease_query, sources=source_fields)
    else:
        region_query = BoolQuery()
        region_query.must(Query.term("build_info.build", build)) \
                    .must(Query.term("build_info.seqid", seqid)) \
                    .filter(overlap_filter)
        search_query = ElasticQuery(Query.nested('build_info', region_query), sources=source_fields)

    return Search(search_query, idx=idx, idx_type=idx_type).search().docs
示例8: is_region_for_disease
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import search [as 别名]
def is_region_for_disease(cls, hit, section=None, config=None, result_container=None):
    ''' Collect the diseases of the study hits behind a region's disease loci
    and add the region to the result container for each of them.

    @type  hit: dict
    @param hit: search hit of a region; its '_source' must provide
    'disease_loci' and 'region_id'
    @param section: not used by this method
    @param config: not used by this method
    @type  result_container: dict
    @param result_container: container to populate; a new empty dict is used
    when omitted
    @return: the container as returned by cls.populate_container; the
    container as passed in is returned unchanged as soon as one study hit has
    a status other than 'N' or a disease_locus of 'tbc'
    '''
    # A mutable default ({}) would be shared between calls; create it per call.
    if result_container is None:
        result_container = {}
    result_container_populated = result_container

    feature_doc = hit['_source']
    feature_doc['_id'] = hit['_id']
    region_id = feature_doc['region_id']

    diseases = set()
    for disease_locus_id in feature_doc['disease_loci']:
        query = ElasticQuery(Query.ids([disease_locus_id]), sources=['hits'])
        elastic = Search(query, idx=ElasticSettings.idx('REGION', idx_type='DISEASE_LOCUS'))
        for disease_locus_doc in elastic.search().docs:
            # distinct loop names: the 'hit' parameter and the disease_loci list are not rebound
            for study_hit_id in getattr(disease_locus_doc, 'hits'):
                query = ElasticQuery(Query.ids([study_hit_id]))
                elastic = Search(query, idx=ElasticSettings.idx('REGION', idx_type='STUDY_HITS'))
                hit_doc = elastic.search().docs[0]
                disease = getattr(hit_doc, "disease")
                if getattr(hit_doc, "status") != 'N':
                    return result_container
                if getattr(hit_doc, "disease_locus").lower() == 'tbc':
                    return result_container
                diseases.add(disease)

    for disease in diseases:
        result_container_populated = cls.populate_container(disease,
                                                            disease,
                                                            fnotes=None, features=[region_id],
                                                            diseases=[disease],
                                                            result_container=result_container_populated)
    return result_container_populated
示例9: get_rdm_docs
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import search [as 别名]
def get_rdm_docs(cls, idx, idx_type, qbool=Query.match_all(), sources=[], size=1):
    ''' Get random docs from the indices: qbool is wrapped in a function score
    query whose score is replaced by a randomly seeded random_score.
    The search is retried (with a new seed) whenever it raises IndexError.
    NOTE(review): the retry recursion has no upper bound. '''
    seed = random.randint(0, 1000000)
    random_score = ScoreFunction.create_score_function('random_score', seed=seed)
    scored_query = FunctionScoreQuery(qbool, [random_score], boost_mode='replace')
    searcher = Search(search_query=ElasticQuery(scored_query, sources=sources),
                      size=size, idx=idx, idx_type=idx_type)
    try:
        random_docs = searcher.search().docs
    except IndexError:
        return cls.get_rdm_docs(idx, idx_type, qbool, sources, size)
    return random_docs
示例10: test_pub_ini_file2
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import search [as 别名]
def test_pub_ini_file2(self):
    ''' Test the publication pipeline 'load' step for the DISEASE::TEST
    section and check that more than one document matches "test" in
    tags.disease afterwards. '''
    captured = StringIO()
    call_command('publications', '--dir', TEST_DATA_DIR, '--steps', 'load',
                 sections='DISEASE::TEST', ini=MY_PUB_INI_FILE, stdout=captured)

    ini_config = IniParser().read_ini(MY_PUB_INI_FILE)
    pub_idx = ini_config['DISEASE']['index']
    # refresh the index before searching it
    Search.index_refresh(pub_idx)

    tag_query = ElasticQuery.query_string("test", fields=["tags.disease"])
    found_docs = Search(tag_query, idx=pub_idx).search().docs
    self.assertGreater(len(found_docs), 1)
示例11: filter_queryset
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import search [as 别名]
def filter_queryset(self, request, queryset, view):
    ''' Override this method to request feature locations.

    Searches the marker, region and gene indices for the 'feature' request
    parameter (default 'PTPN22') and returns one ElasticObject per document
    with the keys feature, chr, start, end and locusString. Raises Http404
    on TypeError, ValueError, IndexError or ConnectionError. '''
    try:
        allowed = getattr(view, 'filter_fields', [])
        filters = {key: value for key, value in request.GET.items() if key in allowed}
        query_str = filters.get('feature', 'PTPN22')
        build = self._get_build(filters.get('build', settings.DEFAULT_BUILD))
        if query_str is None or query_str == '':
            return [ElasticObject(initial={'error': 'No feature name provided.'})]

        search_fields = ['id', 'symbol', 'dbxrefs.ensembl', 'region_name']
        sources = ['start', 'stop', 'seqid', 'chromosome', 'disease_loci']
        idxs = ElasticSettings.getattr('IDX')

        # pick the marker index whose 'build' label matches the requested build
        marker_idx_key = ''
        if build == ElasticSettings.get_label('MARKER', label='build'):
            marker_idx_key = 'MARKER'
        if not marker_idx_key:
            for idx_key in idxs:
                if 'MARKER' in idx_key and build == ElasticSettings.get_label(idx_key, label='build'):
                    marker_idx_key = idx_key

        (idx_m, idx_type_m) = ElasticSettings.idx_names(marker_idx_key, 'MARKER')
        (idx_r, idx_type_r) = ElasticSettings.idx_names('REGION', 'REGION')
        (idx_g, idx_type_g) = ElasticSettings.idx_names('GENE', 'GENE')
        idx = idx_m + ',' + idx_r + ',' + idx_g
        idx_type = idx_type_m + ',' + idx_type_r + ',' + idx_type_g

        equery = BoolQuery(must_arr=Query.query_string(query_str, fields=search_fields))
        elastic = Search(search_query=ElasticQuery(equery, sources), size=10, idx=idx, idx_type=idx_type)

        locs = []
        for doc in elastic.search().docs:
            if isinstance(doc, RegionDocument):
                doc = Region.pad_region_doc(doc)

            # the position string is split as '<chr>:<start>[-<end>]'
            loc = doc.get_position(build=build).split(':')
            pos = loc[1].replace(',', '').split('-')
            start_pos = int(pos[0])
            end_pos = int(pos[1]) if len(pos) > 1 else int(pos[0])
            locs.append(ElasticObject(
                {'feature': query_str,
                 'chr': loc[0],
                 'start': start_pos,
                 'end': end_pos,
                 'locusString': query_str+" ("+str(loc[1])+")"}))
        return locs
    except (TypeError, ValueError, IndexError, ConnectionError):
        raise Http404
示例12: study_page
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import search [as 别名]
def study_page(request, study):
    ''' Renders a study page for a comma separated list of study ids.
    Raises Http404 when no id is given, when nothing is found or when nine
    or more hits are found. '''
    if study is None:
        messages.error(request, 'No study id given.')
        raise Http404()

    study_ids = study.split(',')
    searcher = Search(ElasticQuery(Query.ids(study_ids)),
                      idx=ElasticSettings.idx('STUDY', 'STUDY'), size=5)
    res = searcher.search(obj_document=StudyDocument)

    if res.hits_total == 0:
        messages.error(request, 'Study(s) '+study+' not found.')
        raise Http404()
    if not res.hits_total < 9:
        raise Http404()

    study_names = [getattr(study_doc, 'study_name') for study_doc in res.docs]
    context = {'features': res.docs, 'title': ', '.join(study_names)}
    return render(request, 'study/study.html', context, content_type='text/html')
示例13: marker_page
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import search [as 别名]
def marker_page(request):
    ''' Renders a marker page for the marker given in the "m" request
    parameter. Raises Http404 when the parameter is missing or no document
    matches. '''
    marker = request.GET.get("m")
    if marker is None:
        messages.error(request, 'No gene name given.')
        raise Http404()

    # rs identifiers are searched in id/rscurrent, anything else in name
    if marker.startswith("rs"):
        fields = ['id', 'rscurrent']
    else:
        fields = ['name']

    # group the matches by document type, keeping up to 15 top hits per type
    top_hits_agg = Agg('top_hits', 'top_hits', {"size": 15})
    aggs = Aggs(Agg("types", "terms", {"field": "_type"}, sub_agg=top_hits_agg))
    query = ElasticQuery(Query.query_string(marker, fields=fields))
    elastic = Search(search_query=query, idx=ElasticSettings.idx('MARKER'), aggs=aggs, size=0)
    res = elastic.search()

    if res.hits_total >= 1:
        marker_doc = None
        ic_docs = []
        history_docs = []
        for type_bucket in getattr(res.aggs['types'], 'buckets'):
            for raw_hit in type_bucket['top_hits']['hits']['hits']:
                doc = Document(raw_hit)
                type_name = type_bucket['key']
                if type_name == 'marker':
                    marker_doc = doc
                elif type_name == 'immunochip':
                    ic_docs.append(doc)
                elif type_name == 'rs_merge':
                    history_docs.append(doc)

        criteria = {}
        if marker_doc is not None:
            if ElasticSettings.idx('CRITERIA') is not None:
                criteria = views.get_criteria([marker_doc], 'marker', 'id', 'MARKER')
            marker_doc.marker_build = _get_marker_build(ElasticSettings.idx('MARKER'))

        context = {
            'marker': marker_doc,
            'old_dbsnp_docs': _get_old_dbsnps(marker),
            'ic': ic_docs,
            'history': history_docs,
            'criteria': criteria
        }
        return render(request, 'marker/marker.html', context,
                      content_type='text/html')

    if res.hits_total == 0:
        messages.error(request, 'Marker '+marker+' not found.')
    raise Http404()
示例14: filter_queryset
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import search [as 别名]
def filter_queryset(self, request, queryset, view):
    """ Override this method to request just the documents required from Rserve.

    Reads the LD parameters (m1, m2, dataset, window_size, dprime, rsq, maf,
    build, pos) allowed by view.filter_fields from the request, looks up the
    seqid of marker m1 in the MARKER index and calls ld_run on Rserve.
    Returns a single-element list with an ElasticObject built from the JSON
    result, or one carrying an 'error' entry when m1 is missing. Raises
    Http404 on TypeError, ValueError, IndexError or ConnectionError. """
    try:
        filterable = getattr(view, "filter_fields", [])
        filters = {k: v for k, v in request.GET.items() if k in filterable}
        mid1 = filters.get("m1")
        if mid1 is None or mid1 == "":
            return [ElasticObject(initial={"error": "No marker ID provided."})]
        dataset = filters.get("dataset", "EUR").replace("-", "")
        mid2 = filters.get("m2")
        window_size = int(filters.get("window_size", 1000000))
        dprime = filters.get("dprime", 0.0)
        rsq = filters.get("rsq", 0.8)
        # any truthy value supplied for maf / pos switches the flag on
        maf = filters.get("maf", False)
        if maf:
            maf = True
        build_version = filters.get("build", "GRCh38").lower()
        pos = filters.get("pos", False)
        if pos:
            pos = True

        query = ElasticQuery(BoolQuery(must_arr=[Query.term("id", mid1)]), sources=["seqid", "start"])
        elastic = Search(search_query=query, idx=ElasticSettings.idx("MARKER", "MARKER"), size=1)
        # IndexError here (marker not found) is turned into Http404 below
        doc = elastic.search().docs[0]
        seqid = getattr(doc, "seqid")

        rserve = getattr(settings, "RSERVE")
        conn = pyRserve.connect(host=rserve.get("HOST"), port=rserve.get("PORT"))
        try:
            ld_str = conn.r.ld_run(
                dataset,
                seqid,
                mid1,
                marker2=mid2,
                window_size=window_size,
                dprime=dprime,
                rsq=rsq,
                maf=maf,
                position=pos,
                build_version=build_version,
            )
            # rename the dotted result keys before the JSON is parsed
            ld_str = ld_str.replace("D.prime", "dprime").replace("R.squared", "rsquared")
        finally:
            # always release the Rserve connection, also when the call fails
            conn.close()
        return [ElasticObject(initial=json.loads(str(ld_str)))]
    except (TypeError, ValueError, IndexError, ConnectionError):
        raise Http404
示例15: disease_page
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import search [as 别名]
def disease_page(request, disease):
    ''' Renders a disease page for a comma separated list of disease codes.
    Raises Http404 when no disease is given, when nothing is found or when
    nine or more hits are found. '''
    # Validate before lower-casing: calling .lower() on None would raise
    # AttributeError and the check below could never be reached.
    if disease is None:
        messages.error(request, 'No disease given.')
        raise Http404()
    disease = disease.lower()

    # NOTE(review): the split codes are wrapped in an additional list, so the
    # terms query receives a nested list - confirm this is what Query.terms expects.
    query = ElasticQuery(Query.terms("code", [disease.split(',')]))
    elastic = Search(query, idx=ElasticSettings.idx('DISEASE', 'DISEASE'), size=5)
    res = elastic.search()
    if res.hits_total == 0:
        messages.error(request, 'Disease(s) '+disease+' not found.')
    elif res.hits_total < 9:
        names = ', '.join([getattr(doc, 'name') for doc in res.docs])
        context = {'features': res.docs, 'title': names}
        return render(request, 'disease/index.html', context, content_type='text/html')
    raise Http404()