本文整理汇总了 Python 中 elastic.query.Query.query_string 方法的典型用法代码示例。如果您正苦于以下问题:Python Query.query_string 方法的具体用法?Python Query.query_string 怎么用?Python Query.query_string 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 elastic.query.Query 的用法示例。
在下文中一共展示了Query.query_string方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: association_stats
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import query_string [as 别名]
def association_stats(request, sources=None):
    ''' Get association statistics for a chromosome, optionally within a range.

    Reads ``chr`` (a leading 'chr' is removed), ``idx_type``, ``start`` and
    ``end`` from ``request.GET``. When both ``start`` and ``end`` are given the
    search is additionally restricted to that ``position`` range.

    Returns a JsonResponse of the form ``{"variants": [...]}`` with one
    CHROM/POS/PVALUE/DBSNP_ID record per hit.
    '''
    seqid = request.GET.get('chr').replace('chr', '')
    idx_type = request.GET.get('idx_type').upper()
    start = request.GET.get('start')
    end = request.GET.get('end')
    data = []

    def get_stats(resp_json):
        ''' Scroll callback: append one variant record per hit to ``data``. '''
        for hit in resp_json['hits']['hits']:
            d = Document(hit)
            data.append({
                "CHROM": d.seqid,
                "POS": d.position,
                "PVALUE": d.p_value,
                "DBSNP_ID": d.marker
            })

    # Build the chromosome query once and only wrap it in a bool/range query
    # when a complete range was supplied.
    seqid_query = Query.query_string(seqid, fields=["seqid"])
    if start is not None and end is not None:
        query = ElasticQuery(BoolQuery(must_arr=[seqid_query,
                                                 RangeQuery("position", gte=start, lte=end)]),
                             sources=sources)
    else:
        query = ElasticQuery(seqid_query, sources=sources)

    ScanAndScroll.scan_and_scroll(ElasticSettings.idx('IC_STATS', idx_type), call_fun=get_stats, query=query)
    return JsonResponse({"variants": data})
示例2: post
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import query_string [as 别名]
def post(self, request, *args, **kwargs):
    ''' Return the study hits for a gene, a marker or a list of markers as JSON.

    The filter is built from the first non-empty POST parameter out of
    ``ens_id``, ``marker`` and ``markers[]``. If none is supplied a JSON error
    with HTTP status 400 is returned.

    In each hit the ``genes`` list is replaced by an ``{ensembl_id: symbol}``
    mapping (falling back to the ID itself when no gene document is found) and
    ``pmid`` is replaced by a dict holding the pmid, the last word of the first
    author's name and the journal (only the pmid when no publication document
    is found).
    '''
    ens_id = self.request.POST.get('ens_id')
    marker = self.request.POST.get('marker')
    markers = self.request.POST.getlist('markers[]')

    if ens_id:
        sfilter = Filter(Query.query_string(ens_id, fields=["genes"]).query_wrap())
    elif marker:
        sfilter = Filter(Query.query_string(marker, fields=["marker"]).query_wrap())
    elif markers:
        sfilter = Filter(Query.query_string(' '.join(markers), fields=["marker"]).query_wrap())
    else:
        # Without this branch ``sfilter`` would be unbound and the view would
        # fail with an UnboundLocalError.
        return JsonResponse({'error': 'No ens_id, marker or markers[] provided.'}, status=400)

    query = ElasticQuery.filtered(Query.match_all(), sfilter)
    elastic = Search(query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'), size=500)
    study_hits = elastic.get_json_response()['hits']

    # First pass: collect every gene and publication ID so the related
    # documents can be fetched in one request each.
    ens_ids = []
    pmids = []
    for hit in study_hits['hits']:
        source = hit['_source']
        if 'pmid' in source:
            pmids.append(source['pmid'])
        if 'genes' in source:
            ens_ids.extend(source['genes'])
    docs = utils.get_gene_docs_by_ensembl_id(ens_ids, ['symbol'])
    pub_docs = PublicationDocument.get_pub_docs_by_pmid(pmids, sources=['authors.name', 'journal'])

    # Second pass: decorate each hit with gene symbols and publication details.
    for hit in study_hits['hits']:
        source = hit['_source']
        genes = {}
        if 'genes' in source:
            for gene_id in source['genes']:
                try:
                    genes[gene_id] = getattr(docs[gene_id], 'symbol')
                except KeyError:
                    # Keep the symbols resolved so far; only this gene falls
                    # back to its own ID.
                    genes[gene_id] = gene_id
        source['genes'] = genes

        if 'pmid' in source:
            pmid = source['pmid']
            try:
                authors = getattr(pub_docs[pmid], 'authors')
                journal = getattr(pub_docs[pmid], 'journal')
                source['pmid'] = \
                    {'pmid': pmid,
                     'author': authors[0]['name'].rsplit(None, 1)[-1] if authors else "",
                     'journal': journal}
            except KeyError:
                source['pmid'] = {'pmid': pmid}
    return JsonResponse(study_hits)
示例3: filter_queryset
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import query_string [as 别名]
def filter_queryset(self, request, queryset, view):
    ''' Override this method to request feature locations.

    Searches the marker, region and gene indices for the ``feature`` request
    parameter and returns one ElasticObject per matching document, carrying its
    chromosome, start, end and a display string. Raises Http404 on TypeError,
    ValueError, IndexError or ConnectionError.
    '''
    try:
        allowed = getattr(view, 'filter_fields', [])
        filters = {key: value for key, value in request.GET.items() if key in allowed}
        query_str = filters.get('feature', 'PTPN22')
        build = self._get_build(filters.get('build', settings.DEFAULT_BUILD))
        if query_str is None or query_str == '':
            return [ElasticObject(initial={'error': 'No feature name provided.'})]

        search_fields = ['id', 'symbol', 'dbxrefs.ensembl', 'region_name']
        sources = ['start', 'stop', 'seqid', 'chromosome', 'disease_loci']

        # Pick the marker index whose build label matches the requested build:
        # the default 'MARKER' key first, otherwise any key containing 'MARKER'
        # (the last matching key wins).
        idx_keys = ElasticSettings.getattr('IDX')
        if build == ElasticSettings.get_label('MARKER', label='build'):
            marker_key = 'MARKER'
        else:
            marker_key = ''
            for key in idx_keys:
                if 'MARKER' in key and build == ElasticSettings.get_label(key, label='build'):
                    marker_key = key

        # Search the marker, region and gene indices together.
        (idx, idx_type) = ElasticSettings.idx_names(marker_key, 'MARKER')
        (idx_r, idx_type_r) = ElasticSettings.idx_names('REGION', 'REGION')
        (idx_g, idx_type_g) = ElasticSettings.idx_names('GENE', 'GENE')
        idx = idx + ',' + idx_r + ',' + idx_g
        idx_type = idx_type + ',' + idx_type_r + ',' + idx_type_g

        bool_query = BoolQuery(must_arr=Query.query_string(query_str, fields=search_fields))
        elastic = Search(search_query=ElasticQuery(bool_query, sources), size=10, idx=idx, idx_type=idx_type)

        locs = []
        for doc in elastic.search().docs:
            if isinstance(doc, RegionDocument):
                doc = Region.pad_region_doc(doc)
            # The position string is split on ':' into chromosome and
            # coordinates; the coordinates on '-' into start and optional end.
            loc = doc.get_position(build=build).split(':')
            pos = loc[1].replace(',', '').split('-')
            start = int(pos[0])
            end = int(pos[1]) if len(pos) > 1 else int(pos[0])
            locs.append(ElasticObject(
                {'feature': query_str,
                 'chr': loc[0],
                 'start': start,
                 'end': end,
                 'locusString': query_str+" ("+str(loc[1])+")"}))
        return locs
    except (TypeError, ValueError, IndexError, ConnectionError):
        raise Http404
示例4: _gene_lookup
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import query_string [as 别名]
def _gene_lookup(search_term):
    ''' Look for any gene symbols (e.g. PTPN22) and get the corresponding
    Ensembl ID and append to query string.

    Returns ``search_term`` unchanged when it contains any character that is
    neither a word character nor whitespace, or when no gene symbol matches.
    Otherwise returns the matching document IDs, space separated, followed by
    the original ``search_term``.
    '''
    if re.search(r'[^\w\s]', search_term):
        logger.debug('skip gene lookup as contains non-word pattern '+search_term)
        return search_term

    # Raw string: "\w" in a normal string literal is an invalid escape sequence.
    words = re.sub(r"[^\w]", " ", search_term)
    equery = BoolQuery(b_filter=Filter(Query.query_string(words, fields=['symbol'])))
    search_query = ElasticQuery(equery, sources=['symbol'])
    (idx, idx_type) = ElasticSettings.idx('GENE', 'GENE').split('/')
    result = Search(search_query=search_query, size=10, idx=idx, idx_type=idx_type).search()
    if result.hits_total > 0:
        return ' '.join([doc.doc_id() for doc in result.docs]) + ' ' + search_term
    return search_term
示例5: _build_exon_query
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import query_string [as 别名]
def _build_exon_query(chrom, segmin, segmax, genes):
    ''' Get the exonic structure for the genes in this section.

    For each gene the exons index is searched by ``gene_id`` (restricted to
    ``chrom``), the results are passed through ``utils.makeRelative`` with the
    segment bounds, and then sorted by ``start``. Returns a dict keyed by
    ``gene_id``.
    '''
    exons_by_gene = {}
    chrom_filter = BoolQuery()
    chrom_filter.must([Query.term("seqid", chrom)])
    if len(genes) == 0:
        return exons_by_gene

    by_start = operator.itemgetter("start")
    for gene in genes:
        gene_id = gene["gene_id"]
        exon_query = ElasticQuery.filtered_bool(Query.query_string(gene_id, fields=["name"]),
                                                chrom_filter, sources=utils.snpFields)
        exon_idx = getattr(chicp_settings, 'CP_GENE_IDX')+'/exons/'
        search = Search(exon_query, idx=exon_idx, search_from=0, size=2000)
        found = search.get_result()['data']
        relative = utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], found)
        exons_by_gene[gene["gene_id"]] = sorted(relative, key=by_start)
    return exons_by_gene
示例6: marker_page
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import query_string [as 别名]
def marker_page(request):
    ''' Renders a marker page.

    The marker name is read from the ``m`` GET parameter. Raises Http404 when
    the parameter is missing or the search finds no hits; otherwise renders
    'marker/marker.html' with the marker document, the documents from old
    dbSNP indices, the immunochip and rs-merge history documents and the
    criteria.
    '''
    marker = request.GET.get("m")
    if marker is None:
        messages.error(request, 'No marker name given.')
        raise Http404()

    # rs identifiers are matched on 'id'/'rscurrent', anything else on 'name'.
    fields = ['id', 'rscurrent'] if marker.startswith("rs") else ['name']
    # Bucket the hits by document type, keeping up to 15 top hits per bucket.
    sub_agg = Agg('top_hits', 'top_hits', {"size": 15})
    aggs = Aggs(Agg("types", "terms", {"field": "_type"}, sub_agg=sub_agg))
    query = ElasticQuery(Query.query_string(marker, fields=fields))
    elastic = Search(search_query=query, idx=ElasticSettings.idx('MARKER'), aggs=aggs, size=0)
    res = elastic.search()

    if res.hits_total < 1:
        messages.error(request, 'Marker '+marker+' not found.')
        raise Http404()

    marker_doc = None
    ic_docs = []
    history_docs = []
    for doc_type in getattr(res.aggs['types'], 'buckets'):
        type_key = doc_type['key']
        for hit in doc_type['top_hits']['hits']['hits']:
            doc = Document(hit)
            if type_key == 'marker':
                marker_doc = doc
            elif type_key == 'immunochip':
                ic_docs.append(doc)
            elif type_key == 'rs_merge':
                history_docs.append(doc)

    criteria = {}
    if marker_doc is not None:
        if ElasticSettings.idx('CRITERIA') is not None:
            criteria = views.get_criteria([marker_doc], 'marker', 'id', 'MARKER')
        marker_doc.marker_build = _get_marker_build(ElasticSettings.idx('MARKER'))

    context = {
        'marker': marker_doc,
        'old_dbsnp_docs': _get_old_dbsnps(marker),
        'ic': ic_docs,
        'history': history_docs,
        'criteria': criteria
    }
    return render(request, 'marker/marker.html', context,
                  content_type='text/html')
示例7: _get_old_dbsnps
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import query_string [as 别名]
def _get_old_dbsnps(marker):
    ''' Get markers from old versions of DBSNP.

    Every index whose settings key contains 'MARKER_' is searched, in reverse
    sorted order of index name. The first document found in each index is
    tagged with ``marker_build`` and collected. Returns the list of collected
    documents.
    '''
    idx_keys = ElasticSettings.getattr('IDX').keys()
    old_idx_names = [ElasticSettings.idx(key) for key in idx_keys if 'MARKER_' in key]
    old_idx_names.sort(reverse=True)

    found = []
    if len(old_idx_names) == 0:
        return found

    search_query = ElasticQuery(Query.query_string(marker, fields=['id', 'rscurrent']))
    for idx_name in old_idx_names:
        docs = Search(search_query=search_query, idx=idx_name, idx_type='marker').search().docs
        if len(docs) == 0:
            continue
        first_doc = docs[0]
        first_doc.marker_build = _get_marker_build(idx_name)
        found.append(first_doc)
    return found
示例8: get_marker
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import query_string [as 别名]
def get_marker(cls, request, marker, context):
''' Search the MARKER index for ``marker`` and add the results to ``context``.

Raises Http404 when ``marker`` is None or when the search reports zero hits;
otherwise returns the updated ``context``. '''
if marker is None:
messages.error(request, 'No marker name given.')
raise Http404()
# rs identifiers are matched on 'id'/'rscurrent', anything else on 'name'
fields = ['id', 'rscurrent'] if marker.startswith("rs") else ['name']
# bucket the hits by document type, keeping up to 15 top hits per bucket
sub_agg = Agg('top_hits', 'top_hits', {"size": 15})
aggs = Aggs(Agg("types", "terms", {"field": "_type"}, sub_agg=sub_agg))
query = ElasticQuery(Query.query_string(marker, fields=fields))
elastic = Search(search_query=query, idx=ElasticSettings.idx('MARKER'), aggs=aggs, size=0)
res = elastic.search()
title = ''
if res.hits_total >= 1:
types = getattr(res.aggs['types'], 'buckets')
marker_doc = None
ic_docs = []
history_docs = []
for doc_type in types:
hits = doc_type['top_hits']['hits']['hits']
for hit in hits:
doc = PydginDocument.factory(hit)
# the title ends up being the name of the last document that has one
if doc.get_name() is not None:
title = doc.get_name()
# sort the document into marker / immunochip / rs_merge by its bucket key
if 'marker' == doc_type['key']:
marker_doc = doc
elif 'immunochip' == doc_type['key']:
ic_docs.append(doc)
elif 'rs_merge' == doc_type['key']:
history_docs.append(doc)
if marker_doc is not None:
marker_doc.marker_build = _get_marker_build(ElasticSettings.idx('MARKER'))
# NOTE(review): indentation is lost in this listing, so it is unclear whether
# the context updates below are nested under the marker_doc check - confirm
criteria_disease_tags = MarkerView.criteria_disease_tags(request, [marker])
context['criteria'] = criteria_disease_tags
context['features'] = [marker_doc]
context['old_dbsnp_docs'] = _get_old_dbsnps(marker)
context['ic'] = ic_docs
context['history'] = history_docs
context['title'] = title
context['jbrowse_tracks'] = "PydginRegions%2Cdbsnp146%2CEnsemblGenes"
return context
elif res.hits_total == 0:
messages.error(request, 'Marker '+marker+' not found.')
raise Http404()
示例9: studies_details
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import query_string [as 别名]
def studies_details(request):
    """ Get studies for a given ensembl ID.

    The ID is read from the ``ens_id`` POST parameter. In each study hit the
    ``genes`` list is replaced by an ``{ensembl_id: symbol}`` mapping (falling
    back to the ID itself when no gene document is found) and ``pmid`` is
    replaced by a dict holding the pmid, the last word of the first author's
    name and the journal (only the pmid when no publication document is
    found). Returns the decorated hits as a JsonResponse.
    """
    ens_id = request.POST.get("ens_id")
    sfilter = Filter(Query.query_string(ens_id, fields=["genes"]).query_wrap())
    query = ElasticQuery.filtered(Query.match_all(), sfilter)
    elastic = Search(query, idx=ElasticSettings.idx("REGION", "STUDY_HITS"), size=500)
    study_hits = elastic.get_json_response()["hits"]

    # First pass: collect every gene and publication ID so the related
    # documents can be fetched in one request each.
    ens_ids = []
    pmids = []
    for hit in study_hits["hits"]:
        source = hit["_source"]
        if "pmid" in source:
            pmids.append(source["pmid"])
        # A hit without a "genes" key must not abort the whole response.
        if "genes" in source:
            ens_ids.extend(source["genes"])
    docs = _get_gene_docs_by_ensembl_id(ens_ids, ["symbol"])
    pub_docs = _get_pub_docs_by_pmid(pmids, sources=["authors.name", "journal"])

    # Second pass: decorate each hit with gene symbols and publication details.
    for hit in study_hits["hits"]:
        source = hit["_source"]
        genes = {}
        if "genes" in source:
            for gene_id in source["genes"]:
                try:
                    genes[gene_id] = getattr(docs[gene_id], "symbol")
                except KeyError:
                    # Keep the symbols resolved so far; only this gene falls
                    # back to its own ID.
                    genes[gene_id] = gene_id
        source["genes"] = genes

        if "pmid" in source:
            pmid = source["pmid"]
            try:
                authors = getattr(pub_docs[pmid], "authors")
                journal = getattr(pub_docs[pmid], "journal")
                source["pmid"] = {
                    "pmid": pmid,
                    # An empty author list yields "" instead of an IndexError.
                    "author": authors[0]["name"].rsplit(None, 1)[-1] if authors else "",
                    "journal": journal,
                }
            except KeyError:
                source["pmid"] = {"pmid": pmid}
    return JsonResponse(study_hits)
示例10: genesets_details
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import query_string [as 别名]
def genesets_details(request):
    ''' Get pathway gene sets for a given ensembl ID.

    The ID is read from the ``ens_id`` POST parameter. In each pathway hit the
    ``gene_sets`` list is replaced by an ``{ensembl_id: symbol}`` mapping,
    using the ID itself where no gene document is found. Returns the
    decorated hits as a JsonResponse.
    '''
    requested_id = request.POST.get('ens_id')
    wrapped_query = Query.query_string(requested_id, fields=["gene_sets"]).query_wrap()
    query = ElasticQuery.filtered(Query.match_all(), Filter(wrapped_query))
    search = Search(query, idx=ElasticSettings.idx('GENE', 'PATHWAY'), size=500)
    genesets_hits = search.get_json_response()['hits']

    # Gather every member gene across all hits for a single symbol lookup.
    member_ids = [gene_id
                  for hit in genesets_hits['hits']
                  for gene_id in hit['_source']['gene_sets']]
    docs = utils.get_gene_docs_by_ensembl_id(member_ids, ['symbol'])

    def symbol_or_id(gene_id):
        ''' Symbol of the gene document, or the ID when there is no document. '''
        try:
            return getattr(docs[gene_id], 'symbol')
        except KeyError:
            return gene_id

    for hit in genesets_hits['hits']:
        source = hit['_source']
        source['gene_sets'] = {gene_id: symbol_or_id(gene_id) for gene_id in source['gene_sets']}
    return JsonResponse(genesets_hits)
示例11: genesets_details
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import query_string [as 别名]
def genesets_details(request):
    """ Get pathway gene sets for a given ensembl ID.

    The ID is read from the ``ens_id`` POST parameter. In each pathway hit the
    ``gene_sets`` list is replaced by an ``{ensembl_id: symbol}`` mapping,
    using the ID itself where no gene document is found. Returns the
    decorated hits as a JsonResponse.
    """
    requested_id = request.POST.get("ens_id")
    wrapped_query = Query.query_string(requested_id, fields=["gene_sets"]).query_wrap()
    query = ElasticQuery.filtered(Query.match_all(), Filter(wrapped_query))
    search = Search(query, idx=ElasticSettings.idx("GENE", "PATHWAY"), size=500)
    genesets_hits = search.get_json_response()["hits"]

    # Gather every member gene across all hits for a single symbol lookup.
    member_ids = []
    for hit in genesets_hits["hits"]:
        member_ids.extend(hit["_source"]["gene_sets"])
    docs = _get_gene_docs_by_ensembl_id(member_ids, ["symbol"])

    for hit in genesets_hits["hits"]:
        source = hit["_source"]
        symbols = {}
        for gene_id in source["gene_sets"]:
            try:
                label = getattr(docs[gene_id], "symbol")
            except KeyError:
                # No gene document: show the ID itself.
                label = gene_id
            symbols[gene_id] = label
        source["gene_sets"] = symbols
    return JsonResponse(genesets_hits)
示例12: _search_engine
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import query_string [as 别名]
def _search_engine(query_dict, user_filters, user):
''' Carry out a search and add results to the context object.

Returns a dict with the top hits ('data'), the aggregations, the original
user query, the index name, the fields searched, the index mappings, the
total hit count, the per-index hit limit and the time taken. '''
user_query = query_dict.get("query")
# prepend the IDs of any matching gene symbols to the query text
query = _gene_lookup(user_query)
# fields returned with each top hit
source_filter = [
'symbol', 'synonyms', "dbxrefs.*", 'biotype', 'description', # gene
'id', 'rscurrent', 'rshigh', # marker
'journal', 'title', 'tags.disease', # publication
'name', 'code', # disease
'study_id', 'study_name', # study
'region_name', 'marker'] # regions
# a query made up only of digits and spaces may be one or more PMIDs
if re.compile(r'^[0-9 ]+$').findall(query):
source_filter.append('pmid') # publication - possible PMID(s)
search_fields = []
# number of top hits per index; defaults to 20 unless 'maxsize' is supplied
maxsize = 20
if user_filters.getlist("maxsize"):
maxsize = int(user_filters.get("maxsize"))
# build search_fields from user input filter fields
# a value of the form a:b contributes field "b", a:b:c contributes "b.c"
for it in user_filters.items():
if len(it) == 2:
if it[0] == 'query':
continue
parts = it[1].split(":")
if len(parts) == 3:
search_fields.append(parts[1]+"."+parts[2])
elif len(parts) == 2:
search_fields.append(parts[1])
# with no user-selected fields, search the returned fields plus some extras
# NOTE(review): indentation is lost in this listing; which of the following
# statements are nested under this check should be confirmed
if len(search_fields) == 0:
search_fields = list(source_filter)
search_fields.extend(['abstract', 'authors.name', # publication
'authors', 'pmids', # study
'markers', 'genes']) # study/region
source_filter.extend(['date', 'pmid', 'build_id', 'ref', 'alt', 'chr_band',
'disease_locus', 'disease_loci', 'region_id'])
idx_name = query_dict.get("idx")
idx_dict = ElasticSettings.search_props(idx_name, user)
query_filters = _get_query_filters(user_filters, user)
# wrap matches in <strong> tags; number_of_fragments=0 is passed to Highlight
highlight = Highlight(search_fields, pre_tags="<strong>", post_tags="</strong>", number_of_fragments=0)
# top hits per index, plus term counts by biotype and by document type
sub_agg = Agg('idx_top_hits', 'top_hits', {"size": maxsize, "_source": source_filter,
"highlight": highlight.highlight['highlight']})
aggs = Aggs([Agg("idxs", "terms", {"field": "_index"}, sub_agg=sub_agg),
Agg("biotypes", "terms", {"field": "biotype", "size": 0}),
Agg("categories", "terms", {"field": "_type", "size": 0})])
# create score functions
score_fns = _build_score_functions(idx_dict)
# the query text must match; at least one of the _auth_arr(user) clauses
# must match as well (minimum_should_match=1)
equery = BoolQuery(must_arr=Query.query_string(query, fields=search_fields),
should_arr=_auth_arr(user),
b_filter=query_filters,
minimum_should_match=1)
search_query = ElasticQuery(FunctionScoreQuery(equery, score_fns, boost_mode='replace'))
# size=0: only the aggregations are requested, the hits come from top_hits
elastic = Search(search_query=search_query, aggs=aggs, size=0,
idx=idx_dict['idx'], idx_type=idx_dict['idx_type'])
result = elastic.search()
mappings = elastic.get_mapping()
_update_mapping_filters(mappings, result.aggs)
_update_biotypes(user_filters, result)
return {'data': _top_hits(result), 'aggs': result.aggs,
'query': user_query, 'idx_name': idx_name,
'fields': search_fields, 'mappings': mappings,
'hits_total': result.hits_total,
'maxsize': maxsize, 'took': result.took}
示例13: chicpeaSearch
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import query_string [as 别名]
def chicpeaSearch(request, url):
''' Search the interaction data for a gene, SNP or region given in the
``searchTerm`` / ``region`` GET parameters and return a JsonResponse.

The listing is truncated, so only the visible part is documented here. '''
queryDict = request.GET
user = request.user
targetIdx = queryDict.get("targetIdx")
blueprint = {}
hic = []
addList = []
searchType = 'gene'
# normalise the search term: upper case, no commas, ".." becomes "-", no spaces
searchTerm = queryDict.get("searchTerm").upper()
searchTerm = searchTerm.replace(",", "")
searchTerm = searchTerm.replace("..", "-")
searchTerm = searchTerm.replace(" ", "") # Chris suggestion to prevent issue with spaces in queries
snpTrack = queryDict.get("snp_track")
(idx_keys_auth, idx_type_keys_auth) = get_authenticated_idx_and_idx_types(
user=user, idx_keys=None, idx_type_keys=None)
# drop the SNP track unless its group is 'ud' or its key is in the
# authenticated index-type keys
if snpTrack:
# NOTE(review): mo is None when snpTrack contains no '-', which would make
# the next line raise AttributeError - confirm the expected format
mo = re.match(r"(.*)-(.*)", snpTrack)
(group, track) = mo.group(1, 2) # @UnusedVariable
if group != 'ud' and 'CP_STATS_'+group.upper()+'.'+snpTrack.upper() not in idx_type_keys_auth:
snpTrack = None
# fill utils.tissues with the tissue types read from each target index mapping
if targetIdx not in utils.tissues:
for target in getattr(chicp_settings, 'CP_TARGET'):
if 'CP_TARGET_'+target not in idx_keys_auth:
# an unauthorised target is skipped, or reported if it is the one requested
if targetIdx == target:
retJSON = {'error': 'Sorry, you do not have permission to view this dataset.'}
return JsonResponse(retJSON)
continue
elasticJSON = Search(idx=ElasticSettings.idx('CP_TARGET_'+target)).get_mapping(mapping_type="gene_target")
tissueList = list(elasticJSON[ElasticSettings.idx('CP_TARGET_'+target)]
['mappings']['gene_target']['_meta']['tissue_type'].keys())
utils.tissues['CP_TARGET_'+target] = tissueList
# a 'region' parameter or a term shaped like chr:start-end means a region search
if queryDict.get("region") or re.match(r"(.*):(\d+)-(\d+)", searchTerm):
searchType = 'region'
region = searchTerm
if queryDict.get("region"):
region = queryDict.get("region")
else:
searchTerm = ""
mo = re.match(r"(.*):(\d+)-(\d+)", region)
(chrom, segmin, segmax) = mo.group(1, 2, 3)
chrom = chrom.replace('chr', "")
chrom = chrom.replace('CHR', "")
# a term starting with rs<digits> is looked up as a SNP position
if re.search("^rs[0-9]+", searchTerm.lower()):
searchTerm = searchTerm.lower()
addList.append(_find_snp_position(snpTrack, searchTerm))
if addList[0].get("error"):
return JsonResponse({'error': addList[0]['error']})
position = addList[0]['end']
if searchType != 'region':
searchType = 'snp'
# NOTE(review): logger.warn is a deprecated alias of logger.warning
logger.warn("### "+searchType+" - "+searchTerm+' ###')
if searchType == 'region':
query_bool = BoolQuery()
filter_bool = BoolQuery()
# NOTE(review): "== None" would normally be written "is None"
if searchTerm and len(addList) == 0 and re.match(r"(.*):(\d+)-(\d+)",
queryDict.get("searchTerm").replace(",", "")) == None:
query_bool.must([Query.query_string(searchTerm, fields=["name", "ensg"]),
Query.term("baitChr", chrom),
Query.term("oeChr", chrom),
RangeQuery("dist", gte=-2e6, lte=2e6)])
else:
query_bool.must([Query.term("baitChr", chrom),
Query.term("oeChr", chrom),
RangeQuery("dist", gte=-2e6, lte=2e6)])
query_bool = _add_tissue_filter(query_bool, targetIdx)
# with a SNP position: bait or other end must span it;
# otherwise: bait or other end must lie within segmin..segmax
if len(addList) > 0:
filter_bool.should([BoolQuery(must_arr=[RangeQuery("baitStart", lte=position),
RangeQuery("baitEnd", gte=position)]),
BoolQuery(must_arr=[RangeQuery("oeStart", lte=position),
RangeQuery("oeEnd", gte=position)])])
else:
filter_bool.should([BoolQuery(must_arr=[RangeQuery("baitStart", gte=segmin, lte=segmax),
RangeQuery("baitEnd", gte=segmin, lte=segmax)]),
BoolQuery(must_arr=[RangeQuery("oeStart", gte=segmin, lte=segmax),
RangeQuery("oeEnd", gte=segmin, lte=segmax)])])
query = ElasticQuery.filtered_bool(query_bool, filter_bool,
sources=utils.hicFields + utils.tissues['CP_TARGET_'+targetIdx])
(hic, v1, v2) = _build_hic_query(query, targetIdx, segmin, segmax) # @UnusedVariable
if "error" in hic:
return JsonResponse(hic)
if len(hic) == 0:
retJSON = {'error': queryDict.get("searchTerm")+' does not overlap any bait/target regions in this dataset.'}
return JsonResponse(retJSON)
elif searchType == 'snp':
if len(addList) > 0:
chrom = addList[0]['chr']
query_bool = BoolQuery()
query_bool.must([Query.term("baitChr", chrom),
#.........part of the code omitted here.........
示例14: do_identifier_search
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import query_string [as 别名]
def do_identifier_search(cls, identifiers, user=None):
''' Search the region, gene, study and marker indices for the given
identifiers and group the matching feature IDs by the highlighted term.

The listing is truncated, so only the visible part is documented here. '''
# fields that are both searched and returned
source_filter = [
'symbol', 'synonyms', "dbxrefs.*", # gene
'id', 'rscurrent', 'rshigh', # marker
'study_id', 'study_name', # study
'region_name', 'marker', "region_id"] # regions
highlight = Highlight(["symbol", "dbxrefs.*", "region", "region_name", "region_id",
"study_id", "study_name", "id", "rscurrent", "rshigh", "marker"])
# all identifiers are joined with spaces into a single query string
search_query = ElasticQuery(Query.query_string(" ".join(identifiers), fields=source_filter),
highlight=highlight, sources=source_filter)
search_idx_keys = ['REGION', 'GENE', 'STUDY', 'MARKER']
search_idx_type_keys = ['REGION', 'GENE', 'STUDY', 'MARKER']
# pair each index key with its type key and join the resolved names with commas
idx_all = [ElasticSettings.idx_names(idx, idx_type=idx_type) for idx, idx_type in zip(search_idx_keys,
search_idx_type_keys)]
idx_dict = dict(idx_all)
search_idx = ','.join(idx_dict.keys())
search_idx_types = ','.join(idx_dict.values())
elastic = Search(search_query=search_query, idx=search_idx, idx_type=search_idx_types)
# one result dict per feature type, keyed by the highlighted term
gene_dict = {}
region_dict = {}
marker_dict = {}
study_dict = {}
docs = elastic.search().docs
for doc in docs:
existing_feature_list = []
idx = getattr(doc, '_meta')['_index']
idx_type = getattr(doc, '_meta')['_type']
doc_id = doc.doc_id()
highlight = doc.highlight()
if highlight is not None:
# take the first <em>...</em> fragment of the highlight as the matched term
pattern = ".*?<em>(.*?)</em>.*"
result = re.match(pattern, str(highlight))
if result is not None:
highlight_hit = result.group(1)
# NOTE(review): in each branch below the entry is initialised to {} but then
# overwritten with a list; with the indentation lost in this listing it is
# unclear which statements sit under the membership test - confirm
if idx_type == "studies":
feature_id = getattr(doc, "study_id")
if highlight_hit not in study_dict:
study_dict[highlight_hit] = {}
if feature_id in study_dict[highlight_hit]:
existing_feature_list = study_dict[highlight_hit]
existing_feature_list.append(feature_id)
study_dict[highlight_hit] = existing_feature_list
if idx_type == "gene":
# genes are identified by the document ID
feature_id = doc_id
if highlight_hit not in gene_dict:
gene_dict[highlight_hit] = {}
if feature_id in gene_dict[highlight_hit]:
existing_feature_list = gene_dict[highlight_hit]
existing_feature_list.append(feature_id)
gene_dict[highlight_hit] = existing_feature_list
if idx_type == "marker":
feature_id = getattr(doc, "id")
if highlight_hit not in marker_dict:
marker_dict[highlight_hit] = {}
if feature_id in marker_dict[highlight_hit]:
existing_feature_list = marker_dict[highlight_hit]
existing_feature_list.append(feature_id)
marker_dict[highlight_hit] = existing_feature_list
if idx_type == "region":
feature_id = getattr(doc, "region_id")
if highlight_hit not in region_dict:
region_dict[highlight_hit] = {}
if feature_id in region_dict[highlight_hit]:
existing_feature_list = region_dict[highlight_hit]
existing_feature_list.append(feature_id)
region_dict[highlight_hit] = existing_feature_list
# combine the per-type results under fixed keys
all_result_dict = {}
all_result_dict['gene'] = gene_dict
all_result_dict['marker'] = marker_dict
all_result_dict['region'] = region_dict
all_result_dict['study'] = study_dict
#.........part of the code omitted here.........
示例15: chicpeaSearch
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import query_string [as 别名]
def chicpeaSearch(request, url):
''' Search the interaction data for a gene, SNP or region given in the
``searchTerm`` / ``region`` GET parameters and return a JsonResponse.

The listing is truncated, so only the visible part is documented here. '''
queryDict = request.GET
targetIdx = queryDict.get("targetIdx")
blueprint = {}
hic = []
addList = []
searchType = 'gene'
searchTerm = queryDict.get("searchTerm").upper()
# fill utils.tissues with the tissue types read from each target index mapping
if targetIdx not in utils.tissues:
for idx in getattr(chicp_settings, 'TARGET_IDXS'):
elasticJSON = Search(idx=idx).get_mapping(mapping_type="gene_target")
tissueList = list(elasticJSON[idx]['mappings']['gene_target']['_meta']['tissue_type'].keys())
utils.tissues[idx] = tissueList
# a 'region' parameter or a term shaped like chr:start-end means a region search
if queryDict.get("region") or re.match(r"(.*):(\d+)-(\d+)", queryDict.get("searchTerm")):
searchType = 'region'
region = queryDict.get("searchTerm")
if queryDict.get("region"):
region = queryDict.get("region")
else:
searchTerm = ""
mo = re.match(r"(.*):(\d+)-(\d+)", region)
(chrom, segmin, segmax) = mo.group(1, 2, 3)
chrom = chrom.replace('chr', "")
# a term starting with rs<digits> is looked up as a SNP position
if re.search("^rs[0-9]+", queryDict.get("searchTerm").lower()):
searchTerm = queryDict.get("searchTerm").lower()
addList.append(_find_snp_position(queryDict.get("snp_track"), searchTerm))
if addList[0].get("error"):
return JsonResponse({'error': addList[0]['error']})
position = addList[0]['end']
if searchType != 'region':
searchType = 'snp'
# NOTE(review): logger.warn is a deprecated alias of logger.warning
logger.warn("### "+searchType+" - "+searchTerm+' ###')
if searchType == 'region':
query_bool = BoolQuery()
filter_bool = BoolQuery()
# NOTE(review): "== None" would normally be written "is None"
if searchTerm and len(addList) == 0 and re.match(r"(.*):(\d+)-(\d+)", queryDict.get("searchTerm")) == None:
query_bool.must([Query.query_string(searchTerm, fields=["name", "ensg"]),
Query.term("baitChr", chrom),
Query.term("oeChr", chrom),
RangeQuery("dist", gte=-2e6, lte=2e6)])
else:
query_bool.must([Query.term("baitChr", chrom),
Query.term("oeChr", chrom),
RangeQuery("dist", gte=-2e6, lte=2e6)])
query_bool = _add_tissue_filter(query_bool, targetIdx)
# with a SNP position: bait or other end must span it;
# otherwise: bait or other end must lie within segmin..segmax
if len(addList) > 0:
filter_bool.should([BoolQuery(must_arr=[RangeQuery("baitStart", lte=position),
RangeQuery("baitEnd", gte=position)]),
BoolQuery(must_arr=[RangeQuery("oeStart", lte=position),
RangeQuery("oeEnd", gte=position)])])
else:
filter_bool.should([BoolQuery(must_arr=[RangeQuery("baitStart", gte=segmin, lte=segmax),
RangeQuery("baitEnd", gte=segmin, lte=segmax)]),
BoolQuery(must_arr=[RangeQuery("oeStart", gte=segmin, lte=segmax),
RangeQuery("oeEnd", gte=segmin, lte=segmax)])])
query = ElasticQuery.filtered_bool(query_bool, filter_bool, sources=utils.hicFields + utils.tissues[targetIdx])
(hic, v1, v2) = _build_hic_query(query, targetIdx, segmin, segmax)
# print(hic)
if len(hic) == 0:
retJSON = {'error': queryDict.get("searchTerm")+' does not overlap any bait/target regions in this dataset.'}
return JsonResponse(retJSON)
elif searchType == 'snp':
# SNP search: same chromosome and distance constraints, filtered to
# interactions whose bait or other end spans the SNP position
if len(addList) > 0:
chrom = addList[0]['chr']
query_bool = BoolQuery()
query_bool.must([Query.term("baitChr", chrom),
Query.term("oeChr", chrom),
RangeQuery("dist", gte=-2e6, lte=2e6)])
query_bool = _add_tissue_filter(query_bool, targetIdx)
filter_bool = BoolQuery()
filter_bool.should([BoolQuery(must_arr=[RangeQuery("baitStart", lte=position),
RangeQuery("baitEnd", gte=position)]),
BoolQuery(must_arr=[RangeQuery("oeStart", lte=position),
RangeQuery("oeEnd", gte=position)])])
query = ElasticQuery.filtered_bool(query_bool, filter_bool,
sources=utils.hicFields + utils.tissues[targetIdx])
hic, segmin, segmax = _build_hic_query(query, targetIdx)
if len(hic) == 0:
retJSON = {'error': 'Marker '+searchTerm+' does not overlap any bait/target regions in this dataset.'}
return JsonResponse(retJSON)
else:
# gene search: match the term on name / ensg / oeName within the distance limit
query_bool = BoolQuery()
query_bool.must([RangeQuery("dist", gte=-2e6, lte=2e6)])
query_bool = _add_tissue_filter(query_bool, targetIdx)
query = ElasticQuery.filtered_bool(Query.query_string(searchTerm, fields=["name", "ensg", "oeName"]),
query_bool, sources=utils.hicFields + utils.tissues[targetIdx])
#.........part of the code omitted here.........