本文整理汇总了 Python 中 elastic.query.Query 类的典型用法代码示例。如果您正苦于以下问题：Python Query 类的具体用法？Python Query 怎么用？Python Query 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 Query 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: association_stats
def association_stats(request, sources=None):
    ''' Get association statistics for a chromosome, optionally within a range.

    Reads C{chr}, C{idx_type} and the optional C{start}/C{end} values from
    C{request.GET}, hands a collecting callback to
    C{ScanAndScroll.scan_and_scroll} and returns what it gathered as JSON.

    @type  request: request object exposing a C{GET} mapping
    @param request: the incoming request.
    @type  sources: list
    @keyword sources: fields to retrieve; passed through to ElasticQuery.
    @return: JsonResponse of the form {"variants": [...]}, or a 400 response
             when C{chr} or C{idx_type} is missing.
    '''
    chrom = request.GET.get('chr')
    idx_type = request.GET.get('idx_type')
    if chrom is None or idx_type is None:
        # Without this guard a missing parameter raised AttributeError on None.
        return JsonResponse({"error": "chr and idx_type parameters are required"}, status=400)

    seqid = chrom.replace('chr', '')
    idx_type = idx_type.upper()
    start = request.GET.get('start')
    end = request.GET.get('end')
    data = []

    def get_stats(resp_json):
        # Callback invoked with each response; appends one row per hit.
        for hit in resp_json['hits']['hits']:
            d = Document(hit)
            data.append({
                "CHROM": getattr(d, 'seqid'),
                "POS": getattr(d, 'position'),
                "PVALUE": getattr(d, 'p_value'),
                "DBSNP_ID": getattr(d, 'marker')
            })

    seqid_query = Query.query_string(seqid, fields=["seqid"])
    if start is not None and end is not None:
        # The position range is only applied when both bounds are supplied.
        search_query = BoolQuery(must_arr=[seqid_query,
                                           RangeQuery("position", gte=start, lte=end)])
    else:
        search_query = seqid_query
    query = ElasticQuery(search_query, sources=sources)

    ScanAndScroll.scan_and_scroll(ElasticSettings.idx('IC_STATS', idx_type), call_fun=get_stats, query=query)
    # Build the payload inline; a local named `json` would shadow the module name.
    return JsonResponse({"variants": data})
示例2: get_hits_by_study_id
def get_hits_by_study_id(cls, study_id, sources=None):
    ''' Get visible/authenticated hits for a study.

    Searches the REGION/STUDY_HITS index for hits whose C{dil_study_id}
    matches, filtered with C{Query.missing_terms("field", "group_name")}
    (presumably: documents without a group restriction - confirm against the
    Query API). Each returned document is then decorated in place:
      - C{genes} becomes a dict of Ensembl id -> gene symbol (the id itself
        when no gene document is found for it);
      - C{loc} and C{encoded_loc} are set from the C{build_info} entry whose
        build equals C{settings.DEFAULT_BUILD}.

    @type  study_id: string
    @param study_id: value matched against C{dil_study_id}.
    @type  sources: list
    @keyword sources: fields to retrieve (default: empty list).
    @return: list of decorated hit documents.
    '''
    # A fresh list per call instead of a shared mutable default argument.
    sources = [] if sources is None else sources
    hits_query = ElasticQuery(BoolQuery(must_arr=Query.term('dil_study_id', study_id),
                                        b_filter=Filter(Query.missing_terms("field", "group_name"))),
                              sources=sources)
    docs = Search(hits_query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'), size=1000).search().docs

    ens_ids = [gene for doc in docs if getattr(doc, 'genes') for gene in getattr(doc, 'genes')]
    gene_docs = utils.get_gene_docs_by_ensembl_id(ens_ids, ['symbol'])

    for doc in docs:
        doc_genes = getattr(doc, 'genes')
        if doc_genes:
            genes = {}
            for ens_id in doc_genes:
                try:
                    genes[ens_id] = getattr(gene_docs[ens_id], 'symbol')
                except KeyError:
                    # Fall back to the id for this gene only; rebinding the
                    # whole dict here used to drop every gene resolved so far.
                    genes[ens_id] = ens_id
            setattr(doc, 'genes', genes)

        for bi in getattr(doc, 'build_info'):
            if bi['build'] == settings.DEFAULT_BUILD:
                # locale.format_string replaces locale.format, which was
                # deprecated and is no longer available in Python 3.12.
                setattr(doc, "loc", "chr" + bi['seqid'] + ":" +
                        str(locale.format_string("%d", bi['start'], grouping=True)) + "-" +
                        str(locale.format_string("%d", bi['end'], grouping=True)))
                setattr(doc, "encoded_loc", "chr" + bi['seqid'] + "%3A" +
                        str(bi['start']) + ".." + str(bi['end']))
    return docs
示例3: get_interaction_doc
def get_interaction_doc(self, interaction_source='intact', parent_id=None):
    ''' Fetch one interaction document together with its parent gene document.

    @type  interaction_source: string
    @keyword interaction_source: value matched against C{interaction_source}.
    @type  parent_id: string
    @keyword parent_id: when given, the interaction must also have this C{_parent}.
    @return: tuple (interaction doc, parent doc).
    '''
    gene_idx_key = 'GENE'
    interactions_type_key = 'INTERACTIONS'
    parent_type_key = 'GENE'

    index_name, type_name = ElasticSettings.idx(gene_idx_key, interactions_type_key).split('/')

    if parent_id:
        source_query = BoolQuery().must([Query.term("interaction_source", interaction_source),
                                         Query.term("_parent", parent_id)])
    else:
        source_query = BoolQuery().should([Query.term("interaction_source", interaction_source)])

    # Random document, or a specific one when a parent id narrows the query.
    candidates = DataIntegrityUtils.get_rdm_docs(index_name, type_name, qbool=source_query,
                                                 sources=[], size=1)
    interaction_doc = candidates[0]

    # Look up the parent of the selected interaction.
    parent_id = interaction_doc.parent()
    parent_docs = DataIntegrityUtils.fetch_from_elastic(gene_idx_key, parent_type_key, [parent_id])

    if not parent_docs:
        # NOTE(review): the retry always uses "intact" rather than the
        # interaction_source argument, and nothing bounds the recursion -
        # confirm both are intended.
        return self.get_interaction_doc("intact", parent_id)

    self.assertTrue(len(parent_docs) >= 1, "Found 1 parent")
    return interaction_doc, parent_docs[0]
示例4: test_region_attributes
def test_region_attributes(self):
    ''' Check that ids attached to a padded region exist in their own indices.

    Takes one document of the REGION type, pads it with
    C{utils.Region.pad_region_doc} and, for each non-empty id list on the
    result, asserts that an ids query against the matching index returns
    as many hits as there are ids.
    '''
    index_name, type_name = ElasticSettings.idx(RegionDataTest.IDX_KEY, 'REGION').split('/')
    sample_docs = ElasticUtils.get_rdm_docs(index_name, type_name, qbool=Query.match_all(),
                                            sources=[], size=1)
    padded_region = utils.Region.pad_region_doc(sample_docs[0])

    # (attribute on the region, ElasticSettings.idx arguments, assertion message)
    checks = (
        ("genes", ('GENE', 'GENE'), "All genes on region found in GENE index"),
        ("studies", ('STUDY', 'STUDY'), "All study ids for region found in STUDY index"),
        ("pmids", ('PUBLICATION', 'PUBLICATION'), "All PMIDs for region found in PUBLICATION index"),
    )
    for attr_name, idx_keys, message in checks:
        linked_ids = getattr(padded_region, attr_name)
        if len(linked_ids) == 0:
            continue
        ids_query = ElasticQuery(Query.ids(linked_ids))
        result = Search(ids_query, idx=ElasticSettings.idx(*idx_keys),
                        size=len(linked_ids)).search()
        self.assertEqual(len(linked_ids), result.hits_total, message)
示例5: fetch_overlapping_features
def fetch_overlapping_features(cls, build, seqid, start, end, idx=None, idx_type=None, disease_id=None):
    ''' Fetch features overlapping a given stretch of region.

    The build info is stored as a nested document, so a nested query is built.
    A feature matches when its C{build_info} entry has the requested build
    and seqid and either starts inside the region, ends inside the region,
    or spans the whole region. When C{disease_id} is given, the feature's
    C{disease} must additionally equal its lower-cased value.

    @type  build: string
    @param build: build info eg: 'GRCh38'
    @type  seqid: string
    @param seqid: chromosome number
    @type  start: string
    @param start: region start
    @type  end: string
    @param end: region end
    @type  idx: string
    @param idx: name of the index
    @type  idx_type: string
    @param idx_type: name of the idx type, each criteria is an index type
    @type  disease_id: string
    @param disease_id: disease code
    @return: documents returned by the search.
    '''
    sources = ['build_info.*', 'disease_locus', 'disease', 'chr_band', 'species']

    # Feature spans the whole region: starts at/before start, ends at/after end.
    spans_region = BoolQuery()
    spans_region.must(RangeQuery("build_info.start", lte=start)) \
                .must(RangeQuery("build_info.end", gte=end))

    # ...or has its start or its end inside the region.
    overlap_filter = OrFilter(RangeQuery("build_info.start", gte=start, lte=end))
    overlap_filter.extend(RangeQuery("build_info.end", gte=start, lte=end)) \
                  .extend(spans_region)

    # Built before branching so the positional constraints apply in both
    # cases; the disease branch used to nest an empty BoolQuery and so
    # ignored build, seqid and the overlap filter.
    build_info_query = BoolQuery()
    build_info_query.must(Query.term("build_info.build", build)) \
                    .must(Query.term("build_info.seqid", seqid)) \
                    .filter(overlap_filter)
    nested_build_info = Query.nested('build_info', build_info_query)

    if disease_id:
        disease_query = BoolQuery()
        disease_query.must(Query.term("disease", disease_id.lower())).must(nested_build_info)
        query = ElasticQuery(disease_query, sources=sources)
    else:
        query = ElasticQuery(nested_build_info, sources=sources)

    return Search(query, idx=idx, idx_type=idx_type).search().docs
示例6: _auth_arr
def _auth_arr(user):
    ''' Build the query list used in a BoolQuery to retrieve public documents
    plus those belonging to the user's groups.

    @param user: user whose groups are looked up with C{get_user_groups}.
    @return: list holding the public-documents query and, unless the group
             lookup raises Http404, a wrapped terms query on the lower-cased
             group names.
    '''
    public_docs = Query.missing_terms("field", "group_name")  # all public documents
    try:
        group_names = [gp.lower() for gp in get_user_groups(user)]
        # all documents in the user group
        group_docs = Query.terms("group_name", group_names).query_wrap()
    except Http404:
        # not logged in
        return [public_docs]
    return [public_docs, group_docs]
示例7: get_studies
def get_studies(cls, study_ids=None, disease_code=None, sources=None, split_name=True):
    ''' Get study documents from the STUDY index (at most 200).

    The query is chosen in this order: by C{disease_code} when it is not
    None, else by C{study_ids} when non-empty, else match-all.

    @type  study_ids: list
    @keyword study_ids: document ids to fetch.
    @type  disease_code: string
    @keyword disease_code: value matched against the C{diseases} field.
    @type  sources: list
    @keyword sources: fields to retrieve (default: empty list).
    @type  split_name: boolean
    @keyword split_name: when true, C{study_name} is truncated at the first ':'.
    @return: result of C{Document.sorted_alphanum} on the studies by C{study_id}.
    '''
    # A fresh list per call instead of a shared mutable default argument.
    sources = [] if sources is None else sources

    # Pick the query once rather than building a match-all query and
    # then throwing it away.
    if disease_code is not None:
        search_query = BoolQuery(must_arr=Query.term("diseases", disease_code))
    elif study_ids:
        search_query = Query.ids(study_ids)
    else:
        search_query = Query.match_all()
    studies_query = ElasticQuery(search_query, sources=sources)

    studies = Search(studies_query, idx=ElasticSettings.idx('STUDY', 'STUDY'), size=200).search().docs
    if split_name:
        for doc in studies:
            study_name = getattr(doc, 'study_name')
            if study_name is not None:
                setattr(doc, 'study_name', study_name.split(':', 1)[0])
    return Document.sorted_alphanum(studies, "study_id")
示例8: get_elastic_query
def get_elastic_query(cls, section=None, config=None):
    ''' Build the elastic query object for a criteria section.

    @type  section: string
    @keyword section: The section in the criteria.ini file
    @type  config: string
    @keyword config: The config object initialized from criteria.ini.
    @return: L{Query}, or None when the section is neither a recognised
             special case nor configured with C{source_fields}.
    '''
    section_config = config[section]

    source_fields = []
    if 'source_fields' in section_config:
        source_fields = section_config['source_fields'].split(',')

    # Default result: an unrecognised '*mhc*' section used to leave the
    # variable unbound and fail with UnboundLocalError at the return.
    query = None

    if 'mhc' in section:
        # Defined MHC region as chr6:25,000,000..35,000,000
        seqid = '6'
        start_range = 25000000
        end_range = 35000000

        if section == 'is_gene_in_mhc':
            # for region you should make a different query
            # The *_param keys are only needed (and only read) for this query.
            query = ElasticUtils.range_overlap_query(seqid, start_range, end_range,
                                                     source_fields,
                                                     section_config['seqid_param'],
                                                     section_config['start_param'],
                                                     section_config['end_param'])
        elif section == 'is_marker_in_mhc':
            query_bool = BoolQuery()
            query_bool.must(RangeQuery("start", lte=end_range)) \
                      .must(RangeQuery("start", gte=start_range)) \
                      .must(Query.term("seqid", seqid))
            query = ElasticQuery.filtered_bool(Query.match_all(), query_bool,
                                               sources=["id", "seqid", "start"])
        elif section == 'is_region_in_mhc':
            query = ElasticQuery(Query.term("region_name", "MHC"))
    elif section == 'marker_is_gwas_significant_in_ic':
        # build a range query
        gw_sig_p = 0.00000005
        query = ElasticQuery(RangeQuery("p_value", lte=gw_sig_p))
    elif source_fields:
        query = ElasticQuery(Query.match_all(), sources=source_fields)

    return query
示例9: show_disease
def show_disease(disease, scores, text=True, selected=None, href="/disease/"):
    ''' Template inclusion tag to render disease bar.

    @param disease: a disease document, or a string. The string 'OD' yields
                    a built-in "Other Diseases" document; any other string is
                    looked up in the DISEASE index by lower-cased code or name.
    @param scores: sequence whose first element is used as the score; an
                   empty value ('', empty list, None) gives an empty score.
    @keyword text: passed through to the template context.
    @keyword selected: passed through to the template context.
    @keyword href: passed through to the template context.
    @return: context dict with keys disease, score, text, selected and href.
    '''
    if isinstance(disease, str):
        if disease == 'OD':
            disease = Document({"_source": {"code": "OD", "colour": "grey", "name": "Other Diseases"}})
        else:
            term = disease.lower()
            query = ElasticQuery(BoolQuery(should_arr=[Query.term('code', term),
                                                       Query.term('name', term)]))
            # NOTE(review): raises IndexError when no disease matches.
            disease = Search(query, idx=ElasticSettings.idx('DISEASE'), size=1).search().docs[0]

    # Truthiness covers '' as before and also an empty list or None, which
    # previously raised IndexError / TypeError on scores[0].
    score = scores[0] if scores else ''
    return {'disease': disease, 'score': score, 'text': text, 'selected': selected, 'href': href}
示例10: post
def post(self, request, *args, **kwargs):
    ''' Return study hits for a gene, a marker or a list of markers as JSON.

    Reads C{ens_id}, C{marker} and C{markers[]} from C{self.request.POST}
    (first non-empty one wins, in that order) and searches the
    REGION/STUDY_HITS index. In each hit's C{_source}:
      - C{genes} is replaced by a dict of Ensembl id -> gene symbol
        (the id itself when no gene document is found);
      - C{pmid} is replaced by a dict with the pmid and, when the
        publication is found, the first author's last word and the journal.

    @return: JsonResponse with the decorated C{hits} section of the search
             response, or a 400 response when no search parameter is posted.
    '''
    ens_id = self.request.POST.get('ens_id')
    marker = self.request.POST.get('marker')
    markers = self.request.POST.getlist('markers[]')

    if ens_id:
        query_str, field = ens_id, "genes"
    elif marker:
        query_str, field = marker, "marker"
    elif markers:
        query_str, field = ' '.join(markers), "marker"
    else:
        # Without any parameter the filter variable was never bound and the
        # view failed with UnboundLocalError.
        return JsonResponse({'error': 'one of ens_id, marker or markers[] is required'}, status=400)
    sfilter = Filter(Query.query_string(query_str, fields=[field]).query_wrap())

    query = ElasticQuery.filtered(Query.match_all(), sfilter)
    elastic = Search(query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'), size=500)
    study_hits = elastic.get_json_response()['hits']

    # Collect every referenced gene and publication so each is fetched once.
    ens_ids = []
    pmids = []
    for hit in study_hits['hits']:
        source = hit['_source']
        if 'pmid' in source:
            pmids.append(source['pmid'])
        if 'genes' in source:
            ens_ids.extend(source['genes'])
    docs = utils.get_gene_docs_by_ensembl_id(ens_ids, ['symbol'])
    pub_docs = PublicationDocument.get_pub_docs_by_pmid(pmids, sources=['authors.name', 'journal'])

    for hit in study_hits['hits']:
        source = hit['_source']
        genes = {}
        if 'genes' in source:
            for gene_id in source['genes']:
                try:
                    genes[gene_id] = getattr(docs[gene_id], 'symbol')
                except KeyError:
                    # Fall back to the id for this gene only; rebinding the
                    # whole dict here used to drop every gene resolved so far.
                    genes[gene_id] = gene_id
        source['genes'] = genes

        if 'pmid' in source:
            pmid = source['pmid']
            try:
                authors = getattr(pub_docs[pmid], 'authors')
                journal = getattr(pub_docs[pmid], 'journal')
                source['pmid'] = \
                    {'pmid': pmid,
                     'author': authors[0]['name'].rsplit(None, 1)[-1] if authors else "",
                     'journal': journal}
            except KeyError:
                source['pmid'] = {'pmid': pmid}
    return JsonResponse(study_hits)
示例11: _build_exon_query
def _build_exon_query(chrom, segmin, segmax, genes):
    ''' Get the exonic structure for the genes in this section.

    @param chrom: value matched against the C{seqid} field.
    @param segmin: segment start, converted with int() before use.
    @param segmax: segment end, converted with int() before use.
    @param genes: sequence of mappings, each with a C{gene_id} entry.
    @return: dict of gene_id -> exons sorted by C{start}, after
             C{utils.makeRelative} has been applied to them.
    '''
    exons_by_gene = {}
    seqid_filter = BoolQuery()
    seqid_filter.must([Query.term("seqid", chrom)])

    if len(genes) == 0:
        return exons_by_gene

    for gene in genes:
        name_query = Query.query_string(gene["gene_id"], fields=["name"])
        exon_query = ElasticQuery.filtered_bool(name_query, seqid_filter, sources=utils.snpFields)
        search = Search(exon_query, idx=chicp_settings.CP_GENE_IDX+'/exons/', search_from=0, size=2000)
        gene_exons = search.get_result()['data']
        gene_exons = utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], gene_exons)
        exons_by_gene[gene["gene_id"]] = sorted(gene_exons, key=operator.itemgetter("start"))
    return exons_by_gene
示例12: pad_region_doc
def pad_region_doc(cls, region):
    '''Adds details of disease_loci & hits for a given region doc.

    Searches the REGION/STUDY_HITS index for hits whose C{disease_locus} is
    one of the region's C{disease_loci}, then sets on the region:
      - C{build_info}: build, the region's seqid and the min start / max end
        aggregated over the hits' nested build_info entries for that build;
      - C{hits}: the hit document ids;
      - C{markers}, C{genes}, C{studies}, C{pmids}: de-duplicated values
        collected from the hits.

    @param region: region document; modified in place.
    @return: the same region document.
    '''
    # Build number used both in the aggregation filter and in the stored
    # build_info, kept in one place so the two cannot drift apart.
    build = 38

    hits_idx = ElasticSettings.idx('REGION', 'STUDY_HITS')
    disease_loci = getattr(region, "disease_loci")

    locus_start = Agg('region_start', 'min', {'field': 'build_info.start'})
    locus_end = Agg('region_end', 'max', {'field': 'build_info.end'})
    match_agg = Agg('filtered_result', 'filter', Query.match("build_info.build", build).query_wrap(),
                    sub_agg=[locus_start, locus_end])
    build_info_agg = Agg('build_info', 'nested', {"path": 'build_info'}, sub_agg=[match_agg])

    query = ElasticQuery(FilteredQuery(Query.terms("disease_locus", disease_loci),
                                       Filter(BoolQuery(should_arr=[Query.missing_terms("field", "group_name")]
                                                        ))))
    resultObj = Search(search_query=query, idx=hits_idx, aggs=Aggs(build_info_agg)).search()

    hit_ids = []
    markers = []
    genes = []
    studies = []
    pmids = []
    for doc in resultObj.docs:
        hit_ids.append(doc.doc_id())
        markers.append(getattr(doc, "marker"))
        doc_genes = getattr(doc, "genes", None)
        # Identity test against None (not `!=`); extend takes the iterable
        # directly, no intermediate copy needed.
        if doc_genes is not None:
            genes.extend(doc_genes)
        studies.append(getattr(doc, "dil_study_id"))
        pmids.append(getattr(doc, "pmid"))

    # NOTE(review): int() fails if the aggregation value is None, which is
    # presumably what comes back when no hit matches - confirm.
    filtered = getattr(resultObj.aggs['build_info'], 'filtered_result')
    region_start = int(filtered['region_start']['value'])
    region_end = int(filtered['region_end']['value'])

    build_info = {
        'build': build,
        'seqid': getattr(region, "seqid"),
        'start': region_start,
        'end': region_end
    }
    setattr(region, "build_info", build_info)
    setattr(region, "hits", hit_ids)
    setattr(region, "markers", list(set(markers)))
    setattr(region, "genes", list(set(genes)))
    setattr(region, "studies", list(set(studies)))
    setattr(region, "pmids", list(set(pmids)))
    return region
示例13: get_criteria
def get_criteria(docs, doc_type, doc_attr, idx_type_key):
    """ Return a dictionary of gene name:criteria.

    Names are taken (lower-cased) from C{doc_attr} of every document whose
    type equals C{doc_type} and looked up by C{Name} in the CRITERIA index
    type given by C{idx_type_key}. Each criteria entry is a single-item dict
    mapping the attribute name (without "_Hs") to its value split on ":";
    values starting with "0" are skipped and the C{OD_Hs} attribute is
    appended last under the key "OD".

    @return: dict of Name -> list of criteria entries; empty dict when the
             CRITERIA index type is not configured.
    """
    names = [getattr(doc, doc_attr).lower() for doc in docs if doc.type() == doc_type]
    name_query = Query.terms("Name", names)
    excluded_sources = {"exclude": ["Primary id", "Object class", "Total score"]}
    if ElasticSettings.idx("CRITERIA", idx_type_key) is None:
        return {}

    elastic_query = ElasticQuery(name_query, sources=excluded_sources)
    result = Search(elastic_query, idx=ElasticSettings.idx("CRITERIA", idx_type_key),
                    size=len(names)).search()

    # Attributes that never become a regular criteria entry.
    skipped = ("Name", "_meta", "OD_Hs")
    criteria = {}
    for doc in result.docs:
        ordered = collections.OrderedDict(sorted(vars(doc).items(), key=lambda item: item[0]))
        gene_name = getattr(doc, "Name")
        entries = []
        for attr, value in ordered.items():
            if attr in skipped or value.startswith("0"):
                continue
            entries.append({attr.replace("_Hs", ""): value.split(":")})
        if hasattr(doc, "OD_Hs") and not getattr(doc, "OD_Hs").startswith("0"):
            entries.append({"OD": getattr(doc, "OD_Hs").split(":")})
        criteria[gene_name] = entries
    return criteria
示例14: _check_gene_history
def _check_gene_history(cls, gene_sets, config):
    ''' Look up discontinued gene ids in the gene history index.

    TODO: find a way to handle this better.

    @param gene_sets: values matched against C{discontinued_geneid}.
    @param config: config object with a C{GENE_HISTORY} section providing
                   C{index} and C{index_type}.
    @return: tuple (dict of discontinued id -> replacement gene id,
             list of discontinued ids that have no replacement), all as strings.
    '''
    history_cfg = config['GENE_HISTORY']
    replaced_by = {}
    without_replacement = []

    def collect(resp_json):
        # Callback invoked with each response of the scan.
        for hit in resp_json['hits']['hits']:
            doc = Document(hit)
            current_id = getattr(doc, 'geneid')
            old_id = str(getattr(doc, 'discontinued_geneid'))
            if current_id is None:
                without_replacement.append(old_id)
            else:
                replaced_by[old_id] = str(current_id)

    terms_filter = TermsFilter.get_terms_filter("discontinued_geneid", gene_sets)
    query = ElasticQuery.filtered(Query.match_all(), terms_filter,
                                  sources=['geneid', 'discontinued_geneid'])
    ScanAndScroll.scan_and_scroll(history_cfg['index'], idx_type=history_cfg['index_type'],
                                  call_fun=collect, query=query)
    return (replaced_by, without_replacement)
示例15: gene_mgi_parse
def gene_mgi_parse(cls, gene_pubs, idx):
    ''' Parse Ensembl and MGI data from JAX.

    Each input line is tab-separated with the MGI id in column 0 and the
    mouse Ensembl id in column 5. Documents whose
    C{dbxrefs.orthologs.mmusculus.ensembl} matches are updated, in chunks,
    with the corresponding MGI id via a bulk load.

    @param gene_pubs: iterable of tab-separated lines.
    @param idx: name of the index to search and update.
    @raise PipelineError: when a line lacks 'MGI:' in column 0 or
                          'ENSMUSG' in column 5.
    '''
    mgi_by_ensembl = {}
    for line in gene_pubs:
        columns = line.split('\t')
        mgi_col = columns[0]
        if 'MGI:' not in mgi_col:
            raise PipelineError('MGI not found '+mgi_col)
        ens_col = columns[5]
        if 'ENSMUSG' not in ens_col:
            raise PipelineError('ENSMUSG not found '+ens_col)
        mgi_by_ensembl[ens_col] = mgi_col.replace('MGI:', '')

    ensembl_ids = list(mgi_by_ensembl)
    chunk_size = 450
    for offset in range(0, len(ensembl_ids), chunk_size):
        chunk_ids = ensembl_ids[offset:offset+chunk_size]
        terms_filter = TermsFilter.get_terms_filter("dbxrefs.orthologs.mmusculus.ensembl",
                                                    chunk_ids)
        query = ElasticQuery.filtered(Query.match_all(), terms_filter)
        docs = Search(query, idx=idx, size=chunk_size).search().docs

        # Bulk payload: one action line followed by one partial-document line.
        bulk_lines = []
        for doc in docs:
            ens_id = doc.doc_id()
            idx_type = doc.type()
            mm = getattr(doc, 'dbxrefs')['orthologs']['mmusculus']
            mm['MGI'] = mgi_by_ensembl[mm['ensembl']]
            dbxrefs = {"dbxrefs": {'orthologs': {"mmusculus": mm}}}
            doc_data = {"update": {"_id": ens_id, "_type": idx_type,
                                   "_index": idx, "_retry_on_conflict": 3}}
            bulk_lines.append(json.dumps(doc_data) + '\n')
            bulk_lines.append(json.dumps({'doc': dbxrefs}) + '\n')

        if bulk_lines:
            # idx_type is the type of the last document in the chunk.
            Loader().bulk_load(idx, idx_type, ''.join(bulk_lines))