本文整理汇总了Python中elastic.search.ElasticQuery.filtered方法的典型用法代码示例。如果您正苦于以下问题：Python ElasticQuery.filtered方法的具体用法？Python ElasticQuery.filtered怎么用？Python ElasticQuery.filtered使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类elastic.search.ElasticQuery的用法示例。
在下文中一共展示了ElasticQuery.filtered方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _check_gene_history
# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def _check_gene_history(cls, gene_sets, config):
    '''Split discontinued gene ids into replaced and retired ids.

    Scans the index/type named in ``config['GENE_HISTORY']`` for documents
    whose ``discontinued_geneid`` is one of ``gene_sets``.

    Returns a tuple ``(replacements, retired)``: a dict mapping each
    discontinued id (as str) to its ``geneid`` (as str), and a list of the
    discontinued ids (as str) whose document has no ``geneid``.
    '''
    # TODO: find a way to handle this better (note carried over from the
    # original docstring).
    history = config['GENE_HISTORY']
    replacements = {}
    retired = []

    def process_hits(resp_json):
        # Callback handed to scan_and_scroll; folds one response body into
        # the two accumulators of the enclosing function.
        for doc in list(map(Document, resp_json['hits']['hits'])):
            current_id = getattr(doc, 'geneid')
            old_id = getattr(doc, 'discontinued_geneid')
            if current_id is not None:
                replacements[str(old_id)] = str(current_id)
            else:
                retired.append(str(old_id))

    query = ElasticQuery.filtered(
        Query.match_all(),
        TermsFilter.get_terms_filter("discontinued_geneid", gene_sets),
        sources=['geneid', 'discontinued_geneid'],
    )
    ScanAndScroll.scan_and_scroll(
        history['index'],
        idx_type=history['index_type'],
        call_fun=process_hits,
        query=query,
    )
    return (replacements, retired)
示例2: gene_mgi_parse
# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def gene_mgi_parse(cls, gene_pubs, idx):
    '''Parse Ensembl and MGI data from JAX and push MGI ids into ``idx``.

    Every entry of ``gene_pubs`` is split on tabs; column 0 must contain
    ``MGI:`` and column 5 must contain ``ENSMUSG``, otherwise a
    PipelineError is raised. Documents whose
    ``dbxrefs.orthologs.mmusculus.ensembl`` matches a parsed id are then
    bulk-updated, 450 ids per search, with an added ``MGI`` entry.
    '''
    mgi_by_ensmusg = {}
    for line in gene_pubs:
        columns = line.split('\t')
        mgi_col = columns[0]
        if 'MGI:' not in mgi_col:
            raise PipelineError('MGI not found '+mgi_col)
        ensmusg_col = columns[5]
        if 'ENSMUSG' not in ensmusg_col:
            raise PipelineError('ENSMUSG not found '+ensmusg_col)
        mgi_by_ensmusg[ensmusg_col] = mgi_col.replace('MGI:', '')

    ensmusg_ids = list(mgi_by_ensmusg)
    chunk_size = 450
    for offset in range(0, len(ensmusg_ids), chunk_size):
        batch = ensmusg_ids[offset:offset + chunk_size]
        query = ElasticQuery.filtered(
            Query.match_all(),
            TermsFilter.get_terms_filter("dbxrefs.orthologs.mmusculus.ensembl", batch),
        )
        bulk_lines = []
        for doc in Search(query, idx=idx, size=chunk_size).search().docs:
            ens_id = doc.doc_id()
            idx_type = doc.type()
            mm = getattr(doc, 'dbxrefs')['orthologs']['mmusculus']
            mm['MGI'] = mgi_by_ensmusg[mm['ensembl']]
            action = {"update": {"_id": ens_id, "_type": idx_type,
                                 "_index": idx, "_retry_on_conflict": 3}}
            partial_doc = {'doc': {"dbxrefs": {'orthologs': {"mmusculus": mm}}}}
            # Bulk format: one action line followed by one partial-document line.
            bulk_lines.append(json.dumps(action) + '\n')
            bulk_lines.append(json.dumps(partial_doc) + '\n')
        if bulk_lines:
            # idx_type is the type of the last document seen in this batch.
            Loader().bulk_load(idx, idx_type, ''.join(bulk_lines))
示例3: _ensembl_entrez_lookup
# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def _ensembl_entrez_lookup(cls, ensembl_gene_sets, section):
    '''Get an ensembl:entrez id dictionary.

    Searches ``section['index']`` for documents whose ``dbxrefs.ensembl`` is
    in ``ensembl_gene_sets`` and maps each document id to the document's
    ``dbxrefs['entrez']`` value.
    '''
    equery = ElasticQuery.filtered(
        Query.match_all(),
        TermsFilter.get_terms_filter("dbxrefs.ensembl", ensembl_gene_sets),
        sources=['dbxrefs.ensembl', 'dbxrefs.entrez'],
    )
    # Ask for as many hits as ids were supplied.
    search = Search(equery, idx=section['index'], size=len(ensembl_gene_sets))
    lookup = {}
    for doc in search.search().docs:
        lookup[doc.doc_id()] = getattr(doc, 'dbxrefs')['entrez']
    return lookup
示例4: check_hits
# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def check_hits(resp_json):
    """Log hits whose position disagrees with the MARKER index.

    Documents in ``resp_json`` are keyed by their ``id`` and looked up by
    that id in the MARKER/MARKER index. For each match whose ``info``
    contains ``VC=SNV`` and whose ``start`` differs by more than 1 from the
    position returned by ``self._get_highest_build``, an error message is
    logged, unless the input document has ``is_par`` set or an ``allele_a``
    of ``D`` or ``I``.

    NOTE(review): ``self`` and ``logger`` are taken from the enclosing scope,
    which is not visible here.
    """
    rsids = {}
    for hit_doc in [Document(hit) for hit in resp_json['hits']['hits']]:
        hit_rsid = getattr(hit_doc, "id")
        if hit_rsid is not None:
            rsids[hit_rsid] = hit_doc

    rsids_keys = list(rsids.keys())
    marker_idx = ElasticSettings.idx('MARKER', 'MARKER')
    by_id_query = ElasticQuery.filtered(Query.match_all(),
                                        TermsFilter.get_terms_filter("id", rsids_keys))
    docs_by_rsid = Search(by_id_query, idx=marker_idx, size=len(rsids_keys)).search().docs

    for doc in docs_by_rsid:
        if 'VC=SNV' not in getattr(doc, "info"):
            continue
        rsid = getattr(doc, "id")
        ic_doc = rsids[rsid]
        pos1 = getattr(doc, "start")
        pos2 = self._get_highest_build(ic_doc)['position']
        if abs(int(pos1) - int(pos2)) <= 1:
            continue
        is_par = getattr(ic_doc, 'is_par')
        allele_a = getattr(ic_doc, 'allele_a')
        if is_par is not None or allele_a in ('D', 'I'):
            continue

        msg = "CHECK IC/DBSNP POSITIONS:: "+getattr(ic_doc, 'name')
        msg = msg + ' '+str(pos2)+" "+rsid+' '+str(pos1)

        # Append any markers found on the same sequence at the input position.
        pos_query = ElasticQuery.filtered(Query.term("seqid", getattr(doc, 'seqid')),
                                          Filter(Query.term("start", pos2)))
        for d in Search(pos_query, idx=ElasticSettings.idx('MARKER', 'MARKER')).search().docs:
            msg += " ("+getattr(d, "id")+":"+str(getattr(d, "start"))+")"

        # Append history records whose ``rslow`` equals this rsid.
        history_query = ElasticQuery.filtered(Query.match_all(),
                                              Filter(Query.term("rslow", rsid)))
        for d in Search(history_query, idx=ElasticSettings.idx('MARKER', 'HISTORY')).search().docs:
            msg += " (rshigh:"+str(getattr(d, "rshigh")) + \
                " build_id:"+str(getattr(d, "build_id"))+")"

        logger.error(msg)
示例5: _entrez_ensembl_lookup
# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def _entrez_ensembl_lookup(cls, gene_sets, section, config=None):
    '''Get an entrez:ensembl id dictionary.

    ``gene_sets`` is first passed through ``Gene._check_gene_history`` and
    ``Gene._replace_oldids_with_newids`` (``config`` is forwarded unchanged
    to the former). The resulting ids are searched in ``section['index']``
    on ``dbxrefs.entrez``; the result maps each document's
    ``dbxrefs['entrez']`` value to its document id.
    '''
    history = Gene._check_gene_history(gene_sets, config)
    newgene_ids, discontinued_ids = history
    current_ids = Gene._replace_oldids_with_newids(gene_sets, newgene_ids, discontinued_ids)

    equery = ElasticQuery.filtered(
        Query.match_all(),
        TermsFilter.get_terms_filter("dbxrefs.entrez", current_ids),
        sources=['dbxrefs.ensembl', 'dbxrefs.entrez'],
    )
    search = Search(equery, idx=section['index'], size=len(current_ids))
    lookup = {}
    for doc in search.search().docs:
        lookup[getattr(doc, 'dbxrefs')['entrez']] = doc.doc_id()
    return lookup
示例6: _build_frags_query
# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def _build_frags_query(frags_idx, chrom, segmin, segmax):
    """Fetch fragments ending inside ``[segmin, segmax]`` on ``chrom``.

    Matches ``seqid`` against both ``chrom`` and ``"chr" + chrom``, filters
    on ``end`` within the inclusive range, and returns the ``data`` of the
    search result after ``utils.makeRelative`` has been applied to the
    ``start`` and ``end`` fields.
    """
    seqid_query = Query.terms("seqid", [chrom, "chr"+chrom])
    end_filter = Filter(RangeQuery("end", gte=segmin, lte=segmax))
    query = ElasticQuery.filtered(seqid_query, end_filter, utils.bedFields)

    search = Search(search_query=query, search_from=0, size=2000000, idx=frags_idx)
    rows = search.get_result()['data']
    return utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], rows)
示例7: post
# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def post(self, request, *args, **kwargs):
    """Return study hits for a gene or for one or more markers as JSON.

    Reads ``ens_id``, ``marker`` and ``markers[]`` from the POST data, in
    that order of precedence, and searches the REGION/STUDY_HITS index (at
    most 500 hits). In every hit the ``genes`` list is replaced by a dict
    mapping each id to its gene symbol (or to the id itself when no gene
    document is found), and ``pmid`` is replaced by a dict holding the pmid
    and, when the publication is found, the first author's last name
    component and the journal.

    Returns a 400 JsonResponse when none of the three parameters is given.
    """
    ens_id = self.request.POST.get('ens_id')
    marker = self.request.POST.get('marker')
    markers = self.request.POST.getlist('markers[]')

    if ens_id:
        sfilter = Filter(Query.query_string(ens_id, fields=["genes"]).query_wrap())
    elif marker:
        sfilter = Filter(Query.query_string(marker, fields=["marker"]).query_wrap())
    elif markers:
        sfilter = Filter(Query.query_string(' '.join(markers), fields=["marker"]).query_wrap())
    else:
        # Previously fell through with ``sfilter`` unbound and crashed with
        # an UnboundLocalError; report the bad request instead.
        return JsonResponse({'error': 'One of ens_id, marker or markers[] is required.'},
                            status=400)

    query = ElasticQuery.filtered(Query.match_all(), sfilter)
    elastic = Search(query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'), size=500)
    study_hits = elastic.get_json_response()['hits']

    # First pass: collect every gene id and pmid so each is looked up once.
    ens_ids = []
    pmids = []
    for hit in study_hits['hits']:
        source = hit['_source']
        if 'pmid' in source:
            pmids.append(source['pmid'])
        if 'genes' in source:
            ens_ids.extend(source['genes'])
    docs = utils.get_gene_docs_by_ensembl_id(ens_ids, ['symbol'])
    pub_docs = PublicationDocument.get_pub_docs_by_pmid(pmids, sources=['authors.name', 'journal'])

    # Second pass: decorate the hits in place.
    for hit in study_hits['hits']:
        source = hit['_source']
        genes = {}
        if 'genes' in source:
            for gene_id in source['genes']:
                try:
                    genes[gene_id] = getattr(docs[gene_id], 'symbol')
                except KeyError:
                    # Fall back to the id for this gene only; rebinding the
                    # whole dict here would drop symbols already resolved.
                    genes[gene_id] = gene_id
        source['genes'] = genes

        if 'pmid' in source:
            pmid = source['pmid']
            try:
                authors = getattr(pub_docs[pmid], 'authors')
                journal = getattr(pub_docs[pmid], 'journal')
                source['pmid'] = {
                    'pmid': pmid,
                    'author': authors[0]['name'].rsplit(None, 1)[-1] if authors else "",
                    'journal': journal,
                }
            except KeyError:
                source['pmid'] = {'pmid': pmid}
    return JsonResponse(study_hits)
示例8: _build_frags_query
# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def _build_frags_query(frags_idx, chrom, segmin, segmax):
    """Fetch fragments ending inside ``[segmin, segmax]`` on ``chrom``.

    Matches ``seqid`` against both ``chrom`` and ``"chr" + chrom``, filters
    on ``end`` within the inclusive range (at most 10000 hits), and returns
    the ``_source`` of every hit after ``utils.makeRelative`` has been
    applied to the ``start`` and ``end`` fields.
    """
    seqid_query = Query.terms("seqid", [chrom, "chr"+chrom])
    end_filter = Filter(RangeQuery("end", gte=segmin, lte=segmax))
    query = ElasticQuery.filtered(seqid_query, end_filter, utils.bedFields)

    search = Search(search_query=query, search_from=0, size=10000, idx=frags_idx)
    response = search.get_json_response()
    sources = [hit['_source'] for hit in response['hits']['hits']]
    return utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], sources)
示例9: _entrez_ensembl_lookup
# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def _entrez_ensembl_lookup(cls, gene_sets, section, config=None):
    '''Get an entrez:ensembl id dictionary.

    ``gene_sets`` is first passed through ``Gene._check_gene_history`` and
    ``Gene._replace_oldids_with_newids`` (``config`` is forwarded unchanged
    to the former). The resulting ids are then scanned in
    ``section['index']`` on ``dbxrefs.entrez``; the result maps each
    document's ``dbxrefs['entrez']`` value to its document id.
    '''
    history = Gene._check_gene_history(gene_sets, config)
    newgene_ids, discontinued_ids = history
    current_ids = Gene._replace_oldids_with_newids(gene_sets, newgene_ids, discontinued_ids)
    lookup = {}

    def process_hits(resp_json):
        # Callback handed to scan_and_scroll; merges one response body into
        # the lookup of the enclosing function.
        page = {}
        for doc in [Document(hit) for hit in resp_json['hits']['hits']]:
            page[getattr(doc, 'dbxrefs')['entrez']] = doc.doc_id()
        lookup.update(page)

    equery = ElasticQuery.filtered(
        Query.match_all(),
        TermsFilter.get_terms_filter("dbxrefs.entrez", current_ids),
        sources=['dbxrefs.ensembl', 'dbxrefs.entrez'],
    )
    ScanAndScroll.scan_and_scroll(section['index'], call_fun=process_hits, query=equery)
    return lookup
示例10: _check_gene_history
# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def _check_gene_history(cls, gene_sets, section):
    '''Split discontinued gene ids into replaced and retired ids.

    Searches ``section['index']`` / ``section['index_type_history']`` for
    documents whose ``discontinued_geneid`` is one of ``gene_sets``.

    Returns a tuple ``(replacements, retired)``: a dict mapping each
    discontinued id (as str) to its ``geneid`` (as str), and a list of the
    discontinued ids (as str) whose document has no ``geneid``.
    '''
    history_query = ElasticQuery.filtered(
        Query.match_all(),
        TermsFilter.get_terms_filter("discontinued_geneid", gene_sets),
    )
    search = Search(history_query, idx=section['index'],
                    idx_type=section['index_type_history'], size=1000000)
    history_docs = search.search().docs

    replacements = {}
    retired = []
    for doc in history_docs:
        current_id = getattr(doc, 'geneid')
        old_id = getattr(doc, 'discontinued_geneid')
        if current_id is not None:
            replacements[str(old_id)] = str(current_id)
        else:
            retired.append(str(old_id))
    return (replacements, retired)
示例11: _check_gene_history
# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def _check_gene_history(cls, gene_sets, config):
    '''Split discontinued gene ids into replaced and retired ids.

    Searches the index/type named in ``config['GENE_HISTORY']`` for
    documents whose ``discontinued_geneid`` is one of ``gene_sets``,
    requesting as many hits as ids were supplied.

    Returns a tuple ``(replacements, retired)``: a dict mapping each
    discontinued id (as str) to its ``geneid`` (as str), and a list of the
    discontinued ids (as str) whose document has no ``geneid``.
    '''
    # TODO: find a way to handle this better (note carried over from the
    # original docstring).
    history = config['GENE_HISTORY']
    history_query = ElasticQuery.filtered(
        Query.match_all(),
        TermsFilter.get_terms_filter("discontinued_geneid", gene_sets),
        sources=['geneid', 'discontinued_geneid'],
    )
    search = Search(history_query, idx=history['index'], idx_type=history['index_type'],
                    size=len(gene_sets))
    history_docs = search.search().docs

    replacements = {}
    retired = []
    for doc in history_docs:
        current_id = getattr(doc, 'geneid')
        old_id = getattr(doc, 'discontinued_geneid')
        if current_id is not None:
            replacements[str(old_id)] = str(current_id)
        else:
            retired.append(str(old_id))
    return (replacements, retired)
示例12: _update_gene
# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def _update_gene(cls, genes, idx):
    '''Use genes data to update the index.

    ``genes`` maps a ``dbxrefs.entrez`` value to the partial document to
    merge into the matching gene document. Keys are processed 450 at a
    time: the matching documents are searched in ``idx`` and one bulk
    update request is sent per batch that produced any match.
    '''
    entrez_ids = list(genes)
    chunk_size = 450
    for offset in range(0, len(entrez_ids), chunk_size):
        batch = entrez_ids[offset:offset + chunk_size]
        query = ElasticQuery.filtered(
            Query.match_all(),
            TermsFilter.get_terms_filter("dbxrefs.entrez", batch),
        )
        bulk_lines = []
        for doc in Search(query, idx=idx, size=chunk_size).search().docs:
            ens_id = doc._meta['_id']
            idx_type = doc.type()
            entrez = getattr(doc, 'dbxrefs')['entrez']
            action = {"update": {"_id": ens_id, "_type": idx_type,
                                 "_index": idx, "_retry_on_conflict": 3}}
            # Bulk format: one action line followed by one partial-document line.
            bulk_lines.append(json.dumps(action) + '\n')
            bulk_lines.append(json.dumps({'doc': genes[entrez]}) + '\n')
        if bulk_lines:
            # idx_type is the type of the last document seen in this batch.
            Loader().bulk_load(idx, idx_type, ''.join(bulk_lines))
示例13: studies_details
# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def studies_details(request):
    """Get studies for a given ensembl ID.

    Reads ``ens_id`` from the POST data and searches the REGION/STUDY_HITS
    index on the ``genes`` field (at most 500 hits). In every hit the
    ``genes`` list is replaced by a dict mapping each id to its gene symbol
    (or to the id itself when no gene document is found), and ``pmid`` is
    replaced by a dict holding the pmid and, when the publication is found,
    the first author's last name component and the journal.

    Returns the decorated ``hits`` section as a JsonResponse.
    """
    ens_id = request.POST.get("ens_id")
    sfilter = Filter(Query.query_string(ens_id, fields=["genes"]).query_wrap())
    query = ElasticQuery.filtered(Query.match_all(), sfilter)
    elastic = Search(query, idx=ElasticSettings.idx("REGION", "STUDY_HITS"), size=500)
    study_hits = elastic.get_json_response()["hits"]

    # First pass: collect every gene id and pmid so each is looked up once.
    ens_ids = []
    pmids = []
    for hit in study_hits["hits"]:
        source = hit["_source"]
        if "pmid" in source:
            pmids.append(source["pmid"])
        # A hit without a "genes" field is treated as having no genes
        # rather than raising KeyError.
        ens_ids.extend(source.get("genes", []))
    docs = _get_gene_docs_by_ensembl_id(ens_ids, ["symbol"])
    pub_docs = _get_pub_docs_by_pmid(pmids, sources=["authors.name", "journal"])

    # Second pass: decorate the hits in place.
    for hit in study_hits["hits"]:
        source = hit["_source"]
        genes = {}
        for gene_id in source.get("genes", []):
            try:
                genes[gene_id] = getattr(docs[gene_id], "symbol")
            except KeyError:
                # Fall back to the id for this gene only; rebinding the whole
                # dict here would drop symbols already resolved.
                genes[gene_id] = gene_id
        source["genes"] = genes

        if "pmid" in source:
            pmid = source["pmid"]
            try:
                authors = getattr(pub_docs[pmid], "authors")
                journal = getattr(pub_docs[pmid], "journal")
                source["pmid"] = {
                    "pmid": pmid,
                    # An empty or missing author list yields an empty author
                    # instead of an IndexError/TypeError.
                    "author": authors[0]["name"].rsplit(None, 1)[-1] if authors else "",
                    "journal": journal,
                }
            except KeyError:
                source["pmid"] = {"pmid": pmid}
    return JsonResponse(study_hits)
示例14: _convert_entrezid2ensembl
# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def _convert_entrezid2ensembl(cls, gene_sets, section, log_output_file_handler=None, log_conversion=True):
    '''Converts given set of entrez ids to ensembl ids by querying the gene index dbxrefs.

    Discontinued ids are first resolved through ``cls._check_gene_history``
    and ``cls._replace_oldids_with_newids``. The conversion is logged via
    ``cls._log_entrezid2ensembl_coversion`` only when ``log_conversion`` is
    true and a ``log_output_file_handler`` is supplied.

    Returns the list of ``_id`` values of the matching documents.
    '''
    # Resolve ids through gene_history before querying the gene index.
    newgene_ids, discontinued_ids = cls._check_gene_history(gene_sets, section)
    current_ids = cls._replace_oldids_with_newids(gene_sets, newgene_ids, discontinued_ids)

    query = ElasticQuery.filtered(
        Query.match_all(),
        TermsFilter.get_terms_filter("dbxrefs.entrez", current_ids),
    )
    matches = Search(query, idx=section['index'], size=1000000).search().docs
    ensembl_ids = [doc._meta['_id'] for doc in matches]

    if log_conversion and log_output_file_handler is not None:
        cls._log_entrezid2ensembl_coversion(current_ids, ensembl_ids, log_output_file_handler)
    return ensembl_ids
示例15: genesets_details
# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def genesets_details(request):
    """Get pathway gene sets for a given ensembl ID.

    Reads ``ens_id`` from the POST data and searches the GENE/PATHWAY index
    on the ``gene_sets`` field (at most 500 hits). In every hit the
    ``gene_sets`` list is replaced by a dict mapping each id to its gene
    symbol, or to the id itself when no gene document is found.

    Returns the decorated ``hits`` section as a JsonResponse.
    """
    requested_id = request.POST.get("ens_id")
    geneset_filter = Filter(Query.query_string(requested_id, fields=["gene_sets"]).query_wrap())
    search = Search(
        ElasticQuery.filtered(Query.match_all(), geneset_filter),
        idx=ElasticSettings.idx("GENE", "PATHWAY"),
        size=500,
    )
    genesets_hits = search.get_json_response()["hits"]
    hits = genesets_hits["hits"]

    # Gather every member id up front so symbols are fetched in one lookup.
    member_ids = [gene_id for hit in hits for gene_id in hit["_source"]["gene_sets"]]
    docs = _get_gene_docs_by_ensembl_id(member_ids, ["symbol"])

    for hit in hits:
        symbols = {}
        for gene_id in hit["_source"]["gene_sets"]:
            try:
                symbols[gene_id] = getattr(docs[gene_id], "symbol")
            except KeyError:
                symbols[gene_id] = gene_id
        hit["_source"]["gene_sets"] = symbols
    return JsonResponse(genesets_hits)