本文整理汇总了Python中elastic.search.ElasticQuery类的典型用法代码示例。如果您正苦于以下问题：Python ElasticQuery类的具体用法？Python ElasticQuery怎么用？Python ElasticQuery使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了ElasticQuery类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _find_snp_position
def _find_snp_position(snp_track, name):
    ''' Look up a marker by name and return its position.

    Without a snp_track the MARKER index is searched on the "id" field.
    Otherwise snp_track is split at its last hyphen into a group and a track,
    and the CP_STATS_<GROUP> index is searched on the "name" field.

    @return: dict with 'chr', 'start' (start - 1), 'end' and 'name' built from
             the first hit, or a dict holding only an 'error' message when the
             search returned no hits.
    '''
    if snp_track is None:
        marker_query = ElasticQuery.query_match("id", name)
        search = Search(marker_query, idx=ElasticSettings.idx('MARKER'))
    else:
        group, track = re.match(r"(.*)-(.*)", snp_track).group(1, 2)
        stats_key = 'CP_STATS_'+group.upper()
        try:
            track_idx = ElasticSettings.idx(stats_key, snp_track.upper())
        except SettingsError:
            # two-argument lookup failed: address the track as "<index>/<track>"
            track_idx = ElasticSettings.idx(stats_key)+"/"+track
        marker_query = ElasticQuery.query_match("name", name)
        search = Search(marker_query, idx=track_idx)

    hits = search.get_json_response()['hits']['hits']
    if len(hits) == 0:
        return {'error': 'Marker '+name+' does not exist in the currently selected dataset'}

    first = hits[0]['_source']
    chrom = first['seqid'].replace('chr', "")
    position = first['start']
    return {'chr': chrom, 'start': (position-1), 'end': position, 'name': name}
示例2: _check_gene_history
def _check_gene_history(cls, gene_sets, config):
    ''' Sort the given gene ids into replaced and discontinued ones.

    Scans the index named in the GENE_HISTORY config section for documents
    whose discontinued_geneid is one of gene_sets.

    @return: tuple (replaced, retired) where replaced maps a discontinued gene
             id to its new gene id and retired lists the discontinued ids that
             have no new gene id; all ids are strings.
    '''
    # TODO: find a way to handle this better
    history = config['GENE_HISTORY']
    replaced = {}
    retired = []

    def _collect(resp_json):
        # callback handed to scan_and_scroll; receives a response with hits
        for hit in resp_json['hits']['hits']:
            doc = Document(hit)
            new_id = getattr(doc, 'geneid')
            old_id = str(getattr(doc, 'discontinued_geneid'))
            if new_id is None:
                retired.append(old_id)
            else:
                replaced[old_id] = str(new_id)

    history_filter = TermsFilter.get_terms_filter("discontinued_geneid", gene_sets)
    query = ElasticQuery.filtered(Query.match_all(), history_filter,
                                  sources=['geneid', 'discontinued_geneid'])
    ScanAndScroll.scan_and_scroll(history['index'], idx_type=history['index_type'],
                                  call_fun=_collect, query=query)
    return (replaced, retired)
示例3: gene_mgi_parse
def gene_mgi_parse(cls, gene_pubs, idx):
    ''' Parse Ensembl and MGI data from JAX.

    Each entry of gene_pubs is a tab separated line whose first column holds
    the MGI id and whose sixth column holds the mouse Ensembl gene id. Gene
    documents in idx whose dbxrefs.orthologs.mmusculus.ensembl matches one of
    the Ensembl ids get the MGI id (without the 'MGI:' prefix) added via a
    bulk partial update.

    @type  gene_pubs: iterable of strings
    @keyword gene_pubs: tab separated MGI/Ensembl lines.
    @type  idx: string
    @keyword idx: name of the index to search and update.
    @raise PipelineError: when a line lacks 'MGI:' in column 1 or 'ENSMUSG'
                          in column 6.
    '''
    orthogenes_mgi = {}
    for gene_mgi in gene_pubs:
        parts = gene_mgi.split('\t')
        if 'MGI:' not in parts[0]:
            raise PipelineError('MGI not found '+parts[0])
        if 'ENSMUSG' not in parts[5]:
            raise PipelineError('ENSMUSG not found '+parts[5])
        orthogenes_mgi[parts[5]] = parts[0].replace('MGI:', '')

    orthogene_keys = list(orthogenes_mgi)
    chunk_size = 450
    for i in range(0, len(orthogene_keys), chunk_size):
        chunk_gene_keys = orthogene_keys[i:i+chunk_size]
        query = ElasticQuery.filtered(Query.match_all(),
                                      TermsFilter.get_terms_filter("dbxrefs.orthologs.mmusculus.ensembl",
                                                                   chunk_gene_keys))
        docs = Search(query, idx=idx, size=chunk_size).search().docs

        # collect the action/doc line pairs and join once, instead of growing
        # a string with += for every document
        bulk_lines = []
        idx_type = None
        for doc in docs:
            ens_id = doc.doc_id()
            idx_type = doc.type()
            mm = getattr(doc, 'dbxrefs')['orthologs']['mmusculus']
            mm['MGI'] = orthogenes_mgi[mm['ensembl']]
            dbxrefs = {"dbxrefs": {'orthologs': {"mmusculus": mm}}}
            doc_data = {"update": {"_id": ens_id, "_type": idx_type,
                                   "_index": idx, "_retry_on_conflict": 3}}
            bulk_lines.append(json.dumps(doc_data))
            bulk_lines.append(json.dumps({'doc': dbxrefs}))

        if bulk_lines:
            # every line, including the last, is newline terminated;
            # idx_type is the type of the last document in the chunk
            Loader().bulk_load(idx, idx_type, '\n'.join(bulk_lines) + '\n')
示例4: _ensembl_entrez_lookup
def _ensembl_entrez_lookup(cls, ensembl_gene_sets, section):
    ''' Get an ensembl:entrez id dictionary.

    Searches section['index'] for documents whose dbxrefs.ensembl is in
    ensembl_gene_sets.

    @return: dict keyed by document id (presumably the Ensembl id - confirm
             against the index) with the document's dbxrefs entrez value.
    '''
    match_all = Query.match_all()
    ensembl_filter = TermsFilter.get_terms_filter("dbxrefs.ensembl", ensembl_gene_sets)
    equery = ElasticQuery.filtered(match_all, ensembl_filter,
                                   sources=['dbxrefs.ensembl', 'dbxrefs.entrez'])
    search = Search(equery, idx=section['index'], size=len(ensembl_gene_sets))

    lookup = {}
    for doc in search.search().docs:
        doc_id = doc.doc_id()
        lookup[doc_id] = getattr(doc, 'dbxrefs')['entrez']
    return lookup
示例5: check_hits
def check_hits(resp_json):
    ''' Compare the positions of a batch of hits with the MARKER index and log mismatches.

    Every hit with an "id" is matched by that id against MARKER/MARKER. For
    marker documents whose info contains 'VC=SNV', the marker start is compared
    with the 'position' returned by self._get_highest_build() for the hit
    (self comes from the enclosing scope). A difference above 1 is logged as
    an error, unless the hit has is_par set or its allele_a is 'D' or 'I'.
    The logged message also lists the markers found at the hit's position and
    any MARKER/HISTORY entries whose rslow is the id.
    '''
    ic_docs = {}
    for hit in resp_json['hits']['hits']:
        hit_doc = Document(hit)
        hit_id = getattr(hit_doc, "id")
        if hit_id is not None:
            ic_docs[hit_id] = hit_doc

    rsids_keys = list(ic_docs)
    terms_filter = TermsFilter.get_terms_filter("id", rsids_keys)
    by_id_query = ElasticQuery.filtered(Query.match_all(), terms_filter)
    by_id_search = Search(by_id_query, idx=ElasticSettings.idx('MARKER', 'MARKER'), size=len(rsids_keys))

    for marker in by_id_search.search().docs:
        if 'VC=SNV' not in getattr(marker, "info"):
            continue

        rsid = getattr(marker, "id")
        ic_doc = ic_docs[rsid]
        marker_pos = getattr(marker, "start")
        ic_pos = self._get_highest_build(ic_doc)['position']
        if abs(int(marker_pos) - int(ic_pos)) <= 1:
            continue

        is_par = getattr(ic_doc, 'is_par')
        allele_a = getattr(ic_doc, 'allele_a')
        if is_par is not None or allele_a == 'D' or allele_a == 'I':
            continue

        msg = ("CHECK IC/DBSNP POSITIONS:: "+getattr(ic_doc, 'name') +
               ' '+str(ic_pos)+" "+rsid+' '+str(marker_pos))

        # markers on the same seqid that start at the hit's position
        pos_query = ElasticQuery.filtered(Query.term("seqid", getattr(marker, 'seqid')),
                                          Filter(Query.term("start", ic_pos)))
        pos_search = Search(pos_query, idx=ElasticSettings.idx('MARKER', 'MARKER'))
        for found in pos_search.search().docs:
            msg += " ("+getattr(found, "id")+":"+str(getattr(found, "start"))+")"

        # history entries recorded with this id as rslow
        history_query = ElasticQuery.filtered(Query.match_all(), Filter(Query.term("rslow", rsid)))
        history_search = Search(history_query, idx=ElasticSettings.idx('MARKER', 'HISTORY'))
        for found in history_search.search().docs:
            msg += " (rshigh:"+str(getattr(found, "rshigh")) + \
                   " build_id:"+str(getattr(found, "build_id"))+")"

        logger.error(msg)
示例6: region_page
def region_page(request, region):
    ''' Render the region page for a region id.

    Searches the REGION index for documents whose attr.region_id matches
    region and renders region/region.html with the search result as context,
    adding the page title "Region".

    @type  request: HttpRequest
    @keyword request: the current request, passed through to render().
    @type  region: string
    @keyword region: region id to match.
    @return: the response produced by render().
    '''
    query = ElasticQuery.query_match("attr.region_id", region)
    elastic = Search(query, idx=ElasticSettings.idx(name='REGION'))
    context = elastic.get_result()
    context['title'] = "Region"
    # the leftover debug print of the whole context was removed: a view should
    # not dump search results to stdout on every request
    return render(request, 'region/region.html', context,
                  content_type='text/html')
示例7: _entrez_ensembl_lookup
def _entrez_ensembl_lookup(cls, gene_sets, section, config=None):
    ''' Get an entrez:ensembl id dictionary.

    The gene ids are first passed through Gene._check_gene_history() and
    Gene._replace_oldids_with_newids(); the resulting ids are then matched
    against dbxrefs.entrez in section['index'].

    @return: dict mapping each matching document's dbxrefs entrez value to the
             document id.
    '''
    newgene_ids, discontinued_ids = Gene._check_gene_history(gene_sets, config)
    current_ids = Gene._replace_oldids_with_newids(gene_sets, newgene_ids, discontinued_ids)

    match_all = Query.match_all()
    entrez_filter = TermsFilter.get_terms_filter("dbxrefs.entrez", current_ids)
    equery = ElasticQuery.filtered(match_all, entrez_filter,
                                   sources=['dbxrefs.ensembl', 'dbxrefs.entrez'])
    search = Search(equery, idx=section['index'], size=len(current_ids))

    lookup = {}
    for doc in search.search().docs:
        entrez_id = getattr(doc, 'dbxrefs')['entrez']
        lookup[entrez_id] = doc.doc_id()
    return lookup
示例8: _build_frags_query
def _build_frags_query(frags_idx, chrom, segmin, segmax):
    ''' Fetch the fragments of a chromosome that end inside a segment.

    Matches seqid against both chrom and "chr"+chrom, keeps documents whose
    "end" lies in segmin..segmax (inclusive) and passes the 'data' of the
    search result through utils.makeRelative() for 'start' and 'end'.

    @return: the value returned by utils.makeRelative().
    '''
    seqid_terms = Query.terms("seqid", [chrom, "chr"+chrom])
    end_in_segment = Filter(RangeQuery("end", gte=segmin, lte=segmax))
    query = ElasticQuery.filtered(seqid_terms, end_in_segment, utils.bedFields)

    search = Search(search_query=query, search_from=0, size=2000000, idx=frags_idx)
    found = search.get_result()['data']
    return utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], found)
示例9: get_elastic_query
def get_elastic_query(cls, section=None, config=None):
    ''' Build the elastic query object for a criteria section.

    @type  section: string
    @keyword section: The section in the criteria.ini file
    @type  config: mapping of section name to section settings
    @keyword config: The config object initialized from criteria.ini.
    @return: L{Query}, or None when the section is not one of the specially
             handled ones and defines no source_fields.
    @raise KeyError: when section is not in config, or when is_gene_in_mhc
                     lacks seqid_param, start_param or end_param.
    '''
    section_config = config[section]
    source_fields = []
    if 'source_fields' in section_config:
        source_fields = section_config['source_fields'].split(',')

    # Defined MHC region as chr6:25,000,000..35,000,000
    mhc_seqid = '6'
    mhc_start = 25000000
    mhc_end = 35000000

    if section == 'is_gene_in_mhc':
        # The *_param settings are only needed by this query, so they are read
        # here rather than for every section whose name contains 'mhc'; the
        # other sections no longer fail with KeyError when they omit them.
        seqid_param = section_config['seqid_param']
        start_param = section_config['start_param']
        end_param = section_config['end_param']
        return ElasticUtils.range_overlap_query(mhc_seqid, mhc_start, mhc_end,
                                                source_fields,
                                                seqid_param,
                                                start_param,
                                                end_param)

    if section == 'is_marker_in_mhc':
        query_bool = BoolQuery()
        query_bool.must(RangeQuery("start", lte=mhc_end)) \
                  .must(RangeQuery("start", gte=mhc_start)) \
                  .must(Query.term("seqid", mhc_seqid))
        return ElasticQuery.filtered_bool(Query.match_all(), query_bool, sources=["id", "seqid", "start"])

    if section == 'is_region_in_mhc':
        return ElasticQuery(Query.term("region_name", "MHC"))

    if section == 'marker_is_gwas_significant_in_ic':
        # range query on the p-value threshold used by the original code
        gw_sig_p = 0.00000005
        return ElasticQuery(RangeQuery("p_value", lte=gw_sig_p))

    if len(source_fields) > 0:
        return ElasticQuery(Query.match_all(), sources=source_fields)

    # no special handling and no source fields: nothing to query
    return None
示例10: test_pub_ini_file2
def test_pub_ini_file2(self):
    ''' Test the publication pipeline load step for the DISEASE::TEST section.

    Runs the 'publications' command, refreshes the DISEASE index named in the
    ini file and expects more than one document tagged with disease "test".
    '''
    captured = StringIO()
    call_command('publications', '--dir', TEST_DATA_DIR, '--steps', 'load',
                 sections='DISEASE::TEST', ini=MY_PUB_INI_FILE, stdout=captured)

    ini_config = IniParser().read_ini(MY_PUB_INI_FILE)
    disease_idx = ini_config['DISEASE']['index']
    Search.index_refresh(disease_idx)

    tag_query = ElasticQuery.query_string("test", fields=["tags.disease"])
    found = Search(tag_query, idx=disease_idx).search().docs
    self.assertGreater(len(found), 1)
示例11: show_es_gene_section
def show_es_gene_section(gene_symbol=None, seqid=None,
                         start_pos=None, end_pos=None):
    ''' Template inclusion tag to render a gene section.

    Genes in the GENE index are selected by symbol when gene_symbol is given.
    Otherwise they are selected on chromosome seqid (any 'chr' is removed):
    with no end_pos, genes with start <= start_pos <= stop; with an end_pos,
    genes with start >= start_pos and stop <= end_pos.

    @return: dict with the matching documents under 'es_genes'.
    '''
    chrom = str(seqid).replace('chr', '')

    if gene_symbol is not None:
        # gene symbol query
        query = ElasticQuery.query_match("symbol", gene_symbol)
    else:
        on_chrom = Query.match("chromosome", chrom)
        if end_pos is None:
            # single position (e.g. a snp): genes spanning start_pos
            bounds = [RangeQuery("start", lte=start_pos),
                      RangeQuery("stop", gte=start_pos)]
        else:
            # start..end range: genes lying inside it
            bounds = [RangeQuery("start", gte=start_pos),
                      RangeQuery("stop", lte=end_pos)]
        query = ElasticQuery.bool(BoolQuery(must_arr=[on_chrom] + bounds))

    search = Search(query, idx=ElasticSettings.idx(name='GENE'))
    return {'es_genes': search.search().docs}
示例12: post
def post(self, request, *args, **kwargs):
    ''' Return the study hits for a gene, a marker or a list of markers as JSON.

    The search term is taken from the POST data: 'ens_id' (matched against
    the "genes" field), else 'marker', else the 'markers[]' list (both matched
    against the "marker" field). Up to 500 hits are read from the
    REGION/STUDY_HITS index. In every hit '_source':
      - 'genes' is replaced by a dict of Ensembl id -> gene symbol (the id
        itself when no gene document was found for it);
      - 'pmid' is replaced by a dict with 'pmid', 'author' (last word of the
        first author's name, "" without authors) and 'journal'; only 'pmid'
        is kept when the publication lookup raises KeyError.

    @return: JsonResponse with the 'hits' part of the search response, or a
             JsonResponse with status 400 when no search term was posted.
    '''
    ens_id = self.request.POST.get('ens_id')
    marker = self.request.POST.get('marker')
    markers = self.request.POST.getlist('markers[]')

    if ens_id:
        sfilter = Filter(Query.query_string(ens_id, fields=["genes"]).query_wrap())
    elif marker:
        sfilter = Filter(Query.query_string(marker, fields=["marker"]).query_wrap())
    elif markers:
        sfilter = Filter(Query.query_string(' '.join(markers), fields=["marker"]).query_wrap())
    else:
        # previously fell through with sfilter unbound (UnboundLocalError)
        return JsonResponse({'error': 'No ens_id, marker or markers[] parameter provided'},
                            status=400)

    query = ElasticQuery.filtered(Query.match_all(), sfilter)
    elastic = Search(query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'), size=500)
    study_hits = elastic.get_json_response()['hits']

    # gather every gene id and pmid first so each lookup is done once
    ens_ids = []
    pmids = []
    for hit in study_hits['hits']:
        source = hit['_source']
        if 'pmid' in source:
            pmids.append(source['pmid'])
        if 'genes' in source:
            ens_ids.extend(source['genes'])
    docs = utils.get_gene_docs_by_ensembl_id(ens_ids, ['symbol'])
    pub_docs = PublicationDocument.get_pub_docs_by_pmid(pmids, sources=['authors.name', 'journal'])

    for hit in study_hits['hits']:
        source = hit['_source']
        genes = {}
        if 'genes' in source:
            for gene_id in source['genes']:
                try:
                    genes[gene_id] = getattr(docs[gene_id], 'symbol')
                except KeyError:
                    # keep the symbols resolved so far; the old code rebound
                    # genes to a new one-entry dict and lost them
                    genes[gene_id] = gene_id
        source['genes'] = genes

        if 'pmid' in source:
            pmid = source['pmid']
            try:
                authors = getattr(pub_docs[pmid], 'authors')
                journal = getattr(pub_docs[pmid], 'journal')
                source['pmid'] = \
                    {'pmid': pmid,
                     'author': authors[0]['name'].rsplit(None, 1)[-1] if authors else "",
                     'journal': journal}
            except KeyError:
                source['pmid'] = {'pmid': pmid}
    return JsonResponse(study_hits)
示例13: _build_frags_query
def _build_frags_query(frags_idx, chrom, segmin, segmax):
    ''' Fetch the fragments of a chromosome that end inside a segment.

    Matches seqid against both chrom and "chr"+chrom, keeps documents whose
    "end" lies in segmin..segmax (inclusive), requests at most 10000 hits and
    passes their '_source' dicts through utils.makeRelative() for 'start'
    and 'end'.

    @return: the value returned by utils.makeRelative().
    '''
    seqid_terms = Query.terms("seqid", [chrom, "chr"+chrom])
    end_in_segment = Filter(RangeQuery("end", gte=segmin, lte=segmax))
    query = ElasticQuery.filtered(seqid_terms, end_in_segment, utils.bedFields)

    search = Search(search_query=query, search_from=0, size=10000, idx=frags_idx)
    response = search.get_json_response()
    sources = [hit['_source'] for hit in response['hits']['hits']]
    return utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], sources)
示例14: _build_exon_query
def _build_exon_query(chrom, segmin, segmax, genes):
    ''' Get the exonic structure for the genes in this section.

    For every gene an exon search is run on "<CP_GENE_IDX>/exons/" matching
    the gene_id against the "name" field and restricted to seqid chrom. The
    result data goes through utils.makeRelative() for 'start' and 'end'.

    @return: dict of gene_id -> exons sorted by "start" (empty for no genes).
    '''
    exons_by_gene = {}
    on_chrom = BoolQuery()
    on_chrom.must([Query.term("seqid", chrom)])
    if len(genes) == 0:
        return exons_by_gene

    by_start = operator.itemgetter("start")
    for gene in genes:
        gene_id = gene["gene_id"]
        query = ElasticQuery.filtered_bool(Query.query_string(gene_id, fields=["name"]),
                                           on_chrom, sources=utils.snpFields)
        exon_idx = getattr(chicp_settings, 'CP_GENE_IDX')+'/exons/'
        search = Search(query, idx=exon_idx, search_from=0, size=2000)
        relative = utils.makeRelative(int(segmin), int(segmax), ['start', 'end'],
                                      search.get_result()['data'])
        exons_by_gene[gene_id] = sorted(relative, key=by_start)
    return exons_by_gene
示例15: _entrez_ensembl_lookup
def _entrez_ensembl_lookup(cls, gene_sets, section, config=None):
    ''' Get an entrez:ensembl id dictionary.

    The gene ids are first passed through Gene._check_gene_history() and
    Gene._replace_oldids_with_newids(); section['index'] is then scanned for
    documents whose dbxrefs.entrez is one of the resulting ids.

    @return: dict mapping each matching document's dbxrefs entrez value to the
             document id.
    '''
    newgene_ids, discontinued_ids = Gene._check_gene_history(gene_sets, config)
    current_ids = Gene._replace_oldids_with_newids(gene_sets, newgene_ids, discontinued_ids)
    lookup = {}

    def _collect(resp_json):
        # callback handed to scan_and_scroll; adds the hits it receives
        for hit in resp_json['hits']['hits']:
            doc = Document(hit)
            entrez_id = getattr(doc, 'dbxrefs')['entrez']
            lookup[entrez_id] = doc.doc_id()

    match_all = Query.match_all()
    entrez_filter = TermsFilter.get_terms_filter("dbxrefs.entrez", current_ids)
    equery = ElasticQuery.filtered(match_all, entrez_filter,
                                   sources=['dbxrefs.ensembl', 'dbxrefs.entrez'])
    ScanAndScroll.scan_and_scroll(section['index'], call_fun=_collect, query=equery)
    return lookup