本文整理汇总了Python中elastic.query.Query.match_all方法的典型用法代码示例。如果您正苦于以下问题:Python Query.match_all方法的具体用法?Python Query.match_all怎么用?Python Query.match_all使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类elastic.query.Query的用法示例。
在下文中一共展示了Query.match_all方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_elastic_query
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
def get_elastic_query(cls, section=None, config=None):
    ''' Build the elastic query object for a criteria section.

    @type  section: string
    @keyword section: The section in the criteria.ini file.
    @type  config: mapping indexed by section name
    @keyword config: The config object initialized from criteria.ini.
    @return: L{Query} for the section, or None when the section is not one of
             the special-cased names and it configures no source fields.
    '''
    section_config = config[section]

    # 'source_fields' is a comma-separated list; strip each entry and drop
    # empty ones so that "a, b" or an empty value never produces a blank or
    # space-padded field name.
    source_fields = []
    if 'source_fields' in section_config:
        raw_fields = section_config['source_fields'].split(',')
        source_fields = [field.strip() for field in raw_fields if field.strip()]

    if 'mhc' in section:
        # Defined MHC region as chr6:25,000,000..35,000,000
        seqid = '6'
        start_range = 25000000
        end_range = 35000000
        # every *mhc* section is required to name its coordinate fields
        seqid_param = section_config['seqid_param']
        start_param = section_config['start_param']
        end_param = section_config['end_param']

    if section == 'is_gene_in_mhc':
        # genes span a range, so test for overlap with the MHC region
        query = ElasticUtils.range_overlap_query(seqid, start_range, end_range,
                                                 source_fields,
                                                 seqid_param,
                                                 start_param,
                                                 end_param)
    elif section == 'is_marker_in_mhc':
        # markers are matched on their start position only
        query_bool = BoolQuery()
        query_bool.must(RangeQuery("start", lte=end_range)) \
                  .must(RangeQuery("start", gte=start_range)) \
                  .must(Query.term("seqid", seqid))
        query = ElasticQuery.filtered_bool(Query.match_all(), query_bool, sources=["id", "seqid", "start"])
    elif section == 'is_region_in_mhc':
        query = ElasticQuery(Query.term("region_name", "MHC"))
    elif section == 'marker_is_gwas_significant_in_ic':
        # build a range query on the genome-wide significance threshold
        gw_sig_p = 0.00000005
        query = ElasticQuery(RangeQuery("p_value", lte=gw_sig_p))
    elif source_fields:
        query = ElasticQuery(Query.match_all(), sources=source_fields)
    else:
        # nothing to select: the caller gets no query at all
        return None

    return query
示例2: test_region_attributes
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
def test_region_attributes(self):
    ''' Test region attributes.

    Fetches one region document through ElasticUtils.get_rdm_docs, pads it
    with utils.Region.pad_region_doc and asserts that each id listed under
    genes, studies and pmids is found in the corresponding index.
    '''
    region_idx, region_type = ElasticSettings.idx(RegionDataTest.IDX_KEY, 'REGION').split('/')
    region_docs = ElasticUtils.get_rdm_docs(region_idx, region_type, qbool=Query.match_all(),
                                            sources=[], size=1)
    region = utils.Region.pad_region_doc(region_docs[0])

    # (attribute on the padded region, index key, assertion message)
    checks = (
        ("genes", 'GENE', "All genes on region found in GENE index"),
        ("studies", 'STUDY', "All study ids for region found in STUDY index"),
        ("pmids", 'PUBLICATION', "All PMIDs for region found in PUBLICATION index"),
    )
    for attr_name, idx_key, message in checks:
        ids = getattr(region, attr_name)
        if len(ids) == 0:
            # nothing referenced for this attribute, so nothing to verify
            continue
        ids_query = ElasticQuery(Query.ids(ids))
        found = Search(ids_query, idx=ElasticSettings.idx(idx_key, idx_key), size=len(ids)).search()
        self.assertEqual(len(ids), found.hits_total, message)
示例3: filter_queryset
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
def filter_queryset(self, request, queryset, view):
    ''' Override this method to request just the documents required from elastic.

    The page size and offset come from the view's paginator; the queryset
    argument is not used. The total hit count is stored on the view as
    es_count.

    @param request: request whose GET parameters may carry 'feature_type'.
    @param queryset: ignored.
    @param view: view providing paginator and (optionally) filter_fields.
    @return: list of ElasticObject, one per hit, with uuid and criteria_type set.
    '''
    q_size = view.paginator.get_limit(request)
    q_from = view.paginator.get_offset(request)

    # keep only the GET parameters the view declares as filterable
    filterable = getattr(view, 'filter_fields', [])
    filters = {k: v for k, v in request.GET.items() if k in filterable}

    criteria_idx = self._get_index(filters.get('feature_type', 'GENE_CRITERIA'))
    if isinstance(criteria_idx, list):
        # several indices are searched at once as a comma-separated string
        idx = ','.join(ElasticSettings.idx(name) for name in criteria_idx)
    else:
        idx = ElasticSettings.idx(criteria_idx)

    q = ElasticQuery(Query.match_all())
    s = Search(search_query=q, idx=idx, size=q_size, search_from=q_from)
    json_results = s.get_json_response()

    results = []
    for result in json_results['hits']['hits']:
        new_obj = ElasticObject(initial=result['_source'])
        new_obj.uuid = result['_id']
        new_obj.criteria_type = result['_type']
        results.append(new_obj)

    # NOTE(review): presumably read back by the paginator - confirm against the view
    view.es_count = json_results['hits']['total']
    return results
示例4: _check_gene_history
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
def _check_gene_history(cls, gene_sets, config):
    ''' Look the given gene ids up in the gene history index.

    Scans the index/type named in config['GENE_HISTORY'] for documents whose
    discontinued_geneid is in gene_sets.

    @return: tuple (replacements, discontinued) where replacements maps
             str(discontinued_geneid) to str(geneid), and discontinued lists
             str(discontinued_geneid) for documents that have no geneid.
    '''
    history_section = config['GENE_HISTORY']
    replacements = {}
    discontinued = []

    def collect(resp_json):
        ''' Sort each hit of one response into replacements or discontinued. '''
        for doc in list(map(Document, resp_json['hits']['hits'])):
            current_id = getattr(doc, 'geneid')
            old_id = getattr(doc, 'discontinued_geneid')
            if current_id is not None:
                replacements[str(old_id)] = str(current_id)
            else:
                discontinued.append(str(old_id))

    history_query = ElasticQuery.filtered(
        Query.match_all(),
        TermsFilter.get_terms_filter("discontinued_geneid", gene_sets),
        sources=['geneid', 'discontinued_geneid'])
    ScanAndScroll.scan_and_scroll(history_section['index'],
                                  idx_type=history_section['index_type'],
                                  call_fun=collect, query=history_query)
    return (replacements, discontinued)
示例5: gene_mgi_parse
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
def gene_mgi_parse(cls, gene_pubs, idx):
    ''' Parse Ensembl and MGI data from JAX.

    Each input line is tab-separated with the MGI id in column 0 and the
    ENSMUSG id in column 5. Matching documents in idx get their
    dbxrefs.orthologs.mmusculus entry extended with the 'MGI' number via
    bulk partial updates, in chunks of 450 ids.

    @raise PipelineError: if a line lacks 'MGI:' or 'ENSMUSG' in the expected column.
    '''
    mgi_by_ensmusg = {}
    for line in gene_pubs:
        columns = line.split('\t')
        mgi_col = columns[0]
        if 'MGI:' not in mgi_col:
            raise PipelineError('MGI not found '+mgi_col)
        ens_col = columns[5]
        if 'ENSMUSG' not in ens_col:
            raise PipelineError('ENSMUSG not found '+ens_col)
        mgi_by_ensmusg[ens_col] = mgi_col.replace('MGI:', '')

    ensmusg_ids = list(mgi_by_ensmusg)
    chunk_size = 450
    for offset in range(0, len(ensmusg_ids), chunk_size):
        batch = ensmusg_ids[offset:offset+chunk_size]
        query = ElasticQuery.filtered(Query.match_all(),
                                      TermsFilter.get_terms_filter("dbxrefs.orthologs.mmusculus.ensembl",
                                                                   batch))
        bulk_lines = []
        for doc in Search(query, idx=idx, size=chunk_size).search().docs:
            ens_id = doc.doc_id()
            idx_type = doc.type()
            mouse = getattr(doc, 'dbxrefs')['orthologs']['mmusculus']
            mouse['MGI'] = mgi_by_ensmusg[mouse['ensembl']]
            # bulk format: an action line followed by the partial document
            action = {"update": {"_id": ens_id, "_type": idx_type,
                                 "_index": idx, "_retry_on_conflict": 3}}
            partial_doc = {'doc': {"dbxrefs": {'orthologs': {"mmusculus": mouse}}}}
            bulk_lines.append(json.dumps(action) + '\n')
            bulk_lines.append(json.dumps(partial_doc) + '\n')

        if bulk_lines:
            # idx_type is the type of the last document seen in this chunk
            Loader().bulk_load(idx, idx_type, ''.join(bulk_lines))
示例6: _ensembl_entrez_lookup
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
def _ensembl_entrez_lookup(cls, ensembl_gene_sets, section):
    ''' Get an ensembl:entrez id dictionary.

    Searches section['index'] for documents whose dbxrefs.ensembl is in
    ensembl_gene_sets and maps each document id to its dbxrefs entrez value.
    '''
    lookup_query = ElasticQuery.filtered(
        Query.match_all(),
        TermsFilter.get_terms_filter("dbxrefs.ensembl", ensembl_gene_sets),
        sources=['dbxrefs.ensembl', 'dbxrefs.entrez'])
    search = Search(lookup_query, idx=section['index'], size=len(ensembl_gene_sets))

    entrez_by_ensembl = {}
    for doc in search.search().docs:
        ensembl_id = doc.doc_id()
        entrez_by_ensembl[ensembl_id] = getattr(doc, 'dbxrefs')['entrez']
    return entrez_by_ensembl
示例7: test_hit_attributes
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
def test_hit_attributes(self):
    ''' For every index type key in RegionDataTest.IDX_TYPE_KEYS, fetch one
    document through ElasticUtils.get_rdm_docs.

    NOTE(review): the visible body asserts nothing on the fetched documents;
    the example looks truncated - confirm against the full test.
    '''
    for type_key in RegionDataTest.IDX_TYPE_KEYS:
        idx_name, idx_type = ElasticSettings.idx(RegionDataTest.IDX_KEY, type_key).split('/')
        docs = ElasticUtils.get_rdm_docs(idx_name, idx_type, qbool=Query.match_all(),
                                         sources=[], size=1)
示例8: get_rdm_feature_id
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
def get_rdm_feature_id(cls, idx, idx_type, qbool=Query.match_all(), sources=[], field=None):
    ''' Get a random feature id from the indices.

    Takes the first document returned by get_rdm_docs (size=1) and returns
    its document id, or the value of attribute `field` when one is given.
    '''
    rdm_docs = cls.get_rdm_docs(idx, idx_type, qbool=qbool, sources=sources, size=1)
    first_doc = rdm_docs[0]
    return first_doc.doc_id() if field is None else getattr(first_doc, field)
示例9: test_doc
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
def test_doc(self):
    ''' Test return correct type of FeatureDocument.

    Every document from a two-hit search of the GENE test index must be a
    GeneDocument.
    '''
    gene_idx = PydginTestSettings.IDX['GENE']['indexName']
    gene_type = PydginTestSettings.IDX['GENE']['indexType']
    symbol_query = ElasticQuery(Query.match_all(), sources=['symbol'])
    result = Search(search_query=symbol_query, idx=gene_idx, idx_type=gene_type, size=2).search()
    for hit_doc in result.docs:
        self.assertTrue(isinstance(hit_doc, GeneDocument))
示例10: check_hits
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
def check_hits(resp_json):
    ''' Compare the positions of the documents in a search response with the
    MARKER index entries carrying the same id.

    For each SNV marker (info contains 'VC=SNV') whose start differs by more
    than 1 from the position of the response document's highest build, and
    whose response document has no is_par value and an allele_a other than
    'D' or 'I', an error is logged listing the markers found at that
    position and any MARKER/HISTORY entries whose rslow is that id.

    Relies on `self` and `logger` from the enclosing scope.
    '''
    ic_docs_by_rsid = {}
    for hit_doc in [Document(hit) for hit in resp_json['hits']['hits']]:
        hit_rsid = getattr(hit_doc, "id")
        if hit_rsid is not None:
            ic_docs_by_rsid[hit_rsid] = hit_doc

    rsid_list = list(ic_docs_by_rsid)
    terms_filter = TermsFilter.get_terms_filter("id", rsid_list)
    by_id_query = ElasticQuery.filtered(Query.match_all(), terms_filter)
    marker_search = Search(by_id_query, idx=ElasticSettings.idx('MARKER', 'MARKER'), size=len(rsid_list))

    for marker in marker_search.search().docs:
        if 'VC=SNV' not in getattr(marker, "info"):
            continue

        rsid = getattr(marker, "id")
        ic_doc = ic_docs_by_rsid[rsid]
        pos1 = getattr(marker, "start")
        pos2 = self._get_highest_build(ic_doc)['position']
        if abs(int(pos1) - int(pos2)) <= 1:
            # positions agree (to within one base)
            continue

        is_par = getattr(ic_doc, 'is_par')
        allele_a = getattr(ic_doc, 'allele_a')
        if is_par is not None or allele_a == 'D' or allele_a == 'I':
            continue

        msg = ("CHECK IC/DBSNP POSITIONS:: " + getattr(ic_doc, 'name') +
               ' ' + str(pos2) + " " + rsid + ' ' + str(pos1))

        # markers found on the same sequence at the response document's position
        same_pos_query = ElasticQuery.filtered(Query.term("seqid", getattr(marker, 'seqid')),
                                               Filter(Query.term("start", pos2)))
        same_pos_docs = Search(same_pos_query, idx=ElasticSettings.idx('MARKER', 'MARKER')).search().docs
        if len(same_pos_docs) > 0:
            msg += ''.join(" (" + getattr(d, "id") + ":" + str(getattr(d, "start")) + ")"
                           for d in same_pos_docs)

        # history entries whose rslow is this id
        history_query = ElasticQuery.filtered(Query.match_all(), Filter(Query.term("rslow", rsid)))
        history_docs = Search(history_query, idx=ElasticSettings.idx('MARKER', 'HISTORY')).search().docs
        if len(history_docs) > 0:
            msg += ''.join(" (rshigh:" + str(getattr(d, "rshigh")) +
                           " build_id:" + str(getattr(d, "build_id")) + ")"
                           for d in history_docs)

        logger.error(msg)
示例11: test_doc2
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
def test_doc2(self):
    ''' Test return correct type of FeatureDocument using multiple index search.

    Searches the GENE and DISEASE test indices together; each document must
    be a GeneDocument or a DiseaseDocument, and disease documents must have
    a 'code' attribute.
    '''
    gene_idx = PydginTestSettings.IDX['GENE']['indexName']
    disease_idx = PydginTestSettings.IDX['DISEASE']['indexName']
    combined_idx = gene_idx + ',' + disease_idx
    query = ElasticQuery(Query.match_all(), sources=['symbol', 'code'])
    result = Search(search_query=query, idx=combined_idx, size=40).search()
    for hit_doc in result.docs:
        self.assertTrue(isinstance(hit_doc, (GeneDocument, DiseaseDocument)))
        if not isinstance(hit_doc, DiseaseDocument):
            continue
        self.assertTrue(hasattr(hit_doc, 'code'))
示例12: _entrez_ensembl_lookup
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
def _entrez_ensembl_lookup(cls, gene_sets, section, config=None):
    ''' Get an entrez:ensembl id dictionary.

    The ids are first passed through Gene._check_gene_history and
    Gene._replace_oldids_with_newids; the resulting ids are then searched in
    section['index'] on dbxrefs.entrez, and each document's dbxrefs entrez
    value is mapped to its document id.
    '''
    history = Gene._check_gene_history(gene_sets, config)
    new_ids, discontinued = history
    current_ids = Gene._replace_oldids_with_newids(gene_sets, new_ids, discontinued)

    lookup_query = ElasticQuery.filtered(
        Query.match_all(),
        TermsFilter.get_terms_filter("dbxrefs.entrez", current_ids),
        sources=['dbxrefs.ensembl', 'dbxrefs.entrez'])
    search = Search(lookup_query, idx=section['index'], size=len(current_ids))

    ensembl_by_entrez = {}
    for doc in search.search().docs:
        entrez_id = getattr(doc, 'dbxrefs')['entrez']
        ensembl_by_entrez[entrez_id] = doc.doc_id()
    return ensembl_by_entrez
示例13: test_gene_attributes
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
def test_gene_attributes(self):
    '''Fetch random genes from elastic and compare the same with the results fetched via ensembl restful query'''
    idx_key = 'GENE'
    idx_type_key = 'GENE'
    idx = ElasticSettings.idx(idx_key, idx_type_key)
    (idx, idx_type) = idx.split('/')
    docs_by_geneid = DataIntegrityUtils.get_rdm_docs(idx, idx_type, qbool=Query.match_all(), sources=[], size=1)

    # Compiled once, as raw strings so that \w and \d reach the regex engine
    # without relying on Python leaving unknown string escapes untouched.
    # The index name looks like: genes_hg38_v0.0.2
    index_pattern = re.compile(r'genes_\w\w(\d+)', re.IGNORECASE)
    assembly_pattern = re.compile(r'GRCh(\d+)', re.IGNORECASE)

    # "_source":{"symbol": "RP11-376M2.2", "start": 42975689, "biotype": "sense_intronic", "chromosome": "17",
    # "source": "havana", "strand": "-", "stop": 42977275}
    for doc in docs_by_geneid:
        gene_id_pipeline = doc.doc_id()
        index_pipeline = doc.index()
        start_pipeline = getattr(doc, "start")
        stop_pipeline = getattr(doc, "stop")
        chromosome_pipeline = getattr(doc, "chromosome")
        biotype_pipeline = getattr(doc, "biotype")
        # the ensembl data is compared against a numeric strand (-1 / 1)
        strand_pipeline = -1 if getattr(doc, "strand") == '-' else 1
        symbol_pipeline = getattr(doc, "symbol")
        source_pipeline = getattr(doc, "source")

        # assembly number taken from the index name; None if the name does not match
        match = index_pattern.match(index_pipeline)
        assembly_number_pipeline = match.group(1) if match else None

        ensembl_gene_data = DataIntegrityUtils.fetch_from_ensembl(gene_id_pipeline)
        if ensembl_gene_data:
            match = assembly_pattern.match(ensembl_gene_data['assembly_name'])
            assembly_number_ens = match.group(1) if match else None

            self.assertEqual(assembly_number_pipeline, assembly_number_ens, "Assembly number is ok")
            self.assertEqual(gene_id_pipeline, ensembl_gene_data['id'], "Gene Id number is ok")
            self.assertEqual(start_pipeline, ensembl_gene_data['start'], "start is ok")
            self.assertEqual(stop_pipeline, ensembl_gene_data['end'], "stop is ok")
            self.assertEqual(chromosome_pipeline, ensembl_gene_data['seq_region_name'], "chr is ok")
            self.assertEqual(strand_pipeline, ensembl_gene_data['strand'], "strand is ok")
            self.assertEqual(biotype_pipeline, ensembl_gene_data['biotype'], "biotype is ok")
            self.assertEqual(symbol_pipeline, ensembl_gene_data['display_name'], "symbol/display_name is ok")
            self.assertEqual(source_pipeline, ensembl_gene_data['source'], "source is ok")
        else:
            # nothing to compare against, so no assertion is made for this doc
            logger.warning("No test run....no ensembl data via ensembl webservice")
示例14: get_studies
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
def get_studies(cls, study_ids=None, disease_code=None, sources=[], split_name=True):
    ''' Get study documents from the STUDY index, sorted by study_id.

    @keyword study_ids: restrict to these document ids (used only when no
                        disease_code is given).
    @keyword disease_code: restrict to studies whose diseases term matches.
    @keyword sources: source fields to return.
    @keyword split_name: when true, study_name is cut at the first ':'.
    @return: result of Document.sorted_alphanum over at most 200 studies.
    '''
    if disease_code is not None:
        selector = BoolQuery(must_arr=Query.term("diseases", disease_code))
    elif study_ids:
        selector = Query.ids(study_ids)
    else:
        selector = Query.match_all()
    studies_query = ElasticQuery(selector, sources=sources)

    study_search = Search(studies_query, idx=ElasticSettings.idx('STUDY', 'STUDY'), size=200)
    studies = study_search.search().docs
    for study in studies:
        if not split_name:
            continue
        full_name = getattr(study, 'study_name')
        if full_name is not None:
            setattr(study, 'study_name', full_name.split(':', 1)[0])
    return Document.sorted_alphanum(studies, "study_id")
示例15: get_rdm_docs
# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
def get_rdm_docs(cls, idx, idx_type, qbool=Query.match_all(), sources=[], size=1, max_retries=25):
    ''' Get a random doc from the indices.

    The query is wrapped in a function score query whose only function is a
    'random_score' seeded with a fresh random integer, with
    boost_mode='replace'.

    @keyword qbool: query selecting the candidate documents.
    @keyword sources: source fields to return.
    @keyword size: number of documents requested.
    @keyword max_retries: how many times the search is repeated (with a new
                          seed) after an IndexError before giving up.
    @return: the docs of the search result.
    @raise IndexError: if the search still fails after max_retries retries.
    '''
    last_error = None
    # A bounded loop replaces the former unbounded recursion, which ended in
    # a RecursionError if the search kept raising IndexError.
    for _ in range(max(0, max_retries) + 1):
        # new seed on every attempt so a retry can pick different documents
        score_function1 = ScoreFunction.create_score_function('random_score',
                                                              seed=random.randint(0, 1000000))
        search_query = ElasticQuery(FunctionScoreQuery(qbool, [score_function1], boost_mode='replace'),
                                    sources=sources)
        elastic = Search(search_query=search_query, size=size, idx=idx, idx_type=idx_type)
        try:
            return elastic.search().docs
        except IndexError as err:
            last_error = err
    raise last_error