当前位置: 首页>>代码示例>>Python>>正文


Python Query.match_all方法代码示例

本文整理汇总了Python中elastic.query.Query.match_all方法的典型用法代码示例。如果您正苦于以下问题：Python Query.match_all方法的具体用法？Python Query.match_all怎么用？Python Query.match_all使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类elastic.query.Query的用法示例。


在下文中一共展示了Query.match_all方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_elastic_query

# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
    def get_elastic_query(cls, section=None, config=None):
        ''' Build the elastic query object for one criteria section.
        @type  section: string
        @keyword section: The section in the criteria.ini file
        @type  config:  config object
        @keyword config: The config object initialized from criteria.ini,
                         indexed by section name.
        @return: L{Query}, or None when the section is not one of the
                 special cases below and lists no source_fields.
        '''
        section_config = config[section]

        # Optional comma separated list of fields to retrieve.
        if 'source_fields' in section_config:
            source_fields = section_config['source_fields'].split(',')
        else:
            source_fields = []

        if 'mhc' in section:
            # Defined MHC region as chr6:25,000,000..35,000,000
            seqid, start_range, end_range = '6', 25000000, 35000000

            # Every section with 'mhc' in its name must provide these keys.
            seqid_param = section_config['seqid_param']
            start_param = section_config['start_param']
            end_param = section_config['end_param']

        if section == 'is_gene_in_mhc':
            # for region you should make a different query
            return ElasticUtils.range_overlap_query(seqid, start_range, end_range,
                                                    source_fields,
                                                    seqid_param,
                                                    start_param,
                                                    end_param)

        if section == 'is_marker_in_mhc':
            marker_filter = BoolQuery()
            (marker_filter.must(RangeQuery("start", lte=end_range))
                          .must(RangeQuery("start", gte=start_range))
                          .must(Query.term("seqid", seqid)))
            return ElasticQuery.filtered_bool(Query.match_all(), marker_filter,
                                              sources=["id", "seqid", "start"])

        if section == 'is_region_in_mhc':
            return ElasticQuery(Query.term("region_name", "MHC"))

        if section == 'marker_is_gwas_significant_in_ic':
            # range query on the genome wide significance p-value threshold
            gw_sig_p = 0.00000005
            return ElasticQuery(RangeQuery("p_value", lte=gw_sig_p))

        # Generic section: only query when there are fields to fetch.
        if not source_fields:
            return None
        return ElasticQuery(Query.match_all(), sources=source_fields)
开发者ID:D-I-L,项目名称:django-criteria,代码行数:55,代码来源:criteria.py

示例2: test_region_attributes

# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
    def test_region_attributes(self):
        ''' Check the ids listed on a region doc are all found in their own index.

        One region document is fetched with get_rdm_docs and padded; for each
        of its non-empty genes, studies and pmids lists an ids query is run
        and the total hit count is compared with the list length.
        '''
        region_idx, region_type = ElasticSettings.idx(RegionDataTest.IDX_KEY, 'REGION').split('/')
        docs = ElasticUtils.get_rdm_docs(region_idx, region_type, qbool=Query.match_all(),
                                         sources=[], size=1)
        region = utils.Region.pad_region_doc(docs[0])

        # (region attribute, index key, index type key, assertion message)
        checks = (
            ("genes", 'GENE', 'GENE', "All genes on region found in GENE index"),
            ("studies", 'STUDY', 'STUDY', "All study ids for region found in STUDY index"),
            ("pmids", 'PUBLICATION', 'PUBLICATION', "All PMIDs for region found in PUBLICATION index"),
        )
        for attr, idx_key, idx_type_key, msg in checks:
            ids = getattr(region, attr)
            if len(ids) == 0:
                continue
            ids_query = ElasticQuery(Query.ids(ids))
            result = Search(ids_query, idx=ElasticSettings.idx(idx_key, idx_type_key),
                            size=len(ids)).search()
            self.assertEqual(len(ids), result.hits_total, msg)
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:29,代码来源:test_regions.py

示例3: filter_queryset

# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
    def filter_queryset(self, request, queryset, view):
        ''' Override this method to request just the documents required from elastic.

        The page size and offset come from the view's paginator. The GET
        parameters are reduced to those named in the view's C{filter_fields};
        of these only C{feature_type} is used, to select the criteria index
        (or indices) to search. The C{queryset} argument is not used.

        @return: list of L{ElasticObject}, one per hit on the requested page.
                 The total number of hits is stored on C{view.es_count}.
        '''
        q_size = view.paginator.get_limit(request)
        q_from = view.paginator.get_offset(request)

        filterable = getattr(view, 'filter_fields', [])
        filters = {k: v for k, v in request.GET.items() if k in filterable}
        criteria_idx = self._get_index(filters.get('feature_type', 'GENE_CRITERIA'))

        # A list of index keys is searched in a single comma separated request.
        if isinstance(criteria_idx, list):
            idx = ','.join(ElasticSettings.idx(name) for name in criteria_idx)
        else:
            idx = ElasticSettings.idx(criteria_idx)

        q = ElasticQuery(Query.match_all())
        s = Search(search_query=q, idx=idx, size=q_size, search_from=q_from)
        json_results = s.get_json_response()
        results = []
        for result in json_results['hits']['hits']:
            new_obj = ElasticObject(initial=result['_source'])
            new_obj.uuid = result['_id']
            new_obj.criteria_type = result['_type']
            results.append(new_obj)
        view.es_count = json_results['hits']['total']
        return results
开发者ID:premanand17,项目名称:django-criteria,代码行数:30,代码来源:feature_resources.py

示例4: _check_gene_history

# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
    def _check_gene_history(cls, gene_sets, config):
        ''' Look up the given gene ids as discontinued ids in the gene history index.

        @return: tuple of (dict of discontinued gene id to the gene id
                 recorded with it, list of discontinued gene ids that have
                 no gene id recorded). All ids are returned as strings.
        '''
        history_cfg = config['GENE_HISTORY']
        replaced_by = {}
        retired = []

        def process_hits(resp_json):
            ''' Sort each hit of one scroll response into replaced_by or retired. '''
            for doc in [Document(hit) for hit in resp_json['hits']['hits']]:
                current_id = getattr(doc, 'geneid')
                old_id = getattr(doc, 'discontinued_geneid')
                if current_id is not None:
                    replaced_by[str(old_id)] = str(current_id)
                else:
                    retired.append(str(old_id))

        everything = Query.match_all()
        history_filter = TermsFilter.get_terms_filter("discontinued_geneid", gene_sets)
        query = ElasticQuery.filtered(everything, history_filter,
                                      sources=['geneid', 'discontinued_geneid'])
        ScanAndScroll.scan_and_scroll(history_cfg['index'], idx_type=history_cfg['index_type'],
                                      call_fun=process_hits, query=query)

        return (replaced_by, retired)
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:27,代码来源:gene.py

示例5: gene_mgi_parse

# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
    def gene_mgi_parse(cls, gene_pubs, idx):
        ''' Parse Ensembl and MGI data from JAX.

        Each tab separated line of gene_pubs must carry an MGI id in its
        first column and an ENSMUSG id in its sixth. Gene documents whose
        mouse ortholog ensembl id matches are then updated in bulk with the
        MGI id, 450 ids per request.
        '''
        ensmusg_to_mgi = {}
        for line in gene_pubs:
            cols = line.split('\t')
            mgi_col = cols[0]
            if 'MGI:' not in mgi_col:
                raise PipelineError('MGI not found '+mgi_col)
            ens_col = cols[5]
            if 'ENSMUSG' not in ens_col:
                raise PipelineError('ENSMUSG not found '+ens_col)
            ensmusg_to_mgi[ens_col] = mgi_col.replace('MGI:', '')

        ensmusg_ids = list(ensmusg_to_mgi)
        chunk_size = 450
        for offset in range(0, len(ensmusg_ids), chunk_size):
            batch = ensmusg_ids[offset:offset+chunk_size]
            bulk_lines = []
            query = ElasticQuery.filtered(Query.match_all(),
                                          TermsFilter.get_terms_filter("dbxrefs.orthologs.mmusculus.ensembl",
                                                                       batch))
            for doc in Search(query, idx=idx, size=chunk_size).search().docs:
                ens_id = doc.doc_id()
                idx_type = doc.type()
                mouse = getattr(doc, 'dbxrefs')['orthologs']['mmusculus']
                mouse['MGI'] = ensmusg_to_mgi[mouse['ensembl']]
                partial_doc = {"dbxrefs": {'orthologs': {"mmusculus": mouse}}}
                action = {"update": {"_id": ens_id, "_type": idx_type,
                                     "_index": idx, "_retry_on_conflict": 3}}
                # bulk format: one action line followed by one partial document line
                bulk_lines.append(json.dumps(action) + '\n')
                bulk_lines.append(json.dumps({'doc': partial_doc}) + '\n')

            json_data = ''.join(bulk_lines)
            if json_data != '':
                Loader().bulk_load(idx, idx_type, json_data)
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:35,代码来源:gene.py

示例6: _ensembl_entrez_lookup

# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
    def _ensembl_entrez_lookup(cls, ensembl_gene_sets, section):
        ''' Get an ensembl:entrez id dictionary.

        The dictionary is keyed by document id and holds the entrez entry of
        each document's dbxrefs.
        '''
        everything = Query.match_all()
        ensembl_filter = TermsFilter.get_terms_filter("dbxrefs.ensembl", ensembl_gene_sets)
        equery = ElasticQuery.filtered(everything, ensembl_filter,
                                       sources=['dbxrefs.ensembl', 'dbxrefs.entrez'])

        # ask for as many hits as ids were given
        search = Search(equery, idx=section['index'], size=len(ensembl_gene_sets))
        lookup = {}
        for doc in search.search().docs:
            ens_id = doc.doc_id()
            lookup[ens_id] = getattr(doc, 'dbxrefs')['entrez']
        return lookup
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:10,代码来源:gene.py

示例7: test_hit_attributes

# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
    def test_hit_attributes(self):
        ''' Fetch one document via get_rdm_docs for every region index type.

        NOTE(review): the visible body makes no assertion on the fetched docs.
        '''
        for type_key in RegionDataTest.IDX_TYPE_KEYS:
            # ElasticSettings.idx returns "<index>/<type>" here
            index_name, index_type = ElasticSettings.idx(RegionDataTest.IDX_KEY, type_key).split('/')

            docs = ElasticUtils.get_rdm_docs(index_name, index_type, qbool=Query.match_all(),
                                             sources=[], size=1)
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:10,代码来源:test_regions.py

示例8: get_rdm_feature_id

# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
    def get_rdm_feature_id(cls, idx, idx_type, qbool=Query.match_all(), sources=[], field=None):
        ''' Get a random feature id from the indices.

        @return: the value of C{field} on the first document returned by
                 get_rdm_docs when a field is given, otherwise its doc id.
        '''
        docs = cls.get_rdm_docs(idx, idx_type, qbool=qbool, sources=sources, size=1)
        first = docs[0]
        return first.doc_id() if field is None else getattr(first, field)
开发者ID:premanand17,项目名称:django-data-pipeline,代码行数:10,代码来源:utils.py

示例9: test_doc

# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
 def test_doc(self):
     ''' Test a search on the gene index returns GeneDocument instances. '''
     gene_settings = PydginTestSettings.IDX['GENE']
     idx = gene_settings['indexName']
     idx_type = gene_settings['indexType']
     query = ElasticQuery(Query.match_all(), sources=['symbol'])
     docs = Search(search_query=query, idx=idx, idx_type=idx_type, size=2).search().docs
     for doc in docs:
         self.assertTrue(isinstance(doc, GeneDocument))
开发者ID:tottlefields,项目名称:pydgin,代码行数:10,代码来源:test_feature_doc.py

示例10: check_hits

# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
        def check_hits(resp_json):
            ''' Compare the position of each hit with the marker of the same id.

            Markers whose info lacks 'VC=SNV' are skipped. When the positions
            differ by more than one, is_par is None and allele_a is neither
            'D' nor 'I', an error is logged listing the markers found at the
            hit's position and any history entries for the id.
            '''
            ic_docs = {}
            for ic_hit in [Document(hit) for hit in resp_json['hits']['hits']]:
                hit_id = getattr(ic_hit, "id")
                if hit_id is not None:
                    ic_docs[hit_id] = ic_hit
            rsid_list = list(ic_docs)
            terms_filter = TermsFilter.get_terms_filter("id", rsid_list)
            query = ElasticQuery.filtered(Query.match_all(), terms_filter)
            elastic = Search(query, idx=ElasticSettings.idx('MARKER', 'MARKER'), size=len(rsid_list))
            for doc in elastic.search().docs:
                info = getattr(doc, "info")
                if 'VC=SNV' not in info:
                    continue
                rsid = getattr(doc, "id")
                ic_doc = ic_docs[rsid]
                pos1 = getattr(doc, "start")
                pos2 = self._get_highest_build(ic_doc)['position']
                if abs(int(pos1) - int(pos2)) <= 1:
                    continue

                is_par = getattr(ic_doc, 'is_par')
                allele_a = getattr(ic_doc, 'allele_a')
                if is_par is not None or allele_a == 'D' or allele_a == 'I':
                    continue

                msg = ("CHECK IC/DBSNP POSITIONS:: "+getattr(ic_doc, 'name') +
                       ' '+str(pos2)+" "+rsid+' '+str(pos1))

                # markers sitting at the hit's position on the same sequence
                query = ElasticQuery.filtered(Query.term("seqid", getattr(doc, 'seqid')),
                                              Filter(Query.term("start", pos2)))
                elastic = Search(query, idx=ElasticSettings.idx('MARKER', 'MARKER'))
                for d in elastic.search().docs:
                    msg += " ("+getattr(d, "id")+":"+str(getattr(d, "start"))+")"

                # history entries whose rslow is this id
                query = ElasticQuery.filtered(Query.match_all(), Filter(Query.term("rslow", rsid)))
                elastic = Search(query, idx=ElasticSettings.idx('MARKER', 'HISTORY'))
                for d in elastic.search().docs:
                    msg += " (rshigh:"+str(getattr(d, "rshigh")) + \
                           " build_id:"+str(getattr(d, "build_id"))+")"

                logger.error(msg)
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:47,代码来源:test_ic_marker.py

示例11: test_doc2

# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
 def test_doc2(self):
     ''' Test return correct type of FeatureDocument using multiple index search. '''
     # comma separated index names search both indices in one request
     idx = ','.join([PydginTestSettings.IDX['GENE']['indexName'],
                     PydginTestSettings.IDX['DISEASE']['indexName']])
     query = ElasticQuery(Query.match_all(), sources=['symbol', 'code'])
     docs = Search(search_query=query, idx=idx, size=40).search().docs
     for doc in docs:
         self.assertTrue(isinstance(doc, (GeneDocument, DiseaseDocument)))
         if isinstance(doc, DiseaseDocument):
             self.assertTrue(hasattr(doc, 'code'))
开发者ID:tottlefields,项目名称:pydgin,代码行数:11,代码来源:test_feature_doc.py

示例12: _entrez_ensembl_lookup

# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
    def _entrez_ensembl_lookup(cls, gene_sets, section, config=None):
        ''' Get an entrez:ensembl id dictionary.

        The gene ids are first checked against the gene history and replaced
        through Gene._replace_oldids_with_newids before the lookup. The
        dictionary is keyed by the entrez entry of each document's dbxrefs
        and holds the document id.
        '''
        newgene_ids, discontinued_ids = Gene._check_gene_history(gene_sets, config)
        current_ids = Gene._replace_oldids_with_newids(gene_sets, newgene_ids, discontinued_ids)

        everything = Query.match_all()
        entrez_filter = TermsFilter.get_terms_filter("dbxrefs.entrez", current_ids)
        equery = ElasticQuery.filtered(everything, entrez_filter,
                                       sources=['dbxrefs.ensembl', 'dbxrefs.entrez'])

        search = Search(equery, idx=section['index'], size=len(current_ids))
        lookup = {}
        for doc in search.search().docs:
            entrez_id = getattr(doc, 'dbxrefs')['entrez']
            lookup[entrez_id] = doc.doc_id()
        return lookup
开发者ID:premanand17,项目名称:django-data-pipeline,代码行数:12,代码来源:gene.py

示例13: test_gene_attributes

# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
    def test_gene_attributes(self):
        ''' Fetch random genes from elastic and compare the same with the results
        fetched via ensembl restful query.

        For each fetched gene document the assembly number (parsed from the
        index name), id, start, stop, chromosome, strand, biotype, symbol and
        source are compared with the ensembl record. When no ensembl data is
        returned only a warning is logged.
        '''
        idx_key = 'GENE'
        idx_type_key = 'GENE'

        idx = ElasticSettings.idx(idx_key, idx_type_key)
        (idx, idx_type) = idx.split('/')

        docs_by_geneid = DataIntegrityUtils.get_rdm_docs(idx, idx_type, qbool=Query.match_all(), sources=[], size=1)

        # Compiled once, as raw strings so \w and \d reach the regex engine
        # without being treated as (invalid) string escapes.
        # index name looks like: genes_hg38_v0.0.2
        index_pattern = re.compile(r'genes_\w\w(\d+)', re.IGNORECASE)
        # assembly name looks like: GRCh38
        assembly_pattern = re.compile(r'GRCh(\d+)', re.IGNORECASE)

        # "_source":{"symbol": "RP11-376M2.2", "start": 42975689, "biotype": "sense_intronic", "chromosome": "17",
        # "source": "havana", "strand": "-", "stop": 42977275}
        for doc in docs_by_geneid:
            gene_id_pipeline = doc.doc_id()
            index_pipeline = doc.index()
            start_pipeline = getattr(doc, "start")
            stop_pipeline = getattr(doc, "stop")
            chromosome_pipeline = getattr(doc, "chromosome")

            biotype_pipeline = getattr(doc, "biotype")
            strand_pipeline = getattr(doc, "strand")
            # the index stores '-'; anything else is compared as 1
            strand_pipeline = -1 if strand_pipeline == '-' else 1
            symbol_pipeline = getattr(doc, "symbol")
            source_pipeline = getattr(doc, "source")

            match = index_pattern.match(index_pipeline)
            assembly_number_pipeline = None
            if match:
                assembly_number_pipeline = match.group(1)

            ensembl_gene_data = DataIntegrityUtils.fetch_from_ensembl(gene_id_pipeline)

            if ensembl_gene_data:
                match = assembly_pattern.match(ensembl_gene_data['assembly_name'])

                assembly_number_ens = None
                if match:
                    assembly_number_ens = match.group(1)

                self.assertEqual(assembly_number_pipeline, assembly_number_ens, "Assembly number is ok")
                self.assertEqual(gene_id_pipeline, ensembl_gene_data['id'], "Gene Id number is ok")
                self.assertEqual(start_pipeline, ensembl_gene_data['start'], "start is ok")
                self.assertEqual(stop_pipeline, ensembl_gene_data['end'], "stop is ok")
                self.assertEqual(chromosome_pipeline, ensembl_gene_data['seq_region_name'], "chr is ok")
                self.assertEqual(strand_pipeline, ensembl_gene_data['strand'], "strand is ok")

                self.assertEqual(biotype_pipeline, ensembl_gene_data['biotype'], "biotype is ok")
                self.assertEqual(symbol_pipeline, ensembl_gene_data['display_name'], "symbol/display_name is ok")
                self.assertEqual(source_pipeline, ensembl_gene_data['source'], "source is ok")
            else:
                # Logger.warn is a deprecated alias of Logger.warning
                logger.warning("No test run....no ensembl data via ensembl webservice")
开发者ID:premanand17,项目名称:django-data-pipeline,代码行数:56,代码来源:test_gene.py

示例14: get_studies

# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
 def get_studies(cls, study_ids=None, disease_code=None, sources=[], split_name=True):
     ''' Get study documents sorted by study_id.

     A disease_code takes precedence over study_ids; with neither, all
     studies are matched. At most 200 documents are requested. When
     split_name is set, each study_name is cut at its first colon.
     '''
     studies_query = ElasticQuery(Query.match_all(), sources=sources)
     if disease_code is not None:
         by_disease = BoolQuery(must_arr=Query.term("diseases", disease_code))
         studies_query = ElasticQuery(by_disease, sources=sources)
     elif study_ids:
         studies_query = ElasticQuery(Query.ids(study_ids), sources=sources)

     search = Search(studies_query, idx=ElasticSettings.idx('STUDY', 'STUDY'), size=200)
     studies = search.search().docs
     if split_name:
         for doc in studies:
             name = getattr(doc, 'study_name')
             if name is not None:
                 setattr(doc, 'study_name', name.split(':', 1)[0])
     return Document.sorted_alphanum(studies, "study_id")
开发者ID:D-I-L,项目名称:pydgin,代码行数:13,代码来源:document.py

示例15: get_rdm_docs

# 需要导入模块: from elastic.query import Query [as 别名]
# 或者: from elastic.query.Query import match_all [as 别名]
    def get_rdm_docs(cls, idx, idx_type, qbool=Query.match_all(), sources=[], size=1):
        ''' Get random docs from the indices.

        The query is wrapped in a function score query whose score is
        replaced by a random_score function with a freshly drawn seed.
        '''
        seed = random.randint(0, 1000000)
        random_score = ScoreFunction.create_score_function('random_score', seed=seed)
        scored_query = FunctionScoreQuery(qbool, [random_score], boost_mode='replace')

        search_query = ElasticQuery(scored_query, sources=sources)
        elastic = Search(search_query=search_query, size=size, idx=idx, idx_type=idx_type)
        try:
            docs = elastic.search().docs
        except IndexError:
            # try again; the nested call draws a new seed
            return cls.get_rdm_docs(idx, idx_type, qbool, sources, size)
        return docs
开发者ID:premanand17,项目名称:django-data-pipeline,代码行数:13,代码来源:utils.py


注:本文中的elastic.query.Query.match_all方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。