当前位置: 首页>>代码示例>>Python>>正文


Python search.ElasticQuery类代码示例

本文整理汇总了Python中elastic.search.ElasticQuery的典型用法代码示例。如果您正苦于以下问题:Python ElasticQuery类的具体用法?Python ElasticQuery怎么用?Python ElasticQuery使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了ElasticQuery类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _find_snp_position

def _find_snp_position(snp_track, name):
    ''' Look up a marker by name and report its position as a one-base interval.

    When snp_track is None the MARKER index is searched on the "id" field.
    Otherwise snp_track is split as "<group>-<track>" and the matching
    CP_STATS_<GROUP> index is searched on the "name" field.

    @return: dict with 'chr', 'start', 'end' and 'name' keys taken from the
             first hit, or a dict holding a single 'error' message when the
             search returned no hits.
    '''
    if snp_track is None:
        field = "id"
        idx = ElasticSettings.idx('MARKER')
    else:
        field = "name"
        mo = re.match(r"(.*)-(.*)", snp_track)
        (group, track) = mo.group(1, 2)
        stats_key = 'CP_STATS_'+group.upper()
        try:
            idx = ElasticSettings.idx(stats_key, snp_track.upper())
        except SettingsError:
            # the track is not configured as an index type: append it to the index name
            idx = ElasticSettings.idx(stats_key)+"/"+track

    search = Search(ElasticQuery.query_match(field, name), idx=idx)
    hits = search.get_json_response()['hits']['hits']
    if len(hits) == 0:
        return {'error': 'Marker '+name+' does not exist in the currently selected dataset'}

    snp = hits[0]['_source']
    chrom = snp['seqid'].replace('chr', "")
    position = snp['start']
    return {'chr': chrom, 'start': (position-1), 'end': position, 'name': name}
开发者ID:D-I-L,项目名称:django-chicp,代码行数:28,代码来源:views.py

示例2: _check_gene_history

    def _check_gene_history(cls, gene_sets, config):
        ''' Classify the given gene ids using the gene history index.

        Scrolls over every history document whose "discontinued_geneid" is in
        gene_sets. Documents that carry a "geneid" give an old-to-new mapping;
        documents without one are reported as discontinued.

        @return: tuple of (dict of discontinued id -> replacement id,
                 list of discontinued ids that have no replacement); all ids
                 are returned as strings.
        '''
        history = config['GENE_HISTORY']
        replaced_by = {}
        retired = []

        def collect(resp_json):
            # called with each page of the scan and scroll response
            docs = list(map(Document, resp_json['hits']['hits']))
            for doc in docs:
                new_id = doc.geneid
                old_id = str(doc.discontinued_geneid)
                if new_id is not None:
                    replaced_by[old_id] = str(new_id)
                else:
                    retired.append(old_id)

        query = ElasticQuery.filtered(
            Query.match_all(),
            TermsFilter.get_terms_filter("discontinued_geneid", gene_sets),
            sources=['geneid', 'discontinued_geneid'])
        ScanAndScroll.scan_and_scroll(history['index'], idx_type=history['index_type'],
                                      call_fun=collect, query=query)

        return (replaced_by, retired)
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:25,代码来源:gene.py

示例3: gene_mgi_parse

    def gene_mgi_parse(cls, gene_pubs, idx):
        ''' Add MGI ids to the mouse orthologs of the indexed gene documents.

        Each line of gene_pubs is tab separated with the MGI accession in the
        first column and the mouse Ensembl gene id (ENSMUSG...) in the sixth.
        Matching gene documents are looked up in chunks and a bulk partial
        update of "dbxrefs.orthologs.mmusculus" is loaded for each chunk.

        @raise PipelineError: if a line lacks the MGI or ENSMUSG identifier.
        '''
        mgi_by_ensmusg = {}
        for line in gene_pubs:
            columns = line.split('\t')
            mgi_acc = columns[0]
            if 'MGI:' not in mgi_acc:
                raise PipelineError('MGI not found '+mgi_acc)
            ensmusg = columns[5]
            if 'ENSMUSG' not in ensmusg:
                raise PipelineError('ENSMUSG not found '+ensmusg)
            mgi_by_ensmusg[ensmusg] = mgi_acc.replace('MGI:', '')

        ensmusg_ids = list(mgi_by_ensmusg)
        chunk_size = 450
        for offset in range(0, len(ensmusg_ids), chunk_size):
            chunk = ensmusg_ids[offset:offset+chunk_size]
            query = ElasticQuery.filtered(
                Query.match_all(),
                TermsFilter.get_terms_filter("dbxrefs.orthologs.mmusculus.ensembl", chunk))

            # bulk payload: one action line followed by one partial document line per gene
            bulk_lines = []
            for doc in Search(query, idx=idx, size=chunk_size).search().docs:
                ens_id = doc.doc_id()
                idx_type = doc.type()
                mm = doc.dbxrefs['orthologs']['mmusculus']
                mm['MGI'] = mgi_by_ensmusg[mm['ensembl']]
                action = {"update": {"_id": ens_id, "_type": idx_type,
                                     "_index": idx, "_retry_on_conflict": 3}}
                partial_doc = {'doc': {"dbxrefs": {'orthologs': {"mmusculus": mm}}}}
                bulk_lines.append(json.dumps(action) + '\n')
                bulk_lines.append(json.dumps(partial_doc) + '\n')

            if bulk_lines:
                # idx_type is the type of the last document seen in this chunk
                Loader().bulk_load(idx, idx_type, ''.join(bulk_lines))
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:33,代码来源:gene.py

示例4: _ensembl_entrez_lookup

    def _ensembl_entrez_lookup(cls, ensembl_gene_sets, section):
        ''' Build a dictionary of document id -> entrez id for the given ensembl ids.

        Searches section['index'] for documents whose "dbxrefs.ensembl" is one
        of ensembl_gene_sets, requesting as many results as ids were supplied.
        '''
        match_all = Query.match_all()
        ensembl_filter = TermsFilter.get_terms_filter("dbxrefs.ensembl", ensembl_gene_sets)
        equery = ElasticQuery.filtered(match_all, ensembl_filter,
                                       sources=['dbxrefs.ensembl', 'dbxrefs.entrez'])

        search = Search(equery, idx=section['index'], size=len(ensembl_gene_sets))
        entrez_by_doc_id = {}
        for doc in search.search().docs:
            doc_id = doc.doc_id()
            entrez_by_doc_id[doc_id] = doc.dbxrefs['entrez']
        return entrez_by_doc_id
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:8,代码来源:gene.py

示例5: check_hits

        def check_hits(resp_json):
            ''' Compare the positions of a page of hits against the MARKER index.

            For every hit with an "id", the marker with the same id is fetched from
            the MARKER index. When the two positions differ by more than one base
            (and the hit is neither flagged "is_par" nor an insertion/deletion
            allele) an error is logged, listing any markers found at the hit's
            position and any HISTORY entries for the rs id.

            NOTE(review): relies on `self` and `logger` from the enclosing scope,
            which is not visible here. The hits are presumably Immunochip ("IC")
            marker documents - confirm against the caller.
            '''
            # index the incoming documents by their "id" attribute, skipping those without one
            rsids = {}
            docs = [Document(hit) for hit in resp_json['hits']['hits']]
            for doc in docs:
                rsid = getattr(doc, "id")
                if rsid is not None:
                    rsids[rsid] = doc
            rsids_keys = list(rsids.keys())
            # fetch the MARKER documents with the same ids in a single search
            terms_filter = TermsFilter.get_terms_filter("id", rsids_keys)
            query = ElasticQuery.filtered(Query.match_all(), terms_filter)
            elastic = Search(query, idx=ElasticSettings.idx('MARKER', 'MARKER'), size=len(rsids_keys))
            docs_by_rsid = elastic.search().docs
            for doc in docs_by_rsid:
                # only markers whose info contains 'VC=SNV' are compared
                info = getattr(doc, "info")
                if 'VC=SNV' not in info:
                    continue
                rsid = getattr(doc, "id")
                ic_doc = rsids[rsid]
                pos1 = getattr(doc, "start")
                pos2 = self._get_highest_build(ic_doc)['position']
                # a difference of a single base is tolerated
                if abs(int(pos1) - int(pos2)) > 1:
                    is_par = getattr(ic_doc, 'is_par')
                    allele_a = getattr(ic_doc, 'allele_a')
                    # skip documents with is_par set and those with a 'D' or 'I' allele_a
                    if is_par is None and not (allele_a == 'D' or allele_a == 'I'):
                        msg = ("CHECK IC/DBSNP POSITIONS:: "+getattr(ic_doc, 'name') +
                               ' '+str(pos2)+" "+rsid+' '+str(pos1))
#                                ' ('+ic_doc.doc_id()+' '+json.dumps(getattr(ic_doc, 'build_info'))+')'

                        # append any markers located at the hit's position on the same seqid
                        query = ElasticQuery.filtered(Query.term("seqid", getattr(doc, 'seqid')),
                                                      Filter(Query.term("start", pos2)))
                        elastic = Search(query, idx=ElasticSettings.idx('MARKER', 'MARKER'))
                        docs_by_pos = elastic.search().docs
                        if len(docs_by_pos) > 0:
                            for d in docs_by_pos:
                                msg += " ("+getattr(d, "id")+":"+str(getattr(d, "start"))+")"

                        # append any HISTORY entries whose "rslow" is this rs id
                        query = ElasticQuery.filtered(Query.match_all(), Filter(Query.term("rslow", rsid)))
                        elastic = Search(query, idx=ElasticSettings.idx('MARKER', 'HISTORY'))
                        docs_by_pos = elastic.search().docs
                        if len(docs_by_pos) > 0:
                            for d in docs_by_pos:
                                msg += " (rshigh:"+str(getattr(d, "rshigh")) + \
                                       " build_id:"+str(getattr(d, "build_id"))+")"

                        logger.error(msg)
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:45,代码来源:test_ic_marker.py

示例6: region_page

def region_page(request, region):
    ''' Render the region page for the given region id.

    Searches the REGION index for documents whose "attr.region_id" matches
    region and passes the search result, plus a page title, to the
    'region/region.html' template.
    '''
    query = ElasticQuery.query_match("attr.region_id", region)
    elastic = Search(query, idx=ElasticSettings.idx(name='REGION'))
    context = elastic.get_result()
    context['title'] = "Region"
    # the leftover debug print of the whole context was removed: it wrote every
    # search result to the server's stdout on each request
    return render(request, 'region/region.html', context,
                  content_type='text/html')
开发者ID:D-I-L,项目名称:django_template,代码行数:9,代码来源:views.py

示例7: _entrez_ensembl_lookup

    def _entrez_ensembl_lookup(cls, gene_sets, section, config=None):
        ''' Build a dictionary of entrez id -> document id for the given entrez ids.

        The ids are first passed through the gene history so that replaced ids
        are swapped for their current ones, then the documents in
        section['index'] whose "dbxrefs.entrez" matches are fetched in one search.
        '''
        (newgene_ids, discontinued_ids) = Gene._check_gene_history(gene_sets, config)
        current_ids = Gene._replace_oldids_with_newids(gene_sets, newgene_ids, discontinued_ids)

        match_all = Query.match_all()
        entrez_filter = TermsFilter.get_terms_filter("dbxrefs.entrez", current_ids)
        equery = ElasticQuery.filtered(match_all, entrez_filter,
                                       sources=['dbxrefs.ensembl', 'dbxrefs.entrez'])

        search = Search(equery, idx=section['index'], size=len(current_ids))
        doc_id_by_entrez = {}
        for doc in search.search().docs:
            entrez = doc.dbxrefs['entrez']
            doc_id_by_entrez[entrez] = doc.doc_id()
        return doc_id_by_entrez
开发者ID:premanand17,项目名称:django-data-pipeline,代码行数:10,代码来源:gene.py

示例8: _build_frags_query

def _build_frags_query(frags_idx, chrom, segmin, segmax):
    ''' Fetch the fragments ending inside [segmin, segmax] on a chromosome.

    The chromosome is matched both with and without a "chr" prefix. The
    'start' and 'end' of the returned fragments are passed through
    utils.makeRelative together with the segment bounds.
    '''
    seqid_query = Query.terms("seqid", [chrom, "chr"+chrom])
    end_filter = Filter(RangeQuery("end", gte=segmin, lte=segmax))
    query = ElasticQuery.filtered(seqid_query, end_filter, utils.bedFields)

    search = Search(search_query=query, search_from=0, size=2000000, idx=frags_idx)
    frags = search.get_result()['data']
    return utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], frags)
开发者ID:premanand17,项目名称:django-chicp,代码行数:11,代码来源:views.py

示例9: get_elastic_query

    def get_elastic_query(cls, section=None, config=None):
        ''' Build the elastic query object for a criteria section.
        @type  section: string
        @keyword section: The section name in the criteria.ini file.
        @type  config:  string
        @keyword config: The config object initialized from criteria.ini.
        @return: the query for the section, or None when the section is not
                 one of the special cases and defines no source fields.
        '''
        section_config = config[section]

        source_fields = []
        if 'source_fields' in section_config:
            source_fields = section_config['source_fields'].split(',')

        if 'mhc' in section:
            # MHC region is defined as chr6:25,000,000..35,000,000
            seqid, start_range, end_range = '6', 25000000, 35000000

            seqid_param = section_config['seqid_param']
            start_param = section_config['start_param']
            end_param = section_config['end_param']

        if section == 'is_gene_in_mhc':
            # genes are matched on any overlap with the region
            return ElasticUtils.range_overlap_query(seqid, start_range, end_range,
                                                    source_fields,
                                                    seqid_param, start_param, end_param)

        if section == 'is_marker_in_mhc':
            # markers are matched on their start position lying inside the region
            query_bool = BoolQuery()
            (query_bool.must(RangeQuery("start", lte=end_range))
                       .must(RangeQuery("start", gte=start_range))
                       .must(Query.term("seqid", seqid)))
            return ElasticQuery.filtered_bool(Query.match_all(), query_bool,
                                              sources=["id", "seqid", "start"])

        if section == 'is_region_in_mhc':
            return ElasticQuery(Query.term("region_name", "MHC"))

        if section == 'marker_is_gwas_significant_in_ic':
            # genome-wide significance threshold for the p-value range query
            gw_sig_p = 5e-8
            return ElasticQuery(RangeQuery("p_value", lte=gw_sig_p))

        if source_fields:
            return ElasticQuery(Query.match_all(), sources=source_fields)
        return None
开发者ID:D-I-L,项目名称:django-criteria,代码行数:53,代码来源:criteria.py

示例10: test_pub_ini_file2

 def test_pub_ini_file2(self):
     ''' Load the DISEASE::TEST publications section and check that the
     indexed documents can be found by their disease tag. '''
     captured = StringIO()
     call_command('publications', '--dir', TEST_DATA_DIR, '--steps', 'load',
                  sections='DISEASE::TEST', ini=MY_PUB_INI_FILE, stdout=captured)

     ini_config = IniParser().read_ini(MY_PUB_INI_FILE)
     idx = ini_config['DISEASE']['index']
     # refresh the index so the newly loaded documents are searchable
     Search.index_refresh(idx)

     query = ElasticQuery.query_string("test", fields=["tags.disease"])
     docs = Search(query, idx=idx).search().docs
     self.assertGreater(len(docs), 1)
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:12,代码来源:tests_download.py

示例11: show_es_gene_section

def show_es_gene_section(gene_symbol=None, seqid=None,
                         start_pos=None, end_pos=None):
    ''' Template inclusion tag returning the genes to render in a gene section.

    Genes are searched by symbol when gene_symbol is given; otherwise by
    position on the chromosome (seqid with any "chr" removed), either around a
    single position or within a start/end range.

    @return: dict with the matching gene documents under 'es_genes'.
    '''
    seqid = str(seqid).replace('chr', '')
    if gene_symbol is not None:
        # gene symbol query
        query = ElasticQuery.query_match("symbol", gene_symbol)
    else:
        on_chromosome = Query.match("chromosome", seqid)
        if end_pos is None:
            # single position (e.g. a snp): genes spanning start_pos
            bounds = [RangeQuery("start", lte=start_pos),
                      RangeQuery("stop", gte=start_pos)]
        else:
            # range: genes lying entirely between start_pos and end_pos
            bounds = [RangeQuery("start", gte=start_pos),
                      RangeQuery("stop", lte=end_pos)]
        query = ElasticQuery.bool(BoolQuery(must_arr=[on_chromosome] + bounds))

    elastic = Search(query, idx=ElasticSettings.idx(name='GENE'))
    return {'es_genes': elastic.search().docs}
开发者ID:D-I-L,项目名称:django_template,代码行数:23,代码来源:gene_tags.py

示例12: post

    def post(self, request, *args, **kwargs):
        ''' Return the study hits for the posted gene or marker(s) as JSON.

        One of the POST parameters 'ens_id', 'marker' or 'markers[]' selects
        the study hits (checked in that order). Each returned hit has its
        'genes' replaced by a dictionary of ensembl id -> gene symbol and its
        'pmid' replaced by a dictionary holding the pmid and, when the
        publication is found, the last word of the first author's name and
        the journal.

        @return: JsonResponse of the search 'hits' object, or a JsonResponse
                 with status 400 when none of the parameters was posted.
        '''
        ens_id = self.request.POST.get('ens_id')
        marker = self.request.POST.get('marker')
        markers = self.request.POST.getlist('markers[]')

        if ens_id:
            sfilter = Filter(Query.query_string(ens_id, fields=["genes"]).query_wrap())
        elif marker:
            sfilter = Filter(Query.query_string(marker, fields=["marker"]).query_wrap())
        elif markers:
            sfilter = Filter(Query.query_string(' '.join(markers), fields=["marker"]).query_wrap())
        else:
            # without this branch sfilter was unbound and the view failed
            # with an UnboundLocalError
            return JsonResponse({'error': "one of 'ens_id', 'marker' or 'markers[]' is required"},
                                status=400)

        query = ElasticQuery.filtered(Query.match_all(), sfilter)
        elastic = Search(query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'), size=500)
        study_hits = elastic.get_json_response()['hits']

        # collect every gene and publication id so each is looked up only once
        ens_ids = []
        pmids = []
        for hit in study_hits['hits']:
            source = hit['_source']
            if 'pmid' in source:
                pmids.append(source['pmid'])
            if 'genes' in source:
                ens_ids.extend(source['genes'])
        docs = utils.get_gene_docs_by_ensembl_id(ens_ids, ['symbol'])
        pub_docs = PublicationDocument.get_pub_docs_by_pmid(pmids, sources=['authors.name', 'journal'])

        for hit in study_hits['hits']:
            source = hit['_source']
            genes = {}
            if 'genes' in source:
                for gene_id in source['genes']:
                    try:
                        genes[gene_id] = getattr(docs[gene_id], 'symbol')
                    except KeyError:
                        # unknown gene: fall back to its id, keeping the symbols
                        # already resolved for this hit (previously the whole
                        # dictionary was replaced here)
                        genes[gene_id] = gene_id
            source['genes'] = genes
            if 'pmid' in source:
                pmid = source['pmid']
                try:
                    authors = getattr(pub_docs[pmid], 'authors')
                    journal = getattr(pub_docs[pmid], 'journal')
                    source['pmid'] = \
                        {'pmid': pmid,
                         'author': authors[0]['name'].rsplit(None, 1)[-1] if authors else "",
                         'journal': journal}
                except KeyError:
                    # publication not found: return the bare pmid
                    source['pmid'] = {'pmid': pmid}

        return JsonResponse(study_hits)
开发者ID:D-I-L,项目名称:pydgin,代码行数:49,代码来源:views.py

示例13: _build_frags_query

def _build_frags_query(frags_idx, chrom, segmin, segmax):
    ''' Fetch the fragments ending inside [segmin, segmax] on a chromosome.

    The chromosome is matched both with and without a "chr" prefix and the
    fragment sources are read directly from the JSON response. Their 'start'
    and 'end' are passed through utils.makeRelative together with the
    segment bounds.
    '''
    seqid_query = Query.terms("seqid", [chrom, "chr"+chrom])
    end_filter = Filter(RangeQuery("end", gte=segmin, lte=segmax))
    query = ElasticQuery.filtered(seqid_query, end_filter, utils.bedFields)

    search = Search(search_query=query, search_from=0, size=10000, idx=frags_idx)
    response = search.get_json_response()
    frags = [hit['_source'] for hit in response['hits']['hits']]
    return utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], frags)
开发者ID:D-I-L,项目名称:django-chicp,代码行数:15,代码来源:views.py

示例14: _build_exon_query

def _build_exon_query(chrom, segmin, segmax, genes):
    ''' Get the exonic structure of each of the given genes.

    For every gene the exons index is queried by gene id on the given
    chromosome; the exon 'start' and 'end' are passed through
    utils.makeRelative together with the segment bounds.

    @return: dict of gene id -> list of exons sorted by 'start'.
    '''
    gene_exons = {}
    on_chrom = BoolQuery()
    on_chrom.must([Query.term("seqid", chrom)])
    if len(genes) == 0:
        return gene_exons

    for gene in genes:
        name_query = Query.query_string(gene["gene_id"], fields=["name"])
        query = ElasticQuery.filtered_bool(name_query, on_chrom, sources=utils.snpFields)
        exons_idx = chicp_settings.CP_GENE_IDX+'/exons/'
        elastic = Search(query, idx=exons_idx, search_from=0, size=2000)
        exons = elastic.get_result()['data']
        exons = utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], exons)
        gene_exons[gene["gene_id"]] = sorted(exons, key=operator.itemgetter("start"))
    return gene_exons
开发者ID:premanand17,项目名称:django-chicp,代码行数:15,代码来源:views.py

示例15: _entrez_ensembl_lookup

    def _entrez_ensembl_lookup(cls, gene_sets, section, config=None):
        ''' Build a dictionary of entrez id -> document id for the given entrez ids.

        The ids are first passed through the gene history so that replaced ids
        are swapped for their current ones. The matching documents in
        section['index'] are then gathered with a scan and scroll rather than
        a single sized search.
        '''
        (newgene_ids, discontinued_ids) = Gene._check_gene_history(gene_sets, config)
        current_ids = Gene._replace_oldids_with_newids(gene_sets, newgene_ids, discontinued_ids)
        doc_id_by_entrez = {}

        def collect(resp_json):
            # called with each page of the scan and scroll response; a page is
            # merged into the result only once it has been read completely
            docs = list(map(Document, resp_json['hits']['hits']))
            page = {}
            for doc in docs:
                entrez = doc.dbxrefs['entrez']
                page[entrez] = doc.doc_id()
            doc_id_by_entrez.update(page)

        equery = ElasticQuery.filtered(
            Query.match_all(),
            TermsFilter.get_terms_filter("dbxrefs.entrez", current_ids),
            sources=['dbxrefs.ensembl', 'dbxrefs.entrez'])
        ScanAndScroll.scan_and_scroll(section['index'], call_fun=collect, query=equery)
        return doc_id_by_entrez
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:16,代码来源:gene.py


注:本文中的elastic.search.ElasticQuery类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。