当前位置: 首页>>代码示例>>Python>>正文


Python ElasticQuery.filtered方法代码示例

本文整理汇总了 Python 中 elastic.search.ElasticQuery.filtered 方法的典型用法代码示例。如果您正苦于以下问题：Python ElasticQuery.filtered 方法的具体用法？Python ElasticQuery.filtered 怎么用？Python ElasticQuery.filtered 的使用例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 elastic.search.ElasticQuery 的用法示例。


在下文中一共展示了ElasticQuery.filtered方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _check_gene_history

# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
    def _check_gene_history(cls, gene_sets, config):
        '''Look up gene ids in the gene history index.

        Scans the index/type named by the GENE_HISTORY section of config for
        documents whose discontinued_geneid is one of gene_sets.

        Returns a tuple (replacements, retired): a dict mapping each
        discontinued gene id (str) to its replacement geneid (str), and a list
        of discontinued gene ids (str) whose document has no geneid.
        '''
        history_cfg = config['GENE_HISTORY']
        replacements = {}
        retired = []

        def process_hits(resp_json):
            # Callback handed to scan_and_scroll; accumulates into the
            # enclosing dict/list.
            for doc in [Document(hit) for hit in resp_json['hits']['hits']]:
                current_id = getattr(doc, 'geneid')
                old_id = str(getattr(doc, 'discontinued_geneid'))
                if current_id is None:
                    retired.append(old_id)
                else:
                    replacements[old_id] = str(current_id)

        history_query = ElasticQuery.filtered(
            Query.match_all(),
            TermsFilter.get_terms_filter("discontinued_geneid", gene_sets),
            sources=['geneid', 'discontinued_geneid'])
        ScanAndScroll.scan_and_scroll(history_cfg['index'],
                                      idx_type=history_cfg['index_type'],
                                      call_fun=process_hits,
                                      query=history_query)

        return (replacements, retired)
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:27,代码来源:gene.py

示例2: gene_mgi_parse

# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
    def gene_mgi_parse(cls, gene_pubs, idx):
        ''' Parse Ensembl and MGI data from JAX and add MGI ids to the index.

        Each entry of gene_pubs is a tab separated line whose first column
        holds the MGI id and whose sixth column holds the ENSMUSG id. A
        PipelineError is raised when either marker is absent from its column.
        Matching documents in idx are then updated in bulk, 450 ids at a time.
        '''
        mgi_by_ensmusg = {}
        for line in gene_pubs:
            columns = line.split('\t')
            mgi_column = columns[0]
            if 'MGI:' not in mgi_column:
                raise PipelineError('MGI not found '+mgi_column)
            ensmusg_column = columns[5]
            if 'ENSMUSG' not in ensmusg_column:
                raise PipelineError('ENSMUSG not found '+ensmusg_column)
            mgi_by_ensmusg[ensmusg_column] = mgi_column.replace('MGI:', '')

        ensmusg_ids = list(mgi_by_ensmusg.keys())
        chunk_size = 450
        for offset in range(0, len(ensmusg_ids), chunk_size):
            batch = ensmusg_ids[offset:offset+chunk_size]
            query = ElasticQuery.filtered(
                Query.match_all(),
                TermsFilter.get_terms_filter("dbxrefs.orthologs.mmusculus.ensembl", batch))
            result = Search(query, idx=idx, size=chunk_size).search()

            # Bulk payload: one action line followed by one partial-doc line
            # per matched document.
            bulk_lines = []
            for doc in result.docs:
                ens_id = doc.doc_id()
                idx_type = doc.type()
                mouse = getattr(doc, 'dbxrefs')['orthologs']['mmusculus']
                mouse['MGI'] = mgi_by_ensmusg[mouse['ensembl']]
                action = {"update": {"_id": ens_id, "_type": idx_type,
                                     "_index": idx, "_retry_on_conflict": 3}}
                partial = {'doc': {"dbxrefs": {'orthologs': {"mmusculus": mouse}}}}
                bulk_lines.append(json.dumps(action) + '\n')
                bulk_lines.append(json.dumps(partial) + '\n')

            if bulk_lines:
                # idx_type is the type of the last document seen in this batch.
                Loader().bulk_load(idx, idx_type, ''.join(bulk_lines))
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:35,代码来源:gene.py

示例3: _ensembl_entrez_lookup

# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
    def _ensembl_entrez_lookup(cls, ensembl_gene_sets, section):
        ''' Build a dictionary of document id -> dbxrefs entrez id.

        Searches section['index'] for documents whose dbxrefs.ensembl is in
        ensembl_gene_sets, requesting at most len(ensembl_gene_sets) hits.
        '''
        equery = ElasticQuery.filtered(
            Query.match_all(),
            TermsFilter.get_terms_filter("dbxrefs.ensembl", ensembl_gene_sets),
            sources=['dbxrefs.ensembl', 'dbxrefs.entrez'])
        search = Search(equery, idx=section['index'], size=len(ensembl_gene_sets))

        entrez_by_doc_id = {}
        for doc in search.search().docs:
            doc_id = doc.doc_id()
            entrez_by_doc_id[doc_id] = getattr(doc, 'dbxrefs')['entrez']
        return entrez_by_doc_id
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:10,代码来源:gene.py

示例4: check_hits

# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
        def check_hits(resp_json):
            '''Compare the positions of the hit documents with the marker index.

            The hits in resp_json are keyed by their "id" attribute and looked
            up in the MARKER index. For each SNV marker whose start differs by
            more than one from the position of the hit's highest build, an
            error is logged, unless the hit has is_par set or its allele_a is
            'D' or 'I'. The message also lists markers at the hit's position
            and matching rslow records in the HISTORY index.
            '''
            ic_by_rsid = {}
            for ic in [Document(hit) for hit in resp_json['hits']['hits']]:
                ic_rsid = getattr(ic, "id")
                if ic_rsid is not None:
                    ic_by_rsid[ic_rsid] = ic

            rsid_list = list(ic_by_rsid.keys())
            query = ElasticQuery.filtered(Query.match_all(),
                                          TermsFilter.get_terms_filter("id", rsid_list))
            marker_search = Search(query, idx=ElasticSettings.idx('MARKER', 'MARKER'),
                                   size=len(rsid_list))

            for marker in marker_search.search().docs:
                # Only single nucleotide variants are position-checked.
                if 'VC=SNV' not in getattr(marker, "info"):
                    continue

                rsid = getattr(marker, "id")
                ic_doc = ic_by_rsid[rsid]
                marker_pos = getattr(marker, "start")
                ic_pos = self._get_highest_build(ic_doc)['position']
                if abs(int(marker_pos) - int(ic_pos)) <= 1:
                    continue

                is_par = getattr(ic_doc, 'is_par')
                allele_a = getattr(ic_doc, 'allele_a')
                if is_par is not None or allele_a == 'D' or allele_a == 'I':
                    continue

                msg = ("CHECK IC/DBSNP POSITIONS:: "+getattr(ic_doc, 'name') +
                       ' '+str(ic_pos)+" "+rsid+' '+str(marker_pos))

                # Append any markers found at the hit's own position.
                pos_query = ElasticQuery.filtered(Query.term("seqid", getattr(marker, 'seqid')),
                                                  Filter(Query.term("start", ic_pos)))
                pos_search = Search(pos_query, idx=ElasticSettings.idx('MARKER', 'MARKER'))
                for found in pos_search.search().docs:
                    msg += " ("+getattr(found, "id")+":"+str(getattr(found, "start"))+")"

                # Append any history records whose rslow is this rsid.
                history_query = ElasticQuery.filtered(Query.match_all(),
                                                      Filter(Query.term("rslow", rsid)))
                history_search = Search(history_query, idx=ElasticSettings.idx('MARKER', 'HISTORY'))
                for found in history_search.search().docs:
                    msg += " (rshigh:"+str(getattr(found, "rshigh")) + \
                           " build_id:"+str(getattr(found, "build_id"))+")"

                logger.error(msg)
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:47,代码来源:test_ic_marker.py

示例5: _entrez_ensembl_lookup

# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
    def _entrez_ensembl_lookup(cls, gene_sets, section, config=None):
        ''' Build a dictionary of dbxrefs entrez id -> document id.

        Ids in gene_sets are first passed through the gene history check so
        that old ids are replaced before section['index'] is searched.
        '''
        history = Gene._check_gene_history(gene_sets, config)
        (newgene_ids, discontinued_ids) = history
        current_ids = Gene._replace_oldids_with_newids(gene_sets, newgene_ids, discontinued_ids)

        equery = ElasticQuery.filtered(
            Query.match_all(),
            TermsFilter.get_terms_filter("dbxrefs.entrez", current_ids),
            sources=['dbxrefs.ensembl', 'dbxrefs.entrez'])
        search = Search(equery, idx=section['index'], size=len(current_ids))

        doc_id_by_entrez = {}
        for doc in search.search().docs:
            entrez = getattr(doc, 'dbxrefs')['entrez']
            doc_id_by_entrez[entrez] = doc.doc_id()
        return doc_id_by_entrez
开发者ID:premanand17,项目名称:django-data-pipeline,代码行数:12,代码来源:gene.py

示例6: _build_frags_query

# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def _build_frags_query(frags_idx, chrom, segmin, segmax):
    """Fetch fragments on chrom whose end lies within [segmin, segmax].

    The seqid is matched both as given and with a "chr" prefix. The hits are
    returned after utils.makeRelative has been applied to their 'start' and
    'end' fields.
    """
    seqid_query = Query.terms("seqid", [chrom, "chr"+chrom])
    end_in_segment = Filter(RangeQuery("end", gte=segmin, lte=segmax))
    query = ElasticQuery.filtered(seqid_query, end_in_segment, utils.bedFields)

    search = Search(search_query=query, search_from=0, size=2000000, idx=frags_idx)
    fragments = search.get_result()['data']
    return utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], fragments)
开发者ID:premanand17,项目名称:django-chicp,代码行数:13,代码来源:views.py

示例7: post

# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
    def post(self, request, *args, **kwargs):
        ''' Return the study hits for a gene, a marker or a list of markers as JSON.

        POST parameters, checked in this order (the first non-empty one wins):
          ens_id    - matched against the "genes" field
          marker    - matched against the "marker" field
          markers[] - joined with spaces and matched against the "marker" field

        In each hit the "genes" list is replaced by a dict of
        {ensembl id: symbol} (the id itself when no gene document is found),
        and "pmid" by a dict holding the pmid plus, when the publication is
        found, the surname of the first author and the journal.

        A 400 response is returned when none of the parameters is supplied.
        '''
        ens_id = self.request.POST.get('ens_id')
        marker = self.request.POST.get('marker')
        markers = self.request.POST.getlist('markers[]')

        if ens_id:
            sfilter = Filter(Query.query_string(ens_id, fields=["genes"]).query_wrap())
        elif marker:
            sfilter = Filter(Query.query_string(marker, fields=["marker"]).query_wrap())
        elif markers:
            sfilter = Filter(Query.query_string(' '.join(markers), fields=["marker"]).query_wrap())
        else:
            # Without this branch sfilter would be unbound and the view would
            # fail with an UnboundLocalError.
            return JsonResponse({'error': 'one of ens_id, marker or markers[] is required'},
                                status=400)

        query = ElasticQuery.filtered(Query.match_all(), sfilter)
        elastic = Search(query, idx=ElasticSettings.idx('REGION', 'STUDY_HITS'), size=500)
        study_hits = elastic.get_json_response()['hits']

        # First pass: collect every gene id and pmid so that the gene and
        # publication documents are each fetched with a single lookup.
        ens_ids = []
        pmids = []
        for hit in study_hits['hits']:
            source = hit['_source']
            if 'pmid' in source:
                pmids.append(source['pmid'])
            if 'genes' in source:
                ens_ids.extend(source['genes'])
        docs = utils.get_gene_docs_by_ensembl_id(ens_ids, ['symbol'])
        pub_docs = PublicationDocument.get_pub_docs_by_pmid(pmids, sources=['authors.name', 'journal'])

        # Second pass: decorate each hit with gene symbols and publication details.
        for hit in study_hits['hits']:
            source = hit['_source']
            genes = {}
            for gene_id in source.get('genes', []):
                try:
                    genes[gene_id] = getattr(docs[gene_id], 'symbol')
                except KeyError:
                    # Fall back to the id for this gene only; symbols already
                    # resolved for the hit must be kept.
                    genes[gene_id] = gene_id
            source['genes'] = genes

            if 'pmid' in source:
                pmid = source['pmid']
                try:
                    authors = getattr(pub_docs[pmid], 'authors')
                    journal = getattr(pub_docs[pmid], 'journal')
                    source['pmid'] = \
                        {'pmid': pmid,
                         'author': authors[0]['name'].rsplit(None, 1)[-1] if authors else "",
                         'journal': journal}
                except KeyError:
                    source['pmid'] = {'pmid': pmid}

        return JsonResponse(study_hits)
开发者ID:D-I-L,项目名称:pydgin,代码行数:51,代码来源:views.py

示例8: _build_frags_query

# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def _build_frags_query(frags_idx, chrom, segmin, segmax):
    """Fetch fragments on chrom whose end lies within [segmin, segmax].

    The seqid is matched both as given and with a "chr" prefix; up to 10000
    hits are requested. The _source of each hit is collected and returned
    after utils.makeRelative has been applied to the 'start' and 'end' fields.
    """
    seqid_query = Query.terms("seqid", [chrom, "chr"+chrom])
    end_in_segment = Filter(RangeQuery("end", gte=segmin, lte=segmax))
    query = ElasticQuery.filtered(seqid_query, end_in_segment, utils.bedFields)

    search = Search(search_query=query, search_from=0, size=10000, idx=frags_idx)
    response = search.get_json_response()
    fragments = [hit['_source'] for hit in response['hits']['hits']]
    return utils.makeRelative(int(segmin), int(segmax), ['start', 'end'], fragments)
开发者ID:D-I-L,项目名称:django-chicp,代码行数:17,代码来源:views.py

示例9: _entrez_ensembl_lookup

# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
    def _entrez_ensembl_lookup(cls, gene_sets, section, config=None):
        ''' Build a dictionary of dbxrefs entrez id -> document id.

        Ids in gene_sets are first passed through the gene history check so
        that old ids are replaced, then section['index'] is scanned and the
        mapping is accumulated from the hits handed to the callback.
        '''
        history = Gene._check_gene_history(gene_sets, config)
        (newgene_ids, discontinued_ids) = history
        current_ids = Gene._replace_oldids_with_newids(gene_sets, newgene_ids, discontinued_ids)
        lookup = {}

        def process_hits(resp_json):
            # Build the mapping for this response fully before merging it in.
            page = {}
            for doc in [Document(hit) for hit in resp_json['hits']['hits']]:
                entrez = getattr(doc, 'dbxrefs')['entrez']
                page[entrez] = doc.doc_id()
            lookup.update(page)

        equery = ElasticQuery.filtered(
            Query.match_all(),
            TermsFilter.get_terms_filter("dbxrefs.entrez", current_ids),
            sources=['dbxrefs.ensembl', 'dbxrefs.entrez'])
        ScanAndScroll.scan_and_scroll(section['index'], call_fun=process_hits, query=equery)
        return lookup
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:18,代码来源:gene.py

示例10: _check_gene_history

# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
    def _check_gene_history(cls, gene_sets, section):
        '''Look up gene ids in the gene history type of the index.

        Searches section['index'] / section['index_type_history'] for
        documents whose discontinued_geneid is one of gene_sets.

        Returns a tuple (replacements, retired): a dict mapping each
        discontinued gene id (str) to its replacement geneid (str), and a list
        of discontinued gene ids (str) whose document has no geneid.
        '''
        history_query = ElasticQuery.filtered(
            Query.match_all(),
            TermsFilter.get_terms_filter("discontinued_geneid", gene_sets))
        search = Search(history_query, idx=section['index'],
                        idx_type=section['index_type_history'], size=1000000)

        replacements = {}
        retired = []
        for doc in search.search().docs:
            current_id = getattr(doc, 'geneid')
            old_id = str(getattr(doc, 'discontinued_geneid'))
            if current_id is None:
                retired.append(old_id)
            else:
                replacements[old_id] = str(current_id)

        return (replacements, retired)
开发者ID:tottlefields,项目名称:django-data-pipeline,代码行数:19,代码来源:gene.py

示例11: _check_gene_history

# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
    def _check_gene_history(cls, gene_sets, config):
        '''Look up gene ids in the gene history index.

        Searches the index/type named by the GENE_HISTORY section of config
        for documents whose discontinued_geneid is one of gene_sets,
        requesting at most len(gene_sets) hits.

        Returns a tuple (replacements, retired): a dict mapping each
        discontinued gene id (str) to its replacement geneid (str), and a list
        of discontinued gene ids (str) whose document has no geneid.
        '''
        history_cfg = config['GENE_HISTORY']
        history_query = ElasticQuery.filtered(
            Query.match_all(),
            TermsFilter.get_terms_filter("discontinued_geneid", gene_sets),
            sources=['geneid', 'discontinued_geneid'])
        search = Search(history_query, idx=history_cfg['index'],
                        idx_type=history_cfg['index_type'], size=len(gene_sets))

        replacements = {}
        retired = []
        for doc in search.search().docs:
            current_id = getattr(doc, 'geneid')
            old_id = str(getattr(doc, 'discontinued_geneid'))
            if current_id is None:
                retired.append(old_id)
            else:
                replacements[old_id] = str(current_id)
        return (replacements, retired)
开发者ID:premanand17,项目名称:django-data-pipeline,代码行数:22,代码来源:gene.py

示例12: _update_gene

# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
    def _update_gene(cls, genes, idx):
        ''' Use genes data to update the index.

        genes maps a dbxrefs entrez id to the partial document to merge into
        the matching index document. Ids are processed 450 at a time; each
        batch is searched for and its matches are updated with one bulk load.
        '''
        entrez_ids = list(genes.keys())
        chunk_size = 450
        for offset in range(0, len(genes), chunk_size):
            batch = entrez_ids[offset:offset+chunk_size]
            query = ElasticQuery.filtered(
                Query.match_all(),
                TermsFilter.get_terms_filter("dbxrefs.entrez", batch))
            result = Search(query, idx=idx, size=chunk_size).search()

            # Bulk payload: one action line followed by one partial-doc line
            # per matched document.
            bulk_lines = []
            for doc in result.docs:
                ens_id = doc._meta['_id']
                idx_type = doc.type()
                entrez = getattr(doc, 'dbxrefs')['entrez']
                action = {"update": {"_id": ens_id, "_type": idx_type,
                                     "_index": idx, "_retry_on_conflict": 3}}
                bulk_lines.append(json.dumps(action) + '\n')
                bulk_lines.append(json.dumps({'doc': genes[entrez]}) + '\n')

            if bulk_lines:
                # idx_type is the type of the last document seen in this batch.
                Loader().bulk_load(idx, idx_type, ''.join(bulk_lines))
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:23,代码来源:gene.py

示例13: studies_details

# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def studies_details(request):
    """Get studies for a given ensembl ID.

    Reads "ens_id" from the POST data, searches the STUDY_HITS index for hits
    whose "genes" field matches it and returns the hits as JSON.

    In each hit the "genes" list is replaced by a dict of
    {ensembl id: symbol} (the id itself when no gene document is found), and
    "pmid" by a dict holding the pmid plus, when the publication is found, the
    surname of the first author (empty when there are no authors) and the
    journal.
    """
    ens_id = request.POST.get("ens_id")
    sfilter = Filter(Query.query_string(ens_id, fields=["genes"]).query_wrap())
    query = ElasticQuery.filtered(Query.match_all(), sfilter)
    elastic = Search(query, idx=ElasticSettings.idx("REGION", "STUDY_HITS"), size=500)
    study_hits = elastic.get_json_response()["hits"]

    # First pass: collect every gene id and pmid so that the gene and
    # publication documents are each fetched with a single lookup.
    ens_ids = []
    pmids = []
    for hit in study_hits["hits"]:
        source = hit["_source"]
        if "pmid" in source:
            pmids.append(source["pmid"])
        # A hit without a "genes" field contributes no ids instead of failing.
        ens_ids.extend(source.get("genes", []))
    docs = _get_gene_docs_by_ensembl_id(ens_ids, ["symbol"])
    pub_docs = _get_pub_docs_by_pmid(pmids, sources=["authors.name", "journal"])

    # Second pass: decorate each hit with gene symbols and publication details.
    for hit in study_hits["hits"]:
        source = hit["_source"]
        genes = {}
        for gene_id in source.get("genes", []):
            try:
                genes[gene_id] = getattr(docs[gene_id], "symbol")
            except KeyError:
                # Fall back to the id for this gene only; symbols already
                # resolved for the hit must be kept.
                genes[gene_id] = gene_id
        source["genes"] = genes

        if "pmid" in source:
            pmid = source["pmid"]
            try:
                authors = getattr(pub_docs[pmid], "authors")
                journal = getattr(pub_docs[pmid], "journal")
                source["pmid"] = {
                    "pmid": pmid,
                    # An empty author list would otherwise raise IndexError.
                    "author": authors[0]["name"].rsplit(None, 1)[-1] if authors else "",
                    "journal": journal,
                }
            except KeyError:
                source["pmid"] = {"pmid": pmid}

    return JsonResponse(study_hits)
开发者ID:tottlefields,项目名称:pydgin,代码行数:42,代码来源:views.py

示例14: _convert_entrezid2ensembl

# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
    def _convert_entrezid2ensembl(cls, gene_sets, section, log_output_file_handler=None, log_conversion=True):
        '''Convert a set of entrez ids to ensembl ids via the gene index dbxrefs.

        Old ids are replaced using the gene history before section['index'] is
        searched on dbxrefs.entrez. The ids of the matching documents are
        returned as a list. When log_conversion is true and a
        log_output_file_handler is given, the conversion is logged through
        _log_entrezid2ensembl_coversion.
        '''
        # Consult the gene history first so that old ids can be swapped for new ones.
        history = cls._check_gene_history(gene_sets, section)
        (newgene_ids, discontinued_ids) = history
        current_ids = cls._replace_oldids_with_newids(gene_sets, newgene_ids, discontinued_ids)

        query = ElasticQuery.filtered(
            Query.match_all(),
            TermsFilter.get_terms_filter("dbxrefs.entrez", current_ids))
        search = Search(query, idx=section['index'], size=1000000)
        ensembl_ids = [doc._meta['_id'] for doc in search.search().docs]

        if log_conversion and log_output_file_handler is not None:
            cls._log_entrezid2ensembl_coversion(current_ids, ensembl_ids, log_output_file_handler)

        return ensembl_ids
开发者ID:tottlefields,项目名称:django-data-pipeline,代码行数:24,代码来源:gene.py

示例15: genesets_details

# 需要导入模块: from elastic.search import ElasticQuery [as 别名]
# 或者: from elastic.search.ElasticQuery import filtered [as 别名]
def genesets_details(request):
    """Get pathway gene sets for a given ensembl ID.

    Reads "ens_id" from the POST data, searches the PATHWAY index for hits
    whose "gene_sets" field matches it and returns the hits as JSON. In each
    hit the "gene_sets" list is replaced by a dict of {ensembl id: symbol},
    using the id itself when no gene document is found.
    """
    requested_id = request.POST.get("ens_id")
    geneset_filter = Filter(Query.query_string(requested_id, fields=["gene_sets"]).query_wrap())
    query = ElasticQuery.filtered(Query.match_all(), geneset_filter)
    search = Search(query, idx=ElasticSettings.idx("GENE", "PATHWAY"), size=500)
    genesets_hits = search.get_json_response()["hits"]

    # Gather every member id up front so the gene documents are fetched once.
    member_ids = []
    for hit in genesets_hits["hits"]:
        member_ids.extend(hit["_source"]["gene_sets"])
    docs = _get_gene_docs_by_ensembl_id(member_ids, ["symbol"])

    for hit in genesets_hits["hits"]:
        source = hit["_source"]
        symbols = {}
        for member_id in source["gene_sets"]:
            try:
                symbols[member_id] = getattr(docs[member_id], "symbol")
            except KeyError:
                symbols[member_id] = member_id
        source["gene_sets"] = symbols
    return JsonResponse(genesets_hits)
开发者ID:tottlefields,项目名称:pydgin,代码行数:24,代码来源:views.py


注:本文中的elastic.search.ElasticQuery.filtered方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。