当前位置: 首页>>代码示例>>Python>>正文


Python Search.get_count方法代码示例

本文整理汇总了 Python 中 elastic.search.Search.get_count 方法的典型用法代码示例。如果您正苦于以下问题：Python Search.get_count 方法的具体用法是什么？Search.get_count 怎么用？有哪些 Search.get_count 的使用例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 elastic.search.Search 的用法示例。


在下文中一共展示了 Search.get_count 方法的 8 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: test_pubs_disease_tags

# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import get_count [as 别名]
    def test_pubs_disease_tags(self):
        ''' Check the number of disease publications against the number of tags.disease and
        report differences.

        For each disease in DiseasePublicationTest.DISEASES the PMIDs returned by
        self._get_pmids() are compared with the count of publication documents whose
        tags.disease term equals the lower-cased disease code. One tab-separated report
        line per disease is printed, and the test fails if any count differs. '''
        counts_match = True
        report = []
        for disease in DiseasePublicationTest.DISEASES:
            pmids = self._get_pmids(disease)
            disease_code = disease.lower()
            elastic = Search(search_query=ElasticQuery(BoolQuery(
                         b_filter=Filter(Query.term('tags.disease', disease_code))), sources=['pmid']),
                         idx=ElasticSettings.idx('PUBLICATION'), size=len(pmids)*2)
            res = elastic.get_count()
            line = disease_code+'\tINDEX: '+str(res['count'])+'\tNCBI: '+str(len(pmids))
            if res['count'] != len(pmids):
                counts_match = False
                docs = elastic.search().docs
                pmids_in_idx = [getattr(doc, 'pmid') for doc in docs]
                # Sets make each membership test O(1); the reported lists still follow the
                # order of the sequences they are filtered from.
                # NOTE(review): assumes PMIDs are hashable (e.g. str/int) - confirm.
                expected = set(pmids)
                indexed = set(pmids_in_idx)
                pmids_diff1 = [pmid for pmid in pmids_in_idx if pmid not in expected]
                pmids_diff2 = [pmid for pmid in pmids if pmid not in indexed]
                if pmids_diff1:
                    line += '\textra PMIDs: '+str(pmids_diff1)
                if pmids_diff2:
                    line += '\tmissing PMIDs: '+str(pmids_diff2)
            report.append(line + '\n')

        # Print the full report before asserting so it is visible when the test fails.
        print(''.join(report))
        self.assertTrue(counts_match, 'Count for disease tags')
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:29,代码来源:test_publications.py

示例2: test_marker_pipeline

# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import get_count [as 别名]
    def test_marker_pipeline(self):
        """ Test marker pipeline.

        Runs the pipeline load step for the DBSNP and then the RSMERGEARCH section and,
        after each load, checks that the section's index reports a count above zero. """
        ini_config = None
        for section in ("DBSNP", "RSMERGEARCH"):
            call_command("pipeline", "--steps", "load", sections=section, dir=TEST_DATA_DIR, ini=MY_INI_FILE)

            if ini_config is None:
                # The ini file is parsed once, after the first load, and reused afterwards.
                ini_config = IniParser().read_ini(MY_INI_FILE)

            section_conf = ini_config[section]
            idx, idx_type = section_conf["index"], section_conf["index_type"]
            elastic = Search(idx=idx, idx_type=idx_type)
            Search.index_refresh(idx)
            doc_total = elastic.get_count()["count"]
            self.assertGreater(doc_total, 0)
开发者ID:tottlefields,项目名称:django-data-pipeline,代码行数:19,代码来源:tests_stage_load.py

示例3: get_pmids

# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import get_count [as 别名]
        def get_pmids(resp_json):
            ''' Collect the distinct PMIDs from the hits of resp_json and check that the
            publication index reports the same number of documents for those ids. '''
            hit_pmids = [getattr(Document(hit), "pmid") for hit in resp_json['hits']['hits']]
            pmids = list(set(hit_pmids))

            id_query = ElasticQuery(BoolQuery(b_filter=Filter(Query.ids(pmids))), sources=['pmid'])
            elastic = Search(search_query=id_query, idx=ElasticSettings.idx('PUBLICATION'),
                             size=len(pmids)*2)

            if len(pmids) != elastic.get_count()['count']:
                # counts disagree: work out which PMIDs were not returned by the index
                indexed_pmids = [getattr(doc, 'pmid') for doc in elastic.search().docs]
                pmids_diff = list(set(pmids) - set(indexed_pmids))
                self.assertListEqual([], pmids_diff, "PMIDs list empty ("+str(pmids_diff)+")")

            self.assertEqual(len(pmids), elastic.get_count()['count'], 'Count for region publications')
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:21,代码来源:test_publications.py

示例4: test_gene_history_loader

# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import get_count [as 别名]
    def test_gene_history_loader(self):
        """ Test the gene history loading.

        Runs the pipeline load step for the GENE_HISTORY section, then checks that the
        configured index holds more than one document and passes the index mapping and
        the Gene.gene_history_mapping() properties to self._cmpMappings. """
        call_command("pipeline", "--steps", "load", sections="GENE_HISTORY", dir=TEST_DATA_DIR, ini=MY_INI_FILE)

        INI_CONFIG = IniParser().read_ini(MY_INI_FILE)
        idx = INI_CONFIG["GENE_HISTORY"]["index"]
        idx_type = INI_CONFIG["GENE_HISTORY"]["index_type"]
        elastic = Search(idx=idx, idx_type=idx_type)
        Search.index_refresh(idx)

        # assertGreater includes the actual count in the failure message.
        self.assertGreater(elastic.get_count()["count"], 1, "Count documents in the index")
        map1_props = Gene.gene_history_mapping(idx, idx_type, test_mode=True).mapping_properties
        map2_props = elastic.get_mapping()
        if idx not in map2_props:
            logger.error("MAPPING ERROR: " + json.dumps(map2_props))
        # Fail with an explicit assertion instead of a bare KeyError on the lookup below.
        self.assertIn(idx, map2_props, "Index missing from the mapping response")
        self._cmpMappings(map2_props[idx]["mappings"], map1_props, idx_type)
开发者ID:tottlefields,项目名称:django-data-pipeline,代码行数:18,代码来源:tests_stage_load.py

示例5: test_gene_pubs

# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import get_count [as 别名]
    def test_gene_pubs(self):
        ''' Check the difference between the pubs indexed and those from the gene_pub file
        from the NCBI. If the publication pipeline has not been run recently there is likely
        to be a difference. This is allowed for with the NUM_DIFF variable. If there is a
        larger difference than this then the publication pipeline should be run.

        The gene publications file is downloaded into DiseasePublicationTest.TEST_DATA_DIR
        and is always removed again once it has been downloaded successfully, even when
        the count assertion fails. '''
        ini = IniParser()
        config = ini.read_ini('publications.ini')
        section = config['GENE']

        file_name = 'gene_pub_test.tmp'
        download_file = os.path.join(DiseasePublicationTest.TEST_DATA_DIR, file_name)
        success = FTPDownload().download(urljoin(section['location'], section['files']),
                                         DiseasePublicationTest.TEST_DATA_DIR, file_name=file_name)
        self.assertTrue(success, 'downloaded gene publications file')

        try:
            # Keep the third tab-separated column of every row starting with '9606\t'
            # (presumably the NCBI taxonomy id for human - confirm); the set de-duplicates.
            with gzip.open(download_file, 'rt') as gene_pub_file:
                pmids = list({line.split('\t')[2].strip() for line in gene_pub_file
                              if line.startswith('9606\t')})

            elastic = Search(search_query=ElasticQuery(BoolQuery(b_filter=Filter(Query.ids(pmids))),
                                                       sources=['pmid']),
                             idx=ElasticSettings.idx('PUBLICATION'), size=len(pmids)*2)
            self.assertLess(len(pmids)-elastic.get_count()['count'], GenePublicationTest.NUM_DIFF,
                            'Count for gene publications')

            # check for differences in pmids
            # pmids_in_idx = []
            #
            # def get_pmids(resp_json):
            #     hits = resp_json['hits']['hits']
            #     pmids_in_idx.extend([getattr(Document(h), "pmid") for h in hits])
            #
            # ScanAndScroll.scan_and_scroll(idx=ElasticSettings.idx('PUBLICATION'), call_fun=get_pmids,
            #                               query=ElasticQuery(BoolQuery(b_filter=Filter(Query.ids(pmids))),
            #                                                  sources=['pmid']),
            #                               time_to_keep_scoll=30)
            # pmids_diff = list(set(pmids) - set(pmids_in_idx))
            # self.assertLess(len(pmids_diff), GenePublicationTest.NUM_DIFF)
        finally:
            # Remove the temporary download whether or not the checks above passed.
            os.remove(download_file)
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:47,代码来源:test_publications.py

示例6: test_pub_disease_counts

# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import get_count [as 别名]
    def test_pub_disease_counts(self):
        ''' Check that every PMID returned for each disease exists in the publication index. '''
        for disease in DiseasePublicationTest.DISEASES:
            expected_pmids = self._get_pmids(disease)
            code = disease.lower()
            count_search = Search(
                search_query=ElasticQuery(BoolQuery(b_filter=Filter(Query.ids(expected_pmids)))),
                idx=ElasticSettings.idx('PUBLICATION'),
                size=len(expected_pmids)*2)
            indexed_total = count_search.get_count()['count']
            self.assertEqual(indexed_total, len(expected_pmids), 'Count for '+code)

            # gather the PMIDs held in the index so missing ones can be identified
            found_pmids = []

            def collect_pmids(resp_json):
                # callback handed to ScanAndScroll: store the pmid of every hit received
                for hit in resp_json['hits']['hits']:
                    found_pmids.append(getattr(Document(hit), "pmid"))

            ScanAndScroll.scan_and_scroll(
                idx=ElasticSettings.idx('PUBLICATION'),
                call_fun=collect_pmids,
                query=ElasticQuery(BoolQuery(b_filter=Filter(Query.ids(expected_pmids))),
                                   sources=['pmid']))
            missing = set(expected_pmids) - set(found_pmids)
            self.assertEqual(len(missing), 0)
开发者ID:D-I-L,项目名称:django-data-pipeline,代码行数:22,代码来源:test_publications.py

示例7: get_docs_count

# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import get_count [as 别名]
 def get_docs_count(cls, idx, idx_type):
     '''Return the 'count' entry reported by a Search over the given index and index type.'''
     return Search(idx=idx, idx_type=idx_type).get_count()['count']
开发者ID:premanand17,项目名称:django-data-pipeline,代码行数:6,代码来源:utils.py

示例8: test_gene_pipeline

# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import get_count [as 别名]
    def test_gene_pipeline(self):
        """ Test gene pipeline.

        Runs the gene pipeline sections one after another against the index configured
        for ENSEMBL_GENE_GTF, refreshing the index and checking the stored documents
        after every step. """

        INI_CONFIG = IniParser().read_ini(MY_INI_FILE)
        idx = INI_CONFIG["ENSEMBL_GENE_GTF"]["index"]
        idx_type = INI_CONFIG["ENSEMBL_GENE_GTF"]["index_type"]

        def docs_for_symbol(symbol):
            # Query the index by gene symbol and fail clearly (instead of raising an
            # IndexError on docs[0] later) when no document comes back.
            query = ElasticQuery.query_string(symbol, fields=["symbol"])
            found = Search(query, idx=idx).search().docs
            self.assertGreater(len(found), 0, "No documents found for symbol " + symbol)
            return found

        # 1. Test ensembl GTF loading.
        call_command(
            "pipeline", "--steps", "stage", "load", sections="ENSEMBL_GENE_GTF", dir=TEST_DATA_DIR, ini=MY_INI_FILE
        )
        Search.index_refresh(idx)

        elastic = Search(idx=idx, idx_type=idx_type)
        self.assertGreaterEqual(elastic.get_count()["count"], 1, "Count documents in the index")
        map1_props = Gene.gene_mapping(idx, idx_type, test_mode=True).mapping_properties
        map2_props = elastic.get_mapping()
        if idx not in map2_props:
            logger.error("MAPPING ERROR: " + json.dumps(map2_props))
        # Fail with an explicit assertion instead of a bare KeyError on the lookup below.
        self.assertIn(idx, map2_props, "Index missing from the mapping response")
        self._cmpMappings(map2_props[idx]["mappings"], map1_props, idx_type)

        # 2. Test adding entrez ID to documents
        call_command("pipeline", "--steps", "load", sections="GENE2ENSEMBL", dir=TEST_DATA_DIR, ini=MY_INI_FILE)
        Search.index_refresh(idx)
        docs = docs_for_symbol("PTPN22")
        self.assertEqual(len(docs), 1)
        self.assertIn("entrez", getattr(docs[0], "dbxrefs"))
        self.assertEqual(getattr(docs[0], "dbxrefs")["entrez"], "26191")

        # 3. Add uniprot and fill in missing entrez fields.
        call_command(
            "pipeline", "--steps", "download", "load", sections="ENSMART_GENE", dir=TEST_DATA_DIR, ini=MY_INI_FILE
        )
        Search.index_refresh(idx)
        docs = docs_for_symbol("DNMT3L")
        self.assertIn("entrez", getattr(docs[0], "dbxrefs"))
        self.assertIn("swissprot", getattr(docs[0], "dbxrefs"))

        # 4. Add gene synonyms and dbxrefs.
        call_command("pipeline", "--steps", "load", sections="GENE_INFO", dir=TEST_DATA_DIR, ini=MY_INI_FILE)
        Search.index_refresh(idx)
        docs = docs_for_symbol("PTPN22")
        self.assertIn("PTPN8", getattr(docs[0], "synonyms"))

        # 5. Add PMIDs to gene docs.
        call_command("pipeline", "--steps", "load", sections="GENE_PUBS", dir=TEST_DATA_DIR, ini=MY_INI_FILE)
        Search.index_refresh(idx)
        docs = docs_for_symbol("PTPN22")
        self.assertGreater(len(getattr(docs[0], "pmids")), 0)

        # 6. Add ortholog data.
        call_command("pipeline", "--steps", "load", sections="ENSMART_HOMOLOG", dir=TEST_DATA_DIR, ini=MY_INI_FILE)
        Search.index_refresh(idx)
        docs = docs_for_symbol("PTPN22")
        dbxrefs = getattr(docs[0], "dbxrefs")
        self.assertIn("orthologs", dbxrefs, dbxrefs)
        self.assertIn("mmusculus", dbxrefs["orthologs"], dbxrefs)
        self.assertEqual("ENSMUSG00000027843", dbxrefs["orthologs"]["mmusculus"]["ensembl"])

        # The same ortholog filter query is reused in step 7 below.
        query = ElasticQuery.filtered(
            Query.match_all(),
            TermsFilter.get_terms_filter("dbxrefs.orthologs.mmusculus.ensembl", ["ENSMUSG00000027843"]),
        )
        docs = Search(query, idx=idx, size=1).search().docs
        self.assertEqual(len(docs), 1)

        # 7. Add mouse ortholog link to MGI
        call_command("pipeline", "--steps", "load", sections="ENSEMBL2MGI", dir=TEST_DATA_DIR, ini=MY_INI_FILE)
        Search.index_refresh(idx)
        docs = Search(query, idx=idx, size=1).search().docs
        # Guard the docs[0] access so an empty result fails as an assertion.
        self.assertGreater(len(docs), 0, "No documents found for the mouse ortholog filter")
        dbxrefs = getattr(docs[0], "dbxrefs")
        self.assertEqual("ENSMUSG00000027843", dbxrefs["orthologs"]["mmusculus"]["ensembl"])
        self.assertEqual("107170", dbxrefs["orthologs"]["mmusculus"]["MGI"])
开发者ID:tottlefields,项目名称:django-data-pipeline,代码行数:85,代码来源:tests_stage_load.py


注:本文中的elastic.search.Search.get_count方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。