本文整理汇总了 Python 中 elastic.search.Search.get_count 方法的典型用法代码示例。如果您正苦于以下问题：Python Search.get_count 方法的具体用法？Python Search.get_count 怎么用？Python Search.get_count 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 elastic.search.Search 的用法示例。
在下文中一共展示了 Search.get_count 方法的 8 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_pubs_disease_tags
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import get_count [as 别名]
def test_pubs_disease_tags(self):
    ''' Check the number of disease publications against the number of tags.disease and
    report differences.

    For every disease a report line with the index count and the NCBI count is
    printed; when the two counts differ the PMIDs present only in the index
    ("extra") and only in the NCBI list ("missing") are appended to that line.
    The test fails if any disease has mismatching counts. '''
    counts_match = True
    report_lines = []
    for disease in DiseasePublicationTest.DISEASES:
        pmids = self._get_pmids(disease)
        disease_code = disease.lower()
        elastic = Search(search_query=ElasticQuery(BoolQuery(
                         b_filter=Filter(Query.term('tags.disease', disease_code))), sources=['pmid']),
                         idx=ElasticSettings.idx('PUBLICATION'), size=len(pmids)*2)
        res = elastic.get_count()
        line = disease_code+'\tINDEX: '+str(res['count'])+'\tNCBI: '+str(len(pmids))
        if res['count'] != len(pmids):
            counts_match = False
            docs = elastic.search().docs
            pmids_in_idx = [getattr(doc, 'pmid') for doc in docs]
            # Membership is tested against sets so both diffs are linear rather
            # than quadratic; iterating the original lists keeps their order.
            expected = set(pmids)
            indexed = set(pmids_in_idx)
            extra_pmids = [pmid for pmid in pmids_in_idx if pmid not in expected]
            missing_pmids = [pmid for pmid in pmids if pmid not in indexed]
            if extra_pmids:
                line += '\textra PMIDs: '+str(extra_pmids)
            if missing_pmids:
                line += '\tmissing PMIDs: '+str(missing_pmids)
        report_lines.append(line+'\n')
    print(''.join(report_lines))
    self.assertTrue(counts_match, 'Count for disease tags')
示例2: test_marker_pipeline
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import get_count [as 别名]
def test_marker_pipeline(self):
    """ Test marker pipeline: load the DBSNP and RSMERGEARCH sections and check
    that each configured index contains at least one document. """

    def load_section(section):
        # Run only the "load" step of the pipeline for one ini section.
        call_command("pipeline", "--steps", "load", sections=section, dir=TEST_DATA_DIR, ini=MY_INI_FILE)

    def indexed_doc_count(section_cfg):
        # Build the search first, then refresh the index, then count.
        index_name = section_cfg["index"]
        index_type = section_cfg["index_type"]
        searcher = Search(idx=index_name, idx_type=index_type)
        Search.index_refresh(index_name)
        return searcher.get_count()["count"]

    load_section("DBSNP")
    ini_config = IniParser().read_ini(MY_INI_FILE)
    self.assertGreater(indexed_doc_count(ini_config["DBSNP"]), 0)

    load_section("RSMERGEARCH")
    self.assertGreater(indexed_doc_count(ini_config["RSMERGEARCH"]), 0)
示例3: get_pmids
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import get_count [as 别名]
def get_pmids(resp_json):
    """ Check that every PMID in a search response exists in the publication index.

    NOTE(review): `self` is not a parameter, so this is presumably a closure
    defined inside a test method - confirm against the enclosing code. """
    hits = resp_json['hits']['hits']
    # De-duplicate the PMIDs carried by the hits.
    pmids = list({getattr(Document(hit), "pmid") for hit in hits})

    ids_query = ElasticQuery(BoolQuery(b_filter=Filter(Query.ids(pmids))),
                             sources=['pmid'])
    elastic = Search(search_query=ids_query,
                     idx=ElasticSettings.idx('PUBLICATION'), size=len(pmids)*2)

    if elastic.get_count()['count'] != len(pmids):
        # check for differences in pmids
        indexed_pmids = [getattr(doc, 'pmid') for doc in elastic.search().docs]
        pmids_diff = list(set(pmids) - set(indexed_pmids))
        self.assertListEqual([], pmids_diff, "PMIDs list empty ("+str(pmids_diff)+")")
    self.assertEqual(len(pmids), elastic.get_count()['count'], 'Count for region publications')
示例4: test_gene_history_loader
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import get_count [as 别名]
def test_gene_history_loader(self):
    """ Test the gene history loading: run the load step for GENE_HISTORY, check
    the index document count and compare the index mapping with the expected one. """
    call_command("pipeline", "--steps", "load", sections="GENE_HISTORY", dir=TEST_DATA_DIR, ini=MY_INI_FILE)

    section_cfg = IniParser().read_ini(MY_INI_FILE)["GENE_HISTORY"]
    idx = section_cfg["index"]
    idx_type = section_cfg["index_type"]

    history_search = Search(idx=idx, idx_type=idx_type)
    Search.index_refresh(idx)
    doc_count = history_search.get_count()["count"]
    self.assertTrue(doc_count > 1, "Count documents in the index")

    expected_props = Gene.gene_history_mapping(idx, idx_type, test_mode=True).mapping_properties
    index_mapping = history_search.get_mapping()
    if idx not in index_mapping:
        # Log the mapping response before the lookup below is attempted.
        logger.error("MAPPING ERROR: " + json.dumps(index_mapping))
    self._cmpMappings(index_mapping[idx]["mappings"], expected_props, idx_type)
示例5: test_gene_pubs
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import get_count [as 别名]
def test_gene_pubs(self):
    ''' Check the difference between the pubs indexed and those from the gene_pub file
    from the NCBI. If the publication pipeline has not been run recently there is likely
    to be a difference. This is allowed for with the NUM_DIFF variable. If there is a
    larger difference than this then the publication pipeline should be run.

    The gene publications file is downloaded to a temporary file in the test data
    directory; that file is removed again even when the test fails. '''
    ini = IniParser()
    config = ini.read_ini('publications.ini')
    section = config['GENE']

    file_name = 'gene_pub_test.tmp'
    download_file = os.path.join(DiseasePublicationTest.TEST_DATA_DIR, file_name)
    try:
        success = FTPDownload().download(urljoin(section['location'], section['files']),
                                         DiseasePublicationTest.TEST_DATA_DIR, file_name=file_name)
        self.assertTrue(success, 'downloaded gene publications file')

        # Collect the distinct values of the third tab-separated column from the
        # rows whose first column is 9606.
        with gzip.open(download_file, 'rt') as gene_pub_file:
            pmids = list({line.split('\t')[2].strip()
                          for line in gene_pub_file if line.startswith('9606\t')})

        elastic = Search(search_query=ElasticQuery(BoolQuery(b_filter=Filter(Query.ids(pmids))),
                                                   sources=['pmid']),
                         idx=ElasticSettings.idx('PUBLICATION'), size=len(pmids)*2)
        self.assertLess(len(pmids)-elastic.get_count()['count'], GenePublicationTest.NUM_DIFF,
                        'Count for gene publications')

        # check for differences in pmids
        # pmids_in_idx = []
        #
        # def get_pmids(resp_json):
        #     hits = resp_json['hits']['hits']
        #     pmids_in_idx.extend([getattr(Document(h), "pmid") for h in hits])
        #
        # ScanAndScroll.scan_and_scroll(idx=ElasticSettings.idx('PUBLICATION'), call_fun=get_pmids,
        #                               query=ElasticQuery(BoolQuery(b_filter=Filter(Query.ids(pmids))),
        #                                                  sources=['pmid']),
        #                               time_to_keep_scoll=30)
        # pmids_diff = list(set(pmids) - set(pmids_in_idx))
        # self.assertLess(len(pmids_diff), GenePublicationTest.NUM_DIFF)
    finally:
        # Always clean up the temporary download. The existence check stops a
        # failed download from masking the real assertion error.
        if os.path.exists(download_file):
            os.remove(download_file)
示例6: test_pub_disease_counts
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import get_count [as 别名]
def test_pub_disease_counts(self):
    ''' Check all publications exist in the publication index: for each disease the
    index count for its PMIDs must equal the number of PMIDs, and no PMID may be
    absent from the scrolled results. '''
    for disease in DiseasePublicationTest.DISEASES:
        expected_pmids = self._get_pmids(disease)
        code = disease.lower()

        count_query = ElasticQuery(BoolQuery(b_filter=Filter(Query.ids(expected_pmids))))
        counter = Search(search_query=count_query,
                         idx=ElasticSettings.idx('PUBLICATION'), size=len(expected_pmids)*2)
        self.assertEqual(counter.get_count()['count'], len(expected_pmids), 'Count for '+code)

        # check for differences in pmids
        indexed_pmids = []

        def collect_pmids(resp_json):
            # Callback for the scroll: gather the pmid of every hit in the response.
            for hit in resp_json['hits']['hits']:
                indexed_pmids.append(getattr(Document(hit), "pmid"))

        scroll_query = ElasticQuery(BoolQuery(b_filter=Filter(Query.ids(expected_pmids))),
                                    sources=['pmid'])
        ScanAndScroll.scan_and_scroll(idx=ElasticSettings.idx('PUBLICATION'), call_fun=collect_pmids,
                                      query=scroll_query)
        not_indexed = list(set(expected_pmids) - set(indexed_pmids))
        self.assertEqual(len(not_indexed), 0)
示例7: get_docs_count
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import get_count [as 别名]
def get_docs_count(cls, idx, idx_type):
    '''Return the 'count' value reported by Search.get_count() for the given
    index and index type.'''
    count_response = Search(idx=idx, idx_type=idx_type).get_count()
    return count_response['count']
示例8: test_gene_pipeline
# 需要导入模块: from elastic.search import Search [as 别名]
# 或者: from elastic.search.Search import get_count [as 别名]
def test_gene_pipeline(self):
    """ Test gene pipeline: run each pipeline section in turn and check the gene
    index after every step. """
    gtf_cfg = IniParser().read_ini(MY_INI_FILE)["ENSEMBL_GENE_GTF"]
    idx = gtf_cfg["index"]
    idx_type = gtf_cfg["index_type"]

    def run_section(section, *steps):
        # Run the given pipeline steps for one ini section, then refresh the index.
        call_command("pipeline", "--steps", *steps, sections=section, dir=TEST_DATA_DIR, ini=MY_INI_FILE)
        Search.index_refresh(idx)

    def docs_for_symbol(symbol):
        # Query the gene index on the "symbol" field and return the matching docs.
        symbol_query = ElasticQuery.query_string(symbol, fields=["symbol"])
        return Search(symbol_query, idx=idx).search().docs

    # 1. Test ensembl GTF loading.
    run_section("ENSEMBL_GENE_GTF", "stage", "load")
    gene_search = Search(idx=idx, idx_type=idx_type)
    self.assertGreaterEqual(gene_search.get_count()["count"], 1, "Count documents in the index")
    expected_props = Gene.gene_mapping(idx, idx_type, test_mode=True).mapping_properties
    index_mapping = gene_search.get_mapping()
    if idx not in index_mapping:
        logger.error("MAPPING ERROR: " + json.dumps(index_mapping))
    self._cmpMappings(index_mapping[idx]["mappings"], expected_props, idx_type)

    # 2. Test adding entrez ID to documents
    run_section("GENE2ENSEMBL", "load")
    docs = docs_for_symbol("PTPN22")
    self.assertEqual(len(docs), 1)
    self.assertTrue("entrez" in getattr(docs[0], "dbxrefs"))
    self.assertEqual(getattr(docs[0], "dbxrefs")["entrez"], "26191")

    # 3. Add uniprot and fill in missing entrez fields.
    run_section("ENSMART_GENE", "download", "load")
    docs = docs_for_symbol("DNMT3L")
    self.assertTrue("entrez" in getattr(docs[0], "dbxrefs"))
    self.assertTrue("swissprot" in getattr(docs[0], "dbxrefs"))

    # 4. Add gene synonyms and dbxrefs.
    run_section("GENE_INFO", "load")
    docs = docs_for_symbol("PTPN22")
    self.assertTrue("PTPN8" in getattr(docs[0], "synonyms"))

    # 5. Add PMIDs to gene docs.
    run_section("GENE_PUBS", "load")
    docs = docs_for_symbol("PTPN22")
    self.assertGreater(len(getattr(docs[0], "pmids")), 0)

    # 6. Add ortholog data.
    run_section("ENSMART_HOMOLOG", "load")
    docs = docs_for_symbol("PTPN22")
    dbxrefs = getattr(docs[0], "dbxrefs")
    self.assertTrue("orthologs" in dbxrefs, dbxrefs)
    self.assertTrue("mmusculus" in dbxrefs["orthologs"], dbxrefs)
    self.assertEqual("ENSMUSG00000027843", dbxrefs["orthologs"]["mmusculus"]["ensembl"])

    # The same filtered query is reused in step 7 below.
    ortholog_query = ElasticQuery.filtered(
        Query.match_all(),
        TermsFilter.get_terms_filter("dbxrefs.orthologs.mmusculus.ensembl", ["ENSMUSG00000027843"]),
    )
    docs = Search(ortholog_query, idx=idx, size=1).search().docs
    self.assertEqual(len(docs), 1)

    # 7. Add mouse ortholog link to MGI
    run_section("ENSEMBL2MGI", "load")
    docs = Search(ortholog_query, idx=idx, size=1).search().docs
    dbxrefs = getattr(docs[0], "dbxrefs")
    self.assertEqual("ENSMUSG00000027843", dbxrefs["orthologs"]["mmusculus"]["ensembl"])
    self.assertEqual("107170", dbxrefs["orthologs"]["mmusculus"]["MGI"])