本文整理汇总了 Python 中 Bio.Entrez.esearch 方法的典型用法代码示例。如果您正苦于以下问题：Python Entrez.esearch 方法的具体用法？Python Entrez.esearch 怎么用？Python Entrez.esearch 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Bio.Entrez 的用法示例。
在下文中一共展示了Entrez.esearch方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_taxid_from_species
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def get_taxid_from_species(self, species):
    """Resolve a species name to an NCBI taxonomy ID.

    Args:
        species: Either an all-digit string, which is treated as a taxid
            and returned unchanged, or a species name to look up in the
            NCBI "taxonomy" database with ``Entrez.esearch``.

    Returns:
        ``species`` itself when it consists only of digits; otherwise the
        first ID in the search result's ``IdList``, or ``None`` when that
        list is empty. If the lookup raises ``IOError`` the error is
        logged and the original ``species`` argument is returned as-is.
    """
    # An all-digit value is taken to be a taxid already; no lookup needed.
    if species.isdigit():
        return species

    taxid = species
    Entrez.email = EMBL.PREVIOUS_VALUES["email"]
    # Fetch the taxid from the NCBI taxonomy database.
    logging.debug("Fetch the taxid from species name using Entrez.esearch")
    # Trim BEFORE joining words with "+": stripping afterwards would leave
    # leading/trailing blanks in the query as stray "+" characters.
    term = species.strip().replace(" ", "+")
    try:
        search = Entrez.esearch(term=term, db="taxonomy", retmode="xml")
        try:
            record = Entrez.read(search)
        finally:
            # Release the connection whether or not parsing succeeded.
            search.close()
        if not record['IdList']:  # no taxid found
            logging.warning("Please verify the species name. '%s' species is unknown into the NCBI taxonomy databse. Impossible to check the taxonomic classification. We will use the default value 'Life' to populate the OC line.", self.species)
            taxid = None
        else:
            taxid = record['IdList'][0]
    except IOError as e:
        # Best effort: keep the caller-supplied value when NCBI is unreachable.
        logging.error("Could not get taxid from species: %s", e)
    return taxid
示例2: get_GIs
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def get_GIs(self, accessions, n_entrez=2500, **kwargs):
    '''
    Use entrez esearch to get genbank identifiers from accession numbers.

    accessions -- sequence of accession strings (must support slicing).
    n_entrez   -- number of accessions sent per esearch request.
    kwargs     -- accepted for call compatibility; not used here.

    Returns a list with the concatenated 'IdList' entries of every request,
    in request order. The database queried is taken from ``self.gbdb``.
    '''
    # Max records to retrieve at once; 10^5 is the documented limit, but
    # sending more than ~2500 accessions per query reproducibly throws
    # errors, hence the chunking by n_entrez below.
    retmax = 10**5

    # Split the accession list into n_entrez-long portions, each joined
    # into one space-separated query string. `range` works on Python 2
    # and 3 alike (the former `xrange` is Python 2 only) and is already
    # ascending, so no sort is required.
    queries = [
        " ".join(accessions[i:i + n_entrez])
        for i in range(0, len(accessions), n_entrez)
    ]
    # Sanity check: every accession ended up in exactly one query.
    assert sum(len(q.split()) for q in queries) == len(accessions)

    giList = []
    for q in queries:
        handle = Entrez.esearch(db=self.gbdb, term=q, retmax=retmax)
        try:
            giList += Entrez.read(handle)['IdList']  # pull GI numbers
        finally:
            handle.close()
    return giList
示例3: get_asm_uids
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def get_asm_uids(args, taxon_uid):
    """Return the NCBI assembly UIDs associated with the passed taxon.

    :param args: Namespace, command-line arguments (forwarded to the
        ``entrez_retry`` and ``entrez_batch_webhistory`` helpers)
    :param taxon_uid: str, NCBI taxon ID

    The ESearch term is ``txid<taxon_uid>[Organism:exp]``, which asks NCBI
    for all assemblies in the taxon subtree rooted at ``taxon_uid``. The
    value returned is whatever ``entrez_batch_webhistory`` produces.
    """
    log = logging.getLogger(__name__)

    search_term = f"txid{taxon_uid}[Organism:exp]"
    log.info("Entrez ESearch with query: %s", search_term)

    # Initial search for assembly UIDs, keeping the result set in NCBI's
    # web history so it can be paged through afterwards.
    # NOTE(review): the keyword sent is `format`, not `retmode` - confirm
    # this is what entrez_retry / esearch expect.
    esearch_kwargs = {
        "db": "assembly",
        "term": search_term,
        "format": "xml",
        "usehistory": "y",
    }
    search_handle = entrez_retry(args, log, Entrez.esearch, **esearch_kwargs)
    search_record = Entrez.read(search_handle, validate=False)

    n_hits = int(search_record["Count"])
    log.info("Entrez ESearch returns %d assembly IDs", n_hits)

    # Pull the UIDs back out of the web history in batches of 250.
    uids = entrez_batch_webhistory(
        args, log, search_record, n_hits, 250, db="assembly", retmode="xml"
    )
    log.info("Identified %d unique assemblies", len(uids))
    return uids
# Extract filestem from Entrez eSummary
示例4: get_tax_id
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def get_tax_id(self, query_name):
    """Look up the NCBI taxonomy ID for a species name.

    To get data from the NCBI taxonomy database a taxid is needed; it is
    obtained by passing the species name to esearch.

    Args:
        query_name: species name to search for in the "taxonomy" database.

    Returns:
        The first entry of the result's ``IdList``, or ``None`` when the
        search returned no IDs.

    Side effects:
        Sets ``Entrez.email`` to a placeholder address.
    """
    # Trim BEFORE joining words with "+": stripping afterwards would leave
    # leading/trailing blanks in the query as stray "+" characters.
    term = query_name.strip().replace(' ', "+")
    Entrez.email = 'A.N.Other@example.com'
    search = Entrez.esearch(term=term, db="taxonomy", retmode="xml")
    try:
        record = Entrez.read(search)
    finally:
        # Release the connection whether or not parsing succeeded.
        search.close()
    id_list = record['IdList']
    return id_list[0] if id_list else None
示例5: searchEntrez
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def searchEntrez(accession_list, bio_type):
    """Search an Entrez database for accessions, 20 per request.

    Args:
        accession_list: sequence of accession strings (must support
            slicing); each batch is sent as one comma-separated term.
        bio_type: name of the Entrez database passed as ``db``.

    Returns:
        A list with the concatenated ``IdList`` entries of every request,
        in request order. Empty when ``accession_list`` is empty.

    Side effects:
        Sets ``Entrez.email`` and prints the elapsed time in seconds.
    """
    start_time = time.time()
    Entrez.email = "nsalomonis@gmail.com"  # Always tell NCBI who you are
    batch_size = 20
    gi_list = []
    # Slicing never raises IndexError, so stepping through the list with
    # range() covers exactly the batches the previous while-loop produced.
    for index in range(0, len(accession_list), batch_size):
        batch = accession_list[index:index + batch_size]
        # str.join replaces string.join, which no longer exists on Python 3.
        search_handle = Entrez.esearch(db=bio_type, term=','.join(batch))
        try:
            search_results = Entrez.read(search_handle)
        finally:
            search_handle.close()
        gi_list += search_results["IdList"]
    time_diff = int(time.time() - start_time)
    # Single-argument call form prints identically on Python 2 and 3.
    print("finished in %s seconds" % time_diff)
    return gi_list
示例6: test
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def test(self):
    '''
    Check that the Entrez API can be reached and answers as expected.

    Searches the "gene" database for the symbol APOBEC3G and reports
    whether Entrez ID 60489 is among the returned IDs.

    Returns 1 when the ID is present, 0 otherwise.
    '''
    expected_id = "60489"
    handle = Entrez.esearch(
        db="gene",
        term="(APOBEC3G[Preferred+Symbol])",
        retmode="text",
        retmax=1000000,
    )
    parsed = Entrez.read(handle)
    handle.close()
    return 1 if expected_id in parsed['IdList'] else 0
示例7: download_all
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def download_all(self, host, count=1000000000):
    '''
    Fetch the Entrez Gene IDs for one host and return them as a list.

    host  -- value interpolated into the query as the "[Taxonomy ID]" term.
    count -- passed to esearch as retmax (upper bound on returned IDs).

    Only IDs flagged "alive" (current, not obsolete) are requested.
    '''
    query = '("alive"[Properties]) AND %s[Taxonomy ID]' % host
    handle = Entrez.esearch(
        db="gene",
        term=query,
        retmode="text",
        retmax=count,
    )
    parsed = Entrez.read(handle)
    handle.close()
    gene_ids = parsed['IdList']
    return gene_ids
示例8: get_gene_sequence
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def get_gene_sequence(gene_name):
    """Read a gene's sequence from its pre-generated sequence file.

    The file is looked up at ``../../gene_sequences/<gene_name>_sequence.txt``
    relative to the current working directory.

    Args:
        gene_name: gene identifier used to build the file name.

    Returns:
        The raw file content with every CRLF pair removed. The file is
        read in binary mode, so this is ``bytes`` on Python 3 and ``str``
        on Python 2.

    Raises:
        Exception: when the sequence file cannot be opened or read.
    """
    gene_file = '../../gene_sequences/%s_sequence.txt' % gene_name
    try:
        with open(gene_file, 'rb') as f:
            seq = f.read()
    except (IOError, OSError):
        # Only genuine I/O failures are reported as a missing file; a
        # catch-all here would mislabel unrelated errors.
        raise Exception("could not find gene sequence file %s, please see examples and generate one for your gene as needed, with this filename" % gene_file)
    # Bytes literals match the binary read; str arguments would raise
    # TypeError on Python 3.
    return seq.replace(b'\r\n', b'')
# gene_positions = {'CCDC101': [28553928,28591790]}
# search = Entrez.esearch(db="gene", term='%s[Gene Name] AND Homo Sapiens[Organism]' % (gene_name))
# records = Entrez.read(search)
# if len(records['IdList']) > 1:
# print "warning, multiple hits found for entrez gene search %s" % gene_name
# elink = Entrez.read(Entrez.elink(dbfrom="gene", db='nucleotide', id=records['IdList'][0]))
# nucl_id = elink[0]['LinkSetDb'][3]
# cut = False
# if nucl_id['LinkName'] != 'gene_nuccore_refseqgene':
# if gene_name in gene_positions.keys():
# nucl_id = elink[0]['LinkSetDb'][0]['Link'][0]['Id']
# cut = True
# else:
# print "sorry not enough information to return sequence"
# return None
# else:
# nucl_id = nucl_id['Link'][0]['Id']
# handle = Entrez.efetch(db="nucleotide", id=nucl_id, rettype="gb", retmode="text")
# record = SeqIO.read(handle, "genbank")
# handle.close()
# if cut:
# start, end = gene_positions[gene_name]
# return str(record.seq)[start:end]
# else:
# return str(record.seq)
示例9: search
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import esearch [as 别名]
def search(self):
    """Run an Entrez search and stream the fetched records to the UI.

    Reads the database, keywords and e-mail from the widgets, performs an
    ``esearch`` with web history enabled, then fetches up to
    ``self.display_items`` records in batches of 20 with ``efetch``. For
    every record a row of selected ``GBSeq_*`` fields ("N/A" when a field
    is absent) is emitted through ``updateSig``, followed by a progress
    value through ``progressBarSig``.

    ``searchSig`` is emitted with "searching", "fetching" and "finished"
    as the work proceeds. If ``self.interrupt`` is set at the start of a
    batch the method returns without emitting "finished". Any exception
    results in ``searchSig`` "except" plus the formatted traceback on
    ``exception_signal``.
    """
    record_fields = (
        "GBSeq_accession-version", "GBSeq_definition", "GBSeq_organism",
        "GBSeq_length", "GBSeq_update-date", "GBSeq_taxonomy",
        "GBSeq_create-date", "GBSeq_moltype", "GBSeq_topology",
        "GBSeq_references", "GBSeq_source", "GBSeq_keywords",
        "GBSeq_project", "GBSeq_other-seqids", "GBSeq_strandedness",
        "GBSeq_comment",
    )
    step = 20
    try:
        self.searchSig.emit("searching")
        self.init_list()
        self.ctrl_text()  # restore the text
        self.NCBI_model.list_checked = []
        self.database = self.comboBox_2.currentText()
        query_text = self.lineEdit.text()
        # Fall back to a placeholder address when none was entered.
        Entrez.email = self.lineEdit_2.text() or "A.N.Other@example.com"

        esearch_handle = Entrez.esearch(
            db=self.database, term=query_text, usehistory="y"
        )
        esearch_result = Entrez.read(esearch_handle)
        self.webenv = esearch_result["WebEnv"]
        self.query_key = esearch_result["QueryKey"]
        self.count = int(esearch_result["Count"])
        # Original author's note: if there are only 2 sequences,
        # self.display_items becomes 2 as well - so it is read only
        # after this signal has been emitted.
        self.ctrlItemsSig.emit(self.count)
        esearch_handle.close()

        self.searchSig.emit("fetching")
        # Never try to show more records than the search returned.
        total_displayed = min(self.display_items, self.count)

        for start in range(0, total_displayed, step):
            if self.interrupt:
                return
            end = min(total_displayed, start + step)
            print("Going to download record %i to %i" % (start + 1, end))
            # The last batch may be shorter than `step`.
            fetch_handle = Entrez.efetch(
                db=self.database,
                retmode="xml",
                retstart=start,
                retmax=end - start,
                webenv=self.webenv,
                query_key=self.query_key,
            )
            fetched = Entrez.read(fetch_handle)
            for offset, entry in enumerate(fetched):
                row = [
                    str(entry[field]) if field in entry else "N/A"
                    for field in record_fields
                ]
                self.updateSig.emit(row)
                self.progressBarSig.emit(
                    (start + offset + 1) * 100 / total_displayed
                )
            fetch_handle.close()
        self.searchSig.emit("finished")
    except:
        self.searchSig.emit("except")
        exc_type, exc_value, exc_tb = sys.exc_info()
        report = ''.join(
            traceback.format_exception(exc_type, exc_value, exc_tb)
        )
        self.exception_signal.emit(report)
# time_end = time.time()
# print("time:", time_end - time_start)