本文整理汇总了Python中Bio.Entrez.efetch方法的典型用法代码示例。如果您正苦于以下问题：Python Entrez.efetch方法的具体用法？Python Entrez.efetch怎么用？Python Entrez.efetch使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Bio.Entrez的用法示例。
在下文中一共展示了Entrez.efetch方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_ncbi_seq
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def get_ncbi_seq(email, db, rettype, accession):
    """Fetch one record from NCBI as text.

    :param email: address assigned to ``Entrez.email`` before the request
    :param db: Entrez database name passed to ``Entrez.efetch``
    :param rettype: Entrez return type passed to ``Entrez.efetch``
    :param accession: id/accession to fetch
    :return: the text read from the efetch handle, or ``None`` when the
        fetch failed (an error line is written to stderr in that case)
    """
    print("Fetching accession %s from GenBank\n" % (accession))
    Entrez.email = email
    try:
        handle = Entrez.efetch(
            db=db,
            rettype=rettype,
            retmode="text",
            id=accession
        )
        try:
            return handle.read()
        finally:
            # Release the underlying connection even if read() fails.
            handle.close()
    except Exception:
        # Deliberately best-effort: report the failure and hand back None.
        sys.stderr.write("Error! Cannot fetch: %s \n" % accession)
        return None
示例2: fetch_names
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def fetch_names(id_list):
    """Map GenBank record names to organism names for the given ids.

    :param id_list: sequence of ids passed to ``Entrez.efetch`` in slices
    :return: dict mapping ``record.name`` to
        ``record.annotations['organism']``; empty when ``id_list`` is empty
    """
    organism_names = {}
    # Doing 100 by 100 to make sure requests to NCBI are not too big
    for i in range(0, len(id_list), 100):
        j = min(i + 100, len(id_list))
        sys.stderr.write(
            "Fetching entries from %s to %s from GenBank\n" % (i, j))
        sys.stderr.flush()
        # NOTE(review): ``db`` and ``parse`` are not defined in this function;
        # presumably they are module-level names -- confirm against the file.
        result_handle = Entrez.efetch(db=db, rettype="gb", id=id_list[i:j])
        try:
            # Populate result per organism name
            for record in parse(result_handle, 'genbank'):
                # Using NCBI name, which should match accession number passed
                organism_names[record.name] = record.annotations['organism']
        finally:
            # Close each batch handle once it has been consumed.
            result_handle.close()
    return organism_names
示例3: get_species_from_taxid
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def get_species_from_taxid(self, taxid):
    """Return a species name for ``taxid`` with only the first letter upper-cased.

    When ``taxid`` consists only of digits it is looked up in the Entrez
    "taxonomy" database and replaced by the record's ``ScientificName``.
    If the lookup fails, the original ``taxid`` string is used instead.

    :param taxid: str, either a numeric taxonomy id or a species name
    :return: str, first character upper-cased and the rest lower-cased;
        an empty ``taxid`` is returned unchanged
    """
    species = taxid
    # if it is an integer (a taxid), try to get the species name
    if taxid.isdigit():
        Entrez.email = EMBL.PREVIOUS_VALUES["email"]
        # fetch the classification using the taxid
        logging.debug("Fetch The Lineage using Entrez.efetch")
        try:
            search = Entrez.efetch(id=taxid, db="taxonomy", retmode="xml")
            try:
                data = Entrez.read(search)
            finally:
                search.close()
            species = data[0]['ScientificName']
        except IOError as e:
            logging.error("Could not get species from taxid: %s" % e)
        except (IndexError, KeyError) as e:
            # No usable record came back; keep the taxid as the fallback,
            # the same way a failed request does.
            logging.error("Could not get species from taxid: %s" % e)
    if not species:
        # Nothing to capitalise; indexing species[0] would raise IndexError.
        return species
    return "%s%s" % (species[0].upper(), species[1:].lower())
#if species is a taxid we change by the species name
示例4: populate_organism
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def populate_organism():
    """Create an ``Organism`` row for every accession in the bacterial list.

    Reads ``bac_accession_list.txt`` from ``DATA_DIR``, resolves each
    accession to its organism name through Entrez ("nuccore", GenBank
    format) and stores the (name, accession) pairs with ``get_or_create``.
    """
    def add_organism(name, accession):
        # get the object, this also checks for duplicates
        o, created = Organism.objects.get_or_create(
            name=name, accession=accession)
        return o

    def merge_acc_names(accession_list):
        """Return a dict mapping record name -> organism for the accessions."""
        acc_name_dict = {}
        db = "nuccore"
        # Doing batches of 200 to make sure requests to NCBI are not too big
        for i in range(0, len(accession_list), 200):
            j = i + 200
            result_handle = Entrez.efetch(
                db=db, rettype="gb", id=accession_list[i:j])
            try:
                # Populate result per organism name
                records = SeqIO.parse(result_handle, 'genbank')
                for record in tqdm(records):
                    # Using NCBI name, which should match accession number passed
                    acc_name_dict[record.name] = record.annotations['organism']
            finally:
                result_handle.close()
        return acc_name_dict

    with open(os.path.join(DATA_DIR, 'bac_accession_list.txt')) as f:
        accession_list = list(read_accession_file(f))
    # The previous code iterated the accession *list* and indexed it with its
    # own elements, which raises TypeError; the name lookup has to go through
    # merge_acc_names to obtain the accession -> organism mapping.
    acc_name_dict = merge_acc_names(accession_list)
    for acc, name in acc_name_dict.items():
        add_organism(name=name, accession=acc)
示例5: populate_anticrispr
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def populate_anticrispr():
    """Store every anti-CRISPR protein listed in ``antiCRISPR_accessions.txt``.

    The accession file is read from ``DATA_DIR``, all accessions are fetched
    from the Entrez "protein" database as FASTA in a single request, and one
    ``AntiCRISPR`` object is get-or-created and saved per parsed record.
    """
    accession_path = os.path.join(DATA_DIR, 'antiCRISPR_accessions.txt')
    with open(accession_path) as accession_file:
        accession_list = list(read_accession_file(accession_file))

    print("Fetching AntiCRISPR entries")
    fasta_handle = Entrez.efetch(
        db='protein', rettype="fasta", id=accession_list)
    fasta_records = SeqIO.parse(fasta_handle, 'fasta')
    for fasta_record in tqdm(fasta_records):
        entry, _ = AntiCRISPR.objects.get_or_create(
            accession=fasta_record.name,
            sequence=str(fasta_record.seq))
        entry.save()
示例6: get_tax_names
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def get_tax_names(taxa):
    """Get tax names from ids or string.

    :param taxa: str, either a path to an existing file containing
        comma-separated entries (one or more per line) or a comma-separated
        string of entries
    :return: set of names; all-digit entries are looked up in the Entrez
        "taxonomy" database and replaced by their ``ScientificName`` (and
        dropped when the lookup yields none), other entries are kept as
        given. Blank entries are ignored.
    """
    logging.debug('Checking tax inputs')

    def splitter(s):
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        tokens = (token.strip() for token in re.split(r'\s*,\s*', s))
        # Blank lines / trailing commas would otherwise become empty "names".
        return [token for token in tokens if token]

    tax_ids = []
    if os.path.isfile(taxa):
        with open(taxa) as taxa_file:
            for line in taxa_file:
                tax_ids.extend(splitter(line.rstrip()))
    else:
        tax_ids = splitter(taxa)

    tax_names = set()
    for tax in tax_ids:
        logging.debug('Tax {}'.format(tax))
        if tax.isdigit():
            handle = Entrez.efetch(db='taxonomy', id=tax)
            try:
                results = Entrez.read(handle)
            finally:
                handle.close()
            if results:
                name = results[0].get('ScientificName')
                if name:
                    tax_names.add(name)
        else:
            tax_names.add(tax)
    return tax_names
# --------------------------------------------------
示例7: fetch_from_entrez
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def fetch_from_entrez(index, cache_dir=False):
    """Return the Medline record for a PubMed id, using a cache when given.

    :param index: PubMed id; converted with ``str()`` for the request
    :param cache_dir: falsy to disable caching, otherwise the value handed to
        ``fetch_from_cache`` / ``save_to_cache`` (it is joined with '/' for
        log messages, so presumably a sequence of path components -- confirm)
    :return: the cached entry, or the record parsed by ``Medline.read``;
        ``None`` when all 5 request attempts failed
    """
    logger = logging.getLogger('build')

    # slugify the index for the cache filename (some indices have symbols
    # not allowed in file names (e.g. /))
    index_slug = slugify(index)

    # Only touch cache_dir when caching is enabled: joining the default
    # ``False`` raises TypeError.
    cache_file_path = None
    if cache_dir:
        cache_file_path = '{}/{}'.format('/'.join(cache_dir), index_slug)
        # try fetching from cache
        d = fetch_from_cache(cache_dir, index_slug)
        if d:
            logger.info('Fetched {} from cache'.format(cache_file_path))
            return d

    # if nothing is found in the cache, use the web API
    logger.info('Fetching {} from Entrez'.format(index))
    max_tries = 5
    for tries in range(max_tries):
        if tries > 0:
            logger.warning('Failed fetching pubmed {}, retrying'.format(str(index)))
        try:
            Entrez.email = 'info@gpcrdb.org'
            handle = Entrez.efetch(
                db="pubmed",
                id=str(index),
                rettype="medline",
                retmode="text"
            )
        except Exception:
            # Back off briefly before the next attempt.
            time.sleep(2)
            continue

        try:
            d = Medline.read(handle)
        finally:
            handle.close()

        if cache_dir:
            # save to cache
            save_to_cache(cache_dir, index_slug, d)
            logger.info('Saved entry for {} in cache'.format(cache_file_path))
        return d

    logger.error('Giving up fetching pubmed {} after {} tries'.format(str(index), max_tries))
    return None
示例8: entrez_batch_webhistory
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def entrez_batch_webhistory(args, record, expected, batchsize, *fnargs, **fnkwargs):
    """Collect Entrez results from an earlier NCBI webhistory search.

    :param args: Namespace, command-line arguments
    :param record: Entrez webhistory record (its "WebEnv" and "QueryKey"
        entries are forwarded to Efetch)
    :param expected: int, number of expected search returns
    :param batchsize: int, number of search returns to retrieve in each batch
    :param *fnargs: tuple, extra positional arguments for Efetch
    :param **fnkwargs: dict, extra keyword arguments for Efetch

    Efetch is called through ``entrez_retry`` once per batch, and every
    batch is parsed with ``Entrez.read(..., validate=False)``.

    Returns the concatenation of all parsed batches as a list.
    """
    logger = logging.getLogger(__name__)

    collected = []
    for offset in range(0, expected, batchsize):
        handle = entrez_retry(
            args,
            logger,
            Entrez.efetch,
            *fnargs,
            retstart=offset,
            retmax=batchsize,
            webenv=record["WebEnv"],
            query_key=record["QueryKey"],
            **fnkwargs
        )
        parsed_batch = Entrez.read(handle, validate=False)
        collected.extend(parsed_batch)
    return collected
# Get assembly UIDs for the root taxon
示例9: get_tax_data
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def get_tax_data(self, taxid):
    """Fetch the taxonomy record for ``taxid`` and return it parsed.

    Sets ``Entrez.email``, requests the id from the "taxonomy" database in
    XML mode and returns whatever ``Entrez.read`` produces for the response.
    """
    Entrez.email = 'A.N.Other@example.com'
    taxonomy_handle = Entrez.efetch(id=taxid, db="taxonomy", retmode="xml")
    parsed_record = Entrez.read(taxonomy_handle)
    return parsed_record
示例10: fetSeqFromNCBI
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def fetSeqFromNCBI(self, id_array):
    """Download the records for ``id_array`` from the "nucleotide" database.

    Records are requested in batches of 20 as text in ``self.rettype``
    format. After each batch a percentage is emitted on
    ``self.progressDiologSig``. When ``self.rettype`` is "gb" the combined
    text is emitted on ``self.inputContentSig`` together with
    ``self.outputPath``; otherwise it is written to
    ``self.outputPath/self.fasta_file_name`` and
    ``self.fastaDownloadFinishedSig`` is emitted.

    :param id_array: sequence of ids passed to ``Entrez.efetch``
    :return: None; returns early, without emitting a result, as soon as
        ``self.interrupt`` is set
    """
    batch_size = 20
    count = len(id_array)
    chunks = []
    for start in range(0, count, batch_size):
        if self.interrupt:
            return
        end = min(count, start + batch_size)
        print("Going to download record %i to %i" % (start + 1, end))
        Entrez.email = self.email if self.email else "A.N.Other@example.com"
        # end - start shrinks the final batch to the remaining records.
        fetch_handle = Entrez.efetch(db="nucleotide", rettype=self.rettype, retmode="text",
                                     retstart=start, retmax=end - start, id=id_array)
        try:
            chunks.append(fetch_handle.read())
        finally:
            # Close every batch handle so connections are not leaked.
            fetch_handle.close()
        self.progressDiologSig.emit(end * 100 / count)
    download_contents = "".join(chunks)
    if self.rettype == "gb":
        self.inputContentSig.emit(
            download_contents, self.outputPath)
    else:
        with open(self.outputPath + os.sep + self.fasta_file_name, "w", encoding="utf-8") as f:
            f.write(download_contents)
        # Signal completion only after the file has been closed.
        self.fastaDownloadFinishedSig.emit(self.outputPath)
# result_handle = Entrez.efetch(
# db="nucleotide", rettype="gb", id=id_array, retmode="text")
# # with open(self.exportPath + os.sep + "new.gb", "w", encoding="utf-8") as f2:
# # f2.write(result_handle.read())
# self.inputContentSig.emit(
# result_handle.read(), [])
示例11: downloadSeq
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def downloadSeq(self):
    """Download sequences in batches of 20 into ``self.download_contents``.

    When ``self.NCBI_model.list_checked`` is non-empty those ids are
    fetched; otherwise ``self.count`` records are fetched from the stored
    search history (``self.webenv`` / ``self.query_key``). A percentage is
    emitted on ``self.progressDiologSig`` after each batch. The loop stops
    early when ``self.interrupt`` is set. Any exception is reported as a
    formatted traceback on ``self.exception_signal`` instead of propagating.
    """
    try:
        checked_ids = self.NCBI_model.list_checked
        batch_size = 20
        count = len(checked_ids) if checked_ids else self.count
        self.download_contents = ""
        for start in range(0, count, batch_size):
            if self.interrupt:
                return
            end = min(count, start + batch_size)
            print("Going to download record %i to %i" % (start + 1, end))
            # end - start shrinks the final batch to the remaining records.
            retmax = end - start
            if not checked_ids:
                # download-all-sequences mode: page through the search history
                fetch_handle = Entrez.efetch(db=self.database, rettype=self.rettype, retmode="text",
                                             retstart=start, retmax=retmax,
                                             webenv=self.webenv, query_key=self.query_key)
            else:
                fetch_handle = Entrez.efetch(db=self.database, rettype=self.rettype, retmode="text",
                                             retstart=start, retmax=retmax, id=checked_ids)
            try:
                self.download_contents += fetch_handle.read()
            finally:
                # Close every batch handle so connections are not leaked.
                fetch_handle.close()
            self.progressDiologSig.emit(end * 100 / count)
    except:  # noqa: E722 - boundary handler: everything is reported via signal
        self.exception_signal.emit(''.join(
            traceback.format_exception(
                *sys.exc_info())))
示例12: addition_search
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def addition_search(self):
    """Fetch further search hits and emit one row of fields per record.

    Records from ``self.exist_base`` up to the smaller of
    ``self.display_items`` and ``self.count`` are requested in XML batches
    of 20 from the stored search history. For every record a list of 16
    string fields ("N/A" where a key is absent) is emitted on
    ``self.updateSig``, followed by a percentage on ``self.progressBarSig``.
    ``self.searchSig`` receives "fetching" at the start and "finished" at the
    end; on any exception it receives "except" and the formatted traceback
    goes to ``self.exception_signal``.
    """
    record_fields = (
        "GBSeq_accession-version", "GBSeq_definition", "GBSeq_organism", "GBSeq_length",
        "GBSeq_update-date",
        "GBSeq_taxonomy", "GBSeq_create-date", "GBSeq_moltype", "GBSeq_topology", "GBSeq_references",
        "GBSeq_source", "GBSeq_keywords", "GBSeq_project", "GBSeq_other-seqids", "GBSeq_strandedness",
        "GBSeq_comment",
    )
    try:
        total_displayed = min(self.display_items, self.count)
        step = 20
        self.searchSig.emit("fetching")
        for start in range(self.exist_base, total_displayed, step):
            if self.interrupt:
                break
            end = min(total_displayed, start + step)
            print("Going to download record %i to %i" % (start + 1, end))
            # The last batch only asks for the records that are left.
            wanted = end - start
            fetch_handle = Entrez.efetch(db=self.database, retmode="xml",
                                         retstart=start, retmax=wanted,
                                         webenv=self.webenv, query_key=self.query_key)
            fetch_records = Entrez.read(fetch_handle)
            for position, record in enumerate(fetch_records):
                row = [str(record[field]) if field in record else "N/A"
                       for field in record_fields]
                self.updateSig.emit(row)
                done = start - self.exist_base + position + 1
                self.progressBarSig.emit(done * 100 / (total_displayed - self.exist_base))
            fetch_handle.close()
        self.searchSig.emit("finished")
    except:
        self.searchSig.emit("except")
        self.exception_signal.emit(''.join(
            traceback.format_exception(
                *sys.exc_info())))
示例13: eutilsToFile
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def eutilsToFile(db,id,filename):
    """Fetch one Entrez record as GenBank XML and write it to ``filename``.

    :param db: Entrez database name
    :param id: record id passed to ``Entrez.efetch``
    :param filename: path of the output file, written as UTF-8
    """
    Entrez.email = "jlever@bcgsc.ca" # Always tell NCBI who you are
    fetch_handle = Entrez.efetch(db=db, id=id, rettype="gb", retmode="xml")
    with codecs.open(filename, mode='w', encoding='utf-8') as out_file:
        out_file.write(fetch_handle.read())
示例14: fastaRecordFromId_remote
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def fastaRecordFromId_remote( self, id ):
    """
    experimental: fetch a single fasta record from the remote Entrez
    "protein" database; the returned record's ``id`` is set to ``str(id)``
    """
    from Bio import Entrez
    from Bio import SeqIO

    Entrez.email = "A.N.Other@example.com"

    if self.verbose:
        self.log.add_nobreak( 'r' )

    remote_handle = Entrez.efetch(db="protein", rettype="fasta", id=id)
    fasta_record = SeqIO.read(remote_handle, "fasta")
    fasta_record.id = str(id)
    remote_handle.close()

    return fasta_record
示例15: download
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import efetch [as 别名]
def download(self, ids):
    '''
    Retrieve the Entrez Taxonomy records for the Taxonomy IDs in ids (XML,
    retmax of 1000000) and store the parsed result in self.dataset
    '''
    taxonomy_handle = Entrez.efetch(db="taxonomy",
                                    id=ids, retmode="xml", retmax=1000000)
    self.dataset = Entrez.read(taxonomy_handle)