本文整理汇总了Python中Bio.Entrez.read方法的典型用法代码示例。如果您正苦于以下问题：Python Entrez.read方法的具体用法？Python Entrez.read怎么用？Python Entrez.read使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块Bio.Entrez的用法示例。
在下文中一共展示了Entrez.read方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: fetch_chrom_name
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def fetch_chrom_name(id):
    """Translate a RefSeq chromosome accession into a short chromosome name.

    Accessions that do not start with ``"NC_"`` are returned untouched.
    Otherwise the GenBank record is fetched from the NCBI ``nuccore``
    database and the qualifiers of its first feature are inspected:

    * ``organelle == "mitochondrion"`` -> ``"MtDNA"``
    * no ``organelle`` qualifier       -> the ``chromosome`` qualifier value
    * any other organelle              -> the accession itself

    This is a best-effort lookup: any failure (network error, unexpected
    record layout, non-string input) falls back to returning ``id``.

    :param id: accession or chromosome name (name kept for API compatibility,
        even though it shadows the builtin).
    :return: the resolved chromosome name, or ``id`` when it cannot be resolved.
    """
    try:
        if not id.startswith("NC_"):
            return id
        Entrez.email = "vcf-kit@vcf-kit.com"
        handle = Entrez.efetch(db="nuccore", id=id, rettype="gb", retmode="xml")
        try:
            chrom = Entrez.read(handle)
        finally:
            handle.close()
        gb_feature_quals = chrom[0]["GBSeq_feature-table"][0]["GBFeature_quals"]
        # Use the explicit qualifier keys instead of relying on the ordering
        # of ``dict.values()``; qualifiers without a value map to None.
        features = {
            qual["GBQualifier_name"]: qual.get("GBQualifier_value")
            for qual in gb_feature_quals
        }
        if "organelle" not in features:
            return features["chromosome"]
        if features["organelle"] == "mitochondrion":
            return "MtDNA"
        # Other organelles previously fell through and returned None.
        return id
    except Exception:
        # Deliberate best-effort fallback; KeyboardInterrupt/SystemExit are
        # no longer swallowed.
        return id
示例2: get_species_from_taxid
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def get_species_from_taxid(self, taxid):
    """Return a species name with its first letter upper-cased and the rest lower-cased.

    When ``taxid`` consists only of digits it is treated as an NCBI taxonomy
    id and its ``ScientificName`` is fetched through ``Entrez.efetch``; an
    ``IOError`` during the lookup is logged and the digits themselves are
    used instead. Any other string is used as the species name directly.

    :param taxid: taxonomy id or species name, as a string.
    :return: the name formatted as described above.
    """
    name = taxid
    if taxid.isdigit():
        Entrez.email = EMBL.PREVIOUS_VALUES["email"]
        # Look the scientific name up in the NCBI taxonomy database.
        logging.debug("Fetch The Lineage using Entrez.efetch")
        try:
            handle = Entrez.efetch(id=taxid, db="taxonomy", retmode="xml")
            records = Entrez.read(handle)
            name = records[0]['ScientificName']
        except IOError as err:
            logging.error("Could not get species from taxid: %s" % err)

    head = name[0]
    tail = name[1:]
    return head.upper() + tail.lower()
#if species is a taxid we change by the species name
示例3: get_taxid_from_species
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def get_taxid_from_species(self, species):
    """Return the NCBI taxonomy id for a species name.

    An all-digit ``species`` is assumed to be a taxid already and is returned
    unchanged. Otherwise the name is searched with ``Entrez.esearch``:
    the first id of the result is returned, ``None`` is returned (with a
    warning) when the search yields no id, and the original ``species``
    string is returned when the request fails with ``IOError``.

    :param species: species name or taxid, as a string.
    :return: taxid string, ``None``, or the original input (see above).
    """
    if species.isdigit():
        return species

    result = species
    Entrez.email = EMBL.PREVIOUS_VALUES["email"]
    # Query the NCBI taxonomy database for the species name.
    logging.debug("Fetch the taxid from species name using Entrez.esearch")
    species = species.replace(" ", "+").strip()
    try:
        handle = Entrez.esearch(term=species, db="taxonomy", retmode="xml")
        found = Entrez.read(handle)
        if found['IdList']:
            result = found['IdList'][0]
        else:
            # Nothing matched the name.
            logging.warning("Please verify the species name. '%s' species is unknown into the NCBI taxonomy databse. Impossible to check the taxonomic classification. We will use the default value 'Life' to populate the OC line.", self.species)
            result = None
    except IOError as err:
        logging.error("Could not get taxid from species: %s" % err)
    return result
示例4: AS
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def AS(self):
    """Render the AS (ASsembly Information) lines of the entry.

    AS lines describe how a TPA or TSA sequence is composed. Each entry of
    ``self.assembly_information`` contributes one fragment made of:

    a) LOCAL_SPAN          span on the local sequence (padded to 16 columns)
    b) PRIMARY_IDENTIFIER  acc.version of the contributing ENA sequence, or
                           trace identifier for ENA reads (padded to 24)
    c) PRIMARY_SPAN        span on the contributing primary sequence, or
                           not_available for ENA reads (padded to 18)
    d) COMP                'c' when the contributing sequence comes from the
                           complementary strand of the primary entry

    :return: the concatenation of all fragments ("" when there are none).
    """
    fragments = []
    for entry in self.assembly_information:
        local_span = "{:16}".format(entry['local_span'])
        identifier = "{:24}".format(entry['identifier'])
        primary_span = "{:18}".format(entry['primary_span'])
        fragments.append(
            "AS %s%s%s%s" % (local_span, identifier, primary_span, entry['complementary'])
        )
    return "".join(fragments)
示例5: get_GIs
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def get_GIs(self, accessions, n_entrez=2500, **kwargs):
    '''
    Use entrez esearch to get genbank identifiers from accession numbers.

    The accessions are sent in chunks of ``n_entrez`` space-separated terms,
    one ``Entrez.esearch`` request per chunk against ``self.gbdb``.

    :param accessions: sequence of accession strings (must support slicing).
    :param n_entrez: number of accessions per request.
    :param kwargs: accepted for interface compatibility; not used here.
    :return: list with the ``IdList`` entries of every response, in request order.
    '''
    retmax = 10**5  # max records to retrieve at once; 10^5 is documented limit, but >2500 reproducibly throws errors

    # ``range`` works on both Python 2 and 3 (``xrange`` is Python 2 only)
    # and already yields the offsets in ascending order.
    queries = [
        " ".join(accessions[i:i + n_entrez])
        for i in range(0, len(accessions), n_entrez)
    ]
    assert sum([len(q.split()) for q in queries]) == len(accessions)  # sanity check

    giList = []
    for q in queries:
        handle = Entrez.esearch(db=self.gbdb, term=q, retmax=retmax)  # retrieve xml of search results
        try:
            giList += Entrez.read(handle)['IdList']  # pull GI numbers from handle
        finally:
            handle.close()
    return giList
示例6: get_tax_names
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def get_tax_names(taxa):
    """Get tax names from ids or string.

    ``taxa`` is either the path of an existing file, whose lines each hold a
    comma-separated list, or a comma-separated string. All-digit entries are
    treated as NCBI taxonomy ids and resolved to their ``ScientificName``
    through ``Entrez.efetch`` (ids that resolve to nothing are dropped);
    every other entry is kept as it is.

    :param taxa: file path or comma-separated string.
    :return: set of taxon names.
    """
    logging.debug('Checking tax inputs')

    def splitter(s):
        # Raw string: '\s' in a normal literal is an invalid escape sequence.
        return re.split(r'\s*,\s*', s)

    if os.path.isfile(taxa):
        tax_ids = []
        with open(taxa) as tax_file:
            for line in tax_file:
                tax_ids.extend(splitter(line.rstrip()))
    else:
        tax_ids = splitter(taxa)

    tax_names = []
    for tax in tax_ids:
        logging.debug('Tax {}'.format(tax))
        if tax.isdigit():
            handle = Entrez.efetch(db='taxonomy', id=tax)
            try:
                results = Entrez.read(handle)
            finally:
                handle.close()
            if results:
                name = results[0].get('ScientificName')
                if name:
                    tax_names.append(name)
        else:
            tax_names.append(tax)

    return set(tax_names)
# --------------------------------------------------
示例7: entrez_batch_webhistory
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def entrez_batch_webhistory(args, record, expected, batchsize, *fnargs, **fnkwargs):
    """Collect the results of an earlier NCBI webhistory search, batch by batch.

    :param args: Namespace, command-line arguments
    :param record: Entrez webhistory record (provides "WebEnv" and "QueryKey")
    :param expected: int, number of expected search returns
    :param batchsize: int, number of search returns to retrieve in each batch
    :param *fnargs: tuple, arguments to Efetch
    :param **fnkwargs: dict, keyword arguments to Efetch

    Each batch is requested with Efetch through ``entrez_retry`` and parsed
    with ``Entrez.read(validate=False)``.

    Returns all results as a single list.
    """
    logger = logging.getLogger(__name__)
    collected = []
    for offset in range(0, expected, batchsize):
        handle = entrez_retry(
            args,
            logger,
            Entrez.efetch,
            retstart=offset,
            retmax=batchsize,
            webenv=record["WebEnv"],
            query_key=record["QueryKey"],
            *fnargs,
            **fnkwargs,
        )
        parsed_batch = Entrez.read(handle, validate=False)
        collected += parsed_batch
    return collected
# Get assembly UIDs for the root taxon
示例8: get_asm_uids
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def get_asm_uids(args, taxon_uid):
    """Return the NCBI assembly UIDs associated with the passed taxon.

    :param args: Namespace, command-line arguments
    :param taxon_uid: str, NCBI taxon ID

    This query at NCBI returns all assemblies for the taxon subtree
    rooted at the passed taxon_uid. The search is stored in the NCBI
    history and the UIDs are then recovered from it in batches of 250.

    Returns whatever entrez_batch_webhistory() collects (a list of results).
    """
    logger = logging.getLogger(__name__)

    query = f"txid{taxon_uid}[Organism:exp]"
    logger.info("Entrez ESearch with query: %s", query)

    # Perform initial search for assembly UIDs with taxon ID as query.
    # Use NCBI history for the search.
    handle = entrez_retry(
        args,
        logger,
        Entrez.esearch,
        db="assembly",
        term=query,
        format="xml",
        usehistory="y",
    )
    record = Entrez.read(handle, validate=False)
    result_count = int(record["Count"])
    logger.info("Entrez ESearch returns %d assembly IDs", result_count)

    # Recover assembly UIDs from the web history.
    # entrez_batch_webhistory(args, record, expected, batchsize, ...) creates
    # its own logger; passing ``logger`` here shifted every argument by one.
    asm_ids = entrez_batch_webhistory(
        args, record, result_count, 250, db="assembly", retmode="xml"
    )
    logger.info("Identified %d unique assemblies", len(asm_ids))
    return asm_ids
# Extract filestem from Entrez eSummary
示例9: fetchCBS
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def fetchCBS(self):
    """Run the external program at ``self.codonW`` on ``self.seq`` and return eight values.

    The sequence is written to a FASTA file inside ``self.path``, the program
    is started through ``self.factory.init_popen`` and its output file is
    parsed afterwards.

    Returns a list of eight strings: columns 5-8 and 11-14 of the second
    line of the tab-separated output file, with every entry that is neither
    a float nor an int (per ``self.is_float`` / ``self.is_int``) replaced by
    "NA". A list of eight "NA" is returned when ``self.codeTable`` has no
    mapping below or when no usable output was produced.

    Side effects: changes the process working directory to ``self.path`` and
    appends the command and sequence to "codonW_error.fas" on failure.
    """
    # Maps str(self.codeTable) to the value passed after "-code".
    # NOTE(review): presumably NCBI genetic-code table -> codonW code index; confirm.
    dict_ = {"1":"0", "2":"1", "5":"4", "9":"7"}
    out = ["NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA"]
    if str(self.codeTable) not in dict_: return out ## remember to change this
    code = dict_[str(self.codeTable)]
    # The file names in the command are relative, so run from self.path.
    os.chdir(self.path)
    infile = "codonW_infile.fas"
    outfile = "codonW_outfile.txt"
    blkfile = "codonW_blk.txt"
    errorfile = "codonW_error.fas"
    with open(self.path + os.sep + infile, "w", encoding="utf-8") as f:
        f.write(">seq\n%s\n"%self.seq)
    command = '"%s" "%s" "%s" "%s" -all_indices -nomenu -silent -noblk -code %s'%(self.codonW, infile, outfile, blkfile, code)
    # print(command)
    popen = self.factory.init_popen(command)
    try:
        # Drain stdout until it yields nothing more and the process has exited.
        while True:
            try:
                out_line = popen.stdout.readline().decode("utf-8", errors='ignore')
            except UnicodeDecodeError:
                # NOTE(review): with errors='ignore' the decode above should not
                # raise UnicodeDecodeError, so this branch looks unreachable.
                out_line = popen.stdout.readline().decode("gbk", errors='ignore')
            if out_line == "" and popen.poll() is not None:
                break
    except: pass
    ## Read the output results
    if not os.path.exists(self.path + os.sep + outfile):
        # No output file: record the failing command and sequence.
        with open(errorfile, "a", encoding="utf-8") as f2:
            f2.write(command + "\n" + self.seq + "\n")
        # print("error seq.:", self.seq)
    else:
        with open(self.path + os.sep + outfile, encoding="utf-8", errors="ignore") as f1:
            content = f1.read()
        try:
            # The second line holds the values; the first is presumably a header.
            list_ = content.split("\n")[1].split("\t")
            out = list_[5:9] + list_[11:15]
        except IndexError:
            with open(errorfile, "a", encoding="utf-8") as f2:
                f2.write(command + "\n" + self.seq + "\n")
    # Replace anything non-numeric with "NA".
    for num, i in enumerate(out):
        if (not self.is_float(i)) and (not self.is_int(i)): out[num] = "NA"
    return out
示例10: merge_file_contents
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def merge_file_contents(self, files, base=None, proportion=None, processSig=None):
    """Concatenate the text of several files, optionally reporting progress.

    :param files: paths of the files to read (UTF-8, undecodable bytes ignored).
    :param base: progress value to start from; only used with ``processSig``.
    :param proportion: share of the progress range covered by this call;
        only used with ``processSig``.
    :param processSig: optional object with an ``emit`` method; after each
        file it receives ``base + done * proportion / len(files)``.
    :return: the contents of all files joined in the given order.
    """
    pieces = []
    for done, path in enumerate(files, start=1):
        with open(path, encoding="utf-8", errors='ignore') as handle:
            pieces.append(handle.read())
        if processSig:
            processSig.emit(base + done*proportion/len(files))
    return "".join(pieces)
示例11: fetchContentsByIDs
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def fetchContentsByIDs(self, IDs, base=None, proportion=None, processSig=None):
    """Return the concatenated file contents of the records named by ``IDs``.

    Each ID is turned into a file path with ``self.fetchRecordPath`` and that
    file is read as UTF-8 (undecodable bytes ignored).

    :param IDs: record identifiers, processed in order.
    :param base: progress value to start from; only used with ``processSig``.
    :param proportion: share of the progress range covered by this call;
        only used with ``processSig``.
    :param processSig: optional object with an ``emit`` method; after each
        record it receives ``base + done * proportion / len(IDs)``.
    :return: one string holding all record contents.
    """
    chunks = []
    done = 0
    for ID in IDs:
        record_path = self.fetchRecordPath(ID)
        with open(record_path, encoding="utf-8", errors='ignore') as record_file:
            chunks.append(record_file.read())
        done += 1
        if processSig:
            processSig.emit(base + done*proportion/len(IDs))
    return "".join(chunks)
# def fetchIDsByContents(self, contents):
# '''注意这个ID是locus的ID'''
# rgx = re.compile(r"(?sm)LOCUS {7}(\S+).+?^//\s*?(?=LOCUS|$)")
# return rgx.findall(contents)
示例12: get_tax_id
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def get_tax_id(self, query_name):
    """Look up the NCBI taxonomy id of a species name.

    Data from the NCBI taxonomy database is keyed by taxid, so the species
    name is first passed to ``Entrez.esearch``.

    :param query_name: species name; spaces are replaced with "+".
    :return: the first id of the search result, or ``None`` when the
        search returned no ids.
    """
    query_name = query_name.replace(' ', "+").strip()
    Entrez.email = 'A.N.Other@example.com'
    handle = Entrez.esearch(term=query_name, db="taxonomy", retmode="xml")
    result = Entrez.read(handle)
    if not result['IdList']:
        return None
    return result['IdList'][0]
示例13: downloadSeq
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def downloadSeq(self):
    """Download records from NCBI in batches into ``self.download_contents``.

    When ``self.NCBI_model.list_checked`` is non-empty those IDs are fetched;
    otherwise the stored search (``self.webenv`` / ``self.query_key``) is
    fetched, with ``self.count`` as the number of records. Progress is
    emitted on ``self.progressDiologSig`` after every batch. The method
    returns early, leaving partial contents, as soon as ``self.interrupt``
    is set. Any exception is reported as a formatted traceback through
    ``self.exception_signal`` instead of being raised.
    """
    try:
        checked_ids = self.NCBI_model.list_checked
        # if not checked_ids:
        # checked_ids = self.NCBI_model.fetchAllIDs()
        batch_size = 20
        count = len(checked_ids) if checked_ids else self.count
        self.download_contents = ""
        for start in range(0, count, batch_size):
            if self.interrupt:
                return
            end = min(count, start + batch_size)
            print("Going to download record %i to %i" % (start + 1, end))
            # Shrink the final batch so it does not run past ``count``.
            if (start + batch_size) > count:
                batch_size = count - start
            if not checked_ids:
                # Mode: download every sequence of the stored search
                fetch_handle = Entrez.efetch(db=self.database, rettype=self.rettype, retmode="text",
                retstart=start, retmax=batch_size,
                webenv=self.webenv, query_key=self.query_key)
            else:
                # NOTE(review): the full ID list is passed on every iteration;
                # presumably retstart/retmax select the window - confirm.
                fetch_handle = Entrez.efetch(db=self.database, rettype=self.rettype, retmode="text",
                retstart=start, retmax=batch_size, id=checked_ids)
            self.download_contents += fetch_handle.read()
            # Progress as a percentage of ``count``.
            self.progressDiologSig.emit(end * 100 / count)
        # index = self.comboBox.currentIndex()
        # filepath = self.comboBox.itemData(index, role=Qt.ToolTipRole)
        # self.downloadFinished.emit()
    except:
        # Forward the full traceback text instead of raising.
        self.exception_signal.emit(''.join(
        traceback.format_exception(
        *sys.exc_info())))
示例14: addition_search
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def addition_search(self):
    """Fetch further records of the stored NCBI search and publish them row by row.

    Records from index ``self.exist_base`` up to
    ``min(self.display_items, self.count)`` are fetched in batches as XML.
    For every record a list of 16 strings (the GBSeq_* fields listed below,
    "N/A" where a field is missing) is emitted on ``self.updateSig``,
    followed by a percentage on ``self.progressBarSig``.
    ``self.searchSig`` receives "fetching" at the start and "finished" at
    the end (also after an interrupt), or "except" when an exception
    occurred; in that case the traceback text is sent through
    ``self.exception_signal``.
    """
    try:
        # Never try to show more records than the search returned.
        total_displayed = self.display_items
        if self.count < total_displayed:
            total_displayed = self.count
        batch_size = 20
        self.searchSig.emit("fetching")
        for start in range(self.exist_base, total_displayed, batch_size):
            if self.interrupt:
                break
            end = min(total_displayed, start + batch_size)
            print("Going to download record %i to %i" % (start + 1, end))
            # Shrink the final batch so it does not run past ``total_displayed``.
            if (start + batch_size) > total_displayed:
                batch_size = total_displayed - start
            fetch_handle = Entrez.efetch(db=self.database, retmode="xml",
            retstart=start, retmax=batch_size,
            webenv=self.webenv, query_key=self.query_key)
            fetch_records = Entrez.read(fetch_handle)
            for num, record in enumerate(fetch_records):
                # One output row per record, in the fixed field order below.
                list_ = []
                for i in ["GBSeq_accession-version", "GBSeq_definition", "GBSeq_organism", "GBSeq_length",
                "GBSeq_update-date",
                "GBSeq_taxonomy", "GBSeq_create-date", "GBSeq_moltype", "GBSeq_topology", "GBSeq_references",
                "GBSeq_source", "GBSeq_keywords", "GBSeq_project", "GBSeq_other-seqids", "GBSeq_strandedness",
                "GBSeq_comment"]:
                    if i in record:
                        list_.append(str(record[i]))
                    else:
                        list_.append("N/A")
                self.updateSig.emit(list_)
                # Percentage of the records requested by this call that are done.
                self.progressBarSig.emit((start - self.exist_base + num + 1) * 100 / (total_displayed - self.exist_base))
                # self.progressBarSig.emit((start - self.exist_base)*100/(total_displayed - self.exist_base))
            fetch_handle.close()
        self.searchSig.emit("finished")
    except:
        self.searchSig.emit("except")
        # Forward the full traceback text instead of raising.
        self.exception_signal.emit(''.join(
        traceback.format_exception(
        *sys.exc_info())))
示例15: get_taxid_mapping_for_batch
# 需要导入模块: from Bio import Entrez [as 别名]
# 或者: from Bio.Entrez import read [as 别名]
def get_taxid_mapping_for_batch(taxids, taxid2wikidict, mutex, semaphore, max_attempt=3):
    ''' Get wiki mapping for a list of taxids.

    Asks ``Entrez.elink`` (``cmd="llinks"``) for the external links of the
    given taxonomy ids and stores, per taxid, the first URL that matches
    ``wikipedia.org`` ("" when there is none).

    :param taxids: iterable of taxid strings, joined with "," for the request.
    :param taxid2wikidict: dict updated in place with ``{taxid: url}``.
    :param mutex: context manager held while ``taxid2wikidict`` is updated.
    :param semaphore: released once the fetch attempts are over.
    :param max_attempt: maximum number of request attempts.
    :return: None. If every attempt fails, ``taxid2wikidict`` gains nothing.
    '''
    taxid_str = ",".join(taxids)
    log.write(f"fetching batch {taxid_str}")
    # Bound up front so the update below is safe when every attempt fails
    # (or max_attempt is 0); previously that raised UnboundLocalError.
    parsed = {}
    for attempt in range(max_attempt):
        try:
            handle = Entrez.elink(dbfrom="taxonomy", id=taxid_str, cmd="llinks")
            record = Entrez.read(handle)
            handle.close()

            parsed = {}
            results = record[0]['IdUrlList']['IdUrlSet']
            for result in results:
                taxid = result['Id']
                wikiurl = ""
                for link in result['ObjUrl']:
                    url = str(link['Url'])
                    if re.search('wikipedia.org', url):
                        wikiurl = url
                        break
                parsed[taxid] = wikiurl
            break
        except Exception:
            # Retry on any ordinary error; KeyboardInterrupt/SystemExit now
            # propagate instead of being retried.
            log.write(f"failed batch attempt {attempt}")
            time.sleep(5)
    semaphore.release()
    with mutex:
        taxid2wikidict.update(parsed)