本文整理汇总了Python中skbio.TabularMSA.read方法的典型用法代码示例。如果您正苦于以下问题:Python TabularMSA.read方法的具体用法?Python TabularMSA.read怎么用?Python TabularMSA.read使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类skbio.TabularMSA
的用法示例。
在下文中一共展示了TabularMSA.read方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: filter_positions
# 需要导入模块: from skbio import TabularMSA [as 别名]
# 或者: from skbio.TabularMSA import read [as 别名]
def filter_positions(alignment_fh, maximum_gap_frequency,
maximum_position_entropy):
"""Filter gaps and high entropy positions from an alignment."""
with alignment_fh:
try:
aln = TabularMSA.read(alignment_fh, constructor=DNA)
except ValueError:
alignment_fh.seek(0)
aln = TabularMSA.read(alignment_fh, constructor=RNA)
aln = _filter_gap_positions(aln, maximum_gap_frequency)
aln = _filter_high_entropy_positions(aln, maximum_position_entropy)
return aln
示例2: test_reformat_treepuzzle
# 需要导入模块: from skbio import TabularMSA [as 别名]
# 或者: from skbio.TabularMSA import read [as 别名]
def test_reformat_treepuzzle(self):
""" Test functionality of reformat_treepuzzle()
"""
species_tree = TreeNode.read(self.species_tree_fp, format='newick')
gene_tree_3 = TreeNode.read(self.gene_tree_3_fp, format='newick')
output_tree_fp = join(self.working_dir, "joined_trees.nwk")
output_msa_phy_fp = join(self.working_dir, "gene_tree_3.phy")
reformat_treepuzzle(gene_tree_3,
species_tree,
self.msa_fa_3_fp,
output_tree_fp,
output_msa_phy_fp)
reformat_tree_exp = [
"(((((((SE001:2.1494877,SE010:1.08661):3.7761166,SE008:"
"0.86305436):0.21024487,(SE006:0.56704221,SE009:0.5014676):"
"0.90294223):0.20542323,SE005:3.0992506):0.37145632,SE004:"
"1.8129133):0.72933621,SE003:1.737411):0.24447835,(SE002:"
"1.6606127,SE007:0.70000178):1.6331374);\n",
"(((((((SE001:2.1494876,SE010:2.1494876):"
"3.7761166,SE008:5.9256042):0.2102448,(SE006:"
"5.2329068,SE009:5.2329068):0.9029422):0.2054233,"
"SE005:6.3412723):0.3714563,SE004:6.7127286):"
"0.7293362,SE003:7.4420648):0.2444784,SE002:"
"7.6865432);\n"]
with open(output_tree_fp, 'r') as output_tree_f:
reformat_tree_act = output_tree_f.readlines()
self.assertListEqual(reformat_tree_exp, reformat_tree_act)
msa_fa = TabularMSA.read(output_msa_phy_fp, constructor=Protein)
labels_exp = [u'SE001', u'SE002', u'SE003', u'SE004', u'SE005',
u'SE006', u'SE008', u'SE009', u'SE010']
labels_act = list(msa_fa.index)
self.assertListEqual(labels_exp, labels_act)
示例3: aln_distmat
# 需要导入模块: from skbio import TabularMSA [as 别名]
# 或者: from skbio.TabularMSA import read [as 别名]
def aln_distmat(alignment, reps=3):
'''Calculate pairwise distances from a MSA of genomes'''
aln = TabularMSA.read(alignment, constructor=DNA)
aln.reassign_index(minter="id")
dist = DistanceMatrix.from_iterable([seq.values for seq in aln],
metric=hamming, keys=aln.index)
return dist
示例4: reformat_treepuzzle
# 需要导入模块: from skbio import TabularMSA [as 别名]
# 或者: from skbio.TabularMSA import read [as 别名]
def reformat_treepuzzle(gene_tree,
species_tree,
gene_msa_fa_fp,
output_tree_fp,
output_msa_phy_fp):
""" Reformat input trees to the format accepted by Tree-Puzzle.
Parameters
----------
gene_tree: skbio.TreeNode
TreeNode instance for gene tree
species_tree_fp: skbio.TreeNode
TreeNode instance for species tree
gene_msa_fa_fp: string
file path to gene alignments in FASTA format
output_tree_fp: string
file path to output trees (Nexus format)
output_msa_phy_fp: string
file path to output MSA in PHYLIP format
See Also
--------
skbio.TreeNode
"""
# remove the root branch length (output with ALF)
for node in gene_tree.postorder():
if node.is_root():
node.length = None
for node in species_tree.postorder():
if node.is_root():
node.length = None
# trim gene tree leaves to exclude '_GENENAME' (if exists)
trim_gene_tree_leaves(gene_tree)
join_trees(gene_tree,
species_tree,
output_tree_fp)
# trim FASTA sequence labels to exclude '/GENENAME' (if exists)
msa_fa = TabularMSA.read(gene_msa_fa_fp, constructor=Protein)
msa_fa.reassign_index(minter='id')
mapping = id_mapper(msa_fa.index)
msa_fa.reassign_index(mapping=mapping)
msa_fa.write(output_msa_phy_fp, format='phylip')
示例5: read_msa_file
# 需要导入模块: from skbio import TabularMSA [as 别名]
# 或者: from skbio.TabularMSA import read [as 别名]
def read_msa_file(self):
msa = TabularMSA.read(self.msa_file, constructor=Protein)
msa.reassign_index(minter='id')
return msa
示例6: get_consrv_score
# 需要导入模块: from skbio import TabularMSA [as 别名]
# 或者: from skbio.TabularMSA import read [as 别名]
def get_consrv_score(fsta_fh,host,clustalo_fh,rate4site_fh):
"""
Extracts Residue wise conservation scores
:param fsta_fh: path to FASTA file
:param host: host name e.g. E. coli
:param clustalo_fh: path to Clustalo libraries
:param rate4site_fh: path to rate4site libraries
:returns data_conserv: pandas table with conservation scores per residue
"""
from Bio.Blast import NCBIWWW
from Bio.Blast import NCBIXML
from skbio import TabularMSA, Protein
from Bio.Alphabet import IUPAC
from dms2dfe.lib.io_seq_files import fasta_nts2prt
blast_method='blastp'
blast_db="swissprot"
# make prt fasta
fsta_prt_fh="%s_prt%s" % (splitext(fsta_fh)[0],splitext(fsta_fh)[1])
if not exists(fsta_prt_fh):
prt_seq=fasta_nts2prt(fsta_fh,host=host)
for fsta_data in SeqIO.parse(fsta_prt_fh,'fasta'):
ref_id=fsta_data.id
prt_seq=str(fsta_data.seq)
break
blast_fh="%s_blastp.xml" % (splitext(fsta_fh)[0])
blast_fasta_fh="%s.fasta" % (splitext(blast_fh)[0])
msa_fh=blast_fasta_fh.replace("blastp","blastp_msa")
if not exists(msa_fh):
if not exists(blast_fasta_fh):
if not exists(blast_fh):
# get blast
blast_handle = NCBIWWW.qblast(blast_method,
blast_db, sequence=prt_seq)
blast_results = blast_handle.read()
blast_f=open(blast_fh, 'w')
blast_f.write(blast_results)
blast_f.close()
blast_f = open(blast_fh, 'r')
blast_records=NCBIXML.parse(blast_f)
# blast to fasta
blast_fasta_f=open(blast_fasta_fh, 'w')
fsta_data=[]
fsta_data.append(SeqRecord.SeqRecord(Seq.Seq(prt_seq,IUPAC.protein),
id = ref_id,description=''))
for rec in blast_records:
for aln in rec.alignments:
for hsp in aln.hsps:
fsta_data.append(SeqRecord.SeqRecord(Seq.Seq(hsp.sbjct,IUPAC.protein),
id = aln.hit_id,description=''))
SeqIO.write(fsta_data, blast_fasta_f, "fasta")
blast_fasta_f.close()
# blast fasta to msa : clustaw
clustalo_com="%s -i %s -o %s --outfmt=fasta --log=%s.log" % (clustalo_fh,blast_fasta_fh,msa_fh,msa_fh)
log_fh="%s.log" % clustalo_fh
log_f = open(log_fh,'a')
subprocess.call(clustalo_com,shell=True,stdout=log_f, stderr=subprocess.STDOUT)
log_f.close()
# msa to consrv skbio==0.4.1
msa = TabularMSA.read(msa_fh, constructor=Protein)
msa.reassign_index(minter='id')
metric='inverse_shannon_uncertainty'
gap_modes=['ignore','include']
gap_modes_labels=['gaps ignored','gaps included'] #'no gaps',
data_feats_conserv=pd.DataFrame(columns=["aasi"])
for fsta_data in SeqIO.parse(msa_fh,'fasta'):
if ref_id in fsta_data.id:
ref_seq=fsta_data.seq
break
for gap_mode in gap_modes:
positional_conservation = msa.conservation(metric=metric, degenerate_mode='nan', gap_mode=gap_mode)
aai=1
for i in range(len(ref_seq)):
if ref_seq[i]!="-":
data_feats_conserv.loc[i,"aasi"]=aai
data_feats_conserv.loc[i,"Conservation score (inverse shannon uncertainty): %s" % (gap_modes_labels[gap_modes.index(gap_mode)])]=positional_conservation[i]
aai+=1
data_feats_conserv=data_feats_conserv.set_index("aasi")
# rate4site_rates=["-Im","-Ib"]
# rate4site_rates_labels=["maximum likelihood rates","empirical Bayes rates"]
rate4site_rates=["-Ib"]
# rate4site_rates_labels=["maximum likelihood rates","empirical Bayes rates"]
rate4site_trees=["-zj","-zn"]
rate4site_trees_labels=["Jukes-Cantor distances","maximum likelihood distances"]
data_feats_conserv_rate4site=pd.DataFrame()
data_feats_conserv_rate4site.index.name="aasi"
rate4site_out_fh="%s.rate4site" % (splitext(rate4site_fh)[0])
for rate4site_rate in rate4site_rates:
for rate4site_tree in rate4site_trees:
rate4site_out_csv_fh="%s/%s.csv%s%s" % (dirname(rate4site_out_fh),ref_id,rate4site_rate,rate4site_tree)
if not exists(rate4site_out_csv_fh):
rate4site_com="%s -s %s -o %s -x %s_x -y %s_y -a %s %s %s;rm r4s.res" % \
(rate4site_fh,msa_fh,rate4site_out_fh,rate4site_out_fh,rate4site_out_fh,ref_id,rate4site_rate,rate4site_tree)
#.........这里部分代码省略.........