当前位置: 首页>>代码示例>>Python>>正文


Python TabularMSA.read方法代码示例

本文整理汇总了Python中skbio.TabularMSA.read方法的典型用法代码示例。如果您正苦于以下问题:Python TabularMSA.read方法的具体用法?Python TabularMSA.read怎么用?Python TabularMSA.read使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在skbio.TabularMSA的用法示例。


在下文中一共展示了TabularMSA.read方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: filter_positions

# 需要导入模块: from skbio import TabularMSA [as 别名]
# 或者: from skbio.TabularMSA import read [as 别名]
def filter_positions(alignment_fh, maximum_gap_frequency,
                     maximum_position_entropy):
    """Filter gaps and high entropy positions from an alignment."""

    with alignment_fh:
        try:
            aln = TabularMSA.read(alignment_fh, constructor=DNA)
        except ValueError:
            alignment_fh.seek(0)
            aln = TabularMSA.read(alignment_fh, constructor=RNA)
    aln = _filter_gap_positions(aln, maximum_gap_frequency)
    aln = _filter_high_entropy_positions(aln, maximum_position_entropy)
    return aln
开发者ID:JTFouquier,项目名称:ghost-tree,代码行数:15,代码来源:filter.py

示例2: test_reformat_treepuzzle

# 需要导入模块: from skbio import TabularMSA [as 别名]
# 或者: from skbio.TabularMSA import read [as 别名]
 def test_reformat_treepuzzle(self):
     """ Test functionality of reformat_treepuzzle()
     """
     species_tree = TreeNode.read(self.species_tree_fp, format='newick')
     gene_tree_3 = TreeNode.read(self.gene_tree_3_fp, format='newick')
     output_tree_fp = join(self.working_dir, "joined_trees.nwk")
     output_msa_phy_fp = join(self.working_dir, "gene_tree_3.phy")
     reformat_treepuzzle(gene_tree_3,
                         species_tree,
                         self.msa_fa_3_fp,
                         output_tree_fp,
                         output_msa_phy_fp)
     reformat_tree_exp = [
         "(((((((SE001:2.1494877,SE010:1.08661):3.7761166,SE008:"
         "0.86305436):0.21024487,(SE006:0.56704221,SE009:0.5014676):"
         "0.90294223):0.20542323,SE005:3.0992506):0.37145632,SE004:"
         "1.8129133):0.72933621,SE003:1.737411):0.24447835,(SE002:"
         "1.6606127,SE007:0.70000178):1.6331374);\n",
         "(((((((SE001:2.1494876,SE010:2.1494876):"
         "3.7761166,SE008:5.9256042):0.2102448,(SE006:"
         "5.2329068,SE009:5.2329068):0.9029422):0.2054233,"
         "SE005:6.3412723):0.3714563,SE004:6.7127286):"
         "0.7293362,SE003:7.4420648):0.2444784,SE002:"
         "7.6865432);\n"]
     with open(output_tree_fp, 'r') as output_tree_f:
         reformat_tree_act = output_tree_f.readlines()
     self.assertListEqual(reformat_tree_exp, reformat_tree_act)
     msa_fa = TabularMSA.read(output_msa_phy_fp, constructor=Protein)
     labels_exp = [u'SE001', u'SE002', u'SE003', u'SE004', u'SE005',
                   u'SE006', u'SE008', u'SE009', u'SE010']
     labels_act = list(msa_fa.index)
     self.assertListEqual(labels_exp, labels_act)
开发者ID:carlyboyd,项目名称:WGS-HGT,代码行数:34,代码来源:test_reformat_input.py

示例3: aln_distmat

# 需要导入模块: from skbio import TabularMSA [as 别名]
# 或者: from skbio.TabularMSA import read [as 别名]
def aln_distmat(alignment, reps=3):
    '''Calculate pairwise distances from a MSA of genomes'''
    aln = TabularMSA.read(alignment, constructor=DNA)
    aln.reassign_index(minter="id")
    dist = DistanceMatrix.from_iterable([seq.values for seq in aln],
                                        metric=hamming, keys=aln.index)
    return dist
开发者ID:kdmurray91,项目名称:kwip-experiments,代码行数:9,代码来源:alndist.py

示例4: reformat_treepuzzle

# 需要导入模块: from skbio import TabularMSA [as 别名]
# 或者: from skbio.TabularMSA import read [as 别名]
def reformat_treepuzzle(gene_tree,
                        species_tree,
                        gene_msa_fa_fp,
                        output_tree_fp,
                        output_msa_phy_fp):
    """ Reformat input trees to the format accepted by Tree-Puzzle.

    Parameters
    ----------
    gene_tree: skbio.TreeNode
        TreeNode instance for gene tree
    species_tree_fp: skbio.TreeNode
        TreeNode instance for species tree
    gene_msa_fa_fp: string
        file path to gene alignments in FASTA format
    output_tree_fp: string
        file path to output trees (Nexus format)
    output_msa_phy_fp: string
        file path to output MSA in PHYLIP format

    See Also
    --------
    skbio.TreeNode
    """
    # remove the root branch length (output with ALF)
    for node in gene_tree.postorder():
        if node.is_root():
            node.length = None
    for node in species_tree.postorder():
        if node.is_root():
            node.length = None
    # trim gene tree leaves to exclude '_GENENAME' (if exists)
    trim_gene_tree_leaves(gene_tree)
    join_trees(gene_tree,
               species_tree,
               output_tree_fp)
    # trim FASTA sequence labels to exclude '/GENENAME' (if exists)
    msa_fa = TabularMSA.read(gene_msa_fa_fp, constructor=Protein)
    msa_fa.reassign_index(minter='id')
    mapping = id_mapper(msa_fa.index)
    msa_fa.reassign_index(mapping=mapping)
    msa_fa.write(output_msa_phy_fp, format='phylip')
开发者ID:qiyunzhu,项目名称:WGS-HGT,代码行数:44,代码来源:reformat_input.py

示例5: read_msa_file

# 需要导入模块: from skbio import TabularMSA [as 别名]
# 或者: from skbio.TabularMSA import read [as 别名]
    def read_msa_file(self):

        msa = TabularMSA.read(self.msa_file, constructor=Protein)
        msa.reassign_index(minter='id')

        return msa
开发者ID:Mazzafish,项目名称:neoantigen,代码行数:8,代码来源:msa_utils.py

示例6: get_consrv_score

# 需要导入模块: from skbio import TabularMSA [as 别名]
# 或者: from skbio.TabularMSA import read [as 别名]
def get_consrv_score(fsta_fh,host,clustalo_fh,rate4site_fh):
    """
    Extracts Residue wise conservation scores 

    :param fsta_fh: path to FASTA file
    :param host: host name e.g. E. coli
    :param clustalo_fh: path to Clustalo libraries
    :param rate4site_fh: path to rate4site libraries
    :returns data_conserv: pandas table with conservation scores per residue
    """
    from Bio.Blast import NCBIWWW
    from Bio.Blast import NCBIXML
    from skbio import TabularMSA, Protein
    from Bio.Alphabet import IUPAC
    from dms2dfe.lib.io_seq_files import fasta_nts2prt
    blast_method='blastp'
    blast_db="swissprot"
    
    # make prt fasta
    fsta_prt_fh="%s_prt%s" % (splitext(fsta_fh)[0],splitext(fsta_fh)[1])
    if not exists(fsta_prt_fh):
        prt_seq=fasta_nts2prt(fsta_fh,host=host)

    for fsta_data in SeqIO.parse(fsta_prt_fh,'fasta'):
        ref_id=fsta_data.id
        prt_seq=str(fsta_data.seq)
        break

    blast_fh="%s_blastp.xml" % (splitext(fsta_fh)[0])
    blast_fasta_fh="%s.fasta" % (splitext(blast_fh)[0])
    msa_fh=blast_fasta_fh.replace("blastp","blastp_msa")
    if not exists(msa_fh):
        if not exists(blast_fasta_fh):
            if not exists(blast_fh):
                # get blast
                blast_handle = NCBIWWW.qblast(blast_method, 
                               blast_db, sequence=prt_seq)
                blast_results = blast_handle.read()
                blast_f=open(blast_fh, 'w')
                blast_f.write(blast_results)
                blast_f.close()
            blast_f = open(blast_fh, 'r')
            blast_records=NCBIXML.parse(blast_f)

            # blast to fasta
            blast_fasta_f=open(blast_fasta_fh, 'w')
            fsta_data=[]
            fsta_data.append(SeqRecord.SeqRecord(Seq.Seq(prt_seq,IUPAC.protein),
                                 id = ref_id,description=''))
            for rec in blast_records:
                for aln in rec.alignments:
                    for hsp in aln.hsps:
                        fsta_data.append(SeqRecord.SeqRecord(Seq.Seq(hsp.sbjct,IUPAC.protein),
                                 id = aln.hit_id,description=''))
            SeqIO.write(fsta_data, blast_fasta_f, "fasta")
            blast_fasta_f.close()
    # blast fasta to msa : clustaw
        clustalo_com="%s -i %s -o %s --outfmt=fasta --log=%s.log" % (clustalo_fh,blast_fasta_fh,msa_fh,msa_fh)
        log_fh="%s.log" % clustalo_fh
        log_f = open(log_fh,'a')
        subprocess.call(clustalo_com,shell=True,stdout=log_f, stderr=subprocess.STDOUT)
        log_f.close()
    # msa to consrv skbio==0.4.1
    msa = TabularMSA.read(msa_fh, constructor=Protein)
    msa.reassign_index(minter='id')
    metric='inverse_shannon_uncertainty'
    gap_modes=['ignore','include']
    gap_modes_labels=['gaps ignored','gaps included'] #'no gaps',
    data_feats_conserv=pd.DataFrame(columns=["aasi"])

    for fsta_data in SeqIO.parse(msa_fh,'fasta'):
        if ref_id in fsta_data.id:
            ref_seq=fsta_data.seq
            break

    for gap_mode in gap_modes:
        positional_conservation = msa.conservation(metric=metric, degenerate_mode='nan', gap_mode=gap_mode)
        aai=1
        for i in range(len(ref_seq)):
            if ref_seq[i]!="-":
                data_feats_conserv.loc[i,"aasi"]=aai
                data_feats_conserv.loc[i,"Conservation score (inverse shannon uncertainty): %s" % (gap_modes_labels[gap_modes.index(gap_mode)])]=positional_conservation[i]
                aai+=1
    data_feats_conserv=data_feats_conserv.set_index("aasi")

    # rate4site_rates=["-Im","-Ib"]
    # rate4site_rates_labels=["maximum likelihood rates","empirical Bayes rates"]
    rate4site_rates=["-Ib"]
    # rate4site_rates_labels=["maximum likelihood rates","empirical Bayes rates"]
    rate4site_trees=["-zj","-zn"]
    rate4site_trees_labels=["Jukes-Cantor distances","maximum likelihood distances"]
    data_feats_conserv_rate4site=pd.DataFrame()
    data_feats_conserv_rate4site.index.name="aasi"
    rate4site_out_fh="%s.rate4site" % (splitext(rate4site_fh)[0])
    for rate4site_rate in rate4site_rates:
        for rate4site_tree in rate4site_trees:
            rate4site_out_csv_fh="%s/%s.csv%s%s" % (dirname(rate4site_out_fh),ref_id,rate4site_rate,rate4site_tree)
            if not exists(rate4site_out_csv_fh):
                rate4site_com="%s -s %s -o %s -x %s_x -y %s_y -a %s %s %s;rm r4s.res" % \
                (rate4site_fh,msa_fh,rate4site_out_fh,rate4site_out_fh,rate4site_out_fh,ref_id,rate4site_rate,rate4site_tree)
#.........这里部分代码省略.........
开发者ID:kc-lab,项目名称:dms2dfe,代码行数:103,代码来源:get_protein_features.py


注:本文中的skbio.TabularMSA.read方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。