本文整理汇总了Python中skbio.Alignment.read方法的典型用法代码示例。如果您正苦于以下问题：Python Alignment.read方法的具体用法？Python Alignment.read怎么用？Python Alignment.read使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类skbio.Alignment的用法示例。
在下文中一共展示了Alignment.read方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: align_two_alignments
# 需要导入模块: from skbio import Alignment [as 别名]
# 或者: from skbio.Alignment import read [as 别名]
def align_two_alignments(aln1_fp, aln2_fp, moltype, params=None):
    """Return an Alignment built by running ``mafft-profile`` on two alignments.

    Parameters
    ----------
    aln1_fp : string
        file path of 1st alignment
    aln2_fp : string
        file path of 2nd alignment
    moltype : object
        Forwarded unchanged as the ``constructor`` argument of
        ``Alignment.read`` (presumably a sequence class such as skbio.DNA --
        confirm against callers).
    params : dict, optional
        Parameters to pass in to the Mafft app controller. Default is None.

    Returns
    -------
    Alignment
        The aligned sequences parsed from the application's standard output.
    """
    # Create Mafft app; both input paths are handed over as a list.
    app = Mafft(InputHandler='_input_as_paths',
                params=params,
                SuppressStderr=False)
    # Override the command so the controller invokes 'mafft-profile'.
    app._command = 'mafft-profile'

    # Run the application on the two alignment files.
    res = app([aln1_fp, aln2_fp])
    try:
        return Alignment.read(res['StdOut'], constructor=moltype)
    finally:
        # Release the result's resources, as align_unaligned_seqs does,
        # whether or not parsing succeeded.
        res.cleanUp()
示例2: align_unaligned_seqs
# 需要导入模块: from skbio import Alignment [as 别名]
# 或者: from skbio.Alignment import read [as 别名]
def align_unaligned_seqs(seqs_fp, moltype=DNA, params=None, accurate=False):
    """Align unaligned sequences with Mafft.

    Parameters
    ----------
    seqs_fp : string
        file path of the input fasta file
    moltype : {skbio.DNA, skbio.RNA, skbio.Protein}
        Selects the Mafft sequence-type switch via ``MOLTYPE_MAP`` and is
        also passed as the ``constructor`` argument of ``Alignment.read``.
    params : dict-like type
        Additional parameter settings passed to the application.
        Default is None.
    accurate : boolean
        Perform accurate alignment or not. It will sacrifice performance
        if set to True. Default is False.

    Returns
    -------
    Alignment object
        The aligned sequences.

    See Also
    --------
    skbio.Alignment
    skbio.DNA
    skbio.RNA
    skbio.Protein
    """
    # Create Mafft app.
    app = Mafft(InputHandler='_input_as_path', params=params)

    # Turn on correct sequence type
    app.Parameters[MOLTYPE_MAP[moltype]].on()

    # Do not report progress
    app.Parameters['--quiet'].on()

    # More accurate alignment, sacrificing performance.
    if accurate:
        app.Parameters['--globalpair'].on()
        app.Parameters['--maxiterate'].Value = 1000

    # Run the application on the input file.
    res = app(seqs_fp)
    try:
        # Parse the application's standard output into an Alignment.
        alignment = Alignment.read(res['StdOut'], constructor=moltype)
    finally:
        # Clean up even when parsing fails, so the result is always released.
        res.cleanUp()

    return alignment
示例3: reformat_treepuzzle
# 需要导入模块: from skbio import Alignment [as 别名]
# 或者: from skbio.Alignment import read [as 别名]
def reformat_treepuzzle(gene_tree,
                        species_tree,
                        gene_msa_fa_fp,
                        output_tree_fp,
                        output_msa_phy_fp):
    """Reformat input trees and alignment to the format accepted by Tree-Puzzle.

    Parameters
    ----------
    gene_tree: skbio.TreeNode
        TreeNode instance for gene tree
    species_tree: skbio.TreeNode
        TreeNode instance for species tree
    gene_msa_fa_fp: string
        file path to gene alignments in FASTA format
    output_tree_fp: string
        file path to output trees (Nexus format)
    output_msa_phy_fp: string
        file path to output MSA in PHYLIP format

    See Also
    --------
    skbio.TreeNode
    """
    # Drop the root branch length (output with ALF) on the gene tree first,
    # then on the species tree.
    for tree in (gene_tree, species_tree):
        for node in tree.postorder():
            if node.is_root():
                node.length = None

    # Trim gene tree leaves to exclude '_GENENAME' (if exists), then hand
    # both trees and the output path to join_trees.
    trim_gene_tree_leaves(gene_tree)
    join_trees(gene_tree, species_tree, output_tree_fp)

    # Trim FASTA sequence labels to exclude '/GENENAME' (if exists) and write
    # the relabelled alignment in PHYLIP format. The second value returned by
    # update_ids is not needed here.
    alignment = Alignment.read(gene_msa_fa_fp, format='fasta')
    relabelled, _ = alignment.update_ids(func=id_mapper)
    relabelled.write(output_msa_phy_fp, format='phylip')
示例4: parse_deblur_output
# 需要导入模块: from skbio import Alignment [as 别名]
# 或者: from skbio.Alignment import read [as 别名]
def parse_deblur_output(seqs_fp, derep_clusters):
    """Parse a deblur output file into an OTU map keyed by sequence.

    Parameters
    ----------
    seqs_fp: string
        file path to deblurred sequences
    derep_clusters: dictionary
        dictionary of dereplicated sequences map

    Returns
    -------
    clusters: dictionary
        dictionary of clusters including dereplicated sequence labels

    Raises
    ------
    ValueError
        If the seed ID taken from a sequence label is not a key of
        derep_clusters.

    Notes
    -----
    Each label read from seqs_fp is cut at the first ';' to obtain a seed ID.
    The dereplicated labels stored under that ID in derep_clusters are
    appended to the entry keyed by the degapped sequence string (not the
    label). Not all sequences in derep_clusters need to be in seqs_fp since
    they could have been removed in the artifact filtering step.
    """
    otu_map = {}
    records = Alignment.read(seqs_fp, format='fasta')
    for seq_label, record in records.iteritems():
        seed_id = seq_label.split(';')[0]
        # Key on the ungapped sequence itself rather than its label.
        ungapped = str(record.degap())
        members = otu_map.setdefault(ungapped, [])
        if seed_id not in derep_clusters:
            raise ValueError(
                'Seed ID %s does not exist in .uc file' % seed_id)
        members.extend(derep_clusters[seed_id])
    return otu_map