本文整理汇总了 Python 中 cogent.core.alignment.SequenceCollection.toFasta 方法的典型用法代码示例。如果您正苦于以下问题：Python SequenceCollection.toFasta 方法的具体用法？Python SequenceCollection.toFasta 怎么用？Python SequenceCollection.toFasta 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 cogent.core.alignment.SequenceCollection 的用法示例。
在下文中一共展示了SequenceCollection.toFasta方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: build_tree_from_alignment
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import toFasta [as 别名]
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Build a tree from an alignment using the ClustalW app controller.

    aln: a cogent.core.alignment.Alignment object, or data that can be used
        to build a SequenceCollection.
    moltype: cogent.core.moltype.MolType object; must be DNA, RNA or PROTEIN.
    best_tree: if True (default: False), uses a slower but more accurate
        run: bootstrapping ("-bootstrap", "-seed", "-bootlabels") replaces
        the plain "-tree" option.
    params: dict of parameters to pass in to the Clustal app controller.
        The dict supplied by the caller is left untouched.

    Returns the tree produced by DndParser (built from PhyloNode objects)
    with its tip names mapped back to the original sequence identifiers.

    Raises ValueError if moltype is not DNA, RNA or PROTEIN.
    """
    # Work on a private copy so the "-type" entry added below does not leak
    # into the caller's dict.
    if params is None:
        params = {}
    else:
        params = dict(params)

    if moltype == DNA or moltype == RNA:
        params["-type"] = "d"
    elif moltype == PROTEIN:
        params["-type"] = "p"
    else:
        raise ValueError("moltype must be DNA, RNA, or PROTEIN")

    # The controller is created only now, after "-type" is in params.
    # Previously the app was built first, so the sequence type chosen above
    # was written into a dict the controller had already been handed.
    # NOTE(review): assumes the controller consumes params at construction.
    app = Clustalw(InputHandler="_input_as_multiline_string", params=params,
                   WorkingDir="/tmp")
    # Tree building only: disable the alignment step.
    app.Parameters["-align"].off()

    if best_tree:
        # Only fill in bootstrap settings the caller did not provide.
        if "-bootstrap" not in params:
            app.Parameters["-bootstrap"].on(1000)
        if "-seed" not in params:
            app.Parameters["-seed"].on(randint(0, 1000))
        if "-bootlabels" not in params:
            # NOTE(review): ClustalW documents "node"/"branch" for this
            # option -- confirm that "nodes" is accepted.
            app.Parameters["-bootlabels"].on("nodes")
    else:
        app.Parameters["-tree"].on()

    # Clustalw clips identifiers, so run it on short generated IDs and map
    # the tip names back afterwards.
    seq_collection = SequenceCollection(aln)
    int_map, int_keys = seq_collection.getIntMap()
    int_map = SequenceCollection(int_map)

    result = app(int_map.toFasta())
    try:
        tree = DndParser(result["Tree"].read(), constructor=PhyloNode)
    finally:
        # Release the result files even when parsing fails.
        result.cleanUp()

    for node in tree.tips():
        node.Name = int_keys[node.Name]
    return tree
示例2: build_tree_from_alignment
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import toFasta [as 别名]
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Build a tree from an alignment using the FastTree app controller.

    aln: alignment object; its getIntMap() method is used directly.
    moltype: DNA, RNA or PROTEIN. DNA/RNA sets "-nt" to True, PROTEIN sets
        it to False. Note that the moltype argument is checked, not the
        MolType of aln.
    best_tree: if True, the "-slow" parameter is switched on.
    params: dict of parameters for the FastTree app controller. The dict
        supplied by the caller is left untouched.

    Returns the tree produced by DndParser (built from PhyloNode objects)
    with its tip names mapped back to the original sequence identifiers.

    Raises ValueError for any other moltype.
    """
    # Copy so that the "-nt"/"-slow" entries do not leak into the caller's
    # dict.
    if params is None:
        params = {}
    else:
        params = dict(params)

    if moltype == DNA or moltype == RNA:
        params["-nt"] = True
    elif moltype == PROTEIN:
        params["-nt"] = False
    else:
        raise ValueError(
            "FastTree does not support moltype: %s" % moltype.label)

    if best_tree:
        params["-slow"] = True

    # Create mapping between abbreviated IDs and full IDs, and a collection
    # keyed by the abbreviated IDs to feed to the application.
    int_map, int_keys = aln.getIntMap()
    int_map = SequenceCollection(int_map, MolType=moltype)

    app = FastTree(params=params)
    result = app(int_map.toFasta())
    try:
        tree = DndParser(result["Tree"].read(), constructor=PhyloNode)
    finally:
        # The original never released the result; clean up as the other
        # app-controller wrappers do.
        result.cleanUp()

    # Remap tip names to the original identifiers.
    for tip in tree.tips():
        tip.Name = int_keys[tip.Name]
    return tree
示例3: align_unaligned_seqs
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import toFasta [as 别名]
def align_unaligned_seqs(seqs, moltype, params=None):
    """Align a set of unaligned sequences with the ClustalW app controller.

    seqs: cogent.core.alignment.SequenceCollection object, or data that can
        be used to build one.
    moltype: a MolType object. DNA, RNA, or PROTEIN.
    params: dict of parameters to pass in to the Clustal app controller.

    Returns a cogent.core.alignment.Alignment keyed by the original
    sequence identifiers.
    """
    # Swap the full IDs for abbreviated ones; short_to_full maps them back.
    short_seqs, short_to_full = SequenceCollection(
        seqs, MolType=moltype).getIntMap()
    short_collection = SequenceCollection(short_seqs, MolType=moltype)

    clustal = Clustalw(InputHandler="_input_as_multiline_string",
                       params=params)
    output = clustal(short_collection.toFasta())

    # Parse the aligned output and restore the original identifiers.
    parsed = dict(ClustalParser(output["Align"].readlines()))
    restored = dict(
        (short_to_full[name], seq) for name, seq in parsed.items())
    aligned = Alignment(restored, MolType=moltype)

    output.cleanUp()
    return aligned
示例4: add_seqs_to_alignment
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import toFasta [as 别名]
def add_seqs_to_alignment(seqs, aln, moltype, params=None):
    """Add sequences to an existing alignment with the ClustalW controller.

    seqs: a cogent.core.alignment.SequenceCollection object, or data that can
        be used to build one.
    aln: a cogent.core.alignment.Alignment object, or data that can be used
        to build one.
    moltype: MolType passed to every SequenceCollection/Alignment built here.
    params: dict of parameters to pass in to the Clustal app controller.

    Returns an Alignment keyed by the original identifiers of both inputs.
    """
    # Create mapping between abbreviated IDs and full IDs for the new
    # sequences.
    seq_collection = SequenceCollection(seqs, MolType=moltype)
    seq_int_map, seq_int_keys = seq_collection.getIntMap()
    seq_int_map = SequenceCollection(seq_int_map, MolType=moltype)

    # Same for the existing alignment, with a distinct prefix for its
    # abbreviated IDs.
    aln = Alignment(aln, MolType=moltype)
    aln_int_map, aln_int_keys = aln.getIntMap(prefix="seqn_")
    aln_int_map = Alignment(aln_int_map, MolType=moltype)

    # One lookup table covering both inputs.
    seq_int_keys.update(aln_int_keys)

    # Create Clustalw app; input arrives through the profile options rather
    # than "-infile".
    app = Clustalw(InputHandler="_input_as_multiline_string", params=params,
                   SuppressStderr=True)
    app.Parameters["-align"].off()
    app.Parameters["-infile"].off()
    app.Parameters["-sequences"].on()

    # The profiles are written to temp files. Track them so they are removed
    # even if the run or the parsing fails (previously they leaked).
    temp_paths = []
    try:
        # Alignment goes in as profile1, new sequences as profile2.
        for option, collection in (("-profile1", aln_int_map),
                                   ("-profile2", seq_int_map)):
            path = app._tempfile_as_multiline_string(collection.toFasta())
            temp_paths.append(path)
            app.Parameters[option].on(path)

        res = app()
        try:
            alignment = dict(ClustalParser(res["Align"].readlines()))
        finally:
            res.cleanUp()
    finally:
        for path in temp_paths:
            remove(path)

    # Make new dict mapping original IDs.
    new_alignment = {}
    for k, v in alignment.items():
        new_alignment[seq_int_keys[k]] = v
    return Alignment(new_alignment, MolType=moltype)
示例5: bootstrap_tree_from_alignment
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import toFasta [as 别名]
def bootstrap_tree_from_alignment(aln, seed=None, num_trees=None, params=None):
    """Build a bootstrapped tree from an alignment with ClustalW.

    aln: a cogent.core.alignment.Alignment object, or data that can be used
        to build a SequenceCollection.
    seed: an integer, seed value to use. Only consulted when "-seed" is
        off after the controller has been built from params; if it is None
        then, a random integer between 0 and 1000 is used.
    num_trees: an integer, number of trees to bootstrap against. Only
        consulted when "-bootstrap" is off; defaults to 1000 in that case.
    params: dict of parameters to pass in to the Clustal app controller.

    Returns the tree produced by DndParser (built from PhyloNode objects)
    with its tip names mapped back to the original sequence identifiers.
    """
    # Bootstrap run only: alignment and plain tree building are disabled.
    clustal = Clustalw(InputHandler='_input_as_multiline_string',
                       params=params, WorkingDir='/tmp')
    for disabled in ('-align', '-tree'):
        clustal.Parameters[disabled].off()

    # Fill in whichever bootstrap settings params left switched off.
    if clustal.Parameters['-bootstrap'].isOff():
        if num_trees is None:
            num_trees = 1000
        clustal.Parameters['-bootstrap'].on(num_trees)

    if clustal.Parameters['-seed'].isOff():
        if seed is None:
            seed = randint(0, 1000)
        clustal.Parameters['-seed'].on(seed)

    if clustal.Parameters['-bootlabels'].isOff():
        clustal.Parameters['-bootlabels'].on("node")

    # Clustalw clips identifiers, so it is fed abbreviated IDs which are
    # mapped back on the resulting tree.
    short_seqs, short_to_full = SequenceCollection(aln).getIntMap()
    fasta = SequenceCollection(short_seqs).toFasta()

    output = clustal(fasta)
    tree = DndParser(output['Tree'].read(), constructor=PhyloNode)
    for leaf in tree.tips():
        leaf.Name = short_to_full[leaf.Name]

    output.cleanUp()
    return tree
示例6: cdhit_clusters_from_seqs
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import toFasta [as 别名]
def cdhit_clusters_from_seqs(seqs, moltype, params=None):
    """Return the CD-HIT clusters for seqs.

    seqs : dict like collection of sequences
    moltype : cogent.core.moltype object
    params : cd-hit parameters. The dict supplied by the caller is left
        untouched; a temporary "-o" output name is added to a private copy
        when none was given.

    Returns a list of clusters, each a list of original sequence IDs.

    NOTE: This method will call CD_HIT if moltype is PROTEIN,
    CD_HIT_EST if moltype is RNA/DNA, and raise ValueError if any other
    moltype is passed.
    """
    seqs = SequenceCollection(seqs, MolType=moltype)
    # Sequence IDs ARE remapped here: the application sees abbreviated IDs
    # and int_keys maps them back to the full ones.
    int_map, int_keys = seqs.getIntMap()
    int_map = SequenceCollection(int_map, MolType=moltype)

    # Pick the controller; RNA and DNA share the EST variant.
    if moltype is PROTEIN:
        controller = CD_HIT
    elif moltype is RNA or moltype is DNA:
        controller = CD_HIT_EST
    else:
        raise ValueError("Moltype must be either PROTEIN, RNA, or DNA")

    # Copy params so the temporary "-o" path does not leak into (and get
    # reused through) the caller's dict.
    if params is None:
        params = {}
    else:
        params = dict(params)
    if '-o' not in params:
        params['-o'] = get_tmp_filename()

    working_dir = get_tmp_filename()
    app = controller(WorkingDir=working_dir, params=params)

    # Run and translate every cluster member back to its original ID.
    res = app(int_map.toFasta())
    clusters = parse_cdhit_clstr_file(res['CLSTR'].readlines())
    remapped_clusters = [[int_keys[i] for i in c] for c in clusters]

    # Perform cleanup.
    res.cleanUp()
    shutil.rmtree(working_dir)
    remove(params['-o'] + '.bak.clstr')
    return remapped_clusters
示例7: build_tree_from_alignment
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import toFasta [as 别名]
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Build a tree from an alignment using the Muscle app controller.

    aln: a cogent.core.alignment.Alignment object, or data that can be used
        to build a SequenceCollection.
    moltype: cogent.core.moltype.MolType object; its label is passed to the
        "-seqtype" option.
    best_tree: unsupported (ignored).
    params: dict of parameters to pass in to the Muscle app controller.

    Returns the tree produced by DndParser (built from PhyloNode objects)
    with its tip names mapped back to the original sequence identifiers.
    """
    # Cluster-only run that writes its first tree to a temp file inside the
    # controller's working directory.
    muscle = Muscle(InputHandler='_input_as_multiline_string', params=params,
                    WorkingDir='/tmp')
    muscle.Parameters['-clusteronly'].on()
    muscle.Parameters['-tree1'].on(get_tmp_filename(muscle.WorkingDir))
    muscle.Parameters['-seqtype'].on(moltype.label)

    # Abbreviated IDs go to the application; short_to_full restores them.
    short_seqs, short_to_full = SequenceCollection(
        aln, MolType=moltype).getIntMap()
    short_collection = SequenceCollection(short_seqs, MolType=moltype)

    output = muscle(short_collection.toFasta())
    tree = DndParser(output['Tree1Out'].read(), constructor=PhyloNode)
    for leaf in tree.tips():
        leaf.Name = short_to_full[leaf.Name]

    output.cleanUp()
    return tree
示例8: align_unaligned_seqs
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import toFasta [as 别名]
def align_unaligned_seqs(seqs,moltype,params=None,accurate=False):
    """Align unaligned sequences with the Mafft app controller.

    seqs: data that can be used to build a SequenceCollection.
    moltype: MolType object; its upper-cased label is looked up in
        MOLTYPE_MAP to choose which Mafft option to switch on.
    params: dict of parameters to pass in to the Mafft app controller.
    accurate: if True, switches on "--globalpair" and sets "--maxiterate"
        to 1000 (more accurate alignment, sacrificing performance).

    Returns an Alignment keyed by the original sequence identifiers.
    """
    # Abbreviated IDs go to the application; short_to_full restores them.
    short_seqs, short_to_full = SequenceCollection(
        seqs, MolType=moltype).getIntMap()
    short_collection = SequenceCollection(short_seqs, MolType=moltype)

    mafft = Mafft(InputHandler='_input_as_multiline_string', params=params)
    # Turn on the option matching the molecule type.
    mafft.Parameters[MOLTYPE_MAP[moltype.label.upper()]].on()
    # Do not report progress.
    mafft.Parameters['--quiet'].on()
    if accurate:
        mafft.Parameters['--globalpair'].on()
        mafft.Parameters['--maxiterate'].Value = 1000

    output = mafft(short_collection.toFasta())

    # Parse the aligned FASTA from stdout and restore the original IDs.
    parsed = dict(parse_fasta(output['StdOut']))
    restored = dict(
        (short_to_full[name], seq) for name, seq in parsed.items())
    aligned = Alignment(restored, MolType=moltype)

    output.cleanUp()
    return aligned
示例9: cdhit_from_seqs
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import toFasta [as 别名]
def cdhit_from_seqs(seqs, moltype, params=None):
    """Return the CD-HIT results for seqs as a SequenceCollection.

    seqs : dict like collection of sequences
    moltype : cogent.core.moltype object
    params : cd-hit parameters. The dict supplied by the caller is left
        untouched; a temporary "-o" output name is added to a private copy
        when none was given.

    NOTE: This method will call CD_HIT if moltype is PROTEIN,
    CD_HIT_EST if moltype is RNA/DNA, and raise ValueError if any other
    moltype is passed.
    """
    # Keys are not remapped: the sequences are handed over under their own
    # IDs (the original author reports testing with 100-character IDs).
    seqs = SequenceCollection(seqs, MolType=moltype)

    # Pick the controller; RNA and DNA share the EST variant.
    if moltype is PROTEIN:
        controller = CD_HIT
    elif moltype is RNA or moltype is DNA:
        controller = CD_HIT_EST
    else:
        raise ValueError("Moltype must be either PROTEIN, RNA, or DNA")

    # Copy params so the temporary "-o" path does not leak into (and get
    # reused through) the caller's dict.
    if params is None:
        params = {}
    else:
        params = dict(params)
    if '-o' not in params:
        params['-o'] = get_tmp_filename()

    working_dir = get_tmp_filename()
    app = controller(WorkingDir=working_dir, params=params)

    # Run and read back the FASTA output.
    res = app(seqs.toFasta())
    new_seqs = dict(MinimalFastaParser(res['FASTA'].readlines()))

    # Perform cleanup.
    res.cleanUp()
    shutil.rmtree(working_dir)
    remove(params['-o'] + '.bak.clstr')
    return SequenceCollection(new_seqs, MolType=moltype)
示例10: align_unaligned_seqs
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import toFasta [as 别名]
def align_unaligned_seqs(seqs, moltype, params=None):
    """Align unaligned sequences with the Muscle app controller.

    seqs: SequenceCollection object, or data that can be used to build one.
    moltype: a MolType object. DNA, RNA, or PROTEIN.
    params: dict of parameters to pass in to the Muscle app controller.
        The dict supplied by the caller is left untouched.

    Returns an Alignment keyed by the original sequence identifiers.
    """
    # Copy params: a temporary "-out" path is added below and must not leak
    # into the caller's dict.
    if params:
        params = dict(params)
    else:
        params = {}

    # Create mapping between abbreviated IDs and full IDs, and a collection
    # keyed by the abbreviated IDs to feed to the application.
    seq_collection = SequenceCollection(seqs, MolType=moltype)
    int_map, int_keys = seq_collection.getIntMap()
    int_map = SequenceCollection(int_map, MolType=moltype)

    # Muscle writes its output to a temporary file.
    params['-out'] = get_tmp_filename()
    app = Muscle(InputHandler='_input_as_multiline_string', params=params)

    res = app(int_map.toFasta())
    try:
        alignment = dict(MinimalFastaParser(res['MuscleOut'].readlines()))
    finally:
        # Release the result files even when parsing fails.
        res.cleanUp()

    # Make new dict mapping original IDs.
    new_alignment = {}
    for k, v in alignment.items():
        new_alignment[int_keys[k]] = v
    return Alignment(new_alignment, MolType=moltype)
示例11: align_two_alignments
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import toFasta [as 别名]
def align_two_alignments(aln1, aln2, params=None):
    """Align two existing alignments with Muscle's profile mode.

    aln1, aln2: cogent.core.alignment.Alignment objects, or data that can be
        used to build SequenceCollections.
    params: dict of parameters to pass in to the Muscle app controller.
        The dict supplied by the caller is left untouched.

    Returns an Alignment keyed by the original identifiers of both inputs.
    """
    # Copy params: "-out" and "-profile" are added below and must not leak
    # into the caller's dict.
    if params:
        params = dict(params)
    else:
        params = {}

    # Abbreviate the IDs of each input with its own prefix so the two sets
    # can be told apart in the output.
    aln1_int_map, aln1_int_keys = SequenceCollection(aln1).getIntMap(
        prefix='aln1_')
    aln1_int_map = SequenceCollection(aln1_int_map)
    aln2_int_map, aln2_int_keys = SequenceCollection(aln2).getIntMap(
        prefix='aln2_')
    aln2_int_map = SequenceCollection(aln2_int_map)

    # Set output and profile options.
    params.update({'-out': get_tmp_filename(), '-profile': True})

    # Temp input files are tracked so they are removed even if writing,
    # running or parsing fails (previously they leaked on any error).
    input_filenames = []
    try:
        for int_map in (aln1_int_map, aln2_int_map):
            filename = get_tmp_filename()
            handle = open(filename, 'w')
            input_filenames.append(filename)
            try:
                handle.write(int_map.toFasta())
            finally:
                handle.close()

        # Create Muscle app and get results.
        app = Muscle(InputHandler='_input_as_multifile', params=params)
        res = app(tuple(input_filenames))
        try:
            alignment = dict(
                MinimalFastaParser(res['MuscleOut'].readlines()))
        finally:
            res.cleanUp()
    finally:
        for filename in input_filenames:
            remove(filename)

    # Make new dict mapping original IDs; a name that did not come from
    # aln1 is looked up in aln2's mapping.
    new_alignment = {}
    for k, v in alignment.items():
        if k in aln1_int_keys:
            new_alignment[aln1_int_keys[k]] = v
        else:
            new_alignment[aln2_int_keys[k]] = v
    return Alignment(new_alignment)
示例12: add_seqs_to_alignment
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import toFasta [as 别名]
def add_seqs_to_alignment(seqs, aln, moltype, params=None, accurate=False):
    """Add sequences to an existing alignment with the Mafft controller.

    seqs: data that can be used to build a SequenceCollection.
    aln: a cogent.core.alignment.Alignment object, or data that can be used
        to build one; it is passed to Mafft as the "--seed" alignment.
    moltype: MolType object; its upper-cased label is looked up in
        MOLTYPE_MAP to choose which Mafft option to switch on.
    params: dict of parameters to pass in to the Mafft app controller.
    accurate: if True, switches on "--globalpair" and sets "--maxiterate"
        to 1000 (more accurate alignment, sacrificing performance).

    Returns an Alignment keyed by the original identifiers of both inputs.
    """
    # Create mapping between abbreviated IDs and full IDs for the new
    # sequences.
    seq_collection = SequenceCollection(seqs, MolType=moltype)
    seq_int_map, seq_int_keys = seq_collection.getIntMap()
    seq_int_map = SequenceCollection(seq_int_map, MolType=moltype)

    # Same for the existing alignment, with a distinct prefix for its
    # abbreviated IDs.
    aln = Alignment(aln, MolType=moltype)
    aln_int_map, aln_int_keys = aln.getIntMap(prefix='seqn_')
    aln_int_map = Alignment(aln_int_map, MolType=moltype)

    # One lookup table covering both inputs.
    seq_int_keys.update(aln_int_keys)

    # Create Mafft app.
    app = Mafft(InputHandler='_input_as_multiline_string',
                params=params,
                SuppressStderr=True)
    # Turn on correct moltype.
    moltype_string = moltype.label.upper()
    app.Parameters[MOLTYPE_MAP[moltype_string]].on()
    # Do not report progress.
    app.Parameters['--quiet'].on()
    # More accurate alignment, sacrificing performance.
    if accurate:
        app.Parameters['--globalpair'].on()
        app.Parameters['--maxiterate'].Value = 1000

    # The seed alignment is written to a temp file; make sure it is removed
    # even if the run or the parsing fails (previously it leaked).
    seed_path = app._tempfile_as_multiline_string(aln_int_map.toFasta())
    try:
        app.Parameters['--seed'].on(seed_path)
        res = app(seq_int_map.toFasta())
        try:
            alignment = dict(parse_fasta(res['StdOut']))
        finally:
            res.cleanUp()
    finally:
        remove(seed_path)

    # Make new dict mapping original IDs. "_seed_" is stripped from output
    # names before the lookup (presumably Mafft marks seed sequences with
    # it -- confirm against Mafft's output format).
    new_alignment = {}
    for k, v in alignment.items():
        key = k.replace('_seed_', '')
        new_alignment[seq_int_keys[key]] = v
    return Alignment(new_alignment, MolType=moltype)
示例13: add_seqs_to_alignment
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import toFasta [as 别名]
def add_seqs_to_alignment(seqs, aln, params=None):
    """Add sequences to an existing alignment with Muscle's profile mode.

    seqs: a cogent.core.alignment.SequenceCollection object, or data that can
        be used to build one.
    aln: a cogent.core.alignment.Alignment object, or data that can be used
        to build a SequenceCollection.
    params: dict of parameters to pass in to the Muscle app controller.
        The dict supplied by the caller is left untouched.

    Returns an Alignment keyed by the original identifiers of both inputs.
    """
    # Copy params: "-out" and "-profile" are added below and must not leak
    # into the caller's dict.
    if params:
        params = dict(params)
    else:
        params = {}

    # Abbreviate the IDs of each input with its own prefix so the two sets
    # can be told apart in the output.
    seqs_int_map, seqs_int_keys = SequenceCollection(seqs).getIntMap(
        prefix='seq_')
    seqs_int_map = SequenceCollection(seqs_int_map)
    aln_int_map, aln_int_keys = SequenceCollection(aln).getIntMap(
        prefix='aln_')
    aln_int_map = SequenceCollection(aln_int_map)

    # Set output and profile options.
    params.update({'-out': get_tmp_filename(), '-profile': True})

    # Temp input files are tracked so they are removed even if writing,
    # running or parsing fails (previously they leaked on any error).
    written = []
    try:
        seqs_filename = get_tmp_filename()
        aln_filename = None
        for int_map in (seqs_int_map, aln_int_map):
            if int_map is seqs_int_map:
                filename = seqs_filename
            else:
                filename = aln_filename = get_tmp_filename()
            handle = open(filename, 'w')
            written.append(filename)
            try:
                handle.write(int_map.toFasta())
            finally:
                handle.close()

        # Create Muscle app and get results; the alignment file is passed
        # first, the new sequences second.
        app = Muscle(InputHandler='_input_as_multifile', params=params,
                     WorkingDir=tempfile.gettempdir())
        res = app((aln_filename, seqs_filename))
        try:
            alignment = dict(parse_fasta(res['MuscleOut']))
        finally:
            res.cleanUp()
    finally:
        for filename in written:
            remove(filename)

    # Make new dict mapping original IDs; a name that did not come from
    # seqs is looked up in the alignment's mapping.
    new_alignment = {}
    for k, v in alignment.items():
        if k in seqs_int_keys:
            new_alignment[seqs_int_keys[k]] = v
        else:
            new_alignment[aln_int_keys[k]] = v
    return Alignment(new_alignment)
示例14: create_locarnap_alignment
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import toFasta [as 别名]
def create_locarnap_alignment(seqs,moltype,struct=False,params=None):
    """Return mlocarna results for an unaligned collection of sequences.

    - seqs: A SequenceCollection object or something that behaves like one.
    - moltype: cogent.core.moltype object.
    - struct: Boolean, whether or not to also return the structure string
      collected from the probabilistic alignment file.
    - params: dict of parameters to pass in to the MLocarna app controller.

    Returns an Alignment keyed by the original sequence IDs, or the tuple
    (Alignment, structure) when struct is True.
    """
    # Construct SequenceCollection object and abbreviate its IDs; int_keys
    # maps the abbreviated IDs back to the originals.
    seqs = SequenceCollection(seqs, MolType=moltype)
    int_map, int_keys = seqs.getIntMap()
    int_map = SequenceCollection(int_map, MolType=moltype)

    # Create application.
    app = MLocarna(InputHandler='_input_as_multiline_string', params=params)
    # Get temporary directory to write all mlocarna files.
    mlocarna_dir = get_tmp_filename(suffix='')
    app.Parameters['--tgtdir'].on(mlocarna_dir)
    # Set parameters to run locarna-p.
    app.Parameters['--write-structure'].on()
    app.Parameters['--probabilistic'].on()
    app.Parameters['--consistency-transformation'].on()
    res = app(int_map.toFasta())

    # Get the structure from the results if necessary.
    if struct:
        structure_parts = []
        newstrline = True
        structfile = open(res['ProbabilisticAlignment'].name, 'U')
        try:
            for line in structfile:
                line = line.strip()
                # Structure lines (--write-structure) start with "." or "(".
                if len(line) > 0 and (line[0] == "." or line[0] == "("):
                    # Only every other structure line is kept, since the
                    # structure is written both above and below each block
                    # of the alignment.
                    if newstrline:
                        structure_parts.append(line)
                    newstrline = not newstrline
        finally:
            # The original never closed this handle.
            structfile.close()
        structure = "".join(structure_parts)

    aligned = dict(ClustalParser(res['ClustalAlignment']))

    # Make new dict mapping original IDs; names missing from int_keys are
    # kept unchanged.
    new_alignment = {}
    for k, v in aligned.items():
        new_alignment[int_keys.get(k, k)] = v
    new_alignment = Alignment(new_alignment, MolType=moltype)

    # Clean up after MlocARNA.
    res.cleanUp()
    shutil.rmtree(mlocarna_dir)

    # Output alignment and structure if asked for, else just the alignment.
    if struct:
        return new_alignment, structure
    else:
        return new_alignment