本文整理汇总了Python中cogent.core.alignment.SequenceCollection.getIntMap方法的典型用法代码示例。如果您正苦于以下问题：Python SequenceCollection.getIntMap方法的具体用法？Python SequenceCollection.getIntMap怎么用？Python SequenceCollection.getIntMap使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cogent.core.alignment.SequenceCollection的用法示例。
在下文中一共展示了SequenceCollection.getIntMap方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: build_tree_from_alignment
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import getIntMap [as 别名]
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Return a tree built by ClustalW from an alignment.

    aln: a cogent.core.alignment.Alignment object, or data that can be used
        to build one.
    moltype: cogent.core.moltype.MolType object; must be DNA, RNA or PROTEIN.
    best_tree: if True (default: False), a bootstrapped tree is requested
        instead of a plain one. 1000 replicates, a random seed in 0-1000 and
        "nodes" boot labels are used unless "-bootstrap", "-seed" or
        "-bootlabels" are present in params.
    params: dict of parameters to pass in to the Clustal app controller.
        The caller's dict is not modified.

    Returns the tree parsed from the ClustalW output (built with the
    PhyloNode constructor), with the original sequence names restored on
    the tips.

    Raises ValueError if moltype is not DNA, RNA or PROTEIN.
    """
    # Work on a private copy so the caller's dict is never modified.
    if params is None:
        params = {}
    else:
        params = dict(params)

    # "-type" has to be settled *before* the controller is created: the
    # controller is configured from params at construction time, so setting
    # it afterwards (as was done previously) had no visible effect.
    if moltype == DNA or moltype == RNA:
        params["-type"] = "d"
    elif moltype == PROTEIN:
        params["-type"] = "p"
    else:
        raise ValueError("moltype must be DNA, RNA, or PROTEIN")

    # Create instance of app controller, enable tree, disable alignment.
    app = Clustalw(InputHandler="_input_as_multiline_string", params=params,
                   WorkingDir="/tmp")
    app.Parameters["-align"].off()

    # best_tree -> bootstrap; explicit user settings take precedence.
    if best_tree:
        if "-bootstrap" not in params:
            app.Parameters["-bootstrap"].on(1000)
        if "-seed" not in params:
            app.Parameters["-seed"].on(randint(0, 1000))
        if "-bootlabels" not in params:
            app.Parameters["-bootlabels"].on("nodes")
    else:
        app.Parameters["-tree"].on()

    # Clustalw clips identifiers, so run it on abbreviated names and map
    # them back afterwards. int_keys maps abbreviated name -> original name.
    seq_collection = SequenceCollection(aln)
    int_map, int_keys = seq_collection.getIntMap()
    int_map = SequenceCollection(int_map)

    result = app(int_map.toFasta())
    try:
        tree = DndParser(result["Tree"].read(), constructor=PhyloNode)
        for node in tree.tips():
            node.Name = int_keys[node.Name]
    finally:
        # Always release the controller's result files, even when parsing
        # or renaming fails.
        result.cleanUp()
    return tree
示例2: align_unaligned_seqs
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import getIntMap [as 别名]
def align_unaligned_seqs(seqs, moltype, params=None):
    """Align unaligned sequences with ClustalW and return an Alignment.

    seqs: cogent.core.alignment.SequenceCollection object, or data that can
        be used to build one.
    moltype: a MolType object. DNA, RNA, or PROTEIN.
    params: dict of parameters to pass in to the Clustal app controller.

    Returns a cogent.core.alignment.Alignment keyed by the original
    sequence identifiers.
    """
    seq_collection = SequenceCollection(seqs, MolType=moltype)
    # The app is run on abbreviated IDs; int_keys maps abbreviated ID ->
    # original ID so the result can be relabelled.
    int_map, int_keys = seq_collection.getIntMap()
    int_map = SequenceCollection(int_map, MolType=moltype)

    app = Clustalw(InputHandler="_input_as_multiline_string", params=params)
    res = app(int_map.toFasta())
    try:
        alignment = dict(ClustalParser(res["Align"].readlines()))
        # Restore the original identifiers.
        new_alignment = dict((int_keys[k], v) for k, v in alignment.items())
        return Alignment(new_alignment, MolType=moltype)
    finally:
        # Release result files even if parsing or remapping fails.
        res.cleanUp()
示例3: add_seqs_to_alignment
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import getIntMap [as 别名]
def add_seqs_to_alignment(seqs, aln, moltype, params=None):
    """Add sequences to an existing alignment using ClustalW profiles.

    seqs: a cogent.core.alignment.SequenceCollection object, or data that
        can be used to build one.
    aln: a cogent.core.alignment.Alignment object, or data that can be used
        to build one.
    moltype: MolType object used to build the collections and the result.
    params: dict of parameters to pass in to the Clustal app controller.

    Returns an Alignment keyed by the original identifiers of both inputs.
    """
    # Abbreviated IDs for the new sequences (default prefix).
    seq_collection = SequenceCollection(seqs, MolType=moltype)
    seq_int_map, seq_int_keys = seq_collection.getIntMap()
    seq_int_map = SequenceCollection(seq_int_map, MolType=moltype)

    # Abbreviated IDs for the existing alignment, under a distinct prefix.
    aln = Alignment(aln, MolType=moltype)
    aln_int_map, aln_int_keys = aln.getIntMap(prefix="seqn_")
    aln_int_map = Alignment(aln_int_map, MolType=moltype)

    # One lookup table (abbreviated ID -> original ID) for both inputs.
    seq_int_keys.update(aln_int_keys)

    # Create Clustalw app configured for profile/sequence alignment.
    app = Clustalw(InputHandler="_input_as_multiline_string", params=params,
                   SuppressStderr=True)
    app.Parameters["-align"].off()
    app.Parameters["-infile"].off()
    app.Parameters["-sequences"].on()

    profile_paths = []
    try:
        # The alignment goes in as profile1, the new sequences as profile2.
        # Each is written to a temp file that this function must remove.
        for flag, collection in (("-profile1", aln_int_map),
                                 ("-profile2", seq_int_map)):
            app.Parameters[flag].on(
                app._tempfile_as_multiline_string(collection.toFasta()))
            profile_paths.append(app.Parameters[flag].Value)

        res = app()
        try:
            alignment = dict(ClustalParser(res["Align"].readlines()))
            # Restore the original identifiers.
            new_alignment = dict(
                (seq_int_keys[k], v) for k, v in alignment.items())
            new_alignment = Alignment(new_alignment, MolType=moltype)
        finally:
            res.cleanUp()
    finally:
        # Remove the profile temp files even when the run or parsing fails.
        for path in profile_paths:
            remove(path)
    return new_alignment
示例4: bootstrap_tree_from_alignment
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import getIntMap [as 别名]
def bootstrap_tree_from_alignment(aln, seed=None, num_trees=None, params=None):
    """Return a ClustalW tree for aln with bootstrap support values.

    aln: a cogent.core.alignment.Alignment object, or data that can be used
        to build one.
    seed: an integer, seed value to use. Only applied when "-seed" was not
        already switched on through params; if None, a random integer
        between 0 and 1000 is used.
    num_trees: an integer, number of trees to bootstrap against. Only
        applied when "-bootstrap" was not already switched on through
        params; defaults to 1000.
    params: dict of parameters to pass in to the Clustal app controller.

    Returns the tree parsed from the ClustalW output (built with the
    PhyloNode constructor), with the original sequence names restored on
    the tips.
    """
    # Create instance of controller, enable bootstrap, disable alignment
    # and plain tree building.
    app = Clustalw(InputHandler='_input_as_multiline_string', params=params,
                   WorkingDir='/tmp')
    app.Parameters['-align'].off()
    app.Parameters['-tree'].off()

    # Fill in bootstrap settings only where params left them off.
    if app.Parameters['-bootstrap'].isOff():
        if num_trees is None:
            num_trees = 1000
        app.Parameters['-bootstrap'].on(num_trees)

    if app.Parameters['-seed'].isOff():
        if seed is None:
            seed = randint(0, 1000)
        app.Parameters['-seed'].on(seed)

    if app.Parameters['-bootlabels'].isOff():
        app.Parameters['-bootlabels'].on("node")

    # Clustalw clips identifiers, so run it on abbreviated names and map
    # them back afterwards. int_keys maps abbreviated name -> original name.
    seq_collection = SequenceCollection(aln)
    int_map, int_keys = seq_collection.getIntMap()
    int_map = SequenceCollection(int_map)

    result = app(int_map.toFasta())
    try:
        tree = DndParser(result['Tree'].read(), constructor=PhyloNode)
        for node in tree.tips():
            node.Name = int_keys[node.Name]
    finally:
        # Release result files even if parsing or renaming fails.
        result.cleanUp()
    return tree
示例5: cdhit_clusters_from_seqs
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import getIntMap [as 别名]
def cdhit_clusters_from_seqs(seqs, moltype, params=None):
    """Return the CD-HIT clusters for seqs as lists of original seq ids.

    seqs: dict-like collection of sequences.
    moltype: cogent.core.moltype object.
    params: dict of cd-hit parameters. The caller's dict is not modified;
        if '-o' is absent, a temporary output name is used.

    NOTE: CD_HIT is used if moltype is PROTEIN, CD_HIT_EST if moltype is
    RNA or DNA; ValueError is raised for any other moltype.
    """
    seqs = SequenceCollection(seqs, MolType=moltype)
    # cd-hit is run on abbreviated IDs; int_keys maps abbreviated ID ->
    # original ID so the clusters can be relabelled.
    int_map, int_keys = seqs.getIntMap()
    int_map = SequenceCollection(int_map, MolType=moltype)

    # Copy params so that adding '-o' never leaks into the caller's dict
    # (which would make later calls reuse the same output path).
    if params is None:
        params = {}
    else:
        params = dict(params)
    if '-o' not in params:
        params['-o'] = get_tmp_filename()

    # Call the correct version of cd-hit based on moltype.
    working_dir = get_tmp_filename()
    if moltype is PROTEIN:
        app_class = CD_HIT
    elif moltype is RNA or moltype is DNA:
        app_class = CD_HIT_EST
    else:
        raise ValueError("Moltype must be either PROTEIN, RNA, or DNA")
    app = app_class(WorkingDir=working_dir, params=params)

    res = app(int_map.toFasta())
    try:
        clusters = parse_cdhit_clstr_file(res['CLSTR'].readlines())
        remapped_clusters = [[int_keys[i] for i in c] for c in clusters]
    finally:
        # Clean up everything the run left behind, even if parsing or
        # remapping fails.
        res.cleanUp()
        shutil.rmtree(working_dir)
        remove(params['-o'] + '.bak.clstr')
    return remapped_clusters
示例6: build_tree_from_alignment
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import getIntMap [as 别名]
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Return a tree built by Muscle from an alignment.

    aln: a cogent.core.alignment.Alignment object, or data that can be used
        to build one.
    moltype: cogent.core.moltype.MolType object; its label is passed to
        Muscle as '-seqtype'.
    best_tree: unsupported (accepted but ignored).
    params: dict of parameters to pass in to the Muscle app controller.

    Returns the tree parsed from Muscle's 'Tree1Out' result (built with the
    PhyloNode constructor), with the original sequence names restored on
    the tips.
    """
    # Create instance of app controller: cluster only, writing the first
    # tree to a temp file inside the working directory.
    app = Muscle(InputHandler='_input_as_multiline_string', params=params,
                 WorkingDir='/tmp')
    app.Parameters['-clusteronly'].on()
    app.Parameters['-tree1'].on(get_tmp_filename(app.WorkingDir))
    app.Parameters['-seqtype'].on(moltype.label)

    seq_collection = SequenceCollection(aln, MolType=moltype)
    # Muscle is run on abbreviated IDs; int_keys maps abbreviated ID ->
    # original ID.
    int_map, int_keys = seq_collection.getIntMap()
    int_map = SequenceCollection(int_map, MolType=moltype)

    result = app(int_map.toFasta())
    try:
        tree = DndParser(result['Tree1Out'].read(), constructor=PhyloNode)
        for tip in tree.tips():
            tip.Name = int_keys[tip.Name]
    finally:
        # Release result files even if parsing or renaming fails.
        result.cleanUp()
    return tree
示例7: align_unaligned_seqs
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import getIntMap [as 别名]
def align_unaligned_seqs(seqs, moltype, params=None, accurate=False):
    """Align unaligned sequences with Mafft and return an Alignment.

    seqs: SequenceCollection object, or data that can be used to build one.
    moltype: MolType object; its upper-cased label selects the Mafft
        moltype switch through MOLTYPE_MAP.
    params: dict of parameters to pass in to the Mafft app controller.
    accurate: if True, turns on '--globalpair' and sets '--maxiterate' to
        1000 (more accurate alignment, sacrificing performance).

    Returns an Alignment keyed by the original sequence identifiers.
    """
    seq_collection = SequenceCollection(seqs, MolType=moltype)
    # Mafft is run on abbreviated IDs; int_keys maps abbreviated ID ->
    # original ID.
    int_map, int_keys = seq_collection.getIntMap()
    int_map = SequenceCollection(int_map, MolType=moltype)

    app = Mafft(InputHandler='_input_as_multiline_string', params=params)
    # Turn on correct moltype.
    moltype_string = moltype.label.upper()
    app.Parameters[MOLTYPE_MAP[moltype_string]].on()
    # Do not report progress.
    app.Parameters['--quiet'].on()
    if accurate:
        app.Parameters['--globalpair'].on()
        app.Parameters['--maxiterate'].Value = 1000

    res = app(int_map.toFasta())
    try:
        alignment = dict(parse_fasta(res['StdOut']))
        # Restore the original identifiers.
        new_alignment = dict((int_keys[k], v) for k, v in alignment.items())
        return Alignment(new_alignment, MolType=moltype)
    finally:
        # Release result files even if parsing or remapping fails.
        res.cleanUp()
示例8: align_unaligned_seqs
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import getIntMap [as 别名]
def align_unaligned_seqs(seqs, moltype, params=None):
    """Align unaligned sequences with Muscle and return an Alignment.

    seqs: SequenceCollection object, or data that can be used to build one.
    moltype: a MolType object. DNA, RNA, or PROTEIN.
    params: dict of parameters to pass in to the Muscle app controller.
        The caller's dict is not modified; '-out' is always replaced by a
        temporary filename.

    Returns an Alignment keyed by the original sequence identifiers.
    """
    # Copy params so that setting '-out' never leaks into the caller's dict.
    if params:
        params = dict(params)
    else:
        params = {}

    seq_collection = SequenceCollection(seqs, MolType=moltype)
    # Muscle is run on abbreviated IDs; int_keys maps abbreviated ID ->
    # original ID.
    int_map, int_keys = seq_collection.getIntMap()
    int_map = SequenceCollection(int_map, MolType=moltype)

    # Send Muscle's output to a temporary file.
    params.update({'-out': get_tmp_filename()})
    app = Muscle(InputHandler='_input_as_multiline_string', params=params)

    res = app(int_map.toFasta())
    try:
        alignment = dict(MinimalFastaParser(res['MuscleOut'].readlines()))
        # Restore the original identifiers.
        new_alignment = dict((int_keys[k], v) for k, v in alignment.items())
        return Alignment(new_alignment, MolType=moltype)
    finally:
        # Release result files even if parsing or remapping fails.
        res.cleanUp()
示例9: align_two_alignments
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import getIntMap [as 别名]
def align_two_alignments(aln1, aln2, params=None):
    """Align two existing alignments to each other with Muscle profiles.

    aln1, aln2: cogent.core.alignment.Alignment objects, or data that can
        be used to build them.
    params: dict of parameters to pass in to the Muscle app controller.
        The caller's dict is not modified; '-out' and '-profile' are always
        set by this function.

    Returns an Alignment keyed by the original identifiers of both inputs.
    """
    # Copy params so the '-out'/'-profile' settings never leak into the
    # caller's dict.
    if params:
        params = dict(params)
    else:
        params = {}

    # Abbreviate the IDs of each input under its own prefix so the two sets
    # cannot collide; *_int_keys map abbreviated ID -> original ID.
    aln1_collection = SequenceCollection(aln1)
    aln1_int_map, aln1_int_keys = aln1_collection.getIntMap(prefix='aln1_')
    aln1_int_map = SequenceCollection(aln1_int_map)

    aln2_collection = SequenceCollection(aln2)
    aln2_int_map, aln2_int_keys = aln2_collection.getIntMap(prefix='aln2_')
    aln2_int_map = SequenceCollection(aln2_int_map)

    # Set output and profile options.
    params.update({'-out': get_tmp_filename(), '-profile': True})

    written = []  # temp input files that exist and must be removed
    try:
        # Save each relabelled alignment to its own temp FASTA file.
        input_paths = []
        for collection in (aln1_int_map, aln2_int_map):
            path = get_tmp_filename()
            handle = open(path, 'w')
            written.append(path)
            try:
                handle.write(collection.toFasta())
            finally:
                handle.close()
            input_paths.append(path)

        # Create Muscle app and get results.
        app = Muscle(InputHandler='_input_as_multifile', params=params)
        res = app(tuple(input_paths))
        try:
            alignment = dict(
                MinimalFastaParser(res['MuscleOut'].readlines()))
            # Restore the original identifiers from whichever table knows
            # the abbreviated ID.
            new_alignment = {}
            for k, v in alignment.items():
                if k in aln1_int_keys:
                    new_alignment[aln1_int_keys[k]] = v
                else:
                    new_alignment[aln2_int_keys[k]] = v
            new_alignment = Alignment(new_alignment)
        finally:
            res.cleanUp()
    finally:
        # Remove the temp inputs even when the run or parsing fails.
        for path in written:
            remove(path)
    return new_alignment
示例10: __call__
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import getIntMap [as 别名]
def __call__(self, seq_path, result_path=None, log_path=None,
             failure_path=None, cmbuild_params=None, cmalign_params=None):
    """Align the sequences in seq_path to the template with Infernal.

    seq_path: path to a FASTA file of candidate sequences.
    result_path: if given, the alignment is written there as FASTA and
        None is returned; otherwise the Alignment object is returned.
    log_path: if given, the cmbuild/cmalign parameters are written there.
    failure_path: accepted but not used by this method.
    cmbuild_params: dict of extra cmbuild parameters; '--gapthresh' is
        always set to 1.0. The caller's dict is not modified.
    cmalign_params: dict of extra cmalign parameters; '--sub' and
        '--gapthresh' are always set. The caller's dict is not modified.

    Raises ValueError if parsing the template file raises RecordError.
    """
    log_params = []

    # Load candidate sequences, making sure the handle is closed.
    seq_file = open(seq_path, 'U')
    try:
        candidate_sequences = dict(MinimalFastaParser(seq_file))
    finally:
        seq_file.close()

    # Load template alignment and structure (first record only).
    template_file = open(self.Params['template_filepath'], 'U')
    try:
        try:
            info, template_alignment, struct = list(MinimalRfamParser(
                template_file, seq_constructor=ChangedSequence))[0]
        except RecordError:
            raise ValueError("Template alignment must be in Stockholm format with corresponding secondary structure annotation when using InfernalAligner.")
    finally:
        template_file.close()

    moltype = self.Params['moltype']

    # Need to make separate mapping for unaligned sequences; int_keys maps
    # abbreviated ID -> original ID.
    unaligned = SequenceCollection(candidate_sequences, MolType=moltype)
    int_map, int_keys = unaligned.getIntMap(prefix='unaligned_')
    int_map = SequenceCollection(int_map, MolType=moltype)

    # Turn on --gapthresh option in cmbuild to force alignment to full
    # model. Work on a copy so the caller's dict is left untouched.
    if cmbuild_params is None:
        cmbuild_params = {}
    else:
        cmbuild_params = dict(cmbuild_params)
    cmbuild_params.update({'--gapthresh': 1.0})

    # Record cmbuild parameters.
    log_params.append('cmbuild parameters:')
    log_params.append(str(cmbuild_params))

    # Turn on --sub option in Infernal, since we know the unaligned
    # sequences are fragments. Also turn on --gapthresh to use same
    # gapthresh as was used to build model.
    if cmalign_params is None:
        cmalign_params = {}
    else:
        cmalign_params = dict(cmalign_params)
    cmalign_params.update({'--sub': True, '--gapthresh': 1.0})

    # Record cmalign parameters.
    log_params.append('cmalign parameters:')
    log_params.append(str(cmalign_params))

    # Align sequences to alignment including alignment gaps.
    aligned, struct_string = cmalign_from_alignment(
        aln=template_alignment,
        structure_string=struct,
        seqs=int_map,
        moltype=moltype,
        include_aln=True,
        params=cmalign_params,
        cmbuild_params=cmbuild_params)

    # Pull out the candidate sequences from the full alignment, restoring
    # their original identifiers.
    aligned_dict = aligned.NamedSeqs
    infernal_aligned = {}
    for key in int_map.Names:
        infernal_aligned[int_keys.get(key, key)] = aligned_dict[key]
    infernal_aligned = Alignment(infernal_aligned, MolType=moltype)

    if log_path is not None:
        log_file = open(log_path, 'w')
        try:
            log_file.write('\n'.join(log_params))
        finally:
            log_file.close()

    if result_path is not None:
        result_file = open(result_path, 'w')
        try:
            result_file.write(infernal_aligned.toFasta())
        finally:
            result_file.close()
        return None

    # Returning a local cannot raise, so the former try/except ValueError
    # wrapper around this return was dead code.
    return infernal_aligned
示例11: add_seqs_to_alignment
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import getIntMap [as 别名]
def add_seqs_to_alignment(seqs, aln, moltype, params=None, accurate=False):
    """Add sequences to an existing alignment using Mafft's seed option.

    seqs: SequenceCollection object, or data that can be used to build one.
    aln: a cogent.core.alignment.Alignment object, or data that can be used
        to build one; it is supplied to Mafft as the '--seed' alignment.
    moltype: MolType object; its upper-cased label selects the Mafft
        moltype switch through MOLTYPE_MAP.
    params: dict of parameters to pass in to the Mafft app controller.
    accurate: if True, turns on '--globalpair' and sets '--maxiterate' to
        1000 (more accurate alignment, sacrificing performance).

    Returns an Alignment keyed by the original identifiers of both inputs.
    """
    # Abbreviated IDs for the new sequences (default prefix).
    seq_collection = SequenceCollection(seqs, MolType=moltype)
    seq_int_map, seq_int_keys = seq_collection.getIntMap()
    seq_int_map = SequenceCollection(seq_int_map, MolType=moltype)

    # Abbreviated IDs for the existing alignment, under a distinct prefix.
    aln = Alignment(aln, MolType=moltype)
    aln_int_map, aln_int_keys = aln.getIntMap(prefix='seqn_')
    aln_int_map = Alignment(aln_int_map, MolType=moltype)

    # One lookup table (abbreviated ID -> original ID) for both inputs.
    seq_int_keys.update(aln_int_keys)

    app = Mafft(InputHandler='_input_as_multiline_string',
                params=params,
                SuppressStderr=True)
    # Turn on correct moltype.
    moltype_string = moltype.label.upper()
    app.Parameters[MOLTYPE_MAP[moltype_string]].on()
    # Do not report progress.
    app.Parameters['--quiet'].on()

    # Add aln_int_map as seed alignment; this writes a temp file that this
    # function is responsible for removing.
    app.Parameters['--seed'].on(
        app._tempfile_as_multiline_string(aln_int_map.toFasta()))
    try:
        if accurate:
            app.Parameters['--globalpair'].on()
            app.Parameters['--maxiterate'].Value = 1000

        res = app(seq_int_map.toFasta())
        try:
            alignment = dict(parse_fasta(res['StdOut']))
            # Strip the '_seed_' marker from the labels before looking up
            # the original identifiers.
            new_alignment = {}
            for k, v in alignment.items():
                key = k.replace('_seed_', '')
                new_alignment[seq_int_keys[key]] = v
            new_alignment = Alignment(new_alignment, MolType=moltype)
        finally:
            res.cleanUp()
    finally:
        # Remove the seed temp file even when the run or parsing fails.
        remove(app.Parameters['--seed'].Value)
    return new_alignment
示例12: add_seqs_to_alignment
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import getIntMap [as 别名]
def add_seqs_to_alignment(seqs, aln, params=None):
    """Add sequences to an existing alignment using Muscle profiles.

    seqs: a cogent.core.alignment.SequenceCollection object, or data that
        can be used to build one.
    aln: a cogent.core.alignment.Alignment object, or data that can be used
        to build one.
    params: dict of parameters to pass in to the Muscle app controller.
        The caller's dict is not modified; '-out' and '-profile' are always
        set by this function.

    Returns an Alignment keyed by the original identifiers of both inputs.
    """
    # Copy params so the '-out'/'-profile' settings never leak into the
    # caller's dict.
    if params:
        params = dict(params)
    else:
        params = {}

    # Abbreviate the IDs of each input under its own prefix so the two sets
    # cannot collide; *_int_keys map abbreviated ID -> original ID.
    seqs_collection = SequenceCollection(seqs)
    seqs_int_map, seqs_int_keys = seqs_collection.getIntMap(prefix='seq_')
    seqs_int_map = SequenceCollection(seqs_int_map)

    aln_collection = SequenceCollection(aln)
    aln_int_map, aln_int_keys = aln_collection.getIntMap(prefix='aln_')
    aln_int_map = SequenceCollection(aln_int_map)

    # Set output and profile options.
    params.update({'-out': get_tmp_filename(), '-profile': True})

    written = []  # temp input files that exist and must be removed
    try:
        # Save the relabelled sequences, then the relabelled alignment,
        # each to its own temp FASTA file.
        seqs_filename = get_tmp_filename()
        seqs_out = open(seqs_filename, 'w')
        written.append(seqs_filename)
        try:
            seqs_out.write(seqs_int_map.toFasta())
        finally:
            seqs_out.close()

        aln_filename = get_tmp_filename()
        aln_out = open(aln_filename, 'w')
        written.append(aln_filename)
        try:
            aln_out.write(aln_int_map.toFasta())
        finally:
            aln_out.close()

        # Create Muscle app and get results (alignment first, then seqs).
        app = Muscle(InputHandler='_input_as_multifile', params=params,
                     WorkingDir=tempfile.gettempdir())
        res = app((aln_filename, seqs_filename))
        try:
            alignment = dict(parse_fasta(res['MuscleOut']))
            # Restore the original identifiers from whichever table knows
            # the abbreviated ID.
            new_alignment = {}
            for k, v in alignment.items():
                if k in seqs_int_keys:
                    new_alignment[seqs_int_keys[k]] = v
                else:
                    new_alignment[aln_int_keys[k]] = v
            new_alignment = Alignment(new_alignment)
        finally:
            res.cleanUp()
    finally:
        # Remove the temp inputs even when the run or parsing fails.
        for path in written:
            remove(path)
    return new_alignment
示例13: create_locarnap_alignment
# 需要导入模块: from cogent.core.alignment import SequenceCollection [as 别名]
# 或者: from cogent.core.alignment.SequenceCollection import getIntMap [as 别名]
def create_locarnap_alignment(seqs, moltype, struct=False, params=None):
    """Return mlocarna (LocARNA-P) results for an unaligned collection.

    seqs: a SequenceCollection object or something that behaves like one.
    moltype: cogent.core.moltype object.
    struct: if True, also return the structure string collected from the
        'ProbabilisticAlignment' result file.
    params: dict of parameters to pass in to the MLocarna app controller.

    Returns an Alignment keyed by the original identifiers, or the tuple
    (Alignment, structure) when struct is True.
    """
    seqs = SequenceCollection(seqs, MolType=moltype)
    # mlocarna is run on abbreviated IDs; int_keys maps abbreviated ID ->
    # original ID.
    int_map, int_keys = seqs.getIntMap()
    int_map = SequenceCollection(int_map, MolType=moltype)

    app = MLocarna(InputHandler='_input_as_multiline_string', params=params)
    # Temporary directory that receives all mlocarna files.
    mlocarna_dir = get_tmp_filename(suffix='')
    app.Parameters['--tgtdir'].on(mlocarna_dir)
    # Set parameters to run locarna-p.
    app.Parameters['--write-structure'].on()
    app.Parameters['--probabilistic'].on()
    app.Parameters['--consistency-transformation'].on()

    res = app(int_map.toFasta())
    try:
        if struct:
            structfile = open(res['ProbabilisticAlignment'].name, 'U')
            try:
                pieces = []
                # The structure is written both above and below each
                # alignment block, so keep only every other structure line.
                newstrline = True
                for line in structfile:
                    line = line.strip()
                    # Structure lines start with "." or "(".
                    if line[:1] in (".", "("):
                        if newstrline:
                            pieces.append(line)
                        newstrline = not newstrline
            finally:
                structfile.close()
            structure = "".join(pieces)

        aligned = dict(ClustalParser(res['ClustalAlignment']))
        # Restore the original identifiers; unknown labels are kept as is.
        new_alignment = dict(
            (int_keys.get(k, k), v) for k, v in aligned.items())
        new_alignment = Alignment(new_alignment, MolType=moltype)
    finally:
        # Clean up after MLocARNA even if parsing fails.
        res.cleanUp()
        shutil.rmtree(mlocarna_dir)

    # Output alignment and structure if asked for, else just the alignment.
    if struct:
        return new_alignment, structure
    return new_alignment