本文整理汇总了Python中Bio.Align.Applications.MuscleCommandline.diags方法的典型用法代码示例。如果您正苦于以下问题:Python MuscleCommandline.diags方法的具体用法?Python MuscleCommandline.diags怎么用?Python MuscleCommandline.diags使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Bio.Align.Applications.MuscleCommandline
的用法示例。
在下文中一共展示了MuscleCommandline.diags方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: GetExec
# 需要导入模块: from Bio.Align.Applications import MuscleCommandline [as 别名]
# 或者: from Bio.Align.Applications.MuscleCommandline import diags [as 别名]
def GetExec(self, optList, frame):
# Respond to the "muscle" command.
self.frame = frame
plugin_exe = r"C:/Program Files (x86)/py27/Lib/site-packages/Muscle.exe"
self.outfile=r".\plugins\muscle.txt"
self.outtype="fasta"
cline = MuscleCommandline(plugin_exe,out=self.outfile)
if '1ProfileCheck' in self.frame.paramBoxes:
if self.frame.paramBoxes['1ProfileCheck'].GetValue():
cline.profile = True
cline.in1 = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\my_seq.fasta"
cline.in2 = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\my_seq.fasta"
else:
cline.input = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\my_seq.fasta"
if '1DiagCheck' in self.frame.paramBoxes:
if self.frame.paramBoxes['1DiagCheck'].GetValue():
cline.diags=True
if "DiagLenSpin" in self.frame.paramBoxes:
cline.diaglength=int(self.frame.paramBoxes["DiagLenSpin"])
if "DiagMargSpin" in self.frame.paramBoxes:
cline.diaglength=int(self.frame.paramBoxes["DiagMargSpin"])
if "DiagBreakSpin" in self.frame.paramBoxes:
cline.diaglength=int(self.frame.paramBoxes["DiagBreakSpin"])
elif "GapPenSpin" in self.frame.paramBoxes:
cline.gapopen=float(self.frame.paramBoxes["GapPenSpin"].GetValue())
else:
cline.input=r"C:\Users\francis\Documents\Monguis\BioGui\plugins\my_seq.fasta"
if self.frame.abet=="AA":
cline.seqtype="protein"
elif self.frame.abet=="DNA" or self.frame.abet=="RNA":
cline.seqtype="nucleo"
else:
cline.seqtype="auto"
if self.frame.options:
cline.objscore=str(self.boxList[9].GetValue())
cline.weight1=str(self.boxList[13].GetValue())
cline.weight2=str(self.boxList[15].GetValue())
cline.anchorspacing=int(self.boxList[17].GetValue())
cline.center=float(self.boxList[19].GetValue())
cline.hydro=int(self.boxList[21].GetValue())
cline.hydrofactor=float(self.boxList[23].GetValue())
cline.maxhours=float(self.boxList[25].GetValue())
cline.maxiters=int(self.boxList[27].GetValue())
cline.maxtrees=int(self.boxList[29].GetValue())
cline.minbestcolscore=float(self.boxList[31].GetValue())
cline.minsmoothscore=float(self.boxList[33].GetValue())
cline.smoothscoreceil=float(self.boxList[35].GetValue())
cline.smoothwindow=int(self.boxList[37].GetValue())
cline.sueff=float(self.boxList[39].GetValue())
return str(cline)
示例2: quickAlign
# 需要导入模块: from Bio.Align.Applications import MuscleCommandline [as 别名]
# 或者: from Bio.Align.Applications.MuscleCommandline import diags [as 别名]
def quickAlign( refseq, testseq, maxiters=None, diags=None, gapopen=None ):
#sanity check
refseq = re.sub( "-", "", str(refseq) )
testseq = re.sub( "-", "", str(testseq) )
handle = StringIO()
handle.write( ">ref\n%s\n>test\n%s\n"%(refseq,testseq) )
data = handle.getvalue()
muscle_cline = MuscleCommandline(cmd=muscle, quiet=True)
if maxiters is not None: muscle_cline.maxiters = maxiters
if diags is not None: muscle_cline.diags = diag
if gapopen is not None: muscle_cline.gapopen = gapopen
stdout, stderr = muscle_cline(stdin=data)
aligned = dict()
for p in SeqIO.parse(StringIO(stdout), "fasta"):
aligned[ p.id ] = str(p.seq)
return aligned
示例3: buildGSSP
# 需要导入模块: from Bio.Align.Applications import MuscleCommandline [as 别名]
# 或者: from Bio.Align.Applications.MuscleCommandline import diags [as 别名]
def buildGSSP( vgene ):
results = []
if len(masterList[vgene]) < arguments["--numSequences"]:
print( "Skipping %s, not enough sequences (%d)..." % ( vgene, len(masterList[vgene]) ) )
return []
if vgene not in germList:
print( "Skipping %s, it's not in the germline database..." %vgene )
return []
# Take random overlapping subsets to generate multiple profiles
# need to add back a sanity check for capping the number of subsets if there's not enough raw data.
numProfiles = arguments['--profiles']
if arguments["--profiles"] == 0:
numProfiles = 1
success = 0
for i in range(numProfiles):
seqs = [] + germList[vgene] #force a copy rather than an alias
if arguments["--profiles"] == 0:
seqs += list(masterList[vgene])
else:
#get our sequence subset, add the germlines, and write them
# to a temporary file for alignment
seqs += list(numpy.random.choice(masterList[vgene], size=arguments["--numSequences"], replace=False))
tempFile = "%s/work/mGSSP/%s_profileBuilder" % (prj_tree.home, vgene)
with open("%s.fa"%tempFile, "w") as temp:
SeqIO.write(seqs,temp,"fasta")
muscle_cline = MuscleCommandline(cmd=muscle, input="%s.fa"%tempFile, out="%s.aln"%tempFile)
#try to speed up the process a little bit for large datasets
#still going to max out at ~50k seqs per profile (probably)
muscle_cline.maxiters = 2
muscle_cline.diags = True
try:
stdout, stderr = muscle_cline()
except:
print( "Error in alignment #%d for %s (skipping)" % (i+1, vgene) )
for f in glob.glob("%s.*"%tempFile):
os.remove(f)
continue
alignment = AlignIO.read("%s.aln"%tempFile, "fasta")#"clustal")
success += 1
#Input order is not maintained, so we need a little
# kludge to find a germline sequences. Use the
# first one to remove any insertions from the alignment
germRow = 0
for n, rec in enumerate(alignment):
if rec.id in [g.id for g in germList[vgene]]:
germRow = n
break
#look for gaps one at a time so we don't get tripped up by shifting indices
gap = re.search( "-+", str(alignment[germRow].seq) )
while (gap):
alignment = alignment[:, 0:gap.start()] + alignment[:, gap.end():]
gap = re.search( "-+", str(alignment[germRow].seq) )
#Now we get BioPython to make a PSSM for us. To convert that into
# a mutability profile, we will delete the germline residue[s]
# at each position (but save what they were)
germRes = defaultdict(Counter)
summary_align = AlignInfo.SummaryInfo(alignment)
pssm = summary_align.pos_specific_score_matrix(chars_to_ignore=['-','X'])
#get number of datapoints at each position (might be different than the number of sequences in the profile if there are gaps or missing data
# do this by using sum(pos.values()) after ignoring missing data (previous line) but before dumping germline residues.
denominator = []
for p,pos in enumerate(pssm):
denominator.append( sum(pos.values()) - len(germList[vgene]) )
for germ in germList[vgene]:
for pos, residue in enumerate(germ):
if residue == "X":
continue
germRes[pos][residue] += 1
pssm[pos][residue] = 0
#normalize and save
for p, pos in enumerate(pssm):
germAA = ",".join([ x[0] for x in germRes[p].most_common() ])
results.append( [ vgene, i+1, p+1, germAA, "None" if (p < mask[vgene] or denominator[p] < arguments["--numSequences"]) else "%.5f"%(sum(pos.values())/denominator[p]) ] + [ "%.5f"%(pos.get(r,0)/sum(pos.values())) if sum(pos.values()) > 0 else "0.00" for r in aa_list ] )
#clean up
for f in glob.glob("%s.*"%tempFile):
os.remove(f)
print( "Successfully built %d/%d profiles for %s using %d sequences!" % ( success, numProfiles, vgene, len(seqs)-len(germList[vgene]) ) )
return results
示例4: main
# 需要导入模块: from Bio.Align.Applications import MuscleCommandline [as 别名]
# 或者: from Bio.Align.Applications.MuscleCommandline import diags [as 别名]
def main():
global inFile, lookup
oldFiles = (
glob.glob("%s/infile" % prj_tree.phylo)
+ glob.glob("%s/outtree" % prj_tree.phylo)
+ glob.glob("%s/outfile" % prj_tree.phylo)
)
if len(oldFiles) > 0:
if force:
for f in oldFiles:
os.remove(f)
else:
sys.exit("Old files exist! Please use the -f flag to force overwrite.")
if doAlign:
# first create a working file to align and add the germline and natives
shutil.copyfile(
"%s/%s-collected.fa" % (prj_tree.nt, prj_name), "%s/%s_to_align.fa" % (prj_tree.phylo, prj_name)
)
handle = open("%s/%s_to_align.fa" % (prj_tree.phylo, prj_name), "a")
handle.write(">%s\n%s\n" % (germ_seq.id, germ_seq.seq))
for n in natives.values():
handle.write(">%s\n%s\n" % (n.id, n.seq))
handle.close()
# now run muscle
run_muscle = MuscleCommandline(
input="%s/%s_to_align.fa" % (prj_tree.phylo, prj_name), out="%s/%s_aligned.afa" % (prj_tree.phylo, prj_name)
)
run_muscle.maxiters = 2
run_muscle.diags = True
run_muscle.gapopen = -5000.0 # code requires a float
print run_muscle
run_muscle()
# thisVarHidesTheOutput = run_muscle()
# change inFile variable so that remaining code is the same for both cases
# It's probably really bad form to handle this in this way
inFile = "%s/%s_aligned.afa" % (prj_tree.phylo, prj_name)
# open the alignment to rename everything and find germline sequence
# rename is to avoid possible errors with DNAML from sequence ids that are too long
germ_pos = 1
with open(inFile, "rU") as handle:
if doAlign:
aln = AlignIO.read(handle, "fasta")
else:
try:
aln = AlignIO.read(handle, "phylip")
except:
sys.exit("Please make sure custom input is aligned and in PHYLIP format")
lookup = []
for seq in aln:
lookup.append(seq.id)
if re.search("(IG|VH|VK|VL|HV|KV|LV)", seq.id) is not None:
germ_pos = len(lookup)
seq.id = "%010d" % len(lookup)
with open("%s/infile" % prj_tree.phylo, "w") as output:
AlignIO.write(aln, output, "phylip")
# now generate script for DNAML
# J is "jumble" followed by random seed and number of times to repeat
# O is outgroup root, followed by position of the germline in the alignment
# 5 tells DNAML to do the ancestor inference
# Y starts the run
with open("%s/dnaml.in" % prj_tree.phylo, "w") as handle:
seed = random.randint(0, 1e10) * 2 + 1 # seed must be odd
handle.write("J\n%d\n3\nO\n%d\n5\nY\n" % (seed, germ_pos))
# change to work directory so DNAML finds "infile" and puts the output where we expect
os.chdir(prj_tree.phylo)
with open("%s/dnaml.in" % prj_tree.phylo, "rU") as pipe:
subprocess.call([DNAML], stdin=pipe)
# revert names in tree
with open("%s/outtree" % prj_tree.phylo, "rU") as intree:
mytree = intree.read()
fixedtree = re.sub("\d{10}", revertName, mytree)
with open("%s/%s.tree" % (prj_tree.out, prj_name), "w") as outtree:
outtree.write(fixedtree)
# revert names in out file
with open("%s/outfile" % prj_tree.phylo, "rU") as instuff:
mystuff = instuff.read()
fixedstuff = re.sub("\d{10}", revertName, mystuff)
with open("%s/%s.dnaml.out" % (prj_tree.logs, prj_name), "w") as outstuff:
outstuff.write(fixedstuff)
# clean up
os.remove("infile")
os.remove("outfile")
os.remove("outtree")