当前位置: 首页>>代码示例>>Python>>正文


Python MuscleCommandline.diags方法代码示例

本文整理汇总了Python中Bio.Align.Applications.MuscleCommandline.diags方法的典型用法代码示例。如果您正苦于以下问题:Python MuscleCommandline.diags方法的具体用法?Python MuscleCommandline.diags怎么用?Python MuscleCommandline.diags使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Bio.Align.Applications.MuscleCommandline的用法示例。


在下文中一共展示了MuscleCommandline.diags方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: GetExec

# 需要导入模块: from Bio.Align.Applications import MuscleCommandline [as 别名]
# 或者: from Bio.Align.Applications.MuscleCommandline import diags [as 别名]
 def GetExec(self, optList, frame):
     # Respond to the "muscle" command.
     self.frame = frame
     plugin_exe = r"C:/Program Files (x86)/py27/Lib/site-packages/Muscle.exe"
     self.outfile=r".\plugins\muscle.txt"
     self.outtype="fasta"
     cline = MuscleCommandline(plugin_exe,out=self.outfile)
     if '1ProfileCheck' in self.frame.paramBoxes:
         if self.frame.paramBoxes['1ProfileCheck'].GetValue():
             cline.profile = True
             cline.in1 = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\my_seq.fasta"
             cline.in2 = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\my_seq.fasta"
         else:
             cline.input = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\my_seq.fasta"
     if '1DiagCheck' in self.frame.paramBoxes:
         if self.frame.paramBoxes['1DiagCheck'].GetValue():
             cline.diags=True
             if "DiagLenSpin" in self.frame.paramBoxes:
                 cline.diaglength=int(self.frame.paramBoxes["DiagLenSpin"])
             if "DiagMargSpin" in self.frame.paramBoxes:
                 cline.diaglength=int(self.frame.paramBoxes["DiagMargSpin"])
             if "DiagBreakSpin" in self.frame.paramBoxes:
                 cline.diaglength=int(self.frame.paramBoxes["DiagBreakSpin"])
         elif "GapPenSpin" in self.frame.paramBoxes:
             cline.gapopen=float(self.frame.paramBoxes["GapPenSpin"].GetValue())
         else:
             cline.input=r"C:\Users\francis\Documents\Monguis\BioGui\plugins\my_seq.fasta"
     if self.frame.abet=="AA":
         cline.seqtype="protein"
     elif self.frame.abet=="DNA" or self.frame.abet=="RNA":
         cline.seqtype="nucleo"
     else:
         cline.seqtype="auto"
     
     
     if self.frame.options:
         cline.objscore=str(self.boxList[9].GetValue())
         cline.weight1=str(self.boxList[13].GetValue())
         cline.weight2=str(self.boxList[15].GetValue())
         cline.anchorspacing=int(self.boxList[17].GetValue())
         cline.center=float(self.boxList[19].GetValue())
         cline.hydro=int(self.boxList[21].GetValue())
         cline.hydrofactor=float(self.boxList[23].GetValue())
         cline.maxhours=float(self.boxList[25].GetValue())
         cline.maxiters=int(self.boxList[27].GetValue())
         cline.maxtrees=int(self.boxList[29].GetValue())
         cline.minbestcolscore=float(self.boxList[31].GetValue())
         cline.minsmoothscore=float(self.boxList[33].GetValue())
         cline.smoothscoreceil=float(self.boxList[35].GetValue())
         cline.smoothwindow=int(self.boxList[37].GetValue())
         cline.sueff=float(self.boxList[39].GetValue())
     
     return str(cline)
开发者ID:fxb22,项目名称:BioGUI,代码行数:55,代码来源:MUSCLE23.py

示例2: quickAlign

# 需要导入模块: from Bio.Align.Applications import MuscleCommandline [as 别名]
# 或者: from Bio.Align.Applications.MuscleCommandline import diags [as 别名]
def quickAlign( refseq, testseq, maxiters=None, diags=None, gapopen=None ):
    
	#sanity check
	refseq	= re.sub( "-", "", str(refseq) )
	testseq = re.sub( "-", "", str(testseq) )

	handle = StringIO()
	handle.write( ">ref\n%s\n>test\n%s\n"%(refseq,testseq) )
	data = handle.getvalue()

	muscle_cline = MuscleCommandline(cmd=muscle, quiet=True)
	if maxiters is not None: muscle_cline.maxiters = maxiters
	if diags    is not None: muscle_cline.diags    = diag
	if gapopen  is not None: muscle_cline.gapopen  = gapopen

	stdout, stderr = muscle_cline(stdin=data)

	aligned = dict()
	for p in SeqIO.parse(StringIO(stdout), "fasta"):
		aligned[ p.id ] = str(p.seq)
	return aligned
开发者ID:scharch,项目名称:SONAR,代码行数:23,代码来源:__init__.py

示例3: buildGSSP

# 需要导入模块: from Bio.Align.Applications import MuscleCommandline [as 别名]
# 或者: from Bio.Align.Applications.MuscleCommandline import diags [as 别名]
def buildGSSP( vgene ):

	results = []

	if len(masterList[vgene]) < arguments["--numSequences"]:
		print( "Skipping %s, not enough sequences (%d)..." % ( vgene, len(masterList[vgene]) ) )
		return []
		
	if vgene not in germList:
		print( "Skipping %s, it's not in the germline database..." %vgene )
		return []

	# Take random overlapping subsets to generate multiple profiles
	#  need to add back a sanity check for capping the number of subsets if there's not enough raw data.
	numProfiles = arguments['--profiles']
	if arguments["--profiles"] == 0:
		numProfiles = 1

	success = 0
		
	for i in range(numProfiles):
		seqs = [] + germList[vgene] #force a copy rather than an alias
		if arguments["--profiles"] == 0:
			seqs += list(masterList[vgene])
		else:
			#get our sequence subset, add the germlines, and write them
			#   to a temporary file for alignment
			seqs += list(numpy.random.choice(masterList[vgene], size=arguments["--numSequences"], replace=False))

		tempFile = "%s/work/mGSSP/%s_profileBuilder" % (prj_tree.home, vgene)
		with open("%s.fa"%tempFile, "w") as temp:
			SeqIO.write(seqs,temp,"fasta")

		muscle_cline = MuscleCommandline(cmd=muscle, input="%s.fa"%tempFile, out="%s.aln"%tempFile)

		#try to speed up the process a little bit for large datasets
		#still going to max out at ~50k seqs per profile (probably)
		muscle_cline.maxiters	= 2
		muscle_cline.diags	= True

		try:
			stdout, stderr = muscle_cline()
		except:
			print( "Error in alignment #%d for %s (skipping)" % (i+1, vgene) )
			for f in glob.glob("%s.*"%tempFile): 
				os.remove(f)
			continue

		alignment = AlignIO.read("%s.aln"%tempFile, "fasta")#"clustal")
		success += 1

		#Input order is not maintained, so we need a little
		#   kludge to find a germline sequences. Use the 
		#   first one to remove any insertions from the alignment
		germRow = 0
		for n, rec in enumerate(alignment):
			if rec.id in [g.id for g in germList[vgene]]:
				germRow = n
				break

		#look for gaps one at a time so we don't get tripped up by shifting indices
		gap = re.search( "-+", str(alignment[germRow].seq) )
		while (gap):
			alignment = alignment[:, 0:gap.start()] + alignment[:, gap.end():]
			gap = re.search( "-+", str(alignment[germRow].seq) )
		
		#Now we get BioPython to make a PSSM for us. To convert that into
		#    a mutability profile, we will delete the germline residue[s]
		#    at each position (but save what they were)
		germRes = defaultdict(Counter)
		summary_align = AlignInfo.SummaryInfo(alignment)
		pssm = summary_align.pos_specific_score_matrix(chars_to_ignore=['-','X'])

		#get number of datapoints at each position (might be different than the number of sequences in the profile if there are gaps or missing data
		# do this by using sum(pos.values()) after ignoring missing data (previous line) but before dumping germline residues.
		denominator = []
		for p,pos in enumerate(pssm):
			denominator.append( sum(pos.values()) - len(germList[vgene]) )
    
		for germ in germList[vgene]:
			for pos, residue in enumerate(germ):
				if residue == "X":
					continue
				germRes[pos][residue] += 1
				pssm[pos][residue] = 0

		#normalize and save
		for p, pos in enumerate(pssm):
			germAA = ",".join([ x[0] for x in germRes[p].most_common() ])
			results.append( [ vgene, i+1, p+1, germAA, "None" if (p < mask[vgene] or denominator[p] < arguments["--numSequences"]) else "%.5f"%(sum(pos.values())/denominator[p]) ] + [ "%.5f"%(pos.get(r,0)/sum(pos.values())) if sum(pos.values()) > 0 else "0.00" for r in aa_list ] )
	    
		#clean up
		for f in glob.glob("%s.*"%tempFile): 
			os.remove(f)

	print( "Successfully built %d/%d profiles for %s using %d sequences!" % ( success, numProfiles, vgene, len(seqs)-len(germList[vgene]) ) )
	return results
开发者ID:scharch,项目名称:SONAR,代码行数:99,代码来源:5.3-make_profiles.py

示例4: main

# 需要导入模块: from Bio.Align.Applications import MuscleCommandline [as 别名]
# 或者: from Bio.Align.Applications.MuscleCommandline import diags [as 别名]
def main():

    global inFile, lookup

    oldFiles = (
        glob.glob("%s/infile" % prj_tree.phylo)
        + glob.glob("%s/outtree" % prj_tree.phylo)
        + glob.glob("%s/outfile" % prj_tree.phylo)
    )
    if len(oldFiles) > 0:
        if force:
            for f in oldFiles:
                os.remove(f)
        else:
            sys.exit("Old files exist! Please use the -f flag to force overwrite.")

    if doAlign:

        # first create a working file to align and add the germline and natives
        shutil.copyfile(
            "%s/%s-collected.fa" % (prj_tree.nt, prj_name), "%s/%s_to_align.fa" % (prj_tree.phylo, prj_name)
        )
        handle = open("%s/%s_to_align.fa" % (prj_tree.phylo, prj_name), "a")
        handle.write(">%s\n%s\n" % (germ_seq.id, germ_seq.seq))
        for n in natives.values():
            handle.write(">%s\n%s\n" % (n.id, n.seq))
        handle.close()

        # now run muscle
        run_muscle = MuscleCommandline(
            input="%s/%s_to_align.fa" % (prj_tree.phylo, prj_name), out="%s/%s_aligned.afa" % (prj_tree.phylo, prj_name)
        )
        run_muscle.maxiters = 2
        run_muscle.diags = True
        run_muscle.gapopen = -5000.0  # code requires a float
        print run_muscle
        run_muscle()
        # thisVarHidesTheOutput = run_muscle()

        # change inFile variable so that remaining code is the same for both cases
        # It's probably really bad form to handle this in this way
        inFile = "%s/%s_aligned.afa" % (prj_tree.phylo, prj_name)

    # open the alignment to rename everything and find germline sequence
    # rename is to avoid possible errors with DNAML from sequence ids that are too long
    germ_pos = 1
    with open(inFile, "rU") as handle:
        if doAlign:
            aln = AlignIO.read(handle, "fasta")
        else:
            try:
                aln = AlignIO.read(handle, "phylip")
            except:
                sys.exit("Please make sure custom input is aligned and in PHYLIP format")

    lookup = []
    for seq in aln:
        lookup.append(seq.id)
        if re.search("(IG|VH|VK|VL|HV|KV|LV)", seq.id) is not None:
            germ_pos = len(lookup)
        seq.id = "%010d" % len(lookup)

    with open("%s/infile" % prj_tree.phylo, "w") as output:
        AlignIO.write(aln, output, "phylip")

    # now generate script for DNAML
    # J is "jumble" followed by random seed and number of times to repeat
    # O is outgroup root, followed by position of the germline in the alignment
    # 5 tells DNAML to do the ancestor inference
    # Y starts the run
    with open("%s/dnaml.in" % prj_tree.phylo, "w") as handle:
        seed = random.randint(0, 1e10) * 2 + 1  # seed must be odd
        handle.write("J\n%d\n3\nO\n%d\n5\nY\n" % (seed, germ_pos))

    # change to work directory so DNAML finds "infile" and puts the output where we expect
    os.chdir(prj_tree.phylo)
    with open("%s/dnaml.in" % prj_tree.phylo, "rU") as pipe:
        subprocess.call([DNAML], stdin=pipe)

    # revert names in tree
    with open("%s/outtree" % prj_tree.phylo, "rU") as intree:
        mytree = intree.read()
    fixedtree = re.sub("\d{10}", revertName, mytree)
    with open("%s/%s.tree" % (prj_tree.out, prj_name), "w") as outtree:
        outtree.write(fixedtree)

    # revert names in out file
    with open("%s/outfile" % prj_tree.phylo, "rU") as instuff:
        mystuff = instuff.read()
    fixedstuff = re.sub("\d{10}", revertName, mystuff)
    with open("%s/%s.dnaml.out" % (prj_tree.logs, prj_name), "w") as outstuff:
        outstuff.write(fixedstuff)

    # clean up
    os.remove("infile")
    os.remove("outfile")
    os.remove("outtree")
开发者ID:Batsi-2015,项目名称:SONAR,代码行数:99,代码来源:3.2-run_DNAML.py


注:本文中的Bio.Align.Applications.MuscleCommandline.diags方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。