Python seq.Fasta类代码示例

本文整理汇总了Python中TAMO.seq.Fasta类的典型用法代码示例。如果您正苦于以下问题：Python Fasta类的具体用法？Python Fasta怎么用？Python Fasta使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了Fasta类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: SGDData

def SGDData():
    """Download the SGD yeast data files and merge the chromosomes into one FASTA.

    Passes the feature tables, the two protein FASTA archives, the
    (url, 'Huh_Nature_2003.tab') pair and the 17 per-chromosome files
    (01-16 plus mt) to downloadfiles().  Each chr<NN>.fsa is then loaded from
    TAMO.paths.SGDdir, its first record is taken, and all of them are written
    to NCBI_yeast_genome.fsa in the same directory.
    """
    sgd_dir = TAMO.paths.SGDdir
    base_url = 'ftp://genome-ftp.stanford.edu/pub/yeast/data_download/'

    # '01' .. '16' followed by the mitochondrial chromosome.
    chrom_tags = ['%02d' % num for num in range(1, 17)] + ['mt']

    wanted = [
        'chromosomal_feature/SGD_features.tab',
        'chromosomal_feature/dbxref.tab',
        'chromosomal_feature/chromosome_length.tab',
        'sequence/GenBank/yeast_nrpep.fasta.gz',
        'sequence/genomic_sequence/orf_protein/orf_trans_all.fasta.gz',
        ('http://yeastgfp.ucsf.edu/allOrfData.txt', 'Huh_Nature_2003.tab'),
    ]
    wanted += ['sequence/NCBI_genome_source/chr%s.fsa' % tag for tag in chrom_tags]

    downloadfiles(sgd_dir, base_url, wanted)

    from TAMO.seq import Fasta

    print("Assembling yeast genome sequence files into a single file (NCBI_yeast_genome.fsa)")
    genome = {}
    for tag in chrom_tags:
        records = Fasta.load('%s/chr%s.fsa' % (TAMO.paths.SGDdir, tag))
        # Only the first record of each chromosome file is used.
        header, sequence = list(records.items())[0]
        # Output keys drop the zero padding: '01' -> '1'; '10' and 'mt' stay as-is.
        if tag[0] == '0':
            label = tag[1]
        else:
            label = tag
        genome['chr%s  %s' % (label, header)] = sequence
    Fasta.write(genome, TAMO.paths.SGDdir + 'NCBI_yeast_genome.fsa')

开发者ID:xguse，项目名称:customTAMO，代码行数:27，代码来源:GetDataFiles.py

示例2: genomebg

def genomebg(infile,outfile):
    """Run the MDscan 'genomebg.linux' helper on a FASTA file.

    The sequences in *infile* are re-written to a temporary FASTA file with a
    very large line length (so each sequence lands on one line), the external
    program is run with '-i <tmp> -o <outfile>', and its combined
    stdout/stderr is echoed line by line.

    The temporary file is created with mkstemp() (mktemp() only returns a
    name and is race-prone) and is removed even if writing or running fails.
    """
    EXE = MDSCAN_DIR + 'genomebg.linux'
    fsaD = Fasta.load(infile)
    fd, tmpfsa = tempfile.mkstemp()
    os.close(fd)  # Fasta.write reopens the file by name
    try:
        Fasta.write(fsaD,tmpfsa,linelen=1000000000)
        # NOTE(review): the paths are interpolated into a shell command
        # unquoted; callers must not pass untrusted file names.
        CMD = '%s -i %s -o %s'%(EXE,tmpfsa,outfile)
        FID = os.popen('( %s ;) 2>&1'%CMD,'r')
        for line in FID.readlines():
            print(line)
        # popen's close() returns a non-None status when the command failed.
        if FID.close():
            print("Exited")
    finally:
        os.unlink(tmpfsa)

开发者ID:adamlabadorf，项目名称:TAMO，代码行数:10，代码来源:MDscan.py

示例3: memefiles2tamo

def memefiles2tamo(files, tamoname):
    """Collect motifs from AlignACE/MEME result files, score them, and save them.

    Names in *files* ending in '.ace' are parsed with AlignAce.AlignAce and
    names containing '.meme' with Meme.Meme.  Names matching neither are now
    skipped; previously such a name re-added the preceding file's motifs, or
    raised NameError when it came first.  All collected motifs are scored
    against the probes of one FASTA file and written with
    MotifTools.save_motifs(motifs, tamoname).

    Reads the module globals ``probefile`` and ``fsafile`` and may rebind
    ``PROBESET``.  The FASTA file is ``fsafile`` when set, otherwise the one
    found for the *last* parsed result file.

    Raises:
        ValueError: if no name in *files* is a recognised result file.
    """
    global probefile, PROBESET, fsafile

    motifs = []
    mdobject = None
    for filename in files:
        print(">>>SDFSD>F  %s" % (filename,))
        if re.search(r'\.ace$', filename):
            mdobject = AlignAce.AlignAce(filename)
            if not mdobject.fastafile:
                mdobject.fastafile = filename.replace('.ace','.fsa')
        elif re.search(r'\.meme.*$', filename):
            mdobject = Meme.Meme(filename)
            if not mdobject.fastafile:
                mdobject.fastafile = re.sub(r'\..\.meme','.meme',filename).replace('.meme','.fsa')
        else:
            # Not a motif-discovery output: do not reuse the previous mdobject.
            continue
        motifs.extend(mdobject.motifs)

    if mdobject is None:
        raise ValueError("no .ace or .meme files among: %r" % (files,))

    print(mdobject.fastafile)
    if fsafile: fsaname = fsafile
    else:       fsaname = Fasta.find(mdobject.fastafile)
    fsaD    = Fasta.load(fsaname)
    probes  = fsaD.keys()
    if not probefile:
        PROBESET = MotifMetrics.ProbeSet('YEAST')
    # Make sure every sequence being scored is present in the probe set.
    for key,seq in fsaD.items():
        PROBESET.probes[key] = seq

    # As in the original, the MEME-specific steps are keyed off the last
    # file name in *files*, not off each motif's own source file.
    last_is_meme = re.search(r'\.meme$', filename)

    for motif in motifs:
        # presumably 1 / None are the parsers' "not computed yet" defaults -- TODO confirm
        if motif.pvalue == 1: motif.pvalue = PROBESET.p_value(motif,probes,'v')
        if motif.church == 1: motif.church = PROBESET.church(motif,probes,'v')
        if motif.ROC_auc is None: motif.ROC_auc = PROBESET.ROC_AUC(motif,probes,'v')
        if motif.frac is None:    motif.frac    = PROBESET.frac(motif,probes,'v',0.7)
        if last_is_meme:
            # -log10 of the MEME E-value
            motif.MAP = -math.log(motif.evalue)/math.log(10)
        # E_site, E_chi2, E_seq, MNCP and the consensus ROC AUC (CRA/Cfrac)
        # were disabled in the original and are intentionally not computed.

    # NOTE(review): this sorts only the last parsed object's own motif list;
    # the combined ``motifs`` list saved below keeps its collection order.
    # Kept as-is -- confirm whether ``motifs`` was meant to be sorted.
    if last_is_meme:
        mdobject.motifs.sort(key=lambda m: m.pvalue)
    else:
        mdobject.motifs.sort(key=lambda m: m.church)

    MotifTools.save_motifs(motifs,tamoname)

开发者ID:adamlabadorf，项目名称:TAMO，代码行数:52，代码来源:memeset2tamo.py

示例4: calcStats

def calcStats(fastaPath):
    """Return nucleotide composition statistics for a FASTA file.

    All sequences loaded from *fastaPath* are upper-cased and counted together.

    Returns a dict with:
        seqLen    -- number of sequences in the file (not a length)
        totNucs   -- total number of characters over all sequences
        aCnt, cCnt, gCnt, tCnt, nCnt -- counts of A, C, G, T and N
        nonNs     -- aCnt + cCnt + gCnt + tCnt
        n2tot     -- nCnt / totNucs
        n2nonN    -- nCnt / nonNs
        percentGC -- (gCnt + cCnt) / nonNs, as a fraction between 0 and 1

    A ratio whose denominator is zero (empty file, or only N's) is reported
    as 0.0 instead of raising ZeroDivisionError.
    """
    def _ratio(numerator, denominator):
        # Guarded float division for empty / all-N input.
        if not denominator:
            return 0.0
        return float(numerator) / denominator

    seqFile = Fasta.load(fastaPath)
    # One join instead of repeated string concatenation.
    combinedSeq = ''.join([seqFile[name] for name in seqFile]).upper()

    totNucs = len(combinedSeq)
    aCnt    = combinedSeq.count('A')
    cCnt    = combinedSeq.count('C')
    gCnt    = combinedSeq.count('G')
    tCnt    = combinedSeq.count('T')
    nCnt    = combinedSeq.count('N')
    nonNs   = aCnt+cCnt+gCnt+tCnt

    return {'seqLen':len(seqFile),
            'totNucs':totNucs,
            'aCnt':aCnt,
            'cCnt':cCnt,
            'gCnt':gCnt,
            'tCnt':tCnt,
            'nCnt':nCnt,
            'nonNs':nonNs,
            'n2tot':_ratio(nCnt, totNucs),
            'n2nonN':_ratio(nCnt, nonNs),
            'percentGC':_ratio(gCnt+cCnt, nonNs)}

开发者ID:xguse，项目名称:gusPyProj，代码行数:34，代码来源:seqStats.py

示例5: info2seeds

def info2seeds(N,infofile,probefile,species='YEAST'):
    """Build seed motifs from the most enriched n-mers of a FASTA file.

    Args:
        N: n-mer width.  When false, the sequences in *infofile* are used
           as the candidates themselves; otherwise up to the first 1000
           entries returned by MotifTools.top_nmers(N, seqs) are used.
        infofile: FASTA file supplying the sequences.
        probefile: file of probe ids, read with ProbeSet.ids_from_file.
        species: name passed to ProbeSet.

    Returns:
        A list of at most 20 MotifTools.Motif objects, one per candidate,
        taken in order of ascending G.p_value score.  The 40 best
        (candidate, score) tuples are also printed.
    """
    G    = ProbeSet(species)
    IDs  = G.ids_from_file(probefile)
    Q    = EM.theMarkovBackground.zeroth()

    seqs = Fasta.seqs(infofile)

    if not N:
        nmers = seqs
    else:
        nmers = MotifTools.top_nmers(N,seqs)[:1000]

    # Fixed: the original called len(nmers, infofile), which raises TypeError.
    print("Scoring enrichment of %d nmers from %s" % (len(nmers), infofile))
    sys.stdout.flush()

    # Only purely alphabetic candidates are scored.
    nmers_scoresT = [(nmer, G.p_value(nmer,IDs,'')) for nmer in nmers if nmer.isalpha()]
    nmers_scoresT.sort(key=lambda pair: pair[1])

    models = []
    for seq, _score in nmers_scoresT[:20]:
        m = MotifTools.Motif('',Q)
        m.compute_from_text(seq,0.1)
        models.append(m)
    for tup in nmers_scoresT[0:40]:
        print(tup)
    return models

开发者ID:adamlabadorf，项目名称:TAMO，代码行数:32，代码来源:TAMO_EM.py

示例6: main

def main():
    """Print pseudocounted 1- to 6-mer frequencies for the FASTA file in sys.argv[1].

    For each width every n-mer from permute() starts with a count of 1; the
    counts reported by MotifTools.top_nmers are then added to both the n-mer
    and its reverse complement, and each n-mer's share of the total is
    printed in sorted order.
    """
    fasta_path = sys.argv[1]
    sequences = Fasta.load(fasta_path).values()
    for width in range(1, 7):
        every_nmer = permute(width)
        observed = MotifTools.top_nmers(width, sequences, 'with counts', 'purge Ns')
        tally = {}
        grand_total = 0
        for word in every_nmer:
            tally[word] = 1  # pseudocount
            grand_total += 1
        for word, seen in observed:
            try:
                partner = MotifTools.revcomplement(word)
                tally[word] += seen
                tally[partner] += seen
                grand_total += 2 * seen
            except KeyError:
                # n-mers absent from the permute() table are ignored
                pass
        print("# freq in %s (total %d with pseudocounts)" % (fasta_path, grand_total))
        for word in sorted(tally.keys()):
            print("%-7s %20.17f" % (word, float(tally[word]) / grand_total))
        sys.stdout.flush()

开发者ID:adamlabadorf，项目名称:TAMO，代码行数:25，代码来源:Background.py

示例7: main

def main(fastafile, outDirectory):
    """Write pseudocounted 1- to 6-mer frequencies of a FASTA file to a .freq file.

    Args:
        fastafile: path of the FASTA file to analyse.
        outDirectory: directory that receives '<basename of fastafile>.freq'.

    For each width every n-mer from permute() starts with a count of 1; the
    counts from MotifTools.top_nmers are added to the n-mer and to its
    reverse complement.  Each width contributes a '# freq in ...' header line
    followed by one 'nmer frequency' line per n-mer in sorted order.

    The file is now written once, after all widths are computed, and is
    closed explicitly.  The original reopened and rewrote it on every pass of
    the width loop and never closed it; the final content is the same.
    """
    seqs = Fasta.load(fastafile).values()
    baseName = fastafile.split('/')[-1]

    output = []
    for w in range(1,7):
        allnmers = permute(w)
        nmersT = MotifTools.top_nmers(w,seqs,'with counts','purge Ns')
        nmersD = {}
        total = 0
        for nmer in allnmers:
            nmersD[nmer] = 1 #Pseudo count
            total = total + 1
        for nmer,count in nmersT:
            try:
                rc = MotifTools.revcomplement(nmer)
                nmersD[nmer] = nmersD[nmer] + count
                nmersD[rc]   = nmersD[rc]   + count
                total = total + 2*count
            except KeyError:
                # n-mers absent from the permute() table are ignored
                pass
        output.append("# freq in %s (total %d with pseudocounts)\n"%(baseName,total))
        for nmer in sorted(nmersD.keys()):
            output.append("%-7s %20.17f\n"%(nmer,float(nmersD[nmer]) / total))

    outFile = open('%s/%s.freq' % (outDirectory, baseName), 'w')
    try:
        outFile.writelines(output)
    finally:
        outFile.close()

开发者ID:xguse，项目名称:gusPyProj，代码行数:32，代码来源:MarkovBackground.py

示例8: loadMiRNAs

def loadMiRNAs(miRNA_Path):
    """Load a FASTA file of mature miRNAs.

    Returns whatever Fasta.load gives back for miRNA_Path (a dict, per the
    original docstring).
    """
    matureSeqs = Fasta.load(miRNA_Path)
    return matureSeqs

开发者ID:xguse，项目名称:gusPyProj，代码行数:7，代码来源:miRNA_targeting.py

示例9: orf2pseq

def orf2pseq(orf):
    """Return the protein sequence for an ORF name, or '' if it is unknown.

    On first use the module-level cache ``_orfpseqs`` is filled from the
    FASTA file ``_ORFPSEQS`` and a trailing '*' is stripped from every
    sequence.  Later calls are plain dictionary lookups.
    """
    global _orfpseqs
    if not _orfpseqs:
        from TAMO.seq import Fasta
        _orfpseqs = Fasta.load(_ORFPSEQS)
        for _orf,pseq in list(_orfpseqs.items()):
            # endswith() is safe on an empty sequence; pseq[-1] was not.
            if pseq.endswith('*'):
                _orfpseqs[_orf] = pseq[:-1]
    return _orfpseqs.get(orf, '')

开发者ID:adamlabadorf，项目名称:TAMO，代码行数:9，代码来源:SGD.py

示例10: train_final

    def train_final(self, model, fg, bg, N, beta):
        """Train the final motif and pick the SVM with the lowest training error.

        The foreground probe sequences are upper-cased and stripped of ';'
        and 'N' characters; empty results are dropped.  When self.refine is
        set the motif comes from self.train_model on those sequences,
        otherwise self.models[model] is used as-is.  One classifier is
        trained per C value on the over-sampled foreground scores against
        the background scores, and the first one with the smallest training
        error is kept.

        When self.dump is set, the foreground probes are split by
        self.SVM_test on each single score and written to '<stem>.pos.fsa'
        (falsy result) and '<stem>.neg.fsa' (truthy result), where <stem> is
        self.motif_file up to its first '.'.

        Returns (final_motif, best_classifier, fn), with fn taken from the
        final SVM_test call on the un-sampled foreground scores.
        """
        input_seqs = []
        for probe_id in fg:
            cleaned = self.all_probes[probe_id].upper().replace(";", "").replace("N", "")
            if cleaned:
                input_seqs.append(cleaned)

        if self.refine:
            final_motif = self.train_model(model, input_seqs, beta)
        else:
            final_motif = self.models[model]

        train_pos = self.get_LLRs(final_motif, fg)
        train_neg = self.get_LLRs(final_motif, bg)
        over_sampled_positive = self.SMOTE([train_pos], N, N)[0]

        # Train SVM to classify our training set
        best_classifier, best_err = None, 1.0
        for c in (1.0e-10, 1.0e-4, 1.0e-3, 1.0e-2, 0.05, 0.1, 1.0, 10.0, 100.0):
            candidate = self.SVM_train(over_sampled_positive, train_neg, c)
            candidate_err = self.SVM_test(candidate, over_sampled_positive, train_neg)
            if candidate_err < best_err:
                best_err, best_classifier = candidate_err, candidate

        (train_err, fp, fn) = self.SVM_test(best_classifier, train_pos, [], 1)

        if self.dump:
            with_motif = {}
            without_motif = {}
            for name, val in zip(fg, train_pos):
                if self.SVM_test(best_classifier, [val], []):
                    without_motif[name] = self.all_probes[name]
                else:
                    with_motif[name] = self.all_probes[name]
            Fasta.write(with_motif, self.motif_file.split('.')[0] + '.pos.fsa')
            Fasta.write(without_motif, self.motif_file.split('.')[0] + '.neg.fsa')

        return (final_motif, best_classifier, fn)

开发者ID:adamlabadorf，项目名称:TAMO，代码行数:42，代码来源:THEME.py

示例11: loadSeqs

def loadSeqs(fastaPathList):
    """Merge the sequences of several FASTA files into one dict.

    Files are loaded in list order, so a name that occurs in more than one
    file keeps the sequence from the last file.  The merged dict is passed
    to bioDefs.softMaskDict2HardMask before being returned (soft masking is
    converted to hard masking, per the original docstring).
    """
    merged = {}
    for fastaPath in fastaPathList:
        loaded = Fasta.load(fastaPath)
        merged.update(loaded)

    bioDefs.softMaskDict2HardMask(merged)
    return merged

开发者ID:xguse，项目名称:gusPyProj，代码行数:12，代码来源:miRNA_targeting.py

示例12: get_seq

def get_seq(chr,start=None,stop=None):
    """Return a stretch of yeast chromosome sequence.

    Args:
        chr: chromosome name.  Anything that is not a string starting with
             'chr' gets the prefix added (1 -> 'chr1', 'X' -> 'chrX').  With
             no *start*, a range may be embedded: 'chr4:454-465'.
        start: 1-based first position; None means the chromosome start.
        stop: last position, inclusive; None means the chromosome end.

    The genome is loaded once from SGDdir + 'NCBI_yeast_genome.fsa' into the
    module-level cache ``ChrD``.

    Fixed: the original ignored *stop* and sliced with an undefined ``end``,
    so calls with explicit coordinates raised NameError and calls without
    any range raised TypeError.
    """
    global ChrD
    if not ChrD:
        from TAMO.seq import Fasta
        ChrD = Fasta.load(SGDdir + 'NCBI_yeast_genome.fsa')
    if not isinstance(chr, str) or not chr.startswith('chr'):  # 1 -> chr1, 'X' -> chrX
        chr = 'chr%s'%chr
    if start is None and chr.find(':') > 0:                  # chr4:454-465 -> chr4, 454, 465
        chr,_range = chr.split(':')
        start, stop = [int(pos) for pos in _range.split('-')]
    if start is None:
        first = 0
    else:
        first = start - 1   # 1-based inclusive -> 0-based slice start
    return ChrD[chr][first:stop]

开发者ID:adamlabadorf，项目名称:TAMO，代码行数:13，代码来源:SGD.py

示例13: swp2swp

def swp2swp(swp):
    """Converts, when possible, from P014543 to ADR1_YEAST (and back).

    Only works for yeast right now.  On first use the module-level table
    ``_swp2swp`` is filled from the header lines of ``_SWPFASTA``: when the
    second token starts with 'SW' and the third with 'P', the second token
    minus its first three characters and the third token are mapped to each
    other.  Returns None when *swp* is not in the table.
    """
    global _swp2swp
    if not _swp2swp:
        for header in Fasta.keys(_SWPFASTA,key_func=lambda x:x):
            fields = header.split()
            text_name, numeric_name = fields[1], fields[2]
            if text_name.startswith('SW') and numeric_name.startswith('P'):
                short_name = text_name[3:]
                _swp2swp[short_name]   = numeric_name
                _swp2swp[numeric_name] = short_name
    if swp in _swp2swp:
        return _swp2swp[swp]

开发者ID:adamlabadorf，项目名称:TAMO，代码行数:15，代码来源:SGD.py

示例14: go

 def go(self):
     """Execution function: runs TAMO.MD.Meme.Meme and catches the output in self.output for access from MDAP.

     self.coRegSeqs[0] is rendered with Fasta.text into a temporary FASTA
     file in the current directory, named after the current time.  That file
     is handed to Meme together with self.extra_args and self.bfile.

     The file is now closed before Meme reads it, so buffered data is
     actually on disk, and it is removed even when writing or Meme fails.
     """
     import time

     # write a temp fasta file of coregulated seqs to use as input to Meme(file=TempFasta)
     ctimeStr  = time.ctime().replace(' ','_')
     fileName  = 'tempFastaOfCoRegSeqs.MDAP.%s.fas' %(ctimeStr)
     tFasta    = open(fileName, 'w')
     try:
         try:
             tFasta.write(Fasta.text(self.coRegSeqs[0]))
         finally:
             tFasta.close()

         # Call TAMO to do its thing:
         self.output = Meme(file=fileName, width='', extra_args=self.extra_args, bfile=self.bfile)
     finally:
         # delete temp file
         os.remove(fileName)

开发者ID:xguse，项目名称:gusPyProj，代码行数:16，代码来源:MD_wrappers.py

示例15: LoadDNA

def LoadDNA(verbose=False):
    """Read the DNA sub-sequence selected by the DNA section of ``params``.

    Reads FILE, CHR, START and END from ``DNA_section``.  When END is
    zero or missing, START + LENGTH is used instead.  The FASTA file is
    loaded and the slice [START:END] of the named chromosome is returned
    (0-based, end-exclusive, as a plain Python slice).

    Args:
        verbose: when true, print progress and the extracted sequence.

    Returns:
        The sub-sequence, or "" when no FILE is configured or the chromosome
        is not in the file.

    Fixed: the "cannot find" message referenced an undefined ``filename``
    and raised NameError instead of reporting the missing chromosome.
    """
    dna = ""
    fastafile = params.GetString(DNA_section,"FILE")
    if not fastafile:
        return dna

    chromo    = params.GetString(DNA_section,"CHR")
    chr_start = params.GetInt(DNA_section,"START")
    chr_end   = params.GetInt(DNA_section,"END")
    if not chr_end:
        chr_end = chr_start + params.GetInt(DNA_section,"LENGTH")
    if verbose:
        print("Loading fasta: [%s]\n"%fastafile)

    seqs = Fasta.load(fastafile)

    if verbose:
        genome_len = sum([len(seqs[name]) for name in seqs.keys()])
        print("Genome length = %d, # chromosomes = %d\n"%(genome_len, len(seqs.keys())))

    if chromo in seqs:
        seq = seqs[chromo]
        if verbose:
            print("Chr[%s] = %d nt\n"%(chromo,len(seq)))
        dna = seq[chr_start:chr_end]
        if verbose:
            print("DNA[%d:%d] = %d nt\n"%(chr_start,chr_end,len(dna)))
    elif verbose:
        print("Cannot find [%s] chromosome in %s\n"%(chromo, fastafile))
    if verbose:
        print("DNA:[%s]\n"%dna)

    return dna

开发者ID:DowellLab，项目名称:VizGroup，代码行数:46，代码来源:PlotModelResults.py

注：本文中的TAMO.seq.Fasta类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。