当前位置: 首页>>代码示例>>Python>>正文


Python Fasta.load方法代码示例

本文整理汇总了Python中TAMO.seq.Fasta.load方法的典型用法代码示例。如果您正苦于以下问题:Python Fasta.load方法的具体用法?Python Fasta.load怎么用?Python Fasta.load使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在TAMO.seq.Fasta的用法示例。


在下文中一共展示了Fasta.load方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: loadMiRNAs

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def loadMiRNAs(miRNA_Path):
    """
    Load the mature miRNA sequences stored in a fasta file.

    miRNA_Path -- path of the fasta file; passed unchanged to Fasta.load.

    Returns the object produced by Fasta.load (a dict, per the original
    author's description).
    """
    matureMiRNAs = Fasta.load(miRNA_Path)
    return matureMiRNAs
开发者ID:xguse,项目名称:gusPyProj,代码行数:9,代码来源:miRNA_targeting.py

示例2: main

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def main():
    """
    Print n-mer background frequencies (word sizes 1 through 6) for the
    fasta file named by sys.argv[1].

    For every word size, each word returned by permute() starts with a
    pseudocount of 1.  The counts reported by MotifTools.top_nmers are then
    added to both the word and its reverse complement.  One header line and
    one line per word (sorted) are printed for each word size.
    """
    fastaName = sys.argv[1]
    sequences = Fasta.load(fastaName).values()
    for width in range(1, 7):
        allWords = permute(width)
        observed = MotifTools.top_nmers(width, sequences, 'with counts', 'purge Ns')

        # Seed every possible word with a pseudocount of one.
        counts = {}
        total = 0
        for word in allWords:
            counts[word] = 1
            total += 1

        for word, count in observed[:]:
            try:
                revcomp = MotifTools.revcomplement(word)
                counts[word] += count
                counts[revcomp] += count
                total += 2 * count
            except KeyError:
                # The word (or its reverse complement) is not one of the
                # permute() words; it is left out of the totals.
                pass

        print("# freq in %s (total %d with pseudocounts)" % (fastaName, total))
        for word in sorted(counts):
            print("%-7s %20.17f" % (word, float(counts[word]) / total))
        sys.stdout.flush()
开发者ID:adamlabadorf,项目名称:TAMO,代码行数:27,代码来源:Background.py

示例3: SGDData

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def SGDData():
    """
    Download the SGD yeast data files and assemble the per-chromosome
    sequence files into one genome fasta file.

    The file list (feature tables, protein fasta files, the Huh et al.
    localisation table and one .fsa file per chromosome) is handed to
    downloadfiles() together with TAMO.paths.SGDdir and the SGD ftp root.
    Afterwards the first record of every chr??.fsa file is collected under
    a key of the form 'chrN  <original id>' and the result is written to
    NCBI_yeast_genome.fsa inside TAMO.paths.SGDdir.
    """
    import os  # local import: only needed for the path joins below

    root    = TAMO.paths.SGDdir
    urlroot = 'ftp://genome-ftp.stanford.edu/pub/yeast/data_download/'
    files = ['chromosomal_feature/SGD_features.tab',
             'chromosomal_feature/dbxref.tab',
             'chromosomal_feature/chromosome_length.tab',
             'sequence/GenBank/yeast_nrpep.fasta.gz',
             'sequence/genomic_sequence/orf_protein/orf_trans_all.fasta.gz',
             ('http://yeastgfp.ucsf.edu/allOrfData.txt','Huh_Nature_2003.tab')
             ]

    chrs = '01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 mt'.split()

    files.extend( ['sequence/NCBI_genome_source/chr%s.fsa'%x for x in chrs] )

    downloadfiles(root,urlroot,files)

    from TAMO.seq import Fasta

    print("Assembling yeast genome sequence files into a single file (NCBI_yeast_genome.fsa)")
    genome = {}
    for chrom in chrs:
        # os.path.join works whether or not SGDdir ends with a separator;
        # the original mixed '%s/...' with plain string concatenation.
        chromSeqs = Fasta.load(os.path.join(root, 'chr%s.fsa' % chrom))
        seqId, seq = list(chromSeqs.items())[0]
        # '01' -> '1'; 'mt' and two-digit numbers are kept as they are.
        label = chrom[1] if chrom[0] == '0' else chrom
        genome['chr%s  %s' % (label, seqId)] = seq
    Fasta.write(genome, os.path.join(root, 'NCBI_yeast_genome.fsa'))
开发者ID:xguse,项目名称:customTAMO,代码行数:29,代码来源:GetDataFiles.py

示例4: calcStats

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def calcStats(fastaPath):
    """
    Compute nucleotide composition statistics for a fasta file.

    fastaPath -- path of the fasta file; loaded with Fasta.load.

    All sequences are concatenated and upper-cased before counting.

    Returns a dict with the keys:
        seqLen    -- number of sequences in the file (a count, not a length)
        totNucs   -- total number of characters over all sequences
        aCnt, cCnt, gCnt, tCnt, nCnt -- occurrences of A, C, G, T and N
        nonNs     -- aCnt + cCnt + gCnt + tCnt
        n2tot     -- nCnt / totNucs
        n2nonN    -- nCnt / nonNs
        percentGC -- (gCnt + cCnt) / nonNs, as a fraction between 0 and 1

    Raises ZeroDivisionError when the file holds no characters at all or
    no A/C/G/T characters.
    """
    seqFile = Fasta.load(fastaPath)

    # One join instead of growing a string with += for every record.
    combinedSeq = ''.join(seqFile[name] for name in seqFile).upper()

    totNucs = len(combinedSeq)
    aCnt = combinedSeq.count('A')
    cCnt = combinedSeq.count('C')
    gCnt = combinedSeq.count('G')
    tCnt = combinedSeq.count('T')
    nCnt = combinedSeq.count('N')
    nonNs = aCnt + cCnt + gCnt + tCnt

    return {'seqLen': len(seqFile),
            'totNucs': totNucs,
            'aCnt': aCnt,
            'cCnt': cCnt,
            'gCnt': gCnt,
            'tCnt': tCnt,
            'nCnt': nCnt,
            'nonNs': nonNs,
            'n2tot': float(nCnt) / totNucs,
            'n2nonN': float(nCnt) / nonNs,
            'percentGC': (float(gCnt) + cCnt) / nonNs}
开发者ID:xguse,项目名称:gusPyProj,代码行数:36,代码来源:seqStats.py

示例5: main

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def main(fastafile, outDirectory):
    """
    Write n-mer background frequencies (word sizes 1 through 6) for a fasta
    file to '<outDirectory>/<fasta file name>.freq'.

    fastafile    -- path of the input fasta file; the part after the last
                    '/' is used in the header lines and the output name.
    outDirectory -- directory that receives the .freq file.

    For every word size, each word returned by permute() starts with a
    pseudocount of 1.  The counts reported by MotifTools.top_nmers are then
    added to both the word and its reverse complement.
    """
    baseName = fastafile.split('/')[-1]
    seqsD = Fasta.load(fastafile)
    seqs  = seqsD.values()

    output = []
    for w in range(1,7):
        allnmers = permute(w)
        nmersT = MotifTools.top_nmers(w,seqs,'with counts','purge Ns')
        nmersD = {}
        total = 0
        for nmer in allnmers:
            nmersD[nmer] = 1 #Pseudo count
            total = total + 1
        for nmer,count in nmersT[:]:
            try:
                rc = MotifTools.revcomplement(nmer)
                nmersD[nmer] = nmersD[nmer] + count
                nmersD[rc]   = nmersD[rc]   + count
                total = total + 2*count
            except KeyError:
                # nmer (or its reverse complement) is not one of the
                # permute() words; leave it out of the totals.
                pass
        output.append("# freq in %s (total %d with pseudocounts)\n"%(baseName,total))
        for nmer in sorted(nmersD):
            output.append("%-7s %20.17f\n"%(nmer,float(nmersD[nmer]) / total))

    # Write once, after all word sizes are done.  The original reopened and
    # rewrote the file on every pass of the loop and never closed it.
    outPath = '%s/%s.freq' % (outDirectory, baseName)
    with open(outPath, 'w') as outFile:
        outFile.writelines(output)
开发者ID:xguse,项目名称:gusPyProj,代码行数:34,代码来源:MarkovBackground.py

示例6: orf2pseq

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def orf2pseq(orf):
    """
    Return the protein sequence recorded for an ORF name, or '' if the
    name is unknown.

    The sequences are loaded from the fasta file _ORFPSEQS on first use and
    cached in the module-level _orfpseqs; a trailing '*' is stripped from
    each sequence when the cache is filled.
    """
    global _orfpseqs
    if not _orfpseqs:
        from TAMO.seq import Fasta
        _orfpseqs = Fasta.load(_ORFPSEQS)
        for _orf, pseq in list(_orfpseqs.items()):
            # endswith() is safe on an empty sequence, unlike pseq[-1].
            if pseq.endswith('*'):
                _orfpseqs[_orf] = pseq[:-1]
    return _orfpseqs.get(orf, '')
开发者ID:adamlabadorf,项目名称:TAMO,代码行数:11,代码来源:SGD.py

示例7: genomebg

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def genomebg(infile,outfile):
    """
    Run the MDscan 'genomebg.linux' executable on a fasta file.

    infile  -- fasta file to load with Fasta.load.
    outfile -- path passed to the executable through its -o option.

    The sequences are rewritten to a temporary fasta file with a very large
    line length (linelen=1000000000) and that file is given to the
    executable through -i.  The combined stdout/stderr of the command is
    printed, followed by "Exited" if the pipe reports a non-zero status.
    The temporary file is removed even if writing or running fails.
    """
    EXE = MDSCAN_DIR + 'genomebg.linux'
    fsaD = Fasta.load(infile)
    # mkstemp creates the file itself; mktemp only returns a name that
    # another process could claim before it is used.
    fd, tmpfsa = tempfile.mkstemp()
    os.close(fd)
    try:
        Fasta.write(fsaD,tmpfsa,linelen=1000000000)
        # NOTE(review): the command line is assembled by string formatting
        # and run through a shell; paths containing spaces or shell
        # metacharacters will not survive.
        CMD = '%s -i %s -o %s'%(EXE,tmpfsa,outfile)
        FID = os.popen('( %s ;) 2>&1'%CMD,'r')
        # Lines keep their trailing newline, so the output is
        # double-spaced, as in the original.
        for line in FID.readlines():
            print(line)
        if FID.close():
            print("Exited")
    finally:
        os.unlink(tmpfsa)
开发者ID:adamlabadorf,项目名称:TAMO,代码行数:12,代码来源:MDscan.py

示例8: __init__

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
    def __init__(self, fg_file, bg_file, cv_level, markov_file):
        """
        Set the default run options, load the Markov background and prepare
        the foreground and background sequence sets.

        fg_file     -- fasta file of foreground sequences.
        bg_file     -- fasta file of background sequences.
        cv_level    -- stored as self.cv_level.
        markov_file -- passed to EM.loadMarkovBackground.

        Each foreground sequence is cut down to its first
        whitespace-separated token.  Both sets go through Fasta.delN.
        Background entries are dropped when they share a key with the
        foreground, are empty, or contain any of the letters S, W, M, K, R
        or Y.  Foreground entries are then removed at random until at most
        MAX_FG (2000) remain.
        """
        MAX_FG = 2000

        # Run options and their defaults.
        self.cv_level = cv_level
        self.randomize = 0
        self.beta = 0.0
        self.delta = 0.001
        self.refine = 1
        self.motif_file = 'dummy.out'
        self.dump = 0
        self.family = ''
        self.datafiles = (fg_file, bg_file)

        # Markov background model.
        print("Loading Markov background file from %s" % markov_file)
        EM.loadMarkovBackground(markov_file)

        # Foreground and background sequence sets.
        print("Processing input sequences....")
        self.fg_seqs = Fasta.load(fg_file)
        for name in list(self.fg_seqs.keys()):
            self.fg_seqs[name] = self.fg_seqs[name].split()[0]
        self.all_probes = Fasta.load(bg_file)
        Fasta.delN(self.fg_seqs)
        Fasta.delN(self.all_probes)

        # A sequence present in the foreground must not also be background.
        for name in list(self.fg_seqs.keys()):
            if name in self.all_probes:
                del self.all_probes[name]

        # Drop background entries that are empty or contain S/W/M/K/R/Y.
        for name in list(self.all_probes.keys()):
            probe = self.all_probes[name]
            if len(probe) == 0 or re.search('[SWMKRY]', probe):
                del self.all_probes[name]
                print("deleting %s" % name)

        # Randomly thin the foreground down to MAX_FG sequences.
        while len(self.fg_seqs.keys()) > MAX_FG:
            fg_names = self.fg_seqs.keys()
            victim = fg_names[random.randint(0, len(self.fg_seqs.keys()) - 1)]
            del self.fg_seqs[victim]
开发者ID:adamlabadorf,项目名称:TAMO,代码行数:43,代码来源:THEME.py

示例9: loadSeqs

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def loadSeqs(fastaPathList):
    """
    Merge the sequences of several fasta files into one dict.

    fastaPathList -- iterable of fasta file paths, loaded in order with
                     Fasta.load; on a key clash the later file wins.

    The merged dict is passed to bioDefs.softMaskDict2HardMask (soft
    masking converted to hard masking, per the original description) and
    then returned.
    """
    mergedSeqs = {}
    for fastaPath in fastaPathList:
        loaded = Fasta.load(fastaPath)
        mergedSeqs.update(loaded)

    bioDefs.softMaskDict2HardMask(mergedSeqs)
    return mergedSeqs
开发者ID:xguse,项目名称:gusPyProj,代码行数:14,代码来源:miRNA_targeting.py

示例10: memefiles2tamo

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def memefiles2tamo(files, tamoname):
    """
    Collect the motifs of AlignACE (.ace) and MEME (.meme) output files,
    fill in missing motif scores and save all motifs with
    MotifTools.save_motifs(motifs, tamoname).

    files    -- iterable of motif-finder output file names.
    tamoname -- destination passed to MotifTools.save_motifs.

    Reads the module globals probefile, fsafile and PROBESET; PROBESET is
    (re)created as MotifMetrics.ProbeSet('YEAST') when probefile is false.
    The sequences of the fasta file (fsafile if set, otherwise the one
    located by Fasta.find) are copied into PROBESET.probes and their keys
    are the probe list used for scoring.
    """
    global probefile, PROBESET, fsafile
    
    motifs = []
    for filename in files:
        print ">>>SDFSD>F ",filename
        # NOTE(review): there is no else branch.  A name matching neither
        # pattern reuses the previous mdobject (its motifs are added a
        # second time) or raises NameError if it is the first file.
        if   re.search('\.ace$',filename):
            mdobject = AlignAce.AlignAce(filename)
            # Fall back to the .ace name with the extension swapped to .fsa.
            if not mdobject.fastafile: mdobject.fastafile=filename.replace('.ace','.fsa')
        elif re.search('\.meme.*$',filename):
            mdobject = Meme.Meme(filename)
            if not mdobject.fastafile:
                # Collapse '.<one char>.meme' to '.meme', then swap to '.fsa'.
                mdobject.fastafile=re.sub('\..\.meme','.meme',filename).replace('.meme','.fsa')
        motifs.extend(mdobject.motifs)

    # From here on mdobject and filename refer to the LAST file processed.
    #fsaname = find_fsa(mdobject.fastafile)
    print mdobject.fastafile
    if fsafile: fsaname = fsafile
    else:       fsaname = Fasta.find(mdobject.fastafile)
    fsaD    = Fasta.load(fsaname)
    probes  = fsaD.keys()
    if not probefile:
        PROBESET = MotifMetrics.ProbeSet('YEAST')
        #PROBESET= pick_genome(fsaname)
    for key,seq in fsaD.items():
        PROBESET.probes[key] = seq

    for motif in motifs:
        # pvalue/church equal to 1 and ROC_auc/frac equal to None are
        # recomputed against the probe list.
        if motif.pvalue == 1: motif.pvalue = PROBESET.p_value(motif,probes,'v')
        if motif.church == 1: motif.church = PROBESET.church(motif,probes,'v')
        #if motif.E_site == None: motif.E_site = PROBESET.E_sitef(motif,probes,3,'v')
        #if motif.E_chi2 == None: motif.E_chi2 = PROBESET.E_chi2(motif,probes,None,'v')
        #if motif.E_seq  == None: motif.E_seq  = PROBESET.E_seq(motif,probes,'v')
        if motif.ROC_auc== None: motif.ROC_auc= PROBESET.ROC_AUC(motif,probes,'v')
        #if motif.MNCP   == None: motif.MNCP   = PROBESET.MNCP(motif,probes,'v')
        if motif.frac   == None: motif.frac   = PROBESET.frac(motif,probes,'v',0.7)
        # NOTE(review): this tests the last file name, not the file the
        # motif came from.  MAP is set to -log10(evalue).
        if re.search('\.meme$',filename):
            motif.MAP = -math.log(motif.evalue)/math.log(10)
        # Disabled by the constant 0: this branch never runs.
        if 0 and (motif.CRA == None):
            try:
                pass
                CRA, Cfrac = PROBESET.cons_ROC_AUC(motif,probes,'v',tuple='YES')
                motif.CRA = CRA
                motif.Cfrac = Cfrac
            except: pass

    # NOTE(review): only the last mdobject's own list is sorted; the
    # `motifs` list saved below keeps its collection order.
    if re.search('\.meme$',filename):
        mdobject.motifs.sort(lambda x,y: cmp(x.pvalue, y.pvalue))
    else:
        mdobject.motifs.sort(lambda x,y: cmp(x.church, y.church))

    MotifTools.save_motifs(motifs,tamoname)
开发者ID:adamlabadorf,项目名称:TAMO,代码行数:54,代码来源:memeset2tamo.py

示例11: get_seq

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def get_seq(chr,start=None,stop=None):
    """
    Return a stretch of chromosome sequence from the yeast genome file.

    chr   -- chromosome designator.  'chr4' is used as is; anything that is
             not a string starting with 'chr' is prefixed (1 -> 'chr1',
             'X' -> 'chrX').  With start omitted, a range string such as
             'chr4:454-465' supplies chromosome, start and stop.
    start -- 1-based first position, inclusive; None means the beginning.
    stop  -- 1-based last position, inclusive; None means the end.

    The genome is loaded from SGDdir + 'NCBI_yeast_genome.fsa' on first use
    and cached in the module-level ChrD.

    Raises KeyError if the chromosome is not in the genome file.
    """
    global ChrD
    if not ChrD:
        from TAMO.seq import Fasta
        ChrD = Fasta.load(SGDdir + 'NCBI_yeast_genome.fsa')
    if not isinstance(chr, str) or not chr.startswith('chr'):  # 1 -> chr1, 'X' -> chrX
        chr = 'chr%s'%chr
    if start is None and chr.find(':') > 0:                    # chr4:454-465 -> chr4, 454, 465
        chr, _range = chr.split(':')
        start, stop = [int(pos) for pos in _range.split('-')]
    # The original sliced with an undefined name `end` whenever start/stop
    # were passed explicitly; `stop` is the parameter callers supply.
    first = None if start is None else start - 1
    return ChrD[chr][first:stop]
开发者ID:adamlabadorf,项目名称:TAMO,代码行数:15,代码来源:SGD.py

示例12: LoadDNA

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def LoadDNA(verbose=False):
	"""
	Read the DNA sequence and extract the sub-sequence to model.

	The fasta file name, chromosome name and coordinates come from the
	DNA_section of the module-level `params` object (keys FILE, CHR, START
	and END; when END is false, START + LENGTH is used instead).

	verbose -- when true, progress and the extracted sequence are printed.

	Returns seq[START:END] of the requested chromosome, or "" when no
	fasta file is configured or the chromosome is not in the file.
	"""
	dna = ""
	fastafile = params.GetString(DNA_section,"FILE")
	if not fastafile:
		return dna

	chromo = params.GetString(DNA_section,"CHR")
	chr_start  = params.GetInt(DNA_section,"START")
	chr_end    = params.GetInt(DNA_section,"END")
	if (not chr_end):
		chr_end = params.GetInt(DNA_section,"LENGTH")
		chr_end += chr_start
	if verbose:
		print ("Loading fasta: [%s]\n"%fastafile)

	seqs = Fasta.load(fastafile)

	if verbose:
		n = sum(len(seqs[name]) for name in seqs)
		print("Genome length = %d, # chromosomes = %d\n"%(n, len(seqs)))

	if chromo in seqs:
		seq = seqs[chromo]
		if verbose:
			print("Chr[%s] = %d nt\n"%(chromo,len(seq)))
		dna = seq[chr_start:chr_end]
		if verbose:
			print("DNA[%d:%d] = %d nt\n"%(chr_start,chr_end,len(dna)))
	else:
		if verbose:
			# The original referenced an undefined name `filename` here.
			print("Cannot find [%s] chromosome in %s\n"%(chromo, fastafile))
	if (verbose):
		print("DNA:[%s]\n"%dna)

	return dna
开发者ID:DowellLab,项目名称:VizGroup,代码行数:48,代码来源:PlotModelResults.py

示例13: swp_find_and_format

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def swp_find_and_format(swp):
    """
    Find the single sequence whose header mentions `swp` and return it
    wrapped at 70 characters per line.

    swp -- text searched for within the first 60 characters of every key of
           the sequence dict (keys are the unmodified fasta headers, since
           the file is loaded with an identity key_func).

    The dict is loaded from _SWPFASTA on first use and cached in the
    module-level _swp_seqs.

    Returns the formatted text (each line ends with a newline), or None
    when nothing matches or more than one key matches; in the latter case
    the matching keys are printed.
    """
    global _swp_seqs
    if not _swp_seqs:
        _swp_seqs = Fasta.load(_SWPFASTA,key_func=lambda x:x)

    matches = [header for header in _swp_seqs.keys()
               if header[0:60].find(swp) >= 0]
    if not matches:
        return None
    if len(matches) > 1:
        print("# Multiple matches found for %s:"%swp)
        for header in matches:
            print('# %s' % (header,))
        return None

    seq = _swp_seqs[matches[0]]
    return ''.join(seq[pos:pos+70] + '\n' for pos in range(0, len(seq), 70))
开发者ID:adamlabadorf,项目名称:TAMO,代码行数:22,代码来源:SGD.py

示例14: __init__

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
 def __init__(self,fastaSeqs, motifDict, thresh=0.5,window=200):
     """
     Build one SeqMap per sequence and store them in self.seqMaps, keyed by
     sequence name.

     fastaSeqs -- path of a fasta file (str; loaded with Fasta.load) or an
                  already loaded dict of name -> sequence.
     motifDict -- passed unchanged to every SeqMap.
     thresh    -- passed to SeqMap as thresh.
     window    -- passed to SeqMap as window.

     The time taken to build each SeqMap and the running sequence count are
     printed, tab separated.

     Raises AssertionError if fastaSeqs is neither a str nor a dict, or if
     more than 250 sequences are supplied.
     """
     self.seqMaps = {}

     # isinstance() also accepts str/dict subclasses (e.g. OrderedDict),
     # and the explicit raise still fires when asserts are disabled by -O.
     if isinstance(fastaSeqs, str):
         seqs = Fasta.load(fastaSeqs)
     elif isinstance(fastaSeqs, dict):
         seqs = fastaSeqs
     else:
         raise AssertionError(
             'MapLib arg(fastaSeqs) must be string pointing to file or a seqDict.')

     # Instantiate a SeqMap obj for each seq in seqs
     for c, k in enumerate(seqs, 1):
         # NOTE(review): hard cap of 250 sequences; reason not visible
         # here -- confirm before relaxing.
         assert c <= 250
         realT1 = time()
         self.seqMaps[k] = SeqMap(k, seqs[k], motifDict, thresh=thresh, window=window)
         realT2 = time()
         print('%.4f\t%s' % (realT2-realT1,c))
开发者ID:xguse,项目名称:gusPyProj,代码行数:23,代码来源:crmClasses.py

示例15: geneList2FastaDict

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def geneList2FastaDict(geneList, sourceFastaPath, hardMasked=True):
    """
    Return a dict of the requested fasta records in the form
    SeqName:Sequence.

    geneList        -- names of the records wanted.
    sourceFastaPath -- fasta file to take them from (Fasta.load).
    hardMasked      -- when true (the default) the result is passed to
                       softMaskDict2HardMask before being returned.

    Only names that are present in the file with a non-empty sequence are
    kept, so the result may be shorter than geneList.  The number of names
    given and found is printed.
    """
    sourceDict = Fasta.load(sourceFastaPath)

    # .get() lets names absent from the file be skipped; the original
    # sourceDict[i] lookup raised KeyError on the first missing gene.
    newDict = {}
    for gene in geneList:
        seq = sourceDict.get(gene)
        if seq:
            newDict[gene] = seq

    print("%s genes names given, %s found." % (len(geneList), len(newDict)))

    if hardMasked:
        softMaskDict2HardMask(newDict)

    return newDict
开发者ID:xguse,项目名称:gusPyProj,代码行数:24,代码来源:bioDefs.py


注:本文中的TAMO.seq.Fasta.load方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。