本文整理汇总了Python中TAMO.seq.Fasta.load方法的典型用法代码示例。如果您正苦于以下问题:Python Fasta.load方法的具体用法?Python Fasta.load怎么用?Python Fasta.load使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类TAMO.seq.Fasta的用法示例。
在下文中一共展示了Fasta.load方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: loadMiRNAs
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def loadMiRNAs(miRNA_Path):
    """
    Load mature miRNA sequences from a FASTA file.

    miRNA_Path -- path of the FASTA file, handed straight to Fasta.load().

    Returns the object produced by Fasta.load() (described by the original
    author as a dict).
    """
    mature_mirnas = Fasta.load(miRNA_Path)
    return mature_mirnas
示例2: main
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def main():
    """
    Print n-mer frequencies (word lengths 1..6) for a FASTA file.

    The FASTA path is taken from sys.argv[1].  For every word length a
    header line is printed, followed by one "<nmer> <frequency>" line per
    n-mer in sorted order.

    Every n-mer returned by permute(w) starts with a pseudocount of 1.
    Each count reported by MotifTools.top_nmers() is credited to the n-mer
    and to its reverse complement, so the running total grows by 2*count.
    """
    fasta_path = sys.argv[1]
    seqs = Fasta.load(fasta_path).values()
    for w in range(1, 7):
        nmersD = {}
        total = 0
        for nmer in permute(w):
            nmersD[nmer] = 1  # Pseudo count
            total = total + 1
        for nmer, count in MotifTools.top_nmers(w, seqs, 'with counts', 'purge Ns'):
            try:
                rc = MotifTools.revcomplement(nmer)
            except KeyError:
                continue
            # Skip the pair as a whole when either word is unknown, so that
            # the per-word counts and `total` can never get out of step.
            if nmer not in nmersD or rc not in nmersD:
                continue
            nmersD[nmer] = nmersD[nmer] + count
            nmersD[rc] = nmersD[rc] + count
            total = total + 2 * count
        print("# freq in %s (total %d with pseudocounts)" % (fasta_path, total))
        for nmer in sorted(nmersD):
            print("%-7s %20.17f" % (nmer, float(nmersD[nmer]) / total))
        sys.stdout.flush()
示例3: SGDData
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def SGDData():
    """
    Download the SGD yeast data files and build one combined genome FASTA.

    The files listed below are fetched with downloadfiles() into
    TAMO.paths.SGDdir.  The per-chromosome chrNN.fsa files are then loaded,
    and the first record of each is stored under the key
    'chr<N> <original id>' (a leading zero is dropped from the chromosome
    label).  The result is written to NCBI_yeast_genome.fsa.
    """
    root = TAMO.paths.SGDdir
    urlroot = 'ftp://genome-ftp.stanford.edu/pub/yeast/data_download/'
    chrs = '01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 mt'.split()
    files = [
        'chromosomal_feature/SGD_features.tab',
        'chromosomal_feature/dbxref.tab',
        'chromosomal_feature/chromosome_length.tab',
        'sequence/GenBank/yeast_nrpep.fasta.gz',
        'sequence/genomic_sequence/orf_protein/orf_trans_all.fasta.gz',
        ('http://yeastgfp.ucsf.edu/allOrfData.txt', 'Huh_Nature_2003.tab'),
    ]
    for label in chrs:
        files.append('sequence/NCBI_genome_source/chr%s.fsa' % label)

    downloadfiles(root, urlroot, files)

    from TAMO.seq import Fasta
    print("Assembling yeast genome sequence files into a single file (NCBI_yeast_genome.fsa)")
    genome = {}
    for label in chrs:
        records = Fasta.load('%s/chr%s.fsa' % (TAMO.paths.SGDdir, label))
        seq_id, seq = list(records.items())[0]
        # '01' -> '1'; 'mt' and two-digit labels without a leading zero stay as they are.
        short_label = label[1] if label[0] == '0' else label
        genome['chr%s %s' % (short_label, seq_id)] = seq
    Fasta.write(genome, TAMO.paths.SGDdir + 'NCBI_yeast_genome.fsa')
示例4: calcStats
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def calcStats(fastaPath):
    """
    Compute nucleotide-composition statistics for a FASTA file.

    fastaPath -- path handed to Fasta.load().

    Returns a dict with these keys:
      seqLen    -- number of sequence records in the file
      totNucs   -- combined length of all sequences
      aCnt, cCnt, gCnt, tCnt, nCnt
                -- case-insensitive counts of A, C, G, T and N
      nonNs     -- aCnt + cCnt + gCnt + tCnt
      n2tot     -- nCnt / totNucs
      n2nonN    -- nCnt / nonNs
      percentGC -- (gCnt + cCnt) / nonNs, as a fraction (not times 100)

    A ratio whose denominator is zero (no sequence data, or no A/C/G/T at
    all) is reported as NaN instead of raising ZeroDivisionError.
    """
    def ratio(numerator, denominator):
        # Undefined ratios become NaN so one degenerate file cannot abort a run.
        if not denominator:
            return float('nan')
        return float(numerator) / denominator

    seqFile = Fasta.load(fastaPath)
    # One join() instead of repeated += on an ever-growing string.
    combinedSeq = ''.join(seqFile.values()).upper()

    totNucs = len(combinedSeq)
    aCnt = combinedSeq.count('A')
    cCnt = combinedSeq.count('C')
    gCnt = combinedSeq.count('G')
    tCnt = combinedSeq.count('T')
    nCnt = combinedSeq.count('N')
    nonNs = aCnt + cCnt + gCnt + tCnt

    return {'seqLen': len(seqFile),
            'totNucs': totNucs,
            'aCnt': aCnt,
            'cCnt': cCnt,
            'gCnt': gCnt,
            'tCnt': tCnt,
            'nCnt': nCnt,
            'nonNs': nonNs,
            'n2tot': ratio(nCnt, totNucs),
            'n2nonN': ratio(nCnt, nonNs),
            'percentGC': ratio(gCnt + cCnt, nonNs)}
示例5: main
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def main(fastafile, outDirectory):  # !! 1/2/09 AD added 'fastafile' var and changed 'if __name__' as way to call this from script.
    """
    Write n-mer frequencies (word lengths 1..6) for a FASTA file to disk.

    fastafile    -- path of the FASTA file to analyse.
    outDirectory -- directory that receives '<name>.freq', where <name> is
                    the part of fastafile after its last '/'.

    For every word length a header line is written, followed by one
    "<nmer> <frequency>" line per n-mer in sorted order.  Every n-mer
    returned by permute(w) starts with a pseudocount of 1; each count
    reported by MotifTools.top_nmers() is credited to the n-mer and to its
    reverse complement.
    """
    base_name = fastafile.split('/')[-1]
    seqs = Fasta.load(fastafile).values()
    output = []
    for w in range(1, 7):
        nmersD = {}
        total = 0
        for nmer in permute(w):
            nmersD[nmer] = 1  # Pseudo count
            total = total + 1
        for nmer, count in MotifTools.top_nmers(w, seqs, 'with counts', 'purge Ns'):
            try:
                rc = MotifTools.revcomplement(nmer)
            except KeyError:
                continue
            # Skip the pair as a whole when either word is unknown, so that
            # the per-word counts and `total` can never get out of step.
            if nmer not in nmersD or rc not in nmersD:
                continue
            nmersD[nmer] = nmersD[nmer] + count
            nmersD[rc] = nmersD[rc] + count
            total = total + 2 * count
        # Each entry carries its own '\n' so the file can be written verbatim.
        output.append("# freq in %s (total %d with pseudocounts)\n" % (base_name, total))
        for nmer in sorted(nmersD):
            output.append("%-7s %20.17f\n" % (nmer, float(nmersD[nmer]) / total))
    # open output file and write out results; `with` guarantees the handle
    # is flushed and closed even if a write fails.
    out_path = '%s/%s.freq' % (outDirectory, base_name)
    with open(out_path, 'w') as out_handle:
        out_handle.writelines(output)
示例6: orf2pseq
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def orf2pseq(orf):
    """
    Return the protein sequence stored for an ORF name, or '' if unknown.

    The sequences are loaded from the FASTA file named by _ORFPSEQS the
    first time they are needed and cached in the module-level _orfpseqs
    dict.  A trailing '*' is removed from every sequence at load time.
    """
    global _orfpseqs
    if not _orfpseqs:
        from TAMO.seq import Fasta
        _orfpseqs = Fasta.load(_ORFPSEQS)
        for _orf, pseq in list(_orfpseqs.items()):
            # endswith() also copes with an empty sequence, for which
            # pseq[-1] would raise IndexError.
            if pseq.endswith('*'):
                _orfpseqs[_orf] = pseq[:-1]
    return _orfpseqs.get(orf, '')
示例7: genomebg
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def genomebg(infile, outfile):
    """
    Run the MDscan 'genomebg.linux' program on a FASTA file.

    infile  -- FASTA file, read with Fasta.load().
    outfile -- path given to the program's -o option.

    The sequences are rewritten to a temporary FASTA file with
    linelen=1000000000, and that file is passed to the program with -i.
    The program's combined stdout/stderr is printed line by line, and
    "Exited" is printed when closing the pipe reports a non-zero status.
    The temporary file is removed even if writing or running fails.
    """
    EXE = MDSCAN_DIR + 'genomebg.linux'
    fsaD = Fasta.load(infile)
    # mkstemp() creates the file atomically; mktemp() only hands back a
    # name that another process could claim before it is used.
    fd, tmpfsa = tempfile.mkstemp()
    os.close(fd)
    try:
        Fasta.write(fsaD, tmpfsa, linelen=1000000000)
        # NOTE(review): the command is interpolated into a shell string, so
        # paths containing spaces or shell metacharacters will misbehave.
        CMD = '%s -i %s -o %s' % (EXE, tmpfsa, outfile)
        FID = os.popen('( %s ;) 2>&1' % CMD, 'r')
        for line in FID.readlines():
            print(line)
        if FID.close():
            print("Exited")
    finally:
        os.unlink(tmpfsa)
示例8: __init__
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def __init__(self, fg_file, bg_file, cv_level, markov_file):
    """
    Set default options and load the foreground/background sequences.

    fg_file     -- FASTA file of foreground sequences.
    bg_file     -- FASTA file of background sequences (kept as all_probes).
    cv_level    -- stored as self.cv_level; not used further in this method.
    markov_file -- handed to EM.loadMarkovBackground().

    After loading, each foreground sequence is cut down to its first
    whitespace-separated token, Fasta.delN() is applied to both sets, and
    the background loses every entry that is also a foreground key, is
    empty, or contains one of the letters S, W, M, K, R, Y.  The foreground
    is then randomly thinned to at most MAX_FG (2000) sequences.
    """
    self.cv_level = cv_level
    self.randomize = 0
    self.beta = 0.0
    self.delta = 0.001
    self.refine = 1
    self.motif_file = 'dummy.out'
    self.dump = 0
    self.family = ''
    self.datafiles = (fg_file, bg_file)
    MAX_FG = 2000
    #LOAD MARKOV BACKGROUND#
    print("Loading Markov background file from %s" % markov_file)
    EM.loadMarkovBackground(markov_file)
    ##################################################################################
    #divide input sequences into groups according to the desired cross-validation level
    ###################################################################################
    print("Processing input sequences....")
    self.fg_seqs = Fasta.load(fg_file)  #load foreground sequences
    for key, fseq in list(self.fg_seqs.items()):
        # NOTE(review): split()[0] raises IndexError for an empty or
        # whitespace-only record.
        self.fg_seqs[key] = fseq.split()[0]
    self.all_probes = Fasta.load(bg_file)  #load background sequences
    Fasta.delN(self.fg_seqs)
    Fasta.delN(self.all_probes)
    #first delete any sequences from background that are present in foreground
    for key in self.fg_seqs:
        if key in self.all_probes:
            del self.all_probes[key]
    # Iterate over a snapshot of the keys because entries are deleted
    # from the dict inside the loop.
    for key in list(self.all_probes):
        probe = self.all_probes[key]
        if len(probe) == 0 or re.search('[SWMKRY]', probe):
            del self.all_probes[key]
            print("deleting %s" % key)
    while len(self.fg_seqs) > MAX_FG:
        fg_keys = list(self.fg_seqs)
        del self.fg_seqs[fg_keys[random.randint(0, len(fg_keys) - 1)]]
示例9: loadSeqs
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def loadSeqs(fastaPathList):
    """
    Merge the sequences of several FASTA files into a single dict.

    fastaPathList -- list of FASTA file paths.

    Files are merged with dict.update(), so a record name that occurs in
    more than one file keeps the sequence from the last file listed.  The
    merged dict is handed to bioDefs.softMaskDict2HardMask() (soft masking
    -> hard masking, per the original author) and then returned.
    """
    merged = {}
    for fasta_path in fastaPathList:
        records = Fasta.load(fasta_path)
        merged.update(records)
    bioDefs.softMaskDict2HardMask(merged)
    return merged
示例10: memefiles2tamo
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def memefiles2tamo(files, tamoname):
"""
Collect motifs from AlignACE/MEME output files, fill in missing scores,
and save them with MotifTools.save_motifs().

NOTE(review): indentation was lost in this listing, so the nesting of the
statements below (in particular what belongs inside the per-file loop)
must be confirmed against the original source before this code is run.

files    -- output file names; names ending in '.ace' are parsed with
AlignAce.AlignAce, names containing '.meme' with Meme.Meme.
tamoname -- destination handed to MotifTools.save_motifs().

Reads the module globals probefile and fsafile, and may rebind PROBESET.
"""
global probefile, PROBESET, fsafile
motifs = []
for filename in files:
print ">>>SDFSD>F ",filename
# Pick the parser from the file name.  A name matching neither pattern
# assigns nothing to mdobject in this pass.
if re.search('\.ace$',filename):
mdobject = AlignAce.AlignAce(filename)
# Fall back to the sibling '.fsa' file when the parser recorded no FASTA name.
if not mdobject.fastafile: mdobject.fastafile=filename.replace('.ace','.fsa')
elif re.search('\.meme.*$',filename):
mdobject = Meme.Meme(filename)
if not mdobject.fastafile:
# Drop a one-character infix ('x.3.meme' -> 'x.meme') before swapping the extension to '.fsa'.
mdobject.fastafile=re.sub('\..\.meme','.meme',filename).replace('.meme','.fsa')
motifs.extend(mdobject.motifs)
#fsaname = find_fsa(mdobject.fastafile)
print mdobject.fastafile
# A non-empty fsafile global overrides the FASTA name taken from the motif file.
if fsafile: fsaname = fsafile
else: fsaname = Fasta.find(mdobject.fastafile)
fsaD = Fasta.load(fsaname)
probes = fsaD.keys()
if not probefile:
PROBESET = MotifMetrics.ProbeSet('YEAST')
#PROBESET= pick_genome(fsaname)
# Register every loaded sequence with the probe set under its FASTA key.
for key,seq in fsaD.items():
PROBESET.probes[key] = seq
for motif in motifs:
# Each score is recomputed only while it still holds the value tested for (1 or None).
if motif.pvalue == 1: motif.pvalue = PROBESET.p_value(motif,probes,'v')
if motif.church == 1: motif.church = PROBESET.church(motif,probes,'v')
#if motif.E_site == None: motif.E_site = PROBESET.E_sitef(motif,probes,3,'v')
#if motif.E_chi2 == None: motif.E_chi2 = PROBESET.E_chi2(motif,probes,None,'v')
#if motif.E_seq == None: motif.E_seq = PROBESET.E_seq(motif,probes,'v')
if motif.ROC_auc== None: motif.ROC_auc= PROBESET.ROC_AUC(motif,probes,'v')
#if motif.MNCP == None: motif.MNCP = PROBESET.MNCP(motif,probes,'v')
if motif.frac == None: motif.frac = PROBESET.frac(motif,probes,'v',0.7)
if re.search('\.meme$',filename):
# MAP is set to -log10(evalue) for '.meme' files.
motif.MAP = -math.log(motif.evalue)/math.log(10)
# Disabled branch: the leading `0` short-circuits the test, so the body never runs.
if 0 and (motif.CRA == None):
try:
pass
CRA, Cfrac = PROBESET.cons_ROC_AUC(motif,probes,'v',tuple='YES')
motif.CRA = CRA
motif.Cfrac = Cfrac
except: pass
# NOTE(review): these sorts reorder mdobject.motifs in place; `motifs`, the
# list that is saved below, is a separate list and keeps its order.
if re.search('\.meme$',filename):
mdobject.motifs.sort(lambda x,y: cmp(x.pvalue, y.pvalue))
else:
mdobject.motifs.sort(lambda x,y: cmp(x.church, y.church))
MotifTools.save_motifs(motifs,tamoname)
示例11: get_seq
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def get_seq(chr, start=None, stop=None):
    """
    Return a stretch of chromosome sequence from the cached genome.

    chr   -- chromosome given as 'chr4', as a bare name or number such as
             4 or 'X' (which gets the 'chr' prefix), or as a complete
             range string such as 'chr4:454-465'.
    start -- 1-based first position; None means the chromosome start.
    stop  -- 1-based last position, inclusive; None means the chromosome end.

    A range embedded in `chr` is only parsed when `start` is None.

    The genome is loaded from SGDdir + 'NCBI_yeast_genome.fsa' on first use
    and cached in the module-level ChrD dict.  Raises KeyError when the
    chromosome is not in ChrD.
    """
    global ChrD
    if not ChrD:
        from TAMO.seq import Fasta
        ChrD = Fasta.load(SGDdir + 'NCBI_yeast_genome.fsa')
    if not isinstance(chr, str) or not chr.startswith('chr'):  # 1 -> chr1, 'X' -> chrX
        chr = 'chr%s' % chr
    if start is None and chr.find(':') > 0:  # chr4:454-465 -> chr4, 454, 465
        chr, _range = chr.split(':')
        start, stop = _range.split('-')
        start, stop = int(start), int(stop)
    # Use `stop` (the actual parameter) for the upper bound; a None bound
    # leaves that side of the slice open.
    first = 0 if start is None else start - 1
    return ChrD[chr][first:stop]
示例12: LoadDNA
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def LoadDNA(verbose=False):
    """
    Read the DNA sub-sequence described by the DNA section of `params`.

    The FASTA file name (FILE), the chromosome name (CHR) and the START and
    END offsets are read from `params` under DNA_section.  When END is
    falsy (e.g. 0), LENGTH is read instead and the end becomes
    START + LENGTH.  The result is seq[START:END] of the requested
    chromosome, using Python slice semantics.

    verbose -- when true, progress information is printed.

    Returns the extracted sequence, or '' when no FASTA file is configured
    or the chromosome is not present in the file.
    """
    dna = ""
    fastafile = params.GetString(DNA_section, "FILE")
    if fastafile:
        chromo = params.GetString(DNA_section, "CHR")
        chr_start = params.GetInt(DNA_section, "START")
        chr_end = params.GetInt(DNA_section, "END")
        if not chr_end:
            chr_end = params.GetInt(DNA_section, "LENGTH")
            chr_end += chr_start
        if verbose:
            print("Loading fasta: [%s]\n" % fastafile)
        seqs = Fasta.load(fastafile)
        if verbose:
            n = sum(len(seq) for seq in seqs.values())
            print("Genome length = %d, # chromosomes = %d\n" % (n, len(seqs)))
        if chromo in seqs:
            seq = seqs[chromo]
            if verbose:
                print("Chr[%s] = %d nt\n" % (chromo, len(seq)))
            dna = seq[chr_start:chr_end]
            if verbose:
                print("DNA[%d:%d] = %d nt\n" % (chr_start, chr_end, len(dna)))
        elif verbose:
            # Report the FASTA file that was actually searched.
            print("Cannot find [%s] chromosome in %s\n" % (chromo, fastafile))
    if verbose:
        print("DNA:[%s]\n" % dna)
    return dna
示例13: swp_find_and_format
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def swp_find_and_format(swp):
    """
    Find one entry of the cached _SWPFASTA file and return its sequence
    wrapped at 70 characters per line.

    swp -- text searched for in the first 60 characters of each key of the
           loaded FASTA dict (keys are produced with key_func=lambda x: x).

    Returns the sequence as text with a newline after every 70 characters.
    Returns None when nothing matches, and also when more than one key
    matches; in the latter case the matching keys are printed as '#' lines.

    The FASTA file is loaded on first use and cached in the module-level
    _swp_seqs dict.
    """
    global _swp_seqs
    if not _swp_seqs:
        _swp_seqs = Fasta.load(_SWPFASTA, key_func=lambda x: x)
    hits = [key for key in _swp_seqs if swp in key[:60]]
    if not hits:
        return None
    if len(hits) > 1:
        print("# Multiple matches found for %s:" % swp)
        for hit in hits:
            print('# %s' % hit)
        return None
    seq = _swp_seqs[hits[0]]
    # Build the wrapped text in one join() rather than growing a string.
    return ''.join(seq[i:i + 70] + '\n' for i in range(0, len(seq), 70))
示例14: __init__
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def __init__(self, fastaSeqs, motifDict, thresh=0.5, window=200):
    """
    Build one SeqMap per input sequence and store them in self.seqMaps.

    fastaSeqs -- either a string (a path loaded with Fasta.load()) or a
                 dict of name -> sequence that is used as is.
    motifDict, thresh, window
              -- passed through unchanged to every SeqMap.

    For each sequence the construction time in seconds and the running
    sequence count are printed, separated by a tab.

    Raises AssertionError when fastaSeqs is neither a string nor a dict,
    and when more than 250 sequences are supplied.
    """
    self.seqMaps = {}
    # Get seqs from fasta
    # isinstance() also admits str/dict subclasses, which an exact type()
    # comparison would reject.
    assert isinstance(fastaSeqs, (str, dict)), \
        'MapLib arg(fastaSeqs) must be string pointing to file or a seqDict.'
    if isinstance(fastaSeqs, str):
        seqs = Fasta.load(fastaSeqs)
    else:
        seqs = fastaSeqs
    # Instantiate a SeqMap obj for each seq in seqs
    for c, k in enumerate(seqs, 1):
        # NOTE(review): hard cap of 250 sequences; the reason is not
        # visible here -- confirm before relaxing it.
        assert c <= 250
        realT1 = time()
        self.seqMaps[k] = SeqMap(k, seqs[k], motifDict, thresh=thresh, window=window)
        realT2 = time()
        print('%.4f\t%s' % (realT2 - realT1, c))
示例15: geneList2FastaDict
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import load [as 别名]
def geneList2FastaDict(geneList, sourceFastaPath, hardMasked=True):
    """
    Returns a Dict of requested fasta recs in form SeqName:Sequence.
    Defaults to HardMasked return sequences.

    geneList        -- names of the records wanted.
    sourceFastaPath -- FASTA file to take them from (read with Fasta.load()).
    hardMasked      -- when true, the result is passed through
                       softMaskDict2HardMask() before it is returned.

    Names that are missing from the source file, or whose sequence is
    empty, are left out, so the result may be shorter than geneList.  The
    requested and found counts are printed.
    """
    sourceDict = Fasta.load(sourceFastaPath)
    # make new dict of all genes both in geneList AND sourceDict
    # new dict may be shorter than geneList!!!!!!
    newDict = {}
    for gene in geneList:
        # .get() so that a name absent from the source is skipped rather
        # than raising KeyError.
        seq = sourceDict.get(gene)
        if seq:
            newDict[gene] = seq
    print("%s genes names given, %s found." % (len(geneList), len(newDict)))
    if hardMasked:
        softMaskDict2HardMask(newDict)
    return newDict