本文整理汇总了Python中TAMO.seq.Fasta类的典型用法代码示例。如果您正苦于以下问题：Python Fasta类的具体用法？Python Fasta怎么用？Python Fasta使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Fasta类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: SGDData
def SGDData():
    """Download the SGD yeast data files and assemble one genome FASTA file.

    The feature tables, protein FASTA files and per-chromosome ``chrNN.fsa``
    files listed below are handed to ``downloadfiles`` together with
    ``TAMO.paths.SGDdir``.  Each chromosome file is then loaded and its first
    record is stored under the key ``'chr<N> <original id>'``; the combined
    dictionary is written to ``NCBI_yeast_genome.fsa``.
    """
    root = TAMO.paths.SGDdir
    urlroot = 'ftp://genome-ftp.stanford.edu/pub/yeast/data_download/'
    files = ['chromosomal_feature/SGD_features.tab',
             'chromosomal_feature/dbxref.tab',
             'chromosomal_feature/chromosome_length.tab',
             'sequence/GenBank/yeast_nrpep.fasta.gz',
             'sequence/genomic_sequence/orf_protein/orf_trans_all.fasta.gz',
             ('http://yeastgfp.ucsf.edu/allOrfData.txt', 'Huh_Nature_2003.tab')
             ]
    chrs = '01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 mt'.split()
    files.extend(['sequence/NCBI_genome_source/chr%s.fsa' % x for x in chrs])
    downloadfiles(root, urlroot, files)

    from TAMO.seq import Fasta
    print("Assembling yeast genome sequence files into a single file (NCBI_yeast_genome.fsa)")
    genome = {}
    for chr_label in chrs:
        chr_seqs = Fasta.load('%s/chr%s.fsa' % (root, chr_label))
        # Only the first record of each chromosome file is used.
        seq_id, seq = list(chr_seqs.items())[0]
        # '01' -> '1'; labels not starting with '0' ('10'..'16', 'mt') are kept.
        short_label = chr_label[1] if chr_label[0] == '0' else chr_label
        genome['chr%s %s' % (short_label, seq_id)] = seq
    # NOTE(review): the directory is concatenated without a separator here, so
    # SGDdir presumably ends with '/' -- confirm against TAMO.paths.
    Fasta.write(genome, root + 'NCBI_yeast_genome.fsa')
示例2: genomebg
def genomebg(infile, outfile):
    """Run the external ``genomebg.linux`` program on a FASTA file.

    The input is loaded with ``Fasta.load`` and rewritten to a temporary file
    with ``linelen=1000000000`` (presumably so each sequence sits on a single
    line -- confirm against ``Fasta.write``).  The program is then invoked as
    ``genomebg.linux -i <tmpfile> -o <outfile>`` and its combined
    stdout/stderr is echoed.  ``"Exited"`` is printed when the program
    returns a non-zero status or cannot be started.

    Args:
        infile: path of the FASTA file to read.
        outfile: path passed to the program's ``-o`` option.
    """
    import subprocess

    exe = MDSCAN_DIR + 'genomebg.linux'
    fsaD = Fasta.load(infile)
    # mkstemp creates the file itself; mktemp only returned a name, leaving a
    # window in which another process could claim it.
    fd, tmpfsa = tempfile.mkstemp()
    os.close(fd)
    try:
        Fasta.write(fsaD, tmpfsa, linelen=1000000000)
        try:
            # Argument list (no shell) so paths with spaces or shell
            # metacharacters are passed through verbatim.
            proc = subprocess.Popen([exe, '-i', tmpfsa, '-o', outfile],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT,
                                    universal_newlines=True)
        except OSError as err:
            # The shell used to report a missing executable on the captured
            # stream; keep that best-effort behaviour instead of raising.
            print(err)
            print("Exited")
            return
        output, _ = proc.communicate()
        for line in output.splitlines(True):
            print(line)
        if proc.returncode:
            print("Exited")
    finally:
        # Always remove the temporary FASTA, even if the run failed.
        os.unlink(tmpfsa)
示例3: memefiles2tamo
# Collect the motifs found in AlignACE ('.ace') / MEME ('.meme*') result
# files, fill in missing per-motif statistics using a ProbeSet, and save all
# collected motifs to `tamoname` via MotifTools.save_motifs.
#
# files    -- iterable of result-file names; the extension selects the parser.
# tamoname -- output name handed to MotifTools.save_motifs.
#
# Reads the module-level names `probefile` and `fsafile`, and may rebind the
# module-level `PROBESET`.
#
# NOTE(review): indentation was lost in this listing, so which of the
# statements below belong inside the `for filename in files` loop cannot be
# determined from here -- confirm against the original source before editing.
def memefiles2tamo(files, tamoname):
global probefile, PROBESET, fsafile
motifs = []
for filename in files:
print ">>>SDFSD>F ",filename
# Pick the parser from the file extension.
# NOTE(review): a name matching neither pattern leaves `mdobject` unset (or
# stale from an earlier file).
if re.search('\.ace$',filename):
mdobject = AlignAce.AlignAce(filename)
# Fall back to a FASTA name derived from the result-file name.
if not mdobject.fastafile: mdobject.fastafile=filename.replace('.ace','.fsa')
elif re.search('\.meme.*$',filename):
mdobject = Meme.Meme(filename)
if not mdobject.fastafile:
# Collapse a '.<one char>.meme' component to '.meme', then swap '.meme' for '.fsa'.
mdobject.fastafile=re.sub('\..\.meme','.meme',filename).replace('.meme','.fsa')
motifs.extend(mdobject.motifs)
#fsaname = find_fsa(mdobject.fastafile)
print mdobject.fastafile
# A truthy module-level `fsafile` overrides the lookup via Fasta.find.
if fsafile: fsaname = fsafile
else: fsaname = Fasta.find(mdobject.fastafile)
fsaD = Fasta.load(fsaname)
probes = fsaD.keys()
# With no `probefile` set, a fresh 'YEAST' ProbeSet replaces the global PROBESET.
if not probefile:
PROBESET = MotifMetrics.ProbeSet('YEAST')
#PROBESET= pick_genome(fsaname)
# Register every loaded sequence in the ProbeSet under its FASTA key.
for key,seq in fsaD.items():
PROBESET.probes[key] = seq
# Compute only the statistics that still hold what look like placeholder
# values (1 for pvalue/church, None for ROC_auc/frac) -- presumably the
# motif defaults; verify against the Motif class.
for motif in motifs:
if motif.pvalue == 1: motif.pvalue = PROBESET.p_value(motif,probes,'v')
if motif.church == 1: motif.church = PROBESET.church(motif,probes,'v')
#if motif.E_site == None: motif.E_site = PROBESET.E_sitef(motif,probes,3,'v')
#if motif.E_chi2 == None: motif.E_chi2 = PROBESET.E_chi2(motif,probes,None,'v')
#if motif.E_seq == None: motif.E_seq = PROBESET.E_seq(motif,probes,'v')
if motif.ROC_auc== None: motif.ROC_auc= PROBESET.ROC_AUC(motif,probes,'v')
#if motif.MNCP == None: motif.MNCP = PROBESET.MNCP(motif,probes,'v')
if motif.frac == None: motif.frac = PROBESET.frac(motif,probes,'v',0.7)
if re.search('\.meme$',filename):
# MAP is set to -log10(evalue) for '.meme' files.
motif.MAP = -math.log(motif.evalue)/math.log(10)
# `0 and ...` is always false, so this branch is disabled and never runs.
if 0 and (motif.CRA == None):
try:
pass
CRA, Cfrac = PROBESET.cons_ROC_AUC(motif,probes,'v',tuple='YES')
motif.CRA = CRA
motif.Cfrac = Cfrac
except: pass
# Ascending sort by pvalue ('.meme' files) or church (everything else).
# NOTE(review): this sorts mdobject.motifs, not the accumulated `motifs`
# list that is saved below.
if re.search('\.meme$',filename):
mdobject.motifs.sort(lambda x,y: cmp(x.pvalue, y.pvalue))
else:
mdobject.motifs.sort(lambda x,y: cmp(x.church, y.church))
MotifTools.save_motifs(motifs,tamoname)
示例4: calcStats
def calcStats(fastaPath):
    """Compute nucleotide composition statistics for a FASTA file.

    All sequences returned by ``Fasta.load`` are concatenated and upper-cased
    before counting.

    Args:
        fastaPath: path handed to ``Fasta.load``.

    Returns:
        dict with the keys
        ``'seqLen'``    number of sequences in the file (a count, despite the name),
        ``'totNucs'``   total length of all sequences,
        ``'aCnt'``, ``'cCnt'``, ``'gCnt'``, ``'tCnt'``, ``'nCnt'``  per-letter counts,
        ``'nonNs'``     A + C + G + T,
        ``'n2tot'``     N count / total length,
        ``'n2nonN'``    N count / nonNs,
        ``'percentGC'`` (G + C) / nonNs, as a fraction.

        When a denominator is zero the ratio is reported as 0.0, except
        ``'n2nonN'`` which is ``inf`` when there are Ns but no A/C/G/T.
    """
    seqFile = Fasta.load(fastaPath)
    # join() builds the string once; repeated += was quadratic in the worst case.
    combinedSeq = ''.join(seqFile[name] for name in seqFile).upper()

    seqs = len(seqFile)
    totNucs = len(combinedSeq)
    aCnt = combinedSeq.count('A')
    cCnt = combinedSeq.count('C')
    gCnt = combinedSeq.count('G')
    tCnt = combinedSeq.count('T')
    nCnt = combinedSeq.count('N')
    nonNs = aCnt + cCnt + gCnt + tCnt

    # Guard the divisions: an empty file or an all-N file used to raise
    # ZeroDivisionError.
    n2tot = float(nCnt) / totNucs if totNucs else 0.0
    if nonNs:
        n2nonN = float(nCnt) / nonNs
        percentGC = float(gCnt + cCnt) / nonNs
    else:
        n2nonN = float('inf') if nCnt else 0.0
        percentGC = 0.0

    return {'seqLen': seqs,
            'totNucs': totNucs,
            'aCnt': aCnt,
            'cCnt': cCnt,
            'gCnt': gCnt,
            'tCnt': tCnt,
            'nCnt': nCnt,
            'nonNs': nonNs,
            'n2tot': n2tot,
            'n2nonN': n2nonN,
            'percentGC': percentGC}
示例5: info2seeds
def info2seeds(N, infofile, probefile, species='YEAST'):
    """Build seed motifs from the most enriched n-mers of a sequence file.

    Candidate n-mers are scored with ``ProbeSet.p_value`` against the probe
    IDs read from ``probefile``; the 20 best-scoring (lowest p-value)
    candidates are each turned into a ``MotifTools.Motif``.  The 40
    best-scoring ``(nmer, p)`` tuples are printed.

    Args:
        N: n-mer width passed to ``MotifTools.top_nmers``; a falsy value
            means the sequences from ``infofile`` are used as candidates
            directly.
        infofile: file read with ``Fasta.seqs``.
        probefile: file handed to ``ProbeSet.ids_from_file``.
        species: argument for the ``ProbeSet`` constructor.

    Returns:
        list of at most 20 motifs, best-scoring first.
    """
    G = ProbeSet(species)
    IDs = G.ids_from_file(probefile)
    Q = EM.theMarkovBackground.zeroth()

    seqs = Fasta.seqs(infofile)
    if not N:
        nmers = seqs
    else:
        nmers = MotifTools.top_nmers(N, seqs)
        # NOTE(review): indentation was lost in the listing; the 1000-item
        # cap is assumed to apply only to the top_nmers branch -- confirm.
        if len(nmers) > 1000:
            nmers = nmers[0:1000]

    # Fixed: was `% len(nmers, infofile)`, which raises TypeError because
    # len() takes exactly one argument.
    print("Scoring enrichment of %d nmers from %s" % (len(nmers), infofile))
    sys.stdout.flush()

    nmers_scoresT = []
    for nmer in nmers:
        if nmer.isalpha():
            p = G.p_value(nmer, IDs, '')  # 'verbose'
            nmers_scoresT.append((nmer, p))
    # Ascending by p-value; a stable key sort gives the same order as the old
    # cmp-based sort.
    nmers_scoresT.sort(key=lambda pair: pair[1])

    models = []
    for seq, _p in nmers_scoresT[:20]:
        m = MotifTools.Motif('', Q)
        m.compute_from_text(seq, 0.1)
        models.append(m)

    for tup in nmers_scoresT[0:40]:
        print(tup)
    return models
示例6: main
def main():
    """Print pseudocounted n-mer frequencies (widths 1-6) for a FASTA file.

    The FASTA path is taken from ``sys.argv[1]``.  For every width, each
    n-mer from ``permute`` starts with a pseudocount of 1; the counts reported
    by ``MotifTools.top_nmers`` are then added to both the n-mer and its
    reverse complement.  One header line and one ``nmer frequency`` line per
    n-mer (in sorted order) are written to stdout for each width.
    """
    fasta_path = sys.argv[1]
    seqs = Fasta.load(fasta_path).values()

    for w in range(1, 7):
        candidates = permute(w)
        observed = MotifTools.top_nmers(w, seqs, 'with counts', 'purge Ns')

        # Seed every possible n-mer with a pseudocount of one.
        tally = {}
        total = 0
        for nmer in candidates:
            tally[nmer] = 1
            total += 1

        for nmer, count in list(observed):
            try:
                rc = MotifTools.revcomplement(nmer)
                tally[nmer] += count
                tally[rc] += count
                total += 2 * count
            except KeyError:
                # n-mers that are not in the table are skipped.
                continue

        print("# freq in %s (total %d with pseudocounts)" % (fasta_path, total))
        for nmer in sorted(tally):
            print("%-7s %20.17f" % (nmer, float(tally[nmer]) / total))
        sys.stdout.flush()
示例7: main
def main(fastafile, outDirectory):  # !! 1/2/09 AD added 'fastafile' var and changed 'if __name__' as way to call this from script.
    """Write pseudocounted n-mer frequencies (widths 1-6) to a ``.freq`` file.

    For every width, each n-mer from ``permute`` starts with a pseudocount of
    1; the counts reported by ``MotifTools.top_nmers`` are then added to both
    the n-mer and its reverse complement.  The result is written to
    ``<outDirectory>/<basename of fastafile>.freq``.

    Args:
        fastafile: path of the FASTA file to analyse ('/'-separated).
        outDirectory: directory that receives the ``.freq`` file.
    """
    seqsD = Fasta.load(fastafile)
    seqs = seqsD.values()
    baseName = fastafile.split('/')[-1]

    output = []
    for w in range(1, 7):
        allnmers = permute(w)
        nmersT = MotifTools.top_nmers(w, seqs, 'with counts', 'purge Ns')
        nmersD = {}
        total = 0
        for nmer in allnmers:
            nmersD[nmer] = 1  # Pseudo count
            total = total + 1
        for nmer, count in nmersT[:]:
            try:
                rc = MotifTools.revcomplement(nmer)
                nmersD[nmer] = nmersD[nmer] + count
                nmersD[rc] = nmersD[rc] + count
                total = total + 2 * count
            except KeyError:
                # n-mers that are not in the table are skipped.
                pass
        output.append("# freq in %s (total %d with pseudocounts)\n" % (baseName, total))  # AD 02-27-09 added a '\n' to make file look right
        for nmer in sorted(nmersD):
            output.append("%-7s %20.17f\n" % (nmer, float(nmersD[nmer]) / total))  # AD 02-27-09 added a '\n' to make file look right

    # Write the results; the with-block closes (and flushes) the file, which
    # the original never did.
    outPath = '%s/%s.freq' % (outDirectory, baseName)
    with open(outPath, 'w') as outFile:
        outFile.writelines(output)
示例8: loadMiRNAs
def loadMiRNAs(miRNA_Path):
    """Load mature miRNA sequences from a FASTA file.

    ``miRNA_Path`` is handed to ``Fasta.load`` and the result (a dict,
    according to the original documentation) is returned unchanged.
    """
    mature_miRNAs = Fasta.load(miRNA_Path)
    return mature_miRNAs
示例9: orf2pseq
def orf2pseq(orf):
    """Return the protein sequence for an ORF name, or '' if it is unknown.

    On first use the module-level cache ``_orfpseqs`` is filled from the
    FASTA file named by ``_ORFPSEQS``, and a trailing ``'*'`` is stripped
    from every sequence.

    Args:
        orf: key to look up in the cache.

    Returns:
        the cached sequence, or the empty string when ``orf`` is not present.
    """
    global _orfpseqs
    if not _orfpseqs:
        from TAMO.seq import Fasta
        _orfpseqs = Fasta.load(_ORFPSEQS)
        # Iterate over a snapshot because entries are replaced in the loop.
        for _orf, pseq in list(_orfpseqs.items()):
            # endswith() is safe for empty sequences; pseq[-1] raised IndexError.
            if pseq.endswith('*'):
                _orfpseqs[_orf] = pseq[:-1]
    return _orfpseqs.get(orf, '')
示例10: train_final
def train_final(self, model, fg, bg, N, beta):
    """Train the final motif model and pick an SVM classifier for it.

    Steps, as performed by the calls below:
      1. Collect the foreground sequences from ``self.all_probes``
         (upper-cased, with ``';'`` and ``'N'`` removed, empty ones dropped).
      2. Take the motif from ``self.train_model`` when ``self.refine`` is
         set, otherwise from ``self.models[model]``.
      3. Compute values for ``fg`` and ``bg`` with ``self.get_LLRs`` and
         oversample the foreground values with ``self.SMOTE``.
      4. Train one SVM per C value and keep the one with the lowest error on
         the training data.
      5. If ``self.dump`` is set, write the foreground sequences to
         ``<prefix>.pos.fsa`` / ``<prefix>.neg.fsa`` depending on whether
         ``SVM_test`` reports a zero or non-zero error for them.

    Args:
        model: key into ``self.models`` / argument for ``self.train_model``.
        fg: foreground probe names (keys of ``self.all_probes``).
        bg: background probe names.
        N: passed twice to ``self.SMOTE``.
        beta: passed to ``self.train_model``.

    Returns:
        tuple ``(final_motif, best_classifier, fn)`` where ``fn`` is the third
        value returned by the final ``SVM_test`` call on the foreground set.
    """
    input_seqs = []
    for s in fg:
        # Plain replace() is equivalent to the former regex substitutions.
        iseq = self.all_probes[s].upper().replace(";", "").replace("N", "")
        if iseq:
            input_seqs.append(iseq)

    if self.refine:
        final_motif = self.train_model(model, input_seqs, beta)
    else:
        final_motif = self.models[model]

    train_pos = self.get_LLRs(final_motif, fg)
    train_neg = self.get_LLRs(final_motif, bg)
    over_sampled_positive = self.SMOTE([train_pos], N, N)[0]

    # Train SVM to classify our training set
    c_vals = [1.0e-10, 1.0e-4, 1.0e-3, 1.0e-2, 0.05, 0.1, 1.0, 10.0, 100.0]
    best_classifier = None
    best_err = 1.0
    for c in c_vals:
        classifier = self.SVM_train(over_sampled_positive, train_neg, c)
        train_err = self.SVM_test(classifier, over_sampled_positive, train_neg)
        # Always keep the first classifier so that best_classifier is never
        # None when no error is below the initial 1.0 threshold; after that
        # only a strictly lower error replaces it, as before.
        if best_classifier is None or train_err < best_err:
            best_err = min(best_err, train_err)
            best_classifier = classifier

    (train_err, fp, fn) = self.SVM_test(best_classifier, train_pos, [], 1)

    if self.dump:
        motif = {}
        no_motif = {}
        for name, val in zip(fg, train_pos):
            # Non-zero error: the sequence goes to the '.neg.fsa' set.
            if self.SVM_test(best_classifier, [val], []):
                no_motif[name] = self.all_probes[name]
            else:
                motif[name] = self.all_probes[name]
        # NOTE(review): split('.')[0] keeps only the text before the FIRST
        # dot, so a path such as './x.tamo' yields an empty prefix -- confirm
        # that motif_file never contains earlier dots.
        prefix = self.motif_file.split('.')[0]
        Fasta.write(motif, prefix + '.pos.fsa')
        Fasta.write(no_motif, prefix + '.neg.fsa')

    return (final_motif, best_classifier, fn)
示例11: loadSeqs
def loadSeqs(fastaPathList):
    """Merge the sequences of several FASTA files into one dict.

    Every path in ``fastaPathList`` is read with ``Fasta.load`` and merged
    into a single dict (later files overwrite earlier entries with the same
    key).  The merged dict is then passed to
    ``bioDefs.softMaskDict2HardMask`` to convert soft masking to hard
    masking, and returned.
    """
    merged = {}
    for fasta_path in fastaPathList:
        loaded = Fasta.load(fasta_path)
        merged.update(loaded)
    # NOTE(review): indentation was lost in the listing; masking is assumed to
    # run once, after all files are merged -- confirm.
    bioDefs.softMaskDict2HardMask(merged)
    return merged
示例12: get_seq
def get_seq(chr, start=None, stop=None):
    """Return a stretch of a chromosome sequence (1-based, inclusive).

    On first use the module-level cache ``ChrD`` is filled from
    ``SGDdir + 'NCBI_yeast_genome.fsa'``.

    Args:
        chr: chromosome designator.  Anything that is not a string starting
            with ``'chr'`` is prefixed with it (``1`` -> ``'chr1'``,
            ``'X'`` -> ``'chrX'``).  When ``start`` is omitted, a range may
            be embedded: ``'chr4:454-465'``.
        start: 1-based first position; ``None`` means the beginning.
        stop: 1-based last position (inclusive); ``None`` means the end.

    Returns:
        the requested slice of ``ChrD[chr]``.

    Raises:
        KeyError: if the chromosome is not present in ``ChrD``.
    """
    global ChrD
    if not ChrD:
        from TAMO.seq import Fasta
        ChrD = Fasta.load(SGDdir + 'NCBI_yeast_genome.fsa')

    if not isinstance(chr, str) or not chr.startswith('chr'):  # 1 -> chr1, 'X' -> chrX
        chr = 'chr%s' % chr

    if start is None and chr.find(':') > 0:  # chr4:454-465 -> chr4, 454, 465
        chr, _range = chr.split(':')
        _start, _stop = _range.split('-')
        start, stop = int(_start), int(_stop)

    # Fixed: the slice used an undefined name `end` instead of the `stop`
    # parameter, and `start - 1` failed when no coordinates were given.
    first = 0 if start is None else start - 1
    return ChrD[chr][first:stop]
示例13: swp2swp
def swp2swp(swp):
    """Convert, when possible, from P014543 to ADR1_YEAST (and back).

    Only works for yeast right now.

    On first use the module-level table ``_swp2swp`` is filled from the
    header lines of the FASTA file named by ``_SWPFASTA``: for each header
    whose second token starts with ``'SW'`` and whose third token starts
    with ``'P'``, the second token (minus its first three characters) and
    the third token are mapped to each other.

    Args:
        swp: either form of the name.

    Returns:
        the other form of the name, or ``None`` when ``swp`` is unknown.
    """
    global _swp2swp
    if not _swp2swp:
        for line in Fasta.keys(_SWPFASTA, key_func=lambda x: x):
            toks = line.split()
            # Skip headers with fewer than three tokens instead of failing
            # with IndexError.
            if len(toks) < 3:
                continue
            text_name, numeric_name = toks[1], toks[2]
            if text_name[0:2] == 'SW' and numeric_name[0] == 'P':
                _swp2swp[text_name[3:]] = numeric_name
                _swp2swp[numeric_name] = text_name[3:]
    return _swp2swp.get(swp)
示例14: go
def go(self):
    """Execution function: runs TAMO.MD.Meme.Meme and catches the output in self.output for access from MDAP.

    The text produced by ``Fasta.text(self.coRegSeqs[0])`` is written to a
    temporary FASTA file in the current directory, ``Meme`` is run on that
    file with ``self.extra_args`` and ``self.bfile``, and the temporary file
    is removed afterwards.
    """
    import time
    # write a temp fasta file of coregulated seqs to use as input to Meme(file=TempFasta)
    ctimeStr = time.ctime().replace(' ', '_')
    fileName = 'tempFastaOfCoRegSeqs.MDAP.%s.fas' % (ctimeStr)
    try:
        # The with-block closes the file, so everything is flushed to disk
        # before Meme reads it (the original left the handle open).
        with open(fileName, 'w') as tFasta:
            tFasta.write(Fasta.text(self.coRegSeqs[0]))
        # Call TAMO to do its thing:
        self.output = Meme(file=fileName, width='', extra_args=self.extra_args, bfile=self.bfile)
    finally:
        # delete temp file, even when writing or Meme failed
        if os.path.exists(fileName):
            os.remove(fileName)
示例15: LoadDNA
def LoadDNA(verbose=False):
    """Read the DNA sub-sequence described by the ``DNA_section`` parameters.

    Reads ``FILE``, ``CHR``, ``START`` and ``END`` (or, when ``END`` is
    falsy, ``START + LENGTH``) from ``params``, loads the FASTA file and
    returns ``seq[START:END]`` of the named chromosome.

    Args:
        verbose: when true, progress information is printed.

    Returns:
        the extracted sequence, or ``""`` when no file is configured or the
        chromosome is not present in the file.
    """
    dna = ""
    fastafile = params.GetString(DNA_section, "FILE")
    if fastafile:
        chromo = params.GetString(DNA_section, "CHR")
        chr_start = params.GetInt(DNA_section, "START")
        chr_end = params.GetInt(DNA_section, "END")
        if not chr_end:
            # No explicit end: derive it from START + LENGTH.
            chr_end = params.GetInt(DNA_section, "LENGTH")
            chr_end += chr_start

        if verbose:
            print("Loading fasta: [%s]\n" % fastafile)
        seqs = Fasta.load(fastafile)

        if verbose:
            n = sum(len(s) for s in seqs.values())
            print("Genome length = %d, # chromosomes = %d\n" % (n, len(seqs)))

        if chromo in seqs:
            seq = seqs[chromo]
            if verbose:
                print("Chr[%s] = %d nt\n" % (chromo, len(seq)))
            dna = seq[chr_start:chr_end]
            if verbose:
                print("DNA[%d:%d] = %d nt\n" % (chr_start, chr_end, len(dna)))
        elif verbose:
            # Fixed: this message referenced the undefined name `filename`,
            # raising NameError instead of reporting the missing chromosome.
            print("Cannot find [%s] chromosome in %s\n" % (chromo, fastafile))

    if verbose:
        print("DNA:[%s]\n" % dna)
    return dna