本文整理汇总了 Python 中 TAMO.seq.Fasta.write 方法的典型用法代码示例。如果您正苦于以下问题:Python Fasta.write 方法的具体用法?Python Fasta.write 怎么用?Python Fasta.write 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 TAMO.seq.Fasta 的用法示例。
在下文中一共展示了Fasta.write方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: SGDData
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import write [as 别名]
def SGDData():
    """Fetch the SGD yeast data files and build one combined genome FASTA.

    The list of files below (plus one ``chrNN.fsa`` file per chromosome) is
    handed to ``downloadfiles`` together with ``TAMO.paths.SGDdir`` and the
    SGD FTP root.  Each ``chrNN.fsa`` file is then loaded from
    ``TAMO.paths.SGDdir`` and its first record is stored under the key
    ``'chr<N> <original id>'`` (a leading zero in the chromosome label is
    dropped, e.g. ``'01'`` -> ``'1'``).  The combined dictionary is written
    to ``NCBI_yeast_genome.fsa`` inside ``TAMO.paths.SGDdir``.
    """
    import os

    from TAMO.seq import Fasta

    root = TAMO.paths.SGDdir
    urlroot = 'ftp://genome-ftp.stanford.edu/pub/yeast/data_download/'
    files = ['chromosomal_feature/SGD_features.tab',
             'chromosomal_feature/dbxref.tab',
             'chromosomal_feature/chromosome_length.tab',
             'sequence/GenBank/yeast_nrpep.fasta.gz',
             'sequence/genomic_sequence/orf_protein/orf_trans_all.fasta.gz',
             ('http://yeastgfp.ucsf.edu/allOrfData.txt', 'Huh_Nature_2003.tab')
             ]
    chrs = '01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 mt'.split()
    files.extend(['sequence/NCBI_genome_source/chr%s.fsa' % x for x in chrs])
    downloadfiles(root, urlroot, files)

    print("Assembling yeast genome sequence files into a single file (NCBI_yeast_genome.fsa)")
    D = {}
    for label in chrs:
        # os.path.join is used for both the inputs and the output so that the
        # result does not depend on whether SGDdir ends with a path separator.
        records = Fasta.load(os.path.join(root, 'chr%s.fsa' % label))
        # Only the first record of each chromosome file is used.
        seq_id, seq = list(records.items())[0]
        if label[0] == '0':
            label = label[1]
        D['chr%s %s' % (label, seq_id)] = seq
    Fasta.write(D, os.path.join(root, 'NCBI_yeast_genome.fsa'))
示例2: genomebg
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import write [as 别名]
def genomebg(infile, outfile):
    """Run the ``genomebg.linux`` executable from ``MDSCAN_DIR`` on a FASTA file.

    ``infile`` is loaded with ``Fasta.load`` and re-written to a temporary
    file with a very large ``linelen`` (presumably so every sequence sits on
    a single line -- confirm against ``Fasta.write``).  The executable is then
    invoked as ``genomebg.linux -i <tmp> -o <outfile>``; its combined
    stdout/stderr is echoed line by line, and ``"Exited"`` is printed when the
    pipe reports a non-zero close status.  The temporary file is always
    removed, even if loading, writing or running fails.
    """
    EXE = MDSCAN_DIR + 'genomebg.linux'
    fsaD = Fasta.load(infile)
    # mkstemp creates the file securely; mktemp only returns a name and is
    # open to a race between choosing the name and creating the file.
    fd, tmpfsa = tempfile.mkstemp()
    os.close(fd)
    try:
        Fasta.write(fsaD, tmpfsa, linelen=1000000000)
        CMD = '%s -i %s -o %s' % (EXE, tmpfsa, outfile)
        # The subshell + redirection folds stderr into the stream we read.
        FID = os.popen('( %s ;) 2>&1' % CMD, 'r')
        for line in FID.readlines():
            print(line)
        if FID.close():
            print("Exited")
    finally:
        os.unlink(tmpfsa)
示例3: train_final
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import write [as 别名]
def train_final(self, model, fg, bg, N, beta):
    """Build the final motif and pick the SVM classifier with the lowest training error.

    The foreground probes named in ``fg`` are upper-cased and stripped of
    ``;`` and ``N`` characters; empty results are discarded.  When
    ``self.refine`` is set the motif comes from ``self.train_model`` on those
    sequences, otherwise ``self.models[model]`` is used as is.  Scores from
    ``self.get_LLRs`` for ``fg`` and ``bg`` are used to train one classifier
    per candidate ``c`` value (the foreground scores first go through
    ``self.SMOTE``), keeping the first classifier with the strictly lowest
    error below 1.0.

    When ``self.dump`` is set, foreground probes are split by the result of
    ``self.SVM_test`` on each single score and written to
    ``<motif_file stem>.pos.fsa`` / ``.neg.fsa``.

    Returns the tuple ``(final_motif, best_classifier, fn)`` where ``fn`` is
    the third value returned by ``self.SVM_test(best_classifier, train_pos,
    [], 1)`` (presumably the false-negative count -- confirm in SVM_test).
    """
    # Normalise the foreground probe sequences and drop any that end up empty.
    cleaned = (
        self.all_probes[probe].upper().replace(";", "").replace("N", "")
        for probe in fg
    )
    input_seqs = [probe_seq for probe_seq in cleaned if len(probe_seq) > 0]

    if self.refine:
        final_motif = self.train_model(model, input_seqs, beta)
    else:
        final_motif = self.models[model]

    train_pos = self.get_LLRs(final_motif, fg)
    train_neg = self.get_LLRs(final_motif, bg)
    over_sampled_positive = self.SMOTE([train_pos], N, N)[0]

    # Train SVM to classify our training set
    best_classifier, best_err = None, 1.0
    for c in (1.0e-10, 1.0e-4, 1.0e-3, 1.0e-2, 0.05, 0.1, 1.0, 10.0, 100.0):
        candidate = self.SVM_train(over_sampled_positive, train_neg, c)
        candidate_err = self.SVM_test(candidate, over_sampled_positive, train_neg)
        if candidate_err < best_err:
            best_err, best_classifier = candidate_err, candidate

    (train_err, fp, fn) = self.SVM_test(best_classifier, train_pos, [], 1)

    if self.dump:
        motif, no_motif = {}, {}
        for name, val in zip(fg, train_pos):
            # A truthy error for this single probe sends it to the "no motif" set.
            bucket = no_motif if self.SVM_test(best_classifier, [val], []) else motif
            bucket[name] = self.all_probes[name]
        stem = self.motif_file.split('.')[0]
        Fasta.write(motif, stem + '.pos.fsa')
        Fasta.write(no_motif, stem + '.neg.fsa')

    return (final_motif, best_classifier, fn)
示例4:
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import write [as 别名]
from TAMO.seq import Fasta
# Source FASTA and the two destination files for the random split.
fasFile = '/Users/biggus/Documents/James/Writings_Talks/Grants/09_Feb/PrelimData_Grant_Feb09/Clus2_247genes.fas'
oFile1 = '/Users/biggus/Documents/James/Writings_Talks/Grants/09_Feb/PrelimData_Grant_Feb09/Clus2_247genes.sample2.fas'
oFile2 = '/Users/biggus/Documents/James/Writings_Talks/Grants/09_Feb/PrelimData_Grant_Feb09/Clus2_247genes.test2.fas'

# random_split is given the path and the fraction 0.25 and returns two
# sequence dictionaries, which are written out in the same order.
firstDic, secDic = Fasta.random_split(fasFile, 0.25)
for seqDict, outPath in ((firstDic, oFile1), (secDic, oFile2)):
    Fasta.write(seqDict, outPath)

print('done')
示例5:
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import write [as 别名]
oligoType = 'control' # 'match' or 'control'
# The original ``assert oligoType == 'match' or 'control'`` could never fail
# because the non-empty string 'control' is always truthy.  An explicit
# membership test is used instead (and is not stripped under ``python -O``).
if oligoType not in ('match', 'control'):
    raise ValueError('oligoType MUST be only "match" or "control".')

# Load miRNA fastas into dict.
miRNAs = Fasta.file2dict(miRNAFile)

# Create new dict for seeds.
seeds = {}

# 1) Cycle through miRNA dict taking 7mers starting at pos 1
#    and then pos2.  Adapt key to reflect which.
# 2) Convert to all uppers and convert U's to T's
# 3) If oligoType == 'match', rvcmp each 7mer and adapt key
#    to reflect which.
for miRNA in miRNAs:
    sequence = miRNAs[miRNA]
    pos1_seed = sequence[:7].upper().replace('U', 'T')
    pos2_seed = sequence[1:8].upper().replace('U', 'T')
    if oligoType == 'match':
        seeds[miRNA + '_match_pos1'] = bioDefs.revComp(pos1_seed)
        seeds[miRNA + '_match_pos2'] = bioDefs.revComp(pos2_seed)
    else:
        seeds[miRNA + '_ctrl_pos1'] = pos1_seed
        seeds[miRNA + '_ctrl_pos2'] = pos2_seed

# Write out seed dict as fasta.
Fasta.write(seeds, seedFile)

print("Done.")
示例6: mkdirp
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import write [as 别名]
mkdirp(opts.out_dir)

# Write one gene-name file per randomized cluster list and, when requested,
# a matching FASTA file holding the sequences of those genes.
for i, clusterNames in enumerate(randClusterLists):
    oFileName = args[0].replace('.txt', 'randomGeneNames_%s.txt' % (i)).split('/')[-1]
    oFilePath = '%s/%s' % (opts.out_dir, oFileName)

    # ``with`` guarantees the handle is flushed and closed even on error.
    with open(oFilePath, 'w') as oFile:
        for name in clusterNames:
            oFile.write(name + '\n')

    # --- If Asked, Create Fastas ---
    if opts.make_fasta:
        # The names are read back from the file just written.  Plain 'r' is
        # used because that file was produced above with '\n' line endings
        # and the 'U' mode flag no longer exists in recent Python versions.
        with open(oFilePath, 'r') as namesFile:
            fNames = [l.strip() for l in namesFile]
        fastas = {}
        for f in fNames:
            fastas[f] = totalSeqs[f]
        Fasta.write(fastas, '%s/%s' % (opts.out_dir, oFileName.replace('.txt', '.fas')))

    # The original script unbinds the handle name once it is done with it.
    del oFile

print('Done.')

print("Original list:")
origLens = sorted(len(totalSeqs[g]) for g in geneNames)
print(origLens)
print('Avg: %.3f' % (average(origLens)))

print("Randomized lists:")
示例7: seq_msp
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import write [as 别名]
def seq_msp(fafile,seqfile,genome='mm9',convert=True,bedFrag=False):
    """Write 40-bp end sequences of CCGG-bounded fragments to a FASTA file.

    The FASTA file ``fafile`` is scanned for lower-case ``ccgg`` sites
    (presumably MspI cut sites, going by the function name -- confirm).
    Consecutive sites whose distance ``d`` satisfies ``40 < d < 250`` define a
    fragment.  For every fragment two loci are requested from a ``NibDB``:

    * ``(ch, x+1, x+41, '+')`` stored under key ``'<ch>:<x+1>+'``
    * ``(ch, y-37, y+3, '-')`` stored under key ``'<ch>:<y+3>-'``

    and the resulting sequences are written to ``seqfile`` with
    ``Fasta.write``.

    Parameters
    ----------
    fafile : path of the FASTA file to scan.  The name taken from the last
        ``>`` header line is used for every locus, and coordinates are never
        reset between records, so a single-record file is assumed.
    seqfile : output FASTA path.  With ``bedFrag`` the fragment table is
        saved next to it, using ``seqfile.replace('.fa', '_frag.bed')``.
    genome : ``'hg18'`` selects the hard-coded human nib directory; any other
        value uses the ``'mm9'`` settings from ``chipsequtil``.
    convert : when true every lower-case ``c`` in the fetched sequences is
        replaced by ``t`` (presumably an in-silico bisulfite conversion).
    bedFrag : when true, also save ``[ch, x+1, y+3]`` rows as a
        tab-delimited file.
    """
    # A line made of exactly fifty N's is skipped without scanning.
    n_line = 'N' * 50
    # ``hang`` carries the last three bases of the previous line so that a
    # site spanning a line break is still seen; ``start`` is offset by -3 so
    # that ``start + i`` is the 0-based coordinate of ``(hang + line)[i]``.
    start=-3
    hang='NNN'
    match=[]
    #find CCGG positions using Fasta file
    with open(fafile) as fa:
        for line in fa:
            l=line.strip('\n')
            # startswith() also copes with blank lines, where l[0] would raise.
            if l.startswith('>'):
                ch=l[1:]
                continue
            if l==n_line:
                hang=l[-3:]
            else:
                seq=hang+l
                # len(seq)-3 windows of width 4 exist; stopping one short (as
                # the original did) missed a site ending exactly at the end
                # of a line.
                for i in range(len(seq)-3):
                    if seq[i:i+4]=='ccgg': match.append(start+i)
                hang=seq[-3:]
            start+=len(l)
    print(len(match))

    #keep neighbouring cut sites whose distance d satisfies 40 < d < 250
    #(the original comment said 40-220bp; the code has always used 250)
    FRAG=[(x,y) for x,y in zip(match[:-1],match[1:]) if 40 < y-x < 250]
    print(len(FRAG))

    #nibDB the cut sites 40bp 5'-3' and
    #save each as a pair of Fasta items with keys chr:position(strand)
    seq_dict={}
    ids,loci=[],[]
    BF=[]
    for x,y in FRAG:
        if bedFrag: BF.append([ch,str(x+1),str(y+3)])
        #for x
        ids.append(ch+':'+str(x+1)+'+')
        loci.append((ch,x+1,x+41,'+'))
        #for y
        ids.append(ch+':'+str(y+3)+'-')
        loci.append((ch,y-37,y+3,'-'))
    if bedFrag: np.savetxt(seqfile.replace('.fa','_frag.bed'),BF,fmt='%s',delimiter='\t')

    if genome=='hg18': DB=NibDB(nib_dirs='/nfs/genomes/human_gp_mar_06/')
    else: DB=NibDB(nib_dirs=chipsequtil.get_org_settings('mm9')['genome_dir'])
    fa_ids,seqs=DB.get_fasta_batch(loci)

    # Both strands are stored the same way; the original's '+'/'-' branches
    # were identical (a reversal of the '-' reads had been commented out).
    for key,seq in zip(ids,seqs):
        seq_dict[key]=seq.replace('c','t') if convert else seq
    Fasta.write(seq_dict,seqfile)
示例8:
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import write [as 别名]
# Load the source records; the name is rebound from the file path to the
# dictionary returned by Fasta.load.
originalFastaDict = Fasta.load(originalFastaDict)

# New dict to catch copied seqObjs
desiredFastaDict = {}
for rec in desiredFastaList:
    # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
    if rec in originalFastaDict:
        desiredFastaDict[rec] = originalFastaDict[rec]
    else:
        print(rec+' not found in source fasta list!')

# Hard Mask if requested
if hardMask:
    # Every lower-case base is replaced with 'N'.
    for x in desiredFastaDict:
        masked = desiredFastaDict[x]
        for base in 'acgt':
            masked = masked.replace(base, 'N')
        desiredFastaDict[x] = masked
else:
    for x in desiredFastaDict:
        desiredFastaDict[x] = desiredFastaDict[x].upper() # make sure all letters are uppercase for downstream compatibility

# Write selected recs to outFile
Fasta.write(desiredFastaDict, outFile)

print("Done.")
示例9: time
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import write [as 别名]
from gusPyCode.MDAP_proj.MDAP_defs import shuffleSeqDict
from TAMO.seq import Fasta
from gusPyCode.defs.bioDefs import softMaskDict2HardMask
from time import time
from gusPyCode.defs.mosqData import promoterSeqPaths
# User Variables:
inFile = promoterSeqPaths.Aa_2000bpUp_softMasked
outFile = '/Users/biggus/Documents/James/Data/2KB/2kb_Sequence/2kb_Aedes/aedes2KBupStreamTSS.UnMasked.geneStrand.shuffledSeqs.1.fas'
hardMask = None

d = Fasta.load(inFile)

# Without hard masking, every sequence is upper-cased in place before shuffling.
if not hardMask:
    for seqName, seqText in list(d.items()):
        d[seqName] = seqText.upper()

# Time only the shuffling step itself.
t1 = time()
sD = shuffleSeqDict(d)
t2 = time()

Fasta.write(sD, outFile)

elapsedMin = (t2 - t1) / 60.0
print('Shuffling took %.2f min.' % elapsedMin)
示例10: len
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import write [as 别名]
import sys
import time
from TAMO.seq import Fasta
# Takes a fastaFilePath and a fraction between 0 and 1.  Writes two fasta
# files containing random sequences from fastaFilePath split randomly into
# files of size 'fraction' and 1-'fraction'.
# Files named as: fastaFilePath_fraction.Date_Time.fas

# Explicit checks are used instead of ``assert`` so that the validation still
# runs under ``python -O`` and a non-numeric fraction yields the usage text.
usage = 'usage = %s fastaFilePath fraction<0 to 1>' % (sys.argv[0].split('/')[-1])
if len(sys.argv[1:]) != 2:
    sys.exit(usage)
try:
    frac = float(sys.argv[2])
except ValueError:
    sys.exit(usage)
if not 0 <= frac <= 1:
    sys.exit(usage)

filePath = sys.argv[1]

# ctime() pads single-digit days with an extra space; split() without an
# argument collapses it, whereas split(' ') produced an empty field and
# shifted the day/time components.
versionID = time.ctime().split()
versionID = '%s%s_%s' % (versionID[1],versionID[2],versionID[3].replace(':','-'))

dict1,dict2 = Fasta.random_split(filePath,frac)

# Remove a trailing '.fas' suffix only.  rstrip('.fas') strips any run of the
# characters '.', 'f', 'a', 's' and would turn 'genes.fas' into 'gene'.
baseName = filePath.split('/')[-1]
if baseName.endswith('.fas'):
    baseName = baseName[:-len('.fas')]

out1 = '%s_%s_%s.fas' % (baseName,frac,versionID)
out2 = '%s_%s_%s.fas' % (baseName,1-frac,versionID)

Fasta.write(dict1,out1,linelen=100)
Fasta.write(dict2,out2,linelen=100)