本文整理汇总了Python中TAMO.seq.Fasta.file2dict方法的典型用法代码示例。如果您正苦于以下问题：Python Fasta.file2dict方法的具体用法？Python Fasta.file2dict怎么用？Python Fasta.file2dict使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类TAMO.seq.Fasta的用法示例。
在下文中一共展示了Fasta.file2dict方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: spawnOrthoGroups
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import file2dict [as 别名]
def spawnOrthoGroups(promoterFileList,nWayOrthoList):
    """Build the OrthoGroup objects used for the combined hypergeometric analysis.

    Every file in ``promoterFileList`` is loaded with ``Fasta.file2dict`` and
    merged into a single geneName -> promoter mapping.  One ``OrthoGroup`` is
    then created for each entry of ``nWayOrthoList`` whose members *all* have a
    non-empty promoter; groups with a missing member are skipped.

    Args:
        promoterFileList: list of paths to promoter FASTA files.  May be empty.
        nWayOrthoList: list of lists of gene names, one inner list per
            ortholog group.  NOTE: every inner list that produces a group is
            sorted in place (kept from the original implementation).

    Returns:
        dict whose keys are 'geneName1:geneName2:etc' (the sorted member names
        joined by ':') and whose values are ``OrthoGroup(groupDict)``, where
        ``groupDict`` maps each member name to its promoter.

    Raises:
        AssertionError: if ``promoterFileList`` is not a list, if its first
            element is not a string, or if a gene name is loaded from more
            than one promoter file.
    """
    # validation
    assert isinstance(promoterFileList, list), \
        'promoterFileList must be a list of file paths.\nYou provided type: "%s"' \
        % (type(promoterFileList))
    # Guarded so that an empty list no longer dies with IndexError.
    if promoterFileList:
        assert isinstance(promoterFileList[0], str), \
            'promoterFileList must be a list of file paths.\npromoterFileList[0] != type(\'\'): "%s"' \
            % (type(promoterFileList[0]))

    # load promoters
    allPromoters = {}
    for promoterFile in promoterFileList:
        oneGenome = Fasta.file2dict(promoterFile)
        for geneName in oneGenome:
            # Membership is tested on the dict itself; rebuilding keys() for
            # every gene was an accidental O(n^2) scan on Python 2.
            assert geneName not in allPromoters, \
                '''Detected duplicate gene name in promoterFileList! "%s"''' \
                % (geneName)
            allPromoters[geneName] = oneGenome[geneName]

    # Build Groups
    orthoGroups = {}
    for group in nWayOrthoList:
        groupDict = {}
        for geneName in group:
            # .get() so that an unknown gene counts as a missing member
            # instead of raising KeyError; empty promoters also count as missing.
            promoter = allPromoters.get(geneName)
            if not promoter:
                break  # we do not want orthoGroups that are missing members
            groupDict[geneName] = promoter
        if len(groupDict) != len(group):
            # Skip only this group.  The original `break` here silently
            # discarded every group that came after the first incomplete one.
            continue
        group.sort()
        orthoGroups[':'.join(group)] = OrthoGroup(groupDict)
    return orthoGroups
示例2:
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import file2dict [as 别名]
from TAMO.seq import Fasta
from gusPyCode.defs import bioDefs
# Script: derive 7-mer seed oligos from the mature miRNA sequences in miRNAFile.
miRNAFile = '/Users/biggus/Documents/James/Data/Tu_miRNA/miRNAs/miRBase/mature.aga.fa'
# NOTE(review): seedFile is not used in the visible part of the script;
# presumably the seeds are written to it further down -- confirm.
seedFile = '/Users/biggus/Documents/James/Data/Tu_miRNA/miRNAs/miRBase/mature.aga.seeds.ctrl.fa'
oligoType = 'control' # 'match' or 'control'

# The original test `oligoType == 'match' or 'control'` was always true,
# because the non-empty string 'control' is truthy on its own.  A membership
# test actually rejects any other value.
assert oligoType in ('match', 'control'), 'oligoType MUST be only "match" or "control".'

# Load miRNA fastas into dict.
miRNAs = Fasta.file2dict(miRNAFile)

# Create new dict for seeds.
seeds = {}

# 1) Cycle through miRNA dict taking 7mers starting at pos 1
#    and then pos2.  Adapt key to reflect which.
# 2) Convert to all uppers and convert U's to T's
# 3) If oligoType == 'match', rvcmp each 7mer and adapt key
#    to reflect which.
for miRNA in miRNAs:
    pos1_seed = miRNAs[miRNA][:7].upper().replace('U','T')
    pos2_seed = miRNAs[miRNA][1:8].upper().replace('U','T')
    if oligoType == 'match':
        seeds[miRNA+'_match_pos1'] = bioDefs.revComp(pos1_seed)
        seeds[miRNA+'_match_pos2'] = bioDefs.revComp(pos2_seed)
    else:
        seeds[miRNA+'_ctrl_pos1'] = pos1_seed
        seeds[miRNA+'_ctrl_pos2'] = pos2_seed
示例3: ifKmerInAll
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import file2dict [as 别名]
from TAMO.MotifTools import top_nmers,Motif
from TAMO import MotifTools
from TAMO.seq import Fasta
from gusPyCode.defs.bioDefs import ifKmerInAll
# Script: list the k-mers for which ifKmerInAll(kmer, seqs, scoreThresh) is true.
# NOTE(review): the indentation of this snippet was lost in extraction; the
# statements are kept exactly as found and only comments were added.
# seqFile is read with Fasta.file2dict; outFile is opened for writing at the end.
seqFile = '/Users/biggus/Documents/James/Collaborations/Campbell/data/mainTwoGenes.fas'
outFile = '/Users/biggus/Documents/James/Collaborations/Campbell/data/mainTwoGenes.8mersInAll.txt'
# kmerSize is the first argument given to top_nmers; scoreThresh is forwarded to ifKmerInAll.
kmerSize = 8
scoreThresh = 0.999999
seqs = Fasta.file2dict(seqFile)
# create new dict to store the seqs' kmers
seqsKmers = {}
# Each sequence is wrapped in a one-element list before being passed to top_nmers.
for i in seqs:
seqsKmers[i] = top_nmers(kmerSize,[seqs[i]], purge_Ns = 1) # for some reason top_nmers fails silently if given str instead of list
# inAllSeqs collects every accepted k-mer once; count is incremented on each append.
inAllSeqs = []
count = 0
for seq in seqsKmers:
for kmer in seqsKmers[seq]:
if ifKmerInAll(kmer,seqs,scoreThresh):
if kmer not in inAllSeqs:
inAllSeqs.append(kmer)
count+=1
# NOTE(review): with indentation lost it is unclear whether this print runs for
# every newly added k-mer or once after the loops -- confirm against the original.
print count
# NOTE: this rebinds outFile from the path string to the open file object; the
# write and close are not visible in this excerpt.
outFile = open(outFile, 'w')
示例4: len
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import file2dict [as 别名]
# Script fragment: write one gene-name file per random cluster and, on request,
# read the names back as the first step of building FASTA files.
# NOTE(review): `parser`, `genRandClusters` and `mkdirp` are defined outside this
# excerpt, and the original indentation was lost; the nesting below is
# reconstructed from the code's data flow (oFileName only exists inside the loop).
parser.add_option('-f', dest="make_fasta", action="store_true",default=False,
                  help="""Produce relavent fasta files too. (default=%default)""")

(opts, args) = parser.parse_args()

# --- A Little Extra Input Validation ---
if len(args) < 2:
    parser.print_help()
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print('\nERROR: Both geneListFile and fastaFile are required!')
    exit(1)

# args[0]: gene list file (one name per line); args[1]: FASTA file.
# `with` closes the handle that the original left open.
with open(args[0], 'rU') as geneListFile:
    geneNames = [l.strip() for l in geneListFile.readlines()]

totalSeqs = Fasta.file2dict(args[1])
randClusterLists = genRandClusters(geneNames,totalSeqs,N=opts.N, keepLen=1)

# -- Make Out Folder --
mkdirp(opts.out_dir)

for i, randCluster in enumerate(randClusterLists):
    # Output name: basename of args[0] with '.txt' replaced by
    # 'randomGeneNames_<i>.txt'.
    oFileName = args[0].replace('.txt','randomGeneNames_%s.txt' % (i)).split('/')[-1]
    oFilePath = '%s/%s' % (opts.out_dir,oFileName)
    # `with` guarantees the file is flushed and closed even if a write fails.
    with open(oFilePath, 'w') as oFile:
        for name in randCluster:
            oFile.write(name+'\n')

    # --- If Asked, Create Fastas ---
    if opts.make_fasta:
        # Re-read the names just written; handle is closed as soon as they are loaded.
        with open(oFilePath, 'rU') as namesFile:
            fNames = [l.strip() for l in namesFile.readlines()]
示例5: findSeedsInSeq
# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import file2dict [as 别名]
from TAMO.seq import Fasta
#from gusPyCode.defs.JamesDefs import revComp
# Script: load the FASTA sequences and the seed k-mers, upper-case both, and
# start the tab-separated result table with its header row.
fFile = '/Users/biggus/Documents/James/Data/Tu_miRNA/Fastas/Aa_500afterCoding.usuable.stpCdn.fas'
sFile = '/Users/biggus/Documents/James/Data/Tu_miRNA/miRNAs/miRBase/mature.aga.seeds.ctrl.fa'
oFile = '/Users/biggus/Documents/James/Data/Tu_miRNA/SeedCountOutPut/counts/miRBaseMatureSeedsOn_Aa_500afterCoding.ctrl.txt'

print('WARNING!!\nThis script now takes the exact k-mer to be searched!!!\nGive it the "match" or the "control" specifically.\n(match is rvcmp\'d version of miRNA seed)\nIT WILL _NOT_ REVCOMP IT FOR YOU!!!!\n')

# --------- Fasta Prep ---------
fastas = Fasta.file2dict(fFile)
seqNames = sorted(fastas.keys())
# The sequences are soft-masked; upper-casing them in place removes the masking.
for seqName in seqNames:
    fastas[seqName] = fastas[seqName].upper()

# --------- Seed Prep ---------
seeds = Fasta.file2dict(sFile)
seedNames = sorted(seeds.keys())
# Upper-case the seeds too, so that only uppercase strings are searched for.
for seedName in seedNames:
    seeds[seedName] = seeds[seedName].upper()

# Header row: '#seqName' followed by one tab-separated column per seed name.
results = ['#seqName\t%s' % '\t'.join(seedNames)]
def findSeedsInSeq(seeds,seedNames,seqStr,seqName):
'''take dict of seeds, a seq, and its name. Return tsv string of
seqName followed by 0s and 1s corelating with presence