当前位置: 首页>>代码示例>>Python>>正文


Python Fasta.file2dict方法代码示例

本文整理汇总了Python中TAMO.seq.Fasta.file2dict方法的典型用法代码示例。如果您正苦于以下问题：Python Fasta.file2dict方法的具体用法？Python Fasta.file2dict怎么用？Python Fasta.file2dict使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在TAMO.seq.Fasta的用法示例。


在下文中一共展示了Fasta.file2dict方法的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: spawnOrthoGroups

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import file2dict [as 别名]
def spawnOrthoGroups(promoterFileList,nWayOrthoList):
    """Build one OrthoGroup per complete ortholog group.

    Every file in promoterFileList is loaded with Fasta.file2dict and pooled
    into a single dict keyed by gene name.  For each list of gene names in
    nWayOrthoList, an OrthoGroup is created from the pooled promoters of its
    members.  A group is skipped (not built) when any member is absent from
    the pooled promoters or has an empty/falsy promoter entry; the remaining
    groups are still processed.

    Args:
        promoterFileList: non-empty list of promoter FASTA file paths (str).
        nWayOrthoList: list of lists of gene names, one inner list per
            ortholog group.  Each inner list that yields a group is sorted
            in place (side effect kept from the original implementation).

    Returns:
        dict mapping 'geneName1:geneName2:etc' (the sorted member names
        joined with ':') to OrthoGroup(groupDict), where groupDict maps each
        member name to its promoter entry.

    Raises:
        AssertionError: promoterFileList is not a list, its first element is
            not a str, or the same gene name occurs in more than one file.
        IndexError: promoterFileList is an empty list.
    """

    # validation
    assert type(promoterFileList) == type([]), \
           '''promoterFileList must be a list of file paths.
           You provided type: "%s"'''\
           % (type(promoterFileList))
    assert type(promoterFileList[0]) == type(''), \
           '''promoterFileList must be a list of file paths.
           promoterFileList[0] != type(''): "%s"'''\
           % (type(promoterFileList[0]))

    # load promoters
    allPromoters = {}
    for promoterFile in promoterFileList:
        oneGenome = Fasta.file2dict(promoterFile)
        for geneName, promoter in oneGenome.items():
            # Test membership on the dict itself; building a key list on
            # every iteration made the load quadratic.
            assert geneName not in allPromoters, \
                   '''Detected duplicate gene name in promoterFileList! "%s"'''\
                   % (geneName)
            allPromoters[geneName] = promoter

    # Build Groups
    orthoGroups = {}
    for geneNames in nWayOrthoList:
        groupDict = {}
        for geneName in geneNames:
            # .get() so an unknown gene counts as a missing member instead of
            # raising KeyError.
            promoter = allPromoters.get(geneName)
            if not promoter:
                break # we do not want orthoGroups that are missing members
            groupDict[geneName] = promoter

        if len(groupDict) != len(geneNames):
            # Skip only this incomplete group; a `break` here would silently
            # drop every group that follows it as well.
            continue

        geneNames.sort()
        orthoGroups[':'.join(geneNames)] = OrthoGroup(groupDict)

    return orthoGroups
开发者ID:xguse,项目名称:gusPyProj,代码行数:47,代码来源:defs.py

示例2:

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import file2dict [as 别名]
from TAMO.seq import Fasta
from gusPyCode.defs import bioDefs

# Input: mature miRNA sequences.  seedFile names the seed FASTA; it is not
# referenced within this excerpt.
miRNAFile = '/Users/biggus/Documents/James/Data/Tu_miRNA/miRNAs/miRBase/mature.aga.fa'
seedFile  = '/Users/biggus/Documents/James/Data/Tu_miRNA/miRNAs/miRBase/mature.aga.seeds.ctrl.fa'

oligoType = 'control' # 'match' or 'control'
# Membership test: the previous `oligoType == 'match' or 'control'` was always
# true because the non-empty string 'control' is truthy on its own, so an
# invalid oligoType was never rejected.
assert oligoType in ('match', 'control'), 'oligoType MUST be only "match" or "control".'

# Load miRNA fastas into dict.
miRNAs = Fasta.file2dict(miRNAFile)

# Create new dict for seeds.
seeds = {}

# 1) Cycle through miRNA dict taking 7mers starting at pos 1
#    and then pos2. Adapt key to reflect which.
# 2) Convert to all uppers and convert U's to T's
# 3) If oligoType == 'match', rvcmp each 7mer and adapt key
#    to reflect which.
for miRNA in miRNAs:
    matureSeq = miRNAs[miRNA]
    pos1_seed = matureSeq[:7].upper().replace('U','T')   # bases 1-7
    pos2_seed = matureSeq[1:8].upper().replace('U','T')  # bases 2-8

    if oligoType == 'match':
        # 'match' seeds are stored reverse-complemented
        seeds[miRNA+'_match_pos1'] = bioDefs.revComp(pos1_seed)
        seeds[miRNA+'_match_pos2'] = bioDefs.revComp(pos2_seed)
    else:
        # 'control' seeds are stored as-is
        seeds[miRNA+'_ctrl_pos1'] = pos1_seed
        seeds[miRNA+'_ctrl_pos2'] = pos2_seed
开发者ID:xguse,项目名称:gusPyProj,代码行数:33,代码来源:miRNAs2SeedFastas.py

示例3: ifKmerInAll

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import file2dict [as 别名]
from TAMO.MotifTools import top_nmers,Motif
from TAMO import MotifTools
from TAMO.seq import Fasta
from gusPyCode.defs.bioDefs import ifKmerInAll

# --- configuration ---
seqFile     = '/Users/biggus/Documents/James/Collaborations/Campbell/data/mainTwoGenes.fas'
outFile     = '/Users/biggus/Documents/James/Collaborations/Campbell/data/mainTwoGenes.8mersInAll.txt'
kmerSize    = 8
scoreThresh = 0.999999

# name -> sequence for every record in the FASTA file
seqs = Fasta.file2dict(seqFile)

# Map each sequence name to the k-mers that top_nmers reports for that
# sequence on its own.
seqsKmers = {}
for seqName, seqStr in seqs.items():
    # top_nmers is handed a one-element list: the original author noted it
    # fails silently when given a bare str instead of a list.
    seqsKmers[seqName] = top_nmers(kmerSize, [seqStr], purge_Ns=1)

# Collect, without repeats, every k-mer that ifKmerInAll accepts against the
# full set of sequences at scoreThresh; print a running tally as they are found.
inAllSeqs = []
count = 0
for kmers in seqsKmers.values():
    for kmer in kmers:
        if ifKmerInAll(kmer, seqs, scoreThresh) and kmer not in inAllSeqs:
            inAllSeqs.append(kmer)
            count += 1
            print(count)


# From here on outFile is the open output handle, no longer the path string.
outFile = open(outFile, 'w')
开发者ID:xguse,项目名称:gusPyProj,代码行数:33,代码来源:getKmersInAllSeqs.py

示例4: len

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import file2dict [as 别名]
# Excerpt of a command-line script: `parser`, `Fasta`, `genRandClusters` and
# `mkdirp` are created/imported above this excerpt.
# -f is a boolean flag stored as opts.make_fasta (False unless given).
parser.add_option('-f', dest="make_fasta", action="store_true",default=False,
                  help="""Produce relavent fasta files too. (default=%default)""")


(opts, args) = parser.parse_args()

# --- A Little Extra Input Validation ---
# Two positional arguments are required: args[0] is the gene list file and
# args[1] the fasta file (per the error message below).
if len(args) < 2:
    parser.print_help()
    print '\nERROR: Both geneListFile and fastaFile are required!'
    exit(1)



# One whitespace-stripped entry per line of the gene list file.
geneNames = map(lambda l: l.strip(),open(args[0], 'rU').readlines())
totalSeqs = Fasta.file2dict(args[1])
# NOTE(review): genRandClusters is defined elsewhere; the loop below treats
# its result as an indexable collection of iterables of name strings.
randClusterLists = genRandClusters(geneNames,totalSeqs,N=opts.N, keepLen=1)

# -- Make Out Folder --
mkdirp(opts.out_dir)


# Write each random cluster to its own file inside opts.out_dir.
for i in range(len(randClusterLists)):
    # Output name: the gene list path with '.txt' replaced by
    # 'randomGeneNames_<i>.txt', reduced to its last '/'-separated component.
    oFileName = args[0].replace('.txt','randomGeneNames_%s.txt' % (i)).split('/')[-1]
    oFile = open('%s/%s' % (opts.out_dir,oFileName), 'w')
    # One name per line.
    for name in randClusterLists[i]:
        oFile.write(name+'\n')
    oFile.close()
    # --- If Asked, Create Fastas ---
    if opts.make_fasta:
        # Read back the names just written, stripped of surrounding whitespace.
        fNames  = map(lambda l: l.strip(),open('%s/%s' % (opts.out_dir,oFileName), 'rU').readlines())
开发者ID:xguse,项目名称:gusPyProj,代码行数:33,代码来源:writeRandClustFiles.py

示例5: findSeedsInSeq

# 需要导入模块: from TAMO.seq import Fasta [as 别名]
# 或者: from TAMO.seq.Fasta import file2dict [as 别名]
from TAMO.seq import Fasta
#from gusPyCode.defs.JamesDefs import revComp

# Paths: sequences to scan (fFile), seeds to look for (sFile), results (oFile).
fFile = '/Users/biggus/Documents/James/Data/Tu_miRNA/Fastas/Aa_500afterCoding.usuable.stpCdn.fas'
sFile = '/Users/biggus/Documents/James/Data/Tu_miRNA/miRNAs/miRBase/mature.aga.seeds.ctrl.fa'
oFile = '/Users/biggus/Documents/James/Data/Tu_miRNA/SeedCountOutPut/counts/miRBaseMatureSeedsOn_Aa_500afterCoding.ctrl.txt'

print('WARNING!!\nThis script now takes the exact k-mer to be searched!!!\nGive it the "match" or the "control" specifically.\n(match is rvcmp\'d version of miRNA seed)\nIT WILL _NOT_ REVCOMP IT FOR YOU!!!!\n')

# --------- Fasta Prep ---------
fastas = Fasta.file2dict(fFile)
seqNames = sorted(fastas.keys())
# The sequences are soft-masked (lowercase); upper-casing removes the masking.
for seqName in seqNames:
    fastas[seqName] = fastas[seqName].upper()

# --------- Seed Prep ---------
seeds = Fasta.file2dict(sFile)
seedNames = sorted(seeds.keys())
# Seeds are upper-cased too, so only uppercase strings are compared.
for seedName in seedNames:
    seeds[seedName] = seeds[seedName].upper()


# Header row: '#seqName' followed by one tab-separated column per seed name.
results = ['#seqName\t'+'\t'.join(seedNames)]

def findSeedsInSeq(seeds,seedNames,seqStr,seqName):
    '''take dict of seeds, a seq, and its name. Return tsv string of 
    seqName followed by 0s and 1s corelating with presence
开发者ID:xguse,项目名称:gusPyProj,代码行数:33,代码来源:tabulate_miRNAseeds.py


注:本文中的TAMO.seq.Fasta.file2dict方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。