當前位置: 首頁>>代碼示例>>Python>>正文


Python Fasta.file2dict方法代碼示例

本文整理匯總了Python中TAMO.seq.Fasta.file2dict方法的典型用法代碼示例。如果您正苦於以下問題:Python Fasta.file2dict方法的具體用法?Python Fasta.file2dict怎麼用?Python Fasta.file2dict使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在TAMO.seq.Fasta的用法示例。


在下文中一共展示了Fasta.file2dict方法的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: spawnOrthoGroups

# 需要導入模塊: from TAMO.seq import Fasta [as 別名]
# 或者: from TAMO.seq.Fasta import file2dict [as 別名]
def spawnOrthoGroups(promoterFileList,nWayOrthoList):
    """Build one OrthoGroup per complete group of orthologous genes.

    Every fasta file in promoterFileList is loaded with Fasta.file2dict and
    merged into a single geneName -> promoter mapping.  Each entry of
    nWayOrthoList is then turned into an OrthoGroup, provided that every one
    of its gene names has a non-empty promoter in that mapping.

    Args:
        promoterFileList: list of paths to promoter fasta files.
        nWayOrthoList: list of lists of gene names, one inner list per
            orthologous group.  Inner lists of kept groups are sorted in
            place (side effect preserved from the original implementation).

    Returns:
        dict keyed by 'geneName1:geneName2:etc' (sorted gene names joined
        with ':') whose values are OrthoGroup objects.  Groups with a
        missing or empty member are skipped.

    Raises:
        AssertionError: if promoterFileList is not a list, if its first item
            is not a string, or if a gene name occurs in more than one file.
    """

    # validation
    assert isinstance(promoterFileList, list), \
           '''promoterFileList must be a list of file paths.
           You provided type: "%s"'''\
           % (type(promoterFileList))
    # An empty list is tolerated (nothing to load) instead of failing with an
    # unrelated IndexError on promoterFileList[0].
    assert not promoterFileList or isinstance(promoterFileList[0], str), \
           '''promoterFileList must be a list of file paths.
           promoterFileList[0] != type(''): "%s"'''\
           % (type(promoterFileList[0]))

    # load promoters
    allPromoters = {}
    for promoterFile in promoterFileList:
        oneGenome = Fasta.file2dict(promoterFile)
        for geneName in oneGenome:
            # Direct dict membership test; no key list is rebuilt per gene.
            assert geneName not in allPromoters, \
                   '''Detected duplicate gene name in promoterFileList! "%s"'''\
                   % (geneName)
            allPromoters[geneName] = oneGenome[geneName]

    # Build Groups
    orthoGroups = {}
    for group in nWayOrthoList:
        groupDict = {}
        for geneName in group:
            # .get() so that an unknown gene name skips the group rather than
            # raising KeyError; an empty promoter is treated the same way.
            promoter = allPromoters.get(geneName)
            if not promoter:
                break # we do not want orthoGroups that are missing members
            groupDict[geneName] = promoter

        if len(groupDict) != len(group):
            # Skip ONLY this group.  The original used `break` here, which
            # silently dropped every group after the first incomplete one.
            continue

        group.sort()
        orthoGroups[':'.join(group)] = OrthoGroup(groupDict)

    return orthoGroups
開發者ID:xguse,項目名稱:gusPyProj,代碼行數:47,代碼來源:defs.py

示例2:

# 需要導入模塊: from TAMO.seq import Fasta [as 別名]
# 或者: from TAMO.seq.Fasta import file2dict [as 別名]
from TAMO.seq import Fasta
from gusPyCode.defs import bioDefs

# Script: derive 7-mer "seed" sequences from a fasta file of miRNAs.
# For every miRNA two seeds are taken (characters 0-6 and 1-7 of the
# sequence), upper-cased and with U replaced by T.  In 'match' mode the
# seeds are passed through bioDefs.revComp; in 'control' mode they are
# stored as-is.  Results accumulate in the `seeds` dict.

miRNAFile = '/Users/biggus/Documents/James/Data/Tu_miRNA/miRNAs/miRBase/mature.aga.fa'
seedFile  = '/Users/biggus/Documents/James/Data/Tu_miRNA/miRNAs/miRBase/mature.aga.seeds.ctrl.fa'

oligoType = 'control' # 'match' or 'control'
# The original test `oligoType == 'match' or 'control'` was always true
# (the non-empty string 'control' is truthy), so it validated nothing.
assert oligoType in ('match', 'control'), 'oligoType MUST be only "match" or "control".'

# Load miRNA fastas into dict.
miRNAs = Fasta.file2dict(miRNAFile)

# Create new dict for seeds.
seeds = {}

# 1) Cycle through miRNA dict taking 7mers starting at pos 1 
#    and then pos2. Adapt key to reflect which. 
# 2) Convert to all uppers and convert U's to T's
# 3) If oligoType == 'match', rvcmp each 7mer and adapt key
#    to reflect which.
for miRNA in miRNAs:
    pos1_seed = miRNAs[miRNA][:7].upper().replace('U','T')
    pos2_seed = miRNAs[miRNA][1:8].upper().replace('U','T')


    if oligoType == 'match':
        seeds[miRNA+'_match_pos1'] = bioDefs.revComp(pos1_seed)
        seeds[miRNA+'_match_pos2'] = bioDefs.revComp(pos2_seed)
    else:
        seeds[miRNA+'_ctrl_pos1'] = pos1_seed
        seeds[miRNA+'_ctrl_pos2'] = pos2_seed
開發者ID:xguse,項目名稱:gusPyProj,代碼行數:33,代碼來源:miRNAs2SeedFastas.py

示例3: ifKmerInAll

# 需要導入模塊: from TAMO.seq import Fasta [as 別名]
# 或者: from TAMO.seq.Fasta import file2dict [as 別名]
from TAMO.MotifTools import top_nmers,Motif
from TAMO import MotifTools
from TAMO.seq import Fasta
from gusPyCode.defs.bioDefs import ifKmerInAll

# Script: collect the k-mers that ifKmerInAll accepts for the whole set of
# sequences loaded from seqFile, printing a running count of distinct hits.

seqFile     = '/Users/biggus/Documents/James/Collaborations/Campbell/data/mainTwoGenes.fas'
outFile     = '/Users/biggus/Documents/James/Collaborations/Campbell/data/mainTwoGenes.8mersInAll.txt'
kmerSize    = 8
scoreThresh = 0.999999

seqs = Fasta.file2dict(seqFile)

# Map each sequence name to the k-mers top_nmers reports for that sequence.
# top_nmers is handed a one-element list: per the original author it fails
# silently when given a bare string.
seqsKmers = {}
for name, sequence in seqs.items():
    seqsKmers[name] = top_nmers(kmerSize, [sequence], purge_Ns=1)

# Keep each accepted k-mer once, in first-seen order.
inAllSeqs = []
count = 0
for kmers in seqsKmers.values():
    for kmer in kmers:
        if not ifKmerInAll(kmer, seqs, scoreThresh):
            continue
        if kmer in inAllSeqs:
            continue
        inAllSeqs.append(kmer)
        count = len(inAllSeqs)
        print(count)


# From here on outFile is the open file handle, no longer the path string.
outFile = open(outFile, 'w')
開發者ID:xguse,項目名稱:gusPyProj,代碼行數:33,代碼來源:getKmersInAllSeqs.py

示例4: len

# 需要導入模塊: from TAMO.seq import Fasta [as 別名]
# 或者: from TAMO.seq.Fasta import file2dict [as 別名]
parser.add_option('-f', dest="make_fasta", action="store_true",default=False,
                  help="""Produce relavent fasta files too. (default=%default)""")


(opts, args) = parser.parse_args()

# --- A Little Extra Input Validation ---
# Two positional arguments are required: args[0] is the gene list file and
# args[1] is the fasta file.
if len(args) < 2:
    parser.print_help()
    print('\nERROR: Both geneListFile and fastaFile are required!')
    exit(1)



# Read one gene name per line.  `with` closes the handle, which the original
# open(...).readlines() call never did.
with open(args[0], 'rU') as geneListFile:
    geneNames = [line.strip() for line in geneListFile]
totalSeqs = Fasta.file2dict(args[1])
randClusterLists = genRandClusters(geneNames,totalSeqs,N=opts.N, keepLen=1)

# -- Make Out Folder --
mkdirp(opts.out_dir)


# Write each random cluster to its own file in opts.out_dir, one name per line.
for i, randCluster in enumerate(randClusterLists):
    # Output name is the base name of args[0] with '.txt' replaced by
    # 'randomGeneNames_<i>.txt'.
    oFileName = args[0].replace('.txt','randomGeneNames_%s.txt' % (i)).split('/')[-1]
    oFilePath = '%s/%s' % (opts.out_dir,oFileName)
    # `with` guarantees the file is closed even if a write raises.
    with open(oFilePath, 'w') as oFile:
        for name in randCluster:
            oFile.write(name+'\n')
    # --- If Asked, Create Fastas ---
    if opts.make_fasta:
        # Re-read the names just written, closing the handle afterwards.
        with open(oFilePath, 'rU') as namesFile:
            fNames = [line.strip() for line in namesFile]
開發者ID:xguse,項目名稱:gusPyProj,代碼行數:33,代碼來源:writeRandClustFiles.py

示例5: findSeedsInSeq

# 需要導入模塊: from TAMO.seq import Fasta [as 別名]
# 或者: from TAMO.seq.Fasta import file2dict [as 別名]
from TAMO.seq import Fasta
#from gusPyCode.defs.JamesDefs import revComp

# Script setup: load the sequences and the seed k-mers, upper-case both,
# and start the results table with a tab-separated header row.

fFile = '/Users/biggus/Documents/James/Data/Tu_miRNA/Fastas/Aa_500afterCoding.usuable.stpCdn.fas'
sFile = '/Users/biggus/Documents/James/Data/Tu_miRNA/miRNAs/miRBase/mature.aga.seeds.ctrl.fa'
oFile = '/Users/biggus/Documents/James/Data/Tu_miRNA/SeedCountOutPut/counts/miRBaseMatureSeedsOn_Aa_500afterCoding.ctrl.txt'

print('WARNING!!\nThis script now takes the exact k-mer to be searched!!!\nGive it the "match" or the "control" specifically.\n(match is rvcmp\'d version of miRNA seed)\nIT WILL _NOT_ REVCOMP IT FOR YOU!!!!\n')

# --------- Fasta Prep ---------
fastas    = Fasta.file2dict(fFile)
seqNames  = sorted(fastas.keys())
# The sequences are soft-masked (lower case); upper-casing unmasks them.
for name, sequence in fastas.items():
    fastas[name] = sequence.upper()

# --------- Seed Prep ---------
seeds     = Fasta.file2dict(sFile)
seedNames = sorted(seeds.keys())
# Upper-case the seeds too, so only upper-case strings are searched for.
for name, kmer in seeds.items():
    seeds[name] = kmer.upper()


# First row of the output: '#seqName' followed by the sorted seed names.
header = '#seqName\t' + '\t'.join(seedNames)
results = [header]

def findSeedsInSeq(seeds,seedNames,seqStr,seqName):
    '''take dict of seeds, a seq, and its name. Return tsv string of 
    seqName followed by 0s and 1s corelating with presence
開發者ID:xguse,項目名稱:gusPyProj,代碼行數:33,代碼來源:tabulate_miRNAseeds.py


注:本文中的TAMO.seq.Fasta.file2dict方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。