当前位置: 首页>>代码示例>>Python>>正文


Python utils.parseTable函数代码示例

本文整理汇总了 Python 中 utils.parseTable 函数的典型用法代码示例。如果您正苦于以下问题:Python parseTable 函数的具体用法?Python parseTable 怎么用?Python parseTable 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了 parseTable 函数的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: makeFoldTable

def makeFoldTable(annotFile,analysisName,testName,controlName,testMMR,controlMMR,testIdxFile,controlIdxFile,outputFolder,epsilon = 1):

    '''
    builds a per-guide log2 ratio table (test vs. control) and writes it to disk

    for every row of the test index table the value in column 2 is divided by
    testMMR and epsilon is added; the same is done with the row at the same
    position of the control index table (using controlMMR). the log2 of
    test/control is reported together with both adjusted values rounded to
    4 decimals.

    output columns: GUIDE_ID, GENE, LOG2_RATIO, <testName>, <controlName>
    rows are emitted in the order of the test index table (nothing is sorted here)

    NOTE(review): test and control rows are paired purely by position, the
    guide IDs of the two tables are never compared -- presumably both index
    files list the guides in the same order; confirm against the caller.

    returns the path of the written file: <outputFolder><analysisName>_log2Ratio.txt
    '''

    # makeAnnotDict returns two dictionaries; only the first one is used here
    guideToGene, _unusedGeneDict = makeAnnotDict(annotFile)

    testRows = utils.parseTable(testIdxFile,'\t')
    controlRows = utils.parseTable(controlIdxFile,'\t')

    foldTable = [['GUIDE_ID','GENE','LOG2_RATIO',testName,controlName]]

    for rowNumber, testRow in enumerate(testRows):

        guideID = testRow[0]
        gene = guideToGene[guideID]

        # normalize by MMR first, then add epsilon so the ratio is always defined
        testCount = float(testRow[2])/testMMR + epsilon
        controlCount = float(controlRows[rowNumber][2])/controlMMR + epsilon

        foldTable.append([
            guideID,
            gene,
            numpy.log2(testCount/controlCount),
            round(testCount,4),
            round(controlCount,4),
        ])

    outputFile = '%s%s_log2Ratio.txt' % (outputFolder,analysisName)
    utils.unParseTable(foldTable,outputFile,'\t')
    return outputFile
开发者ID:BoulderLabs,项目名称:pipeline,代码行数:33,代码来源:processGeckoBam.py

示例2: makeSEDict

def makeSEDict(enhancerFile, name, superOnly=True):

    """
    builds a dictionary of enhancer attributes keyed by "<name>_<first column>"

    the enhancer table is read tab-delimited and processed row by row:
      - rows whose first field starts with "#" are skipped
      - a row whose first field starts with "R" is taken as the header; the
        position of its "isSuper" column is remembered
      - every other row contributes an entry
        {"rank": int(second to last column), "signal": column 6 - column 7}

    with superOnly=True only rows whose isSuper column equals 1 are kept;
    that requires the header row to appear before the first data row.
    """

    enhancerTable = utils.parseTable(enhancerFile, "\t")

    seDict = {}
    for row in enhancerTable:
        leadChar = row[0][0]

        if leadChar == "#":
            continue

        if leadChar == "R":
            # header row: remember where the isSuper flag lives
            supColumn = row.index("isSuper")
            continue

        # when filtering, drop everything that is not flagged as super
        if superOnly and int(row[supColumn]) != 1:
            continue

        signal = float(row[6]) - float(row[7])
        rank = int(row[-2])
        enhancerID = name + "_" + row[0]
        seDict[enhancerID] = {"rank": rank, "signal": signal}

    return seDict
开发者ID:jdimatteo,项目名称:pipeline,代码行数:33,代码来源:dynamicEnhancer.py

示例3: makeEnhancerSignalTable

def makeEnhancerSignalTable(mergedRegionMap,medianDict,analysisName,genome,outputFolder):

    '''
    makes a table where each row is a region and each data column is the
    background subtracted signal of one dataset divided by that dataset's median

    mergedRegionMap: path of a tab-delimited region map; the first line is
        skipped, the first 6 columns of each row are copied through and the
        following columns are read as (signal, control) pairs, one pair per dataset
    medianDict: dictionary keyed by dataset name holding the value each
        dataset's signal is divided by
    analysisName, genome, outputFolder: only used to build the output path

    negative background subtracted signal is set to 0 before dividing.
    no log transform is applied in this function.

    NOTE(review): dataset columns are matched to names using the iteration
    order of medianDict, so the region map columns are assumed to be in that
    same order -- confirm against the code that writes the region map.

    returns the path of the written table:
        <outputFolder><genome>_<analysisName>_signalTable.txt
    '''

    #load in the region map
    regionMap = utils.parseTable(mergedRegionMap,'\t')

    #list() so that the header concatenation below also works when keys() is a view (python 3)
    namesList = list(medianDict.keys())
    signalTable = [['REGION_ID','CHROM','START','STOP','NUM_LOCI','CONSTITUENT_SIZE'] + namesList]

    for line in regionMap[1:]:

        newLine = line[0:6]
        for i, name in enumerate(namesList):
            #datasets occupy consecutive (signal, control) column pairs starting at column 6
            enhancerIndex = (i*2) + 6
            controlIndex = (i*2) + 7
            enhancerSignal = float(line[enhancerIndex]) - float(line[controlIndex])
            if enhancerSignal < 0:
                enhancerSignal = 0
            enhancerSignal = enhancerSignal/medianDict[name]
            newLine.append(enhancerSignal)

        signalTable.append(newLine)

    outputFile = "%s%s_%s_signalTable.txt" % (outputFolder,genome,analysisName)
    #print as a function call: identical output on python 2, valid syntax on python 3
    print("WRITING MEDIAN NORMALIZED SIGNAL TABLE TO %s" % (outputFile))
    utils.unParseTable(signalTable,outputFile,'\t')
    return outputFile
开发者ID:zhouhufeng,项目名称:pipeline,代码行数:29,代码来源:clusterEnhancer.py

示例4: callMergeSupers

def callMergeSupers(dataFile,superFile1,superFile2,name1,name2,mergeName,genome,parentFolder):

    '''
    this is the main run function for the script
    all of the work should occur here, but no functions should be defined here

    if the expected ROSE enhancer-to-gene table can already be read, its path
    is returned right away. otherwise the two enhancer files are merged with
    mergeCollections, a bash script is produced by callRoseMerged and run with
    bash, and the expected output is checked with utils.checkOutput.

    parentFolder is used as a plain string prefix when building paths.

    returns the path of the ROSE enhancer-to-gene table
    exits the interpreter with status 1 if utils.checkOutput reports failure
    '''

    #str.upper instead of string.upper, which no longer exists in python 3
    genomeUpper = genome.upper()

    mergedGFFFile = '%s%s_%s_MERGED_REGIONS_-0_+0.gff' % (parentFolder,genomeUpper,mergeName)

    #check to make sure this hasn't been done yet
    roseOutput = "%s%s_ROSE/%s_%s_MERGED_REGIONS_-0_+0_SuperEnhancers_ENHANCER_TO_GENE.txt" % (parentFolder,name1,genomeUpper,mergeName)

    try:
        #only used as a readability probe, the parsed table itself is not needed
        utils.parseTable(roseOutput,'\t')
    except IOError:
        #no readable previous output, fall through and run the merge
        pass
    else:
        print("ROSE OUTPUT ALREADY FOUND HERE %s" % (roseOutput))
        return roseOutput

    print("MERGING ENHANCER REGIONS FROM %s and %s" % (superFile1,superFile2))
    mergedGFF = mergeCollections(superFile1,superFile2,name1,name2,mergedGFFFile)

    #call rose on the merged regions
    roseBashFile = callRoseMerged(dataFile,mergedGFF,name1,name2,parentFolder)
    print('i can has rose bash file %s' % (roseBashFile))

    #run the bash command
    os.system('bash %s' % (roseBashFile))

    #check for and return output
    #NOTE(review): the meaning of the arguments 1 and 30 is defined by utils.checkOutput -- confirm there
    if utils.checkOutput(roseOutput,1,30):
        return roseOutput

    print("ERROR: ROSE CALL ON MERGED REGIONS FAILED")
    #non-zero status so that calling shells can detect the failure
    sys.exit(1)
开发者ID:afederation,项目名称:pipeline,代码行数:35,代码来源:dynamicEnhancer.py

示例5: makeBedCollection

def makeBedCollection(bedFileList):
    '''
    reads every bed file in bedFileList and gathers all usable rows into one
    utils.LocusCollection

    a row is used when it has at least 3 fields, its first field starts with
    'chr' and fields 1 and 2 convert to int; rows that fail any of these are
    silently ignored. the smaller coordinate becomes the start, the larger
    one the stop. every locus is given the sense '.' and, as its ID, the bed
    file name stripped of its directory and of everything after the first '.'

    a running total of the loci collected so far is printed after each file.
    '''

    print("MAKING BED COLLECTION FOR:")

    collectedLoci = []
    for bedPath in bedFileList:

        fileName = bedPath.split('/')[-1]
        label = fileName.split('.')[0]
        print(label)

        for fields in utils.parseTable(bedPath, '\t'):
            # too short to hold chrom/start/stop
            if len(fields) < 3:
                continue
            # only rows whose first field looks like a chromosome name
            if fields[0][0:3] != 'chr':
                continue
            try:
                ends = sorted((int(fields[1]), int(fields[2])))
                collectedLoci.append(utils.Locus(fields[0], ends[0], ends[1], '.', label))
            except ValueError:
                # non numeric coordinates: skip the row
                continue

        print("IDENTIFIED %s BED REGIONS" % (len(collectedLoci)))

    return utils.LocusCollection(collectedLoci, 50)
开发者ID:linlabcode,项目名称:pipeline,代码行数:28,代码来源:bamPlot_turbo.py

示例6: makeSEDict

def makeSEDict(enhancerFile,name,superOnly = True):

    '''
    returns a dict of enhancer attributes, one entry per retained table row

    key: name + '_' + first column of the row
    value: {'rank': int of the second to last column,
            'signal': column 6 minus column 7 (both read as float)}

    rows whose first field begins with '#' are ignored. the row whose first
    field begins with 'R' is treated as the header and supplies the position
    of the 'isSuper' column. when superOnly is true a data row is retained
    only if its isSuper value is 1, so the header must precede the data rows.
    '''

    seDict = {}

    for line in utils.parseTable(enhancerFile,'\t'):

        marker = line[0][0]
        if marker == '#' or marker == 'R':
            if marker == 'R':
                #header line, locate the super flag column
                supColumn = line.index('isSuper')
            continue

        #decide whether this row goes into the dict
        wanted = True
        if superOnly:
            wanted = int(line[supColumn]) == 1

        if wanted:
            signal = float(line[6]) - float(line[7])
            rank = int(line[-2])
            seDict[name+'_'+line[0]] = {'rank':rank,'signal':signal}

    return seDict
开发者ID:zhouhufeng,项目名称:pipeline,代码行数:33,代码来源:dynamicEnhancer.py

示例7: loadGenome

def loadGenome(genome_build,config_file = ''):

    '''
    loads annotation for a genome into a genome object

    genome_build: build name, matched case-insensitively against the builds
        hard coded below (HG19, RN6, MM10)
    config_file: optional path of a tab-delimited table used to overwrite the
        default paths. the first line is skipped; the first column of every
        other line must have the form build:field:path (exactly two ':',
        so the path itself may not contain ':'). build must be one of the
        builds listed below, otherwise a KeyError is raised.

    returns a Genome built from the 'genome_directory' and 'annot_file'
    entries, with the features 'tf_file', 'motif_convert' and
    'motif_database' added, plus 'mask' for HG19.

    exits the interpreter with status 1 if genome_build is not supported
    '''

    #this nested dictionary has all of the useful information and likely will have to be
    #edited so it can be configured any time

    #str.upper/str.lower instead of string.upper/string.lower, which no longer exist in python 3
    genome_build = genome_build.upper()

    genomeDict = {
        'HG19':{'annot_file':'%sannotation/hg19_refseq.ucsc' % (pipeline_dir),
                'genome_directory':'/storage/cylin/grail/genomes/Homo_sapiens/UCSC/hg19/Sequence/Chromosomes/',
                'tf_file':'%s/annotation/TFlist_NMid_hg19.txt' % (whereAmI),
                'mask_file':'/storage/cylin/grail/genomes/Homo_sapiens/UCSC/hg19/Annotation/Masks/hg19_encode_blacklist.bed',
                'motif_convert':'%s/annotation/MotifDictionary.txt' % (whereAmI),
                'motif_database':'%s/annotation/VertebratePWMs.txt' % (whereAmI),
                },
        'RN6':{'annot_file':'%sannotation/rn6_refseq.ucsc' % (pipeline_dir),
                'genome_directory':'/storage/cylin/grail/genomes/Rattus_norvegicus/UCSC/rn6/Sequence/Chromosomes/',
                'tf_file':'%s/annotation/TFlist_NMid_rn6.txt' % (whereAmI),
                'motif_convert':'%s/annotation/MotifDictionary.txt' % (whereAmI),
                'motif_database':'%s/annotation/VertebratePWMs.txt' % (whereAmI),
                },
        'MM10':{'annot_file':'%sannotation/mm10_refseq.ucsc' % (pipeline_dir),
                'genome_directory':'/storage/cylin/grail/genomes/Mus_musculus/UCSC/mm10/Sequence/Chromosomes/',
                'tf_file':'%s/annotation/TFlist_NMid_mm10.txt' % (whereAmI),
                'motif_convert':'%s/annotation/MotifDictionary.txt' % (whereAmI),
                'motif_database':'%s/annotation/VertebratePWMs.txt' % (whereAmI),
                }

        }

    #allow an optional config file to overwrite default paths
    if len(config_file) >0:
        config_table = utils.parseTable(config_file,'\t')
        for line in config_table[1:]:
            (build,field,feature_path) = line[0].split(':')
            genomeDict[build.upper()][field.lower()] = feature_path

    if genome_build not in genomeDict:
        print('ERROR: UNSUPPORTED GENOME BUILD %s. EXITING NOW' % (genome_build))
        #non-zero status so that calling shells can detect the failure
        sys.exit(1)

    buildFields = genomeDict[genome_build]
    print('USING BUILD %s WITH FOLLOWING FIELDS:' % (genome_build))
    print(buildFields)

    #now attempt to load the genome
    genome = Genome(genome_build,buildFields['genome_directory'],buildFields['annot_file'])

    #adding additional optional features
    genome.addFeature('tf_file',buildFields['tf_file'])
    if genome_build == 'HG19':
        #only the HG19 defaults above define a mask file
        genome.addFeature('mask',buildFields['mask_file'])
    genome.addFeature('motif_convert',buildFields['motif_convert'])
    genome.addFeature('motif_database',buildFields['motif_database'])

    return genome
开发者ID:linlabcode,项目名称:pipeline,代码行数:59,代码来源:CRC3.py

示例8: makeEnhancerSignalTable

def makeEnhancerSignalTable(nameDict,mergedRegionMap,medianDict,analysisName,genome,outputFolder):

    '''
    makes a table where each row is a region and each data column is the
    (optionally background subtracted) signal of one dataset divided by that
    dataset's median

    nameDict: dictionary keyed by dataset name; nameDict[name]['background']
        tells whether the region map holds a control column for that dataset
    mergedRegionMap: path of a tab-delimited region map; the first line is
        skipped, the first 6 columns of each row are copied through and the
        data columns start at column 6
    medianDict: dictionary keyed by dataset name holding the value each
        dataset's signal is divided by
    analysisName, genome, outputFolder: only used to build the output path

    datasets are processed in sorted name order. a dataset with background
    consumes two consecutive columns (signal, control), one without consumes
    a single column. negative signal is set to 0 before dividing.
    no log transform is applied in this function.

    NOTE(review): the region map columns are assumed to follow the same
    sorted name order -- confirm against the code that writes the region map.

    returns the path of the written table:
        <outputFolder><genome>_<analysisName>_signalTable.txt
    exits the interpreter with status 1 if a row is too short for a
    (signal, control) pair
    '''

    #load in the region map
    regionMap = utils.parseTable(mergedRegionMap,'\t')

    #sorted() gives a real list on both python 2 and python 3
    namesList = sorted(nameDict.keys())
    signalTable = [['REGION_ID','CHROM','START','STOP','NUM_LOCI','CONSTITUENT_SIZE'] + namesList]

    print("len of %s for namesList" % (len(namesList)))
    print(namesList)
    for line in regionMap[1:]:

        newLine = line[0:6]

        #datasets are laid out sequentially, so walk a column cursor through the row
        column = 6 #start w/ the first column w/ data
        for name in namesList:

            #kept as an explicit comparison with True to match the original selection exactly
            if nameDict[name]['background'] == True:
                enhancerIndex = column
                controlIndex = column + 1
                column += 2
                try:
                    enhancerSignal = float(line[enhancerIndex]) - float(line[controlIndex])
                except IndexError:
                    #dump what is needed to find the malformed row, then stop
                    print(line)
                    print(len(line))
                    print(enhancerIndex)
                    print(controlIndex)
                    #non-zero status so that calling shells can detect the failure
                    sys.exit(1)

            else:
                enhancerIndex = column
                column += 1
                enhancerSignal = float(line[enhancerIndex])

            if enhancerSignal < 0:
                enhancerSignal = 0
            enhancerSignal = enhancerSignal/medianDict[name]
            newLine.append(enhancerSignal)

        signalTable.append(newLine)

    outputFile = "%s%s_%s_signalTable.txt" % (outputFolder,genome,analysisName)
    print("WRITING MEDIAN NORMALIZED SIGNAL TABLE TO %s" % (outputFile))
    utils.unParseTable(signalTable,outputFile,'\t')
    return outputFile
开发者ID:BoulderLabs,项目名称:pipeline,代码行数:57,代码来源:clusterEnhancer.py

示例9: findMotifs

def findMotifs(subpeakFasta,bg_path,candidate_tf_list, projectFolder, analysis_name, motifConvertFile, motifDatabaseFile):
    '''
    builds a fimo command for the motifs of the candidate TFs, saves it as a
    bash script and runs it on the subpeak fasta

    subpeakFasta: path of the fasta passed to fimo; its file name up to the
        first '.' names the output file
    bg_path: path passed to fimo as --bgfile
    candidate_tf_list: list of TF names; it is sorted in place
    projectFolder: string prefix under which the FIMO/ folder is used
    analysis_name: names the bash script, <FIMO folder><analysis_name>_fimo.sh
    motifConvertFile: tab-delimited table, column 0 is a motif name and
        column 1 the TF it belongs to
    motifDatabaseFile: path of the motif database passed to fimo

    returns the path fimo's output is redirected to,
        <FIMO folder><subpeak name>_fimo.txt
    raises KeyError if a candidate TF has no entry in motifConvertFile

    the exit status of fimo is not checked.

    NOTE: the command is run through the shell and the paths are inserted
    unquoted, so they must come from a trusted source and must not contain
    spaces or shell metacharacters.
    '''
    fimoFolder = utils.formatFolder(projectFolder + 'FIMO/', True)
    subpeak_name = subpeakFasta.split('/')[-1].split('.')[0]
    output = '%s%s_fimo.txt'  % (fimoFolder,subpeak_name)

    # Create a dictionary to call motif names keyed on gene names
    motifDatabase = utils.parseTable(motifConvertFile, '\t')
    motifDatabaseDict = {} #a dict keyed by TF with multiple motifs
    for line in motifDatabase:
        motifDatabaseDict.setdefault(line[1], []).append(line[0])

    candidate_tf_list.sort()

    print(candidate_tf_list)

    #now make a list of all motifs
    motif_list = []
    for tf in candidate_tf_list:
        motif_list += motifDatabaseDict[tf]

    motif_list = utils.uniquify(motif_list)

    #assemble the command piece by piece and join once
    cmdParts = ['fimo']
    for motif in motif_list:
        cmdParts.append(' --motif ' + "'%s'" % (str(motif)))

    #cmdParts.append(' --thresh 1e-5') #if you want to increase stringency
    cmdParts.append(' -verbosity 1')
    cmdParts.append(' -text')
    cmdParts.append(' -oc ' + projectFolder + 'FIMO')
    cmdParts.append(' --bgfile %s' % (bg_path))
    cmdParts.append(' ' + motifDatabaseFile + ' ')
    cmdParts.append(subpeakFasta)
    cmdParts.append(' > '+ output)
    fimoCmd = ''.join(cmdParts)
    print(fimoCmd)

    #keep a copy of the command as a script; the with block closes the file even if a write fails
    fimo_bash_path = '%s%s_fimo.sh' % (fimoFolder,analysis_name)
    with open(fimo_bash_path,'w') as fimo_bash:
        fimo_bash.write('#!/usr/bin/bash\n\n')
        fimo_bash.write(fimoCmd)

    #subprocess.call blocks until fimo is done
    subprocess.call(fimoCmd, shell=True)

    return output
开发者ID:linlabcode,项目名称:pipeline,代码行数:52,代码来源:CRC3.py

示例10: assignEnhancerRank

def assignEnhancerRank(enhancerToGeneFile,enhancerFile1,enhancerFile2,name1,name2,rankOutput=''):

    '''
    appends two rank columns to the enhancer to gene table, one per enhancer file

    for every row after the header a locus is built from columns 1-3 (ID from
    column 0, sense '.') and overlapped with the collection of each enhancer
    file. the reported rank is the smallest rank among the overlapping
    enhancers; if nothing overlaps, the size of that collection is used, i.e.
    the region is ranked dead last.

    the header row gets the two extra names '<name1>_rank' and '<name2>_rank'.

    returns the updated table when rankOutput is empty; otherwise the table
    is written to rankOutput and nothing is returned.
    '''

    geneTable = utils.parseTable(enhancerToGeneFile,'\t')

    collection1 = makeSECollection(enhancerFile1,name1,False)
    collection2 = makeSECollection(enhancerFile2,name2,False)

    rankDict1 = makeSEDict(enhancerFile1,name1,False)
    rankDict2 = makeSEDict(enhancerFile2,name2,False)

    def bestRank(collection,rankDict,locus):
        #smallest rank among overlapping enhancers, or the collection size if none overlap
        hits = collection.getOverlap(locus,'both')
        if len(hits) == 0:
            return len(collection)
        return min([rankDict[hit.ID()]['rank'] for hit in hits])

    #the table is updated in place, starting with the header
    geneTable[0] += ['%s_rank' % name1,'%s_rank' % name2]

    for row in geneTable[1:]:

        regionLocus = utils.Locus(row[1],row[2],row[3],'.',row[0])

        rank1 = bestRank(collection1,rankDict1,regionLocus)
        rank2 = bestRank(collection2,rankDict2,regionLocus)

        #in place extension, so the row inside geneTable grows as well
        row += [rank1,rank2]

    if len(rankOutput) > 0:
        utils.unParseTable(geneTable,rankOutput,'\t')
        return None

    return geneTable
开发者ID:zhouhufeng,项目名称:pipeline,代码行数:50,代码来源:dynamicEnhancer.py

示例11: makeSignalDict

def makeSignalDict(mappedGFFFile, controlMappedGFFFile=''):
    '''
    returns a defaultdict(float) mapping column 0 of every mapped GFF row
    (first line skipped) to its signal

    without a control file the signal is column 2 read as float.
    with a control file, column 2 of the control row at the same position is
    subtracted and a negative result is replaced by 0.0.

    NOTE(review): mapped and control rows are paired by position only --
    presumably both files list the same regions in the same order.
    '''
    print('\t called makeSignalDict on %s (ctrl: %s)' % (mappedGFFFile, controlMappedGFFFile))

    signalDict = defaultdict(float)
    mappedRows = utils.parseTable(mappedGFFFile, '\t')

    # the control table is only loaded when a path was given
    useControl = len(controlMappedGFFFile) > 0
    if useControl:
        controlRows = utils.parseTable(controlMappedGFFFile, '\t')

    for rowNumber in range(1, len(mappedRows)):
        mappedRow = mappedRows[rowNumber]
        value = float(mappedRow[2])
        if useControl:
            value = value - float(controlRows[rowNumber][2])
            if value < 0:
                value = 0.0
        signalDict[mappedRow[0]] = value

    return signalDict
开发者ID:afederation,项目名称:pipeline,代码行数:23,代码来源:ROSE2_geneMapper.py

示例12: getMedianSignalEnhancer

def getMedianSignalEnhancer(enhancerFile,name,dataFile):

    '''
    returns the numpy median of column 6 over all rows of the enhancer table
    except the first 6

    name is not used. dataFile is still passed to pipeline_dfci.loadDataTable,
    but the loaded table is not used for the result.
    '''

    #result intentionally unused, the call is kept so loading problems still surface here
    pipeline_dfci.loadDataTable(dataFile)

    tableRows = utils.parseTable(enhancerFile,'\t')

    #the first 6 rows are skipped -- presumably comment/header lines; confirm against the file format
    signalValues = []
    for row in tableRows[6:]:
        signalValues.append(float(row[6]))

    return numpy.median(signalValues)
开发者ID:linlabcode,项目名称:pipeline,代码行数:14,代码来源:dynamicEnhancer_meta.py

示例13: getSignalVector

def getSignalVector(regionFile,name,dataFile):

    '''
    returns the signal column of one dataset from a region table as a list of floats

    the column is found by looking up the file name of the dataset's bam
    (dataDict[name]['bam'], directory stripped) in the first row of the
    region table; every following row contributes float(row[column]).

    raises ValueError if the bam file name is not in the first row
    '''

    dataDict = pipeline_dfci.loadDataTable(dataFile)
    tableRows = utils.parseTable(regionFile,'\t')

    #the header names columns after bam file names, not full paths
    bamFileName = dataDict[name]['bam'].split('/')[-1]
    signalColumn = tableRows[0].index(bamFileName)

    values = []
    for row in tableRows[1:]:
        values.append(float(row[signalColumn]))

    return values
开发者ID:linlabcode,项目名称:pipeline,代码行数:15,代码来源:dynamicEnhancer_meta.py

示例14: makeAnnotDict

def makeAnnotDict(annotFile):

    '''
    reads the tab-delimited annotation table (first line skipped) and returns
    two dictionaries built from its first two columns:

    guideDict: defaultdict(str), column 1 -> column 0
    geneDict: defaultdict(list), column 0 -> list of all column 1 values seen with it

    judging by the variable names column 0 is the gene and column 1 the guideID.
    '''

    guideDict = defaultdict(str)
    geneDict = defaultdict(list)

    annotRows = utils.parseTable(annotFile,'\t')

    for row in annotRows[1:]:
        gene = row[0]
        guideID = row[1]
        guideDict[guideID] = gene
        geneDict[gene].append(guideID)

    return guideDict,geneDict
开发者ID:BoulderLabs,项目名称:pipeline,代码行数:16,代码来源:processGeckoBam.py

示例15: callMergeSupers

def callMergeSupers(dataFile, superFile1, superFile2, name1, name2, mergeName, genome, parentFolder):

    """
    this is the main run function for the script
    all of the work should occur here, but no functions should be defined here

    if the expected ROSE enhancer-to-gene table can already be read, its path
    is returned right away. otherwise the two enhancer files are merged with
    mergeCollections, a bash script is produced by callRoseMerged and run with
    bash, and the expected output is checked with utils.checkOutput. if that
    check fails, the ROSE folder is searched for a file matching
    "_SuperEnhancers_ENHANCER_TO_GENE.txt" via getFile.

    parentFolder is used as a plain string prefix when building paths.

    returns the path of the ROSE enhancer-to-gene table (in the fallback case,
    whatever getFile returns -- presumably the matching file's path)
    exits the interpreter with status 1 if the ROSE folder holds no visible files
    """

    # str.upper instead of string.upper, which no longer exists in python 3
    genomeUpper = genome.upper()

    mergedGFFFile = "%s%s_%s_MERGED_REGIONS_-0_+0.gff" % (parentFolder, genomeUpper, mergeName)

    # check to make sure this hasn't been done yet
    roseOutput = "%s%s_ROSE/%s_%s_MERGED_REGIONS_-0_+0_SuperEnhancers_ENHANCER_TO_GENE.txt" % (
        parentFolder,
        name1,
        genomeUpper,
        mergeName,
    )

    try:
        # only used as a readability probe, the parsed table itself is not needed
        utils.parseTable(roseOutput, "\t")
    except IOError:
        # no readable previous output, fall through and run the merge
        pass
    else:
        print("ROSE OUTPUT ALREADY FOUND HERE %s" % (roseOutput))
        return roseOutput

    print("MERGING ENHANCER REGIONS FROM %s and %s" % (superFile1, superFile2))
    mergedGFF = mergeCollections(superFile1, superFile2, name1, name2, mergedGFFFile)

    # call rose on the merged regions
    roseBashFile = callRoseMerged(dataFile, mergedGFF, name1, name2, parentFolder)
    print("i can has rose bash file %s" % (roseBashFile))

    # run the bash command
    os.system("bash %s" % (roseBashFile))

    # check for and return output
    # NOTE(review): the meaning of the arguments 1 and 10 is defined by utils.checkOutput -- confirm there
    if utils.checkOutput(roseOutput, 1, 10):
        return roseOutput

    # try finding it w/ a different name
    # os.listdir raises if the folder itself is missing
    roseFolder = "%s%s_ROSE/" % (parentFolder, name1)
    roseFileList = [x for x in os.listdir(roseFolder) if x[0] != "."]  # no hidden files
    if len(roseFileList) == 0:
        print("No files found in %s" % (roseFolder))
        # non-zero status so that calling shells can detect the failure
        sys.exit(1)

    enhancerToGeneFile = getFile("_SuperEnhancers_ENHANCER_TO_GENE.txt", roseFileList, roseFolder)
    # hand the located file back; previously this branch fell off the end and returned None
    return enhancerToGeneFile
开发者ID:lg72cu,项目名称:pipeline,代码行数:46,代码来源:dynamicEnhancer.py


注:本文中的utils.parseTable函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。