

Python utils.unParseTable Function Code Examples

This article collects typical usage examples of the Python function utils.unParseTable, drawn from open-source projects. If you are wondering what unParseTable does, how to call it, or what real-world usage looks like, the examples below should help.


The following 15 code examples of the unParseTable function are shown, sorted by popularity by default.
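As a point of reference, here is a minimal sketch of what parseTable and unParseTable appear to do, inferred purely from how the examples below call them (a hypothetical reimplementation; the real code lives in each project's utils module and may differ):

def parseTable(fn, separator):
    '''hypothetical sketch: read a delimited text file into a list of row lists'''
    with open(fn) as f:
        return [line.rstrip('\n').split(separator) for line in f]

def unParseTable(table, output, separator):
    '''hypothetical sketch: the inverse of parseTable --
    join each row with the separator and write the table to disk'''
    with open(output, 'w') as f:
        for row in table:
            f.write(separator.join([str(x) for x in row]) + '\n')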

Example 1: makeFoldTable

def makeFoldTable(annotFile,analysisName,testName,controlName,testMMR,controlMMR,testIdxFile,controlIdxFile,outputFolder,epsilon = 1):

    '''
    makes the fold table and writes it to disk
    columns are guide ID, gene name, log2 ratio of test vs. control,
    and the normalized test and control counts
    '''

    guideDict,geneDict = makeAnnotDict(annotFile)

    testIdx = utils.parseTable(testIdxFile,'\t')
    controlIdx = utils.parseTable(controlIdxFile,'\t')

    #for each guide, divide the count by the MMR, add the epsilon pseudocount, then take the log2 ratio

    outTable = [['GUIDE_ID','GENE','LOG2_RATIO',testName,controlName]]
    for i in range(len(testIdx)):

        guideID = testIdx[i][0]
        gene = guideDict[guideID]
        
        testCount = float(testIdx[i][2])/testMMR + epsilon
        controlCount = float(controlIdx[i][2])/controlMMR + epsilon

        log2Ratio = numpy.log2(testCount/controlCount)

        newLine = [guideID,gene,log2Ratio,round(testCount,4),round(controlCount,4)]

        outTable.append(newLine)

    outputFile = '%s%s_log2Ratio.txt' % (outputFolder,analysisName)
    utils.unParseTable(outTable,outputFile,'\t')
    return outputFile
Developer: BoulderLabs; Project: pipeline; Lines of code: 33; Source file: processGeckoBam.py
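The epsilon pseudocount keeps zero-count guides from producing infinite ratios. A quick sanity check of the arithmetic with toy numbers (not taken from the source):

import numpy

testCount = 50 / 1.0 + 1      # 50 reads, test MMR = 1.0, epsilon = 1 -> 51.0
controlCount = 10 / 1.0 + 1   # 10 reads, control MMR = 1.0 -> 11.0
print(numpy.log2(testCount / controlCount))  # ~2.21, guide enriched in the test sample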

Example 2: makeEnhancerSignalTable

def makeEnhancerSignalTable(mergedRegionMap,medianDict,analysisName,genome,outputFolder):

    '''
    makes a table where each row is an enhancer and each column is the
    background-corrected signal normalized to that dataset's median signal
    '''

    #load in the region map
    regionMap = utils.parseTable(mergedRegionMap,'\t')
    namesList = medianDict.keys()
    signalTable = [['REGION_ID','CHROM','START','STOP','NUM_LOCI','CONSTITUENT_SIZE'] + namesList]
    for line in regionMap[1:]:

        newLine = line[0:6]
        for i in range(len(namesList)):
            enhancerIndex = (i*2) + 6
            controlIndex = (i*2) + 7
            enhancerSignal = float(line[enhancerIndex]) - float(line[controlIndex])
            if enhancerSignal < 0:
                enhancerSignal = 0
            enhancerSignal = enhancerSignal/medianDict[namesList[i]]
            newLine.append(enhancerSignal)

        signalTable.append(newLine)

    outputFile = "%s%s_%s_signalTable.txt" % (outputFolder,genome,analysisName)
    print "WRITING MEDIAN NORMALIZED SIGNAL TABLE TO %s" % (outputFile)
    utils.unParseTable(signalTable,outputFile,'\t')
    return outputFile
Developer: zhouhufeng; Project: pipeline; Lines of code: 29; Source file: clusterEnhancer.py
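The index arithmetic assumes the region map stores one (signal, control) column pair per dataset, starting at column 6. A toy row under that assumption (hypothetical values):

line = ['region_1', 'chr1', '100', '500', '2', '300', '8.0', '1.5', '4.0', '0.5']
namesList = ['datasetA', 'datasetB']
for i in range(len(namesList)):
    enhancerIndex = (i * 2) + 6   # dataset signal column
    controlIndex = (i * 2) + 7    # matched control column
    print('%s: %s' % (namesList[i], float(line[enhancerIndex]) - float(line[controlIndex])))
# datasetA: 6.5
# datasetB: 3.5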

Example 3: filterSubpeaks

def filterSubpeaks(subpeakFile,gene_to_enhancer_dict, analysis_name,output_folder):
    '''
    takes the initial subpeaks, stitches them, and writes the stitched
    subpeaks to a bed file
    '''


    # stitch the subpeaks
    print(subpeakFile)
    subpeakCollection = utils.importBoundRegion(subpeakFile,'%s_subpeak' % (analysis_name))
    
    subpeakCollection = subpeakCollection.stitchCollection()
    
    subpeakLoci = subpeakCollection.getLoci()


    all_sub_bed = []
    for locus in subpeakLoci:
        bed_line = [locus.chr(),locus.start(),locus.end(),'.',locus.ID()]
        all_sub_bed.append(bed_line)


    all_bed_path = output_folder + analysis_name + '_all_subpeak.bed'
    utils.unParseTable(all_sub_bed, all_bed_path, '\t')

    return all_bed_path
Developer: linlabcode; Project: pipeline; Lines of code: 25; Source file: CRC3.py

Example 4: makeEnhancerSignalTable

def makeEnhancerSignalTable(nameDict,mergedRegionMap,medianDict,analysisName,genome,outputFolder):

    '''
    makes a table where each row is an enhancer and each column is the
    background-corrected signal normalized to that dataset's median signal
    '''

    #load in the region map
    regionMap = utils.parseTable(mergedRegionMap,'\t')
    namesList = nameDict.keys()
    namesList.sort()
    signalTable = [['REGION_ID','CHROM','START','STOP','NUM_LOCI','CONSTITUENT_SIZE'] + namesList]

    print("len of %s for namesList" % (len(namesList)))
    print(namesList)
    for line in regionMap[1:]:

        newLine = line[0:6]
        
        
        #a little tricky here to add datasets sequentially
        i = 6 #start w/ the first column w/ data
        for name in namesList:
            
            if nameDict[name]['background'] == True:
                enhancerIndex = int(i)
                i +=1
                controlIndex = int(i)
                i +=1
                try:
                    enhancerSignal = float(line[enhancerIndex]) - float(line[controlIndex])
                except IndexError:
                    print line
                    print len(line)
                    print enhancerIndex
                    print controlIndex
                    sys.exit()
                
            else:
                enhancerIndex = int(i)
                i+=1
                enhancerSignal = float(line[enhancerIndex])

            if enhancerSignal < 0:
                enhancerSignal = 0
            enhancerSignal = enhancerSignal/medianDict[name]
            newLine.append(enhancerSignal)

        signalTable.append(newLine)

    outputFile = "%s%s_%s_signalTable.txt" % (outputFolder,genome,analysisName)
    print "WRITING MEDIAN NORMALIZED SIGNAL TABLE TO %s" % (outputFile)
    utils.unParseTable(signalTable,outputFile,'\t')
    return outputFile
Developer: BoulderLabs; Project: pipeline; Lines of code: 57; Source file: clusterEnhancer.py
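The "tricky" part is the running column pointer i, which advances by two columns for datasets that have a background control and by one otherwise. A toy walk-through with a hypothetical nameDict (not from the source):

nameDict = {'datasetA': {'background': True}, 'datasetB': {'background': False}}
i = 6  # first data column in the region map
for name in sorted(nameDict.keys()):
    if nameDict[name]['background']:
        print('%s: signal col %s, control col %s' % (name, i, i + 1))
        i += 2
    else:
        print('%s: signal col %s' % (name, i))
        i += 1
# datasetA: signal col 6, control col 7
# datasetB: signal col 8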

Example 5: assignEnhancerRank

def assignEnhancerRank(enhancerToGeneFile,enhancerFile1,enhancerFile2,name1,name2,rankOutput=''):

    '''
    for each enhancer in the enhancerToGene table, assigns the best (lowest)
    rank of any overlapping enhancer from each of the two enhancer tables
    '''

    enhancerToGene = utils.parseTable(enhancerToGeneFile,'\t')

    enhancerCollection1 = makeSECollection(enhancerFile1,name1,False)
    enhancerCollection2 = makeSECollection(enhancerFile2,name2,False)

    enhancerDict1 = makeSEDict(enhancerFile1,name1,False)
    enhancerDict2 = makeSEDict(enhancerFile2,name2,False)

    
    #we're going to update the enhancerToGeneTable

    enhancerToGene[0] += ['%s_rank' % name1,'%s_rank' % name2]
    
    for i in range(1,len(enhancerToGene)):

        line = enhancerToGene[i]
        
        locusLine = utils.Locus(line[1],line[2],line[3],'.',line[0])
        
        #if the enhancer doesn't exist, its ranking is dead last on the enhancer list

        enhancer1Overlap = enhancerCollection1.getOverlap(locusLine,'both')
        if len(enhancer1Overlap) == 0:
            enhancer1Rank = len(enhancerCollection1)
        else:
            
            rankList1 = [enhancerDict1[x.ID()]['rank'] for x in enhancer1Overlap]
            enhancer1Rank = min(rankList1)


        enhancer2Overlap = enhancerCollection2.getOverlap(locusLine,'both')
        if len(enhancer2Overlap) == 0:
            enhancer2Rank = len(enhancerCollection2)
        else:
            
            rankList2 = [enhancerDict2[x.ID()]['rank'] for x in enhancer2Overlap]
            enhancer2Rank = min(rankList2)
        enhancerToGene[i]+=[enhancer1Rank,enhancer2Rank]


    if len(rankOutput) == 0:
        return enhancerToGene
    else:
        utils.unParseTable(enhancerToGene,rankOutput,'\t')
        return rankOutput
Developer: zhouhufeng; Project: pipeline; Lines of code: 50; Source file: dynamicEnhancer.py

Example 6: mergeCollections

def mergeCollections(nameDict,analysisName,output='',superOnly=True):

    '''
    merges the enhancer collections from every dataset in nameDict
    '''

    allLoci = []
    namesList = nameDict.keys()
    for name in namesList:
        
        seCollection =makeSECollection(nameDict[name]['enhancerFile'],name,superOnly)
        if superOnly:
            print "DATASET: %s HAS %s SUPERENHANCERS" % (name,len(seCollection))
        else:
            print "DATASET: %s HAS %s ENHANCERS" % (name,len(seCollection))
        allLoci += seCollection.getLoci()

    print len(allLoci)


    mergedCollection = utils.LocusCollection(allLoci,50)

    #stitch the collection together
    stitchedCollection = mergedCollection.stitchCollection()

    stitchedLoci = stitchedCollection.getLoci()
    print "IDENTIFIED %s CONSENSUS ENHANCER REGIONS" % (len(stitchedLoci))
    #sort by size and provide a unique ID

    sizeList = [locus.len() for locus in stitchedLoci]

    sizeOrder = utils.order(sizeList,decreasing=True)
    
    orderedLoci = [stitchedLoci[i] for i in sizeOrder]

    for i in range(len(orderedLoci)):
        orderedLoci[i]._ID = 'merged_%s_%s' % (analysisName,str(i+1))

    mergedGFF = []
    for locus in orderedLoci:
        newLine = [locus.chr(),locus.ID(),'',locus.start(),locus.end(),'',locus.sense(),'',locus.ID()]
        mergedGFF.append(newLine)


    if len(output) == 0:
        return mergedGFF
    else:
        print "writing merged gff to %s" % (output)
        utils.unParseTable(mergedGFF,output,'\t')
        return output
Developer: BoulderLabs; Project: pipeline; Lines of code: 50; Source file: clusterEnhancer.py
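The nine-element newLine follows the GFF-style row layout this pipeline uses elsewhere (an assumption inferred from utils.locusCollectionToGFF in the other examples): chrom, name, blank, start, end, blank, strand, blank, unique ID. A toy row:

# assumed GFF-style row layout (hypothetical values):
newLine = ['chr8', 'merged_MYC_1', '', 128747680, 128753674, '', '+', '', 'merged_MYC_1']
print('\t'.join([str(x) for x in newLine]))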

Example 7: getTonyInfo

def getTonyInfo(uniqueIDList,colList):

    '''
    pass this a uniqueID List and a list of columns

    '''

    uniqueIDString = string.join(uniqueIDList,',')

    columnString = string.join([str(x) for x in colList],',')

    cmd = "perl /ark/tony/admin/getDB_Data.pl -i %s -c %s -o TAB" % (uniqueIDString,columnString)
    
    sqlOut = subprocess.Popen(cmd,stdin = subprocess.PIPE,stderr = subprocess.PIPE,stdout = subprocess.PIPE,shell = True)

    sqlText = sqlOut.communicate()

    sqlText = sqlText[0]
    
    sqlTable = sqlText.split('\n')
    sqlTable = [x for x in sqlTable if len(x) > 0]

    sqlTable = [x.split('\t') for x in sqlTable]

    #the header parsed from the SQL output is discarded in favor of a hardcoded column set
    header = ['GENOME', 'SOURCE', 'CELL_TYPE', 'NAME', 'BAMFILE']
    tonyDict = {}
    for line in sqlTable[1:]:
        uniqueID = line[0]
        tonyDict[uniqueID] = {}
        for i in range(len(header)):
            tonyDict[uniqueID][header[i]] = line[(i+1)]
    newTable = []        
    newTable.append(header)

    for key in tonyDict.keys():
        newLine = []
        newLine.append(str.upper(tonyDict[key]['GENOME']))
        newLine.append(tonyDict[key]['SOURCE'])
        newLine.append(tonyDict[key]['CELL_TYPE'])
        newLine.append(tonyDict[key]['NAME'])
        newLine.append(tonyDict[key]['BAMFILE'])
        newTable.append(newLine)

    #print newTable
    
    utils.unParseTable(newTable, '/grail/projects/masterBamTable.txt', '\t')
Developer: BoulderLabs; Project: pipeline; Lines of code: 48; Source file: bamTableUpdate.py

Example 8: findValleys

def findValleys(gene_to_enhancer_dict, bamFileList, projectName, projectFolder, cutoff = 0.2):
    '''
    takes in the gene-to-enhancer dict
    collects all valley loci associated with each gene and returns the
    path to a bed file of the stitched valleys
    '''


    all_valley_bed = []
    valleyDict = {}

    #start w/ a bamFileList and make a list of bam type objects
    bam_list = [utils.Bam(bam_path) for bam_path in bamFileList]
    max_read_length = max([bam.getReadLengths()[0] for bam in bam_list])

    gene_list = gene_to_enhancer_dict.keys()
    gene_list.sort()
    ticker = 0
    print('number of regions processed:')
    for gene in gene_list:
        
        valleyDict[gene] = []

        for region in gene_to_enhancer_dict[gene]:
            if ticker %100 == 0:
                print(ticker)
            ticker+=1
            scoreArray = scoreValley(region, bam_list,max_read_length,projectName, projectFolder)
            for index,score in enumerate(scoreArray):
                if score > cutoff:
                    valley = utils.Locus(region.chr(), region.start() + index*10,
                                         region.start() + (index+1)*10, '.')
                    valleyDict[gene].append(valley)

        stitchedValleys = stitchValleys(valleyDict[gene])
        valleyDict[gene] = stitchedValleys
        for valley in stitchedValleys:
            all_valley_bed.append([valley.chr(), valley.start(), valley.end()])


    all_bed_path = projectFolder + projectName + '_all_valleys.bed'
    utils.unParseTable(all_valley_bed, all_bed_path, '\t')


    return all_bed_path
Developer: linlabcode; Project: pipeline; Lines of code: 48; Source file: CRC3.py
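Each entry of scoreArray covers a fixed 10 bp bin, so a bin index maps back to genomic coordinates as sketched below (coordinate arithmetic only; scoreValley and stitchValleys are defined elsewhere in CRC3.py, and the values here are hypothetical):

region_start = 128747680  # hypothetical region start
cutoff = 0.2              # the default cutoff
for index, score in enumerate([0.1, 0.35, 0.9]):
    if score > cutoff:
        valley = (region_start + index * 10, region_start + (index + 1) * 10)
        print('valley bin %s: %s-%s (score %s)' % (index, valley[0], valley[1], score))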

Example 9: mapGFFLineToBed

def mapGFFLineToBed(gffLine, outFolder, nBins, bedCollection, header=''):
    '''
    for every line produces a file with all of the rectangles to draw
    '''

    if len(header) == 0:
        gffString = '%s_%s_%s_%s' % (gffLine[0], gffLine[6], gffLine[3], gffLine[4])
    else:
        gffString = header
    diagramTable = [[0, 0, 0, 0]]
    nameTable = [['', 0, 0]]
    gffLocus = utils.Locus(gffLine[0], int(gffLine[3]), int(gffLine[4]), gffLine[6], gffLine[1])

    scaleFactor = float(nBins) / gffLocus.len()
    # plotting buffer for diagrams
    # plotBuffer = int(gffLocus.len() / float(nBins) * 20) # UNUSED (?)

    overlapLoci = bedCollection.getOverlap(gffLocus, sense='both')
    print("IDENTIFIED %s OVERLAPPING BED LOCI FOR REGION %s" % (len(overlapLoci),gffLine))

    # since beds come from multiple sources, we want to figure out how to offset them
    offsetDict = {}  # this will store each ID name
    bedNamesList = utils.uniquify([locus.ID() for locus in overlapLoci])
    bedNamesList.sort()
    for i in range(len(bedNamesList)):
        offsetDict[bedNamesList[i]] = 2 * i  # offsets different categories of bed regions

    if gffLine[6] == '-':
        refPoint = int(gffLine[4])
    else:
        refPoint = int(gffLine[3])

    # fill out the name table
    for name in bedNamesList:
        offset = offsetDict[name]
        nameTable.append([name, 0, 0.0 - offset])

    for bedLocus in overlapLoci:

        offset = offsetDict[bedLocus.ID()]

        [start, stop] = [abs(x - refPoint) * scaleFactor for x in bedLocus.coords()]

        diagramTable.append([start, -0.5 - offset, stop, 0.5 - offset])

    utils.unParseTable(diagramTable, outFolder + gffString + '_bedDiagramTemp.txt', '\t')
    utils.unParseTable(nameTable, outFolder + gffString + '_bedNameTemp.txt', '\t')
Developer: linlabcode; Project: pipeline; Lines of code: 47; Source file: bamPlot_turbo.py
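The rectangle coordinates are rescaled so that the full gff locus spans nBins plot units, measured from the strand-aware reference point. A toy check of the scaling (hypothetical numbers):

nBins = 100
locus_start, locus_end = 1000, 3000         # a 2 kb region
scaleFactor = float(nBins) / (locus_end - locus_start)
refPoint = locus_start                      # '+' strand uses the region start
bed_coords = [1500, 1600]                   # a bed locus inside the region
print([abs(x - refPoint) * scaleFactor for x in bed_coords])  # [25.0, 30.0]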

Example 10: buildGraph

def buildGraph(edgeDict,gene_to_enhancer_dict,output_folder, analysis_name,cutoff=1):
    '''
    from the collapsed edge dictionary, build a target graph
    require at least n motifs to constitute an edge where n is set by cutoff. 
    default is 1
    '''

    node_list = edgeDict.keys()
    node_list.sort()
    #this is only edges between TFs
    graph = nx.DiGraph(name=analysis_name)
    graph.add_nodes_from(node_list)
    

    #this stores ALL edges identified by motifs
    edge_table = [['SOURCE','TARGET','CHROM','START','STOP','REGION_ID','TF_INTERACTION']]
    edge_output = '%s%s_EDGE_TABLE.txt' % (output_folder,analysis_name)

    for source in node_list:
        print(source)
        target_list = edgeDict[source].keys()
        target_list.sort()
        for target in target_list:

            #now we need to see which target regions this guy overlaps
            target_regions = gene_to_enhancer_dict[target]
            target_collection = utils.LocusCollection(target_regions,50)

            #get the edges hitting that target
            edgeLoci = edgeDict[source][target]
            if node_list.count(target) > 0:
                tf_interaction = 1
            else:
                tf_interaction = 0
            #only add to the graph if this is a TF/TF interaction
            if len(edgeLoci) >= cutoff and node_list.count(target) > 0:
                graph.add_edge(source,target)
                
            #now for each edge, add to the table
            for edgeLocus in edgeLoci:
                regionString = ','.join([locus.ID() for locus in target_collection.getOverlap(edgeLocus)])
                edgeLine = [source,target,edgeLocus.chr(),edgeLocus.start(),edgeLocus.end(),regionString,tf_interaction]
                edge_table.append(edgeLine)

    utils.unParseTable(edge_table,edge_output,'\t')
    return graph
Developer: linlabcode; Project: pipeline; Lines of code: 46; Source file: CRC3.py
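Once built, the returned networkx DiGraph can be queried with standard networkx calls; this usage sketch is not from the source:

import networkx as nx

graph = nx.DiGraph()
graph.add_edge('TF_A', 'TF_B')
graph.add_edge('TF_A', 'TF_C')
print(graph.out_degree('TF_A'))  # 2: number of TFs this factor targets
print(graph.in_degree('TF_B'))   # 1: number of TFs targeting this factor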

Example 11: mergeCollections

def mergeCollections(superFile1,superFile2,name1,name2,output=''):

    '''
    merges the two super-enhancer collections
    '''

    conSuperCollection = makeSECollection(superFile1,name1)

    tnfSuperCollection = makeSECollection(superFile2,name2)


    #now merge them
    mergedLoci = conSuperCollection.getLoci() + tnfSuperCollection.getLoci()

    mergedCollection = utils.LocusCollection(mergedLoci,50)

    #stitch the collection together
    stitchedCollection = mergedCollection.stitchCollection()

    stitchedLoci = stitchedCollection.getLoci()
    
    #loci that are in both get renamed with a new unique identifier

    renamedLoci =[]
    ticker = 1
    for locus in stitchedLoci:

        if len(conSuperCollection.getOverlap(locus)) > 0 and len(tnfSuperCollection.getOverlap(locus)) > 0:

            newID = 'CONSERVED_%s' % (str(ticker))
            ticker +=1
            locus._ID = newID
        else:
            locus._ID = locus.ID()[2:]
        renamedLoci.append(locus)

    #now we turn this into a gff and write it out
    gff = utils.locusCollectionToGFF(utils.LocusCollection(renamedLoci,50))

    if len(output) == 0:
        return gff
    else:
        print "writing merged gff to %s" % (output)
        utils.unParseTable(gff,output,'\t')
        return output
Developer: zhouhufeng; Project: pipeline; Lines of code: 45; Source file: dynamicEnhancer.py

Example 12: makeRigerTable

def makeRigerTable(foldTableFile,output=''):

    '''
    converts the fold table into the five-column input format expected by RIGER:
    construct, gene symbol, normalized score, construct rank, and hairpin weight
    '''

    #need a table of this format
    rigerTable = [['Construct','GeneSymbol','NormalizedScore','Construct Rank','HairpinWeight']]
    #set weight to 1 for now

    foldTable = utils.parseTable(foldTableFile,'\t')

    constructOrder = utils.order([float(line[2]) for line in foldTable[1:]],decreasing=True)

    #make geneCountDict
    print("making gene count dictionary")
    geneCountDict= defaultdict(int)
    for line in foldTable[1:]:
        geneCountDict[line[1]] +=1

    print("iterating through constructs")
    constructRank = 1
    for i in constructOrder:
        rowIndex = i+1 # accounts for the header
        geneName = foldTable[rowIndex][1]
        if geneCountDict[geneName] == 1:
            print("Gene %s only has one guide RNA. Excluding from FRIGER analysis" % (geneName))
            continue

        newLine = foldTable[rowIndex][0:3] + [constructRank,1]
        rigerTable.append(newLine)
        constructRank += 1

    if len(output) == 0:
        output = string.replace(foldTableFile,'_log2Ratio.txt','_friger.txt')
    
    utils.unParseTable(rigerTable,output,'\t')

    return output
Developer: BoulderLabs; Project: pipeline; Lines of code: 39; Source file: processGeckoBam.py
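Judging from its usage here and in the other examples, utils.order appears to return the indices that would sort a list (like R's order()). A hypothetical equivalent with builtins, for readers without the pipeline's utils module:

values = [1.2, -0.4, 3.1]
order = sorted(range(len(values)), key=lambda i: values[i], reverse=True)  # decreasing=True
print(order)  # [2, 0, 1]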

Example 13: collapseRegionMap

def collapseRegionMap(regionMapFile,name='',controlBams=False):

    '''
    takes a regionMap file and collapses the per-dataset signal columns into
    a single mean-signal column, subtracting matched controls when controlBams is True
    '''

    regionMap = utils.parseTable(regionMapFile,'\t')

    for n,line in enumerate(regionMap):
        
        if n ==0:
            #new header
            if len(name) == 0:
                name = 'MERGED_SIGNAL'
            regionMap[n] = line[0:6] +[name]

        else:
            newLine = list(line[0:6])
            if controlBams:
                signalLine = [float(x) for x in line[6:]]
                rankbyIndexes = range(0,len(signalLine)/2,1)
                controlIndexes = range(len(signalLine)/2,len(signalLine),1)
                metaVector = []
                for i,j in zip(rankbyIndexes,controlIndexes):
                    #min signal is 0
                    metaVector.append(max(0,signalLine[i] - signalLine[j]))
                metaSignal = numpy.mean(metaVector)
            else:
                metaSignal = numpy.mean([float(x) for x in line[6:]])
            regionMap[n] = newLine + [metaSignal]

    outputFile = string.replace(regionMapFile,'REGION','META')
    utils.unParseTable(regionMap,outputFile,'\t')
    return(outputFile)
Developer: linlabcode; Project: pipeline; Lines of code: 36; Source file: ROSE2_META.py
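The collapse assumes the first half of the signal columns are the rankby datasets and the second half their matched controls (the original relies on Python 2 integer division for the split). A toy line with hypothetical values:

signalLine = [10.0, 4.0, 3.0, 5.0]  # two rankby columns, then two control columns
half = len(signalLine) // 2         # floor division, so this also works under Python 3
metaVector = [max(0, signalLine[i] - signalLine[j])
              for i, j in zip(range(half), range(half, len(signalLine)))]
print(metaVector)  # [7.0, 0] -- negative background-subtracted signal floors at 0
print(sum(metaVector) / float(len(metaVector)))  # 3.5, the collapsed META signal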

Example 14: mapCollection

def mapCollection(stitchedCollection, referenceCollection, bamFileList, mappedFolder, output, refName):
    '''
    makes a table of factor density in a stitched locus and ranks table by number of loci stitched together
    '''

    print('FORMATTING TABLE')
    loci = stitchedCollection.getLoci()

    locusTable = [['REGION_ID', 'CHROM', 'START', 'STOP', 'NUM_LOCI', 'CONSTITUENT_SIZE']]

    lociLenList = []

    # strip out any that are in chrY
    for locus in list(loci):
        if locus.chr() == 'chrY':
            loci.remove(locus)

    for locus in loci:
        # numLociList.append(int(stitchLocus.ID().split('_')[1]))
        lociLenList.append(locus.len())
        # numOrder = order(numLociList,decreasing=True)
    lenOrder = utils.order(lociLenList, decreasing=True)
    ticker = 0
    for i in lenOrder:
        ticker += 1
        if ticker % 1000 == 0:
            print(ticker)
        locus = loci[i]

        # First get the size of the enriched regions within the stitched locus
        refEnrichSize = 0
        refOverlappingLoci = referenceCollection.getOverlap(locus, 'both')
        for refLocus in refOverlappingLoci:
            refEnrichSize += refLocus.len()

        try:
            stitchCount = int(locus.ID().split('_')[0])
        except ValueError:
            stitchCount = 1
        coords = [int(x) for x in locus.coords()]

        locusTable.append([locus.ID(), locus.chr(), min(coords), max(coords), stitchCount, refEnrichSize])

    print('GETTING MAPPED DATA')
    print("USING A BAMFILE LIST:")
    print(bamFileList)
    for bamFile in bamFileList:

        bamFileName = bamFile.split('/')[-1]

        print('GETTING MAPPING DATA FOR  %s' % bamFile)
        # assumes standard convention for naming enriched region gffs

        # opening up the mapped GFF
        print('OPENING %s%s_%s_MAPPED/matrix.txt' % (mappedFolder, refName, bamFileName))

        mappedGFF = utils.parseTable('%s%s_%s_MAPPED/matrix.txt' % (mappedFolder, refName, bamFileName), '\t')

        signalDict = defaultdict(float)
        print('MAKING SIGNAL DICT FOR %s' % (bamFile))
        mappedLoci = []
        for line in mappedGFF[1:]:

            chrom = line[1].split('(')[0]
            start = int(line[1].split(':')[-1].split('-')[0])
            end = int(line[1].split(':')[-1].split('-')[1])
            mappedLoci.append(utils.Locus(chrom, start, end, '.', line[0]))
            try:
                signalDict[line[0]] = float(line[2]) * (abs(end - start))
            except ValueError:
                print('WARNING NO SIGNAL FOR LINE:')
                print(line)
                continue

        mappedCollection = utils.LocusCollection(mappedLoci, 500)
        locusTable[0].append(bamFileName)

        for i in range(1, len(locusTable)):
            signal = 0.0
            line = locusTable[i]
            lineLocus = utils.Locus(line[1], line[2], line[3], '.')
            overlappingRegions = mappedCollection.getOverlap(lineLocus, sense='both')
            for region in overlappingRegions:
                signal += signalDict[region.ID()]
            locusTable[i].append(signal)

    utils.unParseTable(locusTable, output, '\t')
Developer: linlabcode; Project: pipeline; Lines of code: 87; Source file: ROSE2_META.py
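The parsing of matrix.txt assumes each locus string looks like 'chrom(sense):start-end'; the split chain recovers the coordinates like this (toy string, format inferred from the code above):

locusString = 'chr1(.):128747680-128753674'
chrom = locusString.split('(')[0]
start = int(locusString.split(':')[-1].split('-')[0])
end = int(locusString.split(':')[-1].split('-')[1])
print('%s:%s-%s' % (chrom, start, end))  # chr1:128747680-128753674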

Example 15: optimizeStitching

def optimizeStitching(locusCollection, name, outFolder, stepSize=500):
    '''
    takes a locus collection and starts writing out stitching stats at step sized intervals
    '''
    maxStitch = 15000  # set a hard-wired maximum stitching parameter

    stitchTable = [['STEP', 'NUM_REGIONS', 'TOTAL_CONSTIT', 'TOTAL_REGION', 'MEAN_CONSTIT', 'MEDIAN_CONSTIT', 'MEAN_REGION', 'MEDIAN_REGION', 'MEAN_STITCH_FRACTION', 'MEDIAN_STITCH_FRACTION']]
    # first consolidate the collection
    locusCollection = locusCollection.stitchCollection(stitchWindow=0)
    total_constit = sum([locus.len() for locus in locusCollection.getLoci()])
    step = 0
    while step <= maxStitch:

        print("Getting stitch stats for %s (bp)" % (step))
        stitchCollection = locusCollection.stitchCollection(stitchWindow=step)
        num_regions = len(stitchCollection)
        stitchLoci = stitchCollection.getLoci()
        regionLengths = [locus.len() for locus in stitchLoci]
        total_region = sum(regionLengths)
        constitLengths = []
        for locus in stitchLoci:

            constitLoci = locusCollection.getOverlap(locus)
            constitLengths.append(sum([locus.len() for locus in constitLoci]))

        meanConstit = round(numpy.mean(constitLengths), 2)
        medianConstit = round(numpy.median(constitLengths), 2)

        meanRegion = round(numpy.mean(regionLengths), 2)
        medianRegion = round(numpy.median(regionLengths), 2)

        stitchFractions = [float(constitLengths[i]) / float(regionLengths[i]) for i in range(len(regionLengths))]
        meanStitchFraction = round(numpy.mean(stitchFractions), 2)
        medianStitchFraction = round(numpy.median(stitchFractions), 2)

        newLine = [step, num_regions, total_constit, total_region, meanConstit, medianConstit, meanRegion, medianRegion, meanStitchFraction, medianStitchFraction]

        stitchTable.append(newLine)

        step += stepSize

    # write the stitch table to disk
    stitchParamFile = '%s%s_stitch_params.tmp' % (outFolder, name)
    utils.unParseTable(stitchTable, stitchParamFile, '\t')
    # call the rscript
    rCmd = 'Rscript ./ROSE2_stitchOpt.R %s %s %s' % (stitchParamFile, outFolder, name)
    print(rCmd)
    # get back the stitch parameter
    rOutput = subprocess.Popen(rCmd, stdout=subprocess.PIPE, shell=True)
    rOutputTest = rOutput.communicate()

    print(rOutputTest)

    stitchParam = rOutputTest[0].split('\n')[2]
    try:
        stitchParam = int(stitchParam)
    except ValueError:
        print("INVALID STITCHING PARAMETER. STITCHING OPTIMIZATION FAILED")
        sys.exit()

    # delete? the table
    # os.system('rm -f %s' % (stitchParamFile))
    return stitchParam
Developer: linlabcode; Project: pipeline; Lines of code: 63; Source file: ROSE2_META.py


Note: the utils.unParseTable examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their authors, who retain copyright; consult each project's license before redistributing or using the code, and do not reproduce this article without permission.