当前位置: 首页>>代码示例>>Python>>正文


Python BigWigFile.summarize方法代码示例

本文整理汇总了Python中bx.bbi.bigwig_file.BigWigFile.summarize方法的典型用法代码示例。如果您正苦于以下问题:Python BigWigFile.summarize方法的具体用法?Python BigWigFile.summarize怎么用?Python BigWigFile.summarize使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在bx.bbi.bigwig_file.BigWigFile的用法示例。


在下文中一共展示了BigWigFile.summarize方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: summarize

# 需要导入模块: from bx.bbi.bigwig_file import BigWigFile [as 别名]
# 或者: from bx.bbi.bigwig_file.BigWigFile import summarize [as 别名]
    def summarize(self, interval, bins=None, method='summarize',
                  function='mean'):

        # We may be dividing by zero in some cases, which raises a warning in
        # NumPy based on the IEEE 754 standard (see
        # http://docs.scipy.org/doc/numpy/reference/generated/
        #       numpy.seterr.html)
        #
        # That's OK -- we're expecting that to happen sometimes. So temporarily
        # disable this error reporting for the duration of this method.
        orig = np.geterr()['invalid']
        np.seterr(invalid='ignore')

        if (bins is None) or (method == 'get_as_array'):
            bw = BigWigFile(open(self.fn))
            s = bw.get_as_array(
                interval.chrom,
                interval.start,
                interval.stop,)
            if s is None:
                s = np.zeros((interval.stop - interval.start,))
            else:
                s[np.isnan(s)] = 0

        elif method == 'ucsc_summarize':
            if function in ['mean', 'min', 'max', 'std', 'coverage']:
                return self.ucsc_summarize(interval, bins, function=function)
            else:
                raise ValueError('function "%s" not supported by UCSC\'s'
                                 'bigWigSummary')

        else:
            bw = BigWigFile(open(self.fn))
            s = bw.summarize(
                interval.chrom,
                interval.start,
                interval.stop, bins)
            if s is None:
                s = np.zeros((bins,))
            else:
                if function == 'sum':
                    s = s.sum_data
                if function == 'mean':
                    s = s.sum_data / s.valid_count
                    s[np.isnan(s)] = 0
                if function == 'min':
                    s = s.min_val
                    s[np.isinf(s)] = 0
                if function == 'max':
                    s = s.max_val
                    s[np.isinf(s)] = 0
                if function == 'std':
                    s = (s.sum_squares / s.valid_count)
                    s[np.isnan(s)] = 0

        # Reset NumPy error reporting
        np.seterr(divide=orig)
        return s
开发者ID:bakerwm,项目名称:metaseq,代码行数:60,代码来源:filetype_adapters.py

示例2: extract_phastcons

# 需要导入模块: from bx.bbi.bigwig_file import BigWigFile [as 别名]
# 或者: from bx.bbi.bigwig_file.BigWigFile import summarize [as 别名]
def extract_phastcons ( bedfile, phas_chrnames, width, pf_res ):
    """Extract phastcons scores from a bed file.

    Return the average scores
    """
    info("read bed file...")
    bfhd = open(bedfile)
    bed = parse_BED(bfhd)

    # calculate the middle point of bed regions then extend left and right by 1/2 width
    bchrs = bed.peaks.keys()
    bchrs.sort()

    chrs = []
    for c in phas_chrnames:
        if c in bchrs:
            chrs.append(c)

    sumscores = []
    for chrom in chrs:
        info("processing chromosome: %s" %chrom)
        pchrom = bed.peaks[chrom]
        bw = BigWigFile(open(chrom+'.bw', 'rb'))
        for i in range(len(pchrom)):
            mid = int((pchrom[i][0]+pchrom[i][1])/2)
            left = int(mid - width/2)
            right = int(mid + width/2)

            if left < 0:
                left = 0
                right = width

            summarize = bw.summarize(chrom, left, right, width/pf_res)
            if not summarize:
                continue
            dat = summarize.sum_data / summarize.valid_count
            #dat = dat.strip().split('\t')
            sumscores.append(dat)

    ## a list with each element is a list of conservation score at the same coordinate
    sumscores = map(list, zip(*sumscores))

    ## exclude na
    sumscores = [[t2 for t2 in t if not math.isnan(t2)] for t in sumscores]
    try:
        conscores = [sum(t)/len(t) for t in sumscores]
    except ZeroDivisionError:
        conscores = [0] * (width/pf_res)

    return conscores
开发者ID:asntech,项目名称:chilin,代码行数:52,代码来源:conservation_plot.py

示例3: TestBigWig

# 需要导入模块: from bx.bbi.bigwig_file import BigWigFile [as 别名]
# 或者: from bx.bbi.bigwig_file.BigWigFile import summarize [as 别名]
class TestBigWig(unittest.TestCase):
    def setUp(self):
        f = open( "test_data/bbi_tests/test.bw" )
        self.bw = BigWigFile(file=f)
        
    def test_get_summary(self):
        data = self.bw.query("chr1", 10000, 20000, 10)
        means = [ x['mean'] for x in data ]
        print means
        assert numpy.allclose( map(float, means), [-0.17557571594973645, -0.054009292602539061, -0.056892242431640622, -0.03650328826904297, 0.036112907409667966, 0.0064466032981872557, 0.036949024200439454, 0.076638259887695306, 0.043518108367919923, 0.01554749584197998] )
        
        # Summarize variant
        sd = self.bw.summarize( "chr1", 10000, 20000, 10)
        assert numpy.allclose( sd.sum_data / sd.valid_count, [-0.17557571594973645, -0.054009292602539061, -0.056892242431640622, -0.03650328826904297, 0.036112907409667966, 0.0064466032981872557, 0.036949024200439454, 0.076638259887695306, 0.043518108367919923, 0.01554749584197998] )
        
        # Test min and max for this entire summary region
        data = self.bw.query("chr1", 10000, 20000, 1)
        maxs = [ x['max'] for x in data ]
        mins = [ x['min'] for x in data ]
        self.assertEqual( map(float, maxs), [0.289000004529953] )
        self.assertEqual( map(float, mins), [-3.9100000858306885] )
        
    def test_get_leaf(self):
        data = self.bw.query("chr1", 11000, 11005, 5)
        means = [ x['mean'] for x in data ]
        assert numpy.allclose( map(float, means), [0.050842501223087311, -2.4589500427246094, 0.050842501223087311, 0.050842501223087311, 0.050842501223087311] )
        
        # Test min and max for this entire leaf region
        data = self.bw.query("chr1", 11000, 11005, 1)
        maxs = [ x['max'] for x in data ]
        mins = [ x['min'] for x in data ]
        self.assertEqual( map(float, maxs), [0.050842501223087311] )
        self.assertEqual( map(float, mins), [-2.4589500427246094] )
        
    def test_wrong_nochrom(self):
        data = self.bw.query("chr2", 0, 10000, 10)
        self.assertEqual( data, None )
开发者ID:jeffhsu3,项目名称:bx-python,代码行数:39,代码来源:bigwig_tests.py

示例4: HilbertMatrixBigWig

# 需要导入模块: from bx.bbi.bigwig_file import BigWigFile [as 别名]
# 或者: from bx.bbi.bigwig_file.BigWigFile import summarize [as 别名]
class HilbertMatrixBigWig(HilbertMatrix):
    # Need to override build(), but otherwise just like a HilbertMatrix
    def __init__(self, *args, **kwargs):
        """
        Subclass of HilbertMatrix specifically for bigWig format files
        """
        super(HilbertMatrixBigWig, self).__init__(*args, **kwargs)

    def build(self):
        """
        Build the matrix.

        Since bigWig files are essentially pre-summarized, this just extracts
        the chrom/start/stop represented by each cell in the matrix and fills
        it with the value from the bigWig file.
        """
        self.bigwig = BigWigFile(open(self.file))

        chrom_rc, chrom_bins = self.chrom2rc()

        if self.chrom == 'genome':
            chroms = self.chromdict.keys()

        else:
            chroms = [self.chrom]

        for chrom in chroms:
            rc = chrom_rc[chrom]
            nbins = chrom_bins[chrom]

            start, stop = self.chromdict[chrom]
            results = self.bigwig.summarize(chrom, start, stop, nbins)
            values = results.sum_data / results.valid_count
            values[np.isnan(values)] = 0

            self.matrix[rc[:,0], rc[:, 1]] = values

        self._cleanup()


    def chrom2rc(self):
        """
        Return a dictionary of {chrom: (rows, cols)} and {chrom: nbins}
        """
        precomputed = np.load(
            os.path.join(
                os.path.dirname(__file__),
                'precomputed.npz'))
        rc = precomputed['_%s' % self.matrix_dim]

        d = {}
        bins = {}
        last_stop = 0
        for chrom, startstop in self.chromdict.items():
            start, stop = startstop
            frac = self.chromdict[chrom][1] / float(self.chrom_length)
            nbins = int(frac * (self.matrix_dim * self.matrix_dim))
            d_start = last_stop
            d_stop = d_start + nbins
            d[chrom] = rc[d_start:d_stop, :]
            bins[chrom] = nbins
            last_stop += nbins
        return d, bins
开发者ID:arq5x,项目名称:scurgen,代码行数:65,代码来源:hilbert.py

示例5: main

# 需要导入模块: from bx.bbi.bigwig_file import BigWigFile [as 别名]
# 或者: from bx.bbi.bigwig_file.BigWigFile import summarize [as 别名]
def main():
    usage = "usage: %prog <-r rfile> [options] <bigwig files> ..."
    description = "Draw correlation plot for many bigwig files. Based on qc_chIP_whole.py"
    
    optparser = OptionParser(version="%prog 0.1",description=description,usage=usage,add_help_option=False)
    optparser.add_option("-h","--help",action="help",help="Show this help message and exit.")
    #optparser.add_option("-d","--db",type="str",dest="dbname",help="UCSC db name for the assembly. Default: ce4",default="ce4")
    optparser.add_option("-r","--rfile",dest="rfile",
                         help="R output file. If not set, do not save R file.")
    optparser.add_option("-s","--step",dest="step",type="int",
                         help="sampling step in kbps. default: 100, minimal: 1",default=100)
    optparser.add_option("-z","--imgsize",dest="imgsize",type="int",
                         help="image size in inches, note the PNG dpi is 72. default: 10, minimal: 10",default=10)    
    optparser.add_option("-f","--format",dest="imgformat",type="string",
                         help="image format. PDF or PNG",default='PDF')
    #optparser.add_option("-m","--method",dest="method",type="string",default="median",
    #                     help="method to process the paired two sets of data in the sampling step. Choices are 'median', 'mean', and 'sample' (just take one point out of a data set). Default: median")
    optparser.add_option("-l","--wig-label",dest="wiglabel",type="string",action="append",
                         help="the wiggle file labels in the figure. No space is allowed. This option should be used same times as wiggle files, and please input them in the same order as -w option. default: will use the wiggle file filename as labels.")
    optparser.add_option("--min-score",dest="minscore",type="float",default=-10000,
                         help="minimum score included in calculation. Points w/ score lower than this will be discarded.")
    optparser.add_option("--max-score",dest="maxscore",type="float",default=10000,
                         help="maximum score included in calculation. Points w/ score larger than this will be discarded.")
    optparser.add_option("-H","--heatmap",dest="heatmap",action="store_true",default=False,
                         help="If True, a heatmap image will be generated instead of paired scatterplot image.")
    
    (options,wigfiles) = optparser.parse_args()

    imgfmt = options.imgformat.upper()
    if imgfmt != 'PDF' and imgfmt != 'PNG':
        print "unrecognized format: %s" % imgfmt
        sys.exit(1)

    medfunc = mean

    wigfilenum = len(wigfiles)
    if wigfilenum < 2 or not options.rfile:
        error("must provide >=2 wiggle files")
        optparser.print_help()
        sys.exit(1)

    # wig labels
    if options.wiglabel and len(options.wiglabel) == wigfilenum:
        wiglabel = options.wiglabel
    else:  # or use the filename
        wiglabel = map(lambda x:os.path.basename(x),wigfiles)
        
    if options.step < 1:
        error("Step can not be lower than 1!")
        sys.exit(1)
    if options.imgsize < 10:
        error("Image size can not be lower than 10!")
        sys.exit(1)

    # check the files
    for f in wigfiles:
        if not os.path.isfile(f):
            error("%s is not valid!" % f)
            sys.exit(1)
        
    info("number of bigwig files: %d" % wigfilenum)

    #get chromosome length from optins.wig[0]:
    p=BwIO(wigfiles[0])
    chrom_len = {}
    for i in p.chromosomeTree['nodes']:
        chrom_len[i['key']] = i['chromSize']
        
    # get the common chromosome list:
    chrset = set([t['key'] for t in p.chromosomeTree['nodes']])
    for bw in wigfiles[1:]:
        p=BwIO(bw)
        chrset = chrset.intersection(set([t['key'] for t in p.chromosomeTree['nodes']]))
    chroms = list(chrset)

    if not chroms:
        error('No common chrom found')
        sys.exit()
    info("common chromosomes are %s." % ",".join(chroms))

    # Start writing R file
    if options.rfile:
        rfhd = open(options.rfile,"w")
        rfhd.write('''require("RColorBrewer") ## from CRAN\n''')

    # for each wig file, sample...
    for i in range(len(wigfiles)):
        bw = BigWigFile(open(wigfiles[i],'rb'))
        
        info("read wiggle track from bigwig file #%d" % (i+1))
        profile = []
        for chrom in chroms:

            # The too-short chromosome will cause error in bw.summarize function below
            # So filter them out
            if chrom_len[chrom]/options.step/1000==0:
                warn("A very-short chromosome (%s) found and skipped"%chrom)
                continue
            
            summary = bw.summarize(chrom, 0, chrom_len[chrom], chrom_len[chrom]/options.step/1000)
#.........这里部分代码省略.........
开发者ID:qinqian,项目名称:coloncancer,代码行数:103,代码来源:bigwig_correlation.py


注:本文中的bx.bbi.bigwig_file.BigWigFile.summarize方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。