本文整理汇总了Python中bx.bbi.bigwig_file.BigWigFile.summarize方法的典型用法代码示例。如果您正苦于以下问题:Python BigWigFile.summarize方法的具体用法?Python BigWigFile.summarize怎么用?Python BigWigFile.summarize使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类bx.bbi.bigwig_file.BigWigFile
的用法示例。
在下文中一共展示了BigWigFile.summarize方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: summarize
# 需要导入模块: from bx.bbi.bigwig_file import BigWigFile [as 别名]
# 或者: from bx.bbi.bigwig_file.BigWigFile import summarize [as 别名]
def summarize(self, interval, bins=None, method='summarize',
function='mean'):
# We may be dividing by zero in some cases, which raises a warning in
# NumPy based on the IEEE 754 standard (see
# http://docs.scipy.org/doc/numpy/reference/generated/
# numpy.seterr.html)
#
# That's OK -- we're expecting that to happen sometimes. So temporarily
# disable this error reporting for the duration of this method.
orig = np.geterr()['invalid']
np.seterr(invalid='ignore')
if (bins is None) or (method == 'get_as_array'):
bw = BigWigFile(open(self.fn))
s = bw.get_as_array(
interval.chrom,
interval.start,
interval.stop,)
if s is None:
s = np.zeros((interval.stop - interval.start,))
else:
s[np.isnan(s)] = 0
elif method == 'ucsc_summarize':
if function in ['mean', 'min', 'max', 'std', 'coverage']:
return self.ucsc_summarize(interval, bins, function=function)
else:
raise ValueError('function "%s" not supported by UCSC\'s'
'bigWigSummary')
else:
bw = BigWigFile(open(self.fn))
s = bw.summarize(
interval.chrom,
interval.start,
interval.stop, bins)
if s is None:
s = np.zeros((bins,))
else:
if function == 'sum':
s = s.sum_data
if function == 'mean':
s = s.sum_data / s.valid_count
s[np.isnan(s)] = 0
if function == 'min':
s = s.min_val
s[np.isinf(s)] = 0
if function == 'max':
s = s.max_val
s[np.isinf(s)] = 0
if function == 'std':
s = (s.sum_squares / s.valid_count)
s[np.isnan(s)] = 0
# Reset NumPy error reporting
np.seterr(divide=orig)
return s
示例2: extract_phastcons
# 需要导入模块: from bx.bbi.bigwig_file import BigWigFile [as 别名]
# 或者: from bx.bbi.bigwig_file.BigWigFile import summarize [as 别名]
def extract_phastcons ( bedfile, phas_chrnames, width, pf_res ):
"""Extract phastcons scores from a bed file.
Return the average scores
"""
info("read bed file...")
bfhd = open(bedfile)
bed = parse_BED(bfhd)
# calculate the middle point of bed regions then extend left and right by 1/2 width
bchrs = bed.peaks.keys()
bchrs.sort()
chrs = []
for c in phas_chrnames:
if c in bchrs:
chrs.append(c)
sumscores = []
for chrom in chrs:
info("processing chromosome: %s" %chrom)
pchrom = bed.peaks[chrom]
bw = BigWigFile(open(chrom+'.bw', 'rb'))
for i in range(len(pchrom)):
mid = int((pchrom[i][0]+pchrom[i][1])/2)
left = int(mid - width/2)
right = int(mid + width/2)
if left < 0:
left = 0
right = width
summarize = bw.summarize(chrom, left, right, width/pf_res)
if not summarize:
continue
dat = summarize.sum_data / summarize.valid_count
#dat = dat.strip().split('\t')
sumscores.append(dat)
## a list with each element is a list of conservation score at the same coordinate
sumscores = map(list, zip(*sumscores))
## exclude na
sumscores = [[t2 for t2 in t if not math.isnan(t2)] for t in sumscores]
try:
conscores = [sum(t)/len(t) for t in sumscores]
except ZeroDivisionError:
conscores = [0] * (width/pf_res)
return conscores
示例3: TestBigWig
# 需要导入模块: from bx.bbi.bigwig_file import BigWigFile [as 别名]
# 或者: from bx.bbi.bigwig_file.BigWigFile import summarize [as 别名]
class TestBigWig(unittest.TestCase):
def setUp(self):
f = open( "test_data/bbi_tests/test.bw" )
self.bw = BigWigFile(file=f)
def test_get_summary(self):
data = self.bw.query("chr1", 10000, 20000, 10)
means = [ x['mean'] for x in data ]
print means
assert numpy.allclose( map(float, means), [-0.17557571594973645, -0.054009292602539061, -0.056892242431640622, -0.03650328826904297, 0.036112907409667966, 0.0064466032981872557, 0.036949024200439454, 0.076638259887695306, 0.043518108367919923, 0.01554749584197998] )
# Summarize variant
sd = self.bw.summarize( "chr1", 10000, 20000, 10)
assert numpy.allclose( sd.sum_data / sd.valid_count, [-0.17557571594973645, -0.054009292602539061, -0.056892242431640622, -0.03650328826904297, 0.036112907409667966, 0.0064466032981872557, 0.036949024200439454, 0.076638259887695306, 0.043518108367919923, 0.01554749584197998] )
# Test min and max for this entire summary region
data = self.bw.query("chr1", 10000, 20000, 1)
maxs = [ x['max'] for x in data ]
mins = [ x['min'] for x in data ]
self.assertEqual( map(float, maxs), [0.289000004529953] )
self.assertEqual( map(float, mins), [-3.9100000858306885] )
def test_get_leaf(self):
data = self.bw.query("chr1", 11000, 11005, 5)
means = [ x['mean'] for x in data ]
assert numpy.allclose( map(float, means), [0.050842501223087311, -2.4589500427246094, 0.050842501223087311, 0.050842501223087311, 0.050842501223087311] )
# Test min and max for this entire leaf region
data = self.bw.query("chr1", 11000, 11005, 1)
maxs = [ x['max'] for x in data ]
mins = [ x['min'] for x in data ]
self.assertEqual( map(float, maxs), [0.050842501223087311] )
self.assertEqual( map(float, mins), [-2.4589500427246094] )
def test_wrong_nochrom(self):
data = self.bw.query("chr2", 0, 10000, 10)
self.assertEqual( data, None )
示例4: HilbertMatrixBigWig
# 需要导入模块: from bx.bbi.bigwig_file import BigWigFile [as 别名]
# 或者: from bx.bbi.bigwig_file.BigWigFile import summarize [as 别名]
class HilbertMatrixBigWig(HilbertMatrix):
# Need to override build(), but otherwise just like a HilbertMatrix
def __init__(self, *args, **kwargs):
"""
Subclass of HilbertMatrix specifically for bigWig format files
"""
super(HilbertMatrixBigWig, self).__init__(*args, **kwargs)
def build(self):
"""
Build the matrix.
Since bigWig files are essentially pre-summarized, this just extracts
the chrom/start/stop represented by each cell in the matrix and fills
it with the value from the bigWig file.
"""
self.bigwig = BigWigFile(open(self.file))
chrom_rc, chrom_bins = self.chrom2rc()
if self.chrom == 'genome':
chroms = self.chromdict.keys()
else:
chroms = [self.chrom]
for chrom in chroms:
rc = chrom_rc[chrom]
nbins = chrom_bins[chrom]
start, stop = self.chromdict[chrom]
results = self.bigwig.summarize(chrom, start, stop, nbins)
values = results.sum_data / results.valid_count
values[np.isnan(values)] = 0
self.matrix[rc[:,0], rc[:, 1]] = values
self._cleanup()
def chrom2rc(self):
"""
Return a dictionary of {chrom: (rows, cols)} and {chrom: nbins}
"""
precomputed = np.load(
os.path.join(
os.path.dirname(__file__),
'precomputed.npz'))
rc = precomputed['_%s' % self.matrix_dim]
d = {}
bins = {}
last_stop = 0
for chrom, startstop in self.chromdict.items():
start, stop = startstop
frac = self.chromdict[chrom][1] / float(self.chrom_length)
nbins = int(frac * (self.matrix_dim * self.matrix_dim))
d_start = last_stop
d_stop = d_start + nbins
d[chrom] = rc[d_start:d_stop, :]
bins[chrom] = nbins
last_stop += nbins
return d, bins
示例5: main
# 需要导入模块: from bx.bbi.bigwig_file import BigWigFile [as 别名]
# 或者: from bx.bbi.bigwig_file.BigWigFile import summarize [as 别名]
def main():
usage = "usage: %prog <-r rfile> [options] <bigwig files> ..."
description = "Draw correlation plot for many bigwig files. Based on qc_chIP_whole.py"
optparser = OptionParser(version="%prog 0.1",description=description,usage=usage,add_help_option=False)
optparser.add_option("-h","--help",action="help",help="Show this help message and exit.")
#optparser.add_option("-d","--db",type="str",dest="dbname",help="UCSC db name for the assembly. Default: ce4",default="ce4")
optparser.add_option("-r","--rfile",dest="rfile",
help="R output file. If not set, do not save R file.")
optparser.add_option("-s","--step",dest="step",type="int",
help="sampling step in kbps. default: 100, minimal: 1",default=100)
optparser.add_option("-z","--imgsize",dest="imgsize",type="int",
help="image size in inches, note the PNG dpi is 72. default: 10, minimal: 10",default=10)
optparser.add_option("-f","--format",dest="imgformat",type="string",
help="image format. PDF or PNG",default='PDF')
#optparser.add_option("-m","--method",dest="method",type="string",default="median",
# help="method to process the paired two sets of data in the sampling step. Choices are 'median', 'mean', and 'sample' (just take one point out of a data set). Default: median")
optparser.add_option("-l","--wig-label",dest="wiglabel",type="string",action="append",
help="the wiggle file labels in the figure. No space is allowed. This option should be used same times as wiggle files, and please input them in the same order as -w option. default: will use the wiggle file filename as labels.")
optparser.add_option("--min-score",dest="minscore",type="float",default=-10000,
help="minimum score included in calculation. Points w/ score lower than this will be discarded.")
optparser.add_option("--max-score",dest="maxscore",type="float",default=10000,
help="maximum score included in calculation. Points w/ score larger than this will be discarded.")
optparser.add_option("-H","--heatmap",dest="heatmap",action="store_true",default=False,
help="If True, a heatmap image will be generated instead of paired scatterplot image.")
(options,wigfiles) = optparser.parse_args()
imgfmt = options.imgformat.upper()
if imgfmt != 'PDF' and imgfmt != 'PNG':
print "unrecognized format: %s" % imgfmt
sys.exit(1)
medfunc = mean
wigfilenum = len(wigfiles)
if wigfilenum < 2 or not options.rfile:
error("must provide >=2 wiggle files")
optparser.print_help()
sys.exit(1)
# wig labels
if options.wiglabel and len(options.wiglabel) == wigfilenum:
wiglabel = options.wiglabel
else: # or use the filename
wiglabel = map(lambda x:os.path.basename(x),wigfiles)
if options.step < 1:
error("Step can not be lower than 1!")
sys.exit(1)
if options.imgsize < 10:
error("Image size can not be lower than 10!")
sys.exit(1)
# check the files
for f in wigfiles:
if not os.path.isfile(f):
error("%s is not valid!" % f)
sys.exit(1)
info("number of bigwig files: %d" % wigfilenum)
#get chromosome length from optins.wig[0]:
p=BwIO(wigfiles[0])
chrom_len = {}
for i in p.chromosomeTree['nodes']:
chrom_len[i['key']] = i['chromSize']
# get the common chromosome list:
chrset = set([t['key'] for t in p.chromosomeTree['nodes']])
for bw in wigfiles[1:]:
p=BwIO(bw)
chrset = chrset.intersection(set([t['key'] for t in p.chromosomeTree['nodes']]))
chroms = list(chrset)
if not chroms:
error('No common chrom found')
sys.exit()
info("common chromosomes are %s." % ",".join(chroms))
# Start writing R file
if options.rfile:
rfhd = open(options.rfile,"w")
rfhd.write('''require("RColorBrewer") ## from CRAN\n''')
# for each wig file, sample...
for i in range(len(wigfiles)):
bw = BigWigFile(open(wigfiles[i],'rb'))
info("read wiggle track from bigwig file #%d" % (i+1))
profile = []
for chrom in chroms:
# The too-short chromosome will cause error in bw.summarize function below
# So filter them out
if chrom_len[chrom]/options.step/1000==0:
warn("A very-short chromosome (%s) found and skipped"%chrom)
continue
summary = bw.summarize(chrom, 0, chrom_len[chrom], chrom_len[chrom]/options.step/1000)
#.........这里部分代码省略.........