本文整理汇总了Python中bx.bbi.bigwig_file.BigWigFile类的典型用法代码示例。如果您正苦于以下问题:Python BigWigFile类的具体用法?Python BigWigFile怎么用?Python BigWigFile使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了BigWigFile类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
def main():
usage="%prog [options]"
parser = OptionParser(usage,version="%prog " + __version__)
parser.add_option("-i","--bwfile1",action="store",type="string",dest="BigWig_File1",help="BigWig files")
parser.add_option("-j","--bwfile2",action="store",type="string",dest="BigWig_File2",help="BigWig files")
parser.add_option("-a","--action",action="store",type="string",dest="action",help='After pairwise align two bigwig files, perform the follow actions (Only select one keyword):"Add" = add signals. "Average" = average signals. "Division"= divide bigwig2 from bigwig1. Add 1 to both bigwig. "Max" = pick the signal that is larger. "Min" = pick the signal that is smaller. "Product" = multiply signals. "Subtract" = subtract signals in 2nd bigwig file from the corresponiding ones in the 1st bigwig file. "geometricMean" = take the geometric mean of signals.')
parser.add_option("-o","--output",action="store",type="string",dest="output_wig",help="Output wig file")
parser.add_option("-s","--chromSize",action="store",type="string",dest="chromSize",help="Chromosome size file. Tab or space separated text file with 2 columns: first column is chromosome name, second column is size of the chromosome.")
parser.add_option("-c","--chunk",action="store",type="int",dest="chunk_size",default=100000,help="Chromosome chunk size. Each chomosome will be cut into samll chunks of this size. Decrease chunk size will save more RAM. default=%default (bp)")
(options,args)=parser.parse_args()
if not (options.BigWig_File1 and options.BigWig_File2 and options.output_wig and options.chromSize):
parser.print_help()
sys.exit(0)
OUT=open(options.output_wig,'w')
bw1 = BigWigFile( file=open(options.BigWig_File1) )
bw2 = BigWigFile( file=open(options.BigWig_File2) )
chrom_sizes = load_chromsize(options.chromSize)
for chr_name, chr_size in chrom_sizes.items(): #iterate each chrom
print >>sys.stderr, "Processing " + chr_name + " ..."
OUT.write('variableStep chrom='+chr_name+'\n')
for interval in BED.tillingBed(chrName = chr_name,chrSize = chr_size,stepSize = options.chunk_size):
coord = interval[1]
bw_signal1 = bw1.get_as_array(chr_name,interval[1],interval[2])
bw_signal2 = bw2.get_as_array(chr_name,interval[1],interval[2])
if all_nan(bw_signal1) and all_nan(bw_signal2):
continue
bw_signal1 = replace_nan( bw_signal1 )
bw_signal2 = replace_nan( bw_signal2 )
call_back = getattr(twoList,options.action)
for v in call_back(bw_signal1,bw_signal2):
coord +=1
if v != 0: print >>OUT, "%d\t%.2f" % (coord,v)
示例2: main
def main():
p = optparse.OptionParser(__doc__)
p.add_option('-A', '--absolute', action='store_true',dest='A',\
default=False, help='absolute threshold')
p.add_option('-s','--standard_background', action='store_true',\
dest='stdbg')
p.add_option('-D', '--debug', action='store_true', dest='debug')
options, args = p.parse_args()
debug_c = 0
BEDFILE = open(args[0], 'rU')
BW = BigWigFile(file=open(args[1]))
BEDout = open(args[2], 'w')
for line in BEDFILE:
print(line)
line = line.strip().split('\t')
x = BW.query(line[0], int(line[1]), int(line[2]),1)
line.append(str(round(x[0]['mean'], 5)))
BEDout.write("\t".join(line)+"\n")
"""
for i in x:
print i['mean']
"""
if options.debug:
debug_c +=1
if debug_c >= 10:
break
if __name__ == '__main__':
main()
示例3: findInsertions
def findInsertions(bwFile, bedData, interval, x):
if interval =='start':
sL = int(bedData[x][1])-options.l
sR = int(bedData[x][1])+options.r
elif interval == 'end':
sL = int(bedData[x][2])-options.l
sR = int(bedData[x][2])+options.r
else:
sL = int(bedData[x][1])-options.l
sR = int(bedData[x][2])+options.r
# get signal data
f = open(bwFile, "rb")
bigwig_class = BigWigFile(f)
try: signal = bigwig_class.get_as_array(bedData[x][0],sL,sR)
except OverflowError: signal = np.array([np.nan]*(sR-sL))
f.close()
if signal is not None:
if np.sum(np.isfinite(signal)) > 0:
out = np.nanmean(signal)
else: out = 0
else: out = 0
out = signal
return out
示例4: get_mean_phastcons
def get_mean_phastcons(bedtool, phastcons_location):
"""
Get means phastcons scores for all intervals in a bed tool
bedtool - bedtool to extract data from
phastcons_location - location of phastcons file
"""
f = open(phastcons_location, 'r')
bw = BigWigFile(file=f)
#if bedtool
data = np.ndarray(len(bedtool))
for i, bedline in enumerate(bedtool):
conservation_values = bw.get_as_array(bedline.chrom, bedline.start, bedline.stop)
if len(conservation_values) > 0:
mean_phastcons = np.mean(conservation_values)
else:
mean_phastcons = 0
data[i] = mean_phastcons
return data
示例5: createMappabilityList
def createMappabilityList(fragmentsMap, bwfile, fragmentCount, options):
# keep record which fragment has decent mappability
mappable = np.zeros((fragmentCount,), dtype=np.float)
# lazy load
from bx.intervals.io import GenomicIntervalReader
from bx.bbi.bigwig_file import BigWigFile
bw = BigWigFile( open( bwfile ) )
for fragmentId in fragmentsMap.keys():
(chrom, start, end) = fragmentsMap[fragmentId]
if (options.vverbose):
print >> sys.stdout, "- process %s %d-%d " % (chrom, start, end)
try:
mappable[fragmentId] = bw.query(chrom, start, end, 1)[0]["mean"]
if (np.isnan(mappable[fragmentId])):
mappable[fragmentId] = 0
except:
mappable[fragmentId] = 0.
# problem with invalid values
if (options.vverbose):
print >> sys.stderr, "Problem with bw file at %s %d-%d" % (chrom, start, end)
print traceback.format_exc()
return mappable
示例6: get_mean_phastcons
def get_mean_phastcons(bedtool, phastcons_location, sample_size = 1000):
"""
Get means phastcons scores for all intervals in a bed tool
bedtool - bedtool to extract data from
phastcons_location - location of phastcons file
"""
with open(phastcons_location) as bw_file:
bw = BigWigFile(bw_file)
data = []
for bedline in bedtool.random_subset(min(len(bedtool), sample_size)):
conservation_values = bw.get_as_array(bedline.chrom, bedline.start, bedline.stop)
try:
if len(conservation_values) > 0:
mean_phastcons = np.mean(conservation_values)
else:
mean_phastcons = 0
data.append(mean_phastcons)
except TypeError:
pass
return data
示例7: summarize
def summarize(self, interval, bins=None, method='summarize',
function='mean'):
# We may be dividing by zero in some cases, which raises a warning in
# NumPy based on the IEEE 754 standard (see
# http://docs.scipy.org/doc/numpy/reference/generated/
# numpy.seterr.html)
#
# That's OK -- we're expecting that to happen sometimes. So temporarily
# disable this error reporting for the duration of this method.
orig = np.geterr()['invalid']
np.seterr(invalid='ignore')
if (bins is None) or (method == 'get_as_array'):
bw = BigWigFile(open(self.fn))
s = bw.get_as_array(
interval.chrom,
interval.start,
interval.stop,)
if s is None:
s = np.zeros((interval.stop - interval.start,))
else:
s[np.isnan(s)] = 0
elif method == 'ucsc_summarize':
if function in ['mean', 'min', 'max', 'std', 'coverage']:
return self.ucsc_summarize(interval, bins, function=function)
else:
raise ValueError('function "%s" not supported by UCSC\'s'
'bigWigSummary')
else:
bw = BigWigFile(open(self.fn))
s = bw.summarize(
interval.chrom,
interval.start,
interval.stop, bins)
if s is None:
s = np.zeros((bins,))
else:
if function == 'sum':
s = s.sum_data
if function == 'mean':
s = s.sum_data / s.valid_count
s[np.isnan(s)] = 0
if function == 'min':
s = s.min_val
s[np.isinf(s)] = 0
if function == 'max':
s = s.max_val
s[np.isinf(s)] = 0
if function == 'std':
s = (s.sum_squares / s.valid_count)
s[np.isnan(s)] = 0
# Reset NumPy error reporting
np.seterr(divide=orig)
return s
示例8: extract_phastcons
def extract_phastcons ( bedfile, phas_chrnames, width, pf_res ):
"""Extract phastcons scores from a bed file.
Return the average scores
"""
info("read bed file...")
bfhd = open(bedfile)
bed = parse_BED(bfhd)
# calculate the middle point of bed regions then extend left and right by 1/2 width
bchrs = bed.peaks.keys()
bchrs.sort()
chrs = []
for c in phas_chrnames:
if c in bchrs:
chrs.append(c)
sumscores = []
for chrom in chrs:
info("processing chromosome: %s" %chrom)
pchrom = bed.peaks[chrom]
bw = BigWigFile(open(chrom+'.bw', 'rb'))
for i in range(len(pchrom)):
mid = int((pchrom[i][0]+pchrom[i][1])/2)
left = int(mid - width/2)
right = int(mid + width/2)
if left < 0:
left = 0
right = width
summarize = bw.summarize(chrom, left, right, width/pf_res)
if not summarize:
continue
dat = summarize.sum_data / summarize.valid_count
#dat = dat.strip().split('\t')
sumscores.append(dat)
## a list with each element is a list of conservation score at the same coordinate
sumscores = map(list, zip(*sumscores))
## exclude na
sumscores = [[t2 for t2 in t if not math.isnan(t2)] for t in sumscores]
try:
conscores = [sum(t)/len(t) for t in sumscores]
except ZeroDivisionError:
conscores = [0] * (width/pf_res)
return conscores
示例9: build
def build(self):
"""
Build the matrix.
Since bigWig files are essentially pre-summarized, this just extracts
the chrom/start/stop represented by each cell in the matrix and fills
it with the value from the bigWig file.
"""
self.bigwig = BigWigFile(open(self.file))
chrom_rc, chrom_bins = self.chrom2rc()
if self.chrom == 'genome':
chroms = self.chromdict.keys()
else:
chroms = [self.chrom]
for chrom in chroms:
rc = chrom_rc[chrom]
nbins = chrom_bins[chrom]
start, stop = self.chromdict[chrom]
results = self.bigwig.summarize(chrom, start, stop, nbins)
values = results.sum_data / results.valid_count
values[np.isnan(values)] = 0
self.matrix[rc[:,0], rc[:, 1]] = values
self._cleanup()
示例10: BigWigWrapper
class BigWigWrapper(object):
"""A wrapper for bx-python BigWig file"""
def __init__(self, filepath):
self.bw = BigWigFile(open(filepath))
def __getitem__(self, iv):
return self.bw.get_as_array(iv.chrom, iv.start, iv.end)
示例11: get_phastcons
def get_phastcons(bedtool, phastcons_location, species=None, index=None, ):
"""
Get phastcons scores for intervals in a bed tool
"""
if species is None and index is None:
print "Error, must select species or index"
f = open(phastcons_location, 'r')
bw = BigWigFile(file=f)
try:
#if its a line
#for each line fetch bigwig values
type(bedtool)
v = bedtool.chrom #is a single interval
vals = bw.get(bedtool.chrom, bedtool.start, bedtool.stop)
consvals = list(v[-1] for v in vals)
if len(consvals) > 0:
mean_phastcons = np.mean(consvals)
else:
mean_phastcons=0
data = mean_phastcons
except:
#if bedtool
for i, bedline in enumerate(bedtool):
data = np.ndarray(len(bedtool))
vals = bw.get(bedline.chrom, bedline.start, bedline.stop)
consvals = list(v[-1] for v in vals)
if len(consvals) > 0:
mean_phastcons = np.mean(consvals)
else:
mean_phastcons=0
data[i] = mean_phastcons
#returns mean phastcons score for each line
#returns inconistant data types, need to convert so it just returns an array
return data
示例12: main
def main():
input_filename, output_filename, loc_filename, loc_key, chrom_col, start_col = sys.argv[1:]
# open input, output, and bigwig files
location_file = LocationFile( loc_filename )
bigwig_filename = location_file.get_values( loc_key )
bwfh = open_or_die( bigwig_filename, message='Error opening BigWig file %s' % bigwig_filename )
bw = BigWigFile( file=bwfh )
ifh = open_or_die( input_filename, message='Error opening input file %s' % input_filename )
ofh = open_or_die( output_filename, mode='w', message='Error opening output file %s' % output_filename )
# make column numbers 0-based
chrom_col = int( chrom_col ) - 1
start_col = int( start_col ) - 1
min_cols = max( chrom_col, start_col )
# add score column to imput file
line_number = 0
for line in ifh:
line_number += 1
line = line.rstrip( '\r\n' )
elems = line.split( '\t' )
if len( elems ) > min_cols:
chrom = elems[chrom_col].strip()
# base-0 position in chrom
start = int( elems[start_col] )
score_list = bw.get( chrom, start, start + 1 )
score_list_len = len( score_list )
if score_list_len == 1:
beg, end, score = score_list[0]
score_val = '%1.3f' % score
elif score_list_len == 0:
score_val = 'NA'
else:
die( '%s line %d: chrom=%s, start=%d, score_list_len = %d' % ( input_filename, line_number, chrom, start, score_list_len ) )
print('\t'.join( [line, score_val] ), file=ofh)
else:
print(line, file=ofh)
bwfh.close()
ifh.close()
ofh.close()
示例13: getNumberOfFragmentsPerRegionFromBigWig
def getNumberOfFragmentsPerRegionFromBigWig(bw, chromSizes):
"""
Get the number of all mapped fragments per region in all chromosomes
from a bigWig. Utilizing bx-python.
Test dataset with two samples covering 200 bp.
>>> test = Tester()
Get number of fragments in sample.
>>> getNumberOfFragmentsPerRegionFromBigWig(test.bwFile1, [('3R', 200)])
3.0
>>> getNumberOfFragmentsPerRegionFromBigWig(test.bwFile2, [('3R', 200)])
4.0
"""
bwh = BigWigFile(open(bw, "rb"))
mapped = 0
for cname, csize in chromSizes:
regions = bwh.get(cname, 0, csize) # region = bwh.get(chrom_name, start, end)
for region in regions:
mapped += region[2]
return mapped
示例14: get_GA_from_bw
def get_GA_from_bw(self, plus, minus, GTF, filterfxn):
##bx-python 'get' method is 0 based, fully closed
ga = HTSeq.GenomicArray( "auto", typecode='d' , stranded = True)
with open(plus) as f:
bw_file = BigWigFile(file=f)
for GF in GTF:
if filterfxn( GF ) == False: continue
window = GF.iv
chrom, start, stop = window.chrom, window.start, window.end
vals = bw_file.get(chrom, start, stop)
for start, stop, value in vals:
ga[ HTSeq.GenomicPosition(chrom, start, '+') ] = value
with open(minus) as f:
bw_file = BigWigFile(file=f)
for GF in GTF:
if filterfxn( GF ) == False: continue
window = GF.iv
chrom, start, stop = window.chrom, window.start, window.end
vals = bw_file.get(chrom, start, stop)
for start, stop, value in vals:
ga[ HTSeq.GenomicPosition(chrom, start, '-') ] = value
return ga
示例15: findInsertions
def findInsertions(bwFile, bedData, x):
if options.tn5 is not None:
bwFile = options.b + options.tn5 + "." + bedData[x][0] + ".Scores.bw"
sL = int(bedData[x][1]) - options.l
sR = int(bedData[x][2]) + options.r
# get signal data
f = open(bwFile, "rb")
bw = BigWigFile(f)
try:
signal = bw.get_as_array(bedData[x][0], sL, sR)
except OverflowError:
signal = np.array([np.nan] * (sR - sL))
f.close()
out = signal
try:
if bedData[x][3] == "-":
out = out[::-1]
except IndexError:
pass
return out