This article collects typical usage examples of the Python CGAT.Intervals class. If you are unsure what Intervals is for or how to use it, the hand-picked examples below may help.
The following 15 code examples of the Intervals class are shown, ordered by popularity by default.
Example 1: testNoOverlap
def testNoOverlap(self):
    """test intervals that do not overlap."""
    self.assertEqual(
        Intervals.truncate([(0, 5), (10, 15)], [(5, 10)]), [(0, 5), (10, 15)])
    self.assertEqual(
        Intervals.truncate([(5, 10)], [(0, 5), (10, 15)]), [(5, 10)])
    self.assertEqual(
        Intervals.truncate([(0, 5), (5, 10)], [(10, 15)]), [(0, 5), (5, 10)])
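Intervals.truncate removes from the first interval list everything covered by the second; intervals are half-open (start, end) tuples. A minimal standalone sketch, assuming CGAT is installed and importable as shown; the expected values mirror the assertions above and in Example 4:

from CGAT import Intervals

# subtract the second interval list from the first (half-open coordinates)
print(Intervals.truncate([(0, 5), (10, 15)], [(5, 10)]))  # expected: [(0, 5), (10, 15)]
print(Intervals.truncate([(0, 5)], [(0, 3)]))             # expected: [(3, 5)]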
Example 2: processChunk
def processChunk(contig, regions):
    if contig is None:
        return
    start = 0
    end = contigs[contig]
    regions = Intervals.combineIntervals(regions)
    for xstart, xend in Intervals.complementIntervals(regions, start, end):
        locations.append(
            ("intergenic", "intergenic", contig, "+", xstart, xend, "."))
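Here combineIntervals merges overlapping gene ranges and complementIntervals then yields the gaps within [start, end), i.e. the intergenic regions. A minimal sketch with hypothetical coordinates on a 1000 bp contig; the exact return values are an assumption based on how the functions are used above:

from CGAT import Intervals

regions = [(100, 200), (150, 300), (600, 700)]
merged = Intervals.combineIntervals(regions)             # assumed: [(100, 300), (600, 700)]
intergenic = Intervals.complementIntervals(merged, 0, 1000)
print(intergenic)                                        # assumed: [(0, 100), (300, 600), (700, 1000)]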
Example 3: testMultiple
def testMultiple(self):
    """test multiple intervals."""
    self.assertEqual(
        Intervals.intersect([(0, 5), (10, 15)], [(0, 5)]), [(0, 5)])
    self.assertEqual(
        Intervals.intersect([(0, 5), (10, 15)], [(0, 10)]), [(0, 5)])
    self.assertEqual(
        Intervals.intersect([(0, 5), (10, 15)], [(0, 15)]), [(0, 5), (10, 15)])
    self.assertEqual(
        Intervals.intersect([(0, 5), (5, 10)], [(0, 10)]), [(0, 5), (5, 10)])
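Intervals.intersect returns the parts of the first interval list that are also covered by the second. A standalone sketch, assuming the same import as above; the expected values mirror the assertions:

from CGAT import Intervals

print(Intervals.intersect([(0, 5), (10, 15)], [(0, 10)]))  # expected: [(0, 5)]
print(Intervals.intersect([(0, 5), (10, 15)], [(0, 15)]))  # expected: [(0, 5), (10, 15)]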
Example 4: testSingle
def testSingle(self):
    """test single intervals."""
    self.assertEqual(Intervals.truncate([(0, 5)], [(0, 5)]), [])
    self.assertEqual(Intervals.truncate([(0, 5)], [(0, 3)]), [(3, 5)])
    self.assertEqual(Intervals.truncate([(0, 3)], [(0, 5)]), [])
    self.assertEqual(Intervals.truncate([(0, 5)], [(3, 5)]), [(0, 3)])
    self.assertEqual(Intervals.truncate([(3, 5)], [(0, 5)]), [])
    self.assertEqual(Intervals.truncate([(5, 10)], [(5, 10)]), [])
    self.assertEqual(Intervals.truncate([(5, 10)], [(5, 20)]), [])
    self.assertEqual(Intervals.truncate([(5, 10)], [(0, 10)]), [])
    self.assertEqual(Intervals.truncate([(5, 10)], [(0, 20)]), [])
Example 5: count
def count(self, bed):
    '''update internal counts.'''
    results = []
    for track in self.tracks:
        try:
            overlaps = [(x[0], x[1]) for x in
                        self.index[track][bed.contig].find(bed.start, bed.end)]
        except KeyError:
            overlaps = []
        results.append((len(overlaps),
                        Intervals.calculateOverlap([(bed.start, bed.end), ],
                                                   Intervals.combine(overlaps))))
    self.data = results
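calculateOverlap is used here to measure how much of the query interval is covered by the merged overlapping intervals of a track. A minimal sketch with hypothetical coordinates; that it returns the number of shared positions is an assumption based on this usage:

from CGAT import Intervals

bed_interval = [(100, 200)]
overlaps = Intervals.combine([(150, 180), (170, 250)])     # assumed: [(150, 250)]
print(Intervals.calculateOverlap(bed_interval, overlaps))  # assumed: 50 (positions 150-199)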
Example 6: toIntronIntervals
def toIntronIntervals(chunk):
    """convert a set of gtf elements within a transcript to intron coordinates.
    Will use first transcript_id found.
    Note that coordinates will still be forward strand coordinates
    """
    if len(chunk) == 0:
        return []
    contig, strand, transcript_id = (
        chunk[0].contig, chunk[0].strand, chunk[0].transcript_id)
    for gff in chunk:
        assert gff.strand == strand, "features on different strands."
        assert gff.contig == contig, "features on different contigs."
    intervals = Intervals.combine(
        [(x.start, x.end) for x in chunk if x.feature == "exon"])
    return Intervals.complement(intervals)
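Introns are simply the gaps between the merged exon intervals: Intervals.combine collapses overlapping exons and Intervals.complement is assumed to return the internal gaps. A sketch with hypothetical exon coordinates:

from CGAT import Intervals

exons = [(0, 100), (80, 150), (400, 500)]   # hypothetical forward-strand exons
merged = Intervals.combine(exons)           # assumed: [(0, 150), (400, 500)]
print(Intervals.complement(merged))         # assumed: [(150, 400)]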
Example 7: combineMergedIntervals
def combineMergedIntervals(bedfiles):
    '''combine intervals in a collection of bed files.
    Overlapping intervals between tracks are merged.
    Algorithm:
    1. collect all intervals in all tracks into a single track
    2. merge overlapping intervals
    3. report all intervals that overlap with an interval in each track.
    '''
    # get all intervals
    data_per_contig = collections.defaultdict(list)
    for bedfile in bedfiles:
        for contig in bedfile.contigs:
            i = []
            for bed in bedfile.fetch(contig, parser=pysam.asBed()):
                i.append((bed.start, bed.end))
            data_per_contig[contig].extend(i)
    # merge intervals
    for contig in data_per_contig.keys():
        data_per_contig[contig] = Intervals.combine(data_per_contig[contig])
    # filter intervals - take only those present in all bedfiles
    for contig, data in data_per_contig.items():
        for start, end in data:
            if isContainedInAll(contig, start, end, bedfiles):
                yield contig, start, end
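The merging step (step 2 of the algorithm) is independent of pysam and can be exercised on its own. A minimal sketch with hypothetical intervals collected for one contig; the merged result is what Intervals.combine is assumed to return:

import collections
from CGAT import Intervals

data_per_contig = collections.defaultdict(list)
data_per_contig["chr1"].extend([(0, 10), (5, 20), (100, 200)])
for contig in data_per_contig:
    data_per_contig[contig] = Intervals.combine(data_per_contig[contig])
print(data_per_contig["chr1"])   # assumed: [(0, 20), (100, 200)]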
Example 8: UTR3
def UTR3(transcript):
    exons = GTF.asRanges(transcript, "exon")
    cds = GTF.asRanges(transcript, "CDS")
    if len(cds) == 0:
        return list()
    utrs = Intervals.truncate(exons, cds)
    if transcript[0].strand == "+":
        utr3 = [exon for exon in utrs
                if exon[0] >= cds[-1][1]]
    else:
        utr3 = [exon for exon in utrs
                if exon[-1] <= cds[0][0]]
    for e in transcript:
        if e.feature == "exon":
            template_exon = e
            break
    returned_exons = []
    for e in utr3:
        gtf = GTF.Entry().fromGTF(template_exon)
        gtf.start = e[0]
        gtf.end = e[1]
        returned_exons.append(gtf)
    return returned_exons
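The core of this function is again Intervals.truncate: subtracting the CDS from the exons leaves the UTR pieces, and on the plus strand the 3' UTR is whatever lies at or beyond the CDS end. A sketch with hypothetical coordinates for a plus-strand transcript; the truncate result follows the behaviour shown in Examples 1 and 4:

from CGAT import Intervals

exons = [(0, 100), (200, 300), (400, 500)]
cds = [(50, 100), (200, 300), (400, 450)]
utrs = Intervals.truncate(exons, cds)            # expected: [(0, 50), (450, 500)]
utr3 = [e for e in utrs if e[0] >= cds[-1][1]]   # keep pieces downstream of the CDS end
print(utr3)                                      # expected: [(450, 500)]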
Example 9: toSequence
def toSequence(chunk, fasta):
    """convert a list of gff attributes to a single sequence.
    This function ensures correct in-order concatenation on
    positive/negative strand. Overlapping regions are merged.
    """
    if len(chunk) == 0:
        return ""
    contig, strand = chunk[0].contig, chunk[0].strand
    for gff in chunk:
        assert gff.strand == strand, "features on different strands."
        assert gff.contig == contig, "features on different contigs."
    intervals = Intervals.combine([(x.start, x.end) for x in chunk])
    lcontig = fasta.getLength(contig)
    positive = Genomics.IsPositiveStrand(strand)
    if not positive:
        intervals = [(lcontig - end, lcontig - start)
                     for start, end in intervals]
        intervals.reverse()
    s = [fasta.getSequence(contig, strand, start, end)
         for start, end in intervals]
    return "".join(s)
Example 10: annotateRegulons
def annotateRegulons(iterator, fasta, tss, options):
    """annotate regulons within iterator.
    Entries specified with ``--restrict-source`` are annotated.
    """
    gene_iterator = GTF.gene_iterator(iterator)
    ngenes, ntranscripts, nregulons = 0, 0, 0
    upstream, downstream = options.upstream, options.downstream
    for gene in gene_iterator:
        ngenes += 1
        is_negative_strand = Genomics.IsNegativeStrand(gene[0][0].strand)
        lcontig = fasta.getLength(gene[0][0].contig)
        regulons = []
        transcript_ids = []
        for transcript in gene:
            ntranscripts += 1
            mi, ma = min([x.start for x in transcript]), max(
                [x.end for x in transcript])
            if tss:
                # add range to both sides of tss
                if is_negative_strand:
                    interval = ma - options.downstream, ma + options.upstream
                else:
                    interval = mi - options.upstream, mi + options.downstream
            else:
                # add range to both sides of tts
                if is_negative_strand:
                    interval = mi - options.downstream, mi + options.upstream
                else:
                    interval = ma - options.upstream, ma + options.downstream
            interval = (min(lcontig, max(0, interval[0])),
                        min(lcontig, max(0, interval[1])))
            regulons.append(interval)
            transcript_ids.append(transcript[0].transcript_id)
        if options.merge_promotors:
            # merge the regulons (and rename - as sort order might have
            # changed)
            regulons = Intervals.combine(regulons)
            transcript_ids = ["%i" % (x + 1) for x in range(len(regulons))]
        gtf = GTF.Entry()
        gtf.fromGTF(gene[0][0], gene[0][0].gene_id, gene[0][0].gene_id)
        gtf.source = "regulon"
        x = 0
        for start, end in regulons:
            gtf.start, gtf.end = start, end
            gtf.transcript_id = transcript_ids[x]
            options.stdout.write("%s\n" % str(gtf))
            nregulons += 1
            x += 1
    E.info("ngenes=%i, ntranscripts=%i, nregulons=%i" %
           (ngenes, ntranscripts, nregulons))
Example 11: cropGFF
def cropGFF(gffs, options):
    """crop intervals in gff file."""
    # read regions to crop with and convert intervals to intersectors
    E.info("reading gff for cropping: started.")
    other_gffs = GTF.iterator(IOTools.openFile(options.crop, "r"))
    cropper = GTF.readAsIntervals(other_gffs)
    ntotal = 0
    for contig in cropper.keys():
        intersector = bx.intervals.intersection.Intersecter()
        for start, end in cropper[contig]:
            intersector.add_interval(bx.intervals.Interval(start, end))
            ntotal += 1
        cropper[contig] = intersector
    E.info("reading gff for cropping: finished.")
    E.info("reading gff for cropping: %i contigs with %i intervals." %
           (len(cropper), ntotal))
    ninput, noutput, ncropped, ndeleted = 0, 0, 0, 0
    # do the actual cropping
    for gff in gffs:
        ninput += 1
        if gff.contig in cropper:
            start, end = gff.start, gff.end
            overlaps = cropper[gff.contig].find(start, end)
            if overlaps:
                l = end - start
                a = numpy.ones(l)
                for i in overlaps:
                    s = max(0, i.start - start)
                    e = min(l, i.end - start)
                    a[s:e] = 0
                segments = Intervals.fromArray(a)
                if len(segments) == 0:
                    ndeleted += 1
                else:
                    ncropped += 1
                for s, e in segments:
                    gff.start, gff.end = s + start, e + start
                    noutput += 1
                    options.stdout.write("%s\n" % gff)
                continue
        noutput += 1
        options.stdout.write("%s\n" % gff)
    if options.loglevel >= 1:
        options.stdlog.write("# ninput=%i, noutput=%i, ncropped=%i, ndeleted=%i\n" % (
            ninput, noutput, ncropped, ndeleted))
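The actual cropping works by masking a per-base array and recovering the surviving stretches with Intervals.fromArray, which is assumed to return the intervals where the array is non-zero. A minimal sketch for a feature of length 10 overlapped on relative positions 3-7:

import numpy
from CGAT import Intervals

a = numpy.ones(10)
a[3:7] = 0                     # zero out the part covered by a cropping interval
print(Intervals.fromArray(a))  # assumed: [(0, 3), (7, 10)]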
Example 12: iterator_min_feature_length
def iterator_min_feature_length(gff_iterator, min_length, feature="exon"):
    """select only those genes with a minimum length of a given feature."""
    for gffs in gff_iterator:
        intervals = [(x.start, x.end) for x in gffs if x.feature == feature]
        intervals = Intervals.combine(intervals)
        t = sum((x[1] - x[0] for x in intervals))
        if t >= min_length:
            yield gffs
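Because the exon intervals are merged first, overlapping exon records are not double-counted in the feature length. A sketch of the length computation alone, with hypothetical exon coordinates and the merged result Intervals.combine is assumed to return:

from CGAT import Intervals

intervals = Intervals.combine([(0, 100), (50, 150), (300, 400)])  # assumed: [(0, 150), (300, 400)]
print(sum(end - start for start, end in intervals))               # 250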
Example 13: FilterEliminateOverlappingTranscripts
def FilterEliminateOverlappingTranscripts(
        exons, filter_exons,
        eliminated_predictions, contig_sizes, options):
    """eliminate predictions that overlap or span a positive set of transcripts."""
    eliminated = []
    # convert list of filter exons into a list of ranges.
    filter_ranges = getRangesFromExons(
        filter_exons,
        both_strands=options.filter_remove_spanning_both_strands,
        contig_sizes=contig_sizes)
    for k, r in filter_ranges.items():
        filter_ranges[k] = Intervals.combineIntervals([x[:2] for x in r])
    exon_ranges = getRangesFromExons(exons,
                                     both_strands=False)
    # and now go through exons and delete transcripts whose
    # exons overlap one of the forbidden ranges
    for k, ee in exon_ranges.items():
        if k not in filter_ranges:
            continue
        ff = filter_ranges[k]
        ee.sort()
        # set exon index e and filter index f
        # (both are indices into the sorted lists)
        e, f = 0, 0
        while e < len(ee):
            efrom, eto, id = ee[e]
            # advance the filter index until its interval ends
            # at or beyond the start of the current range ee[e]
            while f < len(ff) and ff[f][1] < efrom:
                f += 1
            if f == len(ff):
                break
            if eto < ff[f][0]:
                # no overlap
                pass
            else:
                options.stdout.write(
                    "%s\t%s\n" % (id, "eliminated: filtered by %s:%i:%i" %
                                  (k, ff[f][0], ff[f][1])))
                eliminated_predictions[id] = 0
                eliminated.append((id, "f"))
            e += 1
    return eliminated
Example 14: get_windows
def get_windows(pvalues, window_size, threshold):
    # intervals are closed-closed: pandas label slices include both ends
    windows = [(pos - window_size, pos + window_size + 1)
               for pos in pvalues.index.values]
    merged_windows = Intervals.combine(windows)
    windows_min_p = [pvalues.loc[float(start):float(end - 1)].min()
                     for start, end in merged_windows]
    return zip(merged_windows, windows_min_p)
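The window merging itself is plain Intervals.combine: windows around nearby positions collapse into one. A sketch with hypothetical positions and window_size=2; the merged result is an assumption about combine's behaviour:

from CGAT import Intervals

window_size = 2
positions = [10, 12, 30]
windows = [(pos - window_size, pos + window_size + 1) for pos in positions]
print(Intervals.combine(windows))   # assumed: [(8, 15), (28, 33)]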
Example 15: toIntronIntervals
def toIntronIntervals(chunk):
    '''convert a set of gtf elements within a transcript to intron coordinates.
    Will raise an error if more than one transcript is submitted.
    Note that coordinates will still be forward strand coordinates
    '''
    if len(chunk) == 0:
        return []
    contig, strand, transcript_id = (
        chunk[0].contig, chunk[0].strand, chunk[0].transcript_id)
    for gff in chunk:
        assert gff.strand == strand, "features on different strands."
        assert gff.contig == contig, "features on different contigs."
        assert gff.transcript_id == transcript_id, "more than one transcript submitted"
    intervals = Intervals.combine([(x.start, x.end)
                                   for x in chunk if x.feature == "exon"])
    return Intervals.complement(intervals)