当前位置: 首页>>代码示例>>Python>>正文


Python CGAT.Intervals类代码示例

本文整理汇总了Python中CGAT.Intervals的典型用法代码示例。如果您正苦于以下问题:Python Intervals类的具体用法?Python Intervals怎么用?Python Intervals使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Intervals类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: testNoOverlap

 def testNoOverlap(self):
     """Truncating by intervals that only abut the input changes nothing."""
     # (intervals, intervals to truncate by, expected result)
     cases = [
         ([(0, 5), (10, 15)], [(5, 10)], [(0, 5), (10, 15)]),
         ([(5, 10)], [(0, 5), (10, 15)], [(5, 10)]),
         ([(0, 5), (5, 10)], [(10, 15)], [(0, 5), (5, 10)]),
     ]
     for intervals, truncate_by, expected in cases:
         self.assertEqual(
             Intervals.truncate(intervals, truncate_by), expected)
开发者ID:Charlie-George,项目名称:cgat,代码行数:8,代码来源:Intervals_test.py

示例2: processChunk

    def processChunk(contig, regions):
        """Record the intergenic gaps of one contig.

        ``regions`` is merged with ``Intervals.combineIntervals`` and every
        interval returned by ``Intervals.complementIntervals`` between 0 and
        ``contigs[contig]`` is appended to ``locations`` as a 7-tuple
        ``("intergenic", "intergenic", contig, "+", start, end, ".")``.

        ``contigs`` and ``locations`` are not defined in this function;
        presumably they live in the enclosing scope (confirm against caller).

        Does nothing when ``contig`` is None.
        """
        # identity test: None is a singleton, so ``is`` is the correct check
        if contig is None:
            return

        contig_start = 0
        contig_end = contigs[contig]

        merged = Intervals.combineIntervals(regions)
        gaps = Intervals.complementIntervals(merged, contig_start, contig_end)
        for gap_start, gap_end in gaps:
            locations.append(
                ("intergenic", "intergenic", contig, "+",
                 gap_start, gap_end, "."))
开发者ID:siping,项目名称:cgat,代码行数:9,代码来源:regions2gff.py

示例3: testMultiple

 def testMultiple(self):
     """Intersect sets of several intervals with a single interval."""
     # (first set, second set, expected intersection)
     cases = [
         ([(0, 5), (10, 15)], [(0, 5)], [(0, 5)]),
         ([(0, 5), (10, 15)], [(0, 10)], [(0, 5)]),
         ([(0, 5), (10, 15)], [(0, 15)], [(0, 5), (10, 15)]),
         ([(0, 5), (5, 10)], [(0, 10)], [(0, 5), (5, 10)]),
     ]
     for first, second, expected in cases:
         self.assertEqual(Intervals.intersect(first, second), expected)
开发者ID:Charlie-George,项目名称:cgat,代码行数:10,代码来源:Intervals_test.py

示例4: testSingle

 def testSingle(self):
     """Truncate a single interval by a single interval."""
     # (interval, interval to truncate by, expected remainder);
     # the original listed the (5, 10) / (0, 10) case twice - one copy kept.
     cases = [
         ([(0, 5)], [(0, 5)], []),
         ([(0, 5)], [(0, 3)], [(3, 5)]),
         ([(0, 3)], [(0, 5)], []),
         ([(0, 5)], [(3, 5)], [(0, 3)]),
         ([(3, 5)], [(0, 5)], []),
         ([(5, 10)], [(5, 10)], []),
         ([(5, 10)], [(5, 20)], []),
         ([(5, 10)], [(0, 10)], []),
         ([(5, 10)], [(0, 20)], []),
     ]
     for intervals, truncate_by, expected in cases:
         self.assertEqual(
             Intervals.truncate(intervals, truncate_by), expected)
开发者ID:Charlie-George,项目名称:cgat,代码行数:12,代码来源:Intervals_test.py

示例5: count

    def count(self, bed):
        '''Recompute ``self.data`` for the interval *bed*.

        For each track in ``self.tracks`` one pair is stored: the number of
        intervals found in ``self.index[track][bed.contig]`` for
        ``bed.start``/``bed.end``, and the value returned by
        ``Intervals.calculateOverlap`` for the query interval against the
        combined hits. A ``KeyError`` during the lookup counts as "no hits".
        '''
        per_track = []
        for track in self.tracks:
            try:
                hits = self.index[track][bed.contig].find(bed.start, bed.end)
                overlaps = [(hit[0], hit[1]) for hit in hits]
            except KeyError:
                # track or contig missing from the index
                overlaps = []

            noverlaps = len(overlaps)
            covered = Intervals.calculateOverlap(
                [(bed.start, bed.end)], Intervals.combine(overlaps))
            per_track.append((noverlaps, covered))

        self.data = per_track
开发者ID:siping,项目名称:cgat,代码行数:15,代码来源:bed2table.py

示例6: toIntronIntervals

def toIntronIntervals(chunk):
    """Convert the gtf elements in *chunk* to intron coordinates.

    The ``(start, end)`` of every element whose ``feature`` is ``"exon"``
    is merged with ``Intervals.combine``; the result of
    ``Intervals.complement`` on the merged exons is returned.

    Elements are not filtered by ``transcript_id``: every exon in *chunk*
    contributes, so pass the elements of a single transcript.

    Note that coordinates will still be forward strand coordinates.

    Returns an empty list for an empty *chunk*. Raises ``AssertionError``
    if the elements do not all share the contig and strand of the first one.
    """
    if len(chunk) == 0:
        return []

    first = chunk[0]
    contig, strand = first.contig, first.strand
    for gff in chunk:
        assert gff.strand == strand, "features on different strands."
        assert gff.contig == contig, "features on different contigs."

    exons = [(x.start, x.end) for x in chunk if x.feature == "exon"]
    return Intervals.complement(Intervals.combine(exons))
开发者ID:CGATOxford,项目名称:cgat,代码行数:16,代码来源:GTF.py

示例7: combineMergedIntervals

def combineMergedIntervals(bedfiles):
    '''combine intervals in a collection of bed files.

    Overlapping intervals between tracks are merged.

    Algorithm:

    1. collect all intervals in all tracks into a single track
    2. merge overlapping intervals
    3. report all intervals that overlap with an interval in each track.

    Each element of *bedfiles* must provide ``contigs`` and
    ``fetch(contig, parser=...)``. This is a generator yielding
    ``(contig, start, end)`` for every merged interval accepted by
    ``isContainedInAll``.
    '''

    # get all intervals, pooled per contig across all files
    data_per_contig = collections.defaultdict(list)
    for bedfile in bedfiles:
        for contig in bedfile.contigs:
            data_per_contig[contig].extend(
                (bed.start, bed.end)
                for bed in bedfile.fetch(contig, parser=pysam.asBed()))

    # .items() instead of .iteritems(): the latter does not exist on
    # Python 3, while .items() works on both major versions.
    for contig, intervals in data_per_contig.items():
        # merge intervals
        merged = Intervals.combine(intervals)
        # filter intervals - take only those present in all bedfiles
        for start, end in merged:
            if isContainedInAll(contig, start, end, bedfiles):
                yield contig, start, end
开发者ID:Charlie-George,项目名称:cgat,代码行数:31,代码来源:beds2beds.py

示例8: UTR3

def UTR3(transcript):
    """Build GTF entries for the exon parts lying beyond the CDS.

    Exon ranges are truncated by the CDS ranges. For a transcript whose
    first entry is on the "+" strand, the remaining segments starting at or
    after the end of the last CDS range are kept; otherwise those ending at
    or before the start of the first CDS range are kept.

    Each kept segment becomes a ``GTF.Entry`` initialised from the first
    "exon" entry of *transcript*, with ``start``/``end`` set to the segment.
    Returns an empty list if the transcript has no CDS ranges.
    """
    exon_ranges = GTF.asRanges(transcript, "exon")
    cds_ranges = GTF.asRanges(transcript, "CDS")

    if len(cds_ranges) == 0:
        return []

    non_coding = Intervals.truncate(exon_ranges, cds_ranges)

    if transcript[0].strand == "+":
        cds_end = cds_ranges[-1][1]
        selected = [seg for seg in non_coding if seg[0] >= cds_end]
    else:
        cds_start = cds_ranges[0][0]
        selected = [seg for seg in non_coding if seg[-1] <= cds_start]

    # first exon entry serves as the template for all returned entries
    template_exon = next(
        (entry for entry in transcript if entry.feature == "exon"), None)

    returned_exons = []
    for seg in selected:
        gtf = GTF.Entry().fromGTF(template_exon)
        gtf.start = seg[0]
        gtf.end = seg[1]
        returned_exons.append(gtf)

    return returned_exons
开发者ID:sudlab,项目名称:iCLIPlib,代码行数:30,代码来源:transcript_regions.py

示例9: toSequence

def toSequence(chunk, fasta):
    """Concatenate the sequence of all gff entries in *chunk*.

    The entries' ``(start, end)`` pairs are merged with
    ``Intervals.combine``. When ``Genomics.IsPositiveStrand`` is false for
    the strand, each interval is mirrored against the contig length
    (``fasta.getLength``) and the interval order is reversed before the
    pieces are fetched with ``fasta.getSequence`` and joined.

    Returns ``""`` for an empty *chunk*. Raises ``AssertionError`` if the
    entries are not all on the contig and strand of the first entry.
    """
    if len(chunk) == 0:
        return ""

    first = chunk[0]
    contig = first.contig
    strand = first.strand

    for gff in chunk:
        assert gff.strand == strand, "features on different strands."
        assert gff.contig == contig, "features on different contigs."

    intervals = Intervals.combine([(entry.start, entry.end) for entry in chunk])
    lcontig = fasta.getLength(contig)

    if not Genomics.IsPositiveStrand(strand):
        mirrored = [(lcontig - end, lcontig - start)
                    for start, end in intervals]
        intervals = mirrored[::-1]

    pieces = []
    for start, end in intervals:
        pieces.append(fasta.getSequence(contig, strand, start, end))

    return "".join(pieces)
开发者ID:prasoonnema,项目名称:cgat,代码行数:26,代码来源:GTF.py

示例10: annotateRegulons

def annotateRegulons(iterator, fasta, tss, options):
    """annotate regulons within iterator.

    Entries specified with ``--restrict-source`` are annotated (per the
    original note; that filtering is not visible in this function).

    For every transcript of every gene a window is placed around one end of
    the transcript, using ``options.upstream`` and ``options.downstream``,
    and clamped to ``[0, contig length]``. If *tss* is true the window is
    anchored at the smallest start (largest end on the negative strand),
    otherwise at the largest end (smallest start on the negative strand).
    With ``options.merge_promotors`` the windows of a gene are merged and
    renumbered. One GTF line with source "regulon" is written to
    ``options.stdout`` per window.
    """
    ngenes, ntranscripts, nregulons = 0, 0, 0

    upstream, downstream = options.upstream, options.downstream

    for gene in GTF.gene_iterator(iterator):
        ngenes += 1
        is_negative_strand = Genomics.IsNegativeStrand(gene[0][0].strand)
        lcontig = fasta.getLength(gene[0][0].contig)
        regulons, transcript_ids = [], []

        for transcript in gene:
            ntranscripts += 1
            mi = min([x.start for x in transcript])
            ma = max([x.end for x in transcript])

            # pick the transcript end the window is centred on
            if tss:
                anchor = ma if is_negative_strand else mi
            else:
                anchor = mi if is_negative_strand else ma

            # upstream/downstream swap sides on the negative strand
            if is_negative_strand:
                left, right = anchor - downstream, anchor + upstream
            else:
                left, right = anchor - upstream, anchor + downstream

            # clamp both ends to the contig
            left = min(lcontig, max(0, left))
            right = min(lcontig, max(0, right))

            regulons.append((left, right))
            transcript_ids.append(transcript[0].transcript_id)

        if options.merge_promotors:
            # merge the regulons (and rename - as sort order might have
            # changed)
            regulons = Intervals.combine(regulons)
            transcript_ids = ["%i" % (n + 1) for n in range(len(regulons))]

        template = gene[0][0]
        gtf = GTF.Entry()
        gtf.fromGTF(template, template.gene_id, template.gene_id)
        gtf.source = "regulon"

        for (start, end), transcript_id in zip(regulons, transcript_ids):
            gtf.start, gtf.end = start, end
            gtf.transcript_id = transcript_id
            options.stdout.write("%s\n" % str(gtf))
            nregulons += 1

    E.info("ngenes=%i, ntranscripts=%i, nregulons=%i" % (ngenes, ntranscripts, nregulons))
开发者ID:CGATOxford,项目名称:cgat,代码行数:59,代码来源:gtf2gff.py

示例11: cropGFF

def cropGFF(gffs, options):
    """crop intervals in gff file.

    The intervals read from ``options.crop`` are loaded into one
    ``bx.intervals`` intersecter per contig. Every entry of *gffs* that
    overlaps cropping intervals is replaced by the segments returned by
    ``Intervals.fromArray`` for a ones-array in which the overlapped
    positions are zeroed; entries without overlap are written unchanged.
    Output goes to ``options.stdout``, the final counts to
    ``options.stdlog`` when ``options.loglevel >= 1``.
    """

    # read regions to crop with and convert intervals to intersectors
    E.info("reading gff for cropping: started.")

    crop_entries = GTF.iterator(IOTools.openFile(options.crop, "r"))
    cropper = GTF.readAsIntervals(crop_entries)
    ntotal = 0
    for contig in cropper.keys():
        tree = bx.intervals.intersection.Intersecter()
        for crop_start, crop_end in cropper[contig]:
            tree.add_interval(bx.intervals.Interval(crop_start, crop_end))
            ntotal += 1
        cropper[contig] = tree

    E.info("reading gff for cropping: finished.")
    E.info("reading gff for cropping: %i contigs with %i intervals." %
           (len(cropper), ntotal))

    ninput, noutput, ncropped, ndeleted = 0, 0, 0, 0

    # do the actual cropping
    for gff in gffs:

        ninput += 1

        overlaps = None
        if gff.contig in cropper:
            start, end = gff.start, gff.end
            overlaps = cropper[gff.contig].find(start, end)

        if not overlaps:
            # nothing to crop: pass the entry through untouched
            noutput += 1
            options.stdout.write("%s\n" % gff)
            continue

        # mask of the entry, relative to its own start; overlapped
        # positions are set to 0
        length = end - start
        mask = numpy.ones(length)
        for hit in overlaps:
            first = max(0, hit.start - start)
            last = min(length, hit.end - start)
            mask[first:last] = 0

        segments = Intervals.fromArray(mask)

        if len(segments) == 0:
            ndeleted += 1
        else:
            ncropped += 1

        for seg_start, seg_end in segments:
            # shift segment back to contig coordinates
            gff.start, gff.end = seg_start + start, seg_end + start
            noutput += 1
            options.stdout.write("%s\n" % gff)

    if options.loglevel >= 1:
        options.stdlog.write("# ninput=%i, noutput=%i, ncropped=%i, ndeleted=%i\n" % (
            ninput, noutput, ncropped, ndeleted))
开发者ID:Charlie-George,项目名称:cgat,代码行数:59,代码来源:gff2gff.py

示例12: iterator_min_feature_length

def iterator_min_feature_length(gff_iterator, min_length, feature="exon"):
    """Yield the items of *gff_iterator* with enough *feature* sequence.

    For each item the ``(start, end)`` pairs of its entries whose
    ``feature`` equals *feature* are merged with ``Intervals.combine``;
    the item is yielded when the summed length of the merged intervals is
    at least *min_length*.
    """
    for gffs in gff_iterator:
        merged = Intervals.combine(
            [(entry.start, entry.end)
             for entry in gffs if entry.feature == feature])

        total = 0
        for span in merged:
            total += span[1] - span[0]

        if total >= min_length:
            yield gffs
开发者ID:prasoonnema,项目名称:cgat,代码行数:8,代码来源:GTF.py

示例13: FilterEliminateOverlappingTranscripts

def FilterEliminateOverlappingTranscripts(
        exons, filter_exons,
        eliminated_predictions, contig_sizes, options):
    """eliminate predictions that overlap or span a positive set of transcripts.

    The ranges obtained from *filter_exons* are merged per key. The ranges
    obtained from *exons* are then scanned in sorted order against the
    merged filter ranges of the same key. For every exon range that is not
    strictly left or right of the current filter range, a line is written
    to ``options.stdout``, ``eliminated_predictions[id]`` is set to 0 and
    ``(id, "f")`` is added to the result.

    Returns the list of ``(id, "f")`` tuples (one per offending exon range).
    """

    eliminated = []

    # convert list of filter exons into a list of ranges.
    filter_ranges = getRangesFromExons(
        filter_exons,
        both_strands=options.filter_remove_spanning_both_strands,
        contig_sizes=contig_sizes)

    # Keep only (from, to) of each range before merging. A list is built
    # explicitly: on Python 3 ``map`` would hand over a one-shot iterator.
    for key, ranges in list(filter_ranges.items()):
        filter_ranges[key] = Intervals.combineIntervals(
            [r[:2] for r in ranges])

    exon_ranges = getRangesFromExons(exons,
                                     both_strands=False)

    # and now go through exons and delete transcripts whose
    # exons overlap one of the forbidden ranges
    for key, ee in exon_ranges.items():

        if key not in filter_ranges:
            continue

        ff = filter_ranges[key]
        ee.sort()

        # f is the index into the sorted filter ranges; it only ever moves
        # forward while the sorted exon ranges are traversed.
        f = 0

        for efrom, eto, prediction_id in ee:

            # increment filter, such that its extent
            # is larger than current range to test.
            while f < len(ff) and ff[f][1] < efrom:
                f += 1
            if f == len(ff):
                # no filter range left: nothing further can overlap
                break

            if eto >= ff[f][0]:
                options.stdout.write(
                    "%s\t%s\n" % (prediction_id,
                                  "eliminated: filtered by %s:%i:%i" % (
                                      key, ff[f][0], ff[f][1])))
                eliminated_predictions[prediction_id] = 0
                eliminated.append((prediction_id, "f"))

    return eliminated
开发者ID:CGATOxford,项目名称:Optic,代码行数:57,代码来源:select_transcripts.py

示例14: get_windows

def get_windows(pvalues, window_size, threshold):
    """Merge windows around every index position and take their minimum.

    A window ``(pos - window_size, pos + window_size + 1)`` is built for
    each value in ``pvalues.index``; the windows are merged with
    ``Intervals.combine``. For each merged window the minimum of
    *pvalues* over the labels ``start`` .. ``end - 1`` is taken.

    *threshold* is accepted but not used by this function.

    Returns a list of ``(merged_window, minimum)`` pairs.
    """
    # windows are built with an exclusive end (hence the + 1) ...
    windows = [(pos - window_size, pos + window_size + 1)
               for pos in pvalues.index.values]

    merged_windows = Intervals.combine(windows)

    # ... while label slicing includes both end points, hence end - 1.
    # ``.loc`` replaces ``.ix``, which was removed in pandas 1.0.
    windows_min_p = [pvalues.loc[float(start):float(end - 1)].min()
                     for start, end in merged_windows]

    # materialise so the result is a list on Python 3 as well
    return list(zip(merged_windows, windows_min_p))
开发者ID:sudlab,项目名称:iCLIPlib,代码行数:10,代码来源:find_significant_bases.py

示例15: toIntronIntervals

def toIntronIntervals(chunk):
    '''Return intron coordinates for the gtf elements of one transcript.

    The ``(start, end)`` pairs of all elements with feature "exon" are
    merged with ``Intervals.combine`` and the result of
    ``Intervals.complement`` on them is returned. An empty *chunk* gives
    an empty list.

    An ``AssertionError`` is raised if the elements differ in strand,
    contig or transcript_id.

    Note that coordinates will still be forward strand coordinates
    '''
    if len(chunk) == 0:
        return []

    first = chunk[0]
    contig = first.contig
    strand = first.strand
    transcript_id = first.transcript_id

    for gff in chunk:
        assert gff.strand == strand, "features on different strands."
        assert gff.contig == contig, "features on different contigs."
        assert gff.transcript_id == transcript_id, "more than one transcript submitted"

    exons = [(entry.start, entry.end)
             for entry in chunk if entry.feature == "exon"]
    merged = Intervals.combine(exons)
    return Intervals.complement(merged)
开发者ID:Charlie-George,项目名称:cgat,代码行数:19,代码来源:GTF.py


注:本文中的CGAT.Intervals类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。