This article collects typical usage examples of the Python method CGAT.Intervals.complement. If you are wondering what Intervals.complement is for, or how to use it, the curated examples below should help. You can also look further into the containing module, CGAT.Intervals.
Seven code examples of Intervals.complement are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python examples.
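Before the examples, a minimal sketch of the method's behavior may help. Judging from the calls in the examples below, Intervals.complement takes a list of (start, end) tuples and returns the gaps between them; an optional pair of bounds extends the complement to a fixed range (this two-argument form appears in Examples 3 and 5). The exact semantics here are an assumption inferred from those calls:

from CGAT import Intervals

# Exons of a hypothetical transcript as half-open (start, end) tuples.
exons = [(10, 20), (30, 40), (50, 60)]

# Gaps between consecutive intervals, i.e. the introns.
print(Intervals.complement(exons))
# expected: [(20, 30), (40, 50)]

# With explicit bounds, the flanking gaps are included as well.
print(Intervals.complement(exons, 0, 70))
# expected: [(0, 10), (20, 30), (40, 50), (60, 70)]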
Example 1: toIntronIntervals
# Required imports: from CGAT import Intervals [as alias]
# Or: from CGAT.Intervals import complement [as alias]
def toIntronIntervals(chunk):
    """convert a set of gtf elements within a transcript to intron coordinates.

    Will use first transcript_id found.

    Note that coordinates will still be forward strand coordinates.
    """
    if len(chunk) == 0:
        return []
    contig, strand, transcript_id = (chunk[0].contig,
                                     chunk[0].strand,
                                     chunk[0].transcript_id)
    for gff in chunk:
        assert gff.strand == strand, "features on different strands."
        assert gff.contig == contig, "features on different contigs."

    intervals = Intervals.combine([(x.start, x.end)
                                   for x in chunk if x.feature == "exon"])
    return Intervals.complement(intervals)
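A hedged usage sketch for the function above: toIntronIntervals only needs objects exposing contig, strand, transcript_id, feature, start and end attributes, so a hypothetical stand-in (real code would get records from GTF.iterator) is enough to illustrate it:

from collections import namedtuple

# Hypothetical stand-in for a GTF record, for illustration only.
MockGTF = namedtuple(
    "MockGTF", "contig strand transcript_id feature start end")

chunk = [
    MockGTF("chr1", "+", "tx1", "exon", 100, 200),
    MockGTF("chr1", "+", "tx1", "CDS", 150, 200),  # non-exon features are ignored
    MockGTF("chr1", "+", "tx1", "exon", 300, 400),
]

# Expected: the single intron between the two exons, [(200, 300)].
print(toIntronIntervals(chunk))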
Example 2: toIntronIntervals
# Required imports: from CGAT import Intervals [as alias]
# Or: from CGAT.Intervals import complement [as alias]
def toIntronIntervals(chunk):
    '''convert a set of gtf elements within a transcript to intron coordinates.

    Will raise an error if more than one transcript is submitted.

    Note that coordinates will still be forward strand coordinates.
    '''
    if len(chunk) == 0:
        return []
    contig, strand, transcript_id = (chunk[0].contig,
                                     chunk[0].strand,
                                     chunk[0].transcript_id)
    for gff in chunk:
        assert gff.strand == strand, "features on different strands."
        assert gff.contig == contig, "features on different contigs."
        assert gff.transcript_id == transcript_id, \
            "more than one transcript submitted"

    intervals = Intervals.combine([(x.start, x.end)
                                   for x in chunk if x.feature == "exon"])
    return Intervals.complement(intervals)
Example 3: processChunk
# Required imports: from CGAT import Intervals [as alias]
# Or: from CGAT.Intervals import complement [as alias]
#......... part of the code omitted here .........
        if options.threshold_max_sbjct_gapchars and \
                match.mSbjctNGapsBases > options.threshold_max_sbjct_gapchars:
            nremoved_gaps += 1
            x_nremoved_gaps += 1
            continue

        new_matches.append(match)

    matches = new_matches

    if len(matches) == 0:
        if outfile_empty:
            outfile_empty.write(
                "%s\tall matches removed after applying thresholds: "
                "before=%i, npid=%i, nqcoverage=%i, ngaps=%i, nmatches=%i\n" %
                (query_id, nmatches, x_nremoved_pid, x_nquery_coverage,
                 x_nremoved_gaps, x_nremoved_nmatches))
        nskipped += 1
        return

    # Remove queries matching to a forbidden region. This section
    # will remove the full query if any of its matches overlaps a
    # forbidden region.
    keep = True
    for match in matches:
        if intersectors and match.mSbjctId in intersectors:
            found = intersectors[match.mSbjctId].find(
                match.mSbjctFrom, match.mSbjctTo)
            if found and not options.keep_forbidden:
                nremoved_regions += 1
                keep = False
                continue

    if not keep:
        nqueries_removed_region += 1
        if outfile_empty:
            outfile_empty.write("%s\toverlap with forbidden region\n" %
                                query_id)
        return

    # check for full length matches
    for match in matches:
        if match.mQueryCoverage >= 99.9:
            full_matches.append(match)
        if match.mQueryCoverage > options.threshold_good_query_coverage:
            good_matches.append(match)
        else:
            partial_matches.append(match)

    if full_matches:
        nfull_matches += 1
    elif good_matches:
        ngood_matches += 1
    elif partial_matches:
        npartial_matches += 1

    # compute coverage of sequence with matches
    intervals = []
    for match in full_matches + good_matches + partial_matches:
        intervals.append((match.mQueryFrom, match.mQueryTo))

    rest = Intervals.complement(intervals, 0, match.mQueryLength)

    query_coverage = 100.0 * (match.mQueryLength -
                              sum(map(lambda x: x[1] - x[0], rest))) / \
        match.mQueryLength

    if query_coverage >= 99.9:
        fully_matched.append(query_id)
    elif query_coverage > options.threshold_good_query_coverage:
        well_matched.append(query_id)
    else:
        partially_matched.append(query_id)

    aggregate_coverages.append(query_coverage)

    # select matches to output
    matches, msg = selectMatches(query_id, matches, options, queries_fasta)

    if len(matches) > 0:
        for match in matches:
            if options.query_forward_coordinates:
                match.convertCoordinates()

            if options.output_format == "map":
                options.stdout.write("%s\n" % "\t".join(map(str, (
                    match.mQueryId, match.mSbjctId,
                    match.strand,
                    "%5.2f" % match.mQueryCoverage,
                    "%5.2f" % match.mSbjctCoverage,
                    "%5.2f" % match.mPid,
                    match.mQueryLength,
                    match.mSbjctLength,
                    match.mQueryFrom, match.mQueryTo,
                    match.mSbjctFrom, match.mSbjctTo,
                    ",".join(map(str, match.mBlockSizes)),
                    ",".join(map(str, match.mQueryBlockStarts)),
                    ",".join(map(str, match.mSbjctBlockStarts)),
                ))))
            elif options.output_format == "psl":
                options.stdout.write(str(match) + "\n")

        noutput += 1
    else:
        if outfile_empty:
            outfile_empty.write("%s\tno matches selected: %s\n" %
                                (query_id, msg))
        nempty += 1
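The coverage arithmetic in this example is worth isolating: the complement of the match intervals over [0, query_length) gives the uncovered stretches, and subtracting their total length yields percent coverage. A minimal sketch of the same computation, factored into a hypothetical helper:

def query_coverage_percent(intervals, query_length):
    # Uncovered stretches of the query within [0, query_length).
    rest = Intervals.complement(intervals, 0, query_length)
    uncovered = sum(end - start for start, end in rest)
    return 100.0 * (query_length - uncovered) / query_length

# Two matches covering 80 of 100 bases:
# query_coverage_percent([(0, 50), (70, 100)], 100) -> expected 80.0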
Example 4: readWorkspace
# Required imports: from CGAT import Intervals [as alias]
# Or: from CGAT.Intervals import complement [as alias]
def readWorkspace(infile,
                  workspace_builder="raw",
                  label="none",
                  map_id2annotation={}):
    """read workspace from infile.

    A workspace is a collection of intervals with two labels associated
    to each interval, one for the 5' and one for the 3' end.

    Available workspace builders are:

    gff
       take a gff file.
    gtf-intergenic
       build workspace from intergenic segments in a gtf file.
    gtf-intronic
       build workspace from intronic segments in a gtf file.
    gtf-genic
       the workspace is built from genes (first to last exon).

    Available labels are:

    none
       no labels are given to the ends of workspaces.
    direction
       labels are given based on the 5'/3' end of the
       bounding exon.
    annotation
       labels are given based on a gene2annotation map.

    returns a list of segments for each contig in a dictionary
    """

    if label == "none":
        label_f = lambda x, y: (("X",), ("X",))
        info_f = lambda x: None
    elif label == "direction":
        label_f = lambda x, y: ((("5", "3")[x],), (("3", "5")[y],))
        info_f = lambda x: x.strand == "+"
    elif label == "annotation":
        label_f = lambda x, y: (map_id2annotation[x], map_id2annotation[y])
        info_f = lambda x: x.gene_id

    if workspace_builder == "gff":
        workspace = GTF.readAsIntervals(GFF.iterator(infile))

    elif workspace_builder == "gtf-intergenic":

        workspace = collections.defaultdict(list)
        # get all genes
        for e in GTF.merged_gene_iterator(GTF.iterator(infile)):
            workspace[e.contig].append((e.start, e.end, info_f(e)))

        # convert to intergenic regions.
        # overlapping genes are merged and the labels
        # of the right-most entry is retained
        for contig in workspace.keys():
            segs = workspace[contig]
            segs.sort()
            last = segs[0]
            new_segs = []
            for this in segs[1:]:
                if last[1] >= this[0]:
                    if this[1] > last[1]:
                        last = (last[0], this[1], this[2])
                    continue
                assert last[1] < this[0], "this=%s, last=%s" % (this, last)
                new_segs.append((last[1], this[0],
                                 label_f(last[2], this[2])))
                last = this
            workspace[contig] = new_segs

    elif workspace_builder == "gtf-intronic":

        workspace = collections.defaultdict(list)

        # the current procedure will count nested genes
        # twice
        for ee in GTF.flat_gene_iterator(GTF.iterator(infile)):

            exons = Intervals.combine([(e.start, e.end) for e in ee])
            introns = Intervals.complement(exons)

            r = ee[0]
            for start, end in introns:
                workspace[r.contig].append((start,
                                            end,
                                            label_f(info_f(r), info_f(r))
                                            ))

    elif workspace_builder == "gtf-genic":

        workspace = collections.defaultdict(list)

        # the current procedure will count nested genes
#......... part of the code omitted here .........
Example 5: __init__
# Required imports: from CGAT import Intervals [as alias]
# Or: from CGAT.Intervals import complement [as alias]
def __init__(self, *args, **kwargs):
    Sampler.__init__(self, *args, **kwargs)
    self.mGapLengths = [x[1] - x[0]
                        for x in Intervals.complement(self.mObserved,
                                                      self.mWorkStart,
                                                      self.mWorkEnd)]
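A small worked illustration of what mGapLengths ends up holding, under the same assumed semantics for the bounded form of complement:

# Observed segments inside a workspace spanning [0, 100).
observed = [(10, 20), (40, 70)]

gaps = Intervals.complement(observed, 0, 100)
# expected: [(0, 10), (20, 40), (70, 100)]

gap_lengths = [end - start for start, end in gaps]
# expected: [10, 20, 30]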
Example 6: main
# Required imports: from CGAT import Intervals [as alias]
# Or: from CGAT.Intervals import complement [as alias]
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-f", "--feature", dest="feature", type="choice",
                      choices=["gene", "transcript", "exon"],
                      default="transcript",
                      help="which feature to use: gene/transcript/exon")

    parser.add_option("--unstranded-bw", dest="unstranded_wig", type="string",
                      help="BigWig with tag counts on both strands")

    parser.add_option("--plus-bw", dest="plus_wig", type="string",
                      help="BigWig with tag counts from plus strand")

    parser.add_option("--minus-bw", dest="minus_wig", type="string",
                      help="BigWig with tag counts from minus strand")

    parser.add_option("--bed", dest="bedfile", type="string",
                      help="tabix indexed bed file with tag counts")

    parser.add_option("-c", "--use-centre", dest="centre", action="store_true",
                      default=False,
                      help="Use centre of read rather than start")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    iterator = GTF.iterator(options.stdin)

    if options.feature == "gene":
        iterator = GTF.flat_gene_iterator(iterator)
    elif options.feature == "transcript":
        iterator = GTF.transcript_iterator(iterator)
    elif options.feature == "exon":
        def _exon_iterator(gff_iterator):
            for exon in gff_iterator:
                yield [exon]
        iterator = _exon_iterator(iterator)

    if options.unstranded_wig:
        bamfile = iCLIP.make_getter(plus_wig=options.unstranded_wig)
    elif options.plus_wig:
        if not options.minus_wig:
            raise ValueError(
                "Please provide wigs for both strands or use --unstranded-bw")
        bamfile = iCLIP.make_getter(plus_wig=options.plus_wig,
                                    minus_wig=options.minus_wig)
    elif options.bedfile:
        bamfile = iCLIP.make_getter(bedfile=options.bedfile)
    else:
        bamfile = pysam.AlignmentFile(args[0])

    outlines = []

    for feature in iterator:
        exons = GTF.asRanges(feature, "exon")
        exon_counts = iCLIP.count_intervals(bamfile,
                                            exons,
                                            feature[0].contig,
                                            feature[0].strand,
                                            dtype="uint32",
                                            use_centre=options.centre)
        exon_counts = exon_counts.sum()

        introns = Intervals.complement(exons)
        intron_counts = iCLIP.count_intervals(bamfile,
                                              introns,
                                              feature[0].contig,
                                              feature[0].strand,
                                              dtype="uint32",
                                              use_centre=options.centre)
        intron_counts = intron_counts.sum()

        if options.feature == "exon":
            try:
                exon_id = feature[0].exon_id
            except AttributeError:
                try:
                    exon_id = feature[0].exon_number
                except AttributeError:
                    exon_id = "missing"
            gene_id = feature[0].gene_id
            transcript_id = feature[0].transcript_id
            intron_counts = "NA"
        else:
            exon_id = "NA"
            gene_id = feature[0].gene_id
            transcript_id = feature[0].transcript_id
            intron_counts = float(intron_counts)

        outlines.append([gene_id,
                         transcript_id,
#......... part of the code omitted here .........
Example 7: main
# Required imports: from CGAT import Intervals [as alias]
# Or: from CGAT.Intervals import complement [as alias]
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-f", "--feature", dest="feature", type="choice",
                      choices=["gene", "transcript", "exon"],
                      default="transcript",
                      help="which feature to use: gene/transcript/exon")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    iterator = GTF.iterator(options.stdin)

    if options.feature == "gene":
        iterator = GTF.flat_gene_iterator(iterator)
    elif options.feature == "transcript":
        iterator = GTF.transcript_iterator(iterator)
    elif options.feature == "exon":
        def _exon_iterator(gff_iterator):
            for exon in gff_iterator:
                yield [exon]
        iterator = _exon_iterator(iterator)

    bamfile = pysam.AlignmentFile(args[0])

    outlines = []

    for feature in iterator:
        exons = GTF.asRanges(feature, "exon")
        exon_counts = iCLIP.count_intervals(bamfile,
                                            exons,
                                            feature[0].contig,
                                            feature[0].strand,
                                            dtype="uint32")
        exon_counts = exon_counts.sum()

        introns = Intervals.complement(exons)
        intron_counts = iCLIP.count_intervals(bamfile,
                                              introns,
                                              feature[0].contig,
                                              feature[0].strand,
                                              dtype="uint32")
        intron_counts = intron_counts.sum()

        if options.feature == "exon":
            try:
                exon_id = feature[0].exon_id
            except AttributeError:
                exon_id = "missing"
            gene_id = feature[0].gene_id
            transcript_id = feature[0].transcript_id
            intron_counts = "NA"
        else:
            exon_id = "NA"
            gene_id = feature[0].gene_id
            transcript_id = feature[0].transcript_id

        outlines.append([gene_id,
                         transcript_id,
                         exon_id,
                         str(exon_counts),
                         str(intron_counts)])

    options.stdout.write("\t".join(["gene_id",
                                    "transcript_id",
                                    "exon_id",
                                    "exon_count",
                                    "intron_count"]) + "\n")

    outlines = ["\t".join(outline) for outline in outlines]
    outlines = "\n".join(outlines)
    options.stdout.write(outlines + "\n")

    # write footer and output benchmark information.
    E.Stop()