当前位置: 首页>>代码示例>>Python>>正文


Python Bed.readAndIndex方法代码示例

本文整理汇总了Python中CGAT.Bed.readAndIndex方法的典型用法代码示例。如果您正苦于以下问题:Python Bed.readAndIndex方法的具体用法?Python Bed.readAndIndex怎么用?Python Bed.readAndIndex使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在CGAT.Bed的用法示例。


在下文中一共展示了Bed.readAndIndex方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: annotateCpGIslands

# 需要导入模块: from CGAT import Bed [as 别名]
# 或者: from CGAT.Bed import readAndIndex [as 别名]
def annotateCpGIslands( infiles, outfile ):
    '''annotate transcript by absence/presence of CpG islands
    '''
    cpgfile, tssfile = infiles
    cpg_index = Bed.readAndIndex( IOTools.openFile( cpgfile ) )

    up_extension = PARAMS["cpg_search_upstream"]
    down_extension = PARAMS["cpg_search_downstream"]

    counter = E.Counter()
    outf = IOTools.openFile( outfile, "w" )
    outf.write("transcript_id\tstrand\tstart\tend\trelative_start\trelative_end\n" )

    for tss in Bed.iterator(IOTools.openFile( tssfile ) ):
        counter.tss_total += 1

        # search window around the TSS, oriented by strand
        if tss.strand == "+":
            window_start = tss.start - up_extension
            window_end = tss.start + down_extension
        else:
            window_start = tss.end - down_extension
            window_end = tss.end + up_extension

        # a contig absent from the index counts the same as zero overlaps
        try:
            matches = list(cpg_index[tss.contig].find( window_start, window_end ))
        except KeyError:
            matches = []

        if not matches:
            counter.promotor_without_matches += 1
            continue

        counter.promotor_output += 1
        for genome_start, genome_end, x in matches:
            counter.matches_total += 1
            match_length = genome_end - genome_start

            # get relative location of match with respect to the TSS
            if tss.strand == "+":
                relative_start = genome_start - tss.start
            else:
                relative_start = tss.end - genome_end

            relative_end = relative_start + match_length

            outf.write( "\t".join( map(str, (
                            tss.name, tss.strand,
                            genome_start, genome_end,
                            relative_start, relative_end ))) + "\n" )
            counter.matches_output += 1

    outf.close()

    # per-category counts go to a companion summary file
    with IOTools.openFile( outfile + ".summary", "w" ) as summary:
        summary.write ("category\tcounts\n" )
        summary.write( counter.asTable() + "\n" )

    E.info( counter )
开发者ID:BioinformaticsArchive,项目名称:cgat,代码行数:61,代码来源:pipeline_promotors.py

示例2: main

# 需要导入模块: from CGAT import Bed [as 别名]
# 或者: from CGAT.Bed import readAndIndex [as 别名]
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    argv = argv or sys.argv

    # build the command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: bed2graph.py 2861 2010-02-23 17:36:32Z andreas $", usage=globals()["__doc__"])

    parser.add_option("-o", "--output-section", dest="output", type="choice",
                      choices=("full", "name"),
                      help="output either ``full`` overlapping entries, only the ``name``s. [default=%default].")

    parser.set_defaults(output="full")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if len(args) != 2:
        raise ValueError("two arguments required")

    # "-" selects stdin for the first bed file
    infile1 = options.stdin if args[0] == "-" else IOTools.openFile(args[0], "r")
    infile2 = IOTools.openFile(args[1], "r")

    idx = Bed.readAndIndex(infile2, with_values=True)

    outfile = options.stdout

    # choose how an interval is rendered in the output
    if options.output == "name":
        outfile.write("name1\tname2\n")
        formatter = lambda x: x.fields[0]
    else:
        formatter = str

    for bed in Bed.iterator(infile1):
        try:
            overlaps = idx[bed.contig].find(bed.start, bed.end)
        except (KeyError, IndexError):
            # ignore missing contig and zero length intervals
            continue

        for o in overlaps:
            outfile.write("\t".join((formatter(bed), formatter(o[2]))) + "\n")

    E.Stop()
开发者ID:SCV,项目名称:cgat,代码行数:58,代码来源:bed2graph.py

示例3: __init__

# 需要导入模块: from CGAT import Bed [as 别名]
# 或者: from CGAT.Bed import readAndIndex [as 别名]
    def __init__(self, filename, *args, **kwargs ):
        '''index the intervals in *filename*, one index per track.'''

        assert filename != None, "please supply filename for CounterOverlap"

        Counter.__init__(self, *args, **kwargs )

        self.filename = filename

        E.info( "reading intervals from %s" % self.filename )

        bedfile = IOTools.openFile( self.filename, "r")
        self.index = Bed.readAndIndex( bedfile, per_track = True )

        E.info( "read intervals for %s tracks" % len(self.index) )

        self.tracks = self.index.keys()
        # two output columns per track: overlap count and overlapping bases
        self.headers = []
        for track in self.tracks:
            self.headers.append( "%s_nover" % track )
            self.headers.append( "%s_bases" % track )
开发者ID:siping,项目名称:cgat,代码行数:21,代码来源:bed2table.py

示例4: buildIndex

# 需要导入模块: from CGAT import Bed [as 别名]
# 或者: from CGAT.Bed import readAndIndex [as 别名]
 def buildIndex(self, filename):
     '''read the bed entries in *filename* and return a nested index.'''
     bedfile = IOTools.openFile(filename, "r")
     return Bed.readAndIndex(bedfile)
开发者ID:Q-KIM,项目名称:cgat,代码行数:4,代码来源:diff_bed.py

示例5: __init__

# 需要导入模块: from CGAT import Bed [as 别名]
# 或者: from CGAT.Bed import readAndIndex [as 别名]
 def __init__(self, filename):
     '''build one interval index per track from the bed file *filename*.'''
     bedfile = IOTools.openFile(filename, "r")
     self.mIndices = Bed.readAndIndex(bedfile,
                                      per_track=True)
开发者ID:Q-KIM,项目名称:cgat,代码行数:5,代码来源:diff_bed.py

示例6: main

# 需要导入模块: from CGAT import Bed [as 别名]
# 或者: from CGAT.Bed import readAndIndex [as 别名]
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads exons from a GTF on stdin and, for each exon, compares the tag
    profile from a BAM file against a second profile (BAM or bed),
    writing distance and a randomisation-based z-score per exon.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=["ave_dist", "min_dist", "corr"],
                      default="min_dist",
                      help="Method for calculating similarity between profiles")
    parser.add_option("-s", "--spread", dest="spread", type="int",
                      default=10,
                      help="Amount to spread each tag by")
    parser.add_option("-k", "--keep-dist", dest="keep_dist",
                      action="store_true",
                      help="Keep the distribution of tag depths")
    # BUGFIX: without type="int" a user-supplied -r value arrived as a
    # string, breaking rands.mean()/rands.std() arithmetic below.
    parser.add_option("-r", "--rands", dest="rands", type="int",
                      default=100,
                      help="Number of randomisations to use for calculating"
                           " mean and stdev of distance")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    profile1_file, profile2_file = args
    profile1_file = pysam.AlignmentFile(profile1_file)

    if profile2_file.endswith(("bed", "bed.gz")):
        # BUGFIX: readAndIndex expects an open file, not a filename string;
        # this line (and two below) was also tab-indented, which is a
        # TabError under Python 3.
        profile2_file = Bed.readAndIndex(IOTools.openFile(profile2_file),
                                         with_values=True)
        profile2_counter = bed_counter
    else:
        profile2_file = pysam.AlignmentFile(profile2_file)
        profile2_counter = iCLIP.count_intervals

    # select the profile-to-profile distance function
    if options.method == "min_dist":
        distance_func = iCLIP.findMinDistance
    elif options.method == "ave_dist":
        distance_func = iCLIP.calcAverageDistance
    else:
        def distance_func(profile1, profile2):
            return iCLIP.corr_profile(profile1, profile2, options.spread,
                                      profile2_ready=True)

    for exon in GTF.iterator(options.stdin):
        if exon.feature != "exon":
            continue

        contig = exon.contig
        strand = exon.strand
        transcript_id = exon.transcript_id
        start = exon.start
        end = exon.end

        profile1 = iCLIP.count_intervals(profile1_file,
                                         [(start, end)],
                                         contig=contig,
                                         strand=strand)

        profile2 = profile2_counter(profile2_file,
                                    [(start, end)],
                                    contig=contig,
                                    strand=strand)

        if profile1.sum() == 0 or profile2.sum() == 0:
            # nothing to compare on one side: emit NA columns and move on
            z = "NA"
            distance = "NA"
            options.stdout.write(
                "%(contig)s\t%(start)i\t%(end)i\t%(transcript_id)s\t%(strand)s\t%(distance)s\t%(z)s\n" % locals())
            continue

        if options.method == "corr":
            profile2 = iCLIP.spread(profile2, options.spread)

        distance = distance_func(profile1, profile2)

        rands = iCLIP.rand_apply(profile=profile1,
                                 exon=exon,
                                 n=options.rands,
                                 func=distance_func,
                                 keep_dist=options.keep_dist,
                                 profile2=profile2)

        # z-score of the observed distance against the randomised background
        z = (distance - rands.mean()) / rands.std()

        options.stdout.write(
            "%(contig)s\t%(start)i\t%(end)i\t%(transcript_id)s\t%(strand)s\t%(distance).3f\t%(z).2f\n" % locals())

    # write footer and output benchmark information.
    E.Stop()
开发者ID:CGATOxford,项目名称:UMI-tools_pipelines,代码行数:96,代码来源:reproducibility_by_exon.py

示例7: countInteractions

# 需要导入模块: from CGAT import Bed [as 别名]
# 或者: from CGAT.Bed import readAndIndex [as 别名]
def countInteractions(reads, digest, probe_fragments, outfile,
                      metrics_file):
    # Count interactions between restriction fragments from paired reads.
    #
    # reads:           BAM filename; opened below and consumed per bundle.
    # digest:          bed file of restriction-digest fragments, indexed by name.
    # probe_fragments: bed file of probe fragments, indexed by name.
    # outfile:         prefix for output; rejected reads go to outfile + ".rejects.bam".
    # metrics_file:    not used in the visible part of this function
    #                  (presumably written in the omitted tail — TODO confirm).
    #
    # NOTE(review): function body is truncated here (final else branch and
    # any return/output logic are outside this view).

    reads = pysam.AlignmentFile(reads)
    # with_values="name" makes index hits carry the fragment name as the
    # third element of each (start, end, value) match tuple.
    digest_intervals = Bed.readAndIndex(
        IOTools.openFile(digest), with_values="name")
    probe_fragments = Bed.readAndIndex(
        IOTools.openFile(probe_fragments), with_values="name")
    c = collections.Counter()
    results = collections.defaultdict(int)
    contigs = digest_intervals.keys()
    rejects=pysam.AlignmentFile(outfile + ".rejects.bam", "wb", template=reads)

    E.debug("Starting Counting Run...")
    for fragment in readBundle(reads):

        # progress logging every 1000 fragments (fires once at 0 too)
        if c["fragments"] % 1000 == 0:
            # NOTE(review): c.iteritems() is Python 2 only; would need
            # c.items() under Python 3.
            E.debug("\t".join(["%s:\t%s" % x for x in c.iteritems()]))

        c["fragments"] += 1
        c["reads"] += len(fragment)

        def _get_read_position(aln):
            # Offset into the read at which the first aligned (M) base
            # occurs, walking the CIGAR from the read's 5' end.
            # NOTE: local `c` here shadows the outer Counter.

            if aln.is_reverse:
                c = aln.cigartuples[::-1]
            else:
                c = aln.cigartuples

            pos = 0
            for op, length in c:
                if op == 0:
                    return pos
                else:
                    pos += length

            return pos

        def _get_first(alignments):
            '''find alignment that maps earliest part of read'''
            slist = sorted(alignments, key=lambda x: _get_read_position(x))
            return (slist[0], slist[1:])
            
        def _get_probes(read):
            # Probe-fragment matches overlapping this alignment; empty
            # list when the contig is not in the probe index.
            try:
                m = list(
                    probe_fragments[reads.getrname(read.reference_id)].find(
                        read.pos, read.aend))
            except KeyError:
                return []
            return m

        first_read = [aln for aln in fragment if aln.is_read1]
        second_read = [aln for aln in fragment if aln.is_read2]

        # require both mates to be present
        if len(first_read) == 0 or len(second_read) == 0:
            c["Unpaired"] += 1
            continue

        assert len(first_read) + len(second_read) == len(fragment)

        # primary_aln: earliest-mapping alignment of each mate;
        # other_aln: all remaining (supplementary/chimeric) alignments.
        primary_aln, other_aln = zip(_get_first(first_read),
                                     _get_first(second_read))
        other_aln = sum(other_aln, [])
        
        probe_matches = [_get_probes(read) for read in primary_aln]

        # fragments whose primary alignments hit no probe are rejected
        if len(sum(probe_matches, [])) == 0:
            c["no_probe"] += 1
            for read in fragment:
                rejects.write(read)
            continue
    
        # digest fragments overlapped by the secondary alignments
        other_matches = set(sum([
            list(digest_intervals[reads.getrname(read.reference_id)].find(
                read.pos, read.aend))
            for read in other_aln
            if reads.getrname(read.reference_id) in contigs],[]))

        # digest fragments overlapped by the primary alignments
        primary_matches = set(sum([
            list(digest_intervals[reads.getrname(read.reference_id)].find(
                read.pos, read.aend))
            for read in primary_aln
            if reads.getrname(read.reference_id) in contigs],[]))

        # more than two distinct fragments cannot be a single interaction
        if len(primary_matches) > 2:
            c["multi-hit"] += 1
            continue
            
        # secondary alignments must not introduce extra fragments
        if not all([match in primary_matches for match in other_matches]):
            c["multi-hit"] += 1
            continue
        
        # record interaction counts symmetrically, keyed by fragment name
        # (element [2] of each match tuple)
        primary_matches = list(primary_matches)
        if len(primary_matches) == 2:
            results[(primary_matches[0][2], primary_matches[1][2])] += 1
            results[(primary_matches[1][2], primary_matches[0][2])] += 1
        elif len(primary_matches) == 1:
            results[(primary_matches[0][2], primary_matches[0][2])] += 1
        else:
#.........这里部分代码省略.........
开发者ID:sudlab,项目名称:Capture_C,代码行数:103,代码来源:PipelineCaptureC.py


注:本文中的CGAT.Bed.readAndIndex方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。