当前位置: 首页>>代码示例>>Python>>正文


Python pysam.asBed函数代码示例

本文整理汇总了Python中pysam.asBed函数的典型用法代码示例。如果您正苦于以下问题:Python asBed函数的具体用法?Python asBed怎么用?Python asBed使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了asBed函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: load_gap_intervals

def load_gap_intervals(gap_file):
    if gap_file is None: return []
    logger.info("Loading the gaps in the genome from %s" % gap_file)
    with open(gap_file) as gap_file_fd:
        gap_intervals = [SVInterval(it.contig, it.start, it.end, it.name, "gap") for it in
                         pysam.tabix_file_iterator(gap_file_fd, parser=pysam.asBed())]
    return merge_intervals(gap_intervals)
开发者ID:BioinformaticsArchive,项目名称:metasv,代码行数:7,代码来源:vcf_utils.py

示例2: _run

    def _run(self, _config, temp):
        def keyfunc(bed):
            return (bed.contig, bed.name, bed.start)

        fastafile = pysam.Fastafile(self._reference)
        seqs = collections.defaultdict(list)
        with open(self._intervals) as bedfile:
            intervals = text.parse_lines_by_contig(bedfile, pysam.asBed()).items()
            for (contig, beds) in sorted(intervals):
                beds.sort(key = keyfunc)

                for (gene, gene_beds) in itertools.groupby(beds, lambda x: x.name):
                    gene_beds = tuple(gene_beds)
                    for bed in gene_beds:
                        seqs[(contig, gene)].append(fastafile.fetch(contig, bed.start, bed.end))

                    seq = "".join(seqs[(contig, gene)])
                    if any((bed.strand == "-") for bed in gene_beds):
                        assert all((bed.strand == "-") for bed in gene_beds)
                        seq = sequences.reverse_complement(seq)
                    seqs[(contig, gene)] = seq

        temp_file = os.path.join(temp, "sequences.fasta")
        with open(temp_file, "w") as out_file:
            for ((_, gene), sequence) in sorted(seqs.items()):
                fasta.print_fasta(gene, sequence, out_file)

        move_file(temp_file, self._outfile)
开发者ID:schae234,项目名称:pypeline,代码行数:28,代码来源:genotype.py

示例3: combineMergedIntervals

def combineMergedIntervals(bedfiles):
    '''combine intervals in a collection of bed files.

    Overlapping intervals between tracks are merged.

    Algorithm:

    1. collect all intervals in all tracks into a single track
    2. merge overlapping intervals 
    3. report all intervals that overlap with an interval in each track.

    '''

    # get all intervals
    data_per_contig = collections.defaultdict(list)
    for bedfile in bedfiles:
        for contig in bedfile.contigs:
            i = []
            for bed in bedfile.fetch(contig, parser=pysam.asBed()):
                i.append((bed.start, bed.end))
            data_per_contig[contig].extend(i)

    # merge intervals
    for contig in data_per_contig.keys():
        data_per_contig[contig] = Intervals.combine(data_per_contig[contig])

    # filter intervals - take only those present in all bedfiles
    for contig, data in data_per_contig.iteritems():
        for start, end in data:
            if isContainedInAll(contig, start, end, bedfiles):
                yield contig, start, end
开发者ID:Charlie-George,项目名称:cgat,代码行数:31,代码来源:beds2beds.py

示例4: _bed_getter

def _bed_getter(bedfile, contig, start=0, end=None, strand=".", dtype="uint16"):
    '''Get crosslink profiles from tabix indexed bedGraph/Bed'''

    # check the file contains some data for the requested contig
    if not contig in bedfile.contigs:
        #print "%s not in bedfile" % contig
        return pd.Series(dict(), dtype=dtype)
    
    # fetch the rercords from the specificed region
    crosslinks = bedfile.fetch(contig, start, end, parser=pysam.asBed())
    
    profile = dict()

    check_sum = 0
    
    for base in crosslinks:
        try:
            correct_strand = strand == "." or base.strand == strand
        except AttributeError:
            correct_strand = True
            
        if correct_strand:
            profile[float(base.start)] = int(base.score)
            check_sum += int(base.score)

    profile = pd.Series(dict(profile), dtype=dtype)

            
    #if not check_sum == profile.sum():
    #    raise OverflowError("Check sum failed (%i = %i). Possibly counts exceed specified dtype. Use bigger dtype"
    #                        % (check_sum, profile.sum()))

    return profile
开发者ID:sudlab,项目名称:iCLIPlib,代码行数:33,代码来源:getters.py

示例5: fetchProbeFragments

def fetchProbeFragments(probe_bed, digest_bed, outfile,
                        lookup_out):

    digest_fragments = pysam.TabixFile(digest_bed)
    bed = Bed.Bed()
    with IOTools.openFile(outfile, "w") as outf, \
         IOTools.openFile(lookup_out,"w") as lookup:

        lookup.write("probe\tfragment\n")
        for probe in Bed.iterator(IOTools.openFile(probe_bed)):
            
            frag = digest_fragments.fetch(probe.contig,
                                          probe.start,
                                          probe.end,
                                          parser=pysam.asBed())
            frag = list(frag)
            if not len(frag) == 1:
                E.warn("%i fragments found for probe %s, skipping" %
                       (len(frag), probe.name))
                continue

            frag = frag[0]
            bed.start = frag.start
            bed.end = frag.end
            bed.contig = frag.contig
            bed["name"] = probe.name
            bed["score"] = "."
            bed["strand"] = "+"

            lookup.write("%s\t%s\n" % (probe.name, frag.name))
            outf.write(str(bed) + "\n")
开发者ID:sudlab,项目名称:Capture_C,代码行数:31,代码来源:PipelineCaptureC.py

示例6: getProbeFragments

def getProbeFragments(probe_bed, digest_bed, outfile,
                        lookup_out):
    
    # First find the length of the restriction enzyme cut, required to obtain the start and end coordinates
    # from the pregenerated file.
    # First iteration, no comparison
    first_iteration = True
    
    length_RE_cut = 0
    
    last_bed = None
    
    for bed_digest in Bed.iterator(IOTools.openFile(digest_bed)):
                
        if(first_iteration):
            first_iteration = False
        else:
            # If they are in the same contig they can be compared
            if(bed_digest.contig == last_bed.contig):
                length_RE_cut = bed_digest.start - last_bed.end
                break
        
        last_bed = bed_digest
    
    
    digest_fragments = pysam.TabixFile(digest_bed)
    bed = Bed.Bed()
    with IOTools.openFile(outfile, "w") as outf, \
         IOTools.openFile(lookup_out,"w") as lookup:

        lookup.write("probe\tfragment\n")
        for probe in Bed.iterator(IOTools.openFile(probe_bed)):
            
            frag = digest_fragments.fetch(probe.contig,
                                          probe.start,
                                          probe.end,
                                          parser=pysam.asBed())
            frag = list(frag)
            if not len(frag) == 1:
                E.warn("%i fragments found for probe %s, skipping" %
                       (len(frag), probe.name))
                continue

            frag = frag[0]
            
            # The restriction enzyme cut on the left side of the fragment
            # is the end site of the last restriction enzyme fragment + 1
            # (+1 because according to the manual coordinates are specified
            # in 1-origin for the bed start.)
            
            bed.start = frag.start-length_RE_cut+1
            bed.end = frag.end+length_RE_cut
            bed.contig = frag.contig
            bed["name"] = probe.name
            bed["score"] = "."
            bed["strand"] = "+"

            lookup.write("%s\t%s\n" % (probe.name, frag.name))
            outf.write(str(bed) + "\n")
开发者ID:sudlab,项目名称:pipeline_capt_c_perl,代码行数:59,代码来源:pipelineCaptCPerl.py

示例7: __init__

    def __init__(self, file_path):
        file_path = str(file_path)
        if not file_path.endswith('.gz'):
            if os.path.exists(file_path + '.gz'):
                file_path += '.gz'
            else:
                file_path = self.compress(file_path)

        super().__init__(file_path, parser=pysam.asBed())
开发者ID:jrderuiter,项目名称:ngs-tk,代码行数:9,代码来源:tabix.py

示例8: testRead

    def testRead( self ):

        for x, r in enumerate(self.tabix.fetch( parser = pysam.asBed() )):
            c = self.compare[x]
            self.assertEqual( "\t".join( c ), str(r) )
            self.assertEqual( list(c), list(r) )
            self.assertEqual( c[0], r.contig)
            self.assertEqual( int(c[1]), r.start)
            self.assertEqual( int(c[2]), r.end)
开发者ID:RLuisier,项目名称:RSeQC,代码行数:9,代码来源:tabix_test.py

示例9: _collect_and_validate_regions

def _collect_and_validate_regions(regions):
    contigs = _collect_fasta_contigs(regions)
    parser = pysam.asBed()
    sequences = set()
    with open(regions["BED"]) as bedhandle:
        for (line_num, line) in enumerate(bedhandle):
            line = line.strip()
            if not line or line.startswith("#"):
                continue

            try:
                bed = parser(line, len(line))
                # Force evaluation of (lazily parsed) properties
                bed_start = bed.start
                bed_end = bed.end
            except ValueError, error:
                raise MakefileError(("Error parsing line %i in regions file:\n"
                                     "  Path = %r\n  Line = %r\n\n%s")
                                    % (line_num + 1, regions["BED"],
                                       line, error))

            if len(bed) < 6:
                url = "http://genome.ucsc.edu/FAQ/FAQformat.html#format1"
                name = repr(bed.name) if len(bed) > 3 else "unnamed record"
                raise MakefileError(("Region at line #%i (%s) does not "
                                     "contain the expected number of fields; "
                                     "the first 6 fields are required. C.f. "
                                     "defination at\n   %s\n\nPath = %r")
                                    % (line_num, name, url, regions["BED"]))

            contig_len = contigs.get(bed.contig)
            if contig_len is None:
                raise MakefileError(("Regions file contains contig not found "
                                     "in reference:\n  Path = %r\n  Contig = "
                                     "%r\n\nPlease ensure that all contig "
                                     "names match the reference names!")
                                    % (regions["BED"], bed.contig))
            elif not (0 <= int(bed_start) < int(bed_end) <= contig_len):
                raise MakefileError(("Regions file contains invalid region:\n"
                                     "  Path   = %r\n  Contig = %r\n"
                                     "  Start  = %s\n  End    = %s\n\n"
                                     "Expected 0 <= Start < End <= %i!")
                                    % (regions["BED"], bed.contig, bed.start,
                                       bed.end, contig_len))
            elif bed.strand not in "+-":
                raise MakefileError(("Regions file contains invalid region: "
                                     "  Path   = %r\n  Line = %i\n  Name = %r"
                                     "\nStrand is %r, expected '+' or '-'.")
                                    % (regions["BED"], line_num, bed.name,
                                       bed.strand))

            sequences.add(bed.name)
开发者ID:CarlesV,项目名称:paleomix,代码行数:52,代码来源:makefile.py

示例10: read_bed_records

def read_bed_records(filename):
    """Reads a bed-file (i.e. for a set of regions of interest), and returns
    a sorted list containing each line as a tuple containing the contig name,
    the start position, and the end position."""
    regions = []
    bed_parser = pysam.asBed()
    with open(filename) as bed_file:
        for line in bed_file:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            regions.append(bed_parser(line, len(line)))
    return regions
开发者ID:MikkelSchubert,项目名称:paleomix,代码行数:13,代码来源:summarize_heterozygosity.py

示例11: combineUnmergedIntervals

def combineUnmergedIntervals(foreground, background):
    '''combine intervals in a collection of bed files.

    Only intervals in the first track are reported.

    Algorithm:

    1. report all intervals in the first track that overlap with an interval in every other track.
    '''

    intervals = []
    c = 0
    for bed in foreground.fetch(parser=pysam.asBed()):
        c += 1
        if isContainedInAll(bed.contig, bed.start, bed.end, background):
            yield bed
开发者ID:Charlie-George,项目名称:cgat,代码行数:16,代码来源:beds2beds.py

示例12: testWrite

    def testWrite(self):

        for x, r in enumerate(self.tabix.fetch(parser=pysam.asBed())):
            c = self.compare[x]
            self.assertEqual(c, str(r).split("\t"))
            self.assertEqual(list(c), list(r))

            r.contig = "test"
            self.assertEqual("test", r.contig)
            self.assertEqual("test", r[0])

            r.start += 1
            self.assertEqual(int(c[1]) + 1, r.start)
            self.assertEqual(str(int(c[1]) + 1), r[1])

            r.end += 1
            self.assertEqual(int(c[2]) + 1, r.end)
            self.assertEqual(str(int(c[2]) + 1), r[2])
开发者ID:humburg,项目名称:pysam,代码行数:18,代码来源:tabix_test.py

示例13: read_bed_file

def read_bed_file(filename):
    """Parses a (gzip/bzip2 compressed) BED file, and yields
    a sequence of records. Comments and empty lines are skipped."""
    handle = None
    try:
        handle = fileutils.open_ro(filename)
        parser = pysam.asBed()

        for record in text.parse_lines(handle, parser):
            # Force evaluation of (lazily parsed) properties
            _ = record.start
            _ = record.end

            yield record

    finally:
        if handle:
            handle.close()
开发者ID:CarlesV,项目名称:paleomix,代码行数:18,代码来源:bedtools.py

示例14: _get_hits

def _get_hits(coords, annotation, parser_type):
    """Retrieve BED information, recovering if BED annotation file does have a chromosome.
    """
    if parser_type == "bed":
        parser = pysam.asBed()
    elif parser_type == "vcf":
        parser = pysam.asVCF()
    elif parser_type == "tuple":
        parser = pysam.asTuple()
    elif parser_type is None:
        parser = None
    else:
        raise ValueError("Unexpected parser type: %s" % parser)
    chrom, start, end = coords
    try:
        hit_iter = annotation.fetch(str(chrom), start, end, parser=parser)
    # catch invalid region errors raised by ctabix
    except ValueError:
        hit_iter = []
    return hit_iter
开发者ID:egafni,项目名称:gemini,代码行数:20,代码来源:annotations.py

示例15: _run

    def _run(self, _config, temp):
        def _by_name(bed):
            return bed.name

        fastafile = pysam.Fastafile(self._reference)
        seqs = collections.defaultdict(list)
        with open(self._bedfile) as bedfile:
            bedrecords = text.parse_lines_by_contig(bedfile, pysam.asBed())
            for (contig, beds) in sorted(bedrecords.iteritems()):
                beds.sort(key=lambda bed: (bed.contig, bed.name, bed.start))

                for (gene, gene_beds) in itertools.groupby(beds, _by_name):
                    gene_beds = tuple(gene_beds)
                    sequence = self._collect_sequence(fastafile, gene_beds)
                    seqs[(contig, gene)] = sequence

        temp_file = os.path.join(temp, "sequences.fasta")
        with open(temp_file, "w") as out_file:
            for ((_, gene), sequence) in sorted(seqs.items()):
                FASTA(gene, None, sequence).write(out_file)

        fileutils.move_file(temp_file, self._outfile)
开发者ID:beeso018,项目名称:paleomix,代码行数:22,代码来源:sequences.py


注:本文中的pysam.asBed函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。