当前位置: 首页>>代码示例>>Python>>正文


Python BedTool.filter方法代码示例

本文整理汇总了 Python 中 pybedtools.BedTool.filter 方法的典型用法代码示例。如果您正苦于以下问题:Python BedTool.filter 方法的具体用法?Python BedTool.filter 怎么用?Python BedTool.filter 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pybedtools.BedTool 的用法示例。


在下文中一共展示了 BedTool.filter 方法的 11 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: filter_bed

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
def filter_bed(bedfile, snp_list, outfile=sys.stdout):
    """Filter a bedfile to only include snps in snp_list, print to outfile.

    :bedfile:  A bed file of all the SNPs, can be gzipped.
    :snp_list: List/tuple/set/frozenset of snp names.
    :outfile:  Something .bed or .bed.gz, default STDOUT.
    :returns:  0 on success, -1 if pybedtools cannot be imported.
    :raises:   TypeError if snp_list is not a tuple/list/set/frozenset.

    """
    try:
        from pybedtools import BedTool
    except ImportError:
        logme.log('pybedtools is not installed.\n' +
                  'Please install and try again. You can get it from here:\n' +
                  'https://github.com/daler/pybedtools',
                  level='error')
        return -1

    if not isinstance(snp_list, (tuple, list, set, frozenset)):
        # TypeError is still caught by callers that catch Exception.
        raise TypeError('snp_list must be tuple/list/set/frozenset ' +
                        'it is: {}'.format(type(snp_list)))

    # The predicate runs once per bed record, so hash the names up front:
    # a list/tuple argument would otherwise cost a linear scan per record.
    wanted = frozenset(snp_list)

    filtered = BedTool(bedfile).filter(lambda a: a.name in wanted)

    with open_zipped(outfile, 'w') as fout:
        fout.write(str(filtered))

    # Report success explicitly, as the docstring has always promised.
    return 0
开发者ID:rmagoglia,项目名称:ASEr,代码行数:29,代码来源:snps.py

示例2: _get

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
def _get(relative_path, genome=None):
    """
    Resolve a file shipped next to this module, opening BED files as BedTool.

    :param relative_path: relative path of the file inside the repository.
                   When `genome` is given, the path is passed through
                   ``str.format(genome=...)`` first.
    :param genome: genome name. Can contain chromosome name after a hyphen,
                   like hg19-chr20; in case of BED, the returned BedTool will
                   be filtered to that chromosome.
    :return: BedTool object if it's a BED file (.bed or .bed.gz), or filepath
    """
    chrom = None
    if genome:
        # Split on the first '-chr' only, so a genome name that itself
        # contains hyphens no longer breaks a two-way unpack of split('-').
        genome, marker, chrom_suffix = genome.partition('-chr')
        if marker:
            chrom = 'chr' + chrom_suffix
        check_genome(genome)
        relative_path = relative_path.format(genome=genome)

    path = abspath(join(dirname(__file__), relative_path))
    # Fall back to the gzipped sibling when only that one exists on disk.
    if not isfile(path) and isfile(path + '.gz'):
        path += '.gz'

    if not path.endswith(('.bed', '.bed.gz')):
        return path

    if path.endswith('.bed.gz'):
        if not which('bedtools'):
            # .get() keeps a missing PATH variable from masking this message
            # with a KeyError.
            critical('bedtools not found in PATH: ' + str(os.environ.get('PATH', '')))
        debug('BED is compressed, creating BedTool')
    else:
        debug('BED is uncompressed, creating BedTool')
    bed = BedTool(path)

    if chrom:
        debug('Filtering BEDTool for chrom ' + chrom)
        bed = bed.filter(lambda r: r.chrom == chrom)
    return bed
开发者ID:vladsaveliev,项目名称:TargQC,代码行数:37,代码来源:__init__.py

示例3: main

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
def main():
    """Print, for every exonic peak, its distance as a fraction of exon length.

    Command-line arguments: a peaks BED, a RefSeq exons file, a RefSeq GTF and
    the name of the GTF feature of interest (matched against GTF column 3).

    Peaks are intersected with the exons, then paired with the closest
    feature of interest.  For each pairing the distance from the peak to the
    edge of its exon is extended by the lengths of the exons lying beyond
    that edge (higher exon numbers when the reported distance is positive,
    lower ones otherwise) and divided by the summed length of all exons
    stored under the same name in the exon library.  One fraction is printed
    per line to stdout.

    Records that raise ValueError while being parsed are skipped; records
    whose total exon length is zero are skipped with a warning on stderr.
    """
    parser = argparse.ArgumentParser(description=__doc__,
            formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('peaks', help='peaks bed')
    parser.add_argument('exons', help='refseq exons from UCSC')
    parser.add_argument('gtf', help='refseq gtf with feature of interest')
    parser.add_argument('feature', help='feature of interest in the gtf')
    parser.add_argument('-v', '--verbose', action="store_true", help='maximum verbosity')
    args = parser.parse_args()

    def progress(message):
        # Progress chatter goes to stderr so stdout carries only the fractions.
        if args.verbose:
            sys.stderr.write(message)

    progress(">> building exon library...\n")
    exon_lib = make_exon_lib(args.exons)

    peaks = BedTool(args.peaks)
    exons = BedTool(args.exons)
    full_ref = BedTool(args.gtf)

    progress(">> filtering for feature...\n")
    filtered_ref = full_ref.filter(lambda gtf: gtf[2] == args.feature)

    progress(">> selecting exonic peaks...\n")
    exonic_peaks = peaks.intersect(exons, wo=True)

    progress(">> calculating distance fractions...\n")
    # D for distance (returns negative if upstream)
    for peak in exonic_peaks.closest(filtered_ref, D="a"):
        try:
            # A dedicated name: the original rebound the parser variable here.
            line = ComplexLine(peak)
            # Exon number -> exon length for the exon set this peak's exon
            # belongs to (presumably one transcript; confirm in make_exon_lib).
            exon_lengths = exon_lib[line.exoninfo.name]
            exon_number = line.exoninfo.number

            # looking downstream wrt peak
            if line.gtfdistance > 0:
                # exon with peak
                corrected_distance = line.exonstop - line.peakstop
                for exon in exon_lengths:
                    # add downstream exon lengths
                    if exon > exon_number:
                        corrected_distance += exon_lengths[exon]

            # looking upstream wrt peak
            else:
                # exon with peak
                corrected_distance = line.peakstart - line.exonstart
                for exon in exon_lengths:
                    # add upstream exon lengths
                    if exon < exon_number:
                        corrected_distance += exon_lengths[exon]

            # Float accumulator keeps the division below a true division
            # on Python 2 as well.
            total_exon_length = 0.0
            for exon in exon_lengths:
                total_exon_length += exon_lengths[exon]

        except ValueError:
            continue

        if not total_exon_length:
            # Previously an uncaught ZeroDivisionError aborted the whole run.
            sys.stderr.write(">> skipping peak: total exon length is zero\n")
            continue

        # fraction
        print(corrected_distance / total_exon_length)
开发者ID:brwnj,项目名称:cu_projects,代码行数:61,代码来源:relative_cluster_positions.py

示例4: getCDSs

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
def getCDSs(bedfilename, reffilename, strand):
    """
    Return an iterator of coding sequences for one strand.

    Intervals from `bedfilename` whose strand equals `strand` are kept, their
    sequences are pulled from the FASTA `reffilename` with s=True (bedtools
    getfasta strand-aware mode), and the resulting FASTA file is handed to
    SeqIO.parse.
    """
    def on_requested_strand(interval):
        return interval.strand == strand

    stranded = BedTool(bedfilename).filter(on_requested_strand)
    extracted = stranded.sequence(fi=reffilename, s=True)
    return SeqIO.parse(extracted.seqfn, "fasta")
开发者ID:adamjorr,项目名称:zypy,代码行数:11,代码来源:degenerate.py

示例5: filterReadsByLength

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
def filterReadsByLength(inbam, minlength, maxlength):
    '''
    Convert a bam file to bed intervals and keep only those whose length is
    strictly greater than minlength and strictly less than maxlength.
    Input: bam file and min/max lengths (both bounds are exclusive)
    Output: bedTool holding the surviving intervals, materialised by saveas()
    '''
    def has_wanted_length(interval):
        return minlength < len(interval) < maxlength

    # convert bam to bed
    as_bed = BedTool(inbam).bam_to_bed()
    return as_bed.filter(has_wanted_length).saveas()
开发者ID:adomingues,项目名称:NGSpipe2go,代码行数:13,代码来源:ping-pong_signature.py

示例6: clean_bed

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
def clean_bed(bed_fpath, work_dir):
    """Produce a cleaned copy of a BED file under work_dir and return its path.

    Records with an empty chrom field, or whose chrom starts with '#', a
    space, 'track' or 'browser', are dropped; the remainder is passed through
    BedTool.remove_invalid().  Nothing is regenerated when can_reuse() says
    the existing output is still usable.
    """
    clean_fpath = intermediate_fname(work_dir, bed_fpath, 'clean')
    if can_reuse(clean_fpath, bed_fpath):
        return clean_fpath

    header_prefixes = ('#', ' ', 'track', 'browser')

    def is_data_record(rec):
        chrom = rec.chrom
        return chrom and not chrom.startswith(header_prefixes)

    # Keep pybedtools' temp files inside the work directory.
    tmp_dir = safe_mkdir(join(work_dir, 'pybedtools_tmp'))
    pybedtools.set_tempdir(tmp_dir)

    cleaned = BedTool(bed_fpath).filter(is_data_record).remove_invalid()
    with file_transaction(work_dir, clean_fpath) as tx_out_file:
        cleaned.saveas(tx_out_file)

    verify_bed(clean_fpath, is_critical=True)
    debug('Saved clean BED file into ' + clean_fpath)
    return clean_fpath
开发者ID:vladsaveliev,项目名称:Utils,代码行数:16,代码来源:bed_utils.py

示例7: _bed

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
    def _bed(self):
        """Open self.path as a BedTool, narrowed to this object's records.

        Unless self.custom is set or self.name contains '_all', only records
        whose name equals the target name, or contains the target name
        followed by '*', are kept.  The target name is self.name with
        everything up to and including the first underscore removed (or
        self.name unchanged when it has no underscore).  A non-empty result
        whose first record has more than six fields is reduced with bed6().
        """
        bt = BedTool(self.path)

        if not self.custom and '_all' not in self.name:
            # Drop first part before underscore.
            _, underscore, remainder = self.name.partition("_")
            wanted = remainder if underscore else self.name
            starred = wanted + "*"

            def by_name(rec):
                return (starred in rec.name) or (wanted == rec.name)

            bt = bt.filter(by_name).saveas()

        if len(bt) > 0:
            first_record = bt[0]
            if len(first_record.fields) > 6:
                bt = bt.bed6().saveas()

        return bt
开发者ID:dieterich-lab,项目名称:dorina,代码行数:19,代码来源:regulator.py

示例8: GenomicSubset

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
class GenomicSubset(object):
    """A named set of genomic intervals backed by a sorted BedTool.

    The intervals are read from ``path + name + '.bed'`` (plain string
    concatenation, so `path` must already end with a separator).
    """

    def __init__(self, name, path=paths.genome_subsets, assembly='hg19'):
        """Load ``<path><name>.bed`` and clip it to the given assembly.

        :param name: base name of the BED file, without the '.bed' suffix.
        :param path: directory prefix the file name is appended to.
        :param assembly: genome build; also the `name` of its reference subset.
        """
        self.assembly = assembly
        self.name = name
        self.bedtool = BedTool(path + name + '.bed').sort()

        # Intersect the pathway with the appropriate genome build
        # TODO: this step should be unnecessary if the pathways are correct
        # (Skipped when this object *is* the reference, i.e. name == assembly,
        # which also stops reference_genome() from recursing.)
        if name != self.assembly:
            self.bedtool = GenomicSubset.reference_genome(
                    self.assembly).bedtool.intersect(self.bedtool).sort().saveas()

    def expand_by(self, expansion_in_each_direction_Mb):
        """Grow every interval by the given number of megabases on both sides.

        Replaces self.bedtool with the merged union of the original intervals
        and their flanks, printing the total coverage before and after.
        """
        print('total size before window addition:', self.bedtool.total_coverage(), 'bp')

        # compute the flanks
        # TODO: use 1cM instead of 1Mb
        print('computing flanks')
        flanks = self.bedtool.flank(
            genome=self.assembly,
            b=expansion_in_each_direction_Mb*1000000).sort().merge().saveas()

        # compute the union of the flanks and the pathway
        print('computing union')
        union = self.bedtool.cat(flanks, postmerge=False).sort()
        merged = union.merge().saveas()
        print('total size after window addition:', merged.total_coverage(), 'bp')
        self.bedtool = merged

    def restricted_to_chrom_bedtool(self, chrnum):
        """Return a saved BedTool holding only the intervals on chr<chrnum>.

        `chrnum` is passed through int(), so it must be numeric.
        """
        # Build the label once instead of once per interval.
        chrom_label = 'chr' + str(int(chrnum))
        return self.bedtool.filter(
                lambda x : x[0] == chrom_label).saveas()

    @classmethod
    def reference_genome(cls, assembly='hg19'):
        """Return the subset named after `assembly`, read from paths.reference."""
        return GenomicSubset(assembly, path=paths.reference, assembly=assembly)

    @classmethod
    def reference_chrom_bedtool(cls, chrnum, assembly='hg19'):
        """Return the reference intervals of `assembly` restricted to chr<chrnum>."""
        return cls.reference_genome(assembly=assembly).restricted_to_chrom_bedtool(chrnum)

    @classmethod
    def whole_genome(cls, assembly='hg19'):
        """Return the whole-genome subset for `assembly`, read from paths.reference."""
        # Forward the assembly: leaving it at the 'hg19' default made any other
        # build get intersected with the hg19 reference in __init__.
        return cls(assembly, path=paths.reference, assembly=assembly)
开发者ID:yakirr,项目名称:statgen_y1,代码行数:47,代码来源:genome.py

示例9: _get_genome_bedtool

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
    def _get_genome_bedtool(self, genome_name, region, genes=None):
        """Return the BedTool for one region type of a genome.

        `region` selects which ``<basename>.gff`` file inside the genome's
        directory is opened; an unknown region raises ValueError.  When
        `genes` is given and does not contain 'all', only features whose name
        is in `genes` are kept.
        """
        genome_dir = Genome.path_by_name(genome_name)

        region_to_basename = {
            "any": "all",
            "CDS": "cds",
            "3prime": "3_utr",
            "5prime": "5_utr",
            "intron": "intron",
            "intergenic": "intergenic",
        }
        if region not in region_to_basename:
            raise ValueError("Invalid region: %r" % region)

        gff_path = path.join(genome_dir, "%s.gff" % region_to_basename[region])
        annotation = BedTool(gff_path)

        # Optionally, filter by gene.
        wants_every_gene = genes is None or 'all' in genes
        if wants_every_gene:
            return annotation

        def in_requested_genes(feature):
            return feature.name in genes

        return annotation.filter(in_requested_genes).saveas()
开发者ID:BIMSBbioinfo,项目名称:dorina,代码行数:22,代码来源:run.py

示例10: add_bed

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
    def add_bed(self, bedfile):
        """Store the bed records for this individual's snps as self.bed.

        Opens `bedfile` with pybedtools, keeps the records whose name is in
        self.snps and saves them on self as a list of Interval objects.

        Note: This is a slow operation.

        :returns: True on success, False if pybedtools cannot be imported.
        """
        try:
            from pybedtools import BedTool
        except ImportError:
            message = ('add_bed() failed.\n'
                       'pybedtools is not installed.\n'
                       'Please install and try again. You can get it from here:\n'
                       'https://github.com/daler/pybedtools')
            logme.log(message, level='error')
            return False

        def belongs_to_individual(record):
            return record.name in self.snps

        matching = BedTool(bedfile).filter(belongs_to_individual)
        self.bed = list(matching)
        return True
开发者ID:rmagoglia,项目名称:ASEr,代码行数:23,代码来源:snps.py

示例11: chrom_filter

# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
# NOTE(review): not used in the visible part of this script; presumably a
# distance threshold applied further down -- confirm.
maxdist = 6

# Chromosome name of every amplicon, in input order (duplicates kept).
seqnames = [amplicon.chrom for amplicon in amplicons]

def chrom_filter(feature, chrom):
    """Tell whether `feature` lies on chromosome `chrom`.

    Shaped for BedTool.filter, which passes the feature first and forwards
    extra keyword arguments such as ``chrom=...``.
    """
    on_requested_chrom = feature.chrom == chrom
    return on_requested_chrom

# Work through the amplicons one chromosome at a time: gather per-amplicon
# lengths and coordinates, then walk the reads fetched for that chromosome.
# NOTE(review): this is Python 2 code (print statement, indexing the list
# returned by map()), and the visible fragment stops before `dists` and
# `abs_dists` are put to use.
for chr in set(seqnames):
	# Per-amplicon lookup tables, keyed by amplicon name and rebuilt for
	# every chromosome.
	left_lengths=dict()
	right_lengths=dict()
	a_starts=dict()
	a_ends=dict()
	# Keep only the amplicons on the current chromosome.
	amplicons_chrom=amplicons.filter(chrom_filter, chrom=chr)
	for a in amplicons_chrom:
		# Field 10 is parsed as comma-separated integers: the first is stored
		# as the left length, the second as the right length.
		# (presumably the BED12 blockSizes column -- TODO confirm)
		left_lengths[a.name]=map(int,a.fields[10].split(","))[0]
		right_lengths[a.name]=map(int,a.fields[10].split(","))[1]
		a_starts[a.name]=a.start
		a_ends[a.name]=a.stop

	# Drop the first three characters of the chromosome name before querying
	# the alignment file (presumably the "chr" prefix -- confirm against the
	# reference names used in samfile).
	chrom=chr[3:]
	print chrom
	for read in samfile.fetch(str(chrom)):
		# Only reads flagged as reverse and not flagged as unmapped are examined.
		if read.is_reverse:
			if read.is_unmapped==False:
				dists=dict()
				abs_dists=dict()
				# Signed offset of the read's reference_end from each amplicon's
				# stop coordinate, keyed by amplicon name.
				for k, v in a_ends.items():
					dists[k]=read.reference_end-v
开发者ID:ikrier,项目名称:leukemia_analysis,代码行数:33,代码来源:remove_probes.py


注:本文中的pybedtools.BedTool.filter方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。