本文整理汇总了 Python 中 pybedtools.BedTool.filter 方法的典型用法代码示例。如果您正苦于以下问题：Python BedTool.filter 方法的具体用法？Python BedTool.filter 怎么用？Python BedTool.filter 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pybedtools.BedTool 的用法示例。
在下文中一共展示了 BedTool.filter 方法的 11 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: filter_bed
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
def filter_bed(bedfile, snp_list, outfile=sys.stdout):
    """Filter a bedfile to only include snps in snp_list, print to outfile.

    :bedfile:  A bed file of all the SNPs, can be gzipped.
    :snp_list: List/tuple/set/frozenset of snp names.
    :outfile:  Something .bed or .bed.gz, default STDOUT.
    :returns:  0 on success, -1 if pybedtools cannot be imported.
    :raises TypeError: if snp_list is not a tuple/list/set/frozenset.
    """
    try:
        from pybedtools import BedTool
    except ImportError:
        logme.log('pybedtools is not installed.\n' +
                  'Please install and try again. You can get it from here:\n' +
                  'https://github.com/daler/pybedtools',
                  level='error')
        return -1

    if not isinstance(snp_list, (tuple, list, set, frozenset)):
        raise TypeError('snp_list must be tuple/list/set/frozenset ' +
                        'it is: {}'.format(type(snp_list)))

    # The predicate runs once per BED record, so make the membership test a
    # hash lookup instead of a linear scan when a list/tuple was passed.
    if isinstance(snp_list, (set, frozenset)):
        wanted = snp_list
    else:
        wanted = frozenset(snp_list)

    filtered = BedTool(bedfile).filter(lambda rec: rec.name in wanted)
    with open_zipped(outfile, 'w') as fout:
        fout.write(str(filtered))
    return 0
示例2: _get
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
def _get(relative_path, genome=None):
    """
    Resolve a repository-relative path and wrap it in a BedTool if it is a BED.

    :param relative_path: relative path of the file inside the repository;
        may contain a ``{genome}`` placeholder, filled in when genome is given
    :param genome: genome name. Can contain chromosome name after a hyphen,
        like hg19-chr20; in case of BED, the returned BedTool is then
        filtered to that chromosome.
    :return: BedTool object if it's a BED file (.bed or .bed.gz), otherwise
        the absolute file path
    """
    chrom = None
    if genome:
        if '-chr' in genome:
            # Split on the '-chr' marker itself rather than on every '-', so
            # genome names that contain other hyphens do not break the
            # two-way unpacking.
            genome, _, chrom_suffix = genome.partition('-chr')
            chrom = 'chr' + chrom_suffix
        check_genome(genome)
        relative_path = relative_path.format(genome=genome)

    path = abspath(join(dirname(__file__), relative_path))
    # Fall back to the gzipped copy when only that one exists on disk.
    if not isfile(path) and isfile(path + '.gz'):
        path += '.gz'

    if not path.endswith(('.bed', '.bed.gz')):
        return path

    if path.endswith('.bed.gz'):
        if not which('bedtools'):
            # .get() so that a missing PATH variable cannot raise KeyError
            # and hide the actual problem being reported.
            critical('bedtools not found in PATH: ' + str(os.environ.get('PATH')))
        debug('BED is compressed, creating BedTool')
    else:
        debug('BED is uncompressed, creating BedTool')
    bed = BedTool(path)

    if chrom:
        debug('Filtering BEDTool for chrom ' + chrom)
        bed = bed.filter(lambda r: r.chrom == chrom)
    return bed
示例3: main
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
def main():
    """Print, for every exonic peak, its exon-corrected distance fraction.

    Command-line arguments: a peaks BED, a RefSeq exons file, a RefSeq GTF
    and the GTF feature type of interest. Each peak that overlaps an exon is
    paired with its closest GTF record of that feature type; the distance
    from the peak to the end of the transcript's exons (in the direction of
    that record) is divided by the total exon length and printed, one value
    per line. Peaks whose line raises ValueError while being parsed or
    processed, or whose transcript has zero total exon length, are skipped.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('peaks', help='peaks bed')
    parser.add_argument('exons', help='refseq exons from UCSC')
    parser.add_argument('gtf', help='refseq gtf with feature of interest')
    parser.add_argument('feature', help='feature of interest in the gtf')
    parser.add_argument('-v', '--verbose', action="store_true",
                        help='maximum verbosity')
    args = parser.parse_args()

    def progress(message):
        # Progress goes to stderr so stdout carries only the fractions.
        if args.verbose:
            sys.stderr.write(message)

    progress(">> building exon library...\n")
    exon_lib = make_exon_lib(args.exons)
    peaks = BedTool(args.peaks)
    exons = BedTool(args.exons)
    full_ref = BedTool(args.gtf)

    progress(">> filtering for feature...\n")
    # Column 3 of a GTF line (index 2) is the feature type.
    filtered_ref = full_ref.filter(lambda gtf: gtf[2] == args.feature)

    progress(">> selecting exonic peaks...\n")
    exonic_peaks = peaks.intersect(exons, wo=True)

    progress(">> calculating distance fractions...\n")
    # D for distance (returns negative if upstream)
    for peak in exonic_peaks.closest(filtered_ref, D="a"):
        try:
            line = ComplexLine(peak)
            # exon number -> exon length for the transcript holding the peak
            exon_lengths = exon_lib[line.exoninfo.name]
            peak_exon = line.exoninfo.number

            if line.gtfdistance > 0:
                # looking downstream wrt peak: remainder of the exon with
                # the peak, plus every downstream exon
                corrected_distance = line.exonstop - line.peakstop
                for number, length in exon_lengths.items():
                    if number > peak_exon:
                        corrected_distance += length
            else:
                # looking upstream wrt peak: start of the exon with the
                # peak, plus every upstream exon
                corrected_distance = line.peakstart - line.exonstart
                for number, length in exon_lengths.items():
                    if number < peak_exon:
                        corrected_distance += length

            total_exon_length = 0.0
            for length in exon_lengths.values():
                total_exon_length += length
            if not total_exon_length:
                # No exon length recorded: the fraction is undefined, so
                # skip the peak instead of dying on ZeroDivisionError.
                continue

            # fraction
            print(corrected_distance / total_exon_length)
        except ValueError:
            continue
示例4: getCDSs
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
def getCDSs(bedfilename, reffilename, strand):
    """
    Return an iterator of coding sequences for the features on one strand.

    :param bedfilename: BED file describing the features
    :param reffilename: reference FASTA the sequences are extracted from
    :param strand: only features whose strand equals this value are kept
    :return: the ``SeqIO.parse`` iterator over the extracted FASTA records
    """
    on_strand = BedTool(bedfilename).filter(
        lambda feature: feature.strand == strand)
    # s=True is passed through to BedTool.sequence (presumably strand-aware
    # extraction -- confirm against the pybedtools docs).
    with_sequences = on_strand.sequence(fi=reffilename, s=True)
    return SeqIO.parse(with_sequences.seqfn, "fasta")
示例5: filterReadsByLength
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
def filterReadsByLength(inbam, minlength, maxlength):
    '''
    Convert a bam file to bed and keep the intervals within the given lengths.

    Both bounds are exclusive: an interval is kept only when
    minlength < len(interval) < maxlength.

    Input: bam file and min/max lengths
    Output: bedTool saved via saveas()
    '''
    # convert bam to bed
    as_bed = BedTool(inbam).bam_to_bed()
    within_range = as_bed.filter(
        lambda read: minlength < len(read) < maxlength)
    return within_range.saveas()
示例6: clean_bed
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
def clean_bed(bed_fpath, work_dir):
    """Write a cleaned copy of a BED file into work_dir and return its path.

    Records with an empty chromosome field, or whose chromosome field starts
    with '#', a space, 'track' or 'browser', are dropped, and
    ``remove_invalid()`` is applied to the rest. The work is skipped when
    ``can_reuse`` reports that the existing output can be reused.

    :param bed_fpath: path of the input BED file
    :param work_dir: directory for the output and the pybedtools temp files
    :return: path of the cleaned BED file
    """
    clean_fpath = intermediate_fname(work_dir, bed_fpath, 'clean')
    if can_reuse(clean_fpath, bed_fpath):
        return clean_fpath

    pybedtools.set_tempdir(safe_mkdir(join(work_dir, 'pybedtools_tmp')))

    def is_data_record(rec):
        # Header-like lines surface with the marker in the chrom column.
        return rec.chrom and not rec.chrom.startswith(
            ('#', ' ', 'track', 'browser'))

    cleaned = BedTool(bed_fpath).filter(is_data_record).remove_invalid()
    with file_transaction(work_dir, clean_fpath) as tx_out_file:
        cleaned.saveas(tx_out_file)
    verify_bed(clean_fpath, is_critical=True)
    debug('Saved clean BED file into ' + clean_fpath)
    return clean_fpath
示例7: _bed
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
def _bed(self):
    """Return a BedTool for ``self.path``, narrowed to this object's records.

    Unless ``self.custom`` is truthy or ``self.name`` contains '_all', only
    records whose name equals the target name, or contains the target name
    followed by '*', are kept. The target name is ``self.name`` with
    everything up to and including its first underscore removed. If the
    first remaining record has more than six fields, the result is reduced
    with ``bed6()``.
    """
    # Drop first part before underscore. Done once here rather than inside
    # the predicate, which is invoked for every record of the file.
    target = self.name
    if "_" in target:
        target = target.split("_", 1)[1]
    starred = target + "*"

    def by_name(rec):
        return (starred in rec.name) or (target == rec.name)

    bt = BedTool(self.path)
    if not self.custom and '_all' not in self.name:
        bt = bt.filter(by_name).saveas()
    if len(bt) > 0 and len(bt[0].fields) > 6:
        bt = bt.bed6().saveas()
    return bt
示例8: GenomicSubset
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
class GenomicSubset(object):
    """A named set of genomic intervals loaded from ``<path><name>.bed``.

    The intervals are held as a sorted BedTool in ``self.bedtool``. Unless
    the subset is the reference genome itself (``name == assembly``), it is
    intersected with the reference genome of its assembly on construction.
    """

    def __init__(self, name, path=paths.genome_subsets, assembly='hg19'):
        """Load and sort ``path + name + '.bed'``.

        :param name: subset name; also the BED file's base name
        :param path: directory prefix, concatenated as-is with the file
            name, so it must already end with a path separator
        :param assembly: name of the genome build the subset belongs to
        """
        self.assembly = assembly
        self.name = name
        self.bedtool = BedTool(path + name + '.bed').sort()

        # Intersect the pathway with the appropriate genome build
        # TODO: this step should be unnecessary if the pathways are correct
        if name != self.assembly:
            self.bedtool = GenomicSubset.reference_genome(
                self.assembly).bedtool.intersect(self.bedtool).sort().saveas()

    def expand_by(self, expansion_in_each_direction_Mb):
        """Grow every interval by the given number of Mb on both sides.

        Replaces ``self.bedtool`` with the merged union of the original
        intervals and their flanks, printing the total coverage before and
        after.
        """
        print('total size before window addition:', self.bedtool.total_coverage(), 'bp')

        # compute the flanks
        # TODO: use 1cM instead of 1Mb
        print('computing flanks')
        flanks = self.bedtool.flank(
            genome=self.assembly,
            b=expansion_in_each_direction_Mb*1000000).sort().merge().saveas()

        # compute the union of the flanks and the pathway
        print('computing union')
        union = self.bedtool.cat(flanks, postmerge=False).sort()
        merged = union.merge().saveas()
        print('total size after window addition:', merged.total_coverage(), 'bp')
        self.bedtool = merged

    def restricted_to_chrom_bedtool(self, chrnum):
        """Return a saved BedTool with only the intervals on ``chr<chrnum>``.

        :param chrnum: chromosome number; anything ``int()`` accepts
        :raises ValueError: if chrnum cannot be converted to an integer
        """
        # Built once up front instead of once per interval in the predicate.
        chrom_name = 'chr' + str(int(chrnum))
        return self.bedtool.filter(lambda x: x[0] == chrom_name).saveas()

    @classmethod
    def reference_genome(cls, assembly='hg19'):
        """Return the subset covering the reference genome of ``assembly``."""
        return GenomicSubset(assembly, path=paths.reference, assembly=assembly)

    @classmethod
    def reference_chrom_bedtool(cls, chrnum, assembly='hg19'):
        """Return the reference genome of ``assembly`` limited to one chromosome."""
        return cls.reference_genome(assembly=assembly).restricted_to_chrom_bedtool(chrnum)

    @classmethod
    def whole_genome(cls, assembly='hg19'):
        """Return the whole-genome subset of ``assembly``."""
        # Forward the assembly explicitly: relying on __init__'s 'hg19'
        # default would make any other build look like a non-reference
        # subset and intersect it with the hg19 reference.
        return cls(assembly, path=paths.reference, assembly=assembly)
示例9: _get_genome_bedtool
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
def _get_genome_bedtool(self, genome_name, region, genes=None):
    """Get the BedTool for a genome depending on the name and the region.

    :param genome_name: name passed to ``Genome.path_by_name``
    :param region: one of "any", "CDS", "3prime", "5prime", "intron",
        "intergenic"; selects which ``<region>.gff`` file is opened
    :param genes: optional collection of gene names; when given and it does
        not contain 'all', only records with a matching name are returned
    :raises ValueError: if region is not one of the accepted values
    """
    genome = Genome.path_by_name(genome_name)
    mapping = {
        "any": "all",
        "CDS": "cds",
        "3prime": "3_utr",
        "5prime": "5_utr",
        "intron": "intron",
        "intergenic": "intergenic",
    }
    gff_stem = mapping.get(region)
    if gff_stem is None:
        raise ValueError("Invalid region: %r" % region)
    bed = BedTool(path.join(genome, "%s.gff" % gff_stem))

    # Optionally, filter by gene.
    if genes is not None and 'all' not in genes:
        return bed.filter(lambda x: x.name in genes).saveas()
    return bed
示例10: add_bed
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
def add_bed(self, bedfile):
    """Store the BED records for this individual's snps as self.bed.

    Requires pybedtools. Only records whose name is in ``self.snps`` are
    kept; they are stored as a list of pybedtools Interval objects.

    Note: This is a slow operation.

    :returns: True on success, False if pybedtools cannot be imported.
    """
    try:
        from pybedtools import BedTool
    except ImportError:
        logme.log('add_bed() failed.\n' +
                  'pybedtools is not installed.\n' +
                  'Please install and try again. You can get it from here:\n' +
                  'https://github.com/daler/pybedtools',
                  level='error')
        return False

    matching = BedTool(bedfile).filter(lambda rec: rec.name in self.snps)
    self.bed = list(matching)
    return True
示例11: chrom_filter
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import filter [as 别名]
maxdist = 6
# Chromosome name of every amplicon, in input order (duplicates included).
seqnames = [a.chrom for a in amplicons]
def chrom_filter(feature, chrom):
    """Return True when the feature lies on the chromosome named ``chrom``."""
    return chrom == feature.chrom
for chr in set(seqnames):
left_lengths=dict()
right_lengths=dict()
a_starts=dict()
a_ends=dict()
amplicons_chrom=amplicons.filter(chrom_filter, chrom=chr)
for a in amplicons_chrom:
left_lengths[a.name]=map(int,a.fields[10].split(","))[0]
right_lengths[a.name]=map(int,a.fields[10].split(","))[1]
a_starts[a.name]=a.start
a_ends[a.name]=a.stop
chrom=chr[3:]
print chrom
for read in samfile.fetch(str(chrom)):
if read.is_reverse:
if read.is_unmapped==False:
dists=dict()
abs_dists=dict()
for k, v in a_ends.items():
dists[k]=read.reference_end-v