本文整理汇总了Python中pybedtools.BedTool.intersect方法的典型用法代码示例。如果您正苦于以下问题:Python BedTool.intersect方法的具体用法?Python BedTool.intersect怎么用?Python BedTool.intersect使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pybedtools.BedTool
的用法示例。
在下文中一共展示了BedTool.intersect方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: intersector
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import intersect [as 别名]
def intersector(sampfile, filtfile, specfr):
###Takes two bed files and will intersect them. The specfr argument is used to tell whether to keep or remove the regions that overlap in the two files given.###
sf = BedTool(sampfile)
ff = BedTool(filtfile)
if specfr == 'Keep':
endfile = sf.intersect(ff, u=True)
elif specfr == 'Discard':
endfile = sf.intersect(ff, u=False)
else: print "ERROR: Incorrect specifying argument given, please use 'Keep' or 'Discard'."
return endfile
示例2: generate_background
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import intersect [as 别名]
def generate_background(foipath,gfpath,background):
"""accepts a background filepath
generate a background and returns as a pybedtool.
Replaces the chrom fields of the foi and the gf with the interval
id from the background.
"""
bckg = background
bckgnamed = ""
interval = 0
#inserts a unique interval id into the backgrounds name field
for b in bckg:
bckgnamed += "\t".join(b[:3])+'\t{}\t'.format(interval) + "\t".join(b[4:]) + "\n"
interval += 1
bckg = BedTool(bckgnamed,from_string=True)
foi = BedTool(str(foipath))
gf = BedTool(str(gfpath))
# get the interval names from the background that the gf intersects with
gf = bckg.intersect(gf)
gfnamed = ""
# insert the interval id into the chrom field of the gf and creates a new bedtool
for g in gf:
gfnamed += '{}\t'.format(g.name) + "\t".join(g[1:]) + "\n"
#print "GFNAMED: " + str(g)
gf = BedTool(gfnamed,from_string=True)
#print "GFBEDTOOL: " + str(g)
# inserts the interval id into the chrom column of the foi and creates a new bedtool
foi = bckg.intersect(foi)
foinamed = ""
for f in foi:
foinamed += '{}\t'.format(f.name) + "\t".join(f[1:])+"\n"
#print "FOINAMED: " + str(f)
foi = BedTool(foinamed,from_string=True)
#print "FOIBEDTOOL: " + str(f)
bckgnamed = ""
for b in bckg:
bckgnamed += '{}\t'.format(b.name) + "\t".join(b[1:])+"\n"
bckg = BedTool(bckgnamed,from_string=True)
# converts the background to a genome dictionary
chrstartend = [(g.start,g.end) for g in bckg]
background = dict(zip([g.chrom for g in bckg],chrstartend))
return {"foi": foi,"gf":gf,"background":background}
run_pvalue=False,run_pybedtool=False,run_jaccard=False,run_proximity=False,run_kolmogorov=False
示例3: generate_bed_file_annotations
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import intersect [as 别名]
def generate_bed_file_annotations(bed_directory, output_directory, loci):
"""
Generates the annotation file for every bed file in the bed_directory folder
"""
# Loop over the bed files in the bed directory.
bed_file_list = glob.glob(os.path.join(bed_directory, "*.bed"))
logging.info("Start to generate BED file annotations")
logging.info("Writing annotation to: {0}/".format(output_directory))
for locus in loci:
zscore = os.path.join(output_directory, locus)
bed_lines, rsids = _bed_from_zscore(zscore)
tmp_bed = open("tmp.bed","w").writelines(bed_lines)
snps = BedTool("tmp.bed")
no_snps = _get_line_number(zscore)
a_matrix= AnnotateLociMatrix(len(bed_file_list), no_snps)
logging.info("Annotating locus: {0}, using VCF file {1}".format(locus, zscore))
for beds in bed_file_list:
test_annotation = BedTool(beds)
inter = snps.intersect(test_annotation)
idxs = []
for inte in inter:
idxs.append(rsids.index(inte.name))
zeroes = np.zeros(len(rsids))
for idx in idxs:
zeroes[idx] = 1
a_matrix.add_annotation(zeroes, beds)
annotations_file = os.path.join(output_directory, locus + ".annotations")
logging.info("Writing annotation matrix to: {0}".format(annotations_file))
a_matrix.write_annotations(annotations_file)
os.remove("tmp.bed")
示例4: get_coverage
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import intersect [as 别名]
def get_coverage(bed_prefix, directory, file_prefix, bam):
"""
Coverage at all positions is calculated. This is then used for coverage analysis and to determine read depth at any
false negative sites
:param bed_prefix: all regions in the bed files submitted are in a file generated during intersections
:param directory: location of patient results
:param file_prefix: prefix used for all files in pipeline i.e. worklist-patient
:return out: filename for coverage stats
"""
#TODO change BAM path so filename is not required
print 'Generating coverage stats.'
whole_bed = '/results/Analysis/MiSeq/MasterBED/GIAB/' + bed_prefix + '.whole.bed'
out = directory + '/giab_results/whole_bed_coverage.txt'
command = '/results/Pipeline/program/sambamba/build/sambamba depth base --min-coverage=0 -q29 -m -L ' + whole_bed + \
' ' + bam + ' > ' + out + '.tmp'
try:
subprocess.check_call(command, shell=True)
except subprocess.CalledProcessError as e:
print 'Error executing command:' + str(e.returncode)
exit(1)
print 'Sambamba complete.'
#issue with sambamba that leaves out regions that have 0 coverage - intersect regions to find missing and add
# them to the file at coverage 0
temp_bed = out.replace('.txt', '.bed.tmp')
command = 'awk \'{print($1"\\t"$2"\\t"$2+1"\\t"$3)}\' ' + out + '.tmp | grep -v "COV" > ' + temp_bed
print command
try:
subprocess.check_call(command, shell=True)
print 'BED coordinates extracted.'
except subprocess.CalledProcessError as e:
print 'Error executing command:' + str(e.returncode)
exit(1)
coverage_bed = BedTool(temp_bed)
print 'BED tool created'
whole_bedtool = BedTool(whole_bed)
print 'Intersecting'
missing_regions = whole_bedtool.intersect(coverage_bed, v=True)
missing_file = directory + '/giab_results/regions_missing'
missing_regions.moveto(missing_file)
print 'Generating file'
sample_split = file_prefix.split('-')
sample = sample_split[1] + '-' + sample_split[2]
command = '''while read i; do start=`echo "$i"|cut -f2`; end=`echo "$i"|cut -f3`; chr=`echo "$i"|cut -f1`; end_true=`echo "${end} - 1" | bc`; for j in $(seq $start $end_true); do new_end=`echo -e "${j} + 1" | bc`; echo -e "$chr\\t${j}\\t0\\t0\\t0\\t0\\t0\\t0\\t0\\t''' + sample + '''";done;done < ''' + missing_file + '> ' + directory + '/to_add'
print command
try:
subprocess.check_call(command, shell=True)
except subprocess.CalledProcessError as e:
print 'Error executing command:' + str(e.returncode)
exit(1)
command = 'cat ' + out + '.tmp ' + directory + '/to_add > ' + out
try:
subprocess.check_call(command, shell=True)
except subprocess.CalledProcessError as e:
print 'Error executing command:' + str(e.returncode)
exit(1)
print 'fix complete.'
return out
示例5: gene_regions
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import intersect [as 别名]
def gene_regions(vf, af):
v = BedTool(vf)
feats = BedTool(af)
# first establish all the columns in the annotation file
cols = set(f[4] for f in feats)
results = {}
intersection = v.intersect(feats, wb=True)
if len(intersection) > 0:
annots = intersection.groupby(g=[1,2,3,4], c=9, ops='collapse')
for entry in annots:
regions = {}
for region in entry[4].split(','):
if region in regions:
regions[region] += 1
else:
regions[region] = 1
results[entry.name] = Series(regions)
df = DataFrame(results, index = cols)
return df.T.fillna(0)
示例6: annotate_peaks
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import intersect [as 别名]
def annotate_peaks(notsif, beds, names):
"""Takes notsif, transforms to bed, and outputs annotation of where the
miRNA seed is interrogating via Cytoscape edge attribute file.
"""
strand = find_strand_from_filename(notsif)
mirna_bed = BedTool(notsif_to_bed(notsif, strand), from_string=True)
# create the reference beds
reference = {}
for name, bed in izip(names, beds):
reference[name] = BedTool(bed)
for name in names:
# intersect the mirna bed with the reference annotations
for hit in mirna_bed.intersect(reference[name], s=True, stream=True):
# name field returned from notsif_to_bed is delimited by "|"
mirna_name = hit.name.split("|")[0]
gene_name = hit.name.split("|")[1]
# Cytoscape formatting
seed_length = "(%s)" % hit.score
fields = (mirna_name, seed_length, gene_name, "=", name)
print " ".join(map(str, fields))
示例7: segmentations
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import intersect [as 别名]
def segmentations(vf, af):
v = BedTool(vf)
feats = BedTool(af)
results = {}
intersection = v.intersect(feats, wb=True)
if len(intersection) > 0:
sort_cmd1 = 'awk -F \'\t\' \'{print $1"\t"$2"\t"$3"\t"$4"\t"$8"\t"$5"_"$6"_"$7"_"$8"_"$9}\' %s 1<>%s' % (intersection.fn, intersection.fn)
call(sort_cmd1, shell=True)
annots = intersection.groupby(g=[1,2,3,4,5], c=6, ops='collapse')
for entry in annots:
regions = {}
regions[entry[4]] = entry[5]
results[entry.name] = Series(regions)
names = {
'CTCF': 'CTCF_REG',
'E': 'ENH',
'PF': 'TSS_FLANK',
'R': 'REP',
'T': 'TRAN',
'TSS': 'TSS',
'WE': 'WEAK_ENH'
}
return DataFrame(results, index=names.keys()).T.rename(columns=names)
示例8: calculate_ovl
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import intersect [as 别名]
def calculate_ovl(nbedfile, obedfile, opts, scoresfile):
nbedtool = BedTool(nbedfile)
obedtool = BedTool(obedfile)
ab = nbedtool.intersect(obedtool, wao=True, f=opts.f, r=opts.r, s=opts.s)
cmd = """cut -f4,5,10,13 | awk -F $'\t' 'BEGIN { OFS = FS } ($3 != "."){ print $1,$3,$2,$4; }'"""
sh(cmd, infile=ab.fn, outfile=scoresfile)
示例9: gene_regions
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import intersect [as 别名]
def gene_regions(vf, af):
v = BedTool(vf)
feats = BedTool(af)
# first establish all the columns in the annotation file
cols = set(f[4] for f in feats)
results = {}
intersection = v.intersect(feats, wb=True)
if len(intersection) > 0:
#sort_cmd1 = 'awk -F \'\t\' \'{print $1"\t"$2"\t"$3"\t"$4"\t"$9"\t"$5"_"$6"_"$7"_"$8"_"$9}\' %s 1<>%s' % (intersection.fn, intersection.fn)
#call(sort_cmd1, shell=True)
tempfile1 = tempfile.mktemp()
sort_cmd2 = 'awk -F \'\t\' \'{print $1"\t"$2"\t"$3"\t"$4"\t"$9"\t"$5"_"$6"_"$7"_"$8"_"$9}\' %s > %s' % (intersection.fn, tempfile1)
call(sort_cmd2, shell=True)
intersection = BedTool(tempfile1)
annots = intersection.groupby(g=[1,2,3,4,5], c=6, ops='collapse')
for entry in annots:
regions = {}
regions[entry[4]] = entry[5]
results[entry.name] = Series(regions)
df = DataFrame(results, index = cols)
return df.T.fillna(0)
示例10: main
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import intersect [as 别名]
def main():
p = argparse.ArgumentParser(description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
p.add_argument('bed', help='bed with miRNA as name')
p.add_argument('--reference-beds', dest='reference', nargs='+',
help='reference beds for each feature to annotate')
p.add_argument('--names', nargs='+',
help='names corresponding to reference files')
args = p.parse_args()
if not args.names and not args.reference:
sys.exit(p.print_help())
bed = BedTool(args.bed)
# create the reference beds
reference = {}
for refname, refbed in izip(args.names, args.reference):
reference[refname] = BedTool(refbed)
for refname in args.names:
# intersect the mirna bed with the reference annotations
for b in bed.intersect(reference[refname], s=True, stream=True):
# Cytoscape formatting
fields = (b.name, "=", refname)
print " ".join(map(str, fields))
示例11: main
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import intersect [as 别名]
def main():
p = argparse.ArgumentParser(description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
p.add_argument('peaks', help='peaks bed')
p.add_argument('exons', help='refseq exons from UCSC')
p.add_argument('gtf', help='refseq gtf with feature of interest')
p.add_argument('feature', help='feature of interest in the gtf')
p.add_argument('-v', '--verbose', action="store_true", help='maximum verbosity')
args = p.parse_args()
if args.verbose: sys.stderr.write(">> building exon library...\n")
exon_lib = make_exon_lib(args.exons)
peaks = BedTool(args.peaks)
exons = BedTool(args.exons)
full_ref = BedTool(args.gtf)
if args.verbose: sys.stderr.write(">> filtering for feature...\n")
filtered_ref = full_ref.filter(lambda gtf: gtf[2] == args.feature)
if args.verbose: sys.stderr.write(">> selecting exonic peaks...\n")
exonic_peaks = peaks.intersect(exons, wo=True)
if args.verbose: sys.stderr.write(">> calculating distance fractions...\n")
# D for distance (returns negative if upstream)
for peak in exonic_peaks.closest(filtered_ref, D="a"):
try:
p = ComplexLine(peak)
corrected_distance = 0.0
total_exon_length = 0.0
# parse gtf attrs
gene_id = p.gtfattrs.split(';')[0].rstrip('"').lstrip('gene_id "')
# looking downstream wrt peak
if p.gtfdistance > 0:
# exon with peak
corrected_distance = p.exonstop - p.peakstop
for exon in exon_lib[p.exoninfo.name]:
# add downstream exon lengths
if exon > p.exoninfo.number:
corrected_distance += exon_lib[p.exoninfo.name][exon]
# looking upstream wrt peak
else:
# exon with peak
corrected_distance = p.peakstart - p.exonstart
for exon in exon_lib[p.exoninfo.name]:
# add upstream exon lengths
if exon < p.exoninfo.number:
corrected_distance += exon_lib[p.exoninfo.name][exon]
for exon in exon_lib[p.exoninfo.name]:
total_exon_length += exon_lib[p.exoninfo.name][exon]
# fraction
print (corrected_distance / total_exon_length)
except ValueError:
continue
示例12: cpg_islands
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import intersect [as 别名]
def cpg_islands(vf, af):
print "inside cpg_islands"
v = BedTool(vf)
cpg = BedTool(af)
overlap = v.intersect(cpg, wb=True)
results = dict([ (r.name, 1) for r in overlap ])
print "exit cpg_islands"
return Series(results, name="cpg_island")
示例13: motifs
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import intersect [as 别名]
def motifs(vf, af):
print "inside motif"
v = BedTool(vf)
cpg = BedTool(af)
overlap = v.intersect(cpg, wb=True)
results = dict([ (r.name, 1) for r in overlap ])
print "exit motif"
return Series(results, name="pwm")
示例14: build_vcf_intervals
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import intersect [as 别名]
def build_vcf_intervals(reads, vcf_recs, bam_handle):
"""
Find if any of these reads match a known SUN/indel by simple bedtools intersections
"""
vcf_bed_recs = [ChromosomeInterval(x.CHROM, x.start, x.end, None) for x in vcf_recs]
vcf_bed = BedTool(vcf_bed_recs)
reads_bed_recs = [(bam_handle.getrname(x.tid), x.positions[0], x.positions[-1]) for x in reads if len(x.positions) > 2]
reads_bed = BedTool(reads_bed_recs)
return list(vcf_bed.intersect(reads_bed))
示例15: repeats
# 需要导入模块: from pybedtools import BedTool [as 别名]
# 或者: from pybedtools.BedTool import intersect [as 别名]
def repeats(vf, af):
v = BedTool(vf)
feats = BedTool(af)
intersection = v.intersect(feats, wb=True)
results = {}
if len(intersection) > 0:
annots = intersection.groupby(g=[1,2,3,4], c=8, ops='collapse')
for entry in annots:
types = entry[4].split(',')
results[entry.name] = len(types)
return Series(results, name='repeat')