本文整理汇总了Python中pybedtools.set_tempdir函数的典型用法代码示例。如果您正苦于以下问题：Python set_tempdir函数的具体用法？Python set_tempdir怎么用？Python set_tempdir使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了set_tempdir函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: genotype_intervals
def genotype_intervals(intervals_file=None, bam=None, workdir=None, window=GT_WINDOW, isize_mean=ISIZE_MEAN, isize_sd=ISIZE_SD, normal_frac_threshold=GT_NORMAL_FRAC):
    """Genotype every interval of a BED file against a BAM file.

    Each interval is parsed with ``parse_interval`` and genotyped with
    ``genotype_interval``; the genotype is appended as an extra field and the
    resulting intervals are written to ``<workdir>/genotyped.bed``.

    Args:
        intervals_file: path of the BED file whose intervals are genotyped.
        bam: path of the BAM file, opened in "rb" mode.
        workdir: working directory; created if missing, used as the
            pybedtools temp directory and as the output location.
        window: forwarded unchanged to ``genotype_interval``.
        isize_mean: mean insert size; with ``isize_sd`` it defines the
            [mean - 3*sd, mean + 3*sd] range (lower bound clamped at 0).
        isize_sd: insert-size standard deviation.
        normal_frac_threshold: forwarded unchanged to ``genotype_interval``.

    Returns:
        The ``fn`` attribute of the BedTool moved to
        ``<workdir>/genotyped.bed``.

    Raises:
        Exception: anything raised while genotyping is logged, printed and
            re-raised unchanged.
    """
    func_logger = logging.getLogger("%s-%s" % (genotype_intervals.__name__, multiprocessing.current_process()))

    if workdir and not os.path.isdir(workdir):
        os.makedirs(workdir)

    pybedtools.set_tempdir(workdir)

    genotyped_intervals = []
    start_time = time.time()

    # Accepted insert-size range: three standard deviations around the mean.
    isize_min = max(0, isize_mean - 3 * isize_sd)
    isize_max = isize_mean + 3 * isize_sd

    bam_handle = None
    try:
        bam_handle = pysam.Samfile(bam, "rb")
        for interval in pybedtools.BedTool(intervals_file):
            chrom, start, end, sv_type, svlen = parse_interval(interval)
            genotype = genotype_interval(chrom, start, end, sv_type, svlen, bam_handle, isize_min, isize_max, window, normal_frac_threshold)
            fields = interval.fields + [genotype]
            genotyped_intervals.append(pybedtools.create_interval_from_list(fields))
        bedtool = pybedtools.BedTool(genotyped_intervals).moveto(os.path.join(workdir, "genotyped.bed"))
    except Exception:
        func_logger.error('Caught exception in worker thread')

        # This prints the type, value, and stack trace of the
        # current exception being handled.
        traceback.print_exc()

        print()
        # Bare raise keeps the original traceback intact.
        raise
    finally:
        # Always release the BAM file handle, on success and on failure.
        if bam_handle is not None:
            bam_handle.close()

    func_logger.info("Genotyped %d intervals in %g minutes" % (len(genotyped_intervals), (time.time() - start_time)/60.0))

    return bedtool.fn
示例2: cleanup_unwriteable
def cleanup_unwriteable():
    """
    Remove the unwriteable directory if it exists, then point pybedtools
    back at the regular test tempdir.
    """
    directory_present = os.path.exists(unwriteable)
    if directory_present:
        removal_command = 'rm -rf %s' % unwriteable
        os.system(removal_command)

    # The tempdir is reset whether or not anything had to be removed.
    pybedtools.set_tempdir(test_tempdir)
示例3: test_stream
def test_stream():
    """
    Stream and file-based equality, both whole-file and Interval by
    Interval
    """
    a = pybedtools.example_bedtool('a.bed')
    b = pybedtools.example_bedtool('b.bed')
    c = a.intersect(b)

    # make an unwriteable dir...
    orig_tempdir = pybedtools.get_tempdir()
    if os.path.exists('unwriteable'):
        os.system('rm -rf unwriteable')
    os.system('mkdir unwriteable')
    os.system('chmod -w unwriteable')

    # Everything below runs under try/finally so that a failing assertion
    # cannot leave pybedtools pointing at the unwriteable directory.
    try:
        # ...set that to the new tempdir
        pybedtools.set_tempdir('unwriteable')

        # this should really not be written anywhere
        d = a.intersect(b, stream=True)

        assert_raises(NotImplementedError, c.__eq__, d)
        d_contents = d.fn.read()
        c_contents = open(c.fn).read()
        assert d_contents == c_contents

        # reconstruct d and check Interval-by-Interval equality
        pybedtools.set_tempdir('unwriteable')
        d = a.intersect(b, stream=True)

        for i, j in zip(c, d):
            assert str(i) == str(j)

        # Now do something similar with GFF files.
        a = pybedtools.example_bedtool('a.bed')
        f = pybedtools.example_bedtool('d.gff')

        # file-based
        pybedtools.set_tempdir(orig_tempdir)
        g1 = f.intersect(a)

        # streaming
        pybedtools.set_tempdir('unwriteable')
        g2 = f.intersect(a, stream=True)

        for i, j in zip(g1, g2):
            assert str(i) == str(j)

        # this was segfaulting at one point, just run to make sure
        g3 = f.intersect(a, stream=True)
        for i in iter(g3):
            print(i)

        for row in f.cut(range(3), stream=True):
            row[0], row[1], row[2]
            assert_raises(IndexError, row.__getitem__, 3)
    finally:
        # Restore the original tempdir and remove the scratch directory.
        pybedtools.set_tempdir(orig_tempdir)
        os.system('rm -fr unwriteable')
示例4: gcn
def gcn(args):
    """
    %prog gcn gencode.v26.exonunion.bed data/*.vcf.gz
    Compile gene copy number based on CANVAS results.
    """
    # NOTE: the docstring doubles as the usage text handed to OptionParser,
    # so it is user-facing; keep it short and free of implementation notes.
    p = OptionParser(gcn.__doc__)
    p.set_cpus()
    p.set_tmpdir(tmpdir="tmp")
    p.set_outfile()
    opts, args = p.parse_args(args)

    # At least the exon BED file and one VCF are required.
    if len(args) < 2:
        # NOTE(review): presumably print_help() returns None, giving a
        # non-zero exit status here - confirm against the OptionParser used.
        sys.exit(not p.print_help())

    exonbed = args[0]
    canvasvcfs = args[1:]
    tsvfile = opts.outfile
    tmpdir = opts.tmpdir

    # Create the temp directory and register it as the pybedtools tempdir.
    mkdir(tmpdir)
    set_tempdir(tmpdir)

    df = vcf_to_df(canvasvcfs, exonbed, opts.cpus)
    # Write one table per suffix.
    for suffix in (".avgcn", ".medcn"):
        df_to_tsv(df, tsvfile, suffix)
示例5: determine_sex
def determine_sex(work_dir, bam_fpath, avg_depth, genome, target_bed=None):
    """Infer sample sex from coverage of chrY key regions versus average depth.

    Args:
        work_dir: working directory; a ``pybedtools_tmp`` sub-directory is
            used as the pybedtools tempdir and ``male.bed`` is written here
            when ``target_bed`` is given.
        bam_fpath: path of the BAM file; ``critical`` is called if it is empty.
        avg_depth: sample average depth (converted with ``float``).
        genome: genome name; the first key of ``chry_key_regions_by_genome``
            that is contained in it selects the chrY key regions.
        target_bed: optional capture-target BED; when given, the key regions
            are restricted to their overlap with it.

    Returns:
        ``'M'``, ``'F'``, or ``None`` when sex cannot be determined (no key
        regions for the genome, no overlap with the target, or average depth
        below ``AVG_DEPTH_THRESHOLD_TO_DETERMINE_SEX``).
    """
    debug()
    debug('Determining sex')

    pybedtools.set_tempdir(safe_mkdir(join(work_dir, 'pybedtools_tmp')))

    # Pick the chrY key regions for the first known genome key found in `genome`.
    male_bed = None
    for k in chry_key_regions_by_genome:
        if k in genome:
            male_bed = BedTool(chry_key_regions_by_genome.get(k))
            break
    if not male_bed:
        warn('Warning: no male key regions for ' + genome + ', cannot identify sex')
        return None

    male_area_size = get_total_bed_size(male_bed)
    debug('Male region total size: ' + str(male_area_size))

    if target_bed:
        # Targeted sequencing: keep only key regions covered by the target.
        target_male_bed = join(work_dir, 'male.bed')
        with file_transaction(work_dir, target_male_bed) as tx:
            BedTool(target_bed).intersect(male_bed).merge().saveas(tx)
        target_male_area_size = get_total_bed_size(target_male_bed)
        if target_male_area_size == 0:
            debug('The male non-PAR region does not overlap with the capture target - cannot determine sex.')
            return None
        male_bed = target_male_bed
    else:
        debug('WGS, determining sex based on chrY key regions coverage.')

    info('Detecting sex by comparing the Y chromosome key regions coverage and average coverage depth.')
    if not bam_fpath:
        critical('BAM file is required.')
    index_bam(bam_fpath)

    chry_mean_coverage = _calc_mean_coverage(work_dir, male_bed, bam_fpath, 1)
    debug('Y key regions average depth: ' + str(chry_mean_coverage))
    avg_depth = float(avg_depth)
    debug('Sample average depth: ' + str(avg_depth))

    if avg_depth < AVG_DEPTH_THRESHOLD_TO_DETERMINE_SEX:
        debug('Sample average depth is too low (less than ' + str(AVG_DEPTH_THRESHOLD_TO_DETERMINE_SEX) +
              ') - cannot determine sex')
        return None

    if chry_mean_coverage == 0:
        # Checked first so the ratio below never divides by zero.
        debug("Y depth is 0 - it's female")
        sex = 'F'
    else:
        factor = avg_depth / chry_mean_coverage
        debug('Sample depth / Y depth = ' + str(factor))
        if factor > FEMALE_Y_COVERAGE_FACTOR:  # if mean target coverage much higher than chrY coverage
            debug('Sample depth is more than ' + str(FEMALE_Y_COVERAGE_FACTOR) + " times higher than Y depth - it's female")
            sex = 'F'
        else:
            debug('Sample depth is not more than ' + str(FEMALE_Y_COVERAGE_FACTOR) + " times higher than Y depth - it's male")
            sex = 'M'
    debug('Sex is ' + sex)
    debug()
    return sex
示例6: bedtools_tmpdir
def bedtools_tmpdir(data):
    """Run the enclosed work with pybedtools pointed at a transactional tempdir.

    This is a generator that yields exactly once.
    NOTE(review): presumably wrapped with ``contextlib.contextmanager`` where
    it is defined; the decorator is not visible here - confirm.

    Args:
        data: passed unchanged to ``tx_tmpdir`` to obtain the temp directory.

    Yields:
        None, while the pybedtools tempdir is set to the directory provided
        by ``tx_tmpdir``.
    """
    with tx_tmpdir(data) as tmpdir:
        orig_tmpdir = tempfile.gettempdir()
        pybedtools.set_tempdir(tmpdir)
        try:
            yield
        finally:
            # Restore even when the caller's block raises, so later code never
            # keeps using a tempdir that tx_tmpdir is about to tear down.
            if orig_tmpdir and os.path.exists(orig_tmpdir):
                pybedtools.set_tempdir(orig_tmpdir)
            else:
                tempfile.tempdir = None
示例7: run_spades_parallel
def run_spades_parallel(bam=None, spades=None, bed=None, work=None, pad=SPADES_PAD, nthreads=1, chrs=[],
                        max_interval_size=SPADES_MAX_INTERVAL_SIZE,
                        timeout=SPADES_TIMEOUT, isize_min=ISIZE_MIN, isize_max=ISIZE_MAX,
                        svs_to_assemble=SVS_ASSEMBLY_SUPPORTED,
                        stop_on_fail=False, max_read_pairs=EXTRACTION_MAX_READ_PAIRS):
    """Assemble the intervals of a BED file with SPAdes using a process pool.

    Intervals accepted by ``should_be_assembled`` are distributed round-robin
    over ``nthreads`` workers running ``run_spades_single``; the FASTA files
    they report are concatenated into ``<work>/spades_assembled.fa``.
    Intervals accepted by ``shouldnt_be_assembled`` are written to
    ``<work>/ignored.bed``.

    Args:
        bam, spades, pad, timeout, isize_min, isize_max, stop_on_fail,
        max_read_pairs: forwarded unchanged to ``run_spades_single``.
        bed: BED file of candidate intervals; if empty, nothing is done.
        work: working directory, also used as the pybedtools tempdir.
        nthreads: number of worker processes.
        chrs: chromosomes to restrict to; empty means all. The default list
            is only read (converted to a set), never mutated.
        max_interval_size, svs_to_assemble: forwarded to the two interval
            predicates.

    Returns:
        ``(assembled_fasta, ignored_bed)``; either element is ``None`` when
        the corresponding file was not produced, and ``(None, None)`` when no
        BED file was given.
    """
    pybedtools.set_tempdir(work)

    logger.info("Running SPAdes on the intervals in %s" % bed)
    if not bed:
        logger.info("No BED file specified")
        return None, None

    bedtool = pybedtools.BedTool(bed)

    chrs = set(chrs)
    all_intervals = [interval for interval in bedtool if not chrs or interval.chrom in chrs]

    # Materialize as lists: both are iterated repeatedly / truth-tested below,
    # which a lazy filter object would not support.
    keep = partial(should_be_assembled, max_interval_size=max_interval_size, svs_to_assemble=svs_to_assemble)
    skip = partial(shouldnt_be_assembled, max_interval_size=max_interval_size, svs_to_assemble=svs_to_assemble)
    selected_intervals = [interval for interval in all_intervals if keep(interval)]
    ignored_intervals = [interval for interval in all_intervals if skip(interval)]

    pool = multiprocessing.Pool(nthreads)
    assembly_fastas = []
    for i in range(nthreads):
        # Round-robin split: worker i gets every nthreads-th interval.
        intervals = [interval for (j, interval) in enumerate(selected_intervals) if (j % nthreads) == i]
        kwargs_dict = {"intervals": intervals, "bam": bam, "spades": spades, "work": "%s/%d" % (work, i), "pad": pad,
                       "timeout": timeout, "isize_min": isize_min, "isize_max": isize_max, "stop_on_fail": stop_on_fail,
                       "max_read_pairs": max_read_pairs}
        pool.apply_async(run_spades_single, kwds=kwargs_dict,
                         callback=partial(run_spades_single_callback, result_list=assembly_fastas))

    pool.close()
    pool.join()

    logger.info("Merging the contigs from %s" % (str(assembly_fastas)))
    assembled_fasta = os.path.join(work, "spades_assembled.fa")
    with open(assembled_fasta, "w") as assembled_fd:
        # Open each FASTA explicitly: fileinput.input([]) would silently fall
        # back to sys.argv / stdin when no assembly was produced.
        for fasta in assembly_fastas:
            with open(fasta) as fasta_fd:
                for line in fasta_fd:
                    assembled_fd.write("%s\n" % (line.strip()))

    if os.path.getsize(assembled_fasta) > 0:
        logger.info("Indexing the assemblies")
        pysam.faidx(assembled_fasta)
    else:
        logger.error("No assembly generated")
        assembled_fasta = None

    ignored_bed = None
    if ignored_intervals:
        ignored_bed = os.path.join(work, "ignored.bed")
        pybedtools.BedTool(ignored_intervals).each(add_breakpoints).saveas(ignored_bed)

    pybedtools.cleanup(remove_all=True)

    return assembled_fasta, ignored_bed
示例8: parallel_genotype_intervals
def parallel_genotype_intervals(intervals_file=None, bam=None, workdir=None, nthreads=1, chromosomes=[],
                                window=GT_WINDOW, isize_mean=ISIZE_MEAN, isize_sd=ISIZE_SD,
                                normal_frac_threshold=GT_NORMAL_FRAC):
    """Genotype the intervals of a BED file in parallel and merge the results.

    The selected intervals are split into contiguous chunks, one per worker
    process; each chunk is saved as ``<workdir>/<i>/ungenotyped.bed`` and
    handed to ``genotype_intervals``. The per-process outputs are
    concatenated, sorted and moved to ``<workdir>/genotyped.bed``.

    Args:
        intervals_file: BED file of intervals to genotype.
        bam: forwarded unchanged to ``genotype_intervals``.
        workdir: working directory; created if missing.
        nthreads: maximum number of worker processes (capped at the number of
            selected intervals).
        chromosomes: chromosomes to restrict to; empty means all. The default
            list is only read (converted to a set), never mutated.
        window, isize_mean, isize_sd, normal_frac_threshold: forwarded
            unchanged to ``genotype_intervals``.

    Returns:
        The ``fn`` attribute of the merged BedTool, or ``None`` when there is
        no intervals file, no interval is selected, or no worker produced
        output.
    """
    func_logger = logging.getLogger("%s-%s" % (parallel_genotype_intervals.__name__, multiprocessing.current_process()))
    if not intervals_file:
        func_logger.warning("No intervals file specified. Perhaps no intervals to process")
        return None

    if workdir and not os.path.isdir(workdir):
        os.makedirs(workdir)

    chromosomes = set(chromosomes)

    start_time = time.time()

    bedtool = pybedtools.BedTool(intervals_file)
    selected_intervals = [interval for interval in bedtool if not chromosomes or interval.chrom in chromosomes]

    # Nothing to do: bail out before nthreads collapses to 0, which would
    # otherwise trigger a division by zero and an invalid pool size.
    if not selected_intervals:
        func_logger.warning("No intervals selected for genotyping")
        return None

    nthreads = min(len(selected_intervals), nthreads)
    # Ceiling division; floor-division operator keeps the slice bounds integral.
    intervals_per_process = (len(selected_intervals) + nthreads - 1) // nthreads

    pool = multiprocessing.Pool(nthreads)
    genotyped_beds = []
    for i in range(nthreads):
        process_workdir = os.path.join(workdir, str(i))
        if not os.path.isdir(process_workdir):
            os.makedirs(process_workdir)
        process_intervals = pybedtools.BedTool(
            selected_intervals[i * intervals_per_process: (i + 1) * intervals_per_process]).saveas(
            os.path.join(process_workdir, "ungenotyped.bed"))
        kwargs_dict = {"intervals_file": process_intervals.fn, "bam": bam, "workdir": process_workdir, "window": window,
                       "isize_mean": isize_mean, "isize_sd": isize_sd, "normal_frac_threshold": normal_frac_threshold}
        pool.apply_async(genotype_intervals, kwds=kwargs_dict,
                         callback=partial(genotype_intervals_callback, result_list=genotyped_beds))

    pool.close()
    pool.join()

    func_logger.info("Following BED files will be merged: %s" % (str(genotyped_beds)))

    if not genotyped_beds:
        func_logger.warning("No intervals generated")
        return None

    pybedtools.set_tempdir(workdir)
    bedtool = pybedtools.BedTool(genotyped_beds[0])

    for bed_file in genotyped_beds[1:]:
        bedtool = bedtool.cat(pybedtools.BedTool(bed_file), postmerge=False)

    bedtool = bedtool.sort().moveto(os.path.join(workdir, "genotyped.bed"))

    func_logger.info("Finished parallel genotyping of %d intervals in %g minutes" % (
        len(selected_intervals), (time.time() - start_time) / 60.0))

    return bedtool.fn
示例9: make_unwriteable
def make_unwriteable():
    """
    Create a directory without write permission and make it the pybedtools
    tempdir, so that "streaming" tests can be checked for not writing
    anything to disk.
    """
    # Start from a clean slate if a previous run left the directory behind.
    if os.path.exists(unwriteable):
        os.system('rm -rf %s' % unwriteable)

    # Re-create the directory, then strip its write permission.
    for command_template in ('mkdir -p %s', 'chmod -w %s'):
        os.system(command_template % unwriteable)

    pybedtools.set_tempdir(unwriteable)
示例10: overlap_target_counts
def overlap_target_counts(bam_file, target_file, config):
    """Count BAM alignments overlapping each shRNA target.

    The counts are written to ``<counts dir>/<bam basename>.bed``; the work
    is skipped when that file already exists. Returns the output path.
    """
    out_dir = safe_makedir(config["dir"]["counts"])
    bam_stem = os.path.splitext(os.path.basename(bam_file))[0]
    out_file = os.path.join(out_dir, "{0}.bed".format(bam_stem))

    # Guard clause: reuse a previously computed result.
    if file_exists(out_file):
        return out_file

    pybedtools.set_tempdir(out_dir)
    reads_as_bed = pybedtools.BedTool(bam_file).bam_to_bed()
    targets = pybedtools.BedTool(target_file)
    targets.intersect(reads_as_bed, c=True).saveas(out_file)
    return out_file
示例11: identify_targets
def identify_targets(bam_files, config, out_base="shrna_targets"):
    """Build a BED file of target regions from the given BAM alignments.

    All BAM files are converted to BED, concatenated and merged; the result
    is saved as ``<annotation dir>/<out_base>.bed`` unless it already exists.
    Returns the output path.
    """
    work_dir = safe_makedir(config["dir"]["annotation"])
    pybedtools.set_tempdir(work_dir)
    out_file = os.path.join(work_dir, "{0}.bed".format(out_base))

    # Guard clause: reuse a previously computed result.
    if file_exists(out_file):
        return out_file

    def _concat(left, right):
        return left.cat(right)

    bam_tools = [pybedtools.BedTool(path) for path in bam_files]
    read_beds = [tool.bam_to_bed() for tool in bam_tools]
    combined = reduce(_concat, read_beds)
    merged = combined.merge(d=config["algorithm"].get("merge_distance", 0))
    merged.saveas(out_file)
    return out_file
示例12: intersect_bed
def intersect_bed(bed_name, bed_filter, tempdir='/sc/orga/scratch/richtf01'):
    """KEEPS regions of annotation of interest that overlap with
    repeat-masked regions

    Reads ``<bed_name>.merged.sorted.bed`` and writes
    ``<bed_name>.Rmsk.bed``; does nothing but report when the output file
    already exists.

    Args:
        bed_name: common prefix of the input and output BED file names.
        bed_filter: passed to ``BedTool.intersect`` as the regions to
            intersect with.
        tempdir: directory registered as the pybedtools tempdir; defaults to
            the previously hard-coded scratch location.
    """
    pybedtools.set_tempdir(tempdir)
    if not os.path.isfile(bed_name + '.Rmsk.bed'):
        bed = BedTool(bed_name + '.merged.sorted.bed')
        # Single parenthesized argument: valid on both Python 2 and 3.
        print("Keeping calls in rmsk from " + bed_name + "...")
        bed_overlap = bed.intersect(bed_filter)
        bed_overlap.saveas(bed_name + '.Rmsk.bed')
        print(bed_name + " done!")
    else:
        print(bed_name + " rmsk calls already isolated")
示例13: subtract_bed_sd
def subtract_bed_sd(bed_name, bed_filter, tempdir='/sc/orga/scratch/richtf01'):
    """REMOVES regions of annotation of interest that overlap with
    segmental duplications

    Reads ``<bed_name>.noRmsk.bed`` and writes
    ``<bed_name>.noRmsk.noSD.bed``; does nothing but report when the output
    file already exists.

    Args:
        bed_name: common prefix of the input and output BED file names.
        bed_filter: passed to ``BedTool.subtract`` as the regions to remove.
        tempdir: directory registered as the pybedtools tempdir; defaults to
            the previously hard-coded scratch location.
    """
    pybedtools.set_tempdir(tempdir)
    if not os.path.isfile(bed_name + '.noRmsk.noSD.bed'):
        bed = BedTool(bed_name + '.noRmsk.bed')
        # Single parenthesized argument: valid on both Python 2 and 3.
        print("Removing calls in seg dup from " + bed_name + "...")
        bed_no_overlap = bed.subtract(bed_filter)
        bed_no_overlap.saveas(bed_name + '.noRmsk.noSD.bed')
        print(bed_name + " done!")
    else:
        print(bed_name + " Seg dup calls already removed")
示例14: subtract_bed_rmsk
def subtract_bed_rmsk(bed_name, bed_filter, tempdir='/sc/orga/scratch/richtf01'):
    """REMOVES regions of annotation of interest that overlap with
    repeat-masked regions

    Reads ``<bed_name>.bed`` and writes ``<bed_name>.noRmsk.bed``; does
    nothing but report when the output file already exists.

    Args:
        bed_name: common prefix of the input and output BED file names.
        bed_filter: passed to ``BedTool.subtract`` as the regions to remove.
        tempdir: directory registered as the pybedtools tempdir; defaults to
            the previously hard-coded scratch location.
    """
    pybedtools.set_tempdir(tempdir)
    if not os.path.isfile(bed_name + '.noRmsk.bed'):
        bed = BedTool(bed_name + '.bed')  # .merged.sorted
        # Single parenthesized argument: valid on both Python 2 and 3.
        print("Removing calls in rmsk from " + bed_name + "...")
        bed_no_overlap = bed.subtract(bed_filter)
        bed_no_overlap.saveas(bed_name + '.noRmsk.bed')
        print(bed_name + " done!")
    else:
        print(bed_name + " rmsk calls already removed")
示例15: merge_bed
def merge_bed(bed_name, tempdir='/sc/orga/scratch/richtf01'):
    """ MERGES a bed file after removing rmsk, sd

    Reads ``<bed_name>.sorted.noRmsk.noSD.bed`` and writes
    ``<bed_name>.merged.sorted.noRmsk.noSD.bed``; does nothing but report
    when the output file already exists.

    Args:
        bed_name: common prefix of the input and output BED file names.
        tempdir: directory registered as the pybedtools tempdir; defaults to
            the previously hard-coded scratch location.
    """
    pybedtools.set_tempdir(tempdir)
    bed_in = bed_name + '.sorted.noRmsk.noSD.bed'
    bed_out = bed_name + '.merged.sorted.noRmsk.noSD.bed'
    if not os.path.isfile(bed_out):
        bed = BedTool(bed_in)
        # Single parenthesized argument: valid on both Python 2 and 3.
        print("Merging " + bed_in + "...")
        bed_merged = bed.merge()
        bed_merged.saveas(bed_out)
        print(bed_name + " done!")
    else:
        print(bed_out + " already merged")