本文整理汇总了Python中scilifelab.utils.misc.filtered_walk函数的典型用法代码示例。如果您正苦于以下问题:Python filtered_walk函数的具体用法?Python filtered_walk怎么用?Python filtered_walk使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了filtered_walk函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: find_samples
def find_samples(path, sample=None, pattern = "-bcbb-config.yaml$", only_failed=False, **kw):
    """Find bcbb config files in a path.

    :param path: path to search in
    :param sample: a specific sample, or an existing file that lists either
        -bcbb-config.yaml files or sample directories (joined onto ``path``),
        one per line
    :param pattern: regular expression that config file names must match
    :param only_failed: if true, keep only the files for which
        ``_sample_status`` returns "FAIL"
    :param kw: ``exclude_dirs`` and ``include_dirs`` are forwarded to
        ``filtered_walk``; any other keyword is ignored

    :returns: list of absolute file names
    """
    walk_opts = {
        "exclude_dirs": kw.get("exclude_dirs", None),
        "include_dirs": kw.get("include_dirs", None),
    }

    def bcbb_yaml_filter(f):
        # ``pattern`` is resolved when the filter is called, so the
        # sample-specific prefix added further down is taken into account.
        return re.search(pattern, f) is not None

    flist = []
    if sample:
        if os.path.exists(sample):
            # ``sample`` is a file: first try to read config file names from it
            with open(sample) as fh:
                samplelist = fh.readlines()
            flist = [x.rstrip() for x in samplelist if re.search(pattern, x)]
            if not flist:
                # No config files listed; treat the non-blank lines as
                # directories below ``path`` and make sure there actually
                # is a config file in each of them.
                sample_dirs = [os.path.join(path, x.rstrip()) for x in samplelist if len(x) > 1]
                flist = list(chain.from_iterable(
                    filtered_walk(d, bcbb_yaml_filter, **walk_opts) for d in sample_dirs))
            if not flist:
                return flist
        else:
            # ``sample`` is a sample name: only match that sample's config file
            pattern = "{}{}".format(sample, pattern)
    if not flist:
        flist = filtered_walk(path, bcbb_yaml_filter, **walk_opts)
    if only_failed:
        # Query the status once per file (the previous version computed an
        # unused status dict and then queried every file a second time).
        flist = [x for x in flist if _sample_status(x) == "FAIL"]
    if len(flist) == 0 and sample:
        LOG.info("No such sample {}".format(sample))
    return [os.path.abspath(f) for f in flist]
示例2: remove_finished
def remove_finished(self):
    """Remove the contents of finished sample directories.

    Every directory directly below the project path that contains
    FINISHED_FILE and does not yet contain REMOVED_FILE is emptied after
    confirmation: all files except FINISHED_FILE are unlinked and all
    subdirectories are removed. Unless ``dry_run`` is set, a timestamp from
    ``utc_time`` is then written to REMOVED_FILE in the sample directory.
    """
    if not self._check_pargs(["project"]):
        return

    # Don't filter out files
    def filter_fn(f):
        return True

    project_dir = os.path.join(self._meta.root_path, self._meta.path_id)
    for s in os.listdir(project_dir):
        spath = os.path.join(project_dir, s)
        if not os.path.isdir(spath):
            continue
        finished_marker = os.path.join(spath, FINISHED_FILE)
        if not os.path.exists(finished_marker):
            self.app.log.info("Sample {} not finished; skipping".format(s))
            continue
        # Check the removal marker before walking the tree so that samples
        # that were already removed are not traversed needlessly.
        if os.path.exists(os.path.join(spath, REMOVED_FILE)):
            self.app.log.info("Sample {} already removed; skipping".format(s))
            continue
        flist = filtered_walk(spath, filter_fn)
        dlist = filtered_walk(spath, filter_fn, get_dirs=True)
        if flist and not query_yes_no("Will remove directory {} containing {} files; continue?".format(s, len(flist)), force=self.pargs.force):
            continue
        self.app.log.info("Removing {} files from {}".format(len(flist), spath))
        for f in flist:
            # Keep the marker that flags the sample as finished
            if f == finished_marker:
                continue
            self.app.cmd.safe_unlink(f)
        self.app.log.info("Removing {} directories from {}".format(len(dlist), spath))
        # Reverse lexicographic order puts nested directories before their parents
        for d in sorted(dlist, reverse=True):
            self.app.cmd.safe_rmdir(d)
        if not self.pargs.dry_run:
            with open(os.path.join(spath, REMOVED_FILE), "w") as fh:
                fh.write(utc_time())
示例3: purge_alignments
def purge_alignments(self):
    """Cleanup sam and bam files.

    Every file ending in ".sam" below the project path is unlinked after
    confirmation; if the corresponding bam file exists, a placeholder with
    a message that the file has been removed to save space is written in
    its place.

    Afterwards, bam files found via ``include_dirs=["alignments"]`` are
    replaced by a placeholder when either a "-sort.bam" sibling or a file
    with the same name one directory level up exists.
    """
    sam_ext = ".sam"
    bam_ext = ".bam"
    root = os.path.join(self._meta.root_path, self._meta.path_id)

    def sam_filter(f):
        # Literal suffix test: the regex ".sam$" also matched names such as
        # "flotsam" because of its unescaped dot.
        return f.endswith(sam_ext)

    def bam_filter(f):
        return f.endswith(bam_ext)

    flist = filtered_walk(root, sam_filter)
    if len(flist) == 0:
        # NOTE: returning here also skips the bam cleanup below
        self.app.log.info("No sam files found")
        return
    if not query_yes_no("Going to remove/cleanup {} sam files ({}...). Are you sure you want to continue?".format(len(flist), ",".join([os.path.basename(x) for x in flist[0:10]])), force=self.pargs.force):
        return
    for f in flist:
        self.app.log.info("Purging sam file {}".format(f))
        self.app.cmd.safe_unlink(f)
        # Swap only the trailing extension; str.replace would also rewrite
        # ".sam" occurring in a directory name.
        if os.path.exists(f[:-len(sam_ext)] + bam_ext):
            self.app.cmd.write(f, "File removed to save disk space: SAM converted to BAM")
    ## Find bam files in alignments subfolders
    flist = filtered_walk(root, bam_filter, include_dirs=["alignments"])
    for f in flist:
        f_tgt = [
            f[:-len(bam_ext)] + "-sort.bam",
            os.path.join(os.path.dirname(os.path.dirname(f)), os.path.basename(f)),
        ]
        for tgt in f_tgt:
            if os.path.exists(tgt):
                self.app.log.info("Purging bam file {}".format(f))
                self.app.cmd.safe_unlink(f)
                self.app.cmd.write(f, "File removed to save disk space: Moved to {}".format(os.path.abspath(tgt)))
                # One replacement is enough; do not purge the same file again
                # when both targets exist.
                break
示例4: hs_metrics
def hs_metrics(self):
    """Run Picard CalculateHsMetrics on the bam files of a project or flowcell.

    Requires the ``project`` and ``targets`` arguments; ``baits`` falls back
    on ``targets`` when not set. Bam files whose name matches
    "<hs_file_type>.bam$" are collected from the sample directories reported
    by ``find_samples`` and, after confirmation, one java command is run per
    file. Any output returned by the command is written to stdout.
    """
    if not self._check_pargs(["project", "targets"]):
        return
    if not self.pargs.baits:
        self.pargs.baits = self.pargs.targets
    self.log.info("hs_metrics: This is a temporary solution for calculating hs metrics for samples using picard tools")
    bam_regex = "{}.bam$".format(self.pargs.hs_file_type)

    def is_hs_bam(fname):
        return re.search(bam_regex, fname) is not None

    ### FIX ME: this isn't caught by _process_args
    if self.pargs.flowcell:
        subdir = self.pargs.flowcell
    else:
        subdir = self.pargs.project
    controller_meta = self.app.controller._meta
    basedir = os.path.abspath(os.path.join(controller_meta.root_path, controller_meta.path_id))
    config_files = find_samples(basedir, **vars(self.pargs))
    sample_dirs = [os.path.dirname(cfg) for cfg in config_files]
    search_root = os.path.join(self.config.get(self.app.controller._meta.label, "root"), subdir)
    bam_files = filtered_walk(
        search_root,
        filter_fn=is_hs_bam,
        exclude_dirs=['nophix', 'alignments', 'fastqc', 'fastq_screen'],
        include_dirs=sample_dirs,
    )
    question = "Going to run hs_metrics on {} files. Are you sure you want to continue?".format(len(bam_files))
    if not query_yes_no(question, force=self.pargs.force):
        return
    targets = os.path.abspath(self.pargs.targets)
    baits = os.path.abspath(self.pargs.baits)
    for bam in bam_files:
        self.log.info("running CalculateHsMetrics on {}".format(bam))
        ### Issue with calling java from
        ### subprocess:http://stackoverflow.com/questions/9795249/issues-with-wrapping-java-program-with-pythons-subprocess-module
        ### Actually not an issue: command line arguments have to be done the right way
        cl = [
            "java",
            "-{}".format(self.pargs.java_opts),
            "-jar",
            "{}/CalculateHsMetrics.jar".format(os.getenv("PICARD_HOME")),
            "INPUT={}".format(bam),
            "TARGET_INTERVALS={}".format(targets),
            "BAIT_INTERVALS={}".format(baits),
            "OUTPUT={}".format(bam.replace(".bam", ".hs_metrics")),
            "VALIDATION_STRINGENCY=SILENT",
        ]
        out = self.app.cmd.command(cl)
        if out:
            self.app._output_data["stdout"].write(out.rstrip())
示例5: get_file_copy_list
def get_file_copy_list(proj_base_dir, dest_proj_path, fcid, deliver_all_fcs, deliver_nophix, skip_list):
    """Build the list of fastq files to deliver.

    :param proj_base_dir: project directory that is walked for fastq files
    :param dest_proj_path: destination project directory
    :param fcid: flowcell directory to restrict the walk to
    :param deliver_all_fcs: if true, do not restrict the walk to ``fcid``
    :param deliver_nophix: if true, only take files whose parent directory
        is "nophix"; otherwise only files located directly in the run directory
    :param skip_list: directories passed to ``filtered_walk`` as ``exclude_dirs``

    :returns: list of [source file, destination run path, destination file name]
    """
    include_dirs = None if deliver_all_fcs else [fcid]
    to_copy = []
    for fqfile in filtered_walk(proj_base_dir, is_fastq, include_dirs=include_dirs, exclude_dirs=skip_list):
        # The first two path components below the project directory are
        # the sample name and the run name
        rel_parts = os.path.relpath(fqfile, proj_base_dir).split(os.sep, 2)
        sample_name, run_name, _ = rel_parts
        date, fc_id = run_name.split("_")

        # Skip if we deliver from nophix and the parent dir is not nophix (or vice versa)
        source_dir = os.path.dirname(fqfile)
        wanted_parent = "nophix" if deliver_nophix else run_name
        if os.path.basename(source_dir) != wanted_parent:
            continue

        # Skip if a compressed version of the current file exists
        if os.path.exists("{:s}.gz".format(fqfile)):
            warning = (
                "WARNING: Both compressed and non-compressed versions of {:s} exists! "
                "Is compression/decompression in progress? Will deliver compressed version "
                "but you should make sure that the delivered files are complete!"
            ).format(fqfile)
            print (warning)
            continue

        print ("DEBUG: source_delivery_path = {:s}".format(os.path.dirname(fqfile)))
        fname = os.path.basename(fqfile)
        print (fname)
        to_copy.append([
            fqfile,
            os.path.join(dest_proj_path, sample_name, run_name),
            create_final_name(fname, date, fc_id, sample_name),
        ])
    return to_copy
示例6: remove_files
def remove_files(f, **kw):
    """Remove old files and directories from the directory containing ``f``.

    Everything below ``os.path.dirname(f)`` whose name matches none of the
    keep patterns is removed after confirmation.

    :param f: a file whose parent directory is cleaned
    :param kw: must contain ``force`` (passed to ``query_yes_no``) and
        ``dry_run`` (passed to ``dry_unlink`` / ``dry_rmdir``)

    :raises KeyError: if ``force`` or ``dry_run`` is needed but missing from ``kw``
    """
    ## Remove old files if requested
    keep_files = ["-post_process.yaml$", "-post_process.yaml.bak$", "-bcbb-config.yaml$", "-bcbb-config.yaml.bak$", "-bcbb-command.txt$", "-bcbb-command.txt.bak$", "_[0-9]+.fastq$", "_[0-9]+.fastq.gz$", "_[0-9]+_fastq.txt.gz$", "_[0-9]+_fastq.txt$",
                  "^[0-9][0-9]_.*.txt$", "JOBID", "PID"]
    pattern = "|".join(keep_files)

    def remove_filter_fn(fname):
        # A name is removable when it matches none of the keep patterns
        return re.search(pattern, fname) is None

    workdir = os.path.dirname(f)
    # Local names deliberately differ from the function name to avoid shadowing it
    files_to_remove = filtered_walk(workdir, remove_filter_fn)
    dirs_to_remove = filtered_walk(workdir, remove_filter_fn, get_dirs=True)
    if len(files_to_remove) == 0:
        return
    if not query_yes_no("Going to remove {} files and {} directories... Are you sure you want to continue?".format(len(files_to_remove), len(dirs_to_remove)), force=kw['force']):
        return
    for path in files_to_remove:
        dry_unlink(path, dry_run=kw['dry_run'])
    ## Sort directories by length so we don't accidentally try to remove a non-empty dir
    for path in sorted(dirs_to_remove, key=len, reverse=True):
        dry_rmdir(path, dry_run=kw['dry_run'])
示例7: _to_casava_structure
def _to_casava_structure(self, fc):
    """Transfer the samples of a flowcell to <outdir>/<sample name>/<flowcell id>.

    For every sample in ``fc`` the sequence and result files are transferred,
    a "<sample>-bcbb-pm-config.yaml" file is written to the sample output
    directory, and the -post_process.yaml files found there are rewritten
    with the output of ``prune_pp_platform_args``. A per-sample transfer
    summary is finally written to stderr.

    :param fc: flowcell object; iterated over for samples and queried via
        ``fc_id()``, ``subset()`` and ``path``
    """
    project_root = self.app.config.get("project", "root")
    outdir_pfx = os.path.abspath(os.path.join(project_root, self.pargs.project, "data"))
    if self.pargs.transfer_dir:
        # An explicit transfer directory takes precedence over the project name
        outdir_pfx = os.path.abspath(os.path.join(project_root, self.pargs.transfer_dir, "data"))

    pp_pattern = "-post_process.yaml$"

    def pp_yaml_filter(fname):
        return re.search(pp_pattern, fname) is not None

    transfer_status = {}
    for sample in fc:
        sample_name = sample["name"]
        key = "{}_{}".format(sample["lane"], sample["sequence"])
        sources = {
            "files": self._prune_sequence_files(sample["files"]),
            "results": sample["results"],
        }
        outdir = os.path.join(outdir_pfx, sample_name, fc.fc_id())
        abs_outdir = os.path.abspath(outdir)
        # Data and intermediate results currently share the same directory
        dirs = {"data": abs_outdir, "intermediate": abs_outdir}
        self._make_output_dirs(dirs)

        fc_new = fc.subset("lane", sample["lane"]).subset("name", sample_name)
        targets = {
            "files": [src.replace(fc.path, dirs["data"]) for src in sources["files"]],
            "results": [src.replace(fc.path, dirs["intermediate"]) for src in sources["results"]],
        }
        relocated_lane_files = {}
        for lane, lane_files in fc_new.lane_files.items():
            relocated_lane_files[lane] = [os.path.join(outdir, os.path.basename(x)) for x in lane_files]
        fc_new.lane_files = relocated_lane_files
        fc_new.set_entry(key, "files", targets["files"])
        fc_new.set_entry(key, "results", targets["results"])

        ## Copy sample files - currently not doing lane files
        self._transfer_files(sources, targets)
        config_file = os.path.join(dirs["data"], "{}-bcbb-pm-config.yaml".format(sample_name))
        self.app.cmd.write(config_file, fc_new.as_yaml())
        transfer_status[sample_name] = {
            "files": len(sources["files"]),
            "results": len(sources["results"]),
        }

        ## Rewrite platform_args; only keep time, workdir, account, partition, outpath and jobname
        for pp in filtered_walk(dirs["data"], pp_yaml_filter):
            self.app.log.debug("Rewriting platform args for {}".format(pp))
            with open(pp, "r") as fh:
                # NOTE(review): yaml.load without an explicit Loader can
                # construct arbitrary objects; consider yaml.safe_load.
                conf = yaml.load(fh)
            if not conf:
                self.app.log.warn("No configuration for {}".format(pp))
                continue
            newconf = prune_pp_platform_args(conf)
            if newconf == conf:
                # Nothing was pruned; leave the file untouched
                continue
            self.app.cmd.safe_unlink(pp)
            self.app.cmd.write(pp, yaml.safe_dump(newconf, default_flow_style=False, allow_unicode=True, width=1000))

    # Write transfer summary
    stderr = self.app._output_data["stderr"]
    row = "{:<18}{:>18}{:>18}\n"
    stderr.write("Transfer summary\n")
    stderr.write(row.format("Sample", "Transferred files", "Results"))
    # NOTE(review): dict.iteritems exists on Python 2 only
    for name, counts in transfer_status.iteritems():
        stderr.write(row.format(name, counts["files"], counts["results"]))
示例8: test_remove_files
def test_remove_files(self):
    """Check that 01_analysis_start.txt is never selected for removal"""
    keep_regex = "|".join([
        "-post_process.yaml$",
        "-post_process.yaml.bak$",
        "-bcbb-config.yaml$",
        "-bcbb-config.yaml.bak$",
        "-bcbb-command.txt$",
        "-bcbb-command.txt.bak$",
        "_[0-9]+.fastq$",
        "_[0-9]+.fastq.gz$",
        "^[0-9][0-9]_.*.txt$",
    ])

    def is_removable(fname):
        # Removable means: matches none of the keep patterns
        return re.search(keep_regex, fname) is None

    for config_file in find_samples(j_doe_00_05):
        candidates = filtered_walk(os.path.dirname(config_file), is_removable)
        candidate_names = [os.path.basename(x) for x in candidates]
        self.assertNotIn("01_analysis_start.txt", candidate_names)
示例9: ls
def ls(self):
    """List contents.

    Without a ``path_id`` the root path is listed with ``filter_output=True``.
    With a ``path_id`` and configured file extensions, the files below the
    path whose name ends in one of the extensions are written to stdout, one
    per line; without file extensions the path is listed via ``_ls``.
    """
    import re

    if self._meta.path_id == "":
        self._ls(self._meta.root_path, filter_output=True)
        return
    target = os.path.join(self._meta.root_path, self._meta.path_id)
    if not self._meta.file_ext:
        self._ls(target)
        return
    pattern = "|".join(["{}$".format(x) for x in self._meta.file_ext])

    def file_filter(f):
        # Bind the filter to the extension pattern built above; previously
        # the pattern was computed but never handed to any filter.
        return re.search(pattern, f) is not None

    flist = filtered_walk(target, file_filter)
    if flist:
        self.app._output_data["stdout"].write("\n".join(flist))
示例10: test_remove_dirs
def test_remove_dirs(self):
    """Check that the fastqc directory is selected for removal before a rerun"""
    keep_regex = "|".join([
        "-post_process.yaml$",
        "-post_process.yaml.bak$",
        "-bcbb-config.yaml$",
        "-bcbb-config.yaml.bak$",
        "-bcbb-command.txt$",
        "-bcbb-command.txt.bak$",
        "_[0-9]+.fastq$",
        "_[0-9]+.fastq.gz$",
    ])

    def is_removable(fname):
        # Removable means: matches none of the keep patterns
        return re.search(keep_regex, fname) is None

    for config_file in find_samples(j_doe_00_05):
        candidates = filtered_walk(os.path.dirname(config_file), is_removable, get_dirs=True)
        candidate_names = [os.path.basename(x) for x in candidates]
        self.assertIn("fastqc", candidate_names)
示例11: test_casava_transfer
def test_casava_transfer(self):
    """Transfer casava data from production to project and count the fastq files"""
    argv = ['production', 'transfer', 'J.Doe_00_03', '--debug', '--force', '--quiet']
    self.app = self.make_app(argv = argv, extensions=[])
    handler.register(ProductionController)
    self._run_app()
    os.chdir(filedir)
    j_doe_00_03 = os.path.abspath(os.path.join(filedir, "data", "projects", "j_doe_00_03"))
    fastq_regex = ".fastq(.gz)?$"

    def is_fastq_file(fname):
        return re.search(fastq_regex, fname) is not None

    # Exactly two fastq files are expected in the project after the transfer
    transferred = filtered_walk(j_doe_00_03, is_fastq_file)
    self.assertEqual(len(transferred), 2)
示例12: setUpClass
def setUpClass(cls):
    """Prepare the j_doe_00_04 test project and the sample file.

    Changes into ``filedir``, copies the j_doe_00_01 tree to j_doe_00_04
    unless it already exists, and writes the first -bcbb-config.yaml file
    found in the copy (if any) to SAMPLEFILE.
    """
    if os.getcwd() != filedir:
        os.chdir(filedir)
    LOG.info("Copy tree {} to {}".format(j_doe_00_01, j_doe_00_04))
    if not os.path.exists(j_doe_00_04):
        shutil.copytree(j_doe_00_01, j_doe_00_04)
    config_regex = "-bcbb-config.yaml$"

    def is_config_file(fname):
        return re.search(config_regex, fname) is not None

    config_files = filtered_walk(j_doe_00_04, is_config_file)
    # Only the first config file (at most) goes into the sample file
    first_only = config_files[:1]
    with open(SAMPLEFILE, "w") as fh:
        fh.write("\n".join(first_only))
示例13: clean
def clean(self):
    """Remove the files below the project path that match the configured extensions.

    The pattern is built from ``self._meta.file_ext`` (optionally followed by
    a .gz or .bz2 suffix) and stored in ``self._meta.pattern``. Matching files
    are unlinked after confirmation.
    """
    if not self._check_pargs(["project"]):
        return
    ext_patterns = ["{}(.gz|.bz2)?$".format(x) for x in self._meta.file_ext]
    self._meta.pattern = "|".join(ext_patterns)
    search_root = os.path.join(self._meta.root_path, self._meta.path_id)
    matches = filtered_walk(search_root, self._filter_fn, include_dirs=self._meta.include_dirs)
    if not matches:
        self.app.log.info("No files matching pattern '{}' found".format(self._meta.pattern))
        return
    # Show at most the first ten file names in the confirmation prompt
    preview = ",".join([os.path.basename(x) for x in matches[:10]])
    question = "Going to remove {} files ({}...). Are you sure you want to continue?".format(len(matches), preview)
    if not query_yes_no(question, force=self.pargs.force):
        return
    for fname in matches:
        self.app.log.info("removing {}".format(fname))
        self.app.cmd.safe_unlink(fname)
示例14: _compress
def _compress(self, label="compress"):
    """Run the configured compression program on a set of files.

    :param label: name of the operation; used in the prompt, in log
        messages, as the command label and in the per-file log file name

    The files are either the single ``input_file`` argument or everything
    below the project path accepted by ``self._filter_fn``. The process
    exits if the confirmation prompt is declined.
    """
    if self.pargs.input_file:
        targets = [self.pargs.input_file]
    else:
        search_root = os.path.join(self._meta.root_path, self._meta.path_id)
        targets = filtered_walk(search_root, self._filter_fn)
    if not targets:
        self.app.log.info("No files matching pattern '{}' found".format(self._meta.pattern))
        return
    # Show at most the first ten file names in the confirmation prompt
    preview = ",".join([os.path.basename(x) for x in targets[:10]])
    question = "Going to {} {} files ({}...). Are you sure you want to continue?".format(label, len(targets), preview)
    if not query_yes_no(question, force=self.pargs.force):
        sys.exit()
    for fname in targets:
        self.log.info("{}ing {}".format(label, fname))
        workdir = os.path.dirname(fname)
        log_name = "{}-{}-drmaa.log".format(label, os.path.basename(fname))
        # Run in the file's own directory and keep the log next to the file
        job_opts = {
            'workingDirectory': workdir,
            'outputPath': os.path.join(workdir, log_name),
        }
        cl = [self._meta.compress_prog, self._meta.compress_opt, "%s" % fname]
        self.app.cmd.command(cl, label, ignore_error=True, **job_opts)
示例15: best_practice
def best_practice(self):
    """Deliver best practice results to the INBOX of an uppmax project.

    The destination is /proj/<uppmax_project>/INBOX/<name>, where <name> is
    the first set value among ``outdir``, ``statusdb_project_name`` and
    ``project``. For every sample configuration file found by
    ``find_samples`` (optionally restricted to one flowcell), the files in
    its directory that match the delivery patterns are transferred. With the
    ``size`` option the summed size of the source files is written to stderr.
    """
    if not self._check_pargs(["project", "uppmax_project"]):
        return
    project_path = os.path.normpath(os.path.join("/proj", self.pargs.uppmax_project))
    if not os.path.exists(project_path):
        self.log.warn("No such project {}; skipping".format(self.pargs.uppmax_project))
        return
    inbox_name = self.pargs.outdir or self.pargs.statusdb_project_name or self.pargs.project
    outpath = os.path.join(project_path, "INBOX", inbox_name)
    if not query_yes_no("Going to deliver data to {}; continue?".format(outpath)):
        return
    if not os.path.exists(outpath):
        self.app.cmd.safe_makedir(outpath)
    kw = vars(self.pargs)
    basedir = os.path.abspath(os.path.join(self._meta.root_path, self._meta.path_id))
    config_files = find_samples(basedir, **kw)
    if self.pargs.flowcell:
        # Keep only the config files located directly in the flowcell directory
        config_files = [
            cfg for cfg in config_files
            if os.path.basename(os.path.dirname(cfg)) == self.pargs.flowcell
        ]
    if len(config_files) <= 0:
        self.log.info("No samples/sample configuration files found")
        return

    # Setup pattern
    delivery_patterns = [".*.yaml$", ".*.metrics$"]
    if not self.pargs.no_bam:
        delivery_patterns.extend([
            ".*-{}.bam$".format(self.pargs.bam_file_type),
            ".*-{}.bam.bai$".format(self.pargs.bam_file_type),
        ])
    if not self.pargs.no_vcf:
        delivery_patterns.extend([".*.vcf$", ".*.vcf.gz$", ".*.tbi$", ".*.tsv$"])
    delivery_regex = "|".join(delivery_patterns)

    def is_deliverable(fname):
        if not delivery_regex:
            return
        return re.search(delivery_regex, fname) is not None

    total_bytes = 0
    for cfg in config_files:
        sample_dir = os.path.dirname(cfg)
        sources = filtered_walk(sample_dir, filter_fn=is_deliverable, exclude_dirs=BCBIO_EXCLUDE_DIRS)
        # Mirror the layout below basedir in the delivery directory
        targets = [src.replace(basedir, outpath) for src in sources]
        self._transfer_files(sources, targets)
        if self.pargs.size:
            total_bytes = total_bytes + sum([os.stat(src).st_size for src in sources])
    if self.pargs.size:
        self.app._output_data['stderr'].write("\n********************************\nEstimated delivery size: {:.1f}G\n********************************".format(total_bytes/1e9))