当前位置: 首页>>代码示例>>Python>>正文


Python misc.filtered_walk函数代码示例

本文整理汇总了Python中scilifelab.utils.misc.filtered_walk函数的典型用法代码示例。如果您正苦于以下问题:Python filtered_walk函数的具体用法?Python filtered_walk怎么用?Python filtered_walk使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了filtered_walk函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: find_samples

def find_samples(path, sample=None, pattern="-bcbb-config.yaml$", only_failed=False, **kw):
    """Find bcbb config files in a path.

    :param path: path to search in
    :param sample: a specific sample name, or the path of an existing file
        with one entry per line. Lines matching ``pattern`` are taken as
        config files directly; if none match, every non-blank line is
        joined onto ``path`` and searched for config files.
    :param pattern: regular expression the config file names must match
    :param only_failed: if true, keep only files for which
        ``_sample_status`` returns ``"FAIL"``
    :param kw: ``exclude_dirs`` and ``include_dirs`` are forwarded to
        ``filtered_walk``

    :returns: list of absolute file names
    """
    exclude_dirs = kw.get("exclude_dirs", None)
    include_dirs = kw.get("include_dirs", None)

    def bcbb_yaml_filter(f):
        # ``pattern`` is looked up at call time, so the sample-prefixed
        # pattern assigned further down is the one that gets applied.
        return re.search(pattern, f) is not None

    flist = []
    if sample:
        if os.path.exists(sample):
            with open(sample) as fh:
                samplelist = fh.readlines()
            flist = [x.rstrip() for x in samplelist if re.search(pattern, x)]
            if not flist:
                # The file lists sample directories rather than config
                # files; make sure there actually is a config file in each.
                sample_dirs = [os.path.join(path, x.rstrip()) for x in samplelist if len(x) > 1]
                flist = list(chain.from_iterable(
                    filtered_walk(d, bcbb_yaml_filter, exclude_dirs=exclude_dirs, include_dirs=include_dirs)
                    for d in sample_dirs
                ))
            if not flist:
                return flist
        else:
            # Not a file: treat ``sample`` as a name prefix of the config file
            pattern = "{}{}".format(sample, pattern)
    if not flist:
        flist = filtered_walk(path, bcbb_yaml_filter, exclude_dirs=exclude_dirs, include_dirs=include_dirs)
    if only_failed:
        # Query the status once per file
        flist = [x for x in flist if _sample_status(x) == "FAIL"]
    if not flist and sample:
        LOG.info("No such sample {}".format(sample))
    return [os.path.abspath(f) for f in flist]
开发者ID:percyfal,项目名称:scilifelab,代码行数:33,代码来源:run.py

示例2: remove_finished

 def remove_finished(self):
     """Remove the contents of every finished sample directory.

     Iterates over the entries of ``root_path/path_id``. An entry is
     processed only if it is a directory that contains ``FINISHED_FILE``
     and does not yet contain ``REMOVED_FILE``. After confirmation, all
     files except ``FINISHED_FILE`` are unlinked, the sub-directories are
     removed, and (unless this is a dry run) the current UTC time is
     written to ``REMOVED_FILE``.
     """
     if not self._check_pargs(["project"]):
         return

     # Don't filter out files
     def filter_fn(f):
         return True

     project_dir = os.path.join(self._meta.root_path, self._meta.path_id)
     for s in os.listdir(project_dir):
         spath = os.path.join(project_dir, s)
         if not os.path.isdir(spath):
             continue
         if not os.path.exists(os.path.join(spath, FINISHED_FILE)):
             self.app.log.info("Sample {} not finished; skipping".format(s))
             continue
         # Check the removal marker before walking the tree so samples
         # that were already removed do not cost two directory traversals.
         if os.path.exists(os.path.join(spath, REMOVED_FILE)):
             self.app.log.info("Sample {} already removed; skipping".format(s))
             continue
         flist = filtered_walk(spath, filter_fn)
         dlist = filtered_walk(spath, filter_fn, get_dirs=True)
         if len(flist) > 0 and not query_yes_no("Will remove directory {} containing {} files; continue?".format(s, len(flist)), force=self.pargs.force):
             continue
         self.app.log.info("Removing {} files from {}".format(len(flist), spath))
         finished_marker = os.path.join(spath, FINISHED_FILE)
         for f in flist:
             # Keep the marker that records the sample as finished
             if f == finished_marker:
                 continue
             self.app.cmd.safe_unlink(f)
         self.app.log.info("Removing {} directories from {}".format(len(dlist), spath))
         # Reverse-sorted order visits a child directory before its parent
         for d in sorted(dlist, reverse=True):
             self.app.cmd.safe_rmdir(d)
         if not self.pargs.dry_run:
             with open(os.path.join(spath, REMOVED_FILE), "w") as fh:
                 fh.write(utc_time())

示例3: purge_alignments

    def purge_alignments(self):
        """Cleanup sam and bam files. In some cases, sam files
        persist. If the corresponding bam file exists, replace the sam
        file contents with a message that the file has been removed to
        save space.

        Bam files found below ``alignments`` directories are purged the
        same way when either a ``-sort.bam`` sibling or a file with the
        same name one directory level up exists.
        """
        def suffix_filter(suffix):
            # Literal suffix match; the previous regexes (".sam$", ".bam$")
            # left the dot unescaped and so also matched e.g. "flotsam".
            return lambda f: f.endswith(suffix)

        root = os.path.join(self._meta.root_path, self._meta.path_id)
        flist = filtered_walk(root, suffix_filter(".sam"))
        if len(flist) == 0:
            self.app.log.info("No sam files found")
            return
        if not query_yes_no("Going to remove/cleanup {} sam files ({}...). Are you sure you want to continue?".format(len(flist), ",".join([os.path.basename(x) for x in flist[0:10]])), force=self.pargs.force):
            return
        for f in flist:
            self.app.log.info("Purging sam file {}".format(f))
            self.app.cmd.safe_unlink(f)
            # Swap only the trailing extension; a replace-all would also
            # rewrite ".sam" occurring in a directory name.
            if os.path.exists(f[:-len(".sam")] + ".bam"):
                self.app.cmd.write(f, "File removed to save disk space: SAM converted to BAM")

        ## Find bam files in alignments subfolders
        flist = filtered_walk(root, suffix_filter(".bam"), include_dirs=["alignments"])
        for f in flist:
            f_tgt = [
                f[:-len(".bam")] + "-sort.bam",
                os.path.join(os.path.dirname(os.path.dirname(f)), os.path.basename(f)),
            ]
            for tgt in f_tgt:
                if os.path.exists(tgt):
                    self.app.log.info("Purging bam file {}".format(f))
                    self.app.cmd.safe_unlink(f)
                    self.app.cmd.write(f, "File removed to save disk space: Moved to {}".format(os.path.abspath(tgt)))
                    # One replacement is enough; do not purge the same
                    # file again if the second target exists as well.
                    break

示例4: hs_metrics

 def hs_metrics(self):
     """Run Picard CalculateHsMetrics on the bam files of a project or flowcell.

     Requires the ``project`` and ``targets`` arguments; ``baits``
     defaults to ``targets``. Bam files whose names end in
     ``<hs_file_type>.bam`` are collected from the directories holding
     sample configuration files, and after confirmation one
     ``java -jar CalculateHsMetrics.jar`` command is issued per file,
     writing to a sibling ``.hs_metrics`` file.
     """
     if not self._check_pargs(["project", "targets"]):
         return
     if not self.pargs.baits:
         self.pargs.baits = self.pargs.targets
     self.log.info("hs_metrics: This is a temporary solution for calculating hs metrics for samples using picard tools")
     # The dot is escaped so that only a real ".bam" extension matches
     pattern = r"{}\.bam$".format(self.pargs.hs_file_type)

     def filter_fn(f):
         return re.search(pattern, f) is not None

     ### FIX ME: this isn't caught by _process_args
     path = self.pargs.flowcell if self.pargs.flowcell else self.pargs.project
     basedir = os.path.abspath(os.path.join(self.app.controller._meta.root_path, self.app.controller._meta.path_id))
     samples = find_samples(basedir, **vars(self.pargs))
     inc_dirs = [os.path.dirname(x) for x in samples]
     flist = filtered_walk(os.path.join(self.config.get(self.app.controller._meta.label, "root"), path), filter_fn=filter_fn, exclude_dirs=['nophix', 'alignments', 'fastqc', 'fastq_screen'], include_dirs=inc_dirs)
     if not query_yes_no("Going to run hs_metrics on {} files. Are you sure you want to continue?".format(len(flist)), force=self.pargs.force):
         return
     targets = os.path.abspath(self.pargs.targets)
     baits = os.path.abspath(self.pargs.baits)
     for f in flist:
         self.log.info("running CalculateHsMetrics on {}".format(f))
         ### Issue with calling java from
         ### subprocess:http://stackoverflow.com/questions/9795249/issues-with-wrapping-java-program-with-pythons-subprocess-module
         ### Actually not an issue: command line arguments have to be done the right way
         cl = [
             "java",
             "-{}".format(self.pargs.java_opts),
             "-jar", "{}/CalculateHsMetrics.jar".format(os.getenv("PICARD_HOME")),
             "INPUT={}".format(f),
             "TARGET_INTERVALS={}".format(targets),
             "BAIT_INTERVALS={}".format(baits),
             # Swap only the extension; a replace-all on the path would
             # also rewrite ".bam" occurring in a directory name.
             "OUTPUT={}".format(os.path.splitext(f)[0] + ".hs_metrics"),
             "VALIDATION_STRINGENCY=SILENT",
         ]
         out = self.app.cmd.command(cl)
         if out:
             self.app._output_data["stdout"].write(out.rstrip())

示例5: get_file_copy_list

def get_file_copy_list(proj_base_dir, dest_proj_path, fcid, deliver_all_fcs, deliver_nophix, skip_list):
    """Collect the fastq files to deliver from a project directory.

    Walks ``proj_base_dir`` for files accepted by ``is_fastq``, limited to
    the ``fcid`` directory unless ``deliver_all_fcs`` is set and skipping
    the directories in ``skip_list``. Paths are expected to look like
    ``<sample>/<date>_<flowcell>/...`` relative to ``proj_base_dir``.

    :returns: list of ``[source file, destination run directory,
        destination file name]`` entries
    """
    include = None if deliver_all_fcs else [fcid]
    copy_jobs = []
    for fastq in filtered_walk(proj_base_dir, is_fastq, include_dirs=include, exclude_dirs=skip_list):
        # The first two path components below the project directory are
        # the sample name and the run name
        rel_parts = os.path.relpath(fastq, proj_base_dir).split(os.sep, 2)
        sample_name, run_name, _ = rel_parts
        date, fc_id = run_name.split("_")

        # Only take files that sit directly in the directory being
        # delivered from: "nophix" or the run directory itself
        source_dir = os.path.dirname(fastq)
        wanted_parent = "nophix" if deliver_nophix else run_name
        if os.path.basename(source_dir) != wanted_parent:
            continue

        # A compressed sibling takes precedence over the plain file
        if os.path.exists("{:s}.gz".format(fastq)):
            warning = (
                "WARNING: Both compressed and non-compressed versions of {:s} exists! "
                "Is compression/decompression in progress? Will deliver compressed version "
                "but you should make sure that the delivered files are complete!"
            ).format(fastq)
            print(warning)
            continue

        print("DEBUG: source_delivery_path = {:s}".format(source_dir))

        fname = os.path.basename(fastq)
        print(fname)

        copy_jobs.append([
            fastq,
            os.path.join(dest_proj_path, sample_name, run_name),
            create_final_name(fname, date, fc_id, sample_name),
        ])
    return copy_jobs

示例6: remove_files

def remove_files(f, **kw):
    """Remove old files and directories next to ``f`` if requested.

    Everything below the directory containing ``f`` is a removal candidate
    unless it matches one of the keep patterns (configuration files,
    command files, fastq input, numbered ``.txt`` files, JOBID, PID).
    Nothing is removed without confirmation from ``query_yes_no``.

    :param f: a file inside the working directory to clean
    :param kw: must contain ``force`` (forwarded to ``query_yes_no``) and
        ``dry_run`` (forwarded to ``dry_unlink`` / ``dry_rmdir``)
    """
    keep_files = ["-post_process.yaml$", "-post_process.yaml.bak$", "-bcbb-config.yaml$", "-bcbb-config.yaml.bak$",  "-bcbb-command.txt$", "-bcbb-command.txt.bak$", "_[0-9]+.fastq$", "_[0-9]+.fastq.gz$", "_[0-9]+_fastq.txt.gz$", "_[0-9]+_fastq.txt$",
                  "^[0-9][0-9]_.*.txt$", "JOBID", "PID"]
    pattern = "|".join(keep_files)

    def remove_filter_fn(fname):
        # Removable means: matches none of the keep patterns
        return re.search(pattern, fname) is None

    workdir = os.path.dirname(f)
    files_to_remove = filtered_walk(workdir, remove_filter_fn)
    dirs_to_remove = filtered_walk(workdir, remove_filter_fn, get_dirs=True)
    if len(files_to_remove) == 0:
        return
    if not query_yes_no("Going to remove {} files and {} directories... Are you sure you want to continue?".format(len(files_to_remove), len(dirs_to_remove)), force=kw['force']):
        return
    for path in files_to_remove:
        dry_unlink(path, dry_run=kw['dry_run'])
    ## Sort directories by length so we don't accidentally try to remove a non-empty dir
    for path in sorted(dirs_to_remove, key=len, reverse=True):
        dry_rmdir(path, dry_run=kw['dry_run'])

示例7: _to_casava_structure

    def _to_casava_structure(self, fc):
        """Transfer the samples of flowcell ``fc`` into a
        ``<root>/<project>/data/<sample>/<flowcell id>`` layout.

        For every sample the sequence and result files are transferred, a
        ``<sample>-bcbb-pm-config.yaml`` describing the sample is written,
        and afterwards the platform arguments of every transferred
        ``-post_process.yaml`` file are pruned. A per-sample transfer
        summary is written to the stderr output buffer.

        :param fc: flowcell object; iterating it yields sample dicts with
            the keys ``lane``, ``sequence``, ``name``, ``files`` and
            ``results``
        """
        transfer_status = {}
        # ``transfer_dir`` overrides the project name as destination
        project_dir = self.pargs.transfer_dir if self.pargs.transfer_dir else self.pargs.project
        outdir_pfx = os.path.abspath(os.path.join(self.app.config.get("project", "root"), project_dir, "data"))
        # Data directories of all samples, so the post-processing step
        # covers every sample and copes with a flowcell without samples.
        data_dirs = []
        for sample in fc:
            key = "{}_{}".format(sample["lane"], sample["sequence"])
            sources = {"files": self._prune_sequence_files(sample["files"]), "results": sample["results"]}
            outdir = os.path.join(outdir_pfx, sample["name"], fc.fc_id())
            dirs = {
                "data": os.path.abspath(outdir),
                "intermediate": os.path.abspath(outdir),
            }
            self._make_output_dirs(dirs)
            if dirs["data"] not in data_dirs:
                data_dirs.append(dirs["data"])
            fc_new = fc.subset("lane", sample["lane"]).subset("name", sample["name"])
            targets = {
                "files": [src.replace(fc.path, dirs["data"]) for src in sources["files"]],
                "results": [src.replace(fc.path, dirs["intermediate"]) for src in sources["results"]],
            }

            fc_new.lane_files = dict(
                (k, [os.path.join(outdir, os.path.basename(x)) for x in v]) for k, v in fc_new.lane_files.items()
            )
            fc_new.set_entry(key, "files", targets["files"])
            fc_new.set_entry(key, "results", targets["results"])
            ## Copy sample files - currently not doing lane files
            self._transfer_files(sources, targets)
            self.app.cmd.write(
                os.path.join(dirs["data"], "{}-bcbb-pm-config.yaml".format(sample["name"])), fc_new.as_yaml()
            )
            transfer_status[sample["name"]] = {"files": len(sources["files"]), "results": len(sources["results"])}

        ## Rewrite platform_args; only keep time, workdir, account, partition, outpath and jobname
        pattern = "-post_process.yaml$"

        def pp_yaml_filter(f):
            return re.search(pattern, f) is not None

        ppfiles = []
        for data_dir in data_dirs:
            ppfiles.extend(filtered_walk(data_dir, pp_yaml_filter))
        for pp in ppfiles:
            self.app.log.debug("Rewriting platform args for {}".format(pp))
            with open(pp, "r") as fh:
                # NOTE(review): yaml.load without an explicit Loader can
                # construct arbitrary objects and is rejected by recent
                # PyYAML releases; consider yaml.safe_load if these files
                # only hold plain data.
                conf = yaml.load(fh)
            if not conf:
                self.app.log.warn("No configuration for {}".format(pp))
                continue
            newconf = prune_pp_platform_args(conf)
            if newconf == conf:
                continue
            self.app.cmd.safe_unlink(pp)
            self.app.cmd.write(pp, yaml.safe_dump(newconf, default_flow_style=False, allow_unicode=True, width=1000))

        # Write transfer summary
        self.app._output_data["stderr"].write("Transfer summary\n")
        self.app._output_data["stderr"].write("{:<18}{:>18}{:>18}\n".format("Sample", "Transferred files", "Results"))
        for k, v in transfer_status.items():
            self.app._output_data["stderr"].write("{:<18}{:>18}{:>18}\n".format(k, v["files"], v["results"]))

示例8: test_remove_files

 def test_remove_files(self):
     """Files matching a keep pattern must not be selected for removal"""
     keep_patterns = ["-post_process.yaml$", "-post_process.yaml.bak$", "-bcbb-config.yaml$", "-bcbb-config.yaml.bak$",  "-bcbb-command.txt$", "-bcbb-command.txt.bak$", "_[0-9]+.fastq$", "_[0-9]+.fastq.gz$", "^[0-9][0-9]_.*.txt$"]
     keep_regex = "|".join(keep_patterns)

     def remove_filter_fn(fname):
         # Select a file for removal only when no keep pattern matches
         return re.search(keep_regex, fname) is None

     for config in find_samples(j_doe_00_05):
         candidates = filtered_walk(os.path.dirname(config), remove_filter_fn)
         candidate_names = [os.path.basename(c) for c in candidates]
         self.assertNotIn("01_analysis_start.txt", candidate_names)

示例9: ls

 def ls(self):
     """List the contents of the controller's root path or of ``path_id``.

     Without a ``path_id`` the root path is listed through ``_ls`` with
     output filtering enabled. With a ``path_id`` and configured file
     extensions, the files below that path whose names end in one of the
     extensions are written, one per line, to the stdout output buffer;
     without extensions the path is listed through ``_ls``.
     """
     # Local import: this block cannot assume ``re`` is imported at file level
     import re

     if self._meta.path_id == "":
         self._ls(self._meta.root_path, filter_output=True)
         return
     target = os.path.join(self._meta.root_path, self._meta.path_id)
     if not self._meta.file_ext:
         self._ls(target)
         return
     pattern = "|".join(["{}$".format(x) for x in self._meta.file_ext])

     def file_filter(f):
         # The pattern was previously built but never applied; match the
         # file name against the configured extensions here.
         return re.search(pattern, f) is not None

     flist = filtered_walk(target, file_filter)
     if flist:
         self.app._output_data["stdout"].write("\n".join(flist))

示例10: test_remove_dirs

 def test_remove_dirs(self):
     """Directories such as fastqc must be selected for removal before a rerun"""
     keep_patterns = ["-post_process.yaml$", "-post_process.yaml.bak$", "-bcbb-config.yaml$", "-bcbb-config.yaml.bak$",  "-bcbb-command.txt$", "-bcbb-command.txt.bak$", "_[0-9]+.fastq$", "_[0-9]+.fastq.gz$"]
     keep_regex = "|".join(keep_patterns)

     def remove_filter_fn(fname):
         # Select an entry for removal only when no keep pattern matches
         return re.search(keep_regex, fname) is None

     for config in find_samples(j_doe_00_05):
         candidates = filtered_walk(os.path.dirname(config), remove_filter_fn, get_dirs=True)
         candidate_names = [os.path.basename(c) for c in candidates]
         self.assertIn("fastqc", candidate_names)

示例11: test_casava_transfer

 def test_casava_transfer(self):
     """Transferring J.Doe_00_03 from production must leave two fastq files in the project"""
     argv = ['production', 'transfer', 'J.Doe_00_03', '--debug', '--force', '--quiet']
     self.app = self.make_app(argv=argv, extensions=[])
     handler.register(ProductionController)
     self._run_app()
     os.chdir(filedir)
     project_dir = os.path.abspath(os.path.join(filedir, "data", "projects", "j_doe_00_03"))
     fastq_regex = ".fastq(.gz)?$"

     def fastq_filter(fname):
         # Accept plain and gzip-compressed fastq files
         return re.search(fastq_regex, fname) is not None

     transferred = filtered_walk(project_dir, fastq_filter)
     self.assertEqual(len(transferred), 2)

示例12: setUpClass

 def setUpClass(cls):
     """Prepare the j_doe_00_04 fixture and the sample list file.

     Changes into ``filedir``, copies the j_doe_00_01 tree to j_doe_00_04
     if the copy does not exist yet, and writes the first bcbb config
     file found in the copy to ``SAMPLEFILE``.
     """
     if os.getcwd() != filedir:
         os.chdir(filedir)
     LOG.info("Copy tree {} to {}".format(j_doe_00_01, j_doe_00_04))
     if not os.path.exists(j_doe_00_04):
         shutil.copytree(j_doe_00_01, j_doe_00_04)
     config_regex = "-bcbb-config.yaml$"

     def yaml_filter(fname):
         return re.search(config_regex, fname) is not None

     # The slice keeps at most one entry and tolerates an empty result
     first_config = filtered_walk(j_doe_00_04, yaml_filter)[0:1]
     with open(SAMPLEFILE, "w") as fh:
         fh.write("\n".join(first_config))

示例13: clean

 def clean(self):
     """Remove files with the controller's file extensions from a project.

     Sets ``self._meta.pattern`` to a regular expression built from
     ``self._meta.file_ext`` (each optionally followed by a .gz or .bz2
     suffix), collects the files accepted by ``self._filter_fn``
     (presumably matching that pattern -- the filter is defined outside
     this block) and unlinks them after confirmation.
     """
     if not self._check_pargs(["project"]):
         return
     ext_patterns = ["{}(.gz|.bz2)?$".format(ext) for ext in self._meta.file_ext]
     self._meta.pattern = "|".join(ext_patterns)
     search_root = os.path.join(self._meta.root_path, self._meta.path_id)
     flist = filtered_walk(search_root, self._filter_fn, include_dirs=self._meta.include_dirs)
     if not flist:
         self.app.log.info("No files matching pattern '{}' found".format(self._meta.pattern))
         return
     # Show at most the first ten file names in the confirmation prompt
     preview = ",".join(os.path.basename(x) for x in flist[0:10])
     question = "Going to remove {} files ({}...). Are you sure you want to continue?".format(len(flist), preview)
     if not query_yes_no(question, force=self.pargs.force):
         return
     for target in flist:
         self.app.log.info("removing {}".format(target))
         self.app.cmd.safe_unlink(target)

示例14: _compress

    def _compress(self, label="compress"):
        """Run the configured compression program on a set of files.

        Operates on ``self.pargs.input_file`` if given, otherwise on every
        file below ``root_path/path_id`` accepted by ``self._filter_fn``.
        After confirmation one command is issued per file, with the
        file's directory as working directory and a per-file
        ``<label>-<file name>-drmaa.log`` as output path. Declining the
        confirmation exits the program.

        :param label: verb used in prompts and log messages and passed on
            to the command runner
        """
        if self.pargs.input_file:
            flist = [self.pargs.input_file]
        else:
            search_root = os.path.join(self._meta.root_path, self._meta.path_id)
            flist = filtered_walk(search_root, self._filter_fn)

        if not flist:
            self.app.log.info("No files matching pattern '{}' found".format(self._meta.pattern))
            return
        # Show at most the first ten file names in the confirmation prompt
        preview = ",".join(os.path.basename(x) for x in flist[0:10])
        question = "Going to {} {} files ({}...). Are you sure you want to continue?".format(label, len(flist), preview)
        if not query_yes_no(question, force=self.pargs.force):
            sys.exit()
        for target in flist:
            self.log.info("{}ing {}".format(label, target))
            target_dir = os.path.dirname(target)
            job_kw = {
                'workingDirectory': target_dir,
                'outputPath': os.path.join(target_dir, "{}-{}-drmaa.log".format(label, os.path.basename(target))),
            }
            command_line = [self._meta.compress_prog, self._meta.compress_opt, "%s" % target]
            self.app.cmd.command(command_line, label, ignore_error=True, **job_kw)

示例15: best_practice

 def best_practice(self):
     """Deliver best-practice analysis results to an uppmax project INBOX.

     Requires the ``project`` and ``uppmax_project`` arguments. The
     destination is ``/proj/<uppmax_project>/INBOX/<name>`` where the
     name is ``outdir`` if given, else ``statusdb_project_name`` if
     given, else ``project``. For every sample configuration file found,
     the yaml and metrics files (plus bam and vcf related files unless
     disabled with ``no_bam`` / ``no_vcf``) in its directory are
     transferred. With ``size`` set, the summed size of the source files
     is reported on the stderr output buffer.
     """
     if not self._check_pargs(["project", "uppmax_project"]):
         return
     project_path = os.path.normpath(os.path.join("/proj", self.pargs.uppmax_project))
     if not os.path.exists(project_path):
         self.log.warn("No such project {}; skipping".format(self.pargs.uppmax_project))
         return
     if self.pargs.outdir:
         inbox_name = self.pargs.outdir
     elif self.pargs.statusdb_project_name:
         inbox_name = self.pargs.statusdb_project_name
     else:
         inbox_name = self.pargs.project
     outpath = os.path.join(project_path, "INBOX", inbox_name)
     if not query_yes_no("Going to deliver data to {}; continue?".format(outpath)):
         return
     if not os.path.exists(outpath):
         self.app.cmd.safe_makedir(outpath)
     kw = vars(self.pargs)
     basedir = os.path.abspath(os.path.join(self._meta.root_path, self._meta.path_id))
     flist = find_samples(basedir, **kw)
     if self.pargs.flowcell:
         flist = [fl for fl in flist if os.path.basename(os.path.dirname(fl)) == self.pargs.flowcell]
     if not flist:
         self.log.info("No samples/sample configuration files found")
         return

     # Setup pattern
     plist = [".*.yaml$", ".*.metrics$"]
     if not self.pargs.no_bam:
         plist.append(".*-{}.bam$".format(self.pargs.bam_file_type))
         plist.append(".*-{}.bam.bai$".format(self.pargs.bam_file_type))
     if not self.pargs.no_vcf:
         plist.append(".*.vcf$")
         plist.append(".*.vcf.gz$")
         plist.append(".*.tbi$")
         plist.append(".*.tsv$")
     pattern = "|".join(plist)

     def filter_fn(f):
         return re.search(pattern, f) is not None

     size = 0
     for f in flist:
         path = os.path.dirname(f)
         sources = filtered_walk(path, filter_fn=filter_fn, exclude_dirs=BCBIO_EXCLUDE_DIRS)
         # Rewrite only the first occurrence of the base directory so a
         # repeated path fragment further down is left untouched
         targets = [src.replace(basedir, outpath, 1) for src in sources]
         self._transfer_files(sources, targets)
         if self.pargs.size:
             size += sum(os.stat(src).st_size for src in sources)
     if self.pargs.size:
         self.app._output_data['stderr'].write("\n********************************\nEstimated delivery size: {:.1f}G\n********************************".format(size/1e9))


注:本文中的scilifelab.utils.misc.filtered_walk函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。