This page collects typical usage examples of the Python method ruffus.Pipeline.collate. If you are wondering what Pipeline.collate does, how to call it, or what real-world uses look like, the curated examples below may help. You can also explore further usage examples of the containing class, ruffus.Pipeline.
The following shows the 6 Pipeline.collate code examples, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
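Before the examples, a minimal sketch of what collate does may help: it regroups the outputs of an upstream stage, bundling every file whose name matches the same regex groups into a single job that produces one combined output. Everything in this sketch (the file names, the task function combine_chunks, and the pipeline name) is illustrative and not taken from the examples below.
from ruffus import Pipeline, regex

def combine_chunks(input_files, output_file):
    # collate hands each job the *list* of inputs that shared the same regex groups
    with open(output_file, "w") as out:
        for name in input_files:
            with open(name) as f:
                out.write(f.read())

pipeline = Pipeline("collate_sketch")
pipeline.collate(task_func=combine_chunks,
                 input=["sample1.chunk1.txt", "sample1.chunk2.txt",
                        "sample2.chunk1.txt"],
                 filter=regex(r"(.+)\.chunk\d+\.txt"),  # group by the sample prefix
                 output=r"\1.combined.txt")             # one output per group
# pipeline.run() would then build sample1.combined.txt and sample2.combined.txt,
# assuming the three chunk files already exist.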
Example 1: test_newstyle_collate
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import collate [as alias]
def test_newstyle_collate(self):
"""
As above, but creates the pipeline on the fly using object-oriented syntax rather than decorators
"""
#
# Create pipeline on the fly, joining up tasks
#
test_pipeline = Pipeline("test")
test_pipeline.originate(task_func=generate_initial_files,
output=original_files)\
.mkdir(tempdir, tempdir+"/test")
test_pipeline.subdivide(task_func=split_fasta_file,
input=generate_initial_files,
# match original files
filter=regex(r".*\/original_(\d+).fa"),
output=[tempdir + r"/files.split.\1.success", # flag file for each original file
tempdir + r"/files.split.\1.*.fa"], # glob pattern
extras=[r"\1"])\
.posttask(lambda: sys.stderr.write("\tSplit into %d files each\n" % JOBS_PER_TASK))
test_pipeline.transform(task_func=align_sequences,
input=split_fasta_file,
filter=suffix(".fa"),
output=".aln") \
.posttask(lambda: sys.stderr.write("\tSequences aligned\n"))
test_pipeline.transform(task_func=percentage_identity,
input=align_sequences, # find all results from align_sequences
# replace suffix with:
filter=suffix(".aln"),
output=[r".pcid", # .pcid suffix for the result
r".pcid_success"] # .pcid_success to indicate job completed
)\
.posttask(lambda: sys.stderr.write("\t%Identity calculated\n"))
test_pipeline.collate(task_func=combine_results,
input=percentage_identity,
filter=regex(r".*files.split\.(\d+)\.\d+.pcid"),
output=[tempdir + r"/\1.all.combine_results",
tempdir + r"/\1.all.combine_results_success"])\
.posttask(lambda: sys.stderr.write("\tResults recombined\n"))
#
# Cleanup, printout and run
#
self.cleanup_tmpdir()
s = StringIO()
test_pipeline.printout(s, [combine_results],
verbose=5, wrap_width=10000)
self.assertTrue(re.search('Job needs update:.*Missing files.*',
                          s.getvalue(), re.DOTALL) is not None)
test_pipeline.run(verbose=0)
Example 2: test_newstyle_ruffus
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import collate [as alias]
def test_newstyle_ruffus(self):
test_pipeline = Pipeline("test")
test_pipeline.split(task_func=prepare_files,
input=None,
output=tempdir + '*.animal')\
.follows(mkdir(tempdir, tempdir + "test"))\
.posttask(lambda: do_write(tempdir + "task.done", "Task 1 Done\n"))
test_pipeline.collate(task_func=summarise_by_grouping,
input=prepare_files,
filter=regex(r'(.*/).*\.(.*)\.animal'),
output=r'\1\2.results')\
.posttask(lambda: do_write(tempdir + "task.done", "Task 2 Done\n"))
test_pipeline.run(multiprocess=10, verbose=0)
check_species_correct()
Example 3: test_newstyle_ruffus
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import collate [as alias]
def test_newstyle_ruffus(self):
test_pipeline = Pipeline("test")
test_pipeline.follows(setup_simulation_data, mkdir(gene_data_dir, simulation_data_dir))
test_pipeline.files(gwas_simulation, generate_simulation_params)\
.follows(setup_simulation_data)\
.follows(mkdir(working_dir, os.path.join(working_dir, "simulation_results")))
test_pipeline.collate(statistical_summary, gwas_simulation, regex(r"simulation_results/(\d+).\d+.simulation_res"), r"\1.mean")\
    .posttask(lambda: sys.stdout.write("\nOK\n"))
test_pipeline.run(multiprocess=50, verbose=0)
for oo in "000.mean", "001.mean":
results_file_name = os.path.join(working_dir, oo)
if not os.path.exists(results_file_name):
raise Exception("Missing %s" % results_file_name)
Example 4: create_pipeline
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import collate [as alias]
def create_pipeline(self):
# each pipeline has a different name
global cnt_pipelines
cnt_pipelines += 1
test_pipeline = Pipeline("test %d" % cnt_pipelines)
test_pipeline.originate(task_func=generate_initial_files1,
                        output=[tempdir + prefix + "_name.tmp1" for prefix in "abcd"])
test_pipeline.originate(task_func=generate_initial_files2,
                        output=[tempdir + "e_name.tmp1", tempdir + "f_name.tmp1"])
test_pipeline.originate(task_func=generate_initial_files3,
                        output=[tempdir + "g_name.tmp1", tempdir + "h_name.tmp1"])
test_pipeline.originate(task_func=generate_initial_files4,
                        output=tempdir + "i_name.tmp1")
test_pipeline.collate(task_func=test_task2,
                      input=[generate_initial_files1,
                             generate_initial_files2,
                             generate_initial_files3,
                             generate_initial_files4],
                      filter=formatter(),
                      output="{path[0]}/all.tmp2")
test_pipeline.transform(task_func=test_task3,
                        input=test_task2,
                        filter=suffix(".tmp2"),
                        output=".tmp3")
test_pipeline.transform(task_func=test_task4,
                        input=test_task3,
                        filter=suffix(".tmp3"),
                        output=".tmp4")
return test_pipeline
Example 5: Pipeline
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import collate [as alias]
"test_active_if/a.2" -> "test_active_if/a.4"
null -> "test_active_if/b.1"
"test_active_if/b.1" -> "test_active_if/b.2"
"test_active_if/b.2" -> "test_active_if/b.4"
"test_active_if/b.4" -> "test_active_if/summary.5"
"""
# alternative syntax
test_pipeline = Pipeline("test")
test_pipeline.originate(task1, ['test_active_if/a.1', 'test_active_if/b.1'], "an extra_parameter")\
.follows(mkdir("test_active_if"))
test_pipeline.transform(task2, task1, suffix(".1"), ".2")
test_pipeline.transform(task3, task1, suffix(".1"), ".3")\
    .active_if(lambda: pipeline_active_if)
test_pipeline.collate(task4, [task2, task3], regex(r"(.+)\.[23]"), r"\1.4")
test_pipeline.merge(task5, task4, "test_active_if/summary.5")
class Test_ruffus(unittest.TestCase):
    def setUp(self):
        try:
            shutil.rmtree(tempdir)
        except:
            pass
        os.makedirs(tempdir)

    def tearDown(self):
        try:
            shutil.rmtree(tempdir)
        except:
            pass
Example 6: make_pipeline
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import collate [as alias]
# ......... part of the code has been omitted here .........
'.+/(?P<readid>[a-zA-Z0-9-]+)_(?P<lib>[a-zA-Z0-9-:]+)_(?P<lane>[a-zA-Z0-9]+)_(?P<sample>[a-zA-Z0-9-]+).intervals'),
# add_inputs=add_inputs('{path[0]}/{sample[0]}.sort.dedup.bam'),
add_inputs=add_inputs(
'alignments/{sample[0]}/{readid[0]}_{lib[0]}_{lane[0]}_{sample[0]}.sort.dedup.bam'),
output='alignments/{sample[0]}/{readid[0]}_{lib[0]}_{lane[0]}_{sample[0]}.sort.dedup.realn.bam')
.follows('mark_duplicates_picard'))
# Base recalibration using GATK
pipeline.transform(
task_func=stages.base_recalibration_gatk,
name='base_recalibration_gatk',
input=output_from('local_realignment_gatk'),
filter=suffix('.sort.dedup.realn.bam'),
output=['.recal_data.csv', '.count_cov.log'])
# Print reads using GATK
(pipeline.transform(
task_func=stages.print_reads_gatk,
name='print_reads_gatk',
input=output_from('base_recalibration_gatk'),
# filter=formatter('.+/(?P<sample>[a-zA-Z0-9]+).recal_data.csv'),
filter=formatter(
# '.+/(?P<readid>[a-zA-Z0-9-\.]+)_(?P<lib>[a-zA-Z0-9-]+)_(?P<lane>[a-zA-Z0-9]+)_(?P<sample>[a-zA-Z0-9]+).recal_data.csv'),
'.+/(?P<readid>[a-zA-Z0-9-]+)_(?P<lib>[a-zA-Z0-9-:]+)_(?P<lane>[a-zA-Z0-9]+)_(?P<sample>[a-zA-Z0-9-]+).recal_data.csv'),
# '.+/(?P<readid>[a-zA-Z0-9-]+)_(?P<lib>[a-zA-Z0-9-:]+)_(?P<lane>[a-zA-Z0-9]+)_(?P<sample>[a-zA-Z0-9-]+).recal_data.csv'),
# add_inputs=add_inputs('{path[0]}/{sample[0]}.sort.dedup.realn.bam'),
add_inputs=add_inputs(
'alignments/{sample[0]}/{readid[0]}_{lib[0]}_{lane[0]}_{sample[0]}.sort.dedup.realn.bam'),
# output='{path[0]}/{sample[0]}.sort.dedup.realn.recal.bam')
output='alignments/{sample[0]}/{readid[0]}_{lib[0]}_{lane[0]}_{sample[0]}.sort.dedup.realn.recal.bam')
.follows('local_realignment_gatk'))
# Merge lane bams to sample bams
pipeline.collate(
task_func=stages.merge_sample_bams,
name='merge_sample_bams',
filter=formatter(
# '.+/(?P<readid>[a-zA-Z0-9-\.]+)_(?P<lib>[a-zA-Z0-9-]+)_(?P<lane>[a-zA-Z0-9]+)_(?P<sample>[a-zA-Z0-9]+).sort.dedup.realn.recal.bam'),
'.+/(?P<readid>[a-zA-Z0-9-]+)_(?P<lib>[a-zA-Z0-9-:]+)_(?P<lane>[a-zA-Z0-9]+)_(?P<sample>[a-zA-Z0-9-]+).sort.dedup.realn.recal.bam'),
# inputs=add_inputs('alignments/{sample[0]}/{readid[0]}_{lib[0]}_{lane[0]}_{sample[0]}.sort.dedup.realn.bam'),
input=output_from('print_reads_gatk'),
output='alignments/{sample[0]}/{sample[0]}.merged.bam')
# Mark duplicates in the BAM file using Picard
pipeline.transform(
task_func=stages.mark_duplicates_picard,
name='mark_duplicates_picard2',
input=output_from('merge_sample_bams'),
# filter=formatter(
# '.+/(?P<readid>[a-zA-Z0-9-\.]+)_(?P<lib>[a-zA-Z0-9-]+)_(?P<lane>[a-zA-Z0-9]+)_(?P<sample>[a-zA-Z0-9]+).merged.bam'),
filter=suffix('.merged.bam'),
# XXX should make metricsup an extra output?
output=['.merged.dedup.bam', '.metricsdup'])
# Local realignment2 using GATK
# Generate RealignerTargetCreator using GATK
pipeline.transform(
task_func=stages.realigner_target_creator,
name='realigner_target_creator2',
input=output_from('mark_duplicates_picard2'),
filter=suffix('.dedup.bam'),
output='.intervals')
# Local realignment using GATK
(pipeline.transform(
task_func=stages.local_realignment_gatk,