This page collects typical code examples of the Python method ruffus.Pipeline.merge. If you have been wondering exactly how to use Pipeline.merge, or what it looks like in practice, the curated examples below may help. You can also explore further usage examples of its containing class, ruffus.Pipeline.
Below are 14 code examples of Pipeline.merge, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
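Before the examples, here is a minimal, self-contained sketch of what Pipeline.merge does: it gathers all outputs of an upstream task into a single job, whose task function receives the complete list of input files and writes one combined output. Everything in this sketch (the pipeline name, make_parts, concat_files) is illustrative and does not come from the examples below.

from ruffus import Pipeline, mkdir

tempdir = "merge_demo/"

def make_parts(output_file):
    # originate-style task: one small input file per job
    with open(output_file, "w") as oo:
        oo.write(output_file + "\n")

def concat_files(input_file_names, output_file_name):
    # merge-style task: receives ALL upstream outputs as a single list
    with open(output_file_name, "w") as oo:
        for name in input_file_names:
            with open(name) as ii:
                oo.write(ii.read())

demo_pipeline = Pipeline("merge_demo")
demo_pipeline.originate(make_parts, [tempdir + x for x in ("a.part", "b.part")])\
    .follows(mkdir(tempdir))
demo_pipeline.merge(task_func=concat_files,
                    input=make_parts,
                    output=tempdir + "combined.txt")
demo_pipeline.run(verbose=0)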
Example 1: test_newstyle_ruffus
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import merge [as alias]
def test_newstyle_ruffus(self):
    test_pipeline = Pipeline("test")
    test_pipeline.files(create_random_numbers, None, tempdir + "random_numbers.list")\
        .follows(mkdir(tempdir))
    test_pipeline.split(task_func=step_4_split_numbers_into_chunks,
                        input=tempdir + "random_numbers.list",
                        output=tempdir + "*.chunks")\
        .follows(create_random_numbers)
    test_pipeline.transform(task_func=step_5_calculate_sum_of_squares,
                            input=step_4_split_numbers_into_chunks,
                            filter=suffix(".chunks"),
                            output=".sums")
    test_pipeline.merge(task_func=step_6_calculate_variance,
                        input=step_5_calculate_sum_of_squares,
                        output=os.path.join(tempdir, "variance.result"))\
        .posttask(lambda: sys.stdout.write(" hooray\n"))\
        .posttask(print_hooray_again, print_whoppee_again,
                  touch_file(os.path.join(tempdir, "done")))
    test_pipeline.run(multiprocess=50, verbose=0)
    output_file = os.path.join(tempdir, "variance.result")
    if not os.path.exists(output_file):
        raise Exception("Missing %s" % output_file)
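Pipeline.merge hands step_6_calculate_variance the complete list of .sums files as one job. The body of that task is not shown on this page; the following is only a plausible reconstruction, assuming each .sums file holds a whitespace-separated (sum, sum of squares, count) triple, to illustrate the (input_file_names, output_file_name) signature a merge task function receives.

def step_6_calculate_variance(input_file_names, output_file_name):
    # Hypothetical reconstruction: combine per-chunk partial sums into an
    # overall variance using var(X) = E[X^2] - E[X]^2.
    total, total_sq, count = 0.0, 0.0, 0
    for name in input_file_names:
        with open(name) as ii:
            s, sq, n = map(float, ii.read().split())
            total, total_sq, count = total + s, total_sq + sq, count + n
    variance = total_sq / count - (total / count) ** 2
    with open(output_file_name, "w") as oo:
        oo.write("%s\n" % variance)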
Example 2: create_pipeline
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import merge [as alias]
def create_pipeline(self):
    """
    Create a new pipeline on the fly without using decorators
    """
    global count_pipelines
    count_pipelines = count_pipelines + 1
    test_pipeline = Pipeline("test %d" % count_pipelines)
    test_pipeline.transform(task_func=transform1,
                            input=input_file,
                            filter=suffix('.txt'),
                            output='.output',
                            extras=[runtime_data])
    test_pipeline.transform(task_func=transform_raise_error,
                            input=input_file,
                            filter=suffix('.txt'),
                            output='.output',
                            extras=[runtime_data])
    test_pipeline.split(task_func=split1,
                        input=input_file,
                        output=split1_outputs)
    test_pipeline.merge(task_func=merge2,
                        input=split1,
                        output=merge2_output)
    return test_pipeline
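Because create_pipeline builds a fresh, uniquely named Pipeline on every call, the returned object carries its own dependency graph and can be run in isolation. A hedged usage sketch (note the pipeline deliberately includes transform_raise_error, so a run is expected to fail; RethrownJobError is the exception ruffus uses to surface job errors):

from ruffus.ruffus_exceptions import RethrownJobError

test_pipeline = self.create_pipeline()
try:
    test_pipeline.run(multiprocess=10, verbose=0)
except RethrownJobError:
    # expected: transform_raise_error raises inside one of the jobs
    pass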
Example 3: test_newstyle_ruffus
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import merge [as alias]
def test_newstyle_ruffus(self):
    # alternative syntax
    test_pipeline = Pipeline("test")
    test_pipeline.mkdir(data_dir, work_dir)
    test_pipeline.originate(task_func=task1,
                            output=[os.path.join(data_dir, "%s.1" % aa) for aa in "abcd"])
    test_pipeline.mkdir(filter=suffix(".1"),
                        output=".dir",
                        output_dir=work_dir)
    test_pipeline.transform(task_func=task2,
                            input=task1,
                            filter=suffix(".1"),
                            output=[".1", ".bak"],
                            extras=["extra.tst", 4, r"orig_dir=\1"],
                            output_dir=work_dir)
    test_pipeline.subdivide(task3, task2, suffix(".1"), r"\1.*.2",
                            [r"\1.a.2", r"\1.b.2"], output_dir=data_dir)
    test_pipeline.transform(task4, task3, suffix(".2"), ".3", output_dir=work_dir)
    test_pipeline.merge(task5, task4, os.path.join(data_dir, "summary.5"))
    test_pipeline.run(multiprocess=50, verbose=0)
    with open(os.path.join(data_dir, "summary.5")) as ii:
        active_text = ii.read()
    if active_text != expected_active_text:
        raise Exception("Error:\n\tExpected\n%s\nInstead\n%s\n" %
                        (expected_active_text, active_text))
Example 4: test_newstyle_ruffus
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import merge [as alias]
def test_newstyle_ruffus(self):
    test_pipeline = Pipeline("test")
    test_pipeline.split(task_func=split_fasta_file,
                        input=tempdir + "original.fa",
                        output=[tempdir + "files.split.success",
                                tempdir + "files.split.*.fa"])\
        .posttask(lambda: verbose_output.write(" Split into %d files\n" % 10))
    test_pipeline.transform(task_func=align_sequences,
                            input=split_fasta_file,
                            filter=suffix(".fa"),
                            output=".aln"  # fa -> aln
                            )\
        .posttask(lambda: verbose_output.write(" Sequences aligned\n"))
    test_pipeline.transform(task_func=percentage_identity,
                            input=align_sequences,     # find all results from align_sequences
                            filter=suffix(".aln"),     # replace suffix with:
                            output=[r".pcid",          # .pcid suffix for the result
                                    r".pcid_success"]  # .pcid_success to indicate job completed
                            )\
        .posttask(lambda: verbose_output.write(" %Identity calculated\n"))
    test_pipeline.merge(task_func=combine_results,
                        input=percentage_identity,
                        output=[tempdir + "all.combine_results",
                                tempdir + "all.combine_results_success"])\
        .posttask(lambda: verbose_output.write(" Results recombined\n"))
    test_pipeline.run(multiprocess=50, verbose=0)
    if not os.path.exists(tempdir + "all.combine_results"):
        raise Exception("Missing %s" % (tempdir + "all.combine_results"))
Example 5: test_newstyle_simpler
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import merge [as alias]
def test_newstyle_simpler(self):
    test_pipeline = Pipeline("test")
    test_pipeline.originate(task1, input_file_names, extras=[logger_proxy, logging_mutex])
    test_pipeline.transform(task2, task1, suffix(".1"), ".2", extras=[logger_proxy, logging_mutex])
    test_pipeline.transform(task3, task2, suffix(".2"), ".3", extras=[logger_proxy, logging_mutex])
    test_pipeline.merge(task4, task3, final_file_name, extras=[logger_proxy, logging_mutex])
    # test_pipeline.merge(task4, task3, final_file_name, extras={"logger_proxy": logger_proxy, "logging_mutex": logging_mutex})
    test_pipeline.run(multiprocess=500, verbose=0)
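The extras=[logger_proxy, logging_mutex] arguments threaded through every task are ruffus's standard pattern for sharing one logger across multiprocess jobs (the proxy and mutex typically come from ruffus.proxy_logger.make_shared_logger_and_proxy). A hedged sketch of how a task body such as task2 might consume those two extras:

def task2(input_file, output_file, logger_proxy, logging_mutex):
    open(output_file, "w").close()
    # the mutex serialises logging so lines from parallel jobs do not interleave
    with logging_mutex:
        logger_proxy.info("task2: %s -> %s" % (input_file, output_file))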
Example 6: make_pipeline2
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import merge [as alias]
def make_pipeline2(pipeline_name="pipeline2"):
    test_pipeline2 = Pipeline(pipeline_name)
    test_pipeline2.transform(task_func=task_1_to_1,
                             # task name
                             name="44_to_55",
                             # placeholder: will be replaced later with set_input()
                             input=None,
                             filter=suffix(".44"),
                             output=".55")
    test_pipeline2.merge(task_func=task_m_to_1,
                         input=test_pipeline2["44_to_55"],
                         output=tempdir + "/final.output")
    # Set head and tail
    test_pipeline2.set_tail_tasks([test_pipeline2[task_m_to_1]])
    if not DEBUG_do_not_define_head_task:
        test_pipeline2.set_head_tasks([test_pipeline2["44_to_55"]])
    return test_pipeline2
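The input=None placeholder above only becomes meaningful once the head task is given real inputs, which is what the set_input() comment alludes to. A hedged wiring sketch, assuming an upstream make_pipeline1() whose tail task emits the *.44 files and that Pipeline.set_input forwards inputs to the declared head tasks:

pipeline1 = make_pipeline1()          # assumed: tail task produces *.44 files
pipeline2 = make_pipeline2()
pipeline2.set_input(input=pipeline1)  # feeds the "44_to_55" head task
pipeline2.run(multiprocess=4, verbose=0)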
Example 7: test_newstyle_task
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import merge [as alias]
def test_newstyle_task(self):
    test_pipeline = Pipeline("test")
    test_pipeline.files(task1, [[None, tempdir + "a.1"], [None, tempdir + "b.1"]])\
        .follows(mkdir(tempdir))
    test_pipeline.files(task2, [[None, tempdir + "c.1"], [None, tempdir + "d.1"]])\
        .follows(mkdir(tempdir))
    test_pipeline.transform(task_func=task3,
                            input=task1,
                            filter=regex(r"(.+)"),
                            replace_inputs=ruffus.inputs(
                                ((r"\1"), task2, "test_transform_inputs.*y")),
                            output=r"\1.output")
    test_pipeline.merge(task4, (task3), tempdir + "final.output")
    test_pipeline.run([task4], multiprocess=10, verbose=0)
    correct_output = "{tempdir}a.1.output:test_transform_inputs.py,{tempdir}a.1,{tempdir}c.1,{tempdir}d.1;{tempdir}b.1.output:test_transform_inputs.py,{tempdir}b.1,{tempdir}c.1,{tempdir}d.1;".format(
        tempdir=tempdir)
    with open(tempdir + "final.output") as ff:
        real_output = ff.read()
    self.assertEqual(correct_output, real_output)
Example 8: make_pipeline
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import merge [as alias]
def make_pipeline(state):
    '''Build the pipeline by constructing stages and connecting them together'''
    # Build an empty pipeline
    pipeline = Pipeline(name='complexo')
    # Get a list of paths to all the FASTQ files
    fastq_files = state.config.get_option('fastqs')
    # Stages are dependent on the state
    stages = Stages(state)
    # The original FASTQ files.
    # This is a dummy stage. It is useful because it makes a node in the
    # pipeline graph, and gives the pipeline an obvious starting point.
    pipeline.originate(
        task_func=stages.original_fastqs,
        name='original_fastqs',
        output=fastq_files)
    # Align paired-end reads in FASTQ to the reference, producing a BAM file
    pipeline.transform(
        task_func=stages.align_bwa,
        name='align_bwa',
        input=output_from('original_fastqs'),
        # Match the R1 (read 1) FASTQ file and grab the path and sample name.
        # This will be the first input to the stage.
        # We assume the sample name consists of only alphanumeric characters.
        filter=formatter('.+/(?P<sample>[a-zA-Z0-9]+)_R1.fastq.gz'),
        # Add one more input to the stage:
        #    1. The corresponding R2 FASTQ file
        add_inputs=add_inputs('{path[0]}/{sample[0]}_R2.fastq.gz'),
        # Add an "extra" argument to the stage (beyond the inputs and outputs)
        # which is the sample name. This is needed within the stage for finding
        # sample-specific configuration options
        extras=['{sample[0]}'],
        # The output file name is the sample name with a .bam extension.
        output='{path[0]}/{sample[0]}.bam')
    # Sort the BAM file using Picard
    pipeline.transform(
        task_func=stages.sort_bam_picard,
        name='sort_bam_picard',
        input=output_from('align_bwa'),
        filter=suffix('.bam'),
        output='.sort.bam')
    # Mark duplicates in the BAM file using Picard
    pipeline.transform(
        task_func=stages.mark_duplicates_picard,
        name='mark_duplicates_picard',
        input=output_from('sort_bam_picard'),
        filter=suffix('.sort.bam'),
        # XXX should we make metricsdup an extra output?
        output=['.sort.dedup.bam', '.metricsdup'])
    # Generate chromosome intervals using GATK
    pipeline.transform(
        task_func=stages.chrom_intervals_gatk,
        name='chrom_intervals_gatk',
        input=output_from('mark_duplicates_picard'),
        filter=suffix('.sort.dedup.bam'),
        output='.chr.intervals')
    # Local realignment using GATK
    (pipeline.transform(
        task_func=stages.local_realignment_gatk,
        name='local_realignment_gatk',
        input=output_from('chrom_intervals_gatk'),
        filter=formatter('.+/(?P<sample>[a-zA-Z0-9]+).chr.intervals'),
        add_inputs=add_inputs('{path[0]}/{sample[0]}.sort.dedup.bam'),
        output='{path[0]}/{sample[0]}.sort.dedup.realn.bam')
        .follows('mark_duplicates_picard'))
    # Base recalibration using GATK
    pipeline.transform(
        task_func=stages.base_recalibration_gatk,
        name='base_recalibration_gatk',
        input=output_from('local_realignment_gatk'),
        filter=suffix('.sort.dedup.realn.bam'),
        output=['.recal_data.csv', '.count_cov.log'])
    # Print reads using GATK
    (pipeline.transform(
        task_func=stages.print_reads_gatk,
        name='print_reads_gatk',
        input=output_from('base_recalibration_gatk'),
        filter=formatter('.+/(?P<sample>[a-zA-Z0-9]+).recal_data.csv'),
        add_inputs=add_inputs('{path[0]}/{sample[0]}.sort.dedup.realn.bam'),
        output='{path[0]}/{sample[0]}.sort.dedup.realn.recal.bam')
        .follows('local_realignment_gatk'))
    # Call variants using GATK
    pipeline.transform(
        task_func=stages.call_variants_gatk,
        name='call_variants_gatk',
        input=output_from('print_reads_gatk'),
        filter=suffix('.sort.dedup.realn.recal.bam'),
        output='.raw.snps.indels.g.vcf')
    # Combine G.VCF files for all samples using GATK
    pipeline.merge(
#......... (part of the code omitted) .........
Example 9: only
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import merge [as alias]
#......... (part of the code omitted) .........
    output=[tempdir + "/g_name.tmp1", tempdir + "/h_name.tmp1"])
test_pipeline1.product(task_func=check_product_task,
                       input=[tempdir + "/" + prefix +
                              "_name.tmp1" for prefix in "abcd"],
                       filter=formatter(".*/(?P<FILE_PART>.+).tmp1$"),
                       input2=generate_initial_files2,
                       filter2=formatter(),
                       input3=generate_initial_files3,
                       filter3=formatter(r"tmp1$"),
                       output="{path[0][0]}/{FILE_PART[0][0]}.{basename[1][0]}.{basename[2][0]}.tmp2",
                       extras=["{basename[0][0][0]}{basename[1][0][0]}{basename[2][0][0]}",  # extra: prefixes only (abcd etc.)
                               # extra: path for 2nd input, 1st file
                               "{subpath[0][0][0]}",
                               "{subdir[0][0][0]}"]).follows("WOWWWEEE").follows(gen_task1).follows(generate_initial_files1).follows("generate_initial_files1")
test_pipeline1.merge(task_func=check_product_merged_task,
                     input=check_product_task,
                     output=tempdir + "/merged.results")
test_pipeline1.product(task_func=check_product_misspelt_capture_error_task,
                       input=gen_task1,
                       filter=formatter(".*/(?P<FILE_PART>.+).tmp1$"),
                       # deliberately misspelt capture reference: FILEPART vs FILE_PART
                       output="{path[0][0]}/{FILEPART[0][0]}.tmp2")
test_pipeline1.product(task_func=check_product_out_of_range_formatter_ref_error_task,
                       input=generate_initial_files1,
                       filter=formatter(".*/(?P<FILE_PART>.+).tmp1$"),
                       # deliberately out-of-range formatter reference: path[2]
                       output="{path[2][0]}/{basename[0][0]}.tmp2",
                       extras=["{FILE_PART[0][0]}"])
test_pipeline1.product(task_func=check_product_formatter_ref_index_error_task,
                       input=output_from("generate_initial_files1"),
                       filter=formatter(".*/(?P<FILE_PART>.+).tmp1$"),
                       # deliberately invalid index: path[0][0][1000]
                       output="{path[0][0][1000]}/{basename[0][0]}.tmp2",
                       extras=["{FILE_PART[0][0]}"])
Example 10: Pipeline
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import merge [as alias]
#......... (part of the code omitted) .........
    null -> "test_active_if/b.1"
    "test_active_if/b.1" -> "test_active_if/b.2"
    "test_active_if/b.2" -> "test_active_if/b.4"
    "test_active_if/b.4" -> "test_active_if/summary.5"
"""
# alternative syntax
test_pipeline = Pipeline("test")
test_pipeline.originate(task1, ['test_active_if/a.1', 'test_active_if/b.1'], "an extra_parameter")\
    .follows(mkdir("test_active_if"))
test_pipeline.transform(task2, task1, suffix(".1"), ".2")
test_pipeline.transform(task3, task1, suffix(".1"), ".3").active_if(lambda: pipeline_active_if)
test_pipeline.collate(task4, [task2, task3], regex(r"(.+)\.[23]"), r"\1.4")
test_pipeline.merge(task5, task4, "test_active_if/summary.5")


class Test_ruffus(unittest.TestCase):
    def setUp(self):
        try:
            shutil.rmtree(tempdir)
        except:
            pass
        os.makedirs(tempdir)

    def tearDown(self):
        try:
            shutil.rmtree(tempdir)
            pass
        except:
            pass
Example 11: make_pipeline
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import merge [as alias]
#......... (part of the code omitted) .........
        # add_inputs=add_inputs('{path[0]}/{sample[0]}.sort.dedup.bam'),
        add_inputs=add_inputs(
            'alignments/{sample[0]}/{readid[0]}_{lib[0]}_{lane[0]}_{sample[0]}.sort.dedup.bam'),
        output='alignments/{sample[0]}/{readid[0]}_{lib[0]}_{lane[0]}_{sample[0]}.sort.dedup.realn.bam')
        .follows('mark_duplicates_picard'))
    # Base recalibration using GATK
    pipeline.transform(
        task_func=stages.base_recalibration_gatk,
        name='base_recalibration_gatk',
        input=output_from('local_realignment_gatk'),
        filter=suffix('.sort.dedup.realn.bam'),
        output=['.recal_data.csv', '.count_cov.log'])
    # Print reads using GATK
    (pipeline.transform(
        task_func=stages.print_reads_gatk,
        name='print_reads_gatk',
        input=output_from('base_recalibration_gatk'),
        # filter=formatter('.+/(?P<sample>[a-zA-Z0-9]+).recal_data.csv'),
        filter=formatter(
            # '.+/(?P<readid>[a-zA-Z0-9-\.]+)_(?P<lib>[a-zA-Z0-9-]+)_(?P<lane>[a-zA-Z0-9]+)_(?P<sample>[a-zA-Z0-9]+).recal_data.csv'),
            '.+/(?P<readid>[a-zA-Z0-9-]+)_(?P<lib>[a-zA-Z0-9-:]+)_(?P<lane>[a-zA-Z0-9]+)_(?P<sample>[a-zA-Z0-9-]+).recal_data.csv'),
        # add_inputs=add_inputs('{path[0]}/{sample[0]}.sort.dedup.realn.bam'),
        add_inputs=add_inputs(
            'alignments/{sample[0]}/{readid[0]}_{lib[0]}_{lane[0]}_{sample[0]}.sort.dedup.realn.bam'),
        # output='{path[0]}/{sample[0]}.sort.dedup.realn.recal.bam')
        output='alignments/{sample[0]}/{readid[0]}_{lib[0]}_{lane[0]}_{sample[0]}.sort.dedup.realn.recal.bam')
        .follows('local_realignment_gatk'))
    # Merge lane bams to sample bams
    pipeline.collate(
        task_func=stages.merge_sample_bams,
        name='merge_sample_bams',
        filter=formatter(
            # '.+/(?P<readid>[a-zA-Z0-9-\.]+)_(?P<lib>[a-zA-Z0-9-]+)_(?P<lane>[a-zA-Z0-9]+)_(?P<sample>[a-zA-Z0-9]+).sort.dedup.realn.recal.bam'),
            '.+/(?P<readid>[a-zA-Z0-9-]+)_(?P<lib>[a-zA-Z0-9-:]+)_(?P<lane>[a-zA-Z0-9]+)_(?P<sample>[a-zA-Z0-9-]+).sort.dedup.realn.recal.bam'),
        # inputs=add_inputs('alignments/{sample[0]}/{readid[0]}_{lib[0]}_{lane[0]}_{sample[0]}.sort.dedup.realn.bam'),
        input=output_from('print_reads_gatk'),
        output='alignments/{sample[0]}/{sample[0]}.merged.bam')
    # Mark duplicates in the BAM file using Picard
    pipeline.transform(
        task_func=stages.mark_duplicates_picard,
        name='mark_duplicates_picard2',
        input=output_from('merge_sample_bams'),
        # filter=formatter(
        #     '.+/(?P<readid>[a-zA-Z0-9-\.]+)_(?P<lib>[a-zA-Z0-9-]+)_(?P<lane>[a-zA-Z0-9]+)_(?P<sample>[a-zA-Z0-9]+).merged.bam'),
        filter=suffix('.merged.bam'),
        # XXX should we make metricsdup an extra output?
        output=['.merged.dedup.bam', '.metricsdup'])
    # Local realignment (second pass) using GATK
    # Generate RealignerTargetCreator intervals using GATK
    pipeline.transform(
        task_func=stages.realigner_target_creator,
        name='realigner_target_creator2',
        input=output_from('mark_duplicates_picard2'),
        filter=suffix('.dedup.bam'),
        output='.intervals')
    # Local realignment using GATK
    (pipeline.transform(
        task_func=stages.local_realignment_gatk,
        name='local_realignment_gatk2',
Example 12: make_pipeline
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import merge [as alias]
#......... (part of the code omitted) .........
    (pipeline.transform(
        task_func=stages.structural_variants_lumpy,
        name="structural_variants_lumpy",
        input=output_from("sort_alignment"),
        filter=formatter(".+/(?P<sample>[a-zA-Z0-9]+).sorted.bam"),
        add_inputs=add_inputs(["{path[0]}/{sample[0]}.splitters.bam",
                               "{path[0]}/{sample[0]}.discordants.bam"]),
        output="{path[0]}/{sample[0]}.lumpy.vcf")
        .follows("index_alignment")
        .follows("sort_splitters")
        .follows("sort_discordants"))
    # Call genotypes on lumpy output using SVTyper
    # (pipeline.transform(
    #     task_func=stages.genotype_svtyper,
    #     name='genotype_svtyper',
    #     input=output_from('structural_variants_lumpy'),
    #     filter=formatter('.+/(?P<sample>[a-zA-Z0-9]+).lumpy.vcf'),
    #     add_inputs=add_inputs(['{path[0]}/{sample[0]}.sorted.bam', '{path[0]}/{sample[0]}.splitters.bam']),
    #     output='{path[0]}/{sample[0]}.svtyper.vcf')
    #     .follows('align_bwa')
    #     .follows('sort_splitters')
    #     .follows('index_alignment')
    #     .follows('index_splitters')
    #     .follows('index_discordants'))
    # Call SVs with Socrates
    pipeline.transform(
        task_func=stages.structural_variants_socrates,
        name="structural_variants_socrates",
        input=output_from("sort_alignment"),
        filter=formatter(".+/(?P<sample>[a-zA-Z0-9]+).sorted.bam"),
        # output goes to {path[0]}/socrates/
        output="{path[0]}/socrates/results_Socrates_paired_{sample[0]}.sorted_long_sc_l25_q5_m5_i95.txt",
        extras=["{path[0]}"])
    # Call DELs with DELLY
    pipeline.merge(
        task_func=stages.deletions_delly,
        name="deletions_delly",
        input=output_from("sort_alignment"),
        output="delly.DEL.vcf")
    # Call DUPs with DELLY
    pipeline.merge(
        task_func=stages.duplications_delly,
        name="duplications_delly",
        input=output_from("sort_alignment"),
        output="delly.DUP.vcf")
    # Call INVs with DELLY
    pipeline.merge(
        task_func=stages.inversions_delly,
        name="inversions_delly",
        input=output_from("sort_alignment"),
        output="delly.INV.vcf")
    # Call TRAs with DELLY
    pipeline.merge(
        task_func=stages.translocations_delly,
        name="translocations_delly",
        input=output_from("sort_alignment"),
        output="delly.TRA.vcf")
    # Join both read-pair files using gustaf_mate_joining
    # pipeline.transform(
    #     task_func=stages.gustaf_mate_joining,
    #     name='gustaf_mate_joining',
    #     input=output_from('fastq_to_fasta'),
    #     # Match the R1 (read 1) FASTA file and grab the path and sample name.
    #     # This will be the first input to the stage.
    #     # We assume the sample name consists of only alphanumeric characters.
    #     filter=formatter('.+/(?P<sample>[a-zA-Z0-9]+)_R1.fasta'),
    #     # Add one more input to the stage:
    #     #    1. The corresponding R2 FASTA file
    #     add_inputs=add_inputs(['{path[0]}/{sample[0]}_R2.fasta']),
    #     output='{path[0]}/{sample[0]}.joined_mates.fasta')
    # Call structural variants with pindel
    # (pipeline.transform(
    #     task_func=stages.structural_variants_pindel,
    #     name='structural_variants_pindel',
    #     input=output_from('sort_alignment'),
    #     filter=formatter('.+/(?P<sample>[a-zA-Z0-9]+).sorted.bam'),
    #     add_inputs=add_inputs(['{path[0]}/{sample[0]}.pindel_config.txt', reference_file]),
    #     output='{path[0]}/{sample[0]}.pindel')
    #     .follows('index_reference_bwa')
    #     .follows('index_reference_samtools'))
    return pipeline
Example 13: regex
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import merge [as alias]
test_pipeline.transform(task3, task2, regex('(.*).2'), inputs([r"\1.2", tempdir + "a.1"]), r'\1.3')\
    .posttask(lambda: do_write(test_file, "Task 3 Done\n"))
test_pipeline.transform(task4, tempdir + "*.1", suffix(".1"), ".4")\
    .follows(task1)\
    .posttask(lambda: do_write(test_file, "Task 4 Done\n"))\
    .jobs_limit(1)
test_pipeline.files(task5, None, tempdir + 'a.5')\
    .follows(mkdir(tempdir))\
    .posttask(lambda: do_write(test_file, "Task 5 Done\n"))
test_pipeline.merge(task_func=task6,
                    input=[task3, task4, task5],
                    output=tempdir + "final.6")\
    .follows(task3, task4, task5)\
    .posttask(lambda: do_write(test_file, "Task 6 Done\n"))


def check_job_order_correct(filename):
    """
    1   ->  2   ->  3   ->
                ->  4   ->
                        5   ->   6
    """
    precedence_rules = [[1, 2],
                        [2, 3],
                        [1, 4],
                        [5, 6],
Example 14: make_pipeline
# Required import: from ruffus import Pipeline [as alias]
# Or: from ruffus.Pipeline import merge [as alias]
#......... (part of the code omitted) .........
        input=output_from('chrom_intervals_gatk'),
        filter=formatter('.+/(?P<sample>[a-zA-Z0-9]+).chr.intervals'),
        add_inputs=add_inputs('{path[0]}/{sample[0]}.sort.dedup.bam'),
        output='{path[0]}/{sample[0]}.sort.dedup.realn.bam')
        .follows('mark_duplicates_picard'))
    # Base recalibration using GATK
    pipeline.transform(
        task_func=stages.base_recalibration_gatk,
        name='base_recalibration_gatk',
        input=output_from('local_realignment_gatk'),
        filter=suffix('.sort.dedup.realn.bam'),
        output=['.recal_data.csv', '.count_cov.log'])
    # Print reads using GATK
    (pipeline.transform(
        task_func=stages.print_reads_gatk,
        name='print_reads_gatk',
        input=output_from('base_recalibration_gatk'),
        filter=formatter('.+/(?P<sample>[a-zA-Z0-9]+).recal_data.csv'),
        add_inputs=add_inputs('{path[0]}/{sample[0]}.sort.dedup.realn.bam'),
        output='{path[0]}/{sample[0]}.sort.dedup.realn.recal.bam')
        .follows('local_realignment_gatk'))
    # Call variants using GATK
    pipeline.transform(
        task_func=stages.call_variants_gatk,
        name='call_variants_gatk',
        input=output_from('print_reads_gatk'),
        filter=suffix('.sort.dedup.realn.recal.bam'),
        output='.raw.snps.indels.g.vcf')
    # Combine G.VCF files for all samples using GATK
    pipeline.merge(
        task_func=stages.combine_gvcf_gatk,
        name='combine_gvcf_gatk',
        input=output_from('call_variants_gatk'),
        output='PCExomes.mergegvcf.vcf')
    # Genotype G.VCF files using GATK
    pipeline.transform(
        task_func=stages.genotype_gvcf_gatk,
        name='genotype_gvcf_gatk',
        input=output_from('combine_gvcf_gatk'),
        filter=suffix('.mergegvcf.vcf'),
        output='.genotyped.vcf')
    # SNP recalibration using GATK
    pipeline.transform(
        task_func=stages.snp_recalibrate_gatk,
        name='snp_recalibrate_gatk',
        input=output_from('genotype_gvcf_gatk'),
        filter=suffix('.genotyped.vcf'),
        output=['.snp_recal', '.snp_tranches', '.snp_plots.R'])
    # INDEL recalibration using GATK
    pipeline.transform(
        task_func=stages.indel_recalibrate_gatk,
        name='indel_recalibrate_gatk',
        input=output_from('genotype_gvcf_gatk'),
        filter=suffix('.genotyped.vcf'),
        output=['.indel_recal', '.indel_tranches', '.indel_plots.R'])
    # Apply SNP recalibration using GATK
    (pipeline.transform(
        task_func=stages.apply_snp_recalibrate_gatk,