本文整理匯總了Python中ruffus.Pipeline.transform方法的典型用法代碼示例。如果您正苦於以下問題:Python Pipeline.transform方法的具體用法?Python Pipeline.transform怎麽用?Python Pipeline.transform使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類ruffus.Pipeline
的用法示例。
在下文中一共展示了Pipeline.transform方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: make_pipeline
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import transform [as 別名]
def make_pipeline(state):
    '''Build and return the md5 pipeline from its stages.'''
    # Start from an empty pipeline object.
    md5_pipeline = Pipeline(name='md5')
    # Paths of every input file, taken from the run configuration.
    files_to_check = state.config.get_option('files')
    # Stage functions are bound to the pipeline state.
    pipeline_stages = Stages(state)
    # Dummy entry task: it only exists to give the dependency graph an
    # explicit root node holding the original input files.
    md5_pipeline.originate(
        task_func=pipeline_stages.original_files,
        name='original_files',
        output=files_to_check)
    # Produce a .md5 checksum file alongside each input file
    # (suffix('') appends '.md5' to the full input filename).
    md5_pipeline.transform(
        task_func=pipeline_stages.md5_checksum,
        name='md5_checksum',
        input=output_from('original_files'),
        filter=suffix(''),
        output='.md5')
    return md5_pipeline
示例2: test_newstyle_ruffus
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import transform [as 別名]
def test_newstyle_ruffus(self):
    """Drive the pipeline with the object-oriented (new-style) syntax and
    verify the merged summary file matches the expected text."""
    pipeline = Pipeline("test")
    pipeline.mkdir(data_dir, work_dir)
    pipeline.originate(
        task_func=task1,
        output=[os.path.join(data_dir, "%s.1" % aa) for aa in "abcd"])
    # Create one ".dir" directory per ".1" file inside the work directory.
    pipeline.mkdir(filter=suffix(".1"), output=".dir", output_dir=work_dir)
    pipeline.transform(
        task_func=task2,
        input=task1,
        filter=suffix(".1"),
        output=[".1", ".bak"],
        extras=["extra.tst", 4, r"orig_dir=\1"],
        output_dir=work_dir)
    pipeline.subdivide(task3, task2, suffix(".1"), r"\1.*.2",
                       [r"\1.a.2", r"\1.b.2"], output_dir=data_dir)
    pipeline.transform(task4, task3, suffix(".2"), ".3", output_dir=work_dir)
    pipeline.merge(task5, task4, os.path.join(data_dir, "summary.5"))
    pipeline.run(multiprocess=50, verbose=0)
    summary_path = os.path.join(data_dir, "summary.5")
    with open(summary_path) as handle:
        active_text = handle.read()
    if active_text != expected_active_text:
        raise Exception("Error:\n\tExpected\n%s\nInstead\n%s\n" %
                        (expected_active_text, active_text))
示例3: test_newstyle_ruffus
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import transform [as 別名]
def test_newstyle_ruffus(self):
    """Split a FASTA file, align each piece, score identity, then recombine —
    all wired with the new-style Pipeline API."""
    pipeline = Pipeline("test")
    # Fan out: one success flag plus a glob of per-chunk FASTA files.
    pipeline.split(task_func=split_fasta_file,
                   input=tempdir + "original.fa",
                   output=[tempdir + "files.split.success",
                           tempdir + "files.split.*.fa"])\
        .posttask(lambda: verbose_output.write(" Split into %d files\n" % 10))
    pipeline.transform(task_func=align_sequences,
                       input=split_fasta_file,
                       filter=suffix(".fa"),
                       output=".aln")\
        .posttask(lambda: verbose_output.write(" Sequences aligned\n"))
    # Each alignment yields a result file plus a completion flag.
    pipeline.transform(task_func=percentage_identity,
                       input=align_sequences,
                       filter=suffix(".aln"),
                       output=[r".pcid", r".pcid_success"])\
        .posttask(lambda: verbose_output.write(" %Identity calculated\n"))
    pipeline.merge(task_func=combine_results,
                   input=percentage_identity,
                   output=[tempdir + "all.combine_results",
                           tempdir + "all.combine_results_success"])\
        .posttask(lambda: verbose_output.write(" Results recombined\n"))
    pipeline.run(multiprocess=50, verbose=0)
    if not os.path.exists(tempdir + "all.combine_results"):
        raise Exception("Missing %s" % (tempdir + "all.combine_results"))
示例4: create_pipeline
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import transform [as 別名]
def create_pipeline(self):
    """
    Build a brand-new pipeline at runtime, without using decorators.
    """
    global count_pipelines
    count_pipelines += 1
    # Each call gets a uniquely named pipeline.
    pipeline = Pipeline("test %d" % count_pipelines)
    # Two transforms share the same input/filter/output spec; one succeeds
    # and the other deliberately raises, to exercise error handling.
    for task in (transform1, transform_raise_error):
        pipeline.transform(task_func=task,
                           input=input_file,
                           filter=suffix('.txt'),
                           output='.output',
                           extras=[runtime_data])
    pipeline.split(task_func=split1,
                   input=input_file,
                   output=split1_outputs)
    pipeline.merge(task_func=merge2,
                   input=split1,
                   output=merge2_output)
    return pipeline
示例5: test_newstyle_ruffus
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import transform [as 別名]
def test_newstyle_ruffus(self):
    """Random numbers -> chunks -> per-chunk sums of squares -> variance,
    built with the new-style Pipeline API."""
    pipeline = Pipeline("test")
    pipeline.files(create_random_numbers, None, tempdir + "random_numbers.list")\
        .follows(mkdir(tempdir))
    pipeline.split(task_func=step_4_split_numbers_into_chunks,
                   input=tempdir + "random_numbers.list",
                   output=tempdir + "*.chunks")\
        .follows(create_random_numbers)
    pipeline.transform(task_func=step_5_calculate_sum_of_squares,
                       input=step_4_split_numbers_into_chunks,
                       filter=suffix(".chunks"),
                       output=".sums")
    pipeline.merge(task_func=step_6_calculate_variance,
                   input=step_5_calculate_sum_of_squares,
                   output=os.path.join(tempdir, "variance.result"))\
        .posttask(lambda: sys.stdout.write(" hooray\n"))\
        .posttask(print_hooray_again, print_whoppee_again,
                  touch_file(os.path.join(tempdir, "done")))
    pipeline.run(multiprocess=50, verbose=0)
    result_file = os.path.join(tempdir, "variance.result")
    if not os.path.exists(result_file):
        raise Exception("Missing %s" % result_file)
示例6: test_newstyle_simpler
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import transform [as 別名]
def test_newstyle_simpler(self):
    """Chain originate -> transform -> transform -> merge, passing the
    logger proxy and its mutex to every task through ``extras``."""
    pipeline = Pipeline("test")
    pipeline.originate(task1, input_file_names,
                       extras=[logger_proxy, logging_mutex])
    pipeline.transform(task2, task1, suffix(".1"), ".2",
                       extras=[logger_proxy, logging_mutex])
    pipeline.transform(task3, task2, suffix(".2"), ".3",
                       extras=[logger_proxy, logging_mutex])
    pipeline.merge(task4, task3, final_file_name,
                   extras=[logger_proxy, logging_mutex])
    pipeline.run(multiprocess=500, verbose=0)
示例7: test_newstyle_collate
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import transform [as 別名]
def test_newstyle_collate(self):
    """
    Same pipeline as the decorator version, but assembled on the fly with
    the object-oriented syntax, then printed and run.
    """
    pipeline = Pipeline("test")
    pipeline.originate(task_func=generate_initial_files,
                       output=original_files)\
        .mkdir(tempdir, tempdir + "/test")
    # Each original_NN.fa fans out into a success flag plus a glob of
    # split FASTA chunks; \1 carries the original file's number through.
    pipeline.subdivide(task_func=split_fasta_file,
                       input=generate_initial_files,
                       filter=regex(r".*\/original_(\d+).fa"),
                       output=[tempdir + r"/files.split.\1.success",
                               tempdir + r"/files.split.\1.*.fa"],
                       extras=[r"\1"])\
        .posttask(lambda: sys.stderr.write("\tSplit into %d files each\n" % JOBS_PER_TASK))
    pipeline.transform(task_func=align_sequences,
                       input=split_fasta_file,
                       filter=suffix(".fa"),
                       output=".aln")\
        .posttask(lambda: sys.stderr.write("\tSequences aligned\n"))
    # One result file plus one completion flag per alignment.
    pipeline.transform(task_func=percentage_identity,
                       input=align_sequences,
                       filter=suffix(".aln"),
                       output=[r".pcid", r".pcid_success"])\
        .posttask(lambda: sys.stderr.write("\t%Identity calculated\n"))
    # Regroup the per-chunk results by original file number.
    pipeline.collate(task_func=combine_results,
                     input=percentage_identity,
                     filter=regex(r".*files.split\.(\d+)\.\d+.pcid"),
                     output=[tempdir + r"/\1.all.combine_results",
                             tempdir + r"/\1.all.combine_results_success"])\
        .posttask(lambda: sys.stderr.write("\tResults recombined\n"))
    # Clean slate, then check the printout reports missing files before running.
    self.cleanup_tmpdir()
    printout_buffer = StringIO()
    pipeline.printout(printout_buffer, [combine_results],
                      verbose=5, wrap_width=10000)
    self.assertTrue(re.search(
        'Job needs update:.*Missing files.*',
        printout_buffer.getvalue(), re.DOTALL) is not None)
    pipeline.run(verbose=0)
示例8: make_pipeline1
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import transform [as 別名]
def make_pipeline1(pipeline_name,       # Pipelines need to have a unique name
                   starting_file_names):
    """Build the first sub-pipeline: originate -> add_input -> 22_to_33 ->
    33_to_44, with head and tail tasks exposed for composition."""
    pipeline = Pipeline(pipeline_name)
    # Starting files could be changed later via set_input()/set_output(),
    # but passing them into this factory is more convenient.
    pipeline.originate(task_originate, starting_file_names)\
        .follows(mkdir(tempdir), mkdir(tempdir + "/testdir", tempdir + "/testdir2"))\
        .posttask(touch_file(tempdir + "/testdir/whatever.txt"))
    pipeline.transform(task_func=task_m_to_1,
                       name="add_input",
                       # Look the task up from the function; the function
                       # name must be unique within this pipeline.
                       input=task_originate,
                       # Python 3.7+ requires an anchored regex here, see
                       # https://bugs.python.org/issue34982
                       filter=regex(r"^(.*)"),
                       add_inputs=add_inputs(
                           tempdir + "/testdir/whatever.txt"),
                       output=r"\1.22")
    pipeline.transform(task_func=task_1_to_1,
                       name="22_to_33",
                       # Look the task up by its task name, since the
                       # function name is reused within this pipeline.
                       input=output_from("add_input"),
                       filter=suffix(".22"),
                       output=".33")
    last_task = pipeline.transform(task_func=task_1_to_1,
                                   name="33_to_44",
                                   # Ask the Pipeline itself for the task.
                                   input=pipeline["22_to_33"],
                                   filter=suffix(".33"),
                                   output=".44")
    # Expose the tail so downstream pipelines can depend on this one
    # without knowing its internal task names.
    pipeline.set_tail_tasks([last_task])
    # Without a tail task, callers joining pipelines must name the exact
    # task, otherwise Ruffus raises an exception — exercised here in DEBUG.
    if DEBUG_do_not_define_tail_task:
        pipeline.set_tail_tasks([])
    # Expose the head so callers can feed input in the same opaque way.
    pipeline.set_head_tasks([pipeline[task_originate]])
    return pipeline
示例9: make_pipeline
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import transform [as 別名]
def make_pipeline(state):
    '''Build the pipeline by constructing stages and connecting them together'''
    # Start from an empty pipeline object.
    pipeline = Pipeline(name='test_pipeline')
    # Paths of every input file, taken from the run configuration.
    input_files = state.config.get_option('files')
    # Stage functions are bound to the pipeline state.
    stages = Stages(state)
    # Dummy entry task: gives the dependency graph an explicit root node
    # holding the original input files.
    pipeline.originate(
        task_func=stages.original_files,
        name='original_files',
        output=input_files)
    # Five identically shaped stages: stage N rewrites the previous
    # stage's '.N-1' suffix to '.N', each consuming its predecessor.
    stage_specs = [
        (stages.stage1, 'stage1', '.0', '.1'),
        (stages.stage2, 'stage2', '.1', '.2'),
        (stages.stage3, 'stage3', '.2', '.3'),
        (stages.stage4, 'stage4', '.3', '.4'),
        (stages.stage5, 'stage5', '.4', '.5'),
    ]
    previous_name = 'original_files'
    for task_func, task_name, old_suffix, new_suffix in stage_specs:
        pipeline.transform(
            task_func=task_func,
            name=task_name,
            input=output_from(previous_name),
            filter=suffix(old_suffix),
            output=new_suffix)
        previous_name = task_name
    return pipeline
示例10: make_pipeline
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import transform [as 別名]
def make_pipeline(state):
    '''Build the twin-ion pipeline by constructing stages and connecting them.'''
    # Start from an empty pipeline object.
    pipeline = Pipeline(name='twin ion')
    # Paths of every input MZML file, taken from the run configuration.
    mzml_files = state.config.get_option('mzml')
    # Stage functions are bound to the pipeline state.
    stages = Stages(state)
    # Dummy entry task: gives the dependency graph an explicit root node
    # holding the original MZML files.
    pipeline.originate(
        task_func=stages.original_mzml,
        name='original_mzml',
        output=mzml_files)
    # Linear chain of signal-processing stages; each consumes the previous
    # stage's output and swaps its filename suffix.
    stage_specs = [
        (stages.resample, 'resample',
         '.mzML', '.resample.mzML'),
        (stages.noise_filter_sgolay, 'noise_filter_sgolay',
         '.resample.mzML', '.denoise.mzML'),
        (stages.baseline_filter, 'baseline_filter',
         '.denoise.mzML', '.baseline.mzML'),
        (stages.peak_picker_hires, 'peak_picker_hires',
         '.baseline.mzML', '.peaks.mzML'),
        (stages.feature_finder_centroid, 'feature_finder_centroid',
         '.peaks.mzML', '.featureXML'),
    ]
    previous_name = 'original_mzml'
    for task_func, task_name, old_suffix, new_suffix in stage_specs:
        pipeline.transform(
            task_func=task_func,
            name=task_name,
            input=output_from(previous_name),
            filter=suffix(old_suffix),
            output=new_suffix)
        previous_name = task_name
    return pipeline
示例11: test_newstyle_no_re_match
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import transform [as 別名]
def test_newstyle_no_re_match(self):
    """A regex filter that matches no input should produce a warning,
    not an error."""
    pipeline = Pipeline("test")
    pipeline.originate(task_1, tempdir + "a").mkdir(tempdir)
    # regex("b") cannot match the file produced above.
    pipeline.transform(task_2, task_1, regex("b"), "task_2.output")
    str_logger = t_save_to_str_logger()
    pipeline.run(multiprocess=10, logger=str_logger, verbose=1)
    print(str_logger.warning_str)
    self.assertTrue("no file names matched" in str_logger.warning_str)
    print("\n Warning printed out correctly", file=sys.stderr)
示例12: test_newstyle_no_re_match
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import transform [as 別名]
def test_newstyle_no_re_match(self):
    """inputs(...) with more than one argument must raise an exception."""
    try:
        pipeline = Pipeline("test")
        pipeline.transform(task_func=task_2,
                           input=None,
                           filter=regex(tempdir + "b"),
                           # Two arguments here is the deliberate error.
                           replace_inputs=inputs(tempdir + "a", tempdir + "b"),
                           output="task_1.output")
        pipeline.run(multiprocess=10, verbose=0)
    except ruffus.ruffus_exceptions.error_task_transform_inputs_multiple_args:
        print("\tExpected exception thrown 1")
        return
    except ruffus.ruffus_exceptions.error_inputs_multiple_args:
        print("\tExpected exception thrown 2")
        return
    raise Exception("Inputs(...) with multiple arguments should have thrown an exception")
示例13: test_newstyle_task
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import transform [as 別名]
def test_newstyle_task(self):
    """
    Same flow as the decorator version, but the pipeline is constructed
    on the fly without decorators.
    """
    pipeline = Pipeline("test")
    pipeline.files(task1, None, tempdir + 'a.1')\
        .follows(mkdir(tempdir))
    pipeline.transform(task_func=task2,
                       input=task1,
                       filter=regex(r".*"),
                       output=tempdir + 'b.1')
    pipeline.files(task3, task2, tempdir + 'c.1')
    pipeline.files(task4, [[None, tempdir + 'd.1'], [None, tempdir + 'e.1']])\
        .follows(task3)
    pipeline.files(task5, task4, tempdir + "f.1")
    pipeline.run(multiprocess=10, verbose=0)
示例14: make_pipeline2
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import transform [as 別名]
def make_pipeline2(pipeline_name="pipeline2"):
    """Build the second sub-pipeline: a 44->55 transform feeding a final
    merge, with head and tail tasks exposed for composition."""
    pipeline2 = Pipeline(pipeline_name)
    pipeline2.transform(task_func=task_1_to_1,
                        name="44_to_55",
                        # Placeholder input; callers wire it up later
                        # with set_input().
                        input=None,
                        filter=suffix(".44"),
                        output=".55")
    pipeline2.merge(task_func=task_m_to_1,
                    input=pipeline2["44_to_55"],
                    output=tempdir + "/final.output",)
    # Expose head/tail so other pipelines can connect to this one
    # without knowing its internal task names.
    pipeline2.set_tail_tasks([pipeline2[task_m_to_1]])
    if not DEBUG_do_not_define_head_task:
        pipeline2.set_head_tasks([pipeline2["44_to_55"]])
    return pipeline2
示例15: test_newstyle_mkdir_run
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import transform [as 別名]
def test_newstyle_mkdir_run(self):
    """mkdir() chained onto tasks must create the directories before the
    pipeline runs."""
    pipeline = Pipeline("test")
    pipeline.split(task_func=generate_initial_files1,
                   input=1,
                   output=[tempdir + "/" + prefix + "_name.tmp1" for prefix in "abcd"])
    # The transform writes into per-file ".dir" directories, which its
    # chained mkdir() calls are responsible for creating.
    pipeline.transform(task_func=test_transform,
                       input=generate_initial_files1,
                       filter=formatter(),
                       output="{path[0]}/{basename[0]}.dir/{basename[0]}.tmp2")\
        .mkdir(tempdir + "/test1")\
        .mkdir(tempdir + "/test2")\
        .mkdir(generate_initial_files1, formatter(),
               ["{path[0]}/{basename[0]}.dir", 3, "{path[0]}/{basename[0]}.dir2"])
    pipeline.mkdir(test_transform2, tempdir + "/test3")\
        .mkdir(generate_initial_files1, formatter(),
               "{path[0]}/{basename[0]}.dir2")
    cleanup_tmpdir()
    pipeline_run([test_transform, test_transform2], verbose=0,
                 multiprocess=2, pipeline="main")