本文整理匯總了Python中ruffus.Pipeline.originate方法的典型用法代碼示例。如果您正苦於以下問題:Python Pipeline.originate方法的具體用法?Python Pipeline.originate怎麽用?Python Pipeline.originate使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類ruffus.Pipeline
的用法示例。
在下文中一共展示了Pipeline.originate方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: test_newstyle_ruffus
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import originate [as 別名]
def test_newstyle_ruffus(self):
    """Wire four tasks together with the object-oriented Pipeline API and run them."""
    pipeline = Pipeline("test")
    # Seed files, then three transforms fanning out from the same start task.
    pipeline.originate(start_task, ["a.1", "b.1"])
    pipeline.transform(same_file_name_task, start_task, suffix(".1"), ".1")
    pipeline.transform(linked_file_name_task, start_task, suffix(".1"), ".linked.1")
    pipeline.transform(final_task,
                       [linked_file_name_task, same_file_name_task],
                       suffix(".1"), ".3")
    pipeline.run(log_exceptions=True, verbose=0)
示例2: test_newstyle_ruffus
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import originate [as 別名]
def test_newstyle_ruffus(self):
    """Build the pipeline with the alternative keyword syntax and verify the summary file."""
    pipeline = Pipeline("test")
    pipeline.mkdir(data_dir, work_dir)
    pipeline.originate(task_func=task1,
                       output=[os.path.join(data_dir, "%s.1" % aa) for aa in "abcd"])
    # Make a per-input ".dir" directory inside the working directory.
    pipeline.mkdir(filter=suffix(".1"), output=".dir", output_dir=work_dir)
    pipeline.transform(task_func=task2,
                       input=task1,
                       filter=suffix(".1"),
                       output=[".1", ".bak"],
                       extras=["extra.tst", 4, r"orig_dir=\1"],
                       output_dir=work_dir)
    pipeline.subdivide(task3, task2, suffix(".1"),
                       r"\1.*.2", [r"\1.a.2", r"\1.b.2"],
                       output_dir=data_dir)
    pipeline.transform(task4, task3, suffix(".2"), ".3", output_dir=work_dir)
    pipeline.merge(task5, task4, os.path.join(data_dir, "summary.5"))
    pipeline.run(multiprocess=50, verbose=0)
    # The merged summary must match the expected text exactly.
    with open(os.path.join(data_dir, "summary.5")) as ii:
        active_text = ii.read()
    if active_text != expected_active_text:
        raise Exception("Error:\n\tExpected\n%s\nInstead\n%s\n" %
                        (expected_active_text, active_text))
示例3: test_newstyle_ruffus
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import originate [as 別名]
def test_newstyle_ruffus(self):
    """Exercise printout_graph in several formats; expect an error for unknown extensions."""
    print("     Run pipeline normally...")
    test_pipeline = Pipeline("test")
    test_pipeline.originate(make_start, [tempdir + 'start'])
    test_pipeline.split(split_start, make_start, tempdir + '*.split')
    test_pipeline.subdivide(subdivide_start, split_start, formatter(),
                            tempdir + '{basename[0]}_*.subdivided',
                            tempdir + '{basename[0]}')
    if self.graph_viz_present:
        # Graphviz available: render dot, jpg and svg flowcharts.
        test_pipeline.printout_graph(tempdir + "flowchart.dot")
        test_pipeline.printout_graph(tempdir + "flowchart.jpg",
                                     target_tasks=[subdivide_start],
                                     forcedtorun_tasks=[split_start],
                                     no_key_legend=True)
        test_pipeline.printout_graph(tempdir + "flowchart.svg", no_key_legend=False)
        # Unknown format
        try:
            test_pipeline.printout_graph(tempdir + "flowchart.unknown", no_key_legend=False)
            raise Exception("Failed to throw exception for test_pipeline.printout_graph unknown extension ")
        except CalledProcessError:
            # Expected: graphviz rejects the unrecognised extension.
            pass
        # An explicit output format overrides the unknown extension.
        test_pipeline.printout_graph(tempdir + "flowchart.unknown", "svg", no_key_legend=False)
    else:
        # Without graphviz only the dot file can be produced.
        test_pipeline.printout_graph(tempdir + "flowchart.dot",
                                     target_tasks=[subdivide_start],
                                     forcedtorun_tasks=[split_start],
                                     no_key_legend=True)
示例4: make_pipeline
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import originate [as 別名]
def make_pipeline(state):
    '''Build the pipeline by constructing stages and connecting them together'''
    # Start from an empty pipeline.
    pipeline = Pipeline(name='md5')
    # Paths to all the input files.
    input_files = state.config.get_option('files')
    # Stages are dependent on the state.
    stages = Stages(state)
    # Dummy stage for the original files: it only exists to give the
    # pipeline graph an explicit starting node.
    pipeline.originate(
        task_func=stages.original_files,
        name='original_files',
        output=input_files)
    # Produce a .md5 checksum file next to each input file.
    pipeline.transform(
        task_func=stages.md5_checksum,
        name='md5_checksum',
        input=output_from('original_files'),
        filter=suffix(''),
        output='.md5')
    return pipeline
示例5: test_newstyle_simpler
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import originate [as 別名]
def test_newstyle_simpler(self):
    """Chain four tasks that all share a proxy logger and its mutex via extras."""
    pipeline = Pipeline("test")
    pipeline.originate(task1, input_file_names, extras=[logger_proxy, logging_mutex])
    pipeline.transform(task2, task1, suffix(".1"), ".2", extras=[logger_proxy, logging_mutex])
    pipeline.transform(task3, task2, suffix(".2"), ".3", extras=[logger_proxy, logging_mutex])
    pipeline.merge(task4, task3, final_file_name, extras=[logger_proxy, logging_mutex])
    # extras could equally be given as keyword extras:
    # extras={"logger_proxy": logger_proxy, "logging_mutex": logging_mutex}
    pipeline.run(multiprocess=500, verbose=0)
示例6: test_newstyle_collate
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import originate [as 別名]
def test_newstyle_collate(self):
    """
    Create the collate pipeline on the fly with object-oriented syntax
    (instead of decorators), check its printout, then run it.
    """
    pipeline = Pipeline("test")
    pipeline.originate(task_func=generate_initial_files,
                       output=original_files)\
        .mkdir(tempdir, tempdir + "/test")
    pipeline.subdivide(task_func=split_fasta_file,
                       input=generate_initial_files,
                       # match original files
                       filter=regex(r".*\/original_(\d+).fa"),
                       output=[tempdir + r"/files.split.\1.success",  # flag file per original
                               tempdir + r"/files.split.\1.*.fa"],    # glob pattern
                       extras=[r"\1"])\
        .posttask(lambda: sys.stderr.write("\tSplit into %d files each\n" % JOBS_PER_TASK))
    pipeline.transform(task_func=align_sequences,
                       input=split_fasta_file,
                       filter=suffix(".fa"),
                       output=".aln")\
        .posttask(lambda: sys.stderr.write("\tSequences aligned\n"))
    pipeline.transform(task_func=percentage_identity,
                       input=align_sequences,   # all results from align_sequences
                       filter=suffix(".aln"),
                       output=[r".pcid",           # .pcid suffix for the result
                               r".pcid_success"])\
        .posttask(lambda: sys.stderr.write("\t%Identity calculated\n"))
    pipeline.collate(task_func=combine_results,
                     input=percentage_identity,
                     filter=regex(r".*files.split\.(\d+)\.\d+.pcid"),
                     output=[tempdir + r"/\1.all.combine_results",
                             tempdir + r"/\1.all.combine_results_success"])\
        .posttask(lambda: sys.stderr.write("\tResults recombined\n"))
    # Cleanup, printout and run.
    self.cleanup_tmpdir()
    s = StringIO()
    pipeline.printout(s, [combine_results], verbose=5, wrap_width=10000)
    # Before running, every job should be flagged as needing an update.
    self.assertTrue(re.search(
        'Job needs update:.*Missing files.*', s.getvalue(), re.DOTALL) is not None)
    pipeline.run(verbose=0)
示例7: test_newstyle_ruffus
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import originate [as 別名]
def test_newstyle_ruffus(self):
    """Run, re-run and force-run the split/subdivide pipeline, checking files after each pass."""
    pipeline = Pipeline("test")
    pipeline.originate(task_func=make_start, output=[tempdir + 'start'])
    pipeline.split(task_func=split_start, input=make_start,
                   output=tempdir + '*.split')
    pipeline.subdivide(task_func=subdivide_start, input=split_start,
                       filter=formatter(),
                       output=tempdir + '{basename[0]}_*.subdivided',
                       extras=[tempdir + '{basename[0]}'])
    # Expected filesets after each successive (forced) run.
    after_1 = ["start", "0.split", "0_0.subdivided"]
    after_2 = ["1.split", "0_1.subdivided", "1_0.subdivided"]
    after_3 = ["2.split", "0_2.subdivided", "1_1.subdivided", "2_0.subdivided"]
    after_4 = ["3.split", "0_3.subdivided", "1_2.subdivided", "2_1.subdivided", "3_0.subdivided"]
    print("     1 Run pipeline normally...")
    pipeline.run(multiprocess=10, verbose=TEST_VERBOSITY)
    self.check_file_exists_or_not_as_expected(after_1, after_2)
    print("     2 Check that running again does nothing. (All up to date).")
    pipeline.run(multiprocess=10, verbose=TEST_VERBOSITY)
    self.check_file_exists_or_not_as_expected(after_1, after_2)
    time.sleep(2)
    print("     3 Running again with forced tasks to generate more files...")
    # Forced task looked up by its fully qualified "pipeline::task" name.
    pipeline.run(forcedtorun_tasks=["test::make_start"],
                 multiprocess=10, verbose=TEST_VERBOSITY)
    self.check_file_exists_or_not_as_expected(after_1 + after_2, after_3)
    print("     4 Check that running again does nothing. (All up to date).")
    pipeline.run(multiprocess=10, verbose=TEST_VERBOSITY)
    self.check_file_exists_or_not_as_expected(after_1 + after_2, after_3)
    time.sleep(2)
    print("     5 Running again with forced tasks to generate even more files...")
    # Forced task given as the task function itself this time.
    pipeline.run(forcedtorun_tasks=make_start,
                 multiprocess=10, verbose=TEST_VERBOSITY)
    self.check_file_exists_or_not_as_expected(after_1 + after_2 + after_3, after_4)
    print("     6 Check that running again does nothing. (All up to date).")
    pipeline.run(multiprocess=10, verbose=TEST_VERBOSITY)
    self.check_file_exists_or_not_as_expected(after_1 + after_2 + after_3, after_4)
示例8: make_pipeline1
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import originate [as 別名]
def make_pipeline1(pipeline_name,   # Pipelines need to have a unique name
                   starting_file_names):
    """Build a sub-pipeline with published head and tail tasks."""
    pipeline = Pipeline(pipeline_name)
    # Starting files could also be changed later with set_input() (transform
    # etc.) or set_output() (originate), but passing them in here is simpler.
    pipeline.originate(task_originate, starting_file_names)\
        .follows(mkdir(tempdir), mkdir(tempdir + "/testdir", tempdir + "/testdir2"))\
        .posttask(touch_file(tempdir + "/testdir/whatever.txt"))
    pipeline.transform(task_func=task_m_to_1,
                       name="add_input",
                       # Look up the task from the function name task_originate()
                       # -- fine as long as that name is unique in the pipeline.
                       input=task_originate,
                       # regex requires an anchor from Python 3.7 onwards, see
                       # https://bugs.python.org/issue34982
                       filter=regex(r"^(.*)"),
                       add_inputs=add_inputs(tempdir + "/testdir/whatever.txt"),
                       output=r"\1.22")
    pipeline.transform(task_func=task_1_to_1,
                       name="22_to_33",
                       # Look up by task name: the function name task_1_to_1
                       # is reused and therefore not unique here.
                       input=output_from("add_input"),
                       filter=suffix(".22"),
                       output=".33")
    tail_task = pipeline.transform(task_func=task_1_to_1,
                                   name="33_to_44",
                                   # Ask the Pipeline itself to resolve the task name.
                                   input=pipeline["22_to_33"],
                                   filter=suffix(".33"),
                                   output=".44")
    # Publish the tail Task object so users of this sub-pipeline can depend
    # on it without knowing internal task names.
    pipeline.set_tail_tasks([tail_task])
    # With no tail tasks defined, callers would have to name the exact task
    # inside the pipeline, otherwise Ruffus raises an exception.
    if DEBUG_do_not_define_tail_task:
        pipeline.set_tail_tasks([])
    # Publish the head task so callers can feed input into the sub-pipeline.
    pipeline.set_head_tasks([pipeline[task_originate]])
    return pipeline
示例9: test_newstyle_no_re_match
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import originate [as 別名]
def test_newstyle_no_re_match(self):
    """A regex filter matching no inputs should log a 'no file names matched' warning."""
    pipeline = Pipeline("test")
    pipeline.originate(task_1, tempdir + "a").mkdir(tempdir)
    # regex("b") cannot match the "a" file produced above.
    pipeline.transform(task_2, task_1, regex("b"), "task_2.output")
    str_logger = t_save_to_str_logger()
    pipeline.run(multiprocess=10, logger=str_logger, verbose=1)
    print(str_logger.warning_str)
    self.assertTrue("no file names matched" in str_logger.warning_str)
    print("\n    Warning printed out correctly", file=sys.stderr)
示例10: make_pipeline
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import originate [as 別名]
def make_pipeline(state):
    '''Build the pipeline by constructing stages and connecting them together'''
    # Start from an empty pipeline.
    pipeline = Pipeline(name='test_pipeline')
    # Paths to all the FASTQ input files.
    input_files = state.config.get_option('files')
    # Stages are dependent on the state.
    stages = Stages(state)
    # Dummy stage for the original files: it only exists to give the
    # pipeline graph an explicit starting node.
    pipeline.originate(
        task_func=stages.original_files,
        name='original_files',
        output=input_files)

    def chain(task_func, name, previous, old_suffix, new_suffix):
        # One-to-one stage turning *<old_suffix> outputs of `previous`
        # into *<new_suffix> files.
        pipeline.transform(
            task_func=task_func,
            name=name,
            input=output_from(previous),
            filter=suffix(old_suffix),
            output=new_suffix)

    chain(stages.stage1, 'stage1', 'original_files', '.0', '.1')
    chain(stages.stage2, 'stage2', 'stage1', '.1', '.2')
    chain(stages.stage3, 'stage3', 'stage2', '.2', '.3')
    chain(stages.stage4, 'stage4', 'stage3', '.3', '.4')
    chain(stages.stage5, 'stage5', 'stage4', '.4', '.5')
    return pipeline
示例11: make_pipeline
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import originate [as 別名]
def make_pipeline(state):
    '''Build the pipeline by constructing stages and connecting them together'''
    # Start from an empty pipeline.
    pipeline = Pipeline(name='twin ion')
    # Paths to all the MZML input files.
    mzml_files = state.config.get_option('mzml')
    # Stages are dependent on the state.
    stages = Stages(state)
    # Dummy stage for the original MZML files: it only exists to give the
    # pipeline graph an explicit starting node.
    pipeline.originate(
        task_func=stages.original_mzml,
        name='original_mzml',
        output=mzml_files)

    def chain(task_func, name, previous, old_suffix, new_suffix):
        # One-to-one stage turning *<old_suffix> outputs of `previous`
        # into *<new_suffix> files.
        pipeline.transform(
            task_func=task_func,
            name=name,
            input=output_from(previous),
            filter=suffix(old_suffix),
            output=new_suffix)

    chain(stages.resample, 'resample',
          'original_mzml', '.mzML', '.resample.mzML')
    chain(stages.noise_filter_sgolay, 'noise_filter_sgolay',
          'resample', '.resample.mzML', '.denoise.mzML')
    chain(stages.baseline_filter, 'baseline_filter',
          'noise_filter_sgolay', '.denoise.mzML', '.baseline.mzML')
    chain(stages.peak_picker_hires, 'peak_picker_hires',
          'baseline_filter', '.baseline.mzML', '.peaks.mzML')
    chain(stages.feature_finder_centroid, 'feature_finder_centroid',
          'peak_picker_hires', '.peaks.mzML', '.featureXML')
    return pipeline
示例12: test_newstyle_ruffus
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import originate [as 別名]
def test_newstyle_ruffus(self):
    """Run a pipeline whose final task gets its input from runtime_data at run() time."""
    pipeline = Pipeline("test")
    pipeline.originate(task_func=task1,
                       output=[tempdir + 'a.1'] + runtime_files)
    pipeline.transform(task2, task1, suffix(".1"), ".2")
    pipeline.transform(task_func=task3,
                       input=task2,
                       filter=suffix(".2"),
                       output=".3")
    # task4's input is resolved at run() time from runtime_data["a"];
    # follows(task3) keeps the ordering explicit.
    pipeline.transform(task_func=task4,
                       input=runtime_parameter("a"),
                       filter=suffix(".3"),
                       output=".4").follows(task3)
    pipeline.run(verbose=0, runtime_data={"a": runtime_files})
示例13: test_newstyle_mkdir
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import originate [as 別名]
def test_newstyle_mkdir(self):
    """follows(mkdir(...)) must create all directories (duplicates tolerated) before tasks run."""
    # NOTE(review): unicode() is Python-2-only -- this test presumably runs
    # under py2 or with a compatibility shim; confirm before porting.
    pipeline = Pipeline("test")
    pipeline.follows(task_which_makes_directories,
                     mkdir(directories),
                     mkdir(unicode(tempdir + "c")),
                     mkdir(unicode(tempdir + "d"),
                           unicode(tempdir + "e")),
                     mkdir(unicode(tempdir + "e")))\
        .posttask(touch_file(unicode(tempdir + "f")))
    pipeline.originate(task_which_makes_files, [tempdir + "g", tempdir + "h"])
    pipeline.run(multiprocess=10, verbose=0)
    # Every directory a-f and file g-h must now exist.
    for d in 'abcdefgh':
        fullpath = os.path.join(os.path.dirname(__file__), tempdir, d)
        self.assertTrue(os.path.exists(fullpath))
示例14: test_transform_with_missing_formatter_args_b
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import originate [as 別名]
def test_transform_with_missing_formatter_args_b(self):
    """Both printout() and run() should report a formatter key that cannot be filled in."""
    pipeline = Pipeline("test")
    pipeline.originate(task_func=generate_initial_files,
                       output=[os.path.join(tempdir, ff + ".tmp") for ff in "abcd"])\
        .mkdir(tempdir)
    # {dynamic_message} is not produced by formatter() -> missing key.
    pipeline.transform(task_func=transform_with_missing_formatter_args,
                       input=generate_initial_files,
                       filter=formatter(),
                       output="{path[0]}/{basename[0]}.task1",
                       extras=['echo {dynamic_message} > {some_file}'])
    # The missing key should show up in the printout...
    s = StringIO()
    pipeline.printout(s, [transform_with_missing_formatter_args],
                      verbose=4, wrap_width=10000, pipeline="test")
    self.assertIn("Missing key = {dynamic_message}", s.getvalue())
    # ...and in the log stream when the pipeline actually runs.
    s = StringIO()
    logger = t_stream_logger(s)
    pipeline.run([transform_with_missing_formatter_args],
                 verbose=5, pipeline="test", logger=logger)
    self.assertIn("Missing key = {dynamic_message}", s.getvalue())
示例15: create_pipeline
# 需要導入模塊: from ruffus import Pipeline [as 別名]
# 或者: from ruffus.Pipeline import originate [as 別名]
def create_pipeline(self):
    """Create a uniquely named pipeline: four originate tasks, one collate, two transforms."""
    # Each pipeline needs a different name; use a module-level counter.
    global cnt_pipelines
    cnt_pipelines = cnt_pipelines + 1
    pipeline = Pipeline("test %d" % cnt_pipelines)
    # Four independent sets of starting files.
    starting_outputs = [
        (generate_initial_files1, [tempdir + prefix + "_name.tmp1" for prefix in "abcd"]),
        (generate_initial_files2, [tempdir + "e_name.tmp1", tempdir + "f_name.tmp1"]),
        (generate_initial_files3, [tempdir + "g_name.tmp1", tempdir + "h_name.tmp1"]),
        (generate_initial_files4, tempdir + "i_name.tmp1"),
    ]
    for task_func, output in starting_outputs:
        pipeline.originate(task_func=task_func, output=output)
    # Collate everything into a single .tmp2 file...
    pipeline.collate(task_func=test_task2,
                     input=[generate_initial_files1,
                            generate_initial_files2,
                            generate_initial_files3,
                            generate_initial_files4],
                     filter=formatter(),
                     output="{path[0]}/all.tmp2")
    # ...then transform it twice.
    pipeline.transform(task_func=test_task3,
                       input=test_task2,
                       filter=suffix(".tmp2"),
                       output=".tmp3")
    pipeline.transform(task_func=test_task4,
                       input=test_task3,
                       filter=suffix(".tmp3"),
                       output=".tmp4")
    return pipeline