This article collects typical usage examples of the Python method airflow.operators.bash_operator.BashOperator.set_upstream. If you have been wondering how BashOperator.set_upstream is used in practice, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples of its class, airflow.operators.bash_operator.BashOperator.
Below are 15 code examples of BashOperator.set_upstream, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
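For orientation, here is a minimal, self-contained sketch of set_upstream (assuming Airflow 1.x, where the airflow.operators.bash_operator module exists; the DAG id, dates, task names, and commands are illustrative only):

from datetime import datetime

from airflow.models import DAG
from airflow.operators.bash_operator import BashOperator

# Illustrative DAG; id, start date, and commands are placeholders.
dag = DAG(
    dag_id='set_upstream_demo',
    start_date=datetime(2017, 1, 1),
    schedule_interval='@daily')

extract = BashOperator(task_id='extract', bash_command='echo extract', dag=dag)
load = BashOperator(task_id='load', bash_command='echo load', dag=dag)

# "load runs after extract"; the two lines below express the same dependency.
load.set_upstream(extract)
# extract >> load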
Example 1: DAG
# Required import: from airflow.operators.bash_operator import BashOperator [as alias]
# Or: from airflow.operators.bash_operator.BashOperator import set_upstream [as alias]
import airflow  # needed for airflow.utils.dates.days_ago below
from airflow.operators.bash_operator import BashOperator
from airflow.models import DAG
from datetime import timedelta
args = {
'owner': 'airflow',
'start_date': airflow.utils.dates.days_ago(3),
}
dag = DAG(
dag_id='perf_dag_1', default_args=args,
schedule_interval='@daily',
dagrun_timeout=timedelta(minutes=60))
task_1 = BashOperator(
task_id='perf_task_1',
bash_command='sleep 5; echo "run_id={{ run_id }} | dag_run={{ dag_run }}"',
dag=dag)
for i in range(2, 5):
task = BashOperator(
task_id='perf_task_{}'.format(i),
bash_command='''
sleep 5; echo "run_id={{ run_id }} | dag_run={{ dag_run }}"
''',
dag=dag)
task.set_upstream(task_1)
if __name__ == "__main__":
dag.cli()
Example 2: datetime
# Required import: from airflow.operators.bash_operator import BashOperator [as alias]
# Or: from airflow.operators.bash_operator.BashOperator import set_upstream [as alias]
# 'end_date': datetime(2016, 4, 24),
}
dag = DAG('undeploy_prediction_pmml', default_args=default_args)
# TODO: dockerFileTag and dockerFilePath should be passed in from webhook
switch_to_aws = BashOperator(
task_id='switch_to_aws',
bash_command='sudo kubectl config use-context awsdemo',
dag=dag)
undeploy_container_aws = BashOperator(
task_id='undeploy_container_to_aws',
bash_command='sudo kubectl delete prediction-pmml',
dag=dag)
switch_to_gcp = BashOperator(
task_id='switch_to_gcp',
bash_command='sudo kubectl config use-context gcpdemo',
dag=dag)
undeploy_container_gcp = BashOperator(
task_id='undeploy_container_gcp',
bash_command='sudo kubectl delete prediction-pmml',
dag=dag)
# Setup Airflow DAG
undeploy_container_aws.set_upstream(switch_to_aws)
switch_to_gcp.set_upstream(undeploy_container_aws)
undeploy_container_gcp.set_upstream(switch_to_gcp)
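As an aside, the same linear chain can be expressed with the bitshift operators, which Airflow provides as shorthand for set_downstream/set_upstream; a minimal, equivalent sketch reusing the task names above:

# Equivalent to the three set_upstream calls above, read left to right.
switch_to_aws >> undeploy_container_aws >> switch_to_gcp >> undeploy_container_gcp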
Example 3: my_py_command
# Required import: from airflow.operators.bash_operator import BashOperator [as alias]
# Or: from airflow.operators.bash_operator.BashOperator import set_upstream [as alias]
def my_py_command(ds, **kwargs):
# Print out the "foo" param passed in via
# `airflow test example_passing_params_via_test_command run_this <date>
# -tp '{"foo":"bar"}'`
if kwargs["test_mode"]:
print(" 'foo' was passed in via test={} command : kwargs[params][foo] \
= {}".format(kwargs["test_mode"], kwargs["params"]["foo"]))
# Print out the value of "miff", passed in below via the Python Operator
print(" 'miff' was passed in via task params = {}".format(kwargs["params"]["miff"]))
return 1
my_templated_command = """
echo " 'foo was passed in via Airflow CLI Test command with value {{ params.foo }} "
echo " 'miff was passed in via BashOperator with value {{ params.miff }} "
"""
run_this = PythonOperator(
task_id='run_this',
provide_context=True,
python_callable=my_py_command,
params={"miff":"agg"},
dag=dag)
also_run_this = BashOperator(
task_id='also_run_this',
bash_command=my_templated_command,
params={"miff":"agg"},
dag=dag)
also_run_this.set_upstream(run_this)
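This snippet omits its preamble; a hedged sketch of what it presumably relies on (the imports and a dag object; the dag_id and dates here are assumptions, not taken from the original):

from datetime import datetime

from airflow.models import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator

# Assumed preamble: the example needs a DAG instance named `dag`.
dag = DAG(
    dag_id='example_passing_params_via_test_command',
    start_date=datetime(2017, 1, 1),
    schedule_interval=None)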
Example 4: create_sde_tasks
# Required import: from airflow.operators.bash_operator import BashOperator [as alias]
# Or: from airflow.operators.bash_operator.BashOperator import set_upstream [as alias]
#......... part of the code is omitted here .........
on_failure_callback=notify,
on_retry_callback=notify,
on_success_callback=notify,
replace=True,
dag=dag)
#: Upload topojson to S3
topojson_to_S3 = S3FileTransferOperator(
task_id='{layer}_topojson_to_S3'.format(layer=layer),
source_base_path=conf['prod_data_dir'],
source_key='{datasd_name}.topojson'.format(datasd_name=datasd_name),
dest_s3_conn_id=conf['default_s3_conn_id'],
dest_s3_bucket=conf['dest_s3_bucket'],
dest_s3_key='sde/{folder}/{datasd_name}.topojson'
.format(folder=folder, datasd_name=datasd_name),
on_failure_callback=notify,
on_retry_callback=notify,
on_success_callback=notify,
replace=True,
dag=dag)
#: Update portal modified date
update_md = get_seaboard_update_dag('{md}.md'.format(md=md), dag)
if layer not in no_pbf:
#: Convert GeoJSON to Geobuf format
to_geobuf = PythonOperator(
task_id='{layer}_to_geobuf'.format(layer=layer),
python_callable=geojson_to_geobuf,
op_kwargs={'path_to_file': path_to_file},
on_failure_callback=notify,
on_retry_callback=notify,
on_success_callback=notify,
dag=dag)
#: Convert geobuf to gzipped geobuf
to_gzip = PythonOperator(
task_id='{layer}_geobuf_to_gzip'.format(layer=layer),
python_callable=geobuf_to_gzip,
op_kwargs={'datasd_name': datasd_name},
on_failure_callback=notify,
on_retry_callback=notify,
on_success_callback=notify,
dag=dag)
#: Upload geobuf to S3
geobuf_to_S3 = S3FileTransferOperator(
task_id='{layer}_geobuf_to_S3'.format(layer=layer),
source_base_path=conf['prod_data_dir'],
source_key='{datasd_name}.pbf'.format(datasd_name=datasd_name),
dest_s3_conn_id=conf['default_s3_conn_id'],
dest_s3_bucket=conf['dest_s3_bucket'],
dest_s3_key='sde/{folder}/{datasd_name}.pbf'
.format(folder=folder, datasd_name=datasd_name),
on_failure_callback=notify,
on_retry_callback=notify,
on_success_callback=notify,
replace=True,
use_gzip=True,
dag=dag)
#: Conversion to geobuf is triggered after conversion to geojson.
to_geobuf.set_upstream(to_geojson)
#: Compression to gzip is triggered after conversion to geobuf.
to_gzip.set_upstream(to_geobuf)
#: geobuf upload to S3 is triggered after compression to gzipped geobuf.
geobuf_to_S3.set_upstream(to_gzip)
#: Github update depends on shapefile S3 upload success.
update_md.set_upstream(geobuf_to_S3)
#: Execution rules:
#: sde_latest_only must run before shp conversion.
to_shp.set_upstream(sde_latest_only)
#: Conversion to geojson is triggered after conversion to shp.
to_geojson.set_upstream(to_shp)
#: Conversion to topojson is triggered after conversion to shapefile.
to_topojson.set_upstream(to_shp)
#: Compression to zip is triggered after conversion to geojson and topojson.
to_zip.set_upstream(to_geojson)
to_zip.set_upstream(to_topojson)
#: shapefile upload to S3 is triggered after conversion to zip.
shp_to_S3.set_upstream(to_zip)
#: geojson upload to S3 is triggered after conversion to geojson.
geojson_to_S3.set_upstream(to_geojson)
#: topojson upload to S3 is triggered after conversion to topojson.
topojson_to_S3.set_upstream(to_topojson)
#: Github update depends on shapefile S3 upload success.
update_md.set_upstream(shp_to_S3)
update_md.set_upstream(geojson_to_S3)
update_md.set_upstream(topojson_to_S3)
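A side note: the linear stretch of this graph (shapefile to geojson to geobuf to gzip to S3 upload) could also be wired with the chain helper; a minimal sketch, assuming an Airflow 1.10.x release where airflow.utils.helpers.chain is available, covering only that linear part:

from airflow.utils.helpers import chain

# Each task becomes upstream of the one that follows it, like chained set_upstream calls.
chain(to_shp, to_geojson, to_geobuf, to_gzip, geobuf_to_S3)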
Example 5: timedelta
# Required import: from airflow.operators.bash_operator import BashOperator [as alias]
# Or: from airflow.operators.bash_operator.BashOperator import set_upstream [as alias]
'retry_delay': timedelta(minutes=5),
# 'queue': 'bash_queue',
# 'pool': 'backfill',
# 'priority_weight': 10,
# 'end_date': datetime(2016, 1, 1),
}
THE_HUMAN_GENOME = "/Users/mlyons/genomics/reference/human_g1k_v37.fasta"
BAM_DIR = "/Users/mlyons/genomics/1kg/bam"
BIN_DIR = "/Users/mlyons/genomics/bin"
simple_mapping_pipeline = DAG(dag_id="simple_mapping_pipeline", default_args=default_args, schedule_interval=timedelta(minutes=2))
# figure out some sensor to look for a fastq file to map
fastq_sensor = FastqSensor(directory="/Users/mlyons/genomics/1kg/unprocessed_fastq",
dag=simple_mapping_pipeline,
task_id='fastq_sensor',
poke_interval=60)
"""bwa mem {{ path_to_reference_file }} {{ ti.xcom_pull('unmapped_fastq') }} > {{ path_to_output }}/{{ task_instance_key_str }}.sam"""
bwa_mem = BashOperator(bash_command=BWA_MEM_COMMAND,
params={'path_to_reference_file': THE_HUMAN_GENOME,
'path_to_output': BAM_DIR,
'bin': BIN_DIR},
dag=simple_mapping_pipeline,
task_id='bwa_mem',
wait_for_downstream=False)
bwa_mem.set_upstream(fastq_sensor)
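FastqSensor is not a built-in Airflow operator and its implementation is not shown here. A hypothetical sketch of such a sensor, assuming an Airflow 1.x layout where BaseSensorOperator can be imported from airflow.operators.sensors (the class body and the file-matching logic below are assumptions):

import glob
import os

from airflow.operators.sensors import BaseSensorOperator
from airflow.utils.decorators import apply_defaults


class FastqSensor(BaseSensorOperator):
    """Hypothetical sensor that succeeds once a FASTQ file appears in a directory."""

    @apply_defaults
    def __init__(self, directory, *args, **kwargs):
        super(FastqSensor, self).__init__(*args, **kwargs)
        self.directory = directory

    def poke(self, context):
        # True as soon as any *.fastq file exists, which lets downstream tasks run.
        return bool(glob.glob(os.path.join(self.directory, '*.fastq')))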
Example 6: BashOperator
# Required import: from airflow.operators.bash_operator import BashOperator [as alias]
# Or: from airflow.operators.bash_operator.BashOperator import set_upstream [as alias]
#: dsd_code_enf_latest_only must run before get_code_enf_files
get_code_enf_files.set_upstream(dsd_ce_latest_only)
for i in fname_list:
#: Create fme shell command
build_csv_task = BashOperator(
task_id='get_' + i,
bash_command=get_bash_command(i),
on_failure_callback=notify,
on_retry_callback=notify,
on_success_callback=notify,
dag=dag)
#: Set Task as Downstream for downloading files
build_csv_task.set_upstream(get_code_enf_files)
#: Create S3 Upload task
s3_task = S3FileTransferOperator(
task_id='upload_' + i,
source_base_path=conf['prod_data_dir'],
source_key=i + '_datasd.csv',
dest_s3_bucket=conf['dest_s3_bucket'],
dest_s3_conn_id=conf['default_s3_conn_id'],
dest_s3_key='dsd/' + i + '_datasd.csv',
replace=True,
on_failure_callback=notify,
on_retry_callback=notify,
on_success_callback=notify,
dag=dag)
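The example is cut off before the upload task is wired up; presumably (an assumption, not shown in the original) each upload waits for its matching CSV build, inside the same loop:

    # Assumed continuation: upload only after the CSV has been built.
    s3_task.set_upstream(build_csv_task)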
Example 7: timedelta
# Required import: from airflow.operators.bash_operator import BashOperator [as alias]
# Or: from airflow.operators.bash_operator.BashOperator import set_upstream [as alias]
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': timedelta(minutes=5),
# 'queue': 'bash_queue',
# 'pool': 'backfill',
# 'priority_weight': 10,
# 'end_date': datetime(2016, 1, 1),
}
dag = DAG('test_1', default_args=default_args)
# t1, t2 and t3 are examples of tasks created by instantiating operators
t1 = BashOperator(
task_id='step1',
bash_command='echo start',
dag=dag)
template_command='''
sh step2.sh
'''
t2 = BashOperator(
task_id='step2',
bash_command=template_command,
retries=3,
dag=dag)
t2.set_upstream(t1)
Example 8: BashOperator
# Required import: from airflow.operators.bash_operator import BashOperator [as alias]
# Or: from airflow.operators.bash_operator.BashOperator import set_upstream [as alias]
for toople in chromosome_split_operators.iteritems():
region, operator = toople
outfile = "{WORK_DIR}/{region}.vcf"
freebayes_by_region = BashOperator(bash_command=freebayes_command,
params={
'reference': "/path/to/human.fasta",
'outfile': outfile,
'region': region,
'opts': default_args['freebayes'],
'in_bam': "{WORK_DIR}/{region}.bam".format(**locals())
},
dag=BAKE_OFF_PIPE,
task_id="{region}_freebayes".format(**locals())
)
freebayes_operators[region] = freebayes_by_region
freebayes_by_region.set_upstream(operator)
# now merge
vcf_concat_command = """vcf-concat-parts {{ in_files }} | vcf-sort > {{ outfile }}"""
infiles = []
for toople in freebayes_operators.iteritems():
region, operator = toople
infiles.append("{WORK_DIR}/{region}.vcf".format(**locals()))
concat_operator = BashOperator(bash_command=vcf_concat_command,
params={
'in_files': ' '.join(infiles),
'outfile': 'concatted.vcf'
},
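Example 8 stops in the middle of the concat_operator call (note it also targets Python 2, hence dict.iteritems()). Presumably the call closes with dag and task_id arguments and the merge step then waits for every per-region task; a hypothetical sketch of that wiring, not part of the original:

# Hypothetical continuation: the VCF merge runs only after all per-region freebayes tasks.
for operator in freebayes_operators.values():
    concat_operator.set_upstream(operator)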
Example 9: BashOperator
# Required import: from airflow.operators.bash_operator import BashOperator [as alias]
# Or: from airflow.operators.bash_operator.BashOperator import set_upstream [as alias]
y1_task = BashOperator(
task_id='y1',
bash_command='sleep 1 && echo [y1 start]',
dag=dag)
y2_task = BashOperator(
task_id='y2',
bash_command='sleep 2 && echo [y2 start]',
dag=dag)
segmentation_task = BashOperator(
task_id='segmentation',
bash_command='sleep 1 && echo [segmentation start]',
dag=dag)
merge_task.set_upstream(xlsx_to_csv_task)
cleansing_task.set_upstream(merge_task)
x1_task.set_upstream(cleansing_task)
x2_task.set_upstream(cleansing_task)
x3_task.set_upstream(cleansing_task)
y1_task.set_upstream(cleansing_task)
y2_task.set_upstream(cleansing_task)
x1_task.set_downstream(segmentation_task)
x2_task.set_downstream(segmentation_task)
x3_task.set_downstream(segmentation_task)
y1_task.set_downstream(segmentation_task)
y2_task.set_downstream(segmentation_task)
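Both set_upstream and set_downstream accept a list of tasks, so the fan-out/fan-in above can be written more compactly; a minimal, equivalent sketch reusing the task names from this example:

# Fan out from cleansing, then fan back in to segmentation.
branches = [x1_task, x2_task, x3_task, y1_task, y2_task]
cleansing_task.set_downstream(branches)
segmentation_task.set_upstream(branches)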
Example 10: get_seaboard_update_dag
# Required import: from airflow.operators.bash_operator import BashOperator [as alias]
# Or: from airflow.operators.bash_operator.BashOperator import set_upstream [as alias]
dest_s3_bucket=conf['dest_s3_bucket'],
dest_s3_conn_id=conf['default_s3_conn_id'],
dest_s3_key='parking_meters/' + flist['by_day'],
replace=True,
on_failure_callback=notify,
on_retry_callback=notify,
on_success_callback=notify,
dag=dag)
#: Update portal modified date
update_parking_trans_md = get_seaboard_update_dag('parking-meters-transactions.md', dag)
#: Execution Rules
#: parking_meters_latest_only must run before get_parking_files
get_parking_files.set_upstream(parking_meters_latest_only)
#: Download Files, build prod file.
#: build_prod_file depends on get_parking_files:
build_prod_file.set_upstream(get_parking_files)
#: Upload Prod File
#: upload_prod_file depends on build_prod_file
upload_prod_file.set_upstream(build_prod_file)
#: Build Aggs
#: build_by_month_aggregation depends on build_prod_file:
build_by_month_aggregation.set_upstream(build_prod_file)
Example 11: BashOperator
# Required import: from airflow.operators.bash_operator import BashOperator [as alias]
# Or: from airflow.operators.bash_operator.BashOperator import set_upstream [as alias]
# Run a simple PySpark Script
pyspark_local_task_one = BashOperator(
task_id = "pyspark_local_task_one",
bash_command = """spark-submit \
--master {{ params.master }} \
{{ params.base_path }}/{{ params.filename }} {{ ts }} {{ params.base_path }}""",
params = {
"master": "local[8]",
"filename": "ch02/pyspark_task_one.py",
"base_path": "{}/".format(project_home)
},
dag=dag
)
# Run another simple PySpark Script that depends on the previous one
pyspark_local_task_two = BashOperator(
task_id = "pyspark_local_task_two",
bash_command = """spark-submit \
--master {{ params.master }} \
{{ params.base_path }}/{{ params.filename }} {{ ts }} {{ params.base_path }}""",
params = {
"master": "local[8]",
"filename": "ch02/pyspark_task_two.py",
"base_path": "{}/".format(project_home)
},
dag=dag
)
# Add the dependency from the second to the first task
pyspark_local_task_two.set_upstream(pyspark_local_task_one)
Example 12: BashOperator
# Required import: from airflow.operators.bash_operator import BashOperator [as alias]
# Or: from airflow.operators.bash_operator.BashOperator import set_upstream [as alias]
date_operator = BashOperator(
task_id='date_task',
bash_command='date',
dag=dag)
#-------------------------------------------------------------------------------
# second operator
sleep_operator = BashOperator(
task_id='sleep_task',
depends_on_past=False,
bash_command='sleep 5',
dag=dag)
#-------------------------------------------------------------------------------
# third operator
def print_hello():
return 'Hello world!'
hello_operator = PythonOperator(
task_id='hello_task',
python_callable=print_hello,
dag=dag)
#-------------------------------------------------------------------------------
# dependencies
sleep_operator.set_upstream(date_operator)
hello_operator.set_upstream(date_operator)
Example 13:
# Required import: from airflow.operators.bash_operator import BashOperator [as alias]
# Or: from airflow.operators.bash_operator.BashOperator import set_upstream [as alias]
source_base_path=conf['prod_data_dir'],
source_key='stormwater_violations_merged.geojson',
dest_s3_conn_id=conf['default_s3_conn_id'],
dest_s3_bucket=conf['dest_s3_bucket'],
dest_s3_key='tsw_int/stormwater_violations_merged.geojson',
on_failure_callback=notify,
on_retry_callback=notify,
on_success_callback=notify,
replace=True,
dag=dag)
#: Execution rules
# Get VPM violations runs after latest only
get_vpm_violations.set_upstream(violations_latest_only)
# Get salesforce violations runs after latest only
get_sf_violations.set_upstream(violations_latest_only)
# Get pts violations runs after latest only
get_pts_violations.set_upstream(violations_latest_only)
# SW Violations merge runs after get_pts and get_sf
combine_sw_violations.set_upstream(get_sf_violations)
combine_sw_violations.set_upstream(get_pts_violations)
combine_sw_violations.set_upstream(get_vpm_violations)
# Upload of CSV happens after combine
violations_csv_to_s3.set_upstream(combine_sw_violations)
violations_geojson_to_s3.set_upstream(combine_sw_violations)
violations_csv_null_geos_to_s3.set_upstream(combine_sw_violations)
Example 14: BashOperator
# Required import: from airflow.operators.bash_operator import BashOperator [as alias]
# Or: from airflow.operators.bash_operator.BashOperator import set_upstream [as alias]
default_args=default_args)
t1 = BashOperator(
task_id='print_date',
bash_command='date',
dag=dag)
t2 = PythonOperator(
task_id='run_job',
python_callable=run_job,
op_args=('75588', 300, '8edd9e11f4de44b39f666777ac79bfe1'),
retries=1,
dag=dag)
templated_command = """
{% for i in range(5) %}
echo "{{ ds }}"
echo "{{ macros.ds_add(ds, 7)}}"
echo "{{ params.my_param }}"
{% endfor %}
"""
t3 = BashOperator(
task_id='templated',
bash_command=templated_command,
params={'my_param': 'Parameter I passed in'},
dag=dag)
t2.set_upstream(t1)
t3.set_upstream(t1)
Example 15: BashOperator
# Required import: from airflow.operators.bash_operator import BashOperator [as alias]
# Or: from airflow.operators.bash_operator.BashOperator import set_upstream [as alias]
local_dir = "/tmp/"
# define the location where you want to store in HDFS
hdfs_dir = " /tmp/"
for channel in to_channels:
file_name = "to_" + channel + "_" + yesterday.strftime("%Y-%m-%d") + ".csv"
load_to_hdfs = BashOperator(
task_id="put_" + channel + "_to_hdfs",
bash_command="HADOOP_USER_NAME=hdfs hadoop fs -put -f " +
local_dir + file_name +
hdfs_dir + channel + "/",
dag=dag)
load_to_hdfs.set_upstream(analyze_tweets)
load_to_hive = HiveOperator(
task_id="load_" + channel + "_to_hive",
hql="LOAD DATA INPATH '" +
hdfs_dir + channel + "/" + file_name + "' "
"INTO TABLE " + channel + " "
"PARTITION(dt='" + dt + "')",
dag=dag)
load_to_hive.set_upstream(load_to_hdfs)
load_to_hive.set_downstream(hive_to_mysql)
for channel in from_channels:
file_name = "from_" + channel + "_" + yesterday.strftime("%Y-%m-%d") + ".csv"
load_to_hdfs = BashOperator(
task_id="put_" + channel + "_to_hdfs",