This article collects typical usage examples of the Python method airflow.operators.python_operator.PythonOperator.set_upstream. If you are unsure what PythonOperator.set_upstream does or how to use it, the curated code examples below should help. You can also explore other usage examples of its class, airflow.operators.python_operator.PythonOperator.
The following shows 15 code examples of PythonOperator.set_upstream, sorted by popularity by default.
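As background for the examples: task_b.set_upstream(task_a) declares that task_a must finish before task_b runs; newer Airflow code usually expresses the same dependency with the >> / << bitshift operators. Below is a minimal self-contained sketch using the legacy Airflow 1.x import path shown throughout this article; the DAG id, dates, and callables are made up for illustration:
from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator

dag = DAG('set_upstream_demo', start_date=datetime(2019, 1, 1), schedule_interval=None)


def extract():
    return 'raw data'


def transform():
    return 'clean data'


extract_task = PythonOperator(task_id='extract', python_callable=extract, dag=dag)
transform_task = PythonOperator(task_id='transform', python_callable=transform, dag=dag)

# transform runs only after extract succeeds
transform_task.set_upstream(extract_task)
# equivalent bitshift form: extract_task >> transform_task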
Example 1: PoseidonEmailFileUpdatedOperator
# Required import: from airflow.operators.python_operator import PythonOperator [as alias]
# Or: from airflow.operators.python_operator.PythonOperator import set_upstream [as alias]
dest_s3_bucket=conf['dest_s3_bucket'],
dest_s3_key='tsw/sd_paving_imcat_datasd.csv',
on_failure_callback=notify,
on_retry_callback=notify,
on_success_callback=notify,
replace=True,
dag=dag)
#: send file update email to interested parties
send_last_file_updated_email = PoseidonEmailFileUpdatedOperator(
    task_id='send_last_file_updated',
    to='[email protected],[email protected],[email protected]',
    subject='IMCAT Streets File Updated',
    file_url='http://{}/{}'.format(conf['dest_s3_bucket'],
                                   'tsw/sd_paving_imcat_datasd.csv'),
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)
#: Execution order
#: streets_latest_only must run before get_streets_data
get_streets_data.set_upstream(streets_latest_only)
#: upload_streets_data is dependent on successful run of get_streets_data
upload_streets_data.set_upstream(get_streets_data)
#: email notification is sent after the data was uploaded to S3
send_last_file_updated_email.set_upstream(upload_streets_data)
Example 2: S3FileTransferOperator
# Required import: from airflow.operators.python_operator import PythonOperator [as alias]
# Or: from airflow.operators.python_operator.PythonOperator import set_upstream [as alias]
#: Upload prod file to S3
cfs_to_S3 = S3FileTransferOperator(
    task_id='cfs_to_S3',
    source_base_path=conf['prod_data_dir'],
    source_key='pd_calls_for_service_' + curr_year + '_datasd.csv',
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_key='pd/pd_calls_for_service_' + curr_year + '_datasd.csv',
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)
#: Update portal modified date
update_pd_cfs_md = get_seaboard_update_dag('police-calls-for-service.md', dag)
#: Execution rules:
#: pd_cfs_latest_only must run before pd_cfs_data
get_cfs_data.set_upstream(pd_cfs_latest_only)
#: Data processing is triggered after data retrieval.
process_cfs_data.set_upstream(get_cfs_data)
#: Data upload to S3 is triggered after data processing completion.
cfs_to_S3.set_upstream(process_cfs_data)
#: Github update depends on S3 upload success.
update_pd_cfs_md.set_upstream(cfs_to_S3)
Example 3: S3FileTransferOperator
# Required import: from airflow.operators.python_operator import PythonOperator [as alias]
# Or: from airflow.operators.python_operator.PythonOperator import set_upstream [as alias]
#: Uploads the generated production file
upload_traffic_counts = S3FileTransferOperator(
    task_id='upload_traffic_counts',
    source_base_path=conf['prod_data_dir'],
    source_key='traffic_counts_datasd.csv',
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_key='traffic_counts/traffic_counts_datasd.csv',
    replace=True,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)
#: Update portal modified date
update_traffic_md = get_seaboard_update_dag('traffic-volumes.md', dag)
#: Execution Rules
#: traffic_counts_latest_only must run before get_traffic_counts
get_traffic_counts.set_upstream(tc_latest_only)
#: Cleaning task triggered after data retrieval.
clean_traffic_counts.set_upstream(get_traffic_counts)
#: Production build task triggered after cleaning task.
build_traffic_counts.set_upstream(clean_traffic_counts)
#: Data upload to S3 triggered after production build task.
upload_traffic_counts.set_upstream(build_traffic_counts)
#: Update .md file after S3 upload
update_traffic_md.set_upstream(upload_traffic_counts)
Example 4: PythonOperator
# Required import: from airflow.operators.python_operator import PythonOperator [as alias]
# Or: from airflow.operators.python_operator.PythonOperator import set_upstream [as alias]
get_code_enf_files = PythonOperator(
    task_id='get_code_enf_files',
    python_callable=dfg.get_files,
    op_kwargs={'fname_list': fname_list,
               'target_dir': dsd_temp_dir},
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)
#: Update portal modified date
update_code_enf_md = get_seaboard_update_dag('code-enforcement-violations.md', dag)
#: Execution rules
#: dsd_code_enf_latest_only must run before get_code_enf_files
get_code_enf_files.set_upstream(dsd_ce_latest_only)
for i in fname_list:
    #: Create fme shell command
    build_csv_task = BashOperator(
        task_id='get_' + i,
        bash_command=get_bash_command(i),
        on_failure_callback=notify,
        on_retry_callback=notify,
        on_success_callback=notify,
        dag=dag)

    #: Set Task as Downstream for downloading files
    build_csv_task.set_upstream(get_code_enf_files)
Example 5: my_sleeping_function
# Required import: from airflow.operators.python_operator import PythonOperator [as alias]
# Or: from airflow.operators.python_operator.PythonOperator import set_upstream [as alias]
def my_sleeping_function(random_base):
    '''This is a function that will run within the DAG execution'''
    time.sleep(random_base)


def print_context(ds, **kwargs):
    pprint(kwargs)
    print(ds)
    return 'Whatever you return gets printed in the logs'


run_this = PythonOperator(
    task_id='print_the_context',
    provide_context=True,
    python_callable=print_context,
    dag=dag)

for i in range(10):
    '''
    Generate 10 sleeping tasks, sleeping from 0 to 0.9 seconds
    respectively
    '''
    task = PythonOperator(
        task_id='sleep_for_' + str(i),
        python_callable=my_sleeping_function,
        op_kwargs={'random_base': float(i) / 10},
        dag=dag)

    task.set_upstream(run_this)
Example 6: get_seaboard_update_dag
# Required import: from airflow.operators.python_operator import PythonOperator [as alias]
# Or: from airflow.operators.python_operator.PythonOperator import set_upstream [as alias]
on_success_callback=notify,
dag=dag)
#: update permits.md file
update_permits_md = get_seaboard_update_dag('permits.md', dag)
#: update solar-permits.md file
update_solar_md = get_seaboard_update_dag('solar-permits.md', dag)
#: Execution rules
#: dsd_permits_latest_only must run before get_permits_files
get_permits_files.set_upstream(dsd_permits_latest_only)
#: clean_data is executed after get_permits_files
clean_data.set_upstream(get_permits_files)
#: join_bids is executed after clean_data
join_bids.set_upstream(clean_data)
#: subset_solar is executed after join_bids
subset_solar.set_upstream(join_bids)
#: upload_dsd_permits is executed after subset_solar
upload_dsd_permits.set_upstream(subset_solar)
#: upload_solar_permits is executed after subset_solar
upload_solar_permits.set_upstream(subset_solar)
Example 7: PythonOperator
# Required import: from airflow.operators.python_operator import PythonOperator [as alias]
# Or: from airflow.operators.python_operator.PythonOperator import set_upstream [as alias]
check_updates_with_judges_task = PythonOperator(
    task_id='check_updates_with_judges',
    python_callable=check_updates_with_judges,
    dag=dag)


def extract_name():
    # TODO: Write a function that extracts the judge's name from the text
    return None  # http://blog.yhat.com/posts/named-entities-in-law-and-order-using-nlp.html


def check_name():
    # TODO: Verify the extracted name
    return None  # Validate against a database of judges' names (Portal da Transparencia)


extract_name_task = PythonOperator(
    task_id='extract_name_task',
    python_callable=extract_name,
    dag=dag)

check_name_task = PythonOperator(
    task_id='check_name_task',
    python_callable=check_name,
    dag=dag)

extract_name_task.set_upstream(check_updates_with_judges_task)
check_name_task.set_upstream(extract_name_task)
Example 8: DAG
# Required import: from airflow.operators.python_operator import PythonOperator [as alias]
# Or: from airflow.operators.python_operator.PythonOperator import set_upstream [as alias]
df = df.astype('float64')
df.to_csv('dags/c2k_final.csv')
default_args = {
    'owner': 'Israel Z',
    'start_date': dt.datetime(2018, 5, 9),
    'retries': 1,
    'retry_delay': dt.timedelta(minutes=5),
}

with DAG('flow_pandas',
         default_args=default_args,
         schedule_interval='*/10 * * * *',
         ) as dag:
    download = PythonOperator(task_id='download',
                              python_callable=download)
    dropn = PythonOperator(task_id='dropn',
                           python_callable=dropn)
    fill = PythonOperator(task_id='fill',
                          python_callable=fill)
    cast = PythonOperator(task_id='cast',
                          python_callable=cast)

    # Dependencies
    dropn.set_upstream(download)
    fill.set_upstream(dropn)
    cast.set_upstream(fill)
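The snippet above passes download, dropn, fill, and cast as python_callable targets; their definitions sit earlier in the original file, and only the astype/to_csv tail of one of them survives in this excerpt. A rough sketch of what such step functions could look like, where the file paths and pandas operations are assumptions rather than the original code:
# Hypothetical sketch of the step callables referenced above; paths and
# transformations are assumed, not taken from the original DAG file.
import pandas as pd


def download():
    # Fetch the raw dataset and persist it for the next step (source URL is assumed).
    df = pd.read_csv('https://example.com/c2k_raw.csv')
    df.to_csv('dags/c2k_raw.csv', index=False)


def dropn():
    # Drop completely empty rows.
    df = pd.read_csv('dags/c2k_raw.csv')
    df = df.dropna(how='all')
    df.to_csv('dags/c2k_dropped.csv', index=False)


def fill():
    # Fill the remaining gaps with zeros.
    df = pd.read_csv('dags/c2k_dropped.csv')
    df = df.fillna(0)
    df.to_csv('dags/c2k_filled.csv', index=False)


def cast():
    # Cast everything to float64 and write the final file, matching the tail shown above.
    df = pd.read_csv('dags/c2k_filled.csv')
    df = df.astype('float64')
    df.to_csv('dags/c2k_final.csv')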
Example 9: DAG
# Required import: from airflow.operators.python_operator import PythonOperator [as alias]
# Or: from airflow.operators.python_operator.PythonOperator import set_upstream [as alias]
dag = DAG(dag_id='sire_docs', default_args=args, start_date=start_date, schedule_interval=schedule)

sire_docs_latest_only = LatestOnlyOperator(task_id='sire_docs_latest_only', dag=dag)

#: Get sire tables
get_doc_tables = PythonOperator(
    task_id='get_sire_tables',
    python_callable=get_sire,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)
#: Execution rules
#: sire_docs_latest_only must run before get_doc_tables
get_doc_tables.set_upstream(sire_docs_latest_only)
files = [f for f in os.listdir(conf['prod_data_dir'])]

for f in files:
    if f.split('_')[0] == "sire":
        #: Upload sire prod files to S3
        upload_doc_tables = S3FileTransferOperator(
            task_id='upload_{}'.format(f),
            source_base_path=conf['prod_data_dir'],
            source_key=f,
            dest_s3_conn_id=conf['default_s3_conn_id'],
            dest_s3_bucket=conf['dest_s3_bucket'],
            dest_s3_key='city_docs/{}'.format(f),
            on_failure_callback=notify,
            on_retry_callback=notify,
            on_success_callback=notify,
Example 10: get_seaboard_update_dag
# Required import: from airflow.operators.python_operator import PythonOperator [as alias]
# Or: from airflow.operators.python_operator.PythonOperator import set_upstream [as alias]
on_retry_callback=notify,
on_success_callback=notify,
dag=dag)
#: Update portal modified date
update_parking_trans_md = get_seaboard_update_dag('parking-meters-transactions.md', dag)
#: Execution Rules
#: parking_meters_latest_only must run before get_parking_files
get_parking_files.set_upstream(parking_meters_latest_only)
#: Download Files, build prod file.
#: build_prod_file depends on get_parking_files:
build_prod_file.set_upstream(get_parking_files)
#: Upload Prod File
#: upload_prod_file depends on build_prod_file
upload_prod_file.set_upstream(build_prod_file)
#: Build Aggs
#: build_by_month_aggregation depends on build_prod_file:
build_by_month_aggregation.set_upstream(build_prod_file)
#: build_by_day_aggregation depends on build_prod_file:
build_by_day_aggregation.set_upstream(build_prod_file)
#: Upload Aggs
Example 11: S3FileTransferOperator
# Required import: from airflow.operators.python_operator import PythonOperator [as alias]
# Or: from airflow.operators.python_operator.PythonOperator import set_upstream [as alias]
on_success_callback=notify,
dag=dag)
#: Upload prod cip_datasd.csv file to S3
upload_cip_data = S3FileTransferOperator(
    task_id='upload_cip_data',
    source_base_path=conf['prod_data_dir'],
    source_key='cip_{0}_datasd.csv'.format(fiscal_yr),
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='cip/' + 'cip_{0}_datasd.csv'.format(fiscal_yr),
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    replace=True,
    dag=dag)
#: Update portal modified date
update_cip_md = get_seaboard_update_dag('cip.md', dag)
#: Execution order
#: cip_latest_only must run before get_cip_data
get_cip_data.set_upstream(cip_latest_only)
#: upload_cip_data is dependent on successful run of get_cip_data
upload_cip_data.set_upstream(get_cip_data)
#: upload_cip_data must succeed before updating github
update_cip_md.set_upstream(upload_cip_data)
Example 12: BashOperator
# Required import: from airflow.operators.python_operator import PythonOperator [as alias]
# Or: from airflow.operators.python_operator.PythonOperator import set_upstream [as alias]
date_operator = BashOperator(
    task_id='date_task',
    bash_command='date',
    dag=dag)

#-------------------------------------------------------------------------------
# second operator
sleep_operator = BashOperator(
    task_id='sleep_task',
    depends_on_past=False,
    bash_command='sleep 5',
    dag=dag)

#-------------------------------------------------------------------------------
# third operator
def print_hello():
    return 'Hello world!'


hello_operator = PythonOperator(
    task_id='hello_task',
    python_callable=print_hello,
    dag=dag)
#-------------------------------------------------------------------------------
# dependencies
sleep_operator.set_upstream(date_operator)
hello_operator.set_upstream(date_operator)
Example 13:
# Required import: from airflow.operators.python_operator import PythonOperator [as alias]
# Or: from airflow.operators.python_operator.PythonOperator import set_upstream [as alias]
source_base_path=conf['prod_data_dir'],
source_key='stormwater_violations_merged.geojson',
dest_s3_conn_id=conf['default_s3_conn_id'],
dest_s3_bucket=conf['dest_s3_bucket'],
dest_s3_key='tsw_int/stormwater_violations_merged.geojson',
on_failure_callback=notify,
on_retry_callback=notify,
on_success_callback=notify,
replace=True,
dag=dag)
#: Execution rules
# Get VPM violations runs after latest only
get_vpm_violations.set_upstream(violations_latest_only)
# Get salesforce violations runs after latest only
get_sf_violations.set_upstream(violations_latest_only)
# Get pts violations runs after latest only
get_pts_violations.set_upstream(violations_latest_only)
# SW Violations merge runs after get_pts and get_sf
combine_sw_violations.set_upstream(get_sf_violations)
combine_sw_violations.set_upstream(get_pts_violations)
combine_sw_violations.set_upstream(get_vpm_violations)
# Upload of CSV happens after combine
violations_csv_to_s3.set_upstream(combine_sw_violations)
violations_geojson_to_s3.set_upstream(combine_sw_violations)
violations_csv_null_geos_to_s3.set_upstream(combine_sw_violations)
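Since set_upstream also accepts a list of tasks (example 15 below uses that form), the three fan-in declarations on combine_sw_violations could equivalently be written as a single call; a sketch assuming the same task objects as above:
# equivalent fan-in declaration in one call
combine_sw_violations.set_upstream([get_sf_violations, get_pts_violations, get_vpm_violations])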
Example 14: PythonOperator
# Required import: from airflow.operators.python_operator import PythonOperator [as alias]
# Or: from airflow.operators.python_operator.PythonOperator import set_upstream [as alias]
get_task = PythonOperator(
    task_id='get_' + machine_service_name,
    python_callable=get_requests_service_name,
    op_kwargs={
        'service_name': service_name,
        'machine_service_name': machine_service_name
    },
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

service_tasks.append(get_task)
#: create_prod_files must run before get_task
get_task.set_upstream(create_prod_files)

if i == 'pothole':
    #: get_task must run before sonar potholes
    get_task.set_downstream(create_potholes_sonar)
filename = conf['prod_data_dir'] + "/get_it_done_*.csv"
files = [os.path.basename(x) for x in glob.glob(filename)]

for index, file_ in enumerate(files):
    file_name = file_.split('.')[0]
    name_parts = file_name.split('_')
    task_name = '_'.join(name_parts[3:-2])
    md_name = '-'.join(name_parts[3:-2])
    #: Upload prod gid file to S3
Example 15: puller
# Required import: from airflow.operators.python_operator import PythonOperator [as alias]
# Or: from airflow.operators.python_operator.PythonOperator import set_upstream [as alias]
# pushes an XCom without a specific target, just by returning it
return value_2
def puller(**kwargs):
    ti = kwargs['ti']

    # get value_1
    v1 = ti.xcom_pull(key=None, task_ids='push')
    assert v1 == value_1

    # get value_2
    v2 = ti.xcom_pull(task_ids='push_by_returning')
    assert v2 == value_2

    # get both value_1 and value_2
    v1, v2 = ti.xcom_pull(key=None, task_ids=['push', 'push_by_returning'])
    assert (v1, v2) == (value_1, value_2)


push1 = PythonOperator(
    task_id='push', dag=dag, python_callable=push)

push2 = PythonOperator(
    task_id='push_by_returning', dag=dag, python_callable=push_by_returning)

pull = PythonOperator(
    task_id='puller', dag=dag, python_callable=puller)

pull.set_upstream([push1, push2])
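The puller task relies on push and push_by_returning callables plus value_1/value_2 constants defined earlier in the original file; only the tail of push_by_returning appears in this excerpt. A rough sketch of those missing pieces, where the concrete values and key name are assumptions (note that in Airflow 1.x these callables also need provide_context=True, typically set via default_args, for the ti keyword argument to be passed in):
# Hypothetical reconstruction of the pieces the excerpt assumes; values are made up.
value_1 = [1, 2, 3]
value_2 = {'a': 'b'}


def push(**kwargs):
    # Pushes value_1 under an explicit key via the task instance.
    kwargs['ti'].xcom_push(key='value from pusher', value=value_1)


def push_by_returning(**kwargs):
    # Pushes an XCom without a specific target, just by returning it.
    return value_2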