This article collects typical usage examples of the Python class airflow.operators.python_operator.PythonOperator. If you are unsure what PythonOperator is for, how to use it, or want to see it in context, the curated class examples below may help.
Fifteen code examples of the PythonOperator class are shown below, ordered by popularity.
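Before the examples, a minimal, self-contained sketch of typical PythonOperator usage (the DAG id, callable, and arguments here are illustrative only and do not come from any of the collected examples):

from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator


def greet(name):
    """Plain Python function that the operator invokes at task run time."""
    print('Hello, %s' % name)


dag = DAG(dag_id='python_operator_demo',   # hypothetical DAG id
          start_date=datetime(2019, 1, 1),
          schedule_interval='@daily')

greet_task = PythonOperator(
    task_id='greet_task',
    python_callable=greet,                 # the function to call
    op_kwargs={'name': 'Airflow'},         # keyword arguments passed to it
    dag=dag)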
Example 1: test_python_callable_keyword_arguments_are_templatized
def test_python_callable_keyword_arguments_are_templatized(self):
    """Test PythonOperator op_kwargs are templatized"""
    recorded_calls = []
    task = PythonOperator(
        task_id='python_operator',
        # a Mock instance cannot be used as a callable function or test fails with a
        # TypeError: Object of type Mock is not JSON serializable
        python_callable=build_recording_function(recorded_calls),
        op_kwargs={
            'an_int': 4,
            'a_date': date(2019, 1, 1),
            'a_templated_string': "dag {{dag.dag_id}} ran on {{ds}}."
        },
        dag=self.dag)
    self.dag.create_dagrun(
        run_id='manual__' + DEFAULT_DATE.isoformat(),
        execution_date=DEFAULT_DATE,
        start_date=DEFAULT_DATE,
        state=State.RUNNING
    )
    task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
    self.assertEqual(1, len(recorded_calls))
    self._assertCallsEqual(
        recorded_calls[0],
        Call(an_int=4,
             a_date=date(2019, 1, 1),
             a_templated_string="dag {} ran on {}.".format(
                 self.dag.dag_id, DEFAULT_DATE.date().isoformat()))
    )
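The helpers build_recording_function and Call are defined elsewhere in the test module and are not part of the snippet. A minimal sketch consistent with how the test uses them (the exact implementation may differ):

class Call:
    """Records the positional and keyword arguments of one invocation (sketch)."""
    def __init__(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs


def build_recording_function(calls_collection):
    """Return a plain function that appends a Call per invocation (sketch).

    Used instead of a Mock, since a Mock is not JSON serializable.
    """
    def recording_function(*args, **kwargs):
        calls_collection.append(Call(*args, **kwargs))
    return recording_function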
Example 2: test_python_operator_run
def test_python_operator_run(self):
    """Tests that the python callable is invoked on task run."""
    task = PythonOperator(
        python_callable=self.do_run,
        task_id='python_operator',
        dag=self.dag)
    self.assertFalse(self.is_run())
    task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
    self.assertTrue(self.is_run())
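do_run and is_run are not shown in the snippet; the assertions imply they are small helpers on the test class that record whether the callable fired. A sketch (method names from the snippet, bodies and class name assumed):

import unittest


class TestPythonOperator(unittest.TestCase):  # hypothetical class name
    def setUp(self):
        self.run = False  # flipped by do_run() when the callable fires

    def do_run(self):
        self.run = True

    def is_run(self):
        return self.run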
Example 3: test_echo_env_variables
def test_echo_env_variables(self):
    """
    Test that env variables are exported correctly to the
    python callback in the task.
    """
    self.dag.create_dagrun(
        run_id='manual__' + DEFAULT_DATE.isoformat(),
        execution_date=DEFAULT_DATE,
        start_date=DEFAULT_DATE,
        state=State.RUNNING,
        external_trigger=False,
    )
    t = PythonOperator(task_id='hive_in_python_op',
                       dag=self.dag,
                       python_callable=self._env_var_check_callback)
    t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
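The _env_var_check_callback helper is not shown. Airflow exports the task context to the callable's process as AIRFLOW_CTX_* environment variables, so a sketch of the check might look like this (a method on the same test class; the exact assertions are assumed):

import os


def _env_var_check_callback(self):
    # Airflow sets AIRFLOW_CTX_* variables before invoking the callable;
    # the specific values asserted here are assumptions for illustration
    assert os.environ['AIRFLOW_CTX_DAG_ID'] == self.dag.dag_id
    assert os.environ['AIRFLOW_CTX_TASK_ID'] == 'hive_in_python_op'
    assert os.environ['AIRFLOW_CTX_EXECUTION_DATE'] == DEFAULT_DATE.isoformat()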
Example 4: DAG
#: Dag spec for dsd permits
dag = DAG(dag_id='dsd_permits',
          default_args=args,
          start_date=start_date,
          schedule_interval=schedule)

#: Latest Only Operator for dsd permits.
dsd_permits_latest_only = LatestOnlyOperator(
    task_id='dsd_permits_latest_only', dag=dag)

#: Get permits reports
get_permits_files = PythonOperator(
    task_id='get_permits_files',
    python_callable=get_permits_files,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Clean permits reports
clean_data = PythonOperator(
    task_id='clean_data',
    python_callable=clean_data,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Join BIDs to permits
join_bids = PythonOperator(
Example 5: direction(
# --------------------------------------------------------------------------------
# Output files generated by this task follow the naming convention
# direction(from or to)_twitterHandle_date.csv
# --------------------------------------------------------------------------------
fetch_tweets = PythonOperator(
    task_id='fetch_tweets',
    python_callable=fetchtweets,
    dag=dag)

# --------------------------------------------------------------------------------
# Clean the eight files. In this step you can drop or cherry-pick columns
# and different parts of the text.
# --------------------------------------------------------------------------------
clean_tweets = PythonOperator(
    task_id='clean_tweets',
    python_callable=cleantweets,
    dag=dag)

clean_tweets.set_upstream(fetch_tweets)

# --------------------------------------------------------------------------------
# In this section you can use a script to analyze the twitter data. It could
# simply be sentiment analysis through algorithms like bag-of-words, or
# something more complicated. You could also look at web services for such tasks.
# --------------------------------------------------------------------------------
analyze_tweets = PythonOperator(
    task_id='analyze_tweets',
    python_callable=analyzetweets,
    dag=dag)
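The snippet ends right after analyze_tweets is defined, before its dependency is declared. Mirroring the fetch-to-clean wiring above, the analysis step presumably runs after cleaning; a one-line sketch of the likely continuation:

analyze_tweets.set_upstream(clean_tweets)  # assumed continuation, not shown in the snippet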
Example 6: datetime
DEFAULT_DATE = datetime(2016, 1, 1)

default_args = dict(
    start_date=DEFAULT_DATE,
    owner='airflow')


def fail():
    raise ValueError('Expected failure.')


def success(ti=None, *args, **kwargs):
    if ti.execution_date != DEFAULT_DATE + timedelta(days=1):
        fail()
    return


# DAG tests that tasks ignore all dependencies
dag1 = DAG(dag_id='test_run_ignores_all_dependencies',
           default_args=dict(depends_on_past=True, **default_args))
dag1_task1 = PythonOperator(
    task_id='test_run_dependency_task',
    python_callable=fail,
    dag=dag1)
dag1_task2 = PythonOperator(
    task_id='test_run_dependent_task',
    python_callable=success,
    provide_context=True,
    dag=dag1)
dag1_task1.set_downstream(dag1_task2)
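This module is a test fixture: dag1_task1 always fails, and with depends_on_past=True its failure would normally block dag1_task2. A sketch of how such a fixture can be exercised so the dependent task runs anyway (ignore_all_deps is a TaskInstance.run flag in Airflow 1.x; the exact invocation in Airflow's test suite may differ):

from airflow.models import TaskInstance

# run the dependent task for the day success() expects, with every
# dependency check disabled so the upstream failure is ignored
ti = TaskInstance(task=dag1_task2,
                  execution_date=DEFAULT_DATE + timedelta(days=1))
ti.run(ignore_all_deps=True)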
Example 7: BashOperator
#: Get CFS data from FTP and save to temp folder
get_cfs_data = BashOperator(
    task_id='get_cfs_data',
    bash_command=get_cfs_data(),
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Process CFS data and save result to prod folder
process_cfs_data = PythonOperator(
    task_id='process_cfs_data',
    python_callable=process_cfs_data,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload prod file to S3
cfs_to_S3 = S3FileTransferOperator(
    task_id='cfs_to_S3',
    source_base_path=conf['prod_data_dir'],
    source_key='pd_calls_for_service_' + curr_year + '_datasd.csv',
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_key='pd/pd_calls_for_service_' + curr_year + '_datasd.csv',
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
Example 8: my_sleeping_function
def my_sleeping_function(random_base):
    """This is a function that will run within the DAG execution"""
    time.sleep(random_base)


def print_context(ds, **kwargs):
    pprint(kwargs)
    print(ds)
    return 'Whatever you return gets printed in the logs'


run_this = PythonOperator(
    task_id='print_the_context',
    provide_context=True,
    python_callable=print_context,
    dag=dag)

# Generate 10 sleeping tasks, sleeping from 0.0 to 0.9 seconds respectively
for i in range(10):
    task = PythonOperator(
        task_id='sleep_for_' + str(i),
        python_callable=my_sleeping_function,
        op_kwargs={'random_base': float(i) / 10},
        dag=dag)
    task.set_upstream(run_this)
Example 9: DAG
dag = DAG(dag_id='dsd_code_enforcement',
          default_args=args,
          start_date=start_date,
          schedule_interval=schedule['dsd_code_enforcement'])

#: Latest Only Operator for dsd code enforcement
dsd_ce_latest_only = LatestOnlyOperator(
    task_id='dsd_code_enf_latest_only', dag=dag)

#: Download code enforcement files and unzip them.
get_code_enf_files = PythonOperator(
    task_id='get_code_enf_files',
    python_callable=dfg.get_files,
    op_kwargs={'fname_list': fname_list,
               'target_dir': dsd_temp_dir},
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Update portal modified date
update_code_enf_md = get_seaboard_update_dag('code-enforcement-violations.md', dag)

#: Execution rules
#: dsd_code_enf_latest_only must run before get_code_enf_files
get_code_enf_files.set_upstream(dsd_ce_latest_only)

for i in fname_list:
    #: Create fme shell command
Example 10: print(
print("annotated!")


def test_volume_mount():
    with open('/foo/volume_mount_test.txt', 'w') as foo:
        foo.write('Hello')
    rc = os.system("cat /foo/volume_mount_test.txt")
    assert rc == 0


# You can use annotations on your kubernetes pods!
start_task = PythonOperator(
    task_id="start_task", python_callable=print_stuff, dag=dag,
    executor_config={
        "KubernetesExecutor": {
            "annotations": {"test": "annotation"}
        }
    }
)

# You can mount volume or secret to the worker pod
second_task = PythonOperator(
    task_id="four_task", python_callable=test_volume_mount, dag=dag,
    executor_config={
        "KubernetesExecutor": {
            "volumes": [
                {
                    "name": "test-volume",
                    "hostPath": {"path": "/tmp/"},
                },
            ],
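The snippet is cut off inside the volumes list. This example appears to match Airflow's stock example_kubernetes_executor DAG, where the config continues by mounting the declared volume into the worker pod; a sketch of the presumed complete call:

second_task = PythonOperator(
    task_id="four_task", python_callable=test_volume_mount, dag=dag,
    executor_config={
        "KubernetesExecutor": {
            "volumes": [
                {"name": "test-volume", "hostPath": {"path": "/tmp/"}},
            ],
            # presumed continuation: mount the volume where
            # test_volume_mount() writes its file
            "volume_mounts": [
                {"mountPath": "/foo/", "name": "test-volume"},
            ],
        }
    }
)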
Example 11: PythonOperator
    return None


# load new data to mongodb
load_new_data_task = PythonOperator(
    task_id='load_new_data',
    python_callable=load_new_data,
    dag=dag)


def extract_type(ds, **kwargs):
    year, month, day = ds.split('-')        # ds arrives as e.g. 2016-04-22
    c_ds = "%s/%s/%s" % (day, month, year)  # becomes day-first, e.g. 15/12/2014
    count = 0
    tp = kwargs['tp']
    keyword = kwargs['keyword']
    for andamento in Andamentos.objects(data=c_ds):
        texto_lw = andamento.texto.lower()
        if keyword in texto_lw:
            andamento.tipo = tp
            andamento.save()
            count += 1
    return count


for tp in PROGRESS_TYPES:
    extract_tipo_task = PythonOperator(
        task_id='extract_%s_task' % (tp,),
        python_callable=extract_type,
        op_kwargs={'tp': tp, 'keyword': PROGRESS_TYPES[tp]},
        dag=dag, provide_context=True)
    extract_tipo_task.set_upstream(load_new_data_task)
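The date juggling at the top of extract_type converts Airflow's ISO-formatted ds stamp into the day-first format stored on the Andamentos documents; a quick standalone illustration:

ds = '2016-04-22'                       # what Airflow passes in
year, month, day = ds.split('-')
print("%s/%s/%s" % (day, month, year))  # -> 22/04/2016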
Example 12: DAG
schedule = general.schedule['fd_incidents']
start_date = general.start_date['fd_incidents']
cur_yr = general.get_year()

#: Dag spec
dag = DAG(dag_id='fd_problem_nature', default_args=args, start_date=start_date, schedule_interval=schedule)

#: Latest Only Operator for fd
fd_latest_only = LatestOnlyOperator(task_id='fd_latest_only', dag=dag)

#: Get fire_department data from DB
get_fd_data = PythonOperator(
    task_id='get_fd_data',
    python_callable=get_fd_data,
    provide_context=True,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload prod fire_department_SD.csv file to S3
upload_fd_data = S3FileTransferOperator(
    task_id='upload_fd_data',
    source_base_path=conf['prod_data_dir'],
    source_key='/fd_problems_{}_datasd.csv'.format(cur_yr),
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='fd_cad/' + 'fd_problems_{}_datasd.csv'.format(cur_yr),
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
Example 13: DAG
schedule = general.schedule
start_date = general.start_date['pd_col']

dag = DAG(
    dag_id='pd_col', default_args=args, start_date=start_date,
    schedule_interval=schedule['pd_col'])

#: Latest Only Operator for pd_col
pd_col_latest_only = LatestOnlyOperator(
    task_id='pd_col_latest_only', dag=dag)

#: Get collisions data from FTP and save to temp folder
get_collisions_data = PythonOperator(
    task_id='get_collisions_data',
    python_callable=get_collisions_data,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Process collisions data and save result to prod folder
process_collisions_data = PythonOperator(
    task_id='process_collisions_data',
    python_callable=process_collisions_data,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload prod file to S3
collisions_to_S3 = S3FileTransferOperator(
Example 14: DAG
# All times in Airflow UTC. Set Start Time in PST?
args = general.args
conf = general.config
schedule = general.schedule['public_art']
start_date = general.start_date['public_art']

#: Dag spec
dag = DAG(dag_id='public_art', default_args=args, start_date=start_date, schedule_interval=schedule)

public_art_latest_only = LatestOnlyOperator(task_id='public_art_latest_only', dag=dag)

#: Get public art from NetX, process, output prod file
get_public_art = PythonOperator(
    task_id='get_public_art',
    python_callable=get_public_art,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload prod art file to S3
upload_public_art = S3FileTransferOperator(
    task_id='upload_public_art',
    source_base_path=conf['prod_data_dir'],
    source_key='public_art_locations_datasd.csv',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='public_art/public_art_locations_datasd.csv',
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
Example 15: LatestOnlyOperator
    start_date=start_date,
    schedule_interval=general.schedule['indicator_bacteria_tests'])

#: Latest Only Operator for indicator bacteria tests
wtr_latest_only = LatestOnlyOperator(task_id='water_latest_only', dag=dag)

# TODO - teach me how to be yearly
# Pull out all indicator bac tests.
get_indicator_bac_tests = PythonOperator(
    task_id='get_indicator_bac_tests',
    python_callable=get_indicator_bacteria_tests,
    op_kwargs={
        'date_start': '01-JUN-2014',
        'date_end': (datetime.now() + timedelta(days=5)).strftime('%d-%b-%Y')
    },
    provide_context=True,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

# Get last bacteria tests for any given point.
get_latest_bac_tests = PythonOperator(
    task_id='get_latest_bac_tests',
    python_callable=get_latest_bac_tests,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)