

Python python_operator.PythonOperator Class Code Examples

This article collects typical usage examples of the Python class airflow.operators.python_operator.PythonOperator. If you have been wondering what PythonOperator is for, or how to use it in practice, the curated class examples below should help.


The following presents 15 code examples of the PythonOperator class, ordered by popularity.
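
Before the examples, here is a minimal, self-contained sketch of the class in use. The dag_id, callable, and schedule below are illustrative placeholders, not taken from any of the projects cited later:

from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator


def greet(name):
    # A plain Python function; the operator invokes it when the task runs.
    print('hello, %s' % name)


dag = DAG(
    dag_id='python_operator_demo',   # hypothetical DAG name
    start_date=datetime(2019, 1, 1),
    schedule_interval=None)

greet_task = PythonOperator(
    task_id='greet_task',
    python_callable=greet,           # called as greet(name='world')
    op_kwargs={'name': 'world'},
    dag=dag)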

Example 1: test_python_callable_keyword_arguments_are_templatized

    def test_python_callable_keyword_arguments_are_templatized(self):
        """Test PythonOperator op_kwargs are templatized"""
        recorded_calls = []

        task = PythonOperator(
            task_id='python_operator',
            # a Mock instance cannot be used as the callable, or the test fails with
            # TypeError: Object of type Mock is not JSON serializable
            python_callable=(build_recording_function(recorded_calls)),
            op_kwargs={
                'an_int': 4,
                'a_date': date(2019, 1, 1),
                'a_templated_string': "dag {{dag.dag_id}} ran on {{ds}}."
            },
            dag=self.dag)

        self.dag.create_dagrun(
            run_id='manual__' + DEFAULT_DATE.isoformat(),
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            state=State.RUNNING
        )
        task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        self.assertEqual(1, len(recorded_calls))
        self._assertCallsEqual(
            recorded_calls[0],
            Call(an_int=4,
                 a_date=date(2019, 1, 1),
                 a_templated_string="dag {} ran on {}.".format(
                     self.dag.dag_id, DEFAULT_DATE.date().isoformat()))
        )
Developer: apache, Project: incubator-airflow, Lines: 32, Source: test_python_operator.py
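
The test above exercises the fact that op_kwargs is one of PythonOperator's templated fields, so string values are rendered through Jinja against the task context before the callable runs. A minimal sketch of the same mechanism outside the test harness (all names here are illustrative):

from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator


def report(message):
    # By the time the callable runs, {{ ds }} has been rendered,
    # e.g. "this run covers 2019-01-01".
    print(message)


dag = DAG(
    dag_id='templating_demo',        # hypothetical DAG name
    start_date=datetime(2019, 1, 1),
    schedule_interval='@daily')

report_task = PythonOperator(
    task_id='report_task',
    python_callable=report,
    op_kwargs={'message': 'this run covers {{ ds }}'},
    dag=dag)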

Example 2: test_python_operator_run

    def test_python_operator_run(self):
        """Tests that the python callable is invoked on task run."""
        task = PythonOperator(
            python_callable=self.do_run,
            task_id='python_operator',
            dag=self.dag)
        self.assertFalse(self.is_run())
        task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        self.assertTrue(self.is_run())
Developer: Nextdoor, Project: airflow, Lines: 9, Source: python_operator.py

Example 3: test_echo_env_variables

    def test_echo_env_variables(self):
        """
        Test that env variables are exported correctly to the
        python callback in the task.
        """
        self.dag.create_dagrun(
            run_id='manual__' + DEFAULT_DATE.isoformat(),
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            state=State.RUNNING,
            external_trigger=False,
        )

        t = PythonOperator(task_id='hive_in_python_op',
                           dag=self.dag,
                           python_callable=self._env_var_check_callback
                           )
        t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
Developer: AdamUnger, Project: incubator-airflow, Lines: 18, Source: python_operator.py

Example 4: DAG

#: Dag spec for dsd permits
dag = DAG(dag_id='dsd_permits',
          default_args=args,
          start_date=start_date,
          schedule_interval=schedule)

#: Latest Only Operator for dsd permits.
dsd_permits_latest_only = LatestOnlyOperator(
    task_id='dsd_permits_latest_only', dag=dag)

#: Get permits reports
get_permits_files = PythonOperator(
    task_id='get_permits_files',
    python_callable=get_permits_files,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Clean permits reports
clean_data = PythonOperator(
    task_id='clean_data',
    python_callable=clean_data,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Join BIDs to permits
join_bids = PythonOperator(
Developer: MrMaksimize, Project: docker-airflow, Lines: 30, Source: permits_dags.py
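
The permit tasks above wire the same notify function into on_failure_callback, on_retry_callback, and on_success_callback; Airflow invokes lifecycle callbacks with the task's context dict. A minimal sketch of that pattern, assuming a hypothetical notify implementation (the names below are not from the cited project):

from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator


def notify(context):
    # Airflow passes the task context dict to lifecycle callbacks;
    # a real implementation might post to Slack or send an email.
    print('state change for task %s' % context['task_instance'].task_id)


dag = DAG(dag_id='callback_demo', start_date=datetime(2017, 1, 1),
          schedule_interval=None)

noisy_task = PythonOperator(
    task_id='noisy_task',
    python_callable=lambda: None,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)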

Example 5: direction

# output files generated by this task and naming convention
# is direction(from or to)_twitterHandle_date.csv
# --------------------------------------------------------------------------------

fetch_tweets = PythonOperator(
    task_id='fetch_tweets',
    python_callable=fetchtweets,
    dag=dag)

# --------------------------------------------------------------------------------
# Clean the eight files. In this step you can drop or cherry-pick columns
# and other parts of the text
# --------------------------------------------------------------------------------

clean_tweets = PythonOperator(
    task_id='clean_tweets',
    python_callable=cleantweets,
    dag=dag)

clean_tweets.set_upstream(fetch_tweets)

# --------------------------------------------------------------------------------
# In this section you can use a script to analyze the Twitter data. This could
# be as simple as sentiment analysis via a bag-of-words algorithm, or something
# more sophisticated. You could also use a web service for such tasks.
# --------------------------------------------------------------------------------

analyze_tweets = PythonOperator(
    task_id='analyze_tweets',
    python_callable=analyzetweets,
    dag=dag)
Developer: AdamUnger, Project: incubator-airflow, Lines: 31, Source: example_twitter_dag.py
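
The clean_tweets.set_upstream(fetch_tweets) call above declares a task ordering; Airflow's bitshift operators express the same dependency and are the more common idiom. A tiny self-contained sketch (task names are illustrative):

from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator


def step(label):
    print('running %s' % label)


dag = DAG(dag_id='ordering_demo', start_date=datetime(2019, 1, 1),
          schedule_interval=None)

first = PythonOperator(task_id='first', python_callable=step,
                       op_kwargs={'label': 'first'}, dag=dag)
second = PythonOperator(task_id='second', python_callable=step,
                        op_kwargs={'label': 'second'}, dag=dag)

# These are equivalent ways to say "first runs before second"; use either one:
second.set_upstream(first)
# first >> second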

Example 6: datetime

DEFAULT_DATE = datetime(2016, 1, 1)
default_args = dict(
    start_date=DEFAULT_DATE,
    owner='airflow')


def fail():
    raise ValueError('Expected failure.')


def success(ti=None, *args, **kwargs):
    if ti.execution_date != DEFAULT_DATE + timedelta(days=1):
        fail()
    return


# DAG tests that tasks ignore all dependencies

dag1 = DAG(
    dag_id='test_run_ignores_all_dependencies',
    default_args=dict(depends_on_past=True, **default_args))
dag1_task1 = PythonOperator(
    task_id='test_run_dependency_task',
    python_callable=fail,
    dag=dag1,)
dag1_task2 = PythonOperator(
    task_id='test_run_dependent_task',
    python_callable=success,
    provide_context=True,
    dag=dag1,)
dag1_task1.set_downstream(dag1_task2)
Developer: AdamUnger, Project: incubator-airflow, Lines: 29, Source: test_cli_triggered_dags.py
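
dag1_task2 above sets provide_context=True, which makes Airflow pass the template context (ds, execution_date, ti, and so on) to the callable as keyword arguments; that is how success() can inspect ti.execution_date. A minimal sketch with illustrative names:

from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator


def inspect_context(ds, ti, **kwargs):
    # With provide_context=True these arrive as keyword arguments.
    print('running %s for %s' % (ti.task_id, ds))


dag = DAG(dag_id='context_demo', start_date=datetime(2016, 1, 1),
          schedule_interval=None)

inspect = PythonOperator(
    task_id='inspect_context',
    python_callable=inspect_context,
    provide_context=True,
    dag=dag)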

Example 7: BashOperator


#: Get CFS data from FTP and save to temp folder
get_cfs_data = BashOperator(
    task_id='get_cfs_data',
    bash_command=get_cfs_data(),
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Process CFS data and save result to prod folder
process_cfs_data = PythonOperator(
    task_id='process_cfs_data',
    python_callable=process_cfs_data,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload prod file to S3
cfs_to_S3 = S3FileTransferOperator(
    task_id='cfs_to_S3',
    source_base_path=conf['prod_data_dir'],
    source_key='pd_calls_for_service_'+curr_year+'_datasd.csv',
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_key='pd/pd_calls_for_service_'+curr_year+'_datasd.csv',
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
Developer: MrMaksimize, Project: docker-airflow, Lines: 29, Source: pd_cfs_dags.py

Example 8: my_sleeping_function


def my_sleeping_function(random_base):
    '''This is a function that will run within the DAG execution'''
    time.sleep(random_base)


def print_context(ds, **kwargs):
    pprint(kwargs)
    print(ds)
    return 'Whatever you return gets printed in the logs'

run_this = PythonOperator(
    task_id='print_the_context',
    provide_context=True,
    python_callable=print_context,
    dag=dag)

for i in range(10):
    '''
    Generating 10 sleeping tasks, sleeping from 0.0 to 0.9 seconds
    respectively
    '''
    task = PythonOperator(
        task_id='sleep_for_'+str(i),
        python_callable=my_sleeping_function,
        op_kwargs={'random_base': float(i)/10},
        dag=dag)

    task.set_upstream(run_this)
Developer: BillWangCS, Project: incubator-airflow, Lines: 28, Source: example_python_operator.py

Example 9: DAG

dag = DAG(dag_id='dsd_code_enforcement',
          default_args=args,
          start_date=start_date,
          schedule_interval=schedule['dsd_code_enforcement'])

#: Latest Only Operator for dsd code enforcement
dsd_ce_latest_only = LatestOnlyOperator(
    task_id='dsd_code_enf_latest_only', dag=dag)


#: Download code enforcement files and unzip them.
get_code_enf_files = PythonOperator(
    task_id='get_code_enf_files',
    python_callable=dfg.get_files,
    op_kwargs={'fname_list': fname_list,
               'target_dir': dsd_temp_dir},
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Update portal modified date
update_code_enf_md = get_seaboard_update_dag('code-enforcement-violations.md', dag)

#: Execution rules
#: dsd_code_enf_latest_only must run before get_code_enf_files
get_code_enf_files.set_upstream(dsd_ce_latest_only)


for i in fname_list:
    #: Create fme shell command
Developer: MrMaksimize, Project: docker-airflow, Lines: 31, Source: dsd_dags.py

Example 10: print

    print("annotated!")


def test_volume_mount():
    with open('/foo/volume_mount_test.txt', 'w') as foo:
        foo.write('Hello')

    rc = os.system("cat /foo/volume_mount_test.txt")
    assert rc == 0


# You can use annotations on your kubernetes pods!
start_task = PythonOperator(
    task_id="start_task", python_callable=print_stuff, dag=dag,
    executor_config={
        "KubernetesExecutor": {
            "annotations": {"test": "annotation"}
        }
    }
)

# You can mount volume or secret to the worker pod
second_task = PythonOperator(
    task_id="four_task", python_callable=test_volume_mount, dag=dag,
    executor_config={
        "KubernetesExecutor": {
            "volumes": [
                {
                    "name": "test-volume",
                    "hostPath": {"path": "/tmp/"},
                },
            ],
Developer: MiguelPeralvo, Project: incubator-airflow, Lines: 32, Source: example_kubernetes_executor_config.py

Example 11: PythonOperator

    return None  # load new data to mongodb


load_new_data_task = PythonOperator(
    task_id='load_new_data',
    python_callable=load_new_data,
    dag=dag)


def extract_type(ds, **kwargs):
    year, month, day = ds.split('-')  # 2016-04-22
    c_ds = "%s/%s/%s" % (day, month, year)  # 15/12/2014
    count = 0
    tp = kwargs['tp']
    keyword = kwargs['keyword']
    for andamento in Andamentos.objects(data=c_ds):
        texto_lw = andamento.texto.lower()
        if keyword in texto_lw:
            andamento.tipo = tp
            andamento.save()
            count += 1
    return count


for tp in PROGRESS_TYPES:
    extract_tipo_task = PythonOperator(
        task_id='extract_%s_task' % (tp,),
        python_callable=extract_type, op_kwargs={'tp': tp, 'keyword': PROGRESS_TYPES[tp]},
        dag=dag, provide_context=True)
    extract_tipo_task.set_upstream(load_new_data_task)
Developer: intelivix, Project: etl-example, Lines: 30, Source: update_progress.py
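
The loop above is the standard pattern for generating one task per item: each iteration gets a unique task_id, and passing the loop variable through op_kwargs binds its current value at construction time, which avoids the classic late-binding closure bug. A sketch under the same assumptions (names are illustrative):

from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator


def handle(category):
    print('processing %s' % category)


dag = DAG(dag_id='fanout_demo', start_date=datetime(2016, 1, 1),
          schedule_interval=None)

for category in ('alpha', 'beta', 'gamma'):
    # op_kwargs captures the value of `category` for this iteration,
    # so each generated task processes its own item.
    PythonOperator(
        task_id='handle_%s' % category,
        python_callable=handle,
        op_kwargs={'category': category},
        dag=dag)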

Example 12: DAG

schedule = general.schedule['fd_incidents']
start_date = general.start_date['fd_incidents']
cur_yr = general.get_year()

#: Dag spec
dag = DAG(dag_id='fd_problem_nature', default_args=args, start_date=start_date, schedule_interval=schedule)

#: Latest Only Operator for fd
fd_latest_only = LatestOnlyOperator(task_id='fd_latest_only', dag=dag)


#: Get fire_department data from DB
get_fd_data = PythonOperator(
    task_id='get_fd_data',
    python_callable=get_fd_data,
    provide_context=True,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload prod fire_department_SD.csv file to S3
upload_fd_data = S3FileTransferOperator(
    task_id='upload_fd_data',
    source_base_path=conf['prod_data_dir'],
    source_key='/fd_problems_{}_datasd.csv'.format(cur_yr),
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='fd_cad/' + 'fd_problems_{}_datasd.csv'.format(cur_yr),
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
Developer: MrMaksimize, Project: docker-airflow, Lines: 32, Source: fd_pn_dags.py

Example 13: DAG

schedule = general.schedule
start_date = general.start_date['pd_col']

dag = DAG(
    dag_id='pd_col', default_args=args, start_date=start_date, schedule_interval=schedule['pd_col'])


#: Latest Only Operator for pd_col
pd_col_latest_only = LatestOnlyOperator(
    task_id='pd_col_latest_only', dag=dag)

#: Get collisions data from FTP and save to temp folder
get_collisions_data = PythonOperator(
    task_id='get_collisions_data',
    python_callable=get_collisions_data,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Process collisions data and save result to prod folder
process_collisions_data = PythonOperator(
    task_id='process_collisions_data',
    python_callable=process_collisions_data,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload prod file to S3
collisions_to_S3 = S3FileTransferOperator(
Developer: MrMaksimize, Project: docker-airflow, Lines: 31, Source: pd_col_dags.py

Example 14: DAG

# All times in Airflow UTC.  Set Start Time in PST?
args = general.args
conf = general.config
schedule = general.schedule['public_art']
start_date = general.start_date['public_art']

#: Dag spec
dag = DAG(dag_id='public_art', default_args=args, start_date=start_date, schedule_interval=schedule)

public_art_latest_only = LatestOnlyOperator(task_id='public_art_latest_only', dag=dag)

#: Get public art from NetX, process, output prod file
get_public_art = PythonOperator(
    task_id='get_public_art',
    python_callable=get_public_art,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload prod art file to S3
upload_public_art = S3FileTransferOperator(
    task_id='upload_public_art',
    source_base_path=conf['prod_data_dir'],
    source_key='public_art_locations_datasd.csv',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='public_art/public_art_locations_datasd.csv',
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
Developer: MrMaksimize, Project: docker-airflow, Lines: 31, Source: public_art_dags.py

Example 15: LatestOnlyOperator

    start_date=start_date,
    schedule_interval=general.schedule['indicator_bacteria_tests'])


#: Latest Only Operator for indicator bacteria tests
wtr_latest_only = LatestOnlyOperator(task_id='water_latest_only', dag=dag)


# TODO - teach me how to be yearly
# Pull out all indicator bac tests.
get_indicator_bac_tests = PythonOperator(
    task_id='get_indicator_bac_tests',
    python_callable=get_indicator_bacteria_tests,
    op_kwargs={
        'date_start': '01-JUN-2014',
        'date_end': (datetime.now() + timedelta(days=5)).strftime('%d-%b-%Y')
    },
    provide_context=True,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

# Get last bacteria tests for any given point.
get_latest_bac_tests = PythonOperator(
    task_id='get_latest_bac_tests',
    python_callable=get_latest_bac_tests,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)
Developer: MrMaksimize, Project: docker-airflow, Lines: 31, Source: water_dags.py


Note: The airflow.operators.python_operator.PythonOperator class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by various developers, and copyright in the source code remains with the original authors. Consult each project's license before distributing or using the code; do not reproduce without permission.