This article collects typical usage examples of the Python class airflow.models.DagBag. If you have been wondering what exactly DagBag is for and how to use it, the curated class examples below may help.

The following 15 code examples of the DagBag class are sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
Example 1: trigger_dag

def trigger_dag(dag_id, run_id=None, conf=None, execution_date=None):
    dagbag = DagBag()
    if dag_id not in dagbag.dags:
        raise AirflowException("Dag id {} not found".format(dag_id))
    dag = dagbag.get_dag(dag_id)

    if not execution_date:
        execution_date = datetime.now()

    if not run_id:
        run_id = "manual__{0}".format(execution_date.isoformat())

    dr = DagRun.find(dag_id=dag_id, run_id=run_id)
    if dr:
        raise AirflowException("Run id {} already exists for dag id {}".format(
            run_id,
            dag_id
        ))

    run_conf = None
    if conf:
        run_conf = json.loads(conf)

    trigger = dag.create_dagrun(
        run_id=run_id,
        execution_date=execution_date,
        state=State.RUNNING,
        conf=run_conf,
        external_trigger=True
    )
    return trigger
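As a quick illustration, the helper above can be called directly once an Airflow metadata database is initialized. A minimal usage sketch, assuming the example DAG 'example_bash_operator' has been loaded and that json, datetime, and the Airflow models used above are importable:

# Usage sketch (dag_id and conf are illustrative; conf is a JSON string):
new_run = trigger_dag('example_bash_operator', conf='{"param": "value"}')
print(new_run.run_id, new_run.execution_date)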
Example 2: downgrade

def downgrade():
    engine = settings.engine
    if engine.dialect.has_table(engine, 'task_instance'):
        connection = op.get_bind()
        sessionmaker = sa.orm.sessionmaker()
        session = sessionmaker(bind=connection)
        dagbag = DagBag(settings.DAGS_FOLDER)
        query = session.query(sa.func.count(TaskInstance.max_tries)).filter(
            TaskInstance.max_tries != -1
        )
        # Work through the table in batches rather than loading it whole.
        while query.scalar():
            tis = session.query(TaskInstance).filter(
                TaskInstance.max_tries != -1
            ).limit(BATCH_SIZE).all()
            for ti in tis:
                dag = dagbag.get_dag(ti.dag_id)
                if not dag or not dag.has_task(ti.task_id):
                    ti.try_number = 0
                else:
                    task = dag.get_task(ti.task_id)
                    # max_tries - try_number is the number of retries the
                    # task instance has left, so the current try_number is
                    # the maximum number of self-retries (task.retries)
                    # minus the number of retries it has left.
                    ti.try_number = max(0, task.retries - (ti.max_tries -
                                                           ti.try_number))
                ti.max_tries = -1
                session.merge(ti)
            session.commit()
        session.commit()
    op.drop_column('task_instance', 'max_tries')
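The migration above relies on module-level names that the surrounding Alembic file would define. A minimal sketch of that assumed setup; the BATCH_SIZE value is illustrative, not taken from the original file:

# Assumed module-level setup for this Alembic migration (a sketch):
import sqlalchemy as sa
from alembic import op

from airflow import settings
from airflow.models import DagBag, TaskInstance

# Process task instances in chunks so the whole table is never loaded
# into memory at once; the exact batch size is an assumption.
BATCH_SIZE = 5000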
Example 3: test_dag_with_system_exit

def test_dag_with_system_exit(self):
    """
    Test to check that a DAG with a sys.exit() doesn't break the scheduler.
    """
    dag_id = 'exit_test_dag'
    dag_ids = [dag_id]
    dag_directory = os.path.join(models.DAGS_FOLDER,
                                 "..",
                                 "dags_with_system_exit")
    dag_file = os.path.join(dag_directory,
                            'b_test_scheduler_dags.py')

    dagbag = DagBag(dag_folder=dag_file)
    for dag_id in dag_ids:
        dag = dagbag.get_dag(dag_id)
        dag.clear()

    scheduler = SchedulerJob(dag_ids=dag_ids,
                             subdir=dag_directory,
                             num_runs=1,
                             **self.default_scheduler_args)
    scheduler.run()
    session = settings.Session()
    self.assertEqual(
        len(session.query(TI).filter(TI.dag_id == dag_id).all()), 1)
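The DAG file the test loads, b_test_scheduler_dags.py, is not shown here. A hedged sketch of what such a file might contain: a normal DAG definition followed by a sys.exit() at import time, which is exactly what the scheduler must survive. All names besides the dag_id are assumptions:

# Hypothetical contents of b_test_scheduler_dags.py:
import sys
from datetime import datetime

from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator

dag = DAG(dag_id='exit_test_dag', start_date=datetime(2015, 1, 1))
task = DummyOperator(task_id='dummy', dag=dag, owner='airflow')

# Simulate a DAG file that kills its own interpreter at parse time.
sys.exit(-1)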
Example 4: get_task_instance

def get_task_instance(dag_id, task_id, execution_date):
    """Return the task instance identified by the given dag_id, task_id
    and execution_date."""
    dagbag = DagBag()

    # Check that the DAG exists.
    if dag_id not in dagbag.dags:
        error_message = "Dag id {} not found".format(dag_id)
        raise AirflowException(error_message)

    # Get the DAG object and check that the task exists.
    dag = dagbag.get_dag(dag_id)
    if not dag.has_task(task_id):
        error_message = 'Task {} not found in dag {}'.format(task_id, dag_id)
        raise AirflowException(error_message)

    # Get the DagRun object and check that it exists.
    dagrun = dag.get_dagrun(execution_date=execution_date)
    if not dagrun:
        error_message = ('Dag Run for date {} not found in dag {}'
                         .format(execution_date, dag_id))
        raise AirflowException(error_message)

    # Get the task instance object and check that it exists.
    task_instance = dagrun.get_task_instance(task_id)
    if not task_instance:
        error_message = ('Task {} instance for date {} not found'
                         .format(task_id, execution_date))
        raise AirflowException(error_message)

    return task_instance
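A usage sketch for this helper, assuming a DAG run already exists for the given execution date (identifiers and the date are illustrative):

# Usage sketch; raises AirflowException if the DAG, task, DAG run, or
# task instance cannot be found.
from datetime import datetime

ti = get_task_instance('example_bash_operator', 'run_this_last',
                       datetime(2019, 1, 1))
print(ti.state)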
Example 5: poke

def poke(self, context, session=None):
    if self.execution_delta:
        dttm = context['execution_date'] - self.execution_delta
    elif self.execution_date_fn:
        dttm = self.execution_date_fn(context['execution_date'])
    else:
        dttm = context['execution_date']

    dttm_filter = dttm if isinstance(dttm, list) else [dttm]
    serialized_dttm_filter = ','.join(
        [dt.isoformat() for dt in dttm_filter])

    self.log.info(
        'Poking for '
        '{self.external_dag_id}.'
        '{self.external_task_id} on '
        '{} ... '.format(serialized_dttm_filter, **locals()))

    DM = DagModel
    TI = TaskInstance
    DR = DagRun

    # Only check existence on the first poke; there is no need to repeat
    # the check on subsequent pokes.
    if self.check_existence and not self.has_checked_existence:
        dag_to_wait = session.query(DM).filter(
            DM.dag_id == self.external_dag_id
        ).first()

        if not dag_to_wait:
            raise AirflowException('The external DAG '
                                   '{} does not exist.'.format(self.external_dag_id))
        else:
            if not os.path.exists(dag_to_wait.fileloc):
                raise AirflowException('The external DAG '
                                       '{} was deleted.'.format(self.external_dag_id))

        if self.external_task_id:
            refreshed_dag_info = DagBag(dag_to_wait.fileloc).get_dag(self.external_dag_id)
            if not refreshed_dag_info.has_task(self.external_task_id):
                raise AirflowException('The external task '
                                       '{} in DAG {} does not exist.'.format(self.external_task_id,
                                                                             self.external_dag_id))
        self.has_checked_existence = True

    if self.external_task_id:
        count = session.query(TI).filter(
            TI.dag_id == self.external_dag_id,
            TI.task_id == self.external_task_id,
            TI.state.in_(self.allowed_states),
            TI.execution_date.in_(dttm_filter),
        ).count()
    else:
        count = session.query(DR).filter(
            DR.dag_id == self.external_dag_id,
            DR.state.in_(self.allowed_states),
            DR.execution_date.in_(dttm_filter),
        ).count()

    session.commit()
    return count == len(dttm_filter)
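This poke() matches the behavior of Airflow's ExternalTaskSensor, which waits for a task (or a whole DAG run) in another DAG to reach an allowed state. A sketch of how such a sensor is typically wired into a DAG; the parameter values are illustrative and `dag` is assumed to be an enclosing DAG object:

# Illustrative ExternalTaskSensor usage (Airflow 1.10.x import path):
from datetime import timedelta

from airflow.sensors.external_task_sensor import ExternalTaskSensor

wait_for_upstream = ExternalTaskSensor(
    task_id='wait_for_upstream',
    external_dag_id='upstream_dag',
    external_task_id='final_task',
    execution_delta=timedelta(hours=1),  # look one schedule interval back
    check_existence=True,  # triggers the first-poke existence check above
    dag=dag,
)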
Example 6: list_dags

def list_dags(args):
    dagbag = DagBag(process_subdir(args.subdir))
    s = textwrap.dedent("""\n
    -------------------------------------------------------------------
    DAGS
    -------------------------------------------------------------------
    {dag_list}
    """)
    dag_list = "\n".join(sorted(dagbag.dags))
    print(s.format(dag_list=dag_list))
    if args.report:
        print(dagbag.dagbag_report())
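list_dags is the handler behind the `airflow list_dags` CLI command, so args is a parsed argparse namespace. It can also be invoked directly with a stand-in object, as in this sketch (the path is illustrative):

# Calling the CLI handler directly with a stand-in for the parsed args:
import argparse

args = argparse.Namespace(subdir='~/airflow/dags', report=True)
list_dags(args)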
Example 7: test_subdag_deadlock

def test_subdag_deadlock(self):
    dagbag = DagBag()
    dag = dagbag.get_dag('test_subdag_deadlock')
    dag.clear()
    subdag = dagbag.get_dag('test_subdag_deadlock.subdag')
    subdag.clear()

    # First make sure the subdag has failed.
    self.assertRaises(
        AirflowException, subdag.run,
        start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    # Now make sure the parent dag picks up the subdag error.
    self.assertRaises(
        AirflowException, dag.run,
        start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
Example 8: test_scheduler_reschedule

def test_scheduler_reschedule(self):
    """
    Checks if tasks that are not taken up by the executor
    get rescheduled.
    """
    executor = TestExecutor()
    dagbag = DagBag(executor=executor)
    dagbag.dags.clear()
    dagbag.executor = executor

    dag = DAG(
        dag_id='test_scheduler_reschedule',
        start_date=DEFAULT_DATE)
    dag_task1 = DummyOperator(
        task_id='dummy',
        dag=dag,
        owner='airflow')

    dag.clear()
    dag.is_subdag = False

    session = settings.Session()
    orm_dag = DagModel(dag_id=dag.dag_id)
    orm_dag.is_paused = False
    session.merge(orm_dag)
    session.commit()

    dagbag.bag_dag(dag=dag, root_dag=dag, parent_dag=dag)

    @mock.patch('airflow.models.DagBag', return_value=dagbag)
    @mock.patch('airflow.models.DagBag.collect_dags')
    def do_schedule(function, function2):
        # Use an empty file since the mocks above will return the
        # expected DAGs. Also specify only a single file so that the
        # scheduler doesn't try to schedule the above DAG repeatedly.
        scheduler = SchedulerJob(num_runs=1,
                                 executor=executor,
                                 subdir=os.path.join(models.DAGS_FOLDER,
                                                     "no_dags.py"))
        scheduler.heartrate = 0
        scheduler.run()

    do_schedule()
    self.assertEqual(1, len(executor.queued_tasks))
    executor.queued_tasks.clear()

    do_schedule()
    self.assertEqual(2, len(executor.queued_tasks))
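TestExecutor is a test helper that is not defined in this snippet. A minimal stand-in, under the assumption that it subclasses BaseExecutor and simply never runs what is queued, which is what lets the test observe rescheduled tasks in executor.queued_tasks:

# Hypothetical minimal TestExecutor (a sketch, not Airflow's actual helper):
from airflow.executors.base_executor import BaseExecutor

class TestExecutor(BaseExecutor):
    def execute_async(self, key, command, queue=None):
        pass  # accept the task but never actually run it

    def heartbeat(self):
        pass  # skip the base heartbeat so queued tasks are not drained

    def terminate(self):
        pass

    def end(self):
        self.sync()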
Example 9: get_task

def get_task(dag_id, task_id):
    """Return the task object identified by the given dag_id and task_id."""
    dagbag = DagBag()

    # Check that the DAG exists.
    if dag_id not in dagbag.dags:
        error_message = "Dag id {} not found".format(dag_id)
        raise AirflowException(error_message)

    # Get the DAG object and check that the task exists.
    dag = dagbag.get_dag(dag_id)
    if not dag.has_task(task_id):
        error_message = 'Task {} not found in dag {}'.format(task_id, dag_id)
        raise AirflowException(error_message)

    # Return the task.
    return dag.get_task(task_id)
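A usage sketch, mirroring the lookup pattern of get_task_instance above (identifiers are illustrative):

# Usage sketch: fetch the operator and report its type.
try:
    task = get_task('example_bash_operator', 'run_this_last')
    print(task.task_type)
except AirflowException as err:
    print('lookup failed: {}'.format(err))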
Example 10: execute

def execute(self, context):
    dro = DagRunOrder(run_id='trig__' + timezone.utcnow().isoformat())
    dro = self.python_callable(context, dro)
    if dro:
        with create_session() as session:
            dbag = DagBag(settings.DAGS_FOLDER)
            trigger_dag = dbag.get_dag(self.trigger_dag_id)
            dr = trigger_dag.create_dagrun(
                run_id=dro.run_id,
                state=State.RUNNING,
                conf=dro.payload,
                external_trigger=True)
            self.log.info("Creating DagRun %s", dr)
            session.add(dr)
            session.commit()
    else:
        self.log.info("Criteria not met, moving on")
Example 11: upgrade

def upgrade():
    op.add_column('task_instance', sa.Column('max_tries', sa.Integer,
                                             server_default="-1"))
    # Check that the task_instance table exists before migrating data.
    # This check is needed for databases that do not create tables until
    # the migration finishes, and it prevents errors from querying a
    # non-existent task_instance table.
    connection = op.get_bind()
    inspector = Inspector.from_engine(connection)
    tables = inspector.get_table_names()

    if 'task_instance' in tables:
        # Get the current session.
        sessionmaker = sa.orm.sessionmaker()
        session = sessionmaker(bind=connection)
        dagbag = DagBag(settings.DAGS_FOLDER)
        query = session.query(sa.func.count(TaskInstance.max_tries)).filter(
            TaskInstance.max_tries == -1
        )
        # Separate the db query into batches to avoid loading the entire
        # table into memory and causing an out-of-memory error.
        while query.scalar():
            tis = session.query(TaskInstance).filter(
                TaskInstance.max_tries == -1
            ).limit(BATCH_SIZE).all()
            for ti in tis:
                dag = dagbag.get_dag(ti.dag_id)
                if not dag or not dag.has_task(ti.task_id):
                    # The task_instance table might not have up-to-date
                    # information: a dag or task may have been modified or
                    # deleted from the DagBag while still being present in
                    # the task_instance table. In that case, do not retry
                    # the task that can't be parsed.
                    ti.max_tries = ti.try_number
                else:
                    task = dag.get_task(ti.task_id)
                    if task.retries:
                        ti.max_tries = task.retries
                    else:
                        ti.max_tries = ti.try_number
                session.merge(ti)
            session.commit()
        # Commit the current session.
        session.commit()

Developer: 7digital | Project: incubator-airflow | Lines: 45 | Source file: cc1e65623dc7_add_max_tries_column_to_task_instance.py
Example 12: execute

def execute(self, context):
    dro = DagRunOrder(run_id='trig__' + datetime.now().isoformat())
    dro = self.python_callable(context, dro)
    if dro:
        session = settings.Session()
        dbag = DagBag(os.path.expanduser(conf.get('core', 'DAGS_FOLDER')))
        trigger_dag = dbag.get_dag(self.trigger_dag_id)
        dr = trigger_dag.create_dagrun(
            run_id=dro.run_id,
            state=State.RUNNING,
            conf=dro.payload,
            external_trigger=True)
        logging.info("Creating DagRun {}".format(dr))
        session.add(dr)
        session.commit()
        session.close()
    else:
        logging.info("Criteria not met, moving on")
Example 13: test_scheduler_reschedule

def test_scheduler_reschedule(self):
    """
    Checks if tasks that are not taken up by the executor
    get rescheduled.
    """
    executor = TestExecutor()
    dagbag = DagBag(executor=executor)
    dagbag.dags.clear()
    dagbag.executor = executor

    dag = DAG(
        dag_id='test_scheduler_reschedule',
        start_date=DEFAULT_DATE)
    dag_task1 = DummyOperator(
        task_id='dummy',
        dag=dag,
        owner='airflow')

    dag.clear()
    dag.is_subdag = False

    session = settings.Session()
    orm_dag = DagModel(dag_id=dag.dag_id)
    orm_dag.is_paused = False
    session.merge(orm_dag)
    session.commit()

    dagbag.bag_dag(dag=dag, root_dag=dag, parent_dag=dag)

    @mock.patch('airflow.models.DagBag', return_value=dagbag)
    @mock.patch('airflow.models.DagBag.collect_dags')
    def do_schedule(function, function2):
        scheduler = SchedulerJob(num_runs=1, executor=executor)
        scheduler.heartrate = 0
        scheduler.run()

    do_schedule()
    self.assertEqual(1, len(executor.queued_tasks))
    executor.queued_tasks.clear()

    do_schedule()
    self.assertEqual(2, len(executor.queued_tasks))
Example 14: test_trigger_dag_for_date

def test_trigger_dag_for_date(self):
    url_template = '/api/experimental/dags/{}/dag_runs'
    dag_id = 'example_bash_operator'
    hour_from_now = utcnow() + timedelta(hours=1)
    execution_date = datetime(hour_from_now.year,
                              hour_from_now.month,
                              hour_from_now.day,
                              hour_from_now.hour)
    datetime_string = execution_date.isoformat()

    # Test correct execution.
    response = self.client.post(
        url_template.format(dag_id),
        data=json.dumps({'execution_date': datetime_string}),
        content_type="application/json"
    )
    self.assertEqual(200, response.status_code)

    dagbag = DagBag()
    dag = dagbag.get_dag(dag_id)
    dag_run = dag.get_dagrun(execution_date)
    self.assertTrue(dag_run,
                    'Dag Run not found for execution date {}'
                    .format(execution_date))

    # Test error for a nonexistent dag.
    response = self.client.post(
        url_template.format('does_not_exist_dag'),
        data=json.dumps({'execution_date': execution_date.isoformat()}),
        content_type="application/json"
    )
    self.assertEqual(404, response.status_code)

    # Test error for a bad datetime format.
    response = self.client.post(
        url_template.format(dag_id),
        data=json.dumps({'execution_date': 'not_a_datetime'}),
        content_type="application/json"
    )
    self.assertEqual(400, response.status_code)
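The same experimental endpoint can be exercised against a running webserver rather than the test client. A hedged sketch, assuming a webserver on localhost:8080 with the experimental API enabled and the requests library installed:

# Trigger a DAG run through the experimental REST API (illustrative):
import json
import requests

resp = requests.post(
    'http://localhost:8080/api/experimental/dags/example_bash_operator/dag_runs',
    data=json.dumps({'execution_date': '2020-01-01T00:00:00'}),
    headers={'Content-Type': 'application/json'},
)
print(resp.status_code, resp.text)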
Example 15: test_find_zombies

def test_find_zombies(self):
    manager = DagFileProcessorManager(
        dag_directory='directory',
        file_paths=['abc.txt'],
        max_runs=1,
        processor_factory=MagicMock().return_value,
        signal_conn=MagicMock(),
        stat_queue=MagicMock(),
        result_queue=MagicMock(),
        async_mode=True)

    dagbag = DagBag(TEST_DAG_FOLDER)
    with create_session() as session:
        session.query(LJ).delete()
        dag = dagbag.get_dag('example_branch_operator')
        task = dag.get_task(task_id='run_this_first')

        # Create a running task instance whose local job has been shut
        # down, i.e. a zombie.
        ti = TI(task, DEFAULT_DATE, State.RUNNING)
        lj = LJ(ti)
        lj.state = State.SHUTDOWN
        lj.id = 1
        ti.job_id = lj.id

        session.add(lj)
        session.add(ti)
        session.commit()

        manager._last_zombie_query_time = timezone.utcnow() - timedelta(
            seconds=manager._zombie_threshold_secs + 1)
        zombies = manager._find_zombies()
        self.assertEqual(1, len(zombies))
        self.assertIsInstance(zombies[0], SimpleTaskInstance)
        self.assertEqual(ti.dag_id, zombies[0].dag_id)
        self.assertEqual(ti.task_id, zombies[0].task_id)
        self.assertEqual(ti.execution_date, zombies[0].execution_date)

        session.query(TI).delete()
        session.query(LJ).delete()