本文整理汇总了 Python 中 airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook 类的典型用法代码示例。如果您正苦于以下问题：Python DataFlowHook 类的具体用法？Python DataFlowHook 怎么用？Python DataFlowHook 使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 DataFlowHook 类的 11 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: execute
def execute(self, context):
    """Start a Java Dataflow job through ``DataFlowHook``.

    The job options are a shallow copy of ``self.dataflow_default_options``
    updated with ``self.options``, so entries in ``self.options`` win on
    key collisions. ``context`` is accepted but not used in this method.
    """
    dataflow_hook = DataFlowHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to,
    )

    # Update a copy rather than the defaults object itself.
    job_options = copy.copy(self.dataflow_default_options)
    job_options.update(self.options)

    dataflow_hook.start_java_dataflow(self.task_id, job_options, self.jar)
示例2: execute
def execute(self, context):
    """Start a templated Dataflow job through ``DataFlowHook``.

    The hook is built from the operator's connection id, delegation
    setting and ``poll_sleep`` value; ``start_template_dataflow`` is then
    called with the task id, the default options, the template parameters
    and the template. ``context`` is accepted but not used in this method.
    """
    dataflow_hook = DataFlowHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to,
        poll_sleep=self.poll_sleep,
    )
    start_template = dataflow_hook.start_template_dataflow
    start_template(
        self.task_id,
        self.dataflow_default_options,
        self.parameters,
        self.template,
    )
示例3: execute
def execute(self, context):
    """Resolve the jar via ``GoogleCloudBucketHelper`` and start a Java job.

    ``self.jar`` is overwritten with the value returned by
    ``google_cloud_to_local`` (presumably a local path for a jar stored in
    GCS -- confirm against the helper). The job options are a shallow copy
    of ``self.dataflow_default_options`` updated with ``self.options``.
    ``context`` is accepted but not used in this method.
    """
    gcs_helper = GoogleCloudBucketHelper(self.gcp_conn_id, self.delegate_to)
    # NOTE: this rebinds the operator attribute, not just a local variable.
    self.jar = gcs_helper.google_cloud_to_local(self.jar)

    dataflow_hook = DataFlowHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to,
    )

    # Update a copy rather than the defaults object itself.
    job_options = copy.copy(self.dataflow_default_options)
    job_options.update(self.options)

    dataflow_hook.start_java_dataflow(self.task_id, job_options, self.jar)
示例4: DataFlowTemplateHookTest
class DataFlowTemplateHookTest(unittest.TestCase):
    """Tests for ``DataFlowHook.start_template_dataflow``."""

    def setUp(self):
        """Create the hook with the base hook's ``__init__`` swapped for ``mock_init``."""
        base_init_target = BASE_STRING.format('GoogleCloudBaseHook.__init__')
        with mock.patch(base_init_target, new=mock_init):
            self.dataflow_hook = DataFlowHook(gcp_conn_id='test')

    @mock.patch(DATAFLOW_STRING.format('DataFlowHook._start_template_dataflow'))
    def test_start_template_dataflow(self, internal_dataflow_mock):
        """The public method must call the patched internal one exactly once."""
        self.dataflow_hook.start_template_dataflow(
            task_id=TASK_ID,
            variables=DATAFLOW_OPTIONS_TEMPLATE,
            parameters=PARAMETERS,
            dataflow_template=TEMPLATE,
        )

        # The first positional argument is not pinned (mock.ANY).
        internal_dataflow_mock.assert_called_once_with(
            mock.ANY,
            DATAFLOW_OPTIONS_TEMPLATE,
            PARAMETERS,
            TEMPLATE,
        )
示例5: DataFlowHookTest
class DataFlowHookTest(unittest.TestCase):
    """Tests for ``DataFlowHook.start_python_dataflow``."""

    def setUp(self):
        """Create the hook with the base hook's ``__init__`` swapped for ``mock_init``."""
        base_init_target = BASE_STRING.format('GoogleCloudBaseHook.__init__')
        with mock.patch(base_init_target, new=mock_init):
            self.dataflow_hook = DataFlowHook(gcp_conn_id='test')

    @mock.patch(DATAFLOW_STRING.format('DataFlowHook._start_dataflow'))
    def test_start_python_dataflow(self, internal_dataflow_mock):
        """The public method must call the patched ``_start_dataflow`` once."""
        self.dataflow_hook.start_python_dataflow(
            task_id=TASK_ID,
            variables=OPTIONS,
            dataflow=PY_FILE,
            py_options=PY_OPTIONS,
        )

        # Last argument: 'python' followed by the supplied py_options.
        expected_last_arg = ['python'] + PY_OPTIONS
        # The fourth positional argument is not pinned (mock.ANY).
        internal_dataflow_mock.assert_called_once_with(
            TASK_ID, OPTIONS, PY_FILE, mock.ANY, expected_last_arg)
示例6: DataFlowTemplateHookTest
class DataFlowTemplateHookTest(unittest.TestCase):
    """Tests for launching templated jobs through ``DataFlowHook``."""

    def setUp(self):
        """Create the hook with the base hook's ``__init__`` swapped for ``mock_init``."""
        base_init_target = BASE_STRING.format('GoogleCloudBaseHook.__init__')
        with mock.patch(base_init_target, new=mock_init):
            self.dataflow_hook = DataFlowHook(gcp_conn_id='test')

    @mock.patch(DATAFLOW_STRING.format('DataFlowHook._start_template_dataflow'))
    def test_start_template_dataflow(self, internal_dataflow_mock):
        """The forwarded options are expected to carry a ``region`` entry."""
        self.dataflow_hook.start_template_dataflow(
            job_name=JOB_NAME,
            variables=DATAFLOW_OPTIONS_TEMPLATE,
            parameters=PARAMETERS,
            dataflow_template=TEMPLATE,
        )

        # Seed the region first; DATAFLOW_OPTIONS_TEMPLATE entries override
        # it on key collisions.
        expected_options = {'region': 'us-central1'}
        expected_options.update(DATAFLOW_OPTIONS_TEMPLATE)

        # The first positional argument is not pinned (mock.ANY).
        internal_dataflow_mock.assert_called_once_with(
            mock.ANY, expected_options, PARAMETERS, TEMPLATE)

    @mock.patch(DATAFLOW_STRING.format('_DataflowJob'))
    @mock.patch(DATAFLOW_STRING.format('DataFlowHook.get_conn'))
    def test_start_template_dataflow_with_runtime_env(self, mock_conn,
                                                      mock_dataflowjob):
        """Check the arguments of the ``templates().launch`` call.

        The variables passed in are ``RUNTIME_ENV`` updated with the
        template options; the launch body is expected to carry
        ``RUNTIME_ENV`` under ``"environment"``.
        """
        template_options = copy.deepcopy(DATAFLOW_OPTIONS_TEMPLATE)
        launch_variables = copy.deepcopy(RUNTIME_ENV)
        launch_variables.update(template_options)

        mock_dataflowjob.return_value.wait_for_done.return_value = None

        # Walk the mocked client step by step down to ``launch``:
        # conn.projects().locations().templates().launch
        projects_api = mock_conn.return_value.projects.return_value
        locations_api = projects_api.locations.return_value
        templates_api = locations_api.templates.return_value
        launch_mock = templates_api.launch

        self.dataflow_hook.start_template_dataflow(
            job_name=JOB_NAME,
            variables=launch_variables,
            parameters=PARAMETERS,
            dataflow_template=TEMPLATE,
        )

        expected_body = {
            "jobName": mock.ANY,
            "parameters": PARAMETERS,
            "environment": RUNTIME_ENV,
        }
        launch_mock.assert_called_once_with(
            projectId=launch_variables['project'],
            location='us-central1',
            gcsPath=TEMPLATE,
            body=expected_body,
        )
示例7: DataFlowTemplateHookTest
class DataFlowTemplateHookTest(unittest.TestCase):
    """Tests for ``DataFlowHook.start_template_dataflow``."""

    def setUp(self):
        """Create the hook with the base hook's ``__init__`` swapped for ``mock_init``."""
        base_init_target = BASE_STRING.format('GoogleCloudBaseHook.__init__')
        with mock.patch(base_init_target, new=mock_init):
            self.dataflow_hook = DataFlowHook(gcp_conn_id='test')

    @mock.patch(DATAFLOW_STRING.format('DataFlowHook._start_template_dataflow'))
    def test_start_template_dataflow(self, internal_dataflow_mock):
        """The forwarded options are expected to carry a ``region`` entry."""
        self.dataflow_hook.start_template_dataflow(
            job_name=JOB_NAME,
            variables=DATAFLOW_OPTIONS_TEMPLATE,
            parameters=PARAMETERS,
            dataflow_template=TEMPLATE,
        )

        # Seed the region first; DATAFLOW_OPTIONS_TEMPLATE entries override
        # it on key collisions.
        expected_options = {'region': 'us-central1'}
        expected_options.update(DATAFLOW_OPTIONS_TEMPLATE)

        # The first positional argument is not pinned (mock.ANY).
        internal_dataflow_mock.assert_called_once_with(
            mock.ANY, expected_options, PARAMETERS, TEMPLATE)
示例8: DataFlowHookTest
class DataFlowHookTest(unittest.TestCase):
    """Tests for ``DataFlowHook`` and the ``_Dataflow`` process wrapper."""

    def setUp(self):
        """Create the hook with the base hook's ``__init__`` swapped for ``mock_init``."""
        with mock.patch(BASE_STRING.format('GoogleCloudBaseHook.__init__'),
                        new=mock_init):
            self.dataflow_hook = DataFlowHook(gcp_conn_id='test')

    @mock.patch(DATAFLOW_STRING.format('DataFlowHook._start_dataflow'))
    def test_start_python_dataflow(self, internal_dataflow_mock):
        """The public method must call the patched ``_start_dataflow`` once."""
        self.dataflow_hook.start_python_dataflow(
            task_id=TASK_ID, variables=OPTIONS,
            dataflow=PY_FILE, py_options=PY_OPTIONS)
        # The fourth positional argument is not pinned (mock.ANY).
        internal_dataflow_mock.assert_called_once_with(
            TASK_ID, OPTIONS, PY_FILE, mock.ANY, ['python'] + PY_OPTIONS)

    @mock.patch('airflow.contrib.hooks.gcp_dataflow_hook._Dataflow.log')
    @mock.patch('subprocess.Popen')
    @mock.patch('select.select')
    def test_dataflow_wait_for_done_logging(self, mock_select, mock_popen,
                                            mock_logging):
        """Simulate a failing subprocess and check logging plus the raise.

        ``subprocess.Popen`` and ``select.select`` are patched so that the
        fake process exposes two stderr lines and reports exit status 1.
        The test asserts that the command is logged at info level, that
        ``wait_for_done`` raises, and that each stderr line is logged as a
        warning without its trailing newline.
        """
        mock_logging.info = MagicMock()
        mock_logging.warning = MagicMock()

        mock_proc = MagicMock()
        mock_proc.stderr = MagicMock()
        mock_proc.stderr.readlines = MagicMock(
            return_value=['test\n', 'error\n'])
        mock_stderr_fd = MagicMock()
        mock_proc.stderr.fileno = MagicMock(return_value=mock_stderr_fd)
        mock_select.return_value = [[mock_stderr_fd]]

        # A list ``side_effect`` returns its items as-is: a callable placed
        # in the list is handed back, not invoked. The previous version
        # relied on such a callable to set the exit status (and used the
        # non-Popen name ``return_code``), so the status was never set.
        # Configure the exit status directly and let ``poll`` report
        # "still running" (None) once, then the exit status.
        mock_proc.returncode = 1
        mock_proc.poll = MagicMock(side_effect=[None, 1])
        mock_popen.return_value = mock_proc

        dataflow = _Dataflow(['test', 'cmd'])
        mock_logging.info.assert_called_with('Running command: %s', 'test cmd')
        self.assertRaises(Exception, dataflow.wait_for_done)
        mock_logging.warning.assert_has_calls([call('test'), call('error')])
示例9: setUp
def setUp(self):
    """Create ``self.dataflow_hook`` for the tests.

    ``GoogleCloudBaseHook.__init__`` is replaced by ``mock_init`` only
    while the hook is being constructed.
    """
    base_init_target = BASE_STRING.format('GoogleCloudBaseHook.__init__')
    with mock.patch(base_init_target, new=mock_init):
        self.dataflow_hook = DataFlowHook(gcp_conn_id='test')
示例10: DataFlowHookTest
class DataFlowHookTest(unittest.TestCase):
    """Tests for the commands ``DataFlowHook`` passes to ``_Dataflow``."""

    def setUp(self):
        """Create the hook with the base hook's ``__init__`` swapped for ``mock_init``."""
        base_init_target = BASE_STRING.format('GoogleCloudBaseHook.__init__')
        with mock.patch(base_init_target, new=mock_init):
            self.dataflow_hook = DataFlowHook(gcp_conn_id='test')

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid1'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJob'))
    @mock.patch(DATAFLOW_STRING.format('_Dataflow'))
    @mock.patch(DATAFLOW_STRING.format('DataFlowHook.get_conn'))
    def test_start_python_dataflow(self, mock_conn,
                                   mock_dataflow, mock_dataflowjob, mock_uuid):
        """Compare the Python launch command with the expected arguments.

        The first positional argument given to the patched ``_Dataflow``
        is compared with the expected list after sorting both sides, so
        argument order is not checked.
        """
        mock_uuid.return_value = MOCK_UUID
        mock_conn.return_value = None
        mock_dataflow.return_value.wait_for_done.return_value = None
        mock_dataflowjob.return_value.wait_for_done.return_value = None

        self.dataflow_hook.start_python_dataflow(
            task_id=TASK_ID,
            variables=DATAFLOW_OPTIONS_PY,
            dataflow=PY_FILE,
            py_options=PY_OPTIONS,
        )

        expected_cmd = [
            'python', '-m', PY_FILE,
            '--region=us-central1',
            '--runner=DataflowRunner',
            '--project=test',
            '--labels=foo=bar',
            '--staging_location=gs://test/staging',
            '--job_name={}-{}'.format(TASK_ID, MOCK_UUID),
        ]
        actual_cmd = mock_dataflow.call_args[0][0]
        self.assertListEqual(sorted(actual_cmd), sorted(expected_cmd))

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid1'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJob'))
    @mock.patch(DATAFLOW_STRING.format('_Dataflow'))
    @mock.patch(DATAFLOW_STRING.format('DataFlowHook.get_conn'))
    def test_start_java_dataflow(self, mock_conn,
                                 mock_dataflow, mock_dataflowjob, mock_uuid):
        """Compare the ``java -jar`` launch command with the expected arguments.

        The comparison sorts both lists, so argument order is not checked.
        """
        mock_uuid.return_value = MOCK_UUID
        mock_conn.return_value = None
        mock_dataflow.return_value.wait_for_done.return_value = None
        mock_dataflowjob.return_value.wait_for_done.return_value = None

        self.dataflow_hook.start_java_dataflow(
            task_id=TASK_ID,
            variables=DATAFLOW_OPTIONS_JAVA,
            dataflow=JAR_FILE,
        )

        expected_cmd = [
            'java', '-jar', JAR_FILE,
            '--region=us-central1',
            '--runner=DataflowRunner',
            '--project=test',
            '--stagingLocation=gs://test/staging',
            '--labels={"foo":"bar"}',
            '--jobName={}-{}'.format(TASK_ID, MOCK_UUID),
        ]
        actual_cmd = mock_dataflow.call_args[0][0]
        self.assertListEqual(sorted(actual_cmd), sorted(expected_cmd))

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid1'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJob'))
    @mock.patch(DATAFLOW_STRING.format('_Dataflow'))
    @mock.patch(DATAFLOW_STRING.format('DataFlowHook.get_conn'))
    def test_start_java_dataflow_with_job_class(
            self, mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid):
        """Compare the ``java -cp`` launch command used when a job class is given.

        The comparison sorts both lists, so argument order is not checked.
        """
        mock_uuid.return_value = MOCK_UUID
        mock_conn.return_value = None
        mock_dataflow.return_value.wait_for_done.return_value = None
        mock_dataflowjob.return_value.wait_for_done.return_value = None

        self.dataflow_hook.start_java_dataflow(
            task_id=TASK_ID,
            variables=DATAFLOW_OPTIONS_JAVA,
            dataflow=JAR_FILE,
            job_class=JOB_CLASS,
        )

        expected_cmd = [
            'java', '-cp', JAR_FILE, JOB_CLASS,
            '--region=us-central1',
            '--runner=DataflowRunner',
            '--project=test',
            '--stagingLocation=gs://test/staging',
            '--labels={"foo":"bar"}',
            '--jobName={}-{}'.format(TASK_ID, MOCK_UUID),
        ]
        actual_cmd = mock_dataflow.call_args[0][0]
        self.assertListEqual(sorted(actual_cmd), sorted(expected_cmd))

    @mock.patch('airflow.contrib.hooks.gcp_dataflow_hook._Dataflow.log')
    @mock.patch('subprocess.Popen')
    @mock.patch('select.select')
    def test_dataflow_wait_for_done_logging(self, mock_select, mock_popen,
                                            mock_logging):
        """Build a fake subprocess and construct ``_Dataflow`` on top of it.

        The rest of this test is omitted in the source listing, so the
        local names and statements below are kept exactly as given.
        """
        mock_logging.info = MagicMock()
        mock_logging.warning = MagicMock()
        mock_proc = MagicMock()
        mock_proc.stderr = MagicMock()
        mock_proc.stderr.readlines = MagicMock(return_value=['test\n', 'error\n'])
        mock_stderr_fd = MagicMock()
        mock_proc.stderr.fileno = MagicMock(return_value=mock_stderr_fd)
        mock_proc_poll = MagicMock()
        mock_select.return_value = [[mock_stderr_fd]]

        # NOTE(review): a callable inside a list ``side_effect`` is returned
        # by the mock, not called, so this body appears never to run; it
        # also sets ``return_code`` rather than Popen's ``returncode``.
        # Confirm the intent before relying on it.
        def poll_resp_error():
            mock_proc.return_code = 1
            return True

        mock_proc_poll.side_effect = [None, poll_resp_error]
        mock_proc.poll = mock_proc_poll
        mock_popen.return_value = mock_proc
        dataflow = _Dataflow(['test', 'cmd'])
        # ......... part of the code is omitted here .........
示例11: DataFlowHookTest
class DataFlowHookTest(unittest.TestCase):
    """Tests for ``DataFlowHook`` launch commands and ``_Dataflow`` logging."""

    def setUp(self):
        """Create the hook with the base hook's ``__init__`` swapped for ``mock_init``."""
        with mock.patch(BASE_STRING.format('GoogleCloudBaseHook.__init__'),
                        new=mock_init):
            self.dataflow_hook = DataFlowHook(gcp_conn_id='test')

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid1'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJob'))
    @mock.patch(DATAFLOW_STRING.format('_Dataflow'))
    @mock.patch(DATAFLOW_STRING.format('DataFlowHook.get_conn'))
    def test_start_python_dataflow(self, mock_conn,
                                   mock_dataflow, mock_dataflowjob, mock_uuid):
        """Compare the Python launch command with the expected arguments.

        The first positional argument given to the patched ``_Dataflow``
        is compared with the expected list after sorting both sides, so
        argument order is not checked.
        """
        mock_uuid.return_value = MOCK_UUID
        mock_conn.return_value = None
        dataflow_instance = mock_dataflow.return_value
        dataflow_instance.wait_for_done.return_value = None
        dataflowjob_instance = mock_dataflowjob.return_value
        dataflowjob_instance.wait_for_done.return_value = None
        self.dataflow_hook.start_python_dataflow(
            task_id=TASK_ID, variables=DATAFLOW_OPTIONS_PY,
            dataflow=PY_FILE, py_options=PY_OPTIONS)
        expected_cmd = ['python', '-m', PY_FILE,
                        '--runner=DataflowRunner', '--project=test',
                        '--labels=foo=bar',
                        '--staging_location=gs://test/staging',
                        '--job_name={}-{}'.format(TASK_ID, MOCK_UUID)]
        self.assertListEqual(sorted(mock_dataflow.call_args[0][0]),
                             sorted(expected_cmd))

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid1'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJob'))
    @mock.patch(DATAFLOW_STRING.format('_Dataflow'))
    @mock.patch(DATAFLOW_STRING.format('DataFlowHook.get_conn'))
    def test_start_java_dataflow(self, mock_conn,
                                 mock_dataflow, mock_dataflowjob, mock_uuid):
        """Compare the ``java -jar`` launch command with the expected arguments.

        The comparison sorts both lists, so argument order is not checked.
        """
        mock_uuid.return_value = MOCK_UUID
        mock_conn.return_value = None
        dataflow_instance = mock_dataflow.return_value
        dataflow_instance.wait_for_done.return_value = None
        dataflowjob_instance = mock_dataflowjob.return_value
        dataflowjob_instance.wait_for_done.return_value = None
        self.dataflow_hook.start_java_dataflow(
            task_id=TASK_ID, variables=DATAFLOW_OPTIONS_JAVA,
            dataflow=JAR_FILE)
        expected_cmd = ['java', '-jar', JAR_FILE,
                        '--runner=DataflowRunner', '--project=test',
                        '--stagingLocation=gs://test/staging',
                        '--labels={"foo":"bar"}',
                        '--jobName={}-{}'.format(TASK_ID, MOCK_UUID)]
        self.assertListEqual(sorted(mock_dataflow.call_args[0][0]),
                             sorted(expected_cmd))

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid1'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJob'))
    @mock.patch(DATAFLOW_STRING.format('_Dataflow'))
    @mock.patch(DATAFLOW_STRING.format('DataFlowHook.get_conn'))
    def test_start_java_dataflow_with_job_class(
            self, mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid):
        """Compare the ``java -cp`` launch command used when a job class is given.

        The comparison sorts both lists, so argument order is not checked.
        """
        mock_uuid.return_value = MOCK_UUID
        mock_conn.return_value = None
        dataflow_instance = mock_dataflow.return_value
        dataflow_instance.wait_for_done.return_value = None
        dataflowjob_instance = mock_dataflowjob.return_value
        dataflowjob_instance.wait_for_done.return_value = None
        self.dataflow_hook.start_java_dataflow(
            task_id=TASK_ID, variables=DATAFLOW_OPTIONS_JAVA,
            dataflow=JAR_FILE, job_class=JOB_CLASS)
        expected_cmd = ['java', '-cp', JAR_FILE, JOB_CLASS,
                        '--runner=DataflowRunner', '--project=test',
                        '--stagingLocation=gs://test/staging',
                        '--labels={"foo":"bar"}',
                        '--jobName={}-{}'.format(TASK_ID, MOCK_UUID)]
        self.assertListEqual(sorted(mock_dataflow.call_args[0][0]),
                             sorted(expected_cmd))

    @mock.patch('airflow.contrib.hooks.gcp_dataflow_hook._Dataflow.log')
    @mock.patch('subprocess.Popen')
    @mock.patch('select.select')
    def test_dataflow_wait_for_done_logging(self, mock_select, mock_popen,
                                            mock_logging):
        """Simulate a failing subprocess and check logging plus the raise.

        ``subprocess.Popen`` and ``select.select`` are patched so that the
        fake process exposes two stderr lines and reports exit status 1.
        The test asserts that the command is logged at info level, that
        ``wait_for_done`` raises, and that each stderr line is logged as a
        warning without its trailing newline.
        """
        mock_logging.info = MagicMock()
        mock_logging.warning = MagicMock()

        mock_proc = MagicMock()
        mock_proc.stderr = MagicMock()
        mock_proc.stderr.readlines = MagicMock(
            return_value=['test\n', 'error\n'])
        mock_stderr_fd = MagicMock()
        mock_proc.stderr.fileno = MagicMock(return_value=mock_stderr_fd)
        mock_select.return_value = [[mock_stderr_fd]]

        # A list ``side_effect`` returns its items as-is: a callable placed
        # in the list is handed back, not invoked. The previous version
        # relied on such a callable to set the exit status (and used the
        # non-Popen name ``return_code``), so the status was never set.
        # Configure the exit status directly and let ``poll`` report
        # "still running" (None) once, then the exit status.
        mock_proc.returncode = 1
        mock_proc.poll = MagicMock(side_effect=[None, 1])
        mock_popen.return_value = mock_proc

        dataflow = _Dataflow(['test', 'cmd'])
        mock_logging.info.assert_called_with('Running command: %s', 'test cmd')
        self.assertRaises(Exception, dataflow.wait_for_done)
        mock_logging.warning.assert_has_calls([call('test'), call('error')])