This page collects typical usage examples of the Python method airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook._process_spark_submit_log. If you have been wondering how SparkSubmitHook._process_spark_submit_log is used in practice, the hand-picked code examples below may help. You can also explore the usage of the containing class, airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook.
The following shows 6 code examples of SparkSubmitHook._process_spark_submit_log, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
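All six tests share one pattern: build a list of raw spark-submit log lines, feed them to hook._process_spark_submit_log(), and assert that the hook captured a tracking handle (a YARN application id, a Kubernetes driver pod name, or a standalone driver id) or an exit code. As a rough mental model, here is a minimal standalone sketch; the scan_submit_log name and the regexes are illustrative assumptions consistent with the assertions below, not the hook's actual implementation:

import re

def scan_submit_log(log_lines):
    # Illustrative only: collect the tracking handles the tests below assert on.
    handles = {}
    patterns = {
        'yarn_application_id': r'(application[0-9_]+)',      # YARN cluster mode
        'kubernetes_driver_pod': r'pod name: (.+?-driver)',  # k8s, non-greedy
        'driver_id': r'(driver-[0-9-]+)',                    # standalone REST gateway
        'spark_exit_code': r'Exit code: (\d+)',              # k8s driver exit code
    }
    for line in log_lines:
        for key, pattern in patterns.items():
            match = re.search(pattern, line.strip())
            if match:
                handles[key] = match.group(1)  # values kept as strings for brevity
    return handles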
Example 1: test_yarn_process_on_kill
# Required import: from airflow.contrib.hooks.spark_submit_hook import SparkSubmitHook [as alias]
# Or: from airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook import _process_spark_submit_log [as alias]
def test_yarn_process_on_kill(self, mock_popen):
    # mock_popen is supplied by a mock.patch decorator on subprocess.Popen
    # in the original test suite; the decorator is not shown on this page.
    # Given
    mock_popen.return_value.stdout = six.StringIO('stdout')
    mock_popen.return_value.stderr = six.StringIO('stderr')
    mock_popen.return_value.poll.return_value = None
    mock_popen.return_value.wait.return_value = 0
    log_lines = [
        'SPARK_MAJOR_VERSION is set to 2, using Spark2',
        'WARN NativeCodeLoader: Unable to load native-hadoop library for your ' +
        'platform... using builtin-java classes where applicable',
        'WARN DomainSocketFactory: The short-circuit local reads feature cannot ' +
        'be used because libhadoop cannot be loaded.',
        'INFO Client: Requesting a new application from cluster with 10 ' +
        'NodeManagerapplication_1486558679801_1820s',
        'INFO Client: Submitting application application_1486558679801_1820 ' +
        'to ResourceManager'
    ]
    hook = SparkSubmitHook(conn_id='spark_yarn_cluster')
    hook._process_spark_submit_log(log_lines)
    hook.submit()
    # When
    hook.on_kill()
    # Then
    self.assertIn(call(['yarn', 'application', '-kill',
                        'application_1486558679801_1820'],
                       stderr=-1, stdout=-1),
                  mock_popen.mock_calls)
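Two details are worth noting in the assertion above. First, stderr=-1 and stdout=-1 are simply the numeric value of subprocess.PIPE, so the test is pinning down a piped Popen call. Second, on_kill for YARN amounts to invoking the yarn CLI with the tracked application id; a minimal sketch of that step (the helper name is hypothetical, not the hook's actual code):

import subprocess

def kill_yarn_application(yarn_application_id):
    # 'yarn application -kill <id>' asks the ResourceManager to stop the app.
    # subprocess.PIPE == -1, matching the stdout/stderr values asserted above.
    return subprocess.Popen(
        ['yarn', 'application', '-kill', yarn_application_id],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)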
Example 2: test_process_spark_submit_log_k8s
# Required import: from airflow.contrib.hooks.spark_submit_hook import SparkSubmitHook [as alias]
# Or: from airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook import _process_spark_submit_log [as alias]
def test_process_spark_submit_log_k8s(self):
    # Given
    hook = SparkSubmitHook(conn_id='spark_k8s_cluster')
    log_lines = [
        'INFO LoggingPodStatusWatcherImpl:54 - State changed, new state:' +
        'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' +
        'namespace: default' +
        'labels: spark-app-selector -> spark-465b868ada474bda82ccb84ab2747fcd,' +
        'spark-role -> driver' +
        'pod uid: ba9c61f6-205f-11e8-b65f-d48564c88e42' +
        'creation time: 2018-03-05T10:26:55Z' +
        'service account name: spark' +
        'volumes: spark-init-properties, download-jars-volume,' +
        'download-files-volume, spark-token-2vmlm' +
        'node name: N/A' +
        'start time: N/A' +
        'container images: N/A' +
        'phase: Pending' +
        'status: []' +
        '2018-03-05 11:26:56 INFO LoggingPodStatusWatcherImpl:54 - State changed,' +
        ' new state:' +
        'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' +
        'namespace: default' +
        'Exit code: 999'
    ]
    # When
    hook._process_spark_submit_log(log_lines)
    # Then
    self.assertEqual(hook._kubernetes_driver_pod,
                     'spark-pi-edf2ace37be7353a958b38733a12f8e6-driver')
    self.assertEqual(hook._spark_exit_code, 999)
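Note that the + concatenation collapses the whole pod status block into a single string, yet the test still expects the exact pod name. That only works if the extraction stops at the '-driver' suffix; a quick standalone check of that behaviour (the regex is an assumption consistent with the assertion, not necessarily the hook's exact pattern):

import re

# One concatenated line, as in the test fixture above.
line = ('State changed, new state:'
        'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver'
        'namespace: default')
match = re.search(r'pod name: (.+?-driver)', line)  # non-greedy stop at '-driver'
assert match.group(1) == 'spark-pi-edf2ace37be7353a958b38733a12f8e6-driver'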
Example 3: test_process_spark_submit_log_standalone_cluster
# Required import: from airflow.contrib.hooks.spark_submit_hook import SparkSubmitHook [as alias]
# Or: from airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook import _process_spark_submit_log [as alias]
def test_process_spark_submit_log_standalone_cluster(self):
    # Given
    hook = SparkSubmitHook(conn_id='spark_standalone_cluster')
    log_lines = [
        'Running Spark using the REST application submission protocol.',
        '17/11/28 11:14:15 INFO RestSubmissionClient: Submitting a request '
        'to launch an application in spark://spark-standalone-master:6066',
        '17/11/28 11:14:15 INFO RestSubmissionClient: Submission successfully ' +
        'created as driver-20171128111415-0001. Polling submission state...'
    ]
    # When
    hook._process_spark_submit_log(log_lines)
    # Then
    self.assertEqual(hook._driver_id, 'driver-20171128111415-0001')
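The 'driver-20171128111415-0001' submission id comes from Spark's REST submission gateway (port 6066) and is the handle for polling or killing the driver later. As a usage note, spark-submit itself exposes this through its --status flag (a real spark-submit flag); the wrapper below is a hypothetical sketch:

import subprocess

def poll_driver_status(spark_binary, master_rest_url, driver_id):
    # 'spark-submit --master <rest-url> --status <driver-id>' reports the
    # driver's state (RUNNING, FINISHED, KILLED, ...) on a standalone master.
    return subprocess.check_output(
        [spark_binary, '--master', master_rest_url, '--status', driver_id])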
Example 4: test_process_spark_submit_log_yarn
# Required import: from airflow.contrib.hooks.spark_submit_hook import SparkSubmitHook [as alias]
# Or: from airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook import _process_spark_submit_log [as alias]
def test_process_spark_submit_log_yarn(self):
    # Given
    hook = SparkSubmitHook(conn_id='spark_yarn_cluster')
    log_lines = [
        'SPARK_MAJOR_VERSION is set to 2, using Spark2',
        'WARN NativeCodeLoader: Unable to load native-hadoop library for your ' +
        'platform... using builtin-java classes where applicable',
        'WARN DomainSocketFactory: The short-circuit local reads feature cannot '
        'be used because libhadoop cannot be loaded.',
        'INFO Client: Requesting a new application from cluster with 10 NodeManagers',
        'INFO Client: Submitting application application_1486558679801_1820 ' +
        'to ResourceManager'
    ]
    # When
    hook._process_spark_submit_log(log_lines)
    # Then
    self.assertEqual(hook._yarn_application_id, 'application_1486558679801_1820')
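The captured id follows YARN's application_<clusterTimestamp>_<sequenceNumber> naming scheme. A quick standalone check that a pattern consistent with the assertion skips the bare word 'application' and grabs the full id (again an illustrative regex, not necessarily the hook's own):

import re

line = ('INFO Client: Submitting application application_1486558679801_1820 '
        'to ResourceManager')
# 'application' must be followed by digits/underscores, so the bare word
# 'application ' earlier in the line does not match.
match = re.search(r'(application[0-9_]+)', line)
assert match.group(1) == 'application_1486558679801_1820'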
Example 5: test_k8s_process_on_kill
# Required import: from airflow.contrib.hooks.spark_submit_hook import SparkSubmitHook [as alias]
# Or: from airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook import _process_spark_submit_log [as alias]
def test_k8s_process_on_kill(self, mock_popen, mock_client_method):
    # mock_popen and mock_client_method are supplied by mock.patch decorators
    # (on subprocess.Popen and on the Kubernetes client factory) in the
    # original test suite; the decorators are not shown on this page.
    # Given
    mock_popen.return_value.stdout = six.StringIO('stdout')
    mock_popen.return_value.stderr = six.StringIO('stderr')
    mock_popen.return_value.poll.return_value = None
    mock_popen.return_value.wait.return_value = 0
    client = mock_client_method.return_value
    hook = SparkSubmitHook(conn_id='spark_k8s_cluster')
    log_lines = [
        'INFO LoggingPodStatusWatcherImpl:54 - State changed, new state:' +
        'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' +
        'namespace: default' +
        'labels: spark-app-selector -> spark-465b868ada474bda82ccb84ab2747fcd,' +
        'spark-role -> driver' +
        'pod uid: ba9c61f6-205f-11e8-b65f-d48564c88e42' +
        'creation time: 2018-03-05T10:26:55Z' +
        'service account name: spark' +
        'volumes: spark-init-properties, download-jars-volume,' +
        'download-files-volume, spark-token-2vmlm' +
        'node name: N/A' +
        'start time: N/A' +
        'container images: N/A' +
        'phase: Pending' +
        'status: []' +
        '2018-03-05 11:26:56 INFO LoggingPodStatusWatcherImpl:54 - State changed,' +
        ' new state:' +
        'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' +
        'namespace: default' +
        'Exit code: 0'
    ]
    hook._process_spark_submit_log(log_lines)
    hook.submit()
    # When
    hook.on_kill()
    # Then
    import kubernetes
    kwargs = {'pretty': True, 'body': kubernetes.client.V1DeleteOptions()}
    client.delete_namespaced_pod.assert_called_once_with(
        'spark-pi-edf2ace37be7353a958b38733a12f8e6-driver',
        'mynamespace', **kwargs)
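Here on_kill deletes the tracked driver pod through the official kubernetes Python client; CoreV1Api.delete_namespaced_pod and V1DeleteOptions are real kubernetes-client APIs that the mocked client stands in for. The 'mynamespace' value presumably comes from the test's connection fixture, which this page does not show. A minimal unmocked sketch of the same call (helper name hypothetical):

import kubernetes

def kill_driver_pod(pod_name, namespace):
    # Hypothetical helper; assumes kubernetes.config.load_kube_config()
    # (or load_incluster_config()) has already been called.
    api = kubernetes.client.CoreV1Api()
    # Deleting the driver pod tears the Spark application down; executors follow.
    return api.delete_namespaced_pod(
        pod_name, namespace,
        pretty=True, body=kubernetes.client.V1DeleteOptions())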
Example 6: test_standalone_cluster_process_on_kill
# Required import: from airflow.contrib.hooks.spark_submit_hook import SparkSubmitHook [as alias]
# Or: from airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook import _process_spark_submit_log [as alias]
def test_standalone_cluster_process_on_kill(self):
    # Given
    log_lines = [
        'Running Spark using the REST application submission protocol.',
        '17/11/28 11:14:15 INFO RestSubmissionClient: Submitting a request ' +
        'to launch an application in spark://spark-standalone-master:6066',
        '17/11/28 11:14:15 INFO RestSubmissionClient: Submission successfully ' +
        'created as driver-20171128111415-0001. Polling submission state...'
    ]
    hook = SparkSubmitHook(conn_id='spark_standalone_cluster')
    hook._process_spark_submit_log(log_lines)
    # When
    kill_cmd = hook._build_spark_driver_kill_command()
    # Then
    self.assertEqual(kill_cmd[0], '/path/to/spark_home/bin/spark-submit')
    self.assertEqual(kill_cmd[1], '--master')
    self.assertEqual(kill_cmd[2], 'spark://spark-standalone-master:6066')
    self.assertEqual(kill_cmd[3], '--kill')
    self.assertEqual(kill_cmd[4], 'driver-20171128111415-0001')
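Joined together, the asserted argv is the standard standalone-cluster kill invocation: spark-submit --master spark://spark-standalone-master:6066 --kill driver-20171128111415-0001 (--kill is a real spark-submit flag taking the submission id). An illustrative reconstruction of the builder the test exercises (names here are assumptions, not the hook's actual code):

def build_spark_driver_kill_command(spark_binary, master, driver_id):
    # Mirrors the element-by-element assertions above.
    return [spark_binary, '--master', master, '--kill', driver_id]

kill_cmd = build_spark_driver_kill_command(
    '/path/to/spark_home/bin/spark-submit',
    'spark://spark-standalone-master:6066',
    'driver-20171128111415-0001')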