This article collects typical usage examples of the Python class airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook. If you are wondering what the SparkSubmitHook class is for or how to use it, the curated class code examples below may help.
The following shows 15 code examples of the SparkSubmitHook class, sorted by popularity by default.
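Before the test excerpts, here is a minimal sketch of how the hook is typically driven. The connection id, conf value, and application path are illustrative placeholders, not taken from the examples below:

from airflow.contrib.hooks.spark_submit_hook import SparkSubmitHook

# Minimal sketch: build the hook from an Airflow connection and submit a job.
# 'spark_default' and 'my_app.py' are placeholder values for illustration.
hook = SparkSubmitHook(
    conn_id='spark_default',                  # Airflow connection describing the Spark master
    conf={'spark.executor.memory': '2g'},     # forwarded as --conf key=value pairs
    name='example-job',
)
hook.submit('my_app.py')                      # builds and runs the spark-submit command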

Example 1: test_process_spark_submit_log_k8s

def test_process_spark_submit_log_k8s(self):
    # Given
    hook = SparkSubmitHook(conn_id='spark_k8s_cluster')
    log_lines = [
        'INFO LoggingPodStatusWatcherImpl:54 - State changed, new state:' +
        'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' +
        'namespace: default' +
        'labels: spark-app-selector -> spark-465b868ada474bda82ccb84ab2747fcd,' +
        'spark-role -> driver' +
        'pod uid: ba9c61f6-205f-11e8-b65f-d48564c88e42' +
        'creation time: 2018-03-05T10:26:55Z' +
        'service account name: spark' +
        'volumes: spark-init-properties, download-jars-volume,' +
        'download-files-volume, spark-token-2vmlm' +
        'node name: N/A' +
        'start time: N/A' +
        'container images: N/A' +
        'phase: Pending' +
        'status: []' +
        '2018-03-05 11:26:56 INFO LoggingPodStatusWatcherImpl:54 - State changed,' +
        ' new state:' +
        'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' +
        'namespace: default' +
        'Exit code: 999'
    ]
    # When
    hook._process_spark_submit_log(log_lines)
    # Then
    self.assertEqual(hook._kubernetes_driver_pod,
                     'spark-pi-edf2ace37be7353a958b38733a12f8e6-driver')
    self.assertEqual(hook._spark_exit_code, 999)
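Example 1 exercises the hook's log scraping: the driver pod name and exit code are pulled out of the spark-submit output. A simplified sketch of the kind of pattern matching involved (this is an illustration, not the hook's actual implementation):

import re

# Simplified stand-in for what _process_spark_submit_log does on Kubernetes:
# scan each log line for the driver pod name and the final exit code.
def parse_k8s_log(log_lines):
    driver_pod, exit_code = None, None
    for line in log_lines:
        pod_match = re.search(r'pod name: (\S+)', line)
        if pod_match:
            driver_pod = pod_match.group(1)
        exit_match = re.search(r'Exit code: (\d+)', line)
        if exit_match:
            exit_code = int(exit_match.group(1))
    return driver_pod, exit_code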

Example 2: test_build_spark_submit_command

def test_build_spark_submit_command(self):
    # Given
    hook = SparkSubmitHook(**self._config)
    # When
    cmd = hook._build_spark_submit_command(self._spark_job_file)
    # Then
    expected_build_cmd = [
        'spark-submit',
        '--master', 'yarn',
        '--conf', 'parquet.compression=SNAPPY',
        '--files', 'hive-site.xml',
        '--py-files', 'sample_library.py',
        '--jars', 'parquet.jar',
        '--packages', 'com.databricks:spark-avro_2.11:3.2.0',
        '--exclude-packages', 'org.bad.dependency:1.0.0',
        '--repositories', 'http://myrepo.org',
        '--num-executors', '10',
        '--total-executor-cores', '4',
        '--executor-cores', '4',
        '--executor-memory', '22g',
        '--driver-memory', '3g',
        '--keytab', 'privileged_user.keytab',
        '--principal', 'user/[email protected]',
        '--name', 'spark-job',
        '--class', 'com.foo.bar.AppMain',
        '--verbose',
        'test_application.py',
        '-f', 'foo',
        '--bar', 'bar',
        '--with-spaces', 'args should keep embdedded spaces',
        'baz'
    ]
    self.assertEqual(expected_build_cmd, cmd)
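Several examples reference self._config and self._spark_job_file without showing them. Reconstructed from the expected command above (every value maps one-to-one onto a flag), the fixture plausibly looks like the following sketch; it is not the verbatim test setup:

_spark_job_file = 'test_application.py'
_config = {
    # conn_id is omitted here; '--master yarn' comes from the Airflow
    # connection the test registers separately.
    'conf': {'parquet.compression': 'SNAPPY'},
    'files': 'hive-site.xml',
    'py_files': 'sample_library.py',
    'jars': 'parquet.jar',
    'packages': 'com.databricks:spark-avro_2.11:3.2.0',
    'exclude_packages': 'org.bad.dependency:1.0.0',
    'repositories': 'http://myrepo.org',
    'num_executors': 10,
    'total_executor_cores': 4,
    'executor_cores': 4,
    'executor_memory': '22g',
    'driver_memory': '3g',
    'keytab': 'privileged_user.keytab',
    'principal': 'user/[email protected]',
    'name': 'spark-job',
    'java_class': 'com.foo.bar.AppMain',
    'verbose': True,
    'application_args': [
        '-f', 'foo',
        '--bar', 'bar',
        '--with-spaces', 'args should keep embdedded spaces',
        'baz',
    ],
}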

Example 3: env_vars_exception_in_standalone_cluster_mode

def env_vars_exception_in_standalone_cluster_mode(self):
    # Given
    hook = SparkSubmitHook(conn_id='spark_standalone_cluster',
                           env_vars={"bar": "foo"})
    # When: building the command is expected to raise, since env_vars are
    # not supported in standalone cluster mode (in the full test this
    # helper is wrapped in assertRaises).
    hook._build_spark_submit_command(self._spark_job_file)

Example 4: test_build_command

def test_build_command(self):
    # Given
    hook = SparkSubmitHook(**self._config)
    # When
    cmd = hook._build_command(self._spark_job_file)
    # Then
    expected_build_cmd = [
        'spark-submit',
        '--master', 'yarn',
        '--conf', 'parquet.compression=SNAPPY',
        '--files', 'hive-site.xml',
        '--py-files', 'sample_library.py',
        '--jars', 'parquet.jar',
        '--num-executors', '10',
        '--total-executor-cores', '4',
        '--executor-cores', '4',
        '--executor-memory', '22g',
        '--driver-memory', '3g',
        '--keytab', 'privileged_user.keytab',
        '--principal', 'user/[email protected]',
        '--name', 'spark-job',
        '--class', 'com.foo.bar.AppMain',
        '--verbose',
        'test_application.py',
        '-f', 'foo',
        '--bar', 'bar',
        'baz'
    ]
    self.assertEqual(expected_build_cmd, cmd)

Example 5: test_submit

# Note: the patch decorator was stripped from this excerpt; the hook
# module's subprocess is mocked (the target path below is an assumption)
# so that no real spark-submit is launched.
@patch('airflow.contrib.hooks.spark_submit_hook.subprocess')
def test_submit(self, mock_process):
    # We don't have spark-submit available, and this is hard to mock, so
    # let's just use this simple mock.
    mock_Popen = mock_process.Popen.return_value
    mock_Popen.stdout = StringIO(u'stdout')
    mock_Popen.stderr = StringIO(u'stderr')
    mock_Popen.returncode = None
    mock_Popen.communicate.return_value = ['extra stdout', 'extra stderr']
    hook = SparkSubmitHook()
    hook.submit(self._spark_job_file)

Example 6: test_resolve_spark_submit_env_vars_k8s

def test_resolve_spark_submit_env_vars_k8s(self):
    # Given
    hook = SparkSubmitHook(conn_id='spark_k8s_cluster',
                           env_vars={"bar": "foo"})
    # When
    cmd = hook._build_spark_submit_command(self._spark_job_file)
    # Then
    self.assertEqual(cmd[4], "spark.kubernetes.driverEnv.bar=foo")

Example 7: test_resolve_spark_submit_env_vars_standalone_client_mode

def test_resolve_spark_submit_env_vars_standalone_client_mode(self):
    # Given
    hook = SparkSubmitHook(conn_id='spark_standalone_cluster_client_mode',
                           env_vars={"bar": "foo"})
    # When
    hook._build_spark_submit_command(self._spark_job_file)
    # Then
    self.assertEqual(hook._env, {"bar": "foo"})

Example 8: test_spark_process_runcmd

# Note: the patch decorator was stripped from this excerpt; Popen inside
# the hook module is mocked (the target path below is an assumption).
@mock.patch('airflow.contrib.hooks.spark_submit_hook.subprocess.Popen')
def test_spark_process_runcmd(self, mock_popen):
    # Given
    mock_popen.return_value.stdout = StringIO(u'stdout')
    mock_popen.return_value.stderr = StringIO(u'stderr')
    mock_popen.return_value.wait.return_value = 0
    # When
    hook = SparkSubmitHook(conn_id='')
    hook.submit()
    # Then: stdout=-1 is subprocess.PIPE, stderr=-2 is subprocess.STDOUT
    self.assertEqual(
        mock_popen.mock_calls[0],
        call(['spark-submit', '--master', 'yarn',
              '--name', 'default-name', ''],
             stdout=-1, stderr=-2))

Example 9: test_process_log

def test_process_log(self):
    # Must select yarn connection
    hook = SparkSubmitHook(conn_id='spark_yarn_cluster')
    log_lines = [
        'SPARK_MAJOR_VERSION is set to 2, using Spark2',
        'WARN NativeCodeLoader: Unable to load native-hadoop library for your '
        'platform... using builtin-java classes where applicable',
        'WARN DomainSocketFactory: The short-circuit local reads feature cannot '
        'be used because libhadoop cannot be loaded.',
        'INFO Client: Requesting a new application from cluster with 10 NodeManagers',
        'INFO Client: Submitting application application_1486558679801_1820 '
        'to ResourceManager'
    ]
    hook._process_log(log_lines)
    assert hook._yarn_application_id == 'application_1486558679801_1820'

Example 10: execute

def execute(self, context):
    """
    Call the SparkSubmitHook to run the provided spark job.
    """
    self._hook = SparkSubmitHook(
        conf=self._conf,
        conn_id=self._conn_id,
        files=self._files,
        py_files=self._py_files,
        driver_classpath=self._driver_classpath,
        jars=self._jars,
        java_class=self._java_class,
        packages=self._packages,
        exclude_packages=self._exclude_packages,
        repositories=self._repositories,
        total_executor_cores=self._total_executor_cores,
        executor_cores=self._executor_cores,
        executor_memory=self._executor_memory,
        driver_memory=self._driver_memory,
        keytab=self._keytab,
        principal=self._principal,
        name=self._name,
        num_executors=self._num_executors,
        application_args=self._application_args,
        verbose=self._verbose
    )
    self._hook.submit(self._application)
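Example 10 is the execute method of the accompanying SparkSubmitOperator, which simply forwards its constructor arguments to the hook. In a DAG it would typically be wired up as in the sketch below; the dag id, schedule, application path, and connection id are illustrative placeholders:

from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.spark_submit_operator import SparkSubmitOperator

# Placeholder DAG: the operator builds a SparkSubmitHook at execute() time
# and runs spark-submit against the master from 'spark_default'.
dag = DAG('spark_example', start_date=datetime(2018, 1, 1), schedule_interval=None)

submit_task = SparkSubmitOperator(
    task_id='submit_spark_job',
    application='/path/to/test_application.py',
    conn_id='spark_default',
    name='spark-job',
    dag=dag,
)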

Example 11: test_process_spark_submit_log_standalone_cluster

def test_process_spark_submit_log_standalone_cluster(self):
    # Given
    hook = SparkSubmitHook(conn_id='spark_standalone_cluster')
    log_lines = [
        'Running Spark using the REST application submission protocol.',
        '17/11/28 11:14:15 INFO RestSubmissionClient: Submitting a request '
        'to launch an application in spark://spark-standalone-master:6066',
        '17/11/28 11:14:15 INFO RestSubmissionClient: Submission successfully ' +
        'created as driver-20171128111415-0001. Polling submission state...'
    ]
    # When
    hook._process_spark_submit_log(log_lines)
    # Then
    self.assertEqual(hook._driver_id, 'driver-20171128111415-0001')

Example 12: test_resolve_connection_spark_binary_set_connection

def test_resolve_connection_spark_binary_set_connection(self):
    # Given
    hook = SparkSubmitHook(conn_id='spark_binary_set')
    # When
    connection = hook._resolve_connection()
    cmd = hook._build_spark_submit_command(self._spark_job_file)
    # Then
    expected_spark_connection = {"master": "yarn",
                                 "spark_binary": "custom-spark-submit",
                                 "deploy_mode": None,
                                 "queue": None,
                                 "spark_home": None}
    self.assertEqual(connection, expected_spark_connection)
    self.assertEqual(cmd[0], 'custom-spark-submit')

Example 13: test_resolve_connection_spark_standalone_cluster_connection

def test_resolve_connection_spark_standalone_cluster_connection(self):
    # Given
    hook = SparkSubmitHook(conn_id='spark_standalone_cluster')
    # When
    connection = hook._resolve_connection()
    cmd = hook._build_spark_submit_command(self._spark_job_file)
    # Then
    expected_spark_connection = {"master": "spark://spark-standalone-master:6066",
                                 "spark_binary": "spark-submit",
                                 "deploy_mode": "cluster",
                                 "queue": None,
                                 "spark_home": "/path/to/spark_home"}
    self.assertEqual(connection, expected_spark_connection)
    self.assertEqual(cmd[0], '/path/to/spark_home/bin/spark-submit')

Example 14: test_resolve_connection_mesos_default_connection

def test_resolve_connection_mesos_default_connection(self):
    # Given
    hook = SparkSubmitHook(conn_id='spark_default_mesos')
    # When
    connection = hook._resolve_connection()
    cmd = hook._build_spark_submit_command(self._spark_job_file)
    # Then
    dict_cmd = self.cmd_args_to_dict(cmd)
    expected_spark_connection = {"master": "mesos://host:5050",
                                 "spark_binary": "spark-submit",
                                 "deploy_mode": None,
                                 "queue": None,
                                 "spark_home": None}
    self.assertEqual(connection, expected_spark_connection)
    self.assertEqual(dict_cmd["--master"], "mesos://host:5050")

Example 15: test_resolve_connection_spark_home_set_connection

def test_resolve_connection_spark_home_set_connection(self):
    # Given
    hook = SparkSubmitHook(conn_id='spark_home_set')
    # When
    connection = hook._resolve_connection()
    cmd = hook._build_spark_submit_command(self._spark_job_file)
    # Then
    expected_spark_connection = {"master": "yarn://yarn-master",
                                 "spark_binary": "spark-submit",
                                 "deploy_mode": None,
                                 "queue": None,
                                 "spark_home": "/opt/myspark",
                                 "namespace": "default"}
    self.assertEqual(connection, expected_spark_connection)
    self.assertEqual(cmd[0], '/opt/myspark/bin/spark-submit')
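The conn_id values used throughout (spark_k8s_cluster, spark_standalone_cluster, spark_home_set, and so on) refer to Airflow connections registered by the test suite's setUp. A sketch of how such a connection can be created, mirroring the assertions of Example 15 (the exact extras in the real suite may differ):

from airflow import models
from airflow.utils import db

# Register a 'spark' connection whose extras carry spark-home; the hook's
# _resolve_connection() then prefixes the binary with /opt/myspark/bin.
db.merge_conn(
    models.Connection(
        conn_id='spark_home_set',
        conn_type='spark',
        host='yarn://yarn-master',
        extra='{"spark-home": "/opt/myspark"}',
    )
)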