This page collects typical usage examples of the Python method airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook._build_command. If you are wondering what SparkSubmitHook._build_command does or how to call it, the curated examples below should help. You can also look further into usage examples of the containing class, airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook.
The following 6 code examples of SparkSubmitHook._build_command are listed, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
Example 1: test_build_command
# Required import: from airflow.contrib.hooks.spark_submit_hook import SparkSubmitHook [possibly under an alias]
# Or: from airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook import _build_command [possibly under an alias]
def test_build_command(self):
    # Given
    hook = SparkSubmitHook(**self._config)

    # When
    cmd = hook._build_command(self._spark_job_file)

    # Then
    expected_build_cmd = [
        'spark-submit',
        '--master', 'yarn',
        '--conf', 'parquet.compression=SNAPPY',
        '--files', 'hive-site.xml',
        '--py-files', 'sample_library.py',
        '--jars', 'parquet.jar',
        '--num-executors', '10',
        '--total-executor-cores', '4',
        '--executor-cores', '4',
        '--executor-memory', '22g',
        '--driver-memory', '3g',
        '--keytab', 'privileged_user.keytab',
        '--principal', 'user/[email protected]',
        '--name', 'spark-job',
        '--class', 'com.foo.bar.AppMain',
        '--verbose',
        'test_application.py',
        '-f', 'foo',
        '--bar', 'bar',
        '--with-spaces', 'args should keep embdedded spaces',
        'baz'
    ]
    self.assertEquals(expected_build_cmd, cmd)
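The self._config and self._spark_job_file fixtures used above are defined in the test class's setUp and are not reproduced on this page. Below is a minimal sketch of what they could look like, reverse-engineered from the expected command above and from example 6; the conn_id value is an assumption (an empty conn_id falls back to a plain yarn master, see example 2), and the Kerberos principal is redacted on this page, so it is left as a placeholder.
import unittest

from airflow.contrib.hooks.spark_submit_hook import SparkSubmitHook


class TestSparkSubmitHook(unittest.TestCase):
    # Sketch of the fixtures assumed by the examples on this page (values
    # inferred from the expected spark-submit command, not copied from the
    # original test file).
    _spark_job_file = 'test_application.py'
    _config = {
        'conf': {'parquet.compression': 'SNAPPY'},
        'conn_id': '',  # assumption: resolves to '--master yarn' with no queue
        'files': 'hive-site.xml',
        'py_files': 'sample_library.py',
        'jars': 'parquet.jar',
        'num_executors': 10,
        'total_executor_cores': 4,
        'executor_cores': 4,
        'executor_memory': '22g',
        'driver_memory': '3g',
        'keytab': 'privileged_user.keytab',
        'principal': 'user/[email protected]',  # redacted on this page; substitute a real Kerberos principal
        'name': 'spark-job',
        'java_class': 'com.foo.bar.AppMain',
        'verbose': True,
        'application_args': [
            '-f', 'foo',
            '--bar', 'bar',
            '--with-spaces', 'args should keep embdedded spaces',
            'baz',
        ],
    }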
Example 2: test_resolve_connection
# Required import: from airflow.contrib.hooks.spark_submit_hook import SparkSubmitHook [possibly under an alias]
# Or: from airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook import _build_command [possibly under an alias]
def test_resolve_connection(self):
    # Fall back to the standard yarn connection because the conn_id does not exist
    hook = SparkSubmitHook(conn_id='')
    self.assertEqual(hook._resolve_connection(), ('yarn', None, None, None))
    assert "--master yarn" in ' '.join(hook._build_command(self._spark_job_file))

    # Default to the standard yarn connection
    hook = SparkSubmitHook(conn_id='spark_default')
    self.assertEqual(
        hook._resolve_connection(),
        ('yarn', 'root.default', None, None)
    )
    cmd = ' '.join(hook._build_command(self._spark_job_file))
    assert "--master yarn" in cmd
    assert "--queue root.default" in cmd

    # Connect to a mesos master
    hook = SparkSubmitHook(conn_id='spark_default_mesos')
    self.assertEqual(
        hook._resolve_connection(),
        ('mesos://host:5050', None, None, None)
    )
    cmd = ' '.join(hook._build_command(self._spark_job_file))
    assert "--master mesos://host:5050" in cmd

    # Set a specific queue and deploy mode
    hook = SparkSubmitHook(conn_id='spark_yarn_cluster')
    self.assertEqual(
        hook._resolve_connection(),
        ('yarn://yarn-master', 'root.etl', 'cluster', None)
    )
    cmd = ' '.join(hook._build_command(self._spark_job_file))
    assert "--master yarn://yarn-master" in cmd
    assert "--queue root.etl" in cmd
    assert "--deploy-mode cluster" in cmd

    # Set the spark home
    hook = SparkSubmitHook(conn_id='spark_home_set')
    self.assertEqual(
        hook._resolve_connection(),
        ('yarn://yarn-master', None, None, '/opt/myspark')
    )
    cmd = ' '.join(hook._build_command(self._spark_job_file))
    assert cmd.startswith('/opt/myspark/bin/spark-submit')

    # Spark home not set
    hook = SparkSubmitHook(conn_id='spark_home_not_set')
    self.assertEqual(
        hook._resolve_connection(),
        ('yarn://yarn-master', None, None, None)
    )
    cmd = ' '.join(hook._build_command(self._spark_job_file))
    assert cmd.startswith('spark-submit')
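The connection ids referenced above (spark_default_mesos, spark_yarn_cluster, spark_home_set, spark_home_not_set) must already exist in the Airflow metadata database when the test runs. A hedged sketch of how they could be registered follows; the hosts and extras are inferred from the tuples asserted above, and the extra key names (queue, deploy-mode, spark-home) are assumptions about how the hook reads its connection extras rather than a copy of the original setUp.
from airflow import models
from airflow.utils import db

# Register the connections that test_resolve_connection expects; the extras
# below are educated guesses derived from the assertions in this example.
db.merge_conn(models.Connection(
    conn_id='spark_default_mesos', conn_type='spark',
    host='mesos://host', port=5050))
db.merge_conn(models.Connection(
    conn_id='spark_yarn_cluster', conn_type='spark',
    host='yarn://yarn-master',
    extra='{"queue": "root.etl", "deploy-mode": "cluster"}'))
db.merge_conn(models.Connection(
    conn_id='spark_home_set', conn_type='spark',
    host='yarn://yarn-master',
    extra='{"spark-home": "/opt/myspark"}'))
db.merge_conn(models.Connection(
    conn_id='spark_home_not_set', conn_type='spark',
    host='yarn://yarn-master'))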
Example 3: test_resolve_connection_spark_binary_and_home_set_connection
# Required import: from airflow.contrib.hooks.spark_submit_hook import SparkSubmitHook [possibly under an alias]
# Or: from airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook import _build_command [possibly under an alias]
def test_resolve_connection_spark_binary_and_home_set_connection(self):
    # Given
    hook = SparkSubmitHook(conn_id='spark_binary_and_home_set')

    # When
    connection = hook._resolve_connection()
    cmd = hook._build_command(self._spark_job_file)

    # Then
    expected_spark_connection = {"master": "yarn",
                                 "spark_binary": "custom-spark-submit",
                                 "deploy_mode": None,
                                 "queue": None,
                                 "spark_home": "/path/to/spark_home"}
    self.assertEqual(connection, expected_spark_connection)
    self.assertEqual(cmd[0], '/path/to/spark_home/bin/custom-spark-submit')
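Note that this example (and the two that follow) expects _resolve_connection() to return a dict rather than the tuple seen in example 2, which reflects a newer Airflow release. The spark_binary_and_home_set connection would have to carry both a spark home and a custom binary name in its extras; a hedged sketch, with the extra key names assumed rather than taken from the original setUp:
from airflow import models
from airflow.utils import db

# Hypothetical registration of the connection used in this example.
db.merge_conn(models.Connection(
    conn_id='spark_binary_and_home_set', conn_type='spark',
    host='yarn',
    extra='{"spark-home": "/path/to/spark_home", "spark-binary": "custom-spark-submit"}'))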
Example 4: test_resolve_connection_mesos_default_connection
# Required import: from airflow.contrib.hooks.spark_submit_hook import SparkSubmitHook [possibly under an alias]
# Or: from airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook import _build_command [possibly under an alias]
def test_resolve_connection_mesos_default_connection(self):
    # Given
    hook = SparkSubmitHook(conn_id='spark_default_mesos')

    # When
    connection = hook._resolve_connection()
    cmd = hook._build_command(self._spark_job_file)

    # Then
    dict_cmd = self.cmd_args_to_dict(cmd)
    expected_spark_connection = {"master": "mesos://host:5050",
                                 "spark_binary": "spark-submit",
                                 "deploy_mode": None,
                                 "queue": None,
                                 "spark_home": None}
    self.assertEqual(connection, expected_spark_connection)
    self.assertEqual(dict_cmd["--master"], "mesos://host:5050")
Example 5: test_resolve_connection_spark_yarn_cluster_connection
# Required import: from airflow.contrib.hooks.spark_submit_hook import SparkSubmitHook [possibly under an alias]
# Or: from airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook import _build_command [possibly under an alias]
def test_resolve_connection_spark_yarn_cluster_connection(self):
    # Given
    hook = SparkSubmitHook(conn_id='spark_yarn_cluster')

    # When
    connection = hook._resolve_connection()
    cmd = hook._build_command(self._spark_job_file)

    # Then
    dict_cmd = self.cmd_args_to_dict(cmd)
    expected_spark_connection = {"master": "yarn://yarn-master",
                                 "spark_binary": "spark-submit",
                                 "deploy_mode": "cluster",
                                 "queue": "root.etl",
                                 "spark_home": None}
    self.assertEqual(connection, expected_spark_connection)
    self.assertEqual(dict_cmd["--master"], "yarn://yarn-master")
    self.assertEqual(dict_cmd["--queue"], "root.etl")
    self.assertEqual(dict_cmd["--deploy-mode"], "cluster")
Example 6: test_build_command
# Required import: from airflow.contrib.hooks.spark_submit_hook import SparkSubmitHook [possibly under an alias]
# Or: from airflow.contrib.hooks.spark_submit_hook.SparkSubmitHook import _build_command [possibly under an alias]
def test_build_command(self):
    hook = SparkSubmitHook(**self._config)

    # The subprocess call needs a list, but for the assertions below we join the command on spaces
    cmd = ' '.join(hook._build_command(self._spark_job_file))

    # Check that the application file ends up in the command
    assert self._spark_job_file in cmd

    # Check all the parameters
    assert "--files {}".format(self._config['files']) in cmd
    assert "--py-files {}".format(self._config['py_files']) in cmd
    assert "--jars {}".format(self._config['jars']) in cmd
    assert "--total-executor-cores {}".format(self._config['total_executor_cores']) in cmd
    assert "--executor-cores {}".format(self._config['executor_cores']) in cmd
    assert "--executor-memory {}".format(self._config['executor_memory']) in cmd
    assert "--keytab {}".format(self._config['keytab']) in cmd
    assert "--principal {}".format(self._config['principal']) in cmd
    assert "--name {}".format(self._config['name']) in cmd
    assert "--num-executors {}".format(self._config['num_executors']) in cmd
    assert "--class {}".format(self._config['java_class']) in cmd
    assert "--driver-memory {}".format(self._config['driver_memory']) in cmd

    # Check that all conf settings are present
    for k in self._config['conf']:
        assert "--conf {0}={1}".format(k, self._config['conf'][k]) in cmd

    # Check that the application arguments are present
    for a in self._config['application_args']:
        assert a in cmd

    # Check that the application arguments come after the application file
    application_idx = cmd.find(self._spark_job_file)
    for a in self._config['application_args']:
        assert cmd.find(a) > application_idx

    if self._config['verbose']:
        assert "--verbose" in cmd