本文整理汇总了Python中airflow.contrib.hooks.sqoop_hook.SqoopHook类的典型用法代码示例。如果您正苦于以下问题:Python SqoopHook类的具体用法?Python SqoopHook怎么用?Python SqoopHook使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了SqoopHook类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_popen
def test_popen(self, mock_popen):
    """
    Verify that ``export_table`` launches Sqoop with the expected argument list.

    ``mock_popen`` is presumably a patched ``subprocess.Popen`` (the patch
    decorator is not visible here); the first call recorded on it is compared
    against the complete export command.
    """
    # Given: a fake process that reports success and yields canned output
    fake_process = mock_popen.return_value
    fake_process.stdout = StringIO(u'stdout')
    fake_process.stderr = StringIO(u'stderr')
    fake_process.returncode = 0
    fake_process.communicate.return_value = [
        StringIO(u'stdout\nstdout'),
        StringIO(u'stderr\nstderr'),
    ]

    # When
    sqoop = SqoopHook(conn_id='sqoop_test')
    sqoop.export_table(**self._config_export)

    # Then
    first_call = mock_popen.mock_calls[0]
    extra = self._config_json
    export_cfg = self._config_export
    expected_cmd = [
        'sqoop',
        'export',
        '-jt', extra['job_tracker'],
        '-libjars', extra['libjars'],
        '-files', extra['files'],
        '-fs', extra['namenode'],
        '-archives', extra['archives'],
        '--connect', 'rmdbs:5050/schema',
        '--input-null-string', export_cfg['input_null_string'],
        '--input-null-non-string', export_cfg['input_null_non_string'],
        '--staging-table', export_cfg['staging_table'],
        '--clear-staging-table',
        '--enclosed-by', export_cfg['enclosed_by'],
        '--escaped-by', export_cfg['escaped_by'],
        '--input-fields-terminated-by', export_cfg['input_fields_terminated_by'],
        '--input-lines-terminated-by', export_cfg['input_lines_terminated_by'],
        '--input-optionally-enclosed-by', export_cfg['input_optionally_enclosed_by'],
        '--batch',
        '--relaxed-isolation',
        '--export-dir', export_cfg['export_dir'],
        '--table', export_cfg['table'],
    ]
    # -2 and -1 are the numeric values of subprocess.STDOUT and subprocess.PIPE.
    self.assertEqual(first_call, call(expected_cmd, stderr=-2, stdout=-1))
示例2: test_import_cmd
def test_import_cmd(self):
    """Check that ``_import_cmd`` emits the options configured for the import."""
    hook = SqoopHook()
    import_cfg = self._config_import

    # _import_cmd produces an argument list (what the subprocess needs);
    # join it on spaces so plain substring assertions can be used.
    cmd_parts = hook._import_cmd(
        import_cfg['target_dir'],
        append=import_cfg['append'],
        file_type=import_cfg['file_type'],
        split_by=import_cfg['split_by'],
        direct=import_cfg['direct'],
        driver=import_cfg['driver'],
    )
    cmd = ' '.join(cmd_parts)

    # Boolean switches are only expected when enabled in the configuration.
    for switch in ('append', 'direct'):
        if import_cfg[switch]:
            self.assertIn('--{}'.format(switch), cmd)

    expected_target = '--target-dir {}'.format(import_cfg['target_dir'])
    self.assertIn(expected_target, cmd)
    expected_driver = '--driver {}'.format(import_cfg['driver'])
    self.assertIn(expected_driver, cmd)
    expected_split = '--split-by {}'.format(import_cfg['split_by'])
    self.assertIn(expected_split, cmd)
示例3: test_export_cmd
def test_export_cmd(self):
    """
    Tests to verify the hook export command is building correct Sqoop export command.
    """
    hook = SqoopHook()
    export_cfg = self._config_export

    # Options that carry a value; each flag is the key with '_' replaced by '-'.
    valued_options = (
        'input_null_string',
        'input_null_non_string',
        'staging_table',
        'enclosed_by',
        'escaped_by',
        'input_fields_terminated_by',
        'input_lines_terminated_by',
        'input_optionally_enclosed_by',
    )
    # Boolean switches that appear in the command only when enabled.
    switch_options = ('clear_staging_table', 'batch', 'relaxed_isolation')

    positional = (export_cfg['table'], export_cfg['export_dir'])
    keyword_names = valued_options + switch_options + ('extra_export_options',)
    keywords = {name: export_cfg[name] for name in keyword_names}

    # The subprocess requires an array but we build the cmd by joining on a space
    cmd = ' '.join(hook._export_cmd(*positional, **keywords))

    for name in valued_options:
        expected = "--{} {}".format(name.replace('_', '-'), export_cfg[name])
        self.assertIn(expected, cmd)

    # these options are from the extra export options
    for extra_fragment in ("--update-key id", "--update-mode allowinsert"):
        self.assertIn(extra_fragment, cmd)

    for name in switch_options:
        if export_cfg[name]:
            self.assertIn("--{}".format(name.replace('_', '-')), cmd)
示例4: test_submit_none_mappers
def test_submit_none_mappers(self):
    """
    Test to check that if value of num_mappers is None, then it shouldn't be in the cmd built.
    """
    # Build a separate config so the shared fixture is left untouched.
    config_no_mappers = dict(self._config, num_mappers=None)
    prepared = SqoopHook(**config_no_mappers)._prepare_command()
    self.assertNotIn('--num-mappers', ' '.join(prepared))
示例5: test_cmd_mask_password
def test_cmd_mask_password(self):
    """
    Check that ``cmd_mask_password`` replaces the value after ``--password``
    with ``MASKED`` and returns a command without a password unchanged.
    """
    hook = SqoopHook()

    masked = hook.cmd_mask_password(['--password', 'supersecret'])
    self.assertEqual(masked, ['--password', 'MASKED'])

    plain_cmd = ['--target', 'targettable']
    self.assertEqual(hook.cmd_mask_password(plain_cmd), plain_cmd)
示例6: test_import_cmd
def test_import_cmd(self):
    """
    Tests to verify the hook import command is building correct Sqoop import command.
    """
    hook = SqoopHook()
    import_cfg = self._config_import
    stanza_fragment = 'hcatalog-storage-stanza \"stored as orcfile\"'

    # First command: explicit target directory, no extra import options.
    # The subprocess requires an array but we build the cmd by joining on a space
    first_parts = hook._import_cmd(
        import_cfg['target_dir'],
        append=import_cfg['append'],
        file_type=import_cfg['file_type'],
        split_by=import_cfg['split_by'],
        direct=import_cfg['direct'],
        driver=import_cfg['driver'],
        extra_import_options=None,
    )
    cmd = ' '.join(first_parts)

    for switch in ('append', 'direct'):
        if import_cfg[switch]:
            self.assertIn('--{}'.format(switch), cmd)

    self.assertIn('--target-dir {}'.format(import_cfg['target_dir']), cmd)
    self.assertIn('--driver {}'.format(import_cfg['driver']), cmd)
    self.assertIn('--split-by {}'.format(import_cfg['split_by']), cmd)

    # these are from extra options, but not passed to this cmd import command
    for absent in ('--show', stanza_fragment):
        self.assertNotIn(absent, cmd)

    # Second command: no target directory, extra import options supplied.
    second_parts = hook._import_cmd(
        target_dir=None,
        append=import_cfg['append'],
        file_type=import_cfg['file_type'],
        split_by=import_cfg['split_by'],
        direct=import_cfg['direct'],
        driver=import_cfg['driver'],
        extra_import_options=import_cfg['extra_import_options'],
    )
    cmd = ' '.join(second_parts)

    self.assertNotIn('--target-dir', cmd)

    # these checks are from the extra import options
    for present in ('--show', stanza_fragment, '--fetch-size'):
        self.assertIn(present, cmd)
示例7: test_export_cmd
def test_export_cmd(self):
    """Check that ``_export_cmd`` emits every configured export option."""
    hook = SqoopHook()
    export_cfg = self._config_export

    # Options that carry a value; each flag is the key with '_' replaced by '-'.
    valued_options = (
        'input_null_string',
        'input_null_non_string',
        'staging_table',
        'enclosed_by',
        'escaped_by',
        'input_fields_terminated_by',
        'input_lines_terminated_by',
        'input_optionally_enclosed_by',
    )
    # Boolean switches that appear in the command only when enabled.
    switch_options = ('clear_staging_table', 'batch', 'relaxed_isolation')

    positional = (export_cfg['table'], export_cfg['export_dir'])
    keywords = {
        name: export_cfg[name] for name in valued_options + switch_options
    }

    # The subprocess requires an array but we build the cmd by joining on a space
    cmd = ' '.join(hook._export_cmd(*positional, **keywords))

    for name in valued_options:
        expected = "--{} {}".format(name.replace('_', '-'), export_cfg[name])
        self.assertIn(expected, cmd)

    for name in switch_options:
        if export_cfg[name]:
            self.assertIn("--{}".format(name.replace('_', '-')), cmd)
示例8: test_cmd_mask_password
def test_cmd_mask_password(self):
    """
    Tests to verify the hook masking function will correctly mask a user password in Sqoop command.
    """
    hook = SqoopHook()

    # A password value must be replaced by the literal 'MASKED'.
    with_password = ['--password', 'supersecret']
    self.assertEqual(
        hook.cmd_mask_password(with_password), ['--password', 'MASKED'])

    # A command without a password is expected to compare equal to its input.
    without_password = ['--target', 'targettable']
    self.assertEqual(
        hook.cmd_mask_password(without_password), without_password)
示例9: test_submit
def test_submit(self):
    """
    Check the command built by ``_prepare_command`` against the test
    configuration, then confirm that running an export or import raises
    ``OSError``.
    """
    hook = SqoopHook(**self._config)
    cmd = ' '.join(hook._prepare_command())
    hook_cfg = self._config
    extra = self._config_json

    # Check if the config has been extracted from the json
    generic_flags = (
        ('namenode', '-fs'),
        ('job_tracker', '-jt'),
        ('libjars', '-libjars'),
        ('files', '-files'),
        ('archives', '-archives'),
    )
    for extra_key, flag in generic_flags:
        if extra[extra_key]:
            self.assertIn("{} {}".format(flag, extra[extra_key]), cmd)

    for name in ('hcatalog_database', 'hcatalog_table'):
        expected = "--{} {}".format(name.replace('_', '-'), hook_cfg[name])
        self.assertIn(expected, cmd)

    # Check the regulator stuff passed by the default constructor
    if hook_cfg['verbose']:
        self.assertIn("--verbose", cmd)
    if hook_cfg['num_mappers']:
        self.assertIn("--num-mappers {}".format(hook_cfg['num_mappers']), cmd)

    print(hook_cfg['properties'])
    for prop_name, prop_value in hook_cfg['properties'].items():
        self.assertIn("-D {}={}".format(prop_name, prop_value), cmd)

    # We don't have the sqoop binary available, and this is hard to mock,
    # so just accept an exception for now.
    with self.assertRaises(OSError):
        hook.export_table(**self._config_export)

    with self.assertRaises(OSError):
        hook.import_table(
            table='schema.table', target_dir='/sqoop/example/path')

    with self.assertRaises(OSError):
        hook.import_query(
            query='SELECT * FROM sometable', target_dir='/sqoop/example/path')
示例10: execute
def execute(self, context):
    """
    Execute sqoop job

    Runs a table export when ``self.type_cmd`` equals ``'export'``; any other
    value runs a table import.

    :param context: task context supplied by the caller (not used here)
    """
    hook = SqoopHook(conn_id=self.conn_id)
    # Compare by value. An identity check (``is``) against a string literal is
    # only true when both strings happen to be the same object, which is not
    # guaranteed, so an export could silently fall through to the import branch.
    if self.type_cmd == 'export':
        hook.export_table(
            table=self.table,
            export_dir=self.export_dir,
            num_mappers=self.num_mappers)
    else:
        hook.import_table(
            table=self.table,
            target_dir=self.target_dir,
            append=self.append,
            type=self.type,
            columns=self.columns,
            num_mappers=self.num_mappers,
            split_by=self.split_by,
            where=self.where)
示例11: execute
def execute(self, context):
    """
    Execute sqoop job

    Dispatches on ``self.cmd_type``: ``'export'`` runs ``export_table``;
    ``'import'`` runs ``import_table`` when a table is set, or
    ``import_query`` when a query is set.

    :param context: task context supplied by the caller (not used here)
    :raises AirflowException: if ``cmd_type`` is neither ``'import'`` nor
        ``'export'``, or if an import has both or neither of table and query
    """
    hook = SqoopHook(conn_id=self.conn_id,
                     verbose=self.verbose,
                     num_mappers=self.num_mappers,
                     hcatalog_database=self.hcatalog_database,
                     hcatalog_table=self.hcatalog_table,
                     properties=self.properties)

    if self.cmd_type == 'export':
        hook.export_table(
            table=self.table,
            export_dir=self.export_dir,
            input_null_string=self.input_null_string,
            input_null_non_string=self.input_null_non_string,
            staging_table=self.staging_table,
            clear_staging_table=self.clear_staging_table,
            enclosed_by=self.enclosed_by,
            escaped_by=self.escaped_by,
            input_fields_terminated_by=self.input_fields_terminated_by,
            input_lines_terminated_by=self.input_lines_terminated_by,
            input_optionally_enclosed_by=self.input_optionally_enclosed_by,
            batch=self.batch,
            relaxed_isolation=self.relaxed_isolation)
    elif self.cmd_type == 'import':
        # Supplying both was already rejected before this fix; keep rejecting
        # it, but with a message that describes the actual problem.
        if self.table and self.query:
            raise AirflowException(
                'Cannot specify query and table together. Need to specify either or.'
            )
        # The conditions used to be inverted: a missing table triggered
        # import_table, and a missing query triggered import_query with the
        # table name passed as the query. Dispatch on what IS provided.
        if self.table:
            hook.import_table(
                table=self.table,
                target_dir=self.target_dir,
                append=self.append,
                file_type=self.file_type,
                columns=self.columns,
                split_by=self.split_by,
                where=self.where,
                direct=self.direct,
                driver=self.driver)
        elif self.query:
            hook.import_query(
                query=self.query,
                target_dir=self.target_dir,
                append=self.append,
                file_type=self.file_type,
                split_by=self.split_by,
                direct=self.direct,
                driver=self.driver)
        else:
            raise AirflowException(
                "Provide query or table parameter to import using Sqoop"
            )
    else:
        raise AirflowException("cmd_type should be 'import' or 'export'")
示例12: test_get_export_format_argument
def test_get_export_format_argument(self):
    """
    Check that each supported file type yields its ``--as-*`` flag and that
    an unsupported type raises ``AirflowException``.
    """
    hook = SqoopHook()

    # (file type, flag expected in the returned arguments), checked in order
    expected_flags = (
        ('avro', "--as-avrodatafile"),
        ('parquet', "--as-parquetfile"),
        ('sequence', "--as-sequencefile"),
        ('text', "--as-textfile"),
    )
    for file_type, flag in expected_flags:
        self.assertIn(flag, hook._get_export_format_argument(file_type))

    with self.assertRaises(AirflowException):
        hook._get_export_format_argument('unknown')
示例13: test_get_export_format_argument
def test_get_export_format_argument(self):
    """
    Tests to verify the hook get format function is building correct Sqoop command with correct format type.
    """
    hook = SqoopHook()
    format_argument = hook._get_export_format_argument

    # Supported file types, each paired with the flag it must produce.
    cases = (
        ("--as-avrodatafile", 'avro'),
        ("--as-parquetfile", 'parquet'),
        ("--as-sequencefile", 'sequence'),
        ("--as-textfile", 'text'),
    )
    for expected_flag, file_type in cases:
        self.assertIn(expected_flag, format_argument(file_type))

    # Any other file type is rejected.
    with self.assertRaises(AirflowException):
        format_argument('unknown')
示例14: execute
def execute(self, context):
    """
    Execute sqoop job

    Dispatches on ``self.cmd_type``: ``'export'`` runs ``export_table``;
    ``'import'`` runs ``import_table`` when a table is set, otherwise
    ``import_query`` when a query is set.

    :param context: task context supplied by the caller (not used here)
    :raises AirflowException: if ``cmd_type`` is neither ``'import'`` nor
        ``'export'``, or if an import has both or neither of table and query
    """
    hook = SqoopHook(
        conn_id=self.conn_id,
        verbose=self.verbose,
        num_mappers=self.num_mappers,
        hcatalog_database=self.hcatalog_database,
        hcatalog_table=self.hcatalog_table,
        properties=self.properties,
    )

    if self.cmd_type == 'export':
        # Every export keyword has the same name as the attribute holding it.
        export_names = (
            'table',
            'export_dir',
            'input_null_string',
            'input_null_non_string',
            'staging_table',
            'clear_staging_table',
            'enclosed_by',
            'escaped_by',
            'input_fields_terminated_by',
            'input_lines_terminated_by',
            'input_optionally_enclosed_by',
            'batch',
            'relaxed_isolation',
            'extra_export_options',
        )
        hook.export_table(**{name: getattr(self, name) for name in export_names})
        return

    if self.cmd_type != 'import':
        raise AirflowException("cmd_type should be 'import' or 'export'")

    # add create hcatalog table to extra import options if option passed
    # if new params are added to constructor can pass them in here so don't modify sqoop_hook for each param
    if self.create_hcatalog_table:
        self.extra_import_options['create-hcatalog-table'] = ''

    if self.table and self.query:
        raise AirflowException('Cannot specify query and table together. Need to specify either or.')
    if not (self.table or self.query):
        raise AirflowException(
            "Provide query or table parameter to import using Sqoop"
        )

    # Keyword arguments common to both import flavours.
    shared_import_args = dict(
        target_dir=self.target_dir,
        append=self.append,
        file_type=self.file_type,
        split_by=self.split_by,
        direct=self.direct,
        driver=self.driver,
        extra_import_options=self.extra_import_options,
    )
    if self.table:
        hook.import_table(
            table=self.table,
            columns=self.columns,
            where=self.where,
            **shared_import_args
        )
    else:
        hook.import_query(query=self.query, **shared_import_args)
示例15: SqoopOperator
#.........这里部分代码省略.........
self.target_dir = target_dir
self.append = append
self.file_type = file_type
self.columns = columns
self.num_mappers = num_mappers
self.split_by = split_by
self.where = where
self.export_dir = export_dir
self.input_null_string = input_null_string
self.input_null_non_string = input_null_non_string
self.staging_table = staging_table
self.clear_staging_table = clear_staging_table
self.enclosed_by = enclosed_by
self.escaped_by = escaped_by
self.input_fields_terminated_by = input_fields_terminated_by
self.input_lines_terminated_by = input_lines_terminated_by
self.input_optionally_enclosed_by = input_optionally_enclosed_by
self.batch = batch
self.direct = direct
self.driver = driver
self.verbose = verbose
self.relaxed_isolation = relaxed_isolation
self.hcatalog_database = hcatalog_database
self.hcatalog_table = hcatalog_table
self.create_hcatalog_table = create_hcatalog_table
self.properties = properties
self.extra_import_options = extra_import_options or {}
self.extra_export_options = extra_export_options or {}
def execute(self, context):
"""
Execute sqoop job
"""
self.hook = SqoopHook(
conn_id=self.conn_id,
verbose=self.verbose,
num_mappers=self.num_mappers,
hcatalog_database=self.hcatalog_database,
hcatalog_table=self.hcatalog_table,
properties=self.properties
)
if self.cmd_type == 'export':
self.hook.export_table(
table=self.table,
export_dir=self.export_dir,
input_null_string=self.input_null_string,
input_null_non_string=self.input_null_non_string,
staging_table=self.staging_table,
clear_staging_table=self.clear_staging_table,
enclosed_by=self.enclosed_by,
escaped_by=self.escaped_by,
input_fields_terminated_by=self.input_fields_terminated_by,
input_lines_terminated_by=self.input_lines_terminated_by,
input_optionally_enclosed_by=self.input_optionally_enclosed_by,
batch=self.batch,
relaxed_isolation=self.relaxed_isolation,
extra_export_options=self.extra_export_options)
elif self.cmd_type == 'import':
# add create hcatalog table to extra import options if option passed
# if new params are added to constructor can pass them in here
# so don't modify sqoop_hook for each param
if self.create_hcatalog_table:
self.extra_import_options['create-hcatalog-table'] = ''
if self.table and self.query: