本文整理汇总了Python中mrjob.parse.HADOOP_STREAMING_JAR_RE(一个正则表达式对象,而非类)的典型用法代码示例。如果您正苦于以下问题:Python HADOOP_STREAMING_JAR_RE的具体用法?Python HADOOP_STREAMING_JAR_RE怎么用?Python HADOOP_STREAMING_JAR_RE使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。
在下文中一共展示了HADOOP_STREAMING_JAR_RE的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: hadoop_jar
def hadoop_jar(stdout, stderr, environ, *args):
    """Simulate ``hadoop jar <jar> [args...]``.

    If the jar's basename matches ``HADOOP_STREAMING_JAR_RE``, the directory
    returned by ``get_mock_hadoop_output()`` is moved into place as the
    job's ``-output`` directory. For any other jar nothing is simulated and
    the job is simply reported as successful.

    :param stdout: not used by this command.
    :param stderr: file-like object that receives every message.
    :param environ: passed to ``hdfs_path_to_real_path()`` to resolve the
                    output directory.
    :param args: the jar path followed by the arguments for that jar.
    :return: 0 on success; -1 if no jar was given, the jar does not exist,
             a streaming job has no usable ``-output <path>`` argument, or
             no mock output is available.
    """
    if len(args) < 1:
        stderr.write('RunJar jarFile [mainClass] args...\n')
        return -1

    jar_path = args[0]
    if not os.path.exists(jar_path):
        stderr.write(
            'Exception in thread "main" java.io.IOException: Error opening job'
            ' jar: %s\n' % jar_path)
        return -1

    # only simulate for streaming steps
    if HADOOP_STREAMING_JAR_RE.match(os.path.basename(jar_path)):
        streaming_args = args[1:]

        # index() raises ValueError (it never returns -1) when -output is
        # absent, and -output may be the last argument with no value after
        # it; report both as a failed job instead of crashing
        try:
            output_dir = streaming_args[streaming_args.index('-output') + 1]
        except (ValueError, IndexError):
            stderr.write('Missing required option: -output <path>\n')
            return -1

        real_output_dir = hdfs_path_to_real_path(output_dir, environ)

        mock_output_dir = get_mock_hadoop_output()
        if mock_output_dir is None:
            stderr.write('Job failed!')
            return -1

        # os.rmdir() only removes an empty directory; a non-empty one
        # raises OSError
        if os.path.isdir(real_output_dir):
            os.rmdir(real_output_dir)

        shutil.move(mock_output_dir, real_output_dir)

    now = datetime.datetime.now()
    stderr.write(now.strftime('Running job: job_%Y%m%d%H%M_0001\n'))
    stderr.write('Job succeeded!\n')
    return 0
示例2: find_hadoop_streaming_jar
def find_hadoop_streaming_jar(path):
    """Walk the directory tree rooted at *path* and return the full path of
    the first file whose name matches ``HADOOP_STREAMING_JAR_RE``, or None
    if no file in the tree matches."""
    # lazy: os.walk() is only advanced until the first match is produced
    matching_jars = (
        os.path.join(root, name)
        for root, _dirs, names in os.walk(path)
        for name in names
        if HADOOP_STREAMING_JAR_RE.match(name)
    )
    return next(matching_jars, None)
示例3: is_job_flow_non_streaming
def is_job_flow_non_streaming(job_flow):
    """Return True if the given job flow has at least one step and none of
    its steps are Hadoop streaming steps (for example, if the job flow is
    running Hive). A job flow with no steps yields False.
    """
    if not job_flow.steps:
        return False

    # a step counts as streaming when its jar's basename matches the
    # streaming-jar pattern; one such step is enough to return False
    has_streaming_step = any(
        HADOOP_STREAMING_JAR_RE.match(posixpath.basename(step.jar))
        for step in job_flow.steps
    )
    return not has_streaming_step
示例4: hadoop_jar
def hadoop_jar(stdout, stderr, environ, *args):
    """Simulate ``hadoop jar <jar> [args...]``, logging in a log4j-like format.

    Counters from ``next_mock_hadoop_counters()`` (if any) are printed
    first. Then, if the jar's basename matches ``HADOOP_STREAMING_JAR_RE``,
    the directory returned by ``get_mock_hadoop_output()`` is moved into
    place as the job's ``-output`` directory. For any other jar no output
    is simulated.

    :param stdout: not used by this command.
    :param stderr: file-like object that receives every message.
    :param environ: passed to ``hdfs_uri_to_real_path()`` to resolve the
                    output directory.
    :param args: the jar path followed by the arguments for that jar.
    :return: 0 on success; -1 if no jar was given, the jar does not exist,
             a streaming job has no usable ``-output <path>`` argument, or
             no mock output is available.
    """
    if len(args) < 1:
        print('RunJar jarFile [mainClass] args...', file=stderr)
        return -1

    jar_path = args[0]
    if not os.path.exists(jar_path):
        print('Exception in thread "main" java.io.IOException: Error opening'
              ' job jar: %s' % jar_path, file=stderr)
        return -1

    # use this to simulate log4j
    def mock_log4j(message, level='INFO', logger='mapreduce.JOB', now=None):
        now = now or datetime.datetime.now()
        line = '%s %s %s: %s' % (now.strftime('%Y/%m/%d %H:%M:%S'),
                                 level, logger, message)
        print(line, file=stderr)

    # simulate counters
    counters = next_mock_hadoop_counters()
    if counters:
        num_counters = sum(len(g) for g in counters.values())
        mock_log4j('Counters: %d' % num_counters)
        # subsequent lines are actually part of same log record
        for group, group_counters in sorted(counters.items()):
            print(('\t%s' % group), file=stderr)
            for counter, amount in sorted(group_counters.items()):
                print(('\t\t%s=%d' % (counter, amount)), file=stderr)

    # simulate output for streaming steps
    if HADOOP_STREAMING_JAR_RE.match(os.path.basename(jar_path)):
        streaming_args = args[1:]

        # index() raises ValueError (it never returns -1) when -output is
        # absent, and -output may be the last argument with no value after
        # it; report both as a failed job instead of crashing
        try:
            output_dir = streaming_args[streaming_args.index('-output') + 1]
        except (ValueError, IndexError):
            mock_log4j('Missing required option: -output <path>',
                       level='ERROR', logger='streaming.StreamJob')
            return -1

        real_output_dir = hdfs_uri_to_real_path(output_dir, environ)

        mock_output_dir = get_mock_hadoop_output()
        if mock_output_dir is None:
            mock_log4j('Job failed!')
            return -1

        # os.rmdir() only removes an empty directory; a non-empty one
        # raises OSError
        if os.path.isdir(real_output_dir):
            os.rmdir(real_output_dir)

        shutil.move(mock_output_dir, real_output_dir)

    now = datetime.datetime.now()
    mock_log4j(now.strftime('Running job: job_%Y%m%d%H%M_0001'))
    mock_log4j('Job succeeded!')
    return 0
示例5: _find_hadoop_streaming_jar
def _find_hadoop_streaming_jar(self):
    """Search for the hadoop streaming jar. See
    :py:meth:`_hadoop_streaming_jar_dirs` for where we search.

    Directories are tried in the order given; the first directory whose
    listing (``self.fs.ls()``) contains at least one path with a basename
    matching ``HADOOP_STREAMING_JAR_RE`` decides the result.

    :return: the best matching jar path from that directory, or None if no
             directory contains a match.
    """
    def sort_key(p):
        # prefer shallower paths, then shorter names; the path itself
        # breaks any remaining ties
        return (len(p.split('/')),
                len(posixpath.basename(p)),
                p)

    for jar_dir in unique(self._hadoop_streaming_jar_dirs()):
        log.info('Looking for Hadoop streaming jar in %s' % jar_dir)

        # distinct names for the directory and its entries, so the inner
        # loop no longer rebinds the outer loop variable
        streaming_jars = [
            jar_path for jar_path in self.fs.ls(jar_dir)
            if HADOOP_STREAMING_JAR_RE.match(posixpath.basename(jar_path))
        ]

        if streaming_jars:
            # only the best candidate is needed, so there is no need to
            # sort the whole list
            return min(streaming_jars, key=sort_key)

    return None