This article collects typical usage examples of the Java method org.apache.hadoop.mapred.JobConf.setNumMapTasks. If you are unsure what JobConf.setNumMapTasks does, how to call it, or what it looks like in real code, the curated method examples below may help. You can also read further about the enclosing class, org.apache.hadoop.mapred.JobConf.
The following presents 12 code examples of JobConf.setNumMapTasks, ordered by popularity by default.
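Before diving into the examples, here is a minimal sketch of how setNumMapTasks is typically called when configuring an old-API (org.apache.hadoop.mapred) job. The WordCountMapper and WordCountReducer classes are hypothetical placeholders, and keep in mind that the value passed to setNumMapTasks is only a hint to the framework: the actual number of map tasks is ultimately determined by the InputFormat's splits.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class SetNumMapTasksSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(SetNumMapTasksSketch.class);
    job.setJobName("set-num-map-tasks-demo");

    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(WordCountMapper.class);   // hypothetical mapper class
    job.setReducerClass(WordCountReducer.class); // hypothetical reducer class

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setNumMapTasks(8);    // hint the desired map parallelism
    job.setNumReduceTasks(2); // reducers, by contrast, are set exactly

    JobClient.runJob(job);
  }
}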
Example 1: getJob
import org.apache.hadoop.mapred.JobConf; // import the package/class the method depends on
/**
 * Sets up a job conf for the given job using the given config object.
 * Ensures that the correct input format, the mapper and reducer classes,
 * and the input and output key and value classes are set, along with any
 * other job configuration.
 *
 * @param config
 * @return JobConf representing the job to be run
 * @throws IOException
 */
private JobConf getJob(ConfigExtractor config) throws IOException {
  JobConf job = new JobConf(config.getConfig(), SliveTest.class);
  job.setInputFormat(DummyInputFormat.class);
  FileOutputFormat.setOutputPath(job, config.getOutputPath());
  job.setMapperClass(SliveMapper.class);
  job.setPartitionerClass(SlivePartitioner.class);
  job.setReducerClass(SliveReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormat(TextOutputFormat.class);
  TextOutputFormat.setCompressOutput(job, false);
  job.setNumReduceTasks(config.getReducerAmount());
  job.setNumMapTasks(config.getMapAmount());
  return job;
}
Example 2: createCopyJob
import org.apache.hadoop.mapred.JobConf; // import the package/class the method depends on
/**
 * Creates a simple copy job.
 *
 * @param indirs List of input directories.
 * @param outdir Output directory.
 * @return JobConf initialised for a simple copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
static JobConf createCopyJob(List<Path> indirs, Path outdir) throws Exception {
  Configuration defaults = new Configuration();
  JobConf theJob = new JobConf(defaults, TestJobControl.class);
  theJob.setJobName("DataMoveJob");
  FileInputFormat.setInputPaths(theJob, indirs.toArray(new Path[0]));
  theJob.setMapperClass(DataCopy.class);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(DataCopy.class);
  theJob.setNumMapTasks(12);
  theJob.setNumReduceTasks(4);
  return theJob;
}
Example 3: runJob
import org.apache.hadoop.mapred.JobConf; // import the package/class the method depends on
static boolean runJob(JobConf conf, Path inDir, Path outDir, int numMaps,
    int numReds) throws IOException, InterruptedException {
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (!fs.exists(inDir)) {
    fs.mkdirs(inDir);
  }
  String input = "The quick brown fox\n" + "has many silly\n"
      + "red fox sox\n";
  for (int i = 0; i < numMaps; ++i) {
    DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
    file.writeBytes(input);
    file.close();
  }

  DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf, fs);
  conf.setOutputCommitter(CustomOutputCommitter.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  JobClient jobClient = new JobClient(conf);
  RunningJob job = jobClient.submitJob(conf);
  return jobClient.monitorAndPrintJob(conf, job);
}
Example 4: testCombinerShouldUpdateTheReporter
import org.apache.hadoop.mapred.JobConf; // import the package/class the method depends on
@Test
public void testCombinerShouldUpdateTheReporter() throws Exception {
  JobConf conf = new JobConf(mrCluster.getConfig());
  int numMaps = 5;
  int numReds = 2;
  Path in = new Path(mrCluster.getTestWorkDir().getAbsolutePath(),
      "testCombinerShouldUpdateTheReporter-in");
  Path out = new Path(mrCluster.getTestWorkDir().getAbsolutePath(),
      "testCombinerShouldUpdateTheReporter-out");
  createInputOutPutFolder(in, out, numMaps);
  conf.setJobName("test-job-with-combiner");
  conf.setMapperClass(IdentityMapper.class);
  conf.setCombinerClass(MyCombinerToCheckReporter.class);
  //conf.setJarByClass(MyCombinerToCheckReporter.class);
  conf.setReducerClass(IdentityReducer.class);
  DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf);

  conf.setOutputCommitter(CustomOutputCommitter.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);
  FileInputFormat.setInputPaths(conf, in);
  FileOutputFormat.setOutputPath(conf, out);

  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  runJob(conf);
}
Example 5: testTipFailed
import org.apache.hadoop.mapred.JobConf; // import the package/class the method depends on
@SuppressWarnings("rawtypes")
@Test
public void testTipFailed() throws Exception {
  JobConf job = new JobConf();
  job.setNumMapTasks(2);

  TaskStatus status = new TaskStatus() {
    @Override
    public boolean getIsMap() {
      return false;
    }

    @Override
    public void addFetchFailedMap(TaskAttemptID mapTaskId) {
    }
  };
  Progress progress = new Progress();

  TaskAttemptID reduceId = new TaskAttemptID("314159", 0, TaskType.REDUCE,
      0, 0);
  ShuffleSchedulerImpl scheduler = new ShuffleSchedulerImpl(job, status,
      reduceId, null, progress, null, null, null);

  JobID jobId = new JobID();
  TaskID taskId1 = new TaskID(jobId, TaskType.REDUCE, 1);
  // The scheduler was told there are 2 map tasks, so each failed TIP
  // advances the shuffle progress by 1/2.
  scheduler.tipFailed(taskId1);
  Assert.assertEquals("Progress should be 0.5", 0.5f, progress.getProgress(),
      0.0f);
  Assert.assertFalse(scheduler.waitUntilDone(1));

  TaskID taskId0 = new TaskID(jobId, TaskType.REDUCE, 0);
  scheduler.tipFailed(taskId0);
  Assert.assertEquals("Progress should be 1.0", 1.0f, progress.getProgress(),
      0.0f);
  Assert.assertTrue(scheduler.waitUntilDone(1));
}
Example 6: setMapCount
import org.apache.hadoop.mapred.JobConf; // import the package/class the method depends on
/**
 * Calculate how many maps to run.
 * The number of maps is the cumulative size of the copy divided by
 * (distcp.bytes.per.map, default BYTES_PER_MAP or -m on the command line),
 * capped at (distcp.max.map.tasks, default MAX_MAPS_PER_NODE * nodes in
 * the cluster) and never less than 1.
 * @param totalBytes Count of total bytes for job
 * @param job The job to configure
 * @return Count of maps to run.
 */
private static int setMapCount(long totalBytes, JobConf job)
    throws IOException {
  int numMaps =
      (int)(totalBytes / job.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP));
  numMaps = Math.min(numMaps,
      job.getInt(MAX_MAPS_LABEL, MAX_MAPS_PER_NODE *
          new JobClient(job).getClusterStatus().getTaskTrackers()));
  numMaps = Math.max(numMaps, 1);
  job.setNumMapTasks(numMaps);
  return numMaps;
}
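To make the clamping above concrete, here is a tiny self-contained sketch that reproduces the same bound with hypothetical numbers; the 10 GB copy size, 256 MB per map, and the 20-maps-per-node cap on a 4-node cluster are illustrative assumptions, not the actual DistCp defaults.

// Standalone illustration of the map-count bound computed by setMapCount above.
// All numbers here are assumed for the example.
public class MapCountSketch {
  public static void main(String[] args) {
    long totalBytes = 10L * 1024 * 1024 * 1024; // assume 10 GB to copy
    long bytesPerMap = 256L * 1024 * 1024;      // assume 256 MB handled per map
    int maxMaps = 20 * 4;                       // assume 20 maps/node on a 4-node cluster

    int numMaps = (int) (totalBytes / bytesPerMap); // 40 maps by size
    numMaps = Math.min(numMaps, maxMaps);           // cap is 80, so 40 stands
    numMaps = Math.max(numMaps, 1);                 // never fewer than one map
    System.out.println("numMaps = " + numMaps);     // prints numMaps = 40
    // A real job would then call job.setNumMapTasks(numMaps).
  }
}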
Example 7: limitNumMapTasks
import org.apache.hadoop.mapred.JobConf; // import the package/class the method depends on
/**
 * Ensures that the given number of map tasks for the given job
 * configuration does not exceed the number of regions for the given table.
 *
 * @param table The table to get the region count for.
 * @param job The current job configuration to adjust.
 * @throws IOException When retrieving the table details fails.
 */
// Used by tests.
public static void limitNumMapTasks(String table, JobConf job)
    throws IOException {
  int regions =
      MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
  if (job.getNumMapTasks() > regions)
    job.setNumMapTasks(regions);
}
Example 8: shouldNumberOfMapTaskNotExceedNumberOfRegionsForGivenTable
import org.apache.hadoop.mapred.JobConf; // import the package/class the method depends on
@Test
public void shouldNumberOfMapTaskNotExceedNumberOfRegionsForGivenTable()
    throws IOException {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, jobConf);
  assertEquals(1, jobConf.getNumMapTasks());

  jobConf.setNumMapTasks(10);
  TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, jobConf);
  assertEquals(1, jobConf.getNumMapTasks());
}
Example 9: submitAsMapReduce
import org.apache.hadoop.mapred.JobConf; // import the package/class the method depends on
/**
 * Based on the parsed args, submits the LoadGenerator as a MapReduce job
 * with numMapTasks map tasks.
 * @return exit code of the job submission
 */
private int submitAsMapReduce() {
  System.out.println("Running as a MapReduce job with " +
      numMapTasks + " mapTasks; Output to file " + mrOutDir);

  Configuration conf = new Configuration(getConf());

  // First set all the args of LoadGenerator as Conf vars to pass to MR tasks
  conf.set(LG_ROOT, root.toString());
  conf.setInt(LG_MAXDELAYBETWEENOPS, maxDelayBetweenOps);
  conf.setInt(LG_NUMOFTHREADS, numOfThreads);
  conf.set(LG_READPR, readProbs[0] + ""); // Pass Double as string
  conf.set(LG_WRITEPR, writeProbs[0] + ""); // Pass Double as string
  conf.setLong(LG_SEED, seed); // No idea what this is
  conf.setInt(LG_NUMMAPTASKS, numMapTasks);
  if (scriptFile == null && durations[0] <= 0) {
    System.err.println("When run as a MapReduce job, elapsed Time or ScriptFile must be specified");
    System.exit(-1);
  }
  conf.setLong(LG_ELAPSEDTIME, durations[0]);
  conf.setLong(LG_STARTTIME, startTime);
  if (scriptFile != null) {
    conf.set(LG_SCRIPTFILE, scriptFile);
  }
  conf.set(LG_FLAGFILE, flagFile.toString());

  // Now set the necessary conf variables that apply to run MR itself.
  JobConf jobConf = new JobConf(conf, LoadGenerator.class);
  jobConf.setJobName("NNLoadGeneratorViaMR");
  jobConf.setNumMapTasks(numMapTasks);
  jobConf.setNumReduceTasks(1); // 1 reducer to collect the results

  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(IntWritable.class);
  jobConf.setMapperClass(MapperThatRunsNNLoadGenerator.class);
  jobConf.setReducerClass(ReducerThatCollectsLGdata.class);
  jobConf.setInputFormat(DummyInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);

  // Explicitly set number of max map attempts to 1.
  jobConf.setMaxMapAttempts(1);
  // Explicitly turn off speculative execution
  jobConf.setSpeculativeExecution(false);

  // This MapReduce job has no input but does produce output
  FileOutputFormat.setOutputPath(jobConf, new Path(mrOutDir));

  try {
    JobClient.runJob(jobConf);
  } catch (IOException e) {
    System.err.println("Failed to run job: " + e.getMessage());
    return -1;
  }
  return 0;
}
Example 10: TestSucceedAndFailedCopyMap
import org.apache.hadoop.mapred.JobConf; // import the package/class the method depends on
@SuppressWarnings("rawtypes")
@Test
public <K, V> void TestSucceedAndFailedCopyMap() throws Exception {
  JobConf job = new JobConf();
  job.setNumMapTasks(2);

  // mock creation
  TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
  Reporter mockReporter = mock(Reporter.class);
  FileSystem mockFileSystem = mock(FileSystem.class);
  Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = job.getCombinerClass();
  @SuppressWarnings("unchecked") // needed for mock with generic
  CombineOutputCollector<K, V> mockCombineOutputCollector =
      (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
  org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID =
      mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
  LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
  CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
  Counter mockCounter = mock(Counter.class);
  TaskStatus mockTaskStatus = mock(TaskStatus.class);
  Progress mockProgress = mock(Progress.class);
  MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
  Task mockTask = mock(Task.class);
  @SuppressWarnings("unchecked")
  MapOutput<K, V> output = mock(MapOutput.class);

  ShuffleConsumerPlugin.Context<K, V> context =
      new ShuffleConsumerPlugin.Context<K, V>(
          mockTaskAttemptID, job, mockFileSystem,
          mockUmbilical, mockLocalDirAllocator,
          mockReporter, mockCompressionCodec,
          combinerClass, mockCombineOutputCollector,
          mockCounter, mockCounter, mockCounter,
          mockCounter, mockCounter, mockCounter,
          mockTaskStatus, mockProgress, mockProgress,
          mockTask, mockMapOutputFile, null);

  TaskStatus status = new TaskStatus() {
    @Override
    public boolean getIsMap() {
      return false;
    }

    @Override
    public void addFetchFailedMap(TaskAttemptID mapTaskId) {
    }
  };
  Progress progress = new Progress();

  ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job,
      status, null, null, progress, context.getShuffledMapsCounter(),
      context.getReduceShuffleBytes(), context.getFailedShuffleCounter());

  MapHost host1 = new MapHost("host1", null);
  TaskAttemptID failedAttemptID = new TaskAttemptID(
      new org.apache.hadoop.mapred.TaskID(
          new JobID("test", 0), TaskType.MAP, 0), 0);
  TaskAttemptID succeedAttemptID = new TaskAttemptID(
      new org.apache.hadoop.mapred.TaskID(
          new JobID("test", 0), TaskType.MAP, 1), 1);

  // handle output fetch failure for failedAttemptID, part I
  scheduler.hostFailed(host1.getHostName());

  // handle output fetch success for succeedAttemptID
  long bytes = (long) 500 * 1024 * 1024;
  scheduler.copySucceeded(succeedAttemptID, host1, bytes, 0, 500000, output);

  // handle output fetch failure for failedAttemptID, part II
  // for MAPREDUCE-6361: verify that no NPE is thrown
  scheduler.copyFailed(failedAttemptID, host1, true, false);
}
Example 11: createDataJoinJob
import org.apache.hadoop.mapred.JobConf; // import the package/class the method depends on
public static JobConf createDataJoinJob(String args[]) throws IOException {
  String inputDir = args[0];
  String outputDir = args[1];
  Class inputFormat = SequenceFileInputFormat.class;
  if (args[2].compareToIgnoreCase("text") != 0) {
    System.out.println("Using SequenceFileInputFormat: " + args[2]);
  } else {
    System.out.println("Using TextInputFormat: " + args[2]);
    inputFormat = TextInputFormat.class;
  }
  int numOfReducers = Integer.parseInt(args[3]);
  Class mapper = getClassByName(args[4]);
  Class reducer = getClassByName(args[5]);
  Class mapoutputValueClass = getClassByName(args[6]);
  Class outputFormat = TextOutputFormat.class;
  Class outputValueClass = Text.class;
  if (args[7].compareToIgnoreCase("text") != 0) {
    System.out.println("Using SequenceFileOutputFormat: " + args[7]);
    outputFormat = SequenceFileOutputFormat.class;
    outputValueClass = getClassByName(args[7]);
  } else {
    System.out.println("Using TextOutputFormat: " + args[7]);
  }
  long maxNumOfValuesPerGroup = 100;
  String jobName = "";
  if (args.length > 8) {
    maxNumOfValuesPerGroup = Long.parseLong(args[8]);
  }
  if (args.length > 9) {
    jobName = args[9];
  }

  Configuration defaults = new Configuration();
  JobConf job = new JobConf(defaults, DataJoinJob.class);
  job.setJobName("DataJoinJob: " + jobName);

  FileSystem fs = FileSystem.get(defaults);
  fs.delete(new Path(outputDir), true);
  FileInputFormat.setInputPaths(job, inputDir);
  job.setInputFormat(inputFormat);
  job.setMapperClass(mapper);
  FileOutputFormat.setOutputPath(job, new Path(outputDir));
  job.setOutputFormat(outputFormat);
  SequenceFileOutputFormat.setOutputCompressionType(job,
      SequenceFile.CompressionType.BLOCK);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(mapoutputValueClass);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(outputValueClass);
  job.setReducerClass(reducer);

  job.setNumMapTasks(1);
  job.setNumReduceTasks(numOfReducers);
  job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
  return job;
}
Example 12: setNumMapTasks
import org.apache.hadoop.mapred.JobConf; // import the package/class the method depends on
/**
 * Sets the number of map tasks for the given job configuration to the
 * number of regions the given table has.
 *
 * @param table The table to get the region count for.
 * @param job The current job configuration to adjust.
 * @throws IOException When retrieving the table details fails.
 */
public static void setNumMapTasks(String table, JobConf job)
    throws IOException {
  job.setNumMapTasks(MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job),
      TableName.valueOf(table)));
}
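Examples 7 and 12 come from the same HBase helper class and are typically used together in a job driver: size the map phase from the table's region count, then make sure the count can never exceed it. The sketch below is an assumed illustration of that pattern; the table name "my_table" is a hypothetical placeholder, and a real job would still need a mapper, input format, and output path before it could run.

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.mapred.JobConf;

public class RegionSizedJobSketch {
  public static void main(String[] args) throws Exception {
    // "my_table" is a hypothetical table name used only for illustration.
    JobConf job = new JobConf(HBaseConfiguration.create(), RegionSizedJobSketch.class);
    job.setJobName("region-sized-scan");

    // One map task per region of the table (Example 12)...
    TableMapReduceUtil.setNumMapTasks("my_table", job);
    // ...and clamp the count back down if it was raised elsewhere (Example 7).
    TableMapReduceUtil.limitNumMapTasks("my_table", job);

    System.out.println("map tasks = " + job.getNumMapTasks());
  }
}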