当前位置: 首页>>代码示例>>Java>>正文


Java JobConf.setNumMapTasks方法代码示例

本文整理汇总了Java中org.apache.hadoop.mapred.JobConf.setNumMapTasks方法的典型用法代码示例。如果您正苦于以下问题:Java JobConf.setNumMapTasks方法的具体用法?Java JobConf.setNumMapTasks怎么用?Java JobConf.setNumMapTasks使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.hadoop.mapred.JobConf的用法示例。


在下文中一共展示了JobConf.setNumMapTasks方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getJob

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Sets up a job conf for the given job using the given config object. Ensures
 * that the correct input format is set, the mapper and and reducer class and
 * the input and output keys and value classes along with any other job
 * configuration.
 * 
 * @param config
 * @return JobConf representing the job to be ran
 * @throws IOException
 */
private JobConf getJob(ConfigExtractor config) throws IOException {
  JobConf job = new JobConf(config.getConfig(), SliveTest.class);
  job.setInputFormat(DummyInputFormat.class);
  FileOutputFormat.setOutputPath(job, config.getOutputPath());
  job.setMapperClass(SliveMapper.class);
  job.setPartitionerClass(SlivePartitioner.class);
  job.setReducerClass(SliveReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormat(TextOutputFormat.class);
  TextOutputFormat.setCompressOutput(job, false);
  job.setNumReduceTasks(config.getReducerAmount());
  job.setNumMapTasks(config.getMapAmount());
  return job;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:26,代码来源:SliveTest.java

示例2: createCopyJob

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Creates a simple copy job.
 * 
 * @param indirs List of input directories.
 * @param outdir Output directory.
 * @return JobConf initialised for a simple copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
static JobConf createCopyJob(List<Path> indirs, Path outdir) throws Exception {

  Configuration defaults = new Configuration();
  JobConf theJob = new JobConf(defaults, TestJobControl.class);
  theJob.setJobName("DataMoveJob");

  FileInputFormat.setInputPaths(theJob, indirs.toArray(new Path[0]));
  theJob.setMapperClass(DataCopy.class);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(DataCopy.class);
  theJob.setNumMapTasks(12);
  theJob.setNumReduceTasks(4);
  return theJob;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:25,代码来源:JobControlTestUtils.java

示例3: runJob

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
static boolean runJob(JobConf conf, Path inDir, Path outDir, int numMaps, 
                         int numReds) throws IOException, InterruptedException {

  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (!fs.exists(inDir)) {
    fs.mkdirs(inDir);
  }
  String input = "The quick brown fox\n" + "has many silly\n"
      + "red fox sox\n";
  for (int i = 0; i < numMaps; ++i) {
    DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
    file.writeBytes(input);
    file.close();
  }

  DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf, fs);
  conf.setOutputCommitter(CustomOutputCommitter.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  JobClient jobClient = new JobClient(conf);
  
  RunningJob job = jobClient.submitJob(conf);
  return jobClient.monitorAndPrintJob(conf, job);
}
 
开发者ID:naver,项目名称:hadoop,代码行数:35,代码来源:TestMROldApiJobs.java

示例4: testCombinerShouldUpdateTheReporter

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
@Test
public void testCombinerShouldUpdateTheReporter() throws Exception {
  JobConf conf = new JobConf(mrCluster.getConfig());
  int numMaps = 5;
  int numReds = 2;
  Path in = new Path(mrCluster.getTestWorkDir().getAbsolutePath(),
      "testCombinerShouldUpdateTheReporter-in");
  Path out = new Path(mrCluster.getTestWorkDir().getAbsolutePath(),
      "testCombinerShouldUpdateTheReporter-out");
  createInputOutPutFolder(in, out, numMaps);
  conf.setJobName("test-job-with-combiner");
  conf.setMapperClass(IdentityMapper.class);
  conf.setCombinerClass(MyCombinerToCheckReporter.class);
  //conf.setJarByClass(MyCombinerToCheckReporter.class);
  conf.setReducerClass(IdentityReducer.class);
  DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf);
  conf.setOutputCommitter(CustomOutputCommitter.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(conf, in);
  FileOutputFormat.setOutputPath(conf, out);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);
  
  runJob(conf);
}
 
开发者ID:naver,项目名称:hadoop,代码行数:29,代码来源:TestMRAppWithCombiner.java

示例5: testTipFailed

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
@SuppressWarnings("rawtypes")
@Test
public void testTipFailed() throws Exception {
  JobConf job = new JobConf();
  job.setNumMapTasks(2);

  TaskStatus status = new TaskStatus() {
    @Override
    public boolean getIsMap() {
      return false;
    }

    @Override
    public void addFetchFailedMap(TaskAttemptID mapTaskId) {
    }
  };
  Progress progress = new Progress();

  TaskAttemptID reduceId = new TaskAttemptID("314159", 0, TaskType.REDUCE,
      0, 0);
  ShuffleSchedulerImpl scheduler = new ShuffleSchedulerImpl(job, status,
      reduceId, null, progress, null, null, null);

  JobID jobId = new JobID();
  TaskID taskId1 = new TaskID(jobId, TaskType.REDUCE, 1);
  scheduler.tipFailed(taskId1);

  Assert.assertEquals("Progress should be 0.5", 0.5f, progress.getProgress(),
      0.0f);
  Assert.assertFalse(scheduler.waitUntilDone(1));

  TaskID taskId0 = new TaskID(jobId, TaskType.REDUCE, 0);
  scheduler.tipFailed(taskId0);
  Assert.assertEquals("Progress should be 1.0", 1.0f, progress.getProgress(),
      0.0f);
  Assert.assertTrue(scheduler.waitUntilDone(1));
}
 
开发者ID:naver,项目名称:hadoop,代码行数:38,代码来源:TestShuffleScheduler.java

示例6: setMapCount

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Calculate how many maps to run.
 * Number of maps is bounded by a minimum of the cumulative size of the
 * copy / (distcp.bytes.per.map, default BYTES_PER_MAP or -m on the
 * command line) and at most (distcp.max.map.tasks, default
 * MAX_MAPS_PER_NODE * nodes in the cluster).
 * @param totalBytes Count of total bytes for job
 * @param job The job to configure
 * @return Count of maps to run.
 */
private static int setMapCount(long totalBytes, JobConf job) 
    throws IOException {
  int numMaps =
    (int)(totalBytes / job.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP));
  numMaps = Math.min(numMaps, 
      job.getInt(MAX_MAPS_LABEL, MAX_MAPS_PER_NODE *
        new JobClient(job).getClusterStatus().getTaskTrackers()));
  numMaps = Math.max(numMaps, 1);
  job.setNumMapTasks(numMaps);
  return numMaps;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:22,代码来源:DistCpV1.java

示例7: limitNumMapTasks

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Ensures that the given number of map tasks for the given job
 * configuration does not exceed the number of regions for the given table.
 *
 * @param table  The table to get the region count for.
 * @param job  The current job configuration to adjust.
 * @throws IOException When retrieving the table details fails.
 */
// Used by tests.
public static void limitNumMapTasks(String table, JobConf job)
throws IOException {
  int regions =
    MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
  if (job.getNumMapTasks() > regions)
    job.setNumMapTasks(regions);
}
 
开发者ID:fengchen8086,项目名称:ditb,代码行数:17,代码来源:TableMapReduceUtil.java

示例8: shouldNumberOfMapTaskNotExceedNumberOfRegionsForGivenTable

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
@Test
public void shouldNumberOfMapTaskNotExceedNumberOfRegionsForGivenTable()
    throws IOException {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, jobConf);
  assertEquals(1, jobConf.getNumMapTasks());

  jobConf.setNumMapTasks(10);
  TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.limitNumMapTasks(TABLE_NAME, jobConf);
  assertEquals(1, jobConf.getNumMapTasks());
}
 
开发者ID:fengchen8086,项目名称:ditb,代码行数:15,代码来源:TestTableMapReduceUtil.java

示例9: submitAsMapReduce

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Based on args we submit the LoadGenerator as MR job.
 * Number of MapTasks is numMapTasks
 * @return exitCode for job submission
 */
private int submitAsMapReduce() {
  
  System.out.println("Running as a MapReduce job with " + 
      numMapTasks + " mapTasks;  Output to file " + mrOutDir);


  Configuration conf = new Configuration(getConf());
  
  // First set all the args of LoadGenerator as Conf vars to pass to MR tasks

  conf.set(LG_ROOT , root.toString());
  conf.setInt(LG_MAXDELAYBETWEENOPS, maxDelayBetweenOps);
  conf.setInt(LG_NUMOFTHREADS, numOfThreads);
  conf.set(LG_READPR, readProbs[0]+""); //Pass Double as string
  conf.set(LG_WRITEPR, writeProbs[0]+""); //Pass Double as string
  conf.setLong(LG_SEED, seed); //No idea what this is
  conf.setInt(LG_NUMMAPTASKS, numMapTasks);
  if (scriptFile == null && durations[0] <=0) {
    System.err.println("When run as a MapReduce job, elapsed Time or ScriptFile must be specified");
    System.exit(-1);
  }
  conf.setLong(LG_ELAPSEDTIME, durations[0]);
  conf.setLong(LG_STARTTIME, startTime); 
  if (scriptFile != null) {
    conf.set(LG_SCRIPTFILE , scriptFile);
  }
  conf.set(LG_FLAGFILE, flagFile.toString());
  
  // Now set the necessary conf variables that apply to run MR itself.
  JobConf jobConf = new JobConf(conf, LoadGenerator.class);
  jobConf.setJobName("NNLoadGeneratorViaMR");
  jobConf.setNumMapTasks(numMapTasks);
  jobConf.setNumReduceTasks(1); // 1 reducer to collect the results

  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(IntWritable.class);

  jobConf.setMapperClass(MapperThatRunsNNLoadGenerator.class);
  jobConf.setReducerClass(ReducerThatCollectsLGdata.class);

  jobConf.setInputFormat(DummyInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);
  
  // Explicitly set number of max map attempts to 1.
  jobConf.setMaxMapAttempts(1);
  // Explicitly turn off speculative execution
  jobConf.setSpeculativeExecution(false);

  // This mapReduce job has no input but has output
  FileOutputFormat.setOutputPath(jobConf, new Path(mrOutDir));

  try {
    JobClient.runJob(jobConf);
  } catch (IOException e) {
    System.err.println("Failed to run job: " + e.getMessage());
    return -1;
  }
  return 0;
  
}
 
开发者ID:naver,项目名称:hadoop,代码行数:66,代码来源:LoadGeneratorMR.java

示例10: TestSucceedAndFailedCopyMap

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
@SuppressWarnings("rawtypes")
@Test
public <K, V> void TestSucceedAndFailedCopyMap() throws Exception {
  JobConf job = new JobConf();
  job.setNumMapTasks(2);
  //mock creation
  TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
  Reporter mockReporter = mock(Reporter.class);
  FileSystem mockFileSystem = mock(FileSystem.class);
  Class<? extends org.apache.hadoop.mapred.Reducer>  combinerClass = job.getCombinerClass();
  @SuppressWarnings("unchecked")  // needed for mock with generic
  CombineOutputCollector<K, V>  mockCombineOutputCollector =
      (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
  org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID =
      mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
  LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
  CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
  Counter mockCounter = mock(Counter.class);
  TaskStatus mockTaskStatus = mock(TaskStatus.class);
  Progress mockProgress = mock(Progress.class);
  MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
  Task mockTask = mock(Task.class);
  @SuppressWarnings("unchecked")
  MapOutput<K, V> output = mock(MapOutput.class);

  ShuffleConsumerPlugin.Context<K, V> context =
      new ShuffleConsumerPlugin.Context<K, V>(
          mockTaskAttemptID, job, mockFileSystem,
          mockUmbilical, mockLocalDirAllocator,
          mockReporter, mockCompressionCodec,
          combinerClass, mockCombineOutputCollector,
          mockCounter, mockCounter, mockCounter,
          mockCounter, mockCounter, mockCounter,
          mockTaskStatus, mockProgress, mockProgress,
          mockTask, mockMapOutputFile, null);
  TaskStatus status = new TaskStatus() {
    @Override
    public boolean getIsMap() {
      return false;
    }
    @Override
    public void addFetchFailedMap(TaskAttemptID mapTaskId) {
    }
  };
  Progress progress = new Progress();
  ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job,
      status, null, null, progress, context.getShuffledMapsCounter(),
      context.getReduceShuffleBytes(), context.getFailedShuffleCounter());

  MapHost host1 = new MapHost("host1", null);
  TaskAttemptID failedAttemptID = new TaskAttemptID(
      new org.apache.hadoop.mapred.TaskID(
      new JobID("test",0), TaskType.MAP, 0), 0);

  TaskAttemptID succeedAttemptID = new TaskAttemptID(
      new org.apache.hadoop.mapred.TaskID(
      new JobID("test",0), TaskType.MAP, 1), 1);

  // handle output fetch failure for failedAttemptID, part I
  scheduler.hostFailed(host1.getHostName());

  // handle output fetch succeed for succeedAttemptID
  long bytes = (long)500 * 1024 * 1024;
  scheduler.copySucceeded(succeedAttemptID, host1, bytes, 0, 500000, output);

  // handle output fetch failure for failedAttemptID, part II
  // for MAPREDUCE-6361: verify no NPE exception get thrown out
  scheduler.copyFailed(failedAttemptID, host1, true, false);
}
 
开发者ID:naver,项目名称:hadoop,代码行数:70,代码来源:TestShuffleScheduler.java

示例11: createDataJoinJob

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
public static JobConf createDataJoinJob(String args[]) throws IOException {

    String inputDir = args[0];
    String outputDir = args[1];
    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
      System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
      System.out.println("Using TextInputFormat: " + args[2]);
      inputFormat = TextInputFormat.class;
    }
    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);
    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
      System.out.println("Using SequenceFileOutputFormat: " + args[7]);
      outputFormat = SequenceFileOutputFormat.class;
      outputValueClass = getClassByName(args[7]);
    } else {
      System.out.println("Using TextOutputFormat: " + args[7]);
    }
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
      maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
      jobName = args[9];
    }
    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, DataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir), true);
    FileInputFormat.setInputPaths(job, inputDir);

    job.setInputFormat(inputFormat);

    job.setMapperClass(mapper);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job,
            SequenceFile.CompressionType.BLOCK);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
  }
 
开发者ID:naver,项目名称:hadoop,代码行数:59,代码来源:DataJoinJob.java

示例12: setNumMapTasks

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Sets the number of map tasks for the given job configuration to the
 * number of regions the given table has.
 *
 * @param table  The table to get the region count for.
 * @param job  The current job configuration to adjust.
 * @throws IOException When retrieving the table details fails.
 */
public static void setNumMapTasks(String table, JobConf job)
throws IOException {
  job.setNumMapTasks(MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job),
    TableName.valueOf(table)));
}
 
开发者ID:fengchen8086,项目名称:ditb,代码行数:14,代码来源:TableMapReduceUtil.java


注:本文中的org.apache.hadoop.mapred.JobConf.setNumMapTasks方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。