Java FileInputFormat.setMaxInputSplitSize Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setMaxInputSplitSize. If you are wondering what FileInputFormat.setMaxInputSplitSize does or how to use it, the curated examples below should help. You can also explore further usage examples of org.apache.hadoop.mapreduce.lib.input.FileInputFormat, the class that declares this method.


The following presents 5 code examples of the FileInputFormat.setMaxInputSplitSize method, ordered by popularity.
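
setMaxInputSplitSize sets mapreduce.input.fileinputformat.split.maxsize, an upper bound on the size of each input split; capping it low forces large inputs to be broken into more splits, and therefore more map tasks. Before the examples, here is a minimal, self-contained sketch of the method in context. It is illustrative only: the class name MaxSplitSizeDemo, the 64 KB cap, the identity mapper/reducer, and the command-line paths are assumptions, not taken from the examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MaxSplitSizeDemo {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "max-split-size demo");
    job.setJarByClass(MaxSplitSizeDemo.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(Mapper.class);    // identity mapper
    job.setReducerClass(Reducer.class);  // identity reducer
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // Cap each input split at 64 KB; larger inputs are then broken into
    // many splits, raising the number of map tasks.
    FileInputFormat.setMaxInputSplitSize(job, 64 * 1024L);

    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}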

Example 1: runMetastoreCompareJobWithTextInput

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the package/class the method depends on
private int runMetastoreCompareJobWithTextInput(Path input, Path output)
  throws IOException, InterruptedException, ClassNotFoundException {
  Job job = Job.getInstance(this.getConf(), "Stage1: Metastore Compare Job with Input List");

  job.setJarByClass(this.getClass());
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(Stage1ProcessTableMapperWithTextInput.class);
  job.setReducerClass(Stage1PartitionCompareReducer.class);

  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);

  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  job.setNumReduceTasks(getConf().getInt(
      ConfigurationKeys.BATCH_JOB_METASTORE_PARALLELISM,
      150));


  boolean success = job.waitForCompletion(true);

  return success ? 0 : 1;
}
 
Developer ID: airbnb, Project: reair, Lines of code: 29, Source: MetastoreReplicationJob.java
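
In this example (and in Examples 2-4 below), the cap is read back out of the job configuration with getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L). FileInputFormat.SPLIT_MAXSIZE is the constant "mapreduce.input.fileinputformat.split.maxsize", so any value already present in the configuration wins and 60,000 bytes is only a fallback; because the input is a small text listing rather than bulk data, such a tiny cap fans the lines out across many mappers. A minimal sketch of pre-setting the cap so the fallback never applies (the class name and the 120,000-byte value are illustrative assumptions):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class SplitMaxSizeOverride {
  // Returns a Configuration in which the SPLIT_MAXSIZE lookup used by the
  // example above resolves to `bytes` instead of the 60000L fallback.
  public static Configuration withSplitCap(long bytes) {
    Configuration conf = new Configuration();
    conf.setLong(FileInputFormat.SPLIT_MAXSIZE, bytes);
    return conf;
  }
}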

Example 2: runHdfsCopyJob

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the package/class the method depends on
private int runHdfsCopyJob(Path input, Path output)
  throws IOException, InterruptedException, ClassNotFoundException, TemplateRenderException {

  LOG.info("Starting job for step 2...");

  Job job = Job.getInstance(this.getConf(), "Stage 2: HDFS Copy Job");

  job.setJarByClass(this.getClass());
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(Stage2DirectoryCopyMapper.class);
  job.setReducerClass(Stage2DirectoryCopyReducer.class);

  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setInputDirRecursive(job, true);
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);

  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  job.setNumReduceTasks(getConf().getInt(
      ConfigurationKeys.BATCH_JOB_COPY_PARALLELISM,
      150));

  boolean success = job.waitForCompletion(true);

  if (success) {
    LOG.info("Job for step 2 finished successfully! To view logging data, run the following "
        + "commands in Hive: \n\n"
        + VelocityUtils.renderTemplate(STEP2_HQL_TEMPLATE, velocityContext)
        + "\n");
  }

  return success ? 0 : 1;
}
 
Developer ID: airbnb, Project: reair, Lines of code: 39, Source: MetastoreReplicationJob.java

Example 3: runCommitChangeJob

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the package/class the method depends on
private int runCommitChangeJob(Path input, Path output)
  throws IOException, InterruptedException, ClassNotFoundException, TemplateRenderException {

  LOG.info("Starting job for step 3...");

  Job job = Job.getInstance(this.getConf(), "Stage3: Commit Change Job");

  job.setJarByClass(this.getClass());

  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(Stage3CommitChangeMapper.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setInputDirRecursive(job, true);
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));

  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  // Note: this overrides the setNumReduceTasks(0) call above, so the
  // stage ultimately runs with reducers configured.
  job.setNumReduceTasks(getConf().getInt(
      ConfigurationKeys.BATCH_JOB_METASTORE_PARALLELISM,
      150));

  boolean success = job.waitForCompletion(true);

  if (success) {
    LOG.info("Job for step 3 finished successfully! To view logging data, run the following "
        + "commands in Hive: \n\n"
        + VelocityUtils.renderTemplate(STEP3_HQL_TEMPLATE, velocityContext));
  }
  return success ? 0 : 1;
}
 
Developer ID: airbnb, Project: reair, Lines of code: 37, Source: MetastoreReplicationJob.java

Example 4: runSyncJob

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the package/class the method depends on
private int runSyncJob(Path source, Path destination, Path tmpDir, Path input,
                       Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(getConf(), "HDFS Sync job");
  job.setJarByClass(getClass());

  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(HdfsSyncMapper.class);
  job.setReducerClass(HdfsSyncReducer.class);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);

  job.getConfiguration().set(SRC_PATH_CONF, source.toString());
  job.getConfiguration().set(DST_PATH_CONF, destination.toString());
  job.getConfiguration().set(TMP_PATH_CONF, tmpDir.toString());

  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setInputDirRecursive(job, true);
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));
  FileOutputFormat.setOutputPath(job, new Path(output.toString()));
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  boolean success = job.waitForCompletion(true);

  return success ? 0 : 1;
}
 
Developer ID: airbnb, Project: reair, Lines of code: 29, Source: ReplicationJob.java
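
Note that this example creates the job with the constructor new Job(getConf(), ...), which has been deprecated since Hadoop 2; the other examples use the replacement factory method:

Job job = Job.getInstance(getConf(), "HDFS Sync job");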

Example 5: run

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the package/class the method depends on
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  if (args.length == 0) {
    System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }
  // check for passed parameters, otherwise use defaults
  int width = conf.getInt(Pentomino.WIDTH, PENT_WIDTH);
  int height = conf.getInt(Pentomino.HEIGHT, PENT_HEIGHT);
  int depth = conf.getInt(Pentomino.DEPTH, PENT_DEPTH);
  for (int i = 0; i < args.length; i++) {
    if (args[i].equalsIgnoreCase("-depth")) {
      depth = Integer.parseInt(args[++i].trim());
    } else if (args[i].equalsIgnoreCase("-height")) {
      height = Integer.parseInt(args[++i].trim());
    } else if (args[i].equalsIgnoreCase("-width") ) {
      width = Integer.parseInt(args[++i].trim());
    }
  }
  // now set the values within conf for M/R tasks to read, this
  // will ensure values are set preventing MAPREDUCE-4678
  conf.setInt(Pentomino.WIDTH, width);
  conf.setInt(Pentomino.HEIGHT, height);
  conf.setInt(Pentomino.DEPTH, depth);
  Class<? extends Pentomino> pentClass = conf.getClass(Pentomino.CLASS, 
    OneSidedPentomino.class, Pentomino.class);
  int numMaps = conf.getInt(MRJobConfig.NUM_MAPS, DEFAULT_MAPS);
  Path output = new Path(args[0]);
  Path input = new Path(output + "_input");
  FileSystem fileSys = FileSystem.get(conf);
  try {
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    job.setJarByClass(PentMap.class);
    
    job.setJobName("dancingElephant");
    Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
    pent.initialize(width, height);
    long inputSize = createInputDirectory(fileSys, input, pent, depth);
    // for forcing the number of maps
    FileInputFormat.setMaxInputSplitSize(job, (inputSize/numMaps));
 
    // the keys are the prefix strings
    job.setOutputKeyClass(Text.class);
    // the values are puzzle solutions
    job.setOutputValueClass(Text.class);
    
    job.setMapperClass(PentMap.class);        
    job.setReducerClass(Reducer.class);
    
    job.setNumReduceTasks(1);
    
    return (job.waitForCompletion(true) ? 0 : 1);
  } finally {
    fileSys.delete(input, true);
  }
}
 
Developer ID: naver, Project: hadoop, Lines of code: 60, Source: DistributedPentomino.java
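
This example uses the method in the opposite direction from the others: instead of capping splits at a fixed byte size, it divides the total input size by the desired number of maps, so each split (and hence each map task) covers roughly inputSize/numMaps bytes and approximately numMaps mappers are launched. A minimal sketch of that arithmetic as a reusable helper; the class and method names, and the guards against a zero divisor or zero result, are added assumptions, not part of the original code:

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public final class SplitSizing {
  // Caps the split size so that roughly `numMaps` map tasks are created
  // for `inputSize` bytes of input, mirroring the example above.
  public static void forceApproximateMapCount(Job job, long inputSize, int numMaps) {
    long splitSize = Math.max(1L, inputSize / Math.max(1, numMaps));
    FileInputFormat.setMaxInputSplitSize(job, splitSize);
  }
}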


Note: The org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setMaxInputSplitSize examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their respective developers; copyright in the source code remains with the original authors, and any use or distribution must follow the corresponding project's license. Do not republish without permission.