

Java FileInputFormat.setMaxInputSplitSize Method Code Examples

This article collects typical usages of the Java method org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setMaxInputSplitSize, drawn from open-source projects. If you are unsure what FileInputFormat.setMaxInputSplitSize does, how to call it, or what real-world uses look like, the selected examples below should help. For broader context, see the other usage examples of the enclosing class, org.apache.hadoop.mapreduce.lib.input.FileInputFormat.


The sections below present five code examples of the FileInputFormat.setMaxInputSplitSize method, ordered by popularity by default.
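
Before diving into the examples, a minimal, self-contained sketch may help fix what the method does. This program is illustrative only: the class name, paths, and the 16 MB cap are invented for demonstration and do not come from any of the projects below. Calling FileInputFormat.setMaxInputSplitSize(job, n) sets mapreduce.input.fileinputformat.split.maxsize to n in the job's configuration, so any splittable input larger than n bytes is carved into several splits, each of which gets its own map task.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MaxSplitSizeDemo {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "max-split-size demo");
    job.setJarByClass(MaxSplitSizeDemo.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(Mapper.class); // identity mapper, good enough for a demo
    job.setNumReduceTasks(0);         // map-only: mapper output is the job output
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    // Hypothetical paths -- replace with real HDFS locations.
    FileInputFormat.setInputPaths(job, new Path("/tmp/demo/input"));
    FileOutputFormat.setOutputPath(job, new Path("/tmp/demo/output"));

    // Cap each input split at 16 MB. A splittable 1 GB input file then
    // yields at least 64 splits, and hence at least 64 map tasks, instead
    // of the block-size-driven default.
    FileInputFormat.setMaxInputSplitSize(job, 16L * 1024 * 1024);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}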

Example 1: runMetastoreCompareJobWithTextInput

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the package/class the method depends on
private int runMetastoreCompareJobWithTextInput(Path input, Path output)
  throws IOException, InterruptedException, ClassNotFoundException {
  Job job = Job.getInstance(this.getConf(), "Stage1: Metastore Compare Job with Input List");

  job.setJarByClass(this.getClass());
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(Stage1ProcessTableMapperWithTextInput.class);
  job.setReducerClass(Stage1PartitionCompareReducer.class);

  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);

  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  job.setNumReduceTasks(getConf().getInt(
      ConfigurationKeys.BATCH_JOB_METASTORE_PARALLELISM,
      150));


  boolean success = job.waitForCompletion(true);

  return success ? 0 : 1;
}
 
Author: airbnb, Project: reair, Lines: 29, Source: MetastoreReplicationJob.java
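
Two things in this example are worth calling out. FileInputFormat.SPLIT_MAXSIZE is the configuration key mapreduce.input.fileinputformat.split.maxsize, so the getLong call honors any value already set in the configuration and otherwise caps splits at roughly 60 KB. As for why the cap is so small: the input here is a generated list of tables, one per line, and each line costs the mapper a metastore round-trip, so the tiny cap presumably exists to spread even a small input file across many map tasks instead of letting the HDFS block size put the whole list into a single split.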

Example 2: runHdfsCopyJob

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the package/class the method depends on
private int runHdfsCopyJob(Path input, Path output)
  throws IOException, InterruptedException, ClassNotFoundException, TemplateRenderException {

  LOG.info("Starting job for step 2...");

  Job job = Job.getInstance(this.getConf(), "Stage 2: HDFS Copy Job");

  job.setJarByClass(this.getClass());
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(Stage2DirectoryCopyMapper.class);
  job.setReducerClass(Stage2DirectoryCopyReducer.class);

  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setInputDirRecursive(job, true);
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);

  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  job.setNumReduceTasks(getConf().getInt(
      ConfigurationKeys.BATCH_JOB_COPY_PARALLELISM,
      150));

  boolean success = job.waitForCompletion(true);

  if (success) {
    LOG.info("Job for step 2 finished successfully! To view logging data, run the following "
        + "commands in Hive: \n\n"
        + VelocityUtils.renderTemplate(STEP2_HQL_TEMPLATE, velocityContext)
        + "\n");
  }

  return success ? 0 : 1;
}
 
Author: airbnb, Project: reair, Lines: 39, Source: MetastoreReplicationJob.java
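
Compared with Example 1, this stage also calls FileInputFormat.setInputDirRecursive(job, true), which makes the input format descend into subdirectories of the input path (it sets mapreduce.input.fileinputformat.input.dir.recursive); the same ~60 KB split cap then applies to every file it finds.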

Example 3: runCommitChangeJob

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the package/class the method depends on
private int runCommitChangeJob(Path input, Path output)
  throws IOException, InterruptedException, ClassNotFoundException, TemplateRenderException {

  LOG.info("Starting job for step 3...");

  Job job = Job.getInstance(this.getConf(), "Stage3: Commit Change Job");

  job.setJarByClass(this.getClass());

  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(Stage3CommitChangeMapper.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setInputDirRecursive(job, true);
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));

  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  // Note: this call overrides the earlier setNumReduceTasks(0); since no
  // reducer class is set, the job will run the default identity Reducer.
  job.setNumReduceTasks(getConf().getInt(
      ConfigurationKeys.BATCH_JOB_METASTORE_PARALLELISM,
      150));

  boolean success = job.waitForCompletion(true);

  if (success) {
    LOG.info("Job for step 3 finished successfully! To view logging data, run the following "
        + "commands in Hive: \n\n"
        + VelocityUtils.renderTemplate(STEP3_HQL_TEMPLATE, velocityContext));
  }
  return success ? 0 : 1;
}
 
Author: airbnb, Project: reair, Lines: 37, Source: MetastoreReplicationJob.java

Example 4: runSyncJob

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the package/class the method depends on
private int runSyncJob(Path source, Path destination, Path tmpDir, Path input,
                       Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(getConf(), "HDFS Sync job");
  job.setJarByClass(getClass());

  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(HdfsSyncMapper.class);
  job.setReducerClass(HdfsSyncReducer.class);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);

  job.getConfiguration().set(SRC_PATH_CONF, source.toString());
  job.getConfiguration().set(DST_PATH_CONF, destination.toString());
  job.getConfiguration().set(TMP_PATH_CONF, tmpDir.toString());

  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setInputDirRecursive(job, true);
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));
  FileOutputFormat.setOutputPath(job, new Path(output.toString()));
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  boolean success = job.waitForCompletion(true);

  return success ? 0 : 1;
}
 
Author: airbnb, Project: reair, Lines: 29, Source: ReplicationJob.java
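
One small wrinkle: this example constructs the job with the deprecated Job(Configuration, String) constructor rather than Job.getInstance(Configuration, String) as the other examples do; the two behave identically here, and getInstance is the current replacement.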

Example 5: run

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the package/class the method depends on
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  if (args.length == 0) {
    System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }
  // check for passed parameters, otherwise use defaults
  int width = conf.getInt(Pentomino.WIDTH, PENT_WIDTH);
  int height = conf.getInt(Pentomino.HEIGHT, PENT_HEIGHT);
  int depth = conf.getInt(Pentomino.DEPTH, PENT_DEPTH);
  for (int i = 0; i < args.length; i++) {
    if (args[i].equalsIgnoreCase("-depth")) {
      depth = Integer.parseInt(args[++i].trim());
    } else if (args[i].equalsIgnoreCase("-height")) {
      height = Integer.parseInt(args[++i].trim());
    } else if (args[i].equalsIgnoreCase("-width") ) {
      width = Integer.parseInt(args[++i].trim());
    }
  }
  // now set the values within conf for M/R tasks to read, this
  // will ensure values are set preventing MAPREDUCE-4678
  conf.setInt(Pentomino.WIDTH, width);
  conf.setInt(Pentomino.HEIGHT, height);
  conf.setInt(Pentomino.DEPTH, depth);
  Class<? extends Pentomino> pentClass = conf.getClass(Pentomino.CLASS, 
    OneSidedPentomino.class, Pentomino.class);
  int numMaps = conf.getInt(MRJobConfig.NUM_MAPS, DEFAULT_MAPS);
  Path output = new Path(args[0]);
  Path input = new Path(output + "_input");
  FileSystem fileSys = FileSystem.get(conf);
  try {
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    job.setJarByClass(PentMap.class);
    
    job.setJobName("dancingElephant");
    Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
    pent.initialize(width, height);
    long inputSize = createInputDirectory(fileSys, input, pent, depth);
    // for forcing the number of maps
    FileInputFormat.setMaxInputSplitSize(job, (inputSize/numMaps));
 
    // the keys are the prefix strings
    job.setOutputKeyClass(Text.class);
    // the values are puzzle solutions
    job.setOutputValueClass(Text.class);
    
    job.setMapperClass(PentMap.class);        
    job.setReducerClass(Reducer.class);
    
    job.setNumReduceTasks(1);
    
    return (job.waitForCompletion(true) ? 0 : 1);
  } finally {
    fileSys.delete(input, true);
  }
}
 
Author: naver, Project: hadoop, Lines: 60, Source: DistributedPentomino.java
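
The inputSize / numMaps trick deserves a comment: capping each split at the total input size divided by the desired map count forces FileInputFormat to produce at least numMaps splits, and therefore at least numMaps map tasks, regardless of the HDFS block size. Because the division is integer division with a possible remainder, the actual split count can come out slightly higher than numMaps.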


Note: the org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setMaxInputSplitSize examples in this article were compiled by 纯净天空 from open-source code hosted on GitHub, MSDocs, and similar platforms. The snippets are drawn from community open-source projects; copyright remains with the original authors, and any distribution or use should follow each project's License. Do not reproduce without permission.