This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setMaxInputSplitSize. If you are wondering what FileInputFormat.setMaxInputSplitSize does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also browse further usage examples of the enclosing class, org.apache.hadoop.mapreduce.lib.input.FileInputFormat.
The sections below show 5 code examples of FileInputFormat.setMaxInputSplitSize, sorted by popularity by default. Upvoting the examples you find useful helps recommend better Java code examples.
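Before diving into the examples, here is a minimal, self-contained sketch of the typical call pattern. It is not taken from any of the projects below; the class name, job name, input/output paths, and the 128 MB cap are illustrative assumptions. setMaxInputSplitSize(job, bytes) simply sets mapreduce.input.fileinputformat.split.maxsize (the FileInputFormat.SPLIT_MAXSIZE key) on the job configuration, so FileInputFormat will not generate splits larger than that many bytes; a smaller cap generally produces more map tasks.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MaxSplitSizeDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "setMaxInputSplitSize demo");
    job.setJarByClass(MaxSplitSizeDemo.class);

    // Map-only identity job: TextInputFormat emits (byte offset, line) pairs.
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(Mapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // Cap each input split at 128 MB (assumed value); under the hood this sets
    // mapreduce.input.fileinputformat.split.maxsize (FileInputFormat.SPLIT_MAXSIZE).
    FileInputFormat.setMaxInputSplitSize(job, 128L * 1024 * 1024);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

The examples below follow the same pattern, except that they read the cap from configuration (FileInputFormat.SPLIT_MAXSIZE, with a 60000-byte default) or, as in Example 5, compute it from the total input size to force a target number of map tasks.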
Example 1: runMetastoreCompareJobWithTextInput
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // the package/class this method depends on
private int runMetastoreCompareJobWithTextInput(Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = Job.getInstance(this.getConf(), "Stage1: Metastore Compare Job with Input List");
  job.setJarByClass(this.getClass());
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(Stage1ProcessTableMapperWithTextInput.class);
  job.setReducerClass(Stage1PartitionCompareReducer.class);
  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  job.setNumReduceTasks(getConf().getInt(
      ConfigurationKeys.BATCH_JOB_METASTORE_PARALLELISM,
      150));
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
Example 2: runHdfsCopyJob
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // the package/class this method depends on
private int runHdfsCopyJob(Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException, TemplateRenderException {
  LOG.info("Starting job for step 2...");
  Job job = Job.getInstance(this.getConf(), "Stage 2: HDFS Copy Job");
  job.setJarByClass(this.getClass());
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(Stage2DirectoryCopyMapper.class);
  job.setReducerClass(Stage2DirectoryCopyReducer.class);
  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setInputDirRecursive(job, true);
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  job.setNumReduceTasks(getConf().getInt(
      ConfigurationKeys.BATCH_JOB_COPY_PARALLELISM,
      150));
  boolean success = job.waitForCompletion(true);
  if (success) {
    LOG.info("Job for step 2 finished successfully! To view logging data, run the following "
        + "commands in Hive: \n\n"
        + VelocityUtils.renderTemplate(STEP2_HQL_TEMPLATE, velocityContext)
        + "\n");
  }
  return success ? 0 : 1;
}
Example 3: runCommitChangeJob
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // the package/class this method depends on
private int runCommitChangeJob(Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException, TemplateRenderException {
  LOG.info("Starting job for step 3...");
  Job job = Job.getInstance(this.getConf(), "Stage3: Commit Change Job");
  job.setJarByClass(this.getClass());
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(Stage3CommitChangeMapper.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setInputDirRecursive(job, true);
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));
  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  // Note: this overrides the earlier setNumReduceTasks(0) call above.
  job.setNumReduceTasks(getConf().getInt(
      ConfigurationKeys.BATCH_JOB_METASTORE_PARALLELISM,
      150));
  boolean success = job.waitForCompletion(true);
  if (success) {
    LOG.info("Job for step 3 finished successfully! To view logging data, run the following "
        + "commands in Hive: \n\n"
        + VelocityUtils.renderTemplate(STEP3_HQL_TEMPLATE, velocityContext));
  }
  return success ? 0 : 1;
}
Example 4: runSyncJob
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // the package/class this method depends on
private int runSyncJob(Path source, Path destination, Path tmpDir, Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(getConf(), "HDFS Sync job");
  job.setJarByClass(getClass());
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(HdfsSyncMapper.class);
  job.setReducerClass(HdfsSyncReducer.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.getConfiguration().set(SRC_PATH_CONF, source.toString());
  job.getConfiguration().set(DST_PATH_CONF, destination.toString());
  job.getConfiguration().set(TMP_PATH_CONF, tmpDir.toString());
  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setInputDirRecursive(job, true);
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));
  FileOutputFormat.setOutputPath(job, new Path(output.toString()));
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
Example 5: run
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // the package/class this method depends on
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  if (args.length == 0) {
    System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }
  // check for passed parameters, otherwise use defaults
  int width = conf.getInt(Pentomino.WIDTH, PENT_WIDTH);
  int height = conf.getInt(Pentomino.HEIGHT, PENT_HEIGHT);
  int depth = conf.getInt(Pentomino.DEPTH, PENT_DEPTH);
  for (int i = 0; i < args.length; i++) {
    if (args[i].equalsIgnoreCase("-depth")) {
      depth = Integer.parseInt(args[++i].trim());
    } else if (args[i].equalsIgnoreCase("-height")) {
      height = Integer.parseInt(args[++i].trim());
    } else if (args[i].equalsIgnoreCase("-width")) {
      width = Integer.parseInt(args[++i].trim());
    }
  }
  // now set the values within conf for M/R tasks to read, this
  // will ensure values are set preventing MAPREDUCE-4678
  conf.setInt(Pentomino.WIDTH, width);
  conf.setInt(Pentomino.HEIGHT, height);
  conf.setInt(Pentomino.DEPTH, depth);
  Class<? extends Pentomino> pentClass = conf.getClass(Pentomino.CLASS,
      OneSidedPentomino.class, Pentomino.class);
  int numMaps = conf.getInt(MRJobConfig.NUM_MAPS, DEFAULT_MAPS);
  Path output = new Path(args[0]);
  Path input = new Path(output + "_input");
  FileSystem fileSys = FileSystem.get(conf);
  try {
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    job.setJarByClass(PentMap.class);
    job.setJobName("dancingElephant");
    Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
    pent.initialize(width, height);
    long inputSize = createInputDirectory(fileSys, input, pent, depth);
    // for forcing the number of maps
    FileInputFormat.setMaxInputSplitSize(job, (inputSize / numMaps));
    // the keys are the prefix strings
    job.setOutputKeyClass(Text.class);
    // the values are puzzle solutions
    job.setOutputValueClass(Text.class);
    job.setMapperClass(PentMap.class);
    job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(1);
    return (job.waitForCompletion(true) ? 0 : 1);
  } finally {
    fileSys.delete(input, true);
  }
}