本文整理汇总了Java中org.apache.hadoop.mapreduce.lib.output.FileOutputFormat类的典型用法代码示例。如果您正苦于以下问题：Java FileOutputFormat类的具体用法？Java FileOutputFormat怎么用？Java FileOutputFormat使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
FileOutputFormat类属于org.apache.hadoop.mapreduce.lib.output包，在下文中一共展示了FileOutputFormat类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: run
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; //导入依赖的package包/类
/**
 * Configures and runs the map-only "Text to Parquet" job.
 *
 * @param args exactly two entries: the input path followed by the output path
 * @return {@code -1} when the argument count is wrong, {@code 0} when the job
 *         completes successfully, {@code 1} otherwise
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
    Job job = Job.getInstance(getConf(), "Text to Parquet");
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(TextToParquetMapper.class);
    // Zero reducers: mapper output is written directly by the output format.
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    AvroParquetOutputFormat.setSchema(job, SCHEMA);

    job.setOutputKeyClass(Void.class);
    job.setOutputValueClass(Group.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
示例2: run
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; //导入依赖的package包/类
/**
 * Runs the "calculating parameter" MapReduce job, reading sequence files from
 * {@code inputPath} and writing sequence files to {@code output}.
 *
 * @param conf      configuration to run with; the {@code "params"} key is set on it
 * @param inputPath input path added to the job
 * @param output    output path of the job
 * @param params    value stored (as a string) under the {@code "params"} key
 * @throws IOException            if job setup or submission fails
 * @throws ClassNotFoundException if a job class cannot be resolved
 * @throws InterruptedException   if waiting is interrupted, or if the job
 *                                completes unsuccessfully
 */
public static void run(Configuration conf, Path inputPath, Path output, double params) throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = "calculating parameter";

    // Must be set before the Job is created: the Job works on its own copy of conf.
    conf.set("params", String.valueOf(params));

    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
    Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(LDADriver.class);

    job.setMapperClass(CalParamsMapper.class);
    job.setReducerClass(CalParamsReducer.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(indexToCountWritable.class);
    job.setOutputKeyClass(twoDimensionIndexWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, output);

    if (!job.waitForCompletion(true)) {
        // Exception type kept as-is so existing callers that catch it keep working.
        throw new InterruptedException("calculating parameter failed");
    }
}
示例3: main
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; //导入依赖的package包/类
/**
 * Entry point for the "maxtemp" job.
 *
 * <p>Exits with status 0 when the job succeeds and 1 when it fails, so that
 * shells and schedulers can detect a failed run.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: maxtemp <input path> <output path>");
        System.exit(-1);
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "maxtemp");

    job.setMapperClass(MaxTempMapper.class);
    job.setReducerClass(MaxTempReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FloatWritable.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Propagate the job result as the process exit status instead of dropping it.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
示例4: runRandomInputGenerator
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; //导入依赖的package包/类
/**
 * Sets up and runs the map-only "Random Input Generator" job, writing its
 * output as sequence files under {@code tmpOutput}.
 *
 * @param numMappers      passed through to {@code setJobConf}
 * @param numNodes        passed through to {@code setJobConf}
 * @param tmpOutput       output path of the job
 * @param width           passed through to {@code setJobConf}
 * @param wrapMuplitplier passed through to {@code setJobConf}
 * @return {@code 0} if {@code jobCompletion} reports success, {@code 1} otherwise
 * @throws Exception if job setup or execution fails
 */
public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
        Integer width, Integer wrapMuplitplier) throws Exception {
    LOG.info("Running RandomInputGenerator with numMappers=" + numMappers + ", numNodes="
            + numNodes);

    final Job generatorJob = Job.getInstance(getConf());
    generatorJob.setJobName("Random Input Generator");
    generatorJob.setNumReduceTasks(0);
    generatorJob.setJarByClass(getClass());

    generatorJob.setInputFormatClass(GeneratorInputFormat.class);
    generatorJob.setOutputKeyClass(BytesWritable.class);
    generatorJob.setOutputValueClass(NullWritable.class);

    setJobConf(generatorJob, numMappers, numNodes, width, wrapMuplitplier);

    // The base Mapper class acts as an identity mapper.
    generatorJob.setMapperClass(Mapper.class);

    FileOutputFormat.setOutputPath(generatorJob, tmpOutput);
    generatorJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    if (jobCompletion(generatorJob)) {
        return 0;
    }
    return 1;
}
示例5: doVerify
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; //导入依赖的package包/类
/**
 * Runs the verification job against the table described by {@code htd} and
 * asserts that it succeeds and that the {@code ROWS_WRITTEN} counter is zero.
 *
 * @param conf configuration used to create the job and to read tuning values
 * @param htd  descriptor of the table to scan
 * @throws Exception if job setup or execution fails
 */
protected void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
    final Path verifyOutput = getTestDir(TEST_NAME, "verify-output");
    LOG.info("Verify output dir: " + verifyOutput);

    final Job verifyJob = Job.getInstance(conf);
    verifyJob.setJarByClass(this.getClass());
    verifyJob.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
    setJobScannerConf(verifyJob);

    // Table scan feeding VerifyMapper, emitting BytesWritable keys and values.
    final String tableName = htd.getTableName().getNameAsString();
    TableMapReduceUtil.initTableMapperJob(
            tableName, new Scan(), VerifyMapper.class,
            BytesWritable.class, BytesWritable.class, verifyJob);
    TableMapReduceUtil.addDependencyJars(verifyJob.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.setScannerCaching(
            verifyJob, conf.getInt("verify.scannercaching", SCANNER_CACHING));

    verifyJob.setReducerClass(VerifyReducer.class);
    verifyJob.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
    FileOutputFormat.setOutputPath(verifyJob, verifyOutput);

    assertTrue(verifyJob.waitForCompletion(true));

    long rowsWritten = verifyJob.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
    assertEquals(0, rowsWritten);
}
示例6: main
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; //导入依赖的package包/类
/**
 * Entry point for the "WordCount" job.
 *
 * <p>The output directory is {@code args[1]} suffixed with {@code _} and the
 * current time in milliseconds. The elapsed wall-clock time is printed, and
 * the process exits with status 0 on success or 1 on failure.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path prefix
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: HadoopWordCount <input path> <output path prefix>");
        System.exit(-1);
    }

    BasicConfigurator.configure();
    Configuration conf = new Configuration();
    conf.setQuietMode(true);

    Job job = Job.getInstance(conf, "WordCount");
    job.setJarByClass(HadoopWordCount.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1] + "_" + System.currentTimeMillis()));

    long startMillis = System.currentTimeMillis();
    boolean succeeded = job.waitForCompletion(true);
    System.out.println("TotalTime=" + (System.currentTimeMillis() - startMillis));

    // Report job failure through the exit status instead of discarding the result.
    System.exit(succeeded ? 0 : 1);
}
示例7: main
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; //导入依赖的package包/类
/**
 * Entry point for the "Max Temperature With Combiner" job.
 *
 * <p>Exits with status -1 on bad usage, 0 when the job succeeds and 1 when it fails.
 *
 * @param args exactly two entries: the input path and the output path
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MaxTemperatureWithCombiner <input path> <output path>");
        System.exit(-1);
    }

    // Job.getInstance() replaces the deprecated no-arg Job constructor.
    Job job = Job.getInstance();
    job.setJarByClass(MaxTemperatureWithCombiner.class);
    job.setJobName("Max Temperature With Combiner");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTemperatureMapper.class);
    // The reducer class is also registered as the combiner.
    job.setCombinerClass(MaxTemperatureReducer.class);
    job.setReducerClass(MaxTemperatureReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
示例8: createCopyJob
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; //导入依赖的package包/类
/**
 * Builds (but does not submit) the "DataMoveJob" copy job.
 *
 * <p>Sets {@code MRJobConfig.NUM_MAPS} to 3 on the supplied configuration
 * before the job is created, and configures a single reduce task.
 *
 * @param conf   configuration the job is created from; modified by this call
 * @param outdir output directory of the job
 * @param indirs one or more input directories
 * @return the configured, not yet submitted job
 * @throws Exception if an error occurs creating the job configuration
 */
public static Job createCopyJob(Configuration conf, Path outdir,
        Path... indirs) throws Exception {
    conf.setInt(MRJobConfig.NUM_MAPS, 3);

    final Job copyJob = Job.getInstance(conf);
    copyJob.setJobName("DataMoveJob");

    // Input and output locations.
    FileInputFormat.setInputPaths(copyJob, indirs);
    FileOutputFormat.setOutputPath(copyJob, outdir);

    // Map and reduce stages.
    copyJob.setMapperClass(DataCopyMapper.class);
    copyJob.setReducerClass(DataCopyReducer.class);
    copyJob.setNumReduceTasks(1);

    // Output record types.
    copyJob.setOutputKeyClass(Text.class);
    copyJob.setOutputValueClass(Text.class);

    return copyJob;
}
示例9: createFailJob
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; //导入依赖的package包/类
/**
 * Builds (but does not submit) the "Fail-Job" job.
 *
 * <p>Any existing {@code outdir} is deleted recursively first, and
 * {@code MRJobConfig.MAP_MAX_ATTEMPTS} is set to 2 on the supplied
 * configuration before the job is created. The job uses zero reduce tasks.
 *
 * @param conf   configuration the job is created from; modified by this call
 * @param outdir output directory of the job; removed if it already exists
 * @param indirs one or more input directories
 * @return the configured, not yet submitted job
 * @throws Exception if an error occurs creating the job configuration
 */
public static Job createFailJob(Configuration conf, Path outdir,
        Path... indirs) throws Exception {
    // Start from a clean output directory.
    final FileSystem outputFs = outdir.getFileSystem(conf);
    if (outputFs.exists(outdir)) {
        outputFs.delete(outdir, true);
    }

    conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);

    final Job failJob = Job.getInstance(conf);
    failJob.setJobName("Fail-Job");

    FileInputFormat.setInputPaths(failJob, indirs);
    FileOutputFormat.setOutputPath(failJob, outdir);

    failJob.setMapperClass(FailMapper.class);
    failJob.setReducerClass(Reducer.class);
    failJob.setNumReduceTasks(0);

    failJob.setOutputKeyClass(Text.class);
    failJob.setOutputValueClass(Text.class);

    return failJob;
}
示例10: doLoad
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; //导入依赖的package包/类
/**
 * Runs the map-only load job for the table described by {@code htd} and
 * asserts that it completes successfully.
 *
 * @param conf configuration the job is created from; the map-task count and
 *             the table name are written into it before the job is created
 * @param htd  descriptor of the target table
 * @return the completed job
 * @throws Exception if job setup or execution fails
 */
protected Job doLoad(Configuration conf, HTableDescriptor htd) throws Exception {
    final Path loadOutput = getTestDir(TEST_NAME, "load-output");
    LOG.info("Load output dir: " + loadOutput);

    // These settings go on conf before the Job is created from it.
    final int numMapTasks = conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT);
    NMapInputFormat.setNumMapTasks(conf, numMapTasks);
    conf.set(TABLE_NAME_KEY, htd.getTableName().getNameAsString());

    final Job loadJob = Job.getInstance(conf);
    loadJob.setJobName(TEST_NAME + " Load for " + htd.getTableName());
    loadJob.setJarByClass(this.getClass());
    setMapperClass(loadJob);
    loadJob.setInputFormatClass(NMapInputFormat.class);
    loadJob.setNumReduceTasks(0);
    setJobScannerConf(loadJob);
    FileOutputFormat.setOutputPath(loadJob, loadOutput);

    TableMapReduceUtil.addDependencyJars(loadJob);
    TableMapReduceUtil.addDependencyJars(loadJob.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.initCredentials(loadJob);

    final boolean completed = loadJob.waitForCompletion(true);
    assertTrue(completed);
    return loadJob;
}
示例11: createJob
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; //导入依赖的package包/类
/**
 * Creates a job whose input directory is freshly populated with generated files.
 *
 * <p>Both {@code outDir} and {@code inDir} are deleted recursively if they
 * exist. {@code inDir} is then recreated and filled with
 * {@code numInputFiles} files named {@code part-0}, {@code part-1}, ...,
 * each containing {@code input}.
 *
 * @param conf          configuration used to create the job and obtain the file system
 * @param inDir         input directory; wiped and regenerated by this call
 * @param outDir        output directory; removed if it already exists
 * @param numInputFiles number of input files to write
 * @param numReds       number of reduce tasks
 * @param input         content written to every input file
 * @return the configured, not yet submitted job
 * @throws IOException if a file system operation or job creation fails
 */
public static Job createJob(Configuration conf, Path inDir, Path outDir,
        int numInputFiles, int numReds, String input) throws IOException {
    Job job = Job.getInstance(conf);
    FileSystem fs = FileSystem.get(conf);

    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    if (fs.exists(inDir)) {
        fs.delete(inDir, true);
    }
    fs.mkdirs(inDir);

    for (int i = 0; i < numInputFiles; ++i) {
        // try-with-resources closes the stream even when writeBytes throws.
        try (DataOutputStream file = fs.create(new Path(inDir, "part-" + i))) {
            file.writeBytes(input);
        }
    }

    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);
    job.setNumReduceTasks(numReds);
    return job;
}
示例12: main
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; //导入依赖的package包/类
/**
 * Entry point for the UnitSum job.
 *
 * <p>Exits with status -1 on bad usage, 0 when the job succeeds and 1 when it fails.
 *
 * @param args {@code args[0]}: input path registered with {@code PassMapper};
 *             {@code args[1]}: input path registered with {@code BetaMapper};
 *             {@code args[2]}: output path;
 *             {@code args[3]}: float stored in the configuration under {@code "beta"}
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    // All four arguments are read below; fail with a usage message rather than
    // an ArrayIndexOutOfBoundsException.
    if (args.length < 4) {
        System.err.println(
                "Usage: UnitSum <PassMapper input> <BetaMapper input> <output> <beta>");
        System.exit(-1);
    }

    Configuration conf = new Configuration();
    conf.setFloat("beta", Float.parseFloat(args[3]));

    Job job = Job.getInstance(conf);
    job.setJarByClass(UnitSum.class);

    ChainMapper.addMapper(job, PassMapper.class, Object.class, Text.class, Text.class, DoubleWritable.class, conf);
    ChainMapper.addMapper(job, BetaMapper.class, Text.class, DoubleWritable.class, Text.class, DoubleWritable.class, conf);

    job.setReducerClass(SumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, PassMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, BetaMapper.class);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));

    // Report job failure through the exit status instead of discarding the result.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
示例13: doVerify
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; //导入依赖的package包/类
/**
 * Runs the map-only verification job against the table described by
 * {@code htd}, scanning with the given authorizations, and asserts that the
 * job completes successfully.
 *
 * @param conf  configuration used to create the job and to read tuning values
 * @param htd   descriptor of the table to scan
 * @param auths authorization labels applied to the scan
 * @return the completed job
 * @throws IOException            if job setup or submission fails
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
private Job doVerify(Configuration conf, HTableDescriptor htd, String... auths)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path outputDir = getTestDir(TEST_NAME, "verify-output");

    // Job.getInstance replaces the deprecated Job(Configuration) constructor.
    Job job = Job.getInstance(conf);
    job.setJarByClass(this.getClass());
    job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
    setJobScannerConf(job);

    Scan scan = new Scan();
    scan.setAuthorizations(new Authorizations(auths));

    TableMapReduceUtil.initTableMapperJob(htd.getTableName().getNameAsString(), scan,
            VerifyMapper.class, NullWritable.class, NullWritable.class, job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);

    int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
    TableMapReduceUtil.setScannerCaching(job, scannerCaching);

    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, outputDir);

    assertTrue(job.waitForCompletion(true));
    return job;
}
示例14: testEmptyJoin
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; //导入依赖的package包/类
/**
 * Runs an "outer" composite join over three {@code Fake_IF} sources and
 * asserts that the job succeeds.
 *
 * <p>The {@code /empty} base directory is deleted in a {@code finally} block
 * so it is removed even when the job throws or the assertion fails.
 *
 * @throws Exception if job setup, execution, or cleanup fails
 */
public void testEmptyJoin() throws Exception {
    Configuration conf = new Configuration();
    Path base = cluster.getFileSystem().makeQualified(new Path("/empty"));
    try {
        Path[] src = { new Path(base, "i0"), new Path("i1"), new Path("i2") };
        conf.set(CompositeInputFormat.JOIN_EXPR, CompositeInputFormat.compose("outer",
                MapReduceTestUtil.Fake_IF.class, src));
        MapReduceTestUtil.Fake_IF.setKeyClass(conf,
                MapReduceTestUtil.IncomparableKey.class);

        Job job = Job.getInstance(conf);
        job.setInputFormatClass(CompositeInputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path(base, "out"));

        // Base Mapper/Reducer classes: records pass through unchanged.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(MapReduceTestUtil.IncomparableKey.class);
        job.setOutputValueClass(NullWritable.class);

        job.waitForCompletion(true);
        assertTrue(job.isSuccessful());
    } finally {
        // Always clean up so leftovers cannot affect other tests on the same cluster.
        base.getFileSystem(conf).delete(base, true);
    }
}
示例15: testInvalidMultiMapParallelism
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; //导入依赖的package包/类
/**
 * Verifies that a job configured with an invalid (negative) maximum number of
 * concurrently running local map tasks does not succeed.
 *
 * @throws Exception if job setup or execution fails unexpectedly
 */
@Test
public void testInvalidMultiMapParallelism() throws Exception {
    final Job misconfiguredJob = Job.getInstance();

    final Path input = createMultiMapsInput();
    final Path output = getOutputPath();

    // Remove any output left over from an earlier run.
    final FileSystem localFs = FileSystem.getLocal(new Configuration());
    if (localFs.exists(output)) {
        localFs.delete(output, true);
    }

    misconfiguredJob.setMapperClass(StressMapper.class);
    misconfiguredJob.setReducerClass(CountingReducer.class);
    misconfiguredJob.setNumReduceTasks(1);

    // Deliberately invalid parallelism value.
    LocalJobRunner.setLocalMaxRunningMaps(misconfiguredJob, -6);

    FileInputFormat.addInputPath(misconfiguredJob, input);
    FileOutputFormat.setOutputPath(misconfiguredJob, output);

    assertFalse("Job succeeded somehow", misconfiguredJob.waitForCompletion(true));
}