本文整理汇总了Java中org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat.setOutputFormatClass方法的典型用法代码示例。如果您正苦于以下问题：Java LazyOutputFormat.setOutputFormatClass方法的具体用法？Java LazyOutputFormat.setOutputFormatClass怎么用？Java LazyOutputFormat.setOutputFormatClass使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat的用法示例。
在下文中一共展示了LazyOutputFormat.setOutputFormatClass方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: runTestLazyOutput
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入方法依赖的package包/类
/**
 * Builds the "Test-Lazy-Output" job, runs it, and asserts that it completed successfully.
 *
 * @param conf         configuration the job is created from
 * @param output       directory the job writes its output to
 * @param numReducers  number of reduce tasks to configure
 * @param createLazily when {@code true} the text output format is registered through
 *                     {@code LazyOutputFormat}; otherwise it is set directly on the job
 * @throws Exception if configuring or running the job fails
 */
private static void runTestLazyOutput(Configuration conf, Path output,
        int numReducers, boolean createLazily)
        throws Exception {
    final Job lazyJob = Job.getInstance(conf, "Test-Lazy-Output");
    lazyJob.setJarByClass(TestMapReduceLazyOutput.class);

    // Input side.
    lazyJob.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(lazyJob, INPUT);

    // Processing stages.
    lazyJob.setMapperClass(TestMapper.class);
    lazyJob.setReducerClass(TestReducer.class);
    lazyJob.setNumReduceTasks(numReducers);

    // Output side: record types, location, and the (optionally lazy) format.
    lazyJob.setOutputKeyClass(LongWritable.class);
    lazyJob.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(lazyJob, output);
    if (!createLazily) {
        lazyJob.setOutputFormatClass(TextOutputFormat.class);
    } else {
        LazyOutputFormat.setOutputFormatClass(lazyJob, TextOutputFormat.class);
    }

    final boolean succeeded = lazyJob.waitForCompletion(true);
    assertTrue(succeeded);
}
示例2: runTestLazyOutput
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入方法依赖的package包/类
/**
 * Configures and runs the "Test-Lazy-Output" job, asserting that it completes successfully.
 *
 * @param conf         configuration the job is created from
 * @param output       directory the job writes its output to
 * @param numReducers  number of reduce tasks to configure
 * @param createLazily when {@code true} the text output format is registered through
 *                     {@code LazyOutputFormat}; otherwise it is set directly on the job
 * @throws Exception if configuring or running the job fails
 */
private static void runTestLazyOutput(Configuration conf, Path output,
        int numReducers, boolean createLazily)
        throws Exception {
    // Use the static factory; the Job(Configuration, String) constructor is deprecated.
    Job job = Job.getInstance(conf, "Test-Lazy-Output");
    FileInputFormat.setInputPaths(job, INPUT);
    FileOutputFormat.setOutputPath(job, output);
    job.setJarByClass(TestMapReduceLazyOutput.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(numReducers);
    job.setMapperClass(TestMapper.class);
    job.setReducerClass(TestReducer.class);
    if (createLazily) {
        // Wrap TextOutputFormat in LazyOutputFormat instead of installing it directly.
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    } else {
        job.setOutputFormatClass(TextOutputFormat.class);
    }
    assertTrue(job.waitForCompletion(true));
}
示例3: configureTextOutput
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入方法依赖的package包/类
/**
 * Configures the job to emit human-readable text through lazily created
 * {@code TextOutputFormat} outputs.
 *
 * @param destination name handed to {@code MRReasoningUtils.getOutputPath} to resolve
 *                    the output location
 */
protected void configureTextOutput(String destination) {
    final Path textOutputDir =
            MRReasoningUtils.getOutputPath(job.getConfiguration(), destination);
    TextOutputFormat.setOutputPath(job, textOutputDir);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    // These named outputs all use NullWritable keys and Text values.
    final String[] valueOnlyOutputs = {
        MRReasoningUtils.INTERMEDIATE_OUT,
        MRReasoningUtils.TERMINAL_OUT,
        MRReasoningUtils.SCHEMA_OUT,
        MRReasoningUtils.INCONSISTENT_OUT,
    };
    for (String outputName : valueOnlyOutputs) {
        MultipleOutputs.addNamedOutput(job, outputName,
                TextOutputFormat.class, NullWritable.class, Text.class);
    }

    // The debug output is the only one keyed by Text.
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
            TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
示例4: ensureJobSet
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入方法依赖的package包/类
/**
 * Installs the Multi* mapper, reducer, key/value, comparator, partitioner and output
 * format classes on the job, registers {@code MultiSerializer} in the serialization
 * list when it is missing, and enables {@code MultiCombiner} when a non-default
 * combiner class is listed under {@code MultiCombiner.CONF_KEY}.
 *
 * @param job job to configure
 */
private static void ensureJobSet(Job job) {
    if (!job.getConfiguration().getBoolean(MULTIREDUCERS_HAVE_OUTPUT_FORMAT, false)) {
        job.setOutputFormatClass(NullOutputFormat.class);
    } else {
        // TextOutputFormat is required here; without it the FileOutputCommitter won't run.
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    }
    // NOTE(review): this unconditional call runs after both branches above and sets the
    // job's output format class again -- confirm the branch-specific calls are still needed.
    job.setOutputFormatClass(MultiOutputFormat.class);

    // Map side.
    job.setMapperClass(MultiMapper.class);
    job.setMapOutputKeyClass(PerMapperOutputKey.class);
    job.setMapOutputValueClass(PerMapperOutputValue.class);

    // Shuffle and reduce side.
    job.setPartitionerClass(MultiPartitioner.class);
    job.setSortComparatorClass(MultiComparator.class);
    job.setReducerClass(MultiReducer.class);

    // Register the serializer only once.
    List<Class<?>> registered = Arrays.asList(
            job.getConfiguration().getClasses(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY));
    if (!registered.contains(MultiSerializer.class)) {
        appendTo(job, CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, MultiSerializer.class);
    }

    // Any configured combiner other than the base Reducer class turns on MultiCombiner.
    for (Class<?> combinerCandidate : job.getConfiguration().getClasses(MultiCombiner.CONF_KEY)) {
        if (combinerCandidate.equals(Reducer.class)) {
            continue;
        }
        job.setCombinerClass(MultiCombiner.class);
    }
}
示例5: runAnnotatorSort
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入方法依赖的package包/类
/**
 * Runs the "GaeaAnnotatorSortResult" job with one reduce task per sample name and, on
 * success, renames each sample's output file to {@code <outputPath>/<sampleName>.tsv}.
 *
 * @return 0 when the job succeeded and every sample output was renamed; 1 when the job failed
 * @throws java.io.FileNotFoundException if a sample's output is still missing after all retries
 * @throws java.io.IOException if renaming a sample's output fails
 * @throws Exception if configuring or running the job fails
 */
private int runAnnotatorSort() throws Exception {
    BioJob job = BioJob.getInstance(conf);
    job.setJobName("GaeaAnnotatorSortResult");
    job.setJarByClass(this.getClass());
    job.setMapperClass(AnnotationSortMapper.class);
    job.setReducerClass(AnnotationSortReducer.class);
    job.setNumReduceTasks(sampleNames.size());
    job.setMapOutputKeyClass(PairWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    Path inputPath = new Path(options.getTmpPath());
    Path outputPath = new Path(options.getOutputPath());
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    FileSystem fs = outputPath.getFileSystem(conf);

    if (!job.waitForCompletion(true)) {
        return 1;
    }

    final int maxAttempts = 10;
    final long retryDelayMillis = 6000;
    for (String sampleName : sampleNames) {
        Path outputPart = getSampleOutputPath(sampleName);
        // The retry budget is per sample; a shared counter would leave later samples
        // with few or no retries once an earlier sample had used them up.
        for (int attempt = 0; outputPart == null && attempt < maxAttempts; attempt++) {
            TimeUnit.MILLISECONDS.sleep(retryDelayMillis);
            outputPart = getSampleOutputPath(sampleName);
        }
        if (outputPart == null) {
            // Fail with a descriptive error instead of handing a null path to rename().
            throw new java.io.FileNotFoundException(
                    "No output found for sample '" + sampleName + "' after "
                    + maxAttempts + " retries");
        }
        Path outputName = new Path(options.getOutputPath() + "/" + sampleName + ".tsv");
        // rename() signals failure through its return value; do not ignore it.
        if (!fs.rename(outputPart, outputName)) {
            throw new java.io.IOException(
                    "Failed to rename " + outputPart + " to " + outputName);
        }
    }
    return 0;
}
示例6: makeJob
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入方法依赖的package包/类
/**
 * Creates the map-only "Chlorine_HDFS_Scan" job.
 *
 * <p>The scan parameters are written into {@code conf} before the job is created.
 * Input is read recursively from {@code in} as text, filtered by {@code NewFilesFilter},
 * and output is written lazily as text to {@code out}.
 *
 * @param conf                   configuration to populate and create the job from (modified in place)
 * @param in                     input path to scan
 * @param out                    output path of the job
 * @param matchPath              value stored under the {@code matchPath} key
 * @param scanSince              value stored under the {@code scanSince} key
 * @param chlorineConfigFilePath URI of an optional config file to add to the job's cache
 *                               files; may be {@code null}
 * @param queue                  job queue name; may be {@code null} to keep the default
 * @param maskPath               value stored under the {@code maskPath} key
 * @return the configured, not yet submitted job
 * @throws IOException if the job cannot be created or its paths cannot be configured
 */
public static Job makeJob(Configuration conf, Path in, Path out, String matchPath, long scanSince,
        String chlorineConfigFilePath, String queue, String maskPath) throws IOException {
    conf.setBoolean("mapred.output.compress", false);
    conf.setLong("scanSince", scanSince);
    conf.set("matchPath", matchPath);
    conf.set("maskPath", maskPath);
    conf.set("inputPath", in.toString());
    if (queue != null) {
        conf.set("mapred.job.queue.name", queue);
    }
    conf.set("fs.permissions.umask-mode",
            "007");
    conf.setInt("input_path_depth", in.depth());
    Job job = Job.getInstance(conf, "Chlorine_HDFS_Scan");
    job.setJarByClass(HDFSScanMR.class);
    if (chlorineConfigFilePath != null) {
        try {
            job.addCacheFile(new URI(chlorineConfigFilePath));
            String finderFileName = (new File(chlorineConfigFilePath)).getName();
            // Job.getInstance copied conf, so from here on settings only reach the tasks
            // when they are written to the job's own configuration.
            job.getConfiguration().set("finder_file", finderFileName);
            // Kept so callers that read the value back from the conf they passed in still see it.
            conf.set("finder_file", finderFileName);
        } catch (URISyntaxException e) {
            // An invalid URI is logged and the job is built without the config file.
            LOG.error(e);
        }
    }
    job.setMapperClass(DeepScanMapper.class);
    // Map-only job.
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, in);
    TextInputFormat.setInputDirRecursive(job, true);
    TextInputFormat.setInputPathFilter(job, NewFilesFilter.class);
    FileOutputFormat.setOutputPath(job, out);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    return job;
}
示例7: configureJob
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入方法依赖的package包/类
/**
 * Applies the shared WARC-in / WARC-out settings to a job: mapper, reducer, key/value
 * types, GZip-compressed lazily created output, and the input/output paths.
 *
 * @param job                      job instance to configure
 * @param jarByClass               class passed to {@code setJarByClass}; its name is also
 *                                 used as the job name
 * @param mapperClass              mapper implementation
 * @param reducerClass             reducer implementation
 * @param commaSeparatedInputFiles comma-separated input paths
 * @param outputPath               output directory
 * @throws IOException I/O exception
 */
public static void configureJob(Job job, Class<?> jarByClass,
        Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass,
        String commaSeparatedInputFiles, String outputPath)
        throws IOException {
    // Job identity.
    job.setJobName(jarByClass.getName());
    job.setJarByClass(jarByClass);

    // Processing stages.
    job.setMapperClass(mapperClass);
    job.setReducerClass(reducerClass);

    // Record types: Text -> WARCWritable between map and reduce, WARCWritable-only at the end.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(WARCWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(WARCWritable.class);

    // WARC on both ends; the lazy wrapper avoids producing empty output files.
    job.setInputFormatClass(WARCInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, WARCOutputFormat.class);

    // GZip-compress whatever is written.
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    FileOutputFormat.setCompressOutput(job, true);

    // Locations.
    final Path outputDir = new Path(outputPath);
    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, outputDir);
}
示例8: run
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入方法依赖的package包/类
/**
 * Configures and runs the Phase3Step1ExtractNearDupInfo job.
 *
 * @param args {@code args[0]} comma-separated input paths, {@code args[1]} output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args)
        throws Exception {
    final Job nearDupJob = Job.getInstance(getConf());
    nearDupJob.setJobName(Phase3Step1ExtractNearDupInfo.class.getName());
    nearDupJob.setJarByClass(Phase3Step1ExtractNearDupInfo.class);

    // Map stage: Text -> DocumentInfo.
    nearDupJob.setMapperClass(MapperClass.class);
    nearDupJob.setMapOutputKeyClass(Text.class);
    nearDupJob.setMapOutputValueClass(DocumentInfo.class);

    // Reduce stage: NullWritable -> List.
    nearDupJob.setReducerClass(DeDuplicationTextOutputReducer.class);
    nearDupJob.setOutputKeyClass(NullWritable.class);
    nearDupJob.setOutputValueClass(List.class);

    // WARC input, lazily created DocumentInfo output.
    nearDupJob.setInputFormatClass(WARCInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(nearDupJob, DocumentInfoOutputFormat.class);

    // Locations come from the command line.
    final String inputs = args[0];
    final String outputDir = args[1];
    FileInputFormat.addInputPaths(nearDupJob, inputs);
    FileOutputFormat.setOutputPath(nearDupJob, new Path(outputDir));

    if (nearDupJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
示例9: run
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入方法依赖的package包/类
/**
 * Configures and runs the map-only Phase3Step3NearDupTuplesCreation job.
 *
 * @param args {@code args[0]} comma-separated input paths, {@code args[1]} output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args)
        throws Exception {
    final Job tuplesJob = Job.getInstance(getConf());
    tuplesJob.setJobName(Phase3Step3NearDupTuplesCreation.class.getName());
    tuplesJob.setJarByClass(Phase3Step3NearDupTuplesCreation.class);

    // Map stage: NullWritable -> TreeSet.
    tuplesJob.setMapperClass(CreateTuplesMapper.class);
    tuplesJob.setMapOutputKeyClass(NullWritable.class);
    tuplesJob.setMapOutputValueClass(TreeSet.class);

    // Text input, lazily created text output.
    tuplesJob.setInputFormatClass(TextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(tuplesJob, TextOutputFormat.class);

    // Locations come from the command line.
    final String inputs = args[0];
    final String outputDir = args[1];
    FileInputFormat.addInputPaths(tuplesJob, inputs);
    FileOutputFormat.setOutputPath(tuplesJob, new Path(outputDir));

    // Must be set, or the mapper won't be called.
    tuplesJob.setNumReduceTasks(0);

    if (tuplesJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
示例10: run
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入方法依赖的package包/类
/**
 * Configures and runs the Phase3Step4LocalDeDuplication job, which reads its input in
 * chunks of at most {@code LINES} lines per mapper.
 *
 * @param args {@code args[0]} input path, {@code args[1]} output path (text files of
 *             ids to be deleted)
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args)
        throws Exception {
    final Job dedupJob = Job.getInstance(getConf());
    dedupJob.setJobName(Phase3Step4LocalDeDuplication.class.getName());
    dedupJob.setJarByClass(Phase3Step4LocalDeDuplication.class);

    final String inputDir = args[0];
    // Text files of ids to be deleted end up here.
    final String outputDir = args[1];

    // Input: each mapper receives at most LINES lines.
    dedupJob.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(dedupJob, new Path(inputDir));
    dedupJob.getConfiguration().setInt("mapreduce.input.lineinputformat.linespermap", LINES);

    // Map stage, with lazily created text output.
    dedupJob.setMapperClass(LocalGreedyDeDuplicationMapper.class);
    LazyOutputFormat.setOutputFormatClass(dedupJob, TextOutputFormat.class);

    // Reduce stage: Text -> NullWritable.
    dedupJob.setReducerClass(IDCollectorReducer.class);
    dedupJob.setOutputKeyClass(Text.class);
    dedupJob.setOutputValueClass(NullWritable.class);

    FileOutputFormat.setOutputPath(dedupJob, new Path(outputDir));

    if (dedupJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
示例11: run
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入方法依赖的package包/类
/**
 * Configures and runs the Phase3Step2DistinctDataJob job.
 *
 * @param args {@code args[0]} comma-separated input paths, {@code args[1]} output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args)
        throws Exception {
    final Job distinctJob = Job.getInstance(getConf());
    distinctJob.setJobName(Phase3Step2DistinctDataJob.class.getName());
    distinctJob.setJarByClass(Phase3Step2DistinctDataJob.class);

    // Map stage: Text -> NullWritable.
    distinctJob.setMapperClass(RemoveRedundantDataMapper.class);
    distinctJob.setMapOutputKeyClass(Text.class);
    distinctJob.setMapOutputValueClass(NullWritable.class);

    // Reduce stage: Text -> NullWritable.
    distinctJob.setReducerClass(RemoveRedundantDataReducer.class);
    distinctJob.setOutputKeyClass(Text.class);
    distinctJob.setOutputValueClass(NullWritable.class);

    // Locations come from the command line.
    final String inputs = args[0];
    final String outputDir = args[1];

    // Text input, lazily created text output.
    distinctJob.setInputFormatClass(TextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(distinctJob, TextOutputFormat.class);

    FileInputFormat.addInputPaths(distinctJob, inputs);
    FileOutputFormat.setOutputPath(distinctJob, new Path(outputDir));

    if (distinctJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
示例12: configureSchemaOutput
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入方法依赖的package包/类
/**
 * Set up the MapReduce job to output a schema (TBox).
 *
 * <p>The output goes to the path returned by {@code MRReasoningUtils.getSchemaPath} as
 * lazily created sequence files of {@code NullWritable}/{@code SchemaWritable} records.
 * Two named outputs are registered: {@code "schemaobj"} and the debug output.
 */
protected void configureSchemaOutput() {
    Path outPath = MRReasoningUtils.getSchemaPath(job.getConfiguration());
    SequenceFileOutputFormat.setOutputPath(job, outPath);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SchemaWritable.class);
    // LazyOutputFormat.setOutputFormatClass installs LazyOutputFormat as the job's output
    // format itself, so a separate job.setOutputFormatClass(SequenceFileOutputFormat.class)
    // beforehand would only be overwritten and is not needed.
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, "schemaobj",
            SequenceFileOutputFormat.class, NullWritable.class, SchemaWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
            TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
示例13: configureDerivationOutput
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入方法依赖的package包/类
/**
 * Configures the job to write newly derived triples as lazily created sequence files.
 *
 * @param intermediate {@code true} if this is intermediate data, in which case the output
 *                     goes to [base]-[iteration]-[temp]; otherwise to [base]-[iteration]
 */
protected void configureDerivationOutput(boolean intermediate) {
    final Configuration conf = job.getConfiguration();
    final int iteration = MRReasoningUtils.getCurrentIteration(conf);

    // Intermediate results get the temp suffix appended to the per-iteration name.
    final Path outPath = intermediate
            ? MRReasoningUtils.getOutputPath(conf,
                    MRReasoningUtils.OUTPUT_BASE + iteration + MRReasoningUtils.TEMP_SUFFIX)
            : MRReasoningUtils.getOutputPath(conf,
                    MRReasoningUtils.OUTPUT_BASE + iteration);

    SequenceFileOutputFormat.setOutputPath(job, outPath);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    // These named outputs are all keyed by Fact.
    final String[] factOutputs = {
        MRReasoningUtils.INTERMEDIATE_OUT,
        MRReasoningUtils.TERMINAL_OUT,
        MRReasoningUtils.SCHEMA_OUT,
    };
    for (String factOutput : factOutputs) {
        MultipleOutputs.addNamedOutput(job, factOutput,
                SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    }
    // The inconsistency output is keyed by Derivation instead.
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INCONSISTENT_OUT,
            SequenceFileOutputFormat.class, Derivation.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);

    // Text output for diagnostic info, if needed.
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
            TextOutputFormat.class, Text.class, Text.class);
}
示例14: setupReducer
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入方法依赖的package包/类
/**
 * Configures the reduce side of the job: reducer and partitioner classes, the reducer
 * count taken from the segment's reducer mapping, the named outputs, and the output path.
 * The output path is deleted at the end of setup.
 *
 * @param output  output directory of the job
 * @param cubeSeg cube segment whose cube instance determines the reducer mapping
 * @throws IOException declared for I/O failures during setup
 * @throws IllegalArgumentException if the mapping requires more than 250 reducers
 */
private void setupReducer(Path output, CubeSegment cubeSeg)
        throws IOException {
    final FactDistinctColumnsReducerMapping mapping =
            new FactDistinctColumnsReducerMapping(cubeSeg.getCubeInstance());
    final int reducerCount = mapping.getTotalReducerNum();
    if (reducerCount > 250) {
        final String message =
                "The max reducer number for FactDistinctColumnsJob is 250, but now it is "
                + reducerCount
                + ", decrease 'kylin.engine.mr.uhc-reducer-count'";
        throw new IllegalArgumentException(message);
    }

    job.setPartitionerClass(FactDistinctColumnPartitioner.class);
    job.setReducerClass(FactDistinctColumnsReducer.class);
    job.setNumReduceTasks(reducerCount);
    job.getConfiguration().setInt(BatchConstants.CFG_HLL_REDUCER_NUM,
            mapping.getCuboidRowCounterReducerNum());

    // Each reducer writes to its respective directory through one of these named outputs.
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN,
            SequenceFileOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT,
            SequenceFileOutputFormat.class, NullWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS,
            SequenceFileOutputFormat.class, LongWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION,
            TextOutputFormat.class, NullWritable.class, LongWritable.class);

    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());
    FileOutputFormat.setOutputPath(job, output);

    // The lazy wrapper prevents a zero-sized default output from being created.
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
示例15: setupReducer
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入方法依赖的package包/类
/**
 * Configures the reduce side of the job: reducer and partitioner classes, reducer count,
 * the dictionary named output, and the output path. The output path is deleted at the
 * end of setup.
 *
 * @param output           output directory of the job
 * @param numberOfReducers number of reduce tasks to configure
 * @throws IOException declared for I/O failures during setup
 */
private void setupReducer(Path output, int numberOfReducers) throws IOException {
    job.setPartitionerClass(UHCDictionaryPartitioner.class);
    job.setReducerClass(UHCDictionaryReducer.class);
    job.setNumReduceTasks(numberOfReducers);

    // Output location, also recorded under the batch output-path key.
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());
    FileOutputFormat.setOutputPath(job, output);

    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT,
            SequenceFileOutputFormat.class, NullWritable.class, BytesWritable.class);

    // The lazy wrapper prevents a zero-sized default output from being created.
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}