This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.addNamedOutput. If you are wondering how MultipleOutputs.addNamedOutput is called in practice, or are looking for concrete examples of its use, the curated code samples below should help. You can also browse further usage examples of the enclosing class, org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.
The following shows 15 code examples of the MultipleOutputs.addNamedOutput method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
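All 15 examples below show the driver-side half of the API: MultipleOutputs.addNamedOutput(job, name, outputFormatClass, keyClass, valueClass) only registers a named output on the Job. Records are actually emitted from the map or reduce task through a MultipleOutputs instance. The following is a minimal sketch of that task-side half; the reducer class, the named output "stats", and the key/value types are illustrative assumptions, not taken from any of the examples.

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class StatsReducer extends Reducer<Text, IntWritable, NullWritable, Text> {
    private MultipleOutputs<NullWritable, Text> mos;

    @Override
    protected void setup(Context context) {
        // One MultipleOutputs instance per task, wrapping the task context.
        mos = new MultipleOutputs<>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0;
        for (IntWritable v : values) {
            sum += v.get();
        }
        // "stats" is a hypothetical name: it must match a name registered in the
        // driver with MultipleOutputs.addNamedOutput(job, "stats", ...), and the
        // key/value types must match the classes given there.
        mos.write("stats", NullWritable.get(), new Text(key + "\t" + sum));
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Closes all record writers opened for named outputs; without this,
        // output files may be left incomplete.
        mos.close();
    }
}

Two details that several of the examples rely on: named output names may contain only letters and digits (addNamedOutput rejects other characters), and pairing MultipleOutputs with LazyOutputFormat.setOutputFormatClass(...) keeps tasks that write only to named outputs from creating empty default part-* files.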
Example 1: configureTextOutput
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; // import the package/class the method depends on
/**
 * Set up a MapReduce job to output human-readable text.
 */
protected void configureTextOutput(String destination) {
    Path outPath;
    outPath = MRReasoningUtils.getOutputPath(job.getConfiguration(), destination);
    TextOutputFormat.setOutputPath(job, outPath);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT,
        TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.TERMINAL_OUT,
        TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.SCHEMA_OUT,
        TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INCONSISTENT_OUT,
        TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
        TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
Example 2: analyze
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; // import the package/class the method depends on
private boolean analyze(final String inputFilePath,
                        final String outputFilePath,
                        final Long startTime) throws Exception {
    Configuration conf = new Configuration();
    conf.setLong(Holistic.START_TIME, startTime);
    conf.setLong(Holistic.EXECUTE_TIME, executeHourTime);
    Job jobAnalyze = Job.getInstance(conf, "analyze");
    jobAnalyze.setJarByClass(Holistic.class);
    MultipleOutputs.addNamedOutput(jobAnalyze, MapKeyConfig.NEW_OLD_CUSTOMER,
        TextOutputFormat.class, KeyWrapper.class, Text.class);
    MultipleOutputs.addNamedOutput(jobAnalyze, MapKeyConfig.CUSTOMER_FLOW_KEY,
        TextOutputFormat.class, KeyWrapper.class, Text.class);
    MultipleOutputs.addNamedOutput(jobAnalyze, MapKeyConfig.CYCLE,
        TextOutputFormat.class, KeyWrapper.class, Text.class);
    MultipleOutputs.addNamedOutput(jobAnalyze, MapKeyConfig.IN_STORE_HOUR,
        TextOutputFormat.class, KeyWrapper.class, Text.class);
    jobAnalyze.setMapperClass(AnalysisMapper.class);
    jobAnalyze.setReducerClass(AnalysisReducer.class);
    jobAnalyze.setCombinerClass(AnalysisCombiner.class);
    jobAnalyze.setOutputKeyClass(LongWritable.class);
    jobAnalyze.setOutputValueClass(Text.class);
    jobAnalyze.setMapOutputKeyClass(KeyWrapper.class);
    jobAnalyze.setMapOutputValueClass(ValueWrapper.class);
    FileInputFormat.addInputPath(jobAnalyze, new Path(inputFilePath));
    FileOutputFormat.setOutputPath(jobAnalyze, new Path(outputFilePath));
    return jobAnalyze.waitForCompletion(true);
}
Example 3: setMultiOutputs
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; // import the package/class the method depends on
private void setMultiOutputs(MultipleVCFHeader mVcfHeader, BioJob job) {
    int i = 0;
    Map<Integer, String> multiOutputs = new HashMap<>();
    for (int id : mVcfHeader.getFileName2ID().values()) {
        multiOutputs.put(id, "SortResult" + ++i);
        MultipleOutputs.addNamedOutput(job, multiOutputs.get(id), SortOutputFormat.class,
            NullWritable.class, VariantContextWritable.class);
    }
    options.setMultiOutputs(multiOutputs);
}
Example 4: configureSchemaOutput
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; // import the package/class the method depends on
/**
 * Set up the MapReduce job to output a schema (TBox).
 */
protected void configureSchemaOutput() {
    Path outPath = MRReasoningUtils.getSchemaPath(job.getConfiguration());
    SequenceFileOutputFormat.setOutputPath(job, outPath);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SchemaWritable.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, "schemaobj",
        SequenceFileOutputFormat.class, NullWritable.class, SchemaWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
        TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
Example 5: configureDerivationOutput
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; // import the package/class the method depends on
/**
 * Set up a MapReduce job to output newly derived triples.
 * @param intermediate True if this is intermediate data. Outputs
 *        to [base]-[iteration]-[temp].
 */
protected void configureDerivationOutput(boolean intermediate) {
    Path outPath;
    Configuration conf = job.getConfiguration();
    int iteration = MRReasoningUtils.getCurrentIteration(conf);
    if (intermediate) {
        outPath = MRReasoningUtils.getOutputPath(conf,
            MRReasoningUtils.OUTPUT_BASE + iteration
            + MRReasoningUtils.TEMP_SUFFIX);
    } else {
        outPath = MRReasoningUtils.getOutputPath(conf,
            MRReasoningUtils.OUTPUT_BASE + iteration);
    }
    SequenceFileOutputFormat.setOutputPath(job, outPath);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT,
        SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.TERMINAL_OUT,
        SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.SCHEMA_OUT,
        SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INCONSISTENT_OUT,
        SequenceFileOutputFormat.class, Derivation.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);
    // Set up an output for diagnostic info, if needed
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
        TextOutputFormat.class, Text.class, Text.class);
}
Example 6: run
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; // import the package/class the method depends on
/**
 * The MapReduce driver - set up and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(IOOptions.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        return result;
    }
    Path input = new Path(cli.getArgValueAsString(IOOptions.INPUT));
    Path output = new Path(cli.getArgValueAsString(IOOptions.OUTPUT));
    Configuration conf = super.getConf();
    Job job = new Job(conf);
    job.setJarByClass(MultipleOutputsJob.class);
    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    job.setNumReduceTasks(0);
    MultipleOutputs.addNamedOutput(job, "partition",
        TextOutputFormat.class, Text.class, Text.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
Example 7: setupReducer
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; // import the package/class the method depends on
private void setupReducer(Path output, CubeSegment cubeSeg)
        throws IOException {
    FactDistinctColumnsReducerMapping reducerMapping =
        new FactDistinctColumnsReducerMapping(cubeSeg.getCubeInstance());
    int numberOfReducers = reducerMapping.getTotalReducerNum();
    if (numberOfReducers > 250) {
        throw new IllegalArgumentException(
            "The max reducer number for FactDistinctColumnsJob is 250, but now it is "
                + numberOfReducers
                + ", decrease 'kylin.engine.mr.uhc-reducer-count'");
    }
    job.setReducerClass(FactDistinctColumnsReducer.class);
    job.setPartitionerClass(FactDistinctColumnPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);
    job.getConfiguration().setInt(BatchConstants.CFG_HLL_REDUCER_NUM, reducerMapping.getCuboidRowCounterReducerNum());
    // make each reducer write its output to its respective directory
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN, SequenceFileOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class, NullWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS, SequenceFileOutputFormat.class, LongWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class, NullWritable.class, LongWritable.class);
    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());
    // prevent creation of a zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    deletePath(job.getConfiguration(), output);
}
Example 8: setupReducer
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; // import the package/class the method depends on
private void setupReducer(Path output, int numberOfReducers) throws IOException {
    job.setReducerClass(UHCDictionaryReducer.class);
    job.setPartitionerClass(UHCDictionaryPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class, NullWritable.class, BytesWritable.class);
    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());
    // prevent creation of a zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    deletePath(job.getConfiguration(), output);
}
Example 9: run
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; // import the package/class the method depends on
public void run(String trainSet, String output, String mapPath, int movieIdSize)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    conf.set("tempPath", output);
    conf.set("mapPath", mapPath);
    conf.setInt("movies", movieIdSize);
    Job job = Job.getInstance(conf, "generate matrix");
    // job.addCacheFile(new Path(mapPath).toUri());
    System.out.println("the mapPath is " + mapPath);
    job.setMapperClass(GenSeqMapper.class);
    job.setReducerClass(GenSeqReducer.class);
    job.setJarByClass(GenSeqMatrix.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    MultipleOutputs.addNamedOutput(job, "output", TextOutputFormat.class, Text.class, Text.class);
    FileInputFormat.addInputPath(job, new Path(trainSet));
    FileOutputFormat.setOutputPath(job, new Path(output + "/seqMatrix"));
    job.waitForCompletion(true);
}
Example 10: run
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
    final CmdLineParser parser = new CmdLineParser(this);
    try {
        parser.parseArgument(args);
    } catch (final CmdLineException e) {
        System.err.println(e.getMessage());
        System.err.println("Usage: hadoop jar HadoopImageIndexer.jar [options]");
        parser.printUsage(System.err);
        return -1;
    }
    final Path[] paths = SequenceFileUtility.getFilePaths(input, "part");
    final Path outputPath = new Path(output);
    if (outputPath.getFileSystem(this.getConf()).exists(outputPath) && replace)
        outputPath.getFileSystem(this.getConf()).delete(outputPath, true);
    final Job job = TextBytesJobUtil.createJob(paths, outputPath, null, this.getConf());
    job.setJarByClass(this.getClass());
    job.setMapperClass(PqPcaVladMapper.class);
    job.setNumReduceTasks(0);
    MultipleOutputs.addNamedOutput(job, "pcavlad", SequenceFileOutputFormat.class, Text.class, BytesWritable.class);
    DistributedCache.createSymlink(job.getConfiguration());
    DistributedCache.addCacheFile(new URI(indexerData + "#vlad-data.bin"), job.getConfiguration());
    SequenceFileOutputFormat.setCompressOutput(job, !dontcompress);
    job.waitForCompletion(true);
    return 0;
}
Example 11: stage
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; // import the package/class the method depends on
@Override
public Job stage(Path[] inputs, Path output, Configuration conf) throws Exception {
    final Job job = super.stage(inputs, output, conf);
    job.setOutputFormatClass(NullOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class, NullWritable.class, Text.class);
    return job;
}
Example 12: runJob
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; // import the package/class the method depends on
private boolean runJob(String inDir, String outDir, boolean compressOutput) throws Exception {
    Configuration conf = getConf();
    conf.setBoolean("mapred.output.compress", compressOutput);
    conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    Job job = Job.getInstance(conf);
    job.setJarByClass(HadoopMain.class);
    FileInputFormat.addInputPath(job, new Path(inDir));
    FileOutputFormat.setOutputPath(job, new Path(outDir));
    job.setMapperClass(HadoopMap.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(MultiOutputIntSumReducer.class);
    // Turn off the default output ("part-..."); we don't need it
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, "W", TextOutputFormat.class, Text.class, IntWritable.class);
    MultipleOutputs.addNamedOutput(job, "CoocF", TextOutputFormat.class, Text.class, IntWritable.class);
    MultipleOutputs.addNamedOutput(job, "CoocWF", TextOutputFormat.class, Text.class, IntWritable.class);
    MultipleOutputs.addNamedOutput(job, "F", TextOutputFormat.class, Text.class, IntWritable.class);
    MultipleOutputs.addNamedOutput(job, "WF", TextOutputFormat.class, Text.class, IntWritable.class);
    String[] mwePaths = conf.getStrings("holing.mwe.vocabulary", "");
    String mwePath = "";
    if (mwePaths != null && mwePaths.length > 0 && mwePaths[0] != null) mwePath = mwePaths[0];
    if (!mwePath.equals("")) job.addCacheFile(new URI(mwePath + "#mwe_voc"));
    job.setJobName("lefex: Feature Extraction");
    return job.waitForCompletion(true);
}
Example 13: startHadoopJob
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; // import the package/class the method depends on
public static void startHadoopJob(Configuration conf) {
    try {
        Job job = new Job(conf, "archiventory");
        // local debugging (pseudo-distributed)
        // job.getConfiguration().set("mapred.job.tracker", "local");
        // job.getConfiguration().set("fs.default.name", "file:///");
        job.setJarByClass(Archiventory.class);
        job.setMapperClass(Archiventory.ContainerItemIdentificationMapper.class);
        job.setReducerClass(Archiventory.ContainerItemIdentificationReducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        // tabular output of identification results
        MultipleOutputs.addNamedOutput(job, "idtab", TextOutputFormat.class, Text.class, Text.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(ObjectWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ObjectWritable.class);
        TextInputFormat.addInputPath(job, new Path(config.getDirStr()));
        String outpath = "output/" + System.currentTimeMillis();
        FileOutputFormat.setOutputPath(job, new Path(outpath));
        job.waitForCompletion(true);
        System.out.print(outpath);
        System.exit(0);
    } catch (Exception e) {
        logger.error("I/O error", e);
    }
}
Example 14: run
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: BinningTags <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }
    Job job = new Job(conf, "Binning Tags");
    job.setJarByClass(BinningTags.class);
    // Configure the MultipleOutputs by adding an output called "bins"
    // with the proper output format and mapper key/value pairs
    MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class,
        Text.class, NullWritable.class);
    // Enable the counters for the job
    // If there are a significant number of different named outputs, this
    // should be disabled
    MultipleOutputs.setCountersEnabled(job, true);
    // Map-only job
    job.setNumReduceTasks(0);
    job.setMapperClass(BinningMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
Example 15: run
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; // import the package/class the method depends on
/**
 * Runs the pre-processing job and outputs the utility matrix M.
 * @param args input/output folders
 * @param REDUCERS number of reducers for this job
 * @return the mean value of M
 * @throws Exception
 */
public Double run(String[] args, final int REDUCERS) throws Exception {
    Configuration conf = new Configuration();
    // Save params
    conf.set("mapred.textoutputformat.separator", ",");
    Job job = new Job(conf, "normalisation");
    // metrics
    job.setNumReduceTasks(REDUCERS);
    // Classes
    job.setJarByClass(Preprocessing.class);
    job.setMapperClass(PreprocessingMap.class);
    // job.setCombinerClass(Reduce.class);
    job.setReducerClass(PreprocessingReduce.class);
    // map output / reduce output types
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);
    // job.setOutputFormatClass(NullOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, "M", TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.addNamedOutput(job, "sum", TextOutputFormat.class, Text.class, Text.class);
    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1] + "/M"));
    if (!job.waitForCompletion(true)) return null;
    // crawl the results and return the mean of M
    return getMeanFromDFS(conf, new Path(args[1] + "/M"));
}