This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.Job.setOutputValueClass. If you have been wondering what Job.setOutputValueClass does and how to use it, the curated code examples below may help. You can also browse further usage examples of the enclosing class, org.apache.hadoop.mapreduce.Job.
The following shows 15 code examples of Job.setOutputValueClass, sorted by popularity by default.
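Before turning to the individual examples, here is a minimal sketch of where setOutputValueClass fits in a typical driver. The class names (MyDriver, MyMapper, MyReducer) and the chosen key/value types are placeholders for illustration, not taken from any example below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

// inside the driver's main():
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "example");
job.setJarByClass(MyDriver.class);           // placeholder driver class
job.setMapperClass(MyMapper.class);          // placeholder Mapper<LongWritable, Text, Text, IntWritable>
job.setReducerClass(MyReducer.class);        // placeholder Reducer<Text, IntWritable, Text, IntWritable>
job.setOutputKeyClass(Text.class);           // key type of the job's final (reducer) output
job.setOutputValueClass(IntWritable.class);  // value type of the job's final (reducer) output
// If the map output types differ from the final output types,
// set them separately with setMapOutputKeyClass / setMapOutputValueClass.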
Example 1: main
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
  BasicConfigurator.configure();
  Configuration conf = new Configuration();
  conf.setQuietMode(true);

  Job job = Job.getInstance(conf, "WordCount");
  job.setJarByClass(HadoopWordCount.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setMapperClass(Map.class);
  job.setCombinerClass(Reduce.class);
  job.setReducerClass(Reduce.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1] + "_" + System.currentTimeMillis()));

  long t = System.currentTimeMillis();
  job.waitForCompletion(true);
  System.out.println("TotalTime=" + (System.currentTimeMillis() - t));
}
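The Map and Reduce classes wired in above are not shown in the article. A minimal sketch of what they would typically look like for this configuration is given below; the actual HadoopWordCount implementations may differ. Assumed imports: java.io.IOException, org.apache.hadoop.io.*, org.apache.hadoop.mapreduce.Mapper, org.apache.hadoop.mapreduce.Reducer.

public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
  private static final IntWritable ONE = new IntWritable(1);
  private final Text word = new Text();

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // emit (token, 1) for every whitespace-separated token in the line
    for (String token : value.toString().split("\\s+")) {
      if (!token.isEmpty()) {
        word.set(token);
        context.write(word, ONE);
      }
    }
  }
}

public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    // sum the counts for each word; output types match setOutputKeyClass/setOutputValueClass
    int sum = 0;
    for (IntWritable v : values) {
      sum += v.get();
    }
    context.write(key, new IntWritable(sum));
  }
}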
Example 2: runRandomInputGenerator
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMuplitplier) throws Exception {
  LOG.info("Running RandomInputGenerator with numMappers=" + numMappers
      + ", numNodes=" + numNodes);
  Job job = Job.getInstance(getConf());
  job.setJobName("Random Input Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());

  job.setInputFormatClass(GeneratorInputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(NullWritable.class);

  setJobConf(job, numMappers, numNodes, width, wrapMuplitplier);

  job.setMapperClass(Mapper.class); // identity mapper

  FileOutputFormat.setOutputPath(job, tmpOutput);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  boolean success = jobCompletion(job);

  return success ? 0 : 1;
}
Example 3: main
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: MaxTemperatureWithCombiner <input path> <output path>");
    System.exit(-1);
  }

  Job job = new Job();
  job.setJarByClass(MaxTemperatureWithCombiner.class);
  job.setJobName("Max Temperature With Combiner");

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.setMapperClass(MaxTemperatureMapper.class);
  job.setCombinerClass(MaxTemperatureReducer.class);
  job.setReducerClass(MaxTemperatureReducer.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example 4: createJob
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static Job createJob() throws IOException {
  final Configuration conf = new Configuration();
  final Job baseJob = Job.getInstance(conf);
  baseJob.setOutputKeyClass(Text.class);
  baseJob.setOutputValueClass(IntWritable.class);
  baseJob.setMapperClass(NewMapTokenizer.class);
  baseJob.setCombinerClass(NewSummer.class);
  baseJob.setReducerClass(NewSummer.class);
  baseJob.setNumReduceTasks(1);
  // small sort buffer and low spill threshold force multiple map-side spills,
  // so the combiner also runs during the merge (at least 3 spills required)
  baseJob.getConfiguration().setInt(JobContext.IO_SORT_MB, 1);
  baseJob.getConfiguration().set(JobContext.MAP_SORT_SPILL_PERCENT, "0.50");
  baseJob.getConfiguration().setInt(JobContext.MAP_COMBINE_MIN_SPILLS, 3);
  org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setMinInputSplitSize(
      baseJob, Long.MAX_VALUE);
  return baseJob;
}
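A possible usage sketch for the factory above; the surrounding driver and the input/output paths are hypothetical, not part of the original example:

Job job = createJob();
org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(
    job, new Path("/tmp/wordcount/in"));    // hypothetical input path
org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(
    job, new Path("/tmp/wordcount/out"));   // hypothetical output path
System.exit(job.waitForCompletion(true) ? 0 : 1);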
Example 5: main
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  conf.set("xmlinput.start", "<page>");
  conf.set("xmlinput.end", "</page>");

  Job job = Job.getInstance(conf);
  job.setJobName("TermFrequencyCount");
  job.setJarByClass(TF.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntArrayWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  job.setMapperClass(TFMap.class);
  job.setReducerClass(TFReduce.class);

  job.setInputFormatClass(XmlInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  boolean wait = job.waitForCompletion(true);
  System.exit(wait ? 0 : 1);
}
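IntArrayWritable is not defined in the article; it is presumably a custom ArrayWritable subclass. A common minimal version of such a class looks like the sketch below (an assumption, not necessarily the article's actual implementation):

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;

public class IntArrayWritable extends ArrayWritable {
  public IntArrayWritable() {
    super(IntWritable.class);  // no-arg constructor required so Hadoop can instantiate it during deserialization
  }

  public IntArrayWritable(IntWritable[] values) {
    super(IntWritable.class, values);
  }
}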
Example 6: runJob
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void runJob(Configuration conf, Path inputPath, Path output)
    throws IOException, ClassNotFoundException, InterruptedException {
  Job job = new Job(conf, "Input Drive running input:" + inputPath);
  log.info("start running InputDriver");

  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(indexToWordWritable.class);
  job.setOutputKeyClass(twoDimensionIndexWritable.class);
  job.setOutputValueClass(Text.class);

  job.setMapperClass(InputMapper.class);
  job.setReducerClass(InputReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setJarByClass(InputDriver.class);

  FileInputFormat.addInputPath(job, inputPath);
  FileOutputFormat.setOutputPath(job, output);

  boolean succeeded = job.waitForCompletion(true);
  if (!succeeded) {
    throw new IllegalStateException("Job failed!");
  }
}
Example 7: main
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  job.setMapperClass(SumMapper.class);
  job.setReducerClass(SumReducer.class);
  job.setJarByClass(Sum.class);

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  TextInputFormat.setInputPaths(job, new Path(args[0]));
  TextOutputFormat.setOutputPath(job, new Path(args[1]));

  job.waitForCompletion(true);
}
Example 8: main
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  job.setJarByClass(UnitSum.class);
  job.setMapperClass(PassMapper.class);
  job.setReducerClass(SumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.waitForCompletion(true);
}
Example 9: main
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = new Job(conf, "word count");
  job.setJarByClass(WordCount.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example 10: createJob
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static Job createJob(String name, String base) throws IOException {
  Configuration conf = new Configuration();
  conf.set(Total.QUERIED_NAME, name);
  Job job = Job.getInstance(new Cluster(conf), conf);
  job.setJarByClass(Cut.class);

  // in
  String in = base;
  if (!base.endsWith("/"))
    in = in.concat("/");
  in = in.concat("employees");
  SequenceFileInputFormat.addInputPath(job, new Path(in));
  job.setInputFormatClass(SequenceFileInputFormat.class);

  // map
  job.setMapperClass(CutMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Employee.class);

  // out
  SequenceFileOutputFormat.setOutputPath(job, new Path(base + "/tmp"));
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Employee.class);

  return job;
}
Example 11: run
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
  Configuration conf = this.getConf();
  // parse and initialize the arguments
  this.processArgs(conf, args);

  // create the job
  Job job = Job.getInstance(conf, "active_user");

  // job configuration
  job.setJarByClass(ActiveUserRunner.class);
  // HBase input mapper parameters
  // 1. run locally
  TableMapReduceUtil.initTableMapperJob(this.initScans(job), ActiveUserMapper.class,
      StatsUserDimension.class, TimeOutputValue.class, job, false);
  // 2. run on a cluster
  // TableMapReduceUtil.initTableMapperJob(null, ActiveUserMapper.class,
  //     StatsUserDimension.class, TimeOutputValue.class, job);

  // reducer parameters
  job.setReducerClass(ActiveUserReducer.class);
  job.setOutputKeyClass(StatsUserDimension.class);
  job.setOutputValueClass(MapWritableValue.class);

  // output parameters
  job.setOutputFormatClass(TransformerOutputFormat.class);

  // start time in milliseconds
  long startTime = System.currentTimeMillis();
  try {
    return job.waitForCompletion(true) ? 0 : -1;
  } finally {
    // end time in milliseconds
    long endTime = System.currentTimeMillis();
    logger.info("Job<" + job.getJobName() + "> succeeded: " + job.isSuccessful()
        + "; start: " + startTime + "; end: " + endTime
        + "; elapsed: " + (endTime - startTime) + "ms");
  }
}
Example 12: configureMapper
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
@Override
protected void configureMapper(Job job, String tableName,
    String tableClassName) {
  job.setOutputKeyClass(SqoopRecord.class);
  job.setOutputValueClass(NullWritable.class);
  job.setMapperClass(getMapperClass());
}
Example 13: createSubmittableJob
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public Job createSubmittableJob(String[] args) throws IOException {
  Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
  generatePartitions(partitionsPath);

  Job job = Job.getInstance(getConf(),
      getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
  Configuration jobConf = job.getConfiguration();
  jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
  job.setJarByClass(HashTable.class);

  TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
      HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);

  // use a TotalOrderPartitioner and reducers to group region output into hash files
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
  job.setReducerClass(Reducer.class); // identity reducer
  job.setNumReduceTasks(tableHash.numHashFiles);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(ImmutableBytesWritable.class);
  job.setOutputFormatClass(MapFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));

  return job;
}
Example 14: run
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: wordmedian <in> <out>");
    return 0;
  }

  setConf(new Configuration());
  Configuration conf = getConf();

  Job job = Job.getInstance(conf, "word median");
  job.setJarByClass(WordMedian.class);
  job.setMapperClass(WordMedianMapper.class);
  job.setCombinerClass(WordMedianReducer.class);
  job.setReducerClass(WordMedianReducer.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  boolean result = job.waitForCompletion(true);

  // Wait for JOB 1 -- get middle value to check for Median
  long totalWords = job.getCounters()
      .getGroup(TaskCounter.class.getCanonicalName())
      .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
  int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
  int medianIndex2 = (int) Math.floor((totalWords / 2.0));

  median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);

  return (result ? 0 : 1);
}
Example 15: testSnapshotScanMapReduce
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public void testSnapshotScanMapReduce()
    throws IOException, InterruptedException, ClassNotFoundException {
  Stopwatch scanOpenTimer = new Stopwatch();
  Stopwatch scanTimer = new Stopwatch();

  Scan scan = getScan();
  String jobName = "testSnapshotScanMapReduce";

  Job job = new Job(conf);
  job.setJobName(jobName);
  job.setJarByClass(getClass());

  TableMapReduceUtil.initTableSnapshotMapperJob(
      this.snapshotName,
      scan,
      MyMapper.class,
      NullWritable.class,
      NullWritable.class,
      job,
      true,
      new Path(restoreDir));

  job.setNumReduceTasks(0);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  job.setOutputFormatClass(NullOutputFormat.class);

  scanTimer.start();
  job.waitForCompletion(true);
  scanTimer.stop();

  Counters counters = job.getCounters();
  long numRows = counters.findCounter(ScanCounter.NUM_ROWS).getValue();
  long numCells = counters.findCounter(ScanCounter.NUM_CELLS).getValue();
  long totalBytes = counters.findCounter(HBASE_COUNTER_GROUP_NAME, "BYTES_IN_RESULTS").getValue();

  double throughput = (double) totalBytes / scanTimer.elapsedTime(TimeUnit.SECONDS);
  double throughputRows = (double) numRows / scanTimer.elapsedTime(TimeUnit.SECONDS);
  double throughputCells = (double) numCells / scanTimer.elapsedTime(TimeUnit.SECONDS);

  System.out.println("HBase scan mapreduce: ");
  System.out.println("total time to open scanner: " + scanOpenTimer.elapsedMillis() + " ms");
  System.out.println("total time to scan: " + scanTimer.elapsedMillis() + " ms");
  System.out.println("total bytes: " + totalBytes + " bytes ("
      + StringUtils.humanReadableInt(totalBytes) + ")");
  System.out.println("throughput : " + StringUtils.humanReadableInt((long) throughput) + "B/s");
  System.out.println("total rows : " + numRows);
  System.out.println("throughput : " + StringUtils.humanReadableInt((long) throughputRows) + " rows/s");
  System.out.println("total cells : " + numCells);
  System.out.println("throughput : " + StringUtils.humanReadableInt((long) throughputCells) + " cells/s");
}