This page collects typical usage examples of the Java class org.apache.hadoop.examples.WordCount: what the WordCount class does, how to use it, and how it appears in real projects. WordCount belongs to the org.apache.hadoop.examples package. Eight code examples are shown below, ordered by popularity by default; you can upvote the ones you find useful, which helps surface better Java examples.
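Before the project examples, here is a minimal, self-contained driver in the style of the canonical Hadoop WordCount example. The class name WordCountDriver and the two path arguments are illustrative, not part of the examples below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.examples.WordCount;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    // reuse the mapper/combiner/reducer nested inside WordCount
    job.setMapperClass(WordCount.TokenizerMapper.class);
    job.setCombinerClass(WordCount.IntSumReducer.class);
    job.setReducerClass(WordCount.IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));   // e.g. an input directory
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // e.g. a fresh output directory
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}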
Example 1: runWordCount
import org.apache.hadoop.examples.WordCount; // import the required package/class
private void runWordCount(Configuration conf)
    throws IOException, InterruptedException, ClassNotFoundException {
  final String COUNTER_GROUP = "org.apache.hadoop.mapred.Task$Counter";
  localFs.delete(new Path(TEST_ROOT_DIR + "/in"), true);
  localFs.delete(new Path(TEST_ROOT_DIR + "/out"), true);
  writeFile("in/part1", "this is a test\nof word count test\ntest\n");
  writeFile("in/part2", "more test");
  Job job = new Job(conf, "word count");
  job.setJarByClass(WordCount.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setInputFormatClass(TrackingTextInputFormat.class);
  FileInputFormat.addInputPath(job, new Path(TEST_ROOT_DIR + "/in"));
  FileOutputFormat.setOutputPath(job, new Path(TEST_ROOT_DIR + "/out"));
  // the job ID is assigned only once the job has been submitted
  assertNull(job.getJobID());
  assertTrue(job.waitForCompletion(false));
  assertNotNull(job.getJobID());
  String out = readFile("out/part-r-00000");
  System.out.println(out);
  assertEquals("a\t1\ncount\t1\nis\t1\nmore\t1\nof\t1\ntest\t4\nthis\t1\nword\t1\n",
      out);
  Counters ctrs = job.getCounters();
  System.out.println("Counters: " + ctrs);
  long combineIn = ctrs.findCounter(COUNTER_GROUP, "COMBINE_INPUT_RECORDS").getValue();
  long combineOut = ctrs.findCounter(COUNTER_GROUP, "COMBINE_OUTPUT_RECORDS").getValue();
  long reduceIn = ctrs.findCounter(COUNTER_GROUP, "REDUCE_INPUT_RECORDS").getValue();
  long mapOut = ctrs.findCounter(COUNTER_GROUP, "MAP_OUTPUT_RECORDS").getValue();
  long reduceOut = ctrs.findCounter(COUNTER_GROUP, "REDUCE_OUTPUT_RECORDS").getValue();
  long reduceGrps = ctrs.findCounter(COUNTER_GROUP, "REDUCE_INPUT_GROUPS").getValue();
  // every map output record passes through the combiner, and every
  // combiner output record reaches the reducer
  assertEquals("map out = combine in", mapOut, combineIn);
  assertEquals("combine out = reduce in", combineOut, reduceIn);
  assertTrue("combine in > combine out", combineIn > combineOut);
  assertEquals("reduce groups = reduce out", reduceGrps, reduceOut);
  // asking for a group that was never incremented yields an empty group
  String group = "Random Group";
  CounterGroup ctrGrp = ctrs.getGroup(group);
  assertEquals(0, ctrGrp.size());
}
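The examples on this page reference TokenizerMapper and IntSumReducer, the mapper and reducer nested inside WordCount. For reference, their implementations are essentially the following, reproduced from the canonical Hadoop example (requires java.io.IOException, java.util.StringTokenizer, org.apache.hadoop.io.IntWritable, org.apache.hadoop.io.Text, org.apache.hadoop.mapreduce.Mapper, and org.apache.hadoop.mapreduce.Reducer):

public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
  private final static IntWritable one = new IntWritable(1);
  private Text word = new Text();

  public void map(Object key, Text value, Context context)
      throws IOException, InterruptedException {
    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken());
      context.write(word, one);  // emit (word, 1) for every token
    }
  }
}

public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
  private IntWritable result = new IntWritable();

  public void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable val : values) {
      sum += val.get();  // sum the partial counts for this word
    }
    result.set(sum);
    context.write(key, result);
  }
}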
Example 2: getExamplesJarPath
import org.apache.hadoop.examples.WordCount; // import the required package/class
private String getExamplesJarPath() {
  // locate the jar (or classes directory) that WordCount was loaded from
  Class<?> clazz = WordCount.class;
  return clazz.getProtectionDomain().getCodeSource().getLocation().getPath();
}
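This helper is handy when a test or tool needs to ship the Hadoop examples jar with a job. A hypothetical caller might look like the following sketch (the job name is illustrative, and it assumes WordCount was indeed loaded from a jar rather than a classes directory):

// Hypothetical usage: put the resolved examples jar on the task classpath
// so WordCount and its nested classes are available to map/reduce tasks.
String jarPath = getExamplesJarPath();
Job job = Job.getInstance(conf, "uses examples jar");
job.addFileToClassPath(new Path(jarPath)); // distributes the jar via the distributed cache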
Example 3: runSecondarySort
import org.apache.hadoop.examples.WordCount; // import the required package/class
private void runSecondarySort(Configuration conf)
    throws IOException, InterruptedException, ClassNotFoundException {
  FileSystem localFs = FileSystem.getLocal(conf);
  localFs.delete(new Path(TEST_ROOT_DIR + "/in"), true);
  localFs.delete(new Path(TEST_ROOT_DIR + "/out"), true);
  TestMapReduceLocal.writeFile("in/part1",
      "-1 -4\n-3 23\n5 10\n-1 -2\n-1 300\n-1 10\n4 1\n" +
      "4 2\n4 10\n4 -1\n4 -10\n10 20\n10 30\n10 25\n");
  Job job = new Job(conf, "word count");
  job.setJarByClass(WordCount.class);
  job.setNumReduceTasks(2);
  job.setMapperClass(SecondarySort.MapClass.class);
  job.setReducerClass(SecondarySort.Reduce.class);
  // group and partition by the first int in the pair
  job.setPartitionerClass(FirstPartitioner.class);
  job.setGroupingComparatorClass(FirstGroupingComparator.class);
  // the map output is IntPair, IntWritable
  job.setMapOutputKeyClass(IntPair.class);
  job.setMapOutputValueClass(IntWritable.class);
  // the reduce output is Text, IntWritable
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(TEST_ROOT_DIR + "/in"));
  FileOutputFormat.setOutputPath(job, new Path(TEST_ROOT_DIR + "/out"));
  assertTrue(job.waitForCompletion(true));
  String out = TestMapReduceLocal.readFile("out/part-r-00000");
  assertEquals("------------------------------------------------\n" +
      "4\t-10\n4\t-1\n4\t1\n4\t2\n4\t10\n" +
      "------------------------------------------------\n" +
      "10\t20\n10\t25\n10\t30\n", out);
  out = TestMapReduceLocal.readFile("out/part-r-00001");
  assertEquals("------------------------------------------------\n" +
      "-3\t23\n" +
      "------------------------------------------------\n" +
      "-1\t-4\n-1\t-2\n-1\t10\n-1\t300\n" +
      "------------------------------------------------\n" +
      "5\t10\n", out);
}
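Secondary sort works here because the partitioner and grouping comparator look only at the first integer of each IntPair, while the full sort order covers both integers, so each reduce() call sees one group per first value with the second values already sorted. A minimal sketch of what those two classes look like, modeled on the SecondarySort example (details may differ across Hadoop versions; assumes SecondarySort.IntPair, org.apache.hadoop.mapreduce.Partitioner, org.apache.hadoop.io.RawComparator, and org.apache.hadoop.io.WritableComparator):

// Partition on the first int only, so all pairs sharing a first value
// land in the same reducer.
public static class FirstPartitioner extends Partitioner<IntPair, IntWritable> {
  @Override
  public int getPartition(IntPair key, IntWritable value, int numPartitions) {
    return Math.abs(key.getFirst() * 127) % numPartitions;
  }
}

// Group by the first int only; the serialized IntPair starts with the
// first int, so comparing its leading 4 bytes compares first values.
public static class FirstGroupingComparator implements RawComparator<IntPair> {
  @Override
  public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
    return WritableComparator.compareBytes(b1, s1, Integer.SIZE / 8,
                                           b2, s2, Integer.SIZE / 8);
  }

  @Override
  public int compare(IntPair o1, IntPair o2) {
    int l = o1.getFirst();
    int r = o2.getFirst();
    return l == r ? 0 : (l < r ? -1 : 1);
  }
}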
Example 4: runWordCount
import org.apache.hadoop.examples.WordCount; // import the required package/class
private void runWordCount(Configuration conf)
    throws IOException, InterruptedException, ClassNotFoundException {
  final String COUNTER_GROUP = "org.apache.hadoop.mapred.Task$Counter";
  localFs.delete(new Path(TEST_ROOT_DIR + "/in"), true);
  localFs.delete(new Path(TEST_ROOT_DIR + "/out"), true);
  writeFile("in/part1", "this is a test\nof word count test\ntest\n");
  writeFile("in/part2", "more test");
  Job job = new Job(conf, "word count");
  job.setJarByClass(WordCount.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setInputFormatClass(TrackingTextInputFormat.class);
  FileInputFormat.addInputPath(job, new Path(TEST_ROOT_DIR + "/in"));
  FileOutputFormat.setOutputPath(job, new Path(TEST_ROOT_DIR + "/out"));
  assertTrue(job.waitForCompletion(false));
  String out = readFile("out/part-r-00000");
  System.out.println(out);
  assertEquals("a\t1\ncount\t1\nis\t1\nmore\t1\nof\t1\ntest\t4\nthis\t1\nword\t1\n",
      out);
  Counters ctrs = job.getCounters();
  System.out.println("Counters: " + ctrs);
  long combineIn = ctrs.findCounter(COUNTER_GROUP, "COMBINE_INPUT_RECORDS").getValue();
  long combineOut = ctrs.findCounter(COUNTER_GROUP, "COMBINE_OUTPUT_RECORDS").getValue();
  long reduceIn = ctrs.findCounter(COUNTER_GROUP, "REDUCE_INPUT_RECORDS").getValue();
  long mapOut = ctrs.findCounter(COUNTER_GROUP, "MAP_OUTPUT_RECORDS").getValue();
  long reduceOut = ctrs.findCounter(COUNTER_GROUP, "REDUCE_OUTPUT_RECORDS").getValue();
  long reduceGrps = ctrs.findCounter(COUNTER_GROUP, "REDUCE_INPUT_GROUPS").getValue();
  assertEquals("map out = combine in", mapOut, combineIn);
  assertEquals("combine out = reduce in", combineOut, reduceIn);
  assertTrue("combine in > combine out", combineIn > combineOut);
  assertEquals("reduce groups = reduce out", reduceGrps, reduceOut);
  String group = "Random Group";
  CounterGroup ctrGrp = ctrs.getGroup(group);
  assertEquals(0, ctrGrp.size());
}
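For this fixed input the counter invariants are easy to check by hand, assuming one map task per input file and a single combine pass per map task (which is what the local runner does with input this small): the two files contain 9 and 2 words, so MAP_OUTPUT_RECORDS = COMBINE_INPUT_RECORDS = 11; the combiner collapses each map's output to its distinct words (7 and 2), so COMBINE_OUTPUT_RECORDS = REDUCE_INPUT_RECORDS = 9; and the reducer merges "test" across the two maps, leaving REDUCE_INPUT_GROUPS = REDUCE_OUTPUT_RECORDS = 8, exactly the eight lines asserted against part-r-00000.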
Example 5: runWordCount
import org.apache.hadoop.examples.WordCount; // import the required package/class
private void runWordCount(Configuration conf)
    throws IOException, InterruptedException, ClassNotFoundException {
  final String COUNTER_GROUP = "org.apache.hadoop.mapred.Task$Counter";
  localFs.delete(new Path(TEST_ROOT_DIR + "/in"), true);
  localFs.delete(new Path(TEST_ROOT_DIR + "/out"), true);
  writeFile("in/part1", "this is a test\nof word count test\ntest\n");
  writeFile("in/part2", "more test");
  Job job = new Job(conf, "word count");
  job.setJarByClass(WordCount.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setInputFormatClass(TrackingTextInputFormat.class);
  FileInputFormat.addInputPath(job, new Path(TEST_ROOT_DIR + "/in"));
  FileOutputFormat.setOutputPath(job, new Path(TEST_ROOT_DIR + "/out"));
  // the job ID appears at submission time and stays set afterwards
  assertNull("job.getJobID() must be null before the job is submitted",
      job.getJobID());
  job.submit();
  assertNotNull("job.getJobID() can't be null after the job is submitted",
      job.getJobID());
  assertTrue(job.waitForCompletion(false));
  assertNotNull("job.getJobID() can't be null again after the job is finished",
      job.getJobID());
  String out = readFile("out/part-r-00000");
  System.out.println(out);
  assertEquals("a\t1\ncount\t1\nis\t1\nmore\t1\nof\t1\ntest\t4\nthis\t1\nword\t1\n",
      out);
  Counters ctrs = job.getCounters();
  System.out.println("Counters: " + ctrs);
  long combineIn = ctrs.findCounter(COUNTER_GROUP, "COMBINE_INPUT_RECORDS").getValue();
  long combineOut = ctrs.findCounter(COUNTER_GROUP, "COMBINE_OUTPUT_RECORDS").getValue();
  long reduceIn = ctrs.findCounter(COUNTER_GROUP, "REDUCE_INPUT_RECORDS").getValue();
  long mapOut = ctrs.findCounter(COUNTER_GROUP, "MAP_OUTPUT_RECORDS").getValue();
  long reduceOut = ctrs.findCounter(COUNTER_GROUP, "REDUCE_OUTPUT_RECORDS").getValue();
  long reduceGrps = ctrs.findCounter(COUNTER_GROUP, "REDUCE_INPUT_GROUPS").getValue();
  assertEquals("map out = combine in", mapOut, combineIn);
  assertEquals("combine out = reduce in", combineOut, reduceIn);
  assertTrue("combine in > combine out", combineIn > combineOut);
  assertEquals("reduce groups = reduce out", reduceGrps, reduceOut);
  String group = "Random Group";
  CounterGroup ctrGrp = ctrs.getGroup(group);
  assertEquals(0, ctrGrp.size());
}
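Example 5 uses the asynchronous submit() rather than letting waitForCompletion() do the submission, which is why the job ID becomes observable before the job finishes. The same pattern with manual polling looks roughly like this sketch (the one-second interval is arbitrary, and the enclosing method is assumed to declare IOException and InterruptedException):

job.submit();                      // returns immediately; the job now has an ID
System.out.println("submitted as " + job.getJobID());
while (!job.isComplete()) {        // poll until the job reaches a terminal state
  Thread.sleep(1000);
}
System.out.println(job.isSuccessful() ? "succeeded" : "failed");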
Example 6: runSecondarySort
import org.apache.hadoop.examples.WordCount; // import the required package/class
private void runSecondarySort(Configuration conf)
    throws IOException, InterruptedException, ClassNotFoundException {
  FileSystem localFs = FileSystem.getLocal(conf);
  localFs.delete(new Path(TEST_ROOT_DIR + "/in"), true);
  localFs.delete(new Path(TEST_ROOT_DIR + "/out"), true);
  TestMapReduceLocal.writeFile("in/part1",
      "-1 -4\n-3 23\n5 10\n-1 -2\n-1 300\n-1 10\n4 1\n" +
      "4 2\n4 10\n4 -1\n4 -10\n10 20\n10 30\n10 25\n");
  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(WordCount.class);
  job.setNumReduceTasks(2);
  job.setMapperClass(SecondarySort.MapClass.class);
  job.setReducerClass(SecondarySort.Reduce.class);
  // group and partition by the first int in the pair
  job.setPartitionerClass(FirstPartitioner.class);
  job.setGroupingComparatorClass(FirstGroupingComparator.class);
  // the map output is IntPair, IntWritable
  job.setMapOutputKeyClass(IntPair.class);
  job.setMapOutputValueClass(IntWritable.class);
  // the reduce output is Text, IntWritable
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(TEST_ROOT_DIR + "/in"));
  FileOutputFormat.setOutputPath(job, new Path(TEST_ROOT_DIR + "/out"));
  assertTrue(job.waitForCompletion(true));
  String out = TestMapReduceLocal.readFile("out/part-r-00000");
  assertEquals("------------------------------------------------\n" +
      "4\t-10\n4\t-1\n4\t1\n4\t2\n4\t10\n" +
      "------------------------------------------------\n" +
      "10\t20\n10\t25\n10\t30\n", out);
  out = TestMapReduceLocal.readFile("out/part-r-00001");
  assertEquals("------------------------------------------------\n" +
      "-3\t23\n" +
      "------------------------------------------------\n" +
      "-1\t-4\n-1\t-2\n-1\t10\n-1\t300\n" +
      "------------------------------------------------\n" +
      "5\t10\n", out);
}
Example 7: runWordCount
import org.apache.hadoop.examples.WordCount; // import the required package/class
private void runWordCount(Configuration conf)
    throws IOException, InterruptedException, ClassNotFoundException {
  final String COUNTER_GROUP = "org.apache.hadoop.mapreduce.TaskCounter";
  localFs.delete(new Path(TEST_ROOT_DIR + "/in"), true);
  localFs.delete(new Path(TEST_ROOT_DIR + "/out"), true);
  writeFile("in/part1", "this is a test\nof word count test\ntest\n");
  writeFile("in/part2", "more test");
  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(WordCount.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setInputFormatClass(TrackingTextInputFormat.class);
  FileInputFormat.addInputPath(job, new Path(TEST_ROOT_DIR + "/in"));
  FileOutputFormat.setOutputPath(job, new Path(TEST_ROOT_DIR + "/out"));
  assertTrue(job.waitForCompletion(false));
  String out = readFile("out/part-r-00000");
  System.out.println(out);
  assertEquals("a\t1\ncount\t1\nis\t1\nmore\t1\nof\t1\ntest\t4\nthis\t1\nword\t1\n",
      out);
  Counters ctrs = job.getCounters();
  System.out.println("Counters: " + ctrs);
  // the maps must have read at least some input bytes
  long mapIn = ctrs.findCounter(FileInputFormat.COUNTER_GROUP,
      FileInputFormat.BYTES_READ).getValue();
  assertTrue(mapIn != 0);
  long combineIn = ctrs.findCounter(COUNTER_GROUP, "COMBINE_INPUT_RECORDS").getValue();
  long combineOut = ctrs.findCounter(COUNTER_GROUP, "COMBINE_OUTPUT_RECORDS").getValue();
  long reduceIn = ctrs.findCounter(COUNTER_GROUP, "REDUCE_INPUT_RECORDS").getValue();
  long mapOut = ctrs.findCounter(COUNTER_GROUP, "MAP_OUTPUT_RECORDS").getValue();
  long reduceOut = ctrs.findCounter(COUNTER_GROUP, "REDUCE_OUTPUT_RECORDS").getValue();
  long reduceGrps = ctrs.findCounter(COUNTER_GROUP, "REDUCE_INPUT_GROUPS").getValue();
  long mergedMapOutputs = ctrs.findCounter(COUNTER_GROUP, "MERGED_MAP_OUTPUTS").getValue();
  long shuffledMaps = ctrs.findCounter(COUNTER_GROUP, "SHUFFLED_MAPS").getValue();
  assertEquals("map out = combine in", mapOut, combineIn);
  assertEquals("combine out = reduce in", combineOut, reduceIn);
  assertTrue("combine in > combine out", combineIn > combineOut);
  assertEquals("reduce groups = reduce out", reduceGrps, reduceOut);
  // two input files -> two map tasks -> two shuffled and merged map outputs
  assertEquals("Mismatch in mergedMapOutputs", mergedMapOutputs, 2);
  assertEquals("Mismatch in shuffledMaps", shuffledMaps, 2);
  String group = "Random Group";
  CounterGroup ctrGrp = ctrs.getGroup(group);
  assertEquals(0, ctrGrp.size());
}
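Example 7 is the only one that uses the post-0.21 counter group name org.apache.hadoop.mapreduce.TaskCounter instead of the legacy org.apache.hadoop.mapred.Task$Counter. With the newer API, the string-based lookups can be replaced by the type-safe enum overload, roughly like this sketch (assumes an import of org.apache.hadoop.mapreduce.TaskCounter):

// Look up built-in task counters via the TaskCounter enum instead of
// hard-coded group and counter name strings.
Counters ctrs = job.getCounters();
long mapOut = ctrs.findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue();
long combineIn = ctrs.findCounter(TaskCounter.COMBINE_INPUT_RECORDS).getValue();
long shuffledMaps = ctrs.findCounter(TaskCounter.SHUFFLED_MAPS).getValue();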
Example 8: runWordCount
import org.apache.hadoop.examples.WordCount; // import the required package/class
private void runWordCount(Configuration conf)
    throws IOException, InterruptedException, ClassNotFoundException {
  final String COUNTER_GROUP = "org.apache.hadoop.mapred.Task$Counter";
  localFs.delete(new Path(TEST_ROOT_DIR + "/in"), true);
  localFs.delete(new Path(TEST_ROOT_DIR + "/out"), true);
  writeFile("in/part1", "this is a test\nof word count test\ntest\n");
  writeFile("in/part2", "more test");
  Job job = new Job(conf, "word count");
  job.setJarByClass(WordCount.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setInputFormatClass(TrackingTextInputFormat.class);
  FileInputFormat.addInputPath(job, new Path(TEST_ROOT_DIR + "/in"));
  FileOutputFormat.setOutputPath(job, new Path(TEST_ROOT_DIR + "/out"));
  assertTrue(job.waitForCompletion(false));
  String out = readFile("out/part-r-00000");
  System.out.println(out);
  assertEquals("a\t1\ncount\t1\nis\t1\nmore\t1\nof\t1\ntest\t4\nthis\t1\nword\t1\n",
      out);
  Counters ctrs = job.getCounters();
  System.out.println("Counters: " + ctrs);
  long combineIn = ctrs.findCounter(COUNTER_GROUP, "COMBINE_INPUT_RECORDS").getValue();
  long combineOut = ctrs.findCounter(COUNTER_GROUP, "COMBINE_OUTPUT_RECORDS").getValue();
  long reduceIn = ctrs.findCounter(COUNTER_GROUP, "REDUCE_INPUT_RECORDS").getValue();
  long mapOut = ctrs.findCounter(COUNTER_GROUP, "MAP_OUTPUT_RECORDS").getValue();
  assertEquals("map out = combine in", mapOut, combineIn);
  assertEquals("combine out = reduce in", combineOut, reduceIn);
  assertTrue("combine in > combine out", combineIn > combineOut);
  String group = "Random Group";
  CounterGroup ctrGrp = ctrs.getGroup(group);
  assertEquals(0, ctrGrp.size());
}