This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.TextInputFormat.setInputPaths. If you are unsure how TextInputFormat.setInputPaths works, how to call it, or what real-world uses of it look like, the curated code examples below should help. You can also explore further usage examples for the enclosing class, org.apache.hadoop.mapreduce.lib.input.TextInputFormat.
The following presents 15 code examples of TextInputFormat.setInputPaths, sorted by popularity by default.
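Before the examples, here is a minimal, self-contained sketch of the pattern they all share: declare TextInputFormat as the job's input format and point it at the input with TextInputFormat.setInputPaths, which takes one or more Path arguments or, as Examples 5 and 9 show, a comma-separated String of paths. The class names SetInputPathsSketch and PassThroughMapper, the job name, and the map-only output settings are placeholders chosen for illustration, not taken from any of the listed examples.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetInputPathsSketch {
  // Pass-through mapper used only to make the sketch runnable; it simply
  // re-emits each line keyed by its byte offset. Not taken from the examples below.
  public static class PassThroughMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      context.write(new Text(Long.toString(key.get())), value);
    }
  }

  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "setInputPaths-sketch");
    job.setJarByClass(SetInputPathsSketch.class);
    job.setMapperClass(PassThroughMapper.class);
    job.setNumReduceTasks(0); // map-only job, so no reducer is configured
    job.setInputFormatClass(TextInputFormat.class);
    // The method under discussion: point the job at the input directory or file.
    // It also accepts a varargs list of Paths or a comma-separated String of paths.
    TextInputFormat.setInputPaths(job, new Path(args[0]));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    TextOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
Assuming the class is packaged into an application jar (app.jar here is only a placeholder name), it could be launched with something like: hadoop jar app.jar SetInputPathsSketch /path/to/input /path/to/output.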
Example 1: main
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  job.setMapperClass(SumMapper.class);
  job.setReducerClass(SumReducer.class);
  job.setJarByClass(Sum.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);
  TextInputFormat.setInputPaths(job, new Path(args[0]));
  TextOutputFormat.setOutputPath(job, new Path(args[1]));
  job.waitForCompletion(true);
}
Example 2: main
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  job.setMapperClass(DataDividerMapper.class);
  job.setReducerClass(DataDividerReducer.class);
  job.setJarByClass(DataDividerByUser.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(Text.class);
  TextInputFormat.setInputPaths(job, new Path(args[0]));
  TextOutputFormat.setOutputPath(job, new Path(args[1]));
  job.waitForCompletion(true);
}
Example 3: main
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  job.setMapperClass(NormalizeMapper.class);
  job.setReducerClass(NormalizeReducer.class);
  job.setJarByClass(Normalize.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  TextInputFormat.setInputPaths(job, new Path(args[0]));
  TextOutputFormat.setOutputPath(job, new Path(args[1]));
  job.waitForCompletion(true);
}
Example 4: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
@Override
public int run(String[] args) throws Exception {
  Opts opts = new Opts();
  opts.parseArgs(WordCount.class.getName(), args);
  Job job = Job.getInstance(getConf());
  job.setJobName(WordCount.class.getName());
  job.setJarByClass(this.getClass());
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.setInputPaths(job, new Path(opts.inputDirectory));
  job.setMapperClass(MapClass.class);
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(AccumuloOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Mutation.class);
  opts.setAccumuloConfigs(job);
  job.waitForCompletion(true);
  return 0;
}
Example 5: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
@Override
public int run(String[] args) throws Exception {
  String instance = args[0];
  String zookeepers = args[1];
  String user = args[2];
  String tokenFile = args[3];
  String input = args[4];
  String tableName = args[5];
  Job job = Job.getInstance(getConf());
  job.setJobName(TokenFileWordCount.class.getName());
  job.setJarByClass(this.getClass());
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.setInputPaths(job, input);
  job.setMapperClass(MapClass.class);
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(AccumuloOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Mutation.class);
  // AccumuloInputFormat is not used here, but it uses the same functions.
  AccumuloOutputFormat.setZooKeeperInstance(job, ClientConfiguration.loadDefault().withInstance(instance).withZkHosts(zookeepers));
  AccumuloOutputFormat.setConnectorInfo(job, user, tokenFile);
  AccumuloOutputFormat.setCreateTables(job, true);
  AccumuloOutputFormat.setDefaultTableName(job, tableName);
  job.waitForCompletion(true);
  return 0;
}
Example 6: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public int run(String[] args) throws Exception {
  MetaConfig config = new MetaConfig();
  config.fromArray(args);
  // LOGGER.info(config.getInput());
  // LOGGER.info(config.getOutput());
  Job job = Job.getInstance(getConf());
  job.setJobName("meta");
  job.setJarByClass(MetaDriver.class);
  // define the paths
  Path mapInputPath = new Path(config.getInput());
  Path mapOutputPath = new Path(config.getOutput());
  // define the mapper
  job.setMapperClass(MetaMapper.class);
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.setInputPaths(job, mapInputPath);
  // define the reducer (identity reducer)
  job.setNumReduceTasks(NUM_REDUCER);
  // output
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  TextOutputFormat.setOutputPath(job, mapOutputPath);
  // clean up the old output
  mapOutputPath.getFileSystem(job.getConfiguration()).delete(mapOutputPath, true);
  // run the job and wait until it completes
  return job.waitForCompletion(true) ? 0 : 1;
}
Example 7: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public int run(String[] args) throws Exception {
  PreSortConfig config = new PreSortConfig();
  config.fromArray(args);
  Job job = Job.getInstance(getConf());
  job.setJobName("pre-sort");
  job.setJarByClass(PreSortDriver.class);
  Path mapInputPath = new Path(config.getInput());
  Path mapOutputPath = new Path(config.getOutput());
  LOGGER.info("use " + mapInputPath.toString() + " as pre-sort input ");
  LOGGER.info("use " + mapOutputPath.toString() + " as pre-sort output ");
  // define the mapper
  job.getConfiguration().set(PreSortMapper.COLUMN_INDEX_CONFIG_NAME, config.getKeyColumnAsString());
  job.setMapperClass(PreSortMapper.class);
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.setInputPaths(job, mapInputPath);
  // define the reducer
  job.setNumReduceTasks(NUM_REDUCER);
  // define the output; NOTE: no custom reducer class is set, so the identity reducer is used
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, mapOutputPath);
  // clean up the output folder
  mapOutputPath.getFileSystem(job.getConfiguration()).delete(mapOutputPath, true);
  // run the job and wait until it completes
  return job.waitForCompletion(true) ? 0 : 1;
}
Example 8: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
@Override
public int run(String[] args) throws Exception {
  if (args.length != 3) {
    System.err.println("Usage: Tester <input> <bloom filter> <output>");
    System.exit(1);
  }
  Path input = new Path(args[0]);
  URI bloom = new URI(args[1]);
  Path output = new Path(args[2]);
  // create the Job object and set the jar by class
  Job job = Job.getInstance(getConf(), "Bloom Filtering");
  job.setJarByClass(MRBloomFilter.class);
  // add the Bloom filter file URI to the distributed cache
  job.addCacheFile(bloom);
  // set the mapper class
  job.setMapperClass(BloomMapper.class);
  // map-only job: set the number of reduce tasks to 0
  job.setNumReduceTasks(0);
  // set the input paths
  TextInputFormat.setInputPaths(job, input);
  // set the output path
  TextOutputFormat.setOutputPath(job, output);
  // set the output key class to Text
  job.setOutputKeyClass(Text.class);
  // set the output value class to NullWritable
  job.setOutputValueClass(NullWritable.class);
  // execute the job and return 0 if successful
  return job.waitForCompletion(true) ? 0 : 1;
}
Example 9: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public int run(String[] args) throws Exception {
  String input = args[0];
  Configuration conf = getConf();
  Job job = Job.getInstance(conf, Mapper2HbaseDemo.class.getSimpleName());
  job.setJarByClass(Mapper2HbaseDemo.class);
  job.setMapperClass(Map.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.setInputPaths(job, input);
  job.setOutputFormatClass(NullOutputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
Example 10: main
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {
  // configure the n-gram MapReduce job
  Configuration conf1 = new Configuration();
  conf1.set("textinputformat.record.delimiter", "."); // read a complete sentence as one record
  conf1.set("GRAM_NUMBER", args[2]);
  Job job1 = Job.getInstance(conf1);
  job1.setNumReduceTasks(3);
  job1.setJobName("NGram");
  job1.setJarByClass(Dispatcher.class);
  job1.setMapperClass(NGramBuilder.NGramMapper.class);
  job1.setReducerClass(NGramBuilder.NGramReducer.class);
  job1.setOutputKeyClass(Text.class);
  job1.setOutputValueClass(IntWritable.class);
  job1.setInputFormatClass(TextInputFormat.class); // default format: reads lines of text files
  job1.setOutputFormatClass(TextOutputFormat.class); // default format: key \t value
  TextInputFormat.setInputPaths(job1, new Path(args[0]));
  TextOutputFormat.setOutputPath(job1, new Path(args[1]));
  job1.waitForCompletion(true); // the language model job must not start until the n-gram library is completely built
  // configure the language model MapReduce job
  Configuration conf2 = new Configuration();
  conf2.set("THRESHOLD", args[3]);
  conf2.set("TOP_K", args[4]);
  DBConfiguration.configureDB(conf2, "com.mysql.jdbc.Driver", "jdbc:mysql://127.0.0.1:3306/tp", "root", "123456"); // establish a connection to the MySQL database
  Job job2 = Job.getInstance(conf2);
  job2.setNumReduceTasks(3);
  job2.setJobName("LModel");
  job2.setJarByClass(Dispatcher.class);
  job2.addArchiveToClassPath(new Path("/mysql/mysql-connector-java-5.1.39-bin.jar")); // putting this jar file into jre/lib/ext is recommended
  job2.setMapperClass(LanguageModel.ModelMapper.class);
  job2.setReducerClass(LanguageModel.ModelReducer.class);
  job2.setMapOutputKeyClass(Text.class); // the mapper emits a different key type than the reducer
  job2.setMapOutputValueClass(Text.class); // the mapper emits a different value type than the reducer
  job2.setOutputKeyClass(DBOutputWritable.class);
  job2.setOutputValueClass(NullWritable.class);
  job2.setInputFormatClass(TextInputFormat.class);
  job2.setOutputFormatClass(DBOutputFormat.class);
  TextInputFormat.setInputPaths(job2, new Path(args[1]));
  DBOutputFormat.setOutput(job2, "LanguageModel", new String[] {"starter", "follower", "probability"});
  System.exit(job2.waitForCompletion(true) ? 0 : 1);
}
Example 11: testCombiner
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
@Test
public void testCombiner() throws Exception {
  if (!new File(TEST_ROOT_DIR).mkdirs()) {
    throw new RuntimeException("Could not create test dir: " + TEST_ROOT_DIR);
  }
  File in = new File(TEST_ROOT_DIR, "input");
  if (!in.mkdirs()) {
    throw new RuntimeException("Could not create test dir: " + in);
  }
  File out = new File(TEST_ROOT_DIR, "output");
  PrintWriter pw = new PrintWriter(new FileWriter(new File(in, "data.txt")));
  pw.println("A|a,1");
  pw.println("A|b,2");
  pw.println("B|a,3");
  pw.println("B|b,4");
  pw.println("B|c,5");
  pw.close();
  JobConf conf = new JobConf();
  conf.set("mapreduce.framework.name", "local");
  Job job = new Job(conf);
  TextInputFormat.setInputPaths(job, new Path(in.getPath()));
  TextOutputFormat.setOutputPath(job, new Path(out.getPath()));
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(LongWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setGroupingComparatorClass(GroupComparator.class);
  job.setCombinerKeyGroupingComparatorClass(GroupComparator.class);
  job.setCombinerClass(Combiner.class);
  job.getConfiguration().setInt("min.num.spills.for.combine", 0);
  job.submit();
  job.waitForCompletion(false);
  if (job.isSuccessful()) {
    Counters counters = job.getCounters();
    long combinerInputRecords = counters.findCounter(
        "org.apache.hadoop.mapreduce.TaskCounter",
        "COMBINE_INPUT_RECORDS").getValue();
    long combinerOutputRecords = counters.findCounter(
        "org.apache.hadoop.mapreduce.TaskCounter",
        "COMBINE_OUTPUT_RECORDS").getValue();
    Assert.assertTrue(combinerInputRecords > 0);
    Assert.assertTrue(combinerInputRecords > combinerOutputRecords);
    BufferedReader br = new BufferedReader(new FileReader(
        new File(out, "part-r-00000")));
    Set<String> output = new HashSet<String>();
    String line = br.readLine();
    Assert.assertNotNull(line);
    output.add(line.substring(0, 1) + line.substring(4, 5));
    line = br.readLine();
    Assert.assertNotNull(line);
    output.add(line.substring(0, 1) + line.substring(4, 5));
    line = br.readLine();
    Assert.assertNull(line);
    br.close();
    Set<String> expected = new HashSet<String>();
    expected.add("A2");
    expected.add("B5");
    Assert.assertEquals(expected, output);
  } else {
    Assert.fail("Job failed");
  }
}
Example 12: main
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  job.setMapperClass(MatrixGeneratorMapper.class);
  job.setReducerClass(MatrixGeneratorReducer.class);
  job.setJarByClass(CoOccurrenceMatrixGenerator.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  TextInputFormat.setInputPaths(job, new Path(args[0]));
  TextOutputFormat.setOutputPath(job, new Path(args[1]));
  job.waitForCompletion(true);
}
Example 13: main
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {
  Configuration conf1 = new Configuration();
  conf1.set("textinputformat.record.delimiter", ".");
  conf1.set("noGram", args[2]);
  // first job
  Job job1 = Job.getInstance(conf1);
  job1.setJobName("NGram");
  job1.setJarByClass(Driver.class);
  job1.setMapperClass(NGramLibraryBuilder.NGramMapper.class);
  job1.setReducerClass(NGramLibraryBuilder.NGramReducer.class);
  job1.setOutputKeyClass(Text.class);
  job1.setOutputValueClass(IntWritable.class);
  job1.setInputFormatClass(TextInputFormat.class);
  job1.setOutputFormatClass(TextOutputFormat.class);
  TextInputFormat.setInputPaths(job1, new Path(args[0]));
  TextOutputFormat.setOutputPath(job1, new Path(args[1]));
  job1.waitForCompletion(true);
  // second job
  Configuration conf2 = new Configuration();
  conf2.set("threashold", args[3]);
  conf2.set("n", args[4]);
  DBConfiguration.configureDB(conf2,
      "com.mysql.jdbc.Driver",                // driver class
      "jdbc:mysql://10.101.0.163:8889/test",  // database URL
      "root",                                 // user name
      "root");                                // password
  Job job2 = Job.getInstance(conf2);
  job2.setJobName("LanguageModel");
  job2.setJarByClass(Driver.class);
  job2.addArchiveToClassPath(new Path("/mysql/mysql-connector-java-5.1.39-bin.jar"));
  job2.setMapOutputKeyClass(Text.class);
  job2.setMapOutputValueClass(Text.class);
  job2.setOutputKeyClass(Text.class);
  job2.setOutputValueClass(NullWritable.class);
  job2.setMapperClass(LanguageModel.Map.class);
  job2.setReducerClass(LanguageModel.Reduce.class);
  job2.setInputFormatClass(TextInputFormat.class);
  job2.setOutputFormatClass(DBOutputFormat.class);
  DBOutputFormat.setOutput(
      job2,
      "output", // output table name
      new String[] { "starting_phrase", "following_word", "count" } // table columns
  );
  // the input path for this job must match the first job's output path
  TextInputFormat.setInputPaths(job2, new Path(args[1]));
  System.exit(job2.waitForCompletion(true) ? 0 : 1);
}
Example 14: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
@Override
public int run(String[] args) {
  Opts opts = new Opts();
  opts.parseArgs(BulkIngestExample.class.getName(), args);
  Configuration conf = getConf();
  PrintStream out = null;
  try {
    Job job = Job.getInstance(conf);
    job.setJobName("bulk ingest example");
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(MapClass.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(ReduceClass.class);
    job.setOutputFormatClass(AccumuloFileOutputFormat.class);
    opts.setAccumuloConfigs(job);
    Connector connector = opts.getConnector();
    TextInputFormat.setInputPaths(job, new Path(opts.inputDir));
    AccumuloFileOutputFormat.setOutputPath(job, new Path(opts.workDir + "/files"));
    FileSystem fs = FileSystem.get(conf);
    out = new PrintStream(new BufferedOutputStream(fs.create(new Path(opts.workDir + "/splits.txt"))));
    Collection<Text> splits = connector.tableOperations().listSplits(opts.getTableName(), 100);
    for (Text split : splits)
      out.println(Base64.getEncoder().encodeToString(TextUtil.getBytes(split)));
    job.setNumReduceTasks(splits.size() + 1);
    out.close();
    job.setPartitionerClass(RangePartitioner.class);
    RangePartitioner.setSplitFile(job, opts.workDir + "/splits.txt");
    job.waitForCompletion(true);
    Path failures = new Path(opts.workDir, "failures");
    fs.delete(failures, true);
    fs.mkdirs(new Path(opts.workDir, "failures"));
    // With HDFS permissions on, we need to make sure the Accumulo user can read/move the rfiles
    FsShell fsShell = new FsShell(conf);
    fsShell.run(new String[] {"-chmod", "-R", "777", opts.workDir});
    connector.tableOperations().importDirectory(opts.getTableName(), opts.workDir + "/files", opts.workDir + "/failures", false);
  } catch (Exception e) {
    throw new RuntimeException(e);
  } finally {
    if (out != null)
      out.close();
  }
  return 0;
}
Example 15: testCombiner
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public void testCombiner() throws Exception {
  if (!new File(TEST_ROOT_DIR).mkdirs()) {
    throw new RuntimeException("Could not create test dir: " + TEST_ROOT_DIR);
  }
  File in = new File(TEST_ROOT_DIR, "input");
  if (!in.mkdirs()) {
    throw new RuntimeException("Could not create test dir: " + in);
  }
  File out = new File(TEST_ROOT_DIR, "output");
  PrintWriter pw = new PrintWriter(new FileWriter(new File(in, "data.txt")));
  pw.println("A|a,1");
  pw.println("A|b,2");
  pw.println("B|a,3");
  pw.println("B|b,4");
  pw.println("B|c,5");
  pw.close();
  Job job = new Job();
  TextInputFormat.setInputPaths(job, new Path(in.getPath()));
  TextOutputFormat.setOutputPath(job, new Path(out.getPath()));
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(LongWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setGroupingComparatorClass(GroupComparator.class);
  job.setCombinerKeyGroupingComparatorClass(GroupComparator.class);
  job.setCombinerClass(Combiner.class);
  job.getConfiguration().setInt("min.num.spills.for.combine", 0);
  job.submit();
  job.waitForCompletion(false);
  if (job.isSuccessful()) {
    Counters counters = job.getCounters();
    long combinerInputRecords = counters.findCounter(
        "org.apache.hadoop.mapreduce.TaskCounter",
        "COMBINE_INPUT_RECORDS").getValue();
    long combinerOutputRecords = counters.findCounter(
        "org.apache.hadoop.mapreduce.TaskCounter",
        "COMBINE_OUTPUT_RECORDS").getValue();
    Assert.assertTrue(combinerInputRecords > 0);
    Assert.assertTrue(combinerInputRecords > combinerOutputRecords);
    BufferedReader br = new BufferedReader(new FileReader(
        new File(out, "part-r-00000")));
    Set<String> output = new HashSet<String>();
    String line = br.readLine();
    Assert.assertNotNull(line);
    output.add(line);
    line = br.readLine();
    Assert.assertNotNull(line);
    output.add(line);
    line = br.readLine();
    Assert.assertNull(line);
    br.close();
    Set<String> expected = new HashSet<String>();
    expected.add("A\t2");
    expected.add("B\t5");
    Assert.assertEquals(expected, output);
  } else {
    Assert.fail("Job failed");
  }
}