

Java TextOutputFormat Class Code Examples

This article collects typical usage examples of the Java class org.apache.hadoop.mapred.TextOutputFormat. If you are wondering what TextOutputFormat does, how to use it, or what working code that uses it looks like, the curated examples below should help.


The TextOutputFormat class belongs to the org.apache.hadoop.mapred package. Fifteen code examples of the class are shown below, ordered by popularity.
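Before the individual examples, a minimal sketch of the old-API pattern they all share may help orient readers; the driver class and input/output paths here are illustrative placeholders, not taken from any example below.

JobConf conf = new JobConf(MyDriver.class);          // MyDriver is a hypothetical driver class
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);        // writes plain key<TAB>value text lines
FileInputFormat.setInputPaths(conf, new Path("/illustrative/in"));
FileOutputFormat.setOutputPath(conf, new Path("/illustrative/out"));
JobClient.runJob(conf);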

Example 1: getJob

import org.apache.hadoop.mapred.TextOutputFormat; // import the required package/class
/**
 * Sets up a job conf for the given job using the given config object. Ensures
 * that the correct input format, the mapper and reducer classes, and the
 * input and output key and value classes are set, along with any other job
 * configuration.
 * 
 * @param config the configuration to build the job from
 * @return JobConf representing the job to be run
 * @throws IOException
 */
private JobConf getJob(ConfigExtractor config) throws IOException {
  JobConf job = new JobConf(config.getConfig(), SliveTest.class);
  job.setInputFormat(DummyInputFormat.class);
  FileOutputFormat.setOutputPath(job, config.getOutputPath());
  job.setMapperClass(SliveMapper.class);
  job.setPartitionerClass(SlivePartitioner.class);
  job.setReducerClass(SliveReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormat(TextOutputFormat.class);
  TextOutputFormat.setCompressOutput(job, false);
  job.setNumReduceTasks(config.getReducerAmount());
  job.setNumMapTasks(config.getMapAmount());
  return job;
}
 
Developer: naver | Project: hadoop | Lines: 26 | Source: SliveTest.java
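A hedged sketch of how this helper would typically be used, assuming a populated ConfigExtractor named config as in the method above:

JobConf job = getJob(config);
RunningJob running = JobClient.runJob(job);  // submit and block until the Slive job completes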

Example 2: configure

import org.apache.hadoop.mapred.TextOutputFormat; // import the required package/class
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);

  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(1);

  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up input data: two lines written to a single file
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();
  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }
  
  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    //make sure we get what we expect as the first line, and also
    //that we have two lines
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}
 
Developer: naver | Project: hadoop | Lines: 67 | Source: TestKeyFieldBasedComparator.java
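A hedged sketch of how this test helper is usually invoked; the option strings follow the POSIX sort -k syntax that KeyFieldBasedComparator accepts, and the expected orderings are inferred from the assertions above:

configure("-k1,1", 1);   // ascending sort on field 1: line1 is expected first
configure("-k1,1r", 2);  // reversed sort on field 1: line2 is expected first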

Example 3: addDependencyJars

import org.apache.hadoop.mapred.TextOutputFormat; // import the required package/class
/**
 * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
 */
public static void addDependencyJars(JobConf job) throws IOException {
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
    job,
    // when making changes here, consider also mapreduce.TableMapReduceUtil
    // pull job classes
    job.getMapOutputKeyClass(),
    job.getMapOutputValueClass(),
    job.getOutputKeyClass(),
    job.getOutputValueClass(),
    job.getPartitionerClass(),
    job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
    job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
    job.getCombinerClass());
}
 
Developer: fengchen8086 | Project: ditb | Lines: 19 | Source: TableMapReduceUtil.java

Example 4: getOldAPIJobconf

import org.apache.hadoop.mapred.TextOutputFormat; // import the required package/class
private static JobConf getOldAPIJobconf(Configuration configuration, String name,
                                        String input, String output)
    throws Exception {
  final JobConf jobConf = new JobConf(configuration);
  final FileSystem fs = FileSystem.get(configuration);
  if (fs.exists(new Path(output))) {
    fs.delete(new Path(output), true);
  }
  fs.close();
  jobConf.setJobName(name);
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(IntWritable.class);
  jobConf.setMapperClass(WordCountWithOldAPI.TokenizerMapperWithOldAPI.class);
  jobConf.setCombinerClass(WordCountWithOldAPI.IntSumReducerWithOldAPI.class);
  jobConf.setReducerClass(WordCountWithOldAPI.IntSumReducerWithOldAPI.class);

  jobConf.setInputFormat(SequenceFileInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);

  FileInputFormat.setInputPaths(jobConf, new Path(input));
  FileOutputFormat.setOutputPath(jobConf, new Path(output));
  return jobConf;
}
 
Developer: aliyun-beta | Project: aliyun-oss-hadoop-fs | Lines: 24 | Source: OldAPICombinerTest.java

Example 5: main

import org.apache.hadoop.mapred.TextOutputFormat; // import the required package/class
public static void main(String[] args) throws Exception {

		JobConf conf = new JobConf(WeatherData.class);
		conf.setJobName("temp");

		// Note: the mapper's output types differ from the job's default
		// output types, so they must be set explicitly.
		conf.setMapOutputKeyClass(Text.class);
		conf.setMapOutputValueClass(Text.class);

		conf.setMapperClass(MaxTemperatureMapper.class);
		conf.setReducerClass(MaxTemperatureReducer.class);

		conf.setInputFormat(TextInputFormat.class);
		conf.setOutputFormat(TextOutputFormat.class);

		FileInputFormat.setInputPaths(conf, new Path(args[0]));
		FileOutputFormat.setOutputPath(conf, new Path(args[1]));

		JobClient.runJob(conf);

	}
 
Developer: gauravdangi | Project: Hadoop-CaseStudies | Lines: 24 | Source: WeatherData.java

Example 6: configure

import org.apache.hadoop.mapred.TextOutputFormat; // import the required package/class
@SuppressWarnings("rawtypes")
@Override
public void configure() {
	super.configure();
	outputFormat = new TextOutputFormat();
	Class<? extends CompressionCodec> codecClass = null;
	if (CompressEnum.NONE.name().equalsIgnoreCase(compress)) {
		codecClass = null;
	} else if (CompressEnum.GZIP.name().equalsIgnoreCase(compress)) {
		codecClass = org.apache.hadoop.io.compress.GzipCodec.class;
	} else if (CompressEnum.BZIP2.name().equalsIgnoreCase(compress)) {
		codecClass = org.apache.hadoop.io.compress.BZip2Codec.class;
	} else {
		throw new IllegalArgumentException("Unsupported compress format: "
				+ compress);
	}
	if (codecClass != null) {
		this.outputFormat.setOutputCompressorClass(jobConf, codecClass);
	}
}
 
Developer: DTStack | Project: jlogstash-output-plugin | Lines: 21 | Source: HdfsTextOutputFormat.java
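For comparison, the stock mapred API reaches the same result through static helpers that TextOutputFormat inherits from FileOutputFormat; a minimal sketch, assuming a JobConf named jobConf:

TextOutputFormat.setCompressOutput(jobConf, true);
TextOutputFormat.setOutputCompressorClass(jobConf, org.apache.hadoop.io.compress.GzipCodec.class);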

Example 7: main

import org.apache.hadoop.mapred.TextOutputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
	JobConf conf = new JobConf(WordCountOldAPI.class);
	conf.setJobName("old wordcount");

	conf.setOutputKeyClass(Text.class);
	conf.setOutputValueClass(IntWritable.class);

	conf.setMapperClass(Map.class);
	conf.setCombinerClass(Reduce.class);
	conf.setReducerClass(Reduce.class);

	conf.setInputFormat(TextInputFormat.class);
	conf.setOutputFormat(TextOutputFormat.class);

	FileInputFormat.setInputPaths(conf, new Path(args[0]));
	FileOutputFormat.setOutputPath(conf, new Path(args[1]));

	JobClient.runJob(conf);
}
 
Developer: zirpins | Project: bdelab | Lines: 20 | Source: WordCountOldAPI.java

Example 8: run

import org.apache.hadoop.mapred.TextOutputFormat; // import the required package/class
public void run(String[] args) throws Exception
{

  JobConf conf = new JobConf(this.getClass());
  conf.setJobName("wordcount");

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);

  conf.setMapperClass(Map.class);
  conf.setCombinerClass(Reduce.class);
  conf.setReducerClass(Reduce.class);

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  FileInputFormat.setInputPaths(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));

  JobClient.runJob(conf);
}
 
Developer: apache | Project: apex-malhar | Lines: 22 | Source: WordCount.java

Example 9: main

import org.apache.hadoop.mapred.TextOutputFormat; // import the required package/class
public static void main(String[] args) throws IOException {
	if (args.length != 1) {
		System.err.println("Usage: mapred1 outputpath");
		System.exit(1); // exit nonzero on a usage error
	}
	
	JobConf conf = new JobConf(mapred1.class);
	conf.setJobName("mapred1");
	
	conf.setOutputKeyClass(Text.class);
	conf.setOutputValueClass(IntWritable.class);
	 	
	conf.setMapperClass(Map.class);
	conf.setReducerClass(Reduce.class);
	
	conf.set("es.nodes", "10.149.3.3:9200");
	conf.setInputFormat(EsInputFormat.class);       
	conf.set("es.resource", "kb/doc");  
	conf.set("es.query", "{\"query\":{\"query_string\":{\"fields\":[\"article_dc_title\"],\"query\":\"IN HET ZUIDEN\"}}}");
	conf.setOutputFormat(TextOutputFormat.class);
	
	FileOutputFormat.setOutputPath(conf, new Path(args[0]));
	 	
	JobClient.runJob(conf);
}
 
Developer: NLeSC | Project: benchmarking-elasticsearch | Lines: 26 | Source: mapred1.java

Example 10: fillInWordCountMRJobConf

import org.apache.hadoop.mapred.TextOutputFormat; // import the required package/class
public static void fillInWordCountMRJobConf(JobConf conf) {
  String input = "select n_comment from tpch.nation";

  conf.setJobName("samplejob-wordcount");

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);

  conf.setMapperClass(Map.class);
  conf.setCombinerClass(Reduce.class);
  conf.setReducerClass(Reduce.class);

  conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  com.cloudera.recordservice.mr.RecordServiceConfig.setInputQuery(conf, input);
  setRandomOutputDir(conf);
}
 
Developer: cloudera | Project: RecordServiceClient | Lines: 18 | Source: TestMiniClusterController.java

Example 11: IDMappingJob

import org.apache.hadoop.mapred.TextOutputFormat; // import the required package/class
public static void IDMappingJob(String[] args) throws  IOException {

		JobConf job = new JobConf();
		new GenericOptionsParser(job, args);
		job.setJarByClass(HybridDriver.class);
		job.setJobName("Converting binary similarity scores to text");
		job.setMapperClass(IDMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setNumReduceTasks(0);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		Path inputPath = new Path(OUTPUT_DIR);
		job.setInputFormat(SequenceFileInputFormat.class);
		SequenceFileInputFormat.setInputPaths(job, inputPath);
		Path outputPath = new Path("SimilarityScores"); 
		job.setOutputFormat(TextOutputFormat.class);
		FileOutputFormat.setOutputPath(job, outputPath); // TextOutputFormat is in use, so call the shared FileOutputFormat helper
		FileSystem.get(job).delete(outputPath, true);
		HashPagesDriver.prepareDistribCache(job, HashPagesDriver.IDS_FILE2); // TODO: verify whether this cache setup is still needed
		JobSubmitter.run(job,"BINARY TO TEXT",job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE)); 
	}
 
Developer: mahaucsb | Project: pss | Lines: 24 | Source: HybridDriver.java

Example 12: main

import org.apache.hadoop.mapred.TextOutputFormat; // import the required package/class
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(DuplicateGraph.class);

    job.setJobName(DuplicateGraph.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(0);
    JobClient.runJob(job);
}
 
Developer: pregelix | Project: pregelix | Lines: 17 | Source: DuplicateGraph.java

Example 13: runParsing

import org.apache.hadoop.mapred.TextOutputFormat; // import the required package/class
public void runParsing(String inputPath, String outputPath) throws IOException {
  JobConf conf = new JobConf(Hits.class);

  // Delete folders
  FileSystem.get(conf).delete(new Path("wiki"), true);

  // Input / Mapper
  FileInputFormat.setInputPaths(conf, new Path(inputPath));
  conf.setInputFormat(DataInputFormat.class);
  conf.setMapperClass(DataParserMapper.class);

  // Output / Reducer
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));
  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setReducerClass(DataParserReducer.class);

  JobClient.runJob(conf);
}
 
Developer: becherd | Project: verteilteWebInf | Lines: 21 | Source: Hits.java

Example 14: getLinkIn

import org.apache.hadoop.mapred.TextOutputFormat; // import the required package/class
public void getLinkIn(String inputPath, String outputPath) throws IOException {
  JobConf conf = new JobConf(Hits.class);

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  FileInputFormat.setInputPaths(conf, new Path(inputPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));

  conf.setMapperClass(FromPagesMapper.class);
  conf.setReducerClass(FromPagesReducer.class);

  JobClient.runJob(conf);
}
 
Developer: becherd | Project: verteilteWebInf | Lines: 18 | Source: Hits.java

Example 15: initialize

import org.apache.hadoop.mapred.TextOutputFormat; // import the required package/class
private void initialize(String inputPath1, String inputPath2, String outputPath)
    throws IOException {
  JobConf conf = new JobConf(Hits.class);

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  FileInputFormat.addInputPath(conf, new Path(inputPath1));
  FileInputFormat.addInputPath(conf, new Path(inputPath2));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));

  conf.setMapperClass(InitAuthHubMapper.class);
  conf.setReducerClass(InitAuthHubReducer.class);

  JobClient.runJob(conf);
}
 
Developer: becherd | Project: verteilteWebInf | Lines: 20 | Source: Hits.java

Note: The org.apache.hadoop.mapred.TextOutputFormat class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their authors; copyright remains with the original authors, and distribution or use should follow each project's license. Do not republish without permission.