This article collects typical usage examples of the Java class org.apache.hadoop.mapred.TextOutputFormat. If you are wondering what TextOutputFormat is for, or how to use it in practice, the curated class examples below may help.
The TextOutputFormat class belongs to the org.apache.hadoop.mapred package. 15 code examples of the class are shown below, sorted by popularity by default.
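As a quick orientation before the examples: TextOutputFormat writes each record as one line of text, with key and value separated by a tab by default. The minimal driver below is a sketch, not taken from any of the projects that follow; the separator override via the "mapred.textoutputformat.separator" property is my assumption about the old-API configuration key, so verify it against your Hadoop version.
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class TextOutputFormatSketch {
  public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(TextOutputFormatSketch.class);
    conf.setJobName("textoutputformat-sketch");
    // Map-only identity job: each input line is written back out as
    // "byte-offset<separator>line" by TextOutputFormat.
    conf.setNumReduceTasks(0);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    // Default separator is "\t"; this property name is an assumption
    // about the old (mapred) API.
    conf.set("mapred.textoutputformat.separator", ",");
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);
  }
}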
Example 1: getJob
import org.apache.hadoop.mapred.TextOutputFormat; // import required package/class
/**
* Sets up a job conf for the given job using the given config object. Ensures
* that the correct input format is set, along with the mapper and reducer
* classes, the input and output key and value classes, and any other job
* configuration.
*
* @param config the configuration to build the job from
* @return JobConf representing the job to be run
* @throws IOException
*/
private JobConf getJob(ConfigExtractor config) throws IOException {
JobConf job = new JobConf(config.getConfig(), SliveTest.class);
job.setInputFormat(DummyInputFormat.class);
FileOutputFormat.setOutputPath(job, config.getOutputPath());
job.setMapperClass(SliveMapper.class);
job.setPartitionerClass(SlivePartitioner.class);
job.setReducerClass(SliveReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setOutputFormat(TextOutputFormat.class);
TextOutputFormat.setCompressOutput(job, false);
job.setNumReduceTasks(config.getReducerAmount());
job.setNumMapTasks(config.getMapAmount());
return job;
}
Example 2: configure
import org.apache.hadoop.mapred.TextOutputFormat; // import required package/class
public void configure(String keySpec, int expect) throws Exception {
Path testdir = new Path(TEST_DIR.getAbsolutePath());
Path inDir = new Path(testdir, "in");
Path outDir = new Path(testdir, "out");
FileSystem fs = getFileSystem();
fs.delete(testdir, true);
conf.setInputFormat(TextInputFormat.class);
FileInputFormat.setInputPaths(conf, inDir);
FileOutputFormat.setOutputPath(conf, outDir);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(LongWritable.class);
conf.setNumMapTasks(1);
conf.setNumReduceTasks(1);
conf.setOutputFormat(TextOutputFormat.class);
conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
conf.setKeyFieldComparatorOptions(keySpec);
conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
conf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
conf.setMapperClass(InverseMapper.class);
conf.setReducerClass(IdentityReducer.class);
if (!fs.mkdirs(testdir)) {
throw new IOException("Mkdirs failed to create " + testdir.toString());
}
if (!fs.mkdirs(inDir)) {
throw new IOException("Mkdirs failed to create " + inDir.toString());
}
// set up input data: a single file with two lines
Path inFile = new Path(inDir, "part0");
FileOutputStream fos = new FileOutputStream(inFile.toString());
fos.write((line1 + "\n").getBytes());
fos.write((line2 + "\n").getBytes());
fos.close();
JobClient jc = new JobClient(conf);
RunningJob r_job = jc.submitJob(conf);
while (!r_job.isComplete()) {
Thread.sleep(1000);
}
if (!r_job.isSuccessful()) {
fail("Oops! The job broke due to an unexpected error");
}
Path[] outputFiles = FileUtil.stat2Paths(
getFileSystem().listStatus(outDir,
new Utils.OutputFileUtils.OutputFilesFilter()));
if (outputFiles.length > 0) {
InputStream is = getFileSystem().open(outputFiles[0]);
BufferedReader reader = new BufferedReader(new InputStreamReader(is));
String line = reader.readLine();
//make sure we get what we expect as the first line, and also
//that we have two lines
if (expect == 1) {
assertTrue(line.startsWith(line1));
} else if (expect == 2) {
assertTrue(line.startsWith(line2));
}
line = reader.readLine();
if (expect == 1) {
assertTrue(line.startsWith(line2));
} else if (expect == 2) {
assertTrue(line.startsWith(line1));
}
reader.close();
}
}
Example 3: addDependencyJars
import org.apache.hadoop.mapred.TextOutputFormat; // import required package/class
/**
* @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
*/
public static void addDependencyJars(JobConf job) throws IOException {
org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
job,
// when making changes here, consider also mapreduce.TableMapReduceUtil
// pull job classes
job.getMapOutputKeyClass(),
job.getMapOutputValueClass(),
job.getOutputKeyClass(),
job.getOutputValueClass(),
job.getPartitionerClass(),
job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
job.getCombinerClass());
}
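A hedged usage sketch: the helper is meant to be called from a driver after all job classes are configured, so that the formats, partitioner, and combiner it inspects are final before their jars are localized. This assumes the method shown above lives in org.apache.hadoop.hbase.mapred.TableMapReduceUtil, as its @see tag suggests; adjust the call site for your codebase.
import java.io.IOException;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class DependencyJarsSketch {
  public static JobConf configureWithDependencies() throws IOException {
    JobConf job = new JobConf(DependencyJarsSketch.class);
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // Last step before submission: ship the HBase jars plus the jars
    // containing the classes configured above to the cluster.
    TableMapReduceUtil.addDependencyJars(job);
    return job;
  }
}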
Example 4: getOldAPIJobconf
import org.apache.hadoop.mapred.TextOutputFormat; // import required package/class
private static JobConf getOldAPIJobconf(Configuration configuration, String name,
String input, String output)
throws Exception {
final JobConf jobConf = new JobConf(configuration);
final FileSystem fs = FileSystem.get(configuration);
if (fs.exists(new Path(output))) {
fs.delete(new Path(output), true);
}
fs.close();
jobConf.setJobName(name);
jobConf.setOutputKeyClass(Text.class);
jobConf.setOutputValueClass(IntWritable.class);
jobConf.setMapperClass(WordCountWithOldAPI.TokenizerMapperWithOldAPI.class);
jobConf.setCombinerClass(WordCountWithOldAPI.IntSumReducerWithOldAPI.class);
jobConf.setReducerClass(WordCountWithOldAPI.IntSumReducerWithOldAPI.class);
jobConf.setInputFormat(SequenceFileInputFormat.class);
jobConf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(jobConf, new Path(input));
FileOutputFormat.setOutputPath(jobConf, new Path(output));
return jobConf;
}
Example 5: main
import org.apache.hadoop.mapred.TextOutputFormat; // import required package/class
public static void main(String[] args) throws Exception {
JobConf conf = new JobConf(WeatherData.class);
conf.setJobName("temp");
// Note: the mapper's output types are not the job defaults, so the
// map output key/value classes below must be set explicitly.
conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputValueClass(Text.class);
conf.setMapperClass(MaxTemperatureMapper.class);
conf.setReducerClass(MaxTemperatureReducer.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
JobClient.runJob(conf);
}
Example 6: configure
import org.apache.hadoop.mapred.TextOutputFormat; // import required package/class
@SuppressWarnings("rawtypes")
@Override
public void configure() {
super.configure();
outputFormat = new TextOutputFormat();
Class<? extends CompressionCodec> codecClass = null;
if (CompressEnum.NONE.name().equalsIgnoreCase(compress)) {
codecClass = null;
} else if (CompressEnum.GZIP.name().equalsIgnoreCase(compress)) {
codecClass = org.apache.hadoop.io.compress.GzipCodec.class;
} else if (CompressEnum.BZIP2.name().equalsIgnoreCase(compress)) {
codecClass = org.apache.hadoop.io.compress.BZip2Codec.class;
} else {
throw new IllegalArgumentException("Unsupported compress format: "
+ compress);
}
if (codecClass != null) {
this.outputFormat.setOutputCompressorClass(jobConf, codecClass);
}
}
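For comparison, the same compression switches can be flipped from a plain driver. Both helpers are static methods that TextOutputFormat inherits from FileOutputFormat, so calling setOutputCompressorClass through the instance above merely sets the same job properties. A minimal sketch, assuming GzipCodec is available on the cluster:
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextOutputFormat;

public class CompressedTextOutputSketch {
  public static void enableGzipOutput(JobConf conf) {
    conf.setOutputFormat(TextOutputFormat.class);
    // Turn output compression on and select the codec; both calls are
    // FileOutputFormat statics, shown here for the text output case.
    FileOutputFormat.setCompressOutput(conf, true);
    FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
  }
}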
Example 7: main
import org.apache.hadoop.mapred.TextOutputFormat; // import required package/class
public static void main(String[] args) throws Exception {
JobConf conf = new JobConf(WordCountOldAPI.class);
conf.setJobName("old wordcount");
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
conf.setMapperClass(Map.class);
conf.setCombinerClass(Reduce.class);
conf.setReducerClass(Reduce.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
JobClient.runJob(conf);
}
Example 8: run
import org.apache.hadoop.mapred.TextOutputFormat; // import required package/class
public void run(String[] args) throws Exception
{
JobConf conf = new JobConf(this.getClass());
conf.setJobName("wordcount");
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
conf.setMapperClass(Map.class);
conf.setCombinerClass(Reduce.class);
conf.setReducerClass(Reduce.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
JobClient.runJob(conf);
}
Example 9: main
import org.apache.hadoop.mapred.TextOutputFormat; // import required package/class
public static void main(String[] args) throws IOException {
if (args.length != 1) {
System.err.println("Usage: mapred1 outputpath");
System.exit(0);
}
JobConf conf = new JobConf(mapred1.class);
conf.setJobName("mapred1");
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
conf.setMapperClass(Map.class);
conf.setReducerClass(Reduce.class);
conf.set("es.nodes", "10.149.3.3:9200");
conf.setInputFormat(EsInputFormat.class);
conf.set("es.resource", "kb/doc");
conf.set("es.query", "{\"query\":{\"query_string\":{\"fields\":[\"article_dc_title\"],\"query\":\"IN HET ZUIDEN\"}}}");
conf.setOutputFormat(TextOutputFormat.class);
FileOutputFormat.setOutputPath(conf, new Path(args[0]));
JobClient.runJob(conf);
}
Example 10: fillInWordCountMRJobConf
import org.apache.hadoop.mapred.TextOutputFormat; // import required package/class
public static void fillInWordCountMRJobConf(JobConf conf) {
String input = "select n_comment from tpch.nation";
conf.setJobName("samplejob-wordcount");
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
conf.setMapperClass(Map.class);
conf.setCombinerClass(Reduce.class);
conf.setReducerClass(Reduce.class);
conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
com.cloudera.recordservice.mr.RecordServiceConfig.setInputQuery(conf, input);
setRandomOutputDir(conf);
}
Example 11: IDMappingJob
import org.apache.hadoop.mapred.TextOutputFormat; // import required package/class
public static void IDMappingJob(String[] args) throws IOException {
JobConf job = new JobConf();
new GenericOptionsParser(job, args);
job.setJarByClass(HybridDriver.class);
job.setJobName("Converting binary similarity scores to text");
job.setMapperClass(IDMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setNumReduceTasks(0);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
Path inputPath = new Path(OUTPUT_DIR);
job.setInputFormat(SequenceFileInputFormat.class);
SequenceFileInputFormat.setInputPaths(job, inputPath);
Path outputPath = new Path("SimilarityScores");
job.setOutputFormat(TextOutputFormat.class);
SequenceFileOutputFormat.setOutputPath(job, outputPath); // setOutputPath is inherited from FileOutputFormat
FileSystem.get(job).delete(outputPath, true);
HashPagesDriver.prepareDistribCache(job, HashPagesDriver.IDS_FILE2); // TODO: remove? not sure
JobSubmitter.run(job,"BINARY TO TEXT",job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}
Example 12: main
import org.apache.hadoop.mapred.TextOutputFormat; // import required package/class
public static void main(String[] args) throws IOException {
JobConf job = new JobConf(DuplicateGraph.class);
job.setJobName(DuplicateGraph.class.getSimpleName());
job.setMapperClass(MapRecordOnly.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setInputFormat(TextInputFormat.class);
job.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(job, args[0]);
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setNumReduceTasks(0);
JobClient.runJob(job);
}
Example 13: runParsing
import org.apache.hadoop.mapred.TextOutputFormat; // import required package/class
public void runParsing(String inputPath, String outputPath) throws IOException {
JobConf conf = new JobConf(Hits.class);
// Delete folders
FileSystem.get(conf).delete(new Path("wiki"), true);
// Input / Mapper
FileInputFormat.setInputPaths(conf, new Path(inputPath));
conf.setInputFormat(DataInputFormat.class);
conf.setMapperClass(DataParserMapper.class);
// Output / Reducer
FileOutputFormat.setOutputPath(conf, new Path(outputPath));
conf.setOutputFormat(TextOutputFormat.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
conf.setReducerClass(DataParserReducer.class);
JobClient.runJob(conf);
}
Example 14: getLinkIn
import org.apache.hadoop.mapred.TextOutputFormat; // import required package/class
public void getLinkIn(String inputPath, String outputPath) throws IOException {
JobConf conf = new JobConf(Hits.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(conf, new Path(inputPath));
FileOutputFormat.setOutputPath(conf, new Path(outputPath));
conf.setMapperClass(FromPagesMapper.class);
conf.setReducerClass(FromPagesReducer.class);
JobClient.runJob(conf);
}
Example 15: initialize
import org.apache.hadoop.mapred.TextOutputFormat; // import required package/class
private void initialize(String inputPath1, String inputPath2, String outputPath)
throws IOException {
JobConf conf = new JobConf(Hits.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.addInputPath(conf, new Path(inputPath1));
FileInputFormat.addInputPath(conf, new Path(inputPath2));
FileOutputFormat.setOutputPath(conf, new Path(outputPath));
conf.setMapperClass(InitAuthHubMapper.class);
conf.setReducerClass(InitAuthHubReducer.class);
JobClient.runJob(conf);
}