This article collects typical usage examples of the Java method org.apache.hadoop.mapred.TextOutputFormat.setOutputPath. If you are wondering what TextOutputFormat.setOutputPath does, how to call it, or what real code that uses it looks like, the hand-picked examples below should help. You can also read more about the enclosing class, org.apache.hadoop.mapred.TextOutputFormat.

The sections below show 8 code examples of TextOutputFormat.setOutputPath, ordered roughly by popularity.
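
Before the examples, here is a minimal sketch of the basic pattern, assuming a plain old-API (org.apache.hadoop.mapred) job: setOutputPath, which TextOutputFormat inherits from FileOutputFormat, records the job's output directory in the JobConf, and that directory must not already exist when the job starts. No mapper or reducer is set, so Hadoop's identity classes are used and the job simply copies its input; the class name SetOutputPathSketch is a placeholder, not taken from any of the examples below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class SetOutputPathSketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SetOutputPathSketch.class);
        conf.setJobName("set-output-path-sketch");

        // Input/output formats from the old (mapred) API.
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        // Key/value types written by the job (identity mapper passes
        // through the LongWritable offset and Text line).
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        // Where the input lives and where TextOutputFormat should write
        // its part-* files. The output directory must not exist yet.
        TextInputFormat.addInputPath(conf, new Path(args[0]));
        TextOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}
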
Example 1: run
import org.apache.hadoop.mapred.TextOutputFormat; // import the package/class the method depends on

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: CartesianCommentComparison <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }
    // Configure the join type
    JobConf conf = new JobConf("Cartesian Product");
    conf.setJarByClass(CartesianCommentComparison.class);
    conf.setMapperClass(CartesianMapper.class);
    conf.setNumReduceTasks(0);
    conf.setInputFormat(CartesianInputFormat.class);
    // Configure the input format: both sides of the Cartesian product read the same input (args[0])
    CartesianInputFormat.setLeftInputInfo(conf, TextInputFormat.class, args[0]);
    CartesianInputFormat.setRightInputInfo(conf, TextInputFormat.class, args[0]);
    TextOutputFormat.setOutputPath(conf, new Path(args[1]));
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    RunningJob job = JobClient.runJob(conf);
    while (!job.isComplete()) {
        Thread.sleep(1000);
    }
    return job.isSuccessful() ? 0 : 1;
}

Example 2: createJobConf
import org.apache.hadoop.mapred.TextOutputFormat; // import the package/class the method depends on

protected JobConf createJobConf() throws Exception {
    JobConf jobConf = KafkaETLJob.createJobConf("SimpleKafakETL", _topic, _props, getClass());
    jobConf.setMapperClass(SimpleKafkaETLMapper.class);
    KafkaETLInputFormat.setInputPaths(jobConf, new Path(_input));

    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setCompressOutput(jobConf, false);

    // Delete any previous output so the job does not fail on an existing directory.
    Path output = new Path(_output);
    FileSystem fs = output.getFileSystem(jobConf);
    if (fs.exists(output)) {
        fs.delete(output, true);
    }
    TextOutputFormat.setOutputPath(jobConf, output);

    jobConf.setNumReduceTasks(0);
    return jobConf;
}

Example 3: main
import org.apache.hadoop.mapred.TextOutputFormat; // import the package/class the method depends on

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: WordCount <input path> <result path>");
        return;
    }
    final String inputPath = args[0];
    final String outputPath = args[1];

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Set up the Hadoop Input Format
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat =
            new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, new JobConf());
    TextInputFormat.addInputPath(hadoopInputFormat.getJobConf(), new Path(inputPath));

    // Create a Flink job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
    DataSet<Tuple2<Text, LongWritable>> words =
            text.flatMap(new HadoopMapFunction<LongWritable, Text, Text, LongWritable>(new Tokenizer()))
                .groupBy(0).reduceGroup(new HadoopReduceCombineFunction<Text, LongWritable, Text, LongWritable>(new Counter(), new Counter()));

    // Set up Hadoop Output Format
    HadoopOutputFormat<Text, LongWritable> hadoopOutputFormat =
            new HadoopOutputFormat<Text, LongWritable>(new TextOutputFormat<Text, LongWritable>(), new JobConf());
    hadoopOutputFormat.getJobConf().set("mapred.textoutputformat.separator", " ");
    TextOutputFormat.setOutputPath(hadoopOutputFormat.getJobConf(), new Path(outputPath));

    // Output & Execute
    words.output(hadoopOutputFormat).setParallelism(1);
    env.execute("Hadoop Compat WordCount");
}

Example 4: getPlan
import org.apache.hadoop.mapred.TextOutputFormat; // import the package/class the method depends on

@Override
public Plan getPlan(String... args) {
    // parse job parameters
    int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    String dataInput = (args.length > 1 ? args[1] : "");
    String output = (args.length > 2 ? args[2] : "");

    HadoopDataSource<LongWritable, Text> source = new HadoopDataSource<LongWritable, Text>(
            new TextInputFormat(), new JobConf(), "Input Lines");
    TextInputFormat.addInputPath(source.getJobConf(), new Path(dataInput));

    MapOperator mapper = MapOperator.builder(new TokenizeLine())
            .input(source)
            .name("Tokenize Lines")
            .build();
    ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
            .input(mapper)
            .name("Count Words")
            .build();

    HadoopDataSink<Text, IntWritable> out = new HadoopDataSink<Text, IntWritable>(
            new TextOutputFormat<Text, IntWritable>(), new JobConf(), "Hadoop TextOutputFormat",
            reducer, Text.class, IntWritable.class);
    TextOutputFormat.setOutputPath(out.getJobConf(), new Path(output));

    Plan plan = new Plan(out, "Hadoop OutputFormat Example");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
}

Example 5: run
import org.apache.hadoop.mapred.TextOutputFormat; // import the package/class the method depends on

@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
    }
    Path userPath = new Path(args[0]);
    Path commentPath = new Path(args[1]);
    Path outputDir = new Path(args[2]);
    String joinType = args[3];

    JobConf conf = new JobConf("CompositeJoin");
    conf.setJarByClass(CompositeUserJoin.class);
    conf.setMapperClass(CompositeMapper.class);
    conf.setNumReduceTasks(0);

    // Set the input format class to a CompositeInputFormat class.
    // The CompositeInputFormat will parse all of our input files and output
    // records to our mapper.
    conf.setInputFormat(CompositeInputFormat.class);

    // The composite input format join expression will set how the records
    // are going to be read in, and in what input format.
    conf.set("mapred.join.expr", CompositeInputFormat.compose(joinType,
            KeyValueTextInputFormat.class, userPath, commentPath));

    TextOutputFormat.setOutputPath(conf, outputDir);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    RunningJob job = JobClient.runJob(conf);
    while (!job.isComplete()) {
        Thread.sleep(1000);
    }
    return job.isSuccessful() ? 0 : 1;
}

Example 6: main
import org.apache.hadoop.mapred.TextOutputFormat; // import the package/class the method depends on

public static void main(String[] args) throws Exception {
    JetInstance client = Jet.newJetClient();
    String inputPath = args[0];
    String outputPath = args[1] + "_" + System.currentTimeMillis();

    DAG dag = new DAG();
    JobConf conf = new JobConf();
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(conf, new Path(inputPath));
    TextOutputFormat.setOutputPath(conf, new Path(outputPath));

    Vertex producer = dag.newVertex("reader", readHdfsP(conf,
            (k, v) -> v.toString())).localParallelism(3);
    Vertex tokenizer = dag.newVertex("tokenizer",
            flatMapP((String line) -> {
                StringTokenizer s = new StringTokenizer(line);
                return () -> s.hasMoreTokens() ? s.nextToken() : null;
            })
    );
    // word -> (word, count)
    Vertex accumulate = dag.newVertex("accumulate", accumulateByKeyP(wholeItem(), counting()));
    // (word, count) -> (word, count)
    Vertex combine = dag.newVertex("combine", combineByKeyP(counting()));
    Vertex consumer = dag.newVertex("writer", writeHdfsP(conf, entryKey(), entryValue())).localParallelism(1);

    dag.edge(between(producer, tokenizer))
       .edge(between(tokenizer, accumulate)
               .partitioned(wholeItem(), HASH_CODE))
       .edge(between(accumulate, combine)
               .distributed()
               .partitioned(entryKey()))
       .edge(between(combine, consumer));

    JobConfig config = new JobConfig();
    config.addClass(JetWordCount.class);

    try {
        long start = System.currentTimeMillis();
        client.newJob(dag, config).join();
        System.out.println("Time=" + (System.currentTimeMillis() - start));
    } finally {
        client.shutdown();
    }
}

Example 7: run
import org.apache.hadoop.mapred.TextOutputFormat; // import the package/class the method depends on

public int run(String[] args) throws Exception {
    // Get current configuration.
    Configuration conf = getConf();

    // Parse command line arguments.
    String inputPaths = args[0];
    String outputPath = args[1];

    JobConf job = new JobConf(conf);

    // Set input path.
    if (inputPaths.length() > 0) {
        List<String> segmentPaths = Lists.newArrayList(Splitter.on(",")
                .split(inputPaths));
        for (String segmentPath : segmentPaths) {
            LOG.info("Adding input path " + segmentPath);
            FileInputFormat.addInputPath(job, new Path(segmentPath));
        }
    } else {
        System.err.println("No input path found.");
        return 1;
    }

    // Set output path.
    if (outputPath.length() > 0) {
        LOG.info("Setting output path to " + outputPath);
        TextOutputFormat.setOutputPath(job, new Path(outputPath));
        // Compress output to boost performance.
        TextOutputFormat.setCompressOutput(job, true);
        TextOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    } else {
        System.err.println("No output path found.");
        return 1;
    }

    // Load other classes from same jar as this class.
    job.setJarByClass(OutputToText.class);
    // Input is Hadoop sequence file format.
    job.setInputFormat(SequenceFileInputFormat.class);
    // Output is text format for import into database later.
    job.setOutputFormat(TextOutputFormat.class);

    // Set the output data types.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Use custom mapper class.
    job.setMapperClass(OutputToTextMapper.class);
    // Use standard reducer class.
    job.setReducerClass(IdentityReducer.class);

    if (JobClient.runJob(job).isSuccessful()) {
        return 0;
    } else {
        return 1;
    }
}

Example 8: run
import org.apache.hadoop.mapred.TextOutputFormat; // import the package/class the method depends on

public int run(String[] args) throws Exception {
    // Get current configuration.
    Configuration conf = getConf();

    // Parse command line arguments.
    String inputPaths = args[0];
    String outputPath = args[1];

    JobConf job = new JobConf(conf);

    // Set input paths.
    if (inputPaths.length() > 0) {
        List<String> segmentPaths = Lists.newArrayList(Splitter.on(",")
                .split(inputPaths));
        for (String segmentPath : segmentPaths) {
            LOG.info("Adding input path " + segmentPath);
            FileInputFormat.addInputPath(job, new Path(segmentPath));
        }
    } else {
        System.err.println("No input path found.");
        return 1;
    }

    // Set output path.
    if (outputPath.length() > 0) {
        LOG.info("Setting output path to " + outputPath);
        TextOutputFormat.setOutputPath(job, new Path(outputPath));
        // Compress output to boost performance.
        TextOutputFormat.setCompressOutput(job, true);
        TextOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    } else {
        System.err.println("No output path found.");
        return 1;
    }

    // Load other classes from same jar as this class.
    job.setJarByClass(SegmentCombiner.class);
    // Input is Hadoop sequence file format.
    job.setInputFormat(SequenceFileInputFormat.class);
    // Output to text file format.
    job.setOutputFormat(TextOutputFormat.class);

    // Set the output data types.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Use custom mapper class.
    job.setMapperClass(SegmentCombinerMapper.class);
    // Use standard reducer class.
    job.setReducerClass(LongSumReducer.class);

    if (JobClient.runJob(job).isSuccessful()) {
        return 0;
    } else {
        return 1;
    }
}