

Java TextOutputFormat.setOutputPath Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.mapred.TextOutputFormat.setOutputPath, drawn from open-source projects. If you are wondering what TextOutputFormat.setOutputPath does, or how and where to call it, the curated examples below should help. You can also browse the other usage examples for the enclosing class, org.apache.hadoop.mapred.TextOutputFormat.


Eight code examples of the TextOutputFormat.setOutputPath method follow, ordered by popularity.
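Before the collected examples, here is a minimal, self-contained sketch of the typical call site. It is not taken from any of the projects below (the class name SetOutputPathSketch is made up for illustration); it assumes the classic org.apache.hadoop.mapred API and an identity, map-only job. Because TextOutputFormat extends FileOutputFormat, setOutputPath is the inherited static helper that records the job's output directory in the JobConf:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class SetOutputPathSketch {
	public static void main(String[] args) throws Exception {
		JobConf conf = new JobConf(SetOutputPathSketch.class);
		conf.setJobName("setOutputPath-sketch");

		// Identity, map-only job: the default IdentityMapper copies
		// <byte offset, line> pairs straight from input to output.
		conf.setNumReduceTasks(0);
		conf.setInputFormat(TextInputFormat.class);
		conf.setOutputFormat(TextOutputFormat.class);
		conf.setOutputKeyClass(LongWritable.class);
		conf.setOutputValueClass(Text.class);

		TextInputFormat.addInputPath(conf, new Path(args[0]));

		// The method this page covers: point the job at an output directory.
		// The directory must not exist yet, or job submission fails.
		TextOutputFormat.setOutputPath(conf, new Path(args[1]));

		JobClient.runJob(conf);
	}
}

Every example below is a variation on this pattern: build or obtain a JobConf, then hand setOutputPath the Path where the text output should land.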

Example 1: run
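A map-only job from a MapReduce-patterns collection: CartesianInputFormat pairs the comment input with itself (both the left and right inputs are args[0]), and TextOutputFormat.setOutputPath routes the resulting record pairs to the output directory.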

import org.apache.hadoop.mapred.TextOutputFormat; // import the class the method belongs to
@Override
public int run(String[] args) throws Exception {
	if (args.length != 2) {
		System.err.println("Usage: CartesianCommentComparison <in> <out>");
		ToolRunner.printGenericCommandUsage(System.err);
		System.exit(2);
	}

	// Configure the Cartesian product job
	JobConf conf = new JobConf("Cartesian Product");
	conf.setJarByClass(CartesianCommentComparison.class);
	conf.setMapperClass(CartesianMapper.class);
	conf.setNumReduceTasks(0);
	conf.setInputFormat(CartesianInputFormat.class);
	// Configure the input format
	CartesianInputFormat.setLeftInputInfo(conf, TextInputFormat.class, args[0]);
	CartesianInputFormat.setRightInputInfo(conf, TextInputFormat.class, args[0]);
	TextOutputFormat.setOutputPath(conf, new Path(args[1]));
	conf.setOutputKeyClass(Text.class);
	conf.setOutputValueClass(Text.class);
	RunningJob job = JobClient.runJob(conf);
	while (!job.isComplete()) {
		Thread.sleep(1000);
	}
	return job.isSuccessful() ? 0 : 1;
}
 
Developer: geftimov, Project: hadoop-map-reduce-patterns, Lines: 27, Source: CartesianCommentComparison.java

Example 2: createJobConf
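Builds the JobConf for a map-only Kafka-to-HDFS ETL job. Any existing output directory is deleted before setOutputPath is called, so reruns do not fail on an already-existing output path.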

import org.apache.hadoop.mapred.TextOutputFormat; // import the class the method belongs to
protected JobConf createJobConf() throws Exception {
	JobConf jobConf = KafkaETLJob.createJobConf("SimpleKafkaETL", _topic, _props, getClass());
	
	jobConf.setMapperClass(SimpleKafkaETLMapper.class);
	KafkaETLInputFormat.setInputPaths(jobConf, new Path(_input));
	
	jobConf.setOutputKeyClass(LongWritable.class);
	jobConf.setOutputValueClass(Text.class);
	jobConf.setOutputFormat(TextOutputFormat.class);
	TextOutputFormat.setCompressOutput(jobConf, false);
	Path output = new Path(_output);
	FileSystem fs = output.getFileSystem(jobConf);
	if (fs.exists(output)) {
		fs.delete(output, true); // recursive delete; the one-argument delete(Path) is deprecated
	}
	TextOutputFormat.setOutputPath(jobConf, output);
	
	jobConf.setNumReduceTasks(0);
	return jobConf;
}
 
Developer: yanfang724, Project: hadoop-consumer, Lines: 19, Source: SimpleKafkaETLJob.java

Example 3: main
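An Apache Flink program that runs Hadoop mapred WordCount functions unchanged; setOutputPath is applied to the JobConf held inside Flink's HadoopOutputFormat wrapper.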

import org.apache.hadoop.mapred.TextOutputFormat; // import the class the method belongs to
public static void main(String[] args) throws Exception {
	if (args.length < 2) {
		System.err.println("Usage: WordCount <input path> <result path>");
		return;
	}

	final String inputPath = args[0];
	final String outputPath = args[1];

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// Set up the Hadoop Input Format
	HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, new JobConf());
	TextInputFormat.addInputPath(hadoopInputFormat.getJobConf(), new Path(inputPath));

	// Create a Flink job with it
	DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

	DataSet<Tuple2<Text, LongWritable>> words =
			text.flatMap(new HadoopMapFunction<LongWritable, Text, Text, LongWritable>(new Tokenizer()))
				.groupBy(0).reduceGroup(new HadoopReduceCombineFunction<Text, LongWritable, Text, LongWritable>(new Counter(), new Counter()));

	// Set up Hadoop Output Format
	HadoopOutputFormat<Text, LongWritable> hadoopOutputFormat =
			new HadoopOutputFormat<Text, LongWritable>(new TextOutputFormat<Text, LongWritable>(), new JobConf());
	hadoopOutputFormat.getJobConf().set("mapred.textoutputformat.separator", " ");
	TextOutputFormat.setOutputPath(hadoopOutputFormat.getJobConf(), new Path(outputPath));

	// Output & Execute
	words.output(hadoopOutputFormat).setParallelism(1);
	env.execute("Hadoop Compat WordCount");
}
 
Developer: axbaretto, Project: flink, Lines: 33, Source: HadoopMapredCompatWordCount.java

Example 4: getPlan
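A WordCount plan for the old Stratosphere/Flink Record API; here setOutputPath configures the JobConf carried by the HadoopDataSink that wraps TextOutputFormat.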

import org.apache.hadoop.mapred.TextOutputFormat; // import the class the method belongs to
@Override
public Plan getPlan(String... args) {
	// parse job parameters
	int numSubTasks   = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
	String dataInput = (args.length > 1 ? args[1] : "");
	String output    = (args.length > 2 ? args[2] : "");

	HadoopDataSource<LongWritable, Text> source = new HadoopDataSource<LongWritable, Text>(
			new TextInputFormat(), new JobConf(), "Input Lines");
	TextInputFormat.addInputPath(source.getJobConf(), new Path(dataInput));


	MapOperator mapper = MapOperator.builder(new TokenizeLine())
			.input(source)
			.name("Tokenize Lines")
			.build();
	ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
			.input(mapper)
			.name("Count Words")
			.build();
	HadoopDataSink<Text, IntWritable> out = new HadoopDataSink<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(),new JobConf(), "Hadoop TextOutputFormat", reducer, Text.class, IntWritable.class);
	TextOutputFormat.setOutputPath(out.getJobConf(), new Path(output));

	Plan plan = new Plan(out, "Hadoop OutputFormat Example");
	plan.setDefaultParallelism(numSubTasks);
	return plan;
}
 
Developer: citlab, Project: vs.msc.ws14, Lines: 28, Source: WordCountWithOutputFormat.java

Example 5: run
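A map-side composite join: CompositeInputFormat reads the user and comment inputs according to the mapred.join.expr join expression, and setOutputPath names the directory for the joined records.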

import org.apache.hadoop.mapred.TextOutputFormat; // import the class the method belongs to
@Override
public int run(String[] args) throws Exception {
	if (args.length != 4) {
		printUsage();
	}
	Path userPath = new Path(args[0]);
	Path commentPath = new Path(args[1]);
	Path outputDir = new Path(args[2]);
	String joinType = args[3];
	JobConf conf = new JobConf("CompositeJoin");
	conf.setJarByClass(CompositeUserJoin.class);
	conf.setMapperClass(CompositeMapper.class);
	conf.setNumReduceTasks(0);
	// Set the input format class to a CompositeInputFormat class.
	// The CompositeInputFormat will parse all of our input files and output
	// records to our mapper.
	conf.setInputFormat(CompositeInputFormat.class);
	// The composite input format join expression will set how the records
	// are going to be read in, and in what input format.
	conf.set("mapred.join.expr", CompositeInputFormat.compose(joinType,
			KeyValueTextInputFormat.class, userPath, commentPath));
	TextOutputFormat.setOutputPath(conf, outputDir);
	conf.setOutputKeyClass(Text.class);
	conf.setOutputValueClass(Text.class);
	RunningJob job = JobClient.runJob(conf);
	while (!job.isComplete()) {
		Thread.sleep(1000);
	}
	return job.isSuccessful() ? 0 : 1;
}
 
Developer: geftimov, Project: hadoop-map-reduce-patterns, Lines: 31, Source: CompositeUserJoin.java

Example 6: main
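A Hazelcast Jet DAG that reads from and writes back to HDFS through a shared JobConf; setOutputPath targets a timestamped directory so repeated benchmark runs do not collide.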

import org.apache.hadoop.mapred.TextOutputFormat; // import the class the method belongs to
public static void main(String[] args) throws Exception {
    JetInstance client = Jet.newJetClient();

    String inputPath = args[0];
    String outputPath = args[1] + "_" + System.currentTimeMillis();

    DAG dag = new DAG();
    JobConf conf = new JobConf();
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(conf, new Path(inputPath));
    TextOutputFormat.setOutputPath(conf, new Path(outputPath));

    Vertex producer = dag.newVertex("reader", readHdfsP(conf,
            (k, v) -> v.toString())).localParallelism(3);

    Vertex tokenizer = dag.newVertex("tokenizer",
            flatMapP((String line) -> {
                StringTokenizer s = new StringTokenizer(line);
                return () -> s.hasMoreTokens() ? s.nextToken() : null;
            })
    );

    // word -> (word, count)
    Vertex accumulate = dag.newVertex("accumulate", accumulateByKeyP(wholeItem(), counting()));

    // (word, count) -> (word, count)
    Vertex combine = dag.newVertex("combine", combineByKeyP(counting()));
    Vertex consumer = dag.newVertex("writer", writeHdfsP(conf, entryKey(), entryValue())).localParallelism(1);

    dag.edge(between(producer, tokenizer))
       .edge(between(tokenizer, accumulate)
               .partitioned(wholeItem(), HASH_CODE))
       .edge(between(accumulate, combine)
               .distributed()
               .partitioned(entryKey()))
       .edge(between(combine, consumer));

    JobConfig config = new JobConfig();
    config.addClass(JetWordCount.class);

    try {
        long start = System.currentTimeMillis();
        client.newJob(dag, config).join();
        System.out.println("Time=" + (System.currentTimeMillis() - start));

    } finally {
        client.shutdown();
    }
}
 
Developer: hazelcast, Project: big-data-benchmark, Lines: 51, Source: JetWordCount.java

Example 7: run
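Converts Hadoop sequence files to gzip-compressed text for a later database import; setOutputPath is combined with setCompressOutput and an output codec.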

import org.apache.hadoop.mapred.TextOutputFormat; // import the class the method belongs to
public int run(String[] args) throws Exception {
	// Get current configuration.
	Configuration conf = getConf();

	// Parse command line arguments.
	String inputPaths = args[0];
	String outputPath = args[1];

	JobConf job = new JobConf(conf);

	// Set input path.
	if (inputPaths.length() > 0) {
		List<String> segmentPaths = Lists.newArrayList(Splitter.on(",")
				.split(inputPaths));

		for (String segmentPath : segmentPaths) {
			LOG.info("Adding input path " + segmentPath);
			FileInputFormat.addInputPath(job, new Path(segmentPath));
		}
	} else {
		System.err.println("No input path found.");
		return 1;
	}

	// Set output path.
	if (outputPath.length() > 0) {
		LOG.info("Setting output path to " + outputPath);
		TextOutputFormat.setOutputPath(job, new Path(outputPath));
		// Compress output to boost performance.
		TextOutputFormat.setCompressOutput(job, true);
		TextOutputFormat.setOutputCompressorClass(job, GzipCodec.class); // select gzip; getOutputCompressorClass only reads the setting and would leave the default codec in place
	} else {
		System.err.println("No output path found.");
		return 1;
	}

	// Load other classes from same jar as this class.
	job.setJarByClass(OutputToText.class);

	// Input is Hadoop sequence file format.
	job.setInputFormat(SequenceFileInputFormat.class);

	// Output is text format for import into database later.
	job.setOutputFormat(TextOutputFormat.class);

	// Set the output data types.
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(Text.class);

	// Use custom mapper class.
	job.setMapperClass(OutputToTextMapper.class);

	// Use standard reducer class.
	job.setReducerClass(IdentityReducer.class);

	if (JobClient.runJob(job).isSuccessful())
		return 0;
	else
		return 1;
}
 
Developer: rossf7, Project: wikireverse, Lines: 61, Source: OutputToText.java

Example 8: run
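Structurally the same job as the previous example, but it sums per-key counts with LongSumReducer and writes LongWritable values.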

import org.apache.hadoop.mapred.TextOutputFormat; // import the class the method belongs to
public int run(String[] args) throws Exception {
	// Get current configuration.
	Configuration conf = getConf();

	// Parse command line arguments.
	String inputPaths = args[0];
	String outputPath = args[1];

	JobConf job = new JobConf(conf);

	// Set input paths.
	if (inputPaths.length() > 0) {
		List<String> segmentPaths = Lists.newArrayList(Splitter.on(",")
				.split(inputPaths));

		for (String segmentPath : segmentPaths) {
			LOG.info("Adding input path " + segmentPath);
			FileInputFormat.addInputPath(job, new Path(segmentPath));
		}
	} else {
		System.err.println("No input path found.");
		return 1;
	}

	// Set output path.
	if (outputPath.length() > 0) {
		LOG.info("Setting output path to " + outputPath);
		TextOutputFormat.setOutputPath(job, new Path(outputPath));
		// Compress output to boost performance.
		TextOutputFormat.setCompressOutput(job, true);
		TextOutputFormat.setOutputCompressorClass(job, GzipCodec.class); // select gzip; getOutputCompressorClass only reads the setting and would leave the default codec in place
	} else {
		System.err.println("No output path found.");
		return 1;
	}

	// Load other classes from same jar as this class.
	job.setJarByClass(SegmentCombiner.class);

	// Input is Hadoop sequence file format.
	job.setInputFormat(SequenceFileInputFormat.class);

	// Output to text file format.
	job.setOutputFormat(TextOutputFormat.class);

	// Set the output data types.
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(LongWritable.class);

	// Use custom mapper class.
	job.setMapperClass(SegmentCombinerMapper.class);

	// Use standard reducer class.
	job.setReducerClass(LongSumReducer.class);

	if (JobClient.runJob(job).isSuccessful())
		return 0;
	else
		return 1;
}
 
Developer: rossf7, Project: elasticrawl-examples, Lines: 61, Source: SegmentCombiner.java


Note: The org.apache.hadoop.mapred.TextOutputFormat.setOutputPath examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code fragments come from community open-source projects, and copyright remains with their original authors; consult each project's license before using or redistributing the code. Do not republish without permission.