當前位置: 首頁>>代碼示例>>Java>>正文


Java SequenceFileInputFormat.addInputPath方法代碼示例

本文整理匯總了Java中org.apache.hadoop.mapred.SequenceFileInputFormat.addInputPath方法的典型用法代碼示例。如果您正苦於以下問題:Java SequenceFileInputFormat.addInputPath方法的具體用法?Java SequenceFileInputFormat.addInputPath怎麽用?Java SequenceFileInputFormat.addInputPath使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在org.apache.hadoop.mapred.SequenceFileInputFormat的用法示例。


在下文中一共展示了SequenceFileInputFormat.addInputPath方法的8個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Java代碼示例。

示例1: SetSeqFileInputOutput

import org.apache.hadoop.mapred.SequenceFileInputFormat; //導入方法依賴的package包/類
/**
 * Configures {@code job} for SequenceFile input/output and adds every file
 * matching the comma-separated glob patterns in {@code inputPaths} as an input.
 * Each pattern's last path component may use the wildcards {@code *} (any run
 * of characters) and {@code ?} (any single character); the parent directory is
 * listed and matching entries are added individually.
 *
 * @param job        the job configuration to populate
 * @param inputPaths comma-separated list of glob patterns
 * @param output     output directory for the job
 * @throws IOException if the filesystem cannot be reached or listed
 */
public static void SetSeqFileInputOutput(JobConf job, String inputPaths, Path output) throws IOException {
	job.setInputFormat(SequenceFileInputFormat.class);
	job.setOutputFormat(SequenceFileOutputFormat.class);
	SequenceFileOutputFormat.setOutputPath(job, output);

	// Expand each comma-separated input pattern.
	FileSystem fs = FileSystem.get(job);
	String[] paths = inputPaths.split(",");
	for (String p : paths) {
		int lastslash = p.lastIndexOf("/");
		if (lastslash < 0) {
			// No directory component: treat the pattern as relative to ".".
			p = "./" + p;
			lastslash = 1;
		}
		String parent = p.substring(0, lastslash);
		p = p.substring(lastslash + 1);
		// The last path component is treated as a glob pattern.
		String regex = globToRegex(p);
		LOG.info("Use pattern:" + regex);
		Pattern re = Pattern.compile(regex);
		// List the parent directory and add every entry whose name matches.
		FileStatus[] files = fs.listStatus(new Path(parent));
		for (FileStatus f : files) {
			if (re.matcher(f.getPath().getName()).matches()) {
				SequenceFileInputFormat.addInputPath(job, f.getPath());
				LOG.info("Adding input:" + f.getPath());
			}
		}
	}
}

/**
 * Converts a glob pattern to an equivalent regular expression: {@code *}
 * becomes {@code .*}, {@code ?} becomes {@code .}, and every other regex
 * metacharacter is escaped so it matches literally. (The previous inline
 * replace-chain escaped only {@code \} and {@code .}, so characters such as
 * {@code [ ( + { $ ^ |} could throw PatternSyntaxException or mis-match.)
 */
private static String globToRegex(String glob) {
	StringBuilder sb = new StringBuilder(glob.length() * 2);
	for (int i = 0; i < glob.length(); i++) {
		char c = glob.charAt(i);
		if (c == '*') {
			sb.append(".*");
		} else if (c == '?') {
			sb.append('.');
		} else if ("\\.[]{}()+-^$|".indexOf(c) >= 0) {
			sb.append('\\').append(c);
		} else {
			sb.append(c);
		}
	}
	return sb.toString();
}
 
開發者ID:thunlp,項目名稱:THUTag,代碼行數:34,代碼來源:MapReduceHelper.java

示例2: setInputOutput

import org.apache.hadoop.mapred.SequenceFileInputFormat; //導入方法依賴的package包/類
/**
 * Wires {@code job} to read non-splitable sequence files from
 * {@code inputPath} and to write multi-sequence output under
 * {@code outputPath}, removing any previous output directory first.
 *
 * @return the same {@code job} instance, for call chaining
 * @throws IOException if the old output directory cannot be deleted
 */
public static JobConf setInputOutput(JobConf job, Path inputPath, Path outputPath)
		throws IOException {
	// Clear stale output from an earlier run so the job can start fresh.
	FileSystem.get(job).delete(outputPath, true);

	// Input side: sequence files that must not be split across mappers.
	job.setInputFormat(NonSplitableSequenceInputFormat.class);
	SequenceFileInputFormat.addInputPath(job, inputPath);

	// Output side: multi-sequence-file output.
	job.setOutputFormat(MultiSeqOutput.class);
	MultiSeqOutput.setOutputPath(job, outputPath);
	return job;
}
 
開發者ID:mahaucsb,項目名稱:pss,代碼行數:10,代碼來源:CosinePartitioning.java

示例3: main

import org.apache.hadoop.mapred.SequenceFileInputFormat; //導入方法依賴的package包/類
/**
 * Sets the job configurations including the mapper and reducer classes to
 * do the sorting based on vector lengths: mappers key records by length
 * (float), a range partitioner keeps the reducer outputs globally ordered,
 * and reducers write the sorted vectors.
 *
 * @param args generic Hadoop options, parsed into the job configuration
 * @throws IOException if the output directory cannot be cleared or the job fails
 */
public static void main(String[] args) throws IOException {

	JobConf job = new JobConf();
	new GenericOptionsParser(job, args);
	job.setJobName(LengthSortMain.class.getSimpleName());
	job.setJarByClass(LengthSortMain.class);
	// Map phase keys each vector by its length so the shuffle sorts by it.
	job.setMapperClass(LengthSortMapper.class);
	job.setMapOutputKeyClass(FloatWritable.class);
	job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);

	// Route contiguous key ranges to reducers: output partitions stay ordered.
	job.setPartitionerClass(LengthRangePartitioner.class);

	job.setReducerClass(LengthSortReducer.class);
	job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY,
			SortDriver.NUM_REDUCE_VALUE));
	job.setOutputKeyClass(LongWritable.class);
	job.setOutputValueClass(FeatureWeightArrayWritable.class);
	//
	// set input & output
	//
	String inputDir = SortDriver.INPUT_DIR;
	if (inputDir == null) {
		throw new UnsupportedOperationException("ERROR: input path not set");
	}
	job.setInputFormat(SequenceFileInputFormat.class);
	SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
	Path outputPath = new Path(SortDriver.OUTPUT_DIR);
	// Delete stale output so the job does not abort on an existing directory.
	FileSystem.get(job).delete(outputPath, true);
	job.setOutputFormat(SequenceFileOutputFormat.class);
	FileOutputFormat.setOutputPath(job, outputPath);

	//
	// run
	//
	// Fixed typo in the job description ("Lenghts" -> "Lengths").
	JobSubmitter.run(job, "Sort By Vector Lengths", -1);
}
 
開發者ID:mahaucsb,項目名稱:pss,代碼行數:41,代碼來源:LengthSortMain.java

示例4: main

import org.apache.hadoop.mapred.SequenceFileInputFormat; //導入方法依賴的package包/類
/**
 * Entry point: configures and submits the p-norm sorting job. Mappers key
 * each vector by its norm, a range partitioner splits the key space across
 * reducers, and reducers emit the sorted partitions. Some of the produced
 * partitions might be merged later to reflect the number of partitions
 * chosen by the user.
 */
public static void main(String[] args) throws IOException {

	JobConf conf = new JobConf();
	new GenericOptionsParser(conf, args);
	conf.setJobName("NormSort");
	conf.setJarByClass(NormSortMain.class);

	// Map side: key vectors by norm so the shuffle orders them.
	conf.setMapperClass(NormSortMapper.class);
	conf.setMapOutputKeyClass(FloatWritable.class);
	conf.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);

	// Contiguous norm ranges per reducer keep the global order.
	conf.setPartitionerClass(NormRangePartitioner.class);

	// Reduce side.
	conf.setReducerClass(NormSortReducer.class);
	conf.setNumReduceTasks(conf.getInt(SortDriver.NUM_REDUCE_PROPERTY,
			SortDriver.NUM_REDUCE_VALUE));
	conf.setOutputKeyClass(LongWritable.class);
	conf.setOutputValueClass(FeatureWeightArrayWritable.class);

	// Input & output paths.
	String inputRoot = SortDriver.INPUT_DIR;
	if (inputRoot == null) {
		throw new UnsupportedOperationException("ERROR: input path not set");
	}
	conf.setInputFormat(SequenceFileInputFormat.class);
	SequenceFileInputFormat.addInputPath(conf, new Path(inputRoot));

	Path sortedOutput = new Path(SortDriver.OUTPUT_DIR);
	// Remove stale output so the job does not fail on an existing directory.
	FileSystem.get(conf).delete(sortedOutput, true);
	conf.setOutputFormat(SequenceFileOutputFormat.class);
	FileOutputFormat.setOutputPath(conf, sortedOutput);

	// Submit.
	JobSubmitter.run(conf, "Sort By p-norm", -1);
}
 
開發者ID:mahaucsb,項目名稱:pss,代碼行數:41,代碼來源:NormSortMain.java

示例5: main

import org.apache.hadoop.mapred.SequenceFileInputFormat; //導入方法依賴的package包/類
/**
 * Entry point: configures and submits the signature sorting job. Mappers
 * key records by their bit signature, a range partitioner splits the
 * signature space across reducers, and reducers emit the sorted output.
 */
public static void main(String[] args) throws IOException {

	JobConf conf = new JobConf();
	new GenericOptionsParser(conf, args);
	conf.setJobName(SigSortMain.class.getSimpleName());
	conf.setJarByClass(SigSortMain.class);

	// Map side: signature becomes the shuffle key.
	conf.setMapperClass(SigSortMapper.class);
	conf.setMapOutputKeyClass(BitSignature.class);
	conf.setMapOutputValueClass(LongWritable.class);

	// Contiguous signature ranges per reducer keep the global order.
	conf.setPartitionerClass(SigRangePartitioner.class);

	// Reduce side.
	conf.setReducerClass(SigSortReducer.class);
	conf.setNumReduceTasks(conf.getInt(SortDriver.NUM_REDUCE_PROPERTY,
			SortDriver.NUM_REDUCE_VALUE));
	conf.setOutputKeyClass(LongWritable.class);
	conf.setOutputValueClass(BitSignature.class);

	// Input & output paths.
	String inputRoot = SortDriver.INPUT_DIR;
	if (inputRoot == null) {
		throw new UnsupportedOperationException("ERROR: input path not set");
	}
	conf.setInputFormat(SequenceFileInputFormat.class);
	SequenceFileInputFormat.addInputPath(conf, new Path(inputRoot));

	Path sortedOutput = new Path(OUTPUT_PATH);
	// Remove stale output so the job does not fail on an existing directory.
	FileSystem.get(conf).delete(sortedOutput, true);
	conf.setOutputFormat(SequenceFileOutputFormat.class);
	FileOutputFormat.setOutputPath(conf, sortedOutput);

	// Submit.
	JobSubmitter.run(conf, "Sort By Signature Bytes", -1);
}
 
開發者ID:mahaucsb,項目名稱:pss,代碼行數:41,代碼來源:SigSortMain.java

示例6: main

import org.apache.hadoop.mapred.SequenceFileInputFormat; //導入方法依賴的package包/類
/**
 * Entry point: configures and submits the job that sorts vectors by their
 * maximum weight (infinity-norm). Mappers key each vector by that value,
 * a range partitioner splits the key space, and reducers write the sorted
 * partitions.
 */
public static void main(String[] args) throws IOException {

	JobConf conf = new JobConf();
	new GenericOptionsParser(conf, args);
	conf.setJobName(MaxwSortMain.class.getSimpleName());
	conf.setJarByClass(MaxwSortMain.class);

	// Map side: maximum weight becomes the shuffle key.
	conf.setMapperClass(MaxwSortMapper.class);
	conf.setMapOutputKeyClass(FloatWritable.class);
	conf.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);

	// Contiguous key ranges per reducer keep the global order.
	conf.setPartitionerClass(MaxwRangePartitioner.class);

	// Reduce side.
	conf.setReducerClass(MaxwSortReducer.class);
	conf.setNumReduceTasks(conf.getInt(SortDriver.NUM_REDUCE_PROPERTY,
			SortDriver.NUM_REDUCE_VALUE));
	conf.setOutputKeyClass(LongWritable.class);
	conf.setOutputValueClass(FeatureWeightArrayWritable.class);

	// Input & output paths.
	String inputRoot = SortDriver.INPUT_DIR;
	if (inputRoot == null) {
		throw new UnsupportedOperationException("ERROR: input path not set");
	}
	conf.setInputFormat(SequenceFileInputFormat.class);
	SequenceFileInputFormat.addInputPath(conf, new Path(inputRoot));

	Path sortedOutput = new Path(SortDriver.OUTPUT_DIR);
	// Remove stale output so the job does not fail on an existing directory.
	FileSystem.get(conf).delete(sortedOutput, true);
	conf.setOutputFormat(SequenceFileOutputFormat.class);
	FileOutputFormat.setOutputPath(conf, sortedOutput);

	// Submit.
	JobSubmitter.run(conf, "Sort By infinity-Norm", -1);
}
 
開發者ID:mahaucsb,項目名稱:pss,代碼行數:41,代碼來源:MaxwSortMain.java

示例7: setSeqFileInputOutput

import org.apache.hadoop.mapred.SequenceFileInputFormat; //導入方法依賴的package包/類
/**
 * Points {@code job} at sequence-file input {@code input} and sequence-file
 * output {@code output}.
 */
public static void setSeqFileInputOutput(JobConf job, Path input, Path output) {
	// Input side.
	job.setInputFormat(SequenceFileInputFormat.class);
	SequenceFileInputFormat.addInputPath(job, input);
	// Output side.
	job.setOutputFormat(SequenceFileOutputFormat.class);
	SequenceFileOutputFormat.setOutputPath(job, output);
}
 
開發者ID:thunlp,項目名稱:THUTag,代碼行數:7,代碼來源:MapReduceHelper.java

示例8: main

import org.apache.hadoop.mapred.SequenceFileInputFormat; //導入方法依賴的package包/類
/**
 * Two-stage driver. Job 1 builds an inverted index from the sequence files
 * under {@code args[0]} into the intermediate directory {@code "inverted"};
 * job 2 reads that index and writes document-pair similarities to
 * {@code args[1]}. Usage: two positional arguments (input dir, output dir)
 * after the generic Hadoop options.
 */
public static void main(String[] args) throws Exception {
	JobConf conf = new JobConf();
	conf.setJobName("InvertedIndexDriver-BuildII");
	conf.setJarByClass(InvertedIndexDriver.class);
	GenericOptionsParser parser = new GenericOptionsParser(conf, args);
	args = parser.getRemainingArgs();

	if (args.length != 2)
		printUsage();

	//
	// Job1: build the inverted index.
	//
	conf.setMapperClass(InvertedMapper.class);
	conf.setReducerClass(InvertedReducer.class);
	conf.setNumReduceTasks(4);
	conf.setMapOutputKeyClass(LongWritable.class);
	conf.setMapOutputValueClass(DocWeight.class);
	conf.setOutputKeyClass(LongWritable.class);
	conf.setOutputValueClass(DocWeightArrayWritable.class);

	conf.setInputFormat(SequenceFileInputFormat.class);
	SequenceFileInputFormat.addInputPath(conf, new Path(args[0]));
	conf.setOutputFormat(SequenceFileOutputFormat.class);
	// Intermediate directory consumed by job 2 below; cleared before use.
	Path invertedDir = new Path("inverted");
	FileSystem.get(conf).delete(invertedDir, true);
	SequenceFileOutputFormat.setOutputPath(conf, invertedDir);

	HybridDriver.run(conf);

	//
	// Collect statistics
	//

	//
	// Job2: compute similarities from the inverted index.
	//
	conf = new JobConf(new Configuration());
	conf.setJarByClass(InvertedIndexDriver.class);
	conf.setJobName("InvertedIndexDriver-Similarity (SII)");
	conf.setMapperClass(InvertedSimMapper.class);
	conf.setReducerClass(InvertedSimReducer.class);
	conf.setNumReduceTasks(5);
	conf.setInputFormat(SequenceFileInputFormat.class);
	SequenceFileInputFormat.addInputPath(conf, new Path("inverted"));

	conf.setOutputFormat(SequenceFileOutputFormat.class);
	Path finalOutput = new Path(args[1]);
	FileSystem.get(conf).delete(finalOutput, true);
	SequenceFileOutputFormat.setOutputPath(conf, finalOutput);

	conf.setOutputKeyClass(DocDocWritable.class);
	conf.setOutputValueClass(FloatWritable.class);
	long startMs = System.currentTimeMillis();
	HybridDriver.run(conf);
	System.out.println("Job took " + (System.currentTimeMillis() - startMs) + " millisec.");

}
 
開發者ID:mahaucsb,項目名稱:pss,代碼行數:59,代碼來源:InvertedIndexDriver.java


注:本文中的org.apache.hadoop.mapred.SequenceFileInputFormat.addInputPath方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。