This article collects typical usage examples of the Java method org.apache.hadoop.mapred.SequenceFileInputFormat.addInputPath. If you are wondering how SequenceFileInputFormat.addInputPath is used in Java, or what it is good for, the curated examples below should help. You can also explore further uses of the enclosing class, org.apache.hadoop.mapred.SequenceFileInputFormat.
The following presents 8 code examples of SequenceFileInputFormat.addInputPath, ordered by popularity by default.
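Before the collected examples, here is a minimal self-contained sketch of the call in isolation, using the old mapred API. The class name and paths are placeholders, and no mapper or reducer is set, so the identity defaults apply; it assumes a SequenceFile already exists at the input path.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

public class AddInputPathSketch {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(AddInputPathSketch.class);
        job.setJobName("addInputPath-sketch");
        // addInputPath is a static method inherited from FileInputFormat;
        // it can be called repeatedly to register several input paths.
        job.setInputFormat(SequenceFileInputFormat.class);
        SequenceFileInputFormat.addInputPath(job, new Path("/data/in")); // placeholder path
        job.setOutputFormat(SequenceFileOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path("/data/out")); // placeholder path
        JobClient.runJob(job);
    }
}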
Example 1: SetSeqFileInputOutput
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the package/class the method depends on
public static void SetSeqFileInputOutput(JobConf job, String inputPaths, Path output) throws IOException {
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, output);
    // Expand input pattern.
    FileSystem fs = FileSystem.get(job);
    String[] paths = inputPaths.split(",");
    for (String p : paths) {
        int lastslash = p.lastIndexOf("/");
        if (lastslash < 0) {
            p = "./" + p;
            lastslash = 1;
        }
        String parent = p.substring(0, lastslash);
        p = p.substring(lastslash + 1);
        // Each path is treated as a pattern.
        p = p.replace("\\", "\\\\");
        p = p.replace(".", "\\.");
        p = p.replace("*", ".*");
        p = p.replace("?", ".");
        LOG.info("Use pattern:" + p);
        Pattern re = Pattern.compile(p);
        // List all files.
        FileStatus[] files = fs.listStatus(new Path(parent));
        for (FileStatus f : files) {
            if (re.matcher(f.getPath().getName()).matches()) {
                SequenceFileInputFormat.addInputPath(job, f.getPath());
                LOG.info("Adding input:" + f.getPath());
            }
        }
    }
}
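A hypothetical call site for the helper above; the driver class, glob strings, and output path are made up for illustration. Each comma-separated entry is matched as a pattern against the file names in its parent directory:
JobConf job = new JobConf(MyDriver.class); // MyDriver is a placeholder class name
// Adds every file named part-* under data/run1 and data/run2 as input.
SetSeqFileInputOutput(job, "data/run1/part-*,data/run2/part-*", new Path("out/merged"));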
Example 2: setInputOutput
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the package/class the method depends on
public static JobConf setInputOutput(JobConf job, Path inputPath, Path outputPath)
        throws IOException {
    job.setInputFormat(NonSplitableSequenceInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, inputPath);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(MultiSeqOutput.class);
    MultiSeqOutput.setOutputPath(job, outputPath);
    return job;
}
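Note that NonSplitableSequenceInputFormat and MultiSeqOutput are project-specific classes, not part of Hadoop. A plausible call site, with made-up paths, might look like:
JobConf job = setInputOutput(new JobConf(), new Path("seq/in"), new Path("seq/out")); // placeholder paths
JobClient.runJob(job); // assumes mapper/reducer and key/value classes are configured elsewhere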
Example 3: main
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the package/class the method depends on
/**
* Sets the job configurations including the mapper and reducer classes to
* do the sorting based on vector lengths.
*/
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJobName(LengthSortMain.class.getSimpleName());
    job.setJarByClass(LengthSortMain.class);
    job.setMapperClass(LengthSortMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);
    job.setPartitionerClass(LengthRangePartitioner.class);
    job.setReducerClass(LengthSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY,
            SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);
    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(SortDriver.OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    //
    // run
    //
    JobSubmitter.run(job, "Sort By Vector Lengths", -1);
}
Example 4: main
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the package/class the method depends on
/**
* Main method sets the job configurations including the mapper and reducer
* classes to do the sorting. Some of the produced partitions might be
* merged later to reflect the number of partitions chosen by the user.
*/
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJobName("NormSort");
    job.setJarByClass(NormSortMain.class);
    job.setMapperClass(NormSortMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);
    job.setPartitionerClass(NormRangePartitioner.class);
    job.setReducerClass(NormSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY,
            SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);
    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(SortDriver.OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    //
    // run
    //
    JobSubmitter.run(job, "Sort By p-norm", -1);
}
Example 5: main
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the package/class the method depends on
/**
* Sets the job configurations including the mapper and reducer classes to
 * do the sorting based on signatures.
*/
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJobName(SigSortMain.class.getSimpleName());
    job.setJarByClass(SigSortMain.class);
    job.setMapperClass(SigSortMapper.class);
    job.setMapOutputKeyClass(BitSignature.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setPartitionerClass(SigRangePartitioner.class);
    job.setReducerClass(SigSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY,
            SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BitSignature.class);
    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(OUTPUT_PATH);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    //
    // run
    //
    JobSubmitter.run(job, "Sort By Signature Bytes", -1);
}
Example 6: main
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the package/class the method depends on
/**
* Main method sets the job configurations including the mapper and reducer
* classes to do the sorting.
*/
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    // ToolRunner.printGenericCommandUsage(System.out);
    job.setJobName(MaxwSortMain.class.getSimpleName());
    job.setJarByClass(MaxwSortMain.class);
    job.setMapperClass(MaxwSortMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);
    job.setPartitionerClass(MaxwRangePartitioner.class);
    job.setReducerClass(MaxwSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY,
            SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);
    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(SortDriver.OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    //
    // run
    //
    JobSubmitter.run(job, "Sort By infinity-Norm", -1);
}
Example 7: setSeqFileInputOutput
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the package/class the method depends on
public static void setSeqFileInputOutput(JobConf job, Path input, Path output) {
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileInputFormat.addInputPath(job, input);
    SequenceFileOutputFormat.setOutputPath(job, output);
}
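Unlike the pattern-expanding overload in Example 1, this helper registers a single literal path and leaves the output directory untouched. A hypothetical call site, with placeholder paths:
JobConf job = new JobConf();
setSeqFileInputOutput(job, new Path("vectors/in"), new Path("vectors/out")); // placeholder paths
JobClient.runJob(job); // assumes mapper/reducer and key/value classes are set elsewhere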
Example 8: main
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    job.setJobName("InvertedIndexDriver-BuildII");
    job.setJarByClass(InvertedIndexDriver.class);
    GenericOptionsParser gop = new GenericOptionsParser(job, args);
    args = gop.getRemainingArgs();
    if (args.length != 2)
        printUsage();
    //
    // Job1
    //
    job.setMapperClass(InvertedMapper.class);
    job.setReducerClass(InvertedReducer.class);
    job.setNumReduceTasks(4);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(DocWeight.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(DocWeightArrayWritable.class);
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(args[0]));
    job.setOutputFormat(SequenceFileOutputFormat.class);
    Path interPath = new Path("inverted");
    FileSystem.get(job).delete(interPath, true);
    SequenceFileOutputFormat.setOutputPath(job, interPath);
    HybridDriver.run(job);
    //
    // Collect statistics
    //
    //
    // Job2
    //
    job = new JobConf(new Configuration());
    job.setJarByClass(InvertedIndexDriver.class);
    job.setJobName("InvertedIndexDriver-Similarity (SII)");
    job.setMapperClass(InvertedSimMapper.class);
    job.setReducerClass(InvertedSimReducer.class);
    job.setNumReduceTasks(5);
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path("inverted"));
    job.setOutputFormat(SequenceFileOutputFormat.class);
    Path outputPath = new Path(args[1]);
    FileSystem.get(job).delete(outputPath, true);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputKeyClass(DocDocWritable.class);
    job.setOutputValueClass(FloatWritable.class);
    long t = System.currentTimeMillis();
    HybridDriver.run(job);
    System.out.println("Job took " + (System.currentTimeMillis() - t) + " millisec.");
}
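The second job consumes the intermediate directory "inverted" produced by the first, so addInputPath here chains the two jobs together. As an optional follow-up, not part of the original example, the intermediate data could be removed once the pipeline completes:
// Hypothetical cleanup of the intermediate output (not in the original code).
FileSystem.get(job).delete(new Path("inverted"), true);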