本文整理汇总了Java中org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat类的典型用法代码示例。如果您正苦于以下问题：Java SequenceFileInputFormat类的具体用法？Java SequenceFileInputFormat怎么用？Java SequenceFileInputFormat使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
SequenceFileInputFormat类属于org.apache.hadoop.mapreduce.lib.input包,在下文中一共展示了SequenceFileInputFormat类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: run
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Configures the "calculating parameter" job and blocks until it finishes.
 *
 * @param conf      configuration the job is created from; {@code params} is stored in it
 *                  under the key {@code "params"}
 * @param inputPath input location, read with {@code SequenceFileInputFormat}
 * @param output    output location, written with {@code SequenceFileOutputFormat}
 * @param params    value published to the job through the configuration
 * @throws InterruptedException also thrown when the job reports that it did not succeed
 */
public static void run(Configuration conf, Path inputPath, Path output, double params) throws IOException, ClassNotFoundException, InterruptedException {
    // Stored in conf before the Job is created from it.
    conf.set("params", Double.toString(params));
    Job job = new Job(conf, "calculating parameter");
    job.setJarByClass(LDADriver.class);

    // Input side.
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.addInputPath(job, inputPath);

    // Map stage.
    job.setMapperClass(CalParamsMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(indexToCountWritable.class);

    // Reduce stage and final output.
    job.setReducerClass(CalParamsReducer.class);
    job.setOutputKeyClass(twoDimensionIndexWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, output);

    boolean succeeded = job.waitForCompletion(true);
    if (succeeded) {
        return;
    }
    throw new InterruptedException("calculating parameter failed");
}
示例2: Run
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Runs a map-only job (zero reduce tasks) that reads sequence files from {@code input}
 * and writes its output through {@code EsOutputFormat}, blocking until the job finishes.
 *
 * @param input path of the sequence-file input
 * @param conf  configuration the job is created from
 * @throws IOException            if the job finishes unsuccessfully, or on an I/O error
 *                                raised by the Hadoop calls
 * @throws ClassNotFoundException propagated from {@code waitForCompletion}
 * @throws InterruptedException   propagated from {@code waitForCompletion}
 */
public static void Run(String input, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(conf);
    job.setJarByClass(Hdfs2es.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapperClass(MapTask.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(EsOutputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(input));
    job.setSpeculativeExecution(false);
    // Surface a failed job to the caller instead of silently returning as if it succeeded.
    if (!job.waitForCompletion(true)) {
        throw new IOException("Job failed for input " + input);
    }
}
示例3: joinAs
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Runs a composite-join job of the given type over four generated sources, asserts that
 * the job succeeded, and then deletes the working directory.
 *
 * @param jointype join operator name; also used as the name of the working directory
 * @param map      mapper class for the job
 * @param reduce   reducer class for the job
 * @throws Exception on any failure while preparing, running or cleaning up the job
 */
private static void joinAs(String jointype,
        Class<? extends SimpleCheckerMapBase<?>> map,
        Class<? extends SimpleCheckerReduceBase> reduce) throws Exception {
    final int sourceCount = 4;
    Configuration conf = new Configuration();
    Path workDir = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
    Path[] sources = writeSimpleSrc(workDir, conf, sourceCount);

    // Both settings are stored in conf before the Job is created from it.
    String joinExpression =
            CompositeInputFormat.compose(jointype, SequenceFileInputFormat.class, sources);
    conf.set(CompositeInputFormat.JOIN_EXPR, joinExpression);
    conf.setInt("testdatamerge.sources", sourceCount);

    Job job = Job.getInstance(conf);
    job.setInputFormatClass(CompositeInputFormat.class);
    job.setMapperClass(map);
    job.setReducerClass(reduce);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(workDir, "out"));

    job.waitForCompletion(true);
    assertTrue("Job failed", job.isSuccessful());
    boolean isOuterJoin = "outer".equals(jointype);
    if (isOuterJoin) {
        checkOuterConsistency(job, sources);
    }
    FileSystem workFs = workDir.getFileSystem(conf);
    workFs.delete(workDir, true);
}
示例4: configureJob
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Builds (but does not submit) the map-only upload job.
 *
 * @param conf configuration the job is created from
 * @param args {@code args[0]} is the sequence-file input path, {@code args[1]} the table name
 * @return the configured job
 * @throws IOException declared for the job setup calls
 */
public static Job configureJob(Configuration conf, String [] args)
        throws IOException {
    final Path inputPath = new Path(args[0]);
    final String tableName = args[1];
    final String jobName = NAME + "_" + tableName;

    Job job = new Job(conf, jobName);
    job.setJarByClass(Uploader.class);
    job.setMapperClass(Uploader.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, inputPath);

    // initTableReducerJob is called only because it sets up the TableOutputFormat;
    // the job has no reducers and writes straight to the table, so the reduce-task
    // count is forced back to zero afterwards.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
}
示例5: getJob
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Builds (but does not submit) a job over sequence-file input, using
 * {@code TokenizerMapper} as mapper and {@code IntSumReducer} as combiner and reducer.
 * An existing output directory is deleted first.
 *
 * @param jobname    name given to the job
 * @param inputConf  base configuration; it is copied, so the caller's instance is not modified
 * @param inputpath  sequence-file input path
 * @param outputpath output path; also stored in the copied configuration under
 *                   {@code "fileoutputpath"}
 * @return the configured job
 * @throws Exception on any failure while preparing the job
 */
protected static Job getJob(String jobname, Configuration inputConf,
        String inputpath, String outputpath)
        throws Exception {
    final Configuration conf = new Configuration(inputConf);
    conf.set("fileoutputpath", outputpath);

    final Path outputDir = new Path(outputpath);
    // FileSystem.get hands out a shared, cached instance. Closing it here would break
    // every other holder of that instance ("Filesystem closed"), so it is left open.
    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    final Job job = Job.getInstance(conf, jobname);
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputpath));
    FileOutputFormat.setOutputPath(job, outputDir);
    return job;
}
示例6: getJob
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Builds (but does not submit) the {@code NonSortTestMR} job: sequence-file input,
 * text output. An existing output directory is deleted first.
 *
 * @param conf       configuration the job is created from
 * @param jobName    name given to the job
 * @param inputpath  sequence-file input path
 * @param outputpath output path
 * @return the configured job
 * @throws IOException on a file system or job setup error
 */
private Job getJob(Configuration conf, String jobName,
        String inputpath, String outputpath) throws IOException {
    final Path outputDir = new Path(outputpath);
    // FileSystem.get hands out a shared, cached instance. Closing it here would break
    // every other holder of that instance ("Filesystem closed"), so it is left open.
    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    final Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(NonSortTestMR.class);
    job.setMapperClass(NonSortTestMR.Map.class);
    job.setReducerClass(NonSortTestMR.KeyHashSumReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputpath));
    FileOutputFormat.setOutputPath(job, outputDir);
    return job;
}
示例7: getCompressJob
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Builds (but does not submit) the compress-mapper job over sequence-file input.
 * An existing output directory is deleted first.
 *
 * @param jobname    base job name; {@code "-CompressMapperJob"} is appended to it
 * @param conf       configuration the job is created from
 * @param inputpath  sequence-file input path
 * @param outputpath output path
 * @return the configured job
 * @throws Exception on any failure while preparing the job
 */
public static Job getCompressJob(String jobname, Configuration conf,
        String inputpath, String outputpath)
        throws Exception {
    Job job = Job.getInstance(conf, jobname + "-CompressMapperJob");
    job.setJarByClass(CompressMapper.class);
    job.setMapperClass(TextCompressMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Remove a stale output directory if one exists.
    final Path outputDir = new Path(outputpath);
    // FileSystem.get hands out a shared, cached instance. Closing it here would break
    // every other holder of that instance ("Filesystem closed"), so it is left open.
    final FileSystem hdfs = FileSystem.get(new ScenarioConfiguration());
    if (hdfs.exists(outputDir)) {
        hdfs.delete(outputDir, true);
    }

    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputpath));
    FileOutputFormat.setOutputPath(job, outputDir);
    return job;
}
示例8: KVJob
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Creates the key/value test job over sequence-file input. When the
 * {@code TestConstants.NATIVETASK_KVTEST_CREATEFILE} setting is {@code "true"}, the input
 * path is deleted and a fresh sequence test file is generated there first.
 *
 * @param jobname    name given to the job
 * @param conf       configuration the job is created from
 * @param keyclass   output key class; also the key type of the generated test file
 * @param valueclass map output value class; also the value type of the generated test file
 * @param inputpath  sequence-file input path
 * @param outputpath output path
 * @throws Exception on any failure while preparing the job or generating the input
 */
public KVJob(String jobname, Configuration conf,
        Class<?> keyclass, Class<?> valueclass,
        String inputpath, String outputpath) throws Exception {
    job = Job.getInstance(conf, jobname);
    job.setJarByClass(KVJob.class);
    job.setMapperClass(KVJob.ValueMapper.class);
    job.setOutputKeyClass(keyclass);
    job.setMapOutputValueClass(valueclass);

    // Constant-first comparison: an unset key means "do not create" rather than an NPE.
    if ("true".equals(conf.get(TestConstants.NATIVETASK_KVTEST_CREATEFILE))) {
        // FileSystem.get hands out a shared, cached instance. Closing it here would break
        // every other holder of that instance ("Filesystem closed"), so it is left open.
        final FileSystem fs = FileSystem.get(conf);
        fs.delete(new Path(inputpath), true);

        final TestInputFile testfile = new TestInputFile(Integer.valueOf(conf.get(
                TestConstants.FILESIZE_KEY, "1000")),
                keyclass.getName(), valueclass.getName(), conf);
        StopWatch sw = new StopWatch().start();
        testfile.createSequenceTestFile(inputpath);
        LOG.info("Created test file " + inputpath + " in "
                + sw.now(TimeUnit.MILLISECONDS) + "ms");
    }
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputpath));
    FileOutputFormat.setOutputPath(job, new Path(outputpath));
}
示例9: genBigItemMap
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Runs the single-reducer job that inverts the input pairs with {@code InverseMapper}
 * and reduces them with {@code ItemBigRebasingReducer}.
 *
 * @param input  sequence-file input path
 * @param output sequence-file output path
 * @return the value returned by {@code waitForCompletion}
 */
private boolean genBigItemMap(String input, String output) throws IOException, ClassNotFoundException,
        InterruptedException {
    // NOTE(review): the job name uses the field this.input, not the parameter - confirm intended.
    final String jobName = "Computing items remapping for " + this.input;
    final Job remapJob = Job.getInstance(this.getConf(), jobName);
    remapJob.setJarByClass(TopPIoverHadoop.class);

    remapJob.setMapperClass(InverseMapper.class);
    remapJob.setReducerClass(ItemBigRebasingReducer.class);
    remapJob.setNumReduceTasks(1);

    remapJob.setOutputKeyClass(IntWritable.class);
    remapJob.setOutputValueClass(IntWritable.class);

    remapJob.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.addInputPath(remapJob, new Path(input));
    remapJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(remapJob, new Path(output));

    boolean completed = remapJob.waitForCompletion(true);
    return completed;
}
示例10: run
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Runs the single-reducer "Norm2Job" over a sequence-file matrix and blocks until it finishes.
 *
 * @param conf             configuration the job is created from; {@code meanSpanFileName}
 *                         is stored in it under {@code MEANSPANOPTION}
 * @param matrixInputPath  input path; qualified against its own file system before use
 * @param meanSpanFileName value published to the job through the configuration
 * @param matrixOutputPath output path; qualified against the input path's file system
 * @throws IOException            if the job finishes unsuccessfully, or on an I/O error
 *                                raised by the Hadoop calls
 * @throws InterruptedException   propagated from {@code waitForCompletion}
 * @throws ClassNotFoundException propagated from {@code waitForCompletion}
 */
public void run(Configuration conf, Path matrixInputPath,
        String meanSpanFileName, Path matrixOutputPath) throws IOException,
        InterruptedException, ClassNotFoundException {
    // Stored in conf before the Job is created from it.
    conf.set(MEANSPANOPTION, meanSpanFileName);
    Job job = new Job(conf);
    job.setJobName("Norm2Job");
    job.setJarByClass(Norm2Job.class);

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    Path qualifiedInput = fs.makeQualified(matrixInputPath);
    Path qualifiedOutput = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, qualifiedInput);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, qualifiedOutput);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(DoubleWritable.class);

    // waitForCompletion submits the job itself when it has not been submitted yet, so no
    // separate submit() call is needed. A failed job is reported to the caller.
    if (!job.waitForCompletion(true)) {
        throw new IOException("Norm2Job failed for input " + qualifiedInput);
    }
}
示例11: runIteration
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Runs one iteration job and blocks until it finishes. The model output location is
 * deleted before the job starts.
 *
 * @param conf            configuration the job is created from
 * @param corpusInput     sequence-file input path
 * @param modelInput      model location passed to {@code setModelPaths}
 * @param modelOutput     sequence-file output path
 * @param iterationNumber number of this iteration, used in the job name and error message
 * @param maxIterations   total iteration count, used in the job name
 * @param numReduceTasks  number of reduce tasks for the job
 * @throws InterruptedException also thrown when the job reports that it did not succeed
 */
public static void runIteration(Configuration conf, Path corpusInput, Path modelInput, Path modelOutput,
        int iterationNumber, int maxIterations, int numReduceTasks)
        throws IOException, ClassNotFoundException, InterruptedException {
    final String jobName = String.format("Iteration %d of %d, input path: %s",
            iterationNumber, maxIterations, modelInput);
    log.info("About to run: " + jobName);

    final Job iterationJob = new Job(conf, jobName);
    iterationJob.setJarByClass(CVB0Driver.class);

    // Processing classes.
    iterationJob.setMapperClass(CachingCVB0Mapper.class);
    iterationJob.setCombinerClass(VectorSumReducer.class);
    iterationJob.setReducerClass(VectorSumReducer.class);
    iterationJob.setNumReduceTasks(numReduceTasks);

    // Record types and formats.
    iterationJob.setOutputKeyClass(IntWritable.class);
    iterationJob.setOutputValueClass(VectorWritable.class);
    iterationJob.setInputFormatClass(SequenceFileInputFormat.class);
    iterationJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    // Locations.
    FileInputFormat.addInputPath(iterationJob, corpusInput);
    FileOutputFormat.setOutputPath(iterationJob, modelOutput);
    setModelPaths(iterationJob, modelInput);
    HadoopUtil.delete(conf, modelOutput);

    boolean succeeded = iterationJob.waitForCompletion(true);
    if (succeeded) {
        return;
    }
    throw new InterruptedException(String.format("Failed to complete iteration %d stage 1",
            iterationNumber));
}
示例12: runJob
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Runs the map-only "UnitVectorizerJob" from {@code input} to {@code output}, both in
 * sequence-file format, and blocks until it finishes.
 *
 * @param input  sequence-file input path
 * @param output sequence-file output path
 * @throws IllegalStateException if the job reports that it did not succeed
 */
public static void runJob(Path input, Path output)
        throws IOException, InterruptedException, ClassNotFoundException {
    final Job vectorizerJob = new Job(new Configuration(), "UnitVectorizerJob");
    vectorizerJob.setJarByClass(UnitVectorizerJob.class);

    // Map-only: there is no reduce stage.
    vectorizerJob.setMapperClass(UnitVectorizerMapper.class);
    vectorizerJob.setNumReduceTasks(0);

    vectorizerJob.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.addInputPath(vectorizerJob, input);

    vectorizerJob.setOutputKeyClass(IntWritable.class);
    vectorizerJob.setOutputValueClass(VectorWritable.class);
    vectorizerJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(vectorizerJob, output);

    if (vectorizerJob.waitForCompletion(true)) {
        return;
    }
    throw new IllegalStateException("Job failed!");
}
示例13: calculatePerplexity
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Runs the single-reducer perplexity job over the corpus and returns the value read back
 * by {@code readPerplexity}. The job's output location is deleted before the job starts.
 *
 * @param conf       configuration the job is created from
 * @param corpusPath sequence-file input path
 * @param modelPath  model location passed to {@code setModelPaths}; its parent directory
 *                   is used to derive the output path and to read the result
 * @param iteration  iteration number used to derive the output path and to read the result
 * @return the value returned by {@code readPerplexity}
 * @throws InterruptedException also thrown when the job reports that it did not succeed
 */
private static double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath, int iteration)
        throws IOException, ClassNotFoundException, InterruptedException {
    final String jobName = "Calculating perplexity for " + modelPath;
    log.info("About to run: " + jobName);

    final Job perplexityJob = new Job(conf, jobName);
    perplexityJob.setJarByClass(CachingCVB0PerplexityMapper.class);

    // Processing classes.
    perplexityJob.setMapperClass(CachingCVB0PerplexityMapper.class);
    perplexityJob.setCombinerClass(DualDoubleSumReducer.class);
    perplexityJob.setReducerClass(DualDoubleSumReducer.class);
    perplexityJob.setNumReduceTasks(1);

    // Record types and formats.
    perplexityJob.setOutputKeyClass(DoubleWritable.class);
    perplexityJob.setOutputValueClass(DoubleWritable.class);
    perplexityJob.setInputFormatClass(SequenceFileInputFormat.class);
    perplexityJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    // Locations.
    FileInputFormat.addInputPath(perplexityJob, corpusPath);
    final Path modelParent = modelPath.getParent();
    final Path outputPath = perplexityPath(modelParent, iteration);
    FileOutputFormat.setOutputPath(perplexityJob, outputPath);
    setModelPaths(perplexityJob, modelPath);
    HadoopUtil.delete(conf, outputPath);

    boolean succeeded = perplexityJob.waitForCompletion(true);
    if (!succeeded) {
        throw new InterruptedException("Failed to calculate perplexity for: " + modelPath);
    }
    return readPerplexity(conf, modelParent, iteration);
}
示例14: postProcessMR
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Post-processes clustering output as a map-reduce job. The number of reduce tasks is set
 * to the cluster count read by {@code ClusterCountReader}, so that each cluster's vectors
 * are written to their own part file.
 *
 * @param conf
 *          the Hadoop configuration
 * @param input
 *          the output path that was given to the clustering algorithm, i.e. the directory
 *          containing clusters-*-final and clusteredPoints; the job reads its
 *          {@code clusteredPoints} child
 * @param output
 *          the path where the post-processed data is stored
 * @throws InterruptedException
 *           also thrown when the job reports that it did not succeed
 */
private static void postProcessMR(Configuration conf, Path input, Path output) throws IOException,
        InterruptedException,
        ClassNotFoundException {
    final String jobName = "ClusterOutputPostProcessor Driver running over input: " + input;
    final Job postProcessJob = new Job(conf, jobName);
    postProcessJob.setJarByClass(ClusterOutputPostProcessorDriver.class);

    // Map stage.
    postProcessJob.setMapperClass(ClusterOutputPostProcessorMapper.class);
    postProcessJob.setMapOutputKeyClass(Text.class);
    postProcessJob.setMapOutputValueClass(VectorWritable.class);

    // Reduce stage: one reduce task per cluster.
    postProcessJob.setReducerClass(ClusterOutputPostProcessorReducer.class);
    postProcessJob.setOutputKeyClass(Text.class);
    postProcessJob.setOutputValueClass(VectorWritable.class);
    final int numberOfClusters = ClusterCountReader.getNumberOfClusters(input, conf);
    postProcessJob.setNumReduceTasks(numberOfClusters);

    // Formats and locations.
    postProcessJob.setInputFormatClass(SequenceFileInputFormat.class);
    postProcessJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    final Path clusteredPoints = new Path(input, new Path("clusteredPoints"));
    FileInputFormat.addInputPath(postProcessJob, clusteredPoints);
    FileOutputFormat.setOutputPath(postProcessJob, output);

    boolean succeeded = postProcessJob.waitForCompletion(true);
    if (succeeded) {
        return;
    }
    throw new InterruptedException("ClusterOutputPostProcessor Job failed processing " + input);
}
示例15: writeTopicModel
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Submits, without waiting for completion, a map-only job that runs
 * {@code CVB0TopicTermVectorNormalizerMapper} over the model and writes sequence-file output.
 *
 * @param conf       configuration the job is created from
 * @param modelInput sequence-file input path
 * @param output     sequence-file output path
 * @return the submitted job, so the caller can track it
 */
private static Job writeTopicModel(Configuration conf, Path modelInput, Path output)
        throws IOException, InterruptedException, ClassNotFoundException {
    final String jobName =
            String.format("Writing final topic/term distributions from %s to %s", modelInput, output);
    log.info("About to run: " + jobName);

    final Job writerJob = new Job(conf, jobName);
    writerJob.setJarByClass(CVB0Driver.class);

    // Map-only: there is no reduce stage.
    writerJob.setMapperClass(CVB0TopicTermVectorNormalizerMapper.class);
    writerJob.setNumReduceTasks(0);

    writerJob.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.addInputPath(writerJob, modelInput);

    writerJob.setOutputKeyClass(IntWritable.class);
    writerJob.setOutputValueClass(VectorWritable.class);
    writerJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(writerJob, output);

    writerJob.submit();
    return writerJob;
}