Java SequenceFileInputFormat类代码示例

本文整理汇总了Java中org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat类的典型用法代码示例。如果您正苦于以下问题：Java SequenceFileInputFormat类的具体用法？Java SequenceFileInputFormat怎么用？Java SequenceFileInputFormat使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。

SequenceFileInputFormat类属于org.apache.hadoop.mapreduce.lib.input包，在下文中一共展示了SequenceFileInputFormat类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: run

import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Configures the "calculating parameter" MapReduce job and blocks until it finishes.
 *
 * <p>The string form of {@code params} is written to the {@code "params"} key of
 * {@code conf} before the job is created from that configuration.
 *
 * @param conf      configuration the job is created from; its {@code "params"} entry is overwritten
 * @param inputPath path added as job input (read with {@code SequenceFileInputFormat})
 * @param output    path set as job output (written with {@code SequenceFileOutputFormat})
 * @param params    value stored under the {@code "params"} configuration key
 * @throws IOException            if job setup or execution fails with an I/O error
 * @throws ClassNotFoundException if thrown while waiting for the job
 * @throws InterruptedException   if waiting is interrupted, and also when the job
 *                                completes unsuccessfully
 */
public static void run(Configuration conf, Path inputPath, Path output, double params) throws IOException, ClassNotFoundException, InterruptedException {
    // Set on conf before the Job is created from it.
    conf.set("params", String.valueOf(params));

    final String title = "calculating parameter";
    Job paramJob = new Job(conf, title);
    paramJob.setJarByClass(LDADriver.class);

    // Mapper / reducer wiring.
    paramJob.setMapperClass(CalParamsMapper.class);
    paramJob.setReducerClass(CalParamsReducer.class);

    // Intermediate (map output) and final (job output) record types.
    paramJob.setMapOutputKeyClass(IntWritable.class);
    paramJob.setMapOutputValueClass(indexToCountWritable.class);
    paramJob.setOutputKeyClass(twoDimensionIndexWritable.class);
    paramJob.setOutputValueClass(Text.class);

    // Sequence files on both ends.
    paramJob.setInputFormatClass(SequenceFileInputFormat.class);
    paramJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.addInputPath(paramJob, inputPath);
    FileOutputFormat.setOutputPath(paramJob, output);

    boolean succeeded = paramJob.waitForCompletion(true);
    if (succeeded) {
        return;
    }
    throw new InterruptedException("calculating parameter failed");
}

开发者ID:huyang1，项目名称:LDA，代码行数:25，代码来源:CalParamDriver.java

示例2: Run

import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Runs a map-only job that reads sequence files from {@code input} and writes the
 * mapper output through {@code EsOutputFormat}, blocking until the job finishes.
 *
 * @param input path of the sequence-file input
 * @param conf  configuration the job is created from
 * @throws IOException            if job setup fails or the job finishes unsuccessfully
 * @throws ClassNotFoundException if thrown while waiting for the job
 * @throws InterruptedException   if waiting for the job is interrupted
 */
public static void Run(String input, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(conf);
    job.setJarByClass(Hdfs2es.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setMapperClass(MapTask.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(EsOutputFormat.class);

    // Map-only: mapper output goes straight to the output format.
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(input));

    // NOTE(review): presumably disabled so duplicate task attempts do not write the
    // same records twice to the external sink - confirm.
    job.setSpeculativeExecution(false);

    // waitForCompletion reports failure through its return value; previously that
    // value was dropped and a failed job looked like a success to the caller.
    if (!job.waitForCompletion(true)) {
        throw new IOException("Job failed for input " + input);
    }
}

开发者ID:chaopengio，项目名称:elasticsearch-mapreduce，代码行数:25，代码来源:Hdfs2es.java

示例3: joinAs

import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Writes four simple sources under {@code /<jointype>}, runs a composite-join job of
 * the given type over them, asserts that the job succeeded and removes the directory.
 *
 * <p>For the {@code "outer"} join type the job output is additionally passed to
 * {@code checkOuterConsistency}.
 *
 * @param jointype join operator name handed to {@code CompositeInputFormat.compose}
 * @param map      mapper class for the job
 * @param reduce   reducer class for the job
 * @throws Exception on any setup, execution or cleanup failure
 */
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerMapBase<?>> map,
    Class<? extends SimpleCheckerReduceBase> reduce) throws Exception {
  final int sourceCount = 4;
  Configuration conf = new Configuration();
  Path root = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] inputs = writeSimpleSrc(root, conf, sourceCount);

  // The join expression and source count travel to the tasks via the configuration.
  String joinExpr = CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, inputs);
  conf.set(CompositeInputFormat.JOIN_EXPR, joinExpr);
  conf.setInt("testdatamerge.sources", sourceCount);

  Job joinJob = Job.getInstance(conf);
  joinJob.setInputFormatClass(CompositeInputFormat.class);
  joinJob.setOutputFormatClass(SequenceFileOutputFormat.class);
  joinJob.setMapperClass(map);
  joinJob.setReducerClass(reduce);
  joinJob.setOutputKeyClass(IntWritable.class);
  joinJob.setOutputValueClass(IntWritable.class);
  FileOutputFormat.setOutputPath(joinJob, new Path(root, "out"));

  joinJob.waitForCompletion(true);
  assertTrue("Job failed", joinJob.isSuccessful());
  if ("outer".equals(jointype)) {
    checkOuterConsistency(joinJob, inputs);
  }

  // Clean up everything written for this join type.
  root.getFileSystem(conf).delete(root, true);
}

开发者ID:naver，项目名称:hadoop，代码行数:27，代码来源:TestJoinDatamerge.java

示例4: configureJob

import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Builds the map-only upload job without submitting it.
 *
 * @param conf configuration the job is created from
 * @param args {@code args[0]} is the input path, {@code args[1]} the table name
 * @return the configured job
 * @throws IOException if job creation or setup fails
 */
public static Job configureJob(Configuration conf, String [] args)
throws IOException {
  final Path source = new Path(args[0]);
  final String table = args[1];

  Job uploadJob = new Job(conf, NAME + "_" + table);
  uploadJob.setJarByClass(Uploader.class);
  uploadJob.setMapperClass(Uploader.class);
  uploadJob.setInputFormatClass(SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(uploadJob, source);

  // There is no reducer: initTableReducerJob is invoked only because it sets up
  // the TableOutputFormat. The reducer count is forced to zero afterwards so the
  // mappers write straight to the table.
  TableMapReduceUtil.initTableReducerJob(table, null, uploadJob);
  uploadJob.setNumReduceTasks(0);
  return uploadJob;
}

开发者ID:fengchen8086，项目名称:ditb，代码行数:19，代码来源:SampleUploader.java

示例5: getJob

import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Builds (without submitting) a word-count style job that reads sequence files from
 * {@code inputpath} and writes to {@code outputpath}.
 *
 * <p>The job runs on a copy of {@code inputConf} in which {@code "fileoutputpath"} is
 * set to {@code outputpath}. Any existing output at {@code outputpath} is deleted
 * recursively first.
 *
 * @param jobname    name given to the job
 * @param inputConf  base configuration; it is copied, not modified
 * @param inputpath  sequence-file input path
 * @param outputpath output path; removed beforehand if it exists
 * @return the configured job
 * @throws Exception if the output cleanup or job setup fails
 */
protected static Job getJob(String jobname, Configuration inputConf,
                            String inputpath, String outputpath)
    throws Exception {
  final Configuration conf = new Configuration(inputConf);
  conf.set("fileoutputpath", outputpath);

  final Path outputDir = new Path(outputpath);
  // FileSystem.get() returns a shared, cached instance; closing it can break other
  // code holding the same object. Use a private instance instead, and let
  // try-with-resources close it even when exists()/delete() throws.
  try (FileSystem fs = FileSystem.newInstance(conf)) {
    if (fs.exists(outputDir)) {
      fs.delete(outputDir, true);
    }
  }

  final Job job = Job.getInstance(conf, jobname);
  job.setJarByClass(WordCount.class);
  job.setMapperClass(TokenizerMapper.class);
  // The same summing reducer doubles as the combiner.
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  FileInputFormat.addInputPath(job, new Path(inputpath));
  FileOutputFormat.setOutputPath(job, outputDir);
  return job;
}

开发者ID:aliyun-beta，项目名称:aliyun-oss-hadoop-fs，代码行数:23，代码来源:CombinerTest.java

示例6: getJob

import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Builds (without submitting) the {@code NonSortTestMR} job that reads sequence files
 * from {@code inputpath} and writes text output to {@code outputpath}.
 *
 * <p>Any existing output at {@code outputpath} is deleted recursively first.
 *
 * @param conf       configuration the job is created from
 * @param jobName    name given to the job
 * @param inputpath  sequence-file input path
 * @param outputpath output path; removed beforehand if it exists
 * @return the configured job
 * @throws IOException if the output cleanup or job setup fails
 */
private Job getJob(Configuration conf, String jobName,
                   String inputpath, String outputpath) throws IOException {
  final Path outputDir = new Path(outputpath);
  // FileSystem.get() returns a shared, cached instance; closing it can break other
  // code holding the same object. Use a private instance instead, and let
  // try-with-resources close it even when exists()/delete() throws.
  try (FileSystem fs = FileSystem.newInstance(conf)) {
    if (fs.exists(outputDir)) {
      fs.delete(outputDir, true);
    }
  }

  final Job job = Job.getInstance(conf, jobName);
  job.setJarByClass(NonSortTestMR.class);
  job.setMapperClass(NonSortTestMR.Map.class);
  job.setReducerClass(NonSortTestMR.KeyHashSumReduce.class);
  // Map output values are IntWritable while the final output values are LongWritable.
  job.setOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileInputFormat.addInputPath(job, new Path(inputpath));
  FileOutputFormat.setOutputPath(job, outputDir);
  return job;
}

开发者ID:aliyun-beta，项目名称:aliyun-oss-hadoop-fs，代码行数:21，代码来源:NonSortTest.java

示例7: getCompressJob

import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Builds (without submitting) the compress-mapper job, named
 * {@code jobname + "-CompressMapperJob"}, that reads sequence files from
 * {@code inputpath} and writes to {@code outputpath}.
 *
 * <p>Any existing output at {@code outputpath} is deleted recursively first, using a
 * file system obtained from a fresh {@code ScenarioConfiguration}.
 *
 * @param jobname    prefix of the job name
 * @param conf       configuration the job is created from
 * @param inputpath  sequence-file input path
 * @param outputpath output path; removed beforehand if it exists
 * @return the configured job
 * @throws Exception if job setup or the output cleanup fails
 */
public static Job getCompressJob(String jobname, Configuration conf,
                                 String inputpath, String outputpath)
  throws Exception {
  Job job = Job.getInstance(conf, jobname + "-CompressMapperJob");
  job.setJarByClass(CompressMapper.class);
  job.setMapperClass(TextCompressMapper.class);
  job.setOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);

  final Path outputDir = new Path(outputpath);
  // Delete stale output. FileSystem.get() returns a shared, cached instance, so
  // closing it can break other holders; use a private instance and let
  // try-with-resources close it even when exists()/delete() throws.
  try (FileSystem hdfs = FileSystem.newInstance(new ScenarioConfiguration())) {
    if (hdfs.exists(outputDir)) {
      hdfs.delete(outputDir, true);
    }
  }

  job.setInputFormatClass(SequenceFileInputFormat.class);
  FileInputFormat.addInputPath(job, new Path(inputpath));
  FileOutputFormat.setOutputPath(job, outputDir);
  return job;
}

开发者ID:aliyun-beta，项目名称:aliyun-oss-hadoop-fs，代码行数:20，代码来源:CompressMapper.java

示例8: KVJob

import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Creates the key/value test job reading sequence files from {@code inputpath} and
 * writing to {@code outputpath}.
 *
 * <p>When the {@code TestConstants.NATIVETASK_KVTEST_CREATEFILE} setting equals
 * {@code "true"}, the input path is deleted and a new sequence test file is generated
 * there first. Its size comes from {@code TestConstants.FILESIZE_KEY} (default
 * {@code "1000"}). An unset flag is treated as "do not create".
 *
 * @param jobname    name given to the job
 * @param conf       configuration the job is created from
 * @param keyclass   job output key class; also the key type of the generated file
 * @param valueclass map output value class; also the value type of the generated file
 * @param inputpath  sequence-file input path
 * @param outputpath job output path
 * @throws Exception if job setup or test-file generation fails
 */
public KVJob(String jobname, Configuration conf,
             Class<?> keyclass, Class<?> valueclass,
             String inputpath, String outputpath) throws Exception {
  job = Job.getInstance(conf, jobname);
  job.setJarByClass(KVJob.class);
  job.setMapperClass(KVJob.ValueMapper.class);
  job.setOutputKeyClass(keyclass);
  job.setMapOutputValueClass(valueclass);

  // Constant-first comparison: conf.get() returns null for an unset key, which
  // previously caused a NullPointerException here.
  if ("true".equals(conf.get(TestConstants.NATIVETASK_KVTEST_CREATEFILE))) {
    // FileSystem.get() returns a shared, cached instance; closing it can break other
    // holders. Use a private instance, closed even if delete() throws.
    try (FileSystem fs = FileSystem.newInstance(conf)) {
      fs.delete(new Path(inputpath), true);
    }
    final int fileSize = Integer.parseInt(conf.get(TestConstants.FILESIZE_KEY, "1000"));
    final TestInputFile testfile = new TestInputFile(fileSize,
        keyclass.getName(), valueclass.getName(), conf);
    StopWatch sw = new StopWatch().start();
    testfile.createSequenceTestFile(inputpath);
    LOG.info("Created test file " + inputpath + " in "
        + sw.now(TimeUnit.MILLISECONDS) + "ms");
  }
  job.setInputFormatClass(SequenceFileInputFormat.class);
  FileInputFormat.addInputPath(job, new Path(inputpath));
  FileOutputFormat.setOutputPath(job, new Path(outputpath));
}

开发者ID:aliyun-beta，项目名称:aliyun-oss-hadoop-fs，代码行数:26，代码来源:KVJob.java

示例9: genBigItemMap

import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Runs the single-reducer "items remapping" job over {@code input} and waits for it.
 *
 * <p>Note that the job name is built from the instance field {@code this.input}, not
 * from the {@code input} parameter.
 *
 * @param input  sequence-file input path
 * @param output sequence-file output path
 * @return the result of {@code waitForCompletion(true)}
 * @throws IOException            if job setup or execution fails with an I/O error
 * @throws ClassNotFoundException if thrown while waiting for the job
 * @throws InterruptedException   if waiting for the job is interrupted
 */
private boolean genBigItemMap(String input, String output) throws IOException, ClassNotFoundException,
		InterruptedException {
	final String title = "Computing items remapping for " + this.input;
	Job remapJob = Job.getInstance(this.getConf(), title);
	remapJob.setJarByClass(TopPIoverHadoop.class);

	// Processing stages: inverse mapper feeding exactly one reducer.
	remapJob.setMapperClass(InverseMapper.class);
	remapJob.setReducerClass(ItemBigRebasingReducer.class);
	remapJob.setNumReduceTasks(1);

	// Sequence files in and out, with (IntWritable, IntWritable) output records.
	remapJob.setInputFormatClass(SequenceFileInputFormat.class);
	remapJob.setOutputFormatClass(SequenceFileOutputFormat.class);
	remapJob.setOutputKeyClass(IntWritable.class);
	remapJob.setOutputValueClass(IntWritable.class);

	FileInputFormat.addInputPath(remapJob, new Path(input));
	FileOutputFormat.setOutputPath(remapJob, new Path(output));

	boolean succeeded = remapJob.waitForCompletion(true);
	return succeeded;
}

开发者ID:slide-lig，项目名称:TopPI，代码行数:20，代码来源:TopPIoverHadoop.java

示例10: run

import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Runs the single-reducer "Norm2Job" over a sequence-file matrix and blocks until the
 * job finishes.
 *
 * <p>{@code meanSpanFileName} is stored in {@code conf} under {@code MEANSPANOPTION}
 * before the job is created from it. Both paths are qualified against the file
 * system of the input path.
 *
 * @param conf             configuration the job is created from; modified as described
 * @param matrixInputPath  sequence-file input path
 * @param meanSpanFileName value stored under {@code MEANSPANOPTION}
 * @param matrixOutputPath sequence-file output path
 * @throws IOException            if job setup fails or the job finishes unsuccessfully
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException if thrown while waiting for the job
 */
public void run(Configuration conf, Path matrixInputPath,
    String meanSpanFileName, Path matrixOutputPath) throws IOException,
    InterruptedException, ClassNotFoundException {
  conf.set(MEANSPANOPTION, meanSpanFileName);
  Job job = new Job(conf);
  job.setJobName("Norm2Job");
  job.setJarByClass(Norm2Job.class);

  FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
  final Path qualifiedInput = fs.makeQualified(matrixInputPath);
  final Path qualifiedOutput = fs.makeQualified(matrixOutputPath);

  FileInputFormat.addInputPath(job, qualifiedInput);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, qualifiedOutput);
  job.setMapperClass(MyMapper.class);
  job.setReducerClass(MyReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(DoubleWritable.class);

  // waitForCompletion submits the job itself when it has not been submitted yet, so
  // the separate submit() call was redundant. Its return value reports failure;
  // previously that value was dropped and a failed job looked like a success.
  if (!job.waitForCompletion(true)) {
    throw new IOException("Norm2Job failed for input " + qualifiedInput);
  }
}

开发者ID:SiddharthMalhotra，项目名称:sPCA，代码行数:23，代码来源:Norm2Job.java

示例11: runIteration

import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Runs one iteration job over the corpus and blocks until it finishes.
 *
 * <p>{@code modelInput} is registered through {@code setModelPaths}, and any existing
 * {@code modelOutput} is removed via {@code HadoopUtil.delete} before the job starts.
 *
 * @param conf            configuration the job is created from
 * @param corpusInput     sequence-file input path of the corpus
 * @param modelInput      path passed to {@code setModelPaths}; also shown in the job name
 * @param modelOutput     sequence-file output path; deleted before the job runs
 * @param iterationNumber number of this iteration, used in the job name and error message
 * @param maxIterations   total iteration count, used in the job name
 * @param numReduceTasks  number of reduce tasks for the job
 * @throws IOException            if job setup or execution fails with an I/O error
 * @throws ClassNotFoundException if thrown while waiting for the job
 * @throws InterruptedException   if waiting is interrupted, and also when the job
 *                                completes unsuccessfully
 */
public static void runIteration(Configuration conf, Path corpusInput, Path modelInput, Path modelOutput,
                                int iterationNumber, int maxIterations, int numReduceTasks)
  throws IOException, ClassNotFoundException, InterruptedException {
  String title = String.format("Iteration %d of %d, input path: %s",
      iterationNumber, maxIterations, modelInput);
  log.info("About to run: " + title);

  Job iterationJob = new Job(conf, title);
  iterationJob.setJarByClass(CVB0Driver.class);

  // Sequence files in and out, with (IntWritable, VectorWritable) output records.
  iterationJob.setInputFormatClass(SequenceFileInputFormat.class);
  iterationJob.setOutputFormatClass(SequenceFileOutputFormat.class);
  iterationJob.setOutputKeyClass(IntWritable.class);
  iterationJob.setOutputValueClass(VectorWritable.class);

  // The vector-sum reducer is also used as the combiner.
  iterationJob.setMapperClass(CachingCVB0Mapper.class);
  iterationJob.setCombinerClass(VectorSumReducer.class);
  iterationJob.setReducerClass(VectorSumReducer.class);
  iterationJob.setNumReduceTasks(numReduceTasks);

  FileInputFormat.addInputPath(iterationJob, corpusInput);
  FileOutputFormat.setOutputPath(iterationJob, modelOutput);
  setModelPaths(iterationJob, modelInput);
  HadoopUtil.delete(conf, modelOutput);

  boolean succeeded = iterationJob.waitForCompletion(true);
  if (succeeded) {
    return;
  }
  throw new InterruptedException(String.format("Failed to complete iteration %d stage 1",
      iterationNumber));
}

开发者ID:saradelrio，项目名称:Chi-FRBCS-BigData-Ave，代码行数:26，代码来源:CVB0Driver.java

示例12: runJob

import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Runs the map-only "UnitVectorizerJob" from {@code input} to {@code output} on a
 * fresh default {@code Configuration} and blocks until it finishes.
 *
 * @param input  sequence-file input path
 * @param output sequence-file output path
 * @throws IOException            if job setup or execution fails with an I/O error
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException if thrown while waiting for the job
 * @throws IllegalStateException  if the job completes unsuccessfully
 */
public static void runJob(Path input, Path output)
  throws IOException, InterruptedException, ClassNotFoundException {

  Job vectorizerJob = new Job(new Configuration(), "UnitVectorizerJob");
  vectorizerJob.setJarByClass(UnitVectorizerJob.class);

  // Map-only job: no reducers.
  vectorizerJob.setMapperClass(UnitVectorizerMapper.class);
  vectorizerJob.setNumReduceTasks(0);

  // Sequence files in and out, with (IntWritable, VectorWritable) output records.
  vectorizerJob.setInputFormatClass(SequenceFileInputFormat.class);
  vectorizerJob.setOutputFormatClass(SequenceFileOutputFormat.class);
  vectorizerJob.setOutputKeyClass(IntWritable.class);
  vectorizerJob.setOutputValueClass(VectorWritable.class);

  FileInputFormat.addInputPath(vectorizerJob, input);
  FileOutputFormat.setOutputPath(vectorizerJob, output);

  if (vectorizerJob.waitForCompletion(true)) {
    return;
  }
  throw new IllegalStateException("Job failed!");
}

开发者ID:saradelrio，项目名称:Chi-FRBCS-BigData-Ave，代码行数:24，代码来源:UnitVectorizerJob.java

示例13: calculatePerplexity

import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Runs the single-reducer perplexity job over the corpus and returns the value read
 * back by {@code readPerplexity}.
 *
 * <p>The output path comes from {@code perplexityPath(modelPath.getParent(), iteration)}
 * and is removed via {@code HadoopUtil.delete} before the job starts.
 *
 * @param conf       configuration the job is created from
 * @param corpusPath sequence-file input path of the corpus
 * @param modelPath  path passed to {@code setModelPaths}; its parent locates the output
 * @param iteration  iteration number used to derive the output path and read the result
 * @return the value returned by {@code readPerplexity} for this iteration
 * @throws IOException            if job setup or execution fails with an I/O error
 * @throws ClassNotFoundException if thrown while waiting for the job
 * @throws InterruptedException   if waiting is interrupted, and also when the job
 *                                completes unsuccessfully
 */
private static double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath, int iteration)
  throws IOException, ClassNotFoundException, InterruptedException {
  String title = "Calculating perplexity for " + modelPath;
  log.info("About to run: " + title);

  Job perplexityJob = new Job(conf, title);
  perplexityJob.setJarByClass(CachingCVB0PerplexityMapper.class);

  // Sequence files in and out, with (DoubleWritable, DoubleWritable) output records.
  perplexityJob.setInputFormatClass(SequenceFileInputFormat.class);
  perplexityJob.setOutputFormatClass(SequenceFileOutputFormat.class);
  perplexityJob.setOutputKeyClass(DoubleWritable.class);
  perplexityJob.setOutputValueClass(DoubleWritable.class);

  // One reducer; the same summing reducer is also used as the combiner.
  perplexityJob.setMapperClass(CachingCVB0PerplexityMapper.class);
  perplexityJob.setCombinerClass(DualDoubleSumReducer.class);
  perplexityJob.setReducerClass(DualDoubleSumReducer.class);
  perplexityJob.setNumReduceTasks(1);

  FileInputFormat.addInputPath(perplexityJob, corpusPath);
  Path resultDir = perplexityPath(modelPath.getParent(), iteration);
  FileOutputFormat.setOutputPath(perplexityJob, resultDir);
  setModelPaths(perplexityJob, modelPath);
  HadoopUtil.delete(conf, resultDir);

  boolean succeeded = perplexityJob.waitForCompletion(true);
  if (!succeeded) {
    throw new InterruptedException("Failed to calculate perplexity for: " + modelPath);
  }
  return readPerplexity(conf, modelPath.getParent(), iteration);
}

开发者ID:saradelrio，项目名称:Chi-FRBCS-BigData-Max，代码行数:25，代码来源:CVB0Driver.java

示例14: postProcessMR

import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Post-processes clustering output as a MapReduce job. The number of reduce tasks is
 * set to the number of clusters found in the input, so that each cluster's vectors
 * are written to their own part file.
 *
 * @param conf
 *          The Hadoop configuration.
 * @param input
 *          The output path that was given to the clustering algorithm and is now to be
 *          post-processed. Hint: the directory containing clusters-*-final and
 *          clusteredPoints.
 * @param output
 *          The path where the post-processed data is stored.
 * @throws IOException            if job setup or execution fails with an I/O error
 * @throws InterruptedException   if waiting is interrupted, and also when the job
 *                                completes unsuccessfully
 * @throws ClassNotFoundException if thrown while waiting for the job
 */
private static void postProcessMR(Configuration conf, Path input, Path output) throws IOException,
                                                                              InterruptedException,
                                                                              ClassNotFoundException {
  Job postProcessJob = new Job(conf, "ClusterOutputPostProcessor Driver running over input: " + input);
  postProcessJob.setJarByClass(ClusterOutputPostProcessorDriver.class);

  // Sequence files on both ends.
  postProcessJob.setInputFormatClass(SequenceFileInputFormat.class);
  postProcessJob.setOutputFormatClass(SequenceFileOutputFormat.class);

  // Map side.
  postProcessJob.setMapperClass(ClusterOutputPostProcessorMapper.class);
  postProcessJob.setMapOutputKeyClass(Text.class);
  postProcessJob.setMapOutputValueClass(VectorWritable.class);

  // Reduce side: as many reduce tasks as there are clusters.
  postProcessJob.setReducerClass(ClusterOutputPostProcessorReducer.class);
  postProcessJob.setOutputKeyClass(Text.class);
  postProcessJob.setOutputValueClass(VectorWritable.class);
  int clusterCount = ClusterCountReader.getNumberOfClusters(input, conf);
  postProcessJob.setNumReduceTasks(clusterCount);

  Path clusteredPoints = new Path(input, new Path("clusteredPoints"));
  FileInputFormat.addInputPath(postProcessJob, clusteredPoints);
  FileOutputFormat.setOutputPath(postProcessJob, output);

  boolean succeeded = postProcessJob.waitForCompletion(true);
  if (succeeded) {
    return;
  }
  throw new InterruptedException("ClusterOutputPostProcessor Job failed processing " + input);
}

开发者ID:saradelrio，项目名称:Chi-FRBCS-BigData-Ave，代码行数:35，代码来源:ClusterOutputPostProcessorDriver.java

示例15: writeTopicModel

import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; //导入依赖的package包/类
/**
 * Submits the map-only job that writes the final topic/term distributions and returns
 * it without waiting for completion.
 *
 * @param conf       configuration the job is created from
 * @param modelInput sequence-file input path of the model
 * @param output     sequence-file output path
 * @return the submitted job
 * @throws IOException            if job setup or submission fails with an I/O error
 * @throws InterruptedException   if submission is interrupted
 * @throws ClassNotFoundException if thrown during submission
 */
private static Job writeTopicModel(Configuration conf, Path modelInput, Path output)
  throws IOException, InterruptedException, ClassNotFoundException {
  String description = String.format("Writing final topic/term distributions from %s to %s", modelInput, output);
  log.info("About to run: " + description);

  Job normalizerJob = new Job(conf, description);
  normalizerJob.setJarByClass(CVB0Driver.class);

  // Map-only job: no reducers.
  normalizerJob.setMapperClass(CVB0TopicTermVectorNormalizerMapper.class);
  normalizerJob.setNumReduceTasks(0);

  // Sequence files in and out, with (IntWritable, VectorWritable) output records.
  normalizerJob.setInputFormatClass(SequenceFileInputFormat.class);
  normalizerJob.setOutputFormatClass(SequenceFileOutputFormat.class);
  normalizerJob.setOutputKeyClass(IntWritable.class);
  normalizerJob.setOutputValueClass(VectorWritable.class);

  FileInputFormat.addInputPath(normalizerJob, modelInput);
  FileOutputFormat.setOutputPath(normalizerJob, output);

  // Fire and forget: the caller decides whether and how to wait.
  normalizerJob.submit();
  return normalizerJob;
}

开发者ID:saradelrio，项目名称:Chi-FRBCS-BigDataCS，代码行数:18，代码来源:CVB0Driver.java

注：本文中的org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。