

Java SequenceFileOutputFormat.setOutputPath Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.setOutputPath. If you are unsure what SequenceFileOutputFormat.setOutputPath does or how to use it, the curated examples below may help. You can also browse further usage examples of the enclosing class, org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.


The 15 code examples below demonstrate SequenceFileOutputFormat.setOutputPath, sorted by popularity by default.
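
Before the examples, here is a minimal, self-contained sketch of the canonical call pattern. It is illustrative only: the class name SeqFileDemo and the /tmp paths are hypothetical and do not come from the examples below. Note that setOutputPath is inherited from FileOutputFormat; it names an output directory that must not yet exist, into which the framework writes one part file per task.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class SeqFileDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "seqfile-demo");
        job.setJarByClass(SeqFileDemo.class);
        job.setMapperClass(Mapper.class); // identity mapper: passes (offset, line) through unchanged
        job.setNumReduceTasks(0);         // map-only job, as in most of the examples below

        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.setInputPaths(job, new Path("/tmp/seq-in"));   // hypothetical input dir

        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path("/tmp/seq-out")); // must not exist yet

        // the key/value types are recorded in the SequenceFile header
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}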

Example 1: run

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
@Override
public int run(final String[] args) throws Exception {
    final Configuration conf = getConf();
    final Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setInputFormatClass(ElementInputFormat.class);
    job.setMapperClass(AMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    // write the mapper output directly as a SequenceFile (map-only job)
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;
}
 
Developer: gchq, Project: Gaffer, Lines: 21, Source: InputFormatTest.java

Example 2: run

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {

  if (args.length != 2) {
    System.out.printf("Usage: CreateSequenceFile <input dir> <output dir>\n");
    return -1;
  }

  Job job = new Job(getConf());
  job.setJarByClass(CreateSequenceFile.class);
  job.setJobName("Create Sequence File");
  
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
  
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);

  SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
 
Developer: mellowonpsx, Project: cloudera-homework, Lines: 27, Source: CreateSequenceFile.java

Example 3: createJob

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
public static Job createJob(Path[] inputPaths, Path outputPath, Map<String, String> metadata, Configuration config)
		throws IOException
{
	final Job job = new Job(config);

	job.setInputFormatClass(SequenceFileInputFormat.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(BytesWritable.class);
	job.setOutputFormatClass(MetadataSequenceFileOutputFormat.class);

	SequenceFileInputFormat.setInputPaths(job, inputPaths);
	SequenceFileOutputFormat.setOutputPath(job, outputPath);
	SequenceFileOutputFormat.setCompressOutput(job, true);
	SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
	SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

	if (metadata != null)
		MetadataConfiguration.setMetadata(metadata, job.getConfiguration());

	return job;
}
 
Developer: openimaj, Project: openimaj, Lines: 22, Source: TextBytesJobUtil.java

Example 4: total

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
public static void total(String name, String in, String out)
		throws IOException, InterruptedException, ClassNotFoundException {
	Configuration conf = new Configuration();
	conf.set(QUERIED_NAME, name);
	Job job = Job.getInstance(new Cluster(conf), conf);
	job.setJarByClass(Total.class);

	// in
	if (!in.endsWith("/"))
		in = in.concat("/");
	in = in.concat("employees");
	SequenceFileInputFormat.addInputPath(job, new Path(in));
	job.setInputFormatClass(SequenceFileInputFormat.class);

	// map
	job.setMapperClass(TotalMapper.class);
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(DoubleWritable.class);

	// reduce
	job.setCombinerClass(TotalReducer.class);
	job.setReducerClass(TotalReducer.class);

	// out
	SequenceFileOutputFormat.setOutputPath(job, new Path(out));
	job.setOutputFormatClass(SequenceFileOutputFormat.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);

	job.waitForCompletion(true);
}
 
Developer: amritbhat786, Project: DocIT, Lines: 32, Source: Total.java

Example 5: createJob

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
public static Job createJob(String name, String base) throws IOException {
	Configuration conf = new Configuration();
	conf.set(Total.QUERIED_NAME, name);
	Job job = Job.getInstance(new Cluster(conf), conf);
	job.setJarByClass(Cut.class);

	// in
	String in = base;
	if (!base.endsWith("/"))
		in = in.concat("/");
	in = in.concat("employees");
	SequenceFileInputFormat.addInputPath(job, new Path(in));
	job.setInputFormatClass(SequenceFileInputFormat.class);

	// map
	job.setMapperClass(CutMapper.class);
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(Employee.class);

	// out
	SequenceFileOutputFormat.setOutputPath(job, new Path(base + "/tmp"));
	job.setOutputFormatClass(SequenceFileOutputFormat.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(Employee.class);

	return job;
}
 
Developer: amritbhat786, Project: DocIT, Lines: 28, Source: Cut.java

Example 6: runPartitionerJob

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
private int runPartitionerJob() throws Exception
{
  Job partitionerJob = new Job(getConf(), "Partition Wikipedia");
  Configuration partitionerConf = partitionerJob.getConfiguration();
  partitionerConf.set("mapred.map.tasks.speculative.execution", "false");

  configurePartitionerJob(partitionerJob);
  
  List<Path> inputPaths = new ArrayList<Path>();
  SortedSet<String> languages = new TreeSet<String>();
  FileSystem fs = FileSystem.get(partitionerConf);
  Path parent = new Path(partitionerConf.get("wikipedia.input"));
  listFiles(parent, fs, inputPaths, languages);
  
  System.out.println("Input files in " + parent + ":" + inputPaths.size());
  Path[] inputPathsArray = new Path[inputPaths.size()];
  inputPaths.toArray(inputPathsArray);
  
  System.out.println("Languages:" + languages.size());

  // setup input format
  
  WikipediaInputFormat.setInputPaths(partitionerJob, inputPathsArray);
  
  partitionerJob.setMapperClass(WikipediaPartitioner.class);
  partitionerJob.setNumReduceTasks(0);

  // setup output format
  partitionerJob.setMapOutputKeyClass(Text.class);
  partitionerJob.setMapOutputValueClass(Article.class);
  partitionerJob.setOutputKeyClass(Text.class);
  partitionerJob.setOutputValueClass(Article.class);
  partitionerJob.setOutputFormatClass(SequenceFileOutputFormat.class);
  Path outputDir = WikipediaConfiguration.getPartitionedArticlesPath(partitionerConf);
  SequenceFileOutputFormat.setOutputPath(partitionerJob, outputDir);
  SequenceFileOutputFormat.setCompressOutput(partitionerJob, true);
  SequenceFileOutputFormat.setOutputCompressionType(partitionerJob, CompressionType.RECORD);
  
  return partitionerJob.waitForCompletion(true) ? 0 : 1;
}
 
Developer: apache, Project: accumulo-wikisearch, Lines: 41, Source: WikipediaPartitionedIngester.java

Example 7: startJob

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
/**
 * creates and submits a job, updates file index and job index
 */
private Job startJob(String jobName, Set<String> lostFiles, Priority priority,
    long detectTime)
throws IOException, InterruptedException, ClassNotFoundException {
  Path inDir = new Path(JOB_NAME_PREFIX + "/in/" + jobName);
  Path outDir = new Path(JOB_NAME_PREFIX + "/out/" + jobName);
  List<String> filesInJob = createInputFile(
      jobName, inDir, lostFiles);
  if (filesInJob.isEmpty()) return null;

  Configuration jobConf = new Configuration(getConf());
  DistBlockIntegrityMonitor.updateBlockFixerMapreduceConfigs(jobConf, BLOCKFIXER);
  RaidUtils.parseAndSetOptions(jobConf, priority.configOption);
  Job job = new Job(jobConf, jobName);
  job.getConfiguration().set(CORRUPT_FILE_DETECT_TIME, Long.toString(detectTime));
  configureJob(job, this.RECONSTRUCTOR_CLASS);
  job.setJarByClass(getClass());
  job.setMapperClass(ReconstructionMapper.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(ReconstructionInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  ReconstructionInputFormat.setInputPaths(job, inDir);
  SequenceFileOutputFormat.setOutputPath(job, outDir);
  

  submitJob(job, filesInJob, priority);
  List<LostFileInfo> fileInfos =
    updateFileIndex(jobName, filesInJob, priority);
  // The implementation of submitJob() need not update jobIndex.
  // So check if the job exists in jobIndex before updating jobInfos.
  if (jobIndex.containsKey(job)) {
    jobIndex.put(job, fileInfos);
  }
  numJobsRunning.incrementAndGet();
  return job;
}
 
Developer: rhli, Project: hadoop-EAR, Lines: 42, Source: DistBlockIntegrityMonitor.java

Example 8: initTabToSeqFileJob

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
public static void initTabToSeqFileJob(Job job, String intable, String outpath, String auths) throws AccumuloSecurityException {

    Configuration conf = job.getConfiguration();
    String username = conf.get(USERNAME);
    String password = conf.get(PASSWORD);
    String instance = conf.get(INSTANCE);
    String zookeepers = conf.get(ZOOKEEPERS);

    System.out.println("Zookeepers are " + auths);

    if (zookeepers != null) {
      AccumuloInputFormat.setZooKeeperInstance(job, instance, zookeepers);
    } else {
      throw new IllegalArgumentException("Must specify either mock or zookeepers");
    }

    AccumuloInputFormat.setConnectorInfo(job, username, new PasswordToken(password));
    AccumuloInputFormat.setScanAuthorizations(job, new Authorizations(auths));
    AccumuloInputFormat.setInputTableName(job, intable);
    job.setInputFormatClass(AccumuloInputFormat.class);
    job.setMapOutputKeyClass(CompositeType.class);
    job.setMapOutputValueClass(TripleCard.class);

    // OUTPUT
    SequenceFileOutputFormat.setOutputPath(job, new Path(outpath));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(CompositeType.class);
    job.setOutputValueClass(TripleCard.class);

  }
 
Developer: apache, Project: incubator-rya, Lines: 31, Source: JoinSelectStatsUtil.java

Example 9: initJoinMRJob

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
public static void initJoinMRJob(Job job, String prospectsPath, String spoPath, Class<? extends Mapper<CompositeType,TripleCard,?,?>> mapperClass,
    String outPath, String auths) throws AccumuloSecurityException {

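  // feed both sequence-file inputs (prospects and SPO) to the same mapper class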
  MultipleInputs.addInputPath(job, new Path(prospectsPath), SequenceFileInputFormat.class, mapperClass);
  MultipleInputs.addInputPath(job, new Path(spoPath), SequenceFileInputFormat.class, mapperClass);
  job.setMapOutputKeyClass(CompositeType.class);
  job.setMapOutputValueClass(TripleCard.class);

  SequenceFileOutputFormat.setOutputPath(job, new Path(outPath));
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(TripleEntry.class);
  job.setOutputValueClass(CardList.class);

}
 
Developer: apache, Project: incubator-rya, Lines: 15, Source: JoinSelectStatsUtil.java

Example 10: run

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    String outpath = conf.get(OUTPUTPATH);
    
    Job job = new Job(conf, this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());
    job.getConfiguration().setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true); // set on the job's own conf: new Job(conf) takes a copy, so mutating conf afterwards has no effect
    
    MultipleInputs.addInputPath(job, new Path(PROSPECTSOUT.getAbsolutePath()), 
            SequenceFileInputFormat.class, JoinSelectAggregateMapper.class);
    MultipleInputs.addInputPath(job,new Path(SPOOUT.getAbsolutePath()) , 
            SequenceFileInputFormat.class, JoinSelectAggregateMapper.class);
    job.setMapOutputKeyClass(CompositeType.class);
    job.setMapOutputValueClass(TripleCard.class);

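    // locate the system temp dir via File.createTempFile and create a fresh per-run output dir under it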
    tempDir = new File(File.createTempFile(outpath, "txt").getParentFile(), System.currentTimeMillis() + "");
    SequenceFileOutputFormat.setOutputPath(job, new Path(tempDir.getAbsolutePath()));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(TripleEntry.class);
    job.setOutputValueClass(CardList.class);


    job.setSortComparatorClass(JoinSelectSortComparator.class);
    job.setGroupingComparatorClass(JoinSelectGroupComparator.class);
    job.setPartitionerClass(JoinSelectPartitioner.class);
    job.setReducerClass(JoinReducer.class);
    job.setNumReduceTasks(32);
    job.waitForCompletion(true);
    
    return job.isSuccessful() ? 0 : 1;          
}
 
Developer: apache, Project: incubator-rya, Lines: 34, Source: JoinSelectStatisticsTest.java

Example 11: initTabToSeqFileJob

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
public static void initTabToSeqFileJob(Job job, String intable, String outpath) throws AccumuloSecurityException, IOException {

    Configuration conf = job.getConfiguration();

    String username = conf.get(USERNAME);
    System.out.println("Username is " + username);
    String password = conf.get(PASSWORD);
    String instance = conf.get(INSTANCE);
    System.out.println("Instance is " + instance);

    AccumuloInputFormat.setMockInstance(job, instance);
    AccumuloInputFormat.setConnectorInfo(job, username, new PasswordToken(password));
    AccumuloInputFormat.setInputTableName(job, intable);

    job.setInputFormatClass(AccumuloInputFormat.class);
    job.setMapOutputKeyClass(CompositeType.class);
    job.setMapOutputValueClass(TripleCard.class);

    System.out.println("Outpath is " + outpath);

    // OUTPUT
    if (outpath.equals("spo")) {
        SPOOUT = new File(File.createTempFile(outpath, "txt").getParentFile(), System.currentTimeMillis() + "spo");
        SequenceFileOutputFormat.setOutputPath(job, new Path(SPOOUT.getAbsolutePath()));
    } else {
        PROSPECTSOUT = new File(File.createTempFile(outpath, "txt").getParentFile(), System.currentTimeMillis() + "prospects");
        SequenceFileOutputFormat.setOutputPath(job, new Path(PROSPECTSOUT.getAbsolutePath()));
    }
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(CompositeType.class);
    job.setOutputValueClass(TripleCard.class);
}
 
Developer: apache, Project: incubator-rya, Lines: 35, Source: JoinSelectStatisticsTest.java

Example 12: configureSchemaOutput

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
/**
 * Set up the MapReduce job to output a schema (TBox).
 */
protected void configureSchemaOutput() {
    Path outPath = MRReasoningUtils.getSchemaPath(job.getConfiguration());
    SequenceFileOutputFormat.setOutputPath(job, outPath);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SchemaWritable.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, "schemaobj",
        SequenceFileOutputFormat.class, NullWritable.class, SchemaWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
        TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
 
Developer: apache, Project: incubator-rya, Lines: 17, Source: AbstractReasoningTool.java

Example 13: configureDerivationOutput

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
/**
 * Set up a MapReduce job to output newly derived triples.
 * @param   intermediate    True if this is intermediate data. Outputs
 *                          to [base]-[iteration]-[temp].
 */
protected void configureDerivationOutput(boolean intermediate) {
    Path outPath;
    Configuration conf = job.getConfiguration();
    int iteration = MRReasoningUtils.getCurrentIteration(conf);
    if (intermediate) {
        outPath = MRReasoningUtils.getOutputPath(conf,
            MRReasoningUtils.OUTPUT_BASE + iteration
            + MRReasoningUtils.TEMP_SUFFIX);
    }
    else {
        outPath = MRReasoningUtils.getOutputPath(conf,
            MRReasoningUtils.OUTPUT_BASE + iteration);
    }
    SequenceFileOutputFormat.setOutputPath(job, outPath);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT,
        SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.TERMINAL_OUT,
        SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.SCHEMA_OUT,
        SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INCONSISTENT_OUT,
        SequenceFileOutputFormat.class, Derivation.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);
    // Set up an output for diagnostic info, if needed
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
        TextOutputFormat.class, Text.class, Text.class);
}
 
Developer: apache, Project: incubator-rya, Lines: 34, Source: AbstractReasoningTool.java

Example 14: run

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
public int run(String[] args) throws Exception {
    PreSortConfig config = new PreSortConfig();
    config.fromArray(args);

    Job job = Job.getInstance(getConf());
    job.setJobName("pre-sort");
    job.setJarByClass(PreSortDriver.class);

    Path mapInputPath = new Path(config.getInput());
    Path mapOutputPath = new Path(config.getOutput());
    LOGGER.info("use " + mapInputPath.toString() + " as pre-sort input ");
    LOGGER.info("use " + mapOutputPath.toString() + " as pre-sort output ");

    // define the mapper
    job.getConfiguration().set(PreSortMapper.COLUMN_INDEX_CONFIG_NAME, config.getKeyColumnAsString());
    job.setMapperClass(PreSortMapper.class);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, mapInputPath);

    // define reducer
    job.setNumReduceTasks(NUM_REDUCER);

    // define the output, NOTE: we do not have reducer
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, mapOutputPath);

    // clean up the output folder
    mapOutputPath.getFileSystem(job.getConfiguration()).delete(mapOutputPath, true);

    // run the job and wait until it complete
    return job.waitForCompletion(true) ? 0 : 1;
}
 
Developer: at15, Project: tree-index, Lines: 35, Source: PreSortDriver.java

Example 15: setupOutput

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
protected void setupOutput(final Job job, final SampleDataForSplitPoints operation, final Store store) throws IOException {
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(operation.getOutputPath()));
    if (null != operation.getCompressionCodec()) {
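        // writing a compressed SequenceFile with GzipCodec requires the native hadoop/zlib libraries; warn and skip compression if they are unavailable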
        if (GzipCodec.class.isAssignableFrom(operation.getCompressionCodec()) && !NativeCodeLoader.isNativeCodeLoaded() && !ZlibFactory.isNativeZlibLoaded(job.getConfiguration())) {
            LOGGER.warn("SequenceFile doesn't work with GzipCodec without native-hadoop code!");
        } else {
            SequenceFileOutputFormat.setCompressOutput(job, true);
            SequenceFileOutputFormat.setOutputCompressorClass(job, operation.getCompressionCodec());
            SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        }
    }
}
 
Developer: gchq, Project: Gaffer, Lines: 14, Source: AccumuloSampleDataForSplitPointsJobFactory.java
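
All of the examples above produce SequenceFiles. As a companion, here is a hedged sketch of reading records back from one part file a job wrote; the class name SeqFileDump and the path argument are illustrative assumptions, not part of any example above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class SeqFileDump {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path(args[0]); // e.g. <output dir>/part-m-00000
        try (SequenceFile.Reader reader =
                 new SequenceFile.Reader(conf, SequenceFile.Reader.file(file))) {
            // the key and value classes are recorded in the file header
            Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
            while (reader.next(key, value)) {
                System.out.println(key + "\t" + value);
            }
        }
    }
}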


Note: The org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.setOutputPath method examples in this article were compiled by 纯净天空 from open-source code hosted on GitHub, MSDocs, and similar platforms. The snippets are selected from open-source projects and their copyright remains with the original authors; consult each project's license before using or redistributing the code, and do not republish without permission.