

Java JobConf.setOutputFormat Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.mapred.JobConf.setOutputFormat. If you are wondering what JobConf.setOutputFormat does, how to call it, or what real-world uses of it look like, the curated code examples below may help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.mapred.JobConf.


The sections below present 10 code examples of the JobConf.setOutputFormat method, sorted by popularity by default.
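Before turning to the examples from real projects, here is a minimal, self-contained sketch of the method's typical call site. The driver class name, paths, and map-only configuration are illustrative choices of this article, not taken from any project cited below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class MinimalOutputFormatDemo {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(MinimalOutputFormatDemo.class);
    job.setJobName("setOutputFormat-demo");

    job.setInputFormat(TextInputFormat.class);
    // setOutputFormat takes the OutputFormat implementation class;
    // TextOutputFormat writes one "key<TAB>value" line per record.
    job.setOutputFormat(TextOutputFormat.class);

    // Map-only identity job: TextInputFormat produces (LongWritable, Text).
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    JobClient.runJob(job);
  }
}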

Example 1: testInputFormat

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
  final JobConf job = MapreduceTestingShim.getJobConf(mrCluster);
  job.setInputFormat(clazz);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);
  LOG.debug("submitting job.");
  final RunningJob run = JobClient.runJob(job);
  assertTrue("job failed!", run.isSuccessful());
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
  assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
  assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
  assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}
 
Developer: fengchen8086, Project: ditb, Lines: 23, Source: TestTableInputFormat.java

Example 2: getJob

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
/**
 * Sets up a job conf for the given job using the given config object. Ensures
 * that the correct input format is set, along with the mapper and reducer
 * classes and the input and output key and value classes, plus any other job
 * configuration.
 *
 * @param config the extracted configuration to build the job from
 * @return JobConf representing the job to be run
 * @throws IOException
 */
private JobConf getJob(ConfigExtractor config) throws IOException {
  JobConf job = new JobConf(config.getConfig(), SliveTest.class);
  job.setInputFormat(DummyInputFormat.class);
  FileOutputFormat.setOutputPath(job, config.getOutputPath());
  job.setMapperClass(SliveMapper.class);
  job.setPartitionerClass(SlivePartitioner.class);
  job.setReducerClass(SliveReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormat(TextOutputFormat.class);
  TextOutputFormat.setCompressOutput(job, false);
  job.setNumReduceTasks(config.getReducerAmount());
  job.setNumMapTasks(config.getMapAmount());
  return job;
}
 
Developer: naver, Project: hadoop, Lines: 26, Source: SliveTest.java

Example 3: initTableReduceJob

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
/**
 * Use this before submitting a TableReduce job. It will
 * appropriately set up the JobConf.
 *
 * @param table  The output table.
 * @param reducer  The reducer class to use.
 * @param job  The current job configuration to adjust.
 * @param partitioner  Partitioner to use. Pass <code>null</code> to use
 * default partitioner.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *           job classes via the distributed cache (tmpjars).
 * @throws IOException When determining the region count fails.
 */
public static void initTableReduceJob(String table,
  Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
  boolean addDependencyJars) throws IOException {
  job.setOutputFormat(TableOutputFormat.class);
  job.setReducerClass(reducer);
  job.set(TableOutputFormat.OUTPUT_TABLE, table);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Put.class);
  job.setStrings("io.serializations", job.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName());
  if (partitioner == HRegionPartitioner.class) {
    job.setPartitionerClass(HRegionPartitioner.class);
    int regions =
      MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
    if (job.getNumReduceTasks() > regions) {
      job.setNumReduceTasks(regions);
    }
  } else if (partitioner != null) {
    job.setPartitionerClass(partitioner);
  }
  if (addDependencyJars) {
    addDependencyJars(job);
  }
  initCredentials(job);
}
 
Developer: fengchen8086, Project: ditb, Lines: 39, Source: TableMapReduceUtil.java
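As a usage sketch for the helper above: the driver and reducer below are hypothetical (the table name, column family, and class names are invented for illustration); only the initTableReduceJob call itself reflects the API shown in this example.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapred.TableReduce;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class TableReduceDriver {

  // Hypothetical reducer: stores the first value of each key as one Put.
  public static class ExampleTableReducer extends MapReduceBase
      implements TableReduce<Text, Text> {
    @Override
    public void reduce(Text key, Iterator<Text> values,
        OutputCollector<ImmutableBytesWritable, Put> output, Reporter reporter)
        throws IOException {
      byte[] row = Bytes.toBytes(key.toString());
      Put put = new Put(row);
      put.addColumn(Bytes.toBytes("f"), Bytes.toBytes("q"),
          Bytes.toBytes(values.next().toString()));
      output.collect(new ImmutableBytesWritable(row), put);
    }
  }

  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(HBaseConfiguration.create(), TableReduceDriver.class);
    // null -> default partitioner; true -> ship dependency jars via tmpjars.
    TableMapReduceUtil.initTableReduceJob("demo_table", ExampleTableReducer.class,
        job, null, true);
    // ... configure the map side and input here, then JobClient.runJob(job).
  }
}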

Example 4: setOutput

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
private static DBConfiguration setOutput(JobConf job, String tableName) {
  job.setOutputFormat(DBOutputFormat.class);
  job.setReduceSpeculativeExecution(false);

  DBConfiguration dbConf = new DBConfiguration(job);
  
  dbConf.setOutputTableName(tableName);
  return dbConf;
}
 
Developer: naver, Project: hadoop, Lines: 10, Source: DBOutputFormat.java
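The method above is a private helper; callers go through the public DBOutputFormat.setOutput overloads, which invoke it and additionally record the column names or count. A hedged sketch follows; the driver name, JDBC settings, table, and columns are placeholders.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;
import org.apache.hadoop.mapred.lib.db.DBOutputFormat;

public class DbExportDriver {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(DbExportDriver.class);
    // Placeholder connection settings.
    DBConfiguration.configureDB(job, "com.mysql.jdbc.Driver",
        "jdbc:mysql://localhost/demo", "user", "password");
    // Sets DBOutputFormat as the output format, disables reduce-side
    // speculative execution, and names the target table and columns.
    DBOutputFormat.setOutput(job, "events", "id", "name", "count");
  }
}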

Example 5: setOutputFormatClass

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
/**
 * Set the underlying output format for LazyOutputFormat.
 * @param job the {@link JobConf} to modify
 * @param theClass the underlying class
 */
@SuppressWarnings("unchecked")
public static void setOutputFormatClass(JobConf job, 
    Class<? extends OutputFormat> theClass) {
    job.setOutputFormat(LazyOutputFormat.class);
    job.setClass("mapreduce.output.lazyoutputformat.outputformat", theClass, OutputFormat.class);
}
 
Developer: naver, Project: hadoop, Lines: 12, Source: LazyOutputFormat.java
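A usage sketch for the wrapper above, assuming a hypothetical driver class: LazyOutputFormat delegates record writing to the configured underlying format, but only creates an output file once the first record is written, which avoids empty part files.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.LazyOutputFormat;

public class LazyOutputDriver {
  public static void main(String[] args) {
    JobConf job = new JobConf(LazyOutputDriver.class);
    // Records are formatted by TextOutputFormat, but part files are
    // created lazily, on the first record actually written.
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
  }
}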

Example 6: doTestWithMapReduce

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
    String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
    int expectedNumSplits, boolean shutdownCluster) throws Exception {

  //create the table and snapshot
  createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);

  if (shutdownCluster) {
    util.shutdownMiniHBaseCluster();
  }

  try {
    // create the job
    JobConf jobConf = new JobConf(util.getConfiguration());

    jobConf.setJarByClass(util.getClass());
    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(jobConf,
      TestTableSnapshotInputFormat.class);

    TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
      TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
      NullWritable.class, jobConf, true, tableDir);

    jobConf.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
    jobConf.setNumReduceTasks(1);
    jobConf.setOutputFormat(NullOutputFormat.class);

    RunningJob job = JobClient.runJob(jobConf);
    Assert.assertTrue(job.isSuccessful());
  } finally {
    if (!shutdownCluster) {
      util.getHBaseAdmin().deleteSnapshot(snapshotName);
      util.deleteTable(tableName);
    }
  }
}
 
Developer: fengchen8086, Project: ditb, Lines: 37, Source: TestTableSnapshotInputFormat.java

Example 7: submitAsMapReduce

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
/**
 * Based on the parsed args, submits the LoadGenerator as an MR job,
 * running numMapTasks map tasks.
 * @return exit code of the job submission
 */
private int submitAsMapReduce() {
  
  System.out.println("Running as a MapReduce job with " + 
      numMapTasks + " mapTasks;  Output to file " + mrOutDir);


  Configuration conf = new Configuration(getConf());
  
  // First set all the args of LoadGenerator as Conf vars to pass to MR tasks

  conf.set(LG_ROOT , root.toString());
  conf.setInt(LG_MAXDELAYBETWEENOPS, maxDelayBetweenOps);
  conf.setInt(LG_NUMOFTHREADS, numOfThreads);
  conf.set(LG_READPR, readProbs[0]+""); //Pass Double as string
  conf.set(LG_WRITEPR, writeProbs[0]+""); //Pass Double as string
  conf.setLong(LG_SEED, seed); // seed for the generator's random number sequence
  conf.setInt(LG_NUMMAPTASKS, numMapTasks);
  if (scriptFile == null && durations[0] <=0) {
    System.err.println("When run as a MapReduce job, elapsed Time or ScriptFile must be specified");
    System.exit(-1);
  }
  conf.setLong(LG_ELAPSEDTIME, durations[0]);
  conf.setLong(LG_STARTTIME, startTime); 
  if (scriptFile != null) {
    conf.set(LG_SCRIPTFILE , scriptFile);
  }
  conf.set(LG_FLAGFILE, flagFile.toString());
  
  // Now set the necessary conf variables that apply to run MR itself.
  JobConf jobConf = new JobConf(conf, LoadGenerator.class);
  jobConf.setJobName("NNLoadGeneratorViaMR");
  jobConf.setNumMapTasks(numMapTasks);
  jobConf.setNumReduceTasks(1); // 1 reducer to collect the results

  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(IntWritable.class);

  jobConf.setMapperClass(MapperThatRunsNNLoadGenerator.class);
  jobConf.setReducerClass(ReducerThatCollectsLGdata.class);

  jobConf.setInputFormat(DummyInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);
  
  // Explicitly set number of max map attempts to 1.
  jobConf.setMaxMapAttempts(1);
  // Explicitly turn off speculative execution
  jobConf.setSpeculativeExecution(false);

  // This mapReduce job has no input but has output
  FileOutputFormat.setOutputPath(jobConf, new Path(mrOutDir));

  try {
    JobClient.runJob(jobConf);
  } catch (IOException e) {
    System.err.println("Failed to run job: " + e.getMessage());
    return -1;
  }
  return 0;
  
}
 
Developer: naver, Project: hadoop, Lines: 66, Source: LoadGeneratorMR.java

Example 8: setupPipesJob

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
private static void setupPipesJob(JobConf conf) throws IOException {
  // default map output types to Text
  if (!getIsJavaMapper(conf)) {
    conf.setMapRunnerClass(PipesMapRunner.class);
    // Save the user's partitioner and hook in ours.
    setJavaPartitioner(conf, conf.getPartitionerClass());
    conf.setPartitionerClass(PipesPartitioner.class);
  }
  if (!getIsJavaReducer(conf)) {
    conf.setReducerClass(PipesReducer.class);
    if (!getIsJavaRecordWriter(conf)) {
      conf.setOutputFormat(NullOutputFormat.class);
    }
  }
  String textClassname = Text.class.getName();
  setIfUnset(conf, MRJobConfig.MAP_OUTPUT_KEY_CLASS, textClassname);
  setIfUnset(conf, MRJobConfig.MAP_OUTPUT_VALUE_CLASS, textClassname);
  setIfUnset(conf, MRJobConfig.OUTPUT_KEY_CLASS, textClassname);
  setIfUnset(conf, MRJobConfig.OUTPUT_VALUE_CLASS, textClassname);
  
  // Use PipesNonJavaInputFormat if necessary to handle progress reporting
  // from C++ RecordReaders ...
  if (!getIsJavaRecordReader(conf) && !getIsJavaMapper(conf)) {
    conf.setClass(Submitter.INPUT_FORMAT, 
                  conf.getInputFormat().getClass(), InputFormat.class);
    conf.setInputFormat(PipesNonJavaInputFormat.class);
  }
  
  String exec = getExecutable(conf);
  if (exec == null) {
    throw new IllegalArgumentException("No application program defined.");
  }
  // add default debug script only when executable is expressed as
  // <path>#<executable>
  if (exec.contains("#")) {
    // set default gdb commands for map and reduce task 
    String defScript = "$HADOOP_PREFIX/src/c++/pipes/debug/pipes-default-script";
    setIfUnset(conf, MRJobConfig.MAP_DEBUG_SCRIPT,defScript);
    setIfUnset(conf, MRJobConfig.REDUCE_DEBUG_SCRIPT,defScript);
  }
  URI[] fileCache = DistributedCache.getCacheFiles(conf);
  if (fileCache == null) {
    fileCache = new URI[1];
  } else {
    URI[] tmp = new URI[fileCache.length+1];
    System.arraycopy(fileCache, 0, tmp, 1, fileCache.length);
    fileCache = tmp;
  }
  try {
    fileCache[0] = new URI(exec);
  } catch (URISyntaxException e) {
    IOException ie = new IOException("Problem parsing executable URI " + exec);
    ie.initCause(e);
    throw ie;
  }
  DistributedCache.setCacheFiles(fileCache, conf);
}
 
Developer: naver, Project: hadoop, Lines: 58, Source: Submitter.java

Example 9: createDataJoinJob

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
public static JobConf createDataJoinJob(String args[]) throws IOException {

    String inputDir = args[0];
    String outputDir = args[1];
    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
      System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
      System.out.println("Using TextInputFormat: " + args[2]);
      inputFormat = TextInputFormat.class;
    }
    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);
    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
      System.out.println("Using SequenceFileOutputFormat: " + args[7]);
      outputFormat = SequenceFileOutputFormat.class;
      outputValueClass = getClassByName(args[7]);
    } else {
      System.out.println("Using TextOutputFormat: " + args[7]);
    }
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
      maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
      jobName = args[9];
    }
    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, DataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir), true);
    FileInputFormat.setInputPaths(job, inputDir);

    job.setInputFormat(inputFormat);

    job.setMapperClass(mapper);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job,
            SequenceFile.CompressionType.BLOCK);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}
 
Developer: naver, Project: hadoop, Lines: 59, Source: DataJoinJob.java
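For context, a sketch of how a driver might invoke this factory. All paths and the mapper, reducer, and value class names are hypothetical stand-ins for DataJoinMapperBase/DataJoinReducerBase/TaggedMapOutput subclasses, and the companion DataJoinJob.runJob call is used under the assumption that it behaves as in the same contrib class.

import org.apache.hadoop.mapred.JobConf;
// DataJoinJob lives in the hadoop-datajoin contrib module.
import org.apache.hadoop.contrib.utils.join.DataJoinJob;

public class DataJoinDriver {
  public static void main(String[] args) throws Exception {
    // Positional arguments matching the contract parsed above; the mapper,
    // reducer, and map-output value classes are hypothetical.
    String[] jobArgs = {
        "/data/in",                    // args[0]: input dir
        "/data/out",                   // args[1]: output dir
        "text",                        // args[2]: input format (text or seq)
        "2",                           // args[3]: number of reducers
        "org.example.MyJoinMapper",    // args[4]: mapper class
        "org.example.MyJoinReducer",   // args[5]: reducer class
        "org.example.MyTaggedOutput",  // args[6]: map output value class
        "text",                        // args[7]: output format (text or seq)
        "100",                         // args[8]: max values per group
        "demo-join"                    // args[9]: job name
    };
    JobConf job = DataJoinJob.createDataJoinJob(jobArgs);
    boolean success = DataJoinJob.runJob(job);
    System.exit(success ? 0 : 1);
  }
}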

Example 10: main

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
    JetInstance client = Jet.newJetClient();

    String inputPath = args[0];
    String outputPath = args[1] + "_" + System.currentTimeMillis();

    DAG dag = new DAG();
    JobConf conf = new JobConf();
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(conf, new Path(inputPath));
    TextOutputFormat.setOutputPath(conf, new Path(outputPath));

    Vertex producer = dag.newVertex("reader", readHdfsP(conf,
            (k, v) -> v.toString())).localParallelism(3);

    Vertex tokenizer = dag.newVertex("tokenizer",
            flatMapP((String line) -> {
                StringTokenizer s = new StringTokenizer(line);
                return () -> s.hasMoreTokens() ? s.nextToken() : null;
            })
    );

    // word -> (word, count)
    Vertex accumulate = dag.newVertex("accumulate", accumulateByKeyP(wholeItem(), counting()));

    // (word, count) -> (word, count)
    Vertex combine = dag.newVertex("combine", combineByKeyP(counting()));
    Vertex consumer = dag.newVertex("writer", writeHdfsP(conf, entryKey(), entryValue())).localParallelism(1);

    dag.edge(between(producer, tokenizer))
       .edge(between(tokenizer, accumulate)
               .partitioned(wholeItem(), HASH_CODE))
       .edge(between(accumulate, combine)
               .distributed()
               .partitioned(entryKey()))
       .edge(between(combine, consumer));

    JobConfig config = new JobConfig();
    config.addClass(JetWordCount.class);

    try {
        long start = System.currentTimeMillis();
        client.newJob(dag, config).join();
        System.out.println("Time=" + (System.currentTimeMillis() - start));

    } finally {
        client.shutdown();
    }
}
 
Developer: hazelcast, Project: big-data-benchmark, Lines: 51, Source: JetWordCount.java


Note: The org.apache.hadoop.mapred.JobConf.setOutputFormat examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are selected from community-contributed open-source projects; copyright of the source code remains with the original authors, and any distribution or use should follow the corresponding project's license. Please do not reproduce without permission.