Java Job.getInstance Method Code Examples

This article collects typical usage examples of the Job.getInstance method from the Java class org.apache.hadoop.mapreduce.Job. If you are wondering how Job.getInstance is used, what it does, or what calling it looks like in practice, the curated examples below should help. You can also explore the other usage examples for the enclosing class, org.apache.hadoop.mapreduce.Job.


The sections below present 15 code examples of the Job.getInstance method, sorted by popularity by default.
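Before turning to the collected examples, here is a minimal sketch of the three most common Job.getInstance overloads: no-argument, Configuration, and Configuration plus job name. It is a self-contained illustration, not taken from any of the projects below; the class name GetInstanceSketch and the job name "example-job" are made up for demonstration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetInstanceSketch {
    public static void main(String[] args) throws Exception {
        // Overload 1: no arguments -- creates a Job backed by a fresh Configuration.
        Job defaults = Job.getInstance();

        // Overload 2: an explicit Configuration. The Job copies the passed
        // Configuration, so later changes to conf do not affect the job.
        Configuration conf = new Configuration();
        Job fromConf = Job.getInstance(conf);

        // Overload 3: Configuration plus a human-readable job name,
        // equivalent to calling setJobName afterwards.
        Job named = Job.getInstance(conf, "example-job");
        System.out.println(named.getJobName()); // prints "example-job"
    }
}

Most of the examples below use the Configuration-based overloads; Example 14 additionally shows an older, Cluster-based overload.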

Example 1: main

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        conf.setFloat("beta", Float.parseFloat(args[3]));
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitSum.class);

        ChainMapper.addMapper(job, PassMapper.class, Object.class, Text.class, Text.class, DoubleWritable.class, conf);
        ChainMapper.addMapper(job, BetaMapper.class, Text.class, DoubleWritable.class, Text.class, DoubleWritable.class, conf);

        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, PassMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, BetaMapper.class);

        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        job.waitForCompletion(true);
    }
 
Author: yogykwan, Project: mapreduce-samples, Lines: 21, Source: UnitSum.java

Example 2: main

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	Job job = Job.getInstance(conf, "maxtemp");
	
	job.setMapperClass(MaxTempMapper.class);
	job.setReducerClass(MaxTempReducer.class);

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(FloatWritable.class);

	FileInputFormat.setInputPaths(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	if (!job.waitForCompletion(true))
		return;
}
 
Author: aadishgoel2013, Project: Hadoop-Codes, Lines: 17, Source: MaxTempDriver.java

Example 3: doVerify

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
protected void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  LOG.info("Verify output dir: " + outputDir);

  Job job = Job.getInstance(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);

  Scan scan = new Scan();

  TableMapReduceUtil.initTableMapperJob(
      htd.getTableName().getNameAsString(), scan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);

  job.setReducerClass(VerifyReducer.class);
  job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));

  long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
  assertEquals(0, numOutputRecords);
}
 
Author: fengchen8086, Project: ditb, Lines: 27, Source: IntegrationTestLoadAndVerify.java

Example 4: testEmptyOutput

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public void testEmptyOutput() throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // Do not write any output

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);
  
  FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Author: naver, Project: hadoop, Lines: 22, Source: TestMRCJCFileOutputCommitter.java

Example 5: run

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  if (args.length != 2) {
    usage();
    return 2;
  }
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSum");
  job.setJarByClass(TeraChecksum.class);
  job.setMapperClass(ChecksumMapper.class);
  job.setReducerClass(ChecksumReducer.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Unsigned16.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  job.setInputFormatClass(TeraInputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
 
Author: naver, Project: hadoop, Lines: 20, Source: TeraChecksum.java

Example 6: main

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void main(String[] args) throws Exception {

		Configuration conf = new Configuration();

		Job job = Job.getInstance(conf);
		job.setMapperClass(DataDividerMapper.class);
		job.setReducerClass(DataDividerReducer.class);

		job.setJarByClass(DataDividerByUser.class);

		job.setInputFormatClass(TextInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		job.setOutputKeyClass(IntWritable.class);
		job.setOutputValueClass(Text.class);

		TextInputFormat.setInputPaths(job, new Path(args[0]));
		TextOutputFormat.setOutputPath(job, new Path(args[1]));

		job.waitForCompletion(true);
	}
 
Author: yogykwan, Project: mapreduce-samples, Lines: 21, Source: DataDividerByUser.java

Example 7: testSplitSampler

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Verify SplitSampler contract, that an equal number of records are taken
 * from the first splits.
 */
@Test
@SuppressWarnings("unchecked") // IntWritable comparator not typesafe
public void testSplitSampler() throws Exception {
  final int TOT_SPLITS = 15;
  final int NUM_SPLITS = 5;
  final int STEP_SAMPLE = 5;
  final int NUM_SAMPLES = NUM_SPLITS * STEP_SAMPLE;
  InputSampler.Sampler<IntWritable,NullWritable> sampler =
    new InputSampler.SplitSampler<IntWritable,NullWritable>(
        NUM_SAMPLES, NUM_SPLITS);
  int inits[] = new int[TOT_SPLITS];
  for (int i = 0; i < TOT_SPLITS; ++i) {
    inits[i] = i * STEP_SAMPLE;
  }
  Job ignored = Job.getInstance();
  Object[] samples = sampler.getSample(
      new TestInputSamplerIF(100000, TOT_SPLITS, inits), ignored);
  assertEquals(NUM_SAMPLES, samples.length);
  Arrays.sort(samples, new IntWritable.Comparator());
  for (int i = 0; i < NUM_SAMPLES; ++i) {
    assertEquals(i, ((IntWritable)samples[i]).get());
  }
}
 
Author: naver, Project: hadoop, Lines: 28, Source: TestInputSampler.java

Example 8: main

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void main(String[] args) throws Exception {

        Configuration configuration = new Configuration();
        configuration.set("dictionary", args[2]);

        Job job = Job.getInstance(configuration);
        job.setJarByClass(SentimentAnalysis.class);
        job.setMapperClass(SentimentSplit.class);
        job.setReducerClass(SentimentCollection.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
 
Author: yogykwan, Project: mapreduce-samples, Lines: 17, Source: SentimentAnalysis.java

Example 9: main

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
    Configuration con = new Configuration();
    Job bookJob = Job.getInstance(con, "Average Page Count");
    bookJob.setJarByClass(AveragePageCount.class);
    bookJob.setMapperClass(TextMapper.class);
    bookJob.setReducerClass(AverageReduce.class);
    bookJob.setOutputKeyClass(Text.class);
    bookJob.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(bookJob, new Path("C:/Hadoop/books.txt"));
    FileOutputFormat.setOutputPath(bookJob, new Path("C:/Hadoop/BookOutput"));
    if (bookJob.waitForCompletion(true)) {
        System.exit(0);
    }
}
 
Author: PacktPublishing, Project: Machine-Learning-End-to-Endguide-for-Java-developers, Lines: 16, Source: AveragePageCount.java

Example 10: testMapFileOutputCommitterInternal

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
private void testMapFileOutputCommitterInternal(int version)
    throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
      version);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());    
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  MapFileOutputFormat theOutputFormat = new MapFileOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeMapFileOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), outDir);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Author: naver, Project: hadoop, Lines: 30, Source: TestFileOutputCommitter.java

Example 11: testWithConf

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
private void testWithConf(Configuration conf) throws IOException,
    InterruptedException, ClassNotFoundException, URISyntaxException {
  // Create a temporary file of length 1.
  Path first = createTempFile("distributed.first", "x");
  // Create two jars with a single file inside them.
  Path second =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
  Path third =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
  Path fourth =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);


  Job job = Job.getInstance(conf);
  job.setMapperClass(DistributedCacheCheckerMapper.class);
  job.setReducerClass(DistributedCacheCheckerReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  FileInputFormat.setInputPaths(job, first);
  // Creates the Job Configuration
  job.addCacheFile(
    new URI(first.toUri().toString() + "#distributed.first.symlink"));
  job.addFileToClassPath(second);
  job.addArchiveToClassPath(third);
  job.addCacheArchive(fourth.toUri());
  job.setMaxMapAttempts(1); // speed up failures

  job.submit();
  assertTrue(job.waitForCompletion(false));
}
 
Author: naver, Project: hadoop, Lines: 30, Source: TestMRWithDistributedCache.java

Example 12: testGzipWithTwoInputs

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Test using the gzip codec with two input files.
 */
@Test (timeout=5000)
public void testGzipWithTwoInputs() throws Exception {
  CompressionCodec gzip = new GzipCodec();
  localFs.delete(workDir, true);
  Job job = Job.getInstance(defaultConf);
  FixedLengthInputFormat format = new FixedLengthInputFormat();
  format.setRecordLength(job.getConfiguration(), 5);
  ReflectionUtils.setConf(gzip, job.getConfiguration());
  FileInputFormat.setInputPaths(job, workDir);
  // Create files with fixed length records with 5 byte long records.
  writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip, 
      "one  two  threefour five six  seveneightnine ten  ");
  writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
      "ten  nine eightsevensix  five four threetwo  one  ");
  List<InputSplit> splits = format.getSplits(job);
  assertEquals("compressed splits == 2", 2, splits.size());
  FileSplit tmp = (FileSplit) splits.get(0);
  if (tmp.getPath().getName().equals("part2.txt.gz")) {
    splits.set(0, splits.get(1));
    splits.set(1, tmp);
  }
  List<String> results = readSplit(format, splits.get(0), job);
  assertEquals("splits[0] length", 10, results.size());
  assertEquals("splits[0][5]", "six  ", results.get(5));
  results = readSplit(format, splits.get(1), job);
  assertEquals("splits[1] length", 10, results.size());
  assertEquals("splits[1][0]", "ten  ", results.get(0));
  assertEquals("splits[1][1]", "nine ", results.get(1));
}
 
Author: naver, Project: hadoop, Lines: 33, Source: TestFixedLengthInputFormat.java

Example 13: createJob

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Create Job object for submitting it, with all the configuration
 *
 * @return Reference to job object.
 * @throws IOException - Exception if any
 */
private Job createJob() throws IOException {
  String jobName = "distcp";
  String userChosenName = getConf().get(JobContext.JOB_NAME);
  if (userChosenName != null)
    jobName += ": " + userChosenName;
  Job job = Job.getInstance(getConf());
  job.setJobName(jobName);
  job.setInputFormatClass(DistCpUtils.getStrategy(getConf(), inputOptions));
  job.setJarByClass(CopyMapper.class);
  configureOutputFormat(job);

  job.setMapperClass(CopyMapper.class);
  job.setNumReduceTasks(0);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputFormatClass(CopyOutputFormat.class);
  job.getConfiguration().set(JobContext.MAP_SPECULATIVE, "false");
  job.getConfiguration().set(JobContext.NUM_MAPS,
                String.valueOf(inputOptions.getMaxMaps()));

  if (inputOptions.getSslConfigurationFile() != null) {
    setupSSLConfig(job);
  }

  inputOptions.appendToConf(job.getConfiguration());
  return job;
}
 
Author: naver, Project: hadoop, Lines: 34, Source: DistCp.java

Example 14: total

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void total(String name, String in, String out)
		throws IOException, InterruptedException, ClassNotFoundException {
	Configuration conf = new Configuration();
	conf.set(QUERIED_NAME, name);
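	// Note: this Cluster-based getInstance overload comes from the older
	// MapReduce client API and is deprecated in later Hadoop 2.x releases;
	// Job.getInstance(conf) is the usual replacement.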
	Job job = Job.getInstance(new Cluster(conf), conf);
	job.setJarByClass(Total.class);

	// in
	if (!in.endsWith("/"))
		in = in.concat("/");
	in = in.concat("employees");
	SequenceFileInputFormat.addInputPath(job, new Path(in));
	job.setInputFormatClass(SequenceFileInputFormat.class);

	// map
	job.setMapperClass(TotalMapper.class);
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(DoubleWritable.class);

	// reduce
	job.setCombinerClass(TotalReducer.class);
	job.setReducerClass(TotalReducer.class);

	// out
	SequenceFileOutputFormat.setOutputPath(job, new Path(out));
	job.setOutputFormatClass(SequenceFileOutputFormat.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);

	job.waitForCompletion(true);
}
 
Author: amritbhat786, Project: DocIT, Lines: 32, Source: Total.java

Example 15: testCommitter

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
@SuppressWarnings("unchecked")
public void testCommitter() throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  File expectedFile = new File(new Path(outDir, partFile).toString());
  StringBuffer expectedOutput = new StringBuffer();
  expectedOutput.append(key1).append('\t').append(val1).append("\n");
  expectedOutput.append(val1).append("\n");
  expectedOutput.append(val2).append("\n");
  expectedOutput.append(key2).append("\n");
  expectedOutput.append(key1).append("\n");
  expectedOutput.append(key2).append('\t').append(val2).append("\n");
  String output = UtilsForTests.slurp(expectedFile);
  assertEquals(output, expectedOutput.toString());
  FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Author: naver, Project: hadoop, Lines: 37, Source: TestMRCJCFileOutputCommitter.java


Note: The org.apache.hadoop.mapreduce.Job.getInstance method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers, and copyright in the source code remains with the original authors. When redistributing or using the code, please follow the license of the corresponding project; do not repost without permission.