

Java Job Class Code Examples

This article collects typical usage examples of the Java class org.apache.hadoop.mapreduce.Job. If you are wondering what the Job class is for, how to use it, or what real-world usage looks like, the curated examples below should help.


The Job class belongs to the org.apache.hadoop.mapreduce package. Fifteen code examples of the Job class are shown below, sorted by popularity by default.
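Before the individual examples, a minimal, self-contained driver may help make the common pattern visible: obtain a Job from a Configuration, set the jar plus the mapper/reducer and key/value classes, wire up the input and output paths, and submit with waitForCompletion. The sketch below is only an orientation and is not taken from any of the projects cited here; it assumes the Hadoop 2.x API, uses the built-in identity Mapper and Reducer, and the class name MinimalJobDriver is a hypothetical placeholder.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalJobDriver {
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: MinimalJobDriver <input path> <output path>");
            System.exit(-1);
        }

        Configuration conf = new Configuration();
        // Job.getInstance(conf, name) is the current factory method; the no-arg Job() constructor is deprecated.
        Job job = Job.getInstance(conf, "minimal-identity-job");
        job.setJarByClass(MinimalJobDriver.class);

        // The base Mapper and Reducer classes are identity functions, so this job simply
        // passes <offset, line> records from the input through to the output.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // waitForCompletion(true) submits the job, prints progress, and blocks until it finishes.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

A driver like this would typically be packaged into a jar and launched with hadoop jar <jar> MinimalJobDriver <input> <output>; the examples below follow the same skeleton while varying the mapper, reducer, formats, and job-specific configuration.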

Example 1: main

import org.apache.hadoop.mapreduce.Job; // import the required package/class
public static void main(String[] args) throws Exception {
    if(args.length != 2){
        System.err.println("Usage: MaxTemperatureWithCombiner <input path> <output path>");
        System.exit(-1);
    }

    Job job = new Job();
    job.setJarByClass(MaxTemperatureWithCombiner.class);
    job.setJobName("Max Temperature With Combiner");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTemperatureMapper.class);
    job.setCombinerClass(MaxTemperatureReducer.class);
    job.setReducerClass(MaxTemperatureReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Developer: myziyue, Project: learn-to-hadoop, Lines of code: 23, Source file: MaxTemperatureWithCombiner.java

Example 2: main

import org.apache.hadoop.mapreduce.Job; // import the required package/class
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	
	Job job = Job.getInstance(conf);
	job.setJobName("TF-IDFCount");
	job.setJarByClass(TF_IDF.class);
	
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(TextArrayWritable.class);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);
	
	job.setMapperClass(TF_IDFMap.class);
	job.setReducerClass(TF_IDFReduce.class);
	
	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileInputFormat.addInputPath(job, new Path(args[1]));
	FileOutputFormat.setOutputPath(job, new Path(args[2]));
	boolean wait = job.waitForCompletion(true);
	System.exit(wait ? 0 : 1);
}
 
Developer: lzmhhh123, Project: Wikipedia-Index, Lines of code: 26, Source file: TF_IDF.java

Example 3: createCopyJob

import org.apache.hadoop.mapreduce.Job; // import the required package/class
/**
 * Creates a simple copy job.
 * 
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir, 
    Path... indirs) throws Exception {
  conf.setInt(MRJobConfig.NUM_MAPS, 3);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("DataMoveJob");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(DataCopyMapper.class);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(DataCopyReducer.class);
  theJob.setNumReduceTasks(1);
  return theJob;
}
 
Developer: naver, Project: hadoop, Lines of code: 25, Source file: MapReduceTestUtil.java

Example 4: main

import org.apache.hadoop.mapreduce.Job; // import the required package/class
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	Job job = Job.getInstance(conf, "maxtemp");
	
	job.setMapperClass(MaxTempMapper.class);
	job.setReducerClass(MaxTempReducer.class);

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(FloatWritable.class);

	FileInputFormat.setInputPaths(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	if (!job.waitForCompletion(true))
		return;
}
 
Developer: aadishgoel2013, Project: Hadoop-Codes, Lines of code: 17, Source file: MaxTempDriver.java

Example 5: configureInputFormat

import org.apache.hadoop.mapreduce.Job; // import the required package/class
@Override
public void configureInputFormat(Job job, String tableName,
    String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {

  // Write a line of text into a file so that we can get
  // a record to the map task.
  Path dir = new Path(this.options.getTempDir());
  Path p = new Path(dir, "sqoop-dummy-import-job-file.txt");
  FileSystem fs = FileSystem.getLocal(this.options.getConf());
  if (fs.exists(p)) {
    boolean result = fs.delete(p, false);
    assertTrue("Couldn't delete temp file!", result);
  }

  BufferedWriter w = new BufferedWriter(
      new OutputStreamWriter(fs.create(p)));
  w.append("This is a line!");
  w.close();

  FileInputFormat.addInputPath(job, p);

  // And set the InputFormat itself.
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 26, Source file: TestImportJob.java

Example 6: call

import org.apache.hadoop.mapreduce.Job; // import the required package/class
public Job call() throws IOException, InterruptedException,
                         ClassNotFoundException {
  ugi.doAs(
    new PrivilegedExceptionAction<Job>() {
      public Job run() throws IOException, ClassNotFoundException,
                              InterruptedException {
        job.setMapperClass(LoadMapper.class);
        job.setReducerClass(LoadReducer.class);
        job.setNumReduceTasks(jobdesc.getNumberReduces());
        job.setMapOutputKeyClass(GridmixKey.class);
        job.setMapOutputValueClass(GridmixRecord.class);
        job.setSortComparatorClass(LoadSortComparator.class);
        job.setGroupingComparatorClass(SpecGroupingComparator.class);
        job.setInputFormatClass(LoadInputFormat.class);
        job.setOutputFormatClass(RawBytesOutputFormat.class);
        job.setPartitionerClass(DraftPartitioner.class);
        job.setJarByClass(LoadJob.class);
        job.getConfiguration().setBoolean(Job.USED_GENERIC_PARSER, true);
        FileOutputFormat.setOutputPath(job, outdir);
        job.submit();
        return job;
      }
    });

  return job;
}
 
Developer: naver, Project: hadoop, Lines of code: 27, Source file: LoadJob.java

Example 7: testChainMapNoOuptut

import org.apache.hadoop.mapreduce.Job; // import the required package/class
/**
 * Tests one of the maps consuming output.
 * 
 * @throws Exception
 */
public void testChainMapNoOuptut() throws Exception {
  Configuration conf = createJobConf();
  String expectedOutput = "";

  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 0, input);
  job.setJobName("chain");

  ChainMapper.addMapper(job, ConsumeMap.class, IntWritable.class, Text.class,
      LongWritable.class, Text.class, null);

  ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);

  job.waitForCompletion(true);
  assertTrue("Job failed", job.isSuccessful());
  assertEquals("Outputs doesn't match", expectedOutput, MapReduceTestUtil
      .readOutput(outDir, conf));
}
 
Developer: naver, Project: hadoop, Lines of code: 24, Source file: TestChainErrors.java

Example 8: testAddDependencyJars

import org.apache.hadoop.mapreduce.Job; // import the required package/class
/**
 * Look for jars we expect to be on the classpath by name.
 */
@Test
public void testAddDependencyJars() throws Exception {
  Job job = new Job();
  TableMapReduceUtil.addDependencyJars(job);
  String tmpjars = job.getConfiguration().get("tmpjars");

  // verify presence of modules
  assertTrue(tmpjars.contains("hbase-common"));
  assertTrue(tmpjars.contains("hbase-protocol"));
  assertTrue(tmpjars.contains("hbase-client"));
  assertTrue(tmpjars.contains("hbase-hadoop-compat"));
  assertTrue(tmpjars.contains("hbase-server"));

  // verify presence of 3rd party dependencies.
  assertTrue(tmpjars.contains("zookeeper"));
  assertTrue(tmpjars.contains("netty"));
  assertTrue(tmpjars.contains("protobuf"));
  assertTrue(tmpjars.contains("guava"));
  assertTrue(tmpjars.contains("htrace"));
}
 
Developer: fengchen8086, Project: ditb, Lines of code: 24, Source file: IntegrationTestTableMapReduceUtil.java

Example 9: main

import org.apache.hadoop.mapreduce.Job; // import the required package/class
/**
 * Main entry point.
 *
 * @param args The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("ERROR: Wrong number of parameters: " + args.length);
    System.err.println("Usage: CellCounter ");
    System.err.println("       <tablename> <outputDir> <reportSeparator> [^[regex pattern] or " +
      "[Prefix] for row filter]] --starttime=[starttime] --endtime=[endtime]");
    System.err.println("  Note: -D properties will be applied to the conf used. ");
    System.err.println("  Additionally, the following SCAN properties can be specified");
    System.err.println("  to get fine grained control on what is counted..");
    System.err.println("   -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
    System.err.println(" <reportSeparator> parameter can be used to override the default report separator " +
        "string : used to separate the rowId/column family name and qualifier name.");
    System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell counter count " +
        "operation to a limited subset of rows from the table based on regex or prefix pattern.");
    System.exit(-1);
  }
  Job job = createSubmittableJob(conf, otherArgs);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Developer: fengchen8086, Project: ditb, Lines of code: 28, Source file: CellCounter.java

Example 10: main

import org.apache.hadoop.mapreduce.Job; // import the required package/class
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	Job job = Job.getInstance(conf, "test");
	
	job.setMapperClass(testMapper.class);
	job.setPartitionerClass(testPartitioner.class);
	job.setReducerClass(testReducer.class);
	job.setNumReduceTasks(10);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(IntWritable.class);
	
	FileInputFormat.setInputPaths(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	if (!job.waitForCompletion(true))
		return;
}
 
Developer: aadishgoel2013, Project: Hadoop-Codes, Lines of code: 19, Source file: testDriver.java

Example 11: configueAvroMergeJob

import org.apache.hadoop.mapreduce.Job; // import the required package/class
private void configueAvroMergeJob(Configuration conf, Job job, Path oldPath, Path newPath)
    throws IOException {
  LOG.info("Trying to merge avro files");
  final Schema oldPathSchema = AvroUtil.getAvroSchema(oldPath, conf);
  final Schema newPathSchema = AvroUtil.getAvroSchema(newPath, conf);
  if (oldPathSchema == null || newPathSchema == null || !oldPathSchema.equals(newPathSchema)) {
    throw new IOException("Invalid schema for input directories. Schema for old data: ["
        + oldPathSchema + "]. Schema for new data: [" + newPathSchema + "]");
  }
  LOG.debug("Avro Schema:" + oldPathSchema);
  job.setInputFormatClass(AvroInputFormat.class);
  job.setOutputFormatClass(AvroOutputFormat.class);
  job.setMapperClass(MergeAvroMapper.class);
  job.setReducerClass(MergeAvroReducer.class);
  AvroJob.setOutputSchema(job.getConfiguration(), oldPathSchema);
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 17, Source file: MergeJob.java

Example 12: configureJob

import org.apache.hadoop.mapreduce.Job; // import the required package/class
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String [] args)
throws IOException {
  Path inputPath = new Path(args[0]);
  String tableName = args[1];
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(Uploader.class);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(Uploader.class);
  // No reducers.  Just write straight to table.  Call initTableReducerJob
  // because it sets up the TableOutputFormat.
  TableMapReduceUtil.initTableReducerJob(tableName, null, job);
  job.setNumReduceTasks(0);
  return job;
}
 
Developer: fengchen8086, Project: ditb, Lines of code: 19, Source file: SampleUploader.java

Example 13: testInputFormat

import org.apache.hadoop.mapreduce.Job; // import the required package/class
void testInputFormat(Class<? extends InputFormat> clazz)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
  job.setInputFormatClass(clazz);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);

  LOG.debug("submitting job.");
  assertTrue("job failed!", job.waitForCompletion(true));
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getValue());
  assertEquals("Saw any instances of the filtered out row.", 0, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getValue());
  assertEquals("Saw the wrong number of instances of columnA.", 1, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getValue());
  assertEquals("Saw the wrong number of instances of columnB.", 1, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getValue());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getValue());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getValue());
}
 
Developer: fengchen8086, Project: ditb, Lines of code: 24, Source file: TestTableInputFormat.java

Example 14: runIncrementalPELoad

import org.apache.hadoop.mapreduce.Job; // import the required package/class
private void runIncrementalPELoad(Configuration conf, HTableDescriptor tableDescriptor,
    RegionLocator regionLocator, Path outDir) throws IOException, UnsupportedEncodingException,
    InterruptedException, ClassNotFoundException {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName());
  setupRandomGeneratorMapper(job);
  HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
  FileOutputFormat.setOutputPath(job, outDir);

  assertFalse(util.getTestFileSystem().exists(outDir));

  assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());

  assertTrue(job.waitForCompletion(true));
}
 
Developer: fengchen8086, Project: ditb, Lines of code: 19, Source file: TestHFileOutputFormat2.java

Example 15: doMapReduce

import org.apache.hadoop.mapreduce.Job; // import the required package/class
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
      InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);

  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
 
Developer: fengchen8086, Project: ditb, Lines of code: 27, Source file: PerformanceEvaluation.java


Note: The org.apache.hadoop.mapreduce.Job class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their original authors, and the copyright of the source code remains with those authors; please consult each project's License before distributing or using it. Do not reproduce without permission.