当前位置: 首页>>代码示例>>Java>>正文


Java Job.setNumReduceTasks方法代码示例

本文整理汇总了Java中org.apache.hadoop.mapreduce.Job.setNumReduceTasks方法的典型用法代码示例。如果您正苦于以下问题:Java Job.setNumReduceTasks方法的具体用法?Java Job.setNumReduceTasks怎么用?Java Job.setNumReduceTasks使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.hadoop.mapreduce.Job的用法示例。


在下文中一共展示了Job.setNumReduceTasks方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: run

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = new Job(getConf(), "Text to Parquet");
    job.setJarByClass(getClass());
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(TextToParquetMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    AvroParquetOutputFormat.setSchema(job, SCHEMA);
    job.setOutputKeyClass(Void.class);
    job.setOutputValueClass(Group.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
 
开发者ID:mumuhadoop,项目名称:mumu-parquet,代码行数:22,代码来源:MapReduceParquetMapReducer.java

示例2: configureJob

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String [] args)
throws IOException {
  Path inputPath = new Path(args[0]);
  String tableName = args[1];
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(Uploader.class);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(Uploader.class);
  // No reducers.  Just write straight to table.  Call initTableReducerJob
  // because it sets up the TableOutputFormat.
  TableMapReduceUtil.initTableReducerJob(tableName, null, job);
  job.setNumReduceTasks(0);
  return job;
}
 
开发者ID:fengchen8086,项目名称:ditb,代码行数:19,代码来源:SampleUploader.java

示例3: run

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  if (args.length != 2) {
    usage();
    return 2;
  }
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSum");
  job.setJarByClass(TeraChecksum.class);
  job.setMapperClass(ChecksumMapper.class);
  job.setReducerClass(ChecksumReducer.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Unsigned16.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  job.setInputFormatClass(TeraInputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:20,代码来源:TeraChecksum.java

示例4: main

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public static void main(String [] args) throws Exception
{
  Path outDir = new Path("output");
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "user name check"); 
	
	
  job.setJarByClass(UserNamePermission.class);
  job.setMapperClass(UserNamePermission.UserNameMapper.class);
  job.setCombinerClass(UserNamePermission.UserNameReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setReducerClass(UserNamePermission.UserNameReducer.class);
  job.setNumReduceTasks(1);
    
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path("input"));
  FileOutputFormat.setOutputPath(job, outDir);
    
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
开发者ID:naver,项目名称:hadoop,代码行数:22,代码来源:UserNamePermission.java

示例5: createJob

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public static Job createJob() throws IOException {
  final Configuration conf = new Configuration();
  final Job baseJob = Job.getInstance(conf);
  baseJob.setOutputKeyClass(Text.class);
  baseJob.setOutputValueClass(IntWritable.class);
  baseJob.setMapperClass(NewMapTokenizer.class);
  baseJob.setCombinerClass(NewSummer.class);
  baseJob.setReducerClass(NewSummer.class);
  baseJob.setNumReduceTasks(1);
  baseJob.getConfiguration().setInt(JobContext.IO_SORT_MB, 1);
  baseJob.getConfiguration().set(JobContext.MAP_SORT_SPILL_PERCENT, "0.50");
  baseJob.getConfiguration().setInt(JobContext.MAP_COMBINE_MIN_SPILLS, 3);
  org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setMinInputSplitSize(
      baseJob, Long.MAX_VALUE);
  return baseJob;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:17,代码来源:TestJobCounters.java

示例6: createSubmittableJob

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
/**
 * Sets up the actual job.
 *
 * @param conf  The current configuration.
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
throws IOException {
  String tableName = args[0];
  Path outputDir = new Path(args[1]);
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJobName(NAME + "_" + tableName);
  job.setJarByClass(Export.class);
  // Set optional scan parameters
  Scan s = getConfiguredScanForJob(conf, args);
  IdentityTableMapper.initJob(tableName, s, IdentityTableMapper.class, job);
  // No reducers.  Just write straight to output files.
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Result.class);
  FileOutputFormat.setOutputPath(job, outputDir); // job conf doesn't contain the conf so doesn't have a default fs.
  return job;
}
 
开发者ID:fengchen8086,项目名称:ditb,代码行数:27,代码来源:Export.java

示例7: createSubmittableJob

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
    throws IOException {
  String tableName = args[0];
  Path outputDir = new Path(args[1]);
  String reportSeparatorString = (args.length > 2) ? args[2]: ":";
  conf.set("ReportSeparator", reportSeparatorString);
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(CellCounter.class);
  Scan scan = getConfiguredScanForJob(conf, args);
  TableMapReduceUtil.initTableMapperJob(tableName, scan,
      CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(1);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setReducerClass(IntSumReducer.class);
  return job;
}
 
开发者ID:fengchen8086,项目名称:ditb,代码行数:30,代码来源:CellCounter.java

示例8: doVerify

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
private int doVerify(Path outputDir, int numReducers) throws IOException, InterruptedException,
    ClassNotFoundException {
  job = new Job(getConf());

  job.setJobName("Link Verifier");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(getClass());

  setJobScannerConf(job);

  Scan scan = new Scan();
  scan.addColumn(FAMILY_NAME, COLUMN_PREV);
  scan.setCaching(10000);
  scan.setCacheBlocks(false);
  String[] split = labels.split(COMMA);

  scan.setAuthorizations(new Authorizations(split[this.labelIndex * 2],
      split[(this.labelIndex * 2) + 1]));

  TableMapReduceUtil.initTableMapperJob(tableName.getName(), scan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);

  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);

  job.setReducerClass(VerifyReducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputDir);
  boolean success = job.waitForCompletion(true);

  return success ? 0 : 1;
}
 
开发者ID:fengchen8086,项目名称:ditb,代码行数:33,代码来源:IntegrationTestBigLinkedListWithVisibility.java

示例9: run

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
@Override
public int run(String[] args) throws Exception {
	Configuration conf = this.getConf();
	this.processArgs(conf, args);

	Job job = Job.getInstance(conf, "analyser_logdata");

	// 设置本地提交job,集群运行,需要代码
	// File jarFile = EJob.createTempJar("target/classes");
	// ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
	// 设置本地提交job,集群运行,需要代码结束

	job.setJarByClass(AnalyserLogDataRunner.class);
	job.setMapperClass(AnalyserLogDataMapper.class);
	job.setMapOutputKeyClass(NullWritable.class);
	job.setMapOutputValueClass(Put.class);
	// 设置reducer配置
	// 1. 集群上运行,打成jar运行(要求addDependencyJars参数为true,默认就是true)
	// TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS,
	// null, job);
	// 2. 本地运行,要求参数addDependencyJars为false
	TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null,
			null, false);
	job.setNumReduceTasks(0);

	// 设置输入路径
	this.setJobInputPaths(job);
	return job.waitForCompletion(true) ? 0 : -1;
}
 
开发者ID:liuhaozzu,项目名称:big_data,代码行数:30,代码来源:AnalyserLogDataRunner.java

示例10: runJob

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public static void runJob(Path input, Path output, String vectorClassName,Configuration config)
  throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = config;
  conf.set("vector.implementation.class.name", vectorClassName);
  Job job = new Job(conf, "Input Driver running over input: " + input);
	 
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(VectorWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setMapperClass(InputMapper.class);   
  job.setNumReduceTasks(0);
  job.setJarByClass(InputDriver.class);
  
  FileInputFormat.addInputPath(job, input);
  FileOutputFormat.setOutputPath(job, output);
  
  job.waitForCompletion(true);
}
 
开发者ID:PacktPublishing,项目名称:HBase-High-Performance-Cookbook,代码行数:19,代码来源:InputDriver.java

示例11: testScanFromConfiguration

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
/**
 * Tests an MR Scan initialized from properties set in the Configuration.
 * 
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
protected void testScanFromConfiguration(String start, String stop, String last)
throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase() : "Empty") +
    "To" + (stop != null ? stop.toUpperCase() : "Empty");
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());
  c.set(TableInputFormat.INPUT_TABLE, Bytes.toString(TABLE_NAME));
  c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILY));
  c.set(KEY_STARTROW, start != null ? start : "");
  c.set(KEY_LASTROW, last != null ? last : "");

  if (start != null) {
    c.set(TableInputFormat.SCAN_ROW_START, start);
  }

  if (stop != null) {
    c.set(TableInputFormat.SCAN_ROW_STOP, stop);
  }

  Job job = new Job(c, jobName);
  job.setMapperClass(ScanMapper.class);
  job.setReducerClass(ScanReducer.class);
  job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  job.setMapOutputValueClass(ImmutableBytesWritable.class);
  job.setInputFormatClass(TableInputFormat.class);
  job.setNumReduceTasks(1);
  FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
  TableMapReduceUtil.addDependencyJars(job);
  assertTrue(job.waitForCompletion(true));
}
 
开发者ID:fengchen8086,项目名称:ditb,代码行数:37,代码来源:TestTableInputFormatScanBase.java

示例12: configureNumReduceTasks

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
@Override
protected int configureNumReduceTasks(Job job) throws IOException {
  if (job.getNumReduceTasks() < 1) {
    job.setNumReduceTasks(1);
  }
  return job.getNumReduceTasks();
}
 
开发者ID:aliyun,项目名称:aliyun-maxcompute-data-collectors,代码行数:8,代码来源:PGBulkloadExportJob.java

示例13: runSpecTest

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
private Job runSpecTest(boolean mapspec, boolean redspec)
    throws IOException, ClassNotFoundException, InterruptedException {

  Path first = createTempFile("specexec_map_input1", "a\nz");
  Path secnd = createTempFile("specexec_map_input2", "a\nz");

  Configuration conf = mrCluster.getConfig();
  conf.setBoolean(MRJobConfig.MAP_SPECULATIVE,mapspec);
  conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE,redspec);
  conf.setClass(MRJobConfig.MR_AM_TASK_ESTIMATOR,
          TestSpecEstimator.class,
          TaskRuntimeEstimator.class);

  Job job = Job.getInstance(conf);
  job.setJarByClass(TestSpeculativeExecution.class);
  job.setMapperClass(SpeculativeMapper.class);
  job.setReducerClass(SpeculativeReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setNumReduceTasks(2);
  FileInputFormat.setInputPaths(job, first);
  FileInputFormat.addInputPath(job, secnd);
  FileOutputFormat.setOutputPath(job, TEST_OUT_DIR);

  // Delete output directory if it exists.
  try {
    localFs.delete(TEST_OUT_DIR,true);
  } catch (IOException e) {
    // ignore
  }

  // Creates the Job Configuration
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.setMaxMapAttempts(2);

  job.submit();

  return job;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:40,代码来源:TestSpeculativeExecution.java

示例14: doTestWithMapReduce

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
    String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
    int expectedNumSplits, boolean shutdownCluster) throws Exception {

  //create the table and snapshot
  createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);

  if (shutdownCluster) {
    util.shutdownMiniHBaseCluster();
  }

  try {
    // create the job
    Job job = new Job(util.getConfiguration());
    Scan scan = new Scan(startRow, endRow); // limit the scan

    job.setJarByClass(util.getClass());
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      TestTableSnapshotInputFormat.class);

    TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
      scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
      NullWritable.class, job, true, tableDir);

    job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(NullOutputFormat.class);

    Assert.assertTrue(job.waitForCompletion(true));
  } finally {
    if (!shutdownCluster) {
      util.getHBaseAdmin().deleteSnapshot(snapshotName);
      util.deleteTable(tableName);
    }
  }
}
 
开发者ID:fengchen8086,项目名称:ditb,代码行数:37,代码来源:TestTableSnapshotInputFormat.java

示例15: createSubmittableJob

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public Job createSubmittableJob(String[] args) throws IOException {
  Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
  generatePartitions(partitionsPath);
  
  Job job = Job.getInstance(getConf(),
        getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
  Configuration jobConf = job.getConfiguration();
  jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
  job.setJarByClass(HashTable.class);

  TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
      HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
  
  // use a TotalOrderPartitioner and reducers to group region output into hash files
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
  job.setReducerClass(Reducer.class);  // identity reducer
  job.setNumReduceTasks(tableHash.numHashFiles);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(ImmutableBytesWritable.class);
  job.setOutputFormatClass(MapFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));
  
  return job;
}
 
开发者ID:fengchen8086,项目名称:ditb,代码行数:26,代码来源:HashTable.java


注:本文中的org.apache.hadoop.mapreduce.Job.setNumReduceTasks方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。