本文整理汇总了Java中org.apache.hadoop.mapreduce.Job.setNumReduceTasks方法的典型用法代码示例。如果您正苦于以下问题:Java Job.setNumReduceTasks方法的具体用法?Java Job.setNumReduceTasks怎么用?Java Job.setNumReduceTasks使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.mapreduce.Job
的用法示例。
在下文中一共展示了Job.setNumReduceTasks方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: run
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
@Override
public int run(String[] args) throws Exception {
if (args.length != 2) {
System.err.printf("Usage: %s [generic options] <input> <output>\n",
getClass().getSimpleName());
ToolRunner.printGenericCommandUsage(System.err);
return -1;
}
Job job = new Job(getConf(), "Text to Parquet");
job.setJarByClass(getClass());
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setMapperClass(TextToParquetMapper.class);
job.setNumReduceTasks(0);
job.setOutputFormatClass(AvroParquetOutputFormat.class);
AvroParquetOutputFormat.setSchema(job, SCHEMA);
job.setOutputKeyClass(Void.class);
job.setOutputValueClass(Group.class);
return job.waitForCompletion(true) ? 0 : 1;
}
示例2: configureJob
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
/**
* Job configuration.
*/
public static Job configureJob(Configuration conf, String [] args)
throws IOException {
Path inputPath = new Path(args[0]);
String tableName = args[1];
Job job = new Job(conf, NAME + "_" + tableName);
job.setJarByClass(Uploader.class);
FileInputFormat.setInputPaths(job, inputPath);
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setMapperClass(Uploader.class);
// No reducers. Just write straight to table. Call initTableReducerJob
// because it sets up the TableOutputFormat.
TableMapReduceUtil.initTableReducerJob(tableName, null, job);
job.setNumReduceTasks(0);
return job;
}
示例3: run
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public int run(String[] args) throws Exception {
Job job = Job.getInstance(getConf());
if (args.length != 2) {
usage();
return 2;
}
TeraInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setJobName("TeraSum");
job.setJarByClass(TeraChecksum.class);
job.setMapperClass(ChecksumMapper.class);
job.setReducerClass(ChecksumReducer.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Unsigned16.class);
// force a single reducer
job.setNumReduceTasks(1);
job.setInputFormatClass(TeraInputFormat.class);
return job.waitForCompletion(true) ? 0 : 1;
}
示例4: main
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public static void main(String [] args) throws Exception
{
Path outDir = new Path("output");
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "user name check");
job.setJarByClass(UserNamePermission.class);
job.setMapperClass(UserNamePermission.UserNameMapper.class);
job.setCombinerClass(UserNamePermission.UserNameReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setReducerClass(UserNamePermission.UserNameReducer.class);
job.setNumReduceTasks(1);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.addInputPath(job, new Path("input"));
FileOutputFormat.setOutputPath(job, outDir);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
示例5: createJob
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public static Job createJob() throws IOException {
final Configuration conf = new Configuration();
final Job baseJob = Job.getInstance(conf);
baseJob.setOutputKeyClass(Text.class);
baseJob.setOutputValueClass(IntWritable.class);
baseJob.setMapperClass(NewMapTokenizer.class);
baseJob.setCombinerClass(NewSummer.class);
baseJob.setReducerClass(NewSummer.class);
baseJob.setNumReduceTasks(1);
baseJob.getConfiguration().setInt(JobContext.IO_SORT_MB, 1);
baseJob.getConfiguration().set(JobContext.MAP_SORT_SPILL_PERCENT, "0.50");
baseJob.getConfiguration().setInt(JobContext.MAP_COMBINE_MIN_SPILLS, 3);
org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setMinInputSplitSize(
baseJob, Long.MAX_VALUE);
return baseJob;
}
示例6: createSubmittableJob
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
/**
* Sets up the actual job.
*
* @param conf The current configuration.
* @param args The command line parameters.
* @return The newly created job.
* @throws IOException When setting up the job fails.
*/
public static Job createSubmittableJob(Configuration conf, String[] args)
throws IOException {
String tableName = args[0];
Path outputDir = new Path(args[1]);
Job job = new Job(conf, NAME + "_" + tableName);
job.setJobName(NAME + "_" + tableName);
job.setJarByClass(Export.class);
// Set optional scan parameters
Scan s = getConfiguredScanForJob(conf, args);
IdentityTableMapper.initJob(tableName, s, IdentityTableMapper.class, job);
// No reducers. Just write straight to output files.
job.setNumReduceTasks(0);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setOutputKeyClass(ImmutableBytesWritable.class);
job.setOutputValueClass(Result.class);
FileOutputFormat.setOutputPath(job, outputDir); // job conf doesn't contain the conf so doesn't have a default fs.
return job;
}
示例7: createSubmittableJob
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
/**
* Sets up the actual job.
*
* @param conf The current configuration.
* @param args The command line parameters.
* @return The newly created job.
* @throws IOException When setting up the job fails.
*/
public static Job createSubmittableJob(Configuration conf, String[] args)
throws IOException {
String tableName = args[0];
Path outputDir = new Path(args[1]);
String reportSeparatorString = (args.length > 2) ? args[2]: ":";
conf.set("ReportSeparator", reportSeparatorString);
Job job = new Job(conf, NAME + "_" + tableName);
job.setJarByClass(CellCounter.class);
Scan scan = getConfiguredScanForJob(conf, args);
TableMapReduceUtil.initTableMapperJob(tableName, scan,
CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
job.setNumReduceTasks(1);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileOutputFormat.setOutputPath(job, outputDir);
job.setReducerClass(IntSumReducer.class);
return job;
}
示例8: doVerify
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
private int doVerify(Path outputDir, int numReducers) throws IOException, InterruptedException,
ClassNotFoundException {
job = new Job(getConf());
job.setJobName("Link Verifier");
job.setNumReduceTasks(numReducers);
job.setJarByClass(getClass());
setJobScannerConf(job);
Scan scan = new Scan();
scan.addColumn(FAMILY_NAME, COLUMN_PREV);
scan.setCaching(10000);
scan.setCacheBlocks(false);
String[] split = labels.split(COMMA);
scan.setAuthorizations(new Authorizations(split[this.labelIndex * 2],
split[(this.labelIndex * 2) + 1]));
TableMapReduceUtil.initTableMapperJob(tableName.getName(), scan, VerifyMapper.class,
BytesWritable.class, BytesWritable.class, job);
TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
job.setReducerClass(VerifyReducer.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, outputDir);
boolean success = job.waitForCompletion(true);
return success ? 0 : 1;
}
示例9: run
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
@Override
public int run(String[] args) throws Exception {
Configuration conf = this.getConf();
this.processArgs(conf, args);
Job job = Job.getInstance(conf, "analyser_logdata");
// 设置本地提交job,集群运行,需要代码
// File jarFile = EJob.createTempJar("target/classes");
// ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
// 设置本地提交job,集群运行,需要代码结束
job.setJarByClass(AnalyserLogDataRunner.class);
job.setMapperClass(AnalyserLogDataMapper.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(Put.class);
// 设置reducer配置
// 1. 集群上运行,打成jar运行(要求addDependencyJars参数为true,默认就是true)
// TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS,
// null, job);
// 2. 本地运行,要求参数addDependencyJars为false
TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null,
null, false);
job.setNumReduceTasks(0);
// 设置输入路径
this.setJobInputPaths(job);
return job.waitForCompletion(true) ? 0 : -1;
}
示例10: runJob
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public static void runJob(Path input, Path output, String vectorClassName,Configuration config)
throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = config;
conf.set("vector.implementation.class.name", vectorClassName);
Job job = new Job(conf, "Input Driver running over input: " + input);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(VectorWritable.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setMapperClass(InputMapper.class);
job.setNumReduceTasks(0);
job.setJarByClass(InputDriver.class);
FileInputFormat.addInputPath(job, input);
FileOutputFormat.setOutputPath(job, output);
job.waitForCompletion(true);
}
示例11: testScanFromConfiguration
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
/**
* Tests an MR Scan initialized from properties set in the Configuration.
*
* @throws IOException
* @throws ClassNotFoundException
* @throws InterruptedException
*/
protected void testScanFromConfiguration(String start, String stop, String last)
throws IOException, InterruptedException, ClassNotFoundException {
String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase() : "Empty") +
"To" + (stop != null ? stop.toUpperCase() : "Empty");
Configuration c = new Configuration(TEST_UTIL.getConfiguration());
c.set(TableInputFormat.INPUT_TABLE, Bytes.toString(TABLE_NAME));
c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILY));
c.set(KEY_STARTROW, start != null ? start : "");
c.set(KEY_LASTROW, last != null ? last : "");
if (start != null) {
c.set(TableInputFormat.SCAN_ROW_START, start);
}
if (stop != null) {
c.set(TableInputFormat.SCAN_ROW_STOP, stop);
}
Job job = new Job(c, jobName);
job.setMapperClass(ScanMapper.class);
job.setReducerClass(ScanReducer.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(ImmutableBytesWritable.class);
job.setInputFormatClass(TableInputFormat.class);
job.setNumReduceTasks(1);
FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
TableMapReduceUtil.addDependencyJars(job);
assertTrue(job.waitForCompletion(true));
}
示例12: configureNumReduceTasks
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
@Override
protected int configureNumReduceTasks(Job job) throws IOException {
if (job.getNumReduceTasks() < 1) {
job.setNumReduceTasks(1);
}
return job.getNumReduceTasks();
}
示例13: runSpecTest
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
private Job runSpecTest(boolean mapspec, boolean redspec)
throws IOException, ClassNotFoundException, InterruptedException {
Path first = createTempFile("specexec_map_input1", "a\nz");
Path secnd = createTempFile("specexec_map_input2", "a\nz");
Configuration conf = mrCluster.getConfig();
conf.setBoolean(MRJobConfig.MAP_SPECULATIVE,mapspec);
conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE,redspec);
conf.setClass(MRJobConfig.MR_AM_TASK_ESTIMATOR,
TestSpecEstimator.class,
TaskRuntimeEstimator.class);
Job job = Job.getInstance(conf);
job.setJarByClass(TestSpeculativeExecution.class);
job.setMapperClass(SpeculativeMapper.class);
job.setReducerClass(SpeculativeReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setNumReduceTasks(2);
FileInputFormat.setInputPaths(job, first);
FileInputFormat.addInputPath(job, secnd);
FileOutputFormat.setOutputPath(job, TEST_OUT_DIR);
// Delete output directory if it exists.
try {
localFs.delete(TEST_OUT_DIR,true);
} catch (IOException e) {
// ignore
}
// Creates the Job Configuration
job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
job.setMaxMapAttempts(2);
job.submit();
return job;
}
示例14: doTestWithMapReduce
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
int expectedNumSplits, boolean shutdownCluster) throws Exception {
//create the table and snapshot
createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);
if (shutdownCluster) {
util.shutdownMiniHBaseCluster();
}
try {
// create the job
Job job = new Job(util.getConfiguration());
Scan scan = new Scan(startRow, endRow); // limit the scan
job.setJarByClass(util.getClass());
TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
TestTableSnapshotInputFormat.class);
TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
NullWritable.class, job, true, tableDir);
job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
job.setNumReduceTasks(1);
job.setOutputFormatClass(NullOutputFormat.class);
Assert.assertTrue(job.waitForCompletion(true));
} finally {
if (!shutdownCluster) {
util.getHBaseAdmin().deleteSnapshot(snapshotName);
util.deleteTable(tableName);
}
}
}
示例15: createSubmittableJob
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public Job createSubmittableJob(String[] args) throws IOException {
Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
generatePartitions(partitionsPath);
Job job = Job.getInstance(getConf(),
getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
Configuration jobConf = job.getConfiguration();
jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
job.setJarByClass(HashTable.class);
TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
// use a TotalOrderPartitioner and reducers to group region output into hash files
job.setPartitionerClass(TotalOrderPartitioner.class);
TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
job.setReducerClass(Reducer.class); // identity reducer
job.setNumReduceTasks(tableHash.numHashFiles);
job.setOutputKeyClass(ImmutableBytesWritable.class);
job.setOutputValueClass(ImmutableBytesWritable.class);
job.setOutputFormatClass(MapFileOutputFormat.class);
FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));
return job;
}