本文整理汇总了Java中org.apache.hadoop.mapreduce.Job.getInstance方法的典型用法代码示例。如果您正苦于以下问题:Java Job.getInstance方法的具体用法?Java Job.getInstance怎么用?Java Job.getInstance使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.mapreduce.Job
的用法示例。
在下文中一共展示了Job.getInstance方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
conf.setFloat("beta", Float.parseFloat(args[3]));
Job job = Job.getInstance(conf);
job.setJarByClass(UnitSum.class);
ChainMapper.addMapper(job, PassMapper.class, Object.class, Text.class, Text.class, DoubleWritable.class, conf);
ChainMapper.addMapper(job, BetaMapper.class, Text.class, DoubleWritable.class, Text.class, DoubleWritable.class, conf);
job.setReducerClass(SumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, PassMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, BetaMapper.class);
FileOutputFormat.setOutputPath(job, new Path(args[2]));
job.waitForCompletion(true);
}
示例2: main
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "maxtemp");
job.setMapperClass(MaxTempMapper.class);
job.setReducerClass(MaxTempReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(FloatWritable.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
if (!job.waitForCompletion(true))
return;
}
示例3: doVerify
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
protected void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
Path outputDir = getTestDir(TEST_NAME, "verify-output");
LOG.info("Verify output dir: " + outputDir);
Job job = Job.getInstance(conf);
job.setJarByClass(this.getClass());
job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
setJobScannerConf(job);
Scan scan = new Scan();
TableMapReduceUtil.initTableMapperJob(
htd.getTableName().getNameAsString(), scan, VerifyMapper.class,
BytesWritable.class, BytesWritable.class, job);
TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
TableMapReduceUtil.setScannerCaching(job, scannerCaching);
job.setReducerClass(VerifyReducer.class);
job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
FileOutputFormat.setOutputPath(job, outputDir);
assertTrue(job.waitForCompletion(true));
long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
assertEquals(0, numOutputRecords);
}
示例4: testEmptyOutput
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public void testEmptyOutput() throws Exception {
Job job = Job.getInstance();
FileOutputFormat.setOutputPath(job, outDir);
Configuration conf = job.getConfiguration();
conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
// setup
committer.setupJob(jContext);
committer.setupTask(tContext);
// Do not write any output
// do commit
committer.commitTask(tContext);
committer.commitJob(jContext);
FileUtil.fullyDelete(new File(outDir.toString()));
}
示例5: run
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public int run(String[] args) throws Exception {
Job job = Job.getInstance(getConf());
if (args.length != 2) {
usage();
return 2;
}
TeraInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setJobName("TeraSum");
job.setJarByClass(TeraChecksum.class);
job.setMapperClass(ChecksumMapper.class);
job.setReducerClass(ChecksumReducer.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Unsigned16.class);
// force a single reducer
job.setNumReduceTasks(1);
job.setInputFormatClass(TeraInputFormat.class);
return job.waitForCompletion(true) ? 0 : 1;
}
示例6: main
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setMapperClass(DataDividerMapper.class);
job.setReducerClass(DataDividerReducer.class);
job.setJarByClass(DataDividerByUser.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(Text.class);
TextInputFormat.setInputPaths(job, new Path(args[0]));
TextOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
示例7: testSplitSampler
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
/**
* Verify SplitSampler contract, that an equal number of records are taken
* from the first splits.
*/
@Test
@SuppressWarnings("unchecked") // IntWritable comparator not typesafe
public void testSplitSampler() throws Exception {
final int TOT_SPLITS = 15;
final int NUM_SPLITS = 5;
final int STEP_SAMPLE = 5;
final int NUM_SAMPLES = NUM_SPLITS * STEP_SAMPLE;
InputSampler.Sampler<IntWritable,NullWritable> sampler =
new InputSampler.SplitSampler<IntWritable,NullWritable>(
NUM_SAMPLES, NUM_SPLITS);
int inits[] = new int[TOT_SPLITS];
for (int i = 0; i < TOT_SPLITS; ++i) {
inits[i] = i * STEP_SAMPLE;
}
Job ignored = Job.getInstance();
Object[] samples = sampler.getSample(
new TestInputSamplerIF(100000, TOT_SPLITS, inits), ignored);
assertEquals(NUM_SAMPLES, samples.length);
Arrays.sort(samples, new IntWritable.Comparator());
for (int i = 0; i < NUM_SAMPLES; ++i) {
assertEquals(i, ((IntWritable)samples[i]).get());
}
}
示例8: main
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public static void main(String[] args) throws Exception {
Configuration configuration = new Configuration();
configuration.set("dictionary", args[2]);
Job job = Job.getInstance(configuration);
job.setJarByClass(SentimentAnalysis.class);
job.setMapperClass(SentimentSplit.class);
job.setReducerClass(SentimentCollection.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
示例9: main
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public static void main(String[] args) throws Exception {
Configuration con = new Configuration();
Job bookJob = Job.getInstance(con, "Average Page Count");
bookJob.setJarByClass(AveragePageCount.class);
bookJob.setMapperClass(TextMapper.class);
bookJob.setReducerClass(AverageReduce.class);
bookJob.setOutputKeyClass(Text.class);
bookJob.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(bookJob, new Path("C:/Hadoop/books.txt"));
FileOutputFormat.setOutputPath(bookJob, new Path("C:/Hadoop/BookOutput"));
if (bookJob.waitForCompletion(true)) {
System.exit(0);
}
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-End-to-Endguide-for-Java-developers,代码行数:16,代码来源:AveragePageCount.java
示例10: testMapFileOutputCommitterInternal
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
private void testMapFileOutputCommitterInternal(int version)
throws Exception {
Job job = Job.getInstance();
FileOutputFormat.setOutputPath(job, outDir);
Configuration conf = job.getConfiguration();
conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
version);
JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
// setup
committer.setupJob(jContext);
committer.setupTask(tContext);
// write output
MapFileOutputFormat theOutputFormat = new MapFileOutputFormat();
RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
writeMapFileOutput(theRecordWriter, tContext);
// do commit
committer.commitTask(tContext);
committer.commitJob(jContext);
// validate output
validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), outDir);
FileUtil.fullyDelete(new File(outDir.toString()));
}
示例11: testWithConf
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
private void testWithConf(Configuration conf) throws IOException,
InterruptedException, ClassNotFoundException, URISyntaxException {
// Create a temporary file of length 1.
Path first = createTempFile("distributed.first", "x");
// Create two jars with a single file inside them.
Path second =
makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
Path third =
makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
Path fourth =
makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);
Job job = Job.getInstance(conf);
job.setMapperClass(DistributedCacheCheckerMapper.class);
job.setReducerClass(DistributedCacheCheckerReducer.class);
job.setOutputFormatClass(NullOutputFormat.class);
FileInputFormat.setInputPaths(job, first);
// Creates the Job Configuration
job.addCacheFile(
new URI(first.toUri().toString() + "#distributed.first.symlink"));
job.addFileToClassPath(second);
job.addArchiveToClassPath(third);
job.addCacheArchive(fourth.toUri());
job.setMaxMapAttempts(1); // speed up failures
job.submit();
assertTrue(job.waitForCompletion(false));
}
示例12: testGzipWithTwoInputs
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
/**
* Test using the gzip codec with two input files.
*/
@Test (timeout=5000)
public void testGzipWithTwoInputs() throws Exception {
CompressionCodec gzip = new GzipCodec();
localFs.delete(workDir, true);
Job job = Job.getInstance(defaultConf);
FixedLengthInputFormat format = new FixedLengthInputFormat();
format.setRecordLength(job.getConfiguration(), 5);
ReflectionUtils.setConf(gzip, job.getConfiguration());
FileInputFormat.setInputPaths(job, workDir);
// Create files with fixed length records with 5 byte long records.
writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip,
"one two threefour five six seveneightnine ten ");
writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip,
"ten nine eightsevensix five four threetwo one ");
List<InputSplit> splits = format.getSplits(job);
assertEquals("compressed splits == 2", 2, splits.size());
FileSplit tmp = (FileSplit) splits.get(0);
if (tmp.getPath().getName().equals("part2.txt.gz")) {
splits.set(0, splits.get(1));
splits.set(1, tmp);
}
List<String> results = readSplit(format, splits.get(0), job);
assertEquals("splits[0] length", 10, results.size());
assertEquals("splits[0][5]", "six ", results.get(5));
results = readSplit(format, splits.get(1), job);
assertEquals("splits[1] length", 10, results.size());
assertEquals("splits[1][0]", "ten ", results.get(0));
assertEquals("splits[1][1]", "nine ", results.get(1));
}
示例13: createJob
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
/**
* Create Job object for submitting it, with all the configuration
*
* @return Reference to job object.
* @throws IOException - Exception if any
*/
private Job createJob() throws IOException {
String jobName = "distcp";
String userChosenName = getConf().get(JobContext.JOB_NAME);
if (userChosenName != null)
jobName += ": " + userChosenName;
Job job = Job.getInstance(getConf());
job.setJobName(jobName);
job.setInputFormatClass(DistCpUtils.getStrategy(getConf(), inputOptions));
job.setJarByClass(CopyMapper.class);
configureOutputFormat(job);
job.setMapperClass(CopyMapper.class);
job.setNumReduceTasks(0);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputFormatClass(CopyOutputFormat.class);
job.getConfiguration().set(JobContext.MAP_SPECULATIVE, "false");
job.getConfiguration().set(JobContext.NUM_MAPS,
String.valueOf(inputOptions.getMaxMaps()));
if (inputOptions.getSslConfigurationFile() != null) {
setupSSLConfig(job);
}
inputOptions.appendToConf(job.getConfiguration());
return job;
}
示例14: total
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public static void total(String name, String in, String out)
throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
conf.set(QUERIED_NAME, name);
Job job = Job.getInstance(new Cluster(conf), conf);
job.setJarByClass(Total.class);
// in
if (!in.endsWith("/"))
in = in.concat("/");
in = in.concat("employees");
SequenceFileInputFormat.addInputPath(job, new Path(in));
job.setInputFormatClass(SequenceFileInputFormat.class);
// map
job.setMapperClass(TotalMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(DoubleWritable.class);
// reduce
job.setCombinerClass(TotalReducer.class);
job.setReducerClass(TotalReducer.class);
// out
SequenceFileOutputFormat.setOutputPath(job, new Path(out));
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
job.waitForCompletion(true);
}
示例15: testCommitter
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
@SuppressWarnings("unchecked")
public void testCommitter() throws Exception {
Job job = Job.getInstance();
FileOutputFormat.setOutputPath(job, outDir);
Configuration conf = job.getConfiguration();
conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
// setup
committer.setupJob(jContext);
committer.setupTask(tContext);
// write output
TextOutputFormat theOutputFormat = new TextOutputFormat();
RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
writeOutput(theRecordWriter, tContext);
// do commit
committer.commitTask(tContext);
committer.commitJob(jContext);
// validate output
File expectedFile = new File(new Path(outDir, partFile).toString());
StringBuffer expectedOutput = new StringBuffer();
expectedOutput.append(key1).append('\t').append(val1).append("\n");
expectedOutput.append(val1).append("\n");
expectedOutput.append(val2).append("\n");
expectedOutput.append(key2).append("\n");
expectedOutput.append(key1).append("\n");
expectedOutput.append(key2).append('\t').append(val2).append("\n");
String output = UtilsForTests.slurp(expectedFile);
assertEquals(output, expectedOutput.toString());
FileUtil.fullyDelete(new File(outDir.toString()));
}