This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.Job.setOutputFormatClass. If you have been wondering what Job.setOutputFormatClass does, how to use it, or where to find examples of it, the hand-picked code samples here should help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.mapreduce.Job.
The following section presents 15 code examples of Job.setOutputFormatClass, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code samples.
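Before the examples, here is a minimal, self-contained sketch of the basic pattern: Job.setOutputFormatClass tells a MapReduce job which OutputFormat implementation should write its results. The driver class MinimalDriver below, its identity map-only setup, and the choice of TextInputFormat/TextOutputFormat are illustrative assumptions for this article, not code taken from any of the examples that follow.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// Hypothetical driver showing where setOutputFormatClass fits in job setup.
public class MinimalDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "minimal-output-format-demo");
        job.setJarByClass(MinimalDriver.class);

        // Identity map-only job: input records pass straight through to the output.
        job.setMapperClass(Mapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        // The call this article is about: choose how results are written out.
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}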
Example 1: configureJob
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
* Job configuration.
*/
public static Job configureJob(Configuration conf, String [] args)
throws IOException {
String tableName = args[0];
String columnFamily = args[1];
System.out.println("****" + tableName);
conf.set(TableInputFormat.SCAN, TableMapReduceUtil.convertScanToString(new Scan()));
conf.set(TableInputFormat.INPUT_TABLE, tableName);
conf.set("index.tablename", tableName);
conf.set("index.familyname", columnFamily);
String[] fields = new String[args.length - 2];
System.arraycopy(args, 2, fields, 0, fields.length);
conf.setStrings("index.fields", fields);
Job job = new Job(conf, tableName);
job.setJarByClass(IndexBuilder.class);
job.setMapperClass(Map.class);
job.setNumReduceTasks(0);
job.setInputFormatClass(TableInputFormat.class);
job.setOutputFormatClass(MultiTableOutputFormat.class);
return job;
}
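As a follow-up, a brief hedged sketch of how the Job returned by configureJob might be submitted. The main method below is assumed for illustration and is not part of the example above.
// Hypothetical driver entry point; assumes configureJob(...) as defined above.
public static void main(String[] args) throws Exception {
    // An HBase-aware configuration, since the job reads via TableInputFormat.
    Configuration conf = HBaseConfiguration.create();
    Job job = configureJob(conf, args);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}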
Example 2: runRandomInputGenerator
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
Integer width, Integer wrapMuplitplier) throws Exception {
LOG.info("Running RandomInputGenerator with numMappers=" + numMappers
+ ", numNodes=" + numNodes);
Job job = Job.getInstance(getConf());
job.setJobName("Random Input Generator");
job.setNumReduceTasks(0);
job.setJarByClass(getClass());
job.setInputFormatClass(GeneratorInputFormat.class);
job.setOutputKeyClass(BytesWritable.class);
job.setOutputValueClass(NullWritable.class);
setJobConf(job, numMappers, numNodes, width, wrapMuplitplier);
job.setMapperClass(Mapper.class); //identity mapper
FileOutputFormat.setOutputPath(job, tmpOutput);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
boolean success = jobCompletion(job);
return success ? 0 : 1;
}
Example 3: runJob
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void runJob(Configuration conf, Path inputPath, Path output) throws IOException, ClassNotFoundException, InterruptedException {
Job job = new Job(conf, "Input Drive running input:"+inputPath);
log.info("start running InputDriver");
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(indexToWordWritable.class);
job.setOutputKeyClass(twoDimensionIndexWritable.class);
job.setOutputValueClass(Text.class);
job.setMapperClass(InputMapper.class);
job.setReducerClass(InputReducer.class);
job.setNumReduceTasks(1);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setJarByClass(InputDriver.class);
FileInputFormat.addInputPath(job, inputPath);
FileOutputFormat.setOutputPath(job, output);
boolean succeeded = job.waitForCompletion(true);
if (!succeeded) {
throw new IllegalStateException("Job failed!");
}
}
Example 4: configueAvroMergeJob
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
private void configueAvroMergeJob(Configuration conf, Job job, Path oldPath, Path newPath)
throws IOException {
LOG.info("Trying to merge avro files");
final Schema oldPathSchema = AvroUtil.getAvroSchema(oldPath, conf);
final Schema newPathSchema = AvroUtil.getAvroSchema(newPath, conf);
if (oldPathSchema == null || newPathSchema == null || !oldPathSchema.equals(newPathSchema)) {
throw new IOException("Invalid schema for input directories. Schema for old data: ["
+ oldPathSchema + "]. Schema for new data: [" + newPathSchema + "]");
}
LOG.debug("Avro Schema:" + oldPathSchema);
job.setInputFormatClass(AvroInputFormat.class);
job.setOutputFormatClass(AvroOutputFormat.class);
job.setMapperClass(MergeAvroMapper.class);
job.setReducerClass(MergeAvroReducer.class);
AvroJob.setOutputSchema(job.getConfiguration(), oldPathSchema);
}
Example 5: main
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJobName("MaxThreeLabel");
job.setJarByClass(MaxThreeLabel.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(TextArrayWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapperClass(MaxThreeLabelMap.class);
job.setReducerClass(MaxThreeLabelReduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
boolean wait = job.waitForCompletion(true);
System.exit(wait ? 0 : 1);
}
Example 6: createJob
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public Job createJob()
throws IOException {
Configuration conf = getConf();
conf.setInt(MRJobConfig.NUM_MAPS, 1);
Job job = Job.getInstance(conf, "test");
job.setNumReduceTasks(1);
job.setJarByClass(CredentialsTestJob.class);
job.setMapperClass(CredentialsTestJob.CredentialsTestMapper.class);
job.setMapOutputKeyClass(IntWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setReducerClass(CredentialsTestJob.CredentialsTestReducer.class);
job.setInputFormatClass(SleepJob.SleepInputFormat.class);
job.setPartitionerClass(SleepJob.SleepJobPartitioner.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setSpeculativeExecution(false);
job.setJobName("test job");
FileInputFormat.addInputPath(job, new Path("ignored"));
return job;
}
Example 7: createJob
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static Job createJob(String name, String base) throws IOException {
Configuration conf = new Configuration();
conf.set(Total.QUERIED_NAME, name);
Job job = Job.getInstance(new Cluster(conf), conf);
job.setJarByClass(Cut.class);
// in
String in = base;
if (!base.endsWith("/"))
in = in.concat("/");
in = in.concat("employees");
SequenceFileInputFormat.addInputPath(job, new Path(in));
job.setInputFormatClass(SequenceFileInputFormat.class);
// map
job.setMapperClass(CutMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Employee.class);
// out
SequenceFileOutputFormat.setOutputPath(job, new Path(base + "/tmp"));
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Employee.class);
return job;
}
Example 8: run
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
Configuration conf = this.getConf();
// parse the input arguments
this.processArgs(conf, args);
// create the job
Job job = Job.getInstance(conf, "active_user");
// set job-level configuration
job.setJarByClass(ActiveUserRunner.class);
// HBase input mapper parameters
// 1. local run
TableMapReduceUtil.initTableMapperJob(this.initScans(job), ActiveUserMapper.class, StatsUserDimension.class,
TimeOutputValue.class, job, false);
// 2. cluster run
// TableMapReduceUtil.initTableMapperJob(null, ActiveUserMapper.class,
// StatsUserDimension.class, TimeOutputValue.class, job);
// reducer settings
job.setReducerClass(ActiveUserReducer.class);
job.setOutputKeyClass(StatsUserDimension.class);
job.setOutputValueClass(MapWritableValue.class);
// output settings
job.setOutputFormatClass(TransformerOutputFormat.class);
// start time in milliseconds
long startTime = System.currentTimeMillis();
try {
return job.waitForCompletion(true) ? 0 : -1;
} finally {
// end time in milliseconds
long endTime = System.currentTimeMillis();
logger.info("Job<" + job.getJobName() + "> succeeded: " + job.isSuccessful() + "; start: " + startTime + "; end: "
+ endTime + "; elapsed: " + (endTime - startTime) + "ms");
}
}
Example 9: createSubmittableJob
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
@Override
public Job createSubmittableJob(String[] args) throws IOException {
Job job = super.createSubmittableJob(args);
// Call my class instead.
job.setJarByClass(WALMapperSearcher.class);
job.setMapperClass(WALMapperSearcher.class);
job.setOutputFormatClass(NullOutputFormat.class);
return job;
}
Example 10: setOutput
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
private static DBConfiguration setOutput(Job job,
String tableName) throws IOException {
job.setOutputFormatClass(DBOutputFormat.class);
ConfigurationHelper.setJobReduceSpeculativeExecution(job, false);
DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());
dbConf.setOutputTableName(tableName);
return dbConf;
}
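Example 10 relies on Sqoop's DBConfiguration and ConfigurationHelper wrappers. For comparison, here is a minimal sketch of configuring JDBC output with the stock Hadoop classes in org.apache.hadoop.mapreduce.lib.db; the JDBC driver class, connection URL, credentials, table, and column names below are placeholders assumed for illustration.
import java.io.IOException;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;

// Hypothetical helper configuring DBOutputFormat with plain Hadoop classes.
private static void configureJdbcOutput(Job job) throws IOException {
    // Register the JDBC connection details on the job configuration (placeholder values).
    DBConfiguration.configureDB(job.getConfiguration(),
        "com.mysql.jdbc.Driver", "jdbc:mysql://localhost/mydb", "user", "pass");
    // Write results through DBOutputFormat into the (assumed) "employees" table.
    job.setOutputFormatClass(DBOutputFormat.class);
    DBOutputFormat.setOutput(job, "employees", "id", "name", "salary");
}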
Example 11: testKilledJob
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
private void testKilledJob(String fileName,
Class<? extends OutputFormat> output, String[] exclude) throws Exception {
Path outDir = getNewOutputDir();
Job job = MapReduceTestUtil.createKillJob(conf, outDir, inDir);
job.setOutputFormatClass(output);
job.submit();
// wait for the setup to be completed
while (job.setupProgress() != 1.0f) {
UtilsForTests.waitFor(100);
}
job.killJob(); // kill the job
assertFalse("Job did not get kill", job.waitForCompletion(true));
if (fileName != null) {
Path testFile = new Path(outDir, fileName);
assertTrue("File " + testFile + " missing for job " + job.getJobID(), fs
.exists(testFile));
}
// check that the excluded files are not present
for (String ex : exclude) {
Path file = new Path(outDir, ex);
assertFalse("File " + file + " should not be present for killed job "
+ job.getJobID(), fs.exists(file));
}
}
Example 12: loadHCatTable
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public List<HCatRecord> loadHCatTable(String dbName,
String tableName, Map<String, String> partKeyMap,
HCatSchema tblSchema, List<HCatRecord> records)
throws Exception {
Job job = new Job(conf, "HCat load job");
job.setJarByClass(this.getClass());
job.setMapperClass(HCatWriterMapper.class);
// Just write one line per record to the input file to drive the mapper
Path path = new Path(fs.getWorkingDirectory(),
"mapreduce/HCatTableIndexInput");
job.getConfiguration()
.setInt(ConfigurationConstants.PROP_MAPRED_MAP_TASKS, 1);
int writeCount = records.size();
recsToLoad.clear();
recsToLoad.addAll(records);
createInputFile(path, writeCount);
// input/output settings
HCatWriterMapper.setWrittenRecordCount(0);
FileInputFormat.setInputPaths(job, path);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(HCatOutputFormat.class);
OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName,
partKeyMap);
HCatOutputFormat.setOutput(job, outputJobInfo);
HCatOutputFormat.setSchema(job, tblSchema);
job.setMapOutputKeyClass(BytesWritable.class);
job.setMapOutputValueClass(DefaultHCatRecord.class);
job.setNumReduceTasks(0);
SqoopHCatUtilities.addJars(job, new SqoopOptions());
boolean success = job.waitForCompletion(true);
if (!success) {
throw new IOException("Loading HCatalog table with test records failed");
}
utils.invokeOutputCommitterForLocalMode(job);
LOG.info("Loaded " + HCatWriterMapper.writtenRecordCount + " records");
return recsToLoad;
}
Example 13: run
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public void run() throws IOException, ClassNotFoundException, InterruptedException {
Job job = Job.getInstance(configuration, "com.romanysik.util.Transposer");
job.setJarByClass(MRNMF.class);
FileInputFormat.addInputPath(job, new Path(inputPath));
FileOutputFormat.setOutputPath(job, new Path(outputPath));
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setMapperClass(TMapper.class);
job.setReducerClass(TReducer.class);
job.waitForCompletion(true);
}
Example 14: createSubmittableJob
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public Job createSubmittableJob(String[] args) throws IOException {
FileSystem fs = sourceHashDir.getFileSystem(getConf());
if (!fs.exists(sourceHashDir)) {
throw new IOException("Source hash dir not found: " + sourceHashDir);
}
HashTable.TableHash tableHash = HashTable.TableHash.read(getConf(), sourceHashDir);
LOG.info("Read source hash manifest: " + tableHash);
LOG.info("Read " + tableHash.partitions.size() + " partition keys");
if (!tableHash.tableName.equals(sourceTableName)) {
LOG.warn("Table name mismatch - manifest indicates hash was taken from: "
+ tableHash.tableName + " but job is reading from: " + sourceTableName);
}
if (tableHash.numHashFiles != tableHash.partitions.size() + 1) {
throw new RuntimeException("Hash data appears corrupt. The number of of hash files created"
+ " should be 1 more than the number of partition keys. However, the manifest file "
+ " says numHashFiles=" + tableHash.numHashFiles + " but the number of partition keys"
+ " found in the partitions file is " + tableHash.partitions.size());
}
Path dataDir = new Path(sourceHashDir, HashTable.HASH_DATA_DIR);
int dataSubdirCount = 0;
for (FileStatus file : fs.listStatus(dataDir)) {
if (file.getPath().getName().startsWith(HashTable.OUTPUT_DATA_FILE_PREFIX)) {
dataSubdirCount++;
}
}
if (dataSubdirCount != tableHash.numHashFiles) {
throw new RuntimeException("Hash data appears corrupt. The number of of hash files created"
+ " should be 1 more than the number of partition keys. However, the number of data dirs"
+ " found is " + dataSubdirCount + " but the number of partition keys"
+ " found in the partitions file is " + tableHash.partitions.size());
}
Job job = Job.getInstance(getConf(), getConf().get("mapreduce.job.name",
"syncTable_" + sourceTableName + "-" + targetTableName));
Configuration jobConf = job.getConfiguration();
job.setJarByClass(HashTable.class);
jobConf.set(SOURCE_HASH_DIR_CONF_KEY, sourceHashDir.toString());
jobConf.set(SOURCE_TABLE_CONF_KEY, sourceTableName);
jobConf.set(TARGET_TABLE_CONF_KEY, targetTableName);
if (sourceZkCluster != null) {
jobConf.set(SOURCE_ZK_CLUSTER_CONF_KEY, sourceZkCluster);
}
if (targetZkCluster != null) {
jobConf.set(TARGET_ZK_CLUSTER_CONF_KEY, targetZkCluster);
}
jobConf.setBoolean(DRY_RUN_CONF_KEY, dryRun);
TableMapReduceUtil.initTableMapperJob(targetTableName, tableHash.initScan(),
SyncMapper.class, null, null, job);
job.setNumReduceTasks(0);
if (dryRun) {
job.setOutputFormatClass(NullOutputFormat.class);
} else {
// No reducers. Just write straight to table. Call initTableReducerJob
// because it sets up the TableOutputFormat.
TableMapReduceUtil.initTableReducerJob(targetTableName, null, job, null,
targetZkCluster, null, null);
// would be nice to add an option for bulk load instead
}
return job;
}
Example 15: run
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public void run() throws IOException, ClassNotFoundException, InterruptedException {
Job job = Job.getInstance(configuration, "com.romanysik.matrixmultiplication.MM3");
job.setJarByClass(MRNMF.class);
FileInputFormat.addInputPath(job, new Path(inputPath));
FileOutputFormat.setOutputPath(job, new Path(outputPath));
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setMapperClass(MM3Mapper.class);
job.setReducerClass(MM3Reducer.class);
job.waitForCompletion(true);
}