本文整理匯總了Java中org.apache.hadoop.mapred.JobConf.setMapOutputKeyClass方法的典型用法代碼示例。如果您正苦於以下問題:Java JobConf.setMapOutputKeyClass方法的具體用法?Java JobConf.setMapOutputKeyClass怎麼用?Java JobConf.setMapOutputKeyClass使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.apache.hadoop.mapred.JobConf的用法示例。
在下文中一共展示了JobConf.setMapOutputKeyClass方法的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: initMultiTableSnapshotMapperJob
import org.apache.hadoop.mapred.JobConf; //導入方法依賴的package包/類
/**
 * Configures {@code job} to read from one or more table snapshots, each with one or more
 * scans. Per this method's contract, it bypasses HBase servers and reads directly from
 * snapshot files.
 *
 * @param snapshotScans map of snapshot name to the scans to run on that snapshot.
 * @param mapper the mapper class to use.
 * @param outputKeyClass the map output key class; when {@code null} the job's current
 *   setting is left untouched.
 * @param outputValueClass the map output value class; when {@code null} the job's current
 *   setting is left untouched.
 * @param job the job to adjust. Make sure the passed job is carrying all necessary HBase
 *   configuration.
 * @param addDependencyJars whether to upload HBase jars and jars for any of the configured
 *   job classes via the distributed cache (tmpjars).
 * @param tmpRestoreDir directory handed to {@code MultiTableSnapshotInputFormat.setInput};
 *   presumably where snapshots are restored before reading (confirm against that class).
 * @throws IOException propagated from the underlying setup calls.
 */
public static void initMultiTableSnapshotMapperJob(Map<String, Collection<Scan>> snapshotScans,
    Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
    JobConf job, boolean addDependencyJars, Path tmpRestoreDir) throws IOException {
  // Input side: register the snapshot scans first, then select the matching input format.
  MultiTableSnapshotInputFormat.setInput(job, snapshotScans, tmpRestoreDir);
  job.setInputFormat(MultiTableSnapshotInputFormat.class);

  // Map output types are optional; a null argument keeps whatever the job already has.
  if (null != outputKeyClass) {
    job.setMapOutputKeyClass(outputKeyClass);
  }
  if (null != outputValueClass) {
    job.setMapOutputValueClass(outputValueClass);
  }

  job.setMapperClass(mapper);

  if (addDependencyJars) {
    addDependencyJars(job);
  }

  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job);
}
示例2: initTableMapJob
import org.apache.hadoop.mapred.JobConf; //導入方法依賴的package包/類
/**
 * Prepares a {@link JobConf} for a TableMap job. Call this before submitting the job.
 *
 * <p>Besides setting the input format, mapper and map output types, this appends
 * {@code MutationSerialization} and {@code ResultSerialization} to the job's
 * {@code io.serializations} setting, registers {@code table} as an input path, stores
 * {@code columns} under {@code TableInputFormat.COLUMN_LIST}, and initializes credentials.
 * An {@link IOException} raised while adding dependency jars or initializing credentials
 * is not propagated; only its stack trace is printed.
 *
 * @param table the table name to read from (added to the job via
 *   {@code FileInputFormat.addInputPaths}).
 * @param columns the columns to scan.
 * @param mapper the mapper class to use.
 * @param outputKeyClass the class of the map output key.
 * @param outputValueClass the class of the map output value.
 * @param job the current job configuration to adjust.
 * @param addDependencyJars whether to upload HBase jars and jars for any of the configured
 *   job classes via the distributed cache (tmpjars).
 * @param inputFormat the input format class to set on the job.
 */
public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass, JobConf job, boolean addDependencyJars,
    Class<? extends InputFormat> inputFormat) {
  job.setInputFormat(inputFormat);
  job.setMapOutputValueClass(outputValueClass);
  job.setMapOutputKeyClass(outputKeyClass);
  job.setMapperClass(mapper);

  // Keep whatever serializations are already configured and append the HBase ones.
  final String serializationsKey = "io.serializations";
  final String configuredSerializations = job.get(serializationsKey);
  job.setStrings(
      serializationsKey,
      configuredSerializations,
      MutationSerialization.class.getName(),
      ResultSerialization.class.getName());

  FileInputFormat.addInputPaths(job, table);
  job.set(TableInputFormat.COLUMN_LIST, columns);

  if (addDependencyJars) {
    try {
      addDependencyJars(job);
    } catch (IOException jarFailure) {
      // Best effort: the signature declares no checked exception, so only report it.
      jarFailure.printStackTrace();
    }
  }

  try {
    initCredentials(job);
  } catch (IOException credentialFailure) {
    // NOTE(review): a credential failure is only printed, never surfaced to the caller.
    credentialFailure.printStackTrace();
  }
}
示例3: createDataJoinJob
import org.apache.hadoop.mapred.JobConf; //導入方法依賴的package包/類
/**
 * Builds the {@link JobConf} for a data-join job from positional command-line arguments.
 *
 * <p>Argument layout, as read by this method:
 * <ul>
 *   <li>{@code args[0]} input directory</li>
 *   <li>{@code args[1]} output directory (deleted recursively before the job is configured)</li>
 *   <li>{@code args[2]} {@code "text"} (case-insensitive) selects {@code TextInputFormat};
 *       anything else selects {@code SequenceFileInputFormat}</li>
 *   <li>{@code args[3]} number of reducers</li>
 *   <li>{@code args[4]} mapper class name</li>
 *   <li>{@code args[5]} reducer class name</li>
 *   <li>{@code args[6]} map output value class name</li>
 *   <li>{@code args[7]} {@code "text"} (case-insensitive) selects {@code TextOutputFormat}
 *       with {@code Text} values; anything else selects {@code SequenceFileOutputFormat}
 *       and is itself used as the output value class name</li>
 *   <li>{@code args[8]} optional maximum number of values per group (defaults to 100)</li>
 *   <li>{@code args[9]} optional job name (defaults to the empty string)</li>
 * </ul>
 *
 * @param args positional arguments as described above; at least eight are read
 *   unconditionally.
 * @return the configured job.
 * @throws IOException declared by this method; presumably raised by the file-system calls.
 */
public static JobConf createDataJoinJob(String args[]) throws IOException {
  String inputDir = args[0];
  String outputDir = args[1];

  final Class inputFormat;
  if (args[2].equalsIgnoreCase("text")) {
    System.out.println("Using TextInputFormat: " + args[2]);
    inputFormat = TextInputFormat.class;
  } else {
    System.out.println("Using SequenceFileInputFormat: " + args[2]);
    inputFormat = SequenceFileInputFormat.class;
  }

  int reducerCount = Integer.parseInt(args[3]);
  Class mapperClass = getClassByName(args[4]);
  Class reducerClass = getClassByName(args[5]);
  Class mapValueClass = getClassByName(args[6]);

  final Class outputFormat;
  final Class outputValueClass;
  if (args[7].equalsIgnoreCase("text")) {
    System.out.println("Using TextOutputFormat: " + args[7]);
    outputFormat = TextOutputFormat.class;
    outputValueClass = Text.class;
  } else {
    System.out.println("Using SequenceFileOutputFormat: " + args[7]);
    outputFormat = SequenceFileOutputFormat.class;
    outputValueClass = getClassByName(args[7]);
  }

  // Trailing arguments are optional and fall back to fixed defaults.
  long maxValuesPerGroup = (args.length > 8) ? Long.parseLong(args[8]) : 100;
  String jobName = (args.length > 9) ? args[9] : "";

  Configuration defaults = new Configuration();
  JobConf job = new JobConf(defaults, DataJoinJob.class);
  job.setJobName("DataJoinJob: " + jobName);

  // Clear any previous output, using the file system obtained from the default configuration.
  FileSystem fs = FileSystem.get(defaults);
  Path outputPath = new Path(outputDir);
  fs.delete(outputPath, true);

  FileInputFormat.setInputPaths(job, inputDir);
  job.setInputFormat(inputFormat);
  job.setMapperClass(mapperClass);

  FileOutputFormat.setOutputPath(job, outputPath);
  job.setOutputFormat(outputFormat);
  SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(mapValueClass);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(outputValueClass);

  job.setReducerClass(reducerClass);
  job.setNumMapTasks(1);
  job.setNumReduceTasks(reducerCount);
  job.setLong("datajoin.maxNumOfValuesPerGroup", maxValuesPerGroup);
  return job;
}
示例4: addMapper
import org.apache.hadoop.mapred.JobConf; //導入方法依賴的package包/類
/**
 * Adds a Mapper class to the chain job's JobConf.
 *
 * <p>The caller must state how keys and values travel from one element of the chain to the
 * next: by value or by reference. If a Mapper relies on the assumed semantics that the
 * collector does not modify keys and values, 'by value' must be used. If the Mapper does
 * not rely on those semantics, 'by reference' can be used as an optimization that avoids
 * serialization and deserialization.
 *
 * <p>For the added Mapper, the configuration given for it, <code>mapperConf</code>, takes
 * precedence over the job's JobConf. This precedence is in effect when the task is running.
 *
 * <p>IMPORTANT: there is no need to specify the output key/value classes for the
 * ChainMapper separately. Every call sets them on the job, so the values from the
 * addMapper call for the last mapper in the chain are the ones that remain.
 *
 * @param job job's JobConf to add the Mapper class to.
 * @param klass the Mapper class to add.
 * @param inputKeyClass mapper input key class.
 * @param inputValueClass mapper input value class.
 * @param outputKeyClass mapper output key class.
 * @param outputValueClass mapper output value class.
 * @param byValue indicates whether key/values should be passed by value to the next Mapper
 *   in the chain, if any.
 * @param mapperConf a JobConf with the configuration for the Mapper class. It is
 *   recommended to use a JobConf without default values, created with the
 *   <code>JobConf(boolean loadDefaults)</code> constructor with FALSE.
 */
public static <K1, V1, K2, V2> void addMapper(
    JobConf job,
    Class<? extends Mapper<K1, V1, K2, V2>> klass,
    Class<? extends K1> inputKeyClass,
    Class<? extends V1> inputValueClass,
    Class<? extends K2> outputKeyClass,
    Class<? extends V2> outputValueClass,
    boolean byValue,
    JobConf mapperConf) {
  // The job itself always runs ChainMapper.
  job.setMapperClass(ChainMapper.class);

  // Overwritten on every call, so the most recently added mapper defines the job's map output.
  job.setMapOutputKeyClass(outputKeyClass);
  job.setMapOutputValueClass(outputValueClass);

  // NOTE(review): the leading 'true' presumably marks this as the map side of the chain;
  // confirm against Chain.addMapper.
  Chain.addMapper(
      true,
      job,
      klass,
      inputKeyClass,
      inputValueClass,
      outputKeyClass,
      outputValueClass,
      byValue,
      mapperConf);
}