本文整理汇总了Java中org.apache.avro.mapreduce.AvroJob.setInputKeySchema方法的典型用法代码示例。如果您正苦于以下问题:Java AvroJob.setInputKeySchema方法的具体用法?Java AvroJob.setInputKeySchema怎么用?Java AvroJob.setInputKeySchema使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.avro.mapreduce.AvroJob
的用法示例。
在下文中一共展示了AvroJob.setInputKeySchema方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: runMapReduce
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Configures and runs an Avro job over Weather records using a custom
 * (non-Avro) map-output key for grouped sorting.
 *
 * The mapper emits WeatherSubset keys with Avro Weather values; a custom
 * partitioner plus grouping and sort comparators control how records are
 * partitioned, grouped and ordered before SortReducer writes Avro output.
 *
 * @param job        pre-created Hadoop job to configure
 * @param inputPath  Avro input file or directory
 * @param outputPath directory for the Avro output
 * @return true if the job completed successfully
 * @throws Exception if job submission or execution fails
 */
public boolean runMapReduce(final Job job, Path inputPath, Path outputPath) throws Exception {
FileInputFormat.setInputPaths(job, inputPath);
// Input records are Avro keys conforming to the Weather schema.
job.setInputFormatClass(AvroKeyInputFormat.class);
AvroJob.setInputKeySchema(job, Weather.SCHEMA$);
job.setMapperClass(SortMapper.class);
// Map output: only the VALUE half is Avro (Weather); the key is the plain
// Writable WeatherSubset, so no Avro map-output key schema is registered.
// NOTE(review): this differs from the sibling examples — presumably
// intentional because WeatherSubset is not an Avro record; confirm.
AvroJob.setMapOutputValueSchema(job, Weather.SCHEMA$);
job.setMapOutputKeyClass(WeatherSubset.class);
job.setReducerClass(SortReducer.class);
AvroJob.setOutputKeySchema(job, Weather.SCHEMA$);
job.setOutputFormatClass(AvroKeyOutputFormat.class);
FileOutputFormat.setOutputPath(job, outputPath);
// Partitioning, grouping and intra-group ordering are all driven by the
// WeatherSubset key via these three custom classes.
job.setPartitionerClass(WeatherPartitioner.class);
job.setGroupingComparatorClass(WeatherSubsetGroupingComparator.class);
job.setSortComparatorClass(WeatherSubsetSortComparator.class);
return job.waitForCompletion(true);
}
示例2: runMapReduce
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Configures and runs an Avro sort job where the WeatherNoIgnore schema is
 * used uniformly: for input keys, map-output keys and values, and final
 * output keys.
 *
 * @param job        pre-created Hadoop job to configure
 * @param inputPath  Avro input file or directory
 * @param outputPath directory for the Avro output
 * @return true if the job completed successfully
 * @throws Exception if job submission or execution fails
 */
public boolean runMapReduce(final Job job, Path inputPath, Path outputPath) throws Exception {
FileInputFormat.setInputPaths(job, inputPath);
job.setInputFormatClass(AvroKeyInputFormat.class);
AvroJob.setInputKeySchema(job, WeatherNoIgnore.SCHEMA$);
job.setMapperClass(SortMapper.class);
// Both halves of the map output carry full WeatherNoIgnore records.
AvroJob.setMapOutputKeySchema(job, WeatherNoIgnore.SCHEMA$);
AvroJob.setMapOutputValueSchema(job, WeatherNoIgnore.SCHEMA$);
job.setReducerClass(SortReducer.class);
AvroJob.setOutputKeySchema(job, WeatherNoIgnore.SCHEMA$);
job.setOutputFormatClass(AvroKeyOutputFormat.class);
FileOutputFormat.setOutputPath(job, outputPath);
return job.waitForCompletion(true);
}
示例3: runMapReduce
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Configures and runs an Avro sort job where the Weather schema is used
 * uniformly: for input keys, map-output keys and values, and final output
 * keys.
 *
 * @param job        pre-created Hadoop job to configure
 * @param inputPath  Avro input file or directory
 * @param outputPath directory for the Avro output
 * @return true if the job completed successfully
 * @throws Exception if job submission or execution fails
 */
public boolean runMapReduce(final Job job, Path inputPath, Path outputPath) throws Exception {
FileInputFormat.setInputPaths(job, inputPath);
job.setInputFormatClass(AvroKeyInputFormat.class);
AvroJob.setInputKeySchema(job, Weather.SCHEMA$);
job.setMapperClass(SortMapper.class);
// Both halves of the map output carry full Weather records.
AvroJob.setMapOutputKeySchema(job, Weather.SCHEMA$);
AvroJob.setMapOutputValueSchema(job, Weather.SCHEMA$);
job.setReducerClass(SortReducer.class);
AvroJob.setOutputKeySchema(job, Weather.SCHEMA$);
job.setOutputFormatClass(AvroKeyOutputFormat.class);
FileOutputFormat.setOutputPath(job, outputPath);
return job.waitForCompletion(true);
}
示例4: configureSchema
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Registers the Avro schemas on the job. The newest schema found in the
 * source is reused for input keys, map-output values and final output keys;
 * the map-output key schema switches to a key projection when deduplication
 * is enabled.
 *
 * @param job job to configure
 * @throws IOException if the newest source schema cannot be determined
 */
private void configureSchema(Job job) throws IOException {
    Schema latest = getNewestSchemaFromSource(job);
    AvroJob.setInputKeySchema(job, latest);

    // Dedup jobs key their map output on the dedup-key projection; otherwise
    // the full record schema doubles as the map-output key schema.
    Schema mapOutputKeySchema;
    if (this.shouldDeduplicate) {
        mapOutputKeySchema = getKeySchema(job, latest);
    } else {
        mapOutputKeySchema = latest;
    }
    AvroJob.setMapOutputKeySchema(job, mapOutputKeySchema);

    AvroJob.setMapOutputValueSchema(job, latest);
    AvroJob.setOutputKeySchema(job, latest);
}
示例5: initialiseInput
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Sets up Avro input for the job: parses the schema from
 * {@code avroSchemaFilePath}, registers it as the input key schema, and adds
 * every input path whose associated mapper value matches the generator
 * configured on the job.
 *
 * @param job       job to configure
 * @param operation operation supplying (input path, mapper) pairs
 * @throws IOException              if the schema file cannot be read
 * @throws IllegalArgumentException if no schema file path was set
 */
private void initialiseInput(final Job job, final MapReduce operation) throws IOException {
    if (avroSchemaFilePath == null) {
        throw new IllegalArgumentException("Avro schema file path has not been set");
    }

    final Schema inputSchema = new Parser().parse(new File(avroSchemaFilePath));
    AvroJob.setInputKeySchema(job, inputSchema);
    job.setInputFormatClass(AvroKeyInputFormat.class);

    // Register only those input paths whose mapper entry matches the
    // configured mapper generator (the lookup is loop-invariant, so hoist it).
    final String mapperGenerator = job.getConfiguration().get(MAPPER_GENERATOR);
    for (final Map.Entry<String, String> pair : operation.getInputMapperPairs().entrySet()) {
        if (pair.getValue().contains(mapperGenerator)) {
            AvroKeyInputFormat.addInputPath(job, new Path(pair.getKey()));
        }
    }
}
示例6: createAndSubmitJob
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Builds and runs an Avro MapReduce job against the mini YARN cluster:
 * Avro keys in (schema from {@code mapreduce-avro/input.avsc}), Avro keys
 * out (schema from {@code mapreduce-avro/output.avsc}).
 *
 * @return true if the job completed successfully
 * @throws IOException            on job setup failure
 * @throws ClassNotFoundException if a job class cannot be loaded
 * @throws InterruptedException   if the wait for completion is interrupted
 */
public boolean createAndSubmitJob() throws IOException, ClassNotFoundException, InterruptedException {
    // Clone the mini-cluster config and force the new (mapreduce) API on
    // both the map and reduce sides.
    Configuration conf = new Configuration(yarnUnit.getConfig());
    conf.setBoolean("mapred.mapper.new-api", true);
    conf.setBoolean("mapred.reducer.new-api", true);

    Job job = Job.getInstance(conf);
    job.setJobName(this.getClass().getSimpleName() + "-job");
    job.setNumReduceTasks(1);
    job.setMapperClass(AvroMapReduce.AvroMapper.class);

    // Input side: Avro keys described by the bundled input schema.
    Schema inSchema = new Schema.Parser().parse(
        MapreduceAvroTest.class.getClassLoader().getResourceAsStream("mapreduce-avro/input.avsc"));
    FileInputFormat.addInputPath(job, new Path(inputPath));
    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, inSchema);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(AvroMapReduce.AvroReducer.class);

    // Output side: Avro keys described by the bundled output schema.
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    Schema outSchema = new Schema.Parser().parse(
        MapreduceAvroTest.class.getClassLoader().getResourceAsStream("mapreduce-avro/output.avsc"));
    AvroJob.setOutputKeySchema(job, outSchema);
    job.setOutputKeyClass(AvroKey.class);
    job.setOutputValueClass(NullWritable.class);

    job.setSpeculativeExecution(false);
    job.setMaxMapAttempts(1); // speed up failures
    return job.waitForCompletion(true);
}
示例7: process
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Copies every schema explicitly declared on the {@code AvroJobInfo}
 * annotation (i.e. not left at {@code AvroDefault}) onto the job, then
 * registers Avro serialization with the job's configuration.
 *
 * @param annotation the AvroJobInfo annotation instance to read
 * @param job        job to configure
 * @param target     the annotated object (not used here)
 * @throws ToolException if a schema cannot be resolved
 */
@Override
public void process(Annotation annotation, Job job, Object target)
    throws ToolException {
    final AvroJobInfo info = (AvroJobInfo) annotation;
    // AvroDefault.class is the annotation's "unset" sentinel; only schemas
    // the user actually supplied are applied.
    if (info.inputKeySchema() != AvroDefault.class) {
        AvroJob.setInputKeySchema(job, getSchema(info.inputKeySchema()));
    }
    if (info.inputValueSchema() != AvroDefault.class) {
        AvroJob.setInputValueSchema(job, getSchema(info.inputValueSchema()));
    }
    if (info.outputKeySchema() != AvroDefault.class) {
        AvroJob.setOutputKeySchema(job, getSchema(info.outputKeySchema()));
    }
    if (info.outputValueSchema() != AvroDefault.class) {
        AvroJob.setOutputValueSchema(job, getSchema(info.outputValueSchema()));
    }
    if (info.mapOutputKeySchema() != AvroDefault.class) {
        AvroJob.setMapOutputKeySchema(job, getSchema(info.mapOutputKeySchema()));
    }
    if (info.mapOutputValueSchema() != AvroDefault.class) {
        AvroJob.setMapOutputValueSchema(job, getSchema(info.mapOutputValueSchema()));
    }
    AvroSerialization.addToConfiguration(job.getConfiguration());
}
示例8: getJob
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Creates a new Job whose Avro input key schema is {@code avroSchema}.
 * An IOException raised while creating the job is rethrown unchecked.
 *
 * @param avroSchema schema to register as the input key schema
 * @return the configured job
 */
private Job getJob(Schema avroSchema) {
    try {
        Job job = Job.getInstance();
        AvroJob.setInputKeySchema(job, avroSchema);
        return job;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
示例9: run
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Configures and runs the RecordService-backed Age Count job: reads
 * (UserKey, UserValue) Avro pairs from the table named by args[0] and writes
 * (string, int) Avro key/value pairs under args[1].
 *
 * @param args exactly two entries: input table name and output path
 * @return 0 on success, -1 on bad usage, job exit status otherwise
 * @throws Exception if job setup or execution fails
 */
public int run(String[] args) throws Exception {
org.apache.log4j.BasicConfigurator.configure();
if (args.length != 2) {
System.err.println("Usage: MapReduceAgeCount <input path> <output path>");
return -1;
}
Job job = Job.getInstance(getConf());
job.setJarByClass(MapReduceAgeCount.class);
job.setJobName("Age Count");
// RECORDSERVICE:
// To read from a table instead of a path, comment out
// FileInputFormat.setInputPaths() and instead use:
// FileInputFormat.setInputPaths(job, new Path(args[0]));
RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]);
// RECORDSERVICE:
// Use the RecordService version of the AvroKeyValueInputFormat
job.setInputFormatClass(
com.cloudera.recordservice.avro.mapreduce.AvroKeyValueInputFormat.class);
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setMapperClass(AgeCountMapper.class);
// Set schema for input key and value.
AvroJob.setInputKeySchema(job, UserKey.getClassSchema());
AvroJob.setInputValueSchema(job, UserValue.getClassSchema());
// Intermediate pairs are plain Writables, not Avro.
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
// Final output: Avro (string, int) key/value pairs.
job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
job.setReducerClass(AgeCountReducer.class);
AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));
return (job.waitForCompletion(true) ? 0 : 1);
}
示例10: run
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Configures and runs the RecordService-backed Color Count job over the
 * hard-coded "rs"."users" table, writing (string, int) Avro key/value pairs
 * under args[1].
 *
 * NOTE(review): args[0] is validated but never used — the input table is
 * hard-coded below; confirm whether it should come from args[0] instead.
 *
 * @param args exactly two entries expected; only args[1] (output path) is used
 * @return 0 on success, -1 on bad usage, job exit status otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
org.apache.log4j.BasicConfigurator.configure();
if (args.length != 2) {
System.err.println("Usage: MapReduceColorCount <input path> <output path>");
return -1;
}
Job job = Job.getInstance(getConf());
job.setJarByClass(MapReduceColorCount.class);
job.setJobName("Color Count");
// RECORDSERVICE:
// To read from a table instead of a path, comment out
// FileInputFormat.setInputPaths() and instead use:
//FileInputFormat.setInputPaths(job, new Path(args[0]));
RecordServiceConfig.setInputTable(job.getConfiguration(), "rs", "users");
// RECORDSERVICE:
// Use the RecordService version of the AvroKeyInputFormat
job.setInputFormatClass(
com.cloudera.recordservice.avro.mapreduce.AvroKeyInputFormat.class);
//job.setInputFormatClass(AvroKeyInputFormat.class);
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setMapperClass(ColorCountMapper.class);
// Input keys are Avro User records.
AvroJob.setInputKeySchema(job, User.getClassSchema());
// Intermediate pairs are plain Writables, not Avro.
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
// Final output: Avro (string, int) key/value pairs.
job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
job.setReducerClass(ColorCountReducer.class);
AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));
return (job.waitForCompletion(true) ? 0 : 1);
}
示例11: afterPropertiesSet
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Applies the configured Avro record classes to the job as schemas: each
 * non-null class name is resolved, instantiated, and its {@code getSchema()}
 * result registered for the corresponding slot (input key/value, map-output
 * key/value, output key/value). Slots left null are not touched.
 *
 * @throws Exception if a class cannot be resolved or instantiated
 */
@Override
public void afterPropertiesSet() throws Exception {
    if (avroInputKey != null) {
        AvroJob.setInputKeySchema(job, resolveClass(avroInputKey).newInstance().getSchema());
    }
    if (avroInputValue != null) {
        AvroJob.setInputValueSchema(job, resolveClass(avroInputValue).newInstance().getSchema());
    }
    if (avroMapOutputKey != null) {
        AvroJob.setMapOutputKeySchema(job, resolveClass(avroMapOutputKey).newInstance().getSchema());
    }
    if (avroMapOutputValue != null) {
        // Collapsed to one line for consistency with the sibling branches
        // (the original expanded this branch into three statements for no reason).
        AvroJob.setMapOutputValueSchema(job, resolveClass(avroMapOutputValue).newInstance().getSchema());
    }
    if (avroOutputKey != null) {
        AvroJob.setOutputKeySchema(job, resolveClass(avroOutputKey).newInstance().getSchema());
    }
    if (avroOutputValue != null) {
        AvroJob.setOutputValueSchema(job, resolveClass(avroOutputValue).newInstance().getSchema());
    }
}
示例12: configureSchema
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Registers the Avro schemas on the job. The newest schema found in the
 * source is reused for map-output values and final output keys; the input
 * key schema is only pinned when a single input schema is enforced, and the
 * map-output key schema switches to a key projection when deduplicating.
 *
 * @param job job to configure
 * @throws IOException if the newest source schema cannot be determined
 */
private void configureSchema(Job job) throws IOException {
    Schema latest = getNewestSchemaFromSource(job, this.fs);

    // Only force one schema across all inputs when configured to do so.
    if (this.useSingleInputSchema) {
        AvroJob.setInputKeySchema(job, latest);
    }

    Schema mapOutputKeySchema;
    if (this.shouldDeduplicate) {
        mapOutputKeySchema = getKeySchema(job, latest);
    } else {
        mapOutputKeySchema = latest;
    }
    AvroJob.setMapOutputKeySchema(job, mapOutputKeySchema);

    AvroJob.setMapOutputValueSchema(job, latest);
    AvroJob.setOutputKeySchema(job, latest);
}
示例13: configureSchema
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Registers the Avro schemas on the job using the newest schema found by
 * {@code MRCompactorAvroKeyDedupJobRunner}. The input key schema is only
 * pinned when the single-input-schema property (default true) is set; the
 * map-output key schema switches to a key projection when deduplicating.
 *
 * @param job job to configure
 * @throws IOException if the newest source schema cannot be determined
 */
private void configureSchema(Job job) throws IOException {
    Schema latest = MRCompactorAvroKeyDedupJobRunner.getNewestSchemaFromSource(job, this.fs);

    boolean singleInputSchema =
        this.state.getPropAsBoolean(MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_AVRO_SINGLE_INPUT_SCHEMA, true);
    if (singleInputSchema) {
        AvroJob.setInputKeySchema(job, latest);
    }

    Schema mapOutputKeySchema;
    if (this.shouldDeduplicate) {
        mapOutputKeySchema = getKeySchema(job, latest);
    } else {
        mapOutputKeySchema = latest;
    }
    AvroJob.setMapOutputKeySchema(job, mapOutputKeySchema);

    AvroJob.setMapOutputValueSchema(job, latest);
    AvroJob.setOutputKeySchema(job, latest);
}
示例14: run
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Configures and runs the records-per-customer job: reads z/OS RDW-framed
 * mainframe data decoded from COBOL/EBCDIC into Avro CustomerData records
 * and writes plain-text (Text, IntWritable) results.
 *
 * Fix: the original set the mapper and reducer classes twice each
 * (setMapperClass/setReducerClass were repeated further down); the redundant
 * duplicate calls are removed — each class is now set exactly once.
 *
 * @param args args[0] input path, args[1] output path
 * @return 0 on success, 1 on job failure
 * @throws Exception if job setup or execution fails
 */
public int run(String[] args) throws Exception {
    // Create configuration
    Configuration conf = this.getConf();
    // Create job
    Job job = Job.getInstance(conf);
    job.setJobName("recordsPerCustomer");
    job.setJarByClass(CustdatHadoopReader.class);
    // Setup MapReduce classes (each exactly once)
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    // Set only 1 reduce task
    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    // Input: RDW-framed records decoded to Avro via the Cob2Avro helpers.
    job.setInputFormatClass(ZosRdwAvroInputFormat.class);
    Cob2AvroJob.setInputKeyCobolContext(job, EbcdicCobolContext.class);
    Cob2AvroJob.setInputKeyRecordType(job, CobolCustomerData.class);
    Cob2AvroJob.setInputRecordMatcher(job, CustdatZosRdwRecordMatcher.class);
    AvroJob.setInputKeySchema(job, CustomerData.getClassSchema());
    // Output: plain text (customer, count) pairs.
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Execute job
    return job.waitForCompletion(true) ? 0 : 1;
}
示例15: internalRun
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Builds (but does not submit) the Avro diff job comparing two datasets.
 * The mapper/reducer split records into "kernel" (unchanged), "add" and
 * "del" named Avro outputs under {@code outputDir}.
 *
 * Fix: replaced the deprecated {@code new Job(conf)} constructor with
 * {@code Job.getInstance(conf)}, matching modern Hadoop usage.
 *
 * @param origInput path of the original dataset
 * @param destInput path of the new dataset
 * @param outputDir output directory for the named outputs
 * @param conf      configuration (mutated: diff in/out paths are recorded)
 * @return the fully configured, unsubmitted job
 * @throws Exception if the schema cannot be determined or job setup fails
 */
public Job internalRun(Path origInput, Path destInput, Path outputDir, Configuration conf) throws Exception {
    // Record both sides of the diff in the configuration for the tasks.
    conf.set("viadeo.diff.diffinpath", origInput.toString());
    conf.set("viadeo.diff.diffoutpath", destInput.toString());

    Job job = Job.getInstance(conf);
    job.setJarByClass(DiffJob.class);
    job.setJobName("diff");

    // Prefer a schema pinned in the configuration; otherwise read it from
    // the destination dataset.
    Schema schema = SchemaUtils.getConfSchema(conf);
    if (schema == null) {
        schema = SchemaUtils.getSchema(conf, destInput);
    }

    FileInputFormat.setInputPaths(job, origInput, destInput);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(DiffMapper.class);
    AvroJob.setInputKeySchema(job, schema);
    AvroJob.setMapOutputKeySchema(job, schema);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(DiffReducer.class);
    AvroJob.setOutputKeySchema(job, schema);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);

    // ~ OUTPUT
    FileOutputFormat.setOutputPath(job, outputDir);
    // Named outputs route kernel/add/del records into separate Avro files.
    AvroMultipleOutputs.addNamedOutput(job, "kernel", AvroKeyOutputFormat.class, schema);
    AvroMultipleOutputs.addNamedOutput(job, "add", AvroKeyOutputFormat.class, schema);
    AvroMultipleOutputs.addNamedOutput(job, "del", AvroKeyOutputFormat.class, schema);
    AvroMultipleOutputs.setCountersEnabled(job, true);
    return job;
}