This article collects typical usage examples of the Java class org.apache.avro.mapreduce.AvroJob. If you have been wondering what the AvroJob class does, how to use it, or where to find examples of it, the curated code samples below should help.
The AvroJob class belongs to the org.apache.avro.mapreduce package. Fifteen code examples of the class are presented below, ordered by popularity.
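Before the individual examples, here is a minimal, self-contained driver sketch showing the pattern most of them share: pick an Avro-aware input/output format, then register the matching reader and writer schemas through AvroJob. The class name, job name, and schema file name are illustrative assumptions, not taken from any example below.

import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.avro.mapreduce.AvroKeyOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AvroJobSkeleton {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "avro-job-skeleton");
    job.setJarByClass(AvroJobSkeleton.class);

    // Hypothetical schema file; any org.apache.avro.Schema works here.
    Schema schema = new Schema.Parser().parse(new java.io.File("record.avsc"));

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);

    AvroJob.setInputKeySchema(job, schema);   // reader schema for input records
    AvroJob.setOutputKeySchema(job, schema);  // writer schema for output records

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}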
Example 1: createRecordReader
import org.apache.avro.mapreduce.AvroJob; // import the required package/class

@Override
public RecordReader<AvroKey<K>, AvroValue<V>> createRecordReader(
    InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  Schema keyReaderSchema = AvroJob.getInputKeySchema(context.getConfiguration());
  if (null == keyReaderSchema) {
    LOG.warn("Key reader schema was not set. " +
        "Use AvroJob.setInputKeySchema() if desired.");
    LOG.info("Using a key reader schema equal to the writer schema.");
  }
  Schema valueReaderSchema = AvroJob.getInputValueSchema(context.getConfiguration());
  if (null == valueReaderSchema) {
    LOG.warn("Value reader schema was not set. " +
        "Use AvroJob.setInputValueSchema() if desired.");
    LOG.info("Using a value reader schema equal to the writer schema.");
  }
  return new AvroKeyValueRecordReader<K, V>(keyReaderSchema, valueReaderSchema);
}
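The fallback above only kicks in when the driver never registered reader schemas. For contrast, a driver-side sketch of how those schemas get registered; the STRING key and LONG value schemas are illustrative assumptions:

import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyValueInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class KeyValueReaderSchemas {
  // Registers the reader schemas that createRecordReader() above looks up.
  static Job configure() throws Exception {
    Job job = Job.getInstance(new Configuration());
    job.setInputFormatClass(AvroKeyValueInputFormat.class);
    AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setInputValueSchema(job, Schema.create(Schema.Type.LONG));
    return job;
  }
}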
Example 2: setSchema
import org.apache.avro.mapreduce.AvroJob; // import the required package/class

/** Hacked method */
private void setSchema(Job job, Schema keySchema, Schema valSchema) {
  boolean isMaponly = job.getNumReduceTasks() == 0;
  if (keySchema != null) {
    if (isMaponly) {
      AvroJob.setMapOutputKeySchema(job, keySchema);
    }
    AvroJob.setOutputKeySchema(job, keySchema);
  }
  if (valSchema != null) {
    if (isMaponly) {
      AvroJob.setMapOutputValueSchema(job, valSchema);
    }
    AvroJob.setOutputValueSchema(job, valSchema);
  }
}
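The map-only branch matters because in a map-only job the mapper's output is written directly by the output format, so the map-output schemas must mirror the job-output schemas. A hedged sketch of the equivalent direct calls for a map-only job; the STRING schema is illustrative:

import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class MapOnlySchemas {
  // What the hacked helper above effectively does when there are no reducers.
  static Job configure() throws Exception {
    Job job = Job.getInstance(new Configuration());
    job.setNumReduceTasks(0);                            // map-only
    Schema schema = Schema.create(Schema.Type.STRING);   // illustrative
    AvroJob.setMapOutputKeySchema(job, schema);          // map output is the job output
    AvroJob.setOutputKeySchema(job, schema);
    return job;
  }
}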
Example 3: testMapReduce
import org.apache.avro.mapreduce.AvroJob; // import the required package/class

@Test
public void testMapReduce() throws IOException {
  MyAvroReducer reducer = new MyAvroReducer();

  // Configure a job.
  Job job = new Job();
  // We've got to do a little hacking here since mrunit doesn't run exactly like
  // the real hadoop mapreduce framework.
  AvroJob.setMapOutputKeySchema(job, Node.SCHEMA$);
  AvroJob.setOutputKeySchema(job, reducer.getAvroKeyWriterSchema());
  AvroSerialization.setValueWriterSchema(job.getConfiguration(), Node.SCHEMA$);

  // Run the reducer.
  ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> driver
      = new ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable>();
  driver.setReducer(reducer);
  driver.withConfiguration(job.getConfiguration());
  driver.withInput(new Text("foo"),
      Collections.singletonList(new AvroValue<Node>(new NodeBuilder("bar", 1.0).build())));

  List<Pair<AvroKey<Node>, NullWritable>> output = driver.run();
  assertEquals(1, output.size());
  assertEquals("bar", output.get(0).getFirst().datum().getLabel().toString());
}
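The test drives a MyAvroReducer that is not shown in this example. A hypothetical sketch of a reducer compatible with the test: it re-emits each Node value as an AvroKey. The getAvroKeyWriterSchema() helper is an assumption inferred from the test, not code from the original project.

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Node is the generated Avro record used throughout this example (import omitted).
public class MyAvroReducer
    extends Reducer<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> {

  // Assumed helper that the test calls to configure the output key schema.
  public Schema getAvroKeyWriterSchema() {
    return Node.SCHEMA$;
  }

  @Override
  protected void reduce(Text key, Iterable<AvroValue<Node>> values, Context context)
      throws IOException, InterruptedException {
    for (AvroValue<Node> value : values) {
      context.write(new AvroKey<Node>(value.datum()), NullWritable.get());
    }
  }
}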
Example 4: testMapReduce
import org.apache.avro.mapreduce.AvroJob; // import the required package/class

@Test
public void testMapReduce() throws IOException {
  MyNodeReducer reducer = new MyNodeReducer();

  // Configure a job.
  Job job = new Job();
  // We've got to do a little hacking here since mrunit doesn't run exactly like
  // the real hadoop mapreduce framework.
  AvroJob.setMapOutputKeySchema(job, Node.SCHEMA$);
  AvroJob.setOutputKeySchema(job, reducer.getAvroKeyWriterSchema());
  AvroSerialization.setValueWriterSchema(job.getConfiguration(), Node.SCHEMA$);

  ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> driver
      = new ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable>();
  driver.setReducer(reducer);
  driver.withConfiguration(job.getConfiguration());
  driver.withInput(
      new Text("foo"),
      Collections.singletonList(new AvroValue<Node>(new NodeBuilder("bar", 1.0).build())));

  List<Pair<AvroKey<Node>, NullWritable>> output = driver.run();
  assertEquals(1, output.size());
  assertEquals("bar", output.get(0).getFirst().datum().getLabel().toString());
}
Example 5: run
import org.apache.avro.mapreduce.AvroJob; // import the required package/class

public int run(String[] args) throws Exception {
  Job job = new Job(getConf());
  job.setJarByClass(AVROMultipleValues.class);
  job.setJobName("AVRO Multiple Values");

  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.setMapperClass(AVROMultipleValuesMapper.class);
  job.setReducerClass(AVROMultipleValuesReducer.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(AvroValue.class);
  job.setOutputFormatClass(AvroKeyValueOutputFormat.class);

  AvroJob.setMapOutputValueSchema(job, Multiple.SCHEMA$);
  AvroJob.setOutputValueSchema(job, Multiple.SCHEMA$);

  job.setNumReduceTasks(1);

  return (job.waitForCompletion(true) ? 0 : 1);
}
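The driver wires in AVROMultipleValuesMapper and AVROMultipleValuesReducer without showing them. A hypothetical mapper skeleton consistent with the schemas configured above; Multiple is the generated Avro record, and leaving its fields unset is only for brevity (a real mapper must populate every non-nullable field before writing):

import java.io.IOException;
import org.apache.avro.mapred.AvroValue;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper skeleton: emits one AvroValue<Multiple> per input line.
public class AVROMultipleValuesMapperSketch
    extends Mapper<LongWritable, Text, Text, AvroValue<Multiple>> {
  @Override
  protected void map(LongWritable offset, Text line, Context context)
      throws IOException, InterruptedException {
    Multiple record = new Multiple();  // fields left unset for brevity
    context.write(new Text(line.toString()), new AvroValue<Multiple>(record));
  }
}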
Example 6: getRecordWriter
import org.apache.avro.mapreduce.AvroJob; // import the required package/class

@Override
public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext job)
    throws IOException, InterruptedException {
  Configuration conf = job.getConfiguration();

  // Get the writer schema.
  Schema writerSchema = AvroJob.getOutputKeySchema(conf);
  boolean isMapOnly = job.getNumReduceTasks() == 0;
  if (isMapOnly) {
    Schema mapOutputSchema = AvroJob.getMapOutputKeySchema(conf);
    if (mapOutputSchema != null) {
      writerSchema = mapOutputSchema;
    }
  }
  if (null == writerSchema) {
    throw new IOException(
        "AvroKeyOutputFormat requires an output schema. Use AvroJob.setOutputKeySchema().");
  }

  GenericData dataModel = AvroSerialization.createDataModel(conf);

  return new PentahoAvroWrapperRecordWriter(writerSchema, dataModel, getCompressionCodec(job),
      getAvroFileOutputStream(job), getSyncInterval(job));
}
Example 7: runMapReduce
import org.apache.avro.mapreduce.AvroJob; // import the required package/class

public boolean runMapReduce(final Job job, Path inputPath, Path outputPath) throws Exception {
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(AvroKeyInputFormat.class);
  AvroJob.setInputKeySchema(job, Weather.SCHEMA$);

  job.setMapperClass(SortMapper.class);
  AvroJob.setMapOutputValueSchema(job, Weather.SCHEMA$);
  job.setMapOutputKeyClass(WeatherSubset.class);

  job.setReducerClass(SortReducer.class);
  AvroJob.setOutputKeySchema(job, Weather.SCHEMA$);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  FileOutputFormat.setOutputPath(job, outputPath);

  job.setPartitionerClass(WeatherPartitioner.class);
  job.setGroupingComparatorClass(WeatherSubsetGroupingComparator.class);
  job.setSortComparatorClass(WeatherSubsetSortComparator.class);

  return job.waitForCompletion(true);
}
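This example wires a custom partitioner plus grouping and sort comparators: the classic secondary-sort pattern, where records are partitioned and grouped on the natural key but ordered on extra fields. A hypothetical sketch of what such a partitioner might look like; the getStation() accessor on WeatherSubset is an assumption:

import org.apache.avro.mapred.AvroValue;
import org.apache.hadoop.mapreduce.Partitioner;

// Hypothetical partitioner: hash only the natural key (the station), so all
// records for one station reach the same reducer regardless of sort fields.
// WeatherSubset and Weather are the generated classes from this example.
public class WeatherPartitionerSketch
    extends Partitioner<WeatherSubset, AvroValue<Weather>> {
  @Override
  public int getPartition(WeatherSubset key, AvroValue<Weather> value,
      int numPartitions) {
    return (key.getStation().hashCode() & Integer.MAX_VALUE) % numPartitions;
  }
}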
Example 8: runMapReduce
import org.apache.avro.mapreduce.AvroJob; // import the required package/class

public boolean runMapReduce(final Job job, Path inputPath, Path outputPath) throws Exception {
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(AvroKeyInputFormat.class);
  AvroJob.setInputKeySchema(job, WeatherNoIgnore.SCHEMA$);

  job.setMapperClass(SortMapper.class);
  AvroJob.setMapOutputKeySchema(job, WeatherNoIgnore.SCHEMA$);
  AvroJob.setMapOutputValueSchema(job, WeatherNoIgnore.SCHEMA$);

  job.setReducerClass(SortReducer.class);
  AvroJob.setOutputKeySchema(job, WeatherNoIgnore.SCHEMA$);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  FileOutputFormat.setOutputPath(job, outputPath);

  return job.waitForCompletion(true);
}
Example 9: runMapReduce
import org.apache.avro.mapreduce.AvroJob; // import the required package/class

public boolean runMapReduce(final Job job, Path inputPath, Path outputPath) throws Exception {
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(AvroKeyInputFormat.class);
  AvroJob.setInputKeySchema(job, Weather.SCHEMA$);

  job.setMapperClass(SortMapper.class);
  AvroJob.setMapOutputKeySchema(job, Weather.SCHEMA$);
  AvroJob.setMapOutputValueSchema(job, Weather.SCHEMA$);

  job.setReducerClass(SortReducer.class);
  AvroJob.setOutputKeySchema(job, Weather.SCHEMA$);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  FileOutputFormat.setOutputPath(job, outputPath);

  return job.waitForCompletion(true);
}
Example 10: createRecordReader
import org.apache.avro.mapreduce.AvroJob; // import the required package/class

@Override
public RecordReader<AvroKey<T>, NullWritable> createRecordReader(
    InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  Schema readerSchema = AvroJob.getInputKeySchema(taskAttemptContext.getConfiguration());
  if (null == readerSchema) {
    LOG.warn("Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.");
  }
  return new ErrorHandlingAvroKeyRecordReader(readerSchema);
}
Example 11: getRecordWriter
import org.apache.avro.mapreduce.AvroJob; // import the required package/class

@Override
@SuppressWarnings("unchecked")
public RecordWriter<AvroKey<K>, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  // Get the writer schema.
  Schema writerSchema = AvroJob.getOutputKeySchema(context.getConfiguration());
  if (null == writerSchema) {
    throw new IOException(NodesOutputFormat.class.getName()
        + " requires an output schema. Use AvroJob.setOutputKeySchema().");
  }
  return this.recordWriterFactory.create(writerSchema, getCompressionCodec(context),
      getAvroFileOutputStream(context));
}
Example 12: setup
import org.apache.avro.mapreduce.AvroJob; // import the required package/class

@Override
protected void setup(Context context) throws IOException, InterruptedException {
  keySchema = AvroJob.getMapOutputKeySchema(context.getConfiguration());
  outKey = new AvroKey<GenericRecord>();
  outKey.datum(new GenericData.Record(keySchema));
  outValue = new AvroValue<GenericRecord>();
}
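setup() pre-allocates the key and value wrappers once so that map() can reuse them for every record. A hedged sketch of a matching map() method in the same mapper; projecting the input record's fields into the key record is an assumption about the surrounding class:

// Hedged sketch of a map() that reuses the wrappers allocated in setup().
@Override
protected void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
    throws IOException, InterruptedException {
  GenericRecord keyRecord = outKey.datum();
  for (Schema.Field field : keySchema.getFields()) {
    keyRecord.put(field.name(), key.datum().get(field.name()));  // project key fields
  }
  outValue.datum(key.datum());
  context.write(outKey, outValue);
}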
Example 13: configureSchema
import org.apache.avro.mapreduce.AvroJob; // import the required package/class

private void configureSchema(Job job) throws IOException {
  Schema newestSchema = getNewestSchemaFromSource(job);
  AvroJob.setInputKeySchema(job, newestSchema);
  AvroJob.setMapOutputKeySchema(job, this.shouldDeduplicate ? getKeySchema(job, newestSchema) : newestSchema);
  AvroJob.setMapOutputValueSchema(job, newestSchema);
  AvroJob.setOutputKeySchema(job, newestSchema);
}
Example 14: initialiseInput
import org.apache.avro.mapreduce.AvroJob; // import the required package/class

private void initialiseInput(final Job job, final MapReduce operation) throws IOException {
  if (null == avroSchemaFilePath) {
    throw new IllegalArgumentException("Avro schema file path has not been set");
  }

  final Schema schema = new Parser().parse(new File(avroSchemaFilePath));
  AvroJob.setInputKeySchema(job, schema);
  job.setInputFormatClass(AvroKeyInputFormat.class);

  for (final Map.Entry<String, String> entry : operation.getInputMapperPairs().entrySet()) {
    if (entry.getValue().contains(job.getConfiguration().get(MAPPER_GENERATOR))) {
      AvroKeyInputFormat.addInputPath(job, new Path(entry.getKey()));
    }
  }
}
Example 15: createAndSubmitJob
import org.apache.avro.mapreduce.AvroJob; // import the required package/class

public boolean createAndSubmitJob() throws IOException, ClassNotFoundException, InterruptedException {
  Configuration configuration = new Configuration(yarnUnit.getConfig());
  configuration.setBoolean("mapred.mapper.new-api", true);
  configuration.setBoolean("mapred.reducer.new-api", true);

  Job job = Job.getInstance(configuration);
  job.setJobName(this.getClass().getSimpleName() + "-job");
  job.setNumReduceTasks(1);

  job.setMapperClass(AvroMapReduce.AvroMapper.class);
  Schema inputSchema = new Schema.Parser().parse(
      MapreduceAvroTest.class.getClassLoader().getResourceAsStream("mapreduce-avro/input.avsc"));
  FileInputFormat.addInputPath(job, new Path(inputPath));
  job.setInputFormatClass(AvroKeyInputFormat.class);
  AvroJob.setInputKeySchema(job, inputSchema);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(Text.class);

  job.setReducerClass(AvroMapReduce.AvroReducer.class);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  AvroJob.setOutputKeySchema(job, new Schema.Parser().parse(
      MapreduceAvroTest.class.getClassLoader().getResourceAsStream("mapreduce-avro/output.avsc")));
  job.setOutputKeyClass(AvroKey.class);
  job.setOutputValueClass(NullWritable.class);

  job.setSpeculativeExecution(false);
  job.setMaxMapAttempts(1); // speed up failures

  return job.waitForCompletion(true);
}
}