本文整理汇总了Java中org.apache.avro.mapreduce.AvroJob.setMapOutputKeySchema方法的典型用法代码示例。如果您正苦于以下问题：Java AvroJob.setMapOutputKeySchema方法的具体用法？Java AvroJob.setMapOutputKeySchema怎么用？Java AvroJob.setMapOutputKeySchema使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.avro.mapreduce.AvroJob的用法示例。
在下文中一共展示了AvroJob.setMapOutputKeySchema方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: setSchema
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Applies the given Avro key and value schemas to {@code job} (flagged as a hack by its author).
 *
 * <p>For each non-null schema the final-output schema is always set; when the job has zero
 * reduce tasks the matching map-output schema is set first as well. A {@code null} schema
 * leaves the corresponding settings untouched.
 *
 * @param job the job to configure
 * @param keySchema key schema, or {@code null} to skip key configuration
 * @param valSchema value schema, or {@code null} to skip value configuration
 */
private void setSchema(Job job, Schema keySchema, Schema valSchema) {
  final boolean mapOnly = (0 == job.getNumReduceTasks());
  final boolean hasKeySchema = (keySchema != null);
  final boolean hasValueSchema = (valSchema != null);

  // Key side: map-output schema only for map-only jobs, output schema always.
  if (hasKeySchema && mapOnly) {
    AvroJob.setMapOutputKeySchema(job, keySchema);
  }
  if (hasKeySchema) {
    AvroJob.setOutputKeySchema(job, keySchema);
  }

  // Value side mirrors the key side.
  if (hasValueSchema && mapOnly) {
    AvroJob.setMapOutputValueSchema(job, valSchema);
  }
  if (hasValueSchema) {
    AvroJob.setOutputValueSchema(job, valSchema);
  }
}
示例2: testMapReduce
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Feeds a single ("foo", [Node "bar"]) input to {@code MyAvroReducer} through a
 * {@code ReduceDriver} and verifies that exactly one output pair is produced whose key
 * datum is labelled "bar".
 */
@Test
public void testMapReduce() throws IOException {
  final MyAvroReducer reducerUnderTest = new MyAvroReducer();

  // mrunit doesn't run exactly like the real hadoop mapreduce framework, so the Avro
  // schemas are registered by hand on a job whose configuration is handed to the driver.
  final Job job = new Job();
  AvroJob.setMapOutputKeySchema(job, Node.SCHEMA$);
  AvroJob.setOutputKeySchema(job, reducerUnderTest.getAvroKeyWriterSchema());
  AvroSerialization.setValueWriterSchema(job.getConfiguration(), Node.SCHEMA$);

  // Wire the reducer and the prepared configuration into the driver.
  final ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> reduceDriver =
      new ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable>();
  reduceDriver.setReducer(reducerUnderTest);
  reduceDriver.withConfiguration(job.getConfiguration());

  // One key with a single-element value list.
  final Text inputKey = new Text("foo");
  final List<AvroValue<Node>> inputValues =
      Collections.singletonList(new AvroValue<Node>(new NodeBuilder("bar", 1.0).build()));
  reduceDriver.withInput(inputKey, inputValues);

  final List<Pair<AvroKey<Node>, NullWritable>> results = reduceDriver.run();

  assertEquals(1, results.size());
  final Pair<AvroKey<Node>, NullWritable> onlyResult = results.get(0);
  assertEquals("bar", onlyResult.getFirst().datum().getLabel().toString());
}
示例3: testMapReduce
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Runs {@code MyNodeReducer} in a {@code ReduceDriver} with the key "foo" and a one-element
 * value list holding a Node built from ("bar", 1.0), then asserts that a single output pair
 * comes back and that its key datum carries the label "bar".
 */
@Test
public void testMapReduce() throws IOException {
  final MyNodeReducer nodeReducer = new MyNodeReducer();

  // Configure a job by hand: mrunit doesn't run exactly like the real hadoop mapreduce
  // framework, so the schemas the reducer relies on have to be put in place explicitly.
  final Job configuredJob = new Job();
  AvroJob.setMapOutputKeySchema(configuredJob, Node.SCHEMA$);
  AvroJob.setOutputKeySchema(configuredJob, nodeReducer.getAvroKeyWriterSchema());
  AvroSerialization.setValueWriterSchema(configuredJob.getConfiguration(), Node.SCHEMA$);

  final ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> harness =
      new ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable>();
  harness.setReducer(nodeReducer);
  harness.withConfiguration(configuredJob.getConfiguration());

  final List<AvroValue<Node>> values =
      Collections.singletonList(new AvroValue<Node>(new NodeBuilder("bar", 1.0).build()));
  harness.withInput(new Text("foo"), values);

  final List<Pair<AvroKey<Node>, NullWritable>> emitted = harness.run();

  assertEquals(1, emitted.size());
  final AvroKey<Node> firstKey = emitted.get(0).getFirst();
  assertEquals("bar", firstKey.datum().getLabel().toString());
}
示例4: runMapReduce
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Configures {@code job} to read Avro keys from {@code inputPath}, run {@code SortMapper}
 * and {@code SortReducer} using the {@code WeatherNoIgnore} schema throughout, write Avro
 * keys to {@code outputPath}, and then runs the job to completion.
 *
 * @param job the job to configure and run
 * @param inputPath location of the Avro input
 * @param outputPath location the job writes to
 * @return the value returned by {@code job.waitForCompletion(true)}
 * @throws Exception if configuring or running the job fails
 */
public boolean runMapReduce(final Job job, Path inputPath, Path outputPath) throws Exception {
  // --- input ---
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(AvroKeyInputFormat.class);
  AvroJob.setInputKeySchema(job, WeatherNoIgnore.SCHEMA$);

  // --- map phase: both key and value carry the same record schema ---
  job.setMapperClass(SortMapper.class);
  AvroJob.setMapOutputKeySchema(job, WeatherNoIgnore.SCHEMA$);
  AvroJob.setMapOutputValueSchema(job, WeatherNoIgnore.SCHEMA$);

  // --- reduce phase and output ---
  job.setReducerClass(SortReducer.class);
  AvroJob.setOutputKeySchema(job, WeatherNoIgnore.SCHEMA$);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  FileOutputFormat.setOutputPath(job, outputPath);

  final boolean completed = job.waitForCompletion(true);
  return completed;
}
示例5: runMapReduce
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Sets up {@code job} as an Avro-in / Avro-out job over {@code Weather} records, using
 * {@code SortMapper} and {@code SortReducer}, and blocks until the job finishes.
 *
 * @param job the job to configure and run
 * @param inputPath location of the Avro input
 * @param outputPath location the job writes to
 * @return the value returned by {@code job.waitForCompletion(true)}
 * @throws Exception if configuring or running the job fails
 */
public boolean runMapReduce(final Job job, Path inputPath, Path outputPath) throws Exception {
  // Where the records come from and how they are read.
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(AvroKeyInputFormat.class);
  AvroJob.setInputKeySchema(job, Weather.SCHEMA$);

  // Mapper emits Weather records as both key and value.
  job.setMapperClass(SortMapper.class);
  AvroJob.setMapOutputKeySchema(job, Weather.SCHEMA$);
  AvroJob.setMapOutputValueSchema(job, Weather.SCHEMA$);

  // Reducer writes Weather keys to the output path.
  job.setReducerClass(SortReducer.class);
  AvroJob.setOutputKeySchema(job, Weather.SCHEMA$);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  FileOutputFormat.setOutputPath(job, outputPath);

  final boolean finished = job.waitForCompletion(true);
  return finished;
}
示例6: configureSchema
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Registers the Avro schemas for {@code job} based on the newest schema found in the source.
 *
 * <p>The newest schema is used for the input key, the map-output value and the output key.
 * The map-output key uses the schema returned by {@code getKeySchema} when
 * {@code shouldDeduplicate} is set, and the newest schema otherwise.
 *
 * @param job the job to configure
 * @throws IOException if a schema lookup fails
 */
private void configureSchema(Job job) throws IOException {
  final Schema latest = getNewestSchemaFromSource(job);
  AvroJob.setInputKeySchema(job, latest);

  // Pick the map-output key schema: a dedicated key schema when deduplicating.
  final Schema mapOutputKeySchema;
  if (this.shouldDeduplicate) {
    mapOutputKeySchema = getKeySchema(job, latest);
  } else {
    mapOutputKeySchema = latest;
  }
  AvroJob.setMapOutputKeySchema(job, mapOutputKeySchema);

  AvroJob.setMapOutputValueSchema(job, latest);
  AvroJob.setOutputKeySchema(job, latest);
}
示例7: process
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Applies the schemas declared on an {@code AvroJobInfo} annotation to {@code job}.
 *
 * <p>Each of the six schema attributes is applied only when it differs from the
 * {@code AvroDefault.class} placeholder. Avro serialization is then added to the job's
 * configuration regardless of which attributes were set.
 *
 * @param annotation the annotation instance; cast to {@code AvroJobInfo}
 * @param job the job to configure
 * @param target not used by this method
 * @throws ToolException declared by the signature; NOTE(review): presumably raised by
 *     {@code getSchema} — confirm
 */
@Override
public void process(Annotation annotation, Job job, Object target)
    throws ToolException {
  final AvroJobInfo info = (AvroJobInfo) annotation;

  // Job input schemas.
  if (info.inputKeySchema() != AvroDefault.class) {
    AvroJob.setInputKeySchema(job, getSchema(info.inputKeySchema()));
  }
  if (info.inputValueSchema() != AvroDefault.class) {
    AvroJob.setInputValueSchema(job, getSchema(info.inputValueSchema()));
  }

  // Final job output schemas.
  if (info.outputKeySchema() != AvroDefault.class) {
    AvroJob.setOutputKeySchema(job, getSchema(info.outputKeySchema()));
  }
  if (info.outputValueSchema() != AvroDefault.class) {
    AvroJob.setOutputValueSchema(job, getSchema(info.outputValueSchema()));
  }

  // Intermediate (map output) schemas.
  if (info.mapOutputKeySchema() != AvroDefault.class) {
    AvroJob.setMapOutputKeySchema(job, getSchema(info.mapOutputKeySchema()));
  }
  if (info.mapOutputValueSchema() != AvroDefault.class) {
    AvroJob.setMapOutputValueSchema(job, getSchema(info.mapOutputValueSchema()));
  }

  // Always done, even when no attribute was overridden.
  AvroSerialization.addToConfiguration(job.getConfiguration());
}
示例8: afterPropertiesSet
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Applies every configured Avro record type to {@code job} as the corresponding schema.
 *
 * <p>For each non-null property (input key/value, map-output key/value, output key/value)
 * the record class is resolved with {@code resolveClass}, instantiated through its no-arg
 * constructor, and the instance's {@code getSchema()} result is registered on the job.
 * Properties left {@code null} are skipped.
 *
 * @throws Exception if a class cannot be resolved or instantiated; an exception thrown by a
 *     record constructor is reported wrapped in an
 *     {@link java.lang.reflect.InvocationTargetException}
 */
@Override
public void afterPropertiesSet() throws Exception {
  // Instances are created with getDeclaredConstructor().newInstance() rather than the
  // deprecated Class.newInstance(), which rethrows checked constructor exceptions
  // without any compile-time checking.
  if (avroInputKey != null) {
    AvroJob.setInputKeySchema(
        job, resolveClass(avroInputKey).getDeclaredConstructor().newInstance().getSchema());
  }
  if (avroInputValue != null) {
    AvroJob.setInputValueSchema(
        job, resolveClass(avroInputValue).getDeclaredConstructor().newInstance().getSchema());
  }
  if (avroMapOutputKey != null) {
    AvroJob.setMapOutputKeySchema(
        job, resolveClass(avroMapOutputKey).getDeclaredConstructor().newInstance().getSchema());
  }
  if (avroMapOutputValue != null) {
    AvroJob.setMapOutputValueSchema(
        job, resolveClass(avroMapOutputValue).getDeclaredConstructor().newInstance().getSchema());
  }
  if (avroOutputKey != null) {
    AvroJob.setOutputKeySchema(
        job, resolveClass(avroOutputKey).getDeclaredConstructor().newInstance().getSchema());
  }
  if (avroOutputValue != null) {
    AvroJob.setOutputValueSchema(
        job, resolveClass(avroOutputValue).getDeclaredConstructor().newInstance().getSchema());
  }
}
示例9: configureSchema
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Registers the Avro schemas for {@code job} from the newest schema found in the source.
 *
 * <p>The input key schema is set only when {@code useSingleInputSchema} is true. The
 * map-output key uses the result of {@code getKeySchema} when {@code shouldDeduplicate} is
 * set and the newest schema otherwise; the map-output value and output key always use the
 * newest schema.
 *
 * @param job the job to configure
 * @throws IOException if a schema lookup fails
 */
private void configureSchema(Job job) throws IOException {
  final Schema latest = getNewestSchemaFromSource(job, this.fs);

  if (this.useSingleInputSchema) {
    AvroJob.setInputKeySchema(job, latest);
  }

  // Deduplication keys the shuffle on a dedicated key schema.
  final Schema shuffleKeySchema;
  if (this.shouldDeduplicate) {
    shuffleKeySchema = getKeySchema(job, latest);
  } else {
    shuffleKeySchema = latest;
  }
  AvroJob.setMapOutputKeySchema(job, shuffleKeySchema);

  AvroJob.setMapOutputValueSchema(job, latest);
  AvroJob.setOutputKeySchema(job, latest);
}
示例10: configureSchema
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Registers the Avro schemas for {@code job} from the newest schema reported by
 * {@code MRCompactorAvroKeyDedupJobRunner.getNewestSchemaFromSource}.
 *
 * <p>The input key schema is set only when the
 * {@code COMPACTION_JOB_AVRO_SINGLE_INPUT_SCHEMA} property reads as true (its fallback here
 * is {@code true}). The map-output key uses the result of {@code getKeySchema} when
 * {@code shouldDeduplicate} is set and the newest schema otherwise; the map-output value
 * and output key always use the newest schema.
 *
 * @param job the job to configure
 * @throws IOException if a schema lookup fails
 */
private void configureSchema(Job job) throws IOException {
  final Schema latest = MRCompactorAvroKeyDedupJobRunner.getNewestSchemaFromSource(job, this.fs);

  final boolean singleInputSchema = this.state.getPropAsBoolean(
      MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_AVRO_SINGLE_INPUT_SCHEMA, true);
  if (singleInputSchema) {
    AvroJob.setInputKeySchema(job, latest);
  }

  final Schema shuffleKeySchema;
  if (this.shouldDeduplicate) {
    shuffleKeySchema = getKeySchema(job, latest);
  } else {
    shuffleKeySchema = latest;
  }
  AvroJob.setMapOutputKeySchema(job, shuffleKeySchema);

  AvroJob.setMapOutputValueSchema(job, latest);
  AvroJob.setOutputKeySchema(job, latest);
}
示例11: internalRun
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Builds the "diff" job over two Avro inputs and returns it; this method does not submit
 * or wait for the job.
 *
 * <p>The two input paths are recorded in {@code conf} before the job is created from it.
 * The record schema comes from {@code SchemaUtils.getConfSchema(conf)}, falling back to
 * {@code SchemaUtils.getSchema(conf, destInput)} when that returns {@code null}. Three
 * named Avro outputs ("kernel", "add", "del") are registered with counters enabled.
 *
 * @param origInput first input path, stored under {@code viadeo.diff.diffinpath}
 * @param destInput second input path, stored under {@code viadeo.diff.diffoutpath}
 * @param outputDir job output directory
 * @param conf configuration the job is created from; modified by this method
 * @return the configured job
 * @throws Exception if job creation or schema lookup fails
 */
public Job internalRun(Path origInput, Path destInput, Path outputDir, Configuration conf) throws Exception {
  // These must be written before the Job is constructed from conf.
  conf.set("viadeo.diff.diffinpath", origInput.toString());
  conf.set("viadeo.diff.diffoutpath", destInput.toString());

  final Job diffJob = new Job(conf);
  diffJob.setJarByClass(DiffJob.class);
  diffJob.setJobName("diff");

  // Prefer a schema supplied through the configuration; otherwise derive it from destInput.
  Schema recordSchema = SchemaUtils.getConfSchema(conf);
  if (recordSchema == null) {
    recordSchema = SchemaUtils.getSchema(conf, destInput);
  }

  // Input and map side.
  FileInputFormat.setInputPaths(diffJob, origInput, destInput);
  diffJob.setInputFormatClass(AvroKeyInputFormat.class);
  diffJob.setMapperClass(DiffMapper.class);
  AvroJob.setInputKeySchema(diffJob, recordSchema);
  AvroJob.setMapOutputKeySchema(diffJob, recordSchema);
  diffJob.setMapOutputValueClass(Text.class);

  // Reduce side.
  diffJob.setReducerClass(DiffReducer.class);
  AvroJob.setOutputKeySchema(diffJob, recordSchema);
  diffJob.setOutputValueClass(Text.class);
  diffJob.setOutputFormatClass(AvroKeyOutputFormat.class);

  // Output location plus one named Avro output per diff category.
  FileOutputFormat.setOutputPath(diffJob, outputDir);
  for (String namedOutput : new String[] {"kernel", "add", "del"}) {
    AvroMultipleOutputs.addNamedOutput(diffJob, namedOutput, AvroKeyOutputFormat.class, recordSchema);
  }
  AvroMultipleOutputs.setCountersEnabled(diffJob, true);

  return diffJob;
}
示例12: submitJob
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Creates the "usage-per-hour" staged job for one cluster and day and hands it to
 * {@code executor}.
 *
 * <p>The job reads Avro key/value pairs (string key, {@code LogData} value), and uses
 * {@code AttemptStatsKey}/{@code AttemptStatsValue} for both map output and final output.
 *
 * @param executor executor the job is submitted to
 * @param inputPattern the single input path entry for the job
 * @param output output path; {@code "/tmp" + output} is also passed to
 *     {@code createStagedJob} (presumably as the staging location — confirm)
 * @param clusterName stored in the job configuration as {@code cluster.name} and used in
 *     the job name
 * @param year used in the job name
 * @param day used in the job name
 * @param numReducers number of reduce tasks
 */
private void submitJob(StagedOutputJobExecutor executor, String inputPattern, String output, String clusterName, String year, String day, int numReducers) {
  final List<String> inputs = new ArrayList<String>();
  inputs.add(inputPattern);

  final String jobName = _name + "-" + "usage-per-hour-" + clusterName + "-" + year + "-" + day;
  final String stagingPath = "/tmp" + output;
  final StagedOutputJob job =
      StagedOutputJob.createStagedJob(_props, jobName, inputs, stagingPath, output, _log);

  final Configuration jobConf = job.getConfiguration();
  jobConf.set("cluster.name", clusterName);

  // Output classes and Avro key/value formats on both ends.
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  job.setInputFormatClass(AvroKeyValueInputFormat.class);
  job.setOutputFormatClass(AvroKeyValueOutputFormat.class);

  // Schemas: input -> map output -> final output.
  AvroJob.setInputKeySchema(job, Schema.create(Type.STRING));
  AvroJob.setInputValueSchema(job, LogData.SCHEMA$);
  AvroJob.setMapOutputKeySchema(job, AttemptStatsKey.SCHEMA$);
  AvroJob.setMapOutputValueSchema(job, AttemptStatsValue.SCHEMA$);
  AvroJob.setOutputKeySchema(job, AttemptStatsKey.SCHEMA$);
  AvroJob.setOutputValueSchema(job, AttemptStatsValue.SCHEMA$);

  job.setNumReduceTasks(numReducers);
  job.setMapperClass(ComputeUsagePerHour.TheMapper.class);
  job.setReducerClass(ComputeUsagePerHour.TheReducer.class);

  executor.submit(job);
}
示例13: getContext
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Returns the task attempt context for the given named output, creating and caching it in
 * {@code taskContexts} on first use.
 *
 * <p>On a cache miss this writes {@code avro.mo.config.namedOutput} into the current
 * context's configuration, builds a job from that configuration, sets the named output's
 * format class, and applies the key/value schemas registered for it. With zero reduce tasks
 * the schemas are applied as map-output schemas, otherwise as output schemas.
 *
 * @param nameOutput the named output to get a context for
 * @return the cached or newly created context
 * @throws IOException if the job cannot be created
 */
private TaskAttemptContext getContext(String nameOutput) throws IOException {
  final TaskAttemptContext cached = taskContexts.get(nameOutput);
  if (cached != null) {
    return cached;
  }

  // The following trick leverages the instantiation of a record writer via
  // the job thus supporting arbitrary output formats.
  context.getConfiguration().set("avro.mo.config.namedOutput", nameOutput);
  final Job job = new Job(context.getConfiguration());
  job.setOutputFormatClass(getNamedOutputFormatClass(context, nameOutput));

  final Schema keySchema = keySchemas.get(nameOutput + "_KEYSCHEMA");
  final Schema valSchema = valSchemas.get(nameOutput + "_VALSCHEMA");
  final boolean mapOnly = (0 == job.getNumReduceTasks());

  if (keySchema != null) {
    if (mapOnly) {
      AvroJob.setMapOutputKeySchema(job, keySchema);
    } else {
      AvroJob.setOutputKeySchema(job, keySchema);
    }
  }
  if (valSchema != null) {
    if (mapOnly) {
      AvroJob.setMapOutputValueSchema(job, valSchema);
    } else {
      AvroJob.setOutputValueSchema(job, valSchema);
    }
  }

  final TaskAttemptContext created =
      new TaskAttemptContext(job.getConfiguration(), context.getTaskAttemptID());
  taskContexts.put(nameOutput, created);
  return created;
}
示例14: runMapReduce
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Configures {@code job} as an Avro-in / Avro-out job over {@code WeatherNoIgnore} records
 * with {@code SortMapper} and {@code SortReducer}, applies an {@code AvroSort}
 * configuration, and runs the job to completion.
 *
 * <p>The {@code AvroSort} builder is given "station" as partition field, "station", "time"
 * and "temp" as sort fields, and "station" and "time" as group fields, each with the flag
 * {@code true}.
 *
 * @param job the job to configure and run
 * @param inputPath location of the Avro input
 * @param outputPath location the job writes to
 * @return the value returned by {@code job.waitForCompletion(true)}
 * @throws Exception if configuring or running the job fails
 */
public boolean runMapReduce(final Job job, Path inputPath, Path outputPath) throws Exception {
  // --- input ---
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(AvroKeyInputFormat.class);
  AvroJob.setInputKeySchema(job, WeatherNoIgnore.SCHEMA$);

  // --- map phase ---
  job.setMapperClass(SortMapper.class);
  AvroJob.setMapOutputKeySchema(job, WeatherNoIgnore.SCHEMA$);
  AvroJob.setMapOutputValueSchema(job, WeatherNoIgnore.SCHEMA$);

  // --- reduce phase and output ---
  job.setReducerClass(SortReducer.class);
  AvroJob.setOutputKeySchema(job, WeatherNoIgnore.SCHEMA$);
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  FileOutputFormat.setOutputPath(job, outputPath);

  // --- partitioning / sorting / grouping; applied after the AvroJob setup above ---
  AvroSort.builder()
      .setJob(job)
      .addPartitionField(WeatherNoIgnore.SCHEMA$, "station", true)
      .addSortField(WeatherNoIgnore.SCHEMA$, "station", true)
      .addSortField(WeatherNoIgnore.SCHEMA$, "time", true)
      .addSortField(WeatherNoIgnore.SCHEMA$, "temp", true)
      .addGroupField(WeatherNoIgnore.SCHEMA$, "station", true)
      .addGroupField(WeatherNoIgnore.SCHEMA$, "time", true)
      .configure();

  final boolean completed = job.waitForCompletion(true);
  return completed;
}
示例15: execute
import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Submits one "parse-jobs" staged job per processing task for every cluster listed in
 * {@code _clusterNames} (comma-separated), waiting on the executor after each cluster's
 * jobs have been submitted.
 *
 * <p>Each job reads text input, emits string keys with {@code LogData} values from both
 * mapper and reducer, registers a named output "logs" with the same schemas, and uses one
 * reducer per 12 GB of input (rounded up).
 *
 * @param executor executor the jobs are submitted to and awaited on
 * @throws IOException declared by the signature
 * @throws InterruptedException declared by the signature
 * @throws ExecutionException declared by the signature
 */
public void execute(StagedOutputJobExecutor executor) throws IOException, InterruptedException, ExecutionException {
  for (String clusterName : _clusterNames.split(",")) {
    System.out.println("Processing cluster " + clusterName);

    final List<JobStatsProcessing.ProcessingTask> tasks = JobStatsProcessing.getTasks(
        _fs, _logsRoot, clusterName, _jobsOutputPathRoot, _incremental, _numDays, _numDaysForced);

    for (JobStatsProcessing.ProcessingTask task : tasks) {
      final List<String> inputs = new ArrayList<String>();
      inputs.add(task.inputPathFormat);

      final String destination = task.outputPath;
      final String jobName = _name + "-parse-jobs-" + task.id;
      final StagedOutputJob job = StagedOutputJob.createStagedJob(
          _props, jobName, inputs, "/tmp" + destination, destination, _log);

      job.getConfiguration().set("jobs.output.path", _jobsOutputPathRoot);
      job.getConfiguration().set("logs.cluster.name", clusterName);

      // 1 reducer per 12 GB of input data
      final int reducerCount = (int) Math.ceil(((double) task.totalLength) / 1024 / 1024 / 1024 / 12);

      job.setOutputKeyClass(BytesWritable.class);
      job.setOutputValueClass(BytesWritable.class);
      job.setInputFormatClass(CombinedTextInputFormat.class);
      job.setOutputFormatClass(AvroKeyValueOutputFormat.class);

      AvroJob.setOutputKeySchema(job, Schema.create(Type.STRING));
      AvroJob.setOutputValueSchema(job, LogData.SCHEMA$);

      job.setNumReduceTasks(reducerCount);
      job.setMapperClass(ParseJobsFromLogs.TheMapper.class);
      job.setReducerClass(ParseJobsFromLogs.TheReducer.class);

      AvroJob.setMapOutputKeySchema(job, Schema.create(Type.STRING));
      AvroJob.setMapOutputValueSchema(job, LogData.SCHEMA$);

      MyAvroMultipleOutputs.addNamedOutput(
          job, "logs", AvroKeyValueOutputFormat.class, Schema.create(Type.STRING), LogData.SCHEMA$);

      executor.submit(job);
    }

    // Wait once per cluster, after all of that cluster's jobs were submitted.
    executor.waitForCompletion();
  }
}