Java AvroJob.setMapOutputKeySchema方法代码示例

本文整理汇总了Java中org.apache.avro.mapreduce.AvroJob.setMapOutputKeySchema方法的典型用法代码示例。如果您正苦于以下问题：Java AvroJob.setMapOutputKeySchema方法的具体用法？Java AvroJob.setMapOutputKeySchema怎么用？Java AvroJob.setMapOutputKeySchema使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.avro.mapreduce.AvroJob的用法示例。

在下文中一共展示了AvroJob.setMapOutputKeySchema方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: setSchema

import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Hacked method: applies the supplied Avro schemas to {@code job}.
 *
 * <p>Every non-null schema is always set as the job's output schema. When the
 * job reports zero reduce tasks, the same schema is additionally set as the
 * map output schema, and that call happens first.
 *
 * @param job       the job to configure
 * @param keySchema key schema to apply; nothing is set for keys when {@code null}
 * @param valSchema value schema to apply; nothing is set for values when {@code null}
 */
private void setSchema(Job job, Schema keySchema, Schema valSchema) {
  final boolean mapOnly = (0 == job.getNumReduceTasks());
  final boolean hasKeySchema = (null != keySchema);
  final boolean hasValSchema = (null != valSchema);

  // Keys: map output schema (map-only jobs) first, then the job output schema.
  if (hasKeySchema && mapOnly) {
    AvroJob.setMapOutputKeySchema(job, keySchema);
  }
  if (hasKeySchema) {
    AvroJob.setOutputKeySchema(job, keySchema);
  }

  // Values: same sequence as for keys.
  if (hasValSchema && mapOnly) {
    AvroJob.setMapOutputValueSchema(job, valSchema);
  }
  if (hasValSchema) {
    AvroJob.setOutputValueSchema(job, valSchema);
  }
}

开发者ID:openaire，项目名称:iis，代码行数:19，代码来源:AvroMultipleOutputs.java

示例2: testMapReduce

import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Drives {@code MyAvroReducer} through an mrunit {@code ReduceDriver} with a
 * single ("foo" -> [node "bar"]) input and verifies that exactly one record is
 * emitted and that its key datum carries the label "bar".
 */
@Test
public void testMapReduce() throws IOException {
  final MyAvroReducer avroReducer = new MyAvroReducer();

  // Configure a job.
  // We've got to do a little hacking here since mrunit doesn't run exactly like
  // the real hadoop mapreduce framework, so the schemas are set up by hand.
  final Job avroJob = new Job();
  AvroJob.setMapOutputKeySchema(avroJob, Node.SCHEMA$);
  AvroJob.setOutputKeySchema(avroJob, avroReducer.getAvroKeyWriterSchema());
  AvroSerialization.setValueWriterSchema(avroJob.getConfiguration(), Node.SCHEMA$);

  // Wire the reducer and the hand-built configuration into the driver.
  final ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> reduceDriver
      = new ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable>();
  reduceDriver.setReducer(avroReducer);
  reduceDriver.withConfiguration(avroJob.getConfiguration());

  // Single input group: key "foo" with one node value labelled "bar".
  final Text inputKey = new Text("foo");
  final List<AvroValue<Node>> inputValues =
      Collections.singletonList(new AvroValue<Node>(new NodeBuilder("bar", 1.0).build()));
  reduceDriver.withInput(inputKey, inputValues);

  // Run the reducer.
  final List<Pair<AvroKey<Node>, NullWritable>> results = reduceDriver.run();
  assertEquals(1, results.size());

  final AvroKey<Node> emittedKey = results.get(0).getFirst();
  assertEquals("bar", emittedKey.datum().getLabel().toString());
}

开发者ID:kijiproject，项目名称:kiji-mapreduce-lib，代码行数:24，代码来源:TestAvroReducer.java

示例3: testMapReduce

import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Feeds one ("foo" -> [node "bar"]) group to {@code MyNodeReducer} via an
 * mrunit {@code ReduceDriver} and asserts that a single output record comes
 * back whose key datum has the label "bar".
 */
@Test
public void testMapReduce() throws IOException {
  final MyNodeReducer nodeReducer = new MyNodeReducer();

  // Configure a job.
  // We've got to do a little hacking here since mrunit doesn't run exactly like
  // the real hadoop mapreduce framework.
  final Job nodeJob = new Job();
  AvroJob.setMapOutputKeySchema(nodeJob, Node.SCHEMA$);
  AvroJob.setOutputKeySchema(nodeJob, nodeReducer.getAvroKeyWriterSchema());
  AvroSerialization.setValueWriterSchema(nodeJob.getConfiguration(), Node.SCHEMA$);

  final ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> reduceDriver
      = new ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable>();
  reduceDriver.setReducer(nodeReducer);
  reduceDriver.withConfiguration(nodeJob.getConfiguration());

  // The only input: key "foo" mapped to a one-element list holding node "bar".
  final Text key = new Text("foo");
  final AvroValue<Node> onlyValue = new AvroValue<Node>(new NodeBuilder("bar", 1.0).build());
  reduceDriver.withInput(key, Collections.singletonList(onlyValue));

  final List<Pair<AvroKey<Node>, NullWritable>> emitted = reduceDriver.run();
  assertEquals(1, emitted.size());

  final Pair<AvroKey<Node>, NullWritable> firstRecord = emitted.get(0);
  assertEquals("bar", firstRecord.getFirst().datum().getLabel().toString());
}

开发者ID:kijiproject，项目名称:kiji-mapreduce-lib，代码行数:24，代码来源:TestNodeReducer.java

示例4: runMapReduce

import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Configures {@code job} as an Avro-keyed sort job over the
 * {@code WeatherNoIgnore} schema and runs it to completion.
 *
 * @param job        the job to configure and run
 * @param inputPath  path registered as the job's input
 * @param outputPath path registered as the job's output
 * @return the value returned by {@code job.waitForCompletion(true)}
 * @throws Exception if configuring or running the job fails
 */
public boolean runMapReduce(final Job job, Path inputPath, Path outputPath) throws Exception {
    // Processing classes and I/O formats.
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(SortMapper.class);
    job.setReducerClass(SortReducer.class);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);

    // The same schema is used for the input key, the map output key/value
    // and the final output key.
    AvroJob.setInputKeySchema(job, WeatherNoIgnore.SCHEMA$);
    AvroJob.setMapOutputKeySchema(job, WeatherNoIgnore.SCHEMA$);
    AvroJob.setMapOutputValueSchema(job, WeatherNoIgnore.SCHEMA$);
    AvroJob.setOutputKeySchema(job, WeatherNoIgnore.SCHEMA$);

    // Input and output locations.
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    final boolean completed = job.waitForCompletion(true);
    return completed;
}

开发者ID:alexholmes，项目名称:avro-sorting，代码行数:18，代码来源:AvroSortDefault.java

示例5: runMapReduce

import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Configures {@code job} as an Avro-keyed sort job over the {@code Weather}
 * schema and runs it to completion.
 *
 * @param job        the job to configure and run
 * @param inputPath  path registered as the job's input
 * @param outputPath path registered as the job's output
 * @return the value returned by {@code job.waitForCompletion(true)}
 * @throws Exception if configuring or running the job fails
 */
public boolean runMapReduce(final Job job, Path inputPath, Path outputPath) throws Exception {
    // Processing classes and I/O formats.
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(SortMapper.class);
    job.setReducerClass(SortReducer.class);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);

    // One schema serves as input key, map output key/value and output key.
    AvroJob.setInputKeySchema(job, Weather.SCHEMA$);
    AvroJob.setMapOutputKeySchema(job, Weather.SCHEMA$);
    AvroJob.setMapOutputValueSchema(job, Weather.SCHEMA$);
    AvroJob.setOutputKeySchema(job, Weather.SCHEMA$);

    // Input and output locations.
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    final boolean completed = job.waitForCompletion(true);
    return completed;
}

开发者ID:alexholmes，项目名称:avro-sorting，代码行数:18，代码来源:AvroSortWithIgnores.java

示例6: configureSchema

import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Sets the Avro schemas of {@code job} from the newest schema found in the source.
 *
 * <p>The newest schema is used for the input key, the map output value and the
 * output key. The map output key uses the schema returned by
 * {@code getKeySchema} when {@code shouldDeduplicate} is set, and the newest
 * schema otherwise.
 *
 * @param job the job to configure
 * @throws IOException declared by this method; presumably raised while reading the source schema (not visible here)
 */
private void configureSchema(Job job) throws IOException {
  final Schema latest = getNewestSchemaFromSource(job);
  AvroJob.setInputKeySchema(job, latest);

  // Resolved only after the input key schema has been set, as in the original sequence.
  final Schema mapOutputKeySchema;
  if (this.shouldDeduplicate) {
    mapOutputKeySchema = getKeySchema(job, latest);
  } else {
    mapOutputKeySchema = latest;
  }

  AvroJob.setMapOutputKeySchema(job, mapOutputKeySchema);
  AvroJob.setMapOutputValueSchema(job, latest);
  AvroJob.setOutputKeySchema(job, latest);
}

开发者ID:Hanmourang，项目名称:Gobblin，代码行数:8，代码来源:MRCompactorAvroKeyDedupJobRunner.java

示例7: process

import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Applies the schema settings carried by an {@code AvroJobInfo} annotation to
 * {@code job}.
 *
 * <p>Each of the six schema attributes (input, output and map output, key and
 * value) is applied only when it differs from {@code AvroDefault.class}.
 * {@code AvroSerialization.addToConfiguration} is always called at the end.
 *
 * @param annotation the annotation to read; cast to {@code AvroJobInfo}
 * @param job        the job to configure
 * @param target     not used by this method
 * @throws ToolException declared by this method; presumably raised by {@code getSchema} (not visible here)
 */
@Override
public void process(Annotation annotation, Job job, Object target)
		throws ToolException {

	final AvroJobInfo info = (AvroJobInfo) annotation;

	// Input schemas.
	if (AvroDefault.class != info.inputKeySchema()) {
		AvroJob.setInputKeySchema(job, getSchema(info.inputKeySchema()));
	}
	if (AvroDefault.class != info.inputValueSchema()) {
		AvroJob.setInputValueSchema(job, getSchema(info.inputValueSchema()));
	}

	// Final output schemas.
	if (AvroDefault.class != info.outputKeySchema()) {
		AvroJob.setOutputKeySchema(job, getSchema(info.outputKeySchema()));
	}
	if (AvroDefault.class != info.outputValueSchema()) {
		AvroJob.setOutputValueSchema(job, getSchema(info.outputValueSchema()));
	}

	// Intermediate (map output) schemas.
	if (AvroDefault.class != info.mapOutputKeySchema()) {
		AvroJob.setMapOutputKeySchema(job, getSchema(info.mapOutputKeySchema()));
	}
	if (AvroDefault.class != info.mapOutputValueSchema()) {
		AvroJob.setMapOutputValueSchema(job, getSchema(info.mapOutputValueSchema()));
	}

	AvroSerialization.addToConfiguration(job.getConfiguration());
}

开发者ID:conversant，项目名称:mara，代码行数:29，代码来源:AvroJobInfoAnnotationHandler.java

示例8: afterPropertiesSet

import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Applies the configured Avro record types to {@code job}.
 *
 * <p>For every property that is not {@code null}, the class is resolved with
 * {@code resolveClass}, instantiated with {@code newInstance()}, and the
 * instance's schema is set on the job for the matching slot (input, map
 * output or output; key or value). Properties left {@code null} are skipped.
 *
 * @throws Exception if a class cannot be resolved or instantiated, or if setting a schema fails
 */
@Override
public void afterPropertiesSet() throws Exception {

    // Input schemas.
    if (null != avroInputKey) {
        AvroJob.setInputKeySchema(job, resolveClass(avroInputKey).newInstance().getSchema());
    }
    if (null != avroInputValue) {
        AvroJob.setInputValueSchema(job, resolveClass(avroInputValue).newInstance().getSchema());
    }

    // Map output schemas.
    if (null != avroMapOutputKey) {
        AvroJob.setMapOutputKeySchema(job, resolveClass(avroMapOutputKey).newInstance().getSchema());
    }
    if (null != avroMapOutputValue) {
        AvroJob.setMapOutputValueSchema(job, resolveClass(avroMapOutputValue).newInstance().getSchema());
    }

    // Final output schemas.
    if (null != avroOutputKey) {
        AvroJob.setOutputKeySchema(job, resolveClass(avroOutputKey).newInstance().getSchema());
    }
    if (null != avroOutputValue) {
        AvroJob.setOutputValueSchema(job, resolveClass(avroOutputValue).newInstance().getSchema());
    }
}

开发者ID:ch4mpy，项目名称:hadoop2，代码行数:30，代码来源:AvroJobInitializingBean.java

示例9: configureSchema

import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Sets the Avro schemas of {@code job} from the newest schema found in the source.
 *
 * <p>The input key schema is set only when {@code useSingleInputSchema} is
 * true. The map output key uses the schema from {@code getKeySchema} when
 * {@code shouldDeduplicate} is set, and the newest schema otherwise. The map
 * output value and the output key always use the newest schema.
 *
 * @param job the job to configure
 * @throws IOException declared by this method; presumably raised while reading the source schema (not visible here)
 */
private void configureSchema(Job job) throws IOException {
  final Schema latest = getNewestSchemaFromSource(job, this.fs);

  if (this.useSingleInputSchema) {
    AvroJob.setInputKeySchema(job, latest);
  }

  // Resolved after the optional input key schema call, as in the original sequence.
  final Schema mapOutputKeySchema;
  if (this.shouldDeduplicate) {
    mapOutputKeySchema = getKeySchema(job, latest);
  } else {
    mapOutputKeySchema = latest;
  }

  AvroJob.setMapOutputKeySchema(job, mapOutputKeySchema);
  AvroJob.setMapOutputValueSchema(job, latest);
  AvroJob.setOutputKeySchema(job, latest);
}

开发者ID:apache，项目名称:incubator-gobblin，代码行数:10，代码来源:MRCompactorAvroKeyDedupJobRunner.java

示例10: configureSchema

import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Sets the Avro schemas of {@code job} from the newest schema found in the source.
 *
 * <p>The input key schema is set only when the
 * {@code COMPACTION_JOB_AVRO_SINGLE_INPUT_SCHEMA} property reads as true
 * (it is read with a default of {@code true}). The map output key uses the
 * schema from {@code getKeySchema} when {@code shouldDeduplicate} is set, and
 * the newest schema otherwise. The map output value and the output key always
 * use the newest schema.
 *
 * @param job the job to configure
 * @throws IOException declared by this method; presumably raised while reading the source schema (not visible here)
 */
private void configureSchema(Job job) throws IOException {
  final Schema latest = MRCompactorAvroKeyDedupJobRunner.getNewestSchemaFromSource(job, this.fs);

  final boolean singleInputSchema = this.state.getPropAsBoolean(
      MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_AVRO_SINGLE_INPUT_SCHEMA, true);
  if (singleInputSchema) {
    AvroJob.setInputKeySchema(job, latest);
  }

  // Resolved after the optional input key schema call, as in the original sequence.
  final Schema mapOutputKeySchema;
  if (this.shouldDeduplicate) {
    mapOutputKeySchema = getKeySchema(job, latest);
  } else {
    mapOutputKeySchema = latest;
  }

  AvroJob.setMapOutputKeySchema(job, mapOutputKeySchema);
  AvroJob.setMapOutputValueSchema(job, latest);
  AvroJob.setOutputKeySchema(job, latest);
}

开发者ID:apache，项目名称:incubator-gobblin，代码行数:10，代码来源:CompactionAvroJobConfigurator.java

示例11: internalRun

import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Builds the "diff" job over two Avro inputs and returns it without
 * submitting it.
 *
 * <p>Both input paths are recorded in {@code conf} before the job is created
 * from it. The schema comes from {@code SchemaUtils.getConfSchema(conf)},
 * falling back to {@code SchemaUtils.getSchema(conf, destInput)} when that
 * returns {@code null}. Three named outputs ("kernel", "add", "del") are
 * registered with that schema and counters are enabled for them.
 *
 * @param origInput first input path; stored under "viadeo.diff.diffinpath"
 * @param destInput second input path; stored under "viadeo.diff.diffoutpath"
 * @param outputDir output directory of the job
 * @param conf      configuration to update and to build the job from
 * @return the configured job
 * @throws Exception if the schema lookup or the job configuration fails
 */
public Job internalRun(Path origInput, Path destInput, Path outputDir, Configuration conf) throws Exception {
    conf.set("viadeo.diff.diffinpath", origInput.toString());
    conf.set("viadeo.diff.diffoutpath", destInput.toString());

    final Job diffJob = new Job(conf);
    diffJob.setJarByClass(DiffJob.class);
    diffJob.setJobName("diff");

    // Prefer the schema stored in the configuration; otherwise use the one from destInput.
    final Schema confSchema = SchemaUtils.getConfSchema(conf);
    final Schema schema = (confSchema != null) ? confSchema : SchemaUtils.getSchema(conf, destInput);

    // ~ INPUT
    FileInputFormat.setInputPaths(diffJob, origInput, destInput);
    diffJob.setInputFormatClass(AvroKeyInputFormat.class);

    // ~ MAP / REDUCE
    diffJob.setMapperClass(DiffMapper.class);
    AvroJob.setInputKeySchema(diffJob, schema);
    AvroJob.setMapOutputKeySchema(diffJob, schema);
    diffJob.setMapOutputValueClass(Text.class);
    diffJob.setReducerClass(DiffReducer.class);
    AvroJob.setOutputKeySchema(diffJob, schema);
    diffJob.setOutputValueClass(Text.class);
    diffJob.setOutputFormatClass(AvroKeyOutputFormat.class);

    // ~ OUTPUT
    FileOutputFormat.setOutputPath(diffJob, outputDir);
    for (String namedOutput : new String[] {"kernel", "add", "del"}) {
        AvroMultipleOutputs.addNamedOutput(diffJob, namedOutput, AvroKeyOutputFormat.class, schema);
    }
    AvroMultipleOutputs.setCountersEnabled(diffJob, true);

    return diffJob;
}

开发者ID:viadeo，项目名称:viadeo-avro-utils，代码行数:38，代码来源:DiffJob.java

示例12: submitJob

import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Creates the staged "usage-per-hour" job for one cluster and day and hands it
 * to {@code executor}.
 *
 * <p>The job reads Avro key/value input (string key, {@code LogData} value)
 * and uses {@code AttemptStatsKey} / {@code AttemptStatsValue} for both the
 * map output and the final output.
 *
 * @param executor     executor the job is submitted to
 * @param inputPattern the single input path entry of the job
 * @param output       final output path; the staging path is "/tmp" + output
 * @param clusterName  stored in the job configuration under "cluster.name"
 * @param year         used in the job name
 * @param day          used in the job name
 * @param numReducers  number of reduce tasks
 */
private void submitJob(StagedOutputJobExecutor executor, String inputPattern, String output, String clusterName, String year, String day, int numReducers)
{
  final String jobName = _name + "-" + "usage-per-hour-" + clusterName + "-" + year + "-" + day;

  final List<String> inputs = new ArrayList<String>();
  inputs.add(inputPattern);

  final StagedOutputJob stagedJob = StagedOutputJob.createStagedJob(
    _props,
    jobName,
    inputs,
    "/tmp" + output,
    output,
    _log);

  stagedJob.getConfiguration().set("cluster.name", clusterName);

  // Output classes and I/O formats.
  stagedJob.setOutputKeyClass(BytesWritable.class);
  stagedJob.setOutputValueClass(BytesWritable.class);
  stagedJob.setInputFormatClass(AvroKeyValueInputFormat.class);
  stagedJob.setOutputFormatClass(AvroKeyValueOutputFormat.class);

  // Schemas: input, then map output, then final output.
  AvroJob.setInputKeySchema(stagedJob, Schema.create(Type.STRING));
  AvroJob.setInputValueSchema(stagedJob, LogData.SCHEMA$);
  AvroJob.setMapOutputKeySchema(stagedJob, AttemptStatsKey.SCHEMA$);
  AvroJob.setMapOutputValueSchema(stagedJob, AttemptStatsValue.SCHEMA$);
  AvroJob.setOutputKeySchema(stagedJob, AttemptStatsKey.SCHEMA$);
  AvroJob.setOutputValueSchema(stagedJob, AttemptStatsValue.SCHEMA$);

  // Parallelism and processing classes.
  stagedJob.setNumReduceTasks(numReducers);
  stagedJob.setMapperClass(ComputeUsagePerHour.TheMapper.class);
  stagedJob.setReducerClass(ComputeUsagePerHour.TheReducer.class);

  executor.submit(stagedJob);
}

开发者ID:nkrishnaveni，项目名称:polar-bear，代码行数:41，代码来源:ComputeUsagePerHour.java

示例13: getContext

import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Returns the task attempt context for the given named output, creating and
 * caching it in {@code taskContexts} on first use.
 *
 * <p>On a cache miss, "avro.mo.config.namedOutput" is set on the current
 * context's configuration, a job is created from that configuration and given
 * the named output's format class. If a key or value schema is registered for
 * the named output, it is set as the map output schema when the job has zero
 * reduce tasks, and as the output schema otherwise.
 *
 * @param nameOutput the named output to look up
 * @return the cached or newly created context
 * @throws IOException if creating the job fails
 */
private TaskAttemptContext getContext(String nameOutput) throws IOException {
    final TaskAttemptContext cached = taskContexts.get(nameOutput);
    if (cached != null) {
        return cached;
    }

    // The following trick leverages the instantiation of a record writer via
    // the job thus supporting arbitrary output formats.
    context.getConfiguration().set("avro.mo.config.namedOutput", nameOutput);
    final Job namedJob = new Job(context.getConfiguration());
    namedJob.setOutputFormatClass(getNamedOutputFormatClass(context, nameOutput));

    final Schema keySchema = keySchemas.get(nameOutput + "_KEYSCHEMA");
    final Schema valSchema = valSchemas.get(nameOutput + "_VALSCHEMA");
    final boolean mapOnly = (0 == namedJob.getNumReduceTasks());

    if (keySchema != null) {
        if (mapOnly) {
            AvroJob.setMapOutputKeySchema(namedJob, keySchema);
        } else {
            AvroJob.setOutputKeySchema(namedJob, keySchema);
        }
    }

    if (valSchema != null) {
        if (mapOnly) {
            AvroJob.setMapOutputValueSchema(namedJob, valSchema);
        } else {
            AvroJob.setOutputValueSchema(namedJob, valSchema);
        }
    }

    final TaskAttemptContext created = new TaskAttemptContext(
        namedJob.getConfiguration(), context.getTaskAttemptID());
    taskContexts.put(nameOutput, created);
    return created;
}

开发者ID:nkrishnaveni，项目名称:polar-bear，代码行数:40，代码来源:MyAvroMultipleOutputs.java

示例14: runMapReduce

import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Configures {@code job} as an Avro-keyed sort job over the
 * {@code WeatherNoIgnore} schema with custom {@code AvroSort} settings, then
 * runs it to completion.
 *
 * <p>The {@code AvroSort} builder is configured with "station" as the
 * partition field, "station", "time" and "temp" as sort fields, and "station"
 * and "time" as group fields; every field is added with the flag {@code true}.
 *
 * @param job        the job to configure and run
 * @param inputPath  path registered as the job's input
 * @param outputPath path registered as the job's output
 * @return the value returned by {@code job.waitForCompletion(true)}
 * @throws Exception if configuring or running the job fails
 */
public boolean runMapReduce(final Job job, Path inputPath, Path outputPath) throws Exception {
    // Processing classes and I/O formats.
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(SortMapper.class);
    job.setReducerClass(SortReducer.class);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);

    // One schema serves as input key, map output key/value and output key.
    AvroJob.setInputKeySchema(job, WeatherNoIgnore.SCHEMA$);
    AvroJob.setMapOutputKeySchema(job, WeatherNoIgnore.SCHEMA$);
    AvroJob.setMapOutputValueSchema(job, WeatherNoIgnore.SCHEMA$);
    AvroJob.setOutputKeySchema(job, WeatherNoIgnore.SCHEMA$);

    // Input and output locations.
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // Applied last, after all AvroJob schema calls, as in the original sequence.
    AvroSort.builder()
            .setJob(job)
            .addPartitionField(WeatherNoIgnore.SCHEMA$, "station", true)
            .addSortField(WeatherNoIgnore.SCHEMA$, "station", true)
            .addSortField(WeatherNoIgnore.SCHEMA$, "time", true)
            .addSortField(WeatherNoIgnore.SCHEMA$, "temp", true)
            .addGroupField(WeatherNoIgnore.SCHEMA$, "station", true)
            .addGroupField(WeatherNoIgnore.SCHEMA$, "time", true)
            .configure();

    final boolean completed = job.waitForCompletion(true);
    return completed;
}

开发者ID:alexholmes，项目名称:avro-sorting，代码行数:28，代码来源:AvroSortCustom.java

示例15: execute

import org.apache.avro.mapreduce.AvroJob; //导入方法依赖的package包/类
/**
 * Creates and submits one staged log-parsing job per processing task, cluster
 * by cluster.
 *
 * <p>For every comma-separated entry of {@code _clusterNames}, the processing
 * tasks are obtained from {@code JobStatsProcessing.getTasks}, a staged job is
 * built and submitted for each task, and the executor is then waited on
 * before the next cluster is handled.
 *
 * @param executor executor that receives the jobs and is waited on per cluster
 * @throws IOException          declared by this method
 * @throws InterruptedException declared by this method
 * @throws ExecutionException   declared by this method
 */
public void execute(StagedOutputJobExecutor executor) throws IOException, InterruptedException, ExecutionException
{
  for (String clusterName : _clusterNames.split(","))
  {
    System.out.println("Processing cluster " + clusterName);

    final List<JobStatsProcessing.ProcessingTask> tasks = JobStatsProcessing.getTasks(
        _fs, _logsRoot, clusterName, _jobsOutputPathRoot, _incremental, _numDays, _numDaysForced);

    for (JobStatsProcessing.ProcessingTask task : tasks)
    {
      final List<String> inputs = new ArrayList<String>();
      inputs.add(task.inputPathFormat);

      final String finalOutput = task.outputPath;

      final StagedOutputJob stagedJob = StagedOutputJob.createStagedJob(
         _props,
         _name + "-parse-jobs-" + task.id,
         inputs,
         "/tmp" + finalOutput,
         finalOutput,
         _log);

      stagedJob.getConfiguration().set("jobs.output.path", _jobsOutputPathRoot);
      stagedJob.getConfiguration().set("logs.cluster.name", clusterName);

      // 1 reducer per 12 GB of input data
      final double inputBytes = (double) task.totalLength;
      final int reducerCount = (int) Math.ceil(inputBytes / 1024 / 1024 / 1024 / 12);

      // Output classes and I/O formats.
      stagedJob.setOutputKeyClass(BytesWritable.class);
      stagedJob.setOutputValueClass(BytesWritable.class);
      stagedJob.setInputFormatClass(CombinedTextInputFormat.class);
      stagedJob.setOutputFormatClass(AvroKeyValueOutputFormat.class);

      // Final output schemas.
      AvroJob.setOutputKeySchema(stagedJob, Schema.create(Type.STRING));
      AvroJob.setOutputValueSchema(stagedJob, LogData.SCHEMA$);

      // Parallelism and processing classes.
      stagedJob.setNumReduceTasks(reducerCount);
      stagedJob.setMapperClass(ParseJobsFromLogs.TheMapper.class);
      stagedJob.setReducerClass(ParseJobsFromLogs.TheReducer.class);

      // Map output schemas.
      AvroJob.setMapOutputKeySchema(stagedJob, Schema.create(Type.STRING));
      AvroJob.setMapOutputValueSchema(stagedJob, LogData.SCHEMA$);

      MyAvroMultipleOutputs.addNamedOutput(
          stagedJob, "logs", AvroKeyValueOutputFormat.class, Schema.create(Type.STRING), LogData.SCHEMA$);

      executor.submit(stagedJob);
    }

    // Wait for this cluster's jobs before moving on to the next cluster.
    executor.waitForCompletion();
  }
}

开发者ID:nkrishnaveni，项目名称:polar-bear，代码行数:55，代码来源:ParseJobsFromLogs.java

注：本文中的org.apache.avro.mapreduce.AvroJob.setMapOutputKeySchema方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。