当前位置: 首页>>代码示例>>Java>>正文


Java AvroMultipleOutputs类代码示例

本文整理汇总了Java中org.apache.avro.mapreduce.AvroMultipleOutputs的典型用法代码示例。如果您正苦于以下问题：Java AvroMultipleOutputs类的具体用法？Java AvroMultipleOutputs怎么用？Java AvroMultipleOutputs使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


AvroMultipleOutputs类属于org.apache.avro.mapreduce包，在下文中一共展示了AvroMultipleOutputs类的9个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: process

import org.apache.avro.mapreduce.AvroMultipleOutputs; //导入依赖的package包/类
/**
 * Registers the Avro named outputs described by an {@code AvroNamedOutput}
 * annotation on the given job.
 *
 * <p>Each name returned by {@code getNames} is passed through
 * {@code evaluateExpression}; names already present in {@code configured}
 * are skipped, so a given output name is added to the job at most once.
 * Finally {@code AvroSerialization.addToConfiguration} is invoked on the
 * job's configuration.
 *
 * @param annotation the annotation being handled; cast to {@code AvroNamedOutput}
 * @param job        the job to which the named outputs are added
 * @param target     not used by this method
 * @throws ToolException declared by the handler contract
 */
@Override
public void process(Annotation annotation, Job job, Object target)
		throws ToolException {
	final AvroNamedOutput namedOutput = (AvroNamedOutput)annotation;
	final Schema recordSchema = getSchema(namedOutput.record());

	for (String rawName : getNames(namedOutput)) {
		final String outputName = (String)evaluateExpression(rawName);

		// Already registered earlier: nothing more to do for this name.
		if (configured.contains(outputName)) {
			continue;
		}

		AvroMultipleOutputs.addNamedOutput(job, outputName, namedOutput.format(), recordSchema);
		AvroMultipleOutputs.setCountersEnabled(job, namedOutput.countersEnabled());
		configured.add(outputName);
	}

	AvroSerialization.addToConfiguration(job.getConfiguration());
}
 
开发者ID:conversant,项目名称:mara,代码行数:19,代码来源:AvroNamedOutputAnnotationHandler.java

示例2: setContext

import org.apache.avro.mapreduce.AvroMultipleOutputs; //导入依赖的package包/类
/**
 * Stores the task context in the superclass and creates the
 * {@code AvroMultipleOutputs} instance bound to that same context.
 *
 * @param context the task context handed to this object
 */
@Override
public void setContext(TaskInputOutputContext<Object,Object,Object,Object> context) {
  super.setContext(context);

  // The final output is additionally written to multiple directories,
  // which is what the multiple-outputs helper is for.
  final AvroMultipleOutputs outputs = new AvroMultipleOutputs(context);
  _multipleOutputs = outputs;
}
 
开发者ID:apache,项目名称:incubator-datafu,代码行数:9,代码来源:PartitioningReducer.java

示例3: internalRun

import org.apache.avro.mapreduce.AvroMultipleOutputs; //导入依赖的package包/类
/**
 * Builds (but does not submit) the "diff" MapReduce job.
 *
 * <p>The two input locations are recorded in the configuration under
 * {@code viadeo.diff.diffinpath} and {@code viadeo.diff.diffoutpath}, both
 * are added as job inputs, and three Avro named outputs ("kernel", "add",
 * "del") are registered with counters enabled.
 *
 * @param origInput first input path
 * @param destInput second input path; also used to look up the schema when
 *                  the configuration does not provide one
 * @param outputDir job output directory
 * @param conf      configuration the job is created from
 * @return the configured job
 * @throws Exception if schema lookup or job set-up fails
 */
public Job internalRun(Path origInput, Path destInput, Path outputDir, Configuration conf) throws Exception {
    // These keys must be set before the Job is created from conf.
    conf.set("viadeo.diff.diffinpath", origInput.toString());
    conf.set("viadeo.diff.diffoutpath", destInput.toString());

    // NOTE(review): Job(Configuration) is deprecated in Hadoop 2.x in favour of
    // Job.getInstance(conf) - confirm the targeted Hadoop version before switching.
    Job diffJob = new Job(conf);
    diffJob.setJarByClass(DiffJob.class);
    diffJob.setJobName("diff");

    // Prefer a schema supplied through the configuration; otherwise fall
    // back to the one looked up from the second input.
    Schema recordSchema = SchemaUtils.getConfSchema(conf);
    if (recordSchema == null) {
        recordSchema = SchemaUtils.getSchema(conf, destInput);
    }

    // ~ INPUT
    FileInputFormat.setInputPaths(diffJob, origInput, destInput);
    diffJob.setInputFormatClass(AvroKeyInputFormat.class);

    // ~ MAP / REDUCE
    diffJob.setMapperClass(DiffMapper.class);
    AvroJob.setInputKeySchema(diffJob, recordSchema);
    AvroJob.setMapOutputKeySchema(diffJob, recordSchema);
    diffJob.setMapOutputValueClass(Text.class);
    diffJob.setReducerClass(DiffReducer.class);
    AvroJob.setOutputKeySchema(diffJob, recordSchema);

    diffJob.setOutputValueClass(Text.class);
    diffJob.setOutputFormatClass(AvroKeyOutputFormat.class);

    // ~ OUTPUT
    FileOutputFormat.setOutputPath(diffJob, outputDir);
    for (String namedOutput : new String[] {"kernel", "add", "del"}) {
        AvroMultipleOutputs.addNamedOutput(diffJob, namedOutput, AvroKeyOutputFormat.class, recordSchema);
    }
    AvroMultipleOutputs.setCountersEnabled(diffJob, true);

    return diffJob;
}
 
开发者ID:viadeo,项目名称:viadeo-avro-utils,代码行数:38,代码来源:DiffJob.java

示例4: setup

import org.apache.avro.mapreduce.AvroMultipleOutputs; //导入依赖的package包/类
/**
 * Initialises the mapper from the job configuration.
 *
 * <p>Derives the input file name from the current split (with the Avro
 * suffix stripped), deserialises the {@code ThirdEyeConfig} and the output
 * schema stored in the configuration, loads the top-k dimension values file
 * when it exists, and creates the {@code AvroMultipleOutputs} instance.
 *
 * @param context the mapper context supplying configuration and input split
 * @throws IOException if the configuration values or the top-k file cannot be read
 * @throws InterruptedException declared by the mapper contract
 */
@Override
public void setup(Context context) throws IOException, InterruptedException {
  LOGGER.info("DerivedColumnTransformationPhaseJob.DerivedColumnTransformationPhaseMapper.setup()");
  Configuration configuration = context.getConfiguration();
  FileSystem fs = FileSystem.get(configuration);

  // Name of the file backing this split, without the Avro suffix.
  FileSplit fileSplit = (FileSplit) context.getInputSplit();
  inputFileName = fileSplit.getPath().getName();
  inputFileName = inputFileName.substring(0, inputFileName.lastIndexOf(ThirdEyeConstants.AVRO_SUFFIX));
  LOGGER.info("split name:" + inputFileName);

  thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
  config = DerivedColumnTransformationPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
  dimensionsNames = config.getDimensionNames();
  dimensionsTypes = config.getDimensionTypes();
  metricNames = config.getMetricNames();
  metricTypes = config.getMetricTypes();
  timeColumnName = config.getTimeColumnName();
  whitelist = config.getWhitelist();
  nonWhitelistValueMap = config.getNonWhitelistValue();

  outputSchema = new Schema.Parser().parse(configuration.get(DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString()));

  Path topKPath = new Path(configuration.get(DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH.toString())
      + File.separator + ThirdEyeConstants.TOPK_VALUES_FILE);
  // Start from an empty value set; it is replaced only when the file exists.
  topKDimensionValues = new TopKDimensionValues();
  if (fs.exists(topKPath)) {
    // try-with-resources: the stream is closed even when deserialisation
    // throws, which the previous explicit close() call did not guarantee.
    try (FSDataInputStream topkValuesStream = fs.open(topKPath)) {
      topKDimensionValues = OBJECT_MAPPER.readValue((DataInput) topkValuesStream, TopKDimensionValues.class);
    }
  }
  topKDimensionsMap = topKDimensionValues.getTopKDimensions();

  avroMultipleOutputs = new AvroMultipleOutputs(context);
}
 
开发者ID:linkedin,项目名称:pinot,代码行数:36,代码来源:DerivedColumnTransformationPhaseJob.java

示例5: getDefaultAvroNamedOutput

import org.apache.avro.mapreduce.AvroMultipleOutputs; //导入依赖的package包/类
/**
 * Convenience lookup of the named output called {@code "default"}.
 *
 * @return whatever {@code getAvroNamedOutput("default")} returns
 */
protected AvroMultipleOutputs getDefaultAvroNamedOutput() {
	final AvroMultipleOutputs defaultOutput = getAvroNamedOutput("default");
	return defaultOutput;
}
 
开发者ID:conversant,项目名称:mara,代码行数:4,代码来源:BaseMRUnitTest.java

示例6: getAvroNamedOutput

import org.apache.avro.mapreduce.AvroMultipleOutputs; //导入依赖的package包/类
/**
 * Looks up a named output in {@code avroNamedOutputs}.
 *
 * @param name key of the named output to fetch
 * @return the value stored under {@code name} in {@code avroNamedOutputs}
 */
protected AvroMultipleOutputs getAvroNamedOutput(String name) {
	final AvroMultipleOutputs namedOutput = avroNamedOutputs.get(name);
	return namedOutput;
}
 
开发者ID:conversant,项目名称:mara,代码行数:4,代码来源:BaseMRUnitTest.java

示例7: setup

import org.apache.avro.mapreduce.AvroMultipleOutputs; //导入依赖的package包/类
/**
 * Prepares the reducer: creates the {@code AvroMultipleOutputs} instance for
 * this context and empties {@code partitions}.
 *
 * @param context the reducer context the multiple-outputs helper is bound to
 * @throws IOException declared by the reducer contract
 * @throws InterruptedException declared by the reducer contract
 */
@Override
protected void setup(
  org.apache.hadoop.mapreduce.Reducer<RecordKey, AvroValue<Record>, NullWritable, AvroValue<Record>>.Context context)
  throws IOException, InterruptedException {
  final AvroMultipleOutputs outputs = new AvroMultipleOutputs(context);
  multipleOutputsAvro = outputs;

  // Drop anything left in the collection before this task starts.
  partitions.clear();
}
 
开发者ID:ggear,项目名称:cloudera-framework,代码行数:7,代码来源:Partition.java

示例8: setup

import org.apache.avro.mapreduce.AvroMultipleOutputs; //导入依赖的package包/类
/**
 * Creates the {@code AvroMultipleOutputs} instance used by this task.
 *
 * @param context the task context the helper is bound to
 */
@Override
protected void setup(Context context) {
    final AvroMultipleOutputs outputs = new AvroMultipleOutputs(context);
    amos = outputs;
}
 
开发者ID:viadeo,项目名称:viadeo-avro-utils,代码行数:5,代码来源:DiffJob.java

示例9: run

import org.apache.avro.mapreduce.AvroMultipleOutputs; //导入依赖的package包/类
/**
 * Configures and runs the derived-column transformation phase as a map-only job.
 *
 * <p>Steps, in order: register every comma-separated input path, record the
 * top-k path, recreate the output directory (deleting it recursively if it
 * already exists), read the Avro schema from the input, derive the dimension
 * and metric type properties and store the resulting {@code ThirdEyeConfig}
 * and output schema in the job configuration, then configure the mapper and
 * the "avro" named output and wait for the job to finish.
 *
 * @return the job after {@code waitForCompletion} has returned
 * @throws Exception if configuration, file-system access or job execution fails
 */
public Job run() throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJobName(name);
  job.setJarByClass(DerivedColumnTransformationPhaseJob.class);

  Configuration jobConfiguration = job.getConfiguration();
  FileSystem fileSystem = FileSystem.get(jobConfiguration);

  // Input paths: a comma-separated list, each entry added individually.
  String inputPathDir = getAndSetConfiguration(jobConfiguration, DERIVED_COLUMN_TRANSFORMATION_PHASE_INPUT_PATH);
  LOGGER.info("Input path dir: " + inputPathDir);
  for (String singleInput : inputPathDir.split(",")) {
    LOGGER.info("Adding input:" + singleInput);
    FileInputFormat.addInputPath(job, new Path(singleInput));
  }

  // Top-k path: stored in the configuration here, only logged by this method.
  String topkPath = getAndSetConfiguration(jobConfiguration, DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH);
  LOGGER.info("Topk path : " + topkPath);

  // Output path: any existing directory is removed before the job runs.
  String outputPathName = getAndSetConfiguration(jobConfiguration, DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH);
  Path outputPath = new Path(outputPathName);
  LOGGER.info("Output path dir: " + outputPath.toString());
  if (fileSystem.exists(outputPath)) {
    fileSystem.delete(outputPath, true);
  }
  FileOutputFormat.setOutputPath(job, outputPath);

  // Input schema
  Schema inputSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
  LOGGER.info("Schema : {}", inputSchema.toString(true));

  // ThirdEyeConfig: fill in the dimension and metric types derived from the
  // schema before building the config from props.
  String dimensionNamesKey = ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString();
  String dimensionTypes =
      ThirdeyeAvroUtils.getDimensionTypesProperty(props.getProperty(dimensionNamesKey), inputSchema);
  props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), dimensionTypes);

  String configuredMetricNames = props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString());
  String configuredMetricTypes = props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString());
  String metricTypes =
      ThirdeyeAvroUtils.getMetricTypesProperty(configuredMetricNames, configuredMetricTypes, inputSchema);
  props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypes);

  ThirdEyeConfig phaseConfig = ThirdEyeConfig.fromProperties(props);
  String serializedPhaseConfig = OBJECT_MAPPER.writeValueAsString(phaseConfig);
  job.getConfiguration().set(DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG.toString(), serializedPhaseConfig);
  LOGGER.info("ThirdEyeConfig {}", phaseConfig.encode());

  // Output schema, stored in the job configuration as a string.
  Schema outputSchema = newSchema(phaseConfig);
  job.getConfiguration().set(DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString(), outputSchema.toString());

  // Mapper and output configuration
  job.setMapperClass(DerivedColumnTransformationPhaseMapper.class);
  job.setInputFormatClass(AvroKeyInputFormat.class);
  job.setMapOutputKeyClass(AvroKey.class);
  job.setMapOutputValueClass(NullWritable.class);
  AvroJob.setOutputKeySchema(job, outputSchema);
  LazyOutputFormat.setOutputFormatClass(job, AvroKeyOutputFormat.class);
  AvroMultipleOutputs.addNamedOutput(job, "avro", AvroKeyOutputFormat.class, outputSchema);

  // Map-only job: no reducers.
  job.setNumReduceTasks(0);

  // The boolean result is not inspected here; the job itself is returned.
  job.waitForCompletion(true);

  return job;
}
 
开发者ID:linkedin,项目名称:pinot,代码行数:66,代码来源:DerivedColumnTransformationPhaseJob.java


注:本文中的org.apache.avro.mapreduce.AvroMultipleOutputs类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。