当前位置: 首页>>代码示例>>Java>>正文


Java AvroValue类代码示例

本文整理汇总了 Java 中 org.apache.avro.mapred.AvroValue 的典型用法代码示例。如果您正苦于以下问题:Java AvroValue 类的具体用法?Java AvroValue 怎么用?Java AvroValue 使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。


AvroValue 类属于 org.apache.avro.mapred 包,下文一共展示了 AvroValue 类的 15 个代码示例,这些示例默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞,您的评价将有助于系统推荐出更好的 Java 代码示例。

示例1: reduce

import org.apache.avro.mapred.AvroValue; //导入依赖的package包/类
/**
 * Collapses every value grouped under one key into a single output record.
 *
 * <p>Each value's datum is copied into {@code outKey} in iteration order, so the datum of the
 * last value iterated is the one that is written. Counters record how many keys had more than
 * one value, how many surplus values were dropped, and how many records were emitted.
 *
 * @param key the grouping key (not read by this method)
 * @param values all values that share {@code key}
 * @param context task context used for counters and output
 * @throws IOException if writing the output fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
protected void reduce(AvroKey<GenericRecord> key, Iterable<AvroValue<GenericRecord>> values, Context context)
    throws IOException, InterruptedException {
  int seen = 0;
  for (AvroValue<GenericRecord> candidate : values) {
    // Each pass overwrites the previous datum; only the final one survives the loop.
    outKey.datum(candidate.datum());
    seen += 1;
  }

  // Everything beyond the first value for this key counts as a dropped duplicate.
  final int duplicates = seen - 1;
  if (duplicates > 0) {
    context.getCounter(EVENT_COUNTER.MORE_THAN_1).increment(1);
    context.getCounter(EVENT_COUNTER.DEDUPED).increment(duplicates);
  }
  context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);

  context.write(outKey, NullWritable.get());
}
 
开发者ID:Hanmourang,项目名称:Gobblin,代码行数:20,代码来源:AvroKeyDedupReducer.java

示例2: map

import org.apache.avro.mapred.AvroValue; //导入依赖的package包/类
/**
 * Routes one input record either to the regular map output or to the malformed-records text
 * output, depending on {@code key.isValid()}.
 *
 * <p>Valid keys: the key's hash is set from the {@code recordValue} field, the incoming record is
 * wrapped in {@code recordWrapped}, and the pair ({@code recordKey}, {@code recordWrapped}) is
 * written. NOTE(review): the hash is set on {@code key} but {@code recordKey} is what gets
 * written; both {@code recordKey} and {@code recordValue} are declared outside this method, so
 * confirm this is intended.
 *
 * <p>Invalid keys: the record counters are bumped and the key's source text is written under a
 * path built from {@code MALFORMED_PATH_PREFIX} and the key's batch.
 *
 * @param key the parsed record key
 * @param value the record payload
 * @param context task context used for counters and output
 * @throws IOException if writing fails
 * @throws InterruptedException if writing is interrupted
 */
@Override
protected void map(RecordKey key, AvroGenericRecordWritable value,
                   org.apache.hadoop.mapreduce.Mapper<RecordKey, AvroGenericRecordWritable, RecordKey, AvroValue<Record>>.Context
                     context)
  throws IOException, InterruptedException {
  if (!key.isValid()) {
    context.getCounter(RecordCounter.RECORDS).increment(1);
    context.getCounter(RecordCounter.RECORDS_MALFORMED).increment(1);
    textValue.set(key.getSource());
    // Reset the shared builder before composing the output path for this batch.
    string.setLength(0);
    string.append(MALFORMED_PATH_PREFIX).append(key.getBatch());
    multipleOutputs.write(OUTPUT_TEXT, NullWritable.get(), textValue, string.toString());
    return;
  }

  key.setHash(recordValue.hashCode());
  recordWrapped.datum((Record) value.getRecord());
  context.write(recordKey, recordWrapped);
}
 
开发者ID:ggear,项目名称:cloudera-framework,代码行数:19,代码来源:Partition.java

示例3: reduce

import org.apache.avro.mapred.AvroValue; //导入依赖的package包/类
/**
 * Writes every value of a key to a year/month partitioned Avro output, classifying each one as
 * canonical or duplicate.
 *
 * <p>A value is canonical when {@code records.add(...)} reports it as newly added, otherwise it
 * is a duplicate; the classification selects both the counter that is incremented and the root
 * directory of the output path. The partition directory (without the file name) is also
 * remembered in {@code partitions}.
 *
 * <p>NOTE(review): the value wrapper itself is stored in {@code records}. If the framework hands
 * back the same wrapper instance on every iteration, the duplicate detection depends on how that
 * collection compares elements - confirm against the declaration of {@code records}.
 *
 * @param key the grouping key (not read by this method)
 * @param values all records that share {@code key}
 * @param context task context used for counters
 * @throws IOException if writing fails
 * @throws InterruptedException if writing is interrupted
 */
@Override
protected void reduce(RecordKey key, Iterable<AvroValue<Record>> values,
                      org.apache.hadoop.mapreduce.Reducer<RecordKey, AvroValue<Record>, NullWritable, AvroValue<Record>>
                        .Context context)
  throws IOException, InterruptedException {
  records.clear();
  for (AvroValue<Record> wrapped : values) {
    context.getCounter(RecordCounter.RECORDS).increment(1);

    boolean firstSeen = records.add(wrapped);
    RecordCounter counter;
    if (firstSeen) {
      counter = RecordCounter.RECORDS_CANONICAL;
    } else {
      counter = RecordCounter.RECORDS_DUPLICATE;
    }
    context.getCounter(counter).increment(1);

    Record datum = wrapped.datum();
    this.record.datum(datum);
    calendar.setTimeInMillis(datum.getMyTimestamp());
    int year = calendar.get(Calendar.YEAR);
    // Calendar.MONTH is zero-based; shift it to 1..12 for the directory name.
    int month = calendar.get(Calendar.MONTH) + 1;

    // Compose "<root>/<year-prefix><year>/<month-prefix><month>/" in the shared builder.
    string.setLength(0);
    string.append(firstSeen ? Constants.DIR_REL_MYDS_PARTITIONED_CANONICAL_AVRO
      : Constants.DIR_REL_MYDS_PARTITIONED_DUPLICATE_AVRO);
    string.append(Path.SEPARATOR_CHAR);
    string.append(PARTITION_YEAR).append(year);
    string.append(Path.SEPARATOR_CHAR);
    string.append(PARTITION_MONTH).append(month);
    string.append(Path.SEPARATOR_CHAR);
    partitions.add(string.toString());

    // The file name is appended only after the bare partition directory has been recorded.
    string.append(PARTITION_FILE);
    multipleOutputsAvro.write(OUTPUT_AVRO, this.record, NullWritable.get(), string.toString());
  }
}
 
开发者ID:ggear,项目名称:cloudera-framework,代码行数:23,代码来源:Partition.java

示例4: createRecordReader

import org.apache.avro.mapred.AvroValue; //导入依赖的package包/类
/**
 * Creates a key/value record reader configured with the reader schemas found in the job
 * configuration.
 *
 * <p>If a key or value reader schema has not been set, a warning is logged and {@code null} is
 * passed to the reader for that schema; the log output states that the writer schema is used in
 * that case.
 *
 * @param split the input split (not read by this method)
 * @param context task attempt context supplying the configuration
 * @return a new {@code AvroKeyValueRecordReader} for the configured schemas
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordReader<AvroKey<K>, AvroValue<V>> createRecordReader(
    InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  final Schema keySchema = AvroJob.getInputKeySchema(context.getConfiguration());
  if (keySchema == null) {
    LOG.warn("Key reader schema was not set. Use AvroJob.setInputKeySchema() if desired.");
    LOG.info("Using a key reader schema equal to the writer schema.");
  }

  final Schema valueSchema = AvroJob.getInputValueSchema(context.getConfiguration());
  if (valueSchema == null) {
    LOG.warn("Value reader schema was not set. Use AvroJob.setInputValueSchema() if desired.");
    LOG.info("Using a value reader schema equal to the writer schema.");
  }

  return new AvroKeyValueRecordReader<K, V>(keySchema, valueSchema);
}
 
开发者ID:cloudera,项目名称:RecordServiceClient,代码行数:19,代码来源:AvroKeyValueInputFormat.java

示例5: testReduce

import org.apache.avro.mapred.AvroValue; //导入依赖的package包/类
/**
 * Feeds the test bills, wrapped as Avro values under index 3, through the reducer and checks
 * that exactly one record comes out, carrying index 3 and three bills with ids 1, 2 and 3 in
 * that order.
 */
@Test
public void testReduce() throws IOException {
    final LongWritable idx = new LongWritable(3L);

    final List<AvroValue<SerializableBill>> wrappedBills = new ArrayList<AvroValue<SerializableBill>>(2);
    for (SerializableBill bill : getTestBills()) {
        AvroValue<SerializableBill> wrapped = new AvroValue<SerializableBill>(bill);
        wrappedBills.add(wrapped);
    }

    // Fixture object mirroring the reducer's output; it is built but not compared below.
    final LongIndexedSerializableBill bills = new LongIndexedSerializableBill();
    bills.setIdx(idx.get());
    bills.setBills(getTestBills());

    reduceDriver.setInput(idx, wrappedBills);
    final List<Pair<AvroKey<LongIndexedSerializableBill>, NullWritable>> outputRecords = reduceDriver.run();

    assertEquals(1, outputRecords.size());
    final LongIndexedSerializableBill actualBills = outputRecords.get(0).getFirst().datum();
    assertEquals(Long.valueOf(3L), actualBills.getIdx());
    assertEquals(3, actualBills.getBills().size());
    // Bill ids are expected to be 1, 2, 3 at positions 0, 1, 2.
    for (int position = 0; position < 3; position++) {
        assertEquals(Long.valueOf(position + 1L), actualBills.getBills().get(position).getId());
    }
}
 
开发者ID:ch4mpy,项目名称:hadoop2,代码行数:24,代码来源:BillByLongAvroReducerTest.java

示例6: testReduceIndexedOnCustomerId

import org.apache.avro.mapred.AvroValue; //导入依赖的package包/类
/**
 * Gives the reducer three records without a category (two of them identical) plus one record
 * that carries only category "High tech" and amount 199.0, all under key 3, and expects each of
 * the three records back with that category and amount filled in.
 *
 * <p>{@code runTest(false)} is used; in MRUnit that flag controls whether output order is
 * significant - TODO confirm against the MRUnit version in use.
 */
@Test
public void testReduceIndexedOnCustomerId() throws IOException {
    final List<AvroValue<PostcodeCategoryTurnoverTmp>> input = new ArrayList<AvroValue<PostcodeCategoryTurnoverTmp>>(3);

    // Three records that still lack a category and amount.
    final PostcodeCategoryTurnoverTmp firstPartial = new PostcodeCategoryTurnoverTmp(1L, "FR-04000", 3L, null, 0.0);
    input.add(new AvroValue<PostcodeCategoryTurnoverTmp>(firstPartial));
    final PostcodeCategoryTurnoverTmp secondPartial = new PostcodeCategoryTurnoverTmp(2L, "FR-92100", 3L, null, 0.0);
    input.add(new AvroValue<PostcodeCategoryTurnoverTmp>(secondPartial));
    final PostcodeCategoryTurnoverTmp thirdPartial = new PostcodeCategoryTurnoverTmp(1L, "FR-04000", 3L, null, 0.0);
    input.add(new AvroValue<PostcodeCategoryTurnoverTmp>(thirdPartial));

    // One record that supplies the category and amount for key 3.
    final PostcodeCategoryTurnoverTmp categoryOnly = new PostcodeCategoryTurnoverTmp(null, null, 3L, "High tech", 199.0);
    input.add(new AvroValue<PostcodeCategoryTurnoverTmp>(categoryOnly));

    reduceDriver.setInput(new LongWritable(3L), input);

    final PostcodeCategoryTurnoverTmp firstExpected = new PostcodeCategoryTurnoverTmp(1L, "FR-04000", 3L, "High tech", 199.0);
    reduceDriver.addOutput(new AvroKey<PostcodeCategoryTurnoverTmp>(firstExpected), NullWritable.get());
    final PostcodeCategoryTurnoverTmp secondExpected = new PostcodeCategoryTurnoverTmp(2L, "FR-92100", 3L, "High tech", 199.0);
    reduceDriver.addOutput(new AvroKey<PostcodeCategoryTurnoverTmp>(secondExpected), NullWritable.get());
    final PostcodeCategoryTurnoverTmp thirdExpected = new PostcodeCategoryTurnoverTmp(1L, "FR-04000", 3L, "High tech", 199.0);
    reduceDriver.addOutput(new AvroKey<PostcodeCategoryTurnoverTmp>(thirdExpected), NullWritable.get());

    reduceDriver.runTest(false);
}
 
开发者ID:ch4mpy,项目名称:hadoop2,代码行数:19,代码来源:PostcodeCategoryTurnoverTmpByProductIdReducerTest.java

示例7: reduce

import org.apache.avro.mapred.AvroValue; //导入依赖的package包/类
/**
 * Gathers the distinct bills grouped under one index into a single
 * {@code LongIndexedSerializableBill} and writes it as the output key.
 *
 * <p>Bills are de-duplicated by id: only the first bill seen for each id is kept, and it is
 * stored as a copy built from the incoming datum.
 *
 * @param idx the index the bills are grouped under
 * @param bills the wrapped bills for this index
 * @param context task context used for output
 * @throws IOException if writing fails
 * @throws InterruptedException if writing is interrupted
 */
@Override
protected void reduce(LongWritable idx, Iterable<AvroValue<SerializableBill>> bills, Context context) throws IOException, InterruptedException {
    // The same bill can appear several times (e.g. a product with quantity > 1),
    // so bills are keyed by id to keep a single instance of each.
    Map<Long, SerializableBill> billsById = new HashMap<Long, SerializableBill>();
    for (AvroValue<SerializableBill> wrapped : bills) {
        SerializableBill current = wrapped.datum();
        if (billsById.containsKey(current.getId())) {
            continue;
        }
        billsById.put(current.getId(), SerializableBill.newBuilder(current).build());
    }

    LongIndexedSerializableBill.Builder builder = LongIndexedSerializableBill.newBuilder();
    builder.setIdx(idx.get());
    builder.setBills(new ArrayList<SerializableBill>(billsById.values()));

    AvroKey<LongIndexedSerializableBill> outputKey = new AvroKey<LongIndexedSerializableBill>(builder.build());
    context.write(outputKey, NullWritable.get());
}
 
开发者ID:ch4mpy,项目名称:hadoop2,代码行数:20,代码来源:BillByLongAvroReducer.java

示例8: reduce

import org.apache.avro.mapred.AvroValue; //导入依赖的package包/类
/**
 * Collapses all records grouped under one key and writes each collapsed record as an output key.
 *
 * <p>Every incoming datum is first copied via {@code AvroUtils.getCopy} using the configured
 * input schema and class, and the copies are handed to {@code recordCollapser}.
 * NOTE(review): the copy is presumably needed because the framework may reuse the value object
 * between iterations - confirm.
 *
 * @param key the grouping key (not read by this method)
 * @param values the wrapped records for this key
 * @param context task context used for output
 * @throws IOException if writing fails
 * @throws InterruptedException if writing is interrupted
 */
@Override
public void reduce(AvroKey<String> key, Iterable<AvroValue<IndexedRecord>> values, Context context)
        throws IOException, InterruptedException {
    List<IndexedRecord> copies = new ArrayList<IndexedRecord>();
    for (AvroValue<IndexedRecord> wrapped : values) {
        IndexedRecord copy = (IndexedRecord) AvroUtils.getCopy(wrapped.datum(), inputSchema, inputSchemaClass);
        copies.add(copy);
    }

    for (IndexedRecord collapsed : recordCollapser.collapse(copies)) {
        context.write(new AvroKey<IndexedRecord>(collapsed), NullWritable.get());
    }
}
 
开发者ID:openaire,项目名称:iis,代码行数:17,代码来源:CollapserReducer.java

示例9: testGroupingWithInvalidBlockingField

import org.apache.avro.mapred.AvroValue; //导入依赖的package包/类
/**
 * With the blocking field configured as "invalid", mapping an identifier is expected to write
 * it exactly once under a key whose datum is {@code null}.
 */
@Test
public void testGroupingWithInvalidBlockingField() throws Exception {
    // given: a mapper set up with a blocking field named "invalid"
    Configuration configuration = new Configuration();
    configuration.set(GroupByFieldMapper.BLOCKING_FIELD, "invalid");
    doReturn(configuration).when(context).getConfiguration();
    mapper.setup(context);
    Identifier id = Identifier.newBuilder().setId("someId").build();

    // execute
    mapper.map(new AvroKey<>(id), null, context);

    // validate: one write, keyed by a null datum, carrying the original identifier
    AvroKey<String> expectedKey = new AvroKey<String>(null);
    AvroValue<Identifier> expectedValue = new AvroValue<Identifier>(id);
    verify(context, times(1)).write(expectedKey, expectedValue);
}
 
开发者ID:openaire,项目名称:iis,代码行数:17,代码来源:GroupByFieldMapperTest.java

示例10: testGrouping

import org.apache.avro.mapred.AvroValue; //导入依赖的package包/类
/**
 * With the blocking field configured as "id", mapping an identifier is expected to write it
 * exactly once under a key holding the identifier's id value.
 */
@Test
public void testGrouping() throws Exception {
    // given: a mapper set up to group by the "id" field
    Configuration configuration = new Configuration();
    configuration.set(GroupByFieldMapper.BLOCKING_FIELD, "id");
    doReturn(configuration).when(context).getConfiguration();
    mapper.setup(context);
    String idValue = "someId";
    Identifier id = Identifier.newBuilder().setId(idValue).build();

    // execute
    mapper.map(new AvroKey<>(id), null, context);

    // validate: one write, keyed by the id value, carrying the original identifier
    AvroKey<String> expectedKey = new AvroKey<String>(idValue);
    AvroValue<Identifier> expectedValue = new AvroValue<Identifier>(id);
    verify(context, times(1)).write(expectedKey, expectedValue);
}
 
开发者ID:openaire,项目名称:iis,代码行数:17,代码来源:GroupByFieldMapperTest.java

示例11: getPartition

import org.apache.avro.mapred.AvroValue; //导入依赖的package包/类
/**
 * Chooses the partition for a key.
 *
 * <p>If {@code _partitionIdMap} is set and contains the key's string form, the mapped id modulo
 * {@code numPartitions} is returned. Otherwise the partition is derived from the string's hash
 * code.
 *
 * <p>The hash fallback takes the remainder <em>before</em> the absolute value.
 * {@code Math.abs(Integer.MIN_VALUE)} is still negative, so the previous
 * {@code Math.abs(hash) % numPartitions} could return a negative partition index for a key whose
 * hash code is {@code Integer.MIN_VALUE}. For every other hash code both forms give the same
 * result, so existing key-to-partition assignments are unchanged.
 *
 * <p>NOTE(review): ids stored in {@code _partitionIdMap} are assumed to be non-negative; a
 * negative id would still yield a negative result here - confirm against the code that fills
 * the map.
 *
 * @param key the map output key; its datum's string form selects the partition
 * @param value the map output value (not read by this method)
 * @param numPartitions the number of partitions
 * @return the partition index for {@code key}
 */
@Override
public int getPartition(AvroKey<String> key,
                        AvroValue<Integer> value,
                        int numPartitions)
{
  String k = key.datum().toString();
  if (_partitionIdMap != null && _partitionIdMap.containsKey(k))
  {
    int partitionId = _partitionIdMap.get(k);
    return partitionId % numPartitions;
  }
  // The remainder lies strictly between -numPartitions and numPartitions, so abs cannot overflow.
  return Math.abs(k.hashCode() % numPartitions);
}
 
开发者ID:linkedin,项目名称:ml-ease,代码行数:17,代码来源:RegressionNaiveTrain.java

示例12: reduce

import org.apache.avro.mapred.AvroValue; //导入依赖的package包/类
/**
 * Emits a single record for a key, chosen from all values grouped under it.
 *
 * <p>The first value is retained initially. When a delta comparator is present, each later value
 * replaces the retained one only if the comparator ranks the retained value strictly below it;
 * without a comparator the first value is kept. Counters record how many keys had more than one
 * value, how many surplus values were dropped, and how many records were emitted.
 *
 * <p>NOTE(review): the retained reference is the wrapper object handed out by the iterable. If
 * the framework reuses that wrapper (or its datum) between iterations, the retained value and
 * the current value may be the same object - confirm, and copy the datum if so.
 *
 * @param key the grouping key (not read by this method)
 * @param values all values that share {@code key}
 * @param context task context used for counters and output
 * @throws IOException if writing the output fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
protected void reduce(AvroKey<GenericRecord> key, Iterable<AvroValue<GenericRecord>> values, Context context)
    throws IOException, InterruptedException {
  int seen = 0;
  AvroValue<GenericRecord> retained = null;

  for (AvroValue<GenericRecord> candidate : values) {
    seen += 1;
    if (retained == null) {
      retained = candidate;
      continue;
    }
    if (!this.deltaComparatorOptional.isPresent()) {
      // No comparator configured: the first value stays.
      continue;
    }
    if (this.deltaComparatorOptional.get().compare(retained, candidate) < 0) {
      retained = candidate;
    }
  }
  this.outKey.datum(retained.datum());

  // Everything beyond the first value for this key counts as a dropped duplicate.
  final int duplicates = seen - 1;
  if (duplicates > 0) {
    context.getCounter(EVENT_COUNTER.MORE_THAN_1).increment(1);
    context.getCounter(EVENT_COUNTER.DEDUPED).increment(duplicates);
  }
  context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);

  context.write(this.outKey, NullWritable.get());
}
 
开发者ID:apache,项目名称:incubator-gobblin,代码行数:27,代码来源:AvroKeyDedupReducer.java

示例13: testMapReduce

import org.apache.avro.mapred.AvroValue; //导入依赖的package包/类
/**
 * Runs {@code MyAvroReducer} through an MRUnit reduce driver with a single input node labelled
 * "bar" and checks that exactly one output record with that label is produced.
 */
@Test
public void testMapReduce() throws IOException {
  final MyAvroReducer reducer = new MyAvroReducer();

  // The Avro schemas are registered on a job by hand, because mrunit doesn't run exactly
  // like the real hadoop mapreduce framework.
  final Job job = new Job();
  AvroJob.setMapOutputKeySchema(job, Node.SCHEMA$);
  AvroJob.setOutputKeySchema(job, reducer.getAvroKeyWriterSchema());
  AvroSerialization.setValueWriterSchema(job.getConfiguration(), Node.SCHEMA$);

  // Wire the reducer and the job's configuration into the driver.
  final ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> reduceDriver
      = new ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable>();
  reduceDriver.setReducer(reducer);
  reduceDriver.withConfiguration(job.getConfiguration());

  // One key "foo" with a single wrapped node as its only value.
  final List<AvroValue<Node>> inputValues =
      Collections.singletonList(new AvroValue<Node>(new NodeBuilder("bar", 1.0).build()));
  reduceDriver.withInput(new Text("foo"), inputValues);

  final List<Pair<AvroKey<Node>, NullWritable>> results = reduceDriver.run();
  assertEquals(1, results.size());
  assertEquals("bar", results.get(0).getFirst().datum().getLabel().toString());
}
 
开发者ID:kijiproject,项目名称:kiji-mapreduce-lib,代码行数:24,代码来源:TestAvroReducer.java

示例14: testMapReduce

import org.apache.avro.mapred.AvroValue; //导入依赖的package包/类
/**
 * Runs {@code MyNodeReducer} through an MRUnit reduce driver with a single input node labelled
 * "bar" and checks that exactly one output record with that label is produced.
 */
@Test
public void testMapReduce() throws IOException {
  final MyNodeReducer reducer = new MyNodeReducer();

  // The Avro schemas are registered on a job by hand, because mrunit doesn't run exactly
  // like the real hadoop mapreduce framework.
  final Job job = new Job();
  AvroJob.setMapOutputKeySchema(job, Node.SCHEMA$);
  AvroJob.setOutputKeySchema(job, reducer.getAvroKeyWriterSchema());
  AvroSerialization.setValueWriterSchema(job.getConfiguration(), Node.SCHEMA$);

  // Wire the reducer and the job's configuration into the driver.
  final ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> reduceDriver
      = new ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable>();
  reduceDriver.setReducer(reducer);
  reduceDriver.withConfiguration(job.getConfiguration());

  // One key "foo" with a single wrapped node as its only value.
  final List<AvroValue<Node>> inputValues =
      Collections.singletonList(new AvroValue<Node>(new NodeBuilder("bar", 1.0).build()));
  reduceDriver.withInput(new Text("foo"), inputValues);

  final List<Pair<AvroKey<Node>, NullWritable>> results = reduceDriver.run();
  assertEquals(1, results.size());
  assertEquals("bar", results.get(0).getFirst().datum().getLabel().toString());
}
 
开发者ID:kijiproject,项目名称:kiji-mapreduce-lib,代码行数:24,代码来源:TestNodeReducer.java

示例15: reduce

import org.apache.avro.mapred.AvroValue; //导入依赖的package包/类
/** {@inheritDoc} */
@Override
protected void reduce(AvroKey<CharSequence> key, Iterable<AvroValue<SongCount>> values,
    KijiTableContext context) throws IOException {
  // mTopNextSongs is shared across calls, so start every key from an empty collection.
  mTopNextSongs.clear();

  for (AvroValue<SongCount> wrapped : values) {
    // Build a fresh SongCount from the wrapped datum rather than storing the datum itself.
    mTopNextSongs.add(SongCount.newBuilder(wrapped.datum()).build());

    // Keep at most mNumberOfTopSongs entries: once over the limit, drop the first element
    // of the collection's ordering (intended to be the one with the smallest count).
    if (mTopNextSongs.size() > mNumberOfTopSongs) {
      mTopNextSongs.pollFirst();
    }
  }

  // Copy the surviving song counts into the reusable mTopSongs record.
  mTopSongs.setTopSongs(Lists.newArrayList(mTopNextSongs));

  // Store the record in column "info:top_next_songs" of the row addressed by this key.
  final String rowKey = key.datum().toString();
  context.put(context.getEntityId(rowKey), "info", "top_next_songs", mTopSongs);
}
 
开发者ID:kijiproject,项目名称:kiji-music,代码行数:25,代码来源:TopNextSongsReducer.java


注:本文中的 org.apache.avro.mapred.AvroValue 类示例由纯净天空整理自 GitHub、MSDocs 等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的 License;未经允许,请勿转载。