This post collects typical usage examples of the Java class org.apache.avro.mapred.AvroValue. If you have been wondering what AvroValue is for and how to use it, the curated code examples below may help.
The AvroValue class belongs to the org.apache.avro.mapred package. Fifteen code examples of the class follow, sorted by popularity.
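Before the examples, here is a minimal, hedged sketch of the pattern most of them share: AvroValue is a mutable wrapper around an Avro datum, so a single instance is typically allocated per task and its datum is swapped on every call. The User schema and class names below are illustrative only, not taken from any example on this page.

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroValue;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class AvroValueSketchMapper
    extends Mapper<LongWritable, Text, Text, AvroValue<GenericRecord>> {

  private static final Schema USER_SCHEMA = new Schema.Parser().parse(
      "{\"type\":\"record\",\"name\":\"User\",\"fields\":"
          + "[{\"name\":\"name\",\"type\":\"string\"}]}");

  // AvroValue is a mutable holder, so reuse one wrapper for the whole task.
  private final AvroValue<GenericRecord> outValue = new AvroValue<GenericRecord>();
  private final Text outKey = new Text();

  @Override
  protected void map(LongWritable offset, Text line, Context context)
      throws IOException, InterruptedException {
    GenericRecord user = new GenericData.Record(USER_SCHEMA);
    user.put("name", line.toString());
    outKey.set(line.toString());
    outValue.datum(user); // swap the datum instead of allocating a new wrapper
    context.write(outKey, outValue);
  }
}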
Example 1: reduce
import org.apache.avro.mapred.AvroValue; // required import
@Override
protected void reduce(AvroKey<GenericRecord> key, Iterable<AvroValue<GenericRecord>> values, Context context)
throws IOException, InterruptedException {
int numVals = 0;
// Each pass overwrites outKey, so only the last value seen for this key survives.
for (AvroValue<GenericRecord> value : values) {
  outKey.datum(value.datum());
  numVals++;
}
if (numVals > 1) {
context.getCounter(EVENT_COUNTER.MORE_THAN_1).increment(1);
context.getCounter(EVENT_COUNTER.DEDUPED).increment(numVals - 1);
}
context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);
context.write(outKey, NullWritable.get());
}
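Excerpts like this one assume the driver registered Avro schemas for the shuffle and the output; without that, the AvroKey/AvroValue pairs cannot be serialized. A driver sketch under that assumption (the class, method, and recordSchema names are placeholders, not taken from the example):

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.avro.mapreduce.AvroKeyOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;

public class DedupDriverSketch {
  public static Job configure(Schema recordSchema) throws IOException {
    Job job = Job.getInstance(new Configuration(), "avro-dedup");
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    AvroJob.setInputKeySchema(job, recordSchema);
    AvroJob.setMapOutputKeySchema(job, recordSchema);   // shuffle key: AvroKey<GenericRecord>
    AvroJob.setMapOutputValueSchema(job, recordSchema); // shuffle value: AvroValue<GenericRecord>
    AvroJob.setOutputKeySchema(job, recordSchema);
    job.setOutputValueClass(NullWritable.class);
    return job;
  }
}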
Example 2: map
import org.apache.avro.mapred.AvroValue; // required import
@Override
protected void map(RecordKey key, AvroGenericRecordWritable value,
org.apache.hadoop.mapreduce.Mapper<RecordKey, AvroGenericRecordWritable, RecordKey, AvroValue<Record>>.Context
context)
throws IOException, InterruptedException {
if (key.isValid()) {
  // Hash the record into the key, then wrap the record for the shuffle.
  Record record = (Record) value.getRecord();
  key.setHash(record.hashCode());
  recordWrapped.datum(record);
  context.write(key, recordWrapped);
} else {
context.getCounter(RecordCounter.RECORDS).increment(1);
context.getCounter(RecordCounter.RECORDS_MALFORMED).increment(1);
textValue.set(key.getSource());
string.setLength(0);
multipleOutputs.write(OUTPUT_TEXT, NullWritable.get(), textValue,
string.append(MALFORMED_PATH_PREFIX).append(key.getBatch()).toString());
}
}
Example 3: reduce
import org.apache.avro.mapred.AvroValue; // required import
@Override
protected void reduce(RecordKey key, Iterable<AvroValue<Record>> values,
org.apache.hadoop.mapreduce.Reducer<RecordKey, AvroValue<Record>, NullWritable, AvroValue<Record>>
.Context context)
throws IOException, InterruptedException {
// Reset the per-key set used to detect duplicate records.
records.clear();
for (AvroValue<Record> record : values) {
context.getCounter(RecordCounter.RECORDS).increment(1);
RecordCounter counter = records.add(record) ? RecordCounter.RECORDS_CANONICAL : RecordCounter.RECORDS_DUPLICATE;
context.getCounter(counter).increment(1);
this.record.datum(record.datum());
calendar.setTimeInMillis(record.datum().getMyTimestamp());
string.setLength(0);
string
.append(counter.equals(RecordCounter.RECORDS_CANONICAL) ? Constants.DIR_REL_MYDS_PARTITIONED_CANONICAL_AVRO
: Constants.DIR_REL_MYDS_PARTITIONED_DUPLICATE_AVRO)
.append(Path.SEPARATOR_CHAR).append(PARTITION_YEAR).append(calendar.get(Calendar.YEAR)).append(Path.SEPARATOR_CHAR)
.append(PARTITION_MONTH).append(calendar.get(Calendar.MONTH) + 1).append(Path.SEPARATOR_CHAR);
partitions.add(string.toString());
multipleOutputsAvro.write(OUTPUT_AVRO, this.record, NullWritable.get(), string.append(PARTITION_FILE).toString());
}
}
Example 4: createRecordReader
import org.apache.avro.mapred.AvroValue; // required import
@Override
public RecordReader<AvroKey<K>, AvroValue<V>> createRecordReader(
InputSplit split, TaskAttemptContext context)
throws IOException, InterruptedException {
Schema keyReaderSchema = AvroJob.getInputKeySchema(context.getConfiguration());
if (null == keyReaderSchema) {
LOG.warn("Key reader schema was not set. " +
"Use AvroJob.setInputKeySchema() if desired.");
LOG.info("Using a key reader schema equal to the writer schema.");
}
Schema valueReaderSchema = AvroJob.getInputValueSchema(context.getConfiguration());
if (null == valueReaderSchema) {
LOG.warn("Value reader schema was not set. " +
"Use AvroJob.setInputValueSchema() if desired.");
LOG.info("Using a value reader schema equal to the writer schema.");
}
return new AvroKeyValueRecordReader<K, V>(keyReaderSchema, valueReaderSchema);
}
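As the warnings above state, reader schemas are optional; setting them pins schema resolution so data written with an older writer schema is resolved to the schema the job compiles against. A sketch of that configuration (the keySchema and valueSchema parameters are hypothetical names):

import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyValueInputFormat;
import org.apache.hadoop.mapreduce.Job;

public class InputSchemaSketch {
  static void configure(Job job, Schema keySchema, Schema valueSchema) {
    job.setInputFormatClass(AvroKeyValueInputFormat.class);
    // Without these two calls, the record reader above falls back to the
    // writer schema embedded in each input file.
    AvroJob.setInputKeySchema(job, keySchema);
    AvroJob.setInputValueSchema(job, valueSchema);
  }
}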
Example 5: testReduce
import org.apache.avro.mapred.AvroValue; // required import
@Test
public void testReduce() throws IOException {
LongWritable idx = new LongWritable(3L);
List<AvroValue<SerializableBill>> wrappedBills = new ArrayList<AvroValue<SerializableBill>>(2);
for (SerializableBill bill : getTestBills()) {
wrappedBills.add(new AvroValue<SerializableBill>(bill));
}
LongIndexedSerializableBill bills = new LongIndexedSerializableBill();
bills.setIdx(idx.get());
bills.setBills(getTestBills());
reduceDriver.setInput(idx, wrappedBills);
List<Pair<AvroKey<LongIndexedSerializableBill>, NullWritable>> outputRecords = reduceDriver.run();
assertEquals(1, outputRecords.size());
LongIndexedSerializableBill actualBills = outputRecords.get(0).getFirst().datum();
assertEquals(Long.valueOf(3L), actualBills.getIdx());
assertEquals(3, actualBills.getBills().size());
assertEquals(Long.valueOf(1L), actualBills.getBills().get(0).getId());
assertEquals(Long.valueOf(2L), actualBills.getBills().get(1).getId());
assertEquals(Long.valueOf(3L), actualBills.getBills().get(2).getId());
}
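MRUnit drivers do not run the normal AvroJob setup, so a test like this usually registers the Avro serialization and schemas by hand, typically in a @Before method. A sketch, assuming SerializableBill and LongIndexedSerializableBill are Avro-generated classes exposing SCHEMA$ constants:

import org.apache.avro.hadoop.io.AvroSerialization;
import org.apache.hadoop.conf.Configuration;

Configuration conf = reduceDriver.getConfiguration();
// Add AvroSerialization to the serializations MRUnit will consider.
conf.setStrings("io.serializations", conf.get("io.serializations"),
    AvroSerialization.class.getName());
AvroSerialization.setValueWriterSchema(conf, SerializableBill.SCHEMA$);
AvroSerialization.setValueReaderSchema(conf, SerializableBill.SCHEMA$);
AvroSerialization.setKeyWriterSchema(conf, LongIndexedSerializableBill.SCHEMA$);
AvroSerialization.setKeyReaderSchema(conf, LongIndexedSerializableBill.SCHEMA$);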
Example 6: testReduceIndexedOnCustomerId
import org.apache.avro.mapred.AvroValue; // required import
@Test
public void testReduceIndexedOnCustomerId() throws IOException {
List<AvroValue<PostcodeCategoryTurnoverTmp>> input = new ArrayList<AvroValue<PostcodeCategoryTurnoverTmp>>(3);
input.add(new AvroValue<PostcodeCategoryTurnoverTmp>(new PostcodeCategoryTurnoverTmp(1L, "FR-04000", 3L, null, 0.0)));
input.add(new AvroValue<PostcodeCategoryTurnoverTmp>(new PostcodeCategoryTurnoverTmp(2L, "FR-92100", 3L, null, 0.0)));
input.add(new AvroValue<PostcodeCategoryTurnoverTmp>(new PostcodeCategoryTurnoverTmp(1L, "FR-04000", 3L, null, 0.0)));
input.add(new AvroValue<PostcodeCategoryTurnoverTmp>(new PostcodeCategoryTurnoverTmp(null, null, 3L, "High tech", 199.0)));
reduceDriver.setInput(new LongWritable(3L), input);
reduceDriver.addOutput(new AvroKey<PostcodeCategoryTurnoverTmp>(new PostcodeCategoryTurnoverTmp(1L, "FR-04000", 3L, "High tech", 199.0)),
NullWritable.get());
reduceDriver.addOutput(new AvroKey<PostcodeCategoryTurnoverTmp>(new PostcodeCategoryTurnoverTmp(2L, "FR-92100", 3L, "High tech", 199.0)),
NullWritable.get());
reduceDriver.addOutput(new AvroKey<PostcodeCategoryTurnoverTmp>(new PostcodeCategoryTurnoverTmp(1L, "FR-04000", 3L, "High tech", 199.0)),
NullWritable.get());
reduceDriver.runTest(false);
}
Example 7: reduce
import org.apache.avro.mapred.AvroValue; // required import
@Override
protected void reduce(LongWritable idx, Iterable<AvroValue<SerializableBill>> bills, Context context) throws IOException, InterruptedException {
LongIndexedSerializableBill.Builder builder = LongIndexedSerializableBill.newBuilder();
builder.setIdx(idx.get());
// The same bill can arrive several times (e.g. once per product line, or
// when quantity > 1), so a Map keyed by bill id keeps a single instance.
Map<Long, SerializableBill> allBills = new HashMap<Long, SerializableBill>();
for (AvroValue<SerializableBill> value : bills) {
SerializableBill bill = value.datum();
if (!allBills.containsKey(bill.getId())) {
allBills.put(bill.getId(), SerializableBill.newBuilder(bill).build());
}
}
builder.setBills(new ArrayList<SerializableBill>(allBills.values()));
context.write(new AvroKey<LongIndexedSerializableBill>(builder.build()), NullWritable.get());
}
Example 8: reduce
import org.apache.avro.mapred.AvroValue; // required import
@Override
public void reduce(AvroKey<String> key, Iterable<AvroValue<IndexedRecord>> values, Context context)
throws IOException, InterruptedException {
Iterator<AvroValue<IndexedRecord>> iterator = values.iterator();
List<IndexedRecord> objects = new ArrayList<IndexedRecord>();
while (iterator.hasNext()) {
AvroValue<IndexedRecord> value = iterator.next();
objects.add((IndexedRecord) AvroUtils.getCopy(value.datum(), inputSchema, inputSchemaClass));
}
List<IndexedRecord> collapsedList = recordCollapser.collapse(objects);
for (IndexedRecord collapsed : collapsedList) {
context.write(new AvroKey<IndexedRecord>(collapsed), NullWritable.get());
}
}
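Note the copy before buffering: the MapReduce framework reuses the AvroValue wrapper (and its datum) between iterations, so storing value.datum() directly would fill the list with aliases of one object. AvroUtils.getCopy is project-specific; with plain Avro the same copy can be made like this (a sketch reusing the example's inputSchema field):

import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.IndexedRecord;

// Deep-copy each datum before buffering it, since the wrapper is reused.
IndexedRecord copy = GenericData.get().deepCopy(inputSchema, value.datum());
objects.add(copy);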
Example 9: testGroupingWithInvalidBlockingField
import org.apache.avro.mapred.AvroValue; // required import
@Test
public void testGroupingWithInvalidBlockingField() throws Exception {
// given
Configuration conf = new Configuration();
conf.set(GroupByFieldMapper.BLOCKING_FIELD, "invalid");
doReturn(conf).when(context).getConfiguration();
mapper.setup(context);
String idValue = "someId";
Identifier id = Identifier.newBuilder().setId(idValue).build();
// execute
mapper.map(new AvroKey<>(id), null, context);
// validate
verify(context, times(1)).write(new AvroKey<String>(null), new AvroValue<Identifier>(id));
}
Example 10: testGrouping
import org.apache.avro.mapred.AvroValue; // required import
@Test
public void testGrouping() throws Exception {
// given
Configuration conf = new Configuration();
conf.set(GroupByFieldMapper.BLOCKING_FIELD, "id");
doReturn(conf).when(context).getConfiguration();
mapper.setup(context);
String idValue = "someId";
Identifier id = Identifier.newBuilder().setId(idValue).build();
// execute
mapper.map(new AvroKey<>(id), null, context);
// validate
verify(context, times(1)).write(new AvroKey<String>(idValue), new AvroValue<Identifier>(id));
}
Example 11: getPartition
import org.apache.avro.mapred.AvroValue; // required import
@Override
public int getPartition(AvroKey<String> key,
AvroValue<Integer> value,
int numPartitions)
{
String k = key.datum().toString();
if (_partitionIdMap!=null)
{
if (_partitionIdMap.containsKey(k))
{
int partitionId = _partitionIdMap.get(k);
return partitionId % numPartitions;
}
}
// Mask the sign bit rather than Math.abs(), which stays negative for
// Integer.MIN_VALUE and would yield an invalid partition index.
return (k.hashCode() & Integer.MAX_VALUE) % numPartitions;
}
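A custom partitioner only takes effect once it is registered on the job. A sketch of the driver wiring (MyAvroPartitioner stands in for the class containing getPartition above, which the excerpt does not name):

import org.apache.hadoop.mapreduce.Job;

static void wirePartitioner(Job job, int numReducers) {
  job.setPartitionerClass(MyAvroPartitioner.class);
  job.setNumReduceTasks(numReducers); // getPartition() results are taken modulo this
}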
Example 12: reduce
import org.apache.avro.mapred.AvroValue; // required import
@Override
protected void reduce(AvroKey<GenericRecord> key, Iterable<AvroValue<GenericRecord>> values, Context context)
throws IOException, InterruptedException {
int numVals = 0;
// Keep one value per key: the first seen, or the greatest according to the
// delta comparator when one is configured.
AvroValue<GenericRecord> valueToRetain = null;
for (AvroValue<GenericRecord> value : values) {
if (valueToRetain == null) {
valueToRetain = value;
} else if (this.deltaComparatorOptional.isPresent()) {
valueToRetain = this.deltaComparatorOptional.get().compare(valueToRetain, value) >= 0 ? valueToRetain : value;
}
numVals++;
}
this.outKey.datum(valueToRetain.datum());
if (numVals > 1) {
context.getCounter(EVENT_COUNTER.MORE_THAN_1).increment(1);
context.getCounter(EVENT_COUNTER.DEDUPED).increment(numVals - 1);
}
context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);
context.write(this.outKey, NullWritable.get());
}
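The delta comparator itself is not shown in the excerpt. A sketch of what one might look like, keeping the record with the highest "scn" field; both the class name and the field name are hypothetical:

import java.util.Comparator;

import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroValue;

// Orders records by a numeric "scn" (system change number) field, so the
// reducer above retains the most recent version of each record.
public class ScnDeltaComparator implements Comparator<AvroValue<GenericRecord>> {
  @Override
  public int compare(AvroValue<GenericRecord> left, AvroValue<GenericRecord> right) {
    Long leftScn = (Long) left.datum().get("scn");
    Long rightScn = (Long) right.datum().get("scn");
    return leftScn.compareTo(rightScn);
  }
}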
Example 13: testMapReduce
import org.apache.avro.mapred.AvroValue; // required import
@Test
public void testMapReduce() throws IOException {
MyAvroReducer reducer = new MyAvroReducer();
// Configure a job.
Job job = new Job();
// We've got to do a little hacking here since MRUnit doesn't run exactly like
// the real Hadoop MapReduce framework.
AvroJob.setMapOutputKeySchema(job, Node.SCHEMA$);
AvroJob.setOutputKeySchema(job, reducer.getAvroKeyWriterSchema());
AvroSerialization.setValueWriterSchema(job.getConfiguration(), Node.SCHEMA$);
// Run the reducer.
ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> driver
= new ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable>();
driver.setReducer(reducer);
driver.withConfiguration(job.getConfiguration());
driver.withInput(new Text("foo"),
Collections.singletonList(new AvroValue<Node>(new NodeBuilder("bar", 1.0).build())));
List<Pair<AvroKey<Node>, NullWritable>> output = driver.run();
assertEquals(1, output.size());
assertEquals("bar", output.get(0).getFirst().datum().getLabel().toString());
}
Example 14: testMapReduce
import org.apache.avro.mapred.AvroValue; // required import
@Test
public void testMapReduce() throws IOException {
MyNodeReducer reducer = new MyNodeReducer();
// Configure a job.
Job job = new Job();
// We've got to do a little hacking here since MRUnit doesn't run exactly like
// the real Hadoop MapReduce framework.
AvroJob.setMapOutputKeySchema(job, Node.SCHEMA$);
AvroJob.setOutputKeySchema(job, reducer.getAvroKeyWriterSchema());
AvroSerialization.setValueWriterSchema(job.getConfiguration(), Node.SCHEMA$);
ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable> driver
= new ReduceDriver<Text, AvroValue<Node>, AvroKey<Node>, NullWritable>();
driver.setReducer(reducer);
driver.withConfiguration(job.getConfiguration());
driver.withInput(
new Text("foo"),
Collections.singletonList(new AvroValue<Node>(new NodeBuilder("bar", 1.0).build())));
List<Pair<AvroKey<Node>, NullWritable>> output = driver.run();
assertEquals(1, output.size());
assertEquals("bar", output.get(0).getFirst().datum().getLabel().toString());
}
Example 15: reduce
import org.apache.avro.mapred.AvroValue; // required import
/** {@inheritDoc} */
@Override
protected void reduce(AvroKey<CharSequence> key, Iterable<AvroValue<SongCount>> values,
KijiTableContext context) throws IOException {
// We are reusing objects, so we should make sure they are cleared for each new key.
mTopNextSongs.clear();
// Iterate through the song counts and track the top ${mNumberOfTopSongs} counts.
for (AvroValue<SongCount> value : values) {
// Remove AvroValue wrapper.
SongCount currentSongCount = SongCount.newBuilder(value.datum()).build();
mTopNextSongs.add(currentSongCount);
// If we now have too many elements, remove the element with the smallest count.
if (mTopNextSongs.size() > mNumberOfTopSongs) {
mTopNextSongs.pollFirst();
}
}
// Set the field of mTopSongs to be a list of SongCounts corresponding to the top songs played
// next for this key/song.
mTopSongs.setTopSongs(Lists.newArrayList(mTopNextSongs));
// Write this to the song table.
context.put(context.getEntityId(key.datum().toString()), "info", "top_next_songs", mTopSongs);
}
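Given the pollFirst() call, mTopNextSongs must be a sorted set ordered ascending by play count. A field sketch consistent with that (the SongCount accessor names are assumptions):

import java.util.Comparator;
import java.util.TreeSet;

// Ascending by count, so pollFirst() evicts the least-played entry.
private final TreeSet<SongCount> mTopNextSongs = new TreeSet<SongCount>(
    new Comparator<SongCount>() {
      @Override
      public int compare(SongCount a, SongCount b) {
        int byCount = Long.compare(a.getCount(), b.getCount());
        // Break ties so distinct songs with equal counts can coexist in the set.
        return byCount != 0 ? byCount
            : a.getSongId().toString().compareTo(b.getSongId().toString());
      }
    });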