This article collects typical usage examples of the DataFileStream.next method from the Java class org.apache.avro.file.DataFileStream. If you have been wondering what exactly DataFileStream.next does, how to call it, or what it looks like in real code, the curated method examples here may help. You can also explore further usage examples of the enclosing class, org.apache.avro.file.DataFileStream.
The following shows 15 code examples of the DataFileStream.next method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
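Before the collected examples, here is a minimal, self-contained sketch of the typical DataFileStream.next pattern: open a stream over an Avro object container file (which embeds its schema), iterate with hasNext()/next(), and optionally pass a previously read record back into next(reuse) to cut down on allocations. The file name records.avro and the main-method wrapper are illustrative assumptions, not taken from any of the projects below.

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;

public class DataFileStreamNextSketch {
  public static void main(String[] args) throws IOException {
    // Placeholder path: any Avro object container file works, since the schema is embedded.
    try (InputStream in = new FileInputStream("records.avro")) {
      DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
      try (DataFileStream<GenericRecord> stream = new DataFileStream<>(in, datumReader)) {
        GenericRecord reuse = null;
        while (stream.hasNext()) {
          // next(reuse) recycles the previous record instance where possible.
          reuse = stream.next(reuse);
          System.out.println(reuse);
        }
      }
    }
  }
}

Most of the examples below are variations on this loop; they differ mainly in where the bytes come from (local files, HDFS streams, in-memory byte arrays) and in whether they reuse a record instance across iterations.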
Example 1: deserialize
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
/**
 * Deserializes the bytes as an array of Generic containers.
 *
 * <p>The bytes include a standard Avro header that contains a magic byte, the
 * record's Avro schema (and so on), followed by the byte representation of the record.
 *
 * <p>Implementation detail: this method uses Avro's {@code DataFileStream} to read the records.
 * @param schema Schema associated with this container
 * @param container Serialized Avro bytes to deserialize
 * @return An array of the deserialized GenericContainer records, or null if the input is null
 */
public GenericContainer[] deserialize(Schema schema, byte[] container) throws IOException {
  GenericContainer ret = null;
  List<GenericContainer> retList = new ArrayList<>();
  if (container != null) {
    DatumReader<GenericContainer> datumReader = new GenericDatumReader<>(schema);
    ByteArrayInputStream in = new ByteArrayInputStream(container);
    DataFileStream<GenericContainer> reader =
        new DataFileStream<GenericContainer>(in, datumReader);
    while (reader.hasNext()) {
      ret = reader.next(ret);
      retList.add(ret);
    }
    return retList.toArray(new GenericContainer[retList.size()]);
  } else {
    return null;
  }
}
Example 2: deserialize
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
/**
 * Deserializes the bytes as an array of Generic containers.
 *
 * <p>The bytes include a standard Avro header that contains a magic byte, the
 * record's Avro schema (and so on), followed by the byte representation of the record.
 *
 * <p>Implementation detail: this method uses Avro's {@code DataFileStream} to read the records.
 *
 * @param schema Schema associated with this container
 * @param container Serialized Avro bytes to deserialize
 * @return An array of the deserialized GenericContainer records, or null if the input is null
 */
public GenericContainer[] deserialize(Schema schema, byte[] container) throws IOException {
  GenericContainer ret = null;
  List<GenericContainer> retList = new ArrayList<>();
  if (container != null) {
    DatumReader<GenericContainer> datumReader = new GenericDatumReader<>(schema);
    ByteArrayInputStream in = new ByteArrayInputStream(container);
    DataFileStream<GenericContainer> reader = new DataFileStream<GenericContainer>(
        in,
        datumReader
    );
    while (reader.hasNext()) {
      ret = reader.next(ret);
      retList.add(ret);
    }
    return retList.toArray(new GenericContainer[retList.size()]);
  } else {
    return null;
  }
}
Example 3: getRealtimeSegment
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
private IndexSegment getRealtimeSegment() throws IOException {
  RealtimeSegmentImpl realtimeSegmentImpl = new RealtimeSegmentImpl(PINOT_SCHEMA, 100000);
  try {
    DataFileStream<GenericRecord> avroReader =
        AvroUtils.getAvroReader(new File(TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(
            AVRO_DATA))));
    while (avroReader.hasNext()) {
      GenericRecord avroRecord = avroReader.next();
      GenericRow genericRow = AVRO_RECORD_TRANSFORMER.transform(avroRecord);
      // System.out.println(genericRow);
      realtimeSegmentImpl.index(genericRow);
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
  System.out.println("Current raw events indexed: " + realtimeSegmentImpl.getRawDocumentCount() + ", totalDocs = "
      + realtimeSegmentImpl.getTotalDocs());
  realtimeSegmentImpl.setSegmentName("testTable_testTable");
  realtimeSegmentImpl.setSegmentMetadata(getRealtimeSegmentZKMetadata());
  return realtimeSegmentImpl;
}
Example 4: loadMemberCount
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
private Long loadMemberCount(Path path, String timestamp) throws IOException
{
  FileSystem fs = getFileSystem();
  Assert.assertTrue(fs.exists(new Path(path, timestamp)));
  for (FileStatus stat : fs.globStatus(new Path(path, timestamp + "/*.avro")))
  {
    _log.info(String.format("found: %s (%d bytes)", stat.getPath(), stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    try
    {
      GenericRecord r = dataFileStream.next();
      Long count = (Long) ((GenericRecord) r.get("value")).get("count");
      Assert.assertNotNull(count);
      System.out.println("found count: " + count);
      return count;
    }
    finally
    {
      dataFileStream.close();
    }
  }
  throw new RuntimeException("found no data");
}
Example 5: getTimeColumnStatsCollector
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
private LongColumnPreIndexStatsCollector getTimeColumnStatsCollector(Schema schema, File localAvroFile)
    throws FileNotFoundException, IOException {
  String timeColumnName = schema.getTimeColumnName();
  FieldSpec spec = schema.getTimeFieldSpec();
  LOGGER.info("Spec for " + timeColumnName + " is " + spec);
  LongColumnPreIndexStatsCollector timeColumnStatisticsCollector =
      new LongColumnPreIndexStatsCollector(spec.getName(), new StatsCollectorConfig(schema, null));
  LOGGER.info("StatsCollector :" + timeColumnStatisticsCollector);
  DataFileStream<GenericRecord> dataStream =
      new DataFileStream<GenericRecord>(new FileInputStream(localAvroFile), new GenericDatumReader<GenericRecord>());
  while (dataStream.hasNext()) {
    GenericRecord next = dataStream.next();
    timeColumnStatisticsCollector.collect(next.get(timeColumnName));
  }
  dataStream.close();
  timeColumnStatisticsCollector.seal();
  return timeColumnStatisticsCollector;
}
Example 6: verifyOutputAvroFiles
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
private void verifyOutputAvroFiles(FileSystem fs, Configuration conf, String dir, String prefix,
    List<String> bodies) throws IOException {
  int found = 0;
  int expected = bodies.size();
  for (String outputFile : getAllFiles(dir)) {
    String name = (new File(outputFile)).getName();
    if (name.startsWith(prefix)) {
      FSDataInputStream input = fs.open(new Path(outputFile));
      DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
      DataFileStream<GenericRecord> avroStream =
          new DataFileStream<GenericRecord>(input, reader);
      GenericRecord record = new GenericData.Record(avroStream.getSchema());
      while (avroStream.hasNext()) {
        avroStream.next(record);
        ByteBuffer body = (ByteBuffer) record.get("body");
        CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
        String bodyStr = decoder.decode(body).toString();
        LOG.debug("Removing event: {}", bodyStr);
        bodies.remove(bodyStr);
        found++;
      }
      avroStream.close();
      input.close();
    }
  }
  Assert.assertTrue("Found = " + found + ", Expected = " + expected
      + ", Left = " + bodies.size() + " " + bodies,
      bodies.size() == 0);
}
Example 7: testGzipDurabilityWithSerializer
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
@Test
public void testGzipDurabilityWithSerializer() throws Exception {
  Context context = new Context();
  context.put("serializer", "AVRO_EVENT");
  HDFSCompressedDataStream writer = new HDFSCompressedDataStream();
  writer.configure(context);
  writer.open(fileURI, factory.getCodec(new Path(fileURI)),
      SequenceFile.CompressionType.BLOCK);
  String[] bodies = { "yarf!", "yarfing!" };
  writeBodies(writer, bodies);
  int found = 0;
  int expected = bodies.length;
  List<String> expectedBodies = Lists.newArrayList(bodies);
  GZIPInputStream cmpIn = new GZIPInputStream(new FileInputStream(file));
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileStream<GenericRecord> avroStream =
      new DataFileStream<GenericRecord>(cmpIn, reader);
  GenericRecord record = new GenericData.Record(avroStream.getSchema());
  while (avroStream.hasNext()) {
    avroStream.next(record);
    CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
    String bodyStr = decoder.decode((ByteBuffer) record.get("body"))
        .toString();
    expectedBodies.remove(bodyStr);
    found++;
  }
  avroStream.close();
  cmpIn.close();
  Assert.assertTrue("Found = " + found + ", Expected = " + expected
      + ", Left = " + expectedBodies.size() + " " + expectedBodies,
      expectedBodies.size() == 0);
}
Example 8: deserialize
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
public Employee deserialize(InputStream in) throws IOException
{
  GenericDatumReader<GenericData.Record> datum = new GenericDatumReader<>(schema);
  DataFileStream<GenericData.Record> reader = new DataFileStream<GenericData.Record>(in, datum);
  Employee employee = null;
  try
  {
    GenericData.Record record = new GenericData.Record(schema);
    if (reader.hasNext())
    {
      employee = new Employee();
      reader.next(record);
      employee.setName(AvroUtils.get("name", record, "").toString());
      employee.setAge(Integer.parseInt(AvroUtils.get("yrs", record, "0").toString()));
      employee.setMails(AvroUtils.getArray(record, "emails", Object::toString));
    }
    return employee;
  }
  finally
  {
    reader.close();
  }
}
Example 9: processAvroMessage
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
/**
 * Processes a blob which contains one or more Avro messages and has the
 * schema embedded. This is the pattern when Avro objects are read from a
 * file (either local file system or HDFS). Every Avro object in the blob is
 * converted to JSON and then submitted to the output port.
 *
 * @param avroMessage
 *            The Blob that holds one or more Avro objects and the schema
 * @param outStream
 *            The stream to which the JSON string must be submitted
 * @param outTuple
 *            The tuple holding the JSON string
 * @throws Exception
 */
private void processAvroMessage(Blob avroMessage, StreamingOutput<OutputTuple> outStream, OutputTuple outTuple)
    throws Exception {
  ByteArrayInputStream is = new ByteArrayInputStream(avroMessage.getData());
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileStream<GenericRecord> dataFileReader = new DataFileStream<GenericRecord>(is, reader);
  GenericRecord consumedDatum = null;
  while (dataFileReader.hasNext()) {
    consumedDatum = dataFileReader.next(consumedDatum);
    if (LOGGER.isTraceEnabled())
      LOGGER.log(TraceLevel.TRACE, "JSON representation of Avro message: " + consumedDatum.toString());
    // Submit new tuple to output port 0
    outTuple.setString(outputJsonMessage, consumedDatum.toString());
    outStream.submit(outTuple);
  }
  is.close();
  dataFileReader.close();
}
Example 10: test1
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
@Test
public void test1() throws Exception {
  // load segment in heap mode
  final IndexSegmentImpl heapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(INDEX_DIR, ReadMode.heap);
  // compare the loaded inverted index with the record in avro file
  final DataFileStream<GenericRecord> reader =
      new DataFileStream<GenericRecord>(new FileInputStream(new File(getClass().getClassLoader()
          .getResource(AVRO_DATA).getFile())), new GenericDatumReader<GenericRecord>());
  int docId = 0;
  while (reader.hasNext()) {
    final GenericRecord rec = reader.next();
    for (final String column : ((SegmentMetadataImpl) heapSegment.getSegmentMetadata()).getColumnMetadataMap().keySet()) {
      Object entry = rec.get(column);
      if (entry instanceof Utf8) {
        entry = ((Utf8) entry).toString();
      }
      final int dicId = heapSegment.getDictionaryFor(column).indexOf(entry);
      // make sure that docId for dicId exists in the inverted index
      Assert.assertEquals(true, heapSegment.getInvertedIndexFor(column).getImmutable(dicId).contains(docId));
      final int size = heapSegment.getDictionaryFor(column).length();
      for (int i = 0; i < size; ++i) { // remove this for-loop for quick test
        if (i == dicId) {
          continue;
        }
        // make sure that docId for dicId does not exist in the inverted index
        Assert.assertEquals(false, heapSegment.getInvertedIndexFor(column).getImmutable(i).contains(docId));
      }
    }
    ++docId;
  }
}
Example 11: test2
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
@Test
public void test2() throws Exception {
  // load segment in mmap mode
  final IndexSegmentImpl mmapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(INDEX_DIR, ReadMode.mmap);
  // compare the loaded inverted index with the record in avro file
  final DataFileStream<GenericRecord> reader =
      new DataFileStream<GenericRecord>(new FileInputStream(new File(getClass().getClassLoader()
          .getResource(AVRO_DATA).getFile())), new GenericDatumReader<GenericRecord>());
  int docId = 0;
  while (reader.hasNext()) {
    final GenericRecord rec = reader.next();
    for (final String column : ((SegmentMetadataImpl) mmapSegment.getSegmentMetadata()).getColumnMetadataMap().keySet()) {
      Object entry = rec.get(column);
      if (entry instanceof Utf8) {
        entry = ((Utf8) entry).toString();
      }
      final int dicId = mmapSegment.getDictionaryFor(column).indexOf(entry);
      // make sure that docId for dicId exists in the inverted index
      Assert.assertEquals(true, mmapSegment.getInvertedIndexFor(column).getImmutable(dicId).contains(docId));
      final int size = mmapSegment.getDictionaryFor(column).length();
      for (int i = 0; i < size; ++i) { // remove this for-loop for quick test
        if (i == dicId) {
          continue;
        }
        // make sure that docId for dicId does not exist in the inverted index
        Assert.assertEquals(false, mmapSegment.getInvertedIndexFor(column).getImmutable(i).contains(docId));
      }
    }
    ++docId;
  }
}
Example 12: computeU
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
private void computeU(JobConf conf, String uPath, String uplusxPath, Map<String, LinearModel> z) throws IOException
{
  AvroHdfsFileWriter<GenericRecord> writer =
      new AvroHdfsFileWriter<GenericRecord>(conf, uPath, LinearModelAvro.SCHEMA$);
  DataFileWriter<GenericRecord> recordwriter = writer.get();
  // read u+x
  for (Path path : Util.findPartFiles(conf, new Path(uplusxPath)))
  {
    DataFileStream<Object> stream = AvroUtils.getAvroDataStream(conf, path);
    while (stream.hasNext())
    {
      GenericData.Record record = (GenericData.Record) stream.next();
      String partitionID = Util.getStringAvro(record, "key", false);
      if (record.get("uplusx") != null)
      {
        String lambda = Util.getLambda(partitionID);
        LinearModel newu =
            new LinearModel(LibLinearDataset.INTERCEPT_NAME, (List<?>) record.get("uplusx"));
        newu.linearCombine(1.0, -1.0, z.get(lambda));
        GenericData.Record newvaluemap =
            new GenericData.Record(LinearModelAvro.SCHEMA$);
        List modellist = newu.toAvro(LibLinearDataset.INTERCEPT_NAME);
        newvaluemap.put("key", partitionID);
        newvaluemap.put("model", modellist);
        recordwriter.append(newvaluemap);
      }
    }
  }
  recordwriter.close();
}
Example 13: loadOutputCounts
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
private HashMap<Long, Integer> loadOutputCounts(Path path, String timestamp) throws IOException
{
  HashMap<Long, Integer> counts = new HashMap<Long, Integer>();
  FileSystem fs = getFileSystem();
  Assert.assertTrue(fs.exists(new Path(path, timestamp)));
  for (FileStatus stat : fs.globStatus(new Path(path, timestamp + "/*.avro")))
  {
    _log.info(String.format("found: %s (%d bytes)", stat.getPath(), stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    try
    {
      while (dataFileStream.hasNext())
      {
        GenericRecord r = dataFileStream.next();
        _log.info("found: " + r.toString());
        Long memberId = (Long) ((GenericRecord) r.get("key")).get("member_id");
        Assert.assertNotNull(memberId);
        Integer count = (Integer) ((GenericRecord) r.get("value")).get("count");
        Assert.assertNotNull(count);
        Assert.assertFalse(counts.containsKey(memberId));
        counts.put(memberId, count);
      }
    }
    finally
    {
      dataFileStream.close();
    }
  }
  return counts;
}
Example 14: loadOutputCounts
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
private HashMap<Long, Long> loadOutputCounts(String timestamp) throws IOException
{
  HashMap<Long, Long> counts = new HashMap<Long, Long>();
  FileSystem fs = getFileSystem();
  String nestedPath = getNestedPathFromTimestamp(timestamp);
  Assert.assertTrue(fs.exists(new Path(_outputPath, nestedPath)));
  for (FileStatus stat : fs.globStatus(new Path(_outputPath, nestedPath + "/*.avro")))
  {
    _log.info(String.format("found: %s (%d bytes)", stat.getPath(), stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    try
    {
      while (dataFileStream.hasNext())
      {
        GenericRecord r = dataFileStream.next();
        Long memberId = (Long) ((GenericRecord) r.get("key")).get("id");
        Long count = (Long) ((GenericRecord) r.get("value")).get("count");
        Assert.assertFalse(counts.containsKey(memberId));
        counts.put(memberId, count);
      }
    }
    finally
    {
      dataFileStream.close();
    }
  }
  return counts;
}
Example 15: loadOutputCounts
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
private HashMap<Long, Long> loadOutputCounts(String timestamp) throws IOException
{
  HashMap<Long, Long> counts = new HashMap<Long, Long>();
  FileSystem fs = getFileSystem();
  Assert.assertTrue(fs.exists(new Path(_outputPath, timestamp)));
  for (FileStatus stat : fs.globStatus(new Path(_outputPath, timestamp + "/*.avro")))
  {
    _log.info(String.format("found: %s (%d bytes)", stat.getPath(), stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    try
    {
      while (dataFileStream.hasNext())
      {
        GenericRecord r = dataFileStream.next();
        Long memberId = (Long) r.get("id");
        Long count = (Long) r.get("count");
        Assert.assertFalse(counts.containsKey(memberId));
        counts.put(memberId, count);
      }
    }
    finally
    {
      dataFileStream.close();
    }
  }
  return counts;
}