本文整理汇总了Java中org.apache.avro.file.DataFileReader.openReader方法的典型用法代码示例。如果您正苦于以下问题:Java DataFileReader.openReader方法的具体用法?Java DataFileReader.openReader怎么用?Java DataFileReader.openReader使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.avro.file.DataFileReader
的用法示例。
在下文中一共展示了DataFileReader.openReader方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: AvroFileInputStream
import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Opens the Avro container file described by {@code status} and prepares a
 * JSON re-encoding pipeline: records are read generically, then written to
 * {@code output} as one JSON value per line (root values separated by the
 * platform line separator).
 *
 * @param status file status of the Avro file to read
 * @throws IOException if the file cannot be opened or the JSON generator
 *                     cannot be created
 */
public AvroFileInputStream(FileStatus status) throws IOException {
  pos = 0;
  buffer = new byte[0];
  // Generic reader: no compiled record classes required.
  GenericDatumReader<Object> datumReader = new GenericDatumReader<Object>();
  FileContext fileContext = FileContext.getFileContext(new Configuration());
  fileReader = DataFileReader.openReader(new AvroFSInput(fileContext, status.getPath()), datumReader);
  // Re-encode using the schema embedded in the file itself.
  Schema fileSchema = fileReader.getSchema();
  writer = new GenericDatumWriter<Object>(fileSchema);
  output = new ByteArrayOutputStream();
  JsonGenerator jsonGenerator =
      new JsonFactory().createJsonGenerator(output, JsonEncoding.UTF8);
  // One record per line rather than pretty-printed nesting.
  MinimalPrettyPrinter linePrinter = new MinimalPrettyPrinter();
  linePrinter.setRootValueSeparator(System.getProperty("line.separator"));
  jsonGenerator.setPrettyPrinter(linePrinter);
  encoder = EncoderFactory.get().jsonEncoder(fileSchema, jsonGenerator);
}
示例2: initReader
import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Creates a {@link DataFileReader} for the given split, choosing the datum
 * reader that matches {@code avroValueType}: generic for
 * {@code GenericRecord}, specific for generated record classes, and
 * reflect-based otherwise.
 *
 * @param split the input split to open
 * @return a positioned reader over the split's file
 * @throws IOException if the underlying file cannot be opened
 */
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
	final DatumReader<E> datumReader;
	if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
		datumReader = new GenericDatumReader<E>();
	} else if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
		datumReader = new SpecificDatumReader<E>(avroValueType);
	} else {
		datumReader = new ReflectDatumReader<E>(avroValueType);
	}

	if (LOG.isInfoEnabled()) {
		LOG.info("Opening split {}", split);
	}

	long fileLength = split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen();
	SeekableInput in = new FSDataInputStreamWrapper(stream, fileLength);
	DataFileReader<E> dataFileReader = (DataFileReader) DataFileReader.openReader(in, datumReader);

	if (LOG.isDebugEnabled()) {
		LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
	}

	// Track where this split ends and reset the per-sync record counter.
	end = split.getStart() + split.getLength();
	recordsReadSinceLastSync = 0;
	return dataFileReader;
}
示例3: testDeserializeToSpecificType
import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
* This test validates proper serialization with specific (generated POJO) types.
*/
@Test
public void testDeserializeToSpecificType() throws IOException {
DatumReader<User> datumReader = new SpecificDatumReader<User>(userSchema);
try (FileReader<User> dataFileReader = DataFileReader.openReader(testFile, datumReader)) {
User rec = dataFileReader.next();
// check if record has been read correctly
assertNotNull(rec);
assertEquals("name not equal", TEST_NAME, rec.get("name").toString());
assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), rec.get("type_enum").toString());
// now serialize it with our framework:
ExecutionConfig ec = new ExecutionConfig();
TypeInformation<User> te = TypeExtractor.createTypeInfo(User.class);
Assert.assertEquals(AvroTypeInfo.class, te.getClass());
TypeSerializer<User> tser = te.createSerializer(ec);
ByteArrayOutputStream out = new ByteArrayOutputStream();
try (DataOutputViewStreamWrapper outView = new DataOutputViewStreamWrapper(out)) {
tser.serialize(rec, outView);
}
User newRec;
try (DataInputViewStreamWrapper inView = new DataInputViewStreamWrapper(
new ByteArrayInputStream(out.toByteArray())))
{
newRec = tser.deserialize(inView);
}
// check if it is still the same
assertNotNull(newRec);
assertEquals("name not equal", TEST_NAME, newRec.getName().toString());
assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), newRec.getTypeEnum().toString());
}
}
示例4: AvroFileInputStream
import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Opens the local Avro container file described by {@code status} and sets
 * up a JSON re-encoding pipeline that emits one JSON value per line into
 * {@code output}.
 *
 * @param status file status whose path is converted to a local {@link File}
 * @throws IOException if the file cannot be opened or the JSON generator
 *                     cannot be created
 */
public AvroFileInputStream(FileStatus status) throws IOException {
  pos = 0;
  buffer = new byte[0];
  // Generic reader: no compiled record classes required.
  GenericDatumReader<Object> datumReader = new GenericDatumReader<Object>();
  fileReader = DataFileReader.openReader(new File(status.getPath().toUri()), datumReader);
  // Re-encode using the schema embedded in the file itself.
  Schema fileSchema = fileReader.getSchema();
  writer = new GenericDatumWriter<Object>(fileSchema);
  output = new ByteArrayOutputStream();
  JsonGenerator jsonGenerator =
      new JsonFactory().createJsonGenerator(output, JsonEncoding.UTF8);
  // One record per line rather than pretty-printed nesting.
  MinimalPrettyPrinter linePrinter = new MinimalPrettyPrinter();
  linePrinter.setRootValueSeparator(System.getProperty("line.separator"));
  jsonGenerator.setPrettyPrinter(linePrinter);
  encoder = EncoderFactory.get().jsonEncoder(fileSchema, jsonGenerator);
}
示例5: getSchema
import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
*
* @param file
* @return
* @throws IOException
*/
/**
 * Reads the writer schema embedded in the header of an Avro container file.
 *
 * @param file the Avro data file to inspect
 * @return the schema stored in the file's header
 * @throws IOException if the file cannot be opened or is not a valid Avro
 *                     container file
 */
public Schema getSchema(File file) throws IOException {
	DatumReader<IndexedRecord> reader = new GenericDatumReader<>();
	// try-with-resources replaces the former manual try/finally close and
	// guarantees the reader is released even if getSchema() throws.
	try (FileReader<IndexedRecord> fileReader = DataFileReader.openReader(file, reader)) {
		return fileReader.getSchema();
	}
}
示例6: readAndCheckResultsFromHdfs
import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Reads every record wrapper file matching the application's log path glob
 * and asserts that each record carries the expected header and that the
 * collected payloads equal {@code testLogs}.
 *
 * @param header   the header every stored record is expected to carry
 * @param testLogs the payloads expected to be found, in order
 * @throws IOException if any file cannot be read
 */
private void readAndCheckResultsFromHdfs(RecordHeader header, List<TestLogData> testLogs) throws IOException {
	// Glob over all data files written for this application/schema version.
	Path logsPath = new Path("/logs" + Path.SEPARATOR + applicationToken + Path.SEPARATOR + logSchemaVersion + Path.SEPARATOR + "data*");
	FileStatus[] matchedFiles = fileSystem.globStatus(logsPath);

	Schema wrapperSchema = RecordWrapperSchemaGenerator.generateRecordWrapperSchema(TestLogData.getClassSchema().toString());
	List<TestLogData> collectedLogs = new ArrayList<>();

	for (FileStatus fileStatus : matchedFiles) {
		FileReader<GenericRecord> fileReader = null;
		try {
			SeekableInput input = new FsInput(fileStatus.getPath(), fileSystem.getConf());
			DatumReader<GenericRecord> datumReader = new SpecificDatumReader<>(wrapperSchema);
			fileReader = DataFileReader.openReader(input, datumReader);
			for (GenericRecord wrapper : fileReader) {
				// Every wrapper record must carry the expected header ...
				RecordHeader storedHeader = (RecordHeader) wrapper.get(RecordWrapperSchemaGenerator.RECORD_HEADER_FIELD);
				Assert.assertEquals(header, storedHeader);
				// ... and its payload is accumulated for the final comparison.
				collectedLogs.add((TestLogData) wrapper.get(RecordWrapperSchemaGenerator.RECORD_DATA_FIELD));
			}
		} finally {
			IOUtils.closeQuietly(fileReader);
		}
	}

	Assert.assertEquals(testLogs, collectedLogs);
}
示例7: decodePojo
import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Decodes a single reflect-serialized record from an Avro container byte
 * array into a fresh instance of {@code type}.
 *
 * <p>Fix: the original never closed the {@link FileReader}, leaking it on
 * every call; try-with-resources now closes it on all paths.
 *
 * @param encodedData Avro container bytes holding at least one record
 * @param type        the POJO class to instantiate and populate
 * @return the decoded instance, or {@code null} if decoding fails
 */
private <T> T decodePojo(byte[] encodedData, Class type) {
	log.debug("Reflecting decode of {}, consider registering a converter", type);
	DatumReader<T> userDatumReader = RD.createDatumReader(RD.getSchema(type));
	try (FileReader<T> avroReader =
			DataFileReader.openReader(new SeekableByteArrayInput(encodedData), userDatumReader)) {
		// Reuse target instance created reflectively; requires a no-arg ctor.
		@SuppressWarnings("unchecked")
		T reuse = (T) type.newInstance();
		return avroReader.next(reuse);
	} catch (IOException | IllegalAccessException | InstantiationException e) {
		// Preserved best-effort behavior: report and fall through to null.
		e.printStackTrace();
	}
	return null;
}
示例8: open
import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Opens the given split and positions the Avro reader at the first sync
 * point at or after the split's start offset.
 *
 * @param split the split to open
 * @throws IOException if the underlying stream cannot be opened
 */
@Override
public void open(FileInputSplit split) throws IOException {
	super.open(split);

	// Specific reader for generated record classes, reflect reader otherwise.
	final DatumReader<E> datumReader =
			org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
					? new SpecificDatumReader<E>(avroValueType)
					: new ReflectDatumReader<E>(avroValueType);

	LOG.info("Opening split " + split);

	// NOTE(review): the narrowing (int) cast truncates split lengths over
	// Integer.MAX_VALUE — confirm splits are guaranteed to fit in an int.
	SeekableInput in = new FSDataInputStreamWrapper(stream, (int) split.getLength());
	dataFileReader = DataFileReader.openReader(in, datumReader);
	dataFileReader.sync(split.getStart());
}
示例9: convert
import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
@Override
public FixFile convert(TestRun testRun, FixFile ff) throws IOException {
byte[] bytes = IOUtils.toByteArray(ff.getContent());
if (bytes.length == 0) {
return ff;
}
ByteArrayOutputStream os = new ByteArrayOutputStream();
GenericDatumReader<Object> reader = new GenericDatumReader<>();
FileReader<Object> fileReader = DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader);
try {
Schema schema = fileReader.getSchema();
DatumWriter<Object> writer = new GenericDatumWriter<>(schema);
JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, os);
for (Object datum : fileReader) {
writer.write(datum, encoder);
}
encoder.flush();
} finally {
fileReader.close();
}
return new FixFile(new ByteArrayInputStream(os.toByteArray()));
}
示例10: getSchema
import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
@Override
public Schema getSchema(Configuration conf, Path path) throws IOException {
SeekableInput input = new FsInput(path, conf);
DatumReader<Object> reader = new GenericDatumReader<>();
FileReader<Object> fileReader = DataFileReader.openReader(input, reader);
org.apache.avro.Schema schema = fileReader.getSchema();
fileReader.close();
return avroData.toConnectSchema(schema);
}
示例11: readData
import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Reads every datum from the Avro container file at {@code path}.
 *
 * <p>Fix: the original leaked the reader if iteration threw before
 * {@code close()}; try-with-resources now closes it on all paths.
 *
 * @param conf Hadoop configuration used to open the file
 * @param path path of the Avro data file
 * @return all records from the file, in file order
 * @throws IOException if the file cannot be opened or read
 */
@Override
public Collection<Object> readData(Configuration conf, Path path) throws IOException {
	ArrayList<Object> collection = new ArrayList<>();
	SeekableInput input = new FsInput(path, conf);
	DatumReader<Object> reader = new GenericDatumReader<>();
	try (FileReader<Object> fileReader = DataFileReader.openReader(input, reader)) {
		for (Object object : fileReader) {
			collection.add(object);
		}
	}
	return collection;
}
示例12: getAvroSchema
import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
* Get the schema of AVRO files stored in a directory
*/
/**
 * Gets the schema of AVRO files stored in a directory. If {@code path} is a
 * directory, the first non-hidden file (names not starting with "_" or ".")
 * is used as a representative sample.
 *
 * <p>Fix: the original leaked the reader if {@code getSchema()} threw
 * before {@code close()}; try-with-resources now closes it on all paths.
 *
 * @param path a single Avro file, or a directory containing Avro files
 * @param conf Hadoop configuration used to access the filesystem
 * @return the schema, or {@code null} if the directory has no eligible files
 * @throws IOException if the filesystem or file cannot be read
 */
public static Schema getAvroSchema(Path path, Configuration conf)
    throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  Path fileToTest;
  if (fs.isDirectory(path)) {
    // Skip metadata/hidden files such as _SUCCESS and .crc files.
    FileStatus[] fileStatuses = fs.listStatus(path, new PathFilter() {
      @Override
      public boolean accept(Path p) {
        String name = p.getName();
        return !name.startsWith("_") && !name.startsWith(".");
      }
    });
    if (fileStatuses.length == 0) {
      return null;
    }
    fileToTest = fileStatuses[0].getPath();
  } else {
    fileToTest = path;
  }
  SeekableInput input = new FsInput(fileToTest, conf);
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  try (FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader)) {
    return fileReader.getSchema();
  }
}
示例13: initialize
import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Initializes the record reader for the given split: opens the Avro file,
 * syncs to the first record boundary at or after the split start, and
 * records the byte range this reader is responsible for.
 *
 * @param genericSplit the split to read (must be a {@link FileSplit})
 * @param context      the task context providing the Hadoop configuration
 * @throws IOException          if the file cannot be opened
 * @throws InterruptedException declared by the interface contract
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
    throws IOException, InterruptedException {
  FileSplit fileSplit = (FileSplit) genericSplit;
  Configuration conf = context.getConfiguration();
  SeekableInput input = new FsInput(fileSplit.getPath(), conf);
  DatumReader<T> datumReader = new GenericDatumReader<T>();
  this.reader = DataFileReader.openReader(input, datumReader);
  // Avro records are addressable only at sync markers, so jump to the
  // first marker at or after the split start.
  reader.sync(fileSplit.getStart());
  this.start = reader.tell();
  this.end = fileSplit.getStart() + fileSplit.getLength();
}
示例14: checkAvroFileForLine
import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Scans the Avro file at {@code p} for a record matching {@code record}.
 *
 * <p>Fix: the original never closed the {@link FileReader}, leaking it on
 * every call; try-with-resources now closes it on all paths.
 *
 * @param fs     filesystem handle (unused; the path is opened via FsInput)
 * @param p      path of the Avro file to scan
 * @param record the expected field values to match against each record
 * @return {@code true} if any record matches, {@code false} otherwise
 * @throws IOException if the file cannot be opened or read
 */
private boolean checkAvroFileForLine(FileSystem fs, Path p, List<Integer> record)
    throws IOException {
  SeekableInput in = new FsInput(p, new Configuration());
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
  try (FileReader<GenericRecord> reader = DataFileReader.openReader(in, datumReader)) {
    // Start scanning from the beginning of the file.
    reader.sync(0);
    while (reader.hasNext()) {
      if (valueMatches(reader.next(), record)) {
        return true;
      }
    }
  }
  return false;
}
示例15: testDeserializeToSpecificType
import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
* This test validates proper serialization with specific (generated POJO) types.
*/
@Test
public void testDeserializeToSpecificType() throws IOException {
DatumReader<User> datumReader = new SpecificDatumReader<User>(userSchema);
try (FileReader<User> dataFileReader = DataFileReader.openReader(testFile, datumReader)) {
User rec = dataFileReader.next();
// check if record has been read correctly
assertNotNull(rec);
assertEquals("name not equal", TEST_NAME, rec.get("name").toString());
assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), rec.get("type_enum").toString());
// now serialize it with our framework:
ExecutionConfig ec = new ExecutionConfig();
TypeInformation<User> te = TypeExtractor.createTypeInfo(User.class);
Assert.assertEquals(AvroTypeInfo.class, te.getClass());
TypeSerializer<User> tser = te.createSerializer(ec);
ByteArrayOutputStream out = new ByteArrayOutputStream();
try (DataOutputViewStreamWrapper outView = new DataOutputViewStreamWrapper(out)) {
tser.serialize(rec, outView);
}
User newRec;
try (DataInputViewStreamWrapper inView = new DataInputViewStreamWrapper(
new ByteArrayInputStream(out.toByteArray()))) {
newRec = tser.deserialize(inView);
}
// check if it is still the same
assertNotNull(newRec);
assertEquals("name not equal", TEST_NAME, newRec.getName().toString());
assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), newRec.getTypeEnum().toString());
}
}