本文整理汇总了 Java 中 org.apache.parquet.avro.AvroParquetReader 类的典型用法代码示例。如果您正苦于以下问题:Java AvroParquetReader 类的具体用法?Java AvroParquetReader 怎么用?Java AvroParquetReader 使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
AvroParquetReader 类属于 org.apache.parquet.avro 包,下文一共展示了 AvroParquetReader 类的 10 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: getSchema
import org.apache.parquet.avro.AvroParquetReader; //导入依赖的package包/类
/**
 * Builds the dataset metadata record for a Parquet file, taking the schema from the first
 * record read from it.
 *
 * @param targetFilePath path of the Parquet file to inspect
 * @return a {@code DatasetJsonRecord} built from the first record's schema string, the file's
 *     URI path, its modification time, owner, group and permission string, and the storage type
 * @throws IOException if the file cannot be opened or read, or if it contains no records
 */
@Override
public DatasetJsonRecord getSchema(Path targetFilePath)
    throws IOException {
  String abstractPath = targetFilePath.toUri().getPath();
  System.out.println("parquet file path : " + abstractPath);

  // Both resources are closed (reader first, then sin) even when reading or the file-status
  // lookup throws.
  // NOTE(review): sin is opened but never read from; it is kept only because the original
  // opened it before building the reader. Confirm whether it can be dropped.
  try (SeekableInput sin = new FsInput(targetFilePath, fs.getConf());
      ParquetReader<GenericRecord> reader =
          AvroParquetReader.<GenericRecord>builder(targetFilePath).build()) {
    GenericRecord firstRecord = reader.read();
    if (firstRecord == null) {
      // read() returned no record, so there is nothing to take the schema from.
      throw new IOException("Parquet file contains no records, cannot derive schema: " + abstractPath);
    }
    String schemaString = firstRecord.getSchema().toString();
    String storage = STORAGE_TYPE;
    FileStatus fstat = fs.getFileStatus(targetFilePath);
    // TODO set codec
    return new DatasetJsonRecord(schemaString, abstractPath, fstat.getModificationTime(), fstat.getOwner(),
        fstat.getGroup(), fstat.getPermission().toString(), null, storage, "");
  }
}
示例2: getSampleData
import org.apache.parquet.avro.AvroParquetReader; //导入依赖的package包/类
/**
 * Collects up to ten sample records from a Parquet file as sanitized strings.
 *
 * <p>Each record's {@code toString()} output has line breaks and characters in the Unicode
 * "Other" category ({@code \p{C}}) stripped before being added to the sample list.
 *
 * @param targetFilePath path of the Parquet file to sample
 * @return a {@code SampleDataRecord} built from the file's URI path and the collected samples;
 *     the sample list is empty when the file has no records
 * @throws IOException if the file cannot be opened or read
 */
@Override
public SampleDataRecord getSampleData(Path targetFilePath)
    throws IOException {
  final int maxSamples = 10;
  List<Object> list = new ArrayList<Object>();
  // try-with-resources guarantees the reader is released; the original never closed it.
  try (ParquetReader<GenericRecord> reader =
      AvroParquetReader.<GenericRecord>builder(targetFilePath).build()) {
    GenericRecord record;
    // Read record by record until the sample limit is reached or read() returns null.
    while (list.size() < maxSamples && (record = reader.read()) != null) {
      // TODO handle out of memory error
      // NOTE(review): in a replaceAll replacement string, \" yields a plain quote, so the second
      // call leaves the text unchanged. Kept as-is to preserve output; confirm whether quotes
      // were meant to be backslash-escaped.
      list.add(record.toString().replaceAll("[\\n\\r\\p{C}]", "").replaceAll("\"", "\\\""));
    }
  }
  return new SampleDataRecord(targetFilePath.toUri().getPath(), list);
}
示例3: validateParquetFile
import org.apache.parquet.avro.AvroParquetReader; //导入依赖的package包/类
/**
 * Asserts that a Parquet file holds exactly {@code recourdCount} rows with the expected
 * generated values.
 *
 * <p>Row {@code i} is expected to contain: {@code b} = whether {@code i} is even, {@code s} =
 * {@code i} as a {@code Utf8} string, {@code l} = {@code i}, {@code l100} = {@code i % 100} and
 * {@code s100} = {@code i % 100} as a {@code Utf8} string.
 *
 * @param parquetFile path of the Parquet file to validate
 * @param recourdCount number of rows the file is expected to contain
 * @throws IOException if the file cannot be opened or read
 */
public void validateParquetFile(Path parquetFile, long recourdCount) throws IOException {
  // The reader is closed even when an assertion fails part-way through the file.
  try (ParquetReader<GenericData.Record> reader =
      AvroParquetReader.<GenericData.Record>builder(parquetFile).build()) {
    for (long i = 0; i < recourdCount; i++) {
      GenericData.Record actualRow = reader.read();
      Assert.assertNotNull("Can't read row " + i, actualRow);
      // Expected value first, actual second, so failure messages label the two correctly.
      Assert.assertEquals("Value different in row " + i + " for key b", i % 2 == 0, actualRow.get("b"));
      Assert.assertEquals("Value different in row " + i + " for key s", new Utf8(String.valueOf(i)), actualRow.get("s"));
      Assert.assertEquals("Value different in row " + i + " for key l", i, actualRow.get("l"));
      Assert.assertEquals("Value different in row " + i + " for key l100", i % 100, actualRow.get("l100"));
      Assert.assertEquals("Value different in row " + i + " for key s100", new Utf8(String.valueOf(i % 100)), actualRow.get("s100"));
    }
    // One more read must return null, otherwise the file has surplus rows.
    Assert.assertNull("Parquet file contains more then expected rows", reader.read());
  }
}
示例4: validateParquetFile
import org.apache.parquet.avro.AvroParquetReader; //导入依赖的package包/类
/**
 * Asserts that a Parquet file holds exactly the given rows, in order.
 *
 * <p>For each expected row, the next record is read and every key in the expected map is
 * compared with the record's field of the same name. Fields present in the record but absent
 * from the expected map are not checked.
 *
 * @param parquetFile path of the Parquet file to validate
 * @param data expected rows, each mapping a field name to its expected value
 * @throws IOException if the file cannot be opened or read
 */
public void validateParquetFile(Path parquetFile, List<Map<String, Object>> data) throws IOException {
  // The reader is closed even when an assertion fails part-way through the file.
  try (ParquetReader<GenericData.Record> reader =
      AvroParquetReader.<GenericData.Record>builder(parquetFile).build()) {
    int position = 0;
    for (Map<String, Object> expectedRow : data) {
      GenericData.Record actualRow = reader.read();
      Assert.assertNotNull("Can't read row " + position, actualRow);
      for (Map.Entry<String, Object> entry : expectedRow.entrySet()) {
        Object value = actualRow.get(entry.getKey());
        Assert.assertEquals("Different value on row " + position + " for key " + entry.getKey(), entry.getValue(), value);
      }
      // Advance the row index so failure messages name the right row.
      position++;
    }
    // One more read must return null, otherwise the file has surplus rows.
    Assert.assertNull("Parquet file contains more then expected rows", reader.read());
  }
}
示例5: initReader
import org.apache.parquet.avro.AvroParquetReader; //导入依赖的package包/类
/**
 * Creates a Parquet reader for {@code getFilePath()} that yields Avro generic records.
 *
 * <p>When {@code schema} and/or {@code projection} are set, they are written into the
 * configuration as the Avro read schema and the requested projection before the reader is built.
 * NOTE(review): the settings are applied to the object returned by {@code getFs().getConf()}
 * itself, not to a copy; confirm that sharing this configuration is intended.
 *
 * @return a reader built with the (possibly adjusted) file system configuration
 * @throws IOException if the reader cannot be built
 */
private ParquetReader<GenericRecord> initReader() throws IOException {
  final Configuration conf = getFs().getConf();

  if (null != this.schema) {
    AvroReadSupport.setAvroReadSchema(conf, this.schema);
  }

  if (null != this.projection) {
    AvroReadSupport.setRequestedProjection(conf, this.projection);
  }

  return AvroParquetReader.<GenericRecord>builder(getFilePath())
      .withConf(conf)
      .build();
}
示例6: read
import org.apache.parquet.avro.AvroParquetReader; //导入依赖的package包/类
/**
 * Reads a Parquet file and prints its contents to standard output.
 *
 * <p>The schema of the first record is printed first, followed by every record in the file,
 * including the first one. Nothing is printed for a file without records. An
 * {@code IOException} is not propagated; its stack trace is printed instead.
 *
 * @param parquetPath path of the Parquet file to read
 */
public void read(String parquetPath) {
  // try-with-resources closes the reader on every exit path, including read failures.
  try (AvroParquetReader<GenericRecord> reader =
      new AvroParquetReader<GenericRecord>(new Path(parquetPath))) {
    GenericRecord result = reader.read();
    if (result == null) {
      // No records at all: there is no schema to print and nothing to iterate over.
      return;
    }
    System.out.println(result.getSchema());
    // do/while so the record already fetched for its schema is printed too.
    do {
      System.out.println(result);
    } while ((result = reader.read()) != null);
  } catch (IOException e) {
    e.printStackTrace();
  }
}
示例7: assertReadParquetFile
import org.apache.parquet.avro.AvroParquetReader; //导入依赖的package包/类
/**
 * Asserts that the Parquet data found at a path accounts for the expected Avro records.
 *
 * <p>If the path is a file, each record read from it is compared with the elements of
 * {@code expected} and the first element equal to it is removed from the set. If the path is a
 * directory, every entry returned by {@code FileSystemUtil.listSubFiles} is processed
 * recursively with {@code part} set to {@code true}. Any other path fails the test. Note that
 * {@code expected} is modified in place.
 *
 * @param fs the file system holding the path
 * @param path the file or directory to read
 * @param expected the records expected to be found; matched records are removed from this set
 * @param part when {@code true} and the path is a file, leftover expected records are not
 *     treated as a failure (the file is one part of a larger data set)
 * @throws IOException if the path cannot be inspected or read
 */
public static void assertReadParquetFile(FileSystem fs, String path, Set<IndexedRecord> expected, boolean part) throws IOException {
  final Path target = new Path(path);
  final boolean isFile = fs.isFile(target);

  if (!isFile && !fs.isDirectory(target)) {
    fail("No such path: " + path);
    return;
  }

  if (isFile) {
    try (AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(fs.getConf(), new Path(path))) {
      for (IndexedRecord actual = reader.read(); null != actual; actual = reader.read()) {
        // Find the first expected element that considers itself equal to the record read.
        IndexedRecord match = null;
        for (IndexedRecord candidate : expected) {
          if (candidate.equals(actual)) {
            match = candidate;
            break;
          }
        }
        // Called even when nothing matched, in which case match is still null.
        expected.remove(match);
      }
    }
  } else {
    for (FileStatus child : FileSystemUtil.listSubFiles(fs, target)) {
      assertReadParquetFile(fs, child.getPath().toString(), expected, true);
    }
  }

  // A directory must always account for every record; a single file only when it is not a part.
  final boolean mustBeExhausted = !isFile || !part;
  // Guard on emptiness first: building the message calls iterator().next(), which needs an element.
  if (mustBeExhausted && !expected.isEmpty()) {
    assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
  }
}
示例8: AvroParquetFileReader
import org.apache.parquet.avro.AvroParquetReader; //导入依赖的package包/类
/**
 * Opens a Parquet-backed Avro reader for a log file.
 *
 * <p>The schema for the log file's topic is fetched from {@code schemaRegistryClient} and used
 * to create the datum writer; the starting offset is taken from the log file path.
 *
 * @param logFilePath describes the file location, topic and offset
 * @param codec compression codec; not used by this constructor
 * @throws IOException if the Parquet reader cannot be built
 */
public AvroParquetFileReader(LogFilePath logFilePath, CompressionCodec codec) throws IOException {
  final Path parquetPath = new Path(logFilePath.getLogFilePath());
  final Schema topicSchema = schemaRegistryClient.getSchema(logFilePath.getTopic());

  this.reader = AvroParquetReader.<GenericRecord>builder(parquetPath).build();
  this.writer = new SpecificDatumWriter(topicSchema);
  this.offset = logFilePath.getOffset();
}
示例9: serializeToByteBuffer
import org.apache.parquet.avro.AvroParquetReader; //导入依赖的package包/类
/**
 * Serializes every record of the Parquet file into an in-memory buffer using Avro binary
 * encoding.
 *
 * <p>Records are obtained from {@code createAvroReader()} and written with the schema returned
 * by {@code createAvroSchema()}.
 *
 * @return a ByteBuffer wrapping the encoded bytes, with its byte order set to little-endian
 * @throws IOException if the reader cannot be created, or if reading, encoding or closing fails
 */
public ByteBuffer serializeToByteBuffer() throws IOException {
  final ByteArrayOutputStream stream = new ByteArrayOutputStream();
  final Encoder encoder = EncoderFactory.get().binaryEncoder(stream, null);
  final DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>();
  writer.setSchema(createAvroSchema());

  // try-with-resources closes the reader even when read() or write() throws part-way through.
  try (AvroParquetReader<GenericRecord> reader = createAvroReader()) {
    for (GenericRecord record = reader.read(); record != null; record = reader.read()) {
      writer.write(record, encoder);
    }
  } catch (IOException ex) {
    LOG.log(Level.SEVERE, ex.getMessage());
    throw ex;
  }

  // Push any bytes still buffered in the encoder into the stream before taking a snapshot.
  encoder.flush();
  final ByteBuffer buf = ByteBuffer.wrap(stream.toByteArray());
  buf.order(ByteOrder.LITTLE_ENDIAN);
  return buf;
}
示例10: createAvroReader
import org.apache.parquet.avro.AvroParquetReader; //导入依赖的package包/类
/**
 * Opens an Avro record reader over the Parquet file at {@code parquetFilePath}.
 *
 * <p>The caller owns the returned reader and is responsible for closing it.
 *
 * @return a new reader yielding the file's content as generic Avro records
 * @throws IOException if the reader cannot be created for the file
 */
private AvroParquetReader<GenericRecord> createAvroReader() throws IOException {
  final AvroParquetReader<GenericRecord> avroReader =
      new AvroParquetReader<GenericRecord>(parquetFilePath);
  return avroReader;
}