This page collects typical usage examples of the Java method org.apache.parquet.hadoop.ParquetReader.close, gathered from open source projects. If you are unsure what ParquetReader.close does, how to call it, or simply want working samples, the curated examples below should help. You can also explore further examples for the enclosing class, org.apache.parquet.hadoop.ParquetReader.
The 14 code examples of ParquetReader.close shown below are sorted by popularity by default.
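Before the examples, note that ParquetReader implements java.io.Closeable, so besides the explicit close() calls shown below, the reader can also be closed automatically with try-with-resources. The following is a minimal sketch of that pattern, not taken from any of the examples; the file path and the use of an Avro GenericRecord reader are placeholder assumptions.

import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.hadoop.ParquetReader;

public class ParquetCloseSketch {
  public static void main(String[] args) throws Exception {
    Path path = new Path("/tmp/example.parquet"); // placeholder path, adjust to a real file
    // try-with-resources invokes ParquetReader.close() automatically,
    // even if read() throws
    try (ParquetReader<GenericRecord> reader =
             AvroParquetReader.<GenericRecord>builder(path).build()) {
      GenericRecord record;
      while ((record = reader.read()) != null) {
        System.out.println(record);
      }
    }
  }
}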
Example 1: getSchema

import org.apache.parquet.hadoop.ParquetReader; // import the package/class this method depends on

@Override
public DatasetJsonRecord getSchema(Path targetFilePath)
    throws IOException {
  System.out.println("parquet file path : " + targetFilePath.toUri().getPath());
  SeekableInput sin = new FsInput(targetFilePath, fs.getConf());
  ParquetReader<GenericRecord> reader = AvroParquetReader.<GenericRecord>builder(targetFilePath).build();
  // read the first record and use its Avro schema as the dataset schema
  String schemaString = reader.read().getSchema().toString();
  String storage = STORAGE_TYPE;
  String abstractPath = targetFilePath.toUri().getPath();
  FileStatus fstat = fs.getFileStatus(targetFilePath);
  // TODO set codec
  DatasetJsonRecord datasetJsonRecord =
      new DatasetJsonRecord(schemaString, abstractPath, fstat.getModificationTime(), fstat.getOwner(), fstat.getGroup(),
          fstat.getPermission().toString(), null, storage, "");
  reader.close();
  sin.close();
  return datasetJsonRecord;
}
Example 2: read

import org.apache.parquet.hadoop.ParquetReader; // import the package/class this method depends on

private void read(Path parquetFile, int nRows, Blackhole blackhole) throws IOException
{
  ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), parquetFile).withConf(configuration).build();
  for (int i = 0; i < nRows; i++) {
    Group group = reader.read();
    blackhole.consume(group.getBinary("binary_field", 0));
    blackhole.consume(group.getInteger("int32_field", 0));
    blackhole.consume(group.getLong("int64_field", 0));
    blackhole.consume(group.getBoolean("boolean_field", 0));
    blackhole.consume(group.getFloat("float_field", 0));
    blackhole.consume(group.getDouble("double_field", 0));
    blackhole.consume(group.getBinary("flba_field", 0));
    blackhole.consume(group.getInt96("int96_field", 0));
  }
  reader.close();
}
Example 3: countFilteredRecords

import org.apache.parquet.hadoop.ParquetReader; // import the package/class this method depends on

public static long countFilteredRecords(Path path, FilterPredicate pred) throws IOException {
  // apply the filter predicate at read time and count the surviving records
  ParquetReader<Group> reader = ParquetReader
      .builder(new GroupReadSupport(), path)
      .withFilter(FilterCompat.get(pred))
      .build();
  long count = 0;
  try {
    while (reader.read() != null) {
      count += 1;
    }
  } finally {
    reader.close();
  }
  return count;
}
Example 4: read

import org.apache.parquet.hadoop.ParquetReader; // import the package/class this method depends on

public static <D> List<D> read(GenericData model, Schema schema, File file) throws IOException {
  List<D> data = new ArrayList<D>();
  Configuration conf = new Configuration(false);
  AvroReadSupport.setRequestedProjection(conf, schema);
  AvroReadSupport.setAvroReadSchema(conf, schema);
  ParquetReader<D> fileReader = AvroParquetReader
      .<D>builder(new Path(file.toString()))
      .withDataModel(model) // reflect disables compatibility
      .withConf(conf)
      .build();
  try {
    D datum;
    while ((datum = fileReader.read()) != null) {
      data.add(datum);
    }
  } finally {
    fileReader.close();
  }
  return data;
}
Example 5: getSchema

import org.apache.parquet.hadoop.ParquetReader; // import the package/class this method depends on

@Override
public Schema getSchema(Configuration conf, Path path) throws IOException {
  AvroReadSupport<GenericRecord> readSupport = new AvroReadSupport<>();
  ParquetReader.Builder<GenericRecord> builder = ParquetReader.builder(readSupport, path);
  ParquetReader<GenericRecord> parquetReader = builder.withConf(conf).build();
  GenericRecord record;
  Schema schema = null;
  // iterate over all records; the schema of the last record read is returned
  while ((record = parquetReader.read()) != null) {
    schema = avroData.toConnectSchema(record.getSchema());
  }
  parquetReader.close();
  return schema;
}
Example 6: readData

import org.apache.parquet.hadoop.ParquetReader; // import the package/class this method depends on

@Override
public Collection<Object> readData(Configuration conf, Path path) throws IOException {
  Collection<Object> result = new ArrayList<>();
  AvroReadSupport<GenericRecord> readSupport = new AvroReadSupport<>();
  ParquetReader.Builder<GenericRecord> builder = ParquetReader.builder(readSupport, path);
  ParquetReader<GenericRecord> parquetReader = builder.withConf(conf).build();
  GenericRecord record;
  while ((record = parquetReader.read()) != null) {
    result.add(record);
  }
  parquetReader.close();
  return result;
}
Example 7: execute

import org.apache.parquet.hadoop.ParquetReader; // import the package/class this method depends on

@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);
  long num = DEFAULT;
  if (options.hasOption('n')) {
    num = Long.parseLong(options.getOptionValue('n'));
  }
  String[] args = options.getArgs();
  String input = args[0];
  ParquetReader<SimpleRecord> reader = null;
  try {
    PrintWriter writer = new PrintWriter(Main.out, true);
    reader = ParquetReader.builder(new SimpleReadSupport(), new Path(input)).build();
    // pretty-print at most num records
    for (SimpleRecord value = reader.read(); value != null && num-- > 0; value = reader.read()) {
      value.prettyPrint(writer);
      writer.println();
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ex) {
        // ignore failures while closing
      }
    }
  }
}
Example 8: execute

import org.apache.parquet.hadoop.ParquetReader; // import the package/class this method depends on

@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);
  String[] args = options.getArgs();
  String input = args[0];
  ParquetReader<SimpleRecord> reader = null;
  try {
    PrintWriter writer = new PrintWriter(Main.out, true);
    reader = ParquetReader.builder(new SimpleReadSupport(), new Path(input)).build();
    ParquetMetadata metadata = ParquetFileReader.readFooter(new Configuration(), new Path(input));
    JsonRecordFormatter.JsonGroupFormatter formatter = JsonRecordFormatter.fromSchema(metadata.getFileMetaData().getSchema());
    for (SimpleRecord value = reader.read(); value != null; value = reader.read()) {
      if (options.hasOption('j')) {
        writer.write(formatter.formatRecord(value));
      } else {
        value.prettyPrint(writer);
      }
      writer.println();
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ex) {
        // ignore failures while closing
      }
    }
  }
}
Example 9: testScroogeBinaryEncoding

import org.apache.parquet.hadoop.ParquetReader; // import the package/class this method depends on

@Test
public void testScroogeBinaryEncoding() throws Exception {
  StringAndBinary expected = new StringAndBinary.Immutable("test",
      ByteBuffer.wrap(new byte[] {-123, 20, 33}));
  File temp = tempDir.newFile(UUID.randomUUID().toString());
  temp.deleteOnExit();
  temp.delete();
  Path path = new Path(temp.getPath());
  ParquetWriter<StringAndBinary> writer = new ParquetWriter<StringAndBinary>(
      path, new Configuration(), new ScroogeWriteSupport<StringAndBinary>(StringAndBinary.class));
  writer.write(expected);
  writer.close();
  // read using the parquet-thrift version to isolate the write path
  ParquetReader<org.apache.parquet.thrift.test.binary.StringAndBinary> reader =
      ThriftParquetReader.<org.apache.parquet.thrift.test.binary.StringAndBinary>build(path)
          .withThriftClass(org.apache.parquet.thrift.test.binary.StringAndBinary.class)
          .build();
  org.apache.parquet.thrift.test.binary.StringAndBinary record = reader.read();
  reader.close();
  Assert.assertEquals("String should match after serialization round trip",
      "test", record.s);
  Assert.assertEquals("ByteBuffer should match after serialization round trip",
      ByteBuffer.wrap(new byte[] {-123, 20, 33}), record.b);
}
Example 10: testScroogeBinaryDecoding

import org.apache.parquet.hadoop.ParquetReader; // import the package/class this method depends on

@Test
@SuppressWarnings("unchecked")
public void testScroogeBinaryDecoding() throws Exception {
  StringAndBinary expected = new StringAndBinary.Immutable("test",
      ByteBuffer.wrap(new byte[] {-123, 20, 33}));
  File temp = tempDir.newFile(UUID.randomUUID().toString());
  temp.deleteOnExit();
  temp.delete();
  Path path = new Path(temp.getPath());
  ParquetWriter<StringAndBinary> writer = new ParquetWriter<StringAndBinary>(
      path, new Configuration(), new ScroogeWriteSupport<StringAndBinary>(StringAndBinary.class));
  writer.write(expected);
  writer.close();
  Configuration conf = new Configuration();
  conf.set("parquet.thrift.converter.class", ScroogeRecordConverter.class.getName());
  ParquetReader<StringAndBinary> reader = ParquetReader.<StringAndBinary>builder(new ScroogeReadSupport(), path)
      .withConf(conf)
      .build();
  StringAndBinary record = reader.read();
  reader.close();
  Assert.assertEquals("String should match after serialization round trip",
      "test", record.s());
  Assert.assertEquals("ByteBuffer should match after serialization round trip",
      ByteBuffer.wrap(new byte[] {-123, 20, 33}), record.b());
}
Example 11: testBinary

import org.apache.parquet.hadoop.ParquetReader; // import the package/class this method depends on

@Test
public void testBinary() throws IOException {
  StringAndBinary expected = new StringAndBinary("test",
      ByteBuffer.wrap(new byte[] { -123, 20, 33 }));
  File temp = tempDir.newFile(UUID.randomUUID().toString());
  temp.deleteOnExit();
  temp.delete();
  Path path = new Path(temp.getPath());
  ThriftParquetWriter<StringAndBinary> writer =
      new ThriftParquetWriter<StringAndBinary>(
          path, StringAndBinary.class, CompressionCodecName.SNAPPY);
  writer.write(expected);
  writer.close();
  ParquetReader<StringAndBinary> reader = ThriftParquetReader.<StringAndBinary>build(path)
      .withThriftClass(StringAndBinary.class)
      .build();
  StringAndBinary record = reader.read();
  reader.close();
  assertSchema(ParquetFileReader.readFooter(new Configuration(), path));
  assertEquals("Should match after serialization round trip",
      expected, record);
}
Example 12: testGeneric

import org.apache.parquet.hadoop.ParquetReader; // import the package/class this method depends on

@Test
public void testGeneric() throws IOException {
  GenericRecord avroRecord;
  DataFileReader<GenericRecord> avro = new DataFileReader<GenericRecord>(
      avroFile, new GenericDatumReader<GenericRecord>(SCHEMA));
  try {
    avroRecord = avro.next();
  } finally {
    avro.close();
  }
  GenericRecord parquetRecord;
  Configuration conf = new Configuration();
  conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, false);
  AvroReadSupport.setAvroDataSupplier(conf, GenericDataSupplier.class);
  AvroReadSupport.setAvroReadSchema(conf, SCHEMA);
  ParquetReader<GenericRecord> parquet = AvroParquetReader
      .<GenericRecord>builder(parquetFile)
      .withConf(conf)
      .build();
  try {
    parquetRecord = parquet.read();
  } finally {
    parquet.close();
  }
  Assert.assertEquals("Avro default string class should be Utf8",
      Utf8.class, avroRecord.get("default_class").getClass());
  Assert.assertEquals("Parquet default string class should be Utf8",
      Utf8.class, parquetRecord.get("default_class").getClass());
  Assert.assertEquals("Avro avro.java.string=String class should be String",
      String.class, avroRecord.get("string_class").getClass());
  Assert.assertEquals("Parquet avro.java.string=String class should be String",
      String.class, parquetRecord.get("string_class").getClass());
  Assert.assertEquals("Avro stringable class should be Utf8",
      Utf8.class, avroRecord.get("stringable_class").getClass());
  Assert.assertEquals("Parquet stringable class should be Utf8",
      Utf8.class, parquetRecord.get("stringable_class").getClass());
  Assert.assertEquals("Avro map default string class should be Utf8",
      Utf8.class, keyClass(avroRecord.get("default_map")));
  Assert.assertEquals("Parquet map default string class should be Utf8",
      Utf8.class, keyClass(parquetRecord.get("default_map")));
  Assert.assertEquals("Avro map avro.java.string=String class should be String",
      String.class, keyClass(avroRecord.get("string_map")));
  Assert.assertEquals("Parquet map avro.java.string=String class should be String",
      String.class, keyClass(parquetRecord.get("string_map")));
  Assert.assertEquals("Avro map stringable class should be Utf8",
      Utf8.class, keyClass(avroRecord.get("stringable_map")));
  Assert.assertEquals("Parquet map stringable class should be Utf8",
      Utf8.class, keyClass(parquetRecord.get("stringable_map")));
}
Example 13: testReflect

import org.apache.parquet.hadoop.ParquetReader; // import the package/class this method depends on

@Test
public void testReflect() throws IOException {
  Schema reflectSchema = ReflectData.get()
      .getSchema(ReflectRecord.class);
  ReflectRecord avroRecord;
  DataFileReader<ReflectRecord> avro = new DataFileReader<ReflectRecord>(
      avroFile, new ReflectDatumReader<ReflectRecord>(reflectSchema));
  try {
    avroRecord = avro.next();
  } finally {
    avro.close();
  }
  ReflectRecord parquetRecord;
  Configuration conf = new Configuration();
  conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, false);
  AvroReadSupport.setAvroDataSupplier(conf, ReflectDataSupplier.class);
  AvroReadSupport.setAvroReadSchema(conf, reflectSchema);
  ParquetReader<ReflectRecord> parquet = AvroParquetReader
      .<ReflectRecord>builder(parquetFile)
      .withConf(conf)
      .build();
  try {
    parquetRecord = parquet.read();
  } finally {
    parquet.close();
  }
  Assert.assertEquals("Avro default string class should be String",
      String.class, avroRecord.default_class.getClass());
  Assert.assertEquals("Parquet default string class should be String",
      String.class, parquetRecord.default_class.getClass());
  Assert.assertEquals("Avro avro.java.string=String class should be String",
      String.class, avroRecord.string_class.getClass());
  Assert.assertEquals("Parquet avro.java.string=String class should be String",
      String.class, parquetRecord.string_class.getClass());
  Assert.assertEquals("Avro stringable class should be BigDecimal",
      BigDecimal.class, avroRecord.stringable_class.getClass());
  Assert.assertEquals("Parquet stringable class should be BigDecimal",
      BigDecimal.class, parquetRecord.stringable_class.getClass());
  Assert.assertEquals("Should have the correct BigDecimal value",
      BIG_DECIMAL, parquetRecord.stringable_class);
  Assert.assertEquals("Avro map default string class should be String",
      String.class, keyClass(avroRecord.default_map));
  Assert.assertEquals("Parquet map default string class should be String",
      String.class, keyClass(parquetRecord.default_map));
  Assert.assertEquals("Avro map avro.java.string=String class should be String",
      String.class, keyClass(avroRecord.string_map));
  Assert.assertEquals("Parquet map avro.java.string=String class should be String",
      String.class, keyClass(parquetRecord.string_map));
  Assert.assertEquals("Avro map stringable class should be BigDecimal",
      BigDecimal.class, keyClass(avroRecord.stringable_map));
  Assert.assertEquals("Parquet map stringable class should be BigDecimal",
      BigDecimal.class, keyClass(parquetRecord.stringable_map));
}
Example 14: testReflectJavaClass

import org.apache.parquet.hadoop.ParquetReader; // import the package/class this method depends on

@Test
public void testReflectJavaClass() throws IOException {
  Schema reflectSchema = ReflectData.get()
      .getSchema(ReflectRecordJavaClass.class);
  System.err.println("Schema: " + reflectSchema.toString(true));
  ReflectRecordJavaClass avroRecord;
  DataFileReader<ReflectRecordJavaClass> avro =
      new DataFileReader<ReflectRecordJavaClass>(avroFile,
          new ReflectDatumReader<ReflectRecordJavaClass>(reflectSchema));
  try {
    avroRecord = avro.next();
  } finally {
    avro.close();
  }
  ReflectRecordJavaClass parquetRecord;
  Configuration conf = new Configuration();
  conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, false);
  AvroReadSupport.setAvroDataSupplier(conf, ReflectDataSupplier.class);
  AvroReadSupport.setAvroReadSchema(conf, reflectSchema);
  AvroReadSupport.setRequestedProjection(conf, reflectSchema);
  ParquetReader<ReflectRecordJavaClass> parquet = AvroParquetReader
      .<ReflectRecordJavaClass>builder(parquetFile)
      .withConf(conf)
      .build();
  try {
    parquetRecord = parquet.read();
  } finally {
    parquet.close();
  }
  // Avro uses String even if CharSequence is set
  Assert.assertEquals("Avro default string class should be String",
      String.class, avroRecord.default_class.getClass());
  Assert.assertEquals("Parquet default string class should be String",
      String.class, parquetRecord.default_class.getClass());
  Assert.assertEquals("Avro stringable class should be BigDecimal",
      BigDecimal.class, avroRecord.stringable_class.getClass());
  Assert.assertEquals("Parquet stringable class should be BigDecimal",
      BigDecimal.class, parquetRecord.stringable_class.getClass());
  Assert.assertEquals("Should have the correct BigDecimal value",
      BIG_DECIMAL, parquetRecord.stringable_class);
}