本文整理汇总了Java中org.apache.parquet.hadoop.ParquetReader.read方法的典型用法代码示例。如果您正苦于以下问题:Java ParquetReader.read方法的具体用法?Java ParquetReader.read怎么用?Java ParquetReader.read使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.parquet.hadoop.ParquetReader的用法示例。
在下文中一共展示了ParquetReader.read方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: validateParquetFile
import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Asserts that the Parquet file contains exactly {@code recourdCount} rows and that
 * row {@code i} carries the values checked below for the keys {@code b}, {@code s},
 * {@code l}, {@code l100} and {@code s100}.
 *
 * @param parquetFile  file to read back
 * @param recourdCount number of rows the file is expected to contain
 * @throws IOException if the reader cannot be built, read or closed
 */
public void validateParquetFile(Path parquetFile, long recourdCount) throws IOException {
  // try-with-resources: the reader is released even when an assertion fails mid-file.
  try (ParquetReader<GenericData.Record> reader =
           AvroParquetReader.<GenericData.Record>builder(parquetFile).build()) {
    for (long i = 0; i < recourdCount; i++) {
      GenericData.Record actualRow = reader.read();
      Assert.assertNotNull("Can't read row " + i, actualRow);
      // JUnit order is (message, expected, actual); keeps failure output truthful.
      Assert.assertEquals("Value different in row " + i + " for key b", i % 2 == 0, actualRow.get("b"));
      Assert.assertEquals("Value different in row " + i + " for key s", new Utf8(String.valueOf(i)), actualRow.get("s"));
      Assert.assertEquals("Value different in row " + i + " for key l", i, actualRow.get("l"));
      Assert.assertEquals("Value different in row " + i + " for key l100", i % 100, actualRow.get("l100"));
      Assert.assertEquals("Value different in row " + i + " for key s100", new Utf8(String.valueOf(i % 100)), actualRow.get("s100"));
    }
    // One more read must yield null, otherwise the file holds extra rows.
    Assert.assertNull("Parquet file contains more then expected rows", reader.read());
  }
}
示例2: validateParquetFile
import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Asserts that the Parquet file contains exactly the rows in {@code data}, in order.
 * For each expected row, every key/value pair must match the value stored under the
 * same key in the row that was read.
 *
 * @param parquetFile file to read back
 * @param data        expected rows, each a map from field name to expected value
 * @throws IOException if the reader cannot be built, read or closed
 */
public void validateParquetFile(Path parquetFile, List<Map<String, Object>> data) throws IOException {
  // try-with-resources: the reader is released even when an assertion fails mid-file.
  try (ParquetReader<GenericData.Record> reader =
           AvroParquetReader.<GenericData.Record>builder(parquetFile).build()) {
    int position = 0;
    for (Map<String, Object> expectedRow : data) {
      GenericData.Record actualRow = reader.read();
      Assert.assertNotNull("Can't read row " + position, actualRow);
      for (Map.Entry<String, Object> entry : expectedRow.entrySet()) {
        Object value = actualRow.get(entry.getKey());
        Assert.assertEquals("Different value on row " + position + " for key " + entry.getKey(), entry.getValue(), value);
      }
      // Advance the row index so failure messages name the row that actually failed.
      position++;
    }
    // One more read must yield null, otherwise the file holds extra rows.
    Assert.assertNull("Parquet file contains more then expected rows", reader.read());
  }
}
示例3: read
import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Reads {@code nRows} records from the Parquet file and feeds one value of every
 * column to the {@code blackhole}.
 *
 * @param parquetFile file to read
 * @param nRows       number of records to read; the file must contain at least this many
 * @param blackhole   sink that consumes each field value
 * @throws IOException if the reader cannot be built, read or closed
 */
private void read(Path parquetFile, int nRows, Blackhole blackhole) throws IOException
{
  // try-with-resources: previously close() was skipped whenever a read or accessor threw.
  try (ParquetReader<Group> reader =
           ParquetReader.builder(new GroupReadSupport(), parquetFile).withConf(configuration).build()) {
    for (int i = 0; i < nRows; i++) {
      Group group = reader.read();
      blackhole.consume(group.getBinary("binary_field", 0));
      blackhole.consume(group.getInteger("int32_field", 0));
      blackhole.consume(group.getLong("int64_field", 0));
      blackhole.consume(group.getBoolean("boolean_field", 0));
      blackhole.consume(group.getFloat("float_field", 0));
      blackhole.consume(group.getDouble("double_field", 0));
      blackhole.consume(group.getBinary("flba_field", 0));
      blackhole.consume(group.getInt96("int96_field", 0));
    }
  }
}
示例4: countFilteredRecords
import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Counts the records in the file at {@code path} that the reader returns when
 * {@code pred} is applied as a filter.
 *
 * @param path file to scan
 * @param pred predicate handed to the reader as its filter
 * @return number of records returned before the reader reported end of input
 * @throws IOException if the reader cannot be built, read or closed
 */
public static long countFilteredRecords(Path path, FilterPredicate pred) throws IOException {
  ParquetReader.Builder<Group> builder = ParquetReader.builder(new GroupReadSupport(), path);
  ParquetReader<Group> filtered = builder.withFilter(FilterCompat.get(pred)).build();
  long matches = 0;
  try {
    // read() returning null marks the end of the file.
    for (Group row = filtered.read(); row != null; row = filtered.read()) {
      matches++;
    }
  } finally {
    filtered.close();
  }
  return matches;
}
示例5: readFile
import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Reads every record of {@code f} that passes {@code filter} into a list.
 *
 * @param f      Parquet file to read
 * @param filter filter handed to the reader
 * @return the records in the order the reader returned them; empty if none were returned
 * @throws IOException if the reader cannot be built, read or closed
 */
public static List<Group> readFile(File f, Filter filter) throws IOException {
  Configuration conf = new Configuration();
  GroupWriteSupport.setSchema(schema, conf);
  List<Group> users = new ArrayList<Group>();
  // try-with-resources: the original never closed the reader, leaking the open file.
  try (ParquetReader<Group> reader =
           ParquetReader.builder(new GroupReadSupport(), new Path(f.getAbsolutePath()))
               .withConf(conf)
               .withFilter(filter)
               .build()) {
    Group current;
    while ((current = reader.read()) != null) {
      users.add(current);
    }
  }
  return users;
}
示例6: testWriteFile
import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Writes a one-person address book to a file, reads it back as groups and checks
 * the person count and e-mail of each record, then that exactly one record was read.
 */
@Test
public void testWriteFile() throws IOException, InterruptedException, TException {
  final AddressBook a = new AddressBook(
      Arrays.asList(
          new Person(
              new Name("Bob", "Roberts"),
              0,
              "[email protected]",
              Arrays.asList(new PhoneNumber("1234567890")))));
  final Path fileToCreate = createFile(a);
  int i = 0;
  // try-with-resources: the reader is released even when an assertion fails.
  try (ParquetReader<Group> reader = createRecordReader(fileToCreate)) {
    Group g;
    while ((g = reader.read()) != null) {
      assertEquals(a.persons.size(), g.getFieldRepetitionCount("persons"));
      assertEquals(a.persons.get(0).email, g.getGroup("persons", 0).getGroup(0, 0).getString("email", 0));
      // just some sanity check, we're testing the various layers somewhere else
      ++i;
    }
  }
  assertEquals("read 1 record", 1, i);
}
示例7: testWriteFileMapOfList
import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Writes a struct holding a map from string to list of strings, reads it back as
 * groups and checks the map key and the size of the value list on every record.
 */
@Test
public void testWriteFileMapOfList() throws IOException, InterruptedException, TException {
  Map<String, List<String>> map = new HashMap<String, List<String>>();
  map.put("key", Arrays.asList("val1", "val2"));
  final TestListInMap mapList = new TestListInMap("maplist", map);
  final Path fileToCreate = createFile(mapList);
  // try-with-resources: the reader is released even when an assertion fails.
  try (ParquetReader<Group> reader = createRecordReader(fileToCreate)) {
    Group g;
    while ((g = reader.read()) != null) {
      assertEquals("key",
          g.getGroup("names", 0).getGroup("map", 0).getBinary("key", 0).toStringUsingUTF8());
      assertEquals(map.get("key").size(),
          g.getGroup("names", 0).getGroup("map", 0).getGroup("value", 0).getFieldRepetitionCount(0));
    }
  }
}
示例8: testWriteFileMapOfLists
import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Writes a struct holding a map whose keys and values are both lists of strings,
 * reads it back as groups and checks both key elements and both value elements
 * on every record.
 */
@Test
public void testWriteFileMapOfLists() throws IOException, InterruptedException, TException {
  Map<List<String>, List<String>> map = new HashMap<List<String>, List<String>>();
  map.put(Arrays.asList("key1", "key2"), Arrays.asList("val1", "val2"));
  final TestListsInMap mapList = new TestListsInMap("maplists", map);
  final Path fileToCreate = createFile(mapList);
  // try-with-resources: the reader is released even when an assertion fails.
  try (ParquetReader<Group> reader = createRecordReader(fileToCreate)) {
    Group g;
    while ((g = reader.read()) != null) {
      assertEquals("key1",
          g.getGroup("names", 0).getGroup("map", 0).getGroup("key", 0).getBinary("key_tuple", 0).toStringUsingUTF8());
      assertEquals("key2",
          g.getGroup("names", 0).getGroup("map", 0).getGroup("key", 0).getBinary("key_tuple", 1).toStringUsingUTF8());
      assertEquals("val1",
          g.getGroup("names", 0).getGroup("map", 0).getGroup("value", 0).getBinary("value_tuple", 0).toStringUsingUTF8());
      assertEquals("val2",
          g.getGroup("names", 0).getGroup("map", 0).getGroup("value", 0).getBinary("value_tuple", 1).toStringUsingUTF8());
    }
  }
}
示例9: read
import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Loads every record of {@code file} into a list, using {@code schema} as both the
 * requested projection and the Avro read schema, and {@code model} as the data model.
 *
 * @param model  Avro data model handed to the reader builder
 * @param schema schema set as requested projection and as Avro read schema
 * @param file   Parquet file to read
 * @param <D>    type of the records produced by the reader
 * @return all records in the order the reader returned them
 * @throws IOException if the reader cannot be built, read or closed
 */
public static <D> List<D> read(GenericData model, Schema schema, File file) throws IOException {
  Configuration readConf = new Configuration(false);
  AvroReadSupport.setRequestedProjection(readConf, schema);
  AvroReadSupport.setAvroReadSchema(readConf, schema);

  Path location = new Path(file.toString());
  ParquetReader<D> records = AvroParquetReader
      .<D>builder(location)
      .withDataModel(model) // reflect disables compatibility
      .withConf(readConf)
      .build();

  List<D> loaded = new ArrayList<D>();
  try {
    // read() returning null marks the end of the file.
    for (D next = records.read(); next != null; next = records.read()) {
      loaded.add(next);
    }
  } finally {
    records.close();
  }
  return loaded;
}
示例10: testAvroReadSchema
import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Writes one car to an uncompressed Parquet file, reads it back with the
 * {@code NewCar} schema set as Avro read schema, and checks which fields are
 * populated, which one is null, and that {@code opt} equals 5.
 */
@Test
public void testAvroReadSchema() throws IOException {
  Path path = writeCarsToParquetFile(1, CompressionCodecName.UNCOMPRESSED, false);
  Configuration conf = new Configuration(testConf);
  AvroReadSupport.setAvroReadSchema(conf, NewCar.SCHEMA$);
  // try-with-resources: the reader is released even when an assertion fails.
  try (ParquetReader<NewCar> reader = new AvroParquetReader<NewCar>(conf, path)) {
    for (NewCar car = reader.read(); car != null; car = reader.read()) {
      // Expected value goes first so a failure reads "expected true but was false".
      assertEquals(true, car.getEngine() != null);
      assertEquals(true, car.getBrand() != null);
      assertEquals(true, car.getYear() != null);
      assertEquals(true, car.getVin() != null);
      assertEquals(true, car.getDescription() == null);
      assertEquals(true, car.getOpt() == 5);
    }
  }
}
示例11: testCompatStringCompatibility
import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Reads the {@code strings-2.parquet} resource and checks that the {@code text}
 * field of every record is materialized as a {@link String}.
 */
@Test
public void testCompatStringCompatibility() throws IOException {
  // some older versions of Parquet used avro.schema instead of
  // parquet.avro.schema and didn't annotate binary with UTF8 when the type
  // was converted from an Avro string. this validates that the old read
  // schema is recognized and used to read the file as expected.
  Path testFile = new Path(Resources.getResource("strings-2.parquet").getFile());
  Configuration conf = new Configuration();
  // try-with-resources: the reader is released even when an assertion fails.
  try (ParquetReader<GenericRecord> reader = AvroParquetReader
           .builder(new AvroReadSupport<GenericRecord>(), testFile)
           .withConf(conf)
           .build()) {
    GenericRecord r;
    while ((r = reader.read()) != null) {
      Assert.assertTrue("Should read value into a String",
          r.get("text") instanceof String);
    }
  }
}
示例12: getSchema
import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Derives the Connect schema of the Parquet file at {@code path} from the Avro
 * schema of its first record.
 *
 * @param conf Hadoop configuration handed to the reader
 * @param path file to inspect
 * @return the converted schema, or {@code null} when the file yields no record
 * @throws IOException if the reader cannot be built, read or closed
 */
@Override
public Schema getSchema(Configuration conf, Path path) throws IOException {
  AvroReadSupport<GenericRecord> readSupport = new AvroReadSupport<>();
  ParquetReader.Builder<GenericRecord> builder = ParquetReader.builder(readSupport, path);
  // try-with-resources: previously close() was skipped whenever read() or the conversion threw.
  try (ParquetReader<GenericRecord> parquetReader = builder.withConf(conf).build()) {
    // Every record of one file carries the same Avro schema, so the first record
    // is enough; the original scanned and converted every record for the same result.
    GenericRecord record = parquetReader.read();
    return record == null ? null : avroData.toConnectSchema(record.getSchema());
  }
}
示例13: readData
import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Reads every record of the Parquet file at {@code path} as an Avro generic record.
 *
 * @param conf Hadoop configuration handed to the reader
 * @param path file to read
 * @return the records in the order the reader returned them; empty if none were returned
 * @throws IOException if the reader cannot be built, read or closed
 */
@Override
public Collection<Object> readData(Configuration conf, Path path) throws IOException {
  Collection<Object> result = new ArrayList<>();
  AvroReadSupport<GenericRecord> readSupport = new AvroReadSupport<>();
  ParquetReader.Builder<GenericRecord> builder = ParquetReader.builder(readSupport, path);
  // try-with-resources: previously close() was skipped whenever read() threw.
  try (ParquetReader<GenericRecord> parquetReader = builder.withConf(conf).build()) {
    GenericRecord record;
    while ((record = parquetReader.read()) != null) {
      result.add(record);
    }
  }
  return result;
}
示例14: execute
import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Pretty-prints the leading records of the Parquet file named by the first
 * positional argument to {@code Main.out}. The number of records is taken from
 * option {@code n} when present, otherwise from {@code DEFAULT}.
 *
 * @param options parsed command line; the first positional argument is the input path
 * @throws Exception if option parsing, reader construction or reading fails
 */
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);
  long num = DEFAULT;
  if (options.hasOption('n')) {
    num = Long.parseLong(options.getOptionValue('n'));
  }
  String[] args = options.getArgs();
  String input = args[0];
  ParquetReader<SimpleRecord> reader = null;
  try {
    PrintWriter writer = new PrintWriter(Main.out, true);
    reader = ParquetReader.builder(new SimpleReadSupport(), new Path(input)).build();
    // Check the remaining budget before reading, so no record beyond the
    // requested count is decoded (the original always read one extra).
    for (long remaining = num; remaining > 0; remaining--) {
      SimpleRecord value = reader.read();
      if (value == null) {
        break; // end of file reached before the limit
      }
      value.prettyPrint(writer);
      writer.println();
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ignored) {
        // Best-effort close: a failure here must not mask an exception from the body.
      }
    }
  }
}
示例15: execute
import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Prints every record of the Parquet file named by the first positional argument
 * to {@code Main.out}: as JSON when option {@code j} is present, otherwise
 * pretty-printed. Each record is followed by a line break.
 *
 * @param options parsed command line; the first positional argument is the input path
 * @throws Exception if reader construction, footer reading or record reading fails
 */
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);
  String[] args = options.getArgs();
  String input = args[0];
  Path inputPath = new Path(input);
  // The output format does not change while reading; decide it once, not per record.
  boolean asJson = options.hasOption('j');
  ParquetReader<SimpleRecord> reader = null;
  try {
    PrintWriter writer = new PrintWriter(Main.out, true);
    reader = ParquetReader.builder(new SimpleReadSupport(), inputPath).build();
    ParquetMetadata metadata = ParquetFileReader.readFooter(new Configuration(), inputPath);
    JsonRecordFormatter.JsonGroupFormatter formatter = JsonRecordFormatter.fromSchema(metadata.getFileMetaData().getSchema());
    for (SimpleRecord value = reader.read(); value != null; value = reader.read()) {
      if (asJson) {
        writer.write(formatter.formatRecord(value));
      } else {
        value.prettyPrint(writer);
      }
      writer.println();
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ignored) {
        // Best-effort close: a failure here must not mask an exception from the body.
      }
    }
  }
}