当前位置: 首页>>代码示例>>Java>>正文


Java ParquetReader.read方法代码示例

本文整理汇总了 Java 中 org.apache.parquet.hadoop.ParquetReader.read 方法的典型用法代码示例。如果您正苦于以下问题:Java ParquetReader.read 方法的具体用法是什么?Java ParquetReader.read 怎么用?有哪些 Java ParquetReader.read 的使用例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.apache.parquet.hadoop.ParquetReader 的用法示例。


在下文中一共展示了ParquetReader.read方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: validateParquetFile

import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Reads {@code recourdCount} rows from the given Parquet file and asserts that the columns
 * {@code b}, {@code s}, {@code l}, {@code l100} and {@code s100} of each row hold the values
 * derived from the zero-based row index, then asserts that no further row exists.
 *
 * @param parquetFile path of the Parquet file to validate
 * @param recourdCount number of rows the file is expected to contain
 * @throws IOException if reading from or closing the file fails
 */
public void validateParquetFile(Path parquetFile, long recourdCount) throws IOException {
  ParquetReader<GenericData.Record> reader = AvroParquetReader.<GenericData.Record>builder(parquetFile)
    .build();

  // Release the reader even when an assertion fails, otherwise the open file is leaked.
  try {
    for(long i = 0; i < recourdCount; i++) {
      GenericData.Record actualRow = reader.read();
      Assert.assertNotNull("Can't read row " + i, actualRow);

      // JUnit expects (message, expected, actual); keeping that order makes failure output truthful.
      Assert.assertEquals("Value different in row " + i + " for key b", i % 2 == 0, actualRow.get("b"));
      Assert.assertEquals("Value different in row " + i + " for key s", new Utf8(String.valueOf(i)), actualRow.get("s"));
      Assert.assertEquals("Value different in row " + i + " for key l", i, actualRow.get("l"));
      Assert.assertEquals("Value different in row " + i + " for key l100", i%100, actualRow.get("l100"));
      Assert.assertEquals("Value different in row " + i + " for key s100", new Utf8(String.valueOf(i % 100)), actualRow.get("s100"));
    }

    Assert.assertNull("Parquet file contains more then expected rows", reader.read());
  } finally {
    reader.close();
  }
}
 
开发者ID:streamsets,项目名称:datacollector,代码行数:18,代码来源:LargeInputFileIT.java

示例2: validateParquetFile

import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Reads one row from the given Parquet file per entry of {@code data} and asserts that every
 * key of the expected map has an equal value in the row that was read, then asserts that the
 * file holds no additional row.
 *
 * @param parquetFile path of the Parquet file to validate
 * @param data expected rows, in file order; each map goes from column name to expected value
 * @throws IOException if reading from or closing the file fails
 */
public void validateParquetFile(Path parquetFile, List<Map<String, Object>> data) throws IOException {
  ParquetReader<GenericData.Record> reader = AvroParquetReader.<GenericData.Record>builder(parquetFile)
    .build();

  // Release the reader even when an assertion fails, otherwise the open file is leaked.
  try {
    int position = 0;
    for(Map<String, Object> expectedRow : data) {
      GenericData.Record actualRow = reader.read();
      Assert.assertNotNull("Can't read row " + position, actualRow);

      for(Map.Entry<String, Object> entry : expectedRow.entrySet()) {
        Object value = actualRow.get(entry.getKey());
        Assert.assertEquals("Different value on row " + position + " for key " + entry.getKey(), entry.getValue(), value);
      }

      // Advance the row counter; without this every failure message reports row 0.
      position++;
    }

    Assert.assertNull("Parquet file contains more then expected rows", reader.read());
  } finally {
    reader.close();
  }
}
 
开发者ID:streamsets,项目名称:datacollector,代码行数:18,代码来源:BaseAvroParquetConvertIT.java

示例3: read

import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Reads {@code nRows} records from the given Parquet file and feeds the first value of each
 * of the eight benchmark fields into the blackhole.
 *
 * @param parquetFile path of the Parquet file to read
 * @param nRows number of records to read; the file must contain at least this many
 * @param blackhole sink that consumes every value that is read
 * @throws IOException if reading from or closing the file fails
 */
private void read(Path parquetFile, int nRows, Blackhole blackhole) throws IOException
{
  ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), parquetFile).withConf(configuration).build();
  // Close in finally so a failing read or field access does not leak the reader.
  try {
    for (int i = 0; i < nRows; i++) {
      Group group = reader.read();
      blackhole.consume(group.getBinary("binary_field", 0));
      blackhole.consume(group.getInteger("int32_field", 0));
      blackhole.consume(group.getLong("int64_field", 0));
      blackhole.consume(group.getBoolean("boolean_field", 0));
      blackhole.consume(group.getFloat("float_field", 0));
      blackhole.consume(group.getDouble("double_field", 0));
      blackhole.consume(group.getBinary("flba_field", 0));
      blackhole.consume(group.getInt96("int96_field", 0));
    }
  } finally {
    reader.close();
  }
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:17,代码来源:ReadBenchmarks.java

示例4: countFilteredRecords

import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Counts the records of a Parquet file that are returned when reading with the given
 * filter predicate applied.
 *
 * @param path path of the Parquet file to read
 * @param pred predicate used to build the read filter
 * @return number of records returned by the filtered reader
 * @throws IOException if reading from or closing the file fails
 */
public static long countFilteredRecords(Path path, FilterPredicate pred) throws IOException{
  ParquetReader.Builder<Group> filteredBuilder =
      ParquetReader.builder(new GroupReadSupport(), path).withFilter(FilterCompat.get(pred));
  ParquetReader<Group> filteredReader = filteredBuilder.build();

  long matched = 0;
  try {
    // Each non-null read is one record that passed the filter.
    for (Group next = filteredReader.read(); next != null; next = filteredReader.read()) {
      matched++;
    }
  } finally {
    filteredReader.close();
  }
  return matched;
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:17,代码来源:TestFiltersWithMissingColumns.java

示例5: readFile

import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Reads every record of the given Parquet file that passes {@code filter} into a list.
 *
 * @param f Parquet file to read
 * @param filter filter applied by the reader
 * @return the records that were read, in the order the reader returned them
 * @throws IOException if reading from or closing the file fails
 */
public static List<Group> readFile(File f, Filter filter) throws IOException {
  Configuration conf = new Configuration();
  GroupWriteSupport.setSchema(schema, conf);

  ParquetReader<Group> reader =
      ParquetReader.builder(new GroupReadSupport(), new Path(f.getAbsolutePath()))
                   .withConf(conf)
                   .withFilter(filter)
                   .build();

  List<Group> users = new ArrayList<Group>();

  // Close in finally so the reader is released on both normal and exceptional exit.
  try {
    Group current = reader.read();
    while (current != null) {
      users.add(current);
      current = reader.read();
    }
  } finally {
    reader.close();
  }

  return users;
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:22,代码来源:PhoneBookWriter.java

示例6: testWriteFile

import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Writes an address book holding a single person to a Parquet file, reads the file back and
 * checks the person count and e-mail of each record as well as the total number of records.
 */
@Test
public void testWriteFile() throws IOException, InterruptedException, TException {
  final AddressBook a = new AddressBook(
      Arrays.asList(
          new Person(
              new Name("Bob", "Roberts"),
              0,
              "[email protected]",
              Arrays.asList(new PhoneNumber("1234567890")))));

  final Path fileToCreate = createFile(a);

  ParquetReader<Group> reader = createRecordReader(fileToCreate);

  // Close in finally so a failing assertion does not leak the reader.
  try {
    Group g = null;
    int i = 0;
    while((g = reader.read()) != null) {
      assertEquals(a.persons.size(), g.getFieldRepetitionCount("persons"));
      assertEquals(a.persons.get(0).email, g.getGroup("persons", 0).getGroup(0, 0).getString("email", 0));
      // just some sanity check, we're testing the various layers somewhere else
      ++i;
    }
    assertEquals("read 1 record", 1, i);
  } finally {
    reader.close();
  }

}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:26,代码来源:TestThriftToParquetFileWriter.java

示例7: testWriteFileMapOfList

import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Writes a {@code TestListInMap} whose map has one key with a two-element list value, reads
 * the file back and checks the key and the number of list elements in every record.
 */
@Test
public void testWriteFileMapOfList() throws IOException, InterruptedException, TException {
  Map<String, List<String>> map = new HashMap<String,List<String>>();
  map.put("key", Arrays.asList("val1","val2"));
  final TestListInMap mapList = new TestListInMap("maplist", map);
  final Path fileToCreate = createFile(mapList);

  ParquetReader<Group> reader = createRecordReader(fileToCreate);

  // Close in finally so a failing assertion does not leak the reader.
  try {
    Group g = null;
    while((g = reader.read()) != null) {
      assertEquals("key",
          g.getGroup("names", 0).getGroup("map",0).getBinary("key", 0).toStringUsingUTF8());
      assertEquals(map.get("key").size(),
          g.getGroup("names", 0).getGroup("map",0).getGroup("value", 0).getFieldRepetitionCount(0));
    }
  } finally {
    reader.close();
  }
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:18,代码来源:TestThriftToParquetFileWriter.java

示例8: testWriteFileMapOfLists

import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Writes a {@code TestListsInMap} whose map has a two-element list key and a two-element
 * list value, reads the file back and checks both key elements and both value elements of
 * every record.
 */
@Test
public void testWriteFileMapOfLists() throws IOException, InterruptedException, TException {
  Map<List<String>, List<String>> map = new HashMap<List<String>,List<String>>();
  map.put(Arrays.asList("key1","key2"), Arrays.asList("val1","val2"));
  final TestListsInMap mapList = new TestListsInMap("maplists", map);
  final Path fileToCreate = createFile(mapList);

  ParquetReader<Group> reader = createRecordReader(fileToCreate);

  // Close in finally so a failing assertion does not leak the reader.
  try {
    Group g = null;
    while((g = reader.read()) != null) {
      assertEquals("key1",
          g.getGroup("names", 0).getGroup("map",0).getGroup("key", 0).getBinary("key_tuple", 0).toStringUsingUTF8());
      assertEquals("key2",
          g.getGroup("names", 0).getGroup("map",0).getGroup("key", 0).getBinary("key_tuple", 1).toStringUsingUTF8());
      assertEquals("val1",
          g.getGroup("names", 0).getGroup("map",0).getGroup("value", 0).getBinary("value_tuple", 0).toStringUsingUTF8());
      assertEquals("val2",
          g.getGroup("names", 0).getGroup("map",0).getGroup("value", 0).getBinary("value_tuple", 1).toStringUsingUTF8());
    }
  } finally {
    reader.close();
  }
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:22,代码来源:TestThriftToParquetFileWriter.java

示例9: read

import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Loads every record of a Parquet file into a list, using {@code schema} both as the
 * requested projection and as the Avro read schema.
 *
 * @param model Avro data model handed to the reader builder
 * @param schema Avro schema used for projection and for reading
 * @param file Parquet file to read
 * @param <D> type of the records produced by the reader
 * @return all records of the file, in the order the reader returned them
 * @throws IOException if reading from or closing the file fails
 */
public static <D> List<D> read(GenericData model, Schema schema, File file) throws IOException {
  Configuration readConf = new Configuration(false);
  AvroReadSupport.setRequestedProjection(readConf, schema);
  AvroReadSupport.setAvroReadSchema(readConf, schema);

  Path location = new Path(file.toString());
  ParquetReader<D> records = AvroParquetReader
      .<D>builder(location)
      .withDataModel(model) // reflect disables compatibility
      .withConf(readConf)
      .build();

  List<D> loaded = new ArrayList<D>();
  try {
    // A null read marks the end of the file.
    for (D next = records.read(); next != null; next = records.read()) {
      loaded.add(next);
    }
  } finally {
    records.close();
  }

  return loaded;
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:23,代码来源:AvroTestUtil.java

示例10: testAvroReadSchema

import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Writes cars to a Parquet file, reads them back as {@code NewCar} using
 * {@code NewCar.SCHEMA$} as the Avro read schema, and checks on every record that engine,
 * brand, year and vin are set, description is null and opt equals 5.
 */
@Test
public void testAvroReadSchema() throws IOException {
  Path path = writeCarsToParquetFile(1, CompressionCodecName.UNCOMPRESSED, false);
  Configuration conf = new Configuration(testConf);
  AvroReadSupport.setAvroReadSchema(conf, NewCar.SCHEMA$);

  ParquetReader<NewCar> reader = new AvroParquetReader<NewCar>(conf, path);
  // Close in finally so a failing assertion does not leak the reader.
  try {
    for (NewCar car = reader.read(); car != null; car = reader.read()) {
      // Arguments are in (expected, actual) order so a failure message reads correctly.
      assertEquals(true, car.getEngine() != null);
      assertEquals(true, car.getBrand() != null);
      assertEquals(true, car.getYear() != null);
      assertEquals(true, car.getVin() != null);
      assertEquals(true, car.getDescription() == null);
      assertEquals(true, car.getOpt() == 5);
    }
  } finally {
    reader.close();
  }
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:17,代码来源:TestSpecificReadWrite.java

示例11: testCompatStringCompatibility

import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Reads the {@code strings-2.parquet} resource and checks that the {@code text} field of
 * every record is materialized as a {@link String}.
 */
@Test
public void testCompatStringCompatibility() throws IOException {
  // some older versions of Parquet used avro.schema instead of
  // parquet.avro.schema and didn't annotate binary with UTF8 when the type
  // was converted from an Avro string. this validates that the old read
  // schema is recognized and used to read the file as expected.
  Path testFile = new Path(Resources.getResource("strings-2.parquet").getFile());
  Configuration conf = new Configuration();
  ParquetReader<GenericRecord> reader = AvroParquetReader
      .builder(new AvroReadSupport<GenericRecord>(), testFile)
      .withConf(conf)
      .build();
  // Close in finally so a failing assertion does not leak the reader.
  try {
    GenericRecord r;
    while ((r = reader.read()) != null) {
      Assert.assertTrue("Should read value into a String",
          r.get("text") instanceof String);
    }
  } finally {
    reader.close();
  }
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:19,代码来源:TestBackwardCompatibility.java

示例12: getSchema

import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Returns the Connect schema of a Parquet file, derived from the Avro schema of its first
 * record.
 *
 * @param conf configuration handed to the reader builder
 * @param path path of the Parquet file
 * @return the converted schema, or {@code null} when the file holds no record
 * @throws IOException if reading from or closing the file fails
 */
@Override
public Schema getSchema(Configuration conf, Path path) throws IOException {
  AvroReadSupport<GenericRecord> readSupport = new AvroReadSupport<>();
  ParquetReader.Builder<GenericRecord> builder = ParquetReader.builder(readSupport, path);
  // try-with-resources closes the reader even when read() or the conversion throws.
  try (ParquetReader<GenericRecord> parquetReader = builder.withConf(conf).build()) {
    // NOTE(review): relies on all records of one file carrying the same Avro schema, so the
    // first record is enough and the rest of the file need not be scanned — confirm.
    GenericRecord record = parquetReader.read();
    if (record == null) {
      return null;
    }
    return avroData.toConnectSchema(record.getSchema());
  }
}
 
开发者ID:jiangxiluning,项目名称:kafka-connect-hdfs,代码行数:14,代码来源:ParquetFileReader.java

示例13: readData

import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Reads every record of a Parquet file into a collection.
 *
 * @param conf configuration handed to the reader builder
 * @param path path of the Parquet file
 * @return the records that were read, in the order the reader returned them
 * @throws IOException if reading from or closing the file fails
 */
@Override
public Collection<Object> readData(Configuration conf, Path path) throws IOException {
  Collection<Object> result = new ArrayList<>();
  AvroReadSupport<GenericRecord> readSupport = new AvroReadSupport<>();
  ParquetReader.Builder<GenericRecord> builder = ParquetReader.builder(readSupport, path);
  // try-with-resources closes the reader even when a read fails part-way through the file.
  try (ParquetReader<GenericRecord> parquetReader = builder.withConf(conf).build()) {
    GenericRecord record;
    while ((record = parquetReader.read()) != null) {
      result.add(record);
    }
  }
  return result;
}
 
开发者ID:jiangxiluning,项目名称:kafka-connect-hdfs,代码行数:14,代码来源:ParquetFileReader.java

示例14: execute

import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Pretty-prints the leading records of the Parquet file named by the first positional
 * argument to {@code Main.out}. The number of records is taken from option {@code n} when
 * present and from {@code DEFAULT} otherwise.
 *
 * @param options parsed command line
 * @throws Exception if the superclass handling, option parsing or reading fails
 */
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);

  long num = DEFAULT;
  if (options.hasOption('n')) {
    num = Long.parseLong(options.getOptionValue('n'));
  }

  String[] args = options.getArgs();
  String input = args[0];

  ParquetReader<SimpleRecord> reader = null;
  try {
    PrintWriter writer = new PrintWriter(Main.out, true);
    reader = ParquetReader.builder(new SimpleReadSupport(), new Path(input)).build();
    // Check the limit before reading so no record beyond the requested count is decoded.
    for (long printed = 0; printed < num; printed++) {
      SimpleRecord value = reader.read();
      if (value == null) {
        break;
      }
      value.prettyPrint(writer);
      writer.println();
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ignored) {
        // Best-effort close: a failure here must not mask an exception from the try body.
      }
    }
  }
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:30,代码来源:HeadCommand.java

示例15: execute

import org.apache.parquet.hadoop.ParquetReader; //导入方法依赖的package包/类
/**
 * Prints every record of the Parquet file named by the first positional argument to
 * {@code Main.out}: as JSON built from the file's schema when option {@code j} is present,
 * pretty-printed otherwise.
 *
 * @param options parsed command line
 * @throws Exception if the superclass handling, footer reading or record reading fails
 */
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);

  String[] args = options.getArgs();
  String input = args[0];

  // The output format cannot change while iterating, so look the option up once.
  final boolean asJson = options.hasOption('j');

  ParquetReader<SimpleRecord> reader = null;
  try {
    PrintWriter writer = new PrintWriter(Main.out, true);
    reader = ParquetReader.builder(new SimpleReadSupport(), new Path(input)).build();
    ParquetMetadata metadata = ParquetFileReader.readFooter(new Configuration(), new Path(input));
    JsonRecordFormatter.JsonGroupFormatter formatter = JsonRecordFormatter.fromSchema(metadata.getFileMetaData().getSchema());

    for (SimpleRecord value = reader.read(); value != null; value = reader.read()) {
      if (asJson) {
        writer.write(formatter.formatRecord(value));
      } else {
        value.prettyPrint(writer);
      }
      writer.println();
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ignored) {
        // Best-effort close: a failure here must not mask an exception from the try body.
      }
    }
  }
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:32,代码来源:CatCommand.java


注:本文中的org.apache.parquet.hadoop.ParquetReader.read方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。