本文整理汇总了Java中org.apache.parquet.column.page.PageReadStore.getPageReader方法的典型用法代码示例。如果您正苦于以下问题:Java PageReadStore.getPageReader方法的具体用法?Java PageReadStore.getPageReader怎么用?Java PageReadStore.getPageReader使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.parquet.column.page.PageReadStore
的用法示例。
在下文中一共展示了PageReadStore.getPageReader方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: read
import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
@Test
public void read(String fileName) throws IOException
{
Path path = new Path(fileName);
Configuration conf = new Configuration();
conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
ParquetMetadata metadata = ParquetFileReader.readFooter(conf, path, NO_FILTER);
ParquetFileReader reader = new ParquetFileReader(conf, metadata.getFileMetaData(), path, metadata.getBlocks(), metadata.getFileMetaData().getSchema().getColumns());
PageReadStore pageReadStore;
PageReader pageReader;
DataPage page;
while ((pageReadStore = reader.readNextRowGroup()) != null) {
for (ColumnDescriptor cd: metadata.getFileMetaData().getSchema().getColumns()) {
pageReader = pageReadStore.getPageReader(cd);
page = pageReader.readPage();
}
}
}
示例2: validateContains
import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
private void validateContains(MessageType schema, PageReadStore pages, String[] path, int values, BytesInput bytes)
throws IOException {
PageReader pageReader = pages.getPageReader(schema.getColumnDescription(path));
DataPageV1 page = (DataPageV1) pageReader.readPage();
assertEquals(values, page.getValueCount());
assertArrayEquals(bytes.toByteArray(), page.getBytes().toByteArray());
}
示例3: getPageGroupForColumn
import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
private static List<DataPage> getPageGroupForColumn(PageReadStore pageReadStore, ColumnDescriptor columnDescriptor) {
PageReader pageReader = pageReadStore.getPageReader(columnDescriptor);
List<DataPage> pageGroup = new ArrayList<DataPage>();
DataPage page;
while ((page = pageReader.readPage()) != null) {
pageGroup.add(reusableCopy(page));
}
return pageGroup;
}
示例4: validate
import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
public void validate(MessageType schema, PageReadStore store) {
for (ColumnDescriptor desc : schema.getColumns()) {
PageReader reader = store.getPageReader(desc);
DictionaryPage dict = reader.readDictionaryPage();
DataPage page;
while ((page = reader.readPage()) != null) {
validateStatsForPage(page, dict, desc);
}
}
}
示例5: test
import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
@Test
public void test() throws Exception {
Path file = new Path("target/test/TestColumnChunkPageWriteStore/test.parquet");
Path root = file.getParent();
FileSystem fs = file.getFileSystem(conf);
if (fs.exists(root)) {
fs.delete(root, true);
}
fs.mkdirs(root);
MessageType schema = MessageTypeParser.parseMessageType("message test { repeated binary bar; }");
ColumnDescriptor col = schema.getColumns().get(0);
Encoding dataEncoding = PLAIN;
int valueCount = 10;
int d = 1;
int r = 2;
int v = 3;
BytesInput definitionLevels = BytesInput.fromInt(d);
BytesInput repetitionLevels = BytesInput.fromInt(r);
Statistics<?> statistics = new BinaryStatistics();
BytesInput data = BytesInput.fromInt(v);
int rowCount = 5;
int nullCount = 1;
{
ParquetFileWriter writer = new ParquetFileWriter(conf, schema, file);
writer.start();
writer.startBlock(rowCount);
{
ColumnChunkPageWriteStore store = new ColumnChunkPageWriteStore(compressor(GZIP), schema , new HeapByteBufferAllocator());
PageWriter pageWriter = store.getPageWriter(col);
pageWriter.writePageV2(
rowCount, nullCount, valueCount,
repetitionLevels, definitionLevels,
dataEncoding, data,
statistics);
store.flushToFileWriter(writer);
}
writer.endBlock();
writer.end(new HashMap<String, String>());
}
{
ParquetMetadata footer = ParquetFileReader.readFooter(conf, file, NO_FILTER);
ParquetFileReader reader = new ParquetFileReader(
conf, footer.getFileMetaData(), file, footer.getBlocks(), schema.getColumns());
PageReadStore rowGroup = reader.readNextRowGroup();
PageReader pageReader = rowGroup.getPageReader(col);
DataPageV2 page = (DataPageV2)pageReader.readPage();
assertEquals(rowCount, page.getRowCount());
assertEquals(nullCount, page.getNullCount());
assertEquals(valueCount, page.getValueCount());
assertEquals(d, intValue(page.getDefinitionLevels()));
assertEquals(r, intValue(page.getRepetitionLevels()));
assertEquals(dataEncoding, page.getDataEncoding());
assertEquals(v, intValue(page.getData()));
assertEquals(statistics.toString(), page.getStatistics().toString());
reader.close();
}
}
示例6: validateContains
import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
private void validateContains(MessageType schema, PageReadStore pages, String[] path, int values, BytesInput bytes) throws IOException {
PageReader pageReader = pages.getPageReader(schema.getColumnDescription(path));
DataPage page = pageReader.readPage();
assertEquals(values, page.getValueCount());
assertArrayEquals(bytes.toByteArray(), ((DataPageV1)page).getBytes().toByteArray());
}
示例7: getDictionaryPageForColumn
import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
private static DictionaryPage getDictionaryPageForColumn(PageReadStore pageReadStore, ColumnDescriptor columnDescriptor) {
PageReader pageReader = pageReadStore.getPageReader(columnDescriptor);
return pageReader.readDictionaryPage();
}