This article collects typical usage examples of the Java class org.apache.parquet.format.PageType. If you are unsure what PageType is for or how to use it, the curated examples below may help.
The PageType class belongs to the org.apache.parquet.format package. Eight code examples are shown below, sorted by popularity.
Example 1: readDictionaries
import org.apache.parquet.format.PageType; // import the required package/class
/**
 * Returns the dictionary per row group for all binary columns in the given Parquet file.
 * @param fs filesystem object
 * @param filePath Parquet file to scan
 * @param codecFactory factory supplying the decompressors used to decode dictionary pages
 * @return pair of the dictionaries found for binary fields and the set of binary fields that are not dictionary encoded
 * @throws IOException if the footer or a dictionary page cannot be read, or if the file has more than one row group
 */
public static Pair<Map<ColumnDescriptor, Dictionary>, Set<ColumnDescriptor>> readDictionaries(FileSystem fs, Path filePath, CodecFactory codecFactory) throws IOException {
  final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(fs.getConf(), filePath, ParquetMetadataConverter.NO_FILTER);
  if (parquetMetadata.getBlocks().size() > 1) {
    throw new IOException(
        format("Global dictionaries can only be built on a parquet file with a single row group, found %d row groups for file %s",
            parquetMetadata.getBlocks().size(), filePath));
  }
  final BlockMetaData rowGroupMetadata = parquetMetadata.getBlocks().get(0);
  final Map<ColumnPath, ColumnDescriptor> columnDescriptorMap = Maps.newHashMap();
  for (ColumnDescriptor columnDescriptor : parquetMetadata.getFileMetaData().getSchema().getColumns()) {
    columnDescriptorMap.put(ColumnPath.get(columnDescriptor.getPath()), columnDescriptor);
  }
  final Set<ColumnDescriptor> columnsToSkip = Sets.newHashSet(); // columns found in the file that are not dictionary encoded
  final Map<ColumnDescriptor, Dictionary> dictionaries = Maps.newHashMap();
  try (final FSDataInputStream in = fs.open(filePath)) {
    for (ColumnChunkMetaData columnChunkMetaData : rowGroupMetadata.getColumns()) {
      if (isBinaryType(columnChunkMetaData.getType())) {
        final ColumnDescriptor column = columnDescriptorMap.get(columnChunkMetaData.getPath());
        // if the first page is dictionary encoded then load the dictionary, otherwise skip this column
        final PageHeaderWithOffset pageHeader = columnChunkMetaData.getPageHeaders().get(0);
        if (PageType.DICTIONARY_PAGE == pageHeader.getPageHeader().getType()) {
          dictionaries.put(column, readDictionary(in, column, pageHeader, codecFactory.getDecompressor(columnChunkMetaData.getCodec())));
        } else {
          columnsToSkip.add(column);
        }
      }
    }
  }
  return new ImmutablePair<>(dictionaries, columnsToSkip);
}
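A minimal usage sketch for the helper above; the caller name printDictionaryEncodedColumns is hypothetical, and the CodecFactory import is omitted because it is whatever type the surrounding project uses in readDictionaries's signature:

import java.io.IOException;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.Dictionary;

// Hypothetical caller: reports which binary columns of a single-row-group
// Parquet file carry a dictionary and which do not.
public static void printDictionaryEncodedColumns(FileSystem fs, Path file, CodecFactory codecFactory) throws IOException {
  final Pair<Map<ColumnDescriptor, Dictionary>, Set<ColumnDescriptor>> result =
      readDictionaries(fs, file, codecFactory);
  for (ColumnDescriptor column : result.getLeft().keySet()) {
    System.out.println("dictionary encoded: " + column);
  }
  for (ColumnDescriptor column : result.getRight()) {
    System.out.println("not dictionary encoded: " + column);
  }
}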
Example 2: newDataPageHeader
import org.apache.parquet.format.PageType; // import the required package/class
private PageHeader newDataPageHeader(
    int uncompressedSize, int compressedSize,
    int valueCount,
    org.apache.parquet.column.statistics.Statistics statistics,
    org.apache.parquet.column.Encoding rlEncoding,
    org.apache.parquet.column.Encoding dlEncoding,
    org.apache.parquet.column.Encoding valuesEncoding) {
  PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE, uncompressedSize, compressedSize);
  // TODO: pageHeader.crc = ...;
  pageHeader.setData_page_header(new DataPageHeader(
      valueCount,
      getEncoding(valuesEncoding),
      getEncoding(dlEncoding),
      getEncoding(rlEncoding)));
  if (!statistics.isEmpty()) {
    pageHeader.getData_page_header().setStatistics(toParquetStatistics(statistics));
  }
  return pageHeader;
}
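For comparison, a minimal hand-built sketch of the same kind of V1 header using parquet-format's thrift-generated constructors directly, without the private getEncoding mapper; all sizes, counts, and encodings are made up for illustration:

import org.apache.parquet.format.DataPageHeader;
import org.apache.parquet.format.Encoding;
import org.apache.parquet.format.PageHeader;
import org.apache.parquet.format.PageType;

public static PageHeader examplePlainDataPageHeader() {
  // A DATA_PAGE of 1000 values: 100 bytes uncompressed, 80 bytes compressed;
  // PLAIN-encoded values with RLE-encoded definition and repetition levels.
  PageHeader header = new PageHeader(PageType.DATA_PAGE, 100, 80);
  header.setData_page_header(new DataPageHeader(
      1000,            // num_values
      Encoding.PLAIN,  // values encoding
      Encoding.RLE,    // definition_level_encoding
      Encoding.RLE));  // repetition_level_encoding
  return header;
}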
Example 3: newDataPageV2Header
import org.apache.parquet.format.PageType; // import the required package/class
private PageHeader newDataPageV2Header(
    int uncompressedSize, int compressedSize,
    int valueCount, int nullCount, int rowCount,
    org.apache.parquet.column.statistics.Statistics<?> statistics,
    org.apache.parquet.column.Encoding dataEncoding,
    int rlByteLength, int dlByteLength) {
  // TODO: pageHeader.crc = ...;
  DataPageHeaderV2 dataPageHeaderV2 = new DataPageHeaderV2(
      valueCount, nullCount, rowCount,
      getEncoding(dataEncoding),
      dlByteLength, rlByteLength);
  if (!statistics.isEmpty()) {
    dataPageHeaderV2.setStatistics(
        toParquetStatistics(statistics));
  }
  PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE_V2, uncompressedSize, compressedSize);
  pageHeader.setData_page_header_v2(dataPageHeaderV2);
  return pageHeader;
}
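Compared with the V1 header in Example 2, the DATA_PAGE_V2 header additionally records the null count, the row count, and the byte lengths of the repetition and definition levels. In the V2 page format the level runs are stored uncompressed ahead of the values, so these lengths let a reader locate or skip the levels without decompressing anything.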
Example 4: updateStats
import org.apache.parquet.format.PageType; // import the required package/class
private void updateStats(PageHeader pageHeader, String op, long start, long time, long bytesin, long bytesout) {
  String pageType = "Data Page";
  if (pageHeader.type == PageType.DICTIONARY_PAGE) {
    pageType = "Dictionary Page";
  }
  logger.trace("ParquetTrace,{},{},{},{},{},{},{},{}", op, pageType,
      this.parentColumnReader.parentReader.hadoopPath,
      this.parentColumnReader.columnDescriptor.toString(), start, bytesin, bytesout, time);
  if (pageHeader.type != PageType.DICTIONARY_PAGE) {
    if (bytesin == bytesout) {
      this.stats.timePageLoads += time;
      this.stats.numPageLoads++;
      this.stats.totalPageReadBytes += bytesin;
    } else {
      this.stats.timePagesDecompressed += time;
      this.stats.numPagesDecompressed++;
      this.stats.totalDecompressedBytes += bytesin;
    }
  } else {
    if (bytesin == bytesout) {
      this.stats.timeDictPageLoads += time;
      this.stats.numDictPageLoads++;
      this.stats.totalDictPageReadBytes += bytesin;
    } else {
      this.stats.timeDictPagesDecompressed += time;
      this.stats.numDictPagesDecompressed++;
      this.stats.totalDictDecompressedBytes += bytesin;
    }
  }
}
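Note the bytesin == bytesout test: when the input and output sizes match, the page was read straight through without decompression, so the time and bytes are booked under the page-load counters; when they differ, the page went through a decompressor and the cost is booked under the decompression counters instead.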
Example 5: nextInternal
import org.apache.parquet.format.PageType; // import the required package/class
/**
 * Get the page header and the pageData (uncompressed) for the next page.
 */
protected void nextInternal() throws IOException {
  Stopwatch timer = Stopwatch.createUnstarted();
  // next, we need to decompress the bytes
  // TODO - figure out if we need multiple dictionary pages, I believe it may be limited to one
  // I think we are clobbering parts of the dictionary if there can be multiple pages of dictionary
  do {
    long start = dataReader.getPos();
    timer.start();
    pageHeader = Util.readPageHeader(dataReader);
    long timeToRead = timer.elapsed(TimeUnit.NANOSECONDS);
    long pageHeaderBytes = dataReader.getPos() - start;
    this.updateStats(pageHeader, "Page Header", start, timeToRead, pageHeaderBytes, pageHeaderBytes);
    logger.trace("ParquetTrace,{},{},{},{},{},{},{},{}", "Page Header Read", "",
        this.parentColumnReader.parentReader.hadoopPath,
        this.parentColumnReader.columnDescriptor.toString(), start, 0, 0, timeToRead);
    timer.reset();
    if (pageHeader.getType() == PageType.DICTIONARY_PAGE) {
      readDictionaryPage(pageHeader, parentColumnReader);
    }
  } while (pageHeader.getType() == PageType.DICTIONARY_PAGE);
  int compressedSize = pageHeader.getCompressed_page_size();
  int uncompressedSize = pageHeader.getUncompressed_page_size();
  pageData = readPage(pageHeader, compressedSize, uncompressedSize);
}
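The do/while loop drains any dictionary page that precedes the data: each DICTIONARY_PAGE header triggers readDictionaryPage and another header read, so the method returns only once pageHeader describes a data page. The Parquet format allows at most one dictionary page per column chunk, which addresses the concern in the TODO for spec-conforming files.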
Example 6: updateStats
import org.apache.parquet.format.PageType; // import the required package/class
protected void updateStats(PageHeader pageHeader, String op, long start, long time, long bytesin, long bytesout) {
  String pageType = "Data Page";
  if (pageHeader.type == PageType.DICTIONARY_PAGE) {
    pageType = "Dictionary Page";
  }
  logger.trace("ParquetTrace,{},{},{},{},{},{},{},{}", op, pageType,
      this.parentColumnReader.parentReader.hadoopPath,
      this.parentColumnReader.columnDescriptor.toString(), start, bytesin, bytesout, time);
  if (pageHeader.type != PageType.DICTIONARY_PAGE) {
    if (bytesin == bytesout) {
      this.stats.timeDataPageLoads.addAndGet(time);
      this.stats.numDataPageLoads.incrementAndGet();
      this.stats.totalDataPageReadBytes.addAndGet(bytesin);
    } else {
      this.stats.timeDataPagesDecompressed.addAndGet(time);
      this.stats.numDataPagesDecompressed.incrementAndGet();
      this.stats.totalDataDecompressedBytes.addAndGet(bytesin);
    }
  } else {
    if (bytesin == bytesout) {
      this.stats.timeDictPageLoads.addAndGet(time);
      this.stats.numDictPageLoads.incrementAndGet();
      this.stats.totalDictPageReadBytes.addAndGet(bytesin);
    } else {
      this.stats.timeDictPagesDecompressed.addAndGet(time);
      this.stats.numDictPagesDecompressed.incrementAndGet();
      this.stats.totalDictDecompressedBytes.addAndGet(bytesin);
    }
  }
}
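This variant differs from Example 4 in that the counters are updated via addAndGet and incrementAndGet, which suggests they are atomic fields (AtomicLong-style), so the statistics can be updated safely by concurrent page readers.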
Example 7: writeDictionaryPageHeader
import org.apache.parquet.format.PageType; // import the required package/class
public void writeDictionaryPageHeader(
    int uncompressedSize, int compressedSize, int valueCount,
    org.apache.parquet.column.Encoding valuesEncoding, OutputStream to) throws IOException {
  PageHeader pageHeader = new PageHeader(PageType.DICTIONARY_PAGE, uncompressedSize, compressedSize);
  pageHeader.setDictionary_page_header(new DictionaryPageHeader(valueCount, getEncoding(valuesEncoding)));
  writePageHeader(pageHeader, to);
}
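A self-contained sketch of the same idea without the private getEncoding helper, using parquet-format's Util and thrift-generated classes directly; the method name and all values are made up for illustration:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.parquet.format.DictionaryPageHeader;
import org.apache.parquet.format.Encoding;
import org.apache.parquet.format.PageHeader;
import org.apache.parquet.format.PageType;
import org.apache.parquet.format.Util;

public static byte[] dictionaryPageHeaderBytes() throws IOException {
  // 16 dictionary entries; 64 bytes both compressed and uncompressed
  PageHeader header = new PageHeader(PageType.DICTIONARY_PAGE, 64, 64);
  header.setDictionary_page_header(new DictionaryPageHeader(16, Encoding.PLAIN_DICTIONARY));
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  Util.writePageHeader(header, out); // thrift serialization of the header
  return out.toByteArray();
}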
Example 8: testPageHeader
import org.apache.parquet.format.PageType; // import the required package/class
@Test
public void testPageHeader() throws IOException {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  PageType type = PageType.DATA_PAGE;
  int compSize = 10;
  int uncSize = 20;
  PageHeader pageHeader = new PageHeader(type, uncSize, compSize);
  writePageHeader(pageHeader, out);
  PageHeader readPageHeader = readPageHeader(new ByteArrayInputStream(out.toByteArray()));
  assertEquals(pageHeader, readPageHeader);
}
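The writePageHeader and readPageHeader calls here are presumably static imports of org.apache.parquet.format.Util; the test round-trips a bare DATA_PAGE header through serialization and asserts that the deserialized object equals the original.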