This article collects typical usage examples of the Java method parquet.hadoop.metadata.BlockMetaData.getTotalByteSize. If you have been wondering what exactly BlockMetaData.getTotalByteSize does and how to use it, the curated examples below should help; you can also explore further usage examples of the enclosing class, parquet.hadoop.metadata.BlockMetaData.
Four code examples of BlockMetaData.getTotalByteSize are shown below, sorted by popularity by default.
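Before the examples, here is a minimal sketch of how to obtain a BlockMetaData to call getTotalByteSize on in the first place. It uses ParquetFileReader.readFooter from the same pre-org.apache.parquet codebase; the class name TotalByteSizeDemo and the command-line path argument are illustrative assumptions, not part of the examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.metadata.BlockMetaData;
import parquet.hadoop.metadata.ParquetMetadata;

public class TotalByteSizeDemo {
  public static void main(String[] args) throws Exception {
    // Read the Parquet footer and walk its row groups.
    ParquetMetadata footer =
        ParquetFileReader.readFooter(new Configuration(), new Path(args[0]));
    for (BlockMetaData block : footer.getBlocks()) {
      // getTotalByteSize() reports the total uncompressed size of the row group.
      System.out.printf("row group @ %d: %d rows, %d bytes%n",
          block.getStartingPos(), block.getRowCount(), block.getTotalByteSize());
    }
  }
}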
Example 1: generateSplitByDeprecatedConstructor
import parquet.hadoop.metadata.BlockMetaData; // import the package/class the method depends on
private List<ParquetInputSplit> generateSplitByDeprecatedConstructor(long min, long max)
    throws IOException {
  List<ParquetInputSplit> splits = new ArrayList<ParquetInputSplit>();
  List<ClientSideMetadataSplitStrategy.SplitInfo> splitInfos = ClientSideMetadataSplitStrategy
      .generateSplitInfo(blocks, hdfsBlocks, min, max);
  for (ClientSideMetadataSplitStrategy.SplitInfo splitInfo : splitInfos) {
    // The split ends at the last row group's starting position plus its total byte size.
    BlockMetaData lastRowGroup = splitInfo.getRowGroups().get(splitInfo.getRowGroupCount() - 1);
    long end = lastRowGroup.getStartingPos() + lastRowGroup.getTotalByteSize();
    ParquetInputSplit split = new ParquetInputSplit(fileStatus.getPath(),
        splitInfo.hdfsBlock.getOffset(), end, splitInfo.hdfsBlock.getHosts(),
        splitInfo.rowGroups, schema.toString(), null, null, extramd);
    splits.add(split);
  }
  return splits;
}
Example 2: getParquetInputSplit
import parquet.hadoop.metadata.BlockMetaData; // import the package/class the method depends on
public ParquetInputSplit getParquetInputSplit(FileStatus fileStatus, String requestedSchema,
    Map<String, String> readSupportMetadata) throws IOException {
  MessageType requested = MessageTypeParser.parseMessageType(requestedSchema);
  long length = 0;
  // Sum the on-disk (compressed) size of only the columns the requested schema selects.
  for (BlockMetaData block : this.getRowGroups()) {
    List<ColumnChunkMetaData> columns = block.getColumns();
    for (ColumnChunkMetaData column : columns) {
      if (requested.containsPath(column.getPath().toArray())) {
        length += column.getTotalSize();
      }
    }
  }
  // The split ends at the last row group's starting position plus its total byte size.
  BlockMetaData lastRowGroup = this.getRowGroups().get(this.getRowGroupCount() - 1);
  long end = lastRowGroup.getStartingPos() + lastRowGroup.getTotalByteSize();
  long[] rowGroupOffsets = new long[this.getRowGroupCount()];
  for (int i = 0; i < rowGroupOffsets.length; i++) {
    rowGroupOffsets[i] = this.getRowGroups().get(i).getStartingPos();
  }
  return new ParquetInputSplit(
      fileStatus.getPath(),
      hdfsBlock.getOffset(),
      end,
      length,
      hdfsBlock.getHosts(),
      rowGroupOffsets);
}
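Two different sizes are at work in example 2: length sums ColumnChunkMetaData.getTotalSize(), the compressed on-disk size of just the requested columns, while end is derived from getTotalByteSize(), the uncompressed size of the whole row group. A small sketch making the contrast explicit (assumes a footer obtained as in the sketch above, plus an import of parquet.hadoop.metadata.ColumnChunkMetaData):

for (BlockMetaData block : footer.getBlocks()) {
  long compressedOnDisk = 0;
  for (ColumnChunkMetaData column : block.getColumns()) {
    compressedOnDisk += column.getTotalSize(); // compressed bytes as stored
  }
  System.out.printf("uncompressed=%d compressed=%d%n",
      block.getTotalByteSize(), compressedOnDisk);
}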
Example 3: addRowGroup
import parquet.hadoop.metadata.BlockMetaData; // import the package/class the method depends on
private void addRowGroup(ParquetMetadata parquetMetadata, List<RowGroup> rowGroups, BlockMetaData block) {
  //rowGroup.total_byte_size = ;
  List<ColumnChunkMetaData> columns = block.getColumns();
  List<ColumnChunk> parquetColumns = new ArrayList<ColumnChunk>();
  for (ColumnChunkMetaData columnMetaData : columns) {
    ColumnChunk columnChunk = new ColumnChunk(columnMetaData.getFirstDataPageOffset()); // verify this is the right offset
    columnChunk.file_path = block.getPath(); // they are in the same file for now
    columnChunk.meta_data = new parquet.format.ColumnMetaData(
        getType(columnMetaData.getType()),
        toFormatEncodings(columnMetaData.getEncodings()),
        Arrays.asList(columnMetaData.getPath().toArray()),
        columnMetaData.getCodec().getParquetCompressionCodec(),
        columnMetaData.getValueCount(),
        columnMetaData.getTotalUncompressedSize(),
        columnMetaData.getTotalSize(),
        columnMetaData.getFirstDataPageOffset());
    columnChunk.meta_data.dictionary_page_offset = columnMetaData.getDictionaryPageOffset();
    if (!columnMetaData.getStatistics().isEmpty()) {
      columnChunk.meta_data.setStatistics(toParquetStatistics(columnMetaData.getStatistics()));
    }
    // columnChunk.meta_data.index_page_offset = ;
    // columnChunk.meta_data.key_value_metadata = ; // nothing yet
    parquetColumns.add(columnChunk);
  }
  // getTotalByteSize() feeds the Thrift RowGroup's total_byte_size field directly.
  RowGroup rowGroup = new RowGroup(parquetColumns, block.getTotalByteSize(), block.getRowCount());
  rowGroups.add(rowGroup);
}
Example 4: showDetails
import parquet.hadoop.metadata.BlockMetaData; // import the package/class the method depends on
private static void showDetails(PrettyPrintWriter out, BlockMetaData meta, Long num) {
  long rows = meta.getRowCount();
  long tbs = meta.getTotalByteSize();
  // RC = row count, TS = total byte size of the row group
  out.format("row group%s: RC:%d TS:%d%n", (num == null ? "" : " " + num), rows, tbs);
  out.rule('-');
  showDetails(out, meta.getColumns());
}
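showDetails appears to come from the parquet-tools metadata dump, with PrettyPrintWriter as its formatting helper. As a rough stand-alone equivalent of the per-row-group line (the System.out substitution and the explicit numbering are illustrative, not the tool's actual code):

int num = 1;
for (BlockMetaData meta : footer.getBlocks()) {
  // Prints the same RC/TS line as the format string above.
  System.out.format("row group %d: RC:%d TS:%d%n",
      num++, meta.getRowCount(), meta.getTotalByteSize());
}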