本文整理汇总了Java中parquet.hadoop.metadata.ColumnChunkMetaData.getTotalSize方法的典型用法代码示例。如果您正苦于以下问题:Java ColumnChunkMetaData.getTotalSize方法的具体用法?Java ColumnChunkMetaData.getTotalSize怎么用?Java ColumnChunkMetaData.getTotalSize使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类parquet.hadoop.metadata.ColumnChunkMetaData的用法示例。
在下文中一共展示了ColumnChunkMetaData.getTotalSize方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: PageReader
import parquet.hadoop.metadata.ColumnChunkMetaData; //导入方法依赖的package包/类
/**
 * Creates a page reader for one column chunk of a parquet file.
 *
 * <p>Opens {@code path} on {@code fs}, wraps the stream in a {@code ColumnDataReader} that starts
 * at the chunk's first data page offset and is sized by the chunk's total size, then calls
 * {@code loadDictionaryIfExists} with the same stream.
 *
 * <p>If any of those steps fails with an {@link IOException}, the stream opened here is closed
 * before the failure is reported, so a failed construction does not leak an open file handle.
 *
 * @param parentStatus the column reader this page reader belongs to
 * @param fs file system used to open {@code path}
 * @param path location of the parquet file
 * @param columnChunkMetaData metadata of the column chunk to read
 * @throws ExecutionSetupException if opening the file or the initial reads throw an
 *         {@link IOException}; the original exception is attached as the cause
 */
PageReader(ColumnReader<?> parentStatus, FileSystem fs, Path path, ColumnChunkMetaData columnChunkMetaData)
    throws ExecutionSetupException {
  this.parentColumnReader = parentStatus;
  allocatedDictionaryBuffers = new ArrayList<ByteBuf>();
  codecFactory = parentColumnReader.parentReader.getCodecFactory();

  long start = columnChunkMetaData.getFirstDataPageOffset();
  // Declared outside the try block so the failure path below can release it.
  FSDataInputStream f = null;
  try {
    f = fs.open(path);
    this.dataReader = new ColumnDataReader(f, start, columnChunkMetaData.getTotalSize());
    loadDictionaryIfExists(parentStatus, columnChunkMetaData, f);
  } catch (IOException e) {
    if (f != null) {
      try {
        f.close();
      } catch (IOException ignored) {
        // Deliberately ignored: the exception that aborted construction is the one
        // the caller needs to see, and it is reported below.
      }
    }
    throw new ExecutionSetupException("Error opening or reading metadata for parquet file at location: "
        + path.getName(), e);
  }
}
示例2: showDetails
import parquet.hadoop.metadata.ColumnChunkMetaData; //导入方法依赖的package包/类
/**
 * Writes a one-line summary of a column chunk to {@code out}.
 *
 * <p>The line contains, in order: the optional dotted column path, the type, the codec, the
 * dictionary page offset ({@code DO}), the first data page offset ({@code FPO}), the sizes
 * ({@code SZ:total/uncompressed/ratio}), the value count ({@code VC}) and, when non-empty, the
 * comma-separated encodings ({@code ENC}).
 *
 * @param out writer receiving the formatted line
 * @param meta column chunk metadata to describe
 * @param name whether to prefix the line with the column path
 */
private static void showDetails(PrettyPrintWriter out, ColumnChunkMetaData meta, boolean name) {
  final long dictionaryOffset = meta.getDictionaryPageOffset();
  final long firstDataPageOffset = meta.getFirstDataPageOffset();
  final long totalSize = meta.getTotalSize();
  final long uncompressedSize = meta.getTotalUncompressedSize();
  final long valueCount = meta.getValueCount();

  // Uncompressed size divided by total size, computed in floating point.
  final double sizeRatio = (double) uncompressedSize / totalSize;

  final String encodingList = Joiner.on(',').skipNulls().join(meta.getEncodings());

  if (name) {
    final String dottedPath = Joiner.on('.').skipNulls().join(meta.getPath());
    out.format("%s: ", dottedPath);
  }

  out.format(" %s", meta.getType());
  out.format(" %s", meta.getCodec());
  out.format(" DO:%d", dictionaryOffset);
  out.format(" FPO:%d", firstDataPageOffset);
  out.format(" SZ:%d/%d/%.2f", totalSize, uncompressedSize, sizeRatio);
  out.format(" VC:%d", valueCount);

  // The encodings field is omitted entirely when there is nothing to list.
  if (encodingList.length() > 0) {
    out.format(" ENC:%s", encodingList);
  }
  out.println();
}
示例3: ColumnChunkIncPageReader
import parquet.hadoop.metadata.ColumnChunkMetaData; //导入方法依赖的package包/类
/**
 * Creates an incremental page reader for a single column chunk.
 *
 * <p>Stores the supplied collaborators and records the chunk's starting position, its total
 * size, and a decompressor obtained from {@code codecFactory} for the chunk's codec.
 *
 * @param metaData metadata of the column chunk to read
 * @param columnDescriptor descriptor of the column the chunk belongs to
 * @param in input stream the chunk is read from
 */
public ColumnChunkIncPageReader(ColumnChunkMetaData metaData, ColumnDescriptor columnDescriptor, FSDataInputStream in) {
  // Collaborators handed in by the caller.
  this.in = in;
  this.columnDescriptor = columnDescriptor;
  this.metaData = metaData;

  // Values taken from the chunk metadata.
  this.fileOffset = metaData.getStartingPos();
  this.size = metaData.getTotalSize();
  this.decompressor = codecFactory.getDecompressor(metaData.getCodec());
}
示例4: getParquetInputSplit
import parquet.hadoop.metadata.ColumnChunkMetaData; //导入方法依赖的package包/类
/**
 * Builds a {@code ParquetInputSplit} describing the row groups held by this object.
 *
 * <p>The split is constructed from, in order: the file path, the offset of {@code hdfsBlock},
 * an end position (starting position plus total byte size of the last row group), the summed
 * total size of every column chunk whose path is contained in the requested schema, the hosts
 * of {@code hdfsBlock}, and the starting position of each row group.
 *
 * @param fileStatus status of the file the split belongs to; only its path is used
 * @param requestedSchema textual message type naming the requested columns
 * @param readSupportMetadata not used by this method
 * @return the split for the row groups of this object
 * @throws IOException declared by the signature
 */
public ParquetInputSplit getParquetInputSplit(FileStatus fileStatus, String requestedSchema, Map<String, String> readSupportMetadata) throws IOException {
  final MessageType projection = MessageTypeParser.parseMessageType(requestedSchema);

  // Add up the sizes of the column chunks that the requested schema selects.
  long projectedLength = 0;
  for (BlockMetaData rowGroup : this.getRowGroups()) {
    for (ColumnChunkMetaData chunk : rowGroup.getColumns()) {
      if (!projection.containsPath(chunk.getPath().toArray())) {
        continue;
      }
      projectedLength += chunk.getTotalSize();
    }
  }

  // The end position is derived from the last row group.
  final BlockMetaData tail = this.getRowGroups().get(this.getRowGroupCount() - 1);
  final long end = tail.getStartingPos() + tail.getTotalByteSize();

  // One starting position per row group, in row-group order.
  final long[] rowGroupOffsets = new long[this.getRowGroupCount()];
  int slot = 0;
  while (slot < rowGroupOffsets.length) {
    rowGroupOffsets[slot] = this.getRowGroups().get(slot).getStartingPos();
    slot++;
  }

  return new ParquetInputSplit(
      fileStatus.getPath(),
      hdfsBlock.getOffset(),
      end,
      projectedLength,
      hdfsBlock.getHosts(),
      rowGroupOffsets);
}
示例5: end
import parquet.hadoop.metadata.ColumnChunkMetaData; //导入方法依赖的package包/类
/**
 * Sums the total size of every column chunk selected by the requested schema.
 *
 * <p>Note that, despite the method name, the value returned is an accumulated size rather than
 * a position: it is the sum of {@code getTotalSize()} over all column chunks in {@code blocks}
 * whose path is contained in the parsed {@code requestedSchema}.
 *
 * @param blocks row groups whose column chunks are inspected
 * @param requestedSchema textual message type naming the requested columns
 * @return the summed size of the matching column chunks; {@code 0} when none match
 */
private static long end(List<BlockMetaData> blocks, String requestedSchema) {
  final MessageType projection = MessageTypeParser.parseMessageType(requestedSchema);

  long total = 0;
  for (BlockMetaData rowGroup : blocks) {
    for (ColumnChunkMetaData chunk : rowGroup.getColumns()) {
      // Skip chunks for columns the schema does not ask for.
      if (!projection.containsPath(chunk.getPath().toArray())) {
        continue;
      }
      total += chunk.getTotalSize();
    }
  }
  return total;
}
示例6: addRowGroup
import parquet.hadoop.metadata.ColumnChunkMetaData; //导入方法依赖的package包/类
/**
 * Converts one block's metadata into a {@code RowGroup} and appends it to {@code rowGroups}.
 *
 * <p>For every column chunk in {@code block} a {@code ColumnChunk} is created carrying the
 * block's file path and a {@code parquet.format.ColumnMetaData} built from the chunk's type,
 * encodings, path, codec, value count, sizes and offsets. Statistics are attached only when
 * the chunk's statistics are not empty. The index page offset and key/value metadata of the
 * column metadata are not populated here.
 *
 * @param parquetMetadata not used by this method
 * @param rowGroups list that receives the newly built row group
 * @param block block metadata to convert
 */
private void addRowGroup(ParquetMetadata parquetMetadata, List<RowGroup> rowGroups, BlockMetaData block) {
  List<ColumnChunk> convertedChunks = new ArrayList<ColumnChunk>();

  for (ColumnChunkMetaData source : block.getColumns()) {
    // NOTE(review): the original author flagged that this offset should be verified.
    ColumnChunk converted = new ColumnChunk(source.getFirstDataPageOffset());
    // All chunks currently live in the same file as their block.
    converted.file_path = block.getPath();

    parquet.format.ColumnMetaData chunkMeta = new parquet.format.ColumnMetaData(
        getType(source.getType()),
        toFormatEncodings(source.getEncodings()),
        Arrays.asList(source.getPath().toArray()),
        source.getCodec().getParquetCompressionCodec(),
        source.getValueCount(),
        source.getTotalUncompressedSize(),
        source.getTotalSize(),
        source.getFirstDataPageOffset());
    converted.meta_data = chunkMeta;
    chunkMeta.dictionary_page_offset = source.getDictionaryPageOffset();

    // Only chunks that actually carry statistics get them copied over.
    if (!source.getStatistics().isEmpty()) {
      chunkMeta.setStatistics(toParquetStatistics(source.getStatistics()));
    }

    convertedChunks.add(converted);
  }

  rowGroups.add(new RowGroup(convertedChunks, block.getTotalByteSize(), block.getRowCount()));
}