This article collects typical usage examples of the Java method org.apache.parquet.hadoop.metadata.ColumnChunkMetaData.getTotalSize. If you are unsure how to use ColumnChunkMetaData.getTotalSize, what it is for, or what real-world calls look like, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.parquet.hadoop.metadata.ColumnChunkMetaData.
The following 10 code examples of ColumnChunkMetaData.getTotalSize are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java examples.
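As context for the examples below: getTotalSize() reports the compressed (on-disk) byte length of a single column chunk, so summing it over the chunks of a row group gives the number of bytes that row group occupies in the file. The following is a minimal sketch rather than one of the curated examples; it assumes a Parquet file path supplied on the command line, a default Hadoop Configuration, and the ParquetFileReader.readFooter entry point (deprecated in recent parquet-hadoop releases in favor of ParquetFileReader.open(...).getFooter()). The class name ColumnChunkSizes is only for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;

public class ColumnChunkSizes {
  public static void main(String[] args) throws Exception {
    Path file = new Path(args[0]); // path to an existing Parquet file (placeholder)
    ParquetMetadata footer =
        ParquetFileReader.readFooter(new Configuration(), file, ParquetMetadataConverter.NO_FILTER);
    for (BlockMetaData rowGroup : footer.getBlocks()) {
      long compressed = 0;
      for (ColumnChunkMetaData col : rowGroup.getColumns()) {
        compressed += col.getTotalSize(); // compressed size of this column chunk on disk
        System.out.printf("%s: %d bytes compressed, %d bytes uncompressed%n",
            col.getPath().toDotString(), col.getTotalSize(), col.getTotalUncompressedSize());
      }
      System.out.printf("row group starting at %d: %d compressed bytes%n",
          rowGroup.getStartingPos(), compressed);
    }
  }
}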
Example 1: PageReader
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import for the class this method depends on
PageReader(ColumnReader<?> parentStatus, FSDataInputStream inputStream, Path path,
           ColumnChunkMetaData columnChunkMetaData) throws ExecutionSetupException {
  this.parentColumnReader = parentStatus;
  allocatedDictionaryBuffers = new ArrayList<ByteBuf>();
  codecFactory = parentColumnReader.parentReader.getCodecFactory();
  this.stats = parentColumnReader.parentReader.parquetReaderStats;
  long start = columnChunkMetaData.getFirstDataPageOffset();
  this.inputStream = inputStream;
  try {
    // getTotalSize() bounds how many compressed bytes of the stream belong to this column chunk
    this.dataReader = new ColumnDataReader(inputStream, start, columnChunkMetaData.getTotalSize());
    loadDictionaryIfExists(parentStatus, columnChunkMetaData, inputStream);
  } catch (IOException e) {
    throw new ExecutionSetupException("Error opening or reading metadata for parquet file at location: "
        + path.getName(), e);
  }
}
Example 2: assertColumnsEquivalent
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import for the class this method depends on
public void assertColumnsEquivalent(List<ColumnChunkMetaData> expected,
                                    List<ColumnChunkMetaData> actual) {
  Assert.assertEquals("Should have the expected columns",
      expected.size(), actual.size());
  for (int i = 0; i < actual.size(); i += 1) {
    ColumnChunkMetaData current = actual.get(i);
    if (i != 0) {
      // each column chunk should start exactly where the previous one ends
      ColumnChunkMetaData previous = actual.get(i - 1);
      long expectedStart = previous.getStartingPos() + previous.getTotalSize();
      Assert.assertEquals("Should start after the previous column",
          expectedStart, current.getStartingPos());
    }
    assertColumnMetadataEquivalent(expected.get(i), current);
  }
}
Example 3: ColumnChunkIncPageReader
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import for the class this method depends on
public ColumnChunkIncPageReader(ColumnChunkMetaData metaData, ColumnDescriptor columnDescriptor,
                                FSDataInputStream in) throws IOException {
  this.metaData = metaData;
  this.columnDescriptor = columnDescriptor;
  this.size = metaData.getTotalSize();         // total compressed size of the column chunk
  this.fileOffset = metaData.getStartingPos(); // byte offset where the chunk starts in the file
  this.in = in;
  this.decompressor = codecFactory.getDecompressor(metaData.getCodec());
}
Example 4: getParquetFileMetadata
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import for the class this method depends on
private ParquetFileMetadata getParquetFileMetadata(FileStatus file) throws IOException {
  final ParquetMetadata metadata;
  metadata = SingletonParquetFooterCache.readFooter(fs, file, ParquetMetadataConverter.NO_FILTER);
  MessageType schema = metadata.getFileMetaData().getSchema();
  Map<SchemaPath, OriginalType> originalTypeMap = Maps.newHashMap();
  for (String[] path : schema.getPaths()) {
    originalTypeMap.put(SchemaPath.getCompoundPath(path), getOriginalType(schema, path, 0));
  }
  List<RowGroupMetadata> rowGroupMetadataList = Lists.newArrayList();
  ArrayList<SchemaPath> ALL_COLS = new ArrayList<>();
  ALL_COLS.add(AbstractRecordReader.STAR_COLUMN);
  boolean autoCorrectCorruptDates = formatConfig.autoCorrectCorruptDates;
  ParquetReaderUtility.DateCorruptionStatus containsCorruptDates =
      ParquetReaderUtility.detectCorruptDates(metadata, ALL_COLS, autoCorrectCorruptDates);
  if (logger.isDebugEnabled()) {
    logger.debug(containsCorruptDates.toString());
  }
  final Map<ColumnTypeMetadata.Key, ColumnTypeMetadata> columnTypeInfo = Maps.newHashMap();
  for (BlockMetaData rowGroup : metadata.getBlocks()) {
    List<ColumnMetadata> columnMetadataList = Lists.newArrayList();
    long length = 0;
    for (ColumnChunkMetaData col : rowGroup.getColumns()) {
      ColumnMetadata columnMetadata;
      boolean statsAvailable = (col.getStatistics() != null && !col.getStatistics().isEmpty());
      Statistics<?> stats = col.getStatistics();
      String[] columnName = col.getPath().toArray();
      SchemaPath columnSchemaName = SchemaPath.getCompoundPath(columnName);
      ColumnTypeMetadata columnTypeMetadata =
          new ColumnTypeMetadata(columnName, col.getType(), originalTypeMap.get(columnSchemaName));
      columnTypeInfo.put(new ColumnTypeMetadata.Key(columnTypeMetadata.name), columnTypeMetadata);
      if (statsAvailable) {
        // Write stats only if minVal == maxVal. Also, we then store only maxVal
        Object mxValue = null;
        if (stats.genericGetMax() != null && stats.genericGetMin() != null &&
            stats.genericGetMax().equals(stats.genericGetMin())) {
          mxValue = stats.genericGetMax();
          if (containsCorruptDates == ParquetReaderUtility.DateCorruptionStatus.META_SHOWS_CORRUPTION
              && columnTypeMetadata.originalType == OriginalType.DATE) {
            mxValue = ParquetReaderUtility.autoCorrectCorruptedDate((Integer) mxValue);
          }
        }
        columnMetadata =
            new ColumnMetadata(columnTypeMetadata.name, mxValue, stats.getNumNulls());
      } else {
        columnMetadata = new ColumnMetadata(columnTypeMetadata.name, null, null);
      }
      columnMetadataList.add(columnMetadata);
      // accumulate the row group's on-disk length from the per-column compressed sizes
      length += col.getTotalSize();
    }
    RowGroupMetadata rowGroupMeta =
        new RowGroupMetadata(rowGroup.getStartingPos(), length, rowGroup.getRowCount(),
            getHostAffinity(file, rowGroup.getStartingPos(), length), columnMetadataList);
    rowGroupMetadataList.add(rowGroupMeta);
  }
  return new ParquetFileMetadata(file, file.getLen(), rowGroupMetadataList, columnTypeInfo);
}
Example 5: PageReader
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import for the class this method depends on
PageReader(org.apache.drill.exec.store.parquet.columnreaders.ColumnReader<?> parentStatus, FileSystem fs, Path path,
           ColumnChunkMetaData columnChunkMetaData) throws ExecutionSetupException {
  this.parentColumnReader = parentStatus;
  allocatedDictionaryBuffers = new ArrayList<ByteBuf>();
  codecFactory = parentColumnReader.parentReader.getCodecFactory();
  this.stats = parentColumnReader.parentReader.parquetReaderStats;
  this.fileName = path.toString();
  debugName = new StringBuilder()
      .append(this.parentColumnReader.parentReader.getFragmentContext().getFragIdString())
      .append(":")
      .append(this.parentColumnReader.parentReader.getOperatorContext().getStats().getId())
      .append(this.parentColumnReader.columnChunkMetaData.toString())
      .toString();
  try {
    inputStream = fs.open(path);
    BufferAllocator allocator = parentColumnReader.parentReader.getOperatorContext().getAllocator();
    columnChunkMetaData.getTotalUncompressedSize();
    useBufferedReader = parentColumnReader.parentReader.useBufferedReader;
    scanBufferSize = parentColumnReader.parentReader.bufferedReadSize;
    useFadvise = parentColumnReader.parentReader.useFadvise;
    enforceTotalSize = parentColumnReader.parentReader.enforceTotalSize;
    if (useBufferedReader) {
      // getTotalSize() tells the stream how many compressed bytes to read for this column chunk
      this.dataReader = new BufferedDirectBufInputStream(inputStream, allocator, path.getName(),
          columnChunkMetaData.getStartingPos(), columnChunkMetaData.getTotalSize(), scanBufferSize,
          enforceTotalSize, useFadvise);
    } else {
      this.dataReader = new DirectBufInputStream(inputStream, allocator, path.getName(),
          columnChunkMetaData.getStartingPos(), columnChunkMetaData.getTotalSize(), enforceTotalSize,
          useFadvise);
    }
  } catch (IOException e) {
    throw new ExecutionSetupException("Error opening or reading metadata for parquet file at location: "
        + path.getName(), e);
  }
}
Example 6: showDetails
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import for the class this method depends on
private static void showDetails(PrettyPrintWriter out, ColumnChunkMetaData meta, boolean name) {
  long doff = meta.getDictionaryPageOffset();
  long foff = meta.getFirstDataPageOffset();
  long tsize = meta.getTotalSize();             // compressed size of the column chunk
  long usize = meta.getTotalUncompressedSize(); // uncompressed size of the column chunk
  long count = meta.getValueCount();
  double ratio = usize / (double) tsize;        // compression ratio
  String encodings = Joiner.on(',').skipNulls().join(meta.getEncodings());
  if (name) {
    String path = Joiner.on('.').skipNulls().join(meta.getPath());
    out.format("%s: ", path);
  }
  out.format(" %s", meta.getType());
  out.format(" %s", meta.getCodec());
  out.format(" DO:%d", doff);
  out.format(" FPO:%d", foff);
  out.format(" SZ:%d/%d/%.2f", tsize, usize, ratio);
  out.format(" VC:%d", count);
  if (!encodings.isEmpty()) out.format(" ENC:%s", encodings);
  Statistics<?> stats = meta.getStatistics();
  if (stats != null) {
    out.format(" ST:[%s]", stats);
  } else {
    out.format(" ST:[none]");
  }
  out.println();
}
Example 7: getParquetInputSplit
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import for the class this method depends on
public ParquetInputSplit getParquetInputSplit(FileStatus fileStatus, String requestedSchema,
    Map<String, String> readSupportMetadata) throws IOException {
  MessageType requested = MessageTypeParser.parseMessageType(requestedSchema);
  long length = 0;
  for (BlockMetaData block : this.getRowGroups()) {
    List<ColumnChunkMetaData> columns = block.getColumns();
    for (ColumnChunkMetaData column : columns) {
      if (requested.containsPath(column.getPath().toArray())) {
        length += column.getTotalSize();
      }
    }
  }
  BlockMetaData lastRowGroup = this.getRowGroups().get(this.getRowGroupCount() - 1);
  long end = lastRowGroup.getStartingPos() + lastRowGroup.getTotalByteSize();
  long[] rowGroupOffsets = new long[this.getRowGroupCount()];
  for (int i = 0; i < rowGroupOffsets.length; i++) {
    rowGroupOffsets[i] = this.getRowGroups().get(i).getStartingPos();
  }
  return new ParquetInputSplit(
      fileStatus.getPath(),
      hdfsBlock.getOffset(),
      end,
      length,
      hdfsBlock.getHosts(),
      rowGroupOffsets
  );
}
Example 8: end
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import for the class this method depends on
private static long end(List<BlockMetaData> blocks, String requestedSchema) {
  MessageType requested = MessageTypeParser.parseMessageType(requestedSchema);
  long length = 0;
  for (BlockMetaData block : blocks) {
    List<ColumnChunkMetaData> columns = block.getColumns();
    for (ColumnChunkMetaData column : columns) {
      if (requested.containsPath(column.getPath().toArray())) {
        length += column.getTotalSize();
      }
    }
  }
  return length;
}
Example 9: addRowGroup
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import for the class this method depends on
private void addRowGroup(ParquetMetadata parquetMetadata, List<RowGroup> rowGroups, BlockMetaData block) {
  // rowGroup.total_byte_size = ;
  List<ColumnChunkMetaData> columns = block.getColumns();
  List<ColumnChunk> parquetColumns = new ArrayList<ColumnChunk>();
  for (ColumnChunkMetaData columnMetaData : columns) {
    ColumnChunk columnChunk = new ColumnChunk(columnMetaData.getFirstDataPageOffset()); // verify this is the right offset
    columnChunk.file_path = block.getPath(); // they are in the same file for now
    columnChunk.meta_data = new ColumnMetaData(
        getType(columnMetaData.getType()),
        toFormatEncodings(columnMetaData.getEncodings()),
        Arrays.asList(columnMetaData.getPath().toArray()),
        toFormatCodec(columnMetaData.getCodec()),
        columnMetaData.getValueCount(),
        columnMetaData.getTotalUncompressedSize(),
        columnMetaData.getTotalSize(), // becomes total_compressed_size in the Thrift metadata
        columnMetaData.getFirstDataPageOffset());
    columnChunk.meta_data.dictionary_page_offset = columnMetaData.getDictionaryPageOffset();
    if (!columnMetaData.getStatistics().isEmpty()) {
      columnChunk.meta_data.setStatistics(toParquetStatistics(columnMetaData.getStatistics()));
    }
    if (columnMetaData.getEncodingStats() != null) {
      columnChunk.meta_data.setEncoding_stats(convertEncodingStats(columnMetaData.getEncodingStats()));
    }
    // columnChunk.meta_data.index_page_offset = ;
    // columnChunk.meta_data.key_value_metadata = ; // nothing yet
    parquetColumns.add(columnChunk);
  }
  RowGroup rowGroup = new RowGroup(parquetColumns, block.getTotalByteSize(), block.getRowCount());
  rowGroups.add(rowGroup);
}
Example 10: printColumnChunk
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import for the class this method depends on
private void printColumnChunk(Logger console, int width, ColumnChunkMetaData column, MessageType schema) {
  String[] path = column.getPath().toArray();
  PrimitiveType type = primitive(schema, path);
  Preconditions.checkNotNull(type);
  ColumnDescriptor desc = schema.getColumnDescription(path);
  long size = column.getTotalSize();
  long count = column.getValueCount();
  float perValue = ((float) size) / count; // average compressed bytes per value
  CompressionCodecName codec = column.getCodec();
  Set<Encoding> encodings = column.getEncodings();
  EncodingStats encodingStats = column.getEncodingStats();
  String encodingSummary = encodingStats == null ?
      encodingsAsString(encodings, desc) :
      encodingStatsAsString(encodingStats);
  Statistics stats = column.getStatistics();
  String name = column.getPath().toDotString();
  PrimitiveType.PrimitiveTypeName typeName = type.getPrimitiveTypeName();
  if (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
    console.info(String.format("%-" + width + "s FIXED[%d] %s %-7s %-9d %-8s %-7s %s",
        name, type.getTypeLength(), shortCodec(codec), encodingSummary, count,
        humanReadable(perValue), stats == null ? "" : String.valueOf(stats.getNumNulls()),
        minMaxAsString(stats, type.getOriginalType())));
  } else {
    console.info(String.format("%-" + width + "s %-9s %s %-7s %-9d %-10s %-7s %s",
        name, typeName, shortCodec(codec), encodingSummary, count, humanReadable(perValue),
        stats == null ? "" : String.valueOf(stats.getNumNulls()),
        minMaxAsString(stats, type.getOriginalType())));
  }
}