This article collects typical usage examples of the Java class parquet.hadoop.metadata.ColumnChunkMetaData. If you are wondering what ColumnChunkMetaData is for, how to use it, or what real code that uses it looks like, the curated examples below should help.
ColumnChunkMetaData belongs to the parquet.hadoop.metadata package. Fifteen code examples of the class are shown below, taken from open-source projects and sorted by popularity by default.
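Before turning to the examples, here is a minimal, hedged sketch of the most common way a ColumnChunkMetaData instance is obtained in the first place: by reading a Parquet footer and walking its row groups. It assumes the pre-org.apache parquet-hadoop API (ParquetFileReader.readFooter) and a file path passed on the command line; it is an illustration, not code taken from the projects quoted below.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.metadata.BlockMetaData;
import parquet.hadoop.metadata.ColumnChunkMetaData;
import parquet.hadoop.metadata.ParquetMetadata;

public class FooterWalker {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path file = new Path(args[0]); // path to a .parquet file

    // Reads only the footer; no data pages are scanned.
    ParquetMetadata footer = ParquetFileReader.readFooter(conf, file);

    // Each row group (BlockMetaData) holds one ColumnChunkMetaData per column.
    for (BlockMetaData block : footer.getBlocks()) {
      for (ColumnChunkMetaData chunk : block.getColumns()) {
        System.out.println(chunk.getPath()
            + "  codec=" + chunk.getCodec()
            + "  values=" + chunk.getValueCount()
            + "  compressed=" + chunk.getTotalSize()
            + "  uncompressed=" + chunk.getTotalUncompressedSize());
      }
    }
  }
}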
Example 1: PageReader
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
PageReader(ColumnReader<?> parentStatus, FileSystem fs, Path path, ColumnChunkMetaData columnChunkMetaData)
    throws ExecutionSetupException {
  this.parentColumnReader = parentStatus;
  allocatedDictionaryBuffers = new ArrayList<ByteBuf>();
  codecFactory = parentColumnReader.parentReader.getCodecFactory();
  long start = columnChunkMetaData.getFirstDataPageOffset();
  try {
    FSDataInputStream f = fs.open(path);
    this.dataReader = new ColumnDataReader(f, start, columnChunkMetaData.getTotalSize());
    loadDictionaryIfExists(parentStatus, columnChunkMetaData, f);
  } catch (IOException e) {
    throw new ExecutionSetupException("Error opening or reading metadata for parquet file at location: "
        + path.getName(), e);
  }
}
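A quick note on what Example 1 reads from the metadata: the chunk's starting offset and its total compressed length. The following fragment is a hedged sketch of that byte range (the chunk variable stands for any ColumnChunkMetaData and is not part of the example); in a Parquet file the optional dictionary page is written before the data pages of the same chunk.
// Hedged sketch: the byte range a column chunk occupies, derived from its metadata.
// "chunk" is an assumed ColumnChunkMetaData, e.g. taken from a footer.
long start = chunk.getFirstDataPageOffset();
if (chunk.getDictionaryPageOffset() > 0 && chunk.getDictionaryPageOffset() < start) {
  start = chunk.getDictionaryPageOffset(); // dictionary page precedes the data pages
}
long length = chunk.getTotalSize();        // total compressed size of the chunk on disk
// A reader can then read the bytes in [start, start + length).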
Example 2: ColumnReader
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
protected ColumnReader(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v, SchemaElement schemaElement) throws ExecutionSetupException {
  this.parentReader = parentReader;
  this.columnDescriptor = descriptor;
  this.columnChunkMetaData = columnChunkMetaData;
  this.isFixedLength = fixedLength;
  this.schemaElement = schemaElement;
  this.valueVec = v;
  this.pageReader = new PageReader(this, parentReader.getFileSystem(), parentReader.getHadoopPath(), columnChunkMetaData);
  if (columnDescriptor.getType() != PrimitiveType.PrimitiveTypeName.BINARY) {
    if (columnDescriptor.getType() == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
      dataTypeLengthInBits = columnDescriptor.getTypeLength() * 8;
    } else {
      dataTypeLengthInBits = ParquetRecordReader.getTypeLengthInBits(columnDescriptor.getType());
    }
  }
}
Example 3: add
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
private static void add(ParquetMetadata footer) {
  for (BlockMetaData blockMetaData : footer.getBlocks()) {
    ++blockCount;
    MessageType schema = footer.getFileMetaData().getSchema();
    recordCount += blockMetaData.getRowCount();
    List<ColumnChunkMetaData> columns = blockMetaData.getColumns();
    for (ColumnChunkMetaData columnMetaData : columns) {
      ColumnDescriptor desc = schema.getColumnDescription(columnMetaData.getPath().toArray());
      add(
          desc,
          columnMetaData.getValueCount(),
          columnMetaData.getTotalSize(),
          columnMetaData.getTotalUncompressedSize(),
          columnMetaData.getEncodings(),
          columnMetaData.getStatistics());
    }
  }
}
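Example 3 feeds per-chunk numbers into an aggregator. As a companion, here is a hedged fragment that rolls the same getters up into per-column totals across all row groups; the footer variable, the string key built from the path array, and the use of a long[] per column are assumptions made for the sketch.
// Hedged sketch: per-column totals across all row groups of an assumed "footer".
Map<String, long[]> totals = new LinkedHashMap<String, long[]>();
for (BlockMetaData block : footer.getBlocks()) {
  for (ColumnChunkMetaData chunk : block.getColumns()) {
    String key = Arrays.toString(chunk.getPath().toArray());
    long[] t = totals.get(key);
    if (t == null) {
      t = new long[3]; // {value count, compressed bytes, uncompressed bytes}
      totals.put(key, t);
    }
    t[0] += chunk.getValueCount();
    t[1] += chunk.getTotalSize();
    t[2] += chunk.getTotalUncompressedSize();
  }
}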
Example 4: endColumn
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
/**
 * end a column (once all rep, def and data have been written)
 *
 * @throws IOException
 */
public void endColumn() throws IOException {
  state = state.endColumn();
  if (DEBUG) LOG.debug(out.getPos() + ": end column");
  currentBlock.addColumn(ColumnChunkMetaData.get(
      currentChunkPath,
      currentChunkType,
      currentChunkCodec,
      currentEncodings,
      currentStatistics,
      currentChunkFirstDataPage,
      currentChunkDictionaryPageOffset,
      currentChunkValueCount,
      compressedLength,
      uncompressedLength));
  if (DEBUG) LOG.info("ended Column chumk: " + currentColumn);
  currentColumn = null;
  this.currentBlock.setTotalByteSize(currentBlock.getTotalByteSize() + uncompressedLength);
  this.uncompressedLength = 0;
  this.compressedLength = 0;
}
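The ColumnChunkMetaData.get(...) factory call in Example 4 is easier to read with concrete values next to each argument. The fragment below is a hedged restatement of that call with made-up values: the argument order is taken from the call above, the classes (ColumnPath, Encoding, CompressionCodecName, PrimitiveTypeName, BinaryStatistics) already appear elsewhere on this page, and the column name, codec and sizes are purely illustrative.
// Hedged sketch: the same factory call as in endColumn(), with illustrative values.
Set<Encoding> encodings = new HashSet<Encoding>(
    Arrays.asList(Encoding.BIT_PACKED, Encoding.RLE, Encoding.PLAIN));
ColumnChunkMetaData chunk = ColumnChunkMetaData.get(
    ColumnPath.get("a", "b"),           // path of the column in the schema
    PrimitiveTypeName.BINARY,           // physical type of the column
    CompressionCodecName.SNAPPY,        // codec used for the pages
    encodings,                          // encodings that appear in the chunk
    new BinaryStatistics(),             // (possibly empty) statistics
    4L,                                 // first data page offset
    0L,                                 // dictionary page offset (0 = no dictionary page)
    100L,                               // value count
    1024L,                              // total compressed size
    2048L);                             // total uncompressed size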
Example 5: visit
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
@Override
public <T extends Comparable<T>> Boolean visit(Lt<T> lt) {
  Column<T> filterColumn = lt.getColumn();
  T value = lt.getValue();
  ColumnChunkMetaData columnChunk = getColumnChunk(filterColumn.getColumnPath());
  Statistics<T> stats = columnChunk.getStatistics();
  if (stats.isEmpty()) {
    // we have no statistics available, we cannot drop any chunks
    return false;
  }
  if (isAllNulls(columnChunk)) {
    // we are looking for records where v < someValue
    // this chunk is all nulls, so we can drop it
    return true;
  }
  // drop if value <= min
  return value.compareTo(stats.genericGetMin()) <= 0;
}
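Example 5 is the statistics-based row-group pruning visitor for v < someValue: a chunk may be dropped only when its statistics prove that no value in it can satisfy the predicate. For contrast, here is a hedged sketch of the symmetric v > someValue case; it mirrors the structure above but is an illustration, not the library's actual Gt implementation.
@Override
public <T extends Comparable<T>> Boolean visit(Gt<T> gt) {
  Column<T> filterColumn = gt.getColumn();
  T value = gt.getValue();
  ColumnChunkMetaData columnChunk = getColumnChunk(filterColumn.getColumnPath());
  Statistics<T> stats = columnChunk.getStatistics();
  if (stats.isEmpty()) {
    // no statistics available, we cannot drop any chunks
    return false;
  }
  if (isAllNulls(columnChunk)) {
    // all nulls, nothing can satisfy v > someValue, so the chunk can be dropped
    return true;
  }
  // drop if value >= max: no value in the chunk can be strictly greater
  return value.compareTo(stats.genericGetMax()) >= 0;
}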
Example 6: testClearExceptionForNots
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
@Test
public void testClearExceptionForNots() {
  List<ColumnChunkMetaData> columnMetas = Arrays.asList(
      getDoubleColumnMeta(new DoubleStatistics(), 0L),
      getIntColumnMeta(new IntStatistics(), 0L));
  FilterPredicate pred = and(not(eq(doubleColumn, 12.0)), eq(intColumn, 17));
  try {
    canDrop(pred, columnMetas);
    fail("This should throw");
  } catch (IllegalArgumentException e) {
    assertEquals("This predicate contains a not! Did you forget to run this predicate through LogicalInverseRewriter?"
        + " not(eq(double.column, 12.0))", e.getMessage());
  }
}
Example 7: ParquetFileReader
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
public ParquetFileReader(
    Configuration configuration,
    Path file,
    List<BlockMetaData> blocks,
    List<ColumnDescriptor> columns)
    throws IOException
{
  this.file = file;
  this.inputStream = file.getFileSystem(configuration).open(file);
  this.blocks = blocks;
  if (!blocks.isEmpty()) {
    for (ColumnDescriptor columnDescriptor : columns) {
      for (ColumnChunkMetaData metadata : blocks.get(0).getColumns()) {
        if (metadata.getPath().equals(ColumnPath.get(columnDescriptor.getPath()))) {
          columnMetadata.put(columnDescriptor, metadata);
        }
      }
    }
  }
  this.codecFactory = new ParquetCodecFactory(configuration);
}
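The constructor in Example 7 matches each ColumnDescriptor against the chunk metadata of the first row group by comparing ColumnPath values. The same matching logic can be pulled out into a small helper; the sketch below only uses calls already shown above, but the helper itself (findChunk) is hypothetical.
// Hedged sketch: look up the chunk metadata for one column in one row group.
private static ColumnChunkMetaData findChunk(BlockMetaData block, ColumnDescriptor descriptor) {
  ColumnPath wanted = ColumnPath.get(descriptor.getPath());
  for (ColumnChunkMetaData metadata : block.getColumns()) {
    if (metadata.getPath().equals(wanted)) {
      return metadata;
    }
  }
  return null; // the column is not present in this row group
}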
Example 8: showDetails
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
public static void showDetails(PrettyPrintWriter out, List<ColumnChunkMetaData> ccmeta) {
  Map<String,Object> chunks = new LinkedHashMap<String,Object>();
  for (ColumnChunkMetaData cmeta : ccmeta) {
    String[] path = cmeta.getPath().toArray();
    Map<String,Object> current = chunks;
    for (int i = 0; i < path.length - 1; ++i) {
      String next = path[i];
      if (!current.containsKey(next)) {
        current.put(next, new LinkedHashMap<String,Object>());
      }
      current = (Map<String,Object>)current.get(next);
    }
    current.put(path[path.length - 1], cmeta);
  }
  showColumnChunkDetails(out, chunks, 0);
}
Example 9: VarCharColumn
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
VarCharColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, VarCharVector v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  varCharVector = v;
  mutator = v.getMutator();
}
Example 10: NullableVarCharColumn
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
NullableVarCharColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, NullableVarCharVector v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  vector = v;
  this.mutator = vector.getMutator();
}
Example 11: VarBinaryColumn
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
VarBinaryColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, VarBinaryVector v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  varBinaryVector = v;
  mutator = v.getMutator();
}
Example 12: NullableVarBinaryColumn
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
NullableVarBinaryColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, NullableVarBinaryVector v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  nullableVarBinaryVector = v;
  mutator = v.getMutator();
}
Example 13: VarLengthColumn
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
VarLengthColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  if (columnChunkMetaData.getEncodings().contains(Encoding.PLAIN_DICTIONARY)) {
    usingDictionary = true;
  } else {
    usingDictionary = false;
  }
}
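Example 13 decides between dictionary and plain decoding by checking whether the chunk's encoding set contains PLAIN_DICTIONARY. The same check can be run purely at the footer level, for instance to see which columns of a file were dictionary-encoded; the sketch below assumes a footer obtained as in the sketch near the top of this page.
// Hedged sketch: report dictionary-encoded column chunks from an assumed "footer".
for (BlockMetaData block : footer.getBlocks()) {
  for (ColumnChunkMetaData chunk : block.getColumns()) {
    boolean usesDictionary = chunk.getEncodings().contains(Encoding.PLAIN_DICTIONARY);
    System.out.println(chunk.getPath() + " dictionary-encoded: " + usesDictionary);
  }
}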
Example 14: loadDictionaryIfExists
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
private void loadDictionaryIfExists(final ColumnReader<?> parentStatus,
    final ColumnChunkMetaData columnChunkMetaData, final FSDataInputStream f) throws IOException {
  if (columnChunkMetaData.getDictionaryPageOffset() > 0) {
    f.seek(columnChunkMetaData.getDictionaryPageOffset());
    final PageHeader pageHeader = Util.readPageHeader(f);
    assert pageHeader.type == PageType.DICTIONARY_PAGE;
    final DrillBuf dictionaryData = allocateDictionaryBuffer(pageHeader.getUncompressed_page_size());
    if (parentColumnReader.columnChunkMetaData.getCodec() == CompressionCodecName.UNCOMPRESSED) {
      dataReader.loadPage(dictionaryData, pageHeader.compressed_page_size);
    } else {
      final DrillBuf compressedData = allocateTemporaryBuffer(pageHeader.compressed_page_size);
      try {
        dataReader.loadPage(compressedData, pageHeader.compressed_page_size);
        DirectBytesDecompressor decompressor = codecFactory.getDecompressor(parentColumnReader.columnChunkMetaData
            .getCodec());
        decompressor.decompress(
            compressedData,
            pageHeader.compressed_page_size,
            dictionaryData,
            pageHeader.getUncompressed_page_size());
      } finally {
        compressedData.release();
      }
    }
    DictionaryPage page = new DictionaryPage(
        asBytesInput(dictionaryData, 0, pageHeader.uncompressed_page_size),
        pageHeader.uncompressed_page_size,
        pageHeader.dictionary_page_header.num_values,
        parquet.column.Encoding.valueOf(pageHeader.dictionary_page_header.encoding.name())
    );
    this.dictionary = page.getEncoding().initDictionary(parentStatus.columnDescriptor, page);
  }
}
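Example 14 leans on two metadata probes: getDictionaryPageOffset() > 0 as the signal that a dictionary page exists, and getCodec() to decide whether that page must be decompressed before it is wrapped in a DictionaryPage. Shown in isolation (a hedged fragment; the chunk variable is any ColumnChunkMetaData taken from a footer):
long dictOffset = chunk.getDictionaryPageOffset();
boolean hasDictionaryPage = dictOffset > 0;                    // 0 means no dictionary page was written
boolean needsDecompression =
    chunk.getCodec() != CompressionCodecName.UNCOMPRESSED;     // otherwise the page bytes can be used as-is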
Example 15: FixedWidthRepeatedReader
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
FixedWidthRepeatedReader(ParquetRecordReader parentReader, ColumnReader dataReader, int dataTypeLengthInBytes,
    int allocateSize, ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData, boolean fixedLength,
    RepeatedValueVector valueVector, SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, valueVector, schemaElement);
  this.castedRepeatedVector = valueVector;
  this.dataTypeLengthInBytes = dataTypeLengthInBytes;
  this.dataReader = dataReader;
  this.dataReader.pageReader.clear();
  this.dataReader.pageReader = this.pageReader;
  // this is not in the reset method because it needs to be initialized only for the very first page read
  // in all other cases if a read ends at a page boundary we will need to keep track of this flag and not
  // clear it at the start of the next read loop
  notFishedReadingList = false;
}