本文整理汇总了Java中parquet.column.ColumnDescriptor类的典型用法代码示例。如果您正苦于以下问题:Java ColumnDescriptor类的具体用法?Java ColumnDescriptor怎么用?Java ColumnDescriptor使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
ColumnDescriptor类属于parquet.column包,在下文中一共展示了ColumnDescriptor类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: ColumnReader
import parquet.column.ColumnDescriptor; //导入依赖的package包/类
/**
 * Builds a column reader for one column chunk of the given parent record reader.
 *
 * <p>Stores the supplied references, creates the {@code PageReader} for the chunk and,
 * for every column type except BINARY, records the width of one value in bits.
 *
 * @param parentReader record reader that supplies the file system and Hadoop path
 * @param allocateSize not read by this constructor
 * @param descriptor descriptor of the column being read
 * @param columnChunkMetaData metadata of the column chunk, also handed to the page reader
 * @param fixedLength stored as the {@code isFixedLength} flag
 * @param v value vector stored as {@code valueVec}
 * @param schemaElement schema element stored for this column
 * @throws ExecutionSetupException declared by this constructor; presumably raised while
 *         the page reader is being set up (confirm against PageReader)
 */
protected ColumnReader(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v, SchemaElement schemaElement)
    throws ExecutionSetupException {
  this.parentReader = parentReader;
  this.columnDescriptor = descriptor;
  this.columnChunkMetaData = columnChunkMetaData;
  this.isFixedLength = fixedLength;
  this.schemaElement = schemaElement;
  this.valueVec = v;

  // Created only after the fields above are set, because it receives a reference to this reader.
  this.pageReader = new PageReader(
      this, parentReader.getFileSystem(), parentReader.getHadoopPath(), columnChunkMetaData);

  // BINARY columns leave dataTypeLengthInBits untouched.
  if (columnDescriptor.getType() == PrimitiveType.PrimitiveTypeName.BINARY) {
    return;
  }

  if (columnDescriptor.getType() == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
    // The descriptor reports the length in bytes; convert it to bits.
    dataTypeLengthInBits = columnDescriptor.getTypeLength() * 8;
  } else {
    dataTypeLengthInBits = ParquetRecordReader.getTypeLengthInBits(columnDescriptor.getType());
  }
}
示例2: initialize
import parquet.column.ColumnDescriptor; //导入依赖的package包/类
/**
 * Prepares this record reader for one file.
 *
 * <p>Asks the read support for a read context, remembers the requested and file schemas,
 * builds the record converter, reads the strict-type-checking flag from the configuration,
 * opens a {@code ParquetFileReader} over the requested columns, and adds the row count of
 * every block to {@code total}.
 *
 * @param fileSchema schema of the file
 * @param fileMetadata key/value metadata of the file
 * @param file path of the file to read
 * @param blocks blocks whose rows will be read
 * @param configuration configuration passed to the read support and the file reader
 * @throws IOException declared by this method; presumably raised when the file reader
 *         cannot be created (confirm against ParquetFileReader)
 */
public void initialize(MessageType fileSchema,
    Map<String, String> fileMetadata,
    Path file, List<BlockMetaData> blocks, Configuration configuration)
    throws IOException {
  // Build the ReadContext for this file first; everything below depends on it.
  InitContext initContext = new InitContext(configuration, toSetMultiMap(fileMetadata), fileSchema);
  ReadSupport.ReadContext readContext = readSupport.init(initContext);

  this.requestedSchema = readContext.getRequestedSchema();
  this.fileSchema = fileSchema;
  this.file = file;
  this.columnCount = this.requestedSchema.getPaths().size();
  this.recordConverter =
      readSupport.prepareForRead(configuration, fileMetadata, fileSchema, readContext);
  this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true);

  // Only the columns of the requested schema are handed to the file reader.
  reader = new ParquetFileReader(configuration, file, blocks, requestedSchema.getColumns());

  for (BlockMetaData rowGroup : blocks) {
    total += rowGroup.getRowCount();
  }
  LOG.info("RecordReader initialized will read a total of " + total + " records.");
}
示例3: add
import parquet.column.ColumnDescriptor; //导入依赖的package包/类
/**
 * Folds every block of the given footer into the running totals.
 *
 * <p>For each block, increments {@code blockCount}, adds the block's row count to
 * {@code recordCount}, and forwards each column chunk's descriptor, value count, sizes,
 * encodings and statistics to the per-column {@code add} overload.
 *
 * @param footer footer whose blocks are accumulated
 */
private static void add(ParquetMetadata footer) {
  for (BlockMetaData block : footer.getBlocks()) {
    ++blockCount;
    // Looked up per block, so a footer without blocks never touches the file metadata.
    MessageType schema = footer.getFileMetaData().getSchema();
    recordCount += block.getRowCount();

    for (ColumnChunkMetaData chunk : block.getColumns()) {
      // Resolve the chunk's path against the schema to obtain its descriptor.
      ColumnDescriptor descriptor = schema.getColumnDescription(chunk.getPath().toArray());
      add(
          descriptor,
          chunk.getValueCount(),
          chunk.getTotalSize(),
          chunk.getTotalUncompressedSize(),
          chunk.getEncodings(),
          chunk.getStatistics());
    }
  }
}
示例4: startColumn
import parquet.column.ColumnDescriptor; //导入依赖的package包/类
/**
 * Starts a column inside a block.
 *
 * <p>Advances the writer state, resets the per-chunk bookkeeping (encodings, path, type,
 * codec, value count, first data page position, compressed and uncompressed lengths) and
 * creates an empty statistics object matching the column's primitive type.
 *
 * @param descriptor the column descriptor
 * @param valueCount the value count in this column
 * @param compressionCodecName the codec recorded for this column chunk
 * @throws IOException declared by this method; {@code out.getPos()} is the only call made
 *         on the output here
 */
public void startColumn(ColumnDescriptor descriptor,
                        long valueCount,
                        CompressionCodecName compressionCodecName) throws IOException {
  this.state = this.state.startColumn();
  if (DEBUG) {
    LOG.debug(out.getPos() + ": start column: " + descriptor + " count=" + valueCount);
  }

  // Fresh bookkeeping for the chunk that is about to be written.
  this.currentEncodings = new HashSet<parquet.column.Encoding>();
  this.currentChunkPath = ColumnPath.get(descriptor.getPath());
  this.currentChunkType = descriptor.getType();
  this.currentChunkCodec = compressionCodecName;
  this.currentChunkValueCount = valueCount;
  this.currentChunkFirstDataPage = out.getPos();
  this.compressedLength = 0;
  this.uncompressedLength = 0;

  // The statistics implementation is chosen from the chunk's primitive type.
  this.currentStatistics = Statistics.getStatsBasedOnType(this.currentChunkType);
}
示例5: createBlockBuilder
import parquet.column.ColumnDescriptor; //导入依赖的package包/类
/**
 * Creates the block builder that matches the primitive type of the given column.
 *
 * @param size passed through to the selected builder's constructor
 * @param descriptor column descriptor whose type selects the builder
 * @return a builder for BOOLEAN, INT32, INT64, FLOAT, DOUBLE or BINARY columns
 * @throws PrestoException with {@code NOT_SUPPORTED} for any other type
 */
public static ParquetBlockBuilder createBlockBuilder(int size, ColumnDescriptor descriptor)
{
    final ParquetBlockBuilder builder;
    switch (descriptor.getType()) {
        case BOOLEAN:
            builder = new ParquetBooleanBuilder(size, descriptor);
            break;
        case INT32:
            builder = new ParquetIntBuilder(size, descriptor);
            break;
        case INT64:
            builder = new ParquetLongBuilder(size, descriptor);
            break;
        case FLOAT:
            builder = new ParquetFloatBuilder(size, descriptor);
            break;
        case DOUBLE:
            builder = new ParquetDoubleBuilder(size, descriptor);
            break;
        case BINARY:
            builder = new ParquetBinaryBuilder(size, descriptor);
            break;
        default:
            throw new PrestoException(NOT_SUPPORTED, "Unsupported parquet type: " + descriptor.getType());
    }
    return builder;
}
示例6: ParquetFileReader
import parquet.column.ColumnDescriptor; //导入依赖的package包/类
/**
 * Opens the given file and indexes the chunk metadata of the requested columns.
 *
 * <p>Only the first block is consulted when matching column descriptors to chunk
 * metadata; with no blocks the metadata map is left as it was.
 *
 * @param configuration configuration used to resolve the file system and the codec factory
 * @param file path of the file to open
 * @param blocks blocks of the file
 * @param columns descriptors of the columns to look up
 * @throws IOException declared by this constructor; opening the input stream is the
 *         visible I/O operation
 */
public ParquetFileReader(
        Configuration configuration,
        Path file,
        List<BlockMetaData> blocks,
        List<ColumnDescriptor> columns)
        throws IOException
{
    this.file = file;
    this.inputStream = file.getFileSystem(configuration).open(file);
    this.blocks = blocks;

    if (!blocks.isEmpty()) {
        for (ColumnDescriptor column : columns) {
            // Match each requested column against the chunks of the first block by path.
            for (ColumnChunkMetaData chunk : blocks.get(0).getColumns()) {
                boolean samePath = chunk.getPath().equals(ColumnPath.get(column.getPath()));
                if (samePath) {
                    columnMetadata.put(column, chunk);
                }
            }
        }
    }

    this.codecFactory = new ParquetCodecFactory(configuration);
}
示例7: ParquetColumnReader
import parquet.column.ColumnDescriptor; //导入依赖的package包/类
/**
 * Creates a reader for one column backed by the given page reader.
 *
 * <p>Reads the dictionary page, if any, and decodes it into {@code dictionary}; without a
 * dictionary page the field is set to {@code null}. The page reader must report a total
 * value count greater than zero.
 *
 * @param columnDescriptor descriptor of the column; must not be null
 * @param pageReader source of the dictionary page and total value count; must not be null
 * @throws ParquetDecodingException if decoding the dictionary page fails with an IOException
 */
public ParquetColumnReader(ColumnDescriptor columnDescriptor, PageReader pageReader)
{
    this.columnDescriptor = requireNonNull(columnDescriptor, "columnDescriptor");
    this.pageReader = requireNonNull(pageReader, "pageReader");

    DictionaryPage page = pageReader.readDictionaryPage();
    if (page == null) {
        // No dictionary page for this column.
        this.dictionary = null;
    }
    else {
        try {
            this.dictionary = page.getEncoding().initDictionary(columnDescriptor, page);
        }
        catch (IOException e) {
            throw new ParquetDecodingException("could not decode the dictionary for " + columnDescriptor, e);
        }
    }

    checkArgument(pageReader.getTotalValueCount() > 0, "page is empty");
    this.totalValueCount = pageReader.getTotalValueCount();
}
示例8: showDetails
import parquet.column.ColumnDescriptor; //导入依赖的package包/类
/**
 * Prints one line describing a primitive field.
 *
 * <p>The line holds the field name prefixed by {@code depth} dots, its repetition and
 * primitive type, the original type when present, and, when a container schema is given,
 * the maximum repetition and definition levels of the column.
 *
 * @param out writer that receives the line
 * @param type primitive field to describe
 * @param depth number of dots placed before the field name
 * @param container schema used to resolve the column descriptor; may be null to skip levels
 * @param cpath path of the enclosing fields; extended temporarily and restored before returning
 */
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath) {
  String label = Strings.repeat(".", depth) + type.getName();
  OriginalType originalType = type.getOriginalType();
  Repetition repetition = type.getRepetition();
  PrimitiveTypeName primitiveName = type.getPrimitiveTypeName();

  out.format("%s: %s %s", label, repetition, primitiveName);
  if (originalType != null) {
    out.format(" O:%s", originalType);
  }

  if (container != null) {
    // Push this field's name, snapshot the full path, then pop it so the caller's list is unchanged.
    cpath.add(type.getName());
    String[] fullPath = cpath.toArray(new String[cpath.size()]);
    cpath.remove(cpath.size() - 1);

    ColumnDescriptor column = container.getColumnDescription(fullPath);
    int maxDefinition = column.getMaxDefinitionLevel();
    int maxRepetition = column.getMaxRepetitionLevel();
    out.format(" R:%d D:%d", maxRepetition, maxDefinition);
  }

  out.println();
}
示例9: VarCharColumn
import parquet.column.ColumnDescriptor; //导入依赖的package包/类
/**
 * Creates a column reader that fills a {@code VarCharVector}.
 *
 * <p>All arguments are forwarded to the superclass constructor; the vector and its
 * mutator are then kept in fields of this class.
 *
 * @param v vector to fill; also the source of the stored mutator
 * @throws ExecutionSetupException declared by this constructor and by the superclass call
 */
VarCharColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, VarCharVector v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  this.mutator = v.getMutator();
  this.varCharVector = v;
}
示例10: NullableVarCharColumn
import parquet.column.ColumnDescriptor; //导入依赖的package包/类
/**
 * Creates a column reader that fills a {@code NullableVarCharVector}.
 *
 * <p>All arguments are forwarded to the superclass constructor; the vector is stored in
 * {@code vector} and the mutator is taken from that stored vector.
 *
 * @param v vector to fill
 * @throws ExecutionSetupException declared by this constructor and by the superclass call
 */
NullableVarCharColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, NullableVarCharVector v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  this.vector = v;
  // Read the mutator through the field that was just assigned.
  mutator = this.vector.getMutator();
}
示例11: VarBinaryColumn
import parquet.column.ColumnDescriptor; //导入依赖的package包/类
/**
 * Creates a column reader that fills a {@code VarBinaryVector}.
 *
 * <p>All arguments are forwarded to the superclass constructor; the vector and its
 * mutator are then kept in fields of this class.
 *
 * @param v vector to fill; also the source of the stored mutator
 * @throws ExecutionSetupException declared by this constructor and by the superclass call
 */
VarBinaryColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, VarBinaryVector v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  this.mutator = v.getMutator();
  this.varBinaryVector = v;
}
示例12: NullableVarBinaryColumn
import parquet.column.ColumnDescriptor; //导入依赖的package包/类
/**
 * Creates a column reader that fills a {@code NullableVarBinaryVector}.
 *
 * <p>All arguments are forwarded to the superclass constructor; the vector and its
 * mutator are then kept in fields of this class.
 *
 * @param v vector to fill; also the source of the stored mutator
 * @throws ExecutionSetupException declared by this constructor and by the superclass call
 */
NullableVarBinaryColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, NullableVarBinaryVector v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  this.mutator = v.getMutator();
  this.nullableVarBinaryVector = v;
}
示例13: VarLengthColumn
import parquet.column.ColumnDescriptor; //导入依赖的package包/类
/**
 * Creates a variable-length column reader.
 *
 * <p>All arguments are forwarded to the superclass constructor; {@code usingDictionary}
 * is then set according to whether the chunk's encodings include {@code PLAIN_DICTIONARY}.
 *
 * @param columnChunkMetaData chunk metadata whose encodings decide {@code usingDictionary}
 * @throws ExecutionSetupException declared by this constructor and by the superclass call
 */
VarLengthColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  usingDictionary = columnChunkMetaData.getEncodings().contains(Encoding.PLAIN_DICTIONARY);
}
示例14: FixedWidthRepeatedReader
import parquet.column.ColumnDescriptor; //导入依赖的package包/类
/**
 * Creates a reader for repeated fixed-width values that wraps a reader for the data.
 *
 * <p>After the superclass constructor runs, the wrapped data reader's own page reader is
 * cleared and replaced with this reader's page reader, so both use the same one.
 *
 * @param dataReader reader for the underlying values; its page reader is replaced
 * @param dataTypeLengthInBytes stored in the field of the same name
 * @param valueVector repeated vector, stored as {@code castedRepeatedVector}
 * @throws ExecutionSetupException declared by this constructor and by the superclass call
 */
FixedWidthRepeatedReader(
    ParquetRecordReader parentReader,
    ColumnReader dataReader,
    int dataTypeLengthInBytes,
    int allocateSize,
    ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData,
    boolean fixedLength,
    RepeatedValueVector valueVector,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, valueVector, schemaElement);
  this.dataTypeLengthInBytes = dataTypeLengthInBytes;
  this.castedRepeatedVector = valueVector;

  // Hand this reader's page reader to the wrapped data reader, discarding its own first.
  this.dataReader = dataReader;
  this.dataReader.pageReader.clear();
  this.dataReader.pageReader = this.pageReader;

  // Set here rather than in the reset method: the flag must be initialized only for the very
  // first page read. When a read ends at a page boundary, its value has to carry over into
  // the next read loop instead of being cleared.
  this.notFishedReadingList = false;
}
示例15: VarLengthValuesColumn
import parquet.column.ColumnDescriptor; //导入依赖的package包/类
/**
 * Creates a reader for variable-length values.
 *
 * <p>All arguments are forwarded to the superclass constructor; the inherited
 * {@code valueVec} is then kept as a {@code VariableWidthVector}, and
 * {@code usingDictionary} is set according to whether the chunk's encodings include
 * {@code PLAIN_DICTIONARY}.
 *
 * @param columnChunkMetaData chunk metadata whose encodings decide {@code usingDictionary}
 * @throws ExecutionSetupException declared by this constructor and by the superclass call
 */
VarLengthValuesColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  variableWidthVector = (VariableWidthVector) valueVec;
  usingDictionary = columnChunkMetaData.getEncodings().contains(Encoding.PLAIN_DICTIONARY);
}