本文整理汇总了Java中org.apache.parquet.format.SchemaElement类的典型用法代码示例。如果您正苦于以下问题：Java SchemaElement类的具体用法？Java SchemaElement怎么用？Java SchemaElement使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
SchemaElement类属于org.apache.parquet.format包，在下文中一共展示了SchemaElement类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: ColumnReader
import org.apache.parquet.format.SchemaElement; //导入依赖的package包/类
protected ColumnReader(DeprecatedParquetVectorizedReader parentReader, int allocateSize, ColumnDescriptor descriptor,
ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v, SchemaElement schemaElement) throws ExecutionSetupException {
this.parentReader = parentReader;
this.columnDescriptor = descriptor;
this.columnChunkMetaData = columnChunkMetaData;
this.isFixedLength = fixedLength;
this.schemaElement = schemaElement;
this.valueVec = v;
this.pageReader = (parentReader.getSingleStream() != null)?
new DeprecatedSingleStreamPageReader(this, parentReader.getSingleStream(), parentReader.getHadoopPath(), columnChunkMetaData) :
new PageReader(this, parentReader.getFileSystem(), parentReader.getHadoopPath(), columnChunkMetaData);
if (columnDescriptor.getType() != PrimitiveType.PrimitiveTypeName.BINARY) {
if (columnDescriptor.getType() == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
dataTypeLengthInBits = columnDescriptor.getTypeLength() * 8;
} else if (columnDescriptor.getType() == PrimitiveTypeName.INT96
&& (valueVec instanceof TimeStampMilliVector || valueVec instanceof NullableTimeStampMilliVector)) {
// if int 96 column is being read as a Timestamp, this truncates the time format used by Impala
// dataTypeLengthInBits is only ever used when computing offsets into the destination vector, so it
// needs to be set to the bit width of the resulting Arrow type, usually this matches the input length
dataTypeLengthInBits = 64;
} else {
dataTypeLengthInBits = DeprecatedParquetVectorizedReader.getTypeLengthInBits(columnDescriptor.getType());
}
}
}
示例2: loadParquetSchema
import org.apache.parquet.format.SchemaElement; //导入依赖的package包/类
/**
 * Walks the columns of the Parquet footer schema, resolves the Drill type of each one and
 * records the columns that are selected for reading in {@code selectedColumnMetadata}.
 */
private void loadParquetSchema() {
  // TODO - figure out how to deal with this better once we add nested reading, note also look
  // where this map is used below.
  // Map from column name to its schema element, built from the footer.
  final Map<String, SchemaElement> elementsByColumnName =
      ParquetReaderUtility.getColNameToSchemaElementMapping(footer);

  // Resolve every column's type first, then keep only the columns that were selected.
  for (ColumnDescriptor descriptor : footer.getFileMetaData().getSchema().getColumns()) {
    final ParquetColumnMetadata metadata = new ParquetColumnMetadata(descriptor);
    metadata.resolveDrillType(elementsByColumnName, options);
    if (fieldSelected(metadata.field)) {
      selectedColumnMetadata.add(metadata);
    }
  }
}
示例3: metadata
import org.apache.parquet.format.SchemaElement; //导入依赖的package包/类
/**
 * Builds a {@code FileMetaData} with an empty schema and one row group per entry of
 * {@code sizes}.
 *
 * <p>Each row group holds a single INT32, UNCOMPRESSED column chunk. Chunks are laid out back to
 * back: the first starts at offset 0 and each following chunk starts where the previous one
 * ended ({@code offset += size}).
 *
 * @param sizes size of each row group, in order; may be empty
 * @return file metadata whose row-group list has {@code sizes.length} entries
 */
private FileMetaData metadata(long... sizes) {
  final List<SchemaElement> noSchema = emptyList();
  final List<RowGroup> groups = new ArrayList<>();
  long nextOffset = 0;
  for (int i = 0; i < sizes.length; i++) {
    final long size = sizes[i];
    final ColumnChunk chunk = new ColumnChunk(nextOffset);
    final ColumnMetaData chunkMeta = new ColumnMetaData(
        INT32,
        Collections.<org.apache.parquet.format.Encoding>emptyList(),
        Collections.<String>emptyList(),
        UNCOMPRESSED, 10L, size * 2, size, nextOffset);
    chunk.setMeta_data(chunkMeta);
    groups.add(new RowGroup(Arrays.asList(chunk), size, 1));
    nextOffset += size;
  }
  return new FileMetaData(1, noSchema, sizes.length, groups);
}
示例4: getColNameToSchemaElementMapping
import org.apache.parquet.format.SchemaElement; //导入依赖的package包/类
/**
 * Indexes the schema elements of a Parquet footer by element name.
 *
 * <p>The footer is converted to its format-level {@code FileMetaData} using
 * {@code ParquetFileWriter.CURRENT_VERSION}, and every element of the resulting schema list is
 * stored under {@code getName()}. If two elements share a name, the one that appears later in
 * the list replaces the earlier one.
 *
 * @param footer footer to read the schema from
 * @return a new mutable map from element name to schema element
 */
public static Map<String, SchemaElement> getColNameToSchemaElementMapping(ParquetMetadata footer) {
  final Map<String, SchemaElement> byName = new HashMap<>();
  final ParquetMetadataConverter converter = new ParquetMetadataConverter();
  final FileMetaData formatMetadata = converter.toParquetMetadata(ParquetFileWriter.CURRENT_VERSION, footer);
  for (SchemaElement element : formatMetadata.getSchema()) {
    byName.put(element.getName(), element);
  }
  return byName;
}
示例5: VarCharColumn
import org.apache.parquet.format.SchemaElement; //导入依赖的package包/类
/**
 * Creates the column reader and caches the target vector together with its mutator.
 *
 * <p>All arguments are forwarded unchanged to the superclass constructor.
 *
 * @param v vector stored as {@code varCharVector}; its mutator is stored as {@code mutator}
 * @throws ExecutionSetupException propagated from the superclass constructor
 */
VarCharColumn(DeprecatedParquetVectorizedReader parentReader, int allocateSize,
    ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData, boolean fixedLength,
    NullableVarCharVector v, SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  this.varCharVector = v;
  this.mutator = v.getMutator();
}
示例6: NullableVarCharColumn
import org.apache.parquet.format.SchemaElement; //导入依赖的package包/类
/**
 * Creates the column reader and caches the target vector together with its mutator.
 *
 * <p>All arguments are forwarded unchanged to the superclass constructor.
 *
 * @param v vector stored as {@code vector}; the mutator is then read back from that field
 * @throws ExecutionSetupException propagated from the superclass constructor
 */
NullableVarCharColumn(DeprecatedParquetVectorizedReader parentReader, int allocateSize,
    ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData, boolean fixedLength,
    NullableVarCharVector v, SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  this.vector = v;
  mutator = this.vector.getMutator();
}
示例7: VarBinaryColumn
import org.apache.parquet.format.SchemaElement; //导入依赖的package包/类
/**
 * Creates the column reader and caches the target vector together with its mutator.
 *
 * <p>All arguments are forwarded unchanged to the superclass constructor.
 *
 * @param v vector stored as {@code varBinaryVector}; its mutator is stored as {@code mutator}
 * @throws ExecutionSetupException propagated from the superclass constructor
 */
VarBinaryColumn(DeprecatedParquetVectorizedReader parentReader, int allocateSize,
    ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData, boolean fixedLength,
    NullableVarBinaryVector v, SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  this.varBinaryVector = v;
  this.mutator = v.getMutator();
}
示例8: NullableVarBinaryColumn
import org.apache.parquet.format.SchemaElement; //导入依赖的package包/类
/**
 * Creates the column reader and caches the target vector together with its mutator.
 *
 * <p>All arguments are forwarded unchanged to the superclass constructor.
 *
 * @param v vector stored as {@code nullableVarBinaryVector}; its mutator is stored as
 *     {@code mutator}
 * @throws ExecutionSetupException propagated from the superclass constructor
 */
NullableVarBinaryColumn(DeprecatedParquetVectorizedReader parentReader, int allocateSize,
    ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData, boolean fixedLength,
    NullableVarBinaryVector v, SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  this.nullableVarBinaryVector = v;
  this.mutator = v.getMutator();
}
示例9: VarLengthColumn
import org.apache.parquet.format.SchemaElement; //导入依赖的package包/类
/**
 * Creates the column reader and records whether the column chunk lists the
 * {@code PLAIN_DICTIONARY} encoding.
 *
 * <p>All arguments are forwarded unchanged to the superclass constructor.
 *
 * @param columnChunkMetaData chunk metadata whose encodings set {@code usingDictionary}
 * @throws ExecutionSetupException propagated from the superclass constructor
 */
VarLengthColumn(DeprecatedParquetVectorizedReader parentReader, int allocateSize,
    ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  // True exactly when PLAIN_DICTIONARY is among the chunk's encodings.
  usingDictionary = columnChunkMetaData.getEncodings().contains(Encoding.PLAIN_DICTIONARY);
}
示例10: CorruptionDetectingNullableDateReader
import org.apache.parquet.format.SchemaElement; //导入依赖的package包/类
/**
 * Creates the reader and caches the target date vector.
 *
 * <p>All arguments are forwarded unchanged to the superclass constructor.
 *
 * @param v vector stored as {@code dateVector}
 * @throws ExecutionSetupException propagated from the superclass constructor
 */
CorruptionDetectingNullableDateReader(DeprecatedParquetVectorizedReader parentReader, int allocateSize,
    ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData, boolean fixedLength,
    NullableDateMilliVector v, SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  this.dateVector = v;
}
示例11: FixedWidthRepeatedReader
import org.apache.parquet.format.SchemaElement; //导入依赖的package包/类
/**
 * Creates a repeated-value reader that wraps {@code dataReader} and makes it share this
 * reader's page reader.
 *
 * @param dataReader reader for the underlying values; its own page reader is cleared and then
 *     replaced by the page reader of this instance
 * @param dataTypeLengthInBytes stored as {@code dataTypeLengthInBytes}
 * @param valueVector destination vector, forwarded to the superclass constructor
 * @throws ExecutionSetupException propagated from the superclass constructor
 */
FixedWidthRepeatedReader(DeprecatedParquetVectorizedReader parentReader, ColumnReader<?> dataReader,
    int dataTypeLengthInBytes, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, RepeatedValueVector valueVector,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, valueVector, schemaElement);
  this.dataReader = dataReader;
  this.dataTypeLengthInBytes = dataTypeLengthInBytes;

  // Clear the wrapped reader's page reader before dropping the reference to it, then point the
  // wrapped reader at this instance's page reader so both read from the same pages.
  this.dataReader.pageReader.clear();
  this.dataReader.pageReader = this.pageReader;

  // This is not in the reset method because it must be initialized only for the very first page
  // read. In all other cases, if a read ends at a page boundary, the flag has to be carried over
  // and must not be cleared at the start of the next read loop.
  notFishedReadingList = false;
}
示例12: VarLengthValuesColumn
import org.apache.parquet.format.SchemaElement; //导入依赖的package包/类
/**
 * Creates the column reader, caches the value vector as a {@code VariableWidthVector} and
 * records whether the column chunk lists the {@code PLAIN_DICTIONARY} encoding.
 *
 * <p>All arguments are forwarded unchanged to the superclass constructor.
 *
 * @param columnChunkMetaData chunk metadata whose encodings set {@code usingDictionary}
 * @throws ExecutionSetupException propagated from the superclass constructor
 */
VarLengthValuesColumn(DeprecatedParquetVectorizedReader parentReader, int allocateSize,
    ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  // valueVec is read here after the superclass constructor has run.
  variableWidthVector = (VariableWidthVector) valueVec;
  // True exactly when PLAIN_DICTIONARY is among the chunk's encodings.
  usingDictionary = columnChunkMetaData.getEncodings().contains(Encoding.PLAIN_DICTIONARY);
}
示例13: getDataTypeLength
import org.apache.parquet.format.SchemaElement; //导入依赖的package包/类
/**
 * Returns the data type length, in bits, for a given {@link ColumnDescriptor} and its
 * corresponding {@link SchemaElement}. Neither is enough information alone, as the max
 * repetition level (indicating if it is an array type) is in the ColumnDescriptor and
 * the length of a fixed width field is stored at the schema level.
 *
 * @param column descriptor supplying the primitive type and the max repetition level
 * @param se schema element supplying the byte length of FIXED_LEN_BYTE_ARRAY columns
 * @return the length in bits if the column is fixed width and not repeated, else -1
 */
private int getDataTypeLength(ColumnDescriptor column, SchemaElement se) {
  final PrimitiveType.PrimitiveTypeName type = column.getType();

  // BINARY columns and repeated columns have no single fixed width.
  if (type == PrimitiveType.PrimitiveTypeName.BINARY || column.getMaxRepetitionLevel() > 0) {
    return -1;
  }
  if (type == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
    // The schema element stores the length in bytes.
    return se.getType_length() * 8;
  }
  return getTypeLengthInBits(type);
}
示例14: getReader
import org.apache.parquet.format.SchemaElement; //导入依赖的package包/类
/**
 * Picks the variable-length column reader that matches the column's converted type and its
 * max definition level.
 *
 * <p>A max definition level of 0 selects the non-nullable reader classes; any other level
 * selects the nullable ones. Within each group, UTF8 and DECIMAL get dedicated readers and
 * every other converted type, including an absent ({@code null}) one, is read as binary.
 *
 * @param v destination vector; cast to the vector type the chosen reader expects
 * @param schemaElement schema element whose converted type drives the choice
 * @return the reader for the column
 * @throws ExecutionSetupException propagated from the chosen reader's constructor
 */
static VarLengthValuesColumn<?> getReader(DeprecatedParquetVectorizedReader parentReader, int allocateSize,
    ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData, boolean fixedLength,
    ValueVector v, SchemaElement schemaElement) throws ExecutionSetupException {
  final ConvertedType convertedType = schemaElement.getConverted_type();
  final boolean required = descriptor.getMaxDefinitionLevel() == 0;

  if (required) {
    if (convertedType == ConvertedType.UTF8) {
      return new VarLengthColumnReaders.VarCharColumn(parentReader, allocateSize, descriptor,
          columnChunkMetaData, fixedLength, (NullableVarCharVector) v, schemaElement);
    }
    if (convertedType == ConvertedType.DECIMAL) {
      return new VarLengthColumnReaders.Decimal28Column(parentReader, allocateSize, descriptor,
          columnChunkMetaData, fixedLength, (NullableDecimalVector) v, schemaElement);
    }
    // No converted type, or one without a dedicated reader: plain binary.
    return new VarLengthColumnReaders.VarBinaryColumn(parentReader, allocateSize, descriptor,
        columnChunkMetaData, fixedLength, (NullableVarBinaryVector) v, schemaElement);
  }

  if (convertedType == ConvertedType.UTF8) {
    return new VarLengthColumnReaders.NullableVarCharColumn(parentReader, allocateSize, descriptor,
        columnChunkMetaData, fixedLength, (NullableVarCharVector) v, schemaElement);
  }
  if (convertedType == ConvertedType.DECIMAL) {
    return new NullableDecimalColumn(parentReader, allocateSize, descriptor, columnChunkMetaData,
        fixedLength, (NullableDecimalVector) v, schemaElement);
  }
  // No converted type, or one without a dedicated reader: nullable binary.
  return new VarLengthColumnReaders.NullableVarBinaryColumn(parentReader, allocateSize, descriptor,
      columnChunkMetaData, fixedLength, (NullableVarBinaryVector) v, schemaElement);
}
示例15: toMajorType
import org.apache.parquet.format.SchemaElement; //导入依赖的package包/类
/**
 * Builds the {@code MajorType} for a Parquet primitive column.
 *
 * <p>The minor type comes from {@code getMinorType} with the same inputs; the data mode is
 * taken as given. Precision and scale are copied from the schema element only when
 * {@code CoreDecimalUtility.isDecimalType} accepts the minor type.
 *
 * @param primitiveTypeName Parquet primitive type of the column
 * @param length type length forwarded to {@code getMinorType}
 * @param mode data mode set on the result
 * @param schemaElement schema element; read for precision and scale of decimal types
 * @param options forwarded to {@code getMinorType}
 * @param arrowField forwarded to {@code getMinorType}
 * @param readInt96AsTimeStamp forwarded to {@code getMinorType}
 * @return the built major type
 */
public static TypeProtos.MajorType toMajorType(PrimitiveType.PrimitiveTypeName primitiveTypeName, int length,
    TypeProtos.DataMode mode, SchemaElement schemaElement,
    OptionManager options, Field arrowField, final boolean readInt96AsTimeStamp) {
  final MinorType resolvedMinorType =
      getMinorType(primitiveTypeName, length, schemaElement, options, arrowField, readInt96AsTimeStamp);

  final TypeProtos.MajorType.Builder builder = TypeProtos.MajorType.newBuilder();
  builder.setMinorType(resolvedMinorType);
  builder.setMode(mode);

  if (CoreDecimalUtility.isDecimalType(resolvedMinorType)) {
    builder.setPrecision(schemaElement.getPrecision());
    builder.setScale(schemaElement.getScale());
  }
  return builder.build();
}