本文整理汇总了Java中org.apache.parquet.schema.PrimitiveType类的典型用法代码示例。如果您正苦于以下问题:Java PrimitiveType类的具体用法?Java PrimitiveType怎么用?Java PrimitiveType使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
PrimitiveType类属于org.apache.parquet.schema包,在下文中一共展示了PrimitiveType类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: ColumnReader
import org.apache.parquet.schema.PrimitiveType; //导入依赖的package包/类
protected ColumnReader(DeprecatedParquetVectorizedReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v, SchemaElement schemaElement) throws ExecutionSetupException {
  this.parentReader = parentReader;
  this.columnDescriptor = descriptor;
  this.columnChunkMetaData = columnChunkMetaData;
  this.isFixedLength = fixedLength;
  this.schemaElement = schemaElement;
  this.valueVec = v;

  // Reuse the reader's shared input stream when one exists; otherwise open a per-column page reader.
  if (parentReader.getSingleStream() != null) {
    this.pageReader = new DeprecatedSingleStreamPageReader(
        this, parentReader.getSingleStream(), parentReader.getHadoopPath(), columnChunkMetaData);
  } else {
    this.pageReader = new PageReader(
        this, parentReader.getFileSystem(), parentReader.getHadoopPath(), columnChunkMetaData);
  }

  PrimitiveType.PrimitiveTypeName physicalType = columnDescriptor.getType();
  if (physicalType == PrimitiveType.PrimitiveTypeName.BINARY) {
    // Variable-width values: dataTypeLengthInBits stays unset.
  } else if (physicalType == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
    dataTypeLengthInBits = columnDescriptor.getTypeLength() * 8;
  } else if (physicalType == PrimitiveTypeName.INT96
      && (valueVec instanceof TimeStampMilliVector || valueVec instanceof NullableTimeStampMilliVector)) {
    // If an int96 column is being read as a Timestamp, this truncates the time format used by Impala.
    // dataTypeLengthInBits is only ever used when computing offsets into the destination vector, so it
    // needs to be set to the bit width of the resulting Arrow type; usually this matches the input length.
    dataTypeLengthInBits = 64;
  } else {
    dataTypeLengthInBits = DeprecatedParquetVectorizedReader.getTypeLengthInBits(columnDescriptor.getType());
  }
}
示例2: renameChildTypeToElement
import org.apache.parquet.schema.PrimitiveType; //导入依赖的package包/类
/**
 * Changes the list inner '$data$' vector name to 'element' in the schema.
 * Preserves repetition, physical/original type, length and decimal metadata.
 */
private Type renameChildTypeToElement(Type childType) {
  if (!childType.isPrimitive()) {
    GroupType groupChild = childType.asGroupType();
    return new GroupType(
        groupChild.getRepetition(),
        "element",
        groupChild.getOriginalType(),
        groupChild.getFields());
  }
  PrimitiveType primitiveChild = childType.asPrimitiveType();
  return new PrimitiveType(
      primitiveChild.getRepetition(),
      primitiveChild.getPrimitiveTypeName(),
      primitiveChild.getTypeLength(),
      "element",
      primitiveChild.getOriginalType(),
      primitiveChild.getDecimalMetadata(),
      null); // no type id
}
示例3: getColTypeInfo
import org.apache.parquet.schema.PrimitiveType; //导入依赖的package包/类
/**
 * Walks {@code path} down to the leaf column and collects its type information.
 *
 * @param schema full message schema, used to compute max repetition/definition levels
 * @param type   the (sub)type reached at {@code depth} along the path
 * @param path   path components identifying the column
 * @param depth  index into {@code path} of the component represented by {@code type}
 * @return original type, decimal precision/scale and max rep/def levels of the leaf
 */
private ColTypeInfo getColTypeInfo(MessageType schema, Type type, String[] path, int depth) {
  if (type.isPrimitive()) {
    // asPrimitiveType() is the idiomatic accessor; avoids a raw cast.
    PrimitiveType primitiveType = type.asPrimitiveType();
    int precision = 0;
    int scale = 0;
    if (primitiveType.getDecimalMetadata() != null) {
      precision = primitiveType.getDecimalMetadata().getPrecision();
      scale = primitiveType.getDecimalMetadata().getScale();
    }
    int repetitionLevel = schema.getMaxRepetitionLevel(path);
    int definitionLevel = schema.getMaxDefinitionLevel(path);
    return new ColTypeInfo(type.getOriginalType(), precision, scale, repetitionLevel, definitionLevel);
  }
  // Not a leaf yet: descend into the child named by the next path component.
  Type child = type.asGroupType().getType(path[depth]);
  return getColTypeInfo(schema, child, path, depth + 1);
}
示例4: newConverter
import org.apache.parquet.schema.PrimitiveType; //导入依赖的package包/类
/**
 * Creates a {@code PrimitiveConverter} for a column, based on the target vector
 * type and the Parquet type of the source column.
 *
 * @param colIdx      index of the destination column
 * @param vecType     target {@code Vec} type byte
 * @param parquetType Parquet primitive type of the source column
 * @return converter that writes decoded values into {@code _writer}
 * @throws UnsupportedOperationException for unhandled vector types
 */
private PrimitiveConverter newConverter(int colIdx, byte vecType, PrimitiveType parquetType) {
  switch (vecType) {
    case Vec.T_BAD:  // fall through: these are all read either as timestamps or as strings
    case Vec.T_CAT:
    case Vec.T_STR:
    case Vec.T_UUID:
    case Vec.T_TIME: {
      // Enum constants are singletons, so identity comparison is the idiomatic (and null-safe) check.
      if (parquetType.getOriginalType() == OriginalType.TIMESTAMP_MILLIS
          || parquetType.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.INT96) {
        return new TimestampConverter(colIdx, _writer);
      }
      // Dictionary support only applies to string-like logical types.
      boolean dictSupport = parquetType.getOriginalType() == OriginalType.UTF8
          || parquetType.getOriginalType() == OriginalType.ENUM;
      return new StringConverter(_writer, colIdx, dictSupport);
    }
    case Vec.T_NUM:
      return new NumberConverter(colIdx, _writer);
    default:
      throw new UnsupportedOperationException("Unsupported type " + vecType);
  }
}
示例5: showDetails
import org.apache.parquet.schema.PrimitiveType; //导入依赖的package包/类
/**
 * Prints one line describing a primitive column: indented name, repetition,
 * physical type, optional original type and, when a containing schema is
 * given, the column's max repetition/definition levels.
 *
 * @param out       destination writer
 * @param type      primitive column type to describe
 * @param depth     nesting depth, rendered as leading dots
 * @param container enclosing message type used to resolve levels; may be null
 * @param cpath     mutable path of ancestor field names (restored before return)
 */
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath) {
  String name = Strings.repeat(".", depth) + type.getName();
  OriginalType otype = type.getOriginalType();
  Repetition rep = type.getRepetition();
  PrimitiveTypeName ptype = type.getPrimitiveTypeName();
  out.format("%s: %s %s", name, rep, ptype);
  if (otype != null) out.format(" O:%s", otype);
  if (container != null) {
    // Temporarily extend the path with this leaf to resolve its column descriptor.
    cpath.add(type.getName());
    String[] paths = cpath.toArray(new String[0]); // new String[0] is the preferred toArray idiom
    cpath.remove(cpath.size() - 1);
    ColumnDescriptor desc = container.getColumnDescription(paths);
    int defl = desc.getMaxDefinitionLevel();
    int repl = desc.getMaxRepetitionLevel();
    out.format(" R:%d D:%d", repl, defl);
  }
  out.println();
}
示例6: createStats
import org.apache.parquet.schema.PrimitiveType; //导入依赖的package包/类
/**
 * Creates an empty {@code Statistics} instance for the specified type to be
 * used for reading/writing the new min/max statistics used in the V2 format.
 *
 * @param type type of the column
 * @return an empty, typed statistics instance matching the column's primitive type
 * @throws UnknownColumnTypeException for an unrecognized primitive type
 */
public static Statistics<?> createStats(Type type) {
  PrimitiveType primitiveType = type.asPrimitiveType();
  switch (primitiveType.getPrimitiveTypeName()) {
    case BOOLEAN:
      return new BooleanStatistics(primitiveType);
    case INT32:
      return new IntStatistics(primitiveType);
    case INT64:
      return new LongStatistics(primitiveType);
    case FLOAT:
      return new FloatStatistics(primitiveType);
    case DOUBLE:
      return new DoubleStatistics(primitiveType);
    case BINARY:
    case INT96:
    case FIXED_LEN_BYTE_ARRAY:
      // All byte-array-shaped physical types share binary statistics.
      return new BinaryStatistics(primitiveType);
    default:
      throw new UnknownColumnTypeException(primitiveType.getPrimitiveTypeName());
  }
}
示例7: testReadUsingRequestedSchemaWithIncompatibleField
import org.apache.parquet.schema.PrimitiveType; //导入依赖的package包/类
@Test
public void testReadUsingRequestedSchemaWithIncompatibleField(){
  // Write one row with an optional int32 field "e".
  MessageType fileSchema = new MessageType("schema",
      new PrimitiveType(OPTIONAL, INT32, "e"));
  MemPageStore pageStore = new MemPageStore(1);
  SimpleGroupFactory factory = new SimpleGroupFactory(fileSchema);
  writeGroups(fileSchema, pageStore, factory.newGroup().append("e", 4));
  try {
    // Request the same field with a different physical type (binary instead of int32).
    MessageType incompatibleSchema = new MessageType("schema",
        new PrimitiveType(OPTIONAL, BINARY, "e")); // Incompatible schema: different type
    readGroups(pageStore, fileSchema, incompatibleSchema, 1);
    fail("should have thrown an incompatible schema exception");
  } catch (ParquetDecodingException e) {
    assertEquals("The requested schema is not compatible with the file schema. incompatible types: optional binary e != optional int32 e", e.getMessage());
  }
}
示例8: testReadUsingSchemaWithRequiredFieldThatWasOptional
import org.apache.parquet.schema.PrimitiveType; //导入依赖的package包/类
@Test
public void testReadUsingSchemaWithRequiredFieldThatWasOptional(){
  // Write one row with an optional int32 field "e".
  MessageType fileSchema = new MessageType("schema",
      new PrimitiveType(OPTIONAL, INT32, "e"));
  MemPageStore pageStore = new MemPageStore(1);
  SimpleGroupFactory factory = new SimpleGroupFactory(fileSchema);
  writeGroups(fileSchema, pageStore, factory.newGroup().append("e", 4));
  try {
    // Tighten the repetition to REQUIRED, which must be rejected.
    MessageType strictSchema = new MessageType("schema",
        new PrimitiveType(REQUIRED, INT32, "e")); // Incompatible schema: required when it was optional
    readGroups(pageStore, fileSchema, strictSchema, 1);
    fail("should have thrown an incompatible schema exception");
  } catch (ParquetDecodingException e) {
    assertEquals("The requested schema is not compatible with the file schema. incompatible types: required int32 e != optional int32 e", e.getMessage());
  }
}
示例9: testReadUsingProjectedSchema
import org.apache.parquet.schema.PrimitiveType; //导入依赖的package包/类
@Test
public void testReadUsingProjectedSchema(){
  // File holds two required int32 columns; the projection keeps only "b".
  MessageType originalSchema = new MessageType("schema",
      new PrimitiveType(REQUIRED, INT32, "a"),
      new PrimitiveType(REQUIRED, INT32, "b"));
  MessageType projectedSchema = new MessageType("schema",
      new PrimitiveType(OPTIONAL, INT32, "b"));
  MemPageStore pageStore = new MemPageStore(1);
  SimpleGroupFactory factory = new SimpleGroupFactory(originalSchema);
  writeGroups(originalSchema, pageStore, factory.newGroup().append("a", 1).append("b", 2));

  List<Group> groups = new ArrayList<Group>();
  groups.addAll(readGroups(pageStore, originalSchema, projectedSchema, 1));
  // Only column "b" should be materialized.
  Object[][] expected = {
      {2},
  };
  validateGroups(groups, expected);
}
示例10: IntColumnChunkMetaData
import org.apache.parquet.schema.PrimitiveType; //导入依赖的package包/类
/**
 * Column chunk metadata that stores all offsets and sizes as {@code int}s
 * (via {@code positiveLongToInt}) to reduce the in-memory footprint of file
 * footers. Intended for chunks whose values all fit in a positive int —
 * presumably {@code positiveLongToInt} rejects values that do not; TODO confirm.
 *
 * @param path column identifier (path of the column within the schema)
 * @param type primitive type of the column
 * @param codec compression codec used for the chunk
 * @param encodingStats statistics about the encodings used in the chunk
 * @param encodings set of encodings used in the chunk
 * @param statistics value statistics recorded for the chunk
 * @param firstDataPage offset of the first data page
 * @param dictionaryPageOffset offset of the dictionary page, if any
 * @param valueCount number of values in the chunk
 * @param totalSize total compressed size of the chunk
 * @param totalUncompressedSize total uncompressed size of the chunk
 */
IntColumnChunkMetaData(
ColumnPath path,
PrimitiveType type,
CompressionCodecName codec,
EncodingStats encodingStats,
Set<Encoding> encodings,
Statistics statistics,
long firstDataPage,
long dictionaryPageOffset,
long valueCount,
long totalSize,
long totalUncompressedSize) {
super(encodingStats, ColumnChunkProperties.get(path, type, codec, encodings));
// Each long parameter is narrowed to an int through positiveLongToInt.
this.firstDataPage = positiveLongToInt(firstDataPage);
this.dictionaryPageOffset = positiveLongToInt(dictionaryPageOffset);
this.valueCount = positiveLongToInt(valueCount);
this.totalSize = positiveLongToInt(totalSize);
this.totalUncompressedSize = positiveLongToInt(totalUncompressedSize);
this.statistics = statistics;
}
示例11: LongColumnChunkMetaData
import org.apache.parquet.schema.PrimitiveType; //导入依赖的package包/类
/**
 * Column chunk metadata that stores offsets and sizes as full {@code long}s,
 * for chunks whose values may not fit in a positive int (contrast with the
 * int-based variant that narrows each value).
 *
 * @param path column identifier (path of the column within the schema)
 * @param type primitive type of the column
 * @param codec compression codec used for the chunk
 * @param encodingStats statistics about the encodings used in the chunk
 * @param encodings set of encodings used in the chunk
 * @param statistics value statistics recorded for the chunk
 * @param firstDataPageOffset offset of the first data page
 * @param dictionaryPageOffset offset of the dictionary page, if any
 * @param valueCount number of values in the chunk
 * @param totalSize total compressed size of the chunk
 * @param totalUncompressedSize total uncompressed size of the chunk
 */
LongColumnChunkMetaData(
ColumnPath path,
PrimitiveType type,
CompressionCodecName codec,
EncodingStats encodingStats,
Set<Encoding> encodings,
Statistics statistics,
long firstDataPageOffset,
long dictionaryPageOffset,
long valueCount,
long totalSize,
long totalUncompressedSize) {
super(encodingStats, ColumnChunkProperties.get(path, type, codec, encodings));
this.firstDataPageOffset = firstDataPageOffset;
this.dictionaryPageOffset = dictionaryPageOffset;
this.valueCount = valueCount;
this.totalSize = totalSize;
this.totalUncompressedSize = totalUncompressedSize;
this.statistics = statistics;
}
示例12: testMergeMetadata
import org.apache.parquet.schema.PrimitiveType; //导入依赖的package包/类
/**
 * Verifies that merging two file footers unions their schemas: the fields of
 * the second schema are appended to the first, keeping the first root's name.
 */
@Test
public void testMergeMetadata() {
  FileMetaData md1 = new FileMetaData(
      new MessageType("root1",
          new PrimitiveType(REPEATED, BINARY, "a"),
          new PrimitiveType(OPTIONAL, BINARY, "b")),
      new HashMap<String, String>(), "test");
  FileMetaData md2 = new FileMetaData(
      new MessageType("root2",
          new PrimitiveType(REQUIRED, BINARY, "c")),
      new HashMap<String, String>(), "test2");
  GlobalMetaData merged = ParquetFileWriter.mergeInto(md2, ParquetFileWriter.mergeInto(md1, null));
  // assertEquals takes (expected, actual); the original call had them reversed,
  // which makes failure messages misleading.
  assertEquals(
      new MessageType("root1",
          new PrimitiveType(REPEATED, BINARY, "a"),
          new PrimitiveType(OPTIONAL, BINARY, "b"),
          new PrimitiveType(REQUIRED, BINARY, "c")),
      merged.getSchema());
}
示例13: testWriteMetadataFileWithRelativeOutputPath
import org.apache.parquet.schema.PrimitiveType; //导入依赖的package包/类
/**
 * {@link ParquetFileWriter#mergeFooters(Path, List)} expects a fully-qualified
 * path for the root and crashes if a relative one is provided.
 */
@Test
public void testWriteMetadataFileWithRelativeOutputPath() throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path relativeRoot = new Path("target/_test_relative");
  Path qualifiedRoot = fs.makeQualified(relativeRoot);

  // Mock a footer whose file metadata carries a minimal one-column schema.
  FileMetaData fileMetaData = new FileMetaData(
      new MessageType("root1",
          new PrimitiveType(REPEATED, BINARY, "a")),
      new HashMap<String, String>(), "test");
  ParquetMetadata mock = Mockito.mock(ParquetMetadata.class);
  Mockito.when(mock.getFileMetaData()).thenReturn(fileMetaData);

  List<Footer> footers = new ArrayList<Footer>();
  footers.add(new Footer(new Path(qualifiedRoot, "one"), mock));

  // This should not throw an exception
  ParquetFileWriter.writeMetadataFile(conf, relativeRoot, footers, JobSummaryLevel.ALL);
}
示例14: testIgnoreStatsWithSignedSortOrder
import org.apache.parquet.schema.PrimitiveType; //导入依赖的package包/类
@Test
public void testIgnoreStatsWithSignedSortOrder() {
  ParquetMetadataConverter converter = new ParquetMetadataConverter();
  // Interleave nulls with two distinct values so min != max.
  BinaryStatistics stats = new BinaryStatistics();
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("z"));
  stats.incrementNumNulls();
  PrimitiveType utf8Type = Types.required(PrimitiveTypeName.BINARY)
      .as(OriginalType.UTF8)
      .named("b");
  Statistics convertedStats = converter.fromParquetStatistics(
      Version.FULL_VERSION,
      StatsHelper.V1.toParquetStatistics(stats),
      utf8Type);
  // V1 stats were written with signed ordering; for a UTF8 column they must be discarded.
  Assert.assertTrue("Stats should be empty: " + convertedStats, convertedStats.isEmpty());
}
示例15: testStillUseStatsWithSignedSortOrderIfSingleValue
import org.apache.parquet.schema.PrimitiveType; //导入依赖的package包/类
/**
 * When min == max, statistics with a mismatching (signed) sort order can still
 * be used, because a single value is ordered identically under any ordering.
 *
 * @param helper serializes the statistics in either the V1 or V2 format
 */
private void testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper helper) {
  ParquetMetadataConverter converter = new ParquetMetadataConverter();
  BinaryStatistics stats = new BinaryStatistics();
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();
  PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b");
  Statistics convertedStats = converter.fromParquetStatistics(
      Version.FULL_VERSION,
      // BUG FIX: the helper parameter was ignored and serialization always went through
      // ParquetMetadataConverter.toParquetStatistics, so only one stats format was ever
      // exercised; route through the supplied helper so V1 and V2 are both covered.
      helper.toParquetStatistics(stats),
      binaryType);
  Assert.assertFalse("Stats should not be empty: " + convertedStats, convertedStats.isEmpty());
  Assert.assertArrayEquals("min == max: " + convertedStats, convertedStats.getMaxBytes(), convertedStats.getMinBytes());
}