This page collects typical usage examples of the Java class parquet.schema.Type. If you are wondering what the Type class does, how to use it, or where to find examples of it in real code, the curated class code examples below should help.
The Type class belongs to the parquet.schema package. In total, 15 code examples of the Type class are shown below, sorted by popularity.
Example 1: newSchema
import parquet.schema.Type; // import the required package/class
private void newSchema() throws IOException {
  List<Type> types = Lists.newArrayList();
  for (MaterializedField field : batchSchema) {
    if (field.getPath().equals(SchemaPath.getSimplePath(WriterPrel.PARTITION_COMPARATOR_FIELD))) {
      continue;
    }
    types.add(getType(field));
  }
  schema = new MessageType("root", types);

  int initialBlockBufferSize = max(MINIMUM_BUFFER_SIZE, blockSize / this.schema.getColumns().size() / 5);
  pageStore = ColumnChunkPageWriteStoreExposer.newColumnChunkPageWriteStore(this.oContext,
      codecFactory.getCompressor(codec, pageSize),
      schema,
      initialBlockBufferSize);
  int initialPageBufferSize = max(MINIMUM_BUFFER_SIZE, min(pageSize + pageSize / 10, initialBlockBufferSize));
  store = new ColumnWriteStoreV1(pageStore, pageSize, initialPageBufferSize, dictionaryPageSize, enableDictionary, writerVersion);
  MessageColumnIO columnIO = new ColumnIOFactory(false).getColumnIO(this.schema);
  consumer = columnIO.getRecordWriter(store);
  setUp(schema, consumer);
}
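For context, newSchema assembles a MessageType from a list of parquet.schema.Type instances before wiring up the write path. Below is a minimal, self-contained sketch of just the schema-building step, using invented column names; it illustrates the API and is not part of the Drill code above.

import java.util.Arrays;
import java.util.List;
import parquet.schema.MessageType;
import parquet.schema.PrimitiveType;
import parquet.schema.PrimitiveType.PrimitiveTypeName;
import parquet.schema.Type;
import parquet.schema.Type.Repetition;

public class BuildSchemaSketch {
  public static void main(String[] args) {
    // Two illustrative columns: a required 64-bit id and an optional binary name.
    List<Type> types = Arrays.asList(
        (Type) new PrimitiveType(Repetition.REQUIRED, PrimitiveTypeName.INT64, "id"),
        new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "name"));
    MessageType schema = new MessageType("root", types);
    System.out.println(schema); // prints the schema in Parquet's message-type syntax
  }
}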
Example 2: getType
import parquet.schema.Type; // import the required package/class
private parquet.schema.Type getType(MaterializedField field) {
  MinorType minorType = field.getType().getMinorType();
  DataMode dataMode = field.getType().getMode();
  switch (minorType) {
    case MAP:
      List<parquet.schema.Type> types = Lists.newArrayList();
      for (MaterializedField childField : field.getChildren()) {
        types.add(getType(childField));
      }
      return new GroupType(dataMode == DataMode.REPEATED ? Repetition.REPEATED : Repetition.OPTIONAL, field.getLastName(), types);
    case LIST:
      throw new UnsupportedOperationException("Unsupported type " + minorType);
    default:
      return getPrimitiveType(field);
  }
}
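The MAP branch converts a Drill map field into a Parquet group by recursing over its children. As a hedged illustration with invented field names (reusing the imports from the sketch after Example 1), the resulting structure is equivalent to building a GroupType directly:

// A repeated group "prices" with a required key and an optional value.
List<Type> children = Arrays.asList(
    (Type) new PrimitiveType(Repetition.REQUIRED, PrimitiveTypeName.BINARY, "key"),
    new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.DOUBLE, "value"));
GroupType prices = new GroupType(Repetition.REPEATED, "prices", children);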
Example 3: writeTuple
import parquet.schema.Type; // import the required package/class
private void writeTuple(Tuple tuple, GroupType type) {
  for (int index = 0; index < type.getFieldCount(); index++) {
    Type fieldType = type.getType(index);
    String fieldName = fieldType.getName();
    // null fields have to be omitted
    if (tuple.isNull(index)) {
      continue;
    }
    recordConsumer.startField(fieldName, index);
    if (fieldType.isPrimitive()) {
      tuple.writePrimitiveValue(recordConsumer, index, (PrimitiveType) fieldType);
    } else {
      recordConsumer.startGroup();
      writeTuple(tuple.getTuple(index), fieldType.asGroupType());
      recordConsumer.endGroup();
    }
    recordConsumer.endField(fieldName, index);
  }
}
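writeTuple relies on the RecordConsumer protocol: each non-null field is bracketed by startField/endField, with startGroup/endGroup nested inside for group-typed values. The sketch below shows that call ordering for a single flat record; the consumer instance would come from a MessageColumnIO record writer as in Example 1, and the field name is illustrative.

void writeRow(RecordConsumer consumer) {
  consumer.startMessage();
  consumer.startField("id", 0); // field name and index must match the schema
  consumer.addLong(42L);
  consumer.endField("id", 0);
  consumer.endMessage();
}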
Example 4: initScanFilter
import parquet.schema.Type; // import the required package/class
/**
 * Initializes the scan filter from the read schema attached to the scan.
 * @param scan the HBase scan carrying the serialized read schema
 */
public void initScanFilter(Scan scan) {
  try {
    if (scan != null && scan.getAttribute(HConstants.SCAN_TABLE_SCHEMA) != null) {
      String schema = new String(scan.getAttribute(HConstants.SCAN_TABLE_SCHEMA));
      if (!schema.isEmpty()) {
        MessageType readSchema = MessageTypeParser.parseMessageType(schema);
        List<Type> types = readSchema.getFields();
        for (Type type : types) {
          String columnName = type.getName();
          if (columnName.startsWith("cf")) { // strip the prefix to fetch the real column name
            columnName = columnName.substring(3);
          }
          filterColumns.add(columnName.getBytes());
        }
      }
    }
  } catch (Exception e) {
    // TODO: send the exception back to the client
    LOG.error("Failed to parse the message schema", e);
  }
}
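The schema attribute parsed above is expressed in Parquet's message-type syntax. Here is a small standalone sketch of MessageTypeParser and field iteration, with illustrative schema text:

import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;
import parquet.schema.Type;

public class ParseSchemaSketch {
  public static void main(String[] args) {
    MessageType readSchema = MessageTypeParser.parseMessageType(
        "message scan { required binary rowkey; optional int64 ts; }");
    for (Type field : readSchema.getFields()) {
      System.out.println(field.getName() + " -> " + field.getRepetition());
    }
  }
}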
Example 5: groupToCells
import parquet.schema.Type; // import the required package/class
/**
 * Transforms the data in a group into a list of cells
 * (List&lt;Cell&gt;, see {@link org.apache.hadoop.hbase.client.Result}).
 * @param group the Parquet group to convert
 * @return the cells extracted from the group
 */
public static List<Cell> groupToCells(Group group) {
  List<Cell> cells = new LinkedList<>();
  if (group != null) {
    GroupType groupType = group.getType();
    List<Type> types = groupType.getFields();
    byte[] rowKey = group.getBinary(HConstants.ROW_KEY, 0).getBytes();
    long timestamp = group.getLong(HConstants.TIME_STAMP, 0);
    for (Type t : types) {
      String name = t.getName();
      if (!name.equals(HConstants.ROW_KEY) && !name.equals(HConstants.TIME_STAMP)) {
        String[] names = name.split(":");
        if (names.length == 2) {
          byte[] value = group.getBinary(name, 0).getBytes();
          Cell cell = new KeyValue(rowKey, names[0].getBytes(), names[1].getBytes(), timestamp, value);
          cells.add(cell);
        }
      }
    }
  }
  return cells;
}
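To exercise groupToCells-style accessors without reading a real file, a Group can be built in memory with Parquet's example API. This sketch assumes plain (colon-free) field names; note that HConstants.ROW_KEY and HConstants.TIME_STAMP above are constants from the surrounding project, not stock HBase.

// Assumed imports: parquet.example.data.Group, parquet.example.data.simple.SimpleGroupFactory,
// parquet.io.api.Binary, parquet.schema.MessageType, parquet.schema.MessageTypeParser
MessageType rowSchema = MessageTypeParser.parseMessageType(
    "message row { required binary rowKey; required int64 timestamp; }");
Group g = new SimpleGroupFactory(rowSchema).newGroup();
g.add("rowKey", Binary.fromString("r1"));
g.add("timestamp", 1234L);
byte[] rowKey = g.getBinary("rowKey", 0).getBytes(); // same accessor pattern as groupToCells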
Example 6: writeRecordFields
import parquet.schema.Type; // import the required package/class
private void writeRecordFields(GroupType schema, Schema tajoSchema,
    Tuple tuple) {
  List<Type> fields = schema.getFields();
  // Parquet ignores Tajo NULL_TYPE columns, so the Parquet field index may differ from the Tajo column index.
  int index = 0;
  for (int tajoIndex = 0; tajoIndex < tajoSchema.size(); ++tajoIndex) {
    Column column = tajoSchema.getColumn(tajoIndex);
    if (column.getDataType().getType() == TajoDataTypes.Type.NULL_TYPE) {
      continue;
    }
    Datum datum = tuple.get(tajoIndex);
    Type fieldType = fields.get(index);
    if (!tuple.isNull(tajoIndex)) {
      recordConsumer.startField(fieldType.getName(), index);
      writeValue(fieldType, column, datum);
      recordConsumer.endField(fieldType.getName(), index);
    } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) {
      throw new RuntimeException("Null-value for required field: " +
          column.getSimpleName());
    }
    ++index;
  }
}
Example 7: toString
import parquet.schema.Type; // import the required package/class
public String toString(String indent) {
  StringBuilder result = new StringBuilder();
  int i = 0;
  for (Type field : this.schema.getFields()) {
    String name = field.getName();
    List<Object> values = this.data[i];
    for (Object value : values) {
      result.append(indent).append(name);
      if (value == null) {
        result.append(": NULL\n");
      } else if (value instanceof Group) {
        result.append("\n").append(((ParquetGroup) value).toString(indent + " "));
      } else {
        result.append(": ").append(value.toString()).append("\n");
      }
    }
    i++;
  }
  return result.toString();
}
Example 8: convertField
import parquet.schema.Type; // import the required package/class
@Override
Object convertField(JsonElement value) {
  ParquetGroup r1 = new ParquetGroup((GroupType) schema());
  JsonObject inputRecord = value.getAsJsonObject();
  for (Map.Entry<String, JsonElement> entry : inputRecord.entrySet()) {
    String key = entry.getKey();
    JsonElementConverter converter = this.converters.get(key);
    Object convertedValue = converter.convert(entry.getValue());
    boolean valueIsNull = convertedValue == null;
    Type.Repetition repetition = converter.jsonSchema.optionalOrRequired();
    if (valueIsNull && repetition.equals(OPTIONAL)) {
      continue;
    }
    r1.add(key, convertedValue);
  }
  return r1;
}
Example 9: buildSchema
import parquet.schema.Type; // import the required package/类
private Type buildSchema() {
  JsonArray inputSchema = this.jsonSchema.getDataTypeValues();
  List<Type> parquetTypes = new ArrayList<>();
  for (JsonElement element : inputSchema) {
    JsonObject map = (JsonObject) element;
    JsonSchema elementSchema = new JsonSchema(map);
    String columnName = elementSchema.getColumnName();
    JsonElementConverter converter = JsonElementConversionFactory.getConverter(elementSchema, false);
    Type schemaType = converter.schema();
    this.converters.put(columnName, converter);
    parquetTypes.add(schemaType);
  }
  String docName = this.jsonSchema.getColumnName();
  switch (recordType) {
    case ROOT:
      return new MessageType(docName, parquetTypes);
    case CHILD:
      return new GroupType(this.jsonSchema.optionalOrRequired(), docName, parquetTypes);
    default:
      throw new RuntimeException("Unsupported Record type");
  }
}
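The ROOT/CHILD split mirrors the Parquet type hierarchy: MessageType is the GroupType used for the schema root, while nested records are ordinary GroupTypes. A short hedged sketch with invented names (imports as in the sketch after Example 1):

GroupType address = new GroupType(Repetition.OPTIONAL, "address",
    new PrimitiveType(Repetition.REQUIRED, PrimitiveTypeName.BINARY, "street"));
MessageType person = new MessageType("person", address); // MessageType extends GroupType
System.out.println(person); // prints "message person {...}" rather than "group ..."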
Example 10: AvroUnionConverter
import parquet.schema.Type; // import the required package/class
public AvroUnionConverter(ParentValueContainer parent, Type parquetSchema,
    Schema avroSchema) {
  this.parent = parent;
  GroupType parquetGroup = parquetSchema.asGroupType();
  this.memberConverters = new Converter[parquetGroup.getFieldCount()];
  int parquetIndex = 0;
  for (int index = 0; index < avroSchema.getTypes().size(); index++) {
    Schema memberSchema = avroSchema.getTypes().get(index);
    if (!memberSchema.getType().equals(Schema.Type.NULL)) {
      Type memberType = parquetGroup.getType(parquetIndex);
      memberConverters[parquetIndex] = newConverter(memberSchema, memberType, new ParentValueContainer() {
        @Override
        void add(Object value) {
          Preconditions.checkArgument(memberValue == null, "Union is resolving to more than one type");
          memberValue = value;
        }
      });
      parquetIndex++; // note: for NULL members the parquetIndex is not increased
    }
  }
}
Example 11: writeRecordFields
import parquet.schema.Type; // import the required package/class
private void writeRecordFields(GroupType schema, Schema avroSchema,
    Map record) {
  List<Type> fields = schema.getFields();
  List<Schema.Field> avroFields = avroSchema.getFields();
  int index = 0; // Parquet ignores Avro nulls, so the index may differ
  for (int avroIndex = 0; avroIndex < avroFields.size(); avroIndex++) {
    Schema.Field avroField = avroFields.get(avroIndex);
    if (avroField.schema().getType().equals(Schema.Type.NULL)) {
      continue;
    }
    Type fieldType = fields.get(index);
    Object value = record.get(fieldKeyword.invoke(avroField));
    if (value != null) {
      recordConsumer.startField(fieldType.getName(), index);
      writeValue(fieldType, avroField.schema(), value);
      recordConsumer.endField(fieldType.getName(), index);
    } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) {
      throw new RuntimeException("Null-value for required field: " + avroField.name());
    }
    index++;
  }
}
Example 12: convertField
import parquet.schema.Type; // import the required package/class
private static SchemaDescription.Field convertField( SchemaDescription schema, Type t ) {
  boolean allowNull = t.getRepetition() != Repetition.REQUIRED;
  switch ( t.asPrimitiveType().getPrimitiveTypeName() ) {
    case BINARY:
      return schema.new Field( t.getName(), t.getName(), ValueMetaInterface.TYPE_STRING, allowNull );
    case BOOLEAN:
      return schema.new Field( t.getName(), t.getName(), ValueMetaInterface.TYPE_BOOLEAN, allowNull );
    case DOUBLE:
    case FLOAT:
      return schema.new Field( t.getName(), t.getName(), ValueMetaInterface.TYPE_NUMBER, allowNull );
    case INT32:
    case INT64:
      if ( t.getOriginalType() == OriginalType.DATE || t.getOriginalType() == OriginalType.TIME_MILLIS
          || t.getOriginalType() == OriginalType.TIMESTAMP_MILLIS ) {
        return schema.new Field( t.getName(), t.getName(), ValueMetaInterface.TYPE_DATE, allowNull );
      } else {
        return schema.new Field( t.getName(), t.getName(), ValueMetaInterface.TYPE_INTEGER, allowNull );
      }
    case INT96:
      return schema.new Field( t.getName(), t.getName(), ValueMetaInterface.TYPE_DATE, allowNull );
    default:
      throw new RuntimeException( "Undefined type: " + t );
  }
}
Example 13: init
import parquet.schema.Type; // import the required package/class
@Override
public ReadContext init( InitContext context ) {
  String schemaStr = context.getConfiguration().get( ParquetConverter.PARQUET_SCHEMA_CONF_KEY );
  if ( schemaStr == null ) {
    throw new RuntimeException( "Schema not defined in the PentahoParquetSchema key" );
  }
  schema = SchemaDescription.unmarshall( schemaStr );
  converter = new ParquetConverter( schema );

  // get all fields from the file's schema
  MessageType fileSchema = context.getFileSchema();
  List<Type> newFields = new ArrayList<>();
  // keep only the fields declared in the requested schema
  for ( SchemaDescription.Field f : schema ) {
    Type origField = fileSchema.getFields().get( fileSchema.getFieldIndex( f.formatFieldName ) );
    newFields.add( origField );
  }
  if ( newFields.isEmpty() ) {
    throw new RuntimeException( "Fields should be declared" );
  }
  MessageType newSchema = new MessageType( fileSchema.getName(), newFields );
  return new ReadContext( newSchema, new HashMap<>() );
}
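The ReadContext returned here performs projection pushdown: Parquet will only materialize the columns in the pruned schema. A minimal sketch of building such a projection (schema text and the chosen column are illustrative; imports as in the ParseSchemaSketch above plus java.util):

MessageType fileSchema = MessageTypeParser.parseMessageType(
    "message file { required int64 id; optional binary name (UTF8); optional double score; }");
List<Type> projected = new ArrayList<>();
projected.add(fileSchema.getType("id")); // keep only the columns the reader needs
MessageType requested = new MessageType(fileSchema.getName(), projected);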
Example 14: createConverter
import parquet.schema.Type; // import the required package/class
private Converter createConverter(Type field) {
  if (field.isPrimitive()) {
    OriginalType otype = field.getOriginalType();
    if (otype != null) {
      switch (otype) {
        case MAP: break;
        case LIST: break;
        case UTF8: return new StringConverter(field.getName());
        case MAP_KEY_VALUE: break;
        case ENUM: break;
      }
    }
    return new SimplePrimitiveConverter(field.getName());
  }
  return new SimpleRecordConverter(field.asGroupType(), field.getName(), this);
}
Example 15: getParquetType
import parquet.schema.Type; // import the required package/class
private Type getParquetType(HDFSColumnHandle column, MessageType messageType)
{
  if (messageType.containsField(column.getName())) {
    return messageType.getType(column.getName());
  }
  // hdfs-column names are converted to lowercase, so the exact lookup above can miss
  // a mixed-case Parquet field; fall back to a case-insensitive scan
  for (Type type : messageType.getFields()) {
    if (type.getName().equalsIgnoreCase(column.getName())) {
      return type;
    }
  }
  return null;
}
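A short illustration of why the fallback loop is needed: GroupType.containsField is an exact lookup, so a lowercased column name misses a mixed-case Parquet field, while the case-insensitive scan still finds it (schema is illustrative):

MessageType mt = MessageTypeParser.parseMessageType("message t { required int32 ID; }");
System.out.println(mt.containsField("id")); // false: exact-match lookup
for (Type type : mt.getFields()) {
  System.out.println(type.getName().equalsIgnoreCase("id")); // true: the fallback matches
}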