This page collects typical usage examples of the Java method org.apache.spark.sql.types.StructField.dataType. If you are wondering what StructField.dataType does, how to call it, or where to find real-world usages, the curated method examples below should help; they also serve as a quick introduction to the enclosing class, org.apache.spark.sql.types.StructField.
Seven code examples of StructField.dataType follow, ordered roughly by popularity.
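Before the collected examples, here is a minimal, self-contained sketch written for this page (the class name StructFieldDataTypeDemo and its column names are made up for illustration) showing what StructField.dataType() returns for a hand-built schema:

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class StructFieldDataTypeDemo {
    public static void main(String[] args) {
        // Build a two-column schema by hand; no SparkSession is needed to inspect field metadata.
        StructType schema = new StructType()
            .add("id", DataTypes.LongType, false)
            .add("name", DataTypes.StringType, true);

        for (StructField field : schema.fields()) {
            // dataType() returns the declared Spark SQL DataType of the column,
            // e.g. LongType for "id" and StringType for "name".
            System.out.println(field.name() + " -> " + field.dataType().simpleString());
        }
    }
}

Running it should print something like "id -> bigint" and "name -> string".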
Example 1: createDataField
import org.apache.spark.sql.types.StructField; // import the package/class the method depends on
public DataField createDataField(FieldName name){
    StructField field = this.schema.apply(name.getValue());

    org.apache.spark.sql.types.DataType sparkDataType = field.dataType();

    if(sparkDataType instanceof StringType){
        return createDataField(name, OpType.CATEGORICAL, DataType.STRING);
    } else if(sparkDataType instanceof IntegralType){
        return createDataField(name, OpType.CONTINUOUS, DataType.INTEGER);
    } else if(sparkDataType instanceof DoubleType){
        return createDataField(name, OpType.CONTINUOUS, DataType.DOUBLE);
    } else if(sparkDataType instanceof BooleanType){
        return createDataField(name, OpType.CATEGORICAL, DataType.BOOLEAN);
    } else {
        throw new IllegalArgumentException("Expected string, integral, double or boolean type, got " + sparkDataType.typeName() + " type");
    }
}
Example 2: sparkSchemaToIndexRSchema
import org.apache.spark.sql.types.StructField; // import the package/class the method depends on
public static SegmentSchema sparkSchemaToIndexRSchema(List<StructField> sparkSchema, IsIndexed isIndexed) {
    List<ColumnSchema> columns = new ArrayList<>();
    for (StructField f : sparkSchema) {
        SQLType type;
        if (f.dataType() instanceof IntegerType) {
            type = SQLType.INT;
        } else if (f.dataType() instanceof LongType) {
            type = SQLType.BIGINT;
        } else if (f.dataType() instanceof FloatType) {
            type = SQLType.FLOAT;
        } else if (f.dataType() instanceof DoubleType) {
            type = SQLType.DOUBLE;
        } else if (f.dataType() instanceof StringType) {
            type = SQLType.VARCHAR;
        } else if (f.dataType() instanceof DateType) {
            type = SQLType.DATE;
        } else if (f.dataType() instanceof TimestampType) {
            type = SQLType.DATETIME;
        } else {
            throw new IllegalStateException("Unsupported type: " + f.dataType());
        }
        columns.add(new ColumnSchema(f.name(), type, isIndexed.apply(f.name())));
    }
    return new SegmentSchema(columns);
}
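A hedged usage sketch of the converter above (the schema, column names, and the lambda are made up for illustration; IsIndexed is assumed here to be a functional interface whose apply(String) method says whether a column should be indexed, as suggested by the isIndexed.apply(f.name()) call in the loop):

// Hypothetical Spark schema; only "id" is marked for indexing in the resulting SegmentSchema.
StructType sparkSchema = new StructType()
    .add("id", DataTypes.LongType)
    .add("event", DataTypes.StringType)
    .add("ts", DataTypes.TimestampType);

SegmentSchema indexrSchema = sparkSchemaToIndexRSchema(
    Arrays.asList(sparkSchema.fields()),
    name -> "id".equals(name));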
Example 3: toAmpoolSchema
import org.apache.spark.sql.types.StructField; // import the package/class the method depends on
/**
 * Convert a Spark DataFrame schema to the corresponding MTable schema, mapping
 * Spark DataTypes to MTable (DataType) names.
 *
 * @param sparkStruct the Spark DataFrame schema
 * @param td          the table descriptor to add the converted columns to
 * @param isFTable    whether the target table is an FTable (its reserved insertion-timestamp column is skipped)
 * @return the table descriptor with the converted columns added
 */
public static TableDescriptor toAmpoolSchema(final org.apache.spark.sql.types.StructType sparkStruct, TableDescriptor td, boolean isFTable) {
    scala.collection.Iterator<StructField> itr = sparkStruct.iterator();
    StructField sf;
    DataType type;
    String str;
    Map<String, Function<Object, Object>> preMap, postMap;
    while (itr.hasNext()) {
        sf = itr.next();
        final String converterDependency;
        if (sf.dataType() instanceof UserDefinedType) {
            // User-defined types are mapped via their underlying SQL type; custom
            // pre/post converters are looked up by the UDT's simple string name.
            preMap = CUSTOM_PRE_POST_MAP.getOrDefault(sf.dataType().simpleString() + "-pre", PRE_SER_MAP);
            postMap = CUSTOM_PRE_POST_MAP.getOrDefault(sf.dataType().simpleString() + "-post", POST_DES_MAP);
            str = ((UserDefinedType) sf.dataType()).sqlType().simpleString();
            converterDependency = ((UserDefinedType) sf.dataType()).getClass().getName();
        } else {
            preMap = PRE_SER_MAP;
            postMap = POST_DES_MAP;
            str = sf.dataType().simpleString();
            converterDependency = AmpoolDataFrame.class.getName();
        }
        type = DataTypeFactory.getTypeFromString(str, TYPE_MAP_SPARK_TO_AMPOOL, preMap, postMap, converterDependency);
        /* Add the required converter-dependency so that pre/post converters are not
         * de-serialized on the server side; these contain Scala classes specific to Spark.
         * TODO: refactor these in a generic way.
         */
        if (type instanceof WrapperType) {
            ((WrapperType) type).setConverterDependency(converterDependency);
        }
        if (isFTable) {
            // For FTables, skip the reserved insertion-timestamp column.
            if (!FTableDescriptor.INSERTION_TIMESTAMP_COL_NAME.equals(sf.name())) {
                td.addColumn(sf.name(), new MTableColumnType(type));
            }
        } else {
            td.addColumn(sf.name(), new MTableColumnType(type));
        }
    }
    return td;
}
Example 4: toAmpoolSchema
import org.apache.spark.sql.types.StructField; // import the package/class the method depends on
/**
 * Convert a Spark DataFrame schema to the corresponding MTable schema, mapping
 * Spark DataTypes to MTable (DataType) names.
 *
 * @param sparkStruct the Spark DataFrame schema
 * @param td          the table descriptor whose schema is set from the converted columns
 * @param isFTable    whether the target table is an FTable (its reserved insertion-timestamp column is skipped)
 * @return the table descriptor with the converted schema set
 */
public static TableDescriptor toAmpoolSchema(final org.apache.spark.sql.types.StructType sparkStruct, TableDescriptor td, boolean isFTable) {
    scala.collection.Iterator<StructField> itr = sparkStruct.iterator();
    StructField sf;
    DataType type;
    String str;
    Schema.Builder sb = new Schema.Builder();
    while (itr.hasNext()) {
        sf = itr.next();
        if (sf.dataType() instanceof UserDefinedType) {
            // For user-defined types, map the underlying SQL type and wrap it with
            // the UDT class name as the converter dependency.
            str = ((UserDefinedType) sf.dataType()).sqlType().simpleString();
            type = DataTypeFactory.getTypeFromString(str, TYPE_MAP_SPARK_TO_AMPOOL);
            type = new WrapperType(type, ((UserDefinedType) sf.dataType()).getClass().getName(), null, null);
        } else {
            str = sf.dataType().simpleString();
            type = DataTypeFactory.getTypeFromString(str, TYPE_MAP_SPARK_TO_AMPOOL);
        }
        if (isFTable) {
            // For FTables, skip the reserved insertion-timestamp column.
            if (!FTableDescriptor.INSERTION_TIMESTAMP_COL_NAME.equals(sf.name())) {
                sb.column(sf.name(), type);
            }
        } else {
            sb.column(sf.name(), type);
        }
    }
    td.setSchema(sb.build());
    return td;
}
Example 5: getField
import org.apache.spark.sql.types.StructField; // import the package/class the method depends on
/**
 * Returns the data type of a (possibly nested) field, resolving one name per
 * nesting level and asserting the nullability of the leaf field.
 */
DataType getField(DataType dataType, boolean isNullable, String... names) {
    StructType schema = dataType instanceof ArrayType
        ? (StructType) ((ArrayType) dataType).elementType()
        : (StructType) dataType;
    StructField field = Arrays.stream(schema.fields())
        .filter(sf -> sf.name().equalsIgnoreCase(names[0]))
        .findFirst()
        .get();
    DataType child = field.dataType();
    // Recurse through children if there are more names.
    if (names.length == 1) {
        // Check the nullability.
        Assert.assertEquals("Unexpected nullability of field " + field.name(),
            isNullable,
            field.nullable());
        return child;
    } else {
        return getField(child, isNullable, Arrays.copyOfRange(names, 1, names.length));
    }
}
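A hedged usage sketch of the helper above (the schema and field names are made up for illustration; Assert is JUnit's, as in the helper itself):

// Hypothetical nested schema: "address" is a struct containing a nullable "city" column.
StructType address = new StructType()
    .add("city", DataTypes.StringType, true);
StructType schema = new StructType()
    .add("id", DataTypes.LongType, false)
    .add("address", address, true);

// Resolves one name per nesting level and asserts the nullability of the leaf field ("city").
DataType cityType = getField(schema, true, "address", "city");
Assert.assertEquals(DataTypes.StringType, cityType);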
Example 6: writeDataFrame
import org.apache.spark.sql.types.StructField; // import the package/class the method depends on
@Override
public void writeDataFrame(String name, DataFrame df) {
    for (StructField field : df.schema().fields()) {
        String column = field.name();
        // Convert booleans to integers to avoid an error in Spark 1.6.2:
        // "Cannot specify a column width on data type bit."
        if (field.dataType() == DataTypes.BooleanType) {
            df = df.withColumn(column + TMP_SUFFIX, df.col(column).cast(DataTypes.IntegerType))
                .drop(column)
                .withColumnRenamed(column + TMP_SUFFIX, column);
        }
    }
    super.writeDataFrame(name, df);
}
Example 7: getStructSchema
import org.apache.spark.sql.types.StructField; // import the package/class the method depends on
private StructType getStructSchema(StructType schema){
    StructField structField = schema.apply(getStructCol());

    return (StructType) structField.dataType();
}