This article collects typical usage examples of the Java class org.apache.spark.mllib.linalg.VectorUDT. If you are wondering what the VectorUDT class is for, or how to use it, the curated class examples below should help.
The VectorUDT class belongs to the org.apache.spark.mllib.linalg package. Twelve code examples of the class are shown below, sorted by popularity by default.
Example 1: start
import org.apache.spark.mllib.linalg.VectorUDT; // import the featured package/class
private void start() {
    SparkSession spark = SparkSession.builder()
            .appName("First Prediction")
            .master("local")
            .getOrCreate();

    StructType schema = new StructType(new StructField[] {
            new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("features", new VectorUDT(), false, Metadata.empty())
    });

    // TODO this example is not working yet
}
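The TODO leaves Example 1 unfinished. As a minimal completion sketch (assuming Spark 2.x, with made-up row values), one could materialize a one-row DataFrame against the schema; note that the estimators under org.apache.spark.ml in Spark 2.x expect the newer org.apache.spark.ml.linalg.VectorUDT rather than the mllib one used here, which may be why the snippet is marked as not working.

import java.util.Arrays;
import java.util.List;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;

// Hypothetical continuation of start(): one labeled row matching the schema above.
List<Row> rows = Arrays.asList(
        RowFactory.create(1.0, Vectors.dense(0.5, 1.5)));
Dataset<Row> df = spark.createDataFrame(rows, schema);
df.show();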
Example 2: exportToJson
import org.apache.spark.mllib.linalg.VectorUDT; // import the featured package/class
public static String exportToJson(Set<String> columns, StructType dfSchema) {
    // Collects each selected column's name together with its mapped type
    List<Field> schema = new ArrayList<>();
    for (String column : columns) {
        StructField field = dfSchema.fields()[dfSchema.fieldIndex(column)];
        if (field.dataType() instanceof StringType) {
            schema.add(new Field(field.name(), STRING));
        } else if (field.dataType() instanceof BooleanType) {
            schema.add(new Field(field.name(), BOOLEAN));
        } else if (field.dataType() instanceof VectorUDT) {
            schema.add(new Field(field.name(), DOUBLE_ARRAY));
        } else if (field.dataType() instanceof DoubleType || field.dataType() instanceof DecimalType
                || field.dataType() instanceof FloatType || field.dataType() instanceof IntegerType
                || field.dataType() instanceof LongType || field.dataType() instanceof ShortType) {
            schema.add(new Field(field.name(), DOUBLE));
        } else if (field.dataType() instanceof ArrayType) {
            if (((ArrayType) field.dataType()).elementType() instanceof StringType) {
                schema.add(new Field(field.name(), STRING_ARRAY));
            } else if (((ArrayType) field.dataType()).elementType() instanceof DoubleType) {
                schema.add(new Field(field.name(), DOUBLE_ARRAY));
            } else {
                throw new UnsupportedOperationException("Cannot support data of type " + field.dataType());
            }
        } else {
            throw new UnsupportedOperationException("Cannot support data of type " + field.dataType());
        }
    }
    return gson.toJson(schema);
}
Example 3: exportSchemaToJson
import org.apache.spark.mllib.linalg.VectorUDT; // import the featured package/class
public static String exportSchemaToJson(StructType dfSchema) {
    // Collects each column's name together with its mapped type
    List<Field> schema = new ArrayList<>();
    for (StructField field : dfSchema.fields()) {
        if (field.dataType() instanceof StringType) {
            schema.add(new Field(field.name(), STRING));
        } else if (field.dataType() instanceof BooleanType) {
            schema.add(new Field(field.name(), BOOLEAN));
        } else if (field.dataType() instanceof VectorUDT) {
            schema.add(new Field(field.name(), DOUBLE_ARRAY));
        } else if (field.dataType() instanceof DoubleType || field.dataType() instanceof DecimalType
                || field.dataType() instanceof FloatType || field.dataType() instanceof IntegerType
                || field.dataType() instanceof LongType || field.dataType() instanceof ShortType) {
            schema.add(new Field(field.name(), DOUBLE));
        } else if (field.dataType() instanceof ArrayType) {
            if (((ArrayType) field.dataType()).elementType() instanceof StringType) {
                schema.add(new Field(field.name(), STRING_ARRAY));
            } else if (((ArrayType) field.dataType()).elementType() instanceof DoubleType) {
                schema.add(new Field(field.name(), DOUBLE_ARRAY));
            } else {
                throw new UnsupportedOperationException("Cannot support data of type " + field.dataType());
            }
        } else {
            throw new UnsupportedOperationException("Cannot support data of type " + field.dataType());
        }
    }
    return gson.toJson(schema);
}
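Both exporters reference a Field holder plus type-name constants and a gson instance that the excerpts do not show. Below is a hypothetical reconstruction of those helpers; the values of DOUBLE and DOUBLE_ARRAY match the outputs printed in Examples 5-8, while the remaining constant values are guesses.

import com.google.gson.Gson;

public class SchemaExporter {
    static final String STRING = "string";          // assumed value
    static final String BOOLEAN = "boolean";        // assumed value
    static final String DOUBLE = "double";          // matches Examples 5-8
    static final String DOUBLE_ARRAY = "double []"; // matches Examples 5-8
    static final String STRING_ARRAY = "string []"; // assumed value
    static final Gson gson = new Gson();

    // Serialized by Gson as {"name":...,"datatype":...}
    static class Field {
        private final String name;
        private final String datatype;

        Field(String name, String datatype) {
            this.name = name;
            this.datatype = datatype;
        }
    }
}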
Example 4: testVectorBinarizerDense
import org.apache.spark.mllib.linalg.VectorUDT; // import the featured package/class
@Test
public void testVectorBinarizerDense() {
    // prepare data
    JavaRDD<Row> jrdd = sc.parallelize(Arrays.asList(
            RowFactory.create(0d, 1d, new DenseVector(new double[]{-2d, -3d, -4d, -1d, 6d, -7d, 8d, 0d, 0d, 0d, 0d, 0d})),
            RowFactory.create(1d, 2d, new DenseVector(new double[]{4d, -5d, 6d, 7d, -8d, 9d, -10d, 0d, 0d, 0d, 0d, 0d})),
            RowFactory.create(2d, 3d, new DenseVector(new double[]{-5d, 6d, -8d, 9d, 10d, 11d, 12d, 0d, 0d, 0d, 0d, 0d}))
    ));
    StructType schema = new StructType(new StructField[]{
            new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("value1", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("vector1", new VectorUDT(), false, Metadata.empty())
    });
    DataFrame df = sqlContext.createDataFrame(jrdd, schema);

    VectorBinarizer vectorBinarizer = new VectorBinarizer()
            .setInputCol("vector1")
            .setOutputCol("binarized")
            .setThreshold(2d);

    // Export this model
    byte[] exportedModel = ModelExporter.export(vectorBinarizer, df);

    // Import and get Transformer
    Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

    // Compare predictions
    Row[] sparkOutput = vectorBinarizer.transform(df).orderBy("id")
            .select("id", "value1", "vector1", "binarized").collect();
    for (Row row : sparkOutput) {
        Map<String, Object> data = new HashMap<>();
        data.put(vectorBinarizer.getInputCol(), ((DenseVector) row.get(2)).toArray());
        transformer.transform(data);
        double[] output = (double[]) data.get(vectorBinarizer.getOutputCol());
        assertArrayEquals(output, ((DenseVector) row.get(3)).toArray(), 0d);
    }
}
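The assertions above depend on the binarization rule. A standalone sketch of that rule, assuming VectorBinarizer follows the same semantics as Spark's built-in Binarizer (entries strictly greater than the threshold become 1.0, everything else 0.0):

// Hypothetical reference implementation of the thresholding the test asserts.
static double[] binarize(double[] values, double threshold) {
    double[] out = new double[values.length];
    for (int i = 0; i < values.length; i++) {
        out[i] = values[i] > threshold ? 1.0 : 0.0;
    }
    return out;
}
// binarize(new double[]{-2d, -3d, 6d, 8d}, 2d) -> [0.0, 0.0, 1.0, 1.0]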
Example 5: testSchema1
import org.apache.spark.mllib.linalg.VectorUDT; // import the featured package/class
/**
 * Output:
 * [{"name":"id","datatype":"double"},{"name":"label","datatype":"double"},{"name":"features","datatype":"double []"}]
 */
@Test
public void testSchema1() {
    StructType schema = new StructType(new StructField[]{
            new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("features", new VectorUDT(), false, Metadata.empty())
    });
    System.out.println(SchemaExporter.exportSchemaToJson(schema));
}
Example 6: testSchema3
import org.apache.spark.mllib.linalg.VectorUDT; // import the featured package/class
/**
 * Output:
 * [{"name":"id","datatype":"double"},{"name":"value1","datatype":"double"},{"name":"vector1","datatype":"double []"}]
 */
@Test
public void testSchema3() {
    StructType schema = new StructType(new StructField[]{
            new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("value1", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("vector1", new VectorUDT(), false, Metadata.empty())
    });
    System.out.println(SchemaExporter.exportSchemaToJson(schema));
}
Example 7: testColumnExport1
import org.apache.spark.mllib.linalg.VectorUDT; // import the featured package/class
/**
 * Output:
 * [{"name":"features","datatype":"double []"},{"name":"id","datatype":"double"}]
 */
@Test
public void testColumnExport1() {
    StructType schema = new StructType(new StructField[]{
            new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("features", new VectorUDT(), false, Metadata.empty())
    });
    System.out.println(SchemaExporter.exportToJson(new HashSet<String>(Arrays.asList("id", "features")), schema));
}
Example 8: testColumnExport3
import org.apache.spark.mllib.linalg.VectorUDT; // import the featured package/class
/**
 * Output:
 * [{"name":"id","datatype":"double"},{"name":"vector1","datatype":"double []"}]
 */
@Test
public void testColumnExport3() {
    StructType schema = new StructType(new StructField[]{
            new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("value1", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("vector1", new VectorUDT(), false, Metadata.empty())
    });
    System.out.println(SchemaExporter.exportToJson(new HashSet<String>(Arrays.asList("id", "vector1")), schema));
}
Example 9: init
import org.apache.spark.mllib.linalg.VectorUDT; // import the featured package/class
@Override
public StructField init(Evaluator evaluator) {
    return DataTypes.createStructField(getColumnName(), new VectorUDT(), false);
}
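Note that DataTypes.createStructField(name, dataType, nullable) is shorthand for new StructField(name, dataType, nullable, Metadata.empty()), so this init method produces the same kind of non-nullable vector field as the schemas built explicitly in the other examples.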
Example 10: testVectorBinarizerSparse
import org.apache.spark.mllib.linalg.VectorUDT; // import the featured package/class
@Test
public void testVectorBinarizerSparse() {
    // prepare data
    int[] sparseArray1 = {5, 6, 11, 4, 7, 9, 8, 14, 13};
    double[] sparseArray1Values = {-5d, 7d, 1d, -2d, -4d, -1d, 31d, -1d, -3d};
    int[] sparseArray2 = {2, 6, 1};
    double[] sparseArray2Values = {1d, 11d, 2d};
    int[] sparseArray3 = {4, 6, 1};
    double[] sparseArray3Values = {52d, 71d, 11d};
    int[] sparseArray4 = {4, 1, 2};
    double[] sparseArray4Values = {17d, 7d, 9d};

    JavaRDD<Row> jrdd = sc.parallelize(Arrays.asList(
            RowFactory.create(3d, 4d, new SparseVector(20, sparseArray1, sparseArray1Values)),
            RowFactory.create(4d, 5d, new SparseVector(20, sparseArray2, sparseArray2Values)),
            RowFactory.create(5d, 5d, new SparseVector(20, sparseArray3, sparseArray3Values)),
            RowFactory.create(6d, 5d, new SparseVector(20, sparseArray4, sparseArray4Values))
    ));
    StructType schema = new StructType(new StructField[]{
            new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("value1", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("vector1", new VectorUDT(), false, Metadata.empty())
    });
    DataFrame df = sqlContext.createDataFrame(jrdd, schema);

    VectorBinarizer vectorBinarizer = new VectorBinarizer()
            .setInputCol("vector1")
            .setOutputCol("binarized");

    // Export this model
    byte[] exportedModel = ModelExporter.export(vectorBinarizer, null);

    // Import and get Transformer
    Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

    // Compare predictions
    Row[] sparkOutput = vectorBinarizer.transform(df).orderBy("id")
            .select("id", "value1", "vector1", "binarized").collect();
    for (Row row : sparkOutput) {
        Map<String, Object> data = new HashMap<>();
        data.put(vectorBinarizer.getInputCol(), ((SparseVector) row.get(2)).toArray());
        transformer.transform(data);
        double[] output = (double[]) data.get(vectorBinarizer.getOutputCol());
        assertArrayEquals(output, ((SparseVector) row.get(3)).toArray(), 0d);
    }
}
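Note that SparseVector.toArray() returns a fully densified double[], so the imported transformer receives the same dense representation here as in the dense test above; only the Spark-side input type differs.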
Example 11: testVectorAssembler
import org.apache.spark.mllib.linalg.VectorUDT; // import the featured package/class
@Test
public void testVectorAssembler() {
    // prepare data
    JavaRDD<Row> jrdd = sc.parallelize(Arrays.asList(
            RowFactory.create(0d, 1d, new DenseVector(new double[]{2d, 3d})),
            RowFactory.create(1d, 2d, new DenseVector(new double[]{3d, 4d})),
            RowFactory.create(2d, 3d, new DenseVector(new double[]{4d, 5d})),
            RowFactory.create(3d, 4d, new DenseVector(new double[]{5d, 6d})),
            RowFactory.create(4d, 5d, new DenseVector(new double[]{6d, 7d}))
    ));
    StructType schema = new StructType(new StructField[]{
            new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("value1", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("vector1", new VectorUDT(), false, Metadata.empty())
    });
    DataFrame df = sqlContext.createDataFrame(jrdd, schema);

    VectorAssembler vectorAssembler = new VectorAssembler()
            .setInputCols(new String[]{"value1", "vector1"})
            .setOutputCol("feature");

    // Export this model
    byte[] exportedModel = ModelExporter.export(vectorAssembler, null);
    String exportedModelJson = new String(exportedModel);

    // Import and get Transformer
    Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

    // Compare predictions
    Row[] sparkOutput = vectorAssembler.transform(df).orderBy("id")
            .select("id", "value1", "vector1", "feature").collect();
    for (Row row : sparkOutput) {
        Map<String, Object> data = new HashMap<>();
        data.put(vectorAssembler.getInputCols()[0], row.get(1));
        data.put(vectorAssembler.getInputCols()[1], ((DenseVector) row.get(2)).toArray());
        transformer.transform(data);
        double[] output = (double[]) data.get(vectorAssembler.getOutputCol());
        assertArrayEquals(output, ((DenseVector) row.get(3)).toArray(), 0d);
    }
}
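VectorAssembler concatenates its input columns in the order they are given. A minimal sketch of that concatenation for one scalar column followed by one vector column (the helper name is hypothetical):

// For the first row above: assemble(1d, new double[]{2d, 3d}) -> [1.0, 2.0, 3.0]
static double[] assemble(double scalar, double[] vector) {
    double[] out = new double[1 + vector.length];
    out[0] = scalar;
    System.arraycopy(vector, 0, out, 1, vector.length);
    return out;
}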
Example 12: testChiSqSelector
import org.apache.spark.mllib.linalg.VectorUDT; // import the featured package/class
@Test
public void testChiSqSelector() {
    // prepare data
    JavaRDD<Row> jrdd = sc.parallelize(Arrays.asList(
            RowFactory.create(0d, 0d, new DenseVector(new double[]{8d, 7d, 0d})),
            RowFactory.create(1d, 1d, new DenseVector(new double[]{0d, 9d, 6d})),
            RowFactory.create(2d, 1d, new DenseVector(new double[]{0.0d, 9.0d, 8.0d})),
            RowFactory.create(3d, 2d, new DenseVector(new double[]{8.0d, 9.0d, 5.0d}))
    ));
    // Values of the single feature the selector is expected to keep
    // (matching the third feature column above); declared for documentation,
    // not referenced by the assertions below.
    double[] preFilteredData = {0.0d, 6.0d, 8.0d, 5.0d};

    StructType schema = new StructType(new StructField[]{
            new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("features", new VectorUDT(), false, Metadata.empty())
    });
    DataFrame df = sqlContext.createDataFrame(jrdd, schema);

    ChiSqSelector chiSqSelector = new ChiSqSelector();
    chiSqSelector.setNumTopFeatures(1);
    chiSqSelector.setFeaturesCol("features");
    chiSqSelector.setLabelCol("label");
    chiSqSelector.setOutputCol("output");
    ChiSqSelectorModel chiSqSelectorModel = chiSqSelector.fit(df);

    // Export this model
    byte[] exportedModel = ModelExporter.export(chiSqSelectorModel, null);
    String exportedModelJson = new String(exportedModel);

    // Import and get Transformer
    Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

    // Compare predictions
    Row[] sparkOutput = chiSqSelectorModel.transform(df).orderBy("id")
            .select("id", "label", "features", "output").collect();
    for (Row row : sparkOutput) {
        Map<String, Object> data = new HashMap<>();
        data.put(chiSqSelectorModel.getFeaturesCol(), ((DenseVector) row.get(2)).toArray());
        transformer.transform(data);
        double[] output = (double[]) data.get(chiSqSelectorModel.getOutputCol());
        System.out.println(Arrays.toString(output));
        assertArrayEquals(output, ((DenseVector) row.get(3)).toArray(), 0d);
    }
}