当前位置: 首页>>代码示例>>Java>>正文


Java VectorUDT类代码示例

本文整理汇总了Java中org.apache.spark.mllib.linalg.VectorUDT的典型用法代码示例。如果您正苦于以下问题:Java VectorUDT类的具体用法?Java VectorUDT怎么用?Java VectorUDT使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


VectorUDT类属于org.apache.spark.mllib.linalg包,在下文中一共展示了VectorUDT类的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: start

import org.apache.spark.mllib.linalg.VectorUDT; //导入依赖的package包/类
private void start() {
	// Local Spark session for this standalone demo.
	SparkSession spark = SparkSession.builder().appName("First Prediction").master("local").getOrCreate();

	// Schema of a typical ML input frame: a double label plus a feature vector.
	StructField labelField = new StructField("label", DataTypes.DoubleType, false, Metadata.empty());
	StructField featuresField = new StructField("features", new VectorUDT(), false, Metadata.empty());
	StructType schema = new StructType(new StructField[] { labelField, featuresField, });

	// TODO this example is not working yet
}
 
开发者ID:jgperrin,项目名称:net.jgp.labs.spark,代码行数:10,代码来源:FirstPrediction.java

示例2: exportToJson

import org.apache.spark.mllib.linalg.VectorUDT; //导入依赖的package包/类
/**
 * Serializes the (name, datatype) descriptors of the selected dataframe
 * columns as JSON. Vector and numeric Spark types are collapsed onto the
 * exporter's own type constants.
 *
 * @throws UnsupportedOperationException for column types the exporter cannot represent
 */
public static String exportToJson(Set<String> columns, StructType dfSchema) {
    List<Field> schema = new ArrayList<>();

    for (String column : columns) {
        StructField field = dfSchema.fields()[dfSchema.fieldIndex(column)];

        if (field.dataType() instanceof VectorUDT) {
            // MLlib vectors are exported as arrays of doubles.
            schema.add(new Field(field.name(), DOUBLE_ARRAY));
        } else if (field.dataType() instanceof StringType) {
            schema.add(new Field(field.name(), STRING));
        } else if (field.dataType() instanceof BooleanType) {
            schema.add(new Field(field.name(), BOOLEAN));
        } else if (field.dataType() instanceof DoubleType
                || field.dataType() instanceof DecimalType
                || field.dataType() instanceof FloatType
                || field.dataType() instanceof IntegerType
                || field.dataType() instanceof LongType
                || field.dataType() instanceof ShortType) {
            // Every numeric flavour is widened to a plain double.
            schema.add(new Field(field.name(), DOUBLE));
        } else if (field.dataType() instanceof ArrayType) {
            // Only arrays of strings or of doubles are representable.
            if (((ArrayType) field.dataType()).elementType() instanceof StringType) {
                schema.add(new Field(field.name(), STRING_ARRAY));
            } else if (((ArrayType) field.dataType()).elementType() instanceof DoubleType) {
                schema.add(new Field(field.name(), DOUBLE_ARRAY));
            } else {
                throw new UnsupportedOperationException("Cannot support data of type " + field.dataType());
            }
        } else {
            throw new UnsupportedOperationException("Cannot support data of type " + field.dataType());
        }
    }
    return gson.toJson(schema);
}
 
开发者ID:flipkart-incubator,项目名称:spark-transformers,代码行数:33,代码来源:SchemaExporter.java

示例3: exportSchemaToJson

import org.apache.spark.mllib.linalg.VectorUDT; //导入依赖的package包/类
/**
 * Serializes every column of the dataframe schema as a (name, datatype)
 * descriptor in JSON, mapping Spark types onto the exporter's type constants.
 *
 * @throws UnsupportedOperationException for column types the exporter cannot represent
 */
public static String exportSchemaToJson(StructType dfSchema) {
    List<Field> schema = new ArrayList<>();

    for (StructField structField : dfSchema.fields()) {
        if (structField.dataType() instanceof VectorUDT) {
            // MLlib vectors are exported as arrays of doubles.
            schema.add(new Field(structField.name(), DOUBLE_ARRAY));
        } else if (structField.dataType() instanceof StringType) {
            schema.add(new Field(structField.name(), STRING));
        } else if (structField.dataType() instanceof BooleanType) {
            schema.add(new Field(structField.name(), BOOLEAN));
        } else if (structField.dataType() instanceof DoubleType
                || structField.dataType() instanceof DecimalType
                || structField.dataType() instanceof FloatType
                || structField.dataType() instanceof IntegerType
                || structField.dataType() instanceof LongType
                || structField.dataType() instanceof ShortType) {
            // Every numeric flavour is widened to a plain double.
            schema.add(new Field(structField.name(), DOUBLE));
        } else if (structField.dataType() instanceof ArrayType) {
            // Only arrays of strings or of doubles are representable.
            if (((ArrayType) structField.dataType()).elementType() instanceof StringType) {
                schema.add(new Field(structField.name(), STRING_ARRAY));
            } else if (((ArrayType) structField.dataType()).elementType() instanceof DoubleType) {
                schema.add(new Field(structField.name(), DOUBLE_ARRAY));
            } else {
                throw new UnsupportedOperationException("Cannot support data of type " + structField.dataType());
            }
        } else {
            throw new UnsupportedOperationException("Cannot support data of type " + structField.dataType());
        }
    }
    return gson.toJson(schema);
}
 
开发者ID:flipkart-incubator,项目名称:spark-transformers,代码行数:31,代码来源:SchemaExporter.java

示例4: testVectorBinarizerDense

import org.apache.spark.mllib.linalg.VectorUDT; //导入依赖的package包/类
@Test
public void testVectorBinarizerDense() {
    // Three rows of (id, value1, dense vector) test data.
    JavaRDD<Row> rdd = sc.parallelize(Arrays.asList(
            RowFactory.create(0d, 1d, new DenseVector(new double[]{-2d, -3d, -4d, -1d, 6d, -7d, 8d, 0d, 0d, 0d, 0d, 0d})),
            RowFactory.create(1d, 2d, new DenseVector(new double[]{4d, -5d, 6d, 7d, -8d, 9d, -10d, 0d, 0d, 0d, 0d, 0d})),
            RowFactory.create(2d, 3d, new DenseVector(new double[]{-5d, 6d, -8d, 9d, 10d, 11d, 12d, 0d, 0d, 0d, 0d, 0d}))
    ));

    StructType schema = new StructType(new StructField[]{
            new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("value1", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("vector1", new VectorUDT(), false, Metadata.empty())
    });
    DataFrame df = sqlContext.createDataFrame(rdd, schema);

    VectorBinarizer vectorBinarizer = new VectorBinarizer()
            .setInputCol("vector1")
            .setOutputCol("binarized")
            .setThreshold(2d);

    // Round-trip the transformer through the export/import bridge.
    byte[] exportedModel = ModelExporter.export(vectorBinarizer, df);
    Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

    // The imported transformer must reproduce Spark's own output row by row.
    Row[] sparkOutput = vectorBinarizer.transform(df).orderBy("id").select("id", "value1", "vector1", "binarized").collect();
    for (Row row : sparkOutput) {
        Map<String, Object> data = new HashMap<>();
        data.put(vectorBinarizer.getInputCol(), ((DenseVector) row.get(2)).toArray());
        transformer.transform(data);

        double[] output = (double[]) data.get(vectorBinarizer.getOutputCol());
        assertArrayEquals(output, ((DenseVector) row.get(3)).toArray(), 0d);
    }
}
 
开发者ID:flipkart-incubator,项目名称:spark-transformers,代码行数:40,代码来源:VectorBinarizerBridgeTest.java

示例5: testSchema1

import org.apache.spark.mllib.linalg.VectorUDT; //导入依赖的package包/类
/**
 * Expected output:
 * [{"name":"id","datatype":"double"},{"name":"label","datatype":"double"},{"name":"features","datatype":"double []"}]
 */
@Test
public void testSchema1() {
    // id/label map to "double"; the vector column maps to "double []".
    StructField[] fields = new StructField[]{
            new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("features", new VectorUDT(), false, Metadata.empty())
    };
    System.out.println(SchemaExporter.exportSchemaToJson(new StructType(fields)));
}
 
开发者ID:flipkart-incubator,项目名称:spark-transformers,代码行数:14,代码来源:SchemaExporterTest.java

示例6: testSchema3

import org.apache.spark.mllib.linalg.VectorUDT; //导入依赖的package包/类
/**
 * Expected output:
 * [{"name":"id","datatype":"double"},{"name":"value1","datatype":"double"},{"name":"vector1","datatype":"double []"}]
 */
@Test
public void testSchema3() {
    // Same shape as testSchema1, with differently named columns.
    StructField[] fields = new StructField[]{
            new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("value1", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("vector1", new VectorUDT(), false, Metadata.empty())
    };
    System.out.println(SchemaExporter.exportSchemaToJson(new StructType(fields)));
}
 
开发者ID:flipkart-incubator,项目名称:spark-transformers,代码行数:14,代码来源:SchemaExporterTest.java

示例7: testColumnExport1

import org.apache.spark.mllib.linalg.VectorUDT; //导入依赖的package包/类
/**
 * Expected output:
 * [{"name":"features","datatype":"double []"},{"name":"id","datatype":"double"}]
 */
@Test
public void testColumnExport1() {
    StructType schema = new StructType(new StructField[]{
            new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("features", new VectorUDT(), false, Metadata.empty())
    });
    // Only a subset of the schema columns is exported here.
    HashSet<String> selected = new HashSet<String>(Arrays.asList("id", "features"));
    System.out.println(SchemaExporter.exportToJson(selected, schema));
}
 
开发者ID:flipkart-incubator,项目名称:spark-transformers,代码行数:14,代码来源:SchemaExporterTest.java

示例8: testColumnExport3

import org.apache.spark.mllib.linalg.VectorUDT; //导入依赖的package包/类
/**
 * Expected output:
 * [{"name":"id","datatype":"double"},{"name":"vector1","datatype":"double []"}]
 */
@Test
public void testColumnExport3() {
    StructType schema = new StructType(new StructField[]{
            new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("value1", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("vector1", new VectorUDT(), false, Metadata.empty())
    });
    // Only a subset of the schema columns is exported here.
    HashSet<String> selected = new HashSet<String>(Arrays.asList("id", "vector1"));
    System.out.println(SchemaExporter.exportToJson(selected, schema));
}
 
开发者ID:flipkart-incubator,项目名称:spark-transformers,代码行数:14,代码来源:SchemaExporterTest.java

示例9: init

import org.apache.spark.mllib.linalg.VectorUDT; //导入依赖的package包/类
@Override
public StructField init(Evaluator evaluator){
	// The produced column is a non-nullable MLlib vector field.
	String columnName = getColumnName();
	return DataTypes.createStructField(columnName, new VectorUDT(), false);
}
 
开发者ID:jeremyore,项目名称:spark-pmml-import,代码行数:5,代码来源:ProbabilityColumnProducer.java

示例10: testVectorBinarizerSparse

import org.apache.spark.mllib.linalg.VectorUDT; //导入依赖的package包/类
@Test
public void testVectorBinarizerSparse() {
    // Sparse vector fixtures: index arrays paired with their value arrays.
    int[] indices1 = {5, 6, 11, 4, 7, 9, 8, 14, 13};
    double[] values1 = {-5d, 7d, 1d, -2d, -4d, -1d, 31d, -1d, -3d};

    int[] indices2 = {2, 6, 1};
    double[] values2 = {1d, 11d, 2d};

    int[] indices3 = {4, 6, 1};
    double[] values3 = {52d, 71d, 11d};

    int[] indices4 = {4, 1, 2};
    double[] values4 = {17d, 7d, 9d};

    JavaRDD<Row> rdd = sc.parallelize(Arrays.asList(
            RowFactory.create(3d, 4d, new SparseVector(20, indices1, values1)),
            RowFactory.create(4d, 5d, new SparseVector(20, indices2, values2)),
            RowFactory.create(5d, 5d, new SparseVector(20, indices3, values3)),
            RowFactory.create(6d, 5d, new SparseVector(20, indices4, values4))
    ));

    StructType schema = new StructType(new StructField[]{
            new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("value1", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("vector1", new VectorUDT(), false, Metadata.empty())
    });
    DataFrame df = sqlContext.createDataFrame(rdd, schema);

    VectorBinarizer vectorBinarizer = new VectorBinarizer()
            .setInputCol("vector1")
            .setOutputCol("binarized");

    // Round-trip the transformer through the export/import bridge.
    byte[] exportedModel = ModelExporter.export(vectorBinarizer, null);
    Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

    // The imported transformer must reproduce Spark's own output row by row.
    Row[] sparkOutput = vectorBinarizer.transform(df).orderBy("id").select("id", "value1", "vector1", "binarized").collect();
    for (Row row : sparkOutput) {
        Map<String, Object> data = new HashMap<>();
        data.put(vectorBinarizer.getInputCol(), ((SparseVector) row.get(2)).toArray());
        transformer.transform(data);

        double[] output = (double[]) data.get(vectorBinarizer.getOutputCol());
        assertArrayEquals(output, ((SparseVector) row.get(3)).toArray(), 0d);
    }
}
 
开发者ID:flipkart-incubator,项目名称:spark-transformers,代码行数:52,代码来源:VectorBinarizerBridgeTest.java

示例11: testVectorAssembler

import org.apache.spark.mllib.linalg.VectorUDT; //导入依赖的package包/类
/**
 * Verifies that a {@code VectorAssembler} exported through {@code ModelExporter}
 * and re-imported via {@code ModelImporter} assembles the same "feature"
 * vectors that Spark produces natively.
 */
@Test
public void testVectorAssembler() {
    // prepare data: rows of (id, value1, vector1)
    JavaRDD<Row> jrdd = sc.parallelize(Arrays.asList(
            RowFactory.create(0d, 1d, new DenseVector(new double[]{2d, 3d})),
            RowFactory.create(1d, 2d, new DenseVector(new double[]{3d, 4d})),
            RowFactory.create(2d, 3d, new DenseVector(new double[]{4d, 5d})),
            RowFactory.create(3d, 4d, new DenseVector(new double[]{5d, 6d})),
            RowFactory.create(4d, 5d, new DenseVector(new double[]{6d, 7d}))
    ));

    StructType schema = new StructType(new StructField[]{
            new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("value1", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("vector1", new VectorUDT(), false, Metadata.empty())
    });

    DataFrame df = sqlContext.createDataFrame(jrdd, schema);
    VectorAssembler vectorAssembler = new VectorAssembler()
            .setInputCols(new String[]{"value1", "vector1"})
            .setOutputCol("feature");

    // Export this model
    byte[] exportedModel = ModelExporter.export(vectorAssembler, null);

    // Import and get Transformer
    // (the unused String copy of the exported payload was removed)
    Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

    // compare predictions: the imported transformer must match Spark row by row
    Row[] sparkOutput = vectorAssembler.transform(df).orderBy("id").select("id", "value1", "vector1", "feature").collect();
    for (Row row : sparkOutput) {
        Map<String, Object> data = new HashMap<>();
        // "value1" is a scalar double; "vector1" is fed in as a raw double[]
        data.put(vectorAssembler.getInputCols()[0], row.get(1));
        data.put(vectorAssembler.getInputCols()[1], ((DenseVector) row.get(2)).toArray());
        transformer.transform(data);
        double[] output = (double[]) data.get(vectorAssembler.getOutputCol());
        assertArrayEquals(output, ((DenseVector) row.get(3)).toArray(), 0d);
    }
}
 
开发者ID:flipkart-incubator,项目名称:spark-transformers,代码行数:43,代码来源:VectorAssemblerBridgeTest.java

示例12: testChiSqSelector

import org.apache.spark.mllib.linalg.VectorUDT; //导入依赖的package包/类
/**
 * Verifies that a fitted {@code ChiSqSelectorModel} exported through
 * {@code ModelExporter} and re-imported via {@code ModelImporter} selects the
 * same feature values that Spark produces natively.
 */
@Test
public void testChiSqSelector() {
    // prepare data: rows of (id, label, features)
    JavaRDD<Row> jrdd = sc.parallelize(Arrays.asList(
            RowFactory.create(0d, 0d, new DenseVector(new double[]{8d, 7d, 0d})),
            RowFactory.create(1d, 1d, new DenseVector(new double[]{0d, 9d, 6d})),
            RowFactory.create(2d, 1d, new DenseVector(new double[]{0.0d, 9.0d, 8.0d})),
            RowFactory.create(3d, 2d, new DenseVector(new double[]{8.0d, 9.0d, 5.0d}))
    ));

    StructType schema = new StructType(new StructField[]{
            new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("features", new VectorUDT(), false, Metadata.empty())
    });

    DataFrame df = sqlContext.createDataFrame(jrdd, schema);
    // select the single most label-predictive feature
    ChiSqSelector chiSqSelector = new ChiSqSelector();
    chiSqSelector.setNumTopFeatures(1);
    chiSqSelector.setFeaturesCol("features");
    chiSqSelector.setLabelCol("label");
    chiSqSelector.setOutputCol("output");

    ChiSqSelectorModel chiSqSelectorModel = chiSqSelector.fit(df);

    // Export this model
    byte[] exportedModel = ModelExporter.export(chiSqSelectorModel, null);

    // Import and get Transformer
    // (the unused String copy of the exported payload was removed)
    Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

    // compare predictions: the imported transformer must match Spark row by row
    Row[] sparkOutput = chiSqSelectorModel.transform(df).orderBy("id").select("id", "label", "features", "output").collect();
    for (Row row : sparkOutput) {
        Map<String, Object> data = new HashMap<>();
        data.put(chiSqSelectorModel.getFeaturesCol(), ((DenseVector) row.get(2)).toArray());
        transformer.transform(data);
        double[] output = (double[]) data.get(chiSqSelectorModel.getOutputCol());
        assertArrayEquals(output, ((DenseVector) row.get(3)).toArray(), 0d);
    }
}
 
开发者ID:flipkart-incubator,项目名称:spark-transformers,代码行数:48,代码来源:ChiSqSelectorBridgeTest.java


注:本文中的org.apache.spark.mllib.linalg.VectorUDT类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。