当前位置: 首页>>代码示例>>Java>>正文


Java SparseVector类代码示例

本文整理汇总了Java中org.apache.spark.ml.linalg.SparseVector的典型用法代码示例。如果您正苦于以下问题:Java SparseVector类的具体用法?Java SparseVector怎么用?Java SparseVector使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


SparseVector类属于org.apache.spark.ml.linalg包,在下文中一共展示了SparseVector类的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testGradientBoostClassification

import org.apache.spark.ml.linalg.SparseVector; //导入依赖的package包/类
@Test
public void testGradientBoostClassification() {
	// Load the data stored in LIBSVM format as a DataFrame.
	String datapath = "src/test/resources/binary_classification_test.libsvm";

	Dataset<Row> data = spark.read().format("libsvm").load(datapath);

	// Split the data into training and test sets (30% held out for testing).
	Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
	Dataset<Row> trainingData = splits[0];
	Dataset<Row> testData = splits[1];

	// Train a gradient-boosted-trees classification model.
	// (Original comment said "RandomForest", but this is a GBTClassifier.)
	GBTClassificationModel classificationModel = new GBTClassifier().fit(trainingData);

	// Round-trip the fitted model through the custom export/import bridge.
	byte[] exportedModel = ModelExporter.export(classificationModel);

	Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

	List<Row> sparkOutput =
	        classificationModel.transform(testData).select("features", "prediction","label").collectAsList();

	// compare predictions: the imported transformer must reproduce Spark's
	// prediction for every test row, within EPSILON.
	for (Row row : sparkOutput) {
		Map<String, Object> data_ = new HashMap<>();
		data_.put("features", ((SparseVector) row.get(0)).toArray());
		data_.put("label", (row.get(2)).toString());
		transformer.transform(data_);
		System.out.println(data_);
		System.out.println(data_.get("prediction")+" ,"+row.get(1));
		assertEquals((double) data_.get("prediction"), (double) row.get(1), EPSILON);
	}

}
 
开发者ID:flipkart-incubator,项目名称:spark-transformers,代码行数:35,代码来源:GradientBoostClassificationModelTest.java

示例2: testDecisionTreeRegressionPrediction

import org.apache.spark.ml.linalg.SparseVector; //导入依赖的package包/类
@Test
public void testDecisionTreeRegressionPrediction() {
    // Read the LIBSVM-formatted regression fixture into a DataFrame.
    String libsvmPath = "src/test/resources/regression_test.libsvm";
    Dataset<Row> dataset = spark.read().format("libsvm").load(libsvmPath);

    // Hold out 30% of the rows for testing.
    Dataset<Row>[] parts = dataset.randomSplit(new double[]{0.7, 0.3});
    Dataset<Row> train = parts[0];
    Dataset<Row> test = parts[1];

    // Fit a decision-tree regressor on the training split.
    DecisionTreeRegressionModel sparkModel = new DecisionTreeRegressor().fit(train);
    train.printSchema();

    List<Row> sparkRows = sparkModel.transform(test).select("features", "prediction").collectAsList();
    byte[] serialized = ModelExporter.export(sparkModel);

    // Re-import the serialized model through the bridge transformer.
    DecisionTreeTransformer bridge = (DecisionTreeTransformer) ModelImporter.importAndGetTransformer(serialized);
    System.out.println(bridge);

    // The bridge must reproduce Spark's prediction for every test row.
    for (Row row : sparkRows) {
        Map<String, Object> input = new HashMap<>();
        input.put("features", ((SparseVector) row.get(0)).toArray());
        bridge.transform(input);
        System.out.println(input);
        System.out.println(input.get("prediction"));
        assertEquals((double) input.get("prediction"), (double) row.get(1), EPSILON);
    }
}
 
开发者ID:flipkart-incubator,项目名称:spark-transformers,代码行数:34,代码来源:DecisionTreeRegressionModelBridgeTest.java

示例3: testDecisionTreeClassificationPrediction

import org.apache.spark.ml.linalg.SparseVector; //导入依赖的package包/类
@Test
public void testDecisionTreeClassificationPrediction() {
    // Read the LIBSVM-formatted classification fixture into a DataFrame.
    String libsvmPath = "src/test/resources/classification_test.libsvm";
    Dataset<Row> dataset = spark.read().format("libsvm").load(libsvmPath);

    // Hold out 30% of the rows for testing.
    Dataset<Row>[] parts = dataset.randomSplit(new double[]{0.7, 0.3});
    Dataset<Row> train = parts[0];
    Dataset<Row> test = parts[1];

    // Fit a decision-tree classifier on the training split.
    DecisionTreeClassificationModel sparkModel = new DecisionTreeClassifier().fit(train);
    train.printSchema();

    List<Row> sparkRows = sparkModel.transform(test).select("features", "prediction","rawPrediction").collectAsList();
    byte[] serialized = ModelExporter.export(sparkModel);

    // Re-import the serialized model through the bridge transformer.
    DecisionTreeTransformer bridge = (DecisionTreeTransformer) ModelImporter.importAndGetTransformer(serialized);

    // Both the class prediction and the raw per-class scores must agree.
    for (Row row : sparkRows) {
        double[] expectedRaw = ((DenseVector) row.get(2)).toArray();
        Map<String, Object> input = new HashMap<>();
        input.put("features", ((SparseVector) row.get(0)).toArray());
        bridge.transform(input);
        System.out.println(input);
        System.out.println(input.get("prediction"));
        assertEquals((double) input.get("prediction"), (double) row.get(1), EPSILON);
        assertArrayEquals((double[]) input.get("rawPrediction"), expectedRaw, EPSILON);
    }
}
 
开发者ID:flipkart-incubator,项目名称:spark-transformers,代码行数:34,代码来源:DecisionTreeClassificationModelBridgeTest.java

示例4: call

import org.apache.spark.ml.linalg.SparseVector; //导入依赖的package包/类
/**
 * Converts one indexed DataFrame row into a (rowIndex, PairWritableBlock) pair
 * holding a 1 x _clen MatrixBlock.
 *
 * @param arg0 tuple of the source Row and its 0-based position in the dataset
 * @return 1-based row index paired with a single-block writable for that row
 * @throws Exception propagated from block construction/append
 */
@Override
public Tuple2<Long, Writable> call(Tuple2<Row, Long> arg0) 
	throws Exception 
{
	// Convert the 0-based Spark index to the 1-based matrix row index.
	long rowix = arg0._2() + 1;
	
	//process row data
	// Skip a leading ID column when the schema carries one.
	int off = _containsID ? 1: 0;
	// In vector mode the payload is a single Vector cell; otherwise the Row itself.
	Object obj = _isVector ? arg0._1().get(off) : arg0._1();
	// Allocate the target block in sparse layout only for sparse input vectors.
	boolean sparse = (obj instanceof SparseVector);
	MatrixBlock mb = new MatrixBlock(1, (int)_clen, sparse);
	
	if( _isVector ) {
		Vector vect = (Vector) obj;
		if( vect instanceof SparseVector ) {
			// Copy only the non-zero entries, preserving their column indices.
			SparseVector svect = (SparseVector) vect;
			int lnnz = svect.numNonzeros();
			for( int k=0; k<lnnz; k++ )
				mb.appendValue(0, svect.indices()[k], svect.values()[k]);
		}
		else { //dense
			for( int j=0; j<_clen; j++ )
				mb.appendValue(0, j, vect.apply(j));	
		}
	}
	else { //row
		// Copy _clen cells from the Row, shifting past the optional ID column.
		Row row = (Row) obj;
		for( int j=off; j<off+_clen; j++ )
			mb.appendValue(0, j-off, UtilFunctions.getDouble(row.get(j)));
	}
	// Let the block pick the most compact representation for its actual nnz.
	mb.examSparsity();
	return new Tuple2<>(rowix, new PairWritableBlock(new MatrixIndexes(1,1),mb));
}
 
开发者ID:apache,项目名称:systemml,代码行数:34,代码来源:RemoteDPParForSpark.java

示例5: testDecisionTreeRegressionPrediction

import org.apache.spark.ml.linalg.SparseVector; //导入依赖的package包/类
@Test
public void testDecisionTreeRegressionPrediction() {
    // Read the LIBSVM-formatted regression fixture into a DataFrame.
    String libsvmPath = "src/test/resources/regression_test.libsvm";
    Dataset<Row> dataset = spark.read().format("libsvm").load(libsvmPath);

    // Hold out 30% of the rows for testing.
    Dataset<Row>[] parts = dataset.randomSplit(new double[]{0.7, 0.3});
    Dataset<Row> train = parts[0];
    Dataset<Row> test = parts[1];

    // Index the string label into "labelIndex"; unseen labels are skipped.
    StringIndexer labelIndexer = new StringIndexer()
            .setInputCol("label")
            .setOutputCol("labelIndex").setHandleInvalid("skip");

    DecisionTreeRegressor regressor =
            new DecisionTreeRegressor().setLabelCol("labelIndex").setFeaturesCol("features");

    Pipeline pipeline = new Pipeline()
            .setStages(new PipelineStage[]{labelIndexer, regressor});

    // Fitting the pipeline runs the indexer and trains the tree.
    PipelineModel fitted = pipeline.fit(train);

    // Round-trip the whole pipeline through the export/import bridge.
    byte[] serialized = ModelExporter.export(fitted);

    Transformer bridge = ModelImporter.importAndGetTransformer(serialized);
    List<Row> sparkRows = fitted.transform(test).select("features", "prediction", "label").collectAsList();

    // The bridge must reproduce Spark's prediction for every test row.
    for (Row row : sparkRows) {
        Map<String, Object> input = new HashMap<>();
        input.put("features", ((SparseVector) row.get(0)).toArray());
        input.put("label", (row.get(2)).toString());
        bridge.transform(input);
        System.out.println(input);
        System.out.println(input.get("prediction"));
        assertEquals((double) input.get("prediction"), (double) row.get(1), EPSILON);
    }
}
 
开发者ID:flipkart-incubator,项目名称:spark-transformers,代码行数:42,代码来源:DecisionTreeRegressionModelBridgePipelineTest.java

示例6: testGradientBoostClassification

import org.apache.spark.ml.linalg.SparseVector; //导入依赖的package包/类
@Test
public void testGradientBoostClassification() {
	// Load the data stored in LIBSVM format as a DataFrame.
	String datapath = "src/test/resources/binary_classification_test.libsvm";

	Dataset<Row> data = spark.read().format("libsvm").load(datapath);

	// Index the string label into a numeric "labelIndex" column.
	StringIndexer indexer = new StringIndexer()
			.setInputCol("label")
			.setOutputCol("labelIndex");

	// Split the data into training and test sets (30% held out for testing).
	Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
	Dataset<Row> trainingData = splits[0];
	Dataset<Row> testData = splits[1];

	// Train a gradient-boosted-trees classifier on the indexed label.
	// (Original comment said "RandomForest"; this is a GBTClassifier.
	// Also removed a stray duplicate semicolon after this statement.)
	GBTClassifier classificationModel = new GBTClassifier()
			.setLabelCol("labelIndex")
			.setFeaturesCol("features");

	Pipeline pipeline = new Pipeline()
			.setStages(new PipelineStage[]{indexer, classificationModel});

	// Fitting the pipeline runs the indexer and trains the classifier.
	PipelineModel sparkPipeline = pipeline.fit(trainingData);

	// Export this model
	byte[] exportedModel = ModelExporter.export(sparkPipeline);

	// Import and get Transformer
	Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

	List<Row> sparkOutput = sparkPipeline.transform(testData).select("features", "prediction", "label").collectAsList();

	// compare predictions: the imported transformer must reproduce Spark's
	// prediction for every test row, within EPSILON.
	for (Row row : sparkOutput) {
		Map<String, Object> data_ = new HashMap<>();
		data_.put("features", ((SparseVector) row.get(0)).toArray());
		data_.put("label", (row.get(2)).toString());
		transformer.transform(data_);
		System.out.println(data_);
		System.out.println(data_.get("prediction")+" ,"+row.get(1));
		assertEquals((double) data_.get("prediction"), (double) row.get(1), EPSILON);
	}

}
 
开发者ID:flipkart-incubator,项目名称:spark-transformers,代码行数:45,代码来源:GradientBoostClassificationModelPipelineTest.java

示例7: testDecisionTreeClassificationWithPipeline

import org.apache.spark.ml.linalg.SparseVector; //导入依赖的package包/类
@Test
public void testDecisionTreeClassificationWithPipeline() {
    // Read the LIBSVM-formatted classification fixture into a DataFrame.
    String libsvmPath = "src/test/resources/classification_test.libsvm";
    Dataset<Row> dataset = spark.read().format("libsvm").load(libsvmPath);

    // Hold out 30% of the rows for testing.
    Dataset<Row>[] parts = dataset.randomSplit(new double[]{0.7, 0.3});
    Dataset<Row> train = parts[0];
    Dataset<Row> test = parts[1];

    // Index the string label into a numeric "labelIndex" column.
    StringIndexer labelIndexer = new StringIndexer()
            .setInputCol("label")
            .setOutputCol("labelIndex");

    // Decision-tree classifier trained against the indexed label.
    DecisionTreeClassifier classifier = new DecisionTreeClassifier()
            .setLabelCol("labelIndex")
            .setFeaturesCol("features");

    Pipeline pipeline = new Pipeline()
            .setStages(new PipelineStage[]{labelIndexer, classifier});

    // Fitting the pipeline also runs the indexer.
    PipelineModel fitted = pipeline.fit(train);

    // Round-trip the whole pipeline through the export/import bridge.
    byte[] serialized = ModelExporter.export(fitted);
    Transformer bridge = ModelImporter.importAndGetTransformer(serialized);

    List<Row> sparkRows = fitted.transform(test).select("features", "label","prediction","rawPrediction").collectAsList();

    // Both the class prediction and the raw per-class scores must agree.
    for (Row row : sparkRows) {
        double[] expectedRaw = ((DenseVector) row.get(3)).toArray();
        Map<String, Object> input = new HashMap<>();
        input.put("features", ((SparseVector) row.get(0)).toArray());
        input.put("label", (row.get(1)).toString());
        bridge.transform(input);
        System.out.println(input);
        System.out.println(input.get("prediction"));
        assertEquals((double) input.get("prediction"), (double) row.get(2), EPSILON);
        assertArrayEquals((double[]) input.get("rawPrediction"), expectedRaw, EPSILON);
    }
}
 
开发者ID:flipkart-incubator,项目名称:spark-transformers,代码行数:54,代码来源:DecisionTreeClassificationModelBridgePipelineTest.java

示例8: call

import org.apache.spark.ml.linalg.SparseVector; //导入依赖的package包/类
/**
 * Converts a partition of indexed LabeledPoints into binned MatrixBlocks.
 * Rows are accumulated into one horizontal strip of blocks at a time
 * (one block per column chunk); a strip is flushed whenever the incoming
 * row falls into a new block-row index.
 *
 * @param arg0 iterator over (LabeledPoint, 0-based row position) pairs
 * @return iterator over (block index, block) pairs for this partition
 * @throws Exception propagated from block allocation/append
 */
@Override
public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<org.apache.spark.mllib.regression.LabeledPoint,Long>> arg0) 
	throws Exception 
{
	ArrayList<Tuple2<MatrixIndexes,MatrixBlock>> ret = new ArrayList<>();

	// Number of column blocks needed to cover _clen columns at block size _bclen.
	int ncblks = (int)Math.ceil((double)_clen/_bclen);
	MatrixIndexes[] ix = new MatrixIndexes[ncblks];
	MatrixBlock[] mb = new MatrixBlock[ncblks];
	
	while( arg0.hasNext() )
	{
		Tuple2<org.apache.spark.mllib.regression.LabeledPoint,Long> tmp = arg0.next();
		org.apache.spark.mllib.regression.LabeledPoint row = tmp._1();
		// Use sparse output blocks if globally requested, or (in feature mode)
		// if this row's feature vector is itself sparse.
		boolean lsparse = _sparseX || (!_labels && 
				row.features() instanceof org.apache.spark.mllib.linalg.SparseVector);
		// 1-based global row index.
		long rowix = tmp._2() + 1;
		
		// Block-row index and the row's position inside that block.
		long rix = UtilFunctions.computeBlockIndex(rowix, _brlen);
		int pos = UtilFunctions.computeCellInBlock(rowix, _brlen);
	
		//create new blocks for entire row
		// Flush the previous strip once the row crosses into a new block-row.
		if( ix[0] == null || ix[0].getRowIndex() != rix ) {
			if( ix[0] !=null )
				flushBlocksToList(ix, mb, ret);
			long len = UtilFunctions.computeBlockSize(_rlen, rix, _brlen);
			createBlocks(rowix, (int)len, ix, mb, lsparse);
		}
		
		//process row data
		if( _labels ) {
			// Label mode: single-column output; track nnz for validation.
			double val = row.label();
			mb[0].appendValue(pos, 0, val);
			_aNnz.add((val != 0) ? 1 : 0);
		}
		else { //features
			int lnnz = row.features().numNonzeros();
			if( row.features() instanceof org.apache.spark.mllib.linalg.SparseVector )
			{
				// Scatter each non-zero into the column block that owns it.
				org.apache.spark.mllib.linalg.SparseVector srow = 
						(org.apache.spark.mllib.linalg.SparseVector) row.features();
				for( int k=0; k<lnnz; k++ ) {
					int gix = srow.indices()[k]+1;
					int cix = (int)UtilFunctions.computeBlockIndex(gix, _bclen);
					int j = UtilFunctions.computeCellInBlock(gix, _bclen);
					mb[cix-1].appendValue(pos, j, srow.values()[k]);
				}
			}
			else { //dense
				// Walk all column blocks left to right, copying contiguous cells.
				for( int cix=1, pix=0; cix<=ncblks; cix++ ) {
					int lclen = (int)UtilFunctions.computeBlockSize(_clen, cix, _bclen);
					for( int j=0; j<lclen; j++ )
						mb[cix-1].appendValue(pos, j, row.features().apply(pix++));
				}
			}
			_aNnz.add(lnnz);
		}
	}

	//flush last blocks
	flushBlocksToList(ix, mb, ret);

	return ret.iterator();
}
 
开发者ID:apache,项目名称:systemml,代码行数:65,代码来源:RDDConverterUtils.java

示例9: predictAge

import org.apache.spark.ml.linalg.SparseVector; //导入依赖的package包/类
/**
 * Predicts an author's age for a single document: tokenizes the text,
 * extracts features (plus a repeated category feature from the classifier),
 * vectorizes and L1-normalizes them with the stored Spark models, and feeds
 * the resulting sparse vector to the trained Lasso regression model.
 *
 * @param document raw input text
 * @return predicted age as a double
 * @throws InvalidFormatException if a model resource is malformed
 * @throws IOException on model/resource read failure
 */
public double predictAge(String document) throws InvalidFormatException, IOException {
	FeatureGenerator[] featureGenerators = model.getContext().getFeatureGenerators();

	List<Row> data = new ArrayList<Row>();

	String[] tokens = tokenizer.tokenize(document);

	// Classify the document and keep only the single best category.
	double prob[] = classify.getProbabilities(tokens);
	String category = classify.getBestCategory(prob);

	Collection<String> context = new ArrayList<String>();

	// Collect features from every configured generator.
	for (FeatureGenerator featureGenerator : featureGenerators) {
		Collection<String> extractedFeatures = featureGenerator.extractFeatures(tokens);
		context.addAll(extractedFeatures);
	}

	if (category != null) {
		// Repeat the category feature proportionally to document length
		// (tokens/18) — presumably a weighting heuristic; TODO confirm rationale.
		for (int i = 0; i < tokens.length / 18; i++) {
			context.add("cat=" + category);
		}
	}

	if (context.size() > 0) {
		data.add(RowFactory.create(document, context.toArray()));
	}

	StructType schema = new StructType(
			new StructField[] { new StructField("document", DataTypes.StringType, false, Metadata.empty()),
					new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty()) });

	Dataset<Row> df = spark.createDataFrame(data, schema);

	// Vectorize the feature strings against the stored vocabulary.
	CountVectorizerModel cvm = new CountVectorizerModel(model.getVocabulary()).setInputCol("text")
			.setOutputCol("feature");

	Dataset<Row> eventDF = cvm.transform(df);

	// L1-normalize the count vector.
	Normalizer normalizer = new Normalizer().setInputCol("feature").setOutputCol("normFeature").setP(1.0);

	JavaRDD<Row> normEventDF = normalizer.transform(eventDF).javaRDD();

	// NOTE(review): if no features were extracted, `data` is empty and
	// first() will throw here — consider guarding upstream.
	Row event = normEventDF.first();

	SparseVector sp = (SparseVector) event.getAs("normFeature");

	final LassoModel linModel = model.getModel();

	// Bridge from ml.linalg to mllib.linalg for the Lasso model's API.
	Vector testData = Vectors.sparse(sp.size(), sp.indices(), sp.values());
	return linModel.predict(testData.compressed());

}
 
开发者ID:USCDataScience,项目名称:AgePredictor,代码行数:53,代码来源:AgePredicterLocal.java


注:本文中的org.apache.spark.ml.linalg.SparseVector类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。