This page collects typical usage examples of the Java class org.apache.spark.ml.linalg.SparseVector. If you have been wondering what the SparseVector class is for, how to use it, or where to find worked examples, the curated class examples below should help.
The SparseVector class belongs to the org.apache.spark.ml.linalg package. Nine code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java code examples.
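Before diving into the examples, here is a minimal, self-contained sketch of how a SparseVector is built and read. This is an illustration only; it assumes a Spark 2.x dependency on the classpath and uses the standard Vectors.sparse factory:

import org.apache.spark.ml.linalg.SparseVector;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.ml.linalg.Vectors;

public class SparseVectorBasics {
    public static void main(String[] args) {
        // A vector of length 5 with nonzeros at positions 1 and 3.
        Vector v = Vectors.sparse(5, new int[]{1, 3}, new double[]{2.0, 4.0});
        SparseVector sv = (SparseVector) v;
        System.out.println(sv.size());          // 5
        System.out.println(sv.numNonzeros());   // 2
        System.out.println(sv.apply(3));        // 4.0 (value at index 3)
        // toArray() densifies the vector -- the pattern used throughout the examples below.
        double[] dense = sv.toArray();          // [0.0, 2.0, 0.0, 4.0, 0.0]
        System.out.println(java.util.Arrays.toString(dense));
    }
}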
Example 1: testGradientBoostClassification
import org.apache.spark.ml.linalg.SparseVector; // import the required package/class
@Test
public void testGradientBoostClassification() {
    // Load the data stored in LIBSVM format as a DataFrame.
    String datapath = "src/test/resources/binary_classification_test.libsvm";
    Dataset<Row> data = spark.read().format("libsvm").load(datapath);
    // Split the data into training and test sets (30% held out for testing).
    Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
    Dataset<Row> trainingData = splits[0];
    Dataset<Row> testData = splits[1];
    // Train a gradient-boosted trees (GBT) classification model.
    GBTClassificationModel classificationModel = new GBTClassifier().fit(trainingData);
    byte[] exportedModel = ModelExporter.export(classificationModel);
    Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);
    List<Row> sparkOutput =
        classificationModel.transform(testData).select("features", "prediction", "label").collectAsList();
    // Compare predictions.
    for (Row row : sparkOutput) {
        Map<String, Object> data_ = new HashMap<>();
        data_.put("features", ((SparseVector) row.get(0)).toArray());
        data_.put("label", (row.get(2)).toString());
        transformer.transform(data_);
        System.out.println(data_);
        System.out.println(data_.get("prediction") + " ," + row.get(1));
        assertEquals((double) data_.get("prediction"), (double) row.get(1), EPSILON);
    }
}
Author: flipkart-incubator, Project: spark-transformers, Lines: 35, Source: GradientBoostClassificationModelTest.java
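One caveat worth noting in this pattern: the cast to SparseVector throws a ClassCastException if a row's features column happens to hold a DenseVector. A slightly safer variant, sketched below under the assumption that the column holds some org.apache.spark.ml.linalg.Vector (the helper name featuresToArray is ours, not the project's):

import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.sql.Row;

final class VectorRows {
    // Hypothetical helper: densify the "features" column regardless of
    // whether Spark stored it as a SparseVector or a DenseVector.
    static double[] featuresToArray(Row row) {
        Vector features = (Vector) row.get(0);
        return features.toArray();
    }
}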
Example 2: testDecisionTreeRegressionPrediction
import org.apache.spark.ml.linalg.SparseVector; // import the required package/class
@Test
public void testDecisionTreeRegressionPrediction() {
    // Load the data stored in LIBSVM format as a DataFrame.
    String datapath = "src/test/resources/regression_test.libsvm";
    Dataset<Row> data = spark.read().format("libsvm").load(datapath);
    // Split the data into training and test sets (30% held out for testing).
    Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
    Dataset<Row> trainingData = splits[0];
    Dataset<Row> testData = splits[1];
    // Train a DecisionTree model.
    DecisionTreeRegressionModel regressionModel = new DecisionTreeRegressor().fit(trainingData);
    trainingData.printSchema();
    List<Row> output = regressionModel.transform(testData).select("features", "prediction").collectAsList();
    byte[] exportedModel = ModelExporter.export(regressionModel);
    DecisionTreeTransformer transformer = (DecisionTreeTransformer) ModelImporter.importAndGetTransformer(exportedModel);
    System.out.println(transformer);
    // Compare predictions.
    for (Row row : output) {
        Map<String, Object> data_ = new HashMap<>();
        data_.put("features", ((SparseVector) row.get(0)).toArray());
        transformer.transform(data_);
        System.out.println(data_);
        System.out.println(data_.get("prediction"));
        assertEquals((double) data_.get("prediction"), (double) row.get(1), EPSILON);
    }
}
Author: flipkart-incubator, Project: spark-transformers, Lines: 34, Source: DecisionTreeRegressionModelBridgeTest.java
Example 3: testDecisionTreeClassificationPrediction
import org.apache.spark.ml.linalg.SparseVector; // import the required package/class
@Test
public void testDecisionTreeClassificationPrediction() {
    // Load the data stored in LIBSVM format as a DataFrame.
    String datapath = "src/test/resources/classification_test.libsvm";
    Dataset<Row> data = spark.read().format("libsvm").load(datapath);
    // Split the data into training and test sets (30% held out for testing).
    Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
    Dataset<Row> trainingData = splits[0];
    Dataset<Row> testData = splits[1];
    // Train a DecisionTree model.
    DecisionTreeClassificationModel classifierModel = new DecisionTreeClassifier().fit(trainingData);
    trainingData.printSchema();
    List<Row> output = classifierModel.transform(testData).select("features", "prediction", "rawPrediction").collectAsList();
    byte[] exportedModel = ModelExporter.export(classifierModel);
    DecisionTreeTransformer transformer = (DecisionTreeTransformer) ModelImporter.importAndGetTransformer(exportedModel);
    // Compare predictions.
    for (Row row : output) {
        Map<String, Object> data_ = new HashMap<>();
        double[] actualRawPrediction = ((DenseVector) row.get(2)).toArray();
        data_.put("features", ((SparseVector) row.get(0)).toArray());
        transformer.transform(data_);
        System.out.println(data_);
        System.out.println(data_.get("prediction"));
        assertEquals((double) data_.get("prediction"), (double) row.get(1), EPSILON);
        assertArrayEquals((double[]) data_.get("rawPrediction"), actualRawPrediction, EPSILON);
    }
}
Author: flipkart-incubator, Project: spark-transformers, Lines: 34, Source: DecisionTreeClassificationModelBridgeTest.java
Example 4: call
import org.apache.spark.ml.linalg.SparseVector; // import the required package/class
@Override
public Tuple2<Long, Writable> call(Tuple2<Row, Long> arg0)
    throws Exception
{
    long rowix = arg0._2() + 1;

    //process row data
    int off = _containsID ? 1 : 0;
    Object obj = _isVector ? arg0._1().get(off) : arg0._1();
    boolean sparse = (obj instanceof SparseVector);
    MatrixBlock mb = new MatrixBlock(1, (int) _clen, sparse);

    if( _isVector ) {
        Vector vect = (Vector) obj;
        if( vect instanceof SparseVector ) {
            SparseVector svect = (SparseVector) vect;
            int lnnz = svect.numNonzeros();
            for( int k=0; k<lnnz; k++ )
                mb.appendValue(0, svect.indices()[k], svect.values()[k]);
        }
        else { //dense
            for( int j=0; j<_clen; j++ )
                mb.appendValue(0, j, vect.apply(j));
        }
    }
    else { //row
        Row row = (Row) obj;
        for( int j=off; j<off+_clen; j++ )
            mb.appendValue(0, j-off, UtilFunctions.getDouble(row.get(j)));
    }

    mb.examSparsity();
    return new Tuple2<>(rowix, new PairWritableBlock(new MatrixIndexes(1,1), mb));
}
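The sparse/dense branch above is the standard way to walk a Spark ml Vector from Java: read indices()/values() directly when the vector is sparse, and apply(j) over the full length when it is dense. The same pattern in isolation, as a sketch (the consume callback is a placeholder of ours for whatever the caller does with each entry):

import org.apache.spark.ml.linalg.SparseVector;
import org.apache.spark.ml.linalg.Vector;

public final class VectorWalk {
    // Visit the entries of a vector, sparse-aware.
    static void forEachEntry(Vector v) {
        if (v instanceof SparseVector) {
            SparseVector sv = (SparseVector) v;
            int[] idx = sv.indices();
            double[] vals = sv.values();
            for (int k = 0; k < idx.length; k++)
                consume(idx[k], vals[k]);   // only stored entries
        } else {
            for (int j = 0; j < v.size(); j++)
                consume(j, v.apply(j));     // every position, zeros included
        }
    }

    private static void consume(int i, double x) {
        System.out.println(i + " -> " + x);
    }
}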
Example 5: testDecisionTreeRegressionPrediction
import org.apache.spark.ml.linalg.SparseVector; // import the required package/class
@Test
public void testDecisionTreeRegressionPrediction() {
    // Load the data stored in LIBSVM format as a DataFrame.
    String datapath = "src/test/resources/regression_test.libsvm";
    Dataset<Row> data = spark.read().format("libsvm").load(datapath);
    // Split the data into training and test sets (30% held out for testing).
    Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
    Dataset<Row> trainingData = splits[0];
    Dataset<Row> testData = splits[1];
    StringIndexer indexer = new StringIndexer()
        .setInputCol("label")
        .setOutputCol("labelIndex")
        .setHandleInvalid("skip");
    DecisionTreeRegressor regressionModel =
        new DecisionTreeRegressor().setLabelCol("labelIndex").setFeaturesCol("features");
    Pipeline pipeline = new Pipeline()
        .setStages(new PipelineStage[]{indexer, regressionModel});
    PipelineModel sparkPipeline = pipeline.fit(trainingData);
    byte[] exportedModel = ModelExporter.export(sparkPipeline);
    Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);
    List<Row> output = sparkPipeline.transform(testData).select("features", "prediction", "label").collectAsList();
    // Compare predictions.
    for (Row row : output) {
        Map<String, Object> data_ = new HashMap<>();
        data_.put("features", ((SparseVector) row.get(0)).toArray());
        data_.put("label", (row.get(2)).toString());
        transformer.transform(data_);
        System.out.println(data_);
        System.out.println(data_.get("prediction"));
        assertEquals((double) data_.get("prediction"), (double) row.get(1), EPSILON);
    }
}
Author: flipkart-incubator, Project: spark-transformers, Lines: 42, Source: DecisionTreeRegressionModelBridgePipelineTest.java
Example 6: testGradientBoostClassification
import org.apache.spark.ml.linalg.SparseVector; // import the required package/class
@Test
public void testGradientBoostClassification() {
    // Load the data stored in LIBSVM format as a DataFrame.
    String datapath = "src/test/resources/binary_classification_test.libsvm";
    Dataset<Row> data = spark.read().format("libsvm").load(datapath);
    StringIndexer indexer = new StringIndexer()
        .setInputCol("label")
        .setOutputCol("labelIndex");
    // Split the data into training and test sets (30% held out for testing).
    Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
    Dataset<Row> trainingData = splits[0];
    Dataset<Row> testData = splits[1];
    // Train a gradient-boosted trees (GBT) classification model.
    GBTClassifier classificationModel = new GBTClassifier()
        .setLabelCol("labelIndex")
        .setFeaturesCol("features");
    Pipeline pipeline = new Pipeline()
        .setStages(new PipelineStage[]{indexer, classificationModel});
    PipelineModel sparkPipeline = pipeline.fit(trainingData);
    // Export this model.
    byte[] exportedModel = ModelExporter.export(sparkPipeline);
    // Import and get Transformer.
    Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);
    List<Row> sparkOutput = sparkPipeline.transform(testData).select("features", "prediction", "label").collectAsList();
    // Compare predictions.
    for (Row row : sparkOutput) {
        Map<String, Object> data_ = new HashMap<>();
        data_.put("features", ((SparseVector) row.get(0)).toArray());
        data_.put("label", (row.get(2)).toString());
        transformer.transform(data_);
        System.out.println(data_);
        System.out.println(data_.get("prediction") + " ," + row.get(1));
        assertEquals((double) data_.get("prediction"), (double) row.get(1), EPSILON);
    }
}
Author: flipkart-incubator, Project: spark-transformers, Lines: 45, Source: GradientBoostClassificationModelPipelineTest.java
Example 7: testDecisionTreeClassificationWithPipeline
import org.apache.spark.ml.linalg.SparseVector; // import the required package/class
@Test
public void testDecisionTreeClassificationWithPipeline() {
    // Load the data stored in LIBSVM format as a DataFrame.
    String datapath = "src/test/resources/classification_test.libsvm";
    Dataset<Row> data = spark.read().format("libsvm").load(datapath);
    // Split the data into training and test sets (30% held out for testing).
    Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
    Dataset<Row> trainingData = splits[0];
    Dataset<Row> testData = splits[1];
    StringIndexer indexer = new StringIndexer()
        .setInputCol("label")
        .setOutputCol("labelIndex");
    // Train a DecisionTree model.
    DecisionTreeClassifier classificationModel = new DecisionTreeClassifier()
        .setLabelCol("labelIndex")
        .setFeaturesCol("features");
    Pipeline pipeline = new Pipeline()
        .setStages(new PipelineStage[]{indexer, classificationModel});
    // Train model. This also runs the indexer.
    PipelineModel sparkPipeline = pipeline.fit(trainingData);
    // Export this model.
    byte[] exportedModel = ModelExporter.export(sparkPipeline);
    // Import and get Transformer.
    Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);
    List<Row> output = sparkPipeline.transform(testData).select("features", "label", "prediction", "rawPrediction").collectAsList();
    // Compare predictions.
    for (Row row : output) {
        Map<String, Object> data_ = new HashMap<>();
        double[] actualRawPrediction = ((DenseVector) row.get(3)).toArray();
        data_.put("features", ((SparseVector) row.get(0)).toArray());
        data_.put("label", (row.get(1)).toString());
        transformer.transform(data_);
        System.out.println(data_);
        System.out.println(data_.get("prediction"));
        assertEquals((double) data_.get("prediction"), (double) row.get(2), EPSILON);
        assertArrayEquals((double[]) data_.get("rawPrediction"), actualRawPrediction, EPSILON);
    }
}
Author: flipkart-incubator, Project: spark-transformers, Lines: 54, Source: DecisionTreeClassificationModelBridgePipelineTest.java
Example 8: call
import org.apache.spark.ml.linalg.SparseVector; // import the required package/class
@Override
public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<org.apache.spark.mllib.regression.LabeledPoint, Long>> arg0)
    throws Exception
{
    ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> ret = new ArrayList<>();
    int ncblks = (int) Math.ceil((double) _clen / _bclen);
    MatrixIndexes[] ix = new MatrixIndexes[ncblks];
    MatrixBlock[] mb = new MatrixBlock[ncblks];

    while( arg0.hasNext() )
    {
        Tuple2<org.apache.spark.mllib.regression.LabeledPoint, Long> tmp = arg0.next();
        org.apache.spark.mllib.regression.LabeledPoint row = tmp._1();
        boolean lsparse = _sparseX || (!_labels &&
            row.features() instanceof org.apache.spark.mllib.linalg.SparseVector);
        long rowix = tmp._2() + 1;

        long rix = UtilFunctions.computeBlockIndex(rowix, _brlen);
        int pos = UtilFunctions.computeCellInBlock(rowix, _brlen);

        //create new blocks for entire row
        if( ix[0] == null || ix[0].getRowIndex() != rix ) {
            if( ix[0] != null )
                flushBlocksToList(ix, mb, ret);
            long len = UtilFunctions.computeBlockSize(_rlen, rix, _brlen);
            createBlocks(rowix, (int) len, ix, mb, lsparse);
        }

        //process row data
        if( _labels ) {
            double val = row.label();
            mb[0].appendValue(pos, 0, val);
            _aNnz.add((val != 0) ? 1 : 0);
        }
        else { //features
            int lnnz = row.features().numNonzeros();
            if( row.features() instanceof org.apache.spark.mllib.linalg.SparseVector )
            {
                org.apache.spark.mllib.linalg.SparseVector srow =
                    (org.apache.spark.mllib.linalg.SparseVector) row.features();
                for( int k=0; k<lnnz; k++ ) {
                    int gix = srow.indices()[k] + 1;
                    int cix = (int) UtilFunctions.computeBlockIndex(gix, _bclen);
                    int j = UtilFunctions.computeCellInBlock(gix, _bclen);
                    mb[cix-1].appendValue(pos, j, srow.values()[k]);
                }
            }
            else { //dense
                for( int cix=1, pix=0; cix<=ncblks; cix++ ) {
                    int lclen = (int) UtilFunctions.computeBlockSize(_clen, cix, _bclen);
                    for( int j=0; j<lclen; j++ )
                        mb[cix-1].appendValue(pos, j, row.features().apply(pix++));
                }
            }
            _aNnz.add(lnnz);
        }
    }

    //flush last blocks
    flushBlocksToList(ix, mb, ret);
    return ret.iterator();
}
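Unlike the earlier examples, this converter consumes the old org.apache.spark.mllib.linalg.SparseVector that LabeledPoint carries, not the org.apache.spark.ml.linalg.SparseVector this page covers. A minimal sketch of bridging the two APIs, assuming Spark 2.0+ (which introduced asML() and Vectors.fromML()):

import org.apache.spark.ml.linalg.SparseVector;
import org.apache.spark.mllib.linalg.Vectors;

public class MllibMlBridge {
    public static void main(String[] args) {
        // mllib -> ml: every mllib Vector gains asML() in Spark 2.0+.
        org.apache.spark.mllib.linalg.SparseVector old =
            new org.apache.spark.mllib.linalg.SparseVector(3, new int[]{0, 2}, new double[]{1.5, -1.0});
        SparseVector current = (SparseVector) old.asML();

        // ml -> mllib: the reverse direction goes through Vectors.fromML(...).
        org.apache.spark.mllib.linalg.Vector back = Vectors.fromML(current);
        System.out.println(current + " / " + back);
    }
}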
Example 9: predictAge
import org.apache.spark.ml.linalg.SparseVector; // import the required package/class
public double predictAge(String document) throws InvalidFormatException, IOException {
    FeatureGenerator[] featureGenerators = model.getContext().getFeatureGenerators();
    List<Row> data = new ArrayList<Row>();
    String[] tokens = tokenizer.tokenize(document);
    double[] prob = classify.getProbabilities(tokens);
    String category = classify.getBestCategory(prob);
    Collection<String> context = new ArrayList<String>();
    for (FeatureGenerator featureGenerator : featureGenerators) {
        Collection<String> extractedFeatures = featureGenerator.extractFeatures(tokens);
        context.addAll(extractedFeatures);
    }
    if (category != null) {
        for (int i = 0; i < tokens.length / 18; i++) {
            context.add("cat=" + category);
        }
    }
    if (context.size() > 0) {
        data.add(RowFactory.create(document, context.toArray()));
    }
    StructType schema = new StructType(
        new StructField[] { new StructField("document", DataTypes.StringType, false, Metadata.empty()),
            new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty()) });
    Dataset<Row> df = spark.createDataFrame(data, schema);
    CountVectorizerModel cvm = new CountVectorizerModel(model.getVocabulary())
        .setInputCol("text")
        .setOutputCol("feature");
    Dataset<Row> eventDF = cvm.transform(df);
    Normalizer normalizer = new Normalizer().setInputCol("feature").setOutputCol("normFeature").setP(1.0);
    JavaRDD<Row> normEventDF = normalizer.transform(eventDF).javaRDD();
    Row event = normEventDF.first();
    SparseVector sp = (SparseVector) event.getAs("normFeature");
    final LassoModel linModel = model.getModel();
    Vector testData = Vectors.sparse(sp.size(), sp.indices(), sp.values());
    return linModel.predict(testData.compressed());
}
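The last three lines perform an ml-to-mllib hop: getAs("normFeature") returns the new ml SparseVector, while the mllib LassoModel only accepts the old org.apache.spark.mllib.linalg.Vector, so the vector is rebuilt from its (size, indices, values) triple. The final compressed() call then returns whichever of the dense or sparse representations uses less storage. A small sketch of that behavior (an illustration under Spark 2.x, not code from this project):

import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;

public class CompressedDemo {
    public static void main(String[] args) {
        // Mostly-zero vector: compressed() keeps the sparse form.
        Vector sparse = Vectors.sparse(100, new int[]{7}, new double[]{3.0});
        System.out.println(sparse.compressed().getClass().getSimpleName()); // SparseVector

        // Fully populated vector: compressed() picks the dense form.
        Vector dense = Vectors.dense(1.0, 2.0, 3.0).compressed();
        System.out.println(dense.getClass().getSimpleName());              // DenseVector
    }
}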