本文整理汇总了 Scala 中 org.apache.spark.ml.linalg.Vectors 类的典型用法代码示例。如果您正苦于以下问题：Scala Vectors 类的具体用法？Scala Vectors 怎么用？Scala Vectors 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 Vectors 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Scala 代码示例。
示例1: PCASampleDemo
//设置package包名称以及导入依赖的类
package com.chapter11.SparkMachineLearning
import org.apache.spark.ml.feature.PCA
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession
/**
 * Minimal PCA walkthrough: builds a three-row DataFrame of dense
 * 6-dimensional vectors, fits a PCA model with k = 4 and prints the
 * projected features.
 */
object PCASampleDemo {

  /**
   * Entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .master("local[4]")
      .appName("PCAExample")
      .getOrCreate()

    // Stop the session even when fitting or showing fails, so a failure
    // does not leave the local Spark session running.
    try {
      val data = Array(
        Vectors.dense(3.5, 2.0, 5.0, 6.3, 5.60, 2.4),
        Vectors.dense(4.40, 0.10, 3.0, 9.0, 7.0, 8.75),
        Vectors.dense(3.20, 2.40, 0.0, 6.0, 7.4, 3.34)
      )
      // Tuple1 wraps each vector so it becomes a single-column row.
      val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF("features")
      df.show(false)

      val pca = new PCA()
        .setInputCol("features")
        .setOutputCol("pcaFeatures")
        .setK(4)
        .fit(df)

      val result = pca.transform(df).select("pcaFeatures")
      result.show(false)
    } finally {
      spark.stop()
    }
  }
}
示例2: TestLogisticRegression
//设置package包名称以及导入依赖的类
package com.zobot.ai.spark
import breeze.linalg.Matrix
import com.zobot.ai.spark.helpers.LogisticRegressionHelpers
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.linalg.Vectors
import org.specs2.Specification
/** specs2 acceptance specification covering logistic-regression training. */
class TestLogisticRegression extends Specification {
  def is = s2"""
Logistic Regression
can train model $testTrainLogisticRegressionModel
"""

  val context = new Context().spark

  /**
   * Trains a model on four labelled points and compares the string rendering
   * of its coefficient matrix with the expected literal.
   */
  def testTrainLogisticRegressionModel = {
    val estimator = new LogisticRegression
    val labelledPoints = Seq(
      (1.0, Vectors.dense(0.0, 1.1, 0.1)),
      (0.0, Vectors.dense(2.0, 1.0, -1.0)),
      (0.0, Vectors.dense(2.0, 1.3, 1.0)),
      (1.0, Vectors.dense(0.0, 1.2, -0.5))
    )
    val trainingFrame = context.createDataFrame(labelledPoints).toDF("label", "features")
    val fitted = LogisticRegressionHelpers.trainModel(estimator, trainingFrame)
    val rendered = fitted.coefficientMatrix.toString()
    rendered.must_==("-19.086478256375067 16.278339464295065 -2.494930802874724 ")
  }
}
示例3: LDA
//设置package包名称以及导入依赖的类
package com.databricks.spark.sql.perf.mllib.clustering
import scala.collection.mutable.{HashMap => MHashMap}
import org.apache.commons.math3.random.Well19937c
import org.apache.spark.ml.Estimator
import org.apache.spark.ml
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.ml.linalg.{Vector, Vectors}
import com.databricks.spark.sql.perf.mllib.{BenchmarkAlgorithm, MLBenchContext, TestFromTraining}
import com.databricks.spark.sql.perf.mllib.OptionImplicits._
/** Benchmark definition for LDA over synthetically generated term-count documents. */
object LDA extends BenchmarkAlgorithm with TestFromTraining {
  // The LDA model is package private, no need to expose it.

  /**
   * Builds the training DataFrame with columns "docIndex" and "features".
   *
   * Every document draws `ldaDocLength` term indices from `[0, ldaNumVocabulary)`
   * and stores the per-term counts as a sparse vector.
   *
   * @param ctx benchmark context supplying the parameters and the SQL context
   * @return one row per generated document
   */
  override def trainingDataSet(ctx: MLBenchContext): DataFrame = {
    import ctx.params._
    val docIds = ctx.sqlContext.sparkContext.parallelize(0L until numExamples, numPartitions)
    val seed: Int = randomSeed
    val docLength = ldaDocLength.get
    val numVocab = ldaNumVocabulary.get
    val data: RDD[(Long, Vector)] = docIds.mapPartitionsWithIndex { (partitionId, docs) =>
      // One generator per partition, seeded with the base seed XOR the partition index.
      val rng = new Well19937c(seed ^ partitionId)
      docs.map { docIndex =>
        val termCounts = MHashMap[Int, Int]()
        var drawn = 0
        while (drawn < docLength) {
          val term = rng.nextInt(numVocab)
          termCounts(term) = termCounts.getOrElse(term, 0) + 1
          drawn += 1
        }
        val weighted = termCounts.toSeq.map { case (term, count) => (term, count.toDouble) }
        (docIndex, Vectors.sparse(numVocab, weighted))
      }
    }
    ctx.sqlContext.createDataFrame(data).toDF("docIndex", "features")
  }

  /**
   * Creates the LDA estimator configured from the benchmark parameters
   * (k, seed, max iterations, optimizer).
   */
  override def getEstimator(ctx: MLBenchContext): Estimator[_] = {
    import ctx.params._
    val lda = new ml.clustering.LDA()
    lda.setK(k)
    lda.setSeed(randomSeed.toLong)
    lda.setMaxIter(maxIter)
    lda.setOptimizer(optimizer)
    lda
  }
  // TODO(?) add a scoring method here.
}
示例4: LogisticRegression
//设置package包名称以及导入依赖的类
package com.databricks.spark.sql.perf.mllib.classification
import com.databricks.spark.sql.perf.mllib.OptionImplicits._
import com.databricks.spark.sql.perf.mllib._
import com.databricks.spark.sql.perf.mllib.data.DataGenerator
import org.apache.spark.ml.evaluation.{Evaluator, MulticlassClassificationEvaluator}
import org.apache.spark.ml.{Estimator, ModelBuilder, Transformer}
import org.apache.spark.ml
import org.apache.spark.ml.linalg.Vectors
/** Benchmark definition for logistic regression on generated continuous features. */
object LogisticRegression extends BenchmarkAlgorithm
  with TestFromTraining with TrainingSetFromTransformer with ScoringWithEvaluator {

  /** Generates the unlabelled continuous feature set from the benchmark parameters. */
  override protected def initialData(ctx: MLBenchContext) = {
    import ctx.params._
    DataGenerator.generateContinuousFeatures(
      ctx.sqlContext, numExamples, ctx.seed(), numPartitions, numFeatures)
  }

  /**
   * Builds the reference model: coefficients drawn as `2 * nextDouble - 1`
   * and an intercept scaled by 0.01.
   */
  override protected def trueModel(ctx: MLBenchContext): Transformer = {
    val rng = ctx.newGenerator()
    // Shared draw used first for every coefficient, then once for the intercept.
    def symmetricDraw(): Double = 2 * rng.nextDouble() - 1
    val featureCount: Int = ctx.params.numFeatures
    val coefficients = Vectors.dense(Array.fill[Double](featureCount)(symmetricDraw()))
    // Small intercept to prevent some skew in the data.
    val intercept = 0.01 * symmetricDraw()
    ModelBuilder.newLogisticRegressionModel(coefficients, intercept)
  }

  /** Creates the estimator with tolerance, max iterations and regularization set. */
  override def getEstimator(ctx: MLBenchContext): Estimator[_] = {
    import ctx.params._
    val estimator = new ml.classification.LogisticRegression()
    estimator.setTol(tol)
    estimator.setMaxIter(maxIter)
    estimator.setRegParam(regParam)
    estimator
  }

  /** Scores predictions with a default multiclass classification evaluator. */
  override protected def evaluator(ctx: MLBenchContext): Evaluator = {
    new MulticlassClassificationEvaluator()
  }
}
示例5: GLMRegression
//设置package包名称以及导入依赖的类
package com.databricks.spark.sql.perf.mllib.regression
import org.apache.spark.ml.evaluation.{Evaluator, RegressionEvaluator}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.GeneralizedLinearRegression
import org.apache.spark.ml.{Estimator, ModelBuilder, Transformer}
import com.databricks.spark.sql.perf.mllib.OptionImplicits._
import com.databricks.spark.sql.perf.mllib._
import com.databricks.spark.sql.perf.mllib.data.DataGenerator
/** Benchmark definition for generalized linear regression on generated features. */
object GLMRegression extends BenchmarkAlgorithm with TestFromTraining with
  TrainingSetFromTransformer with ScoringWithEvaluator {

  /** Generates the unlabelled continuous feature set from the benchmark parameters. */
  override protected def initialData(ctx: MLBenchContext) = {
    import ctx.params._
    DataGenerator.generateContinuousFeatures(
      ctx.sqlContext, numExamples, ctx.seed(), numPartitions, numFeatures)
  }

  /**
   * Builds the reference GLR model with randomly drawn coefficients and a
   * small intercept, then applies the configured link and family.
   */
  override protected def trueModel(ctx: MLBenchContext): Transformer = {
    import ctx.params._
    val rng = ctx.newGenerator()
    val weights = Array.fill[Double](ctx.params.numFeatures)(2 * rng.nextDouble() - 1)
    val coefficients = Vectors.dense(weights)
    // Small intercept to prevent some skew in the data.
    val intercept = 0.01 * (2 * rng.nextDouble - 1)
    val glr = ModelBuilder.newGLR(coefficients, intercept)
    glr.set(glr.link, link.get)
    glr.set(glr.family, family.get)
    glr
  }

  /** Creates the estimator configured from the benchmark parameters. */
  override def getEstimator(ctx: MLBenchContext): Estimator[_] = {
    import ctx.params._
    val estimator = new GeneralizedLinearRegression()
    estimator.setLink(link)
    estimator.setFamily(family)
    estimator.setRegParam(regParam)
    estimator.setMaxIter(maxIter)
    estimator.setTol(tol)
    estimator
  }

  /** Scores predictions with a default regression evaluator. */
  override protected def evaluator(ctx: MLBenchContext): Evaluator = {
    new RegressionEvaluator()
  }
}
示例6: LinearRegression
//设置package包名称以及导入依赖的类
package com.databricks.spark.sql.perf.mllib.regression
import org.apache.spark.ml
import org.apache.spark.ml.evaluation.{Evaluator, RegressionEvaluator}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.{Estimator, ModelBuilder, Transformer}
import com.databricks.spark.sql.perf.mllib.OptionImplicits._
import com.databricks.spark.sql.perf.mllib._
import com.databricks.spark.sql.perf.mllib.data.DataGenerator
/** Benchmark definition for linear regression on generated continuous features. */
object LinearRegression extends BenchmarkAlgorithm with TestFromTraining with
  TrainingSetFromTransformer with ScoringWithEvaluator {

  /** Generates the unlabelled continuous feature set from the benchmark parameters. */
  override protected def initialData(ctx: MLBenchContext) = {
    import ctx.params._
    DataGenerator.generateContinuousFeatures(
      ctx.sqlContext, numExamples, ctx.seed(), numPartitions, numFeatures)
  }

  /**
   * Builds the reference linear model: one randomly drawn coefficient per
   * feature followed by a small randomly drawn intercept.
   */
  override protected def trueModel(ctx: MLBenchContext): Transformer = {
    val generator = ctx.newGenerator()
    val weights = Array.fill[Double](ctx.params.numFeatures) {
      2 * generator.nextDouble() - 1
    }
    val coefficients = Vectors.dense(weights)
    // Small intercept to prevent some skew in the data.
    val intercept = 0.01 * (2 * generator.nextDouble - 1)
    ModelBuilder.newLinearRegressionModel(coefficients, intercept)
  }

  /** Creates an "l-bfgs" linear-regression estimator from the benchmark parameters. */
  override def getEstimator(ctx: MLBenchContext): Estimator[_] = {
    import ctx.params._
    val estimator = new ml.regression.LinearRegression()
    estimator.setSolver("l-bfgs")
    estimator.setRegParam(regParam)
    estimator.setMaxIter(maxIter)
    estimator.setTol(tol)
    estimator
  }

  /** Scores predictions with a default regression evaluator. */
  override protected def evaluator(ctx: MLBenchContext): Evaluator = {
    new RegressionEvaluator()
  }
}
示例7: LocalDecisionTreeClassificationModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.classification
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.classification.DecisionTreeClassificationModel
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.ml.tree.Node
/**
 * Local (non-DataFrame) wrapper that applies a decision-tree classifier's
 * `predict` to every row of the features column.
 */
class LocalDecisionTreeClassificationModel(
  override val sparkTransformer: DecisionTreeClassificationModel
) extends LocalTransformer[DecisionTreeClassificationModel] {

  /**
   * Appends the prediction column when the features column is present;
   * otherwise returns `localData` unchanged.
   */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getFeaturesCol) match {
      case None => localData
      case Some(featureColumn) =>
        // `predict` is looked up and invoked reflectively.
        val predict =
          classOf[DecisionTreeClassificationModel].getMethod("predict", classOf[Vector])
        predict.setAccessible(true)
        val outputName = sparkTransformer.getPredictionCol
        // Rows are cast to Array[Double]; any other row type fails the cast.
        val featureVectors =
          featureColumn.data.map(raw => Vectors.dense(raw.asInstanceOf[Array[Double]]))
        val predictions = featureVectors.map { features =>
          predict.invoke(sparkTransformer, features).asInstanceOf[Double]
        }
        localData.withColumn(LocalDataColumn(outputName, predictions))
    }
  }
}
/** Loader that rebuilds a [[DecisionTreeClassificationModel]] from stored metadata and data. */
object LocalDecisionTreeClassificationModel extends LocalModel[DecisionTreeClassificationModel] {

  /** Delegates to [[createTree]]. */
  override def load(metadata: Metadata, data: Map[String, Any]): DecisionTreeClassificationModel =
    createTree(metadata, data)

  /**
   * Instantiates the model through its declared (uid, root node, numFeatures,
   * numClasses) constructor and restores its parameters from `metadata.paramMap`.
   *
   * @param metadata model metadata (uid, feature/class counts, parameter map)
   * @param data     raw node data handed to `DataUtils.createNode`
   * @return the reconstructed model
   */
  def createTree(metadata: Metadata, data: Map[String, Any]): DecisionTreeClassificationModel = {
    val modelClass = classOf[DecisionTreeClassificationModel]
    val ctor = modelClass.getDeclaredConstructor(
      classOf[String], classOf[Node], classOf[Int], classOf[Int])
    ctor.setAccessible(true)

    val uid = metadata.uid
    val rootNode = DataUtils.createNode(0, metadata, data)
    val featureCount = metadata.numFeatures.get.asInstanceOf[java.lang.Integer]
    val classCount = metadata.numClasses.get.asInstanceOf[java.lang.Integer]
    val inst = ctor.newInstance(uid, rootNode, featureCount, classCount)

    // Looks up a stored parameter by name.
    def param(name: String): Any = metadata.paramMap(name)

    inst
      .setFeaturesCol(param("featuresCol").asInstanceOf[String])
      .setPredictionCol(param("predictionCol").asInstanceOf[String])
      .setProbabilityCol(param("probabilityCol").asInstanceOf[String])
      .setRawPredictionCol(param("rawPredictionCol").asInstanceOf[String])

    // Remaining parameters are parsed from their string representation.
    inst
      .set(inst.seed, param("seed").toString.toLong)
      .set(inst.cacheNodeIds, param("cacheNodeIds").toString.toBoolean)
      .set(inst.maxDepth, param("maxDepth").toString.toInt)
      .set(inst.labelCol, param("labelCol").toString)
      .set(inst.minInfoGain, param("minInfoGain").toString.toDouble)
      .set(inst.checkpointInterval, param("checkpointInterval").toString.toInt)
      .set(inst.minInstancesPerNode, param("minInstancesPerNode").toString.toInt)
      .set(inst.maxMemoryInMB, param("maxMemoryInMB").toString.toInt)
      .set(inst.maxBins, param("maxBins").toString.toInt)
      .set(inst.impurity, param("impurity").toString)
  }

  /** Implicitly wraps a Spark model in its local transformer. */
  override implicit def getTransformer(
    transformer: DecisionTreeClassificationModel
  ): LocalTransformer[DecisionTreeClassificationModel] =
    new LocalDecisionTreeClassificationModel(transformer)
}
示例8: LocalMultilayerPerceptronClassificationModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.classification
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel
import org.apache.spark.ml.linalg.{Vector, Vectors}
/**
 * Local wrapper that applies a multilayer-perceptron classifier's `predict`
 * to every row of the features column.
 */
class LocalMultilayerPerceptronClassificationModel(
  override val sparkTransformer: MultilayerPerceptronClassificationModel
) extends LocalTransformer[MultilayerPerceptronClassificationModel] {

  /**
   * Appends the prediction column when the features column is present;
   * otherwise returns `localData` unchanged.
   */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getFeaturesCol) match {
      case None => localData
      case Some(featureColumn) =>
        // `predict` is looked up and invoked reflectively.
        val predict =
          classOf[MultilayerPerceptronClassificationModel].getMethod("predict", classOf[Vector])
        predict.setAccessible(true)
        val outputName = sparkTransformer.getPredictionCol
        // Rows are cast to Vector; any other row type fails the cast.
        val predictions = featureColumn.data.map { raw =>
          val features = raw.asInstanceOf[Vector]
          predict.invoke(sparkTransformer, features).asInstanceOf[Double]
        }
        localData.withColumn(LocalDataColumn(outputName, predictions))
    }
  }
}
/** Loader that rebuilds a [[MultilayerPerceptronClassificationModel]] from stored data. */
object LocalMultilayerPerceptronClassificationModel
  extends LocalModel[MultilayerPerceptronClassificationModel] {

  /**
   * Instantiates the model through its declared (uid, layers, weights)
   * constructor and restores the features/prediction column names.
   *
   * @param metadata model metadata providing the uid and column names
   * @param data     must contain "layers" and a "weights" map with a "values" list
   */
  override def load(
    metadata: Metadata,
    data: Map[String, Any]
  ): MultilayerPerceptronClassificationModel = {
    val ctor = classOf[MultilayerPerceptronClassificationModel]
      .getDeclaredConstructor(classOf[String], classOf[Array[Int]], classOf[Vector])
    ctor.setAccessible(true)

    val uid = metadata.uid
    val layers = data("layers").asInstanceOf[List[Int]].toArray
    val weightValues = data("weights")
      .asInstanceOf[Map[String, Any]]("values")
      .asInstanceOf[List[Double]]
      .toArray
    val weights = Vectors.dense(weightValues)

    ctor
      .newInstance(uid, layers, weights)
      .setFeaturesCol(metadata.paramMap("featuresCol").asInstanceOf[String])
      .setPredictionCol(metadata.paramMap("predictionCol").asInstanceOf[String])
  }

  /** Implicitly wraps a Spark model in its local transformer. */
  override implicit def getTransformer(
    transformer: MultilayerPerceptronClassificationModel
  ): LocalTransformer[MultilayerPerceptronClassificationModel] =
    new LocalMultilayerPerceptronClassificationModel(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:35,代码来源:LocalMultilayerPerceptronClassificationModel.scala
示例9: LocalPolynomialExpansion
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.PolynomialExpansion
import org.apache.spark.ml.linalg.{Vector, Vectors}
/**
 * Local wrapper that applies a [[PolynomialExpansion]] transformer to every
 * row of its input column.
 */
class LocalPolynomialExpansion(override val sparkTransformer: PolynomialExpansion)
  extends LocalTransformer[PolynomialExpansion] {

  /**
   * Appends the expanded output column when the input column is present;
   * otherwise returns `localData` unchanged.
   *
   * @param localData input data; each row of the input column is cast to a
   *                  list whose elements are parsed as doubles
   */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        val method = classOf[PolynomialExpansion].getMethod("createTransformFunc")
        // The transform function does not depend on the row, so obtain it once
        // instead of paying for a reflective call per row.
        val expand = method.invoke(sparkTransformer).asInstanceOf[Vector => Vector]
        val newData = column.data.map { r =>
          val row = r.asInstanceOf[List[Any]].map(_.toString.toDouble).toArray
          expand(Vectors.dense(row))
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, newData))
      case None => localData
    }
  }
}
/** Loader that rebuilds a [[PolynomialExpansion]] transformer from stored metadata. */
object LocalPolynomialExpansion extends LocalModel[PolynomialExpansion] {

  /**
   * Creates the transformer with the stored uid and restores its input
   * column, output column and degree.
   */
  override def load(metadata: Metadata, data: Map[String, Any]): PolynomialExpansion = {
    val expansion = new PolynomialExpansion(metadata.uid)
    expansion.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    expansion.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
    // The degree is read through Number, so any numeric representation is accepted.
    expansion.setDegree(metadata.paramMap("degree").asInstanceOf[Number].intValue())
    expansion
  }

  /** Implicitly wraps a Spark transformer in its local counterpart. */
  override implicit def getTransformer(
    transformer: PolynomialExpansion
  ): LocalTransformer[PolynomialExpansion] =
    new LocalPolynomialExpansion(transformer)
}
示例10: LocalMaxAbsScalerModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.MaxAbsScalerModel
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}
/**
 * Local wrapper that divides every row of the input column element-wise by
 * the model's `maxAbs` vector.
 */
class LocalMaxAbsScalerModel(override val sparkTransformer: MaxAbsScalerModel)
  extends LocalTransformer[MaxAbsScalerModel] {

  /**
   * Appends the scaled output column when the input column is present;
   * otherwise returns `localData` unchanged.
   */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case None => localData
      case Some(column) =>
        // Zero maxima are replaced by 1 so the division below never divides by zero.
        val maxAbsUnzero = Vectors.dense(
          sparkTransformer.maxAbs.toArray.map(m => if (m == 0) 1.0 else m))
        val scaled = column.data.map { raw =>
          val values: List[Double] = raw match {
            case d: SparseVector => d.toDense.toArray.toList
            case d: DenseVector => d.toArray.toList
            case d: List[Any @unchecked] => d.map(_.toString.toDouble)
            case d =>
              throw new IllegalArgumentException(s"Unknown data type for LocalMaxAbsScaler: $d")
          }
          val numerator = DataUtils.asBreeze(values.toArray)
          val denominator = DataUtils.asBreeze(maxAbsUnzero.toArray)
          DataUtils.fromBreeze(numerator / denominator)
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, scaled))
    }
  }
}
/** Loader that rebuilds a [[MaxAbsScalerModel]] from stored metadata and data. */
object LocalMaxAbsScalerModel extends LocalModel[MaxAbsScalerModel] {

  /**
   * Instantiates the model through its declared (uid, maxAbs) constructor and
   * restores the input/output column names.
   *
   * @param data must contain a "maxAbs" map; a missing "values" entry yields
   *             an empty vector
   */
  override def load(metadata: Metadata, data: Map[String, Any]): MaxAbsScalerModel = {
    val maxAbsEntry = data("maxAbs").asInstanceOf[Map[String, Any]]
    val maxAbsValues = maxAbsEntry.getOrElse("values", List()).asInstanceOf[List[Double]]
    val maxAbs = new DenseVector(maxAbsValues.toArray)

    val ctor =
      classOf[MaxAbsScalerModel].getDeclaredConstructor(classOf[String], classOf[Vector])
    ctor.setAccessible(true)

    val model = ctor.newInstance(metadata.uid, maxAbs)
    model
      .setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
      .setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
  }

  /** Implicitly wraps a Spark model in its local transformer. */
  override implicit def getTransformer(
    transformer: MaxAbsScalerModel
  ): LocalTransformer[MaxAbsScalerModel] =
    new LocalMaxAbsScalerModel(transformer)
}
示例11: LocalDCT
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.DCT
import org.apache.spark.ml.linalg.{Vector, Vectors}
/** Local wrapper that applies a [[DCT]] transformer to every row of its input column. */
class LocalDCT(override val sparkTransformer: DCT) extends LocalTransformer[DCT] {

  /**
   * Appends the transformed output column when the input column is present;
   * otherwise returns `localData` unchanged.
   *
   * @param localData input data; each row of the input column is cast to a
   *                  list whose elements are parsed as doubles
   */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        val method = classOf[DCT].getMethod("createTransformFunc")
        // The transform function does not depend on the row, so obtain it once
        // instead of paying for a reflective call per row.
        val dct = method.invoke(sparkTransformer).asInstanceOf[Vector => Vector]
        val newData = column.data.map { r =>
          val row = r.asInstanceOf[List[Any]].map(_.toString.toDouble).toArray
          dct(Vectors.dense(row))
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, newData))
      case None => localData
    }
  }
}
/** Loader that rebuilds a [[DCT]] transformer from stored metadata. */
object LocalDCT extends LocalModel[DCT] {

  /**
   * Creates the transformer with the stored uid and restores its input
   * column, output column and inverse flag.
   */
  override def load(metadata: Metadata, data: Map[String, Any]): DCT = {
    val dct = new DCT(metadata.uid)
    dct.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    dct.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
    dct.setInverse(metadata.paramMap("inverse").asInstanceOf[Boolean])
    dct
  }

  /** Implicitly wraps a Spark transformer in its local counterpart. */
  override implicit def getTransformer(transformer: DCT): LocalTransformer[DCT] = {
    new LocalDCT(transformer)
  }
}
示例12: LocalNormalizer
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.Normalizer
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}
/** Local wrapper that applies a [[Normalizer]] transformer to every row of its input column. */
class LocalNormalizer(override val sparkTransformer: Normalizer)
  extends LocalTransformer[Normalizer] {

  /**
   * Appends the normalized output column when the input column is present;
   * otherwise returns `localData` unchanged.
   *
   * Rows may be a list (elements are parsed as doubles), a [[SparseVector]]
   * or a [[DenseVector]].
   *
   * @throws IllegalArgumentException when a row has any other type
   */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        val method = classOf[Normalizer].getMethod("createTransformFunc")
        // The transform function does not depend on the row, so obtain it once
        // instead of paying for a reflective call per row.
        val normalize = method.invoke(sparkTransformer).asInstanceOf[Vector => Vector]
        val newData = column.data.map { r =>
          val vector: Vector = r match {
            // Element type is erased at runtime, so match on List[_] and parse each element.
            case x: List[_] => Vectors.dense(x.map(_.toString.toDouble).toArray)
            case x: SparseVector => x
            case x: DenseVector => x
            case unknown =>
              throw new IllegalArgumentException(
                s"Unknown data type for LocalNormalizer: ${unknown.getClass}")
          }
          normalize(vector)
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, newData))
      case None => localData
    }
  }
}
/** Loader that rebuilds a [[Normalizer]] transformer from stored metadata. */
object LocalNormalizer extends LocalModel[Normalizer] {

  /**
   * Creates the transformer with the stored uid and restores its input
   * column, output column and `p` (parsed from its string representation).
   */
  override def load(metadata: Metadata, data: Map[String, Any]): Normalizer = {
    val normalizer = new Normalizer(metadata.uid)
    normalizer.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    normalizer.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
    normalizer.setP(metadata.paramMap("p").toString.toDouble)
    normalizer
  }

  /** Implicitly wraps a Spark transformer in its local counterpart. */
  override implicit def getTransformer(transformer: Normalizer): LocalTransformer[Normalizer] = {
    new LocalNormalizer(transformer)
  }
}
示例13: LocalDecisionTreeRegressionModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.regression
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.ml.regression.DecisionTreeRegressionModel
import org.apache.spark.ml.tree.Node
/**
 * Local wrapper that applies a decision-tree regressor's `predict` to every
 * row of the features column.
 */
class LocalDecisionTreeRegressionModel(
  override val sparkTransformer: DecisionTreeRegressionModel
) extends LocalTransformer[DecisionTreeRegressionModel] {

  /**
   * Appends the prediction column when the features column is present;
   * otherwise returns `localData` unchanged.
   */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getFeaturesCol) match {
      case None => localData
      case Some(featureColumn) =>
        // `predict` is looked up and invoked reflectively.
        val predict =
          classOf[DecisionTreeRegressionModel].getMethod("predict", classOf[Vector])
        predict.setAccessible(true)
        val outputName = sparkTransformer.getPredictionCol
        // Rows are cast to Array[Double]; any other row type fails the cast.
        val featureVectors =
          featureColumn.data.map(raw => Vectors.dense(raw.asInstanceOf[Array[Double]]))
        val predictions = featureVectors.map { features =>
          predict.invoke(sparkTransformer, features).asInstanceOf[Double]
        }
        localData.withColumn(LocalDataColumn(outputName, predictions))
    }
  }
}
/** Loader that rebuilds a [[DecisionTreeRegressionModel]] from stored metadata and data. */
object LocalDecisionTreeRegressionModel extends LocalModel[DecisionTreeRegressionModel] {

  /** Delegates to [[createTree]]. */
  override def load(metadata: Metadata, data: Map[String, Any]): DecisionTreeRegressionModel =
    createTree(metadata, data)

  /**
   * Instantiates the model through its declared (uid, root node, numFeatures)
   * constructor and restores its parameters from `metadata.paramMap`.
   *
   * @param metadata model metadata (uid, feature count, parameter map)
   * @param data     raw node data handed to `DataUtils.createNode`
   * @return the reconstructed model
   */
  def createTree(metadata: Metadata, data: Map[String, Any]): DecisionTreeRegressionModel = {
    val ctor = classOf[DecisionTreeRegressionModel]
      .getDeclaredConstructor(classOf[String], classOf[Node], classOf[Int])
    ctor.setAccessible(true)

    val uid = metadata.uid
    val rootNode = DataUtils.createNode(0, metadata, data)
    val featureCount = metadata.numFeatures.get.asInstanceOf[java.lang.Integer]
    val inst = ctor.newInstance(uid, rootNode, featureCount)

    // Looks up a stored parameter by name.
    def param(name: String): Any = metadata.paramMap(name)

    inst
      .setFeaturesCol(param("featuresCol").asInstanceOf[String])
      .setPredictionCol(param("predictionCol").asInstanceOf[String])

    // Remaining parameters are parsed from their string representation.
    inst
      .set(inst.seed, param("seed").toString.toLong)
      .set(inst.cacheNodeIds, param("cacheNodeIds").toString.toBoolean)
      .set(inst.maxDepth, param("maxDepth").toString.toInt)
      .set(inst.labelCol, param("labelCol").toString)
      .set(inst.minInfoGain, param("minInfoGain").toString.toDouble)
      .set(inst.checkpointInterval, param("checkpointInterval").toString.toInt)
      .set(inst.minInstancesPerNode, param("minInstancesPerNode").toString.toInt)
      .set(inst.maxMemoryInMB, param("maxMemoryInMB").toString.toInt)
      .set(inst.maxBins, param("maxBins").toString.toInt)
      .set(inst.impurity, param("impurity").toString)
  }

  /** Implicitly wraps a Spark model in its local transformer. */
  override implicit def getTransformer(
    transformer: DecisionTreeRegressionModel
  ): LocalTransformer[DecisionTreeRegressionModel] =
    new LocalDecisionTreeRegressionModel(transformer)
}
示例14: LocalRandomForestRegressionModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.regression
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.ml.regression.{DecisionTreeRegressionModel, RandomForestRegressionModel}
/**
 * Local wrapper that applies a random-forest regressor's `predict` to every
 * row of the features column.
 */
class LocalRandomForestRegressionModel(
  override val sparkTransformer: RandomForestRegressionModel
) extends LocalTransformer[RandomForestRegressionModel] {

  /**
   * Appends the prediction column when the features column is present;
   * otherwise returns `localData` unchanged.
   */
  override def transform(localData: LocalData): LocalData = {
    // The reflective lookup happens before the column check.
    val predict = classOf[RandomForestRegressionModel].getMethod("predict", classOf[Vector])
    localData.column(sparkTransformer.getFeaturesCol) match {
      case None => localData
      case Some(featureColumn) =>
        val outputName = sparkTransformer.getPredictionCol
        // Rows are cast to Array[Double]; any other row type fails the cast.
        val featureVectors =
          featureColumn.data.map(raw => Vectors.dense(raw.asInstanceOf[Array[Double]]))
        val predictions = featureVectors.map { features =>
          predict.invoke(sparkTransformer, features).asInstanceOf[Double]
        }
        localData.withColumn(LocalDataColumn(outputName, predictions))
    }
  }
}
/** Loader that rebuilds a [[RandomForestRegressionModel]] from stored metadata and data. */
object LocalRandomForestRegressionModel extends LocalModel[RandomForestRegressionModel] {

  /**
   * Rebuilds every tree listed under the "treesMetadata" parameter, then
   * instantiates the forest through its declared (uid, trees, numFeatures)
   * constructor and restores its parameters.
   *
   * @param metadata forest metadata; "treesMetadata" maps a tree key to a map
   *                 holding that tree's "metadata"
   * @param data     per-tree node data, keyed by the same tree keys
   */
  override def load(metadata: Metadata, data: Map[String, Any]): RandomForestRegressionModel = {
    val treesMetadata = metadata.paramMap("treesMetadata").asInstanceOf[Map[String, Any]]
    val trees = treesMetadata.map { case (treeKey, rawTreeMeta) =>
      val treeMeta = rawTreeMeta.asInstanceOf[Map[String, Any]]
      val meta = treeMeta("metadata").asInstanceOf[Metadata]
      val treeData = data(treeKey).asInstanceOf[Map[String, Any]]
      LocalDecisionTreeRegressionModel.createTree(meta, treeData)
    }

    val ctor = classOf[RandomForestRegressionModel].getDeclaredConstructor(
      classOf[String], classOf[Array[DecisionTreeRegressionModel]], classOf[Int])
    ctor.setAccessible(true)

    val forest = ctor.newInstance(
      metadata.uid,
      trees.toArray,
      metadata.numFeatures.get.asInstanceOf[java.lang.Integer]
    )
    val inst = forest
      .setFeaturesCol(metadata.paramMap("featuresCol").asInstanceOf[String])
      .setPredictionCol(metadata.paramMap("predictionCol").asInstanceOf[String])

    // Remaining parameters are parsed from their string representation.
    inst
      .set(inst.seed, metadata.paramMap("seed").toString.toLong)
      .set(inst.subsamplingRate, metadata.paramMap("subsamplingRate").toString.toDouble)
      .set(inst.impurity, metadata.paramMap("impurity").toString)
  }

  /** Implicitly wraps a Spark model in its local transformer. */
  override implicit def getTransformer(
    transformer: RandomForestRegressionModel
  ): LocalTransformer[RandomForestRegressionModel] =
    new LocalRandomForestRegressionModel(transformer)
}
示例15: SparkVector
//设置package包名称以及导入依赖的类
package linalg.vector
import org.apache.spark.ml.linalg.{Vector, Vectors}
/** Demonstrates creating dense and sparse Spark ML vectors and reading their properties. */
object SparkVector {

  /**
   * Entry point: builds the example vectors and prints them together with
   * argmax, non-zero count, size, array contents and a dense-to-sparse conversion.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Create a dense vector (1.0, 0.0, 2.0).
    val dVectorOne: Vector = Vectors.dense(1.0, 0.0, 2.0)
    println("dVectorOne:" + dVectorOne)

    // Create a sparse vector (1.0, 0.0, 2.0, 3.0) by specifying the indices
    // and values of its nonzero entries as two parallel arrays.
    val sVectorOne: Vector = Vectors.sparse(4, Array(0, 2, 3), Array(1.0, 2.0, 3.0))

    // Create the same sparse vector (1.0, 0.0, 2.0, 3.0) by specifying its
    // nonzero entries as (index, value) pairs.
    val sVectorTwo: Vector = Vectors.sparse(4, Seq((0, 1.0), (2, 2.0), (3, 3.0)))
    println("sVectorOne:" + sVectorOne)
    println("sVectorTwo:" + sVectorTwo)

    val sVectorOneMax = sVectorOne.argmax
    val sVectorOneNumNonZeros = sVectorOne.numNonzeros
    val sVectorOneSize = sVectorOne.size
    val sVectorOneArray = sVectorOne.toArray
    println("sVectorOneMax:" + sVectorOneMax)
    println("sVectorOneNumNonZeros:" + sVectorOneNumNonZeros)
    println("sVectorOneSize:" + sVectorOneSize)
    // Concatenating an Array prints its JVM reference (e.g. "[D@1b2c3d"), not
    // its contents, so render the elements explicitly.
    println("sVectorOneArray:" + sVectorOneArray.mkString("[", ", ", "]"))

    val dVectorOneToSparse = dVectorOne.toSparse
    println("dVectorOneToSparse:" + dVectorOneToSparse)
  }
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-with-Spark-Second-Edition,代码行数:39,代码来源:SparkVector.scala