本文整理汇总了Scala中org.apache.spark.ml.linalg.Vector类的典型用法代码示例。如果您正苦于以下问题：Scala Vector类的具体用法？Scala Vector怎么用？Scala Vector使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Vector类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1:
//设置package包名称以及导入依赖的类
import java.io.{File, FileOutputStream}
import java.nio.channels.FileChannel
import java.nio.file.{Paths, StandardOpenOption}
import com.indix.ml2npy.Ml2NpyCSR
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector}
import org.scalatest.FlatSpec
import sys.process._
// Command prefix for the Python-side check; the trailing space separates it from the target.
val nosetestspath = "nosetests "
// Path of the Npytest.py classpath resource followed by ":"; a test name is appended to it below.
val pathToTest = getClass.getResource("/python/Npytest.py").getPath + ":"

// Feeds three sparse feature vectors with dense labels into Ml2NpyCSR, writes the
// resulting bytes to /tmp/data.npz, then runs the Python test `test_5` and expects exit code 0.
"ML2NpyFile" should "Convert to CSR matrix" in {
  val csrGen = new Ml2NpyCSR
  val data: Seq[Vector] = Seq(
    new SparseVector(3, Array(0), Array(0.1)),
    new SparseVector(3, Array(1), Array(0.2)),
    new SparseVector(3, Array(2), Array(0.3))
  )
  val labels = Seq(
    new DenseVector(Array(0, 1)),
    new DenseVector(Array(1, 0)),
    new DenseVector(Array(1, 0))
  )
  data.zip(labels).foreach { case (features, label) => csrGen.addRecord(features, label) }

  val fos = new FileOutputStream(new File("/tmp/data.npz"))
  // Close the stream even when serialization or the write itself fails.
  try fos.write(csrGen.getBytes)
  finally fos.close()

  val command = nosetestspath + pathToTest + "test_5"
  val response = command.!
  assert(response == 0)
}
}
示例2: LocalLinearRegressionModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.regression
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.regression.LinearRegressionModel
/**
 * [[LocalTransformer]] that applies a [[LinearRegressionModel]] to a [[LocalData]] instance
 * by calling the model's `predict(Vector)` method through reflection.
 */
class LocalLinearRegressionModel(override val sparkTransformer: LinearRegressionModel) extends LocalTransformer[LinearRegressionModel] {

  /**
   * Adds the prediction column computed from the features column.
   *
   * @param localData input data; each features entry is cast to `Vector`
   * @return `localData` with the prediction column added, or `localData` itself
   *         when the features column is missing
   */
  override def transform(localData: LocalData): LocalData =
    localData.column(sparkTransformer.getFeaturesCol).fold(localData) { featuresColumn =>
      val predictMethod = classOf[LinearRegressionModel].getMethod("predict", classOf[Vector])
      predictMethod.setAccessible(true)
      val outputName = sparkTransformer.getPredictionCol
      val predictions = featuresColumn.data.map { row =>
        predictMethod.invoke(sparkTransformer, row.asInstanceOf[Vector]).asInstanceOf[Double]
      }
      localData.withColumn(LocalDataColumn(outputName, predictions))
    }
}
/** Rebuilds [[LinearRegressionModel]] instances from serialized metadata and model data. */
object LocalLinearRegressionModel extends LocalModel[LinearRegressionModel] {

  /**
   * Instantiates the model reflectively from `data("intercept")` and `data("coefficients")`,
   * then copies the parameters found in `metadata.paramMap` onto it.
   *
   * @param metadata supplies the model uid and the parameter map
   * @param data     supplies the intercept and the coefficient vector description
   * @return the configured model
   */
  override def load(metadata: Metadata, data: Map[String, Any]): LinearRegressionModel = {
    val intercept = data("intercept").asInstanceOf[java.lang.Double]
    val coefficients = DataUtils.constructVector(data("coefficients").asInstanceOf[Map[String, Any]])
    val modelCtor = classOf[LinearRegressionModel]
      .getConstructor(classOf[String], classOf[Vector], classOf[Double])
    val model = modelCtor.newInstance(metadata.uid, coefficients, intercept)

    val params = metadata.paramMap
    // Numeric parameters are parsed from their string form.
    def asDouble(key: String): Double = params(key).toString.toDouble
    def asString(key: String): String = params(key).asInstanceOf[String]
    def asBoolean(key: String): Boolean = params(key).asInstanceOf[Boolean]

    model.set(model.featuresCol, asString("featuresCol"))
    model.set(model.predictionCol, asString("predictionCol"))
    model.set(model.labelCol, asString("labelCol"))
    model.set(model.elasticNetParam, asDouble("elasticNetParam"))
    // NOTE: introduced in spark 2.1 for reducing iterations for big datasets, e.g unnecessary for us
    // model.set(model.aggregationDepth, params("aggregationDepth").asInstanceOf[Int])
    model.set(model.maxIter, params("maxIter").asInstanceOf[Number].intValue())
    model.set(model.regParam, asDouble("regParam"))
    model.set(model.solver, asString("solver"))
    model.set(model.tol, asDouble("tol"))
    model.set(model.standardization, asBoolean("standardization"))
    model.set(model.fitIntercept, asBoolean("fitIntercept"))
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: LinearRegressionModel): LocalTransformer[LinearRegressionModel] =
    new LocalLinearRegressionModel(transformer)
}
示例3: buildMultiPerpectronNetwork
//设置package包名称以及导入依赖的类
package com.sircamp.algorithms.neuralnetwork
import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter}
import com.sircamp.Application
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier}
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.linalg.VectorUDT
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Dataset, Row, SparkSession}
import org.apache.spark.sql.types._
// Start from a clean output file: drop any previous dump at TEMP_FILE_PATH.
val file = new java.io.File(TEMP_FILE_PATH)
if (file.exists) {
  file.delete()
}
val writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file)))
try {
  // One line per record: "<label> <index>:<value> <index>:<value> ..." with 1-based
  // feature indices; every feature entry keeps its trailing space.
  trainingData.collect().foreach { point =>
    val features = point.features.toArray.zipWithIndex
      .map { case (value, i) => s"${i + 1}:$value " }
      .mkString
    writer.write(s"${point.label} $features\n")
  }
} finally {
  // Release the file handle even if collecting or writing fails.
  writer.close()
}
}
/**
 * Fits a multilayer perceptron classifier on the given data set.
 *
 * @param trainingData data set passed to `fit`
 * @param layers       layer sizes handed to `setLayers`
 * @param maxIter      iteration limit handed to `setMaxIter`
 * @return the fitted classification model
 */
def buildMultiPerpectronNetwork(trainingData:Dataset[Row], layers:Array[Int], maxIter:Int):MultilayerPerceptronClassificationModel = {
  val classifier = new MultilayerPerceptronClassifier()
  classifier.setLayers(layers)
  // blockSize and seed are defined outside this method.
  classifier.setBlockSize(blockSize)
  classifier.setSeed(seed)
  classifier.setMaxIter(maxIter)
  classifier.fit(trainingData)
}
}
示例4: LDA
//设置package包名称以及导入依赖的类
package com.databricks.spark.sql.perf.mllib.clustering
import scala.collection.mutable.{HashMap => MHashMap}
import org.apache.commons.math3.random.Well19937c
import org.apache.spark.ml.Estimator
import org.apache.spark.ml
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.ml.linalg.{Vector, Vectors}
import com.databricks.spark.sql.perf.mllib.{BenchmarkAlgorithm, MLBenchContext, TestFromTraining}
import com.databricks.spark.sql.perf.mllib.OptionImplicits._
/**
 * Benchmark definition for the spark.ml LDA estimator: generates random
 * term-count documents and configures the estimator from the benchmark parameters.
 */
object LDA extends BenchmarkAlgorithm with TestFromTraining {
  // The LDA model is package private, no need to expose it.

  /**
   * Builds a DataFrame with the columns "docIndex" and "features". Each document
   * draws `ldaDocLength` term ids below `ldaNumVocabulary` and stores the per-term
   * counts as a sparse vector.
   */
  override def trainingDataSet(ctx: MLBenchContext): DataFrame = {
    import ctx.params._
    val docIds = ctx.sqlContext.sparkContext.parallelize(0L until numExamples, numPartitions)
    val seed: Int = randomSeed
    val docLength = ldaDocLength.get
    val numVocab = ldaNumVocabulary.get
    val data: RDD[(Long, Vector)] = docIds.mapPartitionsWithIndex { (idx, partition) =>
      // One generator per partition, seeded with the base seed XOR the partition index.
      val rng = new Well19937c(seed ^ idx)
      partition.map { docIndex =>
        val termCounts = MHashMap[Int, Int]()
        (0 until docLength).foreach { _ =>
          val term = rng.nextInt(numVocab)
          termCounts(term) = termCounts.getOrElse(term, 0) + 1
        }
        val weightedTerms = termCounts.toSeq.map { case (term, count) => (term, count.toDouble) }
        (docIndex, Vectors.sparse(numVocab, weightedTerms))
      }
    }
    ctx.sqlContext.createDataFrame(data).toDF("docIndex", "features")
  }

  /** Creates the LDA estimator configured with k, seed, maxIter and optimizer from `ctx.params`. */
  override def getEstimator(ctx: MLBenchContext): Estimator[_] = {
    import ctx.params._
    val lda = new ml.clustering.LDA()
    lda.setK(k)
    lda.setSeed(randomSeed.toLong)
    lda.setMaxIter(maxIter)
    lda.setOptimizer(optimizer)
    lda
  }
  // TODO(?) add a scoring method here.
}
示例5: LocalGaussianMixtureModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.clustering
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.clustering.GaussianMixtureModel
import org.apache.spark.ml.linalg.{Matrix, Vector}
import org.apache.spark.ml.stat.distribution.MultivariateGaussian
/**
 * [[LocalTransformer]] that applies a [[GaussianMixtureModel]] to a [[LocalData]] instance
 * by calling the model's `predict(Vector)` method through reflection.
 */
class LocalGaussianMixtureModel(override val sparkTransformer: GaussianMixtureModel) extends LocalTransformer[GaussianMixtureModel] {

  /**
   * Adds the prediction column (one `Int` per features entry).
   *
   * @return `localData` with the prediction column added, or `localData` itself
   *         when the features column is missing
   */
  override def transform(localData: LocalData): LocalData =
    localData
      .column(sparkTransformer.getFeaturesCol)
      .map { featuresColumn =>
        val predict = classOf[GaussianMixtureModel].getMethod("predict", classOf[Vector])
        predict.setAccessible(true)
        val outputName = sparkTransformer.getPredictionCol
        val clusters = featuresColumn.data.map { feature =>
          predict.invoke(sparkTransformer, feature.asInstanceOf[Vector]).asInstanceOf[Int]
        }
        localData.withColumn(LocalDataColumn(outputName, clusters))
      }
      .getOrElse(localData)
}
/** Rebuilds [[GaussianMixtureModel]] instances from serialized metadata and model data. */
object LocalGaussianMixtureModel extends LocalModel[GaussianMixtureModel] {

  /**
   * Pairs `data("mus")` with `data("sigmas")` into Gaussians, instantiates the model through
   * its non-public constructor and sets the probability, features and prediction column names.
   */
  override def load(metadata: Metadata, data: Map[String, Any]): GaussianMixtureModel = {
    val weights = data("weights").asInstanceOf[List[Double]].toArray
    val means = data("mus").asInstanceOf[List[Vector]].toArray
    val covariances = data("sigmas").asInstanceOf[List[Matrix]].toArray
    val gaussians = means.zip(covariances).map { case (mean, covariance) =>
      new MultivariateGaussian(mean, covariance)
    }

    val modelCtor = classOf[GaussianMixtureModel].getDeclaredConstructor(
      classOf[String],
      classOf[Array[Double]],
      classOf[Array[MultivariateGaussian]]
    )
    modelCtor.setAccessible(true)

    val model = modelCtor.newInstance(metadata.uid, weights, gaussians)
    model
      .set(model.probabilityCol, metadata.paramMap("probabilityCol").asInstanceOf[String])
      .set(model.featuresCol, metadata.paramMap("featuresCol").asInstanceOf[String])
      .set(model.predictionCol, metadata.paramMap("predictionCol").asInstanceOf[String])
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: GaussianMixtureModel): LocalTransformer[GaussianMixtureModel] =
    new LocalGaussianMixtureModel(transformer)
}
示例6: LocalRandomForestClassificationModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.classification
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.classification.{DecisionTreeClassificationModel, RandomForestClassificationModel}
import org.apache.spark.ml.linalg.{DenseVector, Vector, Vectors}
/**
 * [[LocalTransformer]] that applies a [[RandomForestClassificationModel]] to a [[LocalData]]
 * instance, producing raw prediction, probability and prediction columns through the
 * model's `predictRaw`, `raw2probabilityInPlace` and `raw2prediction` methods.
 */
class LocalRandomForestClassificationModel(override val sparkTransformer: RandomForestClassificationModel) extends LocalTransformer[RandomForestClassificationModel] {

  /**
   * Adds the three output columns derived from the features column.
   *
   * @param localData input data; each features entry is cast to `Array[Double]`
   * @return `localData` with the output columns added, or `localData` itself
   *         when the features column is missing
   */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getFeaturesCol) match {
      case Some(column) =>
        val cls = classOf[RandomForestClassificationModel]
        // Resolve the reflective handles once per call instead of once per row.
        val predictRaw = cls.getDeclaredMethod("predictRaw", classOf[Vector])
        val raw2probabilityInPlace = cls.getDeclaredMethod("raw2probabilityInPlace", classOf[Vector])
        val raw2prediction = cls.getMethod("raw2prediction", classOf[Vector])

        val rawPredictionCol = LocalDataColumn(sparkTransformer.getRawPredictionCol, column.data.map { f =>
          val features = Vectors.dense(f.asInstanceOf[Array[Double]])
          predictRaw.invoke(sparkTransformer, features).asInstanceOf[Vector].toArray
        })
        // The raw vectors are copied before each call so the stored raw predictions
        // are not overwritten by the in-place probability conversion.
        val probabilityCol = LocalDataColumn(sparkTransformer.getProbabilityCol, rawPredictionCol.data.map { raw =>
          val res = raw2probabilityInPlace.invoke(sparkTransformer, Vectors.dense(raw).copy).asInstanceOf[Vector]
          res.toArray
        })
        val predictionCol = LocalDataColumn(sparkTransformer.getPredictionCol, rawPredictionCol.data.map { raw =>
          raw2prediction.invoke(sparkTransformer, Vectors.dense(raw).copy)
        })
        localData.withColumn(rawPredictionCol)
          .withColumn(probabilityCol)
          .withColumn(predictionCol)
      case None => localData
    }
  }
}
/** Rebuilds [[RandomForestClassificationModel]] instances from serialized metadata and tree data. */
object LocalRandomForestClassificationModel extends LocalModel[RandomForestClassificationModel] {

  /**
   * Creates one decision tree per entry of `metadata.paramMap("treesMetadata")`, looking up
   * the tree's data under the same key, and assembles the forest through the model's
   * non-public constructor.
   */
  override def load(metadata: Metadata, data: Map[String, Any]): RandomForestClassificationModel = {
    val treesMetadata = metadata.paramMap("treesMetadata").asInstanceOf[Map[String, Any]]
    val trees = treesMetadata.map { case (treeKey, rawTreeMeta) =>
      val treeMeta = rawTreeMeta.asInstanceOf[Map[String, Any]]
      val meta = treeMeta("metadata").asInstanceOf[Metadata]
      LocalDecisionTreeClassificationModel.createTree(meta, data(treeKey).asInstanceOf[Map[String, Any]])
    }

    val forestCtor = classOf[RandomForestClassificationModel].getDeclaredConstructor(
      classOf[String],
      classOf[Array[DecisionTreeClassificationModel]],
      classOf[Int],
      classOf[Int]
    )
    forestCtor.setAccessible(true)

    val treeArray: Array[DecisionTreeClassificationModel] = trees.toArray
    val forest = forestCtor.newInstance(
      metadata.uid,
      treeArray,
      metadata.numFeatures.get.asInstanceOf[java.lang.Integer],
      metadata.numClasses.get.asInstanceOf[java.lang.Integer]
    )
    forest.setFeaturesCol(metadata.paramMap("featuresCol").asInstanceOf[String])
    forest.setPredictionCol(metadata.paramMap("predictionCol").asInstanceOf[String])
    forest.setProbabilityCol(metadata.paramMap("probabilityCol").asInstanceOf[String])
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: RandomForestClassificationModel): LocalTransformer[RandomForestClassificationModel] =
    new LocalRandomForestClassificationModel(transformer)
}
示例7: LocalDecisionTreeClassificationModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.classification
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.classification.DecisionTreeClassificationModel
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.ml.tree.Node
/**
 * [[LocalTransformer]] that applies a [[DecisionTreeClassificationModel]] to a [[LocalData]]
 * instance by calling the model's `predict(Vector)` method through reflection.
 */
class LocalDecisionTreeClassificationModel(override val sparkTransformer: DecisionTreeClassificationModel) extends LocalTransformer[DecisionTreeClassificationModel] {

  /**
   * Adds the prediction column.
   *
   * @param localData input data; each features entry is cast to `Array[Double]`
   * @return `localData` with the prediction column added, or `localData` itself
   *         when the features column is missing
   */
  override def transform(localData: LocalData): LocalData =
    localData.column(sparkTransformer.getFeaturesCol).fold(localData) { featuresColumn =>
      val predict = classOf[DecisionTreeClassificationModel].getMethod("predict", classOf[Vector])
      predict.setAccessible(true)
      val outputName = sparkTransformer.getPredictionCol
      val featureVectors = featuresColumn.data.map(raw => Vectors.dense(raw.asInstanceOf[Array[Double]]))
      val predictions = featureVectors.map { features =>
        predict.invoke(sparkTransformer, features).asInstanceOf[Double]
      }
      localData.withColumn(LocalDataColumn(outputName, predictions))
    }
}
/** Rebuilds [[DecisionTreeClassificationModel]] instances from serialized metadata and node data. */
object LocalDecisionTreeClassificationModel extends LocalModel[DecisionTreeClassificationModel] {

  /** Delegates to [[createTree]]. */
  override def load(metadata: Metadata, data: Map[String, Any]): DecisionTreeClassificationModel =
    createTree(metadata, data)

  /**
   * Instantiates a tree through the model's non-public constructor, using the root node built
   * by `DataUtils.createNode(0, metadata, data)`, then applies the column names and
   * parameters stored in `metadata.paramMap`.
   */
  def createTree(metadata: Metadata, data: Map[String, Any]): DecisionTreeClassificationModel = {
    val treeCtor = classOf[DecisionTreeClassificationModel]
      .getDeclaredConstructor(classOf[String], classOf[Node], classOf[Int], classOf[Int])
    treeCtor.setAccessible(true)
    val tree = treeCtor.newInstance(
      metadata.uid,
      DataUtils.createNode(0, metadata, data),
      metadata.numFeatures.get.asInstanceOf[java.lang.Integer],
      metadata.numClasses.get.asInstanceOf[java.lang.Integer]
    )

    val params = metadata.paramMap
    // String form of a stored parameter; numeric and boolean values are parsed from it.
    def text(key: String): String = params(key).toString

    tree.setFeaturesCol(params("featuresCol").asInstanceOf[String])
    tree.setPredictionCol(params("predictionCol").asInstanceOf[String])
    tree.setProbabilityCol(params("probabilityCol").asInstanceOf[String])
    tree.setRawPredictionCol(params("rawPredictionCol").asInstanceOf[String])

    tree.set(tree.seed, text("seed").toLong)
    tree.set(tree.cacheNodeIds, text("cacheNodeIds").toBoolean)
    tree.set(tree.maxDepth, text("maxDepth").toInt)
    tree.set(tree.labelCol, text("labelCol"))
    tree.set(tree.minInfoGain, text("minInfoGain").toDouble)
    tree.set(tree.checkpointInterval, text("checkpointInterval").toInt)
    tree.set(tree.minInstancesPerNode, text("minInstancesPerNode").toInt)
    tree.set(tree.maxMemoryInMB, text("maxMemoryInMB").toInt)
    tree.set(tree.maxBins, text("maxBins").toInt)
    tree.set(tree.impurity, text("impurity"))
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: DecisionTreeClassificationModel): LocalTransformer[DecisionTreeClassificationModel] =
    new LocalDecisionTreeClassificationModel(transformer)
}
示例8: LocalMultilayerPerceptronClassificationModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.classification
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel
import org.apache.spark.ml.linalg.{Vector, Vectors}
/**
 * [[LocalTransformer]] that applies a [[MultilayerPerceptronClassificationModel]] to a
 * [[LocalData]] instance by calling the model's `predict(Vector)` method through reflection.
 */
class LocalMultilayerPerceptronClassificationModel(override val sparkTransformer: MultilayerPerceptronClassificationModel) extends LocalTransformer[MultilayerPerceptronClassificationModel] {

  /**
   * Adds the prediction column.
   *
   * @param localData input data; each features entry is cast to `Vector`
   * @return `localData` with the prediction column added, or `localData` itself
   *         when the features column is missing
   */
  override def transform(localData: LocalData): LocalData =
    localData
      .column(sparkTransformer.getFeaturesCol)
      .map { featuresColumn =>
        val predict = classOf[MultilayerPerceptronClassificationModel].getMethod("predict", classOf[Vector])
        predict.setAccessible(true)
        val outputName = sparkTransformer.getPredictionCol
        val predictions = featuresColumn.data.map { feature =>
          predict.invoke(sparkTransformer, feature.asInstanceOf[Vector]).asInstanceOf[Double]
        }
        localData.withColumn(LocalDataColumn(outputName, predictions))
      }
      .getOrElse(localData)
}
/** Rebuilds [[MultilayerPerceptronClassificationModel]] instances from serialized metadata and data. */
object LocalMultilayerPerceptronClassificationModel extends LocalModel[MultilayerPerceptronClassificationModel] {

  /**
   * Instantiates the model through its non-public constructor from `data("layers")` and the
   * "values" entry of `data("weights")`, then sets the features and prediction column names.
   */
  override def load(metadata: Metadata, data: Map[String, Any]): MultilayerPerceptronClassificationModel = {
    val modelCtor = classOf[MultilayerPerceptronClassificationModel]
      .getDeclaredConstructor(classOf[String], classOf[Array[Int]], classOf[Vector])
    modelCtor.setAccessible(true)

    val uid = metadata.uid
    val layers: Array[Int] = data("layers").asInstanceOf[List[Int]].toArray
    val weightValues = data("weights").asInstanceOf[Map[String, Any]]("values").asInstanceOf[List[Double]].toArray
    val weights = Vectors.dense(weightValues)

    val model = modelCtor.newInstance(uid, layers, weights)
    model.setFeaturesCol(metadata.paramMap("featuresCol").asInstanceOf[String])
    model.setPredictionCol(metadata.paramMap("predictionCol").asInstanceOf[String])
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: MultilayerPerceptronClassificationModel): LocalTransformer[MultilayerPerceptronClassificationModel] =
    new LocalMultilayerPerceptronClassificationModel(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:35,代码来源:LocalMultilayerPerceptronClassificationModel.scala
示例9: LocalPolynomialExpansion
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.PolynomialExpansion
import org.apache.spark.ml.linalg.{Vector, Vectors}
/**
 * [[LocalTransformer]] that applies a [[PolynomialExpansion]] to a [[LocalData]] instance
 * using the function returned by the transformer's `createTransformFunc` method.
 */
class LocalPolynomialExpansion(override val sparkTransformer: PolynomialExpansion) extends LocalTransformer[PolynomialExpansion] {

  /**
   * Adds the output column holding the transformed vectors.
   *
   * @param localData input data; each input entry is cast to `List[Any]` and its
   *                  elements are parsed as doubles from their string form
   * @return `localData` with the output column added, or `localData` itself
   *         when the input column is missing
   */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        val method = classOf[PolynomialExpansion].getMethod("createTransformFunc")
        // Obtain the transform function once per call instead of once per row.
        val expand = method.invoke(sparkTransformer).asInstanceOf[Vector => Vector]
        val newData = column.data.map { r =>
          val row = r.asInstanceOf[List[Any]].map(_.toString.toDouble).toArray
          val vector: Vector = Vectors.dense(row)
          expand(vector)
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, newData))
      case None => localData
    }
  }
}
/** Rebuilds [[PolynomialExpansion]] transformers from serialized metadata. */
object LocalPolynomialExpansion extends LocalModel[PolynomialExpansion] {

  /** Creates the transformer with the stored uid and sets inputCol, outputCol and degree. */
  override def load(metadata: Metadata, data: Map[String, Any]): PolynomialExpansion = {
    val expansion = new PolynomialExpansion(metadata.uid)
    expansion.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    expansion.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
    expansion.setDegree(metadata.paramMap("degree").asInstanceOf[Number].intValue())
  }

  /** Wraps a Spark transformer in its local counterpart. */
  override implicit def getTransformer(transformer: PolynomialExpansion): LocalTransformer[PolynomialExpansion] =
    new LocalPolynomialExpansion(transformer)
}
示例10: LocalMaxAbsScalerModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.MaxAbsScalerModel
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}
/**
 * [[LocalTransformer]] that divides every input vector element-wise by the model's
 * `maxAbs` vector, with zero entries of `maxAbs` replaced by 1.
 */
class LocalMaxAbsScalerModel(override val sparkTransformer: MaxAbsScalerModel) extends LocalTransformer[MaxAbsScalerModel] {

  /**
   * Adds the output column holding the scaled vectors.
   *
   * @return `localData` with the output column added, or `localData` itself
   *         when the input column is missing
   * @throws IllegalArgumentException for entries that are neither a sparse/dense vector nor a `List`
   */
  override def transform(localData: LocalData): LocalData =
    localData.column(sparkTransformer.getInputCol).fold(localData) { inputColumn =>
      // A zero maximum would divide by zero; use 1 so such elements pass through unchanged.
      val divisors = Vectors.dense(sparkTransformer.maxAbs.toArray.map(m => if (m == 0.0) 1.0 else m))
      val scaled = inputColumn.data.map { entry =>
        val values: List[Double] = entry match {
          case sparse: SparseVector => sparse.toDense.toArray.toList
          case dense: DenseVector => dense.toArray.toList
          case items: List[Any @unchecked] => items.map(_.toString.toDouble)
          case other => throw new IllegalArgumentException(s"Unknown data type for LocalMaxAbsScaler: $other")
        }
        val quotient = DataUtils.asBreeze(values.toArray) / DataUtils.asBreeze(divisors.toArray)
        DataUtils.fromBreeze(quotient)
      }
      localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, scaled))
    }
}
/** Rebuilds [[MaxAbsScalerModel]] instances from serialized metadata and model data. */
object LocalMaxAbsScalerModel extends LocalModel[MaxAbsScalerModel] {

  /**
   * Reads the "values" list of `data("maxAbs")` (empty when absent), instantiates the model
   * through its non-public constructor and sets the input and output column names.
   */
  override def load(metadata: Metadata, data: Map[String, Any]): MaxAbsScalerModel = {
    val maxAbsDescription = data("maxAbs").asInstanceOf[Map[String, Any]]
    val maxAbsValues = maxAbsDescription.getOrElse("values", List()).asInstanceOf[List[Double]].toArray
    val maxAbs = new DenseVector(maxAbsValues)

    val modelCtor = classOf[MaxAbsScalerModel].getDeclaredConstructor(classOf[String], classOf[Vector])
    modelCtor.setAccessible(true)

    val model = modelCtor.newInstance(metadata.uid, maxAbs)
    model.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    model.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: MaxAbsScalerModel): LocalTransformer[MaxAbsScalerModel] =
    new LocalMaxAbsScalerModel(transformer)
}
示例11: LocalDCT
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.DCT
import org.apache.spark.ml.linalg.{Vector, Vectors}
/**
 * [[LocalTransformer]] that applies a [[DCT]] transformer to a [[LocalData]] instance
 * using the function returned by the transformer's `createTransformFunc` method.
 */
class LocalDCT(override val sparkTransformer: DCT) extends LocalTransformer[DCT] {

  /**
   * Adds the output column holding the transformed vectors.
   *
   * @param localData input data; each input entry is cast to `List[Any]` and its
   *                  elements are parsed as doubles from their string form
   * @return `localData` with the output column added, or `localData` itself
   *         when the input column is missing
   */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        val method = classOf[DCT].getMethod("createTransformFunc")
        // Obtain the transform function once per call instead of once per row.
        val dct = method.invoke(sparkTransformer).asInstanceOf[Vector => Vector]
        val newData = column.data.map { r =>
          val row = r.asInstanceOf[List[Any]].map(_.toString.toDouble).toArray
          val vector: Vector = Vectors.dense(row)
          dct(vector)
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, newData))
      case None => localData
    }
  }
}
/** Rebuilds [[DCT]] transformers from serialized metadata. */
object LocalDCT extends LocalModel[DCT] {

  /** Creates the transformer with the stored uid and sets inputCol, outputCol and inverse. */
  override def load(metadata: Metadata, data: Map[String, Any]): DCT = {
    val dct = new DCT(metadata.uid)
    dct.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    dct.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
    dct.setInverse(metadata.paramMap("inverse").asInstanceOf[Boolean])
  }

  /** Wraps a Spark transformer in its local counterpart. */
  override implicit def getTransformer(transformer: DCT): LocalTransformer[DCT] =
    new LocalDCT(transformer)
}
示例12: LocalStandardScalerModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.StandardScalerModel
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector}
import org.apache.spark.mllib.feature.{StandardScalerModel => OldStandardScalerModel}
import org.apache.spark.mllib.linalg.{DenseVector => OldDenseVector, SparseVector => OldSparseVector, Vector => OldVector, Vectors => OldVectors}
/**
 * [[LocalTransformer]] that applies a [[StandardScalerModel]] to a [[LocalData]] instance
 * by delegating to an `org.apache.spark.mllib.feature.StandardScalerModel` built from the
 * model's std, mean, withStd and withMean settings.
 */
class LocalStandardScalerModel(override val sparkTransformer: StandardScalerModel) extends LocalTransformer[StandardScalerModel] {

  /**
   * Adds the output column holding the scaled values as `Array[Double]`.
   *
   * @return `localData` with the output column added, or `localData` itself
   *         when the input column is missing
   * @throws IllegalArgumentException for entries of an unsupported type
   */
  override def transform(localData: LocalData): LocalData =
    localData
      .column(sparkTransformer.getInputCol)
      .map { inputColumn =>
        val scaler = new OldStandardScalerModel(
          OldVectors.fromML(sparkTransformer.std.asInstanceOf[Vector]),
          OldVectors.fromML(sparkTransformer.mean.asInstanceOf[Vector]),
          sparkTransformer.getWithStd,
          sparkTransformer.getWithMean
        )

        // Converts one input entry into an mllib vector understood by the old scaler.
        def asOldVector(entry: Any): OldVector = entry match {
          case values: Array[Double @unchecked] => OldVectors.dense(values)
          case items: List[Any @unchecked] => OldVectors.dense(items.map(_.toString.toDouble).toArray)
          case sparse: SparseVector => OldVectors.sparse(sparse.size, sparse.indices, sparse.values)
          case dense: DenseVector => OldVectors.dense(dense.toArray)
          case oldDense: OldDenseVector => oldDense
          case oldSparse: OldSparseVector => oldSparse.toDense
          case other => throw new IllegalArgumentException(s"Unknown data type for LocalStandardScaler: $other")
        }

        val scaled = inputColumn.data.map(entry => scaler.transform(asOldVector(entry)).toArray)
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, scaled))
      }
      .getOrElse(localData)
}
/** Rebuilds [[StandardScalerModel]] instances from serialized metadata and model data. */
object LocalStandardScalerModel extends LocalModel[StandardScalerModel] {

  /**
   * Reads the "values" lists of `data("std")` and `data("mean")` (empty when absent),
   * instantiates the model through its non-public constructor and sets the input and
   * output column names.
   */
  override def load(metadata: Metadata, data: Map[String, Any]): StandardScalerModel = {
    val modelCtor = classOf[StandardScalerModel]
      .getDeclaredConstructor(classOf[String], classOf[Vector], classOf[Vector])
    modelCtor.setAccessible(true)

    // Builds a dense vector from the "values" entry stored under the given key.
    def denseFrom(key: String): DenseVector = {
      val description = data(key).asInstanceOf[Map[String, Any]]
      new DenseVector(description.getOrElse("values", List()).asInstanceOf[List[Double]].toArray)
    }

    val std = denseFrom("std")
    val mean = denseFrom("mean")

    val model = modelCtor.newInstance(metadata.uid, std, mean)
    model.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    model.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: StandardScalerModel): LocalTransformer[StandardScalerModel] =
    new LocalStandardScalerModel(transformer)
}
示例13: LocalNormalizer
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.Normalizer
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}
/**
 * [[LocalTransformer]] that applies a [[Normalizer]] to a [[LocalData]] instance
 * using the function returned by the transformer's `createTransformFunc` method.
 */
class LocalNormalizer(override val sparkTransformer: Normalizer) extends LocalTransformer[Normalizer] {

  /**
   * Adds the output column holding the normalized vectors.
   *
   * @param localData input data; entries may be a `List` (elements parsed as doubles from
   *                  their string form), a `SparseVector` or a `DenseVector`
   * @return `localData` with the output column added, or `localData` itself
   *         when the input column is missing
   * @throws IllegalArgumentException for entries of any other type
   */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        val method = classOf[Normalizer].getMethod("createTransformFunc")
        // Obtain the transform function once per call instead of once per row.
        val normalize = method.invoke(sparkTransformer).asInstanceOf[Vector => Vector]
        val newData = column.data.map { r =>
          val vector: Vector = r match {
            case x: List[Any] => Vectors.dense(x.map(_.toString.toDouble).toArray)
            case x: SparseVector => x
            case x: DenseVector => x
            case unknown =>
              // The message names this transformer (it previously named LocalMaxAbsScaler).
              throw new IllegalArgumentException(s"Unknown data type for LocalNormalizer: ${unknown.getClass}")
          }
          normalize(vector)
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, newData))
      case None => localData
    }
  }
}
/** Rebuilds [[Normalizer]] transformers from serialized metadata. */
object LocalNormalizer extends LocalModel[Normalizer] {

  /** Creates the transformer with the stored uid and sets inputCol, outputCol and p. */
  override def load(metadata: Metadata, data: Map[String, Any]): Normalizer = {
    val normalizer = new Normalizer(metadata.uid)
    normalizer.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    normalizer.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
    // p is parsed from the string form of the stored value.
    normalizer.setP(metadata.paramMap("p").toString.toDouble)
  }

  /** Wraps a Spark transformer in its local counterpart. */
  override implicit def getTransformer(transformer: Normalizer): LocalTransformer[Normalizer] =
    new LocalNormalizer(transformer)
}
示例14: LocalDecisionTreeRegressionModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.regression
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.ml.regression.DecisionTreeRegressionModel
import org.apache.spark.ml.tree.Node
/**
 * [[LocalTransformer]] that applies a [[DecisionTreeRegressionModel]] to a [[LocalData]]
 * instance by calling the model's `predict(Vector)` method through reflection.
 */
class LocalDecisionTreeRegressionModel(override val sparkTransformer: DecisionTreeRegressionModel) extends LocalTransformer[DecisionTreeRegressionModel] {

  /**
   * Adds the prediction column.
   *
   * @param localData input data; each features entry is cast to `Array[Double]`
   * @return `localData` with the prediction column added, or `localData` itself
   *         when the features column is missing
   */
  override def transform(localData: LocalData): LocalData =
    localData
      .column(sparkTransformer.getFeaturesCol)
      .map { featuresColumn =>
        val predict = classOf[DecisionTreeRegressionModel].getMethod("predict", classOf[Vector])
        predict.setAccessible(true)
        val outputName = sparkTransformer.getPredictionCol
        val featureVectors = featuresColumn.data.map(raw => Vectors.dense(raw.asInstanceOf[Array[Double]]))
        val predictions = featureVectors.map { features =>
          predict.invoke(sparkTransformer, features).asInstanceOf[Double]
        }
        localData.withColumn(LocalDataColumn(outputName, predictions))
      }
      .getOrElse(localData)
}
/** Rebuilds [[DecisionTreeRegressionModel]] instances from serialized metadata and node data. */
object LocalDecisionTreeRegressionModel extends LocalModel[DecisionTreeRegressionModel] {

  /** Delegates to [[createTree]]. */
  override def load(metadata: Metadata, data: Map[String, Any]): DecisionTreeRegressionModel =
    createTree(metadata, data)

  /**
   * Instantiates a tree through the model's non-public constructor, using the root node built
   * by `DataUtils.createNode(0, metadata, data)`, then applies the column names and
   * parameters stored in `metadata.paramMap`.
   */
  def createTree(metadata: Metadata, data: Map[String, Any]): DecisionTreeRegressionModel = {
    val treeCtor = classOf[DecisionTreeRegressionModel]
      .getDeclaredConstructor(classOf[String], classOf[Node], classOf[Int])
    treeCtor.setAccessible(true)
    val tree = treeCtor.newInstance(
      metadata.uid,
      DataUtils.createNode(0, metadata, data),
      metadata.numFeatures.get.asInstanceOf[java.lang.Integer]
    )

    val params = metadata.paramMap
    // String form of a stored parameter; numeric and boolean values are parsed from it.
    def text(key: String): String = params(key).toString

    tree.setFeaturesCol(params("featuresCol").asInstanceOf[String])
    tree.setPredictionCol(params("predictionCol").asInstanceOf[String])

    tree.set(tree.seed, text("seed").toLong)
    tree.set(tree.cacheNodeIds, text("cacheNodeIds").toBoolean)
    tree.set(tree.maxDepth, text("maxDepth").toInt)
    tree.set(tree.labelCol, text("labelCol"))
    tree.set(tree.minInfoGain, text("minInfoGain").toDouble)
    tree.set(tree.checkpointInterval, text("checkpointInterval").toInt)
    tree.set(tree.minInstancesPerNode, text("minInstancesPerNode").toInt)
    tree.set(tree.maxMemoryInMB, text("maxMemoryInMB").toInt)
    tree.set(tree.maxBins, text("maxBins").toInt)
    tree.set(tree.impurity, text("impurity"))
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: DecisionTreeRegressionModel): LocalTransformer[DecisionTreeRegressionModel] =
    new LocalDecisionTreeRegressionModel(transformer)
}
示例15: LocalRandomForestRegressionModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.regression
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.ml.regression.{DecisionTreeRegressionModel, RandomForestRegressionModel}
/**
 * [[LocalTransformer]] that applies a [[RandomForestRegressionModel]] to a [[LocalData]]
 * instance by calling the model's `predict(Vector)` method through reflection.
 */
class LocalRandomForestRegressionModel(override val sparkTransformer: RandomForestRegressionModel) extends LocalTransformer[RandomForestRegressionModel] {

  /**
   * Adds the prediction column.
   *
   * @param localData input data; each features entry is cast to `Array[Double]`
   * @return `localData` with the prediction column added, or `localData` itself
   *         when the features column is missing
   */
  override def transform(localData: LocalData): LocalData = {
    // The method handle is resolved up front, before the features column is looked up.
    val predictMethod = classOf[RandomForestRegressionModel].getMethod("predict", classOf[Vector])
    localData.column(sparkTransformer.getFeaturesCol).fold(localData) { featuresColumn =>
      val outputName = sparkTransformer.getPredictionCol
      val featureVectors = featuresColumn.data.map(raw => Vectors.dense(raw.asInstanceOf[Array[Double]]))
      val predictions = featureVectors.map { features =>
        predictMethod.invoke(sparkTransformer, features).asInstanceOf[Double]
      }
      localData.withColumn(LocalDataColumn(outputName, predictions))
    }
  }
}
/** Rebuilds [[RandomForestRegressionModel]] instances from serialized metadata and tree data. */
object LocalRandomForestRegressionModel extends LocalModel[RandomForestRegressionModel] {

  /**
   * Creates one regression tree per entry of `metadata.paramMap("treesMetadata")`, looking up
   * the tree's data under the same key, assembles the forest through the model's non-public
   * constructor and applies the column names, seed, subsamplingRate and impurity.
   */
  override def load(metadata: Metadata, data: Map[String, Any]): RandomForestRegressionModel = {
    val treesMetadata = metadata.paramMap("treesMetadata").asInstanceOf[Map[String, Any]]
    val trees = treesMetadata.map { case (treeKey, rawTreeMeta) =>
      val treeMeta = rawTreeMeta.asInstanceOf[Map[String, Any]]
      val meta = treeMeta("metadata").asInstanceOf[Metadata]
      LocalDecisionTreeRegressionModel.createTree(meta, data(treeKey).asInstanceOf[Map[String, Any]])
    }

    val forestCtor = classOf[RandomForestRegressionModel].getDeclaredConstructor(
      classOf[String],
      classOf[Array[DecisionTreeRegressionModel]],
      classOf[Int]
    )
    forestCtor.setAccessible(true)

    val treeArray: Array[DecisionTreeRegressionModel] = trees.toArray
    val forest = forestCtor.newInstance(
      metadata.uid,
      treeArray,
      metadata.numFeatures.get.asInstanceOf[java.lang.Integer]
    )
    forest.setFeaturesCol(metadata.paramMap("featuresCol").asInstanceOf[String])
    forest.setPredictionCol(metadata.paramMap("predictionCol").asInstanceOf[String])

    forest.set(forest.seed, metadata.paramMap("seed").toString.toLong)
    forest.set(forest.subsamplingRate, metadata.paramMap("subsamplingRate").toString.toDouble)
    forest.set(forest.impurity, metadata.paramMap("impurity").toString)
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: RandomForestRegressionModel): LocalTransformer[RandomForestRegressionModel] =
    new LocalRandomForestRegressionModel(transformer)
}