当前位置: 首页>>代码示例>>Scala>>正文


Scala Vector类代码示例

本文整理汇总了Scala中org.apache.spark.ml.linalg.Vector的典型用法代码示例。如果您正苦于以下问题：Scala Vector类的具体用法？Scala Vector怎么用？Scala Vector使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Vector类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: Ml2NpyCSRSpec

//设置package包名称以及导入依赖的类
import java.io.{File, FileOutputStream}
import java.nio.channels.FileChannel
import java.nio.file.{Paths, StandardOpenOption}

import com.indix.ml2npy.Ml2NpyCSR
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector}
import org.scalatest.FlatSpec
import sys.process._


    // Command prefix for the Python test runner; the test target is appended to it.
    val nosetestspath="nosetests "
    // Path of the bundled Python test module, suffixed with ':' so a test name can follow.
    val pathToTest = getClass.getResource("/python/Npytest.py").getPath+":"

    // Feeds three sparse feature vectors with dense labels into Ml2NpyCSR, writes the
    // resulting bytes to /tmp/data.npz, then runs the Python test `test_5` and expects
    // it to exit with status 0.
    "ML2NpyFile" should "Convert to CSR matrix" in {

      val csrGen = new Ml2NpyCSR
      val data: Seq[Vector] = Seq(
        new SparseVector(3, Array(0), Array(0.1)),
        new SparseVector(3, Array(1), Array(0.2)),
        new SparseVector(3, Array(2), Array(0.3))
      )
      val labels = Seq(
        new DenseVector(Array(0, 1)),
        new DenseVector(Array(1, 0)),
        new DenseVector(Array(1, 0))
      )
      data.zip(labels).foreach(tup => csrGen.addRecord(tup._1, tup._2))
      val fos = new FileOutputStream(new File("/tmp/data.npz"))
      // Close the stream even when serialisation or the write throws, so the file
      // handle is never leaked.
      try {
        fos.write(csrGen.getBytes)
      } finally {
        fos.close()
      }

      val command=nosetestspath + pathToTest+"test_5"
      // `!` runs the command and yields its exit code.
      val response=command.!
      assert(response==0)
    }
} 
开发者ID:indix,项目名称:ml2npy,代码行数:37,代码来源:Ml2NpyCSRSpec.scala

示例2: LocalLinearRegressionModel

//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.regression

import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.regression.LinearRegressionModel

/**
  * Applies a Spark [[LinearRegressionModel]] to in-memory [[LocalData]].
  *
  * @param sparkTransformer the wrapped Spark model whose `predict` method is invoked reflectively
  */
class LocalLinearRegressionModel(override val sparkTransformer: LinearRegressionModel) extends LocalTransformer[LinearRegressionModel] {
  /**
    * Appends the prediction column computed from the features column.
    *
    * @param localData input data; returned unchanged when the features column is absent
    * @return `localData` with the prediction column added, or `localData` itself
    */
  override def transform(localData: LocalData): LocalData =
    localData.column(sparkTransformer.getFeaturesCol).map { featuresColumn =>
      val predictMethod = classOf[LinearRegressionModel].getMethod("predict", classOf[Vector])
      predictMethod.setAccessible(true)
      val outputName = sparkTransformer.getPredictionCol
      // Every row of the features column is treated as an ML Vector.
      val predictions = featuresColumn.data.map { row =>
        val features = row.asInstanceOf[Vector]
        predictMethod.invoke(sparkTransformer, features).asInstanceOf[Double]
      }
      localData.withColumn(LocalDataColumn(outputName, predictions))
    }.getOrElse(localData)
}

/** Rebuilds a Spark [[LinearRegressionModel]] from stored metadata and model data. */
object LocalLinearRegressionModel extends LocalModel[LinearRegressionModel] {
  /**
    * Instantiates the model through its (uid, coefficients, intercept) constructor and
    * restores its params from `metadata.paramMap`.
    *
    * @param metadata model uid and param map
    * @param data     map holding the "intercept" and "coefficients" entries
    * @return the configured model
    */
  override def load(metadata: Metadata, data: Map[String, Any]): LinearRegressionModel = {
    val intercept = data("intercept").asInstanceOf[java.lang.Double]
    val coefficients = DataUtils.constructVector(data("coefficients").asInstanceOf[Map[String, Any]])

    val model = classOf[LinearRegressionModel]
      .getConstructor(classOf[String], classOf[Vector], classOf[Double])
      .newInstance(metadata.uid, coefficients, intercept)

    val params = metadata.paramMap
    def asString(key: String): String = params(key).asInstanceOf[String]
    def asDouble(key: String): Double = params(key).toString.toDouble
    def asBoolean(key: String): Boolean = params(key).asInstanceOf[Boolean]

    model
      .set(model.featuresCol, asString("featuresCol"))
      .set(model.predictionCol, asString("predictionCol"))
      .set(model.labelCol, asString("labelCol"))
      .set(model.elasticNetParam, asDouble("elasticNetParam"))
      // NOTE: aggregationDepth (introduced in Spark 2.1 to reduce iterations on big
      // datasets) is deliberately not restored; it is unnecessary here.
      .set(model.maxIter, params("maxIter").asInstanceOf[Number].intValue())
      .set(model.regParam, asDouble("regParam"))
      .set(model.solver, asString("solver"))
      .set(model.tol, asDouble("tol"))
      .set(model.standardization, asBoolean("standardization"))
      .set(model.fitIntercept, asBoolean("fitIntercept"))
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: LinearRegressionModel): LocalTransformer[LinearRegressionModel] =
    new LocalLinearRegressionModel(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:49,代码来源:LocalLinearRegressionModel.scala

示例3: buildMultiPerpectronNetwork

//设置package包名称以及导入依赖的类
package com.sircamp.algorithms.neuralnetwork

import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter}

import com.sircamp.Application
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier}
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.linalg.VectorUDT
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Dataset, Row, SparkSession}
import org.apache.spark.sql.types._


    val file = new java.io.File(TEMP_FILE_PATH)
    if( file.exists){
      file.delete()
    }

    val writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file)))


    var sb = new StringBuilder()
    trainingData.collect().foreach(f=>{
      var arr = f.features.toArray
      var features = ""
      for(i <- arr.indices){
        features = features +((i+1)+":"+arr(i))+" "
      }
      writer.write((f.label+" "+features) + "\n")
    })
    writer.close()
  }

  /**
    * Trains a multilayer perceptron classifier on the given data.
    *
    * @param trainingData rows to fit the classifier on
    * @param layers       layer sizes passed to `setLayers`
    * @param maxIter      maximum number of training iterations
    * @return the fitted model
    */
  def buildMultiPerpectronNetwork(trainingData:Dataset[Row], layers:Array[Int], maxIter:Int):MultilayerPerceptronClassificationModel = {
    val classifier = new MultilayerPerceptronClassifier()
    classifier.setLayers(layers)
    // blockSize and seed come from the enclosing scope.
    classifier.setBlockSize(blockSize)
    classifier.setSeed(seed)
    classifier.setMaxIter(maxIter)
    classifier.fit(trainingData)
  }
} 
开发者ID:sirCamp,项目名称:mushrooms-ml-classfier-scala-spark,代码行数:47,代码来源:NeuralNetworkBuilder.scala

示例4: LDA

//设置package包名称以及导入依赖的类
package com.databricks.spark.sql.perf.mllib.clustering

import scala.collection.mutable.{HashMap => MHashMap}

import org.apache.commons.math3.random.Well19937c

import org.apache.spark.ml.Estimator
import org.apache.spark.ml
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.ml.linalg.{Vector, Vectors}

import com.databricks.spark.sql.perf.mllib.{BenchmarkAlgorithm, MLBenchContext, TestFromTraining}
import com.databricks.spark.sql.perf.mllib.OptionImplicits._


/** Benchmark definition for Spark ML's LDA estimator on synthetic bag-of-words documents. */
object LDA extends BenchmarkAlgorithm with TestFromTraining {
  // The LDA model is package private, no need to expose it.

  /**
    * Generates the synthetic training set: one row per document id, holding a sparse
    * term-count vector built from `ldaDocLength` random term draws.
    *
    * @param ctx benchmark context supplying the SQL context and the benchmark params
    * @return a DataFrame with columns "docIndex" and "features"
    */
  override def trainingDataSet(ctx: MLBenchContext): DataFrame = {
    import ctx.params._
    val docIds = ctx.sqlContext.sparkContext.parallelize(0L until numExamples, numPartitions)
    val seed: Int = randomSeed
    val docLength = ldaDocLength.get
    val numVocab = ldaNumVocabulary.get
    val documents: RDD[(Long, Vector)] = docIds.mapPartitionsWithIndex { (partitionId, docs) =>
      // One generator per partition, seeded from the global seed and the partition index.
      val rng = new Well19937c(seed ^ partitionId)
      docs.map { docIndex =>
        val termCounts = MHashMap[Int, Int]()
        var drawn = 0
        while (drawn < docLength) {
          val term = rng.nextInt(numVocab)
          termCounts.update(term, termCounts.getOrElse(term, 0) + 1)
          drawn += 1
        }

        val termWeights = termCounts.toSeq.map { case (term, count) => (term, count.toDouble) }
        (docIndex, Vectors.sparse(numVocab, termWeights))
      }
    }
    ctx.sqlContext.createDataFrame(documents).toDF("docIndex", "features")
  }

  /**
    * Builds the LDA estimator configured from the benchmark params.
    *
    * @param ctx benchmark context supplying k, seed, max iterations and optimizer
    */
  override def getEstimator(ctx: MLBenchContext): Estimator[_] = {
    import ctx.params._
    val estimator = new ml.clustering.LDA()
    estimator.setK(k)
    estimator.setSeed(randomSeed.toLong)
    estimator.setMaxIter(maxIter)
    estimator.setOptimizer(optimizer)
    estimator
  }

  // TODO(?) add a scoring method here.
}
开发者ID:summerDG,项目名称:spark-sql-perf,代码行数:58,代码来源:LDA.scala

示例5: LocalGaussianMixtureModel

//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.clustering

import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.clustering.GaussianMixtureModel
import org.apache.spark.ml.linalg.{Matrix, Vector}
import org.apache.spark.ml.stat.distribution.MultivariateGaussian

/**
  * Applies a Spark [[GaussianMixtureModel]] to in-memory [[LocalData]].
  *
  * @param sparkTransformer the wrapped Spark model whose `predict` method is invoked reflectively
  */
class LocalGaussianMixtureModel(override val sparkTransformer: GaussianMixtureModel) extends LocalTransformer[GaussianMixtureModel] {
  /**
    * Appends the prediction column computed from the features column.
    *
    * @param localData input data; returned unchanged when the features column is absent
    */
  override def transform(localData: LocalData): LocalData =
    localData.column(sparkTransformer.getFeaturesCol).map { featuresColumn =>
      val predict = classOf[GaussianMixtureModel].getMethod("predict", classOf[Vector])
      predict.setAccessible(true)
      val outputName = sparkTransformer.getPredictionCol
      val clusterIds = featuresColumn.data.map { row =>
        predict.invoke(sparkTransformer, row.asInstanceOf[Vector]).asInstanceOf[Int]
      }
      localData.withColumn(LocalDataColumn(outputName, clusterIds))
    }.getOrElse(localData)
}

/** Rebuilds a Spark [[GaussianMixtureModel]] from stored metadata and model data. */
object LocalGaussianMixtureModel extends LocalModel[GaussianMixtureModel] {
  /**
    * Instantiates the model through its non-public (uid, weights, gaussians) constructor
    * and restores its column params.
    *
    * @param metadata model uid and param map
    * @param data     map holding the "weights", "mus" and "sigmas" entries
    */
  override def load(metadata: Metadata, data: Map[String, Any]): GaussianMixtureModel = {
    val weights = data("weights").asInstanceOf[List[Double]].toArray
    val means = data("mus").asInstanceOf[List[Vector]].toArray
    val covariances = data("sigmas").asInstanceOf[List[Matrix]].toArray
    // Pair each mean with its covariance matrix to form one mixture component.
    val gaussians = means.zip(covariances).map { case (mu, sigma) =>
      new MultivariateGaussian(mu, sigma)
    }

    val ctor = classOf[GaussianMixtureModel].getDeclaredConstructor(
      classOf[String],
      classOf[Array[Double]],
      classOf[Array[MultivariateGaussian]]
    )
    ctor.setAccessible(true)
    val model = ctor.newInstance(metadata.uid, weights, gaussians)

    def columnName(key: String): String = metadata.paramMap(key).asInstanceOf[String]

    model
      .set(model.probabilityCol, columnName("probabilityCol"))
      .set(model.featuresCol, columnName("featuresCol"))
      .set(model.predictionCol, columnName("predictionCol"))
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: GaussianMixtureModel): LocalTransformer[GaussianMixtureModel] =
    new LocalGaussianMixtureModel(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:48,代码来源:LocalGaussianMixtureModel.scala

示例6: LocalRandomForestClassificationModel

//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.classification

import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.classification.{DecisionTreeClassificationModel, RandomForestClassificationModel}
import org.apache.spark.ml.linalg.{DenseVector, Vector, Vectors}

/**
  * Applies a Spark [[RandomForestClassificationModel]] to in-memory [[LocalData]].
  *
  * @param sparkTransformer the wrapped Spark model whose prediction methods are invoked reflectively
  */
class LocalRandomForestClassificationModel(override val sparkTransformer: RandomForestClassificationModel) extends LocalTransformer[RandomForestClassificationModel] {
  /**
    * Appends the raw-prediction, probability and prediction columns computed from the
    * features column.
    *
    * @param localData input data whose features column holds `Array[Double]` rows;
    *                  returned unchanged when that column is absent
    */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getFeaturesCol) match {
      case Some(column) =>
        val cls = classOf[RandomForestClassificationModel]
        // Resolve the reflective handles once up front; looking them up inside the
        // per-row lambdas repeated the same lookup for every row.
        val predictRaw = cls.getDeclaredMethod("predictRaw", classOf[Vector])
        val raw2probabilityInPlace = cls.getDeclaredMethod("raw2probabilityInPlace", classOf[Vector])
        val raw2prediction = cls.getMethod("raw2prediction", classOf[Vector])

        val rawPredictionCol = LocalDataColumn(sparkTransformer.getRawPredictionCol, column.data.map(f => Vectors.dense(f.asInstanceOf[Array[Double]])).map { vector =>
          val res = predictRaw.invoke(sparkTransformer, vector).asInstanceOf[Vector]
          res.toArray
        })
        val probabilityCol = LocalDataColumn(sparkTransformer.getProbabilityCol, rawPredictionCol.data.map(Vectors.dense).map { vector =>
          // Work on a copy: the method name says it converts in place.
          val res = raw2probabilityInPlace.invoke(sparkTransformer, vector.copy).asInstanceOf[Vector]
          res.toArray
        })
        val predictionCol = LocalDataColumn(sparkTransformer.getPredictionCol, rawPredictionCol.data.map(Vectors.dense).map { vector =>
          raw2prediction.invoke(sparkTransformer, vector.copy)
        })
        localData.withColumn(rawPredictionCol)
          .withColumn(probabilityCol)
          .withColumn(predictionCol)
      case None => localData
    }
  }
}

/** Rebuilds a Spark [[RandomForestClassificationModel]] from stored metadata and per-tree data. */
object LocalRandomForestClassificationModel extends LocalModel[RandomForestClassificationModel] {
  /**
    * Recreates every tree listed under "treesMetadata", then instantiates the forest through
    * its non-public (uid, trees, numFeatures, numClasses) constructor.
    *
    * @param metadata forest uid, param map, feature count and class count
    * @param data     per-tree node data, keyed by the same keys as "treesMetadata"
    */
  override def load(metadata: Metadata, data: Map[String, Any]): RandomForestClassificationModel = {
    val treesMetadata = metadata.paramMap("treesMetadata").asInstanceOf[Map[String, Any]]
    val trees = treesMetadata.map { case (treeKey, rawTreeMeta) =>
      val treeMeta = rawTreeMeta.asInstanceOf[Map[String, Any]]
      val meta = treeMeta("metadata").asInstanceOf[Metadata]
      val treeData = data(treeKey).asInstanceOf[Map[String, Any]]
      LocalDecisionTreeClassificationModel.createTree(meta, treeData)
    }

    val ctor = classOf[RandomForestClassificationModel].getDeclaredConstructor(
      classOf[String],
      classOf[Array[DecisionTreeClassificationModel]],
      classOf[Int],
      classOf[Int]
    )
    ctor.setAccessible(true)

    val forest = ctor.newInstance(
      metadata.uid,
      trees.to[Array],
      metadata.numFeatures.get.asInstanceOf[java.lang.Integer],
      metadata.numClasses.get.asInstanceOf[java.lang.Integer]
    )
    forest
      .setFeaturesCol(metadata.paramMap("featuresCol").asInstanceOf[String])
      .setPredictionCol(metadata.paramMap("predictionCol").asInstanceOf[String])
      .setProbabilityCol(metadata.paramMap("probabilityCol").asInstanceOf[String])
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: RandomForestClassificationModel): LocalTransformer[RandomForestClassificationModel] =
    new LocalRandomForestClassificationModel(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:61,代码来源:LocalRandomForestClassificationModel.scala

示例7: LocalDecisionTreeClassificationModel

//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.classification

import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.classification.DecisionTreeClassificationModel
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.ml.tree.Node

/**
  * Applies a Spark [[DecisionTreeClassificationModel]] to in-memory [[LocalData]].
  *
  * @param sparkTransformer the wrapped Spark model whose `predict` method is invoked reflectively
  */
class LocalDecisionTreeClassificationModel(override val sparkTransformer: DecisionTreeClassificationModel) extends LocalTransformer[DecisionTreeClassificationModel] {
  /**
    * Appends the prediction column computed from the features column.
    *
    * @param localData input data whose features column holds `Array[Double]` rows;
    *                  returned unchanged when that column is absent
    */
  override def transform(localData: LocalData): LocalData =
    localData.column(sparkTransformer.getFeaturesCol).map { featuresColumn =>
      val predict = classOf[DecisionTreeClassificationModel].getMethod("predict", classOf[Vector])
      predict.setAccessible(true)
      val outputName = sparkTransformer.getPredictionCol
      val featureVectors = featuresColumn.data.map(row => Vectors.dense(row.asInstanceOf[Array[Double]]))
      val predictions = featureVectors.map { features =>
        predict.invoke(sparkTransformer, features).asInstanceOf[Double]
      }
      localData.withColumn(LocalDataColumn(outputName, predictions))
    }.getOrElse(localData)
}

/** Rebuilds a Spark [[DecisionTreeClassificationModel]] from stored metadata and node data. */
object LocalDecisionTreeClassificationModel extends LocalModel[DecisionTreeClassificationModel] {
  /** Loads the model by delegating to [[createTree]]. */
  override def load(metadata: Metadata, data: Map[String, Any]): DecisionTreeClassificationModel =
    createTree(metadata, data)

  /**
    * Instantiates a tree through its non-public (uid, rootNode, numFeatures, numClasses)
    * constructor and restores its params from `metadata.paramMap`.
    *
    * @param metadata tree uid, param map, feature count and class count
    * @param data     node data handed to `DataUtils.createNode` starting at node 0
    */
  def createTree(metadata: Metadata, data: Map[String, Any]): DecisionTreeClassificationModel = {
    val ctor = classOf[DecisionTreeClassificationModel].getDeclaredConstructor(
      classOf[String],
      classOf[Node],
      classOf[Int],
      classOf[Int]
    )
    ctor.setAccessible(true)
    val tree = ctor.newInstance(
      metadata.uid,
      DataUtils.createNode(0, metadata, data),
      metadata.numFeatures.get.asInstanceOf[java.lang.Integer],
      metadata.numClasses.get.asInstanceOf[java.lang.Integer]
    )

    val params = metadata.paramMap
    def text(key: String): String = params(key).toString

    // Column setters mutate the instance; their return value is not needed.
    tree.setFeaturesCol(params("featuresCol").asInstanceOf[String])
    tree.setPredictionCol(params("predictionCol").asInstanceOf[String])
    tree.setProbabilityCol(params("probabilityCol").asInstanceOf[String])
    tree.setRawPredictionCol(params("rawPredictionCol").asInstanceOf[String])

    tree
      .set(tree.seed, text("seed").toLong)
      .set(tree.cacheNodeIds, text("cacheNodeIds").toBoolean)
      .set(tree.maxDepth, text("maxDepth").toInt)
      .set(tree.labelCol, text("labelCol"))
      .set(tree.minInfoGain, text("minInfoGain").toDouble)
      .set(tree.checkpointInterval, text("checkpointInterval").toInt)
      .set(tree.minInstancesPerNode, text("minInstancesPerNode").toInt)
      .set(tree.maxMemoryInMB, text("maxMemoryInMB").toInt)
      .set(tree.maxBins, text("maxBins").toInt)
      .set(tree.impurity, text("impurity"))
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: DecisionTreeClassificationModel): LocalTransformer[DecisionTreeClassificationModel] =
    new LocalDecisionTreeClassificationModel(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:57,代码来源:LocalDecisionTreeClassificationModel.scala

示例8: LocalMultilayerPerceptronClassificationModel

//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.classification

import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel
import org.apache.spark.ml.linalg.{Vector, Vectors}

/**
  * Applies a Spark [[MultilayerPerceptronClassificationModel]] to in-memory [[LocalData]].
  *
  * @param sparkTransformer the wrapped Spark model whose `predict` method is invoked reflectively
  */
class LocalMultilayerPerceptronClassificationModel(override val sparkTransformer: MultilayerPerceptronClassificationModel) extends LocalTransformer[MultilayerPerceptronClassificationModel] {

  /**
    * Appends the prediction column computed from the features column.
    *
    * @param localData input data; returned unchanged when the features column is absent
    */
  override def transform(localData: LocalData): LocalData =
    localData.column(sparkTransformer.getFeaturesCol).map { featuresColumn =>
      val predict = classOf[MultilayerPerceptronClassificationModel].getMethod("predict", classOf[Vector])
      predict.setAccessible(true)
      val outputName = sparkTransformer.getPredictionCol
      val predictions = featuresColumn.data.map { row =>
        predict.invoke(sparkTransformer, row.asInstanceOf[Vector]).asInstanceOf[Double]
      }
      localData.withColumn(LocalDataColumn(outputName, predictions))
    }.getOrElse(localData)
}

/** Rebuilds a Spark [[MultilayerPerceptronClassificationModel]] from stored metadata and model data. */
object LocalMultilayerPerceptronClassificationModel extends LocalModel[MultilayerPerceptronClassificationModel] {
  /**
    * Instantiates the model through its non-public (uid, layers, weights) constructor and
    * restores its column params.
    *
    * @param metadata model uid and param map
    * @param data     map holding "layers" and a "weights" map with a "values" list
    */
  override def load(metadata: Metadata, data: Map[String, Any]): MultilayerPerceptronClassificationModel = {
    val ctor = classOf[MultilayerPerceptronClassificationModel].getDeclaredConstructor(
      classOf[String],
      classOf[Array[Int]],
      classOf[Vector]
    )
    ctor.setAccessible(true)

    val layers = data("layers").asInstanceOf[List[Int]].to[Array]
    val weightValues = data("weights").asInstanceOf[Map[String, Any]]("values").asInstanceOf[List[Double]].toArray
    val weights = Vectors.dense(weightValues)

    ctor
      .newInstance(metadata.uid, layers, weights)
      .setFeaturesCol(metadata.paramMap("featuresCol").asInstanceOf[String])
      .setPredictionCol(metadata.paramMap("predictionCol").asInstanceOf[String])
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: MultilayerPerceptronClassificationModel): LocalTransformer[MultilayerPerceptronClassificationModel] =
    new LocalMultilayerPerceptronClassificationModel(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:35,代码来源:LocalMultilayerPerceptronClassificationModel.scala

示例9: LocalPolynomialExpansion

//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors

import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.PolynomialExpansion
import org.apache.spark.ml.linalg.{Vector, Vectors}

/**
  * Applies a Spark [[PolynomialExpansion]] to in-memory [[LocalData]].
  *
  * @param sparkTransformer the wrapped Spark transformer whose `createTransformFunc`
  *                         is obtained reflectively
  */
class LocalPolynomialExpansion(override val sparkTransformer: PolynomialExpansion) extends LocalTransformer[PolynomialExpansion] {
  /**
    * Appends the output column produced by running the transformer's function on every
    * row of the input column.
    *
    * @param localData input data whose input column holds lists of values parseable as
    *                  doubles; returned unchanged when that column is absent
    */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        val method = classOf[PolynomialExpansion].getMethod("createTransformFunc")
        // Build the transform function once; invoking it reflectively for every row
        // recreated the same function each time.
        val transformFunc = method.invoke(sparkTransformer).asInstanceOf[Vector => Vector]
        val newData = column.data.map(r => {
          val row = r.asInstanceOf[List[Any]].map(_.toString.toDouble).toArray
          val vector: Vector = Vectors.dense(row)
          transformFunc(vector)
        })
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, newData))
      case None => localData
    }
  }
}

/** Rebuilds a Spark [[PolynomialExpansion]] from stored metadata. */
object LocalPolynomialExpansion extends LocalModel[PolynomialExpansion] {
  /**
    * Creates the transformer and restores its input column, output column and degree.
    *
    * @param metadata transformer uid and param map
    * @param data     unused by this loader
    */
  override def load(metadata: Metadata, data: Map[String, Any]): PolynomialExpansion = {
    val params = metadata.paramMap
    val expansion = new PolynomialExpansion(metadata.uid)
    expansion.setInputCol(params("inputCol").asInstanceOf[String])
    expansion.setOutputCol(params("outputCol").asInstanceOf[String])
    expansion.setDegree(params("degree").asInstanceOf[Number].intValue())
  }

  /** Wraps a Spark transformer in its local transformer. */
  override implicit def getTransformer(transformer: PolynomialExpansion): LocalTransformer[PolynomialExpansion] =
    new LocalPolynomialExpansion(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:33,代码来源:LocalPolynomialExpansion.scala

示例10: LocalMaxAbsScalerModel

//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors

import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.MaxAbsScalerModel
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}

/**
  * Applies a Spark [[MaxAbsScalerModel]] to in-memory [[LocalData]].
  *
  * @param sparkTransformer the wrapped Spark model supplying the per-feature `maxAbs` values
  */
class LocalMaxAbsScalerModel(override val sparkTransformer: MaxAbsScalerModel) extends LocalTransformer[MaxAbsScalerModel] {
  /**
    * Appends the output column holding each input row divided element-wise by `maxAbs`.
    *
    * @param localData input data; returned unchanged when the input column is absent
    * @throws IllegalArgumentException when a row is neither an ML vector nor a list
    */
  override def transform(localData: LocalData): LocalData =
    localData.column(sparkTransformer.getInputCol).map { column =>
      // A zero maximum is replaced by 1 so the division below never divides by zero.
      val divisor = Vectors.dense(sparkTransformer.maxAbs.toArray.map(m => if (m == 0) 1.0 else m))
      val scaled = column.data.map { row =>
        val values: List[Double] = row match {
          case sparse: SparseVector => sparse.toDense.toArray.toList
          case dense: DenseVector => dense.toArray.toList
          case list: List[Any @unchecked] => list.map(_.toString.toDouble)
          case other => throw new IllegalArgumentException(s"Unknown data type for LocalMaxAbsScaler: $other")
        }
        DataUtils.fromBreeze(DataUtils.asBreeze(values.toArray) / DataUtils.asBreeze(divisor.toArray))
      }
      localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, scaled))
    }.getOrElse(localData)
}

/** Rebuilds a Spark [[MaxAbsScalerModel]] from stored metadata and model data. */
object LocalMaxAbsScalerModel extends LocalModel[MaxAbsScalerModel] {
  /**
    * Instantiates the model through its non-public (uid, maxAbs) constructor and restores
    * its column params.
    *
    * @param metadata model uid and param map
    * @param data     map holding a "maxAbs" map; a missing "values" entry yields an empty vector
    */
  override def load(metadata: Metadata, data: Map[String, Any]): MaxAbsScalerModel = {
    val maxAbsFields = data("maxAbs").asInstanceOf[Map[String, Any]]
    val maxAbsValues: Array[Double] =
      maxAbsFields.getOrElse("values", List()).asInstanceOf[List[Double]].toArray
    val maxAbs = new DenseVector(maxAbsValues)

    val ctor = classOf[MaxAbsScalerModel].getDeclaredConstructor(classOf[String], classOf[Vector])
    ctor.setAccessible(true)
    val model = ctor.newInstance(metadata.uid, maxAbs)
    model
      .setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
      .setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: MaxAbsScalerModel): LocalTransformer[MaxAbsScalerModel] =
    new LocalMaxAbsScalerModel(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:46,代码来源:LocalMaxAbsScalerModel.scala

示例11: LocalDCT

//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors

import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.DCT
import org.apache.spark.ml.linalg.{Vector, Vectors}

/**
  * Applies a Spark [[DCT]] transformer to in-memory [[LocalData]].
  *
  * @param sparkTransformer the wrapped Spark transformer whose `createTransformFunc`
  *                         is obtained reflectively
  */
class LocalDCT(override val sparkTransformer: DCT) extends LocalTransformer[DCT] {
  /**
    * Appends the output column produced by running the transformer's function on every
    * row of the input column.
    *
    * @param localData input data whose input column holds lists of values parseable as
    *                  doubles; returned unchanged when that column is absent
    */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        val method = classOf[DCT].getMethod("createTransformFunc")
        // Build the transform function once; invoking it reflectively for every row
        // recreated the same function each time.
        val transformFunc = method.invoke(sparkTransformer).asInstanceOf[Vector => Vector]
        val newData = column.data.map(r => {
          val row = r.asInstanceOf[List[Any]].map(_.toString.toDouble).toArray
          val vector: Vector = Vectors.dense(row)
          transformFunc(vector)
        })
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, newData))
      case None => localData
    }
  }
}

/** Rebuilds a Spark [[DCT]] transformer from stored metadata. */
object LocalDCT extends LocalModel[DCT] {
  /**
    * Creates the transformer and restores its input column, output column and inverse flag.
    *
    * @param metadata transformer uid and param map
    * @param data     unused by this loader
    */
  override def load(metadata: Metadata, data: Map[String, Any]): DCT = {
    val params = metadata.paramMap
    val dct = new DCT(metadata.uid)
    dct.setInputCol(params("inputCol").asInstanceOf[String])
    dct.setOutputCol(params("outputCol").asInstanceOf[String])
    dct.setInverse(params("inverse").asInstanceOf[Boolean])
  }

  /** Wraps a Spark transformer in its local transformer. */
  override implicit def getTransformer(transformer: DCT): LocalTransformer[DCT] =
    new LocalDCT(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:33,代码来源:LocalDCT.scala

示例12: LocalStandardScalerModel

//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors

import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.StandardScalerModel
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector}
import org.apache.spark.mllib.feature.{StandardScalerModel => OldStandardScalerModel}
import org.apache.spark.mllib.linalg.{DenseVector => OldDenseVector, SparseVector => OldSparseVector, Vector => OldVector, Vectors => OldVectors}

/**
  * Applies a Spark [[StandardScalerModel]] to in-memory [[LocalData]] by delegating to the
  * older mllib scaler built from the same std/mean vectors and flags.
  *
  * @param sparkTransformer the wrapped Spark ML model
  */
class LocalStandardScalerModel(override val sparkTransformer: StandardScalerModel) extends LocalTransformer[StandardScalerModel] {
  /**
    * Appends the output column holding the scaled rows as `Array[Double]`.
    *
    * @param localData input data; returned unchanged when the input column is absent
    * @throws IllegalArgumentException when a row has none of the supported types
    */
  override def transform(localData: LocalData): LocalData =
    localData.column(sparkTransformer.getInputCol).map { column =>
      val scaler = new OldStandardScalerModel(
        OldVectors.fromML(sparkTransformer.std.asInstanceOf[Vector]),
        OldVectors.fromML(sparkTransformer.mean.asInstanceOf[Vector]),
        sparkTransformer.getWithStd,
        sparkTransformer.getWithMean
      )

      // Normalises every supported row representation to an mllib vector.
      def toOldVector(row: Any): OldVector = row match {
        case values: Array[Double @unchecked] => OldVectors.dense(values)
        case values: List[Any @unchecked] => OldVectors.dense(values.map(_.toString.toDouble).toArray)
        case sparse: SparseVector => OldVectors.sparse(sparse.size, sparse.indices, sparse.values)
        case dense: DenseVector => OldVectors.dense(dense.toArray)
        case oldDense: OldDenseVector => oldDense
        case oldSparse: OldSparseVector => oldSparse.toDense
        case other => throw new IllegalArgumentException(s"Unknown data type for LocalStandardScaler: $other")
      }

      val scaled = column.data.map(row => scaler.transform(toOldVector(row)).toArray)
      localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, scaled))
    }.getOrElse(localData)
}

/** Rebuilds a Spark [[StandardScalerModel]] from stored metadata and model data. */
object LocalStandardScalerModel extends LocalModel[StandardScalerModel] {
  /**
    * Instantiates the model through its non-public (uid, std, mean) constructor and
    * restores its column params.
    *
    * @param metadata model uid and param map
    * @param data     map holding "std" and "mean" maps; a missing "values" entry yields
    *                 an empty vector
    */
  override def load(metadata: Metadata, data: Map[String, Any]): StandardScalerModel = {
    val ctor = classOf[StandardScalerModel].getDeclaredConstructor(classOf[String], classOf[Vector], classOf[Vector])
    ctor.setAccessible(true)

    // Reads the "values" list stored under `key` and wraps it in a dense vector.
    def denseVectorOf(key: String): DenseVector = {
      val fields = data(key).asInstanceOf[Map[String, Any]]
      val values: Array[Double] = fields.getOrElse("values", List()).asInstanceOf[List[Double]].toArray
      new DenseVector(values)
    }

    val std = denseVectorOf("std")
    val mean = denseVectorOf("mean")

    val model = ctor.newInstance(metadata.uid, std, mean)
    model
      .setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
      .setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: StandardScalerModel): LocalTransformer[StandardScalerModel] =
    new LocalStandardScalerModel(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:57,代码来源:LocalStandardScalerModel.scala

示例13: LocalNormalizer

//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors

import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.Normalizer
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}

/**
  * Applies a Spark [[Normalizer]] to in-memory [[LocalData]].
  *
  * @param sparkTransformer the wrapped Spark transformer whose `createTransformFunc`
  *                         is obtained reflectively
  */
class LocalNormalizer(override val sparkTransformer: Normalizer) extends LocalTransformer[Normalizer] {
  /**
    * Appends the output column produced by running the transformer's function on every
    * row of the input column.
    *
    * @param localData input data; returned unchanged when the input column is absent
    * @throws IllegalArgumentException when a row is neither a list nor an ML vector
    */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        val method = classOf[Normalizer].getMethod("createTransformFunc")
        // Build the transform function once; invoking it reflectively for every row
        // recreated the same function each time.
        val transformFunc = method.invoke(sparkTransformer).asInstanceOf[Vector => Vector]
        val newData = column.data.map(r => {
          val vector = r match {
            case x: List[Any] => Vectors.dense(x.map(_.toString.toDouble).toArray)
            case x: SparseVector => x
            case x: DenseVector => x
            case unknown =>
              // The message previously named LocalMaxAbsScaler, a copy-paste slip.
              throw new IllegalArgumentException(s"Unknown data type for LocalNormalizer: ${unknown.getClass}")
          }
          transformFunc(vector)
        })
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, newData))
      case None => localData
    }
  }
}

/** Rebuilds a Spark [[Normalizer]] from stored metadata. */
object LocalNormalizer extends LocalModel[Normalizer] {
  /**
    * Creates the transformer and restores its input column, output column and `p` norm.
    *
    * @param metadata transformer uid and param map
    * @param data     unused by this loader
    */
  override def load(metadata: Metadata, data: Map[String, Any]): Normalizer = {
    val params = metadata.paramMap
    val normalizer = new Normalizer(metadata.uid)
    normalizer.setInputCol(params("inputCol").asInstanceOf[String])
    normalizer.setOutputCol(params("outputCol").asInstanceOf[String])
    normalizer.setP(params("p").toString.toDouble)
  }

  /** Wraps a Spark transformer in its local transformer. */
  override implicit def getTransformer(transformer: Normalizer): LocalTransformer[Normalizer] =
    new LocalNormalizer(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:38,代码来源:LocalNormalizer.scala

示例14: LocalDecisionTreeRegressionModel

//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.regression

import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.ml.regression.DecisionTreeRegressionModel
import org.apache.spark.ml.tree.Node

/**
  * Applies a Spark [[DecisionTreeRegressionModel]] to in-memory [[LocalData]].
  *
  * @param sparkTransformer the wrapped Spark model whose `predict` method is invoked reflectively
  */
class LocalDecisionTreeRegressionModel(override val sparkTransformer: DecisionTreeRegressionModel) extends LocalTransformer[DecisionTreeRegressionModel] {
  /**
    * Appends the prediction column computed from the features column.
    *
    * @param localData input data whose features column holds `Array[Double]` rows;
    *                  returned unchanged when that column is absent
    */
  override def transform(localData: LocalData): LocalData =
    localData.column(sparkTransformer.getFeaturesCol).map { featuresColumn =>
      val predict = classOf[DecisionTreeRegressionModel].getMethod("predict", classOf[Vector])
      predict.setAccessible(true)
      val outputName = sparkTransformer.getPredictionCol
      val featureVectors = featuresColumn.data.map(row => Vectors.dense(row.asInstanceOf[Array[Double]]))
      val predictions = featureVectors.map { features =>
        predict.invoke(sparkTransformer, features).asInstanceOf[Double]
      }
      localData.withColumn(LocalDataColumn(outputName, predictions))
    }.getOrElse(localData)
}

/** Rebuilds a Spark [[DecisionTreeRegressionModel]] from stored metadata and node data. */
object LocalDecisionTreeRegressionModel extends LocalModel[DecisionTreeRegressionModel] {
  /** Loads the model by delegating to [[createTree]]. */
  override def load(metadata: Metadata, data: Map[String, Any]): DecisionTreeRegressionModel =
    createTree(metadata, data)

  /**
    * Instantiates a tree through its non-public (uid, rootNode, numFeatures) constructor
    * and restores its params from `metadata.paramMap`.
    *
    * @param metadata tree uid, param map and feature count
    * @param data     node data handed to `DataUtils.createNode` starting at node 0
    */
  def createTree(metadata: Metadata, data: Map[String, Any]): DecisionTreeRegressionModel = {
    val ctor = classOf[DecisionTreeRegressionModel].getDeclaredConstructor(
      classOf[String],
      classOf[Node],
      classOf[Int]
    )
    ctor.setAccessible(true)
    val tree = ctor.newInstance(
      metadata.uid,
      DataUtils.createNode(0, metadata, data),
      metadata.numFeatures.get.asInstanceOf[java.lang.Integer]
    )

    val params = metadata.paramMap
    def text(key: String): String = params(key).toString

    // Column setters mutate the instance; their return value is not needed.
    tree.setFeaturesCol(params("featuresCol").asInstanceOf[String])
    tree.setPredictionCol(params("predictionCol").asInstanceOf[String])

    tree
      .set(tree.seed, text("seed").toLong)
      .set(tree.cacheNodeIds, text("cacheNodeIds").toBoolean)
      .set(tree.maxDepth, text("maxDepth").toInt)
      .set(tree.labelCol, text("labelCol"))
      .set(tree.minInfoGain, text("minInfoGain").toDouble)
      .set(tree.checkpointInterval, text("checkpointInterval").toInt)
      .set(tree.minInstancesPerNode, text("minInstancesPerNode").toInt)
      .set(tree.maxMemoryInMB, text("maxMemoryInMB").toInt)
      .set(tree.maxBins, text("maxBins").toInt)
      .set(tree.impurity, text("impurity"))
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: DecisionTreeRegressionModel): LocalTransformer[DecisionTreeRegressionModel] =
    new LocalDecisionTreeRegressionModel(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:54,代码来源:LocalDecisionTreeRegressionModel.scala

示例15: LocalRandomForestRegressionModel

//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.regression

import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.ml.regression.{DecisionTreeRegressionModel, RandomForestRegressionModel}


/**
  * Applies a Spark [[RandomForestRegressionModel]] to in-memory [[LocalData]].
  *
  * @param sparkTransformer the wrapped Spark model whose `predict` method is invoked reflectively
  */
class LocalRandomForestRegressionModel(override val sparkTransformer: RandomForestRegressionModel) extends LocalTransformer[RandomForestRegressionModel] {
  /**
    * Appends the prediction column computed from the features column.
    *
    * @param localData input data whose features column holds `Array[Double]` rows;
    *                  returned unchanged when that column is absent
    */
  override def transform(localData: LocalData): LocalData = {
    // The method handle is resolved up front, before the features column is looked up.
    val predictMethod = classOf[RandomForestRegressionModel].getMethod("predict", classOf[Vector])
    localData.column(sparkTransformer.getFeaturesCol).map { featuresColumn =>
      val outputName = sparkTransformer.getPredictionCol
      val featureVectors = featuresColumn.data.map(row => Vectors.dense(row.asInstanceOf[Array[Double]]))
      val predictions = featureVectors.map { features =>
        predictMethod.invoke(sparkTransformer, features).asInstanceOf[Double]
      }
      localData.withColumn(LocalDataColumn(outputName, predictions))
    }.getOrElse(localData)
  }
}

/** Rebuilds a Spark [[RandomForestRegressionModel]] from stored metadata and per-tree data. */
object LocalRandomForestRegressionModel extends LocalModel[RandomForestRegressionModel] {
  /**
    * Recreates every tree listed under "treesMetadata", instantiates the forest through its
    * non-public (uid, trees, numFeatures) constructor and restores its params.
    *
    * @param metadata forest uid, param map and feature count
    * @param data     per-tree node data, keyed by the same keys as "treesMetadata"
    */
  override def load(metadata: Metadata, data: Map[String, Any]): RandomForestRegressionModel = {
    val treesMetadata = metadata.paramMap("treesMetadata").asInstanceOf[Map[String, Any]]
    val trees = treesMetadata.map { case (treeKey, rawTreeMeta) =>
      val treeMeta = rawTreeMeta.asInstanceOf[Map[String, Any]]
      val meta = treeMeta("metadata").asInstanceOf[Metadata]
      val treeData = data(treeKey).asInstanceOf[Map[String, Any]]
      LocalDecisionTreeRegressionModel.createTree(meta, treeData)
    }

    val ctor = classOf[RandomForestRegressionModel].getDeclaredConstructor(
      classOf[String],
      classOf[Array[DecisionTreeRegressionModel]],
      classOf[Int]
    )
    ctor.setAccessible(true)

    val forest = ctor.newInstance(
      metadata.uid,
      trees.to[Array],
      metadata.numFeatures.get.asInstanceOf[java.lang.Integer]
    )
    // Column setters mutate the instance; their return value is not needed.
    forest.setFeaturesCol(metadata.paramMap("featuresCol").asInstanceOf[String])
    forest.setPredictionCol(metadata.paramMap("predictionCol").asInstanceOf[String])

    forest
      .set(forest.seed, metadata.paramMap("seed").toString.toLong)
      .set(forest.subsamplingRate, metadata.paramMap("subsamplingRate").toString.toDouble)
      .set(forest.impurity, metadata.paramMap("impurity").toString)
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: RandomForestRegressionModel): LocalTransformer[RandomForestRegressionModel] =
    new LocalRandomForestRegressionModel(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:53,代码来源:LocalRandomForestRegressionModel.scala


注:本文中的org.apache.spark.ml.linalg.Vector类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。