本文整理汇总了Scala中org.apache.spark.ml.linalg.SparseVector类的典型用法代码示例。如果您正苦于以下问题：Scala SparseVector类的具体用法？Scala SparseVector怎么用？Scala SparseVector使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了SparseVector类的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1:
//设置package包名称以及导入依赖的类
import java.io.{File, FileOutputStream}
import java.nio.channels.FileChannel
import java.nio.file.{Paths, StandardOpenOption}
import com.indix.ml2npy.Ml2NpyCSR
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector}
import org.scalatest.FlatSpec
import sys.process._
// Command prefix for the Python test runner; the trailing space separates it from the test target.
val nosetestspath = "nosetests "
// Path of the bundled Python test module followed by ":", so a test name can be appended directly.
val pathToTest = getClass.getResource("/python/Npytest.py").getPath + ":"

// Serializes three sparse feature vectors with dense labels through Ml2NpyCSR, writes the
// bytes to /tmp/data.npz and then runs the Python test "test_5" as an external process.
"ML2NpyFile" should "Convert to CSR matrix" in {
  val csrGen = new Ml2NpyCSR
  val data: Seq[Vector] = Seq(
    new SparseVector(3, Array(0), Array(0.1)),
    new SparseVector(3, Array(1), Array(0.2)),
    new SparseVector(3, Array(2), Array(0.3))
  )
  val labels = Seq(
    new DenseVector(Array(0, 1)),
    new DenseVector(Array(1, 0)),
    new DenseVector(Array(1, 0))
  )
  data.zip(labels).foreach { case (features, label) => csrGen.addRecord(features, label) }

  // Close the stream even when serialization or the write throws, so the file handle
  // is never leaked.
  val fos = new FileOutputStream(new File("/tmp/data.npz"))
  try {
    fos.write(csrGen.getBytes)
  } finally {
    fos.close()
  }

  // Launch the Python test (presumably it reads /tmp/data.npz -- confirm in Npytest.py);
  // the test passes only when the external process exits with code 0.
  val command = nosetestspath + pathToTest + "test_5"
  val response = command.!
  assert(response == 0)
}
}
示例2: LocalMaxAbsScalerModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.MaxAbsScalerModel
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}
/**
  * Applies a fitted Spark [[MaxAbsScalerModel]] to a column of [[LocalData]].
  *
  * @param sparkTransformer the fitted model whose `maxAbs` vector provides the divisors
  */
class LocalMaxAbsScalerModel(override val sparkTransformer: MaxAbsScalerModel) extends LocalTransformer[MaxAbsScalerModel] {
  /**
    * Divides every row of the input column by the model's `maxAbs` values (zeros replaced
    * by 1) and stores the result under the output column.
    *
    * @param localData data set expected to contain the model's input column
    * @return `localData` with the scaled column added, or `localData` unchanged when the
    *         input column is absent
    * @throws IllegalArgumentException if a row is not a `SparseVector`, `DenseVector` or `List`
    */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case None => localData
      case Some(inputColumn) =>
        // Swap zero entries for 1 so the division below never divides by zero.
        val divisors = Vectors.dense(sparkTransformer.maxAbs.toArray.map { m => if (m == 0) 1.0 else m })
        val scaledRows = inputColumn.data.map { row =>
          val values: List[Double] = row match {
            case sparse: SparseVector => sparse.toDense.toArray.toList
            case dense: DenseVector => dense.toArray.toList
            // Element types are erased at runtime, so each entry is parsed via its string form.
            case raw: List[Any @unchecked] => raw.map(_.toString.toDouble)
            case other => throw new IllegalArgumentException(s"Unknown data type for LocalMaxAbsScaler: $other")
          }
          val scaled = DataUtils.asBreeze(values.toArray) / DataUtils.asBreeze(divisors.toArray)
          DataUtils.fromBreeze(scaled)
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, scaledRows))
    }
  }
}
/** Loader and implicit wrapper for [[MaxAbsScalerModel]]. */
object LocalMaxAbsScalerModel extends LocalModel[MaxAbsScalerModel] {
  /**
    * Rebuilds a [[MaxAbsScalerModel]] from saved metadata and model data.
    *
    * @param metadata supplies the model `uid` and the `inputCol` / `outputCol` parameters
    * @param data     must contain a `"maxAbs"` map; its `"values"` entry (empty list when
    *                 missing) becomes the model's `maxAbs` vector
    * @return the reconstructed model with input and output columns set
    */
  override def load(metadata: Metadata, data: Map[String, Any]): MaxAbsScalerModel = {
    val maxAbsFields = data("maxAbs").asInstanceOf[Map[String, Any]]
    val maxAbsValues = maxAbsFields.getOrElse("values", List()).asInstanceOf[List[Double]].toArray
    val maxAbs = new DenseVector(maxAbsValues)

    // The (uid, maxAbs) constructor is reached reflectively and forced accessible.
    val ctor = classOf[MaxAbsScalerModel].getDeclaredConstructor(classOf[String], classOf[Vector])
    ctor.setAccessible(true)

    val model = ctor.newInstance(metadata.uid, maxAbs)
    val withInput = model.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    withInput.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: MaxAbsScalerModel): LocalTransformer[MaxAbsScalerModel] =
    new LocalMaxAbsScalerModel(transformer)
}
示例3: LocalStandardScalerModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.StandardScalerModel
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector}
import org.apache.spark.mllib.feature.{StandardScalerModel => OldStandardScalerModel}
import org.apache.spark.mllib.linalg.{DenseVector => OldDenseVector, SparseVector => OldSparseVector, Vector => OldVector, Vectors => OldVectors}
/**
  * Applies a fitted Spark ML [[StandardScalerModel]] to a column of [[LocalData]] by
  * delegating to the older `mllib` scaler model.
  *
  * @param sparkTransformer the fitted model providing `std`, `mean` and the
  *                         `withStd` / `withMean` flags
  */
class LocalStandardScalerModel(override val sparkTransformer: StandardScalerModel) extends LocalTransformer[StandardScalerModel] {
  /**
    * Scales every row of the input column and stores each result as an `Array[Double]`
    * under the output column.
    *
    * @param localData data set expected to contain the model's input column
    * @return `localData` with the scaled column added, or `localData` unchanged when the
    *         input column is absent
    * @throws IllegalArgumentException if a row has none of the supported types
    */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case None => localData
      case Some(inputColumn) =>
        // Build the mllib scaler once; it is reused for every row.
        val oldStd = OldVectors.fromML(sparkTransformer.std.asInstanceOf[Vector])
        val oldMean = OldVectors.fromML(sparkTransformer.mean.asInstanceOf[Vector])
        val oldScaler = new OldStandardScalerModel(
          oldStd,
          oldMean,
          sparkTransformer.getWithStd,
          sparkTransformer.getWithMean
        )
        val scaledRows = inputColumn.data.map { row =>
          val oldVector: OldVector = row match {
            case doubles: Array[Double @unchecked] => OldVectors.dense(doubles)
            // Element types are erased at runtime, so each entry is parsed via its string form.
            case raw: List[Any @unchecked] => OldVectors.dense(raw.map(_.toString.toDouble).toArray)
            case sparse: SparseVector => OldVectors.sparse(sparse.size, sparse.indices, sparse.values)
            case dense: DenseVector => OldVectors.dense(dense.toArray)
            case oldDense: OldDenseVector => oldDense
            case oldSparse: OldSparseVector => oldSparse.toDense
            case other => throw new IllegalArgumentException(s"Unknown data type for LocalStandardScaler: $other")
          }
          oldScaler.transform(oldVector).toArray
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, scaledRows))
    }
  }
}
/** Loader and implicit wrapper for [[StandardScalerModel]]. */
object LocalStandardScalerModel extends LocalModel[StandardScalerModel] {
  /**
    * Rebuilds a [[StandardScalerModel]] from saved metadata and model data.
    *
    * @param metadata supplies the model `uid` and the `inputCol` / `outputCol` parameters
    * @param data     must contain `"std"` and `"mean"` maps; the `"values"` entry of each
    *                 (empty list when missing) becomes the corresponding vector
    * @return the reconstructed model with input and output columns set
    */
  override def load(metadata: Metadata, data: Map[String, Any]): StandardScalerModel = {
    // Reads data(key)("values") as a dense vector; an absent "values" entry yields an empty vector.
    def denseValues(key: String): DenseVector = {
      val values = data(key).asInstanceOf[Map[String, Any]].getOrElse("values", List()).asInstanceOf[List[Double]]
      new DenseVector(values.toArray)
    }

    // The (uid, std, mean) constructor is reached reflectively and forced accessible.
    val ctor = classOf[StandardScalerModel].getDeclaredConstructor(classOf[String], classOf[Vector], classOf[Vector])
    ctor.setAccessible(true)

    val std = denseValues("std")
    val mean = denseValues("mean")
    val model = ctor.newInstance(metadata.uid, std, mean)
    val withInput = model.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    withInput.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: StandardScalerModel): LocalTransformer[StandardScalerModel] =
    new LocalStandardScalerModel(transformer)
}
示例4: LocalNormalizer
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.Normalizer
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}
/**
  * Applies a Spark ML [[Normalizer]] to a column of [[LocalData]].
  *
  * @param sparkTransformer the configured normalizer whose transform function is reused
  */
class LocalNormalizer(override val sparkTransformer: Normalizer) extends LocalTransformer[Normalizer] {
  /**
    * Normalizes every row of the input column and stores the result under the output column.
    *
    * @param localData data set expected to contain the normalizer's input column
    * @return `localData` with the normalized column added, or `localData` unchanged when
    *         the input column is absent
    * @throws IllegalArgumentException if a row is not a `List`, `SparseVector` or `DenseVector`
    */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        // The transform function is obtained reflectively (presumably because
        // `createTransformFunc` is not accessible from here -- confirm). It is built once
        // and reused for all rows instead of being re-created per row.
        val method = classOf[Normalizer].getMethod("createTransformFunc")
        val normalize = method.invoke(sparkTransformer).asInstanceOf[Vector => Vector]
        val newData = column.data.map { r =>
          val vector = r match {
            // Element types are erased at runtime, so each entry is parsed via its string form.
            case x: List[Any @unchecked] => Vectors.dense(x.map(_.toString.toDouble).toArray)
            case x: SparseVector => x
            case x: DenseVector => x
            case unknown =>
              // Message previously named LocalMaxAbsScaler (copy-paste slip); report this class.
              throw new IllegalArgumentException(s"Unknown data type for LocalNormalizer: ${unknown.getClass}")
          }
          normalize(vector)
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, newData))
      case None => localData
    }
  }
}
/** Loader and implicit wrapper for [[Normalizer]]. */
object LocalNormalizer extends LocalModel[Normalizer] {
  /**
    * Rebuilds a [[Normalizer]] from saved metadata.
    *
    * @param metadata supplies the `uid` and the `inputCol`, `outputCol` and `p` parameters
    * @param data     not read by this loader
    * @return the configured normalizer
    */
  override def load(metadata: Metadata, data: Map[String, Any]): Normalizer = {
    val normalizer = new Normalizer(metadata.uid)
    val withInput = normalizer.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    val withOutput = withInput.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
    // `p` is parsed from its string form, so any stored value that prints as a number is accepted.
    withOutput.setP(metadata.paramMap("p").toString.toDouble)
  }

  /** Wraps a Spark normalizer in its local transformer. */
  override implicit def getTransformer(transformer: Normalizer): LocalTransformer[Normalizer] =
    new LocalNormalizer(transformer)
}
示例5: LocalMaxAbsScalerModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.mist.api.ml.preprocessors
import io.hydrosphere.mist.api.ml._
import org.apache.spark.ml.feature.MaxAbsScalerModel
import org.apache.spark.ml.linalg.{DenseVector, Vector, Vectors, SparseVector}
/**
  * Applies a fitted Spark [[MaxAbsScalerModel]] to a column of [[LocalData]].
  *
  * @param sparkTransformer the fitted model whose `maxAbs` vector provides the divisors
  */
class LocalMaxAbsScalerModel(override val sparkTransformer: MaxAbsScalerModel) extends LocalTransformer[MaxAbsScalerModel] {
  /**
    * Divides every row of the input column by the model's `maxAbs` values (zeros replaced
    * by 1) and stores the result under the output column.
    *
    * @param localData data set expected to contain the model's input column
    * @return `localData` with the scaled column added, or `localData` unchanged when the
    *         input column is absent
    * @throws IllegalArgumentException if a row is not a `SparseVector`, `DenseVector` or `List`
    */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case None => localData
      case Some(inputColumn) =>
        // Swap zero entries for 1 so the division below never divides by zero.
        val divisors = Vectors.dense(sparkTransformer.maxAbs.toArray.map { m => if (m == 0) 1.0 else m })
        val scaledRows = inputColumn.data.map { row =>
          val values: List[Double] = row match {
            case sparse: SparseVector => sparse.toDense.toArray.toList
            case dense: DenseVector => dense.toArray.toList
            // Element types are erased at runtime, so each entry is parsed via its string form.
            case raw: List[Any @unchecked] => raw.map(_.toString.toDouble)
            case other => throw new IllegalArgumentException(s"Unknown data type for LocalMaxAbsScaler: $other")
          }
          val scaled = DataUtils.asBreeze(values.toArray) / DataUtils.asBreeze(divisors.toArray)
          DataUtils.fromBreeze(scaled)
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, scaledRows))
    }
  }
}
/** Loader and implicit wrapper for [[MaxAbsScalerModel]]. */
object LocalMaxAbsScalerModel extends LocalModel[MaxAbsScalerModel] {
  /**
    * Rebuilds a [[MaxAbsScalerModel]] from saved metadata and model data.
    *
    * @param metadata supplies the model `uid` and the `inputCol` / `outputCol` parameters
    * @param data     must contain a `"maxAbs"` map; its `"values"` entry (empty list when
    *                 missing) becomes the model's `maxAbs` vector
    * @return the reconstructed model with input and output columns set
    */
  override def load(metadata: Metadata, data: Map[String, Any]): MaxAbsScalerModel = {
    val maxAbsFields = data("maxAbs").asInstanceOf[Map[String, Any]]
    val maxAbsValues = maxAbsFields.getOrElse("values", List()).asInstanceOf[List[Double]].toArray
    val maxAbs = new DenseVector(maxAbsValues)

    // The (uid, maxAbs) constructor is reached reflectively and forced accessible.
    val ctor = classOf[MaxAbsScalerModel].getDeclaredConstructor(classOf[String], classOf[Vector])
    ctor.setAccessible(true)

    val model = ctor.newInstance(metadata.uid, maxAbs)
    val withInput = model.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    withInput.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: MaxAbsScalerModel): LocalTransformer[MaxAbsScalerModel] =
    new LocalMaxAbsScalerModel(transformer)
}
示例6: LocalStandardScalerModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.mist.api.ml.preprocessors
import io.hydrosphere.mist.api.ml._
import org.apache.spark.ml.feature.StandardScalerModel
import org.apache.spark.mllib.feature.{StandardScalerModel => OldStandardScalerModel}
import org.apache.spark.mllib.linalg.{
Vector => OldVector,
Vectors => OldVectors,
SparseVector => OldSparseVector,
DenseVector => OldDenseVector
}
import org.apache.spark.ml.linalg.{DenseVector, Vector, SparseVector}
/**
  * Applies a fitted Spark ML [[StandardScalerModel]] to a column of [[LocalData]] by
  * delegating to the older `mllib` scaler model.
  *
  * @param sparkTransformer the fitted model providing `std`, `mean` and the
  *                         `withStd` / `withMean` flags
  */
class LocalStandardScalerModel(override val sparkTransformer: StandardScalerModel) extends LocalTransformer[StandardScalerModel] {
  /**
    * Scales every row of the input column and stores the resulting `mllib` vectors under
    * the output column.
    *
    * @param localData data set expected to contain the model's input column
    * @return `localData` with the scaled column added, or `localData` unchanged when the
    *         input column is absent
    * @throws IllegalArgumentException if a row has none of the supported types
    */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case None => localData
      case Some(inputColumn) =>
        // Build the mllib scaler once; it is reused for every row.
        val oldStd = OldVectors.fromML(sparkTransformer.std.asInstanceOf[Vector])
        val oldMean = OldVectors.fromML(sparkTransformer.mean.asInstanceOf[Vector])
        val oldScaler = new OldStandardScalerModel(
          oldStd,
          oldMean,
          sparkTransformer.getWithStd,
          sparkTransformer.getWithMean
        )
        val scaledRows = inputColumn.data.map { row =>
          val oldVector: OldVector = row match {
            // Element types are erased at runtime, so each entry is parsed via its string form.
            case raw: List[Any @unchecked] => OldVectors.dense(raw.map(_.toString.toDouble).toArray)
            case sparse: SparseVector => OldVectors.sparse(sparse.size, sparse.indices, sparse.values)
            case dense: DenseVector => OldVectors.dense(dense.toArray)
            case oldDense: OldDenseVector => oldDense
            case oldSparse: OldSparseVector => oldSparse.toDense
            case other => throw new IllegalArgumentException(s"Unknown data type for LocalStandardScaler: $other")
          }
          oldScaler.transform(oldVector)
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, scaledRows))
    }
  }
}
/** Loader and implicit wrapper for [[StandardScalerModel]]. */
object LocalStandardScalerModel extends LocalModel[StandardScalerModel] {
  /**
    * Rebuilds a [[StandardScalerModel]] from saved metadata and model data.
    *
    * @param metadata supplies the model `uid` and the `inputCol` / `outputCol` parameters
    * @param data     must contain `"std"` and `"mean"` maps; the `"values"` entry of each
    *                 (empty list when missing) becomes the corresponding vector
    * @return the reconstructed model with input and output columns set
    */
  override def load(metadata: Metadata, data: Map[String, Any]): StandardScalerModel = {
    // Reads data(key)("values") as a dense vector; an absent "values" entry yields an empty vector.
    def denseValues(key: String): DenseVector = {
      val values = data(key).asInstanceOf[Map[String, Any]].getOrElse("values", List()).asInstanceOf[List[Double]]
      new DenseVector(values.toArray)
    }

    // The (uid, std, mean) constructor is reached reflectively and forced accessible.
    val ctor = classOf[StandardScalerModel].getDeclaredConstructor(classOf[String], classOf[Vector], classOf[Vector])
    ctor.setAccessible(true)

    val std = denseValues("std")
    val mean = denseValues("mean")
    val model = ctor.newInstance(metadata.uid, std, mean)
    val withInput = model.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    withInput.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
  }

  /** Wraps a Spark model in its local transformer. */
  override implicit def getTransformer(transformer: StandardScalerModel): LocalTransformer[StandardScalerModel] =
    new LocalStandardScalerModel(transformer)
}
示例7: LocalNormalizer
//设置package包名称以及导入依赖的类
package io.hydrosphere.mist.api.ml.preprocessors
import io.hydrosphere.mist.api.ml._
import org.apache.spark.ml.feature.Normalizer
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}
/**
  * Applies a Spark ML [[Normalizer]] to a column of [[LocalData]].
  *
  * @param sparkTransformer the configured normalizer whose transform function is reused
  */
class LocalNormalizer(override val sparkTransformer: Normalizer) extends LocalTransformer[Normalizer] {
  /**
    * Normalizes every row of the input column and stores the result under the output column.
    *
    * @param localData data set expected to contain the normalizer's input column
    * @return `localData` with the normalized column added, or `localData` unchanged when
    *         the input column is absent
    * @throws IllegalArgumentException if a row is not a `List`, `SparseVector` or `DenseVector`
    */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        // The transform function is obtained reflectively (presumably because
        // `createTransformFunc` is not accessible from here -- confirm). It is built once
        // and reused for all rows instead of being re-created per row.
        val method = classOf[Normalizer].getMethod("createTransformFunc")
        val normalize = method.invoke(sparkTransformer).asInstanceOf[Vector => Vector]
        val newData = column.data.map { r =>
          val vector = r match {
            // Element types are erased at runtime, so each entry is parsed via its string form.
            case x: List[Any @unchecked] => Vectors.dense(x.map(_.toString.toDouble).toArray)
            case x: SparseVector => x
            case x: DenseVector => x
            case unknown =>
              // Message previously named LocalMaxAbsScaler (copy-paste slip); report this class.
              throw new IllegalArgumentException(s"Unknown data type for LocalNormalizer: ${unknown.getClass}")
          }
          normalize(vector)
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, newData))
      case None => localData
    }
  }
}
/** Loader and implicit wrapper for [[Normalizer]]. */
object LocalNormalizer extends LocalModel[Normalizer] {
  /**
    * Rebuilds a [[Normalizer]] from saved metadata.
    *
    * @param metadata supplies the `uid` and the `inputCol`, `outputCol` and `p` parameters
    * @param data     not read by this loader
    * @return the configured normalizer
    */
  override def load(metadata: Metadata, data: Map[String, Any]): Normalizer = {
    val normalizer = new Normalizer(metadata.uid)
    val withInput = normalizer.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    val withOutput = withInput.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
    // `p` is parsed from its string form, so any stored value that prints as a number is accepted.
    withOutput.setP(metadata.paramMap("p").toString.toDouble)
  }

  /** Wraps a Spark normalizer in its local transformer. */
  override implicit def getTransformer(transformer: Normalizer): LocalTransformer[Normalizer] =
    new LocalNormalizer(transformer)
}