本文整理汇总了 Scala 中 org.apache.spark.ml.feature.VectorSlicer 类的典型用法代码示例。如果您正苦于以下问题：Scala VectorSlicer 类的具体用法？Scala VectorSlicer 怎么用？Scala VectorSlicer 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 VectorSlicer 类的 1 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Scala 代码示例。
示例1: VectorSlicerJob
// Import the classes this example depends on (no package declaration is needed here)
import io.hydrosphere.mist.api._
import io.hydrosphere.mist.api.ml._
import java.util
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NumericAttribute}
import org.apache.spark.ml.feature.VectorSlicer
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.StructType
/**
 * Example ML job that fits a single-stage [[Pipeline]] containing a [[VectorSlicer]],
 * saves the fitted pipeline, and can later apply a saved pipeline to in-memory rows.
 *
 * `train` writes the fitted pipeline to a caller-supplied path; `serve` loads a pipeline
 * from a path and transforms the supplied feature rows with it.
 */
object VectorSlicerJob extends MLMistJob {

  /**
   * Spark session configured from `context` (application name and configuration).
   *
   * NOTE(review): `context` is not defined in this object; presumably it is inherited
   * from `MLMistJob` -- confirm against the Mist API version in use.
   */
  def session: SparkSession = SparkSession
    .builder()
    .appName(context.appName)
    .config(context.getConf)
    .getOrCreate()

  /**
   * Fits the slicing pipeline on a tiny in-memory data set and persists it.
   *
   * @param savePath location the fitted pipeline model is written to; the write is
   *                 issued with `overwrite()`
   * @return always an empty map
   */
  def train(savePath: String): Map[String, Any] = {
    // Two rows carrying the same three values: one sparse encoding, one dense encoding.
    val data = util.Arrays.asList(
      Row(Vectors.sparse(3, Seq((0, -2.0), (1, 2.3)))),
      Row(Vectors.dense(-2.0, 2.3, 0.0))
    )

    // Attach names to the three vector slots so the slicer can select by name.
    val defaultAttr = NumericAttribute.defaultAttr
    // Widen each element while mapping so the result is a genuine Array[Attribute].
    // Scala arrays are invariant, so this avoids casting an Array[NumericAttribute].
    val attrs: Array[Attribute] =
      Array("f1", "f2", "f3").map(name => (defaultAttr.withName(name): Attribute))
    val attrGroup = new AttributeGroup("userFeatures", attrs)

    val df = session.createDataFrame(data, StructType(Array(attrGroup.toStructField())))

    // Select the feature at index 1 plus the feature named "f3".
    val slicer = new VectorSlicer()
      .setInputCol("userFeatures")
      .setOutputCol("features")
      .setIndices(Array(1))
      .setNames(Array("f3"))

    val pipeline = new Pipeline().setStages(Array(slicer))
    val model = pipeline.fit(df)
    model.write.overwrite().save(savePath)

    Map.empty[String, Any]
  }

  /**
   * Loads a saved pipeline and applies it to the given feature rows without a DataFrame.
   *
   * @param modelPath location of a previously saved pipeline
   * @param features  rows of feature values, placed in the `userFeatures` column
   * @return a map with the single key `"result"` holding the `userFeatures` and
   *         `features` columns of the transformed data as a list of maps
   */
  def serve(modelPath: String, features: List[List[Double]]): Map[String, Any] = {
    // NOTE(review): presumably supplies the implicit enrichment that gives the loaded
    // pipeline its local `transform` -- confirm against the Mist ML API.
    import LocalPipelineModel._

    val pipeline = PipelineLoader.load(modelPath)
    val data = LocalData(
      LocalDataColumn("userFeatures", features)
    )

    val result: LocalData = pipeline.transform(data)
    Map("result" -> result.select("userFeatures", "features").toMapList)
  }
}