本文整理汇总了Scala中org.apache.spark.ml.feature.OneHotEncoder类的典型用法代码示例。如果您正苦于以下问题:Scala OneHotEncoder类的具体用法?Scala OneHotEncoder怎么用?Scala OneHotEncoder使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了OneHotEncoder类的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: OneHotEncoderExample
//设置package包名称以及导入依赖的类
package org.sparksamples.regression.bikesharing
import org.apache.spark.sql.SparkSession
/**
 * Stand-alone demonstration of one-hot encoding a categorical column with Spark ML.
 *
 * The program builds a six-row DataFrame with an `id` and a `category` column,
 * indexes `category` with a `StringIndexer`, feeds the resulting `categoryIndex`
 * column through a `OneHotEncoder`, and prints the `id` / `categoryVec` columns.
 */
object OneHotEncoderExample {

  /**
   * Entry point.
   *
   * @param args command-line arguments; not read by this example
   */
  def main(args: Array[String]): Unit = {
    import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer}

    val spark = SparkSession
      .builder()
      .appName("Spark SQL basic example").master("local[1]")
      .config("spark.some.config.option", "some-value")
      .getOrCreate()

    // Release the session even if one of the stages below throws;
    // the original never stopped it.
    try {
      val df = spark.createDataFrame(Seq(
        (0, 3),
        (1, 2),
        (2, 4),
        (3, 3),
        (4, 3),
        (5, 4)
      )).toDF("id", "category")

      // Fit the indexer on the data so every distinct category gets an index.
      val indexer = new StringIndexer()
        .setInputCol("category")
        .setOutputCol("categoryIndex")
        .fit(df)
      val indexed = indexer.transform(df)

      // Turn the index column into a one-hot vector column.
      val encoder = new OneHotEncoder()
        .setInputCol("categoryIndex")
        .setOutputCol("categoryVec")
      val encoded = encoder.transform(indexed)

      encoded.select("id", "categoryVec").show()
    } finally {
      spark.stop()
    }
  }
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-with-Spark-Second-Edition,代码行数:41,代码来源:OneHotEncoderExample.scala
示例2: LocalOneHotEncoder
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.ml.feature.OneHotEncoder
import org.apache.spark.ml.linalg.Vectors
/**
 * Applies the one-hot encoding described by a Spark ML `OneHotEncoder` to
 * in-memory `LocalData`, without running a Spark job.
 *
 * @param sparkTransformer the configured Spark encoder whose input column,
 *                         output column and `dropLast` setting are used
 */
class LocalOneHotEncoder(override val sparkTransformer: OneHotEncoder) extends LocalTransformer[OneHotEncoder] {

  /**
   * Encodes the input column as a list of dense one-hot arrays and appends it
   * under the output column name.
   *
   * The number of categories is derived from the largest index present in the
   * column (`max + 1`). When `dropLast` is true the last category is dropped,
   * so the vector has one slot fewer and that category is encoded as all
   * zeros; when `dropLast` is false every category gets its own slot.
   *
   * NOTE(review): the size is inferred from the values in this batch only, so
   * it can differ from what Spark derives from column metadata — confirm
   * whether callers rely on a fixed width.
   *
   * @param localData data set expected to contain the encoder's input column
   * @return `localData` with the encoded column added, or `localData`
   *         unchanged when the input column is absent
   */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        val indices = column.data.asInstanceOf[List[Double]]
        indices.foreach(x =>
          assert(x >= 0.0 && x == x.toInt,
            s"Values from column ${sparkTransformer.getInputCol} must be indices, but got $x.")
        )

        // Honour the encoder's dropLast setting instead of always dropping the last category.
        val dropLast = sparkTransformer.getOrDefault(sparkTransformer.dropLast)

        val encoded: List[Array[Double]] =
          if (indices.isEmpty) {
            // `max` is undefined on an empty list; an empty column encodes to an empty column.
            List.empty[Array[Double]]
          } else {
            val numCategories = indices.max.toInt + 1
            val size = if (dropLast) numCategories - 1 else numCategories
            indices.map { r =>
              val res = Array.fill(size)(0.0)
              // With dropLast the highest index has no slot and stays all zeros.
              if (r < size) {
                res.update(r.toInt, 1.0)
              }
              res
            }
          }

        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, encoded))
      case None => localData
    }
  }
}
/**
 * Companion that rebuilds a Spark `OneHotEncoder` from stored metadata and
 * wraps encoders in their local transformer.
 */
object LocalOneHotEncoder extends LocalModel[OneHotEncoder] {

  /**
   * Recreates the encoder from its saved parameters.
   *
   * `inputCol` and `outputCol` are required entries of the parameter map;
   * `dropLast` is applied only when present.
   *
   * @param metadata stored model metadata supplying the uid and parameter map
   * @param data     additional model data; not consulted by this loader
   * @return the configured encoder
   */
  override def load(metadata: Metadata, data: Map[String, Any]): OneHotEncoder = {
    val params = metadata.paramMap
    val base = new OneHotEncoder(metadata.uid)
      .setInputCol(params("inputCol").asInstanceOf[String])
      .setOutputCol(params("outputCol").asInstanceOf[String])
    params.get("dropLast") match {
      case Some(flag) => base.setDropLast(flag.asInstanceOf[Boolean])
      case None       => base
    }
  }

  /** Wraps a Spark encoder in its local counterpart. */
  override implicit def getTransformer(transformer: OneHotEncoder): LocalTransformer[OneHotEncoder] =
    new LocalOneHotEncoder(transformer)
}
示例3: LocalOneHotEncoder
//设置package包名称以及导入依赖的类
package io.hydrosphere.mist.api.ml.preprocessors
import io.hydrosphere.mist.api.ml._
import org.apache.spark.ml.feature.OneHotEncoder
import org.apache.spark.ml.linalg.Vectors
/**
 * Applies the one-hot encoding described by a Spark ML `OneHotEncoder` to
 * in-memory `LocalData`, without running a Spark job.
 *
 * @param sparkTransformer the configured Spark encoder whose input column,
 *                         output column and `dropLast` setting are used
 */
class LocalOneHotEncoder(override val sparkTransformer: OneHotEncoder) extends LocalTransformer[OneHotEncoder] {

  /**
   * Encodes the input column as a list of dense one-hot arrays and appends it
   * under the output column name.
   *
   * The number of categories is derived from the largest index present in the
   * column (`max + 1`). When `dropLast` is true the last category is dropped,
   * so the vector has one slot fewer and that category is encoded as all
   * zeros; when `dropLast` is false every category gets its own slot.
   *
   * NOTE(review): the size is inferred from the values in this batch only, so
   * it can differ from what Spark derives from column metadata — confirm
   * whether callers rely on a fixed width.
   *
   * @param localData data set expected to contain the encoder's input column
   * @return `localData` with the encoded column added, or `localData`
   *         unchanged when the input column is absent
   */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        val indices = column.data.asInstanceOf[List[Double]]
        indices.foreach(x =>
          assert(x >= 0.0 && x == x.toInt,
            s"Values from column ${sparkTransformer.getInputCol} must be indices, but got $x.")
        )

        // Honour the encoder's dropLast setting instead of always dropping the last category.
        val dropLast = sparkTransformer.getOrDefault(sparkTransformer.dropLast)

        val encoded: List[Array[Double]] =
          if (indices.isEmpty) {
            // `max` is undefined on an empty list; an empty column encodes to an empty column.
            List.empty[Array[Double]]
          } else {
            val numCategories = indices.max.toInt + 1
            val size = if (dropLast) numCategories - 1 else numCategories
            indices.map { r =>
              val res = Array.fill(size)(0.0)
              // With dropLast the highest index has no slot and stays all zeros.
              if (r < size) {
                res.update(r.toInt, 1.0)
              }
              res
            }
          }

        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, encoded))
      case None => localData
    }
  }
}
/**
 * Companion that rebuilds a Spark `OneHotEncoder` from stored metadata and
 * wraps encoders in their local transformer.
 */
object LocalOneHotEncoder extends LocalModel[OneHotEncoder] {

  /**
   * Recreates the encoder from its saved parameters.
   *
   * `inputCol` and `outputCol` are required entries of the parameter map;
   * `dropLast` is applied only when present.
   *
   * @param metadata stored model metadata supplying the uid and parameter map
   * @param data     additional model data; not consulted by this loader
   * @return the configured encoder
   */
  override def load(metadata: Metadata, data: Map[String, Any]): OneHotEncoder = {
    val params = metadata.paramMap
    val base = new OneHotEncoder(metadata.uid)
      .setInputCol(params("inputCol").asInstanceOf[String])
      .setOutputCol(params("outputCol").asInstanceOf[String])
    params.get("dropLast") match {
      case Some(flag) => base.setDropLast(flag.asInstanceOf[Boolean])
      case None       => base
    }
  }

  /** Wraps a Spark encoder in its local counterpart. */
  override implicit def getTransformer(transformer: OneHotEncoder): LocalTransformer[OneHotEncoder] =
    new LocalOneHotEncoder(transformer)
}
示例4: OneHotEncoderJob
//导入依赖的类
import io.hydrosphere.mist.api._
import io.hydrosphere.mist.api.ml._
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer}
import org.apache.spark.ml.linalg.{Vector => LVector}
import org.apache.spark.sql.SparkSession
/**
 * Mist ML job that trains a StringIndexer + OneHotEncoder pipeline and serves
 * it locally against a list of category strings.
 */
object OneHotEncoderJob extends MLMistJob {

  /**
   * Obtains a `SparkSession` configured from the job context.
   *
   * NOTE(review): `context` is not defined in this object; presumably it is
   * supplied by `MLMistJob` — confirm.
   */
  def session: SparkSession = {
    val builder = SparkSession
      .builder()
      .appName(context.appName)
      .config(context.getConf)
    builder.getOrCreate()
  }

  /**
   * Fits the indexing + encoding pipeline on a small fixed data set and saves
   * the fitted model, overwriting anything already at `savePath`.
   *
   * @param savePath destination path for the fitted pipeline model
   * @return an empty map
   */
  def train(savePath: String): Map[String, Any] = {
    val samples = Seq(
      (0, "a"), (1, "b"), (2, "c"),
      (3, "a"), (4, "a"), (5, "c")
    )
    val trainingFrame = session.createDataFrame(samples).toDF("id", "category")

    val categoryIndexer = new StringIndexer()
      .setInputCol("category")
      .setOutputCol("categoryIndex")
      .fit(trainingFrame)

    val oneHot = new OneHotEncoder()
      .setInputCol("categoryIndex")
      .setOutputCol("categoryVec")

    val fitted = new Pipeline()
      .setStages(Array(categoryIndexer, oneHot))
      .fit(trainingFrame)
    fitted.write.overwrite().save(savePath)

    Map.empty[String, Any]
  }

  /**
   * Loads a saved pipeline and runs it over the supplied category values.
   *
   * @param modelPath location of the saved pipeline model
   * @param features  category strings placed in the `category` column
   * @return a map whose `result` entry holds one row map per input value,
   *         each carrying `category` and `categoryVec`
   */
  def serve(modelPath: String, features: List[String]): Map[String, Any] = {
    import LocalPipelineModel._

    val loaded = PipelineLoader.load(modelPath)
    val input = LocalData(LocalDataColumn("category", features))
    val rows = loaded.transform(input).select("category", "categoryVec").toMapList

    val response = rows.map { row =>
      // The cast fails fast if the encoded value is not an Array[Double].
      val vec = row("categoryVec").asInstanceOf[Array[Double]]
      row + ("categoryVec" -> vec)
    }

    Map("result" -> response)
  }
}