本文整理汇总了Scala中org.apache.spark.ml.feature.PCA类的典型用法代码示例。如果您正苦于以下问题：Scala PCA类的具体用法？Scala PCA怎么用？Scala PCA使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了PCA类的4个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: PCASampleDemo
//设置package包名称以及导入依赖的类
package com.chapter11.SparkMachineLearning
import org.apache.spark.ml.feature.PCA
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession
/**
 * Minimal PCA walkthrough: builds a three-row DataFrame of 6-dimensional
 * dense vectors, fits a PCA model with k = 4 on it and prints both the input
 * and the projected `pcaFeatures` column.
 */
object PCASampleDemo {

  /**
   * Entry point. Runs the example on a local 4-thread Spark session.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .master("local[4]")
      .appName("PCAExample")
      .getOrCreate()

    // try/finally guarantees the session is stopped even when fitting,
    // transforming or showing the data throws.
    try {
      val data = Array(
        Vectors.dense(3.5, 2.0, 5.0, 6.3, 5.60, 2.4),
        Vectors.dense(4.40, 0.10, 3.0, 9.0, 7.0, 8.75),
        Vectors.dense(3.20, 2.40, 0.0, 6.0, 7.4, 3.34)
      )

      // Wrap each vector in a Tuple1 so it becomes a single-column row.
      val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF("features")
      df.show(truncate = false)

      // fit() returns the trained model, not the estimator.
      val pcaModel = new PCA()
        .setInputCol("features")
        .setOutputCol("pcaFeatures")
        .setK(4)
        .fit(df)

      val result = pcaModel.transform(df).select("pcaFeatures")
      result.show(truncate = false)
    } finally {
      spark.stop()
    }
  }
}
示例2: Pca
//设置package包名称以及导入依赖的类
package com.github.dongjinleekr.spark.example
import com.github.dongjinleekr.spark.dataset.Iris
import com.github.dongjinleekr.spark.dataset.Iris._
import org.apache.spark.ml.feature.{PCA, VectorAssembler}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.SparkSession
/**
 * PCA example on a CSV data set: reads the file with the `Iris.schema`
 * schema, assembles a `features` vector column, converts the `species`
 * column to an integer code and prints the 2-component PCA projection.
 */
object Pca {

  /**
   * Entry point. The master is not set here, so it must be supplied by the
   * launcher (e.g. spark-submit).
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("PCA Example")
      .getOrCreate()

    // The original never stopped the session; try/finally releases it on both
    // the success and the failure path.
    try {
      // Read the file
      val raw = spark.read
        .schema(Iris.schema)
        .option("header", true)
        .csv("hdfs:///datasets/iris/data.csv")

      // Normalize:
      // 1. Combine the features into vector.
      // 2. Convert enumerating value into Int type.
      val assembler = new VectorAssembler()
        // Schema fields at positions 1..4 are used as inputs
        // (presumably the four measurement columns; confirm against Iris.schema).
        .setInputCols(Iris.schema.fields.map(_.name).slice(1, 5))
        .setOutputCol("features")

      val speciesToInt: String => Int = (s: String) => Species.toInt(s)
      val newSpecies = udf(speciesToInt).apply(col("species"))

      val df = assembler.transform(raw)
        .withColumn("species", newSpecies)
        .select("id", "features", "species")

      // PCA (2); fit() returns the trained model.
      val pcaModel = new PCA()
        .setInputCol("features")
        .setOutputCol("pcaFeatures")
        .setK(2)
        .fit(df)

      val result = pcaModel.transform(df).select("pcaFeatures")
      result.show(truncate = false)
    } finally {
      spark.stop()
    }
  }
}
示例3: PCAJob
//导入依赖的类(本示例未声明package)
import io.hydrosphere.mist.api._
import io.hydrosphere.mist.api.ml._
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature.PCA
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession
// TODO: why does the model return a vector from mllib?
import org.apache.spark.mllib.linalg.{Vector => OldVector}
/**
 * Mist ML job with two entry points: `train` fits a one-stage PCA pipeline
 * and saves it, `serve` loads a saved pipeline and applies it to in-memory
 * feature rows.
 */
object PCAJob extends MLMistJob {

  /** Spark session obtained via `getOrCreate` from the job context's app name and configuration. */
  def session: SparkSession = {
    val builder = SparkSession
      .builder()
      .appName(context.appName)
      .config(context.getConf)
    builder.getOrCreate()
  }

  /**
   * Fits a PCA (k = 3) pipeline on three hard-coded 5-dimensional vectors and
   * writes the fitted model to `savePath`, overwriting any existing model.
   *
   * @param savePath destination path for the fitted pipeline model
   * @return an empty map
   */
  def train(savePath: String): Map[String, Any] = {
    val samples = Array(
      Vectors.sparse(5, Seq((1, 1.0), (3, 7.0))),
      Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
      Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)
    )
    val trainingFrame = session.createDataFrame(samples.map(Tuple1.apply)).toDF("features")

    val reducer = new PCA()
      .setInputCol("features")
      .setOutputCol("pcaFeatures")
      .setK(3)
    val fitted = new Pipeline().setStages(Array(reducer)).fit(trainingFrame)
    fitted.write.overwrite().save(savePath)

    Map.empty[String, Any]
  }

  /**
   * Loads the pipeline stored at `modelPath`, transforms the given feature
   * rows and returns them under the `"result"` key.
   *
   * @param modelPath path of a previously saved pipeline model
   * @param features  input rows, one array of doubles per row
   * @return a map with a single `"result"` entry holding the transformed rows
   */
  def serve(modelPath: String, features: List[Array[Double]]): Map[String, Any] = {
    import LocalPipelineModel._

    val loaded = PipelineLoader.load(modelPath)
    val input = LocalData(LocalDataColumn("features", features))

    val rows = loaded.transform(input).toMapList.map { row =>
      // The transformed column is cast to the old mllib Vector and replaced
      // by its plain array form.
      val projected = row("pcaFeatures").asInstanceOf[OldVector].toArray
      row + ("pcaFeatures" -> projected)
    }

    Map("result" -> rows)
  }
}
示例4: TestPcaExample
//设置package包名称以及导入依赖的类
package com.burness.algorithm.feature
import org.apache.spark.SparkConf
import org.apache.spark.ml.feature.PCA
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession
/**
 * PCA example on three 5-dimensional vectors (one sparse, two dense): fits a
 * PCA model with k = 3 and prints each projected row.
 */
object TestPcaExample {

  /**
   * Entry point. Runs the example on a local Spark session using all cores.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]")
    val spark = SparkSession
      .builder
      .appName("PCAExample")
      .config(sparkConf)
      .getOrCreate()

    // try/finally guarantees the session is stopped even when a step throws.
    try {
      // $example on$
      val data = Array(
        Vectors.sparse(5, Seq((1, 1.0), (3, 7.0))),
        Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
        Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)
      )
      val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF("features")

      // fit() returns the trained model, not the estimator.
      val pcaModel = new PCA()
        .setInputCol("features")
        .setOutputCol("pcaFeatures")
        .setK(3)
        .fit(df)

      val pcaDF = pcaModel.transform(df)
      val result = pcaDF.select("pcaFeatures")

      // Collect to the driver before printing: rdd.foreach(println) runs inside
      // executor tasks, so the output order is nondeterministic and, on a real
      // cluster, lands in executor logs instead of the driver's stdout. The
      // data set has three rows, so collect() is safe here.
      result.collect().foreach(println)
      // $example off$
    } finally {
      spark.stop()
    }
  }
}