本文整理汇总了Scala中org.apache.spark.mllib.clustering.KMeansModel类的典型用法代码示例。如果您正苦于以下问题：Scala KMeansModel类的具体用法？Scala KMeansModel怎么用？Scala KMeansModel使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了KMeansModel类的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: KmeansExample
//设置package包名称以及导入依赖的类
import org.apache.spark.mllib.clustering.{KMeans, KMeansModel}
import org.apache.spark.mllib.linalg.Vectors
import scala.util.Try
/**
 * Clusters vibration readings with MLlib KMeans, prints the clustering cost,
 * round-trips the model through save/load and shows the predictions made by
 * the reloaded model on the held-out split.
 *
 * `sc` and `spark` are not defined here; they are expected to be brought into
 * scope by the `ApplicationContext._` import.
 */
object KmeansExample {

  import ApplicationContext._

  /**
   * Runs the example end to end.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val modelPath = "target/org/apache/spark/KMeansExample/KMeansModel"

    try {
      // Input rows are tab-separated; columns 1..3 become the feature vector.
      // Rows that are too short or contain non-numeric values are dropped,
      // because the failed Try collapses to None and flatMap discards it.
      val vibrationVector = sc
        .textFile("src/main/resources/PamarcoVibration.txt")
        .map(_.split("\t"))
        .flatMap { row =>
          Try(Vectors.dense(row(1).toDouble, row(2).toDouble, row(3).toDouble)).toOption
        }
        .cache() // the parsed vectors are reused by randomSplit and computeCost

      // No seed is passed, so the 60/40 split differs from run to run.
      val Array(trainRDD, testRDD) = vibrationVector.randomSplit(Array(0.6, 0.4))

      // Cluster the data into two classes using KMeans
      val numClusters = 2
      val numIterations = 20
      val clusters = KMeans.train(trainRDD, numClusters, numIterations)

      // Evaluate clustering by computing Within Set Sum of Squared Errors.
      // The cost is computed over the full data set, not only the training split.
      val wssse = clusters.computeCost(vibrationVector)
      println(s"Within Set Sum of Squared Errors = $wssse")

      // Save and load model
      // NOTE(review): save is expected to fail when modelPath already exists —
      // confirm, and clear the directory between runs if so.
      clusters.save(sc, modelPath)
      val sameModel = KMeansModel.load(sc, modelPath)

      import spark.implicits._
      val predictions = sameModel.predict(testRDD)
      predictions.toDF.show()
    } finally {
      // Release the context even when one of the steps above throws.
      sc.stop()
    }
  }
}
示例2: Predict
//设置package包名称以及导入依赖的类
package com.databricks.apps.twitterClassifier
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.KMeansModel
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.streaming.twitter._
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Command-line entry point: parses the prediction options, builds a
 * StreamingContext on top of the shared SparkContext and hands both to
 * [[Predictor.doIt]].
 *
 * Uses an explicit `main` rather than `extends App`: the Spark documentation
 * warns that subclasses of `scala.App` may not work correctly.
 * `sc` is expected to come from the `SparkSetup._` import.
 */
object Predict {

  import SparkSetup._

  /**
   * @param args raw command-line arguments, interpreted by `PredictOptions.parse`
   */
  def main(args: Array[String]): Unit = {
    val options = PredictOptions.parse(args)
    val ssc = new StreamingContext(sc, Seconds(options.intervalInSecs))
    Predictor.doIt(options, sc, ssc)
  }
}
object Predictor {

  /**
   * Loads a KMeans model from disk and prints every incoming tweet whose
   * predicted cluster equals `options.clusterNumber`. Blocks until the
   * streaming context terminates.
   *
   * `maybeTwitterAuth` and `featurize` are not defined in this object; they
   * are expected to be in scope from elsewhere in the package.
   *
   * @param options supplies the model directory and the cluster number to keep
   * @param sc      context used to read the stored cluster centers
   * @param ssc     streaming context the Twitter stream is attached to; it is
   *                started here
   */
  def doIt(options: PredictOptions, sc: SparkContext, ssc: StreamingContext): Unit = {
    println("Initializing the KMeans model...")
    // The model directory is read as an object file of vectors; they are
    // collected to the driver and used as the cluster centers.
    val centers = sc.objectFile[Vector](options.modelDirectory.getCanonicalPath).collect()
    val model: KMeansModel = new KMeansModel(centers)

    // Read the target cluster once on the driver so the filter closure below
    // captures a plain value instead of the whole options object.
    val clusterNumber = options.clusterNumber

    println("Materializing Twitter stream...")
    TwitterUtils.createStream(ssc, maybeTwitterAuth)
      .map(_.getText)
      .foreachRDD { rdd =>
        // NOTE(review): RDD.foreach runs on the executors, so on a cluster the
        // text goes to executor stdout, and `print` adds no separator between
        // tweets — confirm this is intended.
        rdd.filter(t => model.predict(featurize(t)) == clusterNumber)
          .foreach(print) // register DStream as an output stream and materialize it
      }

    println("Initialization complete, starting streaming computation.")
    ssc.start()
    ssc.awaitTermination()
  }
}
示例3: KMeansCases
//设置package包名称以及导入依赖的类
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.{KMeans, KMeansModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.log4j.{Level, Logger}
/**
 * Runs KMeans over a tab-separated numeric file with three different
 * initialization strategies and prints, for each, the elapsed time and the
 * number of points assigned to every cluster.
 *
 * Constructing an instance turns off the "org" and "akka" loggers.
 *
 * @param sc            context used to read `dataFile`
 * @param dataFile      path of the input file; each line is split on tabs and
 *                      every field is parsed with `toDouble`, so a malformed
 *                      line fails the job
 * @param numOfCenters  number of clusters (k)
 * @param maxIterations iteration cap passed to KMeans
 */
class KMeansCases(sc: SparkContext, dataFile: String, numOfCenters: Int, maxIterations: Int) {
  // hide logger from console
  Logger.getLogger("org").setLevel(Level.OFF)
  Logger.getLogger("akka").setLevel(Level.OFF)

  val data = sc.textFile(dataFile)
  val parsedData = data.map(s => Vectors.dense(s.split('\t').map(_.toDouble))).cache()

  /**
   * Finds initial centers with `KMeansInitialization`, then trains KMeans
   * starting from those centers. Both phases are timed separately.
   */
  def KMeansInitialCenters(): Unit = {
    val centers = timed("to find centers") {
      new KMeansInitialization().run(sc, dataFile, numOfCenters)
    }
    val clusterModel = timed("with custom") {
      new KMeans()
        .setK(numOfCenters)
        .setMaxIterations(maxIterations)
        .setInitialModel(new KMeansModel(centers))
        .run(parsedData)
    }
    println("\nnumber of points per cluster")
    printClusterSizes(clusterModel)
  }

  /** Trains KMeans with the k-means|| initialization mode and reports the result. */
  def KMeansParallel(): Unit = {
    val clusterModel = timed("with KMeansParallel")(trainWith(KMeans.K_MEANS_PARALLEL))
    println("number of points per cluster")
    printClusterSizes(clusterModel)
  }

  /** Trains KMeans with the random initialization mode and reports the result. */
  def KMeansRandom(): Unit = {
    val clusterModel = timed("with KMeansRandom")(trainWith(KMeans.RANDOM))
    println("number of points per cluster")
    printClusterSizes(clusterModel)
  }

  /**
   * Trains on `parsedData` with the given initialization mode, using the same
   * builder API as `KMeansInitialCenters` instead of the `KMeans.train`
   * overload that takes a `runs` argument.
   */
  private def trainWith(initializationMode: String): KMeansModel =
    new KMeans()
      .setK(numOfCenters)
      .setMaxIterations(maxIterations)
      .setInitializationMode(initializationMode)
      .run(parsedData)

  /**
   * Evaluates `body`, prints how long it took and returns its result.
   * The message always starts with "Initialization", although callers also
   * wrap full training runs in it.
   */
  private def timed[A](what: String)(body: => A): A = {
    val start = System.nanoTime()
    val result = body
    val elapsed = "%.3f".format((System.nanoTime() - start) / 1e9)
    println(s"Initialization $what took $elapsed seconds.")
    result
  }

  /**
   * Prints one line per non-empty cluster with the number of points assigned
   * to it. The counts are brought back to the driver (at most one entry per
   * cluster) and printed in ascending cluster-id order, so the output is
   * visible on the driver and stable between runs.
   */
  private def printClusterSizes(model: KMeansModel): Unit =
    model.predict(parsedData)
      .countByValue()
      .toSeq
      .sortBy(_._1)
      .foreach { case (_, size) => println(size) }
}