本文整理汇总了Scala中org.apache.spark.mllib.clustering.PowerIterationClustering类的典型用法代码示例。如果您正苦于以下问题:Scala PowerIterationClustering类的具体用法?Scala PowerIterationClustering怎么用?Scala PowerIterationClustering使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了PowerIterationClustering类的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: Clustering
//设置package包名称以及导入依赖的类
package com.paypal.risk.smunf.math.learning
import com.paypal.risk.smunf.util.Json
import org.apache.spark.mllib.clustering.PowerIterationClustering
import org.apache.spark.rdd.RDD
object Clustering {

  /** Runs Power Iteration Clustering over a pairwise-similarity graph.
   *
   * @param similarities RDD of (srcId, dstId, similarity) edges; cached before the run
   *                     since PIC iterates over it repeatedly
   * @param numClusters  number of clusters (k)
   * @param maxIteration maximum number of power iterations
   * @return one (vertexId, clusterId) pair per vertex, collected to the driver
   */
  def powerIterationClustering(
      similarities: RDD[(Long, Long, Double)],
      numClusters: Int,
      maxIteration: Int)
    : Seq[(Long, Int)] = {
    val algorithm = new PowerIterationClustering().setK(numClusters).setMaxIterations(maxIteration)
    val assignments = algorithm.run(similarities.cache()).assignments.collect()
    assignments.map(a => (a.id, a.cluster))
  }

  /** Serializes cluster groupings to a pretty-printed JSON string,
   *  one {"cluster": id, "variables": [...]} entry per cluster. */
  def clustersToJson(clusters: Seq[(Int, Seq[String])]): String = {
    val entries = clusters.map { case (id, vars) => Map("cluster" -> id, "variables" -> vars) }
    Json.toPrettyJsonString(entries)
  }
}
示例2: PICTest
//设置package包名称以及导入依赖的类
package mllib.perf.clustering
import org.json4s.JValue
import org.json4s.JsonDSL._
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.PowerIterationClustering
import org.apache.spark.rdd.RDD
import mllib.perf.PerfTest
/** Performance test for Power Iteration Clustering on a synthetic
 *  periodic banded similarity graph. */
class PICTest(sc: SparkContext) extends PerfTest {

  val NUM_EXAMPLES = ("num-examples", "number of examples")
  val NODE_DEGREE = ("node-degree", "number of neighbors each node is connected to")
  val NUM_CENTERS = ("num-centers", "number of centers for clustering tests")
  val NUM_ITERATIONS = ("num-iterations", "number of iterations for the algorithm")

  // Registration must happen before `options` is computed and the parser is built.
  intOptions ++= Seq(NODE_DEGREE, NUM_CENTERS, NUM_ITERATIONS)
  longOptions ++= Seq(NUM_EXAMPLES)
  val options = intOptions ++ stringOptions ++ booleanOptions ++ longOptions ++ doubleOptions
  addOptionsToParser()

  // Similarity edges (src, dst, weight), populated by createInputData.
  var data: RDD[(Long, Long, Double)] = _

  override def createInputData(seed: Long): Unit = {
    val numExamples = longOptionValue(NUM_EXAMPLES)
    val nodeDegree = intOptionValue(NODE_DEGREE)
    val numPartitions = intOptionValue(NUM_PARTITIONS)
    // Periodic banded matrix with bandwidth = nodeDegree: each vertex is linked to
    // its nodeDegree/2 lower-indexed neighbors, wrapping around modulo numExamples.
    data = sc.parallelize(0L to numExamples, numPartitions).flatMap { vertex =>
      val lowest = (vertex - nodeDegree / 2) % numExamples
      (lowest until vertex).map { neighbor =>
        (vertex, (neighbor + numExamples) % numExamples, 1D)
      }
    }
    logInfo(s"Generated ${data.count()} pairwise similarities.")
  }

  override def run(): JValue = {
    val k = intOptionValue(NUM_CENTERS)
    val numIterations = intOptionValue(NUM_ITERATIONS)
    val startMillis = System.currentTimeMillis()
    val model = new PowerIterationClustering()
      .setK(k)
      .setMaxIterations(numIterations)
      .run(data)
    // Force materialization so the timing covers the whole clustering job.
    model.assignments.count()
    val elapsedSeconds = (System.currentTimeMillis() - startMillis) / 1e3
    "time" -> elapsedSeconds
  }
}
示例3: PICTest
//设置package包名称以及导入依赖的类
package mllib.perf.clustering
import org.json4s.JValue
import org.json4s.JsonDSL._
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.PowerIterationClustering
import org.apache.spark.rdd.RDD
import mllib.perf.PerfTest
/** Performance test for Power Iteration Clustering on a synthetic
 *  periodic banded similarity graph. */
class PICTest(sc: SparkContext) extends PerfTest {

  val NUM_POINTS = ("num-points", "number of points")
  val NODE_DEGREE = ("node-degree", "number of neighbors each node is connected to")
  val NUM_CENTERS = ("num-centers", "number of centers for clustering tests")
  val NUM_ITERATIONS = ("num-iterations", "number of iterations for the algorithm")

  // Registration must happen before `options` is computed and the parser is built.
  intOptions ++= Seq(NODE_DEGREE, NUM_CENTERS, NUM_ITERATIONS)
  longOptions ++= Seq(NUM_POINTS)
  val options = intOptions ++ stringOptions ++ booleanOptions ++ longOptions ++ doubleOptions
  addOptionsToParser()

  // Similarity edges (src, dst, weight), populated by createInputData.
  var data: RDD[(Long, Long, Double)] = _

  override def createInputData(seed: Long): Unit = {
    val numPoints = longOptionValue(NUM_POINTS)
    val nodeDegree = intOptionValue(NODE_DEGREE)
    val numPartitions = intOptionValue(NUM_PARTITIONS)
    // Generates a periodic banded matrix with bandwidth = nodeDegree.
    // FIX: was `val data = ...`, which declared a local shadowing the `data` field,
    // leaving the field null and causing run() to fail. Assign the field instead.
    data = sc.parallelize(0L to numPoints, numPartitions)
      .flatMap { id =>
        (((id - nodeDegree / 2) % numPoints) until id).map { nbr =>
          (id, (nbr + numPoints) % numPoints, 1D)
        }
      }
    logInfo(s"Generated ${data.count()} pairwise similarities.")
  }

  override def run(): JValue = {
    val numIterations = intOptionValue(NUM_ITERATIONS)
    val k = intOptionValue(NUM_CENTERS)
    val start = System.currentTimeMillis()
    val pic = new PowerIterationClustering()
      .setK(k)
      .setMaxIterations(numIterations)
    val model = pic.run(data)
    // FIX: force materialization of the assignments (RDDs are lazy) so the measured
    // duration covers the whole clustering job, matching the sibling benchmark.
    model.assignments.count()
    val duration = (System.currentTimeMillis() - start) / 1e3
    "time" -> duration
  }
}