This article collects typical usage examples of the Scala class org.apache.spark.mllib.linalg.SparseVector. If you have been wondering what SparseVector is for, how to use it in Scala, or what working examples look like, the curated class examples below should help.
The following presents 5 code examples of the SparseVector class, ordered by popularity by default.
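Before the examples, here is a minimal standalone sketch (not taken from the examples below) showing how a SparseVector is typically constructed and inspected; it uses only the public MLlib API, and the object name and values are made up for illustration:

import org.apache.spark.mllib.linalg.{SparseVector, Vectors}

object SparseVectorSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical example: a 5-dimensional vector with non-zeros at positions 1 and 3.
    val sv = new SparseVector(5, Array(1, 3), Array(2.0, 4.0))

    // The Vectors factory builds the same thing and is the more common entry point.
    val sv2 = Vectors.sparse(5, Array(1, 3), Array(2.0, 4.0))

    println(sv.size)      // 5
    println(sv(3))        // 4.0
    println(sv.toDense)   // [0.0,2.0,0.0,4.0,0.0]
    println(sv == sv2)    // true
  }
}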
Example 1: Utils
// Set the package name and import the required classes
package mapper.utils

import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.{ Vector, DenseVector, SparseVector }
import breeze.linalg.{ DenseVector => BDV, SparseVector => BSV, Vector => BV }
import org.apache.spark.mllib.linalg.distributed.{ IndexedRowMatrix, IndexedRow, BlockMatrix }

object Utils {

  // Wrap an RDD of vectors into a distributed BlockMatrix, indexing rows by their position.
  def toBlockMatrix(x: RDD[Vector], rowsPerBlock: Int = 1024, colsPerBlock: Int = 1024): BlockMatrix = {
    new IndexedRowMatrix(
      x.zipWithIndex().map({ xi => IndexedRow(xi._2, xi._1) })
    ).toBlockMatrix(rowsPerBlock, colsPerBlock)
  }

  // Convert an MLlib vector to the corresponding Breeze vector (dense or sparse).
  def toBreeze(v: Vector): BV[Double] = v match {
    case DenseVector(values) => new BDV[Double](values)
    case SparseVector(size, indices, values) => new BSV[Double](indices, values, size)
  }

  // Convert a Breeze vector back to an MLlib vector.
  def toSpark(bv: BV[Double]): Vector = bv match {
    case v: BDV[Double] => new DenseVector(v.toArray)
    case v: BSV[Double] => new SparseVector(v.length, v.index, v.data)
  }

  // Cartesian product of a collection of collections.
  def cartesian[A](xs: Traversable[Traversable[A]]): Seq[Seq[A]] =
    xs.foldLeft(Seq(Seq.empty[A])) { (x, y) => for (a <- x; b <- y) yield a :+ b }
}
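To make the conversion helpers concrete, here is a small, hypothetical driver (not part of the original code) that round-trips an MLlib vector through Breeze and exercises cartesian; toBlockMatrix is omitted because it needs a live SparkContext:

import org.apache.spark.mllib.linalg.Vectors
import mapper.utils.Utils

object UtilsUsage {
  def main(args: Array[String]): Unit = {
    // Round-trip a sparse MLlib vector through Breeze and back; equality is preserved.
    val v = Utils.toSpark(Utils.toBreeze(Vectors.sparse(4, Array(0, 2), Array(1.0, 3.0))))
    println(v)  // (4,[0,2],[1.0,3.0])

    // Cartesian product of small collections.
    println(Utils.cartesian(Seq(Seq(1, 2), Seq(3, 4))))
    // List(List(1, 3), List(1, 4), List(2, 3), List(2, 4))
  }
}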
Example 2: Controller
// Set the package name and import the required classes
package controller

import com.typesafe.config.ConfigFactory
import core.Predictor
import iomanager.{InputManager, OutputManager}
import modelmanager.ModelFactory
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.mllib.linalg.SparseVector
import org.apache.spark.streaming.{Seconds, StreamingContext}

object Controller {
  def main(args: Array[String]) {
    val defaultConfig = ConfigFactory.load("application.conf")
    val config = ConfigFactory.load.withFallback(defaultConfig)
    val eventsCount = config.getInt("eventsCount")

    // Set up the Spark streaming environment
    val conf = new SparkConf().setAppName("Predictor")
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val ssc = new StreamingContext(conf, Seconds(config.getInt("input.streaming.batchTime")))
    Logger.getRootLogger.setLevel(Level.ERROR)

    // Load the models and wire up the input and output streams
    val models = ModelFactory.prepareModels(ssc, config)
    Predictor.setUpModels(ssc, models)
    val stream = InputManager.createInputStream(ssc, config)
    OutputManager.prepareOutputStream(config)

    // For each batch, build a 0/1 indicator SparseVector over the observed event ids
    // and forward the resulting predictions.
    stream.foreachRDD((rdd, time) => {
      val arr = rdd.reduce(_ ++ _).toArray.sorted
      val predictions = Predictor.getPredictions(
        new SparseVector(eventsCount, arr, Array.fill(arr.length)(1.0)))
      OutputManager.sendPredictions(predictions.toParArray.partition(_._2 == 1.0), time)
    })

    ssc.start()
    ssc.awaitTermination()
  }
}
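The interesting SparseVector usage here sits inside foreachRDD: the sorted event ids of a batch become the indices of a 0/1 indicator vector. A minimal, self-contained sketch of just that construction (eventsCount and the observed ids are made-up values, and the object name is hypothetical):

import org.apache.spark.mllib.linalg.SparseVector

object IndicatorVectorSketch {
  def main(args: Array[String]): Unit = {
    val eventsCount = 10           // hypothetical total number of event types
    val observed = Array(1, 4, 7)  // sorted event ids seen in the current batch
    // Same construction as in foreachRDD above: a binary indicator vector.
    val features = new SparseVector(eventsCount, observed, Array.fill(observed.length)(1.0))
    println(features)  // (10,[1,4,7],[1.0,1.0,1.0])
  }
}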
Example 3: HashTableEntry
// Set the package name and import the required classes
package com.github.karlhigley.spark.neighbors.lsh

import scala.collection.immutable.BitSet
import org.apache.spark.mllib.linalg.SparseVector

// An entry in an LSH hash table: a point id, the table index, the point's
// signature under that table's hash function, and the original sparse point.
private[neighbors] sealed abstract class HashTableEntry[+S <: Signature[_]] {
  val id: Long
  val table: Int
  val signature: S
  val point: SparseVector

  def sigElements: Array[Int]
}

// Entry whose signature is a set of bits.
private[neighbors] final case class BitHashTableEntry(
    id: Long,
    table: Int,
    signature: BitSignature,
    point: SparseVector
) extends HashTableEntry[BitSignature] {
  def sigElements: Array[Int] = {
    signature.elements.toArray
  }
}

// Entry whose signature is an array of integers.
private[neighbors] final case class IntHashTableEntry(
    id: Long,
    table: Int,
    signature: IntSignature,
    point: SparseVector
) extends HashTableEntry[IntSignature] {
  def sigElements: Array[Int] = {
    signature.elements
  }
}
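A minimal sketch of how such an entry might be built. Because the classes are private[neighbors], it would have to live inside the same package, and it assumes BitSignature is a case class wrapping a scala.collection.immutable.BitSet (consistent with sigElements above, but not shown in this excerpt):

package com.github.karlhigley.spark.neighbors.lsh

import scala.collection.immutable.BitSet
import org.apache.spark.mllib.linalg.SparseVector

object HashTableEntrySketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical point and signature values, purely for illustration.
    val point = new SparseVector(8, Array(0, 3, 5), Array(1.0, 1.0, 1.0))
    val entry = BitHashTableEntry(
      id = 42L,
      table = 0,
      signature = BitSignature(BitSet(1, 4, 6)),  // assumed constructor, see lead-in
      point = point
    )
    println(entry.sigElements.mkString(","))  // 1,4,6
  }
}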
Example 4: TestHelpers
// Set the package name and import the required classes
package com.github.karlhigley.spark.neighbors

import scala.util.Random
import org.apache.spark.mllib.linalg.SparseVector

object TestHelpers {

  // Generate `quantity` random sparse vectors of the given dimensionality,
  // each with floor(dimensions * density) non-zero entries.
  def generateRandomPoints(quantity: Int, dimensions: Int, density: Double) = {
    val numElements = math.floor(dimensions * density).toInt
    val points = new Array[SparseVector](quantity)
    var i = 0
    while (i < quantity) {
      val indices = generateIndices(numElements, dimensions)
      val values = generateValues(numElements)
      points(i) = new SparseVector(dimensions, indices, values)
      i += 1
    }
    points
  }

  // Sample `quantity` distinct indices in [0, dimensions) by rejection sampling.
  def generateIndices(quantity: Int, dimensions: Int) = {
    val indices = new Array[Int](quantity)
    var i = 0
    while (i < quantity) {
      val possible = Random.nextInt(dimensions)
      if (!indices.contains(possible)) {
        indices(i) = possible
        i += 1
      }
    }
    indices
  }

  // Draw `quantity` values from a standard normal distribution.
  def generateValues(quantity: Int) = {
    val values = new Array[Double](quantity)
    var i = 0
    while (i < quantity) {
      values(i) = Random.nextGaussian()
      i += 1
    }
    values
  }
}
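A small, hypothetical driver showing what the helper produces; the object name is made up, and the assertions follow directly from the definitions above:

import com.github.karlhigley.spark.neighbors.TestHelpers

object TestHelpersUsage {
  def main(args: Array[String]): Unit = {
    // 3 sparse vectors in 10 dimensions, with half of the entries non-zero.
    val points = TestHelpers.generateRandomPoints(quantity = 3, dimensions = 10, density = 0.5)
    points.foreach { p =>
      assert(p.size == 10)
      assert(p.indices.length == 5)  // floor(10 * 0.5)
      println(p)
    }
  }
}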
Example 5: CollisionStrategySuite
// Set the package name and import the required classes
package com.github.karlhigley.spark.neighbors

import org.scalatest.FunSuite
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.SparseVector

class CollisionStrategySuite extends FunSuite with TestSparkContext {
  val numPoints = 1000
  val dimensions = 100
  val density = 0.5

  var points: RDD[(Long, SparseVector)] = _

  override def beforeAll() {
    super.beforeAll()
    val localPoints = TestHelpers.generateRandomPoints(numPoints, dimensions, density)
    points = sc.parallelize(localPoints).zipWithIndex.map(_.swap)
  }

  test("SimpleCollisionStrategy produces the correct number of tuples") {
    val ann =
      new ANN(dimensions, "cosine")
        .setTables(1)
        .setSignatureLength(8)

    val model = ann.train(points)

    val hashTables = model.hashTables
    val collidable = model.collisionStrategy(hashTables)

    // With a single table, every point yields exactly one collision-candidate tuple.
    assert(collidable.count() == numPoints)
  }

  test("BandingCollisionStrategy produces the correct number of tuples") {
    val numBands = 4
    val ann =
      new ANN(dimensions, "jaccard")
        .setTables(1)
        .setSignatureLength(8)
        .setBands(numBands)
        .setPrimeModulus(739)

    val model = ann.train(points)

    val hashTables = model.hashTables
    val collidable = model.collisionStrategy(hashTables)

    // With banding, each point produces one tuple per band.
    assert(collidable.count() == numPoints * numBands)
  }
}
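For readers without a ScalaTest setup, here is a hypothetical standalone version of the first test, assuming a local SparkContext; it uses only the ANN calls already shown in the suite, and the object name is made up:

import org.apache.spark.{SparkConf, SparkContext}
import com.github.karlhigley.spark.neighbors.{ANN, TestHelpers}

object CollisionStrategyDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("collision-demo"))

    // Random sparse points keyed by a Long id, exactly as in beforeAll above.
    val localPoints = TestHelpers.generateRandomPoints(1000, 100, 0.5)
    val points = sc.parallelize(localPoints).zipWithIndex.map(_.swap)

    val model = new ANN(100, "cosine")
      .setTables(1)
      .setSignatureLength(8)
      .train(points)

    // One hash table, so each point collides exactly once.
    println(model.collisionStrategy(model.hashTables).count())  // 1000
    sc.stop()
  }
}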