This article collects typical usage examples of the Scala class org.apache.spark.mllib.linalg.SparseVector. If you are unsure what SparseVector is for, how to use it, or want to see it in real code, the curated class examples below should help.
Five code examples of the SparseVector class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Scala code examples.
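Before the examples, here is a minimal orientation sketch (not taken from any of the projects quoted below; the object name SparseVectorIntro is made up) showing the two common ways to construct a sparse vector in MLlib: the SparseVector constructor and the Vectors.sparse factory.

import org.apache.spark.mllib.linalg.{SparseVector, Vectors}

object SparseVectorIntro {
  def main(args: Array[String]): Unit = {
    // A 5-dimensional vector with non-zeros at positions 1 and 3.
    // Indices must be ascending and the same length as the values array.
    val direct = new SparseVector(5, Array(1, 3), Array(4.0, 7.0))
    val viaFactory = Vectors.sparse(5, Array(1, 3), Array(4.0, 7.0))

    println(direct)          // (5,[1,3],[4.0,7.0])
    println(viaFactory.size) // 5
    println(direct(3))       // 7.0 -- apply() looks up a single coordinate
  }
}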
Example 1: Utils
// Package declaration and imported dependencies
package mapper.utils

import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.{ Vector, DenseVector, SparseVector }
import breeze.linalg.{ DenseVector => BDV, SparseVector => BSV, Vector => BV }
import org.apache.spark.mllib.linalg.distributed.{ IndexedRowMatrix, IndexedRow, BlockMatrix }

object Utils {

  // Convert an RDD of vectors into a distributed BlockMatrix, indexing rows by their position.
  def toBlockMatrix(x: RDD[Vector], rowsPerBlock: Int = 1024, colsPerBlock: Int = 1024): BlockMatrix = {
    new IndexedRowMatrix(
      x.zipWithIndex().map({ xi => IndexedRow(xi._2, xi._1) })
    ).toBlockMatrix(rowsPerBlock, colsPerBlock)
  }

  // Convert a Spark MLlib vector to the corresponding Breeze vector type.
  def toBreeze(v: Vector): BV[Double] = v match {
    case DenseVector(values) => new BDV[Double](values)
    case SparseVector(size, indices, values) => new BSV[Double](indices, values, size)
  }

  // Convert a Breeze vector back to a Spark MLlib vector.
  def toSpark(bv: BV[Double]): Vector = bv match {
    case v: BDV[Double] => new DenseVector(v.toArray)
    case v: BSV[Double] => new SparseVector(v.length, v.index, v.data)
  }

  // Cartesian product of a collection of collections,
  // e.g. Seq(Seq(1, 2), Seq(3)) => Seq(Seq(1, 3), Seq(2, 3)).
  def cartesian[A](xs: Traversable[Traversable[A]]): Seq[Seq[A]] =
    xs.foldLeft(Seq(Seq.empty[A])) { (x, y) => for (a <- x; b <- y) yield a :+ b }
}
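As a hypothetical usage sketch (not part of the original project), the conversions above could be exercised in a spark-shell session where sc is already defined and the Utils class is on the classpath:

import org.apache.spark.mllib.linalg.Vectors
import mapper.utils.Utils

// Round-trip a sparse MLlib vector through Breeze and back.
val v = Utils.toBreeze(Vectors.sparse(4, Array(0, 2), Array(1.0, 3.0)))
val backAgain = Utils.toSpark(v)
println(backAgain)                       // (4,[0,2],[1.0,3.0])

// Build a 2 x 4 BlockMatrix from an RDD of two 4-dimensional vectors.
val mat = Utils.toBlockMatrix(sc.parallelize(Seq(backAgain, backAgain)))
println(mat.numRows() + " x " + mat.numCols())   // 2 x 4

// Cartesian product of parameter choices.
println(Utils.cartesian(Seq(Seq(1, 2), Seq(10, 20))))
// List(List(1, 10), List(1, 20), List(2, 10), List(2, 20))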
Example 2: Controller
// Package declaration and imported dependencies
package controller

import com.typesafe.config.ConfigFactory
import core.Predictor
import iomanager.{InputManager, OutputManager}
import modelmanager.ModelFactory
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.mllib.linalg.SparseVector
import org.apache.spark.streaming.{Seconds, StreamingContext}

object Controller {
  def main(args: Array[String]) {
    val defaultConfig = ConfigFactory.load("application.conf")
    val config = ConfigFactory.load.withFallback(defaultConfig)
    val eventsCount = config.getInt("eventsCount")

    // Set up the Spark streaming environment
    val conf = new SparkConf().setAppName("Predictor")
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val ssc = new StreamingContext(conf, Seconds(config.getInt("input.streaming.batchTime")))
    Logger.getRootLogger.setLevel(Level.ERROR)

    // Load the models and wire up the input and output streams
    val models = ModelFactory.prepareModels(ssc, config)
    Predictor.setUpModels(ssc, models)
    val stream = InputManager.createInputStream(ssc, config)
    OutputManager.prepareOutputStream(config)

    // For each batch, encode the observed event ids as a binary SparseVector and emit predictions
    stream.foreachRDD((rdd, time) => {
      val arr = rdd.reduce(_ ++ _).toArray.sorted
      val predictions = Predictor.getPredictions(
        new SparseVector(eventsCount, arr, Array.fill(arr.length)(1.0)))
      OutputManager.sendPredictions(predictions.toParArray.partition(_._2 == 1.0), time)
    })

    ssc.start()
    ssc.awaitTermination()
  }
}
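The SparseVector detail worth noting here is the line that turns a batch of observed event ids into a binary indicator vector: each sorted event id becomes an index and every value is 1.0. A standalone sketch of that encoding, with made-up event ids and count, can be pasted into a Scala REPL:

import org.apache.spark.mllib.linalg.SparseVector

// Hypothetical values: 10 known event types; events 2, 5 and 7 were seen in this batch.
val eventsCount = 10
val arr = Array(2, 5, 7)   // already sorted, as in the example above
val features = new SparseVector(eventsCount, arr, Array.fill(arr.length)(1.0))

println(features)      // (10,[2,5,7],[1.0,1.0,1.0])
println(features(5))   // 1.0 -- event 5 occurred
println(features(3))   // 0.0 -- event 3 did not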
Example 3: HashTableEntry
// Package declaration and imported dependencies
package com.github.karlhigley.spark.neighbors.lsh

import scala.collection.immutable.BitSet
import org.apache.spark.mllib.linalg.SparseVector

// Base class for an LSH hash-table entry: a point id, the table it belongs to,
// its signature, and the original sparse point.
private[neighbors] sealed abstract class HashTableEntry[+S <: Signature[_]] {
  val id: Long
  val table: Int
  val signature: S
  val point: SparseVector

  def sigElements: Array[Int]
}

private[neighbors] final case class BitHashTableEntry(
    id: Long,
    table: Int,
    signature: BitSignature,
    point: SparseVector
) extends HashTableEntry[BitSignature] {
  def sigElements: Array[Int] = {
    signature.elements.toArray
  }
}

private[neighbors] final case class IntHashTableEntry(
    id: Long,
    table: Int,
    signature: IntSignature,
    point: SparseVector
) extends HashTableEntry[IntSignature] {
  def sigElements: Array[Int] = {
    signature.elements
  }
}
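The only subtlety in sigElements is the bit-signature case: a Scala BitSet stores the positions of its set bits, and toArray returns those positions as an Array[Int], so both entry types expose their signature in the same array form. A small REPL-style illustration using only the standard library (unrelated to the library's own Signature types):

import scala.collection.immutable.BitSet

val bits = BitSet(0, 3, 8)              // bits 0, 3 and 8 are set
val elements: Array[Int] = bits.toArray
println(elements.mkString(","))         // 0,3,8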
Example 4: TestHelpers
// Package declaration and imported dependencies
package com.github.karlhigley.spark.neighbors

import scala.util.Random
import org.apache.spark.mllib.linalg.SparseVector

object TestHelpers {

  // Generate `quantity` random SparseVectors of the given dimensionality,
  // each with roughly `dimensions * density` non-zero entries.
  def generateRandomPoints(quantity: Int, dimensions: Int, density: Double) = {
    val numElements = math.floor(dimensions * density).toInt
    val points = new Array[SparseVector](quantity)
    var i = 0
    while (i < quantity) {
      val indices = generateIndices(numElements, dimensions)
      val values = generateValues(numElements)
      points(i) = new SparseVector(dimensions, indices, values)
      i += 1
    }
    points
  }

  // Draw `quantity` distinct indices in [0, dimensions) by rejection sampling.
  // Only the already-filled prefix is checked for duplicates; checking the whole
  // zero-initialized array would mean index 0 could never be chosen.
  def generateIndices(quantity: Int, dimensions: Int) = {
    val indices = new Array[Int](quantity)
    var i = 0
    while (i < quantity) {
      val possible = Random.nextInt(dimensions)
      if (!indices.take(i).contains(possible)) {
        indices(i) = possible
        i += 1
      }
    }
    indices
  }

  // Draw `quantity` values from a standard normal distribution.
  def generateValues(quantity: Int) = {
    val values = new Array[Double](quantity)
    var i = 0
    while (i < quantity) {
      values(i) = Random.nextGaussian()
      i += 1
    }
    values
  }
}
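For reference, a small sketch of how these helpers might be called from a REPL or another test (the numbers are arbitrary):

// Three 10-dimensional points, each with floor(10 * 0.4) = 4 non-zero entries.
val points = TestHelpers.generateRandomPoints(quantity = 3, dimensions = 10, density = 0.4)

points.foreach { p =>
  println(s"${p.indices.length} non-zeros at ${p.indices.mkString("[", ",", "]")}")
}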
Example 5: CollisionStrategySuite
// Package declaration and imported dependencies
package com.github.karlhigley.spark.neighbors

import org.scalatest.FunSuite
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.SparseVector

class CollisionStrategySuite extends FunSuite with TestSparkContext {
  val numPoints = 1000
  val dimensions = 100
  val density = 0.5

  var points: RDD[(Long, SparseVector)] = _

  override def beforeAll() {
    super.beforeAll()
    val localPoints = TestHelpers.generateRandomPoints(numPoints, dimensions, density)
    points = sc.parallelize(localPoints).zipWithIndex.map(_.swap)
  }

  test("SimpleCollisionStrategy produces the correct number of tuples") {
    val ann =
      new ANN(dimensions, "cosine")
        .setTables(1)
        .setSignatureLength(8)

    val model = ann.train(points)

    val hashTables = model.hashTables
    val collidable = model.collisionStrategy(hashTables)

    // One hash table => one candidate tuple per point
    assert(collidable.count() == numPoints)
  }

  test("BandingCollisionStrategy produces the correct number of tuples") {
    val numBands = 4
    val ann =
      new ANN(dimensions, "jaccard")
        .setTables(1)
        .setSignatureLength(8)
        .setBands(numBands)
        .setPrimeModulus(739)

    val model = ann.train(points)

    val hashTables = model.hashTables
    val collidable = model.collisionStrategy(hashTables)

    // Banding splits each signature into numBands bands => numBands tuples per point
    assert(collidable.count() == numPoints * numBands)
  }
}
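Beyond counting collision candidates, a trained model like this is normally used to query for approximate nearest neighbors. The sketch below is a hedged assumption based on the spark-neighbors README rather than on this test suite; in particular, the neighbors(k) method and the value 10 are assumptions.

// Train as in the cosine test above, then ask for each point's approximate neighbors.
// (Assumes the trained model exposes neighbors(k), as described in the library's README.)
val model =
  new ANN(dimensions, "cosine")
    .setTables(1)
    .setSignatureLength(8)
    .train(points)

val neighbors = model.neighbors(10)   // each point id paired with its candidate (neighbor id, distance) pairs
neighbors.take(1).foreach { case (id, nbrs) =>
  println(s"point $id: ${nbrs.length} neighbors found")
}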