

Scala DenseVector Class Code Examples

This article collects typical usage examples of the org.apache.spark.mllib.linalg.DenseVector class in Scala. If you are unsure what the DenseVector class does, how to use it, or what real-world code that uses it looks like, the curated examples below may help.


Eleven code examples of the DenseVector class are shown below, ordered by popularity.
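
Before the examples, here is a minimal, self-contained sketch of how a DenseVector is typically constructed and inspected; the object name DenseVectorBasics is chosen purely for illustration:

import org.apache.spark.mllib.linalg.{DenseVector, Vectors}

object DenseVectorBasics {
  def main(args: Array[String]): Unit = {
    // construct directly from an Array[Double] ...
    val v1 = new DenseVector(Array(1.0, 2.0, 3.0))
    // ... or via the Vectors factory, which also produces a dense vector
    val v2 = Vectors.dense(1.0, 2.0, 3.0)

    println(v1.size)        // 3
    println(v1(1))          // 2.0 -- element access by index
    println(v1.toArray.sum) // 6.0 -- the underlying values as Array[Double]
    println(v1 == v2)       // true -- mllib vectors compare by their values
  }
}

Most of the examples that follow build DenseVector instances the same way and hand them to MLlib components such as KMeans, RowMatrix, or a LogisticRegressionModel.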

Example 1: PCAClustering

// Package declaration and imported dependencies
package graph

import org.apache.spark.SparkContext
import org.apache.spark.graphx.{EdgeDirection, Edge, Graph}
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.{DenseVector, Vector, Matrix, Vectors}
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.rdd.RDD
import scala.collection.mutable


class PCAClustering {
  def matrixToRDD(sc:SparkContext, m: Matrix): RDD[Vector] = {
    val columns = m.toArray.grouped(m.numRows)
    val rows = columns.toSeq.transpose // Skip this if you want a column-major RDD.
    val vectors = rows.map(row => new DenseVector(row.toArray))
    sc.parallelize(vectors)
  }

  def run(inputGraph: Graph[Any, Any], clusterNum: Int, eigsNum: Int, sc: SparkContext): Graph[Int, Any] = {
    val numNode = inputGraph.numVertices.toInt
    val mapping = new mutable.HashMap[Long,Int]()
    val revMapping = new mutable.HashMap[Int, Long]()

    val vertexIds = inputGraph.vertices.map( u => u._1 ).collect()
    for (i <- 0 until numNode) {
      mapping.put(vertexIds(i), i)
      revMapping.put(i, vertexIds(i))
    }

    //re-index the vertices from 0 to numNode - 1
    val nVertices = inputGraph.vertices.map( u=> (mapping.apply(u._1).toLong, u._2))
    val nEdges = inputGraph.edges.map(e=> Edge(mapping.apply(e.srcId).toLong, mapping.apply(e.dstId).toLong, e.attr))
    val ngraph = Graph(nVertices, nEdges)

    val output = ngraph.collectNeighborIds(EdgeDirection.Out)
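    // each vertex's out-neighbour list becomes a sparse row whose entries are 1/outDegree (a random-walk transition matrix)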
    val spvec = output.mapValues(r => Vectors.sparse( numNode,  r.map(e=>e.toInt) , r.map(e=> 1.0/r.length )))
    val rows = spvec.map(v=>v._2)
    val order = spvec.map(v=>v._1)
    val mat = new RowMatrix(rows)

    val pc = mat.computePrincipalComponents(eigsNum)


    val pcRDD = matrixToRDD(sc, pc)
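    // cluster the nodes in the eigsNum-dimensional principal-component embedding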
    val clusters = KMeans.train(pcRDD, clusterNum, 100)

    val clusterArray = pcRDD.map(p=> clusters.predict(p) ).collect()
    val assignedClusters = order.map( o => (o, clusterArray.apply(o.toInt)))
    val origVertexRDD = assignedClusters.map{ case (vid, value) => (revMapping.apply(vid.toInt), value) }
    Graph(origVertexRDD, inputGraph.edges)

  }

} 
Developer: HPCL, Project: GalacticSpark, Lines of code: 56, Source: PCAClustering.scala

Example 2: RGB

// Package declaration and imported dependencies
package example

import java.awt.Color

import io.flatmap.ml.som.GaussianSelfOrganizingMap
import io.flatmap.ml.util.Plot
import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.sql.SparkSession

object RGB {

  def main(args: Array[String]): Unit = {
    implicit val sparkSession =
      SparkSession
        .builder
        .appName("rgb-clustering")
        .getOrCreate()
    val rgb = sparkSession.sparkContext
      .textFile("data/rgb.csv")
      .map(_.split(",").map(_.toDouble / 255.0))
      .map(new DenseVector(_))
    val (som, params) =
      GaussianSelfOrganizingMap(24, 24, sigma = 0.5, learningRate = 0.3)
        .initialize(rgb)
        .train(rgb, 20)
    Plot.errors(params.errors.reverse)
    Plot.som(f"Trained SOM (error=${params.errors.head}%1.4f)", som.codeBook, "trained_som.png") {
      case red :: green :: blue :: Nil =>
        new Color((red*255.0).toInt, (green*255.0).toInt, (blue*255.0).toInt).getRGB.toDouble
      case _ => Color.white.getRGB.toDouble
    }
  }

} 
Developer: ShokuninSan, Project: som, Lines of code: 35, Source: RGB.scala

Example 3: SelfOrganizingMapSpec

// Package declaration and imported dependencies
package io.flatmap.ml.som

import breeze.numerics.closeTo
import breeze.linalg.DenseMatrix
import io.flatmap.ml.som.SelfOrganizingMap.Shape
import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.mllib.random.RandomRDDs
import org.scalatest._
import util.{FakeDecayFunction, FakeMetrics, FakeNeighborhoodKernel, TestSparkContext}

class SelfOrganizingMapSpec extends FlatSpec with Matchers with BeforeAndAfterEach with TestSparkContext {

  def SOM(width: Int, height: Int) =
    new SelfOrganizingMap with FakeNeighborhoodKernel with FakeDecayFunction with FakeMetrics {
      override val shape: Shape = (width, height)
      override val learningRate: Double = 0.1
      override val sigma: Double = 0.2
    }

  "instantiation" should "create a SOM with codebook of zeros" in {
    val som = SOM(6, 6)
    som.codeBook should === (DenseMatrix.fill[Array[Double]](6, 6)(Array.emptyDoubleArray))
  }

  "initialize" should "copy random data points from RDD into codebook" in {
    val data = RandomRDDs.normalVectorRDD(sparkSession.sparkContext, numRows = 512L, numCols = 3)
    val som = SOM(6, 6)
    som.initialize(data).codeBook should !== (DenseMatrix.fill[Array[Double]](6, 6)(Array.emptyDoubleArray))
  }

  "winner" should "return best matching unit (BMU)" in {
    val som = SOM(6, 6)
    som.codeBook.keysIterator.foreach { case (x, y) => som.codeBook(x, y) = Array(0.2, 0.2, 0.2) }
    som.codeBook(3, 3) = Array(0.3, 0.3, 0.3)
    som.winner(new DenseVector(Array(2.0, 2.0, 2.0)), som.codeBook) should equal ((3, 3))
    som.winner(new DenseVector(Array(0.26, 0.26, 0.26)), som.codeBook) should equal ((3, 3))
  }

  "winner" should "return last best matching unit (BMU) index in case of multiple BMUs" in {
    val som = SOM(6, 6)
    som.codeBook.keysIterator.foreach { case (x, y) => som.codeBook(x, y) = Array(0.2, 0.2, 0.2) }
    som.codeBook(3, 3) = Array(0.3, 0.3, 0.3)
    som.winner(new DenseVector(Array(0.25, 0.25, 0.25)), som.codeBook) should equal ((5, 5))
  }

  "classify" should "return the best matching unit along with Euclidean distance" in {
    val som = SOM(6, 6)
    som.codeBook.keysIterator.foreach { case (x, y) => som.codeBook(x, y) = Array(0.2, 0.2, 0.2) }
    som.codeBook(3, 3) = Array(0.3, 0.3, 0.3)
    val (bmu, distance) = som.classify(new DenseVector(Array(0.26, 0.26, 0.26)))
    bmu should === ((3, 3))
    assert(closeTo(distance, 0.06, relDiff = 1e-2))
  }

} 
Developer: ShokuninSan, Project: som, Lines of code: 56, Source: SelfOrganizingMapSpec.scala

Example 4: GaussianSelfOrganizingMapSpec

// Package declaration and imported dependencies
package io.flatmap.ml.som

import java.awt.Color

import breeze.linalg.DenseMatrix
import breeze.numerics.closeTo
import io.flatmap.ml.util.Plot
import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.mllib.random.RandomRDDs
import org.scalatest._
import util.TestSparkContext

class GaussianSelfOrganizingMapSpec extends FlatSpec with Matchers with BeforeAndAfterEach with TestSparkContext {

  "train" should "return a fitted SOM instance" in {
    val path = getClass.getResource("/rgb.csv").getPath
    val rgb = sparkSession.sparkContext
      .textFile(path)
      .map(_.split(",").map(_.toDouble / 255.0))
      .map(new DenseVector(_))
    val som = GaussianSelfOrganizingMap(6, 6, _sigma = 0.5, _learningRate = 0.3).initialize(rgb)
    val initialCodeBook = som.codeBook.copy
    val codeBookVectorToRGB: List[Double] => Double = {
      case red :: green :: blue :: Nil =>
        new Color((red*255.0).toInt, (green*255.0).toInt, (blue*255.0).toInt).getRGB.toDouble
      case _ => Color.white.getRGB.toDouble
    }
    Plot.som("Initial SOM", som.codeBook, "initial_som.png")(codeBookVectorToRGB)
    val (newSom, params) = som.train(rgb, 20)
    Plot.som(f"Trained SOM (error=${params.errors.head}%1.4f)", newSom.codeBook, "trained_som.png")(codeBookVectorToRGB)
    Plot.errors(params.errors.reverse)
    newSom.codeBook should not equal initialCodeBook
    assert(closeTo(params.errors.head, 0.15, relDiff = 1e-2))
  }

} 
Developer: ShokuninSan, Project: som, Lines of code: 37, Source: GaussianSelfOrganizingMapSpec.scala

Example 5: Predict

// Package declaration and imported dependencies
package TelCom


import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.classification.{LogisticRegressionModel, LogisticRegressionWithLBFGS}
import org.apache.spark.mllib.linalg.{DenseVector, Vector, Vectors}

object Predict {
  val sizeOfSample:Int = 22
  def main(args: Array[String]): Unit = {


    val conf = new SparkConf()
    conf.setAppName("logic")
    val sc = new SparkContext(conf)
    val sqlContext = new org.apache.spark.sql.SQLContext(sc)

    var fileRoot = "H:\\????_???????_contest_data"
    val data = sc.textFile(fileRoot + "/toPredict.txt")
    val Model1 = LogisticRegressionModel.load(sc, fileRoot + "/model")
    val Model2 = LogisticRegressionModel.load(sc, fileRoot + "/model")

    // predict each record with both models and keep only the abnormal ones
    val feature1 = data.map(x => x.split("\t")).map(x =>toPredict(x,Model1,Model2)).filter(x => x != "Normal")
    print(feature1)
    feature1.randomSplit(Array(1, 0), seed = 11L)(0).repartition(1).saveAsTextFile(fileRoot + "/out")

  }

  def toPredict(x:Array[String],model1:LogisticRegressionModel,model2: LogisticRegressionModel): String =
  {
    var z:Array[Double] = new Array[Double](sizeOfSample  - 3)
    for(i <-  2 to x.size-1)
      z(i-2) = x(i).toDouble

    val v:Vector = Vectors.dense(z)
    val result1 =  model1.predict(v)
    val result2 =  model2.predict(v)
    var returnResult: String = null
    if (result1 > 0.95) {
      returnResult = x(0).toString + "\t" + x(1).toString + "\t" + "1"
    } else if (result2 > 1) {
      returnResult = x(0).toString + "\t" + x(1).toString + "\t" + "2"
    } else {
      returnResult = "Normal"
    }

    returnResult
  }

} 
Developer: sunyinglong, Project: SecondTelcomCompetition, Lines of code: 55, Source: Predict.scala

Example 6: PipelineClassifier

// Package declaration and imported dependencies
import org.apache.spark.ml.PipelineModel
import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame

class PipelineClassifier(val pipeline: PipelineModel) extends UnifiedClassifier with Serializable {
  override def predict[T](data: DataFrame): RDD[(T, Double)] = {
    val singletonDF = ModelHelpers.addMetadata(data)
    val predictions = pipeline.transform(singletonDF)
    predictions.map(row => {
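      // rawPrediction holds one raw score per class; normalise the class-1 score into a pseudo-probability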
      val firstClass = row.getAs[DenseVector](DataFrameColumns.RAW_PREDICTION)(1)
      val zeroClass = row.getAs[DenseVector](DataFrameColumns.RAW_PREDICTION)(0)
      val prob = firstClass.toDouble / (firstClass.toDouble + zeroClass.toDouble)
      (row.getAs[T](DataFrameColumns.KEY), prob)
    })
  }
} 
Developer: Spikhalskiy, Project: hackaton, Lines of code: 18, Source: PipelineClassifier.scala

Example 7: Utils

// Package declaration and imported dependencies
package mapper.utils

import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.{ Vector, DenseVector, SparseVector }
import breeze.linalg.{ DenseVector => BDV, SparseVector => BSV, Vector => BV }
import org.apache.spark.mllib.linalg.distributed.{ IndexedRowMatrix, IndexedRow, BlockMatrix }

object Utils {
  def toBlockMatrix(x: RDD[Vector], rowsPerBlock: Int = 1024, colsPerBlock: Int = 1024): BlockMatrix = {
    new IndexedRowMatrix(
      x.zipWithIndex().map({ xi => IndexedRow(xi._2, xi._1) })
    ).toBlockMatrix(rowsPerBlock, colsPerBlock)
  }

  def toBreeze(v: Vector): BV[Double] = v match {
    case DenseVector(values) => new BDV[Double](values)
    case SparseVector(size, indices, values) => new BSV[Double](indices, values, size)
  }

  def toSpark(bv: BV[Double]): Vector = bv match {
    case v: BDV[Double] => new DenseVector(v.toArray)
    case v: BSV[Double] => new SparseVector(v.length, v.index, v.data)
  }

  def cartesian[A](xs: Traversable[Traversable[A]]): Seq[Seq[A]] =
    xs.foldLeft(Seq(Seq.empty[A])) { (x, y) => for (a <- x; b <- y) yield a :+ b }

} 
Developer: log0ymxm, Project: spark-mapper, Lines of code: 29, Source: Utils.scala

Example 8: MapperSpec

// Package declaration and imported dependencies
package com.github.log0ymxm.mapper

import org.scalatest._
import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.{ SparkSession, Row }
import org.apache.spark.mllib.linalg.distributed.{ CoordinateMatrix, IndexedRow, IndexedRowMatrix, MatrixEntry }
import org.apache.spark.mllib.linalg.{ DenseVector, Vector, Vectors }

class MapperSpec extends FunSuite with SharedSparkContext {

  test("simple mapper on noisy circle") {
    val spark = SparkSession.builder().getOrCreate()

    val fileLoc = getClass.getClassLoader.getResource("circles.csv").getPath()
    val circle = spark.read
      .option("header", false)
      .option("inferSchema", true)
      .csv(fileLoc)

    assert(circle.count == 400)

    val indexedRDD = circle.rdd.zipWithIndex.map {
      case (Row(x: Double, y: Double), i) =>
        val v: Vector = new DenseVector(Array(x, y))
        IndexedRow(i, v)
    }
    val matrix = new IndexedRowMatrix(indexedRDD)
    val similarities = matrix.toCoordinateMatrix
      .transpose()
      .toIndexedRowMatrix()
      .columnSimilarities()
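    // columnSimilarities() yields cosine similarities; convert them to distances as 1 - similarity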
    val distances = new CoordinateMatrix(
      similarities
        .entries
        .map((entry) => new MatrixEntry(entry.i, entry.j, 1 - entry.value))
    )

    val filtration = new IndexedRowMatrix(indexedRDD.map({ row =>
      IndexedRow(row.index, new DenseVector(Array(
        Vectors.norm(row.vector, 2)
      )))
    }))

    val graph = Mapper.mapper(sc, distances, filtration, 100, 2.0)
    //Mapper.writeAsJson(graph, "mapper-vis/circle-graph.json")

    assert(graph.vertices.count == 160)
    assert(graph.edges.count == 327)
  }
} 
Developer: log0ymxm, Project: spark-mapper, Lines of code: 51, Source: MapperSpec.scala

Example 9: CoverSpec

// Package declaration and imported dependencies
package com.github.log0ymxm.mapper

import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.mllib.linalg.distributed.{ IndexedRow, IndexedRowMatrix }

import org.scalatest._
import com.holdenkarau.spark.testing.SharedSparkContext

class CoverSpec extends FunSuite with SharedSparkContext {
  test("cover") {
    val rdd = sc.parallelize((0 to 10).toSeq)
    val filtration = new IndexedRowMatrix(
      rdd.map({ x =>
        new IndexedRow(x, new DenseVector(Array(x * 2, scala.math.sin(x))))
      })
    )

    val cover = new Cover(filtration, 4, 0.5)

    assert(cover.numCoverSegments == 16)
    assert(cover.filterRanges(0) == NumericBoundary(0.0, 20.0))
    assert(cover.filterRanges(1).lower >= -1.0)
    assert(cover.filterRanges(1).upper <= 1.0)

    assert(cover.coverAssignment(new DenseVector(Array(8.33, 0.5))) == List(CoverSegmentKey(6), CoverSegmentKey(7)))

  }
} 
Developer: log0ymxm, Project: spark-mapper, Lines of code: 29, Source: CoverSpec.scala

Example 10: LabelPropagationClassifierTest

// Package declaration and imported dependencies
package cz.cvut.fit.palicand.vocloud.ssl.ml

import com.holdenkarau.spark.testing._
import cz.cvut.fit.palicand.vocloud.ssl.ml.classification.LabelPropagationClassifier
import org.apache.spark.mllib.linalg.distributed.IndexedRow
import org.apache.spark.mllib.linalg.{DenseVector, VectorUDT, Vectors}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row, SQLContext}
import org.scalatest.{Matchers, FlatSpec}


class LabelPropagationClassifierTest extends FlatSpec with SharedSparkContext with Matchers {

  behavior of "LabelPropagationTest"

  it should "train" in {
    val sqlContext = new SQLContext(sc)
    val rdd: RDD[Row] = sc.parallelize(Row(0L, 0.0, Vectors.dense(0.0, 1.0)) :: Row(1L, 1.0, Vectors.dense(1.0, 0.0)) :: Row(2L, 2.0, Vectors.dense(0.0, 0.0)) :: Nil)
    val df = sqlContext.createDataFrame(rdd, StructType(List(StructField("rowNo", LongType), StructField("label", DoubleType), StructField("features", new VectorUDT))))
    val clf = new LabelPropagationClassifier()
    clf.setKNeighbours(2)
    clf.setLabelCol("label")
    clf.setFeaturesCol("features")
    val model = clf.fit(df)
    model.labelWeights.toIndexedRowMatrix().rows.collect() should be(createIndexedRow(0, 1.0, 0.0) ::
      createIndexedRow(1, 0.0, 1.0) :: createIndexedRow(2, 1.0, 0) :: Nil)
  }

  def createIndexedRow(i: Int, vals: Double*): IndexedRow = {
    new IndexedRow(i, new DenseVector(vals.toArray))
  }
} 
Developer: palicand, Project: graph_ssl, Lines of code: 34, Source: LabelPropagationClassifierTest.scala

Example 11: DataGenerator

// Package declaration and imported dependencies
package com.bistel.wordcount.logisticRegression

import org.apache.spark.SparkContext
import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.mllib.regression.LabeledPoint

import scala.collection.mutable.ArrayBuffer
import scala.util.Random



class DataGenerator(numTasks: Int)(implicit sc: SparkContext) {
  final val SIGMA = 2.0
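  // jitter the class mean with uniform noise in [-SIGMA/2, +SIGMA/2]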
  private def f(mean: Double): Double =
    mean + SIGMA *( Random.nextDouble - 0.5)

  def apply(half: Int, mu: Double): Array[LabeledPoint] = {
    val trainObs =
      ArrayBuffer.fill(half)(Array[Double](f(1.0),f(1.0),f(1.0))) ++
        ArrayBuffer.fill(half)(Array[Double](f(mu),f(mu),f(mu)))

    val labels = ArrayBuffer.fill(half)(0.0) ++
      ArrayBuffer.fill(half)(1.0)
    labels.zip(trainObs).map{ case (y, ar) =>
      LabeledPoint(y, new DenseVector(ar)) }.toArray
  }
} 
Developer: jacob119, Project: WordCount, Lines of code: 28, Source: DataGenerator.scala


Note: The org.apache.spark.mllib.linalg.DenseVector class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright of the source code belongs to the original authors. For distribution and use, please refer to each project's license. Do not reproduce without permission.