当前位置: 首页>>代码示例>>Scala>>正文


Scala VectorUDT类代码示例

本文整理汇总了Scala中org.apache.spark.mllib.linalg.VectorUDT的典型用法代码示例。如果您正苦于以下问题:Scala VectorUDT类的具体用法?Scala VectorUDT怎么用?Scala VectorUDT使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了VectorUDT类的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: SimpleApp

//设置package包名称以及导入依赖的类
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf

import org.apache.spark.ml.clustering.LDA
import org.apache.spark.mllib.linalg.{VectorUDT, Vectors}
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.types.{StructField, StructType}


/**
 * Minimal Spark driver: loads space-separated doubles from a text file,
 * wraps each line as a dense feature vector, fits an LDA topic model,
 * and prints the resulting topics plus model-quality metrics.
 */
object SimpleApp {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Simple Application").set("spark.ui.enabled", "false")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    try {
      // Loads data: one whitespace-separated row of doubles per non-empty line,
      // each becoming a single dense vector wrapped in a Row.
      val rowRDD = sc.textFile("/tmp/lda_data.txt").filter(_.nonEmpty)
        .map(_.split(" ").map(_.toDouble)).map(Vectors.dense).map(Row(_))
      val schema = StructType(Array(StructField("name", new VectorUDT, false)))
      val dataset = sqlContext.createDataFrame(rowRDD, schema)
      dataset.show()

      val lda = new LDA()
        .setK(10)
        .setMaxIter(10)
        .setFeaturesCol("name") // must match the schema field above
      val model = lda.fit(dataset)
      val transformed = model.transform(dataset)

      // Report fit quality; previously these were computed and silently discarded.
      val ll = model.logLikelihood(dataset)
      val lp = model.logPerplexity(dataset)
      println(s"logLikelihood = $ll")
      println(s"logPerplexity = $lp")

      // describeTopics: top-3 terms per topic
      val topics = model.describeTopics(3)

      // Shows the result
      topics.show(false)
      transformed.show(false)
    } finally {
      sc.stop() // release cluster resources even when the job throws
    }
  }
}
开发者ID:mykumar,项目名称:SparkScalaInternalExperiements,代码行数:41,代码来源:SimpleApp.scala

示例2: StructTypeToSpark

//设置package包名称以及导入依赖的类
package org.apache.spark.ml.mleap.converter

import com.truecar.mleap.runtime.types
import org.apache.spark.mllib.linalg.VectorUDT
import org.apache.spark.sql.types._


/**
 * Converts an MLeap schema into the equivalent Spark SQL schema.
 * Supported MLeap field types: Double, String, Vector, and String array.
 * An unsupported field type fails the match at runtime (MatchError).
 */
case class StructTypeToSpark(schema: types.StructType) {
  def toSpark: StructType = {
    val sparkFields = for (field <- schema.fields) yield {
      field.dataType match {
        case types.DoubleType      => StructField(field.name, DoubleType)
        case types.StringType      => StructField(field.name, StringType)
        case types.VectorType      => StructField(field.name, new VectorUDT())
        case types.StringArrayType => StructField(field.name, new ArrayType(StringType, containsNull = false))
      }
    }
    StructType(sparkFields)
  }
}
开发者ID:TrueCar,项目名称:mleap,代码行数:23,代码来源:StructTypeToSpark.scala

示例3: StructTypeToMleap

//设置package包名称以及导入依赖的类
package org.apache.spark.ml.mleap.converter

import com.truecar.mleap.runtime.types
import org.apache.spark.SparkException
import org.apache.spark.mllib.linalg.VectorUDT
import org.apache.spark.sql.types._


/**
 * Converts a Spark SQL schema into the equivalent MLeap schema.
 * Numeric and boolean columns map to MLeap Double, strings to String,
 * vector UDT columns to Vector, and String arrays to StringArray.
 * Any other Spark type raises a SparkException naming the offending type.
 */
case class StructTypeToMleap(schema: StructType) {
  def toMleap: types.StructType = {
    val mleapFields = schema.fields.map { field =>
      val mleapType = field.dataType match {
        case _: NumericType | BooleanType => types.DoubleType
        case _: StringType => types.StringType
        case _: VectorUDT => types.VectorType
        case arrayType: ArrayType if arrayType.elementType == StringType => types.StringArrayType
        case unsupported => throw new SparkException(s"unsupported MLeap datatype: ${unsupported.typeName}")
      }
      types.StructField(field.name, mleapType)
    }
    types.StructType(mleapFields)
  }
}
开发者ID:TrueCar,项目名称:mleap,代码行数:28,代码来源:StructTypeToMleap.scala

示例4: LabelPropagationClassifierTest

//设置package包名称以及导入依赖的类
package cz.cvut.fit.palicand.vocloud.ssl.ml

import com.holdenkarau.spark.testing._
import cz.cvut.fit.palicand.vocloud.ssl.ml.classification.LabelPropagationClassifier
import org.apache.spark.mllib.linalg.distributed.IndexedRow
import org.apache.spark.mllib.linalg.{DenseVector, VectorUDT, Vectors}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row, SQLContext}
import org.scalatest.{Matchers, FlatSpec}


/**
 * Spec for LabelPropagationClassifier: fits the classifier on three
 * two-dimensional points (one per label) and checks the resulting
 * label-weight matrix row by row.
 */
class LabelPropagationClassifierTest extends FlatSpec with SharedSparkContext with Matchers {

  behavior of "LabelPropagationTest"

  it should "train" in {
    val sqlContext = new SQLContext(sc)
    // Three rows: (row number, label, feature vector).
    val rdd: RDD[Row] = sc.parallelize(List(
      Row(0L, 0.0, Vectors.dense(0.0, 1.0)),
      Row(1L, 1.0, Vectors.dense(1.0, 0.0)),
      Row(2L, 2.0, Vectors.dense(0.0, 0.0))))
    val schema = StructType(List(
      StructField("rowNo", LongType),
      StructField("label", DoubleType),
      StructField("features", new VectorUDT)))
    val df = sqlContext.createDataFrame(rdd, schema)
    val clf = new LabelPropagationClassifier()
    clf.setKNeighbours(2)
    clf.setLabelCol("label")
    clf.setFeaturesCol("features")
    val model = clf.fit(df)
    val expected = List(
      createIndexedRow(0, 1.0, 0.0),
      createIndexedRow(1, 0.0, 1.0),
      createIndexedRow(2, 1.0, 0))
    model.labelWeights.toIndexedRowMatrix().rows.collect() should be(expected)
  }

  // Builds an IndexedRow at index i holding the given dense values.
  def createIndexedRow(i: Int, vals: Double*): IndexedRow = {
    new IndexedRow(i, new DenseVector(vals.toArray))
  }
}
开发者ID:palicand,项目名称:graph_ssl,代码行数:34,代码来源:LabelPropagationClassifierTest.scala


注:本文中的org.apache.spark.mllib.linalg.VectorUDT类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。