本文整理汇总了Scala中org.apache.spark.mllib.linalg.VectorUDT类的典型用法代码示例。如果您正苦于以下问题:Scala VectorUDT类的具体用法?Scala VectorUDT怎么用?Scala VectorUDT使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了VectorUDT类的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: SimpleApp
//设置package包名称以及导入依赖的类
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.ml.clustering.LDA
import org.apache.spark.mllib.linalg.{VectorUDT, Vectors}
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.types.{StructField, StructType}
object SimpleApp {

  /** Entry point: loads whitespace-separated numeric vectors from
    * /tmp/lda_data.txt, fits a 10-topic LDA model on them, and prints the
    * per-topic top terms plus the transformed dataset.
    *
    * Fixes over the original: uses explicit `: Unit =` instead of the
    * deprecated procedure syntax, stops the SparkContext in a `finally`
    * block, and reports the previously computed-but-unused model metrics.
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Simple Application").set("spark.ui.enabled", "false")
    val sc = new SparkContext(conf)
    try {
      val sqlContext = new SQLContext(sc)

      // Load data: each non-empty line is a space-separated vector of doubles.
      val rowRDD = sc.textFile("/tmp/lda_data.txt").filter(_.nonEmpty)
        .map(_.split(" ").map(_.toDouble)).map(Vectors.dense).map(Row(_))
      val schema = StructType(Array(StructField("name", new VectorUDT, false)))
      val dataset = sqlContext.createDataFrame(rowRDD, schema)
      dataset.show()

      val lda = new LDA()
        .setK(10)
        .setMaxIter(10)
        .setFeaturesCol("name")
      val model = lda.fit(dataset)
      val transformed = model.transform(dataset)

      // Report model-quality metrics (these were computed but unused before).
      println(s"log-likelihood: ${model.logLikelihood(dataset)}")
      println(s"log-perplexity: ${model.logPerplexity(dataset)}")

      // describeTopics: top 3 terms per topic.
      val topics = model.describeTopics(3)

      // Show the result.
      topics.show(false)
      transformed.show(false)
    } finally {
      sc.stop() // release cluster resources even if the job fails
    }
  }
}
示例2: StructTypeToSpark
//设置package包名称以及导入依赖的类
package org.apache.spark.ml.mleap.converter
import com.truecar.mleap.runtime.types
import org.apache.spark.mllib.linalg.VectorUDT
import org.apache.spark.sql.types._
/** Converter from an MLeap schema to the equivalent Spark SQL schema.
  *
  * @param schema the MLeap [[types.StructType]] to convert
  */
case class StructTypeToSpark(schema: types.StructType) {

  /** Maps every MLeap field to its Spark SQL counterpart.
    *
    * @return a Spark [[StructType]] with one field per MLeap field
    * @throws IllegalArgumentException if a field has an MLeap data type with
    *         no Spark equivalent (previously this surfaced as a bare
    *         `MatchError`; a descriptive failure mirrors the error handling
    *         in `StructTypeToMleap`)
    */
  def toSpark: StructType = {
    val fields = schema.fields.map { field =>
      field.dataType match {
        case types.DoubleType => StructField(field.name, DoubleType)
        case types.StringType => StructField(field.name, StringType)
        case types.VectorType => StructField(field.name, new VectorUDT())
        case types.StringArrayType =>
          StructField(field.name, new ArrayType(StringType, containsNull = false))
        case unsupported =>
          // Fail loudly with context instead of an opaque MatchError.
          throw new IllegalArgumentException(
            s"unsupported MLeap datatype: $unsupported for field ${field.name}")
      }
    }
    StructType(fields)
  }
}
示例3: StructTypeToMleap
//设置package包名称以及导入依赖的类
package org.apache.spark.ml.mleap.converter
import com.truecar.mleap.runtime.types
import org.apache.spark.SparkException
import org.apache.spark.mllib.linalg.VectorUDT
import org.apache.spark.sql.types._
/** Converter from a Spark SQL schema to the equivalent MLeap schema.
  *
  * @param schema the Spark [[StructType]] to convert
  */
case class StructTypeToMleap(schema: StructType) {

  /** Maps every Spark SQL field to its MLeap counterpart.
    *
    * Numeric and boolean columns are widened to MLeap doubles; string,
    * vector, and string-array columns map one-to-one.
    *
    * @return an MLeap [[types.StructType]] with one field per Spark field
    * @throws SparkException if a field's Spark type has no MLeap equivalent
    */
  def toMleap: types.StructType = {
    val mleapFields = schema.fields.map { sparkField =>
      val sparkType = sparkField.dataType
      val sparkTypeName = sparkType.typeName
      val mleapType = sparkType match {
        case _: NumericType | BooleanType => types.DoubleType
        case _: StringType => types.StringType
        case _: VectorUDT => types.VectorType
        case arrayType: ArrayType if arrayType.elementType == StringType =>
          types.StringArrayType
        case _ =>
          throw new SparkException(s"unsupported MLeap datatype: $sparkTypeName")
      }
      types.StructField(sparkField.name, mleapType)
    }
    types.StructType(mleapFields)
  }
}
示例4: LabelPropagationClassifierTest
//设置package包名称以及导入依赖的类
package cz.cvut.fit.palicand.vocloud.ssl.ml
import com.holdenkarau.spark.testing._
import cz.cvut.fit.palicand.vocloud.ssl.ml.classification.LabelPropagationClassifier
import org.apache.spark.mllib.linalg.distributed.IndexedRow
import org.apache.spark.mllib.linalg.{DenseVector, VectorUDT, Vectors}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row, SQLContext}
import org.scalatest.{Matchers, FlatSpec}
/** Spec for [[LabelPropagationClassifier]]: fits the classifier on a tiny
  * three-row dataset and checks the resulting label-weight matrix.
  */
class LabelPropagationClassifierTest extends FlatSpec with SharedSparkContext with Matchers {

  behavior of "LabelPropagationTest"

  it should "train" in {
    val sqlContext = new SQLContext(sc)

    // Three rows of (row index, label, feature vector).
    val rows: RDD[Row] = sc.parallelize(
      List(
        Row(0L, 0.0, Vectors.dense(0.0, 1.0)),
        Row(1L, 1.0, Vectors.dense(1.0, 0.0)),
        Row(2L, 2.0, Vectors.dense(0.0, 0.0))))
    val schema = StructType(
      List(
        StructField("rowNo", LongType),
        StructField("label", DoubleType),
        StructField("features", new VectorUDT)))
    val df = sqlContext.createDataFrame(rows, schema)

    val clf = new LabelPropagationClassifier()
    clf.setKNeighbours(2)
    clf.setLabelCol("label")
    clf.setFeaturesCol("features")
    val model = clf.fit(df)

    // Expected per-row label weights after propagation.
    val expected = List(
      createIndexedRow(0, 1.0, 0.0),
      createIndexedRow(1, 0.0, 1.0),
      createIndexedRow(2, 1.0, 0))
    model.labelWeights.toIndexedRowMatrix().rows.collect() should be(expected)
  }

  /** Builds an [[IndexedRow]] at index `i` holding the given dense values. */
  def createIndexedRow(i: Int, vals: Double*): IndexedRow = {
    new IndexedRow(i, new DenseVector(vals.toArray))
  }
}