本文整理汇总了Scala中org.apache.spark.mllib.regression.StreamingLinearRegressionWithSGD类的典型用法代码示例。如果您正苦于以下问题:Scala StreamingLinearRegressionWithSGD类的具体用法?Scala StreamingLinearRegressionWithSGD怎么用?Scala StreamingLinearRegressionWithSGD使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了StreamingLinearRegressionWithSGD类的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: StreamingSimpleModel
//设置package包名称以及导入依赖的类
package com.bigchange.streaming
import breeze.linalg.DenseVector
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.{LabeledPoint, StreamingLinearRegressionWithSGD}
import org.apache.spark.streaming.{Seconds, StreamingContext}
object StreamingSimpleModel {

  /**
   * Streaming linear regression demo.
   *
   * Reads tab-separated lines "label\tf1,f2,..." from localhost:9999,
   * continuously trains a StreamingLinearRegressionWithSGD model on them,
   * and prints the MSE/RMSE of the latest model on each incoming batch.
   */
  def main(args: Array[String]) {
    // FIX: the master must be "local[n]" with n >= 2 for a streaming app.
    // With plain "local" the single available thread is consumed by the
    // socket receiver, so no batch is ever processed.
    val ssc = new StreamingContext("local[2]", "test", Seconds(10))
    val stream = ssc.socketTextStream("localhost", 9999)

    // Model dimensionality; incoming feature vectors are expected to have
    // exactly this many components (TODO confirm against the data producer).
    val numberFeatures = 100
    val zeroVector = DenseVector.zeros[Double](numberFeatures)

    // One SGD step per batch keeps training cheap for a streaming demo.
    val model = new StreamingLinearRegressionWithSGD()
      .setInitialWeights(Vectors.dense(zeroVector.data))
      .setNumIterations(1)
      .setStepSize(0.01)

    // Parse each "label\tf1,f2,..." line into a LabeledPoint.
    val labeledStream = stream.map { event =>
      val split = event.split("\t")
      val y = split(0).toDouble
      val features = split(1).split(",").map(_.toDouble)
      LabeledPoint(label = y, features = Vectors.dense(features))
    }

    model.trainOn(labeledStream)

    // Transform the DStream: score each point with the latest model and
    // emit the signed prediction error (predicted - actual).
    val predictAndTrue = labeledStream.transform { rdd =>
      val latestModel = model.latestModel()
      rdd.map { point =>
        val predict = latestModel.predict(point.features)
        predict - point.label
      }
    }

    // Compute and print per-batch MSE and RMSE from the errors.
    predictAndTrue.foreachRDD { rdd =>
      val mse = rdd.map(x => x * x).mean()
      val rmse = math.sqrt(mse)
      println(s"current batch, MSE: $mse, RMSE:$rmse")
    }

    ssc.start()
    ssc.awaitTermination()
  }
}
示例2: LinearRegressionApp
//设置package包名称以及导入依赖的类
package org.apress.prospark
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.regression.StreamingLinearRegressionWithSGD
import org.apache.spark.rdd.RDD
import org.apache.spark.rdd.RDD.doubleRDDToDoubleRDDFunctions
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
object LinearRegressionApp {

  /**
   * Streaming linear regression over space-separated records from a socket.
   *
   * Each batch is randomly split ~30/70 into a test set and (by subtraction)
   * a training set; the model is trained on the training stream and the
   * per-batch MSE on the test stream is printed.
   *
   * Usage: LinearRegressionApp <appname> <batchInterval> <hostname> <port>
   */
  def main(args: Array[String]) {
    if (args.length != 4) {
      System.err.println(
        "Usage: LinearRegressionApp <appname> <batchInterval> <hostname> <port>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, hostname, port) = args.toSeq

    val sparkConf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)
    val ssc = new StreamingContext(sparkConf, Seconds(batchInterval.toInt))

    // Drop records containing NaN or whose second field is "0", then build a
    // labeled point: field 2 is the label, fields 3..6 are the features.
    val points = ssc.socketTextStream(hostname, port.toInt)
      .filter(line => !line.contains("NaN"))
      .map(_.split(" "))
      .filter(fields => fields(1) != "0")
      .map { fields =>
        LabeledPoint(
          fields(2).toDouble,
          Vectors.dense(fields(3).toDouble, fields(4).toDouble,
            fields(5).toDouble, fields(6).toDouble))
      }

    // ~30% of each batch becomes the test set; the training set is whatever
    // remains after subtracting the test set from the batch.
    val testSet = points.transform(_.randomSplit(Array(0.3, 0.7))(0))
    val trainSet = points
      .transformWith(testSet,
        (all: RDD[LabeledPoint], held: RDD[LabeledPoint]) => all.subtract(held))
      .cache()

    val model = new StreamingLinearRegressionWithSGD()
      .setInitialWeights(Vectors.zeros(4))
      .setStepSize(0.0001)
      .setNumIterations(1)
    model.trainOn(trainSet)

    // Score the held-out points and report the batch mean squared error.
    model.predictOnValues(testSet.map(p => (p.label, p.features)))
      .foreachRDD { scored =>
        val mse = scored
          .map { case (actual, predicted) => math.pow(actual - predicted, 2) }
          .mean()
        println("MSE: %f".format(mse))
      }

    ssc.start()
    ssc.awaitTermination()
  }
}