This article collects typical usage examples of the Scala class org.apache.spark.ml.regression.LinearRegression. If you are wondering what the LinearRegression class does, how to use it, or what working code looks like, the curated examples below should help.
Below are 5 code examples of the LinearRegression class, ordered by popularity.
Example 1: LinearRegressionPipeline
// Package declaration and imported dependencies
package org.sparksamples.regression.bikesharing

import org.apache.log4j.Logger
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature.{VectorAssembler, VectorIndexer}
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.mllib.evaluation.RegressionMetrics
import org.apache.spark.sql.{DataFrame, SparkSession}

object LinearRegressionPipeline {
  @transient lazy val logger = Logger.getLogger(getClass.getName)

  def linearRegressionWithVectorFormat(vectorAssembler: VectorAssembler, vectorIndexer: VectorIndexer, dataFrame: DataFrame): Unit = {
    val lr = new LinearRegression()
      .setFeaturesCol("features")
      .setLabelCol("label")
      .setRegParam(0.1)
      .setElasticNetParam(1.0)
      .setMaxIter(10)

    val pipeline = new Pipeline().setStages(Array(vectorAssembler, vectorIndexer, lr))

    val Array(training, test) = dataFrame.randomSplit(Array(0.8, 0.2), seed = 12345)
    val model = pipeline.fit(training)

    val fullPredictions = model.transform(test).cache()
    val predictions = fullPredictions.select("prediction").rdd.map(_.getDouble(0))
    val labels = fullPredictions.select("label").rdd.map(_.getDouble(0))
    val RMSE = new RegressionMetrics(predictions.zip(labels)).rootMeanSquaredError
    println(s"Root mean squared error (RMSE): $RMSE")
  }

  def linearRegressionWithSVMFormat(spark: SparkSession): Unit = {
    // Load training data
    val training = spark.read.format("libsvm")
      .load("./src/main/scala/org/sparksamples/regression/dataset/BikeSharing/lsvmHours.txt")

    val lr = new LinearRegression()
      .setMaxIter(10)
      .setRegParam(0.3)
      .setElasticNetParam(0.8)

    // Fit the model
    val lrModel = lr.fit(training)

    // Print the coefficients and intercept for linear regression
    println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")

    // Summarize the model over the training set and print out some metrics
    val trainingSummary = lrModel.summary
    println(s"numIterations: ${trainingSummary.totalIterations}")
    println(s"objectiveHistory: ${trainingSummary.objectiveHistory.toList}")
    trainingSummary.residuals.show()
    println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
    println(s"r2: ${trainingSummary.r2}")
  }
}
Author: PacktPublishing, Project: Machine-Learning-with-Spark-Second-Edition, Lines of code: 61, Source: LinearRegressionPipeline.scala
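The object above only defines the two training methods. A minimal caller might look like the following sketch, reusing the imports from the example; the DataFrame df, its raw column names "hr", "temp", "hum", and the maxCategories value are hypothetical placeholders for a bike-sharing dataset with a numeric "label" column:

// Hypothetical caller: df and the raw column names are placeholders
val assembler = new VectorAssembler()
  .setInputCols(Array("hr", "temp", "hum")) // assumed raw feature columns
  .setOutputCol("rawFeatures")
val indexer = new VectorIndexer()
  .setInputCol("rawFeatures")
  .setOutputCol("features") // the column the pipeline's LinearRegression reads
  .setMaxCategories(24)
LinearRegressionPipeline.linearRegressionWithVectorFormat(assembler, indexer, df)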
Example 2: EvaluateModel
// Package declaration and imported dependencies
package songs

import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel}
import org.apache.spark.mllib.evaluation.RegressionMetrics
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.slf4j.LoggerFactory

object EvaluateModel {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName(Config.appName)
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    val logger = LoggerFactory.getLogger(getClass.getName)

    logger.info(s"Loading Linear Regression Model from ${Config.modelOut}")
    val model = LinearRegressionModel.load(Config.modelOut)

    logger.info("Loading datasets")
    val datasets = SongML.loadModelData(sqlContext = sqlContext)
    val pipelineModel = SongML.transformPipeline.fit(datasets.training)
    val testData = pipelineModel.transform(datasets.test).select(SongML.labelColumn, SongML.featuresColumn)

    logger.info("Calculating Regression Metrics")
    // .rdd keeps this working on Spark 2.x, where DataFrame.map would require an Encoder
    val testPredictions = model.transform(testData)
      .select(SongML.labelColumn, SongML.predictionColumn)
      .rdd
      .map(r => (r.getAs[Double](SongML.predictionColumn), r.getAs[Double](SongML.labelColumn)))

    val rm = new RegressionMetrics(testPredictions)
    logger.info(SongML.printStats(model, rm, "Testing"))

    logger.info("Exiting")
    sc.stop()
  }
}
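RegressionMetrics exposes the usual error measures directly, so the external SongML.printStats helper could be replaced by plain printing; the accessors below are part of the public mllib API:

// rm is the RegressionMetrics instance built above
println(s"MSE:  ${rm.meanSquaredError}")
println(s"RMSE: ${rm.rootMeanSquaredError}")
println(s"MAE:  ${rm.meanAbsoluteError}")
println(s"R2:   ${rm.r2}")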
Example 3: EjecutaRegresor
// Package declaration and imported dependencies
package es.upm.ging.EjecutaRegresor

import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel}
import org.apache.spark.sql.SparkSession

object EjecutaRegresor {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().getOrCreate()
    spark.sparkContext.setLogLevel("OFF")

    val carga = new Carga(spark)
    // val midf = carga.json()
    val midf = carga.mysql()
    midf.cache()

    // Print the inferred schema
    midf.printSchema()

    // Build the model; the DataFrame is expected to have "features" (Vector) and "label" columns
    val iteraciones = 100
    val lr = new LinearRegression().setMaxIter(iteraciones)
    val model = lr.fit(midf)

    // Evaluate the model on the training dataset
    val valoresYPrediccion = model.transform(midf)
      .select("label", "prediction")
      .rdd
      .map(r => (r.getDouble(0), r.getDouble(1)))
    val MSE = valoresYPrediccion.map { case (v, p) => math.pow(v - p, 2) }.mean()
    println("Training Mean Squared Error = " + MSE)

    // Save and reload the model
    model.write.overwrite().save("MiModeloLinReg")
    val MiModelo = LinearRegressionModel.load("MiModeloLinReg")
    println(valoresYPrediccion.collect().mkString("\n"))
  }
}
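The original snippet mixed the ml imports with the older RDD-based mllib calling convention (train(data, numIterations), model.predict, save(sc, path)); the version above is rewritten against the DataFrame-based ml API those imports actually provide. For reference, a sketch of the same flow under the mllib API would look like this, where datosRDD: RDD[LabeledPoint] is an assumption standing in for the MySQL rows; note that LinearRegressionWithSGD is deprecated since Spark 2.0 and removed in 3.0:

import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionWithSGD, LinearRegressionModel}

// datosRDD: RDD[LabeledPoint] is assumed, e.g. built from the MySQL rows
val model = LinearRegressionWithSGD.train(datosRDD, iteraciones)
val valoresYPrediccion = datosRDD.map { punto =>
  (punto.label, model.predict(punto.features))
}
model.save(spark.sparkContext, "MiModeloLinReg")
val cargado = LinearRegressionModel.load(spark.sparkContext, "MiModeloLinReg")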
Example 4:
// Imported dependencies
// Spark shell snippet: `spark` is assumed to be the shell-provided SparkSession
import org.apache.spark.ml.regression.LinearRegression
import com.redislabs.client.redisml.MLClient
import redis.clients.jedis.Jedis

// Load training data and train
val training = spark.read.format("libsvm").load("data/mllib/sample_linear_regression_data.txt")
val lr = new LinearRegression().setMaxIter(10).setRegParam(0.3).setElasticNetParam(0.8)
val lrModel = lr.fit(training)
println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")

// Connect to Redis
val jedis = new Jedis("localhost")

// Load the model into Redis (distinct val names so this also compiles outside the REPL)
val setCmd = "my_lr_model" +: lrModel.intercept.toString +: lrModel.coefficients.toArray.map(_.toString)
jedis.getClient.sendCommand(MLClient.ModuleCommand.LINREG_SET, setCmd: _*)
jedis.getClient.getStatusCodeReply

// Perform a prediction with Redis
val predictCmd = Array("my_lr_model", "1", "2", "5")
jedis.getClient.sendCommand(MLClient.ModuleCommand.LINREG_PREDICT, predictCmd: _*)
jedis.getClient.getStatusCodeReply
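To sanity-check the Redis-side prediction, the same dot product can be computed locally from the public coefficients and intercept accessors. The feature values (1, 2, 5) mirror the command above; note that zip truncates to the shorter array, so the result only matches when the feature count agrees with the model's dimensionality:

// Local cross-check of the prediction sent to Redis
val x = Array(1.0, 2.0, 5.0)
val localPrediction = lrModel.coefficients.toArray.zip(x)
  .map { case (w, xi) => w * xi }
  .sum + lrModel.intercept
println(s"Local prediction: $localPrediction")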
Example 5: LinearRegressionJob
// Imported dependencies
import io.hydrosphere.mist.api._
import io.hydrosphere.mist.api.ml._
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.sql.SparkSession

object LinearRegressionJob extends MLMistJob {
  def session: SparkSession = SparkSession
    .builder()
    .appName(context.appName)
    .config(context.getConf)
    .getOrCreate()

  def train(savePath: String, datasetPath: String): Map[String, Any] = {
    val df = session.read.format("libsvm").load(datasetPath)

    val lr = new LinearRegression()
      .setMaxIter(10)
      .setRegParam(0.3)
      .setElasticNetParam(0.8)

    val pipeline = new Pipeline().setStages(Array(lr))
    val model = pipeline.fit(df)

    model.write.overwrite().save(savePath)
    Map.empty
  }

  def serve(modelPath: String, features: List[List[Double]]): Map[String, Any] = {
    import LocalPipelineModel._

    val pipeline = PipelineLoader.load(modelPath)
    val data = LocalData(LocalDataColumn("features", features.map(_.toArray).map(Vectors.dense)))
    val result: LocalData = pipeline.transform(data)
    Map("result" -> result.select("prediction").toMapList)
  }
}
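In production these entry points are invoked through the Mist job API, but a direct local call might look like the sketch below. The paths and feature values are placeholders, the vectors must match the training data's dimensionality, and a Mist context is assumed to be available so that context.appName and context.getConf resolve:

// Hypothetical direct invocation; paths and feature values are placeholders
LinearRegressionJob.train("models/linreg", "data/mllib/sample_linear_regression_data.txt")
val out = LinearRegressionJob.serve("models/linreg", List(List(0.1, 0.2, 0.3)))
println(out)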