This article collects typical usage examples of the Scala class org.apache.spark.ml.evaluation.RegressionEvaluator. If you are wondering what RegressionEvaluator is for, or how to use it in Scala, the curated class examples below should help.
Five code examples of the RegressionEvaluator class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Scala code examples.
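As a primer before the full examples, here is a minimal sketch of the class's core API. The column names shown are the Spark ML defaults, and the `predictions` DataFrame is an assumption standing in for the output of some fitted model's `transform`:

import org.apache.spark.ml.evaluation.RegressionEvaluator

// Minimal sketch: `predictions` is assumed to contain "label" and
// "prediction" columns, e.g. the output of model.transform(test).
val evaluator = new RegressionEvaluator()
  .setLabelCol("label")           // default column name
  .setPredictionCol("prediction") // default column name
  .setMetricName("rmse")          // also supported: "mse", "r2", "mae"
val rmse = evaluator.evaluate(predictions)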
Example 1: ALSModeling
// Package declaration and required imports
package com.spark.recommendation

import com.spark.recommendation.FeatureExtraction.{Rating, parseRating}
import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.recommendation.ALS

object ALSModeling {

  def createALSModel() {
    val ratings = FeatureExtraction.getFeatures()
    val Array(training, test) = ratings.randomSplit(Array(0.8, 0.2))
    println(training.first())

    // Build the recommendation model using ALS on the training data.
    val als = new ALS()
      .setMaxIter(5)
      .setRegParam(0.01)
      .setUserCol("userId")
      .setItemCol("movieId")
      .setRatingCol("rating")
    val model = als.fit(training)
    println(model.userFactors.count())
    println(model.itemFactors.count())

    // Score the held-out data and inspect the prediction schema.
    val predictions = model.transform(test)
    predictions.printSchema()

    // Evaluate the model by computing the RMSE on the test data.
    val evaluator = new RegressionEvaluator()
      .setMetricName("rmse")
      .setLabelCol("rating")
      .setPredictionCol("prediction")
    val rmse = evaluator.evaluate(predictions)
    println(s"Root-mean-square error = $rmse")
  }

  def main(args: Array[String]) {
    createALSModel()
  }
}
Developer: PacktPublishing, Project: Machine-Learning-with-Spark-Second-Edition, Lines: 48, Source file: ALSModeling.scala
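One caveat worth noting for this example: with a random 80/20 split, the test set can contain users or items never seen during training, for which ALS produces NaN predictions, which in turn makes the RMSE itself NaN. Since Spark 2.2, `setColdStartStrategy("drop")` on ALS drops such rows before evaluation. The sketch below, which assumes the same `training` DataFrame and column names as above and uses illustrative regParam values, also shows the common pattern of pairing RegressionEvaluator with CrossValidator for hyperparameter tuning:

import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.recommendation.ALS
import org.apache.spark.ml.tuning.{CrossValidator, ParamGridBuilder}

// Sketch, assuming the `training` DataFrame from Example 1.
val als = new ALS()
  .setUserCol("userId")
  .setItemCol("movieId")
  .setRatingCol("rating")
  .setColdStartStrategy("drop") // drop NaN predictions for unseen users/items (Spark 2.2+)

val evaluator = new RegressionEvaluator()
  .setMetricName("rmse")
  .setLabelCol("rating")
  .setPredictionCol("prediction")

// Try a few regularization strengths; CrossValidator keeps the model
// with the best (here: lowest) RMSE.
val paramGrid = new ParamGridBuilder()
  .addGrid(als.regParam, Array(0.01, 0.1, 1.0))
  .build()

val cv = new CrossValidator()
  .setEstimator(als)
  .setEvaluator(evaluator)
  .setEstimatorParamMaps(paramGrid)
  .setNumFolds(3)

val cvModel = cv.fit(training)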
Example 2: GLMRegression
// Package declaration and required imports
package com.databricks.spark.sql.perf.mllib.regression

import org.apache.spark.ml.evaluation.{Evaluator, RegressionEvaluator}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.GeneralizedLinearRegression
import org.apache.spark.ml.{Estimator, ModelBuilder, Transformer}

import com.databricks.spark.sql.perf.mllib.OptionImplicits._
import com.databricks.spark.sql.perf.mllib._
import com.databricks.spark.sql.perf.mllib.data.DataGenerator

object GLMRegression extends BenchmarkAlgorithm with TestFromTraining with
  TrainingSetFromTransformer with ScoringWithEvaluator {

  override protected def initialData(ctx: MLBenchContext) = {
    import ctx.params._
    DataGenerator.generateContinuousFeatures(
      ctx.sqlContext,
      numExamples,
      ctx.seed(),
      numPartitions,
      numFeatures)
  }

  override protected def trueModel(ctx: MLBenchContext): Transformer = {
    import ctx.params._
    val rng = ctx.newGenerator()
    // Random coefficients in [-1, 1).
    val coefficients =
      Vectors.dense(Array.fill[Double](ctx.params.numFeatures)(2 * rng.nextDouble() - 1))
    // Small intercept to prevent some skew in the data.
    val intercept = 0.01 * (2 * rng.nextDouble() - 1)
    val m = ModelBuilder.newGLR(coefficients, intercept)
    m.set(m.link, link.get)
    m.set(m.family, family.get)
    m
  }

  override def getEstimator(ctx: MLBenchContext): Estimator[_] = {
    import ctx.params._
    new GeneralizedLinearRegression()
      .setLink(link)
      .setFamily(family)
      .setRegParam(regParam)
      .setMaxIter(maxIter)
      .setTol(tol)
  }

  override protected def evaluator(ctx: MLBenchContext): Evaluator =
    new RegressionEvaluator()
}
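Note that the benchmark builds `new RegressionEvaluator()` with no setters at all: this works because the generated training set uses the Spark ML default column names "label" and "prediction", and the default metric is already "rmse". Switching metrics is a one-line change; a hedged variation, where the `predictions` DataFrame is assumed to come from the fitted model:

// Score with R² (coefficient of determination) instead of RMSE.
val r2 = new RegressionEvaluator()
  .setMetricName("r2")
  .evaluate(predictions)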
Example 3: LinearRegression
// Package declaration and required imports
package com.databricks.spark.sql.perf.mllib.regression

import org.apache.spark.ml
import org.apache.spark.ml.evaluation.{Evaluator, RegressionEvaluator}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.{Estimator, ModelBuilder, Transformer}

import com.databricks.spark.sql.perf.mllib.OptionImplicits._
import com.databricks.spark.sql.perf.mllib._
import com.databricks.spark.sql.perf.mllib.data.DataGenerator

object LinearRegression extends BenchmarkAlgorithm with TestFromTraining with
  TrainingSetFromTransformer with ScoringWithEvaluator {

  override protected def initialData(ctx: MLBenchContext) = {
    import ctx.params._
    DataGenerator.generateContinuousFeatures(
      ctx.sqlContext,
      numExamples,
      ctx.seed(),
      numPartitions,
      numFeatures)
  }

  override protected def trueModel(ctx: MLBenchContext): Transformer = {
    val rng = ctx.newGenerator()
    // Random coefficients in [-1, 1).
    val coefficients =
      Vectors.dense(Array.fill[Double](ctx.params.numFeatures)(2 * rng.nextDouble() - 1))
    // Small intercept to prevent some skew in the data.
    val intercept = 0.01 * (2 * rng.nextDouble() - 1)
    ModelBuilder.newLinearRegressionModel(coefficients, intercept)
  }

  override def getEstimator(ctx: MLBenchContext): Estimator[_] = {
    import ctx.params._
    new ml.regression.LinearRegression()
      .setSolver("l-bfgs")
      .setRegParam(regParam)
      .setMaxIter(maxIter)
      .setTol(tol)
  }

  override protected def evaluator(ctx: MLBenchContext): Evaluator =
    new RegressionEvaluator()
}
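The explicit `.setSolver("l-bfgs")` is worth a note: with the default solver "auto", Spark's LinearRegression picks the closed-form "normal"-equation path when the feature count is modest and only L2 regularization is in play, and falls back to L-BFGS otherwise. Pinning the solver keeps the benchmark measuring the iterative path regardless of the parameter settings.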
Example 4: ALS
// Package declaration and required imports
package com.databricks.spark.sql.perf.mllib.recommendation

import org.apache.spark.ml
import org.apache.spark.ml.evaluation.{Evaluator, RegressionEvaluator}
import org.apache.spark.ml.{Estimator, Transformer}
import org.apache.spark.sql._

import com.databricks.spark.sql.perf.mllib.OptionImplicits._
import com.databricks.spark.sql.perf.mllib.data.DataGenerator
import com.databricks.spark.sql.perf.mllib.{BenchmarkAlgorithm, MLBenchContext, ScoringWithEvaluator}

object ALS extends BenchmarkAlgorithm with ScoringWithEvaluator {

  override def trainingDataSet(ctx: MLBenchContext): DataFrame = {
    import ctx.params._
    DataGenerator.generateRatings(
      ctx.sqlContext,
      numUsers,
      numItems,
      numExamples,
      numTestExamples,
      implicitPrefs = false,
      numPartitions,
      ctx.seed())._1
  }

  override def testDataSet(ctx: MLBenchContext): DataFrame = {
    import ctx.params._
    DataGenerator.generateRatings(
      ctx.sqlContext,
      numUsers,
      numItems,
      numExamples,
      numTestExamples,
      implicitPrefs = false,
      numPartitions,
      ctx.seed())._2
  }

  override def getEstimator(ctx: MLBenchContext): Estimator[_] = {
    import ctx.params._
    new ml.recommendation.ALS()
      .setSeed(ctx.seed())
      .setRegParam(regParam)
      .setNumBlocks(numPartitions)
      .setRank(rank)
      .setMaxIter(maxIter)
  }

  override protected def evaluator(ctx: MLBenchContext): Evaluator = {
    new RegressionEvaluator().setLabelCol("rating")
  }
}
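Only the label column is overridden on the evaluator here: the fitted ALS model writes its scores to the default "prediction" column, but the generated ratings evidently keep the label in a "rating" column (ALS's default rating column name) rather than the default "label", so `setLabelCol("rating")` is the one adjustment needed.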
Example 5: SitelinkEntry
// Package declaration and required imports
package org.wikimedia.research.recommendation.job.translation

import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.linalg.DenseVector
import org.apache.spark.ml.regression.RandomForestRegressor
import org.apache.spark.sql.{DataFrame, SparkSession}

case class SitelinkEntry(id: String, site: String, title: String)
case class PagecountEntry(site: String, title: String, pageviews: Double)
case class SitelinkPageviewsEntry(id: String, site: String, title: String, pageviews: Double)
case class RankedEntry(id: String, site: String, title: String, pageviews: Double, rank: Double)

object Utils {
  val FEATURES = "features"
  val LABEL = "label"
  val PREDICTION = "prediction"
  val EXISTS = 1.0
  val NOT_EXISTS = 0.0

  val REGRESSOR: RandomForestRegressor = new RandomForestRegressor()
    .setLabelCol(LABEL)
    .setFeaturesCol(FEATURES)

  val EVALUATOR: RegressionEvaluator = new RegressionEvaluator()
    .setLabelCol(LABEL)
    .setPredictionCol(PREDICTION)
    .setMetricName("rmse")

  def getWorkData(spark: SparkSession, data: DataFrame, target: String, exists: Boolean = true): DataFrame = {
    // Keep only the rows whose exists_<target> flag matches the requested state.
    val workData: DataFrame = data.filter(row =>
      row(row.fieldIndex("exists_" + target)) == (if (exists) EXISTS else NOT_EXISTS))

    import spark.implicits._
    // Label each row with its rank for the target site. The feature vector is
    // built from all double columns except index 0 (id) and the slice from
    // pageviews_<target> through exists_<target>.
    val labeledData = workData.map(row =>
      (
        row.getString(row.fieldIndex("id")),
        row.getDouble(row.fieldIndex("rank_" + target)),
        new DenseVector((
          (1 until row.fieldIndex("pageviews_" + target)).map(row.getDouble) ++
            (row.fieldIndex("exists_" + target) + 1 until row.length).map(row.getDouble)
        ).toArray)
      )
    ).rdd
    spark.createDataFrame(labeledData).toDF("id", LABEL, FEATURES)
  }
}
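The object above only defines the regressor and the evaluator; it does not show them being used. Below is a hedged sketch of how they would typically be wired together. The `data` DataFrame and the "de" target are assumptions for illustration; `data` is expected to carry the exists_*/rank_*/pageviews_* columns that getWorkData reads:

import org.apache.spark.sql.{DataFrame, SparkSession}

// Hypothetical driver: fit the shared regressor on one target's work data
// and report RMSE on a held-out split.
def trainAndScore(spark: SparkSession, data: DataFrame): Double = {
  val workData = Utils.getWorkData(spark, data, "de")
  val Array(train, test) = workData.randomSplit(Array(0.8, 0.2))
  val model = Utils.REGRESSOR.fit(train)          // trains on "label"/"features"
  Utils.EVALUATOR.evaluate(model.transform(test)) // RMSE on the held-out rows
}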