本文整理汇总了Scala中org.apache.spark.ml.PipelineModel类的典型用法代码示例。如果您正苦于以下问题:Scala PipelineModel类的具体用法?Scala PipelineModel怎么用?Scala PipelineModel使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了PipelineModel类的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: TrainModel
//设置package包名称以及导入依赖的类
package songs
import org.apache.spark.ml.PipelineModel
import org.apache.spark.ml.regression.LinearRegressionModel
import org.apache.spark.mllib.evaluation.RegressionMetrics
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.slf4j.LoggerFactory
object TrainModel {

  /** Entry point: fits the SongML linear-regression training pipeline on the
    * training split, computes regression metrics over that same split, and
    * persists the fitted LinearRegressionModel to Config.modelOut.
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName(Config.appName)
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    val log = LoggerFactory.getLogger(getClass.getName)

    log.info("Loading datasets from parquet format")
    val data = SongML.loadModelData(sqlContext = sqlContext)

    log.info("Showing summary stats for training data")
    data.training.describe(SongML.allColumns: _*).show(1000)

    log.info("Training Linear Regression Model")
    val t0 = System.nanoTime()
    val fitted = SongML.trainingPipeline.fit(data.training)
    log.info(s"Training time: ${(System.nanoTime() - t0) / 1e9} seconds")

    log.info("Calculating Regression Metrics")
    val bestModel = fitted.bestModel.asInstanceOf[PipelineModel]

    // NOTE(review): metrics are computed on the *training* split, matching
    // the "Training" label passed to printStats below (the original local
    // was misleadingly named testPredictions).
    val trainingPredictions: RDD[(Double, Double)] = bestModel
      .transform(data.training)
      .select(SongML.predictionColumn, SongML.labelColumn)
      .map(r => (r.getAs[Double](SongML.predictionColumn), r.getAs[Double](SongML.labelColumn)))

    val metrics = new RegressionMetrics(trainingPredictions)
    // The last stage of the fitted pipeline is the LinearRegressionModel.
    val model = bestModel.stages(SongML.lrStages.indices.last).asInstanceOf[LinearRegressionModel]

    log.info(s"Saving model to ${Config.modelOut}")
    model.write.overwrite().save(Config.modelOut)

    log.info(SongML.printStats(model, metrics, "Training"))
    log.info("Exiting")
    sc.stop()
  }
}
示例2: TwitterFireRepl
//设置package包名称以及导入依赖的类
package com.aluxian.tweeather.scripts
import org.apache.spark.Logging
import org.apache.spark.ml.PipelineModel
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.sql.functions._
import scala.io.Source
object TwitterFireRepl extends Script with Logging {

  /** REPL that loads the persisted "fire" pipeline model and scores lines
    * read from stdin, where each line is expected to be
    * <temperature>,<pressure>,<humidity>.
    */
  override def main(args: Array[String]) {
    super.main(args)
    import sqlc.implicits._

    println("Loading fire model...")
    sc // dummy call to init the context
    val model = PipelineModel.load("/tw/fire/models/fire.model")
    println("Done. Write the input as <temperature>,<pressure>,<humidity> and press <enter>")

    // Fixed: the UDF is loop-invariant, so it is now created once instead of
    // being re-built for every line of input.
    val toVector = udf { (input: String) =>
      val values = input.split(",").map(_.toDouble)
      Vectors.dense(values)
    }

    for (input <- Source.stdin.getLines) {
      val data = sc
        .parallelize(Seq(input), 1)
        .toDF("kb_input")
        .withColumn("raw_input", toVector(col("kb_input")))

      model
        .transform(data)
        .show(truncate = false)
    }
  }
}
示例3: TwitterEmoRepl
//设置package包名称以及导入依赖的类
package com.aluxian.tweeather.scripts
import org.apache.spark.Logging
import org.apache.spark.ml.PipelineModel
import scala.io.Source
object TwitterEmoRepl extends Script with Logging {

  /** REPL that loads the persisted emoticon-sentiment pipeline model and
    * scores every sentence read from stdin, printing the transformed frame.
    */
  override def main(args: Array[String]) {
    super.main(args)
    import sqlc.implicits._

    println("Loading emo model...")
    sc // dummy call to init the context
    val model = PipelineModel.load("/tw/sentiment/models/emo.model")
    println("Done. Write the sentence you want analysed and press <enter>")

    Source.stdin.getLines.foreach { line =>
      val frame = sc.parallelize(Seq(line), 1).toDF("raw_text")
      model.transform(frame).show(truncate = false)
    }
  }
}
示例4: Sentiment140Repl
//设置package包名称以及导入依赖的类
package com.aluxian.tweeather.scripts
import org.apache.spark.Logging
import org.apache.spark.ml.PipelineModel
import scala.io.Source
object Sentiment140Repl extends Script with Logging {

  /** REPL that loads the persisted Sentiment140 pipeline model and prints
    * probability/prediction for each sentence read from stdin.
    */
  override def main(args: Array[String]) {
    super.main(args)
    import sqlc.implicits._

    println("Loading 140 model...")
    sc // dummy call to init the context
    // Fixed: sibling REPLs (fire/emo) load from the absolute "/tw/..." path;
    // the missing leading slash made this the only relative model path.
    val model = PipelineModel.load("/tw/sentiment/models/140.model")
    println("Done. Write the sentence you want analysed and press <enter>")

    for (input <- Source.stdin.getLines) {
      val data = sc
        .parallelize(Seq(input), 1)
        .toDF("raw_text")

      // NOTE(review): foreach(println) executes on the executors; output is
      // only visible on this console in local mode — confirm that is intended.
      model.transform(data)
        .select("probability", "prediction")
        .foreach(println)
    }
  }
}
示例5: BaseTransformerConverter
//设置package包名称以及导入依赖的类
package org.apache.spark.ml.mleap.converter.runtime
import com.truecar.mleap.runtime.transformer
import org.apache.spark.ml.PipelineModel
import org.apache.spark.ml.classification.RandomForestClassificationModel
import org.apache.spark.ml.feature.{IndexToString, StandardScalerModel, StringIndexerModel, VectorAssembler}
import org.apache.spark.ml.mleap.classification.SVMModel
import org.apache.spark.ml.mleap.converter.runtime.classification.{RandomForestClassificationModelToMleap, SupportVectorMachineModelToMleap}
import org.apache.spark.ml.mleap.converter.runtime.feature.{IndexToStringToMleap, StandardScalerModelToMleap, StringIndexerModelToMleap, VectorAssemblerModelToMleap}
import org.apache.spark.ml.mleap.converter.runtime.regression.{LinearRegressionModelToMleap, RandomForestRegressionModelToMleap}
import org.apache.spark.ml.regression.{LinearRegressionModel, RandomForestRegressionModel}
/** Registry of Spark-ML → MLeap transformer converters.
  *
  * Each implicit val both registers a converter via addConverter (side
  * effect at trait initialization) and exposes it for implicit resolution,
  * so mixing in this trait makes every supported Spark model convertible
  * to its com.truecar.mleap runtime counterpart.
  */
trait BaseTransformerConverter extends SparkTransformerConverter {
  // regression
  implicit val mleapLinearRegressionModelToMleap: TransformerToMleap[LinearRegressionModel, transformer.LinearRegressionModel] =
    addConverter(LinearRegressionModelToMleap)
  implicit val mleapRandomForestRegressionModelToMleap: TransformerToMleap[RandomForestRegressionModel, transformer.RandomForestRegressionModel] =
    addConverter(RandomForestRegressionModelToMleap)

  // classification
  implicit val mleapRandomForestClassificationModelToMleap: TransformerToMleap[RandomForestClassificationModel, transformer.RandomForestClassificationModel] =
    addConverter(RandomForestClassificationModelToMleap)
  implicit val mleapSupportVectorMachineModelToMleap: TransformerToMleap[SVMModel, transformer.SupportVectorMachineModel] =
    addConverter(SupportVectorMachineModelToMleap)

  // feature transformers
  implicit val mleapIndexToStringToMleap: TransformerToMleap[IndexToString, transformer.ReverseStringIndexerModel] =
    addConverter(IndexToStringToMleap)
  implicit val mleapStandardScalerModelToMleap: TransformerToMleap[StandardScalerModel, transformer.StandardScalerModel] =
    addConverter(StandardScalerModelToMleap)
  implicit val mleapStringIndexerModelToMleap: TransformerToMleap[StringIndexerModel, transformer.StringIndexerModel] =
    addConverter(StringIndexerModelToMleap)
  implicit val mleapVectorAssemblerToMleap: TransformerToMleap[VectorAssembler, transformer.VectorAssemblerModel] =
    addConverter(VectorAssemblerModelToMleap)

  // other: whole-pipeline conversion recurses through this converter ("this")
  implicit val mleapPipelineModelToMleap: TransformerToMleap[PipelineModel, transformer.PipelineModel] =
    addConverter(PipelineModelToMleap(this))
}

// Ready-to-use default instance of the converter registry.
object BaseTransformerConverter extends BaseTransformerConverter
示例6: PipelineClassifier
//设置package包名称以及导入依赖的类
import org.apache.spark.ml.PipelineModel
import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame
/** Classifier that wraps a fitted Spark PipelineModel and exposes
  * per-row class-1 probabilities keyed by DataFrameColumns.KEY.
  */
class PipelineClassifier(val pipeline: PipelineModel) extends UnifiedClassifier with Serializable {

  /** Runs the wrapped pipeline over `data` and returns (key, prob) pairs,
    * where prob is derived from the raw prediction scores as
    * score(1) / (score(0) + score(1)).
    */
  override def predict[T](data: DataFrame): RDD[(T, Double)] = {
    val singletonDF = ModelHelpers.addMetadata(data)
    val predictions = pipeline.transform(singletonDF)
    predictions.map(row => {
      // Fixed: read the raw-prediction vector once per row instead of twice;
      // DenseVector.apply already yields Double, so the .toDouble calls were
      // redundant.
      val rawPrediction = row.getAs[DenseVector](DataFrameColumns.RAW_PREDICTION)
      val firstClass = rawPrediction(1)
      val zeroClass = rawPrediction(0)
      val prob = firstClass / (firstClass + zeroClass)
      (row.getAs[T](DataFrameColumns.KEY), prob)
    })
  }
}
示例7: PredictNewsClassDemo
//设置package包名称以及导入依赖的类
package applications.mining
import algorithms.evaluation.MultiClassEvaluation
import config.paramconf.ClassParams
import org.apache.log4j.{Level, Logger}
import org.apache.spark.ml.PipelineModel
import org.apache.spark.sql.{Row, SparkSession}
object PredictNewsClassDemo extends Serializable {

  /** Loads a previously trained classification pipeline ("lr" or "dt"),
    * predicts classes for news records read from a delimited text file, and
    * prints multi-class precision / recall / F1.
    *
    * args(0): input file path; args(1): model type ("lr" or "dt").
    * When fewer than two arguments are supplied, demo defaults are used.
    */
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org").setLevel(Level.WARN)

    val spark = SparkSession
      .builder
      .master("local[2]")
      .appName("predict news multi class demo")
      .getOrCreate()

    // Fixed: the original declared `val args = Array(...)`, shadowing the
    // method parameter and silently ignoring any command-line arguments.
    // The previous hard-coded values are kept as fallback defaults.
    val Array(filePath, modelType) =
      if (args.length >= 2) args.take(2)
      else Array("ckooc-ml/data/classnews/predict", "lr")

    val params = new ClassParams
    // Expression-oriented: resolve the model path directly from the match
    // instead of mutating a var.
    val modelPath = modelType match {
      case "lr" => params.LRModelPath
      case "dt" => params.DTModelPath
      case _ =>
        println("???????")
        System.exit(1)
        "" // unreachable: System.exit does not return
    }

    import spark.implicits._
    // Each input line must contain at least 4 fields separated by '\u00ef';
    // shorter lines are dropped.
    val data = spark.sparkContext.textFile(filePath).flatMap { line =>
      val tokens: Array[String] = line.split("\u00ef")
      if (tokens.length > 3) Some((tokens(0), tokens(1), tokens(2), tokens(3))) else None
    }.toDF("label", "title", "time", "content")
    data.persist()

    // Load the persisted pipeline and score the input data.
    val model = PipelineModel.load(modelPath)
    val predictions = model.transform(data)

    // Evaluate predictions against the indexed labels.
    val resultRDD = predictions.select("prediction", "indexedLabel").rdd.map {
      case Row(prediction: Double, label: Double) => (prediction, label)
    }
    val (precision, recall, f1) = MultiClassEvaluation.multiClassEvaluate(resultRDD)

    println("\n\n========= ???? ==========")
    println(s"\n??????$precision")
    println(s"??????$recall")
    println(s"F1??$f1")
    // predictions.select("label", "predictedLabel", "content").show(100, truncate = false)

    data.unpersist()
    spark.stop()
  }
}
示例8: save
//设置package包名称以及导入依赖的类
package ml
import model.{ AppReader }
import org.apache.spark.ml.{ PipelineModel }
import scalaz.\/
/** Persistence operations for PipelineModel instances, expressed as
  * AppReader computations whose failures are captured in a scalaz
  * disjunction rather than thrown.
  */
trait PipelineRepository {
  // NOTE(review): hard-coded absolute, user-specific path — consider making
  // this configurable.
  private val baseDir = "/Users/kirill/Documents/Projects/sentiment-service/src/main/resources"

  /** Saves `pipeline` under `$baseDir/$fileName`, overwriting any existing
    * model on disk.
    */
  def save(pipeline: PipelineModel, fileName: String): AppReader[Unit] = AppReader[Unit] {
    // Fixed: the reader lambda's arrow had been mangled to `?`
    // (mis-encoded `⇒`), which does not compile.
    spark =>
      \/.fromTryCatchNonFatal {
        pipeline.write.overwrite().save(s"$baseDir/$fileName")
      }
  }

  /** Loads a PipelineModel from `$baseDir/$fileName`. */
  def load(fileName: String): AppReader[PipelineModel] = AppReader[PipelineModel] {
    // Fixed: same mangled-arrow repair as in save above.
    spark =>
      \/.fromTryCatchNonFatal {
        PipelineModel.load(s"$baseDir/$fileName")
      }
  }
}
示例9: estimate
//设置package包名称以及导入依赖的类
package ml
import model.{ NegativeSentiment, PositiveSentiment, Sentiment, AppReader }
import org.apache.spark.ml.{ PipelineModel }
import scalaz.{ \/ }
/** Scores free text with a fitted sentiment pipeline, returning the result
  * as an AppReader whose failures are captured in a scalaz disjunction.
  */
trait SentimentEstimator {

  /** Transforms `text` through `pipeline`; a prediction of exactly 1.0 maps
    * to PositiveSentiment, anything else to NegativeSentiment.
    */
  def estimate(pipeline: PipelineModel, text: String): AppReader[Sentiment] = AppReader[Sentiment] {
    // Fixed: the reader lambda's arrow had been mangled to `?`
    // (mis-encoded `⇒`), which does not compile.
    spark =>
      \/.fromTryCatchNonFatal {
        val df = spark.createDataFrame(Seq((0, text))).toDF("id", "text")
        val prediction = pipeline.transform(df)
          .select("prediction")
          .first().getDouble(0)
        if (prediction == 1.0) PositiveSentiment else NegativeSentiment
      }
  }
}