本文整理汇总了Scala中org.apache.spark.ml.classification.LogisticRegression类的典型用法代码示例。如果您正苦于以下问题：Scala LogisticRegression类的具体用法？Scala LogisticRegression怎么用？Scala LogisticRegression使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了LogisticRegression类的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: LRCV
//设置package包名称以及导入依赖的类
package com.ferhtaydn.rater
import org.apache.spark.SparkContext
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
import org.apache.spark.ml.feature.{ StringIndexerModel, VectorAssembler }
import org.apache.spark.ml.tuning.{ CrossValidator, CrossValidatorModel, ParamGridBuilder }
import org.apache.spark.mllib.linalg.Matrix
import org.apache.spark.sql.{ DataFrame, Row, SQLContext }
/**
 * Logistic-regression trainer that tunes the regularisation parameter by
 * 10-fold cross-validation.
 *
 * The estimator is a three-stage pipeline: a `VectorAssembler` over the
 * columns gender/age/weight/height/indexedJob, a scaler obtained from
 * `standardScaler("features")`, and a `LogisticRegression` that reads the
 * "scaledFeatures" column.
 *
 * NOTE(review): `standardScaler`, `stringIndexer`, `printBinaryMetrics` and
 * `confusionMatrix` are not defined in this class; they are presumably
 * provided by the enclosing package. `standardScaler` is assumed to write
 * the "scaledFeatures" column - confirm against its definition.
 *
 * @param sc Spark context used to create the implicit `SQLContext`
 */
class LRCV(sc: SparkContext) {

  implicit val sqlContext: SQLContext = new SQLContext(sc)

  val lr = new LogisticRegression().setMaxIter(10).setFeaturesCol("scaledFeatures")

  // Candidate regularisation strengths explored by the cross-validator.
  val paramGrid = new ParamGridBuilder()
    .addGrid(lr.regParam, Array(0.1, 0.01))
    .build()

  val assembler = new VectorAssembler()
    .setInputCols(Array("gender", "age", "weight", "height", "indexedJob"))
    .setOutputCol("features")

  val pipeline = new Pipeline()
    .setStages(Array(assembler, standardScaler("features"), lr))

  val cv = new CrossValidator()
    .setEstimator(pipeline)
    .setEvaluator(new BinaryClassificationEvaluator)
    .setEstimatorParamMaps(paramGrid)
    .setNumFolds(10)

  /**
   * Fits the cross-validated pipeline on a random 80% of `df` and evaluates
   * it on the remaining 20%.
   *
   * @param df input data; must contain the "job" column plus the columns the
   *           assembler and the evaluator read
   * @return the fitted job indexer, the fitted cross-validator model, and the
   *         confusion matrix computed on the held-out split
   */
  def train(df: DataFrame): (StringIndexerModel, CrossValidatorModel, Matrix) = {

    // Fit the string indexer on ALL rows so that no job value is missing from
    // the index. The alternative would be to assign a value to each job
    // manually, as is done for gender.
    val indexerModel = stringIndexer("job").fit(df)
    val indexed = indexerModel.transform(df)

    val splits = indexed.randomSplit(Array(0.8, 0.2))
    val training = splits(0).cache()
    val test = splits(1)

    // Release the cached training split once fitting is over, even on failure.
    val cvModel =
      try cv.fit(training)
      finally training.unpersist()

    val predictionAndLabels = cvModel
      .transform(test)
      .select("label", "prediction").map {
        case Row(label: Double, prediction: Double) =>
          (prediction, label)
      }

    printBinaryMetrics(predictionAndLabels)

    (indexerModel, cvModel, confusionMatrix(predictionAndLabels))
  }
}
示例2: MLClassification
//设置package包名称以及导入依赖的类
import io.hydrosphere.mist.api._
import io.hydrosphere.mist.api.ml._
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.feature.{HashingTF, Tokenizer}
import org.apache.spark.sql.SparkSession
/**
 * Mist job that trains a small text classifier and serves predictions from
 * the saved model.
 */
object MLClassification extends MLMistJob {

  /** Location the fitted pipeline is saved to by `train` and loaded from by `serve`. */
  private val ModelPath = "regression"

  /** Spark session built from the Mist job context (existing session is reused). */
  def session: SparkSession = SparkSession
    .builder()
    .appName(context.appName)
    .config(context.getConf)
    .getOrCreate()

  /**
   * Fits a Tokenizer -> HashingTF -> LogisticRegression pipeline on a
   * four-row in-memory data set and saves it, overwriting any previous model.
   *
   * @return an empty map; the job's only output is the saved model
   */
  def train(): Map[String, Any] = {
    val training = session.createDataFrame(Seq(
      (0L, "a b c d e spark", 1.0),
      (1L, "b d", 0.0),
      (2L, "spark f g h", 1.0),
      (3L, "hadoop mapreduce", 0.0)
    )).toDF("id", "text", "label")

    val tokenizer = new Tokenizer()
      .setInputCol("text")
      .setOutputCol("words")
    val hashingTF = new HashingTF()
      .setNumFeatures(1000)
      .setInputCol(tokenizer.getOutputCol)
      .setOutputCol("features")
    val lr = new LogisticRegression()
      .setMaxIter(10)
      .setRegParam(0.01)
    val pipeline = new Pipeline()
      .setStages(Array(tokenizer, hashingTF, lr))

    val model = pipeline.fit(training)

    model.write.overwrite().save(ModelPath)
    Map.empty[String, Any]
  }

  /**
   * Loads the saved pipeline and applies it to the given texts.
   *
   * @param text input documents, placed in the "text" column
   * @return a map whose "result" entry holds the "text" and "prediction"
   *         columns of the transformed data as a list of maps
   */
  def serve(text: List[String]): Map[String, Any] = {
    import LocalPipelineModel._

    val pipeline = PipelineLoader.load(ModelPath)
    val data = LocalData(
      LocalDataColumn("text", text)
    )

    val result: LocalData = pipeline.transform(data)
    Map("result" -> result.select("text", "prediction").toMapList)
  }
}
示例3: TestLogisticRegression
//设置package包名称以及导入依赖的类
package com.zobot.ai.spark
import breeze.linalg.Matrix
import com.zobot.ai.spark.helpers.LogisticRegressionHelpers
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.linalg.Vectors
import org.specs2.Specification
/**
 * specs2 acceptance specification for `LogisticRegressionHelpers.trainModel`.
 */
class TestLogisticRegression extends Specification {

  // The spec text is kept flush-left so the interpolated string is unchanged.
  def is = s2"""
Logistic Regression
can train model $testTrainLogisticRegressionModel
"""

  // NOTE(review): `Context` is a project class not shown here; `.spark` is
  // presumably the session used to build data frames - confirm.
  val context = new Context().spark

  /**
   * Trains a default `LogisticRegression` on four labelled 3-dimensional
   * points and compares the printed coefficient matrix with a fixed string.
   */
  def testTrainLogisticRegressionModel = {
    val labelledPoints = Seq(
      (1.0, Vectors.dense(0.0, 1.1, 0.1)),
      (0.0, Vectors.dense(2.0, 1.0, -1.0)),
      (0.0, Vectors.dense(2.0, 1.3, 1.0)),
      (1.0, Vectors.dense(0.0, 1.2, -0.5))
    )
    val trainingFrame = context.createDataFrame(labelledPoints).toDF("label", "features")

    val fitted = LogisticRegressionHelpers.trainModel(new LogisticRegression, trainingFrame)

    // NOTE(review): exact string equality on floating-point coefficients is
    // sensitive to optimiser and library version changes.
    val renderedCoefficients = fitted.coefficientMatrix.toString()
    renderedCoefficients.must_==("-19.086478256375067 16.278339464295065 -2.494930802874724 ")
  }
}
示例4: LogisticRegressionWithElasticNetExample
//设置package包名称以及导入依赖的类
package org.apache.spark.examples.ml
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.sql.SQLContext
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
/**
 * Minimal example: fit an elastic-net regularised logistic regression on a
 * LIBSVM-formatted file and print the learned coefficients and intercept.
 */
object LogisticRegressionWithElasticNetExample {

  /**
   * Runs the example on a local Spark master.
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("LogisticRegressionWithElasticNetExample")
    val sc = new SparkContext(conf)

    // Stop the context even when loading or fitting throws, so a failed run
    // does not leave it running.
    try {
      val sqlCtx = new SQLContext(sc)

      // $example on$
      // Load training data
      val training = sqlCtx.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")

      val lr = new LogisticRegression()
        .setMaxIter(10)
        .setRegParam(0.3)
        .setElasticNetParam(0.8)

      // Fit the model
      val lrModel = lr.fit(training)

      // Print the coefficients and intercept for logistic regression
      println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")
      // $example off$
    } finally {
      sc.stop()
    }
  }
}
示例5: TrainNewsClassWithLRDemo
//设置package包名称以及导入依赖的类
package applications.mining
import config.paramconf.ClassParams
import functions.Preprocessor
import org.apache.log4j.{Level, Logger}
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.feature._
import org.apache.spark.sql.SparkSession
/**
 * Demo job: trains a logistic-regression news classifier on top of the
 * project's preprocessing pipeline and saves the fitted model.
 */
object TrainNewsClassWithLRDemo extends Serializable {

  /** Training-data location used when no path is passed on the command line. */
  private val DefaultTrainPath = "ckooc-ml/data/classnews/train"

  /**
   * Entry point.
   *
   * @param args optional; `args(0)` is the training-data path. When absent the
   *             built-in default path is used. (Previously a local `val args`
   *             shadowed this parameter, so the command line was ignored.)
   */
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org").setLevel(Level.WARN)

    val spark = SparkSession
      .builder
      .master("local[2]")
      .appName("train news with LR Demo")
      .getOrCreate()

    try {
      val filePath = args.headOption.getOrElse(DefaultTrainPath)

      import spark.implicits._
      // Each input line holds label, title, time and content separated by
      // U+00EF; lines with fewer than four fields are dropped.
      val data = spark.sparkContext.textFile(filePath).flatMap { line =>
        val tokens: Array[String] = line.split("\u00ef")
        if (tokens.length > 3) Some((tokens(0), tokens(1), tokens(2), tokens(3))) else None
      }.toDF("label", "title", "time", "content")
      data.persist()

      try {
        val preprocessor = new Preprocessor
        val pipeline = preprocessor.preprocess(data)

        // Logistic-regression classifier configured from the project parameters.
        val params = new ClassParams
        val logisticRegression = new LogisticRegression()
          .setTol(params.converTol)
          .setMaxIter(params.maxIteration)
          .setRegParam(params.regParam)
          .setElasticNetParam(params.elasticNetParam)
          .setLabelCol("indexedLabel")
          .setFeaturesCol("features")

        // NOTE(review): assumes stage 1 of the preprocessing pipeline is the
        // already-fitted label indexer - verify against Preprocessor.preprocess.
        val indexModel = pipeline.getStages(1).asInstanceOf[StringIndexerModel]

        // Maps predicted label indices back to the original label strings.
        val labelConverter = new IndexToString()
          .setLabels(indexModel.labels)
          .setInputCol(logisticRegression.getPredictionCol)
          .setOutputCol("predictedLabel")

        // Append the classifier and the label converter to the preprocessing stages.
        val stages = pipeline.getStages ++ Array(logisticRegression, labelConverter)
        pipeline.setStages(stages)

        val model = pipeline.fit(data)
        model.write.overwrite().save(params.LRModelPath)
      } finally {
        // Release the cached input even when preprocessing or fitting fails.
        data.unpersist()
      }
    } finally {
      spark.stop()
    }
  }
}
示例6: LogisticRegressionHelpers
//设置package包名称以及导入依赖的类
package com.zobot.ai.spark.helpers
import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel}
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.sql.{DataFrame, Row}
/**
 * Thin helpers around Spark ML logistic regression: fitting a model and
 * running a fitted model over a test set.
 */
object LogisticRegressionHelpers {

  /** One scored test row: input features, true label, class probabilities and predicted label. */
  final case class ModelTestResults (
    features: Vector,
    label: Double,
    probability: Vector,
    prediction: Double
  )

  /**
   * Fits `estimator` on `trainingSet`.
   *
   * @param estimator   configured logistic-regression estimator
   * @param trainingSet training data frame
   * @return the fitted model
   */
  def trainModel(estimator: LogisticRegression, trainingSet: DataFrame): LogisticRegressionModel = estimator.fit(trainingSet)

  /**
   * Scores `testSet` with `transformer` and checks that every output row has
   * the (Vector, Double, Vector, Double) shape of [[ModelTestResults]].
   *
   * The selected columns are taken from the model's own configuration rather
   * than hard-coded names: the previous literal "myProbability" only existed
   * when the model had been configured with that probability column, so the
   * select failed for a default-configured model.
   *
   * @param transformer fitted model to evaluate
   * @param testSet     data frame holding the model's features and label columns
   * @return `transformer`, unchanged
   * @throws scala.MatchError if a scored row does not have the expected shape
   */
  def testModel(transformer: LogisticRegressionModel, testSet: DataFrame): LogisticRegressionModel = {
    val scored = transformer.transform(testSet).select(
      transformer.getFeaturesCol,
      transformer.getLabelCol,
      transformer.getProbabilityCol,
      transformer.getPredictionCol
    )

    // The constructed results are discarded; the match exists only to
    // validate the shape of each row.
    scored.collect().foreach {
      case Row(features: Vector, label: Double, prob: Vector, prediction: Double) =>
        ModelTestResults(features, label, prob, prediction)
    }
    transformer
  }
}
示例7: buildLogRegr
//设置package包名称以及导入依赖的类
package ml
import model.{ AppError, AppReader }
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.feature._
import scalaz.{ \/, Kleisli }
/** Builders for text-classification pipelines. */
trait PipelineBuilder {

  /**
   * Builds an (unfitted) text-classification pipeline:
   * Tokenizer -> StopWordsRemover -> HashingTF -> IDF -> LogisticRegression.
   *
   * Each stage reads the previous stage's output column. In particular
   * HashingTF consumes the stop-word-filtered tokens; it previously read the
   * raw "words" column, which made the StopWordsRemover stage a no-op for the
   * resulting features.
   *
   * @param inputColName name of the raw text column
   * @param labelColName name of the label column
   * @return a reader yielding the pipeline; non-fatal exceptions thrown
   *         while constructing the stages are captured on the left side
   */
  def buildLogRegr(inputColName: String, labelColName: String): AppReader[Pipeline] = AppReader[Pipeline] {
    spark =>
      \/.fromTryCatchNonFatal {

        val tokenizer = new Tokenizer()
          .setInputCol(inputColName)
          .setOutputCol("words")

        val stopWordsRemover = new StopWordsRemover()
          .setInputCol(tokenizer.getOutputCol)
          .setOutputCol("words_filtered")

        val hashingTF = new HashingTF()
          .setInputCol(stopWordsRemover.getOutputCol)
          .setOutputCol("rawFeatures")
          .setNumFeatures(700)

        val idf = new IDF()
          .setInputCol(hashingTF.getOutputCol)
          .setOutputCol("features")

        val lr = new LogisticRegression()
          .setMaxIter(1000)
          .setRegParam(0.05)
          .setFeaturesCol(idf.getOutputCol)
          .setLabelCol(labelColName)

        new Pipeline()
          .setStages(Array(tokenizer, stopWordsRemover, hashingTF, idf, lr))
      }
  }
}