本文整理汇总了Scala中org.apache.spark.mllib.evaluation.BinaryClassificationMetrics类的典型用法代码示例。如果您正苦于以下问题:Scala BinaryClassificationMetrics类的具体用法?Scala BinaryClassificationMetrics怎么用?Scala BinaryClassificationMetrics使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了BinaryClassificationMetrics类的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: SVMTest
//设置package包名称以及导入依赖的类
package cn.edu.bjtu
import org.apache.spark.SparkConf
import org.apache.spark.mllib.classification.SVMWithSGD
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.sql.SparkSession
/**
 * Trains a linear SVM with SGD on a LibSVM-formatted data set and prints
 * evaluation figures for a held-out test split: area under the ROC curve,
 * sensitivity, specificity and accuracy.
 */
object SVMTest {

  /**
   * Entry point. Connects to the configured Spark master, trains the model,
   * prints the metrics to stdout and stops the Spark session.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .setAppName("SVMTest")
      .setMaster("spark://master:7077")
      .setJars(Array("/home/hadoop/SVM.jar"))
    val spark = SparkSession.builder()
      .config(sparkConf)
      .getOrCreate()
    try {
      spark.sparkContext.setLogLevel("WARN")
      val data = MLUtils.loadLibSVMFile(spark.sparkContext, "hdfs://master:9000/sample_formatted.txt")

      // Split data into training (70%) and test (30%).
      val splits = data.randomSplit(Array(0.7, 0.3), seed = 11L)
      val training = splits(0).cache()
      val test = splits(1)

      // Run training algorithm to build the model.
      val numIterations = 100
      val model = SVMWithSGD.train(training, numIterations)

      // Margin cut-off used to turn a raw score into a 0/1 class prediction.
      // Kept as a local value so the closure below captures only a Double.
      val decisionThreshold = -5000.0

      // Clear the threshold so that predict() returns the raw margin. The ROC
      // curve needs continuous scores; feeding it thresholded 0/1 predictions
      // collapses the curve to a single operating point.
      model.clearThreshold()

      // Compute raw scores on the test set. Cached because the RDD is
      // consumed by more than one Spark action below.
      val scoreAndLabels = test.map { point =>
        (model.predict(point.features), point.label)
      }.cache()

      // Get evaluation metrics.
      val metrics = new BinaryClassificationMetrics(scoreAndLabels)
      println(s"Area under ROC = ${metrics.areaUnderROC()}")

      // Apply the decision threshold (same `margin > threshold` rule the model
      // uses when a threshold is set) and tally every distinct
      // (prediction, label) pair in a single pass.
      val confusion = scoreAndLabels.map { case (score, label) =>
        (if (score > decisionThreshold) 1.0 else 0.0, label)
      }.countByValue()

      // Sums the counts of all (prediction, label) pairs satisfying `keep`.
      def countWhere(keep: (Double, Double) => Boolean): Long =
        confusion.iterator.collect { case ((p, l), n) if keep(p, l) => n }.sum

      val truePositives = countWhere((p, l) => p == l && p == 1.0)
      val trueNegatives = countWhere((p, l) => p == l && p == 0.0)
      val actualPositives = countWhere((_, l) => l == 1.0)
      val actualNegatives = countWhere((_, l) => l == 0.0)
      val correct = countWhere((p, l) => p == l)
      val total = countWhere((_, _) => true)

      // Double division: an empty class yields NaN rather than throwing.
      println(s"Sensitivity = ${truePositives.toDouble / actualPositives.toDouble}")
      println(s"Specificity = ${trueNegatives.toDouble / actualNegatives.toDouble}")
      println(s"Accuracy = ${correct.toDouble / total.toDouble}")
    } finally {
      // Release cluster resources even when training or evaluation fails.
      spark.stop()
    }
  }
}
示例2: DecisionTreeTest
//设置package包名称以及导入依赖的类
package cn.edu.bjtu
import org.apache.spark.SparkConf
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.DecisionTree
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.sql.SparkSession
/**
 * Trains a binary decision-tree classifier on a LibSVM-formatted data set and
 * prints evaluation figures for a held-out test split: area under the ROC
 * curve, sensitivity, specificity and accuracy.
 */
object DecisionTreeTest {

  /**
   * Entry point. Connects to the configured Spark master, trains the model,
   * prints the metrics to stdout and stops the Spark session.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .setAppName("DecisionTreeTest")
      .setMaster("spark://master:7077")
      .setJars(Array("/home/hadoop/DecisionTree.jar"))
    val spark = SparkSession.builder()
      .config(sparkConf)
      .getOrCreate()
    try {
      spark.sparkContext.setLogLevel("WARN")
      // Load and parse the data file.
      val data = MLUtils.loadLibSVMFile(spark.sparkContext, "hdfs://master:9000/sample_formatted.txt")
      // Split the data into training and test sets (30% held out for testing).
      val splits = data.randomSplit(Array(0.7, 0.3))
      val (training, test) = (splits(0), splits(1))

      // Train a DecisionTree model.
      // Empty categoricalFeaturesInfo indicates all features are continuous.
      val numClasses = 2
      val categoricalFeaturesInfo = Map[Int, Int]()
      val impurity = "entropy" // "gini" is the other impurity measure for classification
      val maxDepth = 14
      val maxBins = 16384
      val model = DecisionTree.trainClassifier(training, numClasses, categoricalFeaturesInfo,
        impurity, maxDepth, maxBins)

      // Predicted class paired with the true label. Cached because the RDD is
      // consumed by more than one Spark action below.
      val predictionAndLabels = test.map { case LabeledPoint(label, features) =>
        (model.predict(features), label)
      }.cache()

      // NOTE(review): the "scores" handed to BinaryClassificationMetrics here
      // are hard class predictions, so the ROC curve presumably has only one
      // interior operating point - confirm this is the intended AUC.
      val metrics = new BinaryClassificationMetrics(predictionAndLabels)
      println(s"Area under ROC = ${metrics.areaUnderROC()}")

      // Tally every distinct (prediction, label) pair in a single pass instead
      // of launching one filter+count job per numerator and denominator.
      val confusion = predictionAndLabels.countByValue()

      // Sums the counts of all (prediction, label) pairs satisfying `keep`.
      def countWhere(keep: (Double, Double) => Boolean): Long =
        confusion.iterator.collect { case ((p, l), n) if keep(p, l) => n }.sum

      val truePositives = countWhere((p, l) => p == l && p == 1.0)
      val trueNegatives = countWhere((p, l) => p == l && p == 0.0)
      val actualPositives = countWhere((_, l) => l == 1.0)
      val actualNegatives = countWhere((_, l) => l == 0.0)
      val correct = countWhere((p, l) => p == l)
      val total = countWhere((_, _) => true)

      // Double division: an empty class yields NaN rather than throwing.
      println(s"Sensitivity = ${truePositives.toDouble / actualPositives.toDouble}")
      println(s"Specificity = ${trueNegatives.toDouble / actualNegatives.toDouble}")
      println(s"Accuracy = ${correct.toDouble / total.toDouble}")
    } finally {
      // Release cluster resources even when training or evaluation fails.
      spark.stop()
    }
  }
}
示例3: LogisticRegressionTest
//设置package包名称以及导入依赖的类
package cn.edu.bjtu
import org.apache.spark.SparkConf
import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.sql.SparkSession
/**
 * Trains a binary logistic-regression model with L-BFGS on a LibSVM-formatted
 * data set and prints evaluation figures for a held-out test split: area
 * under the ROC curve, sensitivity, specificity and accuracy.
 */
object LogisticRegressionTest {

  /**
   * Entry point. Connects to the configured Spark master, trains the model,
   * prints the metrics to stdout and stops the Spark session.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .setAppName("LogisticRegressionTest")
      .setMaster("spark://master:7077")
      .setJars(Array("/home/hadoop/LogisticRegression.jar"))
    val spark = SparkSession.builder()
      .config(sparkConf)
      .getOrCreate()
    try {
      spark.sparkContext.setLogLevel("WARN")
      val data = MLUtils.loadLibSVMFile(spark.sparkContext, "hdfs://master:9000/sample_formatted.txt")
      // Split data into training (70%) and test (30%).
      val splits = data.randomSplit(Array(0.7, 0.3), seed = 11L)
      val training = splits(0).cache()
      val test = splits(1)

      // Run training algorithm to build the model.
      val model = new LogisticRegressionWithLBFGS()
        .setNumClasses(2)
        .run(training)

      // Remember the model's decision threshold (0.5 if none is set), then
      // clear it so that predict() returns the raw score. The ROC curve needs
      // continuous scores; thresholded 0/1 predictions collapse it to a
      // single operating point.
      val decisionThreshold = model.getThreshold.getOrElse(0.5)
      model.clearThreshold()

      // Compute raw scores on the test set. Cached because the RDD is
      // consumed by more than one Spark action below.
      val scoreAndLabels = test.map { case LabeledPoint(label, features) =>
        (model.predict(features), label)
      }.cache()

      // Get evaluation metrics.
      val metrics = new BinaryClassificationMetrics(scoreAndLabels)
      println(s"Area under ROC = ${metrics.areaUnderROC()}")

      // Re-apply the decision threshold (same `score > threshold` rule the
      // model uses when a threshold is set) and tally every distinct
      // (prediction, label) pair in a single pass.
      val confusion = scoreAndLabels.map { case (score, label) =>
        (if (score > decisionThreshold) 1.0 else 0.0, label)
      }.countByValue()

      // Sums the counts of all (prediction, label) pairs satisfying `keep`.
      def countWhere(keep: (Double, Double) => Boolean): Long =
        confusion.iterator.collect { case ((p, l), n) if keep(p, l) => n }.sum

      val truePositives = countWhere((p, l) => p == l && p == 1.0)
      val trueNegatives = countWhere((p, l) => p == l && p == 0.0)
      val actualPositives = countWhere((_, l) => l == 1.0)
      val actualNegatives = countWhere((_, l) => l == 0.0)
      val correct = countWhere((p, l) => p == l)
      val total = countWhere((_, _) => true)

      // Double division: an empty class yields NaN rather than throwing.
      println(s"Sensitivity = ${truePositives.toDouble / actualPositives.toDouble}")
      println(s"Specificity = ${trueNegatives.toDouble / actualNegatives.toDouble}")
      println(s"Accuracy = ${correct.toDouble / total.toDouble}")
    } finally {
      // Release cluster resources even when training or evaluation fails.
      spark.stop()
    }
  }
}
示例4: RandomForestTest
//设置package包名称以及导入依赖的类
package cn.edu.bjtu
import org.apache.spark.SparkConf
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.RandomForest
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.sql.SparkSession
/**
 * Trains a binary random-forest classifier on a LibSVM-formatted data set and
 * prints evaluation figures for a held-out test split: area under the ROC
 * curve, sensitivity, specificity and accuracy.
 */
object RandomForestTest {

  /**
   * Entry point. Connects to the configured Spark master, trains the model,
   * prints the metrics to stdout and stops the Spark session.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .setAppName("RandomForestTest")
      .setMaster("spark://master:7077")
      .setJars(Array("/home/hadoop/RandomForest.jar"))
    val spark = SparkSession.builder()
      .config(sparkConf)
      .getOrCreate()
    try {
      spark.sparkContext.setLogLevel("WARN")
      // Load and parse the data file.
      val data = MLUtils.loadLibSVMFile(spark.sparkContext, "hdfs://master:9000/sample_formatted.txt")
      // Split the data into training and test sets (30% held out for testing).
      val splits = data.randomSplit(Array(0.7, 0.3))
      val (training, test) = (splits(0), splits(1))

      // Train a RandomForest model.
      // Empty categoricalFeaturesInfo indicates all features are continuous.
      val numClasses = 2
      val categoricalFeaturesInfo = Map[Int, Int]()
      val numTrees = 3 // Use more in practice.
      // NOTE(review): "18" is an explicit setting, not an automatic choice;
      // presumably it is the number of features considered per split, and
      // "auto" would let the algorithm choose - confirm against the Spark
      // version in use.
      val featureSubsetStrategy = "18"
      val impurity = "gini"
      val maxDepth = 14
      val maxBins = 16384
      val model = RandomForest.trainClassifier(training, numClasses, categoricalFeaturesInfo,
        numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins)

      // Predicted class paired with the true label. Cached because the RDD is
      // consumed by more than one Spark action below.
      val predictionAndLabels = test.map { case LabeledPoint(label, features) =>
        (model.predict(features), label)
      }.cache()

      // NOTE(review): the "scores" handed to BinaryClassificationMetrics here
      // are hard class predictions, so the ROC curve presumably has only one
      // interior operating point - confirm this is the intended AUC.
      val metrics = new BinaryClassificationMetrics(predictionAndLabels)
      println(s"Area under ROC = ${metrics.areaUnderROC()}")

      // Tally every distinct (prediction, label) pair in a single pass instead
      // of launching one filter+count job per numerator and denominator.
      val confusion = predictionAndLabels.countByValue()

      // Sums the counts of all (prediction, label) pairs satisfying `keep`.
      def countWhere(keep: (Double, Double) => Boolean): Long =
        confusion.iterator.collect { case ((p, l), n) if keep(p, l) => n }.sum

      val truePositives = countWhere((p, l) => p == l && p == 1.0)
      val trueNegatives = countWhere((p, l) => p == l && p == 0.0)
      val actualPositives = countWhere((_, l) => l == 1.0)
      val actualNegatives = countWhere((_, l) => l == 0.0)
      val correct = countWhere((p, l) => p == l)
      val total = countWhere((_, _) => true)

      // Double division: an empty class yields NaN rather than throwing.
      println(s"Sensitivity = ${truePositives.toDouble / actualPositives.toDouble}")
      println(s"Specificity = ${trueNegatives.toDouble / actualNegatives.toDouble}")
      println(s"Accuracy = ${correct.toDouble / total.toDouble}")
    } finally {
      // Release cluster resources even when training or evaluation fails.
      spark.stop()
    }
  }
}
示例5: BinomialValidation
//设置package包名称以及导入依赖的类
package com.bistel.wordcount.logisticRegression
import org.apache.spark.SparkContext
import org.apache.spark.mllib.classification.LogisticRegressionModel
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.regression.LabeledPoint
/**
 * Evaluates a logistic-regression model against a labelled validation set.
 *
 * @param sc       Spark context used to parallelise the validation data
 * @param model    trained model whose predictions are evaluated
 * @param numTasks number of partitions used when parallelising the data
 */
class BinomialValidation(sc: SparkContext,
    model: LogisticRegressionModel,
    numTasks: Int) {

  /**
   * Scores `validationSet` with the model and summarises prediction quality.
   *
   * @param validationSet labelled points to evaluate
   * @return a `Quality` built from the F-measure-by-threshold pairs, the sum
   *         of squared errors, and the fraction of points whose prediction is
   *         within 0.1 of the label
   */
  def metrics(validationSet: Array[LabeledPoint]): Quality = {
    val (labels, features) = validationSet.map(lbPt =>
      (lbPt.label, lbPt.features)).unzip

    val predictedRdd = model.predict(sc.makeRDD(features, numTasks))

    // Pairs are (expected label, prediction). Both sides are parallelised
    // with the same slice count so that zip lines them up. Cached because
    // three separate Spark actions consume the RDD below.
    val labelAndPrediction = sc.makeRDD(labels, numTasks).zip(predictedRdd).cache()

    try {
      val successes = labelAndPrediction.map {
        case (e, p) => Math.abs(e - p)
      }.filter(_ < 0.1)

      // Sum of squared errors. NOTE(review): despite the `msse` name this is
      // not divided by the sample count - confirm what callers expect.
      val msse = labelAndPrediction.map {
        case (e, p) => (e - p) * (e - p)
      }.sum

      // BinaryClassificationMetrics expects (score, label) pairs, so the
      // (label, prediction) tuples must be swapped before being handed over.
      val metrics = new BinaryClassificationMetrics(labelAndPrediction.map(_.swap))

      Quality(metrics.fMeasureByThreshold().collect,
        msse,
        successes.count.toDouble / validationSet.length)
    } finally {
      // All actions above are eager, so the cached blocks can be released.
      labelAndPrediction.unpersist()
    }
  }
}