本文整理汇总了Scala中org.apache.spark.mllib.classification.NaiveBayes类的典型用法代码示例。如果您正苦于以下问题:Scala NaiveBayes类的具体用法?Scala NaiveBayes怎么用?Scala NaiveBayes使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了NaiveBayes类的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: TitanicBayes
//设置package包名称以及导入依赖的类
import org.apache.spark.mllib.classification.{NaiveBayes, NaiveBayesModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row}
object TitanicBayes {

  /** Trained model; stays null until [[train]] has been called. */
  var naiveBayesModel: NaiveBayesModel = null

  /** Trains a multinomial Naive Bayes model on the Titanic dataset.
    *
    * Expects the DataFrame to contain the columns Survived (Int label),
    * Fare (Double), Pclass (Int), Age (Double), Sex (Int), Parch (Int),
    * SibSp (Int) and Embarked (Int). Int features are widened to Double
    * by Vectors.dense. The fitted model is stored in [[naiveBayesModel]].
    *
    * @param df training DataFrame with the columns listed above
    */
  def train(df: DataFrame): Unit = {
    val mappedDf = df.map(row =>
      (row.getAs[Int]("Survived"), row.getAs[Double]("Fare"), row.getAs[Int]("Pclass"), row.getAs[Double]("Age"),
        row.getAs[Int]("Sex"), row.getAs[Int]("Parch"), row.getAs[Int]("SibSp"), row.getAs[Int]("Embarked")))

    val labeledData = mappedDf.map { case (survived, fare, pclass, age, sex, parch, sibsp, embarked) =>
      LabeledPoint(survived, Vectors.dense(fare, pclass, age, sex, parch, sibsp, embarked))
    }

    // lambda = 1.0 applies Laplace (add-one) smoothing.
    naiveBayesModel = NaiveBayes.train(labeledData, lambda = 1.0, modelType = "multinomial")
  }

  /** Predicts survival for each passenger row using the trained model.
    *
    * @param df DataFrame with the same feature columns used in [[train]]
    *           plus a PassengerId (Int) column
    * @return an RDD of (PassengerId, predictedLabel) rows
    * @throws IllegalArgumentException if [[train]] has not been called yet
    */
  def predict(df: DataFrame): RDD[Row] = {
    // Fail fast with a clear message instead of an NPE inside the map closure.
    require(naiveBayesModel != null, "predict() called before train(): no model available")
    val resultDf = df.map { row =>
      val denseVector = Vectors.dense(row.getAs[Double]("Fare"), row.getAs[Int]("Pclass"), row.getAs[Double]("Age"), row.getAs[Int]("Sex"),
        row.getAs[Int]("Parch"), row.getAs[Int]("SibSp"), row.getAs[Int]("Embarked"))
      val result = naiveBayesModel.predict(denseVector)
      Row.fromTuple((row.getAs[Int]("PassengerId"), result.toInt))
    }
    resultDf
  }
}
示例2: AlgorithmParams
//设置package包名称以及导入依赖的类
package org.template.classification
import org.apache.predictionio.controller.P2LAlgorithm
import org.apache.predictionio.controller.Params
import org.apache.spark.mllib.classification.NaiveBayes
import org.apache.spark.mllib.classification.NaiveBayesModel
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.SparkContext
import grizzled.slf4j.Logger
/** Hyper-parameters for the Naive Bayes algorithm.
  *
  * @param lambda additive (Laplace) smoothing parameter passed to MLlib's NaiveBayes
  */
case class AlgorithmParams(lambda: Double) extends Params
// extends P2LAlgorithm because the MLlib's NaiveBayesModel doesn't contain RDD.
class NaiveBayesAlgorithm(val ap: AlgorithmParams)
  extends P2LAlgorithm[PreparedData, NaiveBayesModel, Query, PredictedResult] {

  @transient lazy val logger = Logger[this.type]

  /** Fits an MLlib Naive Bayes model on the prepared labeled points.
    *
    * @param sc   active SparkContext
    * @param data prepared training data; must contain at least one labeled point
    * @return the fitted NaiveBayesModel
    */
  def train(sc: SparkContext, data: PreparedData): NaiveBayesModel = {
    // MLLib NaiveBayes cannot handle empty training data, so fail fast
    // with an actionable message before calling into the trainer.
    val hasTrainingData = data.labeledPoints.take(1).nonEmpty
    require(hasTrainingData,
      s"RDD[labeledPoints] in PreparedData cannot be empty." +
        " Please check if DataSource generates TrainingData" +
        " and Preparator generates PreparedData correctly.")
    NaiveBayes.train(data.labeledPoints, ap.lambda)
  }

  /** Classifies a single query after expanding its raw usage features.
    *
    * @param model the trained NaiveBayesModel
    * @param query raw voice/data/text usage values
    * @return the predicted label wrapped in a PredictedResult
    */
  def predict(model: NaiveBayesModel, query: Query): PredictedResult = {
    val rawFeatures = Vectors.dense(Array(query.voice_usage, query.data_usage, query.text_usage))
    val features = SyntheticFeatures.transform(rawFeatures)
    val label = model.predict(features)
    new PredictedResult(label)
  }
}
示例3: NaiveBayesAlg
//设置package包名称以及导入依赖的类
package com.jjzhk.sparkexamples.mllib.naivebayes
import com.jjzhk.sparkexamples.CommonDefines
import org.apache.spark.mllib.classification.NaiveBayes
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.sql.SparkSession
object NaiveBayesAlg {

  /** Trains an MLlib Naive Bayes classifier from a CSV file and evaluates
    * it on a single hard-coded test record, printing counts and accuracy.
    *
    * Each input record has four numeric features followed by the class
    * label, comma-separated (e.g. "2,0,1,1,1" — label is the 5th field).
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("NaiveBayesAlg").master("local[2]").getOrCreate()
    try {
      val fileRdd = spark.sparkContext.textFile(CommonDefines.Data_File_Path + "/mllib/naivebayes.txt")
      // Parse "f0,f1,f2,f3,label" into a LabeledPoint.
      val training = fileRdd.map { record =>
        val parts = record.split(",")
        LabeledPoint(parts(4).toDouble, Vectors.dense(parts(0).toDouble, parts(1).toDouble, parts(2).toDouble, parts(3).toDouble))
      }

      val testData = "2,0,1,1,1".split(",")
      val labeledPoint = LabeledPoint(testData(4).toDouble,
        Vectors.dense(testData(0).toDouble, testData(1).toDouble, testData(2).toDouble, testData(3).toDouble))
      val test = spark.sparkContext.parallelize(Seq[LabeledPoint](labeledPoint))

      val model = NaiveBayes.train(training)
      val predictionAndLabel = test.map(p => (model.predict(p.features), p.label))
      // Fraction of test points whose predicted label matches the true label.
      val accuracy = 1.0 * predictionAndLabel.filter(x => x._1 == x._2).count() / test.count()

      println("result:")
      println("training.count:" + training.count())
      println("test.count:" + test.count())
      println("accuracy:" + accuracy)
      predictionAndLabel.take(10).foreach(println)
    } finally {
      // Release the local Spark resources even if the job fails.
      spark.stop()
    }
  }
}