本文整理汇总了Scala中org.apache.spark.mllib.classification.NaiveBayesModel类的典型用法代码示例。如果您正苦于以下问题：Scala NaiveBayesModel类的具体用法？Scala NaiveBayesModel怎么用？Scala NaiveBayesModel使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了NaiveBayesModel类的4个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: RelevanceFeature
//设置package包名称以及导入依赖的类
package applicant.ml.regression.features
import applicant.etl.ApplicantData
import applicant.nlp.LuceneTokenizer
import applicant.ml.regression.FeatureSetting
import applicant.ml.naivebayes.{NaiveBayesFeatureGenerator, NaiveBayesHelper}
import scala.collection.mutable.ListBuffer
import org.apache.spark.mllib.classification.NaiveBayesModel
import org.apache.spark.mllib.feature.IDFModel
/**
 * Feature that averages naive Bayes scores computed over an applicant's tokenized text.
 *
 * @param newSetting      feature configuration, exposed as `setting`
 * @param naiveBayesModel classifier handed to `NaiveBayesHelper.predictSingleScore`
 * @param tfIdfModel      IDF model handed to `NaiveBayesFeatureGenerator.getAdjustedFeatureVec`
 */
class RelevanceFeature(newSetting: FeatureSetting, naiveBayesModel: NaiveBayesModel, tfIdfModel: IDFModel) extends BaseFeature {
  val setting: FeatureSetting = newSetting
  val bayesModel: NaiveBayesModel = naiveBayesModel
  val idfModel: IDFModel = tfIdfModel

  /**
   * Scores every token group of the applicant's full text and averages the
   * scores that lie strictly between 0.05 and 0.95.
   *
   * @param applicant applicant whose `fullText` is tokenized and scored
   * @return the mean of the retained scores, or 0.0 when none are retained
   */
  def getFeatureScore(applicant: ApplicantData): Double = {
    val collected = ListBuffer.empty[Double]
    LuceneTokenizer.getTokens(applicant.fullText).foreach { tokens =>
      val featureVec = NaiveBayesFeatureGenerator.getAdjustedFeatureVec(tokens, idfModel)
      collected += NaiveBayesHelper.predictSingleScore(bayesModel, featureVec)
    }

    // Filter overconfident scores. Model confidence will vary more with larger training sets.
    val plausible = collected.filter(s => s > 0.05 && s < 0.95)

    if (plausible.isEmpty) 0.0
    else plausible.sum / plausible.length
  }
}
示例2: TitanicBayes
//导入依赖的类
import org.apache.spark.mllib.classification.{NaiveBayes, NaiveBayesModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row}
/**
 * Trains and applies a multinomial naive Bayes classifier on Titanic passenger rows.
 *
 * The trained model is kept in the mutable field `naiveBayesModel`; `train` must be
 * called before `predict`.
 */
object TitanicBayes {
  // Holds the model produced by `train`; null until training has run.
  var naiveBayesModel: NaiveBayesModel = null

  /**
   * Builds the feature vector for one row.
   *
   * Keeping the column order in a single place guarantees that training and
   * prediction feed the model identically laid-out vectors.
   */
  private def features(row: Row): org.apache.spark.mllib.linalg.Vector =
    Vectors.dense(
      row.getAs[Double]("Fare"),
      row.getAs[Int]("Pclass").toDouble,
      row.getAs[Double]("Age"),
      row.getAs[Int]("Sex").toDouble,
      row.getAs[Int]("Parch").toDouble,
      row.getAs[Int]("SibSp").toDouble,
      row.getAs[Int]("Embarked").toDouble
    )

  /**
   * Trains a multinomial naive Bayes model (lambda = 1.0) and stores it in
   * `naiveBayesModel`.
   *
   * @param df rows providing the "Survived" label column plus the feature columns
   *           "Fare", "Pclass", "Age", "Sex", "Parch", "SibSp" and "Embarked"
   */
  def train(df: DataFrame): Unit = {
    val labeledData = df.map { row =>
      LabeledPoint(row.getAs[Int]("Survived").toDouble, features(row))
    }
    naiveBayesModel = NaiveBayes.train(labeledData, lambda = 1.0, modelType = "multinomial")
  }

  /**
   * Predicts a label for every row of `df`.
   *
   * @param df rows providing "PassengerId" plus the same feature columns used by `train`
   * @return one `Row(passengerId, predictedLabel)` per input row
   * @throws IllegalStateException if no model has been trained yet
   */
  def predict(df: DataFrame): RDD[Row] = {
    // Copy the model into a local val so it is serialized with the closure.
    // Reading the object's field inside the closure would resolve against the
    // executor-side singleton, whose field is still null on a cluster.
    val model = Option(naiveBayesModel).getOrElse(
      throw new IllegalStateException("TitanicBayes.predict called before TitanicBayes.train"))

    df.map { row =>
      val predicted = model.predict(features(row))
      Row.fromTuple((row.getAs[Int]("PassengerId"), predicted.toInt))
    }
  }
}
示例3: AlgorithmParams
//设置package包名称以及导入依赖的类
package org.template.classification
import org.apache.predictionio.controller.P2LAlgorithm
import org.apache.predictionio.controller.Params
import org.apache.spark.mllib.classification.NaiveBayes
import org.apache.spark.mllib.classification.NaiveBayesModel
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.SparkContext
import grizzled.slf4j.Logger
/**
 * Parameters for the naive Bayes algorithm.
 *
 * @param lambda value passed as the second argument of `NaiveBayes.train`
 */
case class AlgorithmParams(lambda: Double) extends Params
/**
 * Naive Bayes algorithm backed by MLlib's `NaiveBayes`.
 *
 * Extends P2LAlgorithm because MLlib's NaiveBayesModel doesn't contain an RDD.
 *
 * @param ap algorithm parameters; `ap.lambda` is forwarded to `NaiveBayes.train`
 */
class NaiveBayesAlgorithm(val ap: AlgorithmParams)
  extends P2LAlgorithm[PreparedData, NaiveBayesModel, Query, PredictedResult] {

  @transient lazy val logger = Logger[this.type]

  /**
   * Trains a model on the prepared labeled points.
   *
   * @param sc   Spark context (not used by this implementation)
   * @param data prepared data whose `labeledPoints` must contain at least one element
   * @return the model produced by `NaiveBayes.train`
   * @throws IllegalArgumentException if `data.labeledPoints` is empty
   */
  def train(sc: SparkContext, data: PreparedData): NaiveBayesModel = {
    val trainingPoints = data.labeledPoints
    // MLlib NaiveBayes cannot handle empty training data, so fail early with a hint.
    require(
      trainingPoints.take(1).nonEmpty,
      "RDD[labeledPoints] in PreparedData cannot be empty." +
        " Please check if DataSource generates TrainingData" +
        " and Preparator generates PreparedData correctly."
    )
    NaiveBayes.train(trainingPoints, ap.lambda)
  }

  /**
   * Predicts a label for a single query.
   *
   * @param model model returned by `train`
   * @param query query providing `voice_usage`, `data_usage` and `text_usage`
   * @return a `PredictedResult` wrapping the predicted label
   */
  def predict(model: NaiveBayesModel, query: Query): PredictedResult = {
    val usage: Array[Double] = Array(query.voice_usage, query.data_usage, query.text_usage)
    val features = SyntheticFeatures.transform(Vectors.dense(usage))
    new PredictedResult(model.predict(features))
  }
}
示例4: NaiveBayesAlgorithmTest
//设置package包名称以及导入依赖的类
package org.template.classification
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.classification.NaiveBayesModel
import org.scalatest.FlatSpec
import org.scalatest.Matchers
/** Checks that `NaiveBayesAlgorithm.train` yields a `NaiveBayesModel` for a tiny three-class data set. */
class NaiveBayesAlgorithmTest
  extends FlatSpec with SharedSingletonContext with Matchers {

  val params = AlgorithmParams(lambda = 10)
  val algorithm = new NaiveBayesAlgorithm(params)

  // One sample per class; each class has a different dominant feature.
  val dataSource = Seq(
    LabeledPoint(0.0, Vectors.dense(1000.0, 10.0, 10.0)),
    LabeledPoint(1.0, Vectors.dense(10.0, 1000.0, 10.0)),
    LabeledPoint(2.0, Vectors.dense(10.0, 10.0, 1000.0))
  )

  "train" should "return NaiveBayes model" in {
    val points = sparkContext.parallelize(dataSource)
    val trained = algorithm.train(sparkContext, new PreparedData(labeledPoints = points))
    trained shouldBe a [NaiveBayesModel]
  }
}