当前位置: 首页>>代码示例>>Scala>>正文


Scala NaiveBayesModel类代码示例

本文整理汇总了Scala中org.apache.spark.mllib.classification.NaiveBayesModel的典型用法代码示例。如果您正苦于以下问题:Scala NaiveBayesModel类的具体用法?Scala NaiveBayesModel怎么用?Scala NaiveBayesModel使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了NaiveBayesModel类的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: RelevanceFeature

//设置package包名称以及导入依赖的类
package applicant.ml.regression.features

import applicant.etl.ApplicantData
import applicant.nlp.LuceneTokenizer
import applicant.ml.regression.FeatureSetting
import applicant.ml.naivebayes.{NaiveBayesFeatureGenerator, NaiveBayesHelper}

import scala.collection.mutable.ListBuffer
import org.apache.spark.mllib.classification.NaiveBayesModel
import org.apache.spark.mllib.feature.IDFModel

class RelevanceFeature(newSetting: FeatureSetting, naiveBayesModel: NaiveBayesModel, tfIdfModel: IDFModel) extends BaseFeature {
  val setting: FeatureSetting = newSetting
  val bayesModel: NaiveBayesModel = naiveBayesModel
  val idfModel: IDFModel = tfIdfModel

  
  def getFeatureScore(applicant: ApplicantData): Double = {
    val tokenList = LuceneTokenizer.getTokens(applicant.fullText)
    var scores = new ListBuffer[Double]()

    tokenList.foreach { tokens =>
      val score = NaiveBayesHelper.predictSingleScore(bayesModel, NaiveBayesFeatureGenerator.getAdjustedFeatureVec(tokens, idfModel))
      scores += score
    }

    // Filter overconfident scores. Model confidence with vary more with larger training sets.
    scores = scores.filter { score =>
      score < 0.95 && score > 0.05
    }

    var result = 0.0
    if (scores.length > 0) {
      result = scores.sum / scores.length
    }

    return result
  }
} 
开发者ID:dataworks,项目名称:internship-2016,代码行数:40,代码来源:RelevanceFeature.scala

示例2: TitanicBayes

//设置package包名称以及导入依赖的类
import org.apache.spark.mllib.classification.{NaiveBayes, NaiveBayesModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row}


object TitanicBayes {

  var naiveBayesModel: NaiveBayesModel = null

  def train(df: DataFrame): Unit = {
    val mappedDf = df.map(row =>
      (row.getAs[Int]("Survived"), row.getAs[Double]("Fare"), row.getAs[Int]("Pclass"), row.getAs[Double]("Age")
        ,row.getAs[Int]("Sex"), row.getAs[Int]("Parch"), row.getAs[Int]("SibSp"),row.getAs[Int]("Embarked")))

    val labledData = mappedDf.map { case (survived, fare, pclass, age, sex, parch, sibsp, embarked) =>
      LabeledPoint(survived, Vectors.dense(fare, pclass, age, sex, parch, sibsp, embarked))
    }
    naiveBayesModel = NaiveBayes.train(labledData, lambda = 1.0, modelType = "multinomial")

  }

  def predict(df: DataFrame): RDD[Row] = {

    val resultDf = df.map { row =>
      val denseVecor = Vectors.dense(row.getAs[Double]("Fare"), row.getAs[Int]("Pclass"), row.getAs[Double]("Age"),row.getAs[Int]("Sex"),
        row.getAs[Int]("Parch"), row.getAs[Int]("SibSp"), row.getAs[Int]("Embarked") )
      val result = naiveBayesModel.predict(denseVecor)
      Row.fromTuple((row.getAs[Int]("PassengerId"), result.toInt))
    }
    resultDf
  }


} 
开发者ID:digital-thinking,项目名称:spark-titanic,代码行数:37,代码来源:TitanicBayes.scala

示例3: AlgorithmParams

//设置package包名称以及导入依赖的类
package org.template.classification

import org.apache.predictionio.controller.P2LAlgorithm
import org.apache.predictionio.controller.Params

import org.apache.spark.mllib.classification.NaiveBayes
import org.apache.spark.mllib.classification.NaiveBayesModel
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.SparkContext

import grizzled.slf4j.Logger

case class AlgorithmParams(
  lambda: Double
) extends Params

// extends P2LAlgorithm because the MLlib's NaiveBayesModel doesn't contain RDD.
class NaiveBayesAlgorithm(val ap: AlgorithmParams)
  extends P2LAlgorithm[PreparedData, NaiveBayesModel, Query, PredictedResult] {

  @transient lazy val logger = Logger[this.type]

  def train(sc: SparkContext, data: PreparedData): NaiveBayesModel = {
    // MLLib NaiveBayes cannot handle empty training data.
    require(data.labeledPoints.take(1).nonEmpty,
      s"RDD[labeledPoints] in PreparedData cannot be empty." +
      " Please check if DataSource generates TrainingData" +
      " and Preparator generates PreparedData correctly.")

    NaiveBayes.train(data.labeledPoints, ap.lambda)
  }

  def predict(model: NaiveBayesModel, query: Query): PredictedResult = {
    val features = SyntheticFeatures.transform(
      Vectors.dense(
        Array(query.voice_usage, query.data_usage, query.text_usage)
      )
    )
    val label = model.predict(features)
    new PredictedResult(label)
  }

} 
开发者ID:wmfongsf,项目名称:predictionio-engine-classification,代码行数:44,代码来源:NaiveBayesAlgorithm.scala

示例4: NaiveBayesAlgorithmTest

//设置package包名称以及导入依赖的类
package org.template.classification

import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.classification.NaiveBayesModel

import org.scalatest.FlatSpec
import org.scalatest.Matchers

class NaiveBayesAlgorithmTest
  extends FlatSpec with SharedSingletonContext with Matchers {

  val params = AlgorithmParams(lambda = 10)
  val algorithm = new NaiveBayesAlgorithm(params)

  val dataSource = Seq(
    LabeledPoint(0, Vectors.dense(1000, 10, 10)),
    LabeledPoint(1, Vectors.dense(10, 1000, 10)),
    LabeledPoint(2, Vectors.dense(10, 10, 1000))
  )

  "train" should "return NaiveBayes model" in {
    val dataSourceRDD = sparkContext.parallelize(dataSource)
    val preparedData = new PreparedData(labeledPoints = dataSourceRDD)
    val model = algorithm.train(sparkContext, preparedData)
    model shouldBe a [NaiveBayesModel]
  }
} 
开发者ID:wmfongsf,项目名称:predictionio-engine-classification,代码行数:29,代码来源:NaiveBayesAlgorithmTest.scala


注:本文中的org.apache.spark.mllib.classification.NaiveBayesModel类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。