

Scala Rating Class Code Examples

This article collects typical usage examples of the Scala class org.apache.spark.mllib.recommendation.Rating. If you are wondering what exactly the Rating class does, how to use it, or where to find real-world examples of it, the curated class examples below should help.


The following presents 14 code examples of the Rating class, ordered by popularity.
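Before diving into the examples, here is a minimal, self-contained sketch of the class itself: Rating is a plain case class of (user: Int, product: Int, rating: Double) that MLlib's ALS trainer consumes. The object name and the data values below are invented purely for illustration.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.recommendation.{ALS, Rating}

object RatingQuickStart extends App {
  val sc = new SparkContext(new SparkConf().setAppName("RatingQuickStart").setMaster("local[2]"))

  // Rating is just (user, product, rating); users and products are integer ids.
  val ratings = sc.parallelize(Seq(
    Rating(1, 101, 5.0),
    Rating(1, 102, 3.0),
    Rating(2, 101, 4.0),
    Rating(2, 103, 1.0)
  ))

  // Train a tiny ALS model and predict one unseen (user, product) pair.
  val model = ALS.train(ratings, rank = 2, iterations = 5, lambda = 0.01)
  println(model.predict(1, 103))

  sc.stop()
}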

Example 1: RankingDataProvider

// Package name and imported dependencies
package com.github.jongwook

import org.apache.spark.SparkConf
import org.apache.spark.mllib.recommendation.{ALS, MatrixFactorizationModel, Rating}
import org.apache.spark.sql.SparkSession
import org.scalatest._

object RankingDataProvider {

  
  def apply(ratings: Seq[Rating], k: Int = 100): (Seq[Rating], Seq[Rating]) = {

    val spark = SparkSession.builder().master(new SparkConf().get("spark.master", "local[8]")).getOrCreate()
    val sc = spark.sparkContext

    val Array(trainRatings, testRatings) = sc.parallelize(ratings).cache().randomSplit(Array(0.9, 0.1), 0)
    val model = ALS.trainImplicit(trainRatings, rank = 10, iterations = 2, lambda = 2, blocks = 100, alpha = 10)

    val testUsers = testRatings.map(_.user).collect().toSet
    val testUsersBroadcast = spark.sparkContext.broadcast(testUsers)
    val testUserFeatures = model.userFeatures.filter {
      case (user, feature) => testUsersBroadcast.value.contains(user)
    }.repartition(100).cache()

    val testModel = new MatrixFactorizationModel(model.rank, testUserFeatures, model.productFeatures.repartition(100).cache())

    val result = testModel.recommendProductsForUsers(k)

    val prediction = result.values.flatMap(ratings => ratings).collect()
    val groundTruth = testRatings.collect()

    (prediction, groundTruth)
  }
}

class RankingDataProvider extends FlatSpec with Matchers {
  "Ranking Data Provider" should "calculate the rankings" in {
    val ratings = MovieLensLoader.load()
    val (prediction, groundTruth) = RankingDataProvider(ratings)
    prediction.map(_.user).distinct.sorted should equal (groundTruth.map(_.user).distinct.sorted)
  }
} 
Author: jongwook, Project: spark-ranking-metrics, Lines: 43, Source: RankingDataProvider.scala
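The (prediction, groundTruth) pair returned by RankingDataProvider can be scored with any ranking metric. As a hedged illustration only (the project above implements its own metrics), the sketch below groups both sequences by user and feeds them to Spark's built-in RankingMetrics; the object name and the helpers byUser and ndcgAt10 are hypothetical.

import org.apache.spark.SparkContext
import org.apache.spark.mllib.evaluation.RankingMetrics
import org.apache.spark.mllib.recommendation.Rating

object RankingScore {
  // Group ratings per user and order each user's products by descending rating.
  private def byUser(ratings: Seq[Rating]): Map[Int, Array[Int]] =
    ratings.groupBy(_.user).map { case (u, rs) => u -> rs.sortBy(-_.rating).map(_.product).toArray }

  def ndcgAt10(sc: SparkContext, prediction: Seq[Rating], groundTruth: Seq[Rating]): Double = {
    val predicted = byUser(prediction)
    val actual = byUser(groundTruth)
    // One (predicted items, relevant items) pair per test user.
    val pairs = sc.parallelize(actual.keys.toSeq.map { u =>
      (predicted.getOrElse(u, Array.empty[Int]), actual(u))
    })
    new RankingMetrics(pairs).ndcgAt(10)
  }
}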

Example 2: MovieLensLoader

// Package name and imported dependencies
package com.github.jongwook

import org.apache.spark.mllib.recommendation.Rating
import org.scalatest._

import scala.io.Source

object MovieLensLoader {
  
  def load(): Seq[Rating] = {
    val input = getClass.getResource("u.data").openStream()
    try {
      Source.fromInputStream(input).getLines().toArray.map {
        _.split("\t") match {
          case Array(user, item, rating, timestamp) => Rating(user.toInt, item.toInt, rating.toDouble)
        }
      }
    } finally {
      input.close()
    }
  }
}

class MovieLensLoader extends FlatSpec with Matchers {
  "MovieLens Loader" should "load the ml-100k data" in {
    val data = MovieLensLoader.load()
    data.size should be (100000)
    data.map(_.rating).max should be (5.0)
    data.map(_.rating).min should be (1.0)
  }
} 
Author: jongwook, Project: spark-ranking-metrics, Lines: 32, Source: MovieLensLoader.scala

Example 3: trainModel

// Package name and imported dependencies
package com.infosupport.recommendedcontent.core

import akka.actor.{Props, ActorLogging, Actor}
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.{Rating, ALS, MatrixFactorizationModel}

import com.datastax.spark.connector._


  // Excerpt from an Akka actor; the enclosing class declaration is omitted in the original.
  private def trainModel() = {
    val table = context.system.settings.config.getString("cassandra.table")
    val keyspace = context.system.settings.config.getString("cassandra.keyspace")

    // Retrieve the ratings given by users from the database.
    // Map them to the rating structure needed by the Alternating Least Squares algorithm.
    val ratings = sc.cassandraTable(keyspace, table).map(record => Rating(record.get[Int]("user_id"),
      record.get[Int]("item_id"), record.get[Double]("rating")))

    // These settings control how well the predictions are going
    // to fit the actual observations we loaded from Cassandra.
    // Modify these to optimize the model!
    val rank = 10
    val iterations = 10
    val lambda = 0.01

    val model = ALS.train(ratings, rank, iterations, lambda)
    sender ! TrainingResult(model)

    context.stop(self)
  }
} 
Author: wmeints, Project: recommendersystem, Lines: 32, Source: ModelTrainer.scala
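The comment in this example invites tuning ("Modify these to optimize the model!"). A hedged sketch of what that could look like follows, sweeping rank and lambda and scoring each candidate by MSE on a held-out split; the grid values and the AlsTuning object are assumptions for illustration, not part of the original project.

import org.apache.spark.mllib.recommendation.{ALS, Rating}
import org.apache.spark.rdd.RDD

object AlsTuning {
  // Score each (rank, lambda) candidate by mean squared error on a 20% hold-out.
  def tune(ratings: RDD[Rating]): Unit = {
    val Array(train, test) = ratings.randomSplit(Array(0.8, 0.2), seed = 42)
    val testPairs = test.map(r => (r.user, r.product))
    val truth = test.map(r => ((r.user, r.product), r.rating))

    for (rank <- Seq(5, 10, 20); lambda <- Seq(0.01, 0.1)) {
      val model = ALS.train(train, rank, 10, lambda)
      val preds = model.predict(testPairs).map(r => ((r.user, r.product), r.rating))
      val mse = truth.join(preds).values.map { case (a, p) => val e = a - p; e * e }.mean()
      println(s"rank=$rank lambda=$lambda mse=$mse")
    }
  }
}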

Example 4: SparkAlsPredictor

// Package name and imported dependencies
package com.rikima.ml.recommend

import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.{ALS, Rating}
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel


object SparkAlsPredictor {

  def execute(sc: SparkContext, input: String, model_path: String): Unit = {
    // Load and parse the data
    // Keep only the part of each line before the "#" comment marker.
    val data = sc.textFile(input).map { l =>
      val p = l.indexOf("#")
      if (p >= 0) l.substring(0, p) else l
    }
    val ratings = data.map(_.split('\t') match { case Array(user, item, rate) =>
      Rating(user.toInt, item.toInt, rate.toDouble)
    })

    // Load the previously trained ALS recommendation model
    val model = MatrixFactorizationModel.load(sc, model_path)

    // Evaluate the model on rating data
    val usersProducts = ratings.map { case Rating(user, product, rate) =>
      (user, product)
    }
    val predictions =
      model.predict(usersProducts).map { case Rating(user, product, rate) =>
        ((user, product), rate)
      }

    val ratesAndPreds = ratings.map { case Rating(user, product, rate) =>
      ((user, product), rate)
    }.join(predictions)

    val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
      val err = (r1 - r2)
      err * err
    }.mean()
    println("Mean Squared Error = " + MSE)
  }


  def main(args: Array[String]): Unit = {
    var input = ""
    var model_path = ""
    for (i <- 0 until args.length) {
      val a = args(i)
      if (a == "-i" || a == "--input") {
        input = args(i+1)
      }
      if (a == "-m" || a == "--model") {
        model_path = args(i+1)
      }
    }
    val sc = new SparkContext()
    execute(sc, input, model_path)
  }
} 
Author: rikima, Project: spark_als_work, Lines: 61, Source: SparkAlsPredictor.scala

Example 5:

// Package name and imported dependencies
import org.apache.spark.SparkContext
import org.apache.spark.mllib.fpm.FPGrowth
import org.apache.spark.mllib.recommendation.Rating

import scala.collection.mutable.ListBuffer


    // Excerpt: `sc`, `rawData` (the raw ml-100k u.data lines) and `PATH` are
    // defined earlier in the original source file.
    val rawRatings = rawData.map(_.split("\t").take(3))
    rawRatings.first()
    // 14/03/30 13:22:44 INFO SparkContext: Job finished: first at <console>:21, took 0.003703 s
    // res25: Array[String] = Array(196, 242, 3)

    val ratings = rawRatings.map { case Array(user, movie, rating) => Rating(user.toInt, movie.toInt, rating.toDouble) }
    val ratingsFirst = ratings.first()
    println(ratingsFirst)

    val userId = 789
    val K = 10

    val movies = sc.textFile(PATH + "/ml-100k/u.item")
    val titles = movies.map(line => line.split("\\|").take(2)).map(array => (array(0).toInt, array(1))).collectAsMap()
    titles(123)

    var z = Seq[String]()

    val aj = new Array[String](100)
    var i = 0
    for( a <- 801 to 900) {
      val moviesForUserX = ratings.keyBy(_.user).lookup(a)
      val moviesForUserX_10 = moviesForUserX.sortBy(-_.rating).take(10)
      val moviesForUserX_10_1 = moviesForUserX_10.map(r => r.product)
      var temp = ""
      for( x <- moviesForUserX_10_1){
        temp = temp + " " + x
        println(temp)

      }

      aj(i) = temp
      i += 1
    }
    z = aj
    // Trim first: each transaction string was built with a leading space,
    // which would otherwise produce an empty first item.
    val transaction2 = z.map(_.trim.split(" "))

    val rddx = sc.parallelize(transaction2, 2).cache()

    val fpg = new FPGrowth()
    val model6 = fpg
      .setMinSupport(0.1)
      .setNumPartitions(1)
      .run(rddx)

    model6.freqItemsets.collect().foreach { itemset =>
      println(itemset.items.mkString("[", ",", "]") + ", " + itemset.freq)
    }
    sc.stop()
  }

} 
Author: PacktPublishing, Project: Machine-Learning-with-Spark-Second-Edition, Lines: 62, Source: MovieLensFPGrowthApp.scala
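The per-user loop above can also be expressed as a single RDD pipeline. A hedged sketch with the same logic (top 10 movies per user in the 801..900 range, one transaction per user) follows; the TopRatedTransactions object is an invented name.

import org.apache.spark.SparkContext
import org.apache.spark.mllib.fpm.FPGrowth
import org.apache.spark.mllib.recommendation.Rating
import org.apache.spark.rdd.RDD

object TopRatedTransactions {
  // One transaction per user: the ids of that user's 10 highest-rated movies.
  def run(sc: SparkContext, ratings: RDD[Rating]): Unit = {
    val transactions = ratings
      .groupBy(_.user)
      .filter { case (user, _) => user >= 801 && user <= 900 }
      .map { case (_, rs) => rs.toSeq.sortBy(-_.rating).take(10).map(_.product.toString).toArray }

    val model = new FPGrowth().setMinSupport(0.1).setNumPartitions(1).run(transactions)
    model.freqItemsets.collect().foreach { itemset =>
      println(itemset.items.mkString("[", ",", "]") + ", " + itemset.freq)
    }
  }
}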

Example 6:

// Package name and imported dependencies
package com.sparksample

import org.apache.spark.mllib.fpm.FPGrowth
import org.apache.spark.mllib.recommendation.Rating

import scala.collection.mutable.ListBuffer


    // Excerpt: `sc` and `rawData` are defined earlier in the original source file.
    val rawRatings = rawData.map(_.split("\t").take(3))
    rawRatings.first()
    val ratings = rawRatings.map { case Array(user, movie, rating) => Rating(user.toInt, movie.toInt, rating.toDouble) }
    val ratingsFirst = ratings.first()
    println(ratingsFirst)

    val movies = Util.getMovieData()
    val titles = movies.map(line => line.split("\\|").take(2)).map(array => (array(0).toInt, array(1))).collectAsMap()
    titles(123)

    var z = Seq[String]()

    val aj = new Array[String](400)
    var i = 0
    for( a <- 501 to 900) {
      val moviesForUserX = ratings.keyBy(_.user).lookup(a)
      val moviesForUserX_10 = moviesForUserX.sortBy(-_.rating).take(10)
      val moviesForUserX_10_1 = moviesForUserX_10.map(r => r.product)
      var temp = ""
      for( x <- moviesForUserX_10_1){
        if(temp.equals(""))
          temp = x.toString
        else {
          temp =  temp + " " + x
        }
      }

      aj(i) = temp
      i += 1
    }
    z = aj

    val transaction = z.map(_.split(" "))
    val rddx = sc.parallelize(transaction, 2).cache()

    val fpg = new FPGrowth()
    val model = fpg
      .setMinSupport(0.1)
      .setNumPartitions(1)
      .run(rddx)

    model.freqItemsets.collect().foreach { itemset =>
      println(itemset.items.mkString("[", ",", "]") + ", " + itemset.freq)
    }
    sc.stop()
  }

} 
Author: PacktPublishing, Project: Machine-Learning-with-Spark-Second-Edition, Lines: 59, Source: MovieLensFPGrowthApp.scala

Example 7: CollabFilteringApp

// Package name and imported dependencies
package org.apress.prospark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.Rating
import org.apache.spark.rdd.RDD.doubleRDDToDoubleRDDFunctions
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext

object CollabFilteringApp {

  def main(args: Array[String]) {
    if (args.length != 3) {
      System.err.println(
        "Usage: CollabFilteringApp <appname> <batchInterval> <iPath>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, iPath) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    val ratingStream = ssc.textFileStream(iPath).map(_.split(" ") match {
      case Array(subject, activity, freq) =>
        Rating(subject.toInt, activity.toInt, freq.toDouble)
    })

    val rank = 10
    val numIterations = 10
    val lambda = 0.01
    ratingStream.foreachRDD(ratingRDD => {
      val testTrain = ratingRDD.randomSplit(Array(0.3, 0.7))
      val model = ALS.train(testTrain(1), rank, numIterations, lambda)
      val test = testTrain(0).map {
        case Rating(subject, activity, freq) =>
          (subject, activity)
      }
      val prediction = model.predict(test)
      prediction.take(5).map(println)
    })

    ssc.start()
    ssc.awaitTermination()
  }

} 
Author: ZubairNabi, Project: prosparkstreaming, Lines: 52, Source: L9-12CollabFiltering.scala

Example 8: SparkMysql

// Package name and imported dependencies
package com.demo

import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.mllib.recommendation.{ALS, MatrixFactorizationModel, Rating}


object SparkMysql {
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.spark_project.jetty").setLevel(Level.OFF)
    // Silence the embedded Jetty logging
    val sparkConf = new SparkConf().setAppName("SparkMysql").setMaster("local[2]")
    val spark = SparkSession
      .builder()
      .config(sparkConf)
      .getOrCreate()
    val jdbcDF = spark.read
      .format("jdbc")
      .option("url", "jdbc:mysql://127.0.0.1:3306/jeeshop?useUnicode=true&characterEncoding=utf8")
      .option("dbtable", "t_comment")
      .option("user", "root")
      .option("password", "XXXXXX")
      .load().cache()
    //jdbcDF.rdd.foreach(print)
    val ratings = jdbcDF.rdd.map(row => Rating(row.getString(2).toInt, row.getString(1).toInt, row.getInt(8)))
    // Train the recommendation model with ALS
    val rank = 100
    val numIterations = 10
    val model = ALS.train(ratings, rank, numIterations, 0.01)
    model.userFeatures.foreach(println)
  }
} 
Author: Larry3z, Project: SparkStreamingWithKafka, Lines: 35, Source: SparkMysql.scala

Example 9: RecommendationExample

// Package name and imported dependencies
import org.apache.log4j.PropertyConfigurator
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating

object RecommendationExample {
  def main(args: Array[String]): Unit = {
    PropertyConfigurator.configure("file/log4j.properties")
    val conf = new SparkConf().setAppName("CollaborativeFilteringExample").setMaster("local")
    val sc = new SparkContext(conf)
    // Load and parse the data
    val data = sc.textFile("file/test.data")
    val ratings = data.map(_.split(',') match { case Array(user, item, rate) =>
      Rating(user.toInt, item.toInt, rate.toDouble)
    })

    // Build the recommendation model using ALS
    val rank = 10
    val numIterations = 10
    val model = ALS.train(ratings, rank, numIterations, 0.01)

    // Evaluate the model on rating data
    val usersProducts = ratings.map { case Rating(user, product, rate) =>
      (user, product)
    }
    val predictions =
      model.predict(usersProducts).map { case Rating(user, product, rate) =>
        ((user, product), rate)
      }
    val ratesAndPreds = ratings.map { case Rating(user, product, rate) =>
      ((user, product), rate)
    }.join(predictions)
    val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
      val err = (r1 - r2)
      err * err
    }.mean()
    println("----------------------------------------")
    println("-------Mean Squared Error = " + MSE)
    println("----------------------------------------")

    // Save and load model
    model.save(sc, "target/tmp/myCollaborativeFilter")
    val sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
    sameModel.userFeatures.foreach(println)
    val proFCounts = sameModel.productFeatures.count()
    println(proFCounts)

  }
}
// scalastyle:on println 
Author: Larry3z, Project: SparkPractice, Lines: 52, Source: RecommendationExample.scala

Example 10:

// Package name and imported dependencies
package org.apache.spark.examples.mllib

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.Rating

    // Excerpt: `ratings` (MovieLens data) and `pratings` (additional personal ratings)
    // are built above in the original source; each input line is mapped to
    // Rating(userId.toInt, itemId.toInt, rating.toDouble).
    // Combine the two rating sets.
    val movieratings = ratings.union(pratings)
    // Train the ALS model: rank 10, 10 iterations, lambda 0.01.
    val model = ALS.train(movieratings, 10, 10, 0.01)
    // Predict user 944's rating for movie ID 195.
    model.predict(sc.parallelize(Array((944,195)))).collect.foreach(println)
    // Predict user 944's rating for movie ID 402.
    model.predict(sc.parallelize(Array((944,402)))).collect.foreach(println)
    // Predict user 944's rating for movie ID 148.
    model.predict(sc.parallelize(Array((944,148)))).collect.foreach(println)
  }
} 
Author: tophua, Project: spark1.52, Lines: 22, Source: ALSDome.scala

Example 11: UserProductRecoModel

// Package name and imported dependencies
package processing

import java.io.File

import controllers.Global
import org.apache.spark.mllib.recommendation.{Rating, MatrixFactorizationModel}
import org.apache.spark.rdd.RDD
import org.jblas.DoubleMatrix

class UserProductRecoModel(val weightFactor: Array[Double], rank: Int,
                           userFeatures: RDD[(Int, Array[Double])],
                           productFeatures: RDD[(Int, Array[Double])])
  extends MatrixFactorizationModel(rank, userFeatures, productFeatures) {

  override def recommendProducts(user: Int, num: Int): Array[Rating] = {
    recommend(userFeatures.lookup(user).head, productFeatures, num)
      .map(t => Rating(user, t._1, t._2))
  }

  private def recommend(
                         recommendToFeatures: Array[Double],
                         recommendableFeatures: RDD[(Int, Array[Double])],
                         num: Int): Array[(Int, Double)] = {
    val recommendToVector = new DoubleMatrix(recommendToFeatures)
    val scored = recommendableFeatures.map { case (id,features) =>
      (id, recommendToVector.dot(new DoubleMatrix(features).mul(new DoubleMatrix(weightFactor))))
    }
    scored.top(num)(Ordering.by(_._2))
  }

  def withWeightFactor(weightFactor: Array[Double]): UserProductRecoModel = {
    new UserProductRecoModel(weightFactor, this.rank, this.userFeatures, this.productFeatures)
  }

}

object UserProductRecoModel {
  def apply(model: MatrixFactorizationModel): UserProductRecoModel = {
    // Load persisted per-feature weights if present; otherwise default to uniform weights.
    val weightFactor: Array[Double] =
      if (new File("model/featureWeightFactors").exists) {
        Global.ctx.textFile("model/featureWeightFactors").map(_.toDouble).collect()
      } else {
        Array.fill(model.rank)(1.0)
      }
    new UserProductRecoModel(weightFactor, model.rank, model.userFeatures, model.productFeatures)
  }
}
Author: srihari, Project: recommendr, Lines: 44, Source: UserProductRecoModel.scala
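A hedged usage sketch for the weighted model above; the training call, the user id 42, and the weight tweak are invented for illustration.

import org.apache.spark.mllib.recommendation.{ALS, Rating}
import org.apache.spark.rdd.RDD
import processing.UserProductRecoModel

object RecoModelUsage {
  def demo(ratings: RDD[Rating]): Unit = {
    val base = ALS.train(ratings, 10, 10, 0.01)
    // Falls back to uniform weights unless model/featureWeightFactors exists on disk.
    val reco = UserProductRecoModel(base)
    // Double the influence of the first latent feature, then recommend 5 products for user 42.
    val boosted = reco.withWeightFactor(Array.fill(base.rank)(1.0).updated(0, 2.0))
    boosted.recommendProducts(42, 5).foreach(println)
  }
}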

Example 12: cf

// Package name and imported dependencies
package spark

import org.apache.spark.mllib.recommendation.{ALS, Rating}
import org.apache.spark.{SparkConf, SparkContext}
import org.slf4j.LoggerFactory

/**
  * Created by I311352 on 3/29/2017.
  */
class cf {

}

object cf extends App {

}

object RecommendationExample {
  def main(args: Array[String]): Unit = {
    val LOG = LoggerFactory.getLogger(getClass)

    val conf = new SparkConf().setAppName("mltest").setMaster("local[2]")
    val sc = new SparkContext(conf)
    val data = sc.textFile("data/test.data")
    data.foreach(r=>LOG.warn(r))
    val rating = data.map(_.split(",") match {
      case Array(user, item, rate) => Rating(user.toInt, item.toInt, rate.toDouble)
    })


    LOG.warn(rating.toString())

    // Build the recommendation model using ALS
    val rank = 10
    val numIterations = 20
    val model = ALS.train(rating, rank, numIterations, 0.01)

    val userProducts = rating.map { case Rating(user, item, _) => (user, item) }
    val predictions = model.predict(userProducts).map { case Rating(user, product, predicted) => ((user, product), predicted) }
    val ratesAndPreds = rating.map { case Rating(user, product, rate) =>
      ((user, product), rate)
    }.join(predictions)

    val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
      val err = (r1 - r2)
      err * err
    }.mean()

    LOG.warn("Mean Squared Error = " + MSE)

  }
} 
Author: compasses, Project: elastic-spark, Lines: 53, Source: cf.scala

Example 13: trainModel

// Package name and imported dependencies
package com.infosupport.recommendedcontent.core

import akka.actor.{Props, ActorLogging, Actor}
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.{Rating, ALS, MatrixFactorizationModel}

import com.datastax.spark.connector._


  // Excerpt from an Akka actor; the enclosing class declaration is omitted in the original.
  private def trainModel() = {
    val table = context.system.settings.config.getString("cassandra.table")
    val keyspace = context.system.settings.config.getString("cassandra.keyspace")

    // Retrieve the ratings given by users from the database.
    // Map them to the rating structure needed by the Alternating Least Squares algorithm.
    val ratings = sc.cassandraTable(keyspace, table).map(record => Rating(record.get[Int]("user_id"),
      record.get[Int]("category_id"), record.get[Int]("counter")))

    // These settings control how well the predictions are going
    // to fit the actual observations we loaded from Cassandra.
    // Modify these to optimize the model!
    val rank = 10
    val iterations = 10
    val lambda = 0.01

    val model = ALS.train(ratings, rank, iterations, lambda)
    sender ! TrainingResult(model)

    context.stop(self)
  }
} 
Author: giangstrider, Project: Recommendation-Abstract, Lines: 31, Source: ModelTrainer.scala

Example 14: TrainALSModelUsingVotes

// Package name and imported dependencies
package wykopml

import com.typesafe.scalalogging.StrictLogging
import org.apache.spark.mllib.recommendation.{ALS, MatrixFactorizationModel, Rating}
import org.apache.spark.rdd.RDD
import wykopml.TrainALS.EstimatedBestRank
import wykopml.spark.{LoadVotesFromCassandra, WithSpark}

object TrainALSModelUsingVotes extends App with StrictLogging {

  val paths = Paths(".model_votes")

  WithSpark {
    sc =>

      val numIterations = 15
      val rank = 90 //estimated using TrainALS.estimateBestRankValue (For rank 90 and 10 iterations mse is Some(0.06912949551198742))

      val votesRDD = LoadVotesFromCassandra(sc).setName("votes").cache()
      val userMappingsRDD = votesRDD.map(_.who).distinct().zipWithIndex.map(p => (p._1, p._2.toInt))
      val userMappings = userMappingsRDD.collectAsMap()
      val ratings = votesRDD.map {
        v => Rating(userMappings(v.who), v.wykopId, if (v.isUp) 1 else -3)
      }.cache()

      val (model, mse) = TrainALS.createModel(rank, numIterations, ratings, shouldCalculateMse = true)

      println(s"Saving user mappings to ${paths.userMappingsPath}")
      userMappingsRDD.saveAsObjectFile(paths.userMappingsPath)
      println(s"Saving model with rank ${rank} and MSE ${mse} to ${paths.modelPath}")
      model.save(sc, paths.modelPath)

  }

} 
Author: blstream, Project: wykopml, Lines: 36, Source: TrainALSModelUsingVotes.scala
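To close the loop, a hedged sketch of reloading the artifacts this example persists. The LoadVotesModel object and the user name "someUser" are invented; the vote's `who` key is assumed to be a String, and the path arguments stand in for the example's `paths` values.

import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel

object LoadVotesModel {
  def load(sc: SparkContext, modelPath: String, userMappingsPath: String): Unit = {
    val model = MatrixFactorizationModel.load(sc, modelPath)
    // saveAsObjectFile pairs with SparkContext.objectFile for reading back.
    val userMappings = sc.objectFile[(String, Int)](userMappingsPath).collectAsMap()
    // Translate an external user name to its integer id before recommending.
    userMappings.get("someUser").foreach(id => model.recommendProducts(id, 10).foreach(println))
  }
}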


Note: The org.apache.spark.mllib.recommendation.Rating class examples in this article were compiled by 纯净天空 from open-source code hosted on GitHub, MSDocs, and similar platforms. The snippets were selected from open-source projects contributed by many developers; copyright of the source code belongs to the original authors. Please consult each project's license before redistributing or using the code, and do not reproduce this compilation without permission.