This article collects typical usage examples of the Scala class org.apache.spark.mllib.recommendation.Rating. If you are wondering what the Rating class does, how to use it, or where to find real-world examples of it, the curated examples below should help.
Fourteen code examples of the Rating class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Scala code examples.
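As a quick primer before the examples: Rating is a plain case class in Spark MLlib, declared as Rating(user: Int, product: Int, rating: Double). A minimal sketch of constructing one and reading its fields back:

    import org.apache.spark.mllib.recommendation.Rating

    // Rating is a case class: Rating(user: Int, product: Int, rating: Double)
    val r = Rating(196, 242, 3.0)
    println(s"user=${r.user}, product=${r.product}, rating=${r.rating}")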
Example 1: RankingDataProvider
// Package declaration and imported dependencies
package com.github.jongwook

import org.apache.spark.SparkConf
import org.apache.spark.mllib.recommendation.{ALS, MatrixFactorizationModel, Rating}
import org.apache.spark.sql.SparkSession
import org.scalatest._

object RankingDataProvider {
  def apply(ratings: Seq[Rating], k: Int = 100): (Seq[Rating], Seq[Rating]) = {
    val spark = SparkSession.builder().master(new SparkConf().get("spark.master", "local[8]")).getOrCreate()
    val sc = spark.sparkContext

    val Array(trainRatings, testRatings) = sc.parallelize(ratings).cache().randomSplit(Array(0.9, 0.1), 0)
    val model = ALS.trainImplicit(trainRatings, rank = 10, iterations = 2, lambda = 2, blocks = 100, alpha = 10)

    val testUsers = testRatings.map(_.user).collect().toSet
    val testUsersBroadcast = spark.sparkContext.broadcast(testUsers)
    val testUserFeatures = model.userFeatures.filter {
      case (user, feature) => testUsersBroadcast.value.contains(user)
    }.repartition(100).cache()

    val testModel = new MatrixFactorizationModel(model.rank, testUserFeatures, model.productFeatures.repartition(100).cache())
    val result = testModel.recommendProductsForUsers(k)

    val prediction = result.values.flatMap(ratings => ratings).collect()
    val groundTruth = testRatings.collect()
    (prediction, groundTruth)
  }
}

class RankingDataProvider extends FlatSpec with Matchers {
  "Ranking Data Provider" should "calculate the rankings" in {
    val ratings = MovieLensLoader.load()
    val (prediction, groundTruth) = RankingDataProvider(ratings)
    prediction.map(_.user).distinct.sorted should equal (groundTruth.map(_.user).distinct.sorted)
  }
}
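The (prediction, groundTruth) pair returned above is shaped for ranking evaluation. A hedged sketch of how it might be consumed with MLlib's RankingMetrics (the grouping code is illustrative and assumes a SparkContext sc is in scope; it is not part of the original project):

    import org.apache.spark.mllib.evaluation.RankingMetrics

    // Pair each user's predicted product IDs (best first) with the products they actually rated.
    val predicted = prediction.groupBy(_.user).mapValues(_.sortBy(-_.rating).map(_.product))
    val actual = groundTruth.groupBy(_.user).mapValues(_.map(_.product))
    val pairs = predicted.keys.toSeq.map { u =>
      (predicted(u).toArray, actual.getOrElse(u, Array.empty[Int]).toArray)
    }
    val metrics = new RankingMetrics(sc.parallelize(pairs))
    println(s"precision@10 = ${metrics.precisionAt(10)}")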
Example 2: MovieLensLoader
// Package declaration and imported dependencies
package com.github.jongwook

import org.apache.spark.mllib.recommendation.Rating
import org.scalatest._

import scala.io.Source

object MovieLensLoader {
  def load(): Seq[Rating] = {
    val input = getClass.getResource("u.data").openStream()
    try {
      Source.fromInputStream(input).getLines().toArray.map {
        _.split("\t") match {
          case Array(user, item, rating, timestamp) => Rating(user.toInt, item.toInt, rating.toDouble)
        }
      }
    } finally {
      input.close()
    }
  }
}

class MovieLensLoader extends FlatSpec with Matchers {
  "MovieLens Loader" should "load the ml-100k data" in {
    val data = MovieLensLoader.load()
    data.size should be (100000)
    data.map(_.rating).max should be (5.0)
    data.map(_.rating).min should be (1.0)
  }
}
Example 3: trainModel
// Package declaration and imported dependencies
package com.infosupport.recommendedcontent.core

import akka.actor.{Props, ActorLogging, Actor}
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.{Rating, ALS, MatrixFactorizationModel}
import com.datastax.spark.connector._

  private def trainModel() = {
    val table = context.system.settings.config.getString("cassandra.table")
    val keyspace = context.system.settings.config.getString("cassandra.keyspace")

    // Retrieve the ratings given by users from the database.
    // Map them to the rating structure needed by the Alternating Least Squares algorithm.
    val ratings = sc.cassandraTable(keyspace, table).map(record => Rating(record.get[Int]("user_id"),
      record.get[Int]("item_id"), record.get[Double]("rating")))

    // These settings control how well the predictions are going
    // to fit the actual observations we loaded from Cassandra.
    // Modify these to optimize the model!
    val rank = 10
    val iterations = 10
    val lambda = 0.01

    val model = ALS.train(ratings, rank, iterations, lambda)
    sender ! TrainingResult(model)
    context.stop(self)
  }
}
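This listing is a method excerpt: context, sender, sc, and TrainingResult belong to an enclosing Akka actor that the snippet omits. A minimal hypothetical wrapper consistent with those references (the ModelTrainer name and TrainModel message are assumptions, not the original project's code):

    // Hypothetical enclosing actor, inferred from the references inside trainModel().
    case class TrainingResult(model: MatrixFactorizationModel)
    case object TrainModel

    class ModelTrainer(sc: SparkContext) extends Actor with ActorLogging {
      def receive = {
        case TrainModel => trainModel()
      }

      private def trainModel() = {
        // ...body as shown above...
      }
    }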
Example 4: SparkAlsPredictor
// Package declaration and imported dependencies
package com.rikima.ml.recommend

import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.{ALS, Rating}
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel

object SparkAlsPredictor {
  def execute(sc: SparkContext, input: String, model_path: String): Unit = {
    // Load and parse the data; each line is expected to carry a trailing "#..." comment
    val data = sc.textFile(input).map {
      case l =>
        val p = l.indexOf("#")
        l.substring(0, p)
    }
    val ratings = data.map(_.split('\t') match { case Array(user, item, rate) =>
      Rating(user.toInt, item.toInt, rate.toDouble)
    })

    // Load a previously trained recommendation model
    val model = MatrixFactorizationModel.load(sc, model_path)

    // Evaluate the model on the rating data
    val usersProducts = ratings.map { case Rating(user, product, rate) =>
      (user, product)
    }
    val predictions =
      model.predict(usersProducts).map { case Rating(user, product, rate) =>
        ((user, product), rate)
      }
    val ratesAndPreds = ratings.map { case Rating(user, product, rate) =>
      ((user, product), rate)
    }.join(predictions)
    val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
      val err = (r1 - r2)
      err * err
    }.mean()
    println("Mean Squared Error = " + MSE)
  }

  def main(args: Array[String]): Unit = {
    var input = ""
    var model_path = ""
    for (i <- 0 until args.length) {
      val a = args(i)
      if (a == "-i" || a == "--input") {
        input = args(i + 1)
      }
      if (a == "-m" || a == "--model") {
        model_path = args(i + 1)
      }
    }
    val sc = new SparkContext()
    execute(sc, input, model_path)
  }
}
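SparkAlsPredictor only loads a persisted model. As a hedged companion sketch (not part of the original repository), the model it expects could be produced and saved beforehand like this, assuming the same tab-separated user/item/rate input format:

    // Hypothetical trainer producing the model that SparkAlsPredictor loads.
    val raw = sc.textFile("ratings.tsv")
    val ratings = raw.map(_.split('\t') match {
      case Array(user, item, rate) => Rating(user.toInt, item.toInt, rate.toDouble)
    })
    val model = ALS.train(ratings, rank = 10, iterations = 10, lambda = 0.01)
    model.save(sc, "path/to/model")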
Example 5
// Package declaration and imported dependencies
import org.apache.spark.SparkContext
import org.apache.spark.mllib.fpm.FPGrowth
import org.apache.spark.mllib.recommendation.Rating

import scala.collection.mutable.ListBuffer

  val rawRatings = rawData.map(_.split("\t").take(3))
  rawRatings.first()
  // 14/03/30 13:22:44 INFO SparkContext: Job finished: first at <console>:21, took 0.003703 s
  // res25: Array[String] = Array(196, 242, 3)
  val ratings = rawRatings.map { case Array(user, movie, rating) => Rating(user.toInt, movie.toInt, rating.toDouble) }
  val ratingsFirst = ratings.first()
  println(ratingsFirst)

  val userId = 789
  val K = 10
  val movies = sc.textFile(PATH + "/ml-100k/u.item")
  val titles = movies.map(line => line.split("\\|").take(2)).map(array => (array(0).toInt, array(1))).collectAsMap()
  titles(123)

  var eRDD = sc.emptyRDD
  var z = Seq[String]()
  val l = ListBuffer()
  val aj = new Array[String](100)
  var i = 0

  // Collect the top-10 rated movie IDs of users 801 to 900 as space-separated "transactions"
  for (a <- 801 to 900) {
    val moviesForUserX = ratings.keyBy(_.user).lookup(a)
    val moviesForUserX_10 = moviesForUserX.sortBy(-_.rating).take(10)
    val moviesForUserX_10_1 = moviesForUserX_10.map(r => r.product)
    var temp = ""
    for (x <- moviesForUserX_10_1) {
      temp = temp + " " + x
      println(temp)
    }
    aj(i) = temp
    i += 1
  }
  z = aj

  // Mine frequent itemsets from the per-user top-10 lists with FP-growth
  val transaction2 = z.map(_.split(" "))
  val rddx = sc.parallelize(transaction2, 2).cache()
  val fpg = new FPGrowth()
  val model6 = fpg
    .setMinSupport(0.1)
    .setNumPartitions(1)
    .run(rddx)
  model6.freqItemsets.collect().foreach { itemset =>
    println(itemset.items.mkString("[", ",", "]") + ", " + itemset.freq)
  }
  sc.stop()
  }
}
Developer: PacktPublishing, Project: Machine-Learning-with-Spark-Second-Edition, Lines: 62, Source file: MovieLensFPGrowthApp.scala
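This listing is an excerpt (the source file runs 62 lines), so rawData, sc, and PATH are defined earlier in MovieLensFPGrowthApp.scala. A plausible setup, offered as an assumption rather than the book's exact code:

    import org.apache.spark.{SparkConf, SparkContext}

    val conf = new SparkConf().setAppName("MovieLensFPGrowthApp").setMaster("local[2]")
    val sc = new SparkContext(conf)
    val PATH = "data" // assumed root directory of the MovieLens ml-100k download
    val rawData = sc.textFile(PATH + "/ml-100k/u.data")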
Example 6
// Package declaration and imported dependencies
package com.sparksample

import org.apache.spark.mllib.fpm.FPGrowth
import org.apache.spark.mllib.recommendation.Rating

import scala.collection.mutable.ListBuffer

  val rawRatings = rawData.map(_.split("\t").take(3))
  rawRatings.first()
  val ratings = rawRatings.map { case Array(user, movie, rating) => Rating(user.toInt, movie.toInt, rating.toDouble) }
  val ratingsFirst = ratings.first()
  println(ratingsFirst)

  val movies = Util.getMovieData()
  val titles = movies.map(line => line.split("\\|").take(2)).map(array => (array(0).toInt, array(1))).collectAsMap()
  titles(123)

  var eRDD = sc.emptyRDD
  var z = Seq[String]()
  val l = ListBuffer()
  val aj = new Array[String](400)
  var i = 0
  for (a <- 501 to 900) {
    val moviesForUserX = ratings.keyBy(_.user).lookup(a)
    val moviesForUserX_10 = moviesForUserX.sortBy(-_.rating).take(10)
    val moviesForUserX_10_1 = moviesForUserX_10.map(r => r.product)
    var temp = ""
    for (x <- moviesForUserX_10_1) {
      if (temp.equals(""))
        temp = x.toString
      else {
        temp = temp + " " + x
      }
    }
    aj(i) = temp
    i += 1
  }
  z = aj

  val transaction = z.map(_.split(" "))
  val rddx = sc.parallelize(transaction, 2).cache()
  val fpg = new FPGrowth()
  val model = fpg
    .setMinSupport(0.1)
    .setNumPartitions(1)
    .run(rddx)
  model.freqItemsets.collect().foreach { itemset =>
    println(itemset.items.mkString("[", ",", "]") + ", " + itemset.freq)
  }
  sc.stop()
  }
}
Developer: PacktPublishing, Project: Machine-Learning-with-Spark-Second-Edition, Lines: 59, Source file: MovieLensFPGrowthApp.scala
Example 7: CollabFilteringApp
// Package declaration and imported dependencies
package org.apress.prospark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.Rating
import org.apache.spark.rdd.RDD.doubleRDDToDoubleRDDFunctions
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext

object CollabFilteringApp {
  def main(args: Array[String]) {
    if (args.length != 3) {
      System.err.println(
        "Usage: CollabFilteringApp <appname> <batchInterval> <iPath>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, iPath) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)
    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    val ratingStream = ssc.textFileStream(iPath).map(_.split(" ") match {
      case Array(subject, activity, freq) =>
        Rating(subject.toInt, activity.toInt, freq.toDouble)
    })

    val rank = 10
    val numIterations = 10
    val lambda = 0.01
    ratingStream.foreachRDD(ratingRDD => {
      val testTrain = ratingRDD.randomSplit(Array(0.3, 0.7))
      val model = ALS.train(testTrain(1), rank, numIterations, lambda)
      val test = testTrain(0).map {
        case Rating(subject, activity, freq) =>
          (subject, activity)
      }
      val prediction = model.predict(test)
      prediction.take(5).map(println)
    })

    ssc.start()
    ssc.awaitTermination()
  }
}
Example 8: SparkMysql
// Package declaration and imported dependencies
package com.demo

import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.mllib.recommendation.{ALS, MatrixFactorizationModel, Rating}

object SparkMysql {
  def main(args: Array[String]): Unit = {
    // Silence Spark and Jetty logging
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.spark_project.jetty").setLevel(Level.OFF)

    val sparkConf = new SparkConf().setAppName("SparkMysql").setMaster("local[2]")
    val spark = SparkSession
      .builder()
      .config(sparkConf)
      .getOrCreate()

    val jdbcDF = spark.read
      .format("jdbc")
      .option("url", "jdbc:mysql://127.0.0.1:3306/jeeshop?useUnicode=true&characterEncoding=utf8")
      .option("dbtable", "t_comment")
      .option("user", "root")
      .option("password", "XXXXXX")
      .load().cache()
    //jdbcDF.rdd.foreach(print)
    val ratings = jdbcDF.rdd.map(row => Rating(row.getString(2).toInt, row.getString(1).toInt, row.getInt(8)))

    // Train the recommendation model with ALS
    val rank = 100
    val numIterations = 10
    val model = ALS.train(ratings, rank, numIterations, 0.01)
    model.userFeatures.foreach(println)
  }
}
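The example stops after printing the user factors. A natural follow-up, sketched here as an assumption (user ID 1 and product ID 2 are placeholders, not values from the jeeshop database):

    // Top-5 recommendations for one user, plus a single predicted rating.
    model.recommendProducts(1, 5).foreach(println)
    println(model.predict(1, 2))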
Example 9: RecommendationExample
// Package declaration and imported dependencies
import org.apache.log4j.PropertyConfigurator
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating

object RecommendationExample {
  def main(args: Array[String]): Unit = {
    PropertyConfigurator.configure("file/log4j.properties")
    val conf = new SparkConf().setAppName("CollaborativeFilteringExample").setMaster("local")
    val sc = new SparkContext(conf)

    // Load and parse the data
    val data = sc.textFile("file/test.data")
    val ratings = data.map(_.split(',') match { case Array(user, item, rate) =>
      Rating(user.toInt, item.toInt, rate.toDouble)
    })

    // Build the recommendation model using ALS
    val rank = 10
    val numIterations = 10
    val model = ALS.train(ratings, rank, numIterations, 0.01)

    // Evaluate the model on rating data
    val usersProducts = ratings.map { case Rating(user, product, rate) =>
      (user, product)
    }
    val predictions =
      model.predict(usersProducts).map { case Rating(user, product, rate) =>
        ((user, product), rate)
      }
    val ratesAndPreds = ratings.map { case Rating(user, product, rate) =>
      ((user, product), rate)
    }.join(predictions)
    val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
      val err = (r1 - r2)
      err * err
    }.mean()
    println("----------------------------------------")
    println("-------Mean Squared Error = " + MSE)
    println("----------------------------------------")

    // Save and load model
    model.save(sc, "target/tmp/myCollaborativeFilter")
    val sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
    sameModel.userFeatures.foreach(println)
    val proFCounts = sameModel.productFeatures.count()
    println(proFCounts)
  }
}
// scalastyle:on println
Example 10
// Package declaration and imported dependencies
package org.apache.spark.examples.mllib

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.Rating

  // (excerpt — the map that builds `ratings`/`pratings` from raw text is cut off here)
    Rating(userId.toInt, itemId.toInt, rating.toDouble)
  }

  // Merge the existing ratings with the new user's (ID 944) ratings
  val movieratings = ratings.union(pratings)

  // Train the ALS model (rank = 10, iterations = 10, lambda = 0.01)
  val model = ALS.train(movieratings, 10, 10, 0.01)

  // Predict how user 944 would rate movie ID 195
  model.predict(sc.parallelize(Array((944, 195)))).collect.foreach(println)
  // Predict how user 944 would rate movie ID 402
  model.predict(sc.parallelize(Array((944, 402)))).collect.foreach(println)
  // Predict how user 944 would rate movie ID 148
  model.predict(sc.parallelize(Array((944, 148)))).collect.foreach(println)
  }
}
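The listing above begins mid-expression: the code defining ratings and pratings was cut from the excerpt. A plausible reconstruction of the omitted preamble, offered purely as an assumption (the specific movie IDs and scores for user 944 are invented):

    // Assumed preamble: parse MovieLens ratings, plus a few ratings entered by new user 944.
    val ratings = sc.textFile("ml-100k/u.data").map(_.split("\t")).map {
      case Array(userId, itemId, rating, _) =>
        Rating(userId.toInt, itemId.toInt, rating.toDouble)
    }
    val pratings = sc.parallelize(Seq(
      Rating(944, 50, 5.0),
      Rating(944, 100, 4.0)
    ))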
Example 11: UserProductRecoModel
// Package declaration and imported dependencies
package processing

import java.io.File

import controllers.Global
import org.apache.spark.mllib.recommendation.{Rating, MatrixFactorizationModel}
import org.apache.spark.rdd.RDD
import org.jblas.DoubleMatrix

class UserProductRecoModel(val weightFactor: Array[Double], rank: Int,
                           userFeatures: RDD[(Int, Array[Double])],
                           productFeatures: RDD[(Int, Array[Double])])
  extends MatrixFactorizationModel(rank, userFeatures, productFeatures) {

  override def recommendProducts(user: Int, num: Int): Array[Rating] = {
    recommend(userFeatures.lookup(user).head, productFeatures, num)
      .map(t => Rating(user, t._1, t._2))
  }

  private def recommend(
      recommendToFeatures: Array[Double],
      recommendableFeatures: RDD[(Int, Array[Double])],
      num: Int): Array[(Int, Double)] = {
    val recommendToVector = new DoubleMatrix(recommendToFeatures)
    val scored = recommendableFeatures.map { case (id, features) =>
      (id, recommendToVector.dot(new DoubleMatrix(features).mul(new DoubleMatrix(weightFactor))))
    }
    scored.top(num)(Ordering.by(_._2))
  }

  def withWeightFactor(weightFactor: Array[Double]): UserProductRecoModel = {
    new UserProductRecoModel(weightFactor, this.rank, this.userFeatures, this.productFeatures)
  }
}

object UserProductRecoModel {
  def apply(model: MatrixFactorizationModel): UserProductRecoModel = {
    val weightFactor: Array[Double] =
      if (new File("model/featureWeightFactors").exists) {
        Global.ctx.textFile("model/featureWeightFactors").map(_.toDouble).collect()
      } else {
        new Array[Double](model.rank).map(x => 1.0)
      }
    new UserProductRecoModel(weightFactor, model.rank, model.userFeatures, model.productFeatures)
  }
}
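A short usage sketch (the ALS call, user ID, and weight values are illustrative assumptions, with ratings assumed in scope): wrap a trained model and re-score products with per-feature weights, here doubling the influence of the first latent feature:

    import org.apache.spark.mllib.recommendation.ALS

    val base: MatrixFactorizationModel = ALS.train(ratings, 10, 10, 0.01)
    val weighted = UserProductRecoModel(base)
      .withWeightFactor(Array(2.0) ++ Array.fill(base.rank - 1)(1.0))
    weighted.recommendProducts(42, 5).foreach(println)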
Example 12: cf
// Package declaration and imported dependencies
package spark

import org.apache.spark.mllib.recommendation.{ALS, Rating}
import org.apache.spark.{SparkConf, SparkContext}
import org.slf4j.LoggerFactory

/**
  * Created by I311352 on 3/29/2017.
  */
class cf {
}

object cf extends App {
}

object RecommendationExample {
  def main(args: Array[String]): Unit = {
    val LOG = LoggerFactory.getLogger(getClass)
    val conf = new SparkConf().setAppName("mltest").setMaster("local[2]")
    val sc = new SparkContext(conf)

    val data = sc.textFile("data/test.data")
    data.foreach(r => LOG.warn(r))
    val rating = data.map(_.split(",") match {
      case Array(user, item, rate) => Rating(user.toInt, item.toInt, rate.toDouble)
    })
    LOG.warn(rating.toString())

    // Build the recommendation model using ALS
    val rank = 10
    val numIterations = 20
    val model = ALS.train(rating, rank, numIterations, 0.01)

    val userProducts = rating.map { case Rating(user, item, rating) => (user, item) }
    val predictions = model.predict(userProducts).map { case Rating(user, product, rating) => ((user, product), rating) }
    val ratesAndPreds = rating.map { case Rating(user, product, rate) =>
      ((user, product), rate)
    }.join(predictions)
    val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
      val err = (r1 - r2)
      err * err
    }.mean()
    LOG.warn("Mean Squared Error = " + MSE)
  }
}
Example 13: trainModel
// Package declaration and imported dependencies
package com.infosupport.recommendedcontent.core

import akka.actor.{Props, ActorLogging, Actor}
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.{Rating, ALS, MatrixFactorizationModel}
import com.datastax.spark.connector._

  private def trainModel() = {
    val table = context.system.settings.config.getString("cassandra.table")
    val keyspace = context.system.settings.config.getString("cassandra.keyspace")

    // Retrieve the ratings given by users from the database.
    // Map them to the rating structure needed by the Alternating Least Squares algorithm.
    val ratings = sc.cassandraTable(keyspace, table).map(record => Rating(record.get[Int]("user_id"),
      record.get[Int]("category_id"), record.get[Int]("counter")))

    // These settings control how well the predictions are going
    // to fit the actual observations we loaded from Cassandra.
    // Modify these to optimize the model!
    val rank = 10
    val iterations = 10
    val lambda = 0.01

    val model = ALS.train(ratings, rank, iterations, lambda)
    sender ! TrainingResult(model)
    context.stop(self)
  }
}
Example 14: TrainALSModelUsingVotes
// Package declaration and imported dependencies
package wykopml

import com.typesafe.scalalogging.StrictLogging
import org.apache.spark.mllib.recommendation.{ALS, MatrixFactorizationModel, Rating}
import org.apache.spark.rdd.RDD
import wykopml.TrainALS.EstimatedBestRank
import wykopml.spark.{LoadVotesFromCassandra, WithSpark}

object TrainALSModelUsingVotes extends App with StrictLogging {
  val paths = Paths(".model_votes")

  WithSpark {
    sc =>
      val numIterations = 15
      val rank = 90 // estimated using TrainALS.estimateBestRankValue (for rank 90 and 10 iterations, MSE is Some(0.06912949551198742))

      val votesRDD = LoadVotesFromCassandra(sc).setName("votes").cache()
      val userMappingsRDD = votesRDD.map(_.who).distinct().zipWithIndex.map(p => (p._1, p._2.toInt))
      val userMappings = userMappingsRDD.collectAsMap()

      // Upvotes count as 1; downvotes are penalized more heavily at -3
      val ratings = votesRDD.map {
        v => Rating(userMappings(v.who), v.wykopId, if (v.isUp) 1 else -3)
      }.cache()

      val (model, mse) = TrainALS.createModel(rank, numIterations, ratings, shouldCalculateMse = true)

      println(s"Saving user mappings to ${paths.userMappingsPath}")
      userMappingsRDD.saveAsObjectFile(paths.userMappingsPath)
      println(s"Saving model with rank ${rank} and MSE ${mse} to ${paths.modelPath}")
      model.save(sc, paths.modelPath)
  }
}
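TrainALS.createModel is project code not shown in this listing; its signature is only inferred from the call site and from the Some(...) MSE mentioned in the rank comment. A minimal sketch of what such a helper might look like, as an assumption rather than the wykopml implementation (the lambda value here is a guess):

    // Hypothetical helper matching the call site above.
    def createModel(rank: Int, numIterations: Int, ratings: RDD[Rating],
                    shouldCalculateMse: Boolean): (MatrixFactorizationModel, Option[Double]) = {
      val model = ALS.train(ratings, rank, numIterations, 0.01)
      val mse = if (shouldCalculateMse) {
        val predictions = model
          .predict(ratings.map(r => (r.user, r.product)))
          .map(r => ((r.user, r.product), r.rating))
        val joined = ratings.map(r => ((r.user, r.product), r.rating)).join(predictions)
        Some(joined.map { case (_, (actual, predicted)) => math.pow(actual - predicted, 2) }.mean())
      } else {
        None
      }
      (model, mse)
    }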