本文整理汇总了Java中org.apache.spark.mllib.recommendation.MatrixFactorizationModel类的典型用法代码示例。如果您正苦于以下问题:Java MatrixFactorizationModel类的具体用法?Java MatrixFactorizationModel怎么用?Java MatrixFactorizationModel使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
MatrixFactorizationModel类属于org.apache.spark.mllib.recommendation包,在下文中一共展示了MatrixFactorizationModel类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: rmse
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; //导入依赖的package包/类
/**
 * Computes root mean squared error of {@link Rating#rating()} versus predicted value.
 *
 * @param mfModel trained factorization model used to generate predictions
 * @param testData held-out ratings to score against
 * @return RMSE over all (user, product) pairs in {@code testData}
 */
static double rmse(MatrixFactorizationModel mfModel, JavaRDD<Rating> testData) {
  // Key each observed rating by its (user, product) pair.
  JavaPairRDD<Tuple2<Integer,Integer>,Double> actualByUserProduct = testData.mapToPair(
      r -> new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating()));
  // predict() wants RDD<Tuple2<Object,Object>>; the double cast only erases the
  // Integer key types, so suppressing the unchecked warning is safe here.
  @SuppressWarnings("unchecked")
  RDD<Tuple2<Object,Object>> userProducts =
      (RDD<Tuple2<Object,Object>>) (RDD<?>) actualByUserProduct.keys().rdd();
  JavaRDD<Rating> predicted = testData.wrapRDD(mfModel.predict(userProducts));
  JavaPairRDD<Tuple2<Integer,Integer>,Double> predictedByUserProduct = predicted.mapToPair(
      r -> new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating()));
  // Join predicted with actual on the (user, product) key, then average the
  // squared differences (predicted is _1, actual is _2 after the join).
  double meanSquaredError = predictedByUserProduct
      .join(actualByUserProduct)
      .values()
      .mapToDouble(pair -> {
        double delta = pair._1() - pair._2();
        return delta * delta;
      })
      .mean();
  return Math.sqrt(meanSquaredError);
}
示例2: recommendProducts
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; //导入依赖的package包/类
/**
 * Recommends products for a given user.
 *
 * @param model Matrix factorization model
 * @param userId The user to recommend products to
 * @param numberOfProducts Number of products to return
 * @return List of productIds recommended to the given user
 * @throws MLModelHandlerException if the model does not know the given user id
 */
public static List<Integer> recommendProducts(final MatrixFactorizationModel model, int userId, int numberOfProducts)
        throws MLModelHandlerException {
    try {
        final Rating[] topRatings = model.recommendProducts(userId, numberOfProducts);
        final List<Integer> products = new ArrayList<Integer>();
        // Keep only the product ids; predicted scores are not exposed to the caller.
        for (Rating r : topRatings) {
            products.add(r.product());
        }
        return products;
    } catch (NoSuchElementException e) {
        // The model signals an unknown user via NoSuchElementException.
        throw new MLModelHandlerException("Invalid user id: " + userId);
    }
}
示例3: updateModel
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; //导入依赖的package包/类
/**
 * Re-trains the ALS model for one domain from the interactions currently
 * pooled for that domain, and publishes the result into {@code results}.
 *
 * to reduce task sizes, all spark tasks need to access local variables only.
 * A transparent access over >this< forces spark to serialize the parent object.
 * To mark this in the implementation, all variables with forced local
 * access have a '_' prefix.
 * @param domainID domain whose pooled interactions are used for training
 */
private void updateModel(Long domainID) {
// Convert each pooled (id, id) pair into an implicit-feedback rating of 1.
// NOTE(review): raw Tuple2 is used because LimitedList<Tuple2> is declared raw;
// the casts assume both tuple elements are Longs — confirm against the pool writer.
ArrayList<Rating> _items = new ArrayList<>();
LimitedList<Tuple2> list = pool.get(domainID);
int max = list.size();
for (int i = 0; i < max; i++) {
Tuple2 t = list.get(i);
_items.add(new Rating(((Long)t._1()).intValue(),((Long)t._2()).intValue(),1));
}
JavaRDD<Rating> rdd = SharedService.parallelize(_items);
try {
// RANK/ITERATIONS/LAMBDA are class-level ALS hyper-parameters.
MatrixFactorizationModel model = ALS.train(rdd.rdd(), RANK, ITERATIONS, LAMBDA);
results.put(domainID, model);
}catch (Exception e){
// Training is best-effort: a failed run is logged and the old model (if any) stays.
log.error("worker failed", e);
}
}
示例4: recommendUsers
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; //导入依赖的package包/类
/**
 * Recommends users for a given product (i.e. the users who are most likely to
 * be interested in the given product).
 *
 * @param model Matrix factorization model
 * @param productId The product to recommend users for
 * @param numberOfUsers Number of users to return
 * @return List of userIds recommended for the given product
 * @throws MLModelHandlerException if the model does not know the given product id
 */
public static List<Integer> recommendUsers(final MatrixFactorizationModel model, int productId, int numberOfUsers)
        throws MLModelHandlerException {
    try {
        final Rating[] topRatings = model.recommendUsers(productId, numberOfUsers);
        final List<Integer> users = new ArrayList<Integer>();
        // Keep only the user ids; predicted scores are not exposed to the caller.
        for (Rating r : topRatings) {
            users.add(r.user());
        }
        return users;
    } catch (NoSuchElementException e) {
        // The model signals an unknown product via NoSuchElementException.
        throw new MLModelHandlerException("Invalid product id: " + productId);
    }
}
示例5: readExternal
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; //导入依赖的package包/类
@SuppressWarnings("unchecked")
@Override
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
    // Fields must be read back in the exact order writeExternal wrote them:
    // rank first, then user features, then product features.
    final int rank = in.readInt();
    final List<Tuple2<Object, double[]>> userFeaturesList = (List<Tuple2<Object, double[]>>) in.readObject();
    final List<Tuple2<Object, double[]>> productFeaturesList = (List<Tuple2<Object, double[]>>) in.readObject();
    // Re-distribute the de-serialized feature lists as RDDs before rebuilding the model.
    MLCoreServiceValueHolder holder = MLCoreServiceValueHolder.getInstance();
    RDD<Tuple2<Object, double[]>> userFeatureRdd = holder.getSparkContext().parallelize(userFeaturesList).rdd();
    RDD<Tuple2<Object, double[]>> productFeatureRdd = holder.getSparkContext().parallelize(productFeaturesList).rdd();
    model = new MatrixFactorizationModel(rank, userFeatureRdd, productFeatureRdd);
    if (log.isDebugEnabled()) {
        log.debug("Rank, user features and product features were de-serialized successfully and loaded "
                + "MatrixFactorizationModel.");
    }
}
示例6: TestALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; //导入依赖的package包/类
// Trains an ALS collaborative-filtering model over the ratings table and
// spot-checks one prediction. Currently disabled via @Ignore.
@Ignore
public void TestALS() throws DDFException {
    createTableRatings();
    DDF ratings = manager.sql2ddf("select userid, movieid, score from ratings", false);
    // ALS hyper-parameters: latent factors, regularization strength, iterations.
    final int rank = 3;
    final double lambda = 10;
    final int iterNum = 15;
    MatrixFactorizationModel model =
            (MatrixFactorizationModel) ratings.ML.train("collaborativeFiltering", rank, iterNum, lambda).getRawModel();
    // Predict the rating user 1 would give movie 4.
    double r = model.predict(1, 4);
    System.out.println(">>>RATING: " + r);
    manager.shutdown();
}
示例7: getRecommend
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; //导入依赖的package包/类
/**
 * Trains an ALS model on the {@code ratings} RDD and returns every learned
 * product feature vector, JSON-encoded as "productId,[f0, f1, ...]" strings.
 *
 * @return JSON array of per-product feature descriptions
 */
public String getRecommend() {
    // blocks == -1 lets Spark choose the parallelism level automatically.
    final int rank = 5, iterations = 1, blocks = -1;
    // (Removed an unused ArrayList<String> local that was never read or written.)
    MatrixFactorizationModel model = ALS.train(ratings.rdd(), rank, iterations, 0.01, blocks);
    return new Gson().toJson(
            model.productFeatures().toJavaRDD().map(element ->
                    (element._1() + "," + Arrays.toString(element._2())))
            .collect());
}
示例8: pmmlToMFModel
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; //导入依赖的package包/类
/**
 * Rebuilds a Spark MatrixFactorizationModel from a PMML description plus the
 * user ("X") and item ("Y") feature matrices stored next to the model file.
 *
 * @param sparkContext context used to read the feature RDDs
 * @param pmml PMML document carrying the "X"/"Y" extension paths
 * @param modelParentPath directory the extension paths are relative to
 * @param bUserIDToIndex broadcast map from user ID to matrix row index
 * @param bItemIDToIndex broadcast map from item ID to matrix row index
 * @return reconstructed factorization model
 */
private static MatrixFactorizationModel pmmlToMFModel(JavaSparkContext sparkContext,
                                                      PMML pmml,
                                                      Path modelParentPath,
                                                      Broadcast<Map<String,Integer>> bUserIDToIndex,
                                                      Broadcast<Map<String,Integer>> bItemIDToIndex) {
    // Relative locations of the two factor matrices are recorded as PMML extensions.
    Path userFeaturesPath = new Path(modelParentPath, AppPMMLUtils.getExtensionValue(pmml, "X"));
    Path itemFeaturesPath = new Path(modelParentPath, AppPMMLUtils.getExtensionValue(pmml, "Y"));
    JavaPairRDD<String,float[]> userRDD = readFeaturesRDD(sparkContext, userFeaturesPath);
    JavaPairRDD<String,float[]> productRDD = readFeaturesRDD(sparkContext, itemFeaturesPath);
    // The rank is implied by the width of any single feature vector.
    int rank = userRDD.first()._2().length;
    return new MatrixFactorizationModel(
        rank,
        readAndConvertFeatureRDD(userRDD, bUserIDToIndex),
        readAndConvertFeatureRDD(productRDD, bItemIDToIndex));
}
示例9: predictAll
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; //导入依赖的package包/类
/**
 * Predicts a rating for every supplied (user, product) pair and groups the
 * resulting Rating objects by user ID.
 *
 * @param mfModel trained factorization model used to score the pairs
 * @param data any JavaRDD of Rating; used only via wrapRDD to re-wrap the
 *             Scala RDD of predictions as a JavaRDD (its contents are not read)
 * @param userProducts (user, product) pairs to predict ratings for
 * @return predicted ratings keyed and grouped by user ID
 */
private static JavaPairRDD<Integer,Iterable<Rating>> predictAll(
MatrixFactorizationModel mfModel,
JavaRDD<Rating> data,
JavaPairRDD<Integer,Integer> userProducts) {
// predict() expects RDD<Tuple2<Object,Object>>; the double cast only erases
// the Integer key types, so suppressing the unchecked warning is safe.
@SuppressWarnings("unchecked")
RDD<Tuple2<Object,Object>> userProductsRDD =
(RDD<Tuple2<Object,Object>>) (RDD<?>) userProducts.rdd();
return data.wrapRDD(mfModel.predict(userProductsRDD)).groupBy(Rating::user);
}
示例10: create
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; //导入依赖的package包/类
/**
 * Trains an ALS model on the training ratings and evaluates its RMSE on the
 * test ratings.
 *
 * @param trainingRdd ratings used for training
 * @param testRdd ratings held out for error measurement
 * @param rank number of latent factors
 * @param iterationsNr number of ALS iterations
 * @return the trained model together with its test-set error
 */
public static TrainedModel create(JavaRDD<Rating> trainingRdd, JavaRDD<Rating> testRdd, int rank, int iterationsNr) {
    logger.info(String.format("Train with parameters -> iterations: %d, rank :%d", iterationsNr, rank));
    // Reduce each test rating to the (user, product) key we want a prediction for.
    JavaRDD<Tuple2<Object, Object>> testForPredict =
            testRdd.map(r -> new Tuple2<>(r.user(), r.product()));
    TimeKeeper timeKeeper = new TimeKeeper();
    timeKeeper.start();
    // Fixed regularization parameter lambda = 0.1.
    MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(trainingRdd), rank, iterationsNr, 0.1);
    timeKeeper.end().print(logger, "als model trained in ").reset();
    Double error = getError(testRdd, rank, iterationsNr, testForPredict, timeKeeper, model);
    return new TrainedModel(error, model);
}
示例11: getError
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; //导入依赖的package包/类
/**
 * Computes the RMSE of the model's predictions over the test ratings, timing
 * and logging the result.
 *
 * @param testRdd actual ratings to compare against
 * @param rank latent-factor count (logged only)
 * @param iterationsNr ALS iteration count (logged only)
 * @param testForPredict (user, product) keys the model should score
 * @param timeKeeper timer used to measure the RMSE computation
 * @param model trained model producing the predictions
 * @return root mean squared error of predicted vs. actual ratings
 */
private static Double getError(JavaRDD<Rating> testRdd, int rank, int iterationsNr, JavaRDD<Tuple2<Object, Object>> testForPredict, TimeKeeper timeKeeper, MatrixFactorizationModel model) {
    timeKeeper.start();
    // Key every predicted rating by its (user, product) pair for computeRMSE's join.
    JavaPairRDD<Tuple2<Integer, Integer>, Double> predictions = JavaPairRDD.fromJavaRDD(
            model.predict(testForPredict.rdd())
                    .toJavaRDD()
                    .map(p -> new Tuple2<>(new Tuple2<>(p.user(), p.product()), p.rating()))
    );
    Double rmse = computeRMSE(predictions, testRdd);
    timeKeeper.end().print(logger, "rmse calculated in ").reset();
    logger.info(String.format("For rank %d and iterations %d the RMSE is %f", rank, iterationsNr, rmse));
    return rmse;
}
示例12: predict
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; //导入依赖的package包/类
/**
 * Produces up to {@code numberOfRequestedResults} recommended item IDs for the
 * given item's domain, falling back to {@link #predictFallback} when no model
 * has been trained for that domain yet.
 *
 * @param item the triggering item (its ID is fed to the model)
 * @param domainID domain whose model should be used
 * @param numberOfRequestedResults maximum number of recommendations to return
 * @return recommended item IDs, or null when item/result-count is missing
 */
@Override
public List<Long> predict(Item item, Long domainID, Integer numberOfRequestedResults) {
// Guard clauses: nothing sensible to recommend without an item or a result count.
if (item == null) return null;
if (numberOfRequestedResults == null) return null;
//update(rdd,domainID);
System.out.println("domainID "+domainID+"\nuserID "+item.getUserID());
// NOTE(review): the item ID is passed below as the "user" argument of
// recommendProducts(); confirm this item-as-user mapping is intentional.
int itemID = item.getItemID().intValue();
//Model and user item matrix are domain ID specific
Tuple<Model, JavaRDD> modelMatrixTuple = modelTable.get(domainID);
if (modelMatrixTuple == null) { // model is not trained yet
// fallback solution
return predictFallback(item,domainID,numberOfRequestedResults);
}
MatrixFactorizationModel model = modelMatrixTuple.model.self; // matrix is not null
System.out.println("prodFeatures "+model.productFeatures().count());
System.out.println("userFeatures "+model.userFeatures().count());
Rating[] results = model.recommendProducts(itemID,numberOfRequestedResults);
System.out.println("recommend: "+Arrays.toString(results));
// Write only the item IDs into the result list
List<Long> returnResult = new ArrayList<Long>();
for (Rating r : results) {
returnResult.add((long) r.product());
}
System.out.println(returnResult);
return returnResult;
}
示例13: train
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; //导入依赖的package包/类
/**
 * Loads the ratings table from Cassandra and trains an ALS factorization
 * model on it.
 *
 * @param sparkCtx Spark context used to read the Cassandra table
 * @param cassandraConnector connector for the Cassandra cluster (unused here;
 *        kept for interface compatibility)
 * @return the trained factorization model
 */
public MatrixFactorizationModel train(JavaSparkContext sparkCtx, CassandraConnector cassandraConnector) {
    // Pull the raw rating rows from the employer-ratings keyspace.
    CassandraJavaRDD<CassandraRow> ratingRows =
            javaFunctions(sparkCtx).cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.RATINGS_TABLE);
    // Map each Cassandra row onto a Spark Rating(user, product, rating).
    JavaRDD<Rating> ratings = ratingRows.map(new org.apache.spark.api.java.function.Function<CassandraRow, Rating>() {
        @Override
        public Rating call(CassandraRow row) throws Exception {
            return new Rating(row.getInt(RatingDO.USER_COL), row.getInt(RatingDO.PRODUCT_COL), row.getDouble(RatingDO.RATING_COL));
        }
    });
    // RANK/ITER/LAMBDA are class-level ALS hyper-parameters.
    return ALS.train(JavaRDD.toRDD(ratings), RANK, ITER, LAMBDA);
}
示例14: predict
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; //导入依赖的package包/类
/**
 * Predicts a rating for every (user, product) pair found in the validation rows.
 *
 * @param model trained factorization model
 * @param validationsCassRdd validation rows holding user and product columns
 * @return predicted ratings for each validation pair
 */
public JavaRDD<Rating> predict(MatrixFactorizationModel model, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
    // Reduce each validation row to the (user, product) key the model scores.
    JavaRDD<Tuple2<Object, Object>> validationKeys =
            validationsCassRdd.map(new org.apache.spark.api.java.function.Function<CassandraRow, Tuple2<Object, Object>>() {
        @Override
        public Tuple2<Object, Object> call(CassandraRow row) throws Exception {
            return new Tuple2<Object, Object>(row.getInt(RatingDO.USER_COL), row.getInt(RatingDO.PRODUCT_COL));
        }
    });
    return model.predict(JavaRDD.toRDD(validationKeys)).toJavaRDD();
}
示例15: trainAndValidate
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; //导入依赖的package包/类
/**
 * Trains the collaborative-filtering implementation selected by version number
 * and computes its RMSE against the validation table.
 *
 * @param version suffix of the collabfilter.CollabFilterCassandra class to load
 * @return RMSE of the trained model over the validation ratings
 * @throws InstantiationException if the implementation cannot be instantiated
 * @throws IllegalAccessException if its no-arg constructor is not accessible
 * @throws ClassNotFoundException if no class exists for the given version
 */
double trainAndValidate(int version) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
final ICollabFilterCassandra cfc;
// Resolve the implementation reflectively, e.g. "collabfilter.CollabFilterCassandra2".
// NOTE(review): Class.newInstance() is deprecated since Java 9; switching to
// getDeclaredConstructor().newInstance() would add checked exceptions to this
// method's signature, so it is deliberately left as-is here.
String className = "collabfilter.CollabFilterCassandra" + version;
cfc = (ICollabFilterCassandra) Class.forName(className).newInstance();
try (Session session = this.cassandraConnector.openSession()) {
MatrixFactorizationModel model = cfc.train(this.sparkCtx, this.cassandraConnector);
CassandraJavaRDD<CassandraRow> validationsCassRdd = javaFunctions(this.sparkCtx).cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.VALIDATION_TABLE);
JavaRDD<Rating> predictionJavaRdd = cfc.predict(model, validationsCassRdd);
double rmse = cfc.validate(predictionJavaRdd, validationsCassRdd);
System.out.println(cfc.resultsReport(predictionJavaRdd, validationsCassRdd, rmse));
return rmse;
}
}