本文整理汇总了Java中org.apache.spark.mllib.recommendation.ALS类的典型用法代码示例。如果您正苦于以下问题:Java ALS类的具体用法?Java ALS怎么用?Java ALS使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
ALS类属于org.apache.spark.mllib.recommendation包,在下文中一共展示了ALS类的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: updateModel
import org.apache.spark.mllib.recommendation.ALS; //导入依赖的package包/类
/**
 * Rebuilds and stores the ALS model for a single domain.
 * <p>
 * To reduce task sizes, all spark tasks need to access local variables only;
 * a transparent access over {@code this} would force spark to serialize the
 * parent object. Variables with forced local access carry a '_' prefix.
 *
 * @param domainID identifier of the domain whose model is refreshed
 */
private void updateModel(Long domainID) {
    LimitedList<Tuple2> interactions = pool.get(domainID);
    ArrayList<Rating> _items = new ArrayList<>();
    // Copy every (user, item) pair into a plain local list of ratings with score 1.
    for (int idx = 0, n = interactions.size(); idx < n; idx++) {
        Tuple2 pair = interactions.get(idx);
        int user = ((Long) pair._1()).intValue();
        int item = ((Long) pair._2()).intValue();
        _items.add(new Rating(user, item, 1));
    }
    JavaRDD<Rating> ratingRdd = SharedService.parallelize(_items);
    try {
        results.put(domainID, ALS.train(ratingRdd.rdd(), RANK, ITERATIONS, LAMBDA));
    } catch (Exception e) {
        log.error("worker failed", e);
    }
}
示例2: getRecommend
import org.apache.spark.mllib.recommendation.ALS; //导入依赖的package包/类
/**
 * Trains an ALS model on the shared {@code ratings} RDD and returns the
 * learned product feature vectors serialized as a JSON array of
 * "productId,[f0, f1, ...]" strings.
 * <p>
 * Fix: removed the unused local {@code ArrayList<String> result}, which was
 * allocated but never read or written.
 *
 * @return JSON string with one entry per product
 */
public String getRecommend() {
    final int rank = 5, iterations = 1, blocks = -1; // blocks = -1 lets ALS auto-configure parallelism
    MatrixFactorizationModel model = ALS.train(ratings.rdd(), rank, iterations, 0.01, blocks);
    return new Gson().toJson(
            model.productFeatures().toJavaRDD().map(element ->
                    (element._1() + "," + Arrays.toString(element._2())))
                    .collect());
}
示例3: create
import org.apache.spark.mllib.recommendation.ALS; //导入依赖的package包/类
/**
 * Trains an ALS model on {@code trainingRdd} and evaluates it against
 * {@code testRdd}, returning the model together with its measured error.
 *
 * @param trainingRdd  ratings used to fit the model
 * @param testRdd      ratings held out for error measurement
 * @param rank         number of latent factors
 * @param iterationsNr number of ALS iterations
 * @return the trained model paired with its test error
 */
public static TrainedModel create(JavaRDD<Rating> trainingRdd, JavaRDD<Rating> testRdd, int rank, int iterationsNr) {
    logger.info(String.format("Train with parameters -> iterations: %d, rank :%d", iterationsNr, rank));
    // Reduce the held-out ratings to (user, product) pairs for prediction.
    JavaRDD<Tuple2<Object, Object>> userProductPairs =
            testRdd.map(r -> new Tuple2<>(r.user(), r.product()));
    TimeKeeper timeKeeper = new TimeKeeper();
    timeKeeper.start();
    MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(trainingRdd), rank, iterationsNr, 0.1);
    timeKeeper.end().print(logger, "als model trained in ").reset();
    Double error = getError(testRdd, rank, iterationsNr, userProductPairs, timeKeeper, model);
    return new TrainedModel(error, model);
}
示例4: train
import org.apache.spark.mllib.recommendation.ALS; //导入依赖的package包/类
/**
 * Reads the employer ratings table from Cassandra and fits an ALS
 * matrix-factorization model on it.
 *
 * @param sparkCtx           active Spark context
 * @param cassandraConnector connector to the Cassandra cluster (not used
 *                           directly here; the table is read via javaFunctions)
 * @return the trained matrix factorization model
 */
public MatrixFactorizationModel train(JavaSparkContext sparkCtx, CassandraConnector cassandraConnector) {
    CassandraJavaRDD<CassandraRow> rows =
            javaFunctions(sparkCtx).cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.RATINGS_TABLE);
    // Map each Cassandra row to an MLlib Rating of (user, product, rating).
    JavaRDD<Rating> ratingRdd = rows.map(row -> new Rating(
            row.getInt(RatingDO.USER_COL),
            row.getInt(RatingDO.PRODUCT_COL),
            row.getDouble(RatingDO.RATING_COL)));
    return ALS.train(JavaRDD.toRDD(ratingRdd), RANK, ITER, LAMBDA);
}
示例5: buildModel
import org.apache.spark.mllib.recommendation.ALS; //导入依赖的package包/类
/**
 * Builds an ALS matrix-factorization model from the raw training lines and
 * serializes it to PMML.
 *
 * Hyperparameters are positional: 0 = features (rank), 1 = lambda, 2 = alpha,
 * and — only when logStrength is enabled — 3 = epsilon.
 *
 * @param sparkContext active Spark context used for broadcasts
 * @param trainData raw input lines, parsed via MLFunctions.PARSE_FN
 * @param hyperParameters positional hyperparameter list (see above)
 * @param candidatePath output location handed to the PMML serializer
 * @return the trained model rendered as PMML
 */
@Override
public PMML buildModel(JavaSparkContext sparkContext,
JavaRDD<String> trainData,
List<?> hyperParameters,
Path candidatePath) {
int features = (Integer) hyperParameters.get(0);
double lambda = (Double) hyperParameters.get(1);
double alpha = (Double) hyperParameters.get(2);
// epsilon is only read when logStrength is set; NaN marks "not applicable".
double epsilon = Double.NaN;
if (logStrength) {
epsilon = (Double) hyperParameters.get(3);
}
Preconditions.checkArgument(features > 0);
Preconditions.checkArgument(lambda >= 0.0);
Preconditions.checkArgument(alpha > 0.0);
if (logStrength) {
Preconditions.checkArgument(epsilon > 0.0);
}
JavaRDD<String[]> parsedRDD = trainData.map(MLFunctions.PARSE_FN);
// Cached because it is scanned several times below: twice for the ID
// mappings and again to build the rating RDD.
parsedRDD.cache();
Map<String,Integer> userIDIndexMap = buildIDIndexMapping(parsedRDD, true);
Map<String,Integer> itemIDIndexMap = buildIDIndexMapping(parsedRDD, false);
log.info("Broadcasting ID-index mappings for {} users, {} items",
userIDIndexMap.size(), itemIDIndexMap.size());
// Broadcast the string->index maps so executors share one copy per node
// instead of receiving a copy per task.
Broadcast<Map<String,Integer>> bUserIDToIndex = sparkContext.broadcast(userIDIndexMap);
Broadcast<Map<String,Integer>> bItemIDToIndex = sparkContext.broadcast(itemIDIndexMap);
JavaRDD<Rating> trainRatingData = parsedToRatingRDD(parsedRDD, bUserIDToIndex, bItemIDToIndex);
trainRatingData = aggregateScores(trainRatingData, epsilon);
ALS als = new ALS()
.setRank(features)
.setIterations(iterations)
.setLambda(lambda)
.setCheckpointInterval(5);
if (implicit) {
als = als.setImplicitPrefs(true).setAlpha(alpha);
}
// ALS is iterative: cache the rating RDD so each iteration does not
// recompute the upstream parse/aggregate pipeline.
RDD<Rating> trainingRatingDataRDD = trainRatingData.rdd();
trainingRatingDataRDD.cache();
MatrixFactorizationModel model = als.run(trainingRatingDataRDD);
// Training done — release the cached RDD and the training-side broadcasts.
trainingRatingDataRDD.unpersist(false);
bUserIDToIndex.unpersist();
bItemIDToIndex.unpersist();
parsedRDD.unpersist();
// Inverted (index -> original ID) mappings are needed to label the latent
// factors in the PMML output.
Broadcast<Map<Integer,String>> bUserIndexToID = sparkContext.broadcast(invertMap(userIDIndexMap));
Broadcast<Map<Integer,String>> bItemIndexToID = sparkContext.broadcast(invertMap(itemIDIndexMap));
PMML pmml = mfModelToPMML(model,
features,
lambda,
alpha,
epsilon,
implicit,
logStrength,
candidatePath,
bUserIndexToID,
bItemIndexToID);
unpersist(model);
bUserIndexToID.unpersist();
bItemIndexToID.unpersist();
return pmml;
}
示例6: train
import org.apache.spark.mllib.recommendation.ALS; //导入依赖的package包/类
/**
 * Reads the employer ratings table from Cassandra, converts each row into an
 * MLlib {@link Rating}, and trains an ALS matrix-factorization model.
 *
 * @param sparkCtx           active Spark context
 * @param cassandraConnector connector to the Cassandra cluster (not used
 *                           directly here; the table is read via javaFunctions)
 * @return the trained matrix factorization model
 */
public MatrixFactorizationModel train(JavaSparkContext sparkCtx, CassandraConnector cassandraConnector) {
    JavaRDD<Rating> ratingRdd = javaFunctions(sparkCtx)
            .cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.RATINGS_TABLE)
            .map(row -> {
                int user = row.getInt(RatingDO.USER_COL);
                int product = row.getInt(RatingDO.PRODUCT_COL);
                double score = row.getDouble(RatingDO.RATING_COL);
                return new Rating(user, product, score);
            });
    return ALS.train(JavaRDD.toRDD(ratingRdd), RANK, ITER, LAMBDA);
}
示例7: trainImplicit
import org.apache.spark.mllib.recommendation.ALS; //导入依赖的package包/类
/**
 * Trains a matrix factorization model with the alternating least squares
 * (ALS) algorithm from a JavaRDD of 'implicit preferences' that users have
 * expressed for products.
 *
 * @param trainingDataset Training dataset as a JavaRDD of Ratings
 * @param rank Number of latent factors
 * @param noOfIterations Number of iterations
 * @param regularizationParameter Regularization parameter
 * @param confidenceParameter Confidence parameter
 * @param noOfBlocks Level of parallelism (auto configure = -1)
 * @return Matrix factorization model
 */
public MatrixFactorizationModel trainImplicit(JavaRDD<Rating> trainingDataset, int rank, int noOfIterations,
                                              double regularizationParameter, double confidenceParameter,
                                              int noOfBlocks) {
    // Note the argument order expected by MLlib: the confidence (alpha)
    // parameter comes after the block count.
    return ALS.trainImplicit(trainingDataset.rdd(), rank, noOfIterations,
            regularizationParameter, noOfBlocks, confidenceParameter);
}
示例8: trainExplicit
import org.apache.spark.mllib.recommendation.ALS; //导入依赖的package包/类
/**
 * Trains a matrix factorization model with the alternating least squares
 * (ALS) algorithm from a JavaRDD of explicit ratings that users have given
 * to products.
 *
 * @param trainingDataset Training dataset as a JavaRDD of Ratings
 * @param rank Number of latent factors
 * @param noOfIterations Number of iterations
 * @param regularizationParameter Regularization parameter
 * @param noOfBlocks Level of parallelism (auto configure = -1)
 * @return Matrix factorization model
 */
public MatrixFactorizationModel trainExplicit(JavaRDD<Rating> trainingDataset, int rank, int noOfIterations,
                                              double regularizationParameter, int noOfBlocks) {
    return ALS.train(trainingDataset.rdd(), rank, noOfIterations,
            regularizationParameter, noOfBlocks);
}