本文整理汇总了Java中org.apache.spark.api.java.JavaRDD.unpersist方法的典型用法代码示例。如果您正苦于以下问题：Java JavaRDD.unpersist方法的具体用法？Java JavaRDD.unpersist怎么用？Java JavaRDD.unpersist使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.spark.api.java.JavaRDD的用法示例。
在下文中一共展示了JavaRDD.unpersist方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Demonstrates persisting and unpersisting RDDs with a local Spark context.
 *
 * <p>Builds a cached RDD of the integers 1 to 5 in 3 partitions, derives a filtered RDD
 * holding only the even values, persists it at {@code MEMORY_AND_DISK}, prints each
 * element, and then unpersists both RDDs. The Spark context is always stopped before
 * the method returns, even when a job fails.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
  // On Windows, hadoop.home.dir may need to be set before creating the context, e.g.:
  // System.setProperty("hadoop.home.dir", "C:\\path\\to\\hadoop");

  // Raise the root log level so only warnings and errors are printed next to the output.
  Logger rootLogger = LogManager.getRootLogger();
  rootLogger.setLevel(Level.WARN);

  SparkConf conf = new SparkConf()
      .setMaster("local")
      .setAppName("ActionExamples")
      .set("spark.hadoop.validateOutputSpecs", "false");
  JavaSparkContext sparkContext = new JavaSparkContext(conf);
  try {
    JavaRDD<Integer> rdd = sparkContext.parallelize(Arrays.asList(1, 2, 3, 4, 5), 3).cache();

    // Fully qualified to avoid clashing with any other Function type in scope.
    JavaRDD<Integer> evenRDD = rdd.filter(
        new org.apache.spark.api.java.function.Function<Integer, Boolean>() {
          @Override
          public Boolean call(Integer v1) throws Exception {
            return v1 % 2 == 0;
          }
        });
    evenRDD.persist(StorageLevel.MEMORY_AND_DISK());

    evenRDD.foreach(new VoidFunction<Integer>() {
      @Override
      public void call(Integer t) throws Exception {
        System.out.println("The value of RDD are :" + t);
      }
    });

    // Release the persisted data once it is no longer needed.
    evenRDD.unpersist();
    rdd.unpersist();
  } finally {
    // Always shut the context down so it does not leak when a job throws.
    sparkContext.stop();
  }
}
示例2: buildModel
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Trains an ALS matrix factorization model on the given training data and returns it
 * encoded as PMML.
 *
 * <p>Hyperparameters are read positionally from {@code hyperParameters}: index 0 is the
 * number of features (Integer), index 1 is lambda (Double), index 2 is alpha (Double) and,
 * only when {@code logStrength} is set, index 3 is epsilon (Double).
 *
 * @param sparkContext context used to broadcast the ID/index mappings
 * @param trainData raw training lines, parsed with {@code MLFunctions.PARSE_FN}
 * @param hyperParameters positional hyperparameter values as described above
 * @param candidatePath path passed through to {@code mfModelToPMML}
 * @return the PMML produced by {@code mfModelToPMML} for the trained model
 */
@Override
public PMML buildModel(JavaSparkContext sparkContext,
                       JavaRDD<String> trainData,
                       List<?> hyperParameters,
                       Path candidatePath) {
  // Extract every hyperparameter before validating any of them.
  int rank = (Integer) hyperParameters.get(0);
  double regularization = (Double) hyperParameters.get(1);
  double confidenceAlpha = (Double) hyperParameters.get(2);
  double logEpsilon = logStrength ? (Double) hyperParameters.get(3) : Double.NaN;

  Preconditions.checkArgument(rank > 0);
  Preconditions.checkArgument(regularization >= 0.0);
  Preconditions.checkArgument(confidenceAlpha > 0.0);
  // Epsilon is only constrained when log-strength is in use.
  Preconditions.checkArgument(!logStrength || logEpsilon > 0.0);

  // The parsed input is read several times below, so keep it cached until training is done.
  JavaRDD<String[]> parsedInput = trainData.map(MLFunctions.PARSE_FN);
  parsedInput.cache();

  Map<String,Integer> userIndexByID = buildIDIndexMapping(parsedInput, true);
  Map<String,Integer> itemIndexByID = buildIDIndexMapping(parsedInput, false);

  log.info("Broadcasting ID-index mappings for {} users, {} items",
           userIndexByID.size(), itemIndexByID.size());

  Broadcast<Map<String,Integer>> userIndexBroadcast = sparkContext.broadcast(userIndexByID);
  Broadcast<Map<String,Integer>> itemIndexBroadcast = sparkContext.broadcast(itemIndexByID);

  JavaRDD<Rating> rawRatings =
      parsedToRatingRDD(parsedInput, userIndexBroadcast, itemIndexBroadcast);
  JavaRDD<Rating> aggregatedRatings = aggregateScores(rawRatings, logEpsilon);

  ALS factorizer = new ALS()
      .setRank(rank)
      .setIterations(iterations)
      .setLambda(regularization)
      .setCheckpointInterval(5);
  if (implicit) {
    factorizer = factorizer.setImplicitPrefs(true).setAlpha(confidenceAlpha);
  }

  // Cache the underlying Scala RDD only for the duration of the training run.
  RDD<Rating> trainingInput = aggregatedRatings.rdd();
  trainingInput.cache();
  MatrixFactorizationModel trainedModel = factorizer.run(trainingInput);
  trainingInput.unpersist(false);

  // Training is complete: release the forward mappings and the parsed input.
  userIndexBroadcast.unpersist();
  itemIndexBroadcast.unpersist();
  parsedInput.unpersist();

  // Broadcast the inverted (index -> ID) mappings for the PMML conversion.
  Broadcast<Map<Integer,String>> userIDBroadcast =
      sparkContext.broadcast(invertMap(userIndexByID));
  Broadcast<Map<Integer,String>> itemIDBroadcast =
      sparkContext.broadcast(invertMap(itemIndexByID));

  PMML result = mfModelToPMML(trainedModel,
                              rank,
                              regularization,
                              confidenceAlpha,
                              logEpsilon,
                              implicit,
                              logStrength,
                              candidatePath,
                              userIDBroadcast,
                              itemIDBroadcast);

  unpersist(trainedModel);
  userIDBroadcast.unpersist();
  itemIDBroadcast.unpersist();

  return result;
}
示例3: evaluate
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Evaluates a PMML-encoded matrix factorization model against test data.
 *
 * <p>When {@code implicit} is set the result is the AUC computed by
 * {@code Evaluation.areaUnderCurve}; otherwise it is the negated RMSE computed by
 * {@code Evaluation.rmse}.
 *
 * @param sparkContext context used for broadcasting and for model reconstruction
 * @param model PMML model to evaluate
 * @param modelParentPath path passed through to {@code pmmlToMFModel}
 * @param testData raw test lines, parsed with {@code MLFunctions.PARSE_FN}
 * @param trainData training data; not read by this method
 * @return AUC for implicit models, or negated RMSE otherwise
 */
@Override
public double evaluate(JavaSparkContext sparkContext,
                       PMML model,
                       Path modelParentPath,
                       JavaRDD<String> testData,
                       JavaRDD<String> trainData) {
  // The parsed test input is read several times below, so cache it for now.
  JavaRDD<String[]> parsedInput = testData.map(MLFunctions.PARSE_FN);
  parsedInput.cache();

  Map<String,Integer> userIndexByID = buildIDIndexOneWayMap(model, parsedInput, true);
  Map<String,Integer> itemIndexByID = buildIDIndexOneWayMap(model, parsedInput, false);

  log.info("Broadcasting ID-index mappings for {} users, {} items",
           userIndexByID.size(), itemIndexByID.size());

  Broadcast<Map<String,Integer>> userIndexBroadcast = sparkContext.broadcast(userIndexByID);
  Broadcast<Map<String,Integer>> itemIndexBroadcast = sparkContext.broadcast(itemIndexByID);

  JavaRDD<Rating> rawRatings =
      parsedToRatingRDD(parsedInput, userIndexBroadcast, itemIndexBroadcast);

  // Epsilon is stored as a model extension and only consulted in log-strength mode.
  double logEpsilon = logStrength
      ? Double.parseDouble(AppPMMLUtils.getExtensionValue(model, "epsilon"))
      : Double.NaN;
  JavaRDD<Rating> aggregatedRatings = aggregateScores(rawRatings, logEpsilon);

  MatrixFactorizationModel factorization = pmmlToMFModel(
      sparkContext, model, modelParentPath, userIndexBroadcast, itemIndexBroadcast);

  parsedInput.unpersist();

  double score;
  if (!implicit) {
    double rmse = Evaluation.rmse(factorization, aggregatedRatings);
    log.info("RMSE: {}", rmse);
    // Negated so that a larger returned value corresponds to a smaller error.
    score = -rmse;
  } else {
    double auc = Evaluation.areaUnderCurve(sparkContext, factorization, aggregatedRatings);
    log.info("AUC: {}", auc);
    score = auc;
  }

  unpersist(factorization);
  userIndexBroadcast.unpersist();
  itemIndexBroadcast.unpersist();

  return score;
}