This article collects typical usage examples of the JavaPairRDD.unpersist method in Java, from org.apache.spark.api.java.JavaPairRDD. If you are wondering what JavaPairRDD.unpersist does, how it is used, or what it looks like in practice, the selected code examples below should help. You can also explore the enclosing class, org.apache.spark.api.java.JavaPairRDD, in more detail.
The following presents 3 code examples of the JavaPairRDD.unpersist method, sorted by popularity by default.
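Before the extracted examples, here is a minimal, self-contained sketch of the typical lifecycle around unpersist: persist a JavaPairRDD that is reused by several actions, run those actions, then call unpersist to release the cached blocks. The class name, data, and variable names below are made up for illustration; only the Spark API calls are real.

import java.util.Arrays;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.storage.StorageLevel;
import scala.Tuple2;

public class UnpersistSketch {
    public static void main(String[] args) {
        JavaSparkContext jsc = new JavaSparkContext("local[2]", "unpersist-sketch");

        //a small, made-up pair RDD of (word, length)
        JavaRDD<String> words = jsc.parallelize(Arrays.asList("spark", "rdd", "cache"));
        JavaPairRDD<String, Integer> wordLengths = words.mapToPair(w -> new Tuple2<>(w, w.length()));

        //persist, because the pair RDD is consumed by two actions below
        wordLengths.persist(StorageLevel.MEMORY_AND_DISK_SER());
        long pairs = wordLengths.count(); //first action populates the cache
        long longWords = wordLengths.filter(t -> t._2() > 3).count(); //second action reuses the cached blocks
        System.out.println(pairs + " pairs, " + longWords + " long words");

        //release the cached blocks once the RDD is no longer needed
        wordLengths.unpersist();
        jsc.stop();
    }
}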
Example 1: run
import org.apache.spark.api.java.JavaPairRDD; //import the package/class that this method depends on
public JavaPairRDD<Integer, IntArrayList> run(JavaRDD<String> blockingInput, LongAccumulator BLOCK_ASSIGNMENTS) {
    JavaPairRDD<Integer,IntArrayList> parsedBlocks = parseBlockCollection(blockingInput); //parse the input into (blockId, entityIds) pairs
    JavaPairRDD<Integer,Tuple2<Integer,Integer>> entityBlocks = getEntityBlocksAdvanced(parsedBlocks);
    JavaPairRDD<Integer, IntArrayList> entityIndex = getEntityIndex(entityBlocks, BLOCK_ASSIGNMENTS);
    parsedBlocks.unpersist(); //the parsed blocks are no longer needed once the entity index has been built
    return entityIndex;
}
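A side note on the call itself: besides the no-argument form used above, JavaPairRDD also exposes unpersist(boolean blocking), which controls whether the call waits until all cached blocks have actually been removed before returning. The no-argument form shown in these examples simply uses the default blocking behavior of the Spark version in use.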
Example 2: run
import org.apache.spark.api.java.JavaPairRDD; //import the package/class that this method depends on
/**
 * Returns a map of the top-N neighbors per entity, reversed so that each key entity points to its in-neighbors,
 * i.e., the entities (values) that have the key entity among their top out-neighbors.
 * @param rawTriples the input triples, one triple per line
 * @param SEPARATOR the separator between the fields of each triple
 * @param entityIdsRDD an RDD with the mapping of entity URLs to numeric entity ids
 * @param MIN_SUPPORT_THRESHOLD the minimum support for a relation to be kept in the relations ranking
 * @param N the number of top neighbors to keep per entity
 * @param positiveIds true if the entities belong to collection 1 (positive ids), false for collection 2
 * @param jsc the JavaSparkContext, used to broadcast the entity ids mapping
 * @return a map from each entity id to the ids of its in-neighbors
 */
public Map<Integer,IntArrayList> run(JavaRDD<String> rawTriples, String SEPARATOR, JavaRDD<String> entityIdsRDD, float MIN_SUPPORT_THRESHOLD, int N, boolean positiveIds, JavaSparkContext jsc) {
    //rawTriples.persist(StorageLevel.MEMORY_AND_DISK_SER());
    //List<String> subjects = Utils.getEntityUrlsFromEntityRDDInOrder(rawTriples, SEPARATOR); //a list of (distinct) subject URLs, keeping insertion order (from original triples file)
    //Object2IntOpenHashMap<String> subjects = Utils.getEntityIdsMapping(rawTriples, SEPARATOR);
    Object2IntOpenHashMap<String> entityIds = Utils.readEntityIdsMapping(entityIdsRDD, positiveIds);
    System.out.println("Found "+entityIds.size()+" entities in collection "+ (positiveIds?"1":"2"));
    long numEntitiesSquared = (long)entityIds.keySet().size();
    numEntitiesSquared *= numEntitiesSquared;
    Broadcast<Object2IntOpenHashMap<String>> entityIds_BV = jsc.broadcast(entityIds);

    JavaPairRDD<String,List<Tuple2<Integer, Integer>>> relationIndex = getRelationIndex(rawTriples, SEPARATOR, entityIds_BV); //a list of (s,o) for each predicate
    //rawTriples.unpersist();
    relationIndex.persist(StorageLevel.MEMORY_AND_DISK_SER());

    List<String> relationsRank = getRelationsRank(relationIndex, MIN_SUPPORT_THRESHOLD, numEntitiesSquared);
    System.out.println("Top-5 relations in collection "+(positiveIds?"1: ":"2: ")+Arrays.toString(relationsRank.subList(0, Math.min(5,relationsRank.size())).toArray()));

    JavaPairRDD<Integer, IntArrayList> topOutNeighbors = getTopOutNeighborsPerEntity(relationIndex, relationsRank, N, positiveIds); //action
    relationIndex.unpersist();

    //reverse the outNeighbors, to get in neighbors
    Map<Integer, IntArrayList> inNeighbors =
        topOutNeighbors.flatMapToPair(x -> { //reverse the neighbor pairs from (in,[out1,out2,out3]) to (out1,in), (out2,in), (out3,in)
            List<Tuple2<Integer,Integer>> inNeighbs = new ArrayList<>();
            for (int outNeighbor : x._2()) {
                inNeighbs.add(new Tuple2<>(outNeighbor, x._1()));
            }
            return inNeighbs.iterator();
        })
        .aggregateByKey(new IntOpenHashSet(),
            (x,y) -> {x.add(y); return x;},
            (x,y) -> {x.addAll(y); return x;})
        .mapValues(x -> new IntArrayList(x))
        .collectAsMap();

    return inNeighbors;
}
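Worth noting in this example is the persist/unpersist bracket around relationIndex: it is persisted with MEMORY_AND_DISK_SER because it is consumed twice (first by getRelationsRank, then by getTopOutNeighborsPerEntity), and it is unpersisted right after that second, final use, so its cached blocks do not keep occupying executor memory and disk during the flatMapToPair/aggregateByKey phase that follows.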
Example 3: main
import org.apache.spark.api.java.JavaPairRDD; //import the package/class that this method depends on
public static void main(String[] args) {
    String tmpPath;
    String master;
    String inputPath;
    String outputPath;
    if (args.length == 0) {
        System.setProperty("hadoop.home.dir", "C:\\Users\\VASILIS\\Documents\\hadoop_home"); //only for local mode
        tmpPath = "/file:C:\\tmp";
        master = "local[2]";
        inputPath = "/file:C:\\Users\\VASILIS\\Documents\\OAEI_Datasets\\exportedBlocks\\testInput";
        outputPath = "/file:C:\\Users\\VASILIS\\Documents\\OAEI_Datasets\\exportedBlocks\\testOutput";
    } else {
        tmpPath = "/file:/tmp/";
        //master = "spark://master:7077";
        inputPath = args[0];
        outputPath = args[1];
        // delete existing output directories
        try {
            Utils.deleteHDFSPath(outputPath);
        } catch (IOException | URISyntaxException ex) {
            Logger.getLogger(MetaBlockingOnlyValuesCBS.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    String appName = "MetaBlocking CBS only values on "+inputPath.substring(inputPath.lastIndexOf("/", inputPath.length()-2)+1);
    SparkSession spark = Utils.setUpSpark(appName, 288, 8, 3, tmpPath);
    int PARALLELISM = spark.sparkContext().getConf().getInt("spark.default.parallelism", 152);
    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());

    ////////////////////////
    //start the processing//
    ////////////////////////

    //Block Filtering
    System.out.println("\n\nStarting BlockFiltering, reading from "+inputPath);
    LongAccumulator BLOCK_ASSIGNMENTS_ACCUM = jsc.sc().longAccumulator();
    BlockFilteringAdvanced bf = new BlockFilteringAdvanced();
    JavaPairRDD<Integer,IntArrayList> entityIndex = bf.run(jsc.textFile(inputPath), BLOCK_ASSIGNMENTS_ACCUM);
    entityIndex.cache();

    //Blocks From Entity Index
    System.out.println("\n\nStarting BlocksFromEntityIndex...");
    LongAccumulator CLEAN_BLOCK_ACCUM = jsc.sc().longAccumulator();
    LongAccumulator NUM_COMPARISONS_ACCUM = jsc.sc().longAccumulator();
    BlocksFromEntityIndex bFromEI = new BlocksFromEntityIndex();
    JavaPairRDD<Integer, IntArrayList> blocksFromEI = bFromEI.run(entityIndex, CLEAN_BLOCK_ACCUM, NUM_COMPARISONS_ACCUM);
    blocksFromEI.persist(StorageLevel.DISK_ONLY());
    blocksFromEI.count(); //the simplest action just to run blocksFromEI and get the actual value for the counters below

    double BCin = (double) BLOCK_ASSIGNMENTS_ACCUM.value() / entityIndex.count(); //BCin = average number of block assignments per entity
    final int K = Math.max(1, ((Double)Math.floor(BCin)).intValue()); //K = |_BCin -1_|
    System.out.println(BLOCK_ASSIGNMENTS_ACCUM.value()+" block assignments");
    System.out.println(CLEAN_BLOCK_ACCUM.value()+" clean blocks");
    System.out.println(NUM_COMPARISONS_ACCUM.value()+" comparisons");
    System.out.println("BCin = "+BCin);
    System.out.println("K = "+K);
    entityIndex.unpersist();

    //CNP
    System.out.println("\n\nStarting CNP...");
    CNPCBSValuesOnly cnp = new CNPCBSValuesOnly();
    JavaPairRDD<Integer,IntArrayList> metablockingResults = cnp.run(blocksFromEI, K);
    metablockingResults
        .mapValues(x -> x.toString())
        .saveAsTextFile(outputPath); //only to see the output and add an action (saving to file may not be needed)
    System.out.println("Job finished successfully. Output written in "+outputPath);
}