This article collects typical usage examples of the Java class org.apache.spark.util.LongAccumulator. If you are wondering what LongAccumulator is for, or how to use it, the selected class code examples below may help.
The LongAccumulator class belongs to the org.apache.spark.util package. 15 code examples of the class are shown below, ordered by popularity by default.
Example 1: evaluateBlockingResults
import org.apache.spark.util.LongAccumulator; //import the required package/class
/**
* Compute precision, recall, f-measure of the input results, given the ground truth.
* The input RDDs should be in the same format (negative entity Id, positive entity Id).
* @param blockingResults the blocking results in the form (-entityId, +entityId)
* @param groundTruth the ground truth in the form (-entityId, +entityId)
* @param TPs true positives to update (true matches)
* @param FPs false positives to update (false matches)
* @param FNs false negatives to update (missed matches)
* @param verbose if true, print diagnostic messages for missed and false matches
*/
public void evaluateBlockingResults(JavaPairRDD<Integer,IntArrayList> blockingResults, JavaPairRDD<Integer,Integer> groundTruth, LongAccumulator TPs, LongAccumulator FPs, LongAccumulator FNs, boolean verbose) {
blockingResults
.fullOuterJoin(groundTruth)
.foreach(joinedMatch -> {
IntArrayList myCandidates = joinedMatch._2()._1().orElse(null);
Integer correctResult = joinedMatch._2()._2().orElse(null);
if (myCandidates == null) { //this means that the correct result is not null (otherwise, nothing to join here)
FNs.add(1); //missed match
if (verbose) {
System.out.println("FN: Did not provide any match for "+joinedMatch._1());
}
} else if (correctResult == null) {
FPs.add(myCandidates.size()); //each candidate is a false match (no candidate should exist)
} else if (myCandidates.contains(correctResult)) {
TPs.add(1); //true match
FPs.add(myCandidates.size()-1); //the rest are false matches (ideal: only one candidate suggested)
} else { //then the correct result is not included in my candidates => I missed this match and all my candidates are wrong
FPs.add(myCandidates.size()); //all my candidates were false
FNs.add(1); //the correct match was missed
if (verbose) {
System.out.println("FN: Provided false matches "+myCandidates+" for "+joinedMatch._1()+". The correct results was "+correctResult);
}
}
});
}
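A minimal driver-side usage sketch (the names jsc, evaluator, blockingResults and groundTruth are hypothetical, not part of the example above): the accumulators are created on the driver, passed into the method, and read only after the foreach action has completed, at which point precision, recall and F-measure follow directly from TP/FP/FN.

LongAccumulator TPs = jsc.sc().longAccumulator("TPs");
LongAccumulator FPs = jsc.sc().longAccumulator("FPs");
LongAccumulator FNs = jsc.sc().longAccumulator("FNs");
evaluator.evaluateBlockingResults(blockingResults, groundTruth, TPs, FPs, FNs, false);
//accumulator values are reliable only after the foreach action has run
long tp = TPs.value(), fp = FPs.value(), fn = FNs.value();
double precision = tp / (double) (tp + fp);
double recall = tp / (double) (tp + fn);
double f1 = (precision + recall) > 0 ? 2 * precision * recall / (precision + recall) : 0.0;
System.out.printf("Precision: %.4f, Recall: %.4f, F1: %.4f%n", precision, recall, f1);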
Example 2: getTruePositivesEntityIds
import org.apache.spark.util.LongAccumulator; //import the required package/class
/**
* Compute precision, recall, f-measure of the input results, given the ground truth and return the negative ids of found matches.
* The input RDDs should be in the same format (negative entity Id, positive entity Id).
* @param blockingResults the blocking results in the form (-entityId, +entityId)
* @param groundTruth the ground truth in the form (-entityId, +entityId)
* @param TPs true positives to update (true matches)
* @param FPs false positives to update (false matches)
* @param FNs false negatives to update (missed matches)
* @return the negative ids from found matches.
*/
public JavaRDD<Integer> getTruePositivesEntityIds(JavaPairRDD<Integer,IntArrayList> blockingResults, JavaPairRDD<Integer,Integer> groundTruth, LongAccumulator TPs, LongAccumulator FPs, LongAccumulator FNs) {
return blockingResults
.fullOuterJoin(groundTruth)
.map(joinedMatch -> {
IntArrayList myCandidates = joinedMatch._2()._1().orElse(null);
Integer correctResult = joinedMatch._2()._2().orElse(null);
if (myCandidates == null) { //this means that the correct result is not null (otherwise, nothing to join here)
FNs.add(1); //missed match
return null;
} else if (correctResult == null) {
FPs.add(myCandidates.size()); //each candidate is a false match (no candidate should exist)
return null;
} else if (myCandidates.contains(correctResult)) {
TPs.add(1); //true match
FPs.add(myCandidates.size()-1); //the rest are false matches (ideal: only one candidate suggested)
return joinedMatch._1(); //this entity contains the correct match in its list of candidates
} else { //then the correct result is not included in my candidates => I missed this match and all my candidates are wrong
FPs.add(myCandidates.size()); //all my candidates were wrong
FNs.add(1); //the correct match was missed
return null;
}
}).filter(x -> x != null);
}
Example 3: getTruePositivesEntityIdsNEW
import org.apache.spark.util.LongAccumulator; //import the required package/class
/**
* Compute precision, recall, f-measure of the input results, given the ground truth and return the negative ids of found matches.
* The input RDDs should be in the same format (negative entity Id, positive entity Id).
* @param blockingResults the blocking results in the form (-entityId, +entityId)
* @param groundTruth the ground truth in the form (-entityId, +entityId)
* @param TPs true positives to update (true matches)
* @param FPs false positives to update (false matches)
* @param FNs false negatives to update (missed matches)
* @return the negative ids from found matches.
*/
public JavaRDD<Integer> getTruePositivesEntityIdsNEW(JavaPairRDD<Integer,IntArrayList> blockingResults, JavaPairRDD<Integer,Integer> groundTruth, LongAccumulator TPs, LongAccumulator FPs, LongAccumulator FNs) {
return blockingResults
.rightOuterJoin(groundTruth) //keep only ground truth matches, ignore other FPs
.map(joinedMatch -> {
IntArrayList myCandidates = joinedMatch._2()._1().orElse(null);
Integer correctResult = joinedMatch._2()._2();
if (myCandidates == null) { //this means that the correct result is not null (otherwise, nothing to join here)
FNs.add(1); //missed match
return null;
/*} else if (correctResult == null) {
FPs.add(myCandidates.size()); //each candidate is a false match (no candidate should exist)
return null; */
} else if (myCandidates.contains(correctResult)) {
TPs.add(1); //true match
FPs.add(myCandidates.size()-1); //the rest are false matches (ideal: only one candidate suggested)
return joinedMatch._1(); //this entity contains the correct match in its list of candidates
} else { //then the correct result is not included in my candidates => I missed this match and all my candidates are wrong
FPs.add(myCandidates.size()); //all my candidates were wrong
FNs.add(1); //the correct match was missed
return null;
}
}).filter(x -> x != null);
}
Example 4: evaluateResults
import org.apache.spark.util.LongAccumulator; //import the required package/class
/**
* Compute precision, recall, f-measure of the input results, given the ground truth.
* The input RDDs should be in the same format (negative entity Id, positive entity Id).
* This is a void method, as it only changes the accumulator values.
* @param results the matching results in the form (-entityId, +entityId)
* @param groundTruth the ground truth in the form (-entityId, +entityId)
* @param TPs true positives to update (true matches)
* @param FPs false positives to update (false matches)
* @param FNs false negatives to update (missed matches)
*/
public void evaluateResults(JavaPairRDD<Integer,Integer> results, JavaPairRDD<Integer,Integer> groundTruth, LongAccumulator TPs, LongAccumulator FPs, LongAccumulator FNs) {
results
.fullOuterJoin(groundTruth)
.foreach(joinedMatch -> {
Integer myResult = joinedMatch._2()._1().orElse(null);
Integer correctResult = joinedMatch._2()._2().orElse(null);
if (myResult == null) {
FNs.add(1); //missed match
} else if (correctResult == null) {
FPs.add(1); //wrong match
} else if (myResult.equals(correctResult)) {
TPs.add(1); //true match
} else { //then I gave a different result than the correct match
FPs.add(1); //my result was wrong
FNs.add(1); //the correct match was missed
}
});
}
Example 5: evaluateResultsNEW
import org.apache.spark.util.LongAccumulator; //import the required package/class
/**
* Compute precision, recall, f-measure of the input results, given the ground truth.
* The input RDDs should be in the same format (negative entity Id, positive entity Id).
* This is a void method, as it only changes the accumulator values.
* @param results the matching results in the form (-entityId, +entityId)
* @param groundTruth the ground truth in the form (-entityId, +entityId)
* @param TPs true positives to update (true matches)
* @param FPs false positives to update (false matches)
* @param FNs false negatives to update (missed matches)
*/
public void evaluateResultsNEW(JavaPairRDD<Integer,Integer> results, JavaPairRDD<Integer,Integer> groundTruth, LongAccumulator TPs, LongAccumulator FPs, LongAccumulator FNs) {
results
.rightOuterJoin(groundTruth)
.foreach(joinedMatch -> {
Integer myResult = joinedMatch._2()._1().orElse(null);
Integer correctResult = joinedMatch._2()._2();
if (myResult == null) {
FNs.add(1); //missed match
/*} else if (correctResult == null) {
FPs.add(1); //wrong match
*/
} else if (myResult.equals(correctResult)) {
TPs.add(1); //true match
} else { //then I gave a different result than the correct match
FPs.add(1); //my result was wrong
FNs.add(1); //the correct match was missed
}
});
}
Example 6: writeRDDtoHDFS
import org.apache.spark.util.LongAccumulator; //import the required package/class
@SuppressWarnings("unchecked")
public static long writeRDDtoHDFS( RDDObject rdd, String path, OutputInfo oinfo )
{
JavaPairRDD<MatrixIndexes,MatrixBlock> lrdd = (JavaPairRDD<MatrixIndexes, MatrixBlock>) rdd.getRDD();
//piggyback nnz maintenance on write
LongAccumulator aNnz = getSparkContextStatic().sc().longAccumulator("nnz");
lrdd = lrdd.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
//save file is an action which also triggers nnz maintenance
lrdd.saveAsHadoopFile(path,
oinfo.outputKeyClass,
oinfo.outputValueClass,
oinfo.outputFormatClass);
//return nnz aggregate of all blocks
return aNnz.value();
}
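The ComputeBinaryBlockNnzFunction used above is not part of this excerpt; the general pattern is a pass-through mapValues function that adds each block's non-zero count to the accumulator as a side effect of the save action. A rough sketch of that pattern (illustrative only, assuming MatrixBlock#getNonZeros(); this is not the actual SystemML implementation):

//Illustrative pass-through function; requires org.apache.spark.api.java.function.Function
//and org.apache.spark.util.LongAccumulator.
public class NnzCountingFunction implements Function<MatrixBlock, MatrixBlock> {
    private static final long serialVersionUID = 1L;
    private final LongAccumulator nnz;

    public NnzCountingFunction(LongAccumulator nnz) {
        this.nnz = nnz;
    }

    @Override
    public MatrixBlock call(MatrixBlock block) throws Exception {
        nnz.add(block.getNonZeros()); //count non-zeros, then return the block unchanged
        return block;
    }
}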
Example 7: RemoteDPParForSparkWorker
import org.apache.spark.util.LongAccumulator; //import the required package/class
public RemoteDPParForSparkWorker(String program, HashMap<String, byte[]> clsMap, String inputVar, String iterVar,
boolean cpCaching, MatrixCharacteristics mc, boolean tSparseCol, PartitionFormat dpf, OutputInfo oinfo,
LongAccumulator atasks, LongAccumulator aiters)
throws DMLRuntimeException
{
_prog = program;
_clsMap = clsMap;
_caching = cpCaching;
_inputVar = inputVar;
_iterVar = iterVar;
_oinfo = oinfo;
//setup spark accumulators
_aTasks = atasks;
_aIters = aiters;
//setup matrix block partition meta data
_rlen = (int)dpf.getNumRows(mc);
_clen = (int)dpf.getNumColumns(mc);
_brlen = mc.getRowsPerBlock();
_bclen = mc.getColsPerBlock();
_tSparseCol = tSparseCol;
_dpf = dpf._dpf;
}
Example 8: run
import org.apache.spark.util.LongAccumulator; //import the required package/class
public JavaPairRDD<Integer, IntArrayList> run(JavaRDD<String> blockingInput, LongAccumulator BLOCK_ASSIGNMENTS) {
JavaPairRDD<Integer,IntArrayList> parsedBlocks = parseBlockCollection(blockingInput);
JavaPairRDD<Integer,Tuple2<Integer,Integer>> entityBlocks = getEntityBlocksAdvanced(parsedBlocks);
JavaPairRDD<Integer, IntArrayList> entityIndex = getEntityIndex(entityBlocks, BLOCK_ASSIGNMENTS);
parsedBlocks.unpersist();
return entityIndex;
}
Example 9: getEntityIndex
import org.apache.spark.util.LongAccumulator; //import the required package/class
private JavaPairRDD<Integer, IntArrayList> getEntityIndex(JavaPairRDD<Integer,Tuple2<Integer,Integer>> entityBlocks, LongAccumulator BLOCK_ASSIGNMENTS) {
System.out.println("Creating the entity index...");
return entityBlocks.groupByKey()
.mapValues(blocks -> {
//sort the tuples by value (inverseUtility)
PriorityQueue<ComparableIntFloatPair> inverseBlocks = new PriorityQueue<>();
int numBlocks = 0;
for (Tuple2<Integer,Integer> block : blocks) {
inverseBlocks.add(new ComparableIntFloatPair(block._1(), block._2()));
numBlocks++;
}
final int MAX_BLOCKS = (int) Math.round(0.8 * numBlocks);
//keep MAX_BLOCKS blocks per entity
IntArrayList entityIndex = new IntArrayList();
int indexedBlocks = 0;
while (!inverseBlocks.isEmpty()) {
int blockId = inverseBlocks.poll().getEntityId();
entityIndex.add(blockId);
if (++indexedBlocks == MAX_BLOCKS) { break;} //comment-out this line to skip block filtering
}
BLOCK_ASSIGNMENTS.add(entityIndex.size());
return entityIndex;
});
}
Example 10: getTopCandidatePerEntity
import org.apache.spark.util.LongAccumulator; //import the required package/class
/**
* Aggregates the two lists of candidate matches per entity using Borda, and returns the top-1 aggregate candidate match per entity.
* @param topKValueCandidates the top candidate matches per entity based on values, in the form: key: entityId, value: map of [candidateMatch, valueSim(entityId,candidateMatch)]
* @param topKNeighborCandidates the top candidate matches per entity based on neighbors, in the form: key: entityId, value: ranked list of [candidateMatch]
* @param LISTS_WITH_COMMON_CANDIDATES
* @return the top-1 aggregate candidate match per entity
*/
public JavaPairRDD<Integer,Integer> getTopCandidatePerEntity(JavaPairRDD<Integer, Int2FloatLinkedOpenHashMap> topKValueCandidates, JavaPairRDD<Integer, IntArrayList> topKNeighborCandidates, LongAccumulator LISTS_WITH_COMMON_CANDIDATES) {
return topKValueCandidates
.mapValues(x -> new IntArrayList(Utils.sortByValue(x, true).keySet())) //sort the int2floatopenhashmap and get the keys (entityIds) sorted by values (value similarity) (descending)
.fullOuterJoin(topKNeighborCandidates)
.mapValues(x -> top1Borda(x, LISTS_WITH_COMMON_CANDIDATES))
.filter((x -> x._2() != null));
}
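The top1Borda helper is not shown in this excerpt. As a plain illustration of the Borda-count idea it applies (a generic sketch, not the project's actual implementation): each candidate at rank r in a list of size n receives n - r points, the points from both lists are summed, and the candidate with the highest total wins.

//Generic Borda-count sketch for two ranked candidate lists (illustrative only).
//Requires java.util.Arrays, java.util.HashMap, java.util.Map and it.unimi.dsi.fastutil.ints.IntArrayList.
static Integer top1BordaSketch(IntArrayList valueRanking, IntArrayList neighborRanking) {
    Map<Integer, Integer> scores = new HashMap<>();
    for (IntArrayList ranking : Arrays.asList(valueRanking, neighborRanking)) {
        if (ranking == null) continue; //a fullOuterJoin may leave one side empty
        int n = ranking.size();
        for (int r = 0; r < n; r++) {
            scores.merge(ranking.getInt(r), n - r, Integer::sum); //rank 0 gets n points
        }
    }
    return scores.entrySet().stream()
            .max(Map.Entry.comparingByValue())
            .map(Map.Entry::getKey)
            .orElse(null); //null when both lists are empty or missing
}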
Example 11: getTopKCandidatesPerEntity
import org.apache.spark.util.LongAccumulator; //import the required package/class
/**
* Aggregates the two lists of candidate matches per entity using Borda, and returns the top-K aggregate candidate matches per entity.
* @param topKValueCandidates the top candidate matches per entity based on values, in the form: key: entityId, value: map of [candidateMatch, valueSim(entityId,candidateMatch)]
* @param topKNeighborCandidates the top candidate matches per entity based on neighbors, in the form: key: entityId, value: ranked list of [candidateMatch]
* @param LISTS_WITH_COMMON_CANDIDATES
* @param K how many candidates to keep per entity
* @return the top-K aggregate candidate matches per entity
*/
public JavaPairRDD<Integer,IntArrayList> getTopKCandidatesPerEntity(JavaPairRDD<Integer, Int2FloatLinkedOpenHashMap> topKValueCandidates, JavaPairRDD<Integer, IntArrayList> topKNeighborCandidates, LongAccumulator LISTS_WITH_COMMON_CANDIDATES, int K,
LongAccumulator RESULTS_FROM_VALUES, LongAccumulator RESULTS_FROM_NEIGHBORS, LongAccumulator RESULTS_FROM_SUM,
LongAccumulator RESULTS_FROM_VALUES_WITHOUT_NEIGHBORS, LongAccumulator RESULTS_FROM_NEIGHBORS_WITHOUT_VALUES) {
return topKValueCandidates
.mapValues(x -> new IntArrayList(Utils.sortByValue(x, true).keySet())) //sort the int2floatopenhashmap and get the keys (entityIds) sorted by values (value similarity) (descending)
.fullOuterJoin(topKNeighborCandidates)
.mapValues(x -> topKBorda(x, LISTS_WITH_COMMON_CANDIDATES, K,
RESULTS_FROM_VALUES, RESULTS_FROM_NEIGHBORS, RESULTS_FROM_SUM, RESULTS_FROM_VALUES_WITHOUT_NEIGHBORS, RESULTS_FROM_NEIGHBORS_WITHOUT_VALUES))
.filter((x -> x._2() != null));
}
Example 12: getTopKCandidatesPerEntityDEBUGGING
import org.apache.spark.util.LongAccumulator; //import the required package/class
/**
* Aggregates the two lists of candidate matches per entity using Borda, and returns the top-K aggregate candidate matches per entity (debugging variant).
* @param topKValueCandidates the top candidate matches per entity based on values, in the form: key: entityId, value: map of [candidateMatch, valueSim(entityId,candidateMatch)]
* @param topKNeighborCandidates the top candidate matches per entity based on neighbors, in the form: key: entityId, value: ranked list of [candidateMatch]
* @param LISTS_WITH_COMMON_CANDIDATES
* @param K how many candidates to keep per entity
* @return the top-K aggregate candidate matches per entity
*/
public JavaPairRDD<Integer,PriorityQueue<ComparableIntFloatPairDUMMY>> getTopKCandidatesPerEntityDEBUGGING(JavaPairRDD<Integer, Int2FloatLinkedOpenHashMap> topKValueCandidates, JavaPairRDD<Integer, IntArrayList> topKNeighborCandidates, LongAccumulator LISTS_WITH_COMMON_CANDIDATES, int K,
LongAccumulator RESULTS_FROM_VALUES, LongAccumulator RESULTS_FROM_NEIGHBORS, LongAccumulator RESULTS_FROM_SUM,
LongAccumulator RESULTS_FROM_VALUES_WITHOUT_NEIGHBORS, LongAccumulator RESULTS_FROM_NEIGHBORS_WITHOUT_VALUES) {
return topKValueCandidates
.mapValues(x -> new IntArrayList(Utils.sortByValue(x, true).keySet())) //sort the int2floatopenhashmap and get the keys (entityIds) sorted by values (value similarity) (descending)
.fullOuterJoin(topKNeighborCandidates)
.mapValues(x -> topKBordaDEBUGGING(x, LISTS_WITH_COMMON_CANDIDATES, K,
RESULTS_FROM_VALUES, RESULTS_FROM_NEIGHBORS, RESULTS_FROM_SUM, RESULTS_FROM_VALUES_WITHOUT_NEIGHBORS, RESULTS_FROM_NEIGHBORS_WITHOUT_VALUES))
.filter((x -> x._2() != null));
}
Example 13: testRun
import org.apache.spark.util.LongAccumulator; //import the required package/class
/**
* Test of run method, of class BlockFilteringAdvanced.
*/
@Test
public void testRun() {
System.out.println("getEntityBlocksAdvanced");
List<String> dummyBlocks = new ArrayList<>();
dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
dummyBlocks.add("1\t3#4#5#;-1#-5#");
dummyBlocks.add("2\t5#;-5#");
dummyBlocks.add("3\t5#;");
JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
LongAccumulator BLOCK_ASSIGNMENTS = jsc.sc().longAccumulator();
BlockFilteringAdvanced instance = new BlockFilteringAdvanced();
JavaPairRDD<Integer, IntArrayList> result = instance.run(blockingInput, BLOCK_ASSIGNMENTS);
List<Tuple2<Integer,IntArrayList>> expResult = new ArrayList<>();
expResult.add(new Tuple2<>(1, new IntArrayList(new int[]{0})));
expResult.add(new Tuple2<>(2, new IntArrayList(new int[]{0})));
expResult.add(new Tuple2<>(3, new IntArrayList(new int[]{1,0})));
expResult.add(new Tuple2<>(4, new IntArrayList(new int[]{1,0})));
expResult.add(new Tuple2<>(5, new IntArrayList(new int[]{2,1})));
expResult.add(new Tuple2<>(-1, new IntArrayList(new int[]{1,0})));
expResult.add(new Tuple2<>(-2, new IntArrayList(new int[]{0})));
expResult.add(new Tuple2<>(-3, new IntArrayList(new int[]{0})));
expResult.add(new Tuple2<>(-4, new IntArrayList(new int[]{0})));
expResult.add(new Tuple2<>(-5, new IntArrayList(new int[]{2,1})));
JavaPairRDD<Integer,IntArrayList> expResultRDD = jsc.parallelizePairs(expResult);
List<Tuple2<Integer, IntArrayList>> resultList = result.collect();
List<Tuple2<Integer, IntArrayList>> expResultList = expResultRDD.collect();
System.out.println("Result: "+Arrays.toString(resultList.toArray()));
System.out.println("Expect: "+Arrays.toString(expResultList.toArray()));
assertEquals(new HashSet<>(resultList), new HashSet<>(expResultList));
assertEquals(15L, (long) BLOCK_ASSIGNMENTS.value());
}
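The test above assumes a jsc field that is initialized elsewhere in the test class. A minimal fixture sketch for a local-mode context (hypothetical setup, assuming JUnit 4; the actual fixture code is not part of this excerpt):

private static JavaSparkContext jsc;

@BeforeClass
public static void setUpClass() {
    SparkConf conf = new SparkConf().setAppName("BlockFilteringAdvancedTest").setMaster("local[2]");
    jsc = new JavaSparkContext(conf);
}

@AfterClass
public static void tearDownClass() {
    if (jsc != null) {
        jsc.stop();
    }
}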
Example 14: create
import org.apache.spark.util.LongAccumulator; //import the required package/class
public static <A extends JavaRDDLike<?, ?>> VoidFunction<A> create(JavaStreamingContext jsc, long amount, String printf) {
final LongAccumulator stopAcc = jsc.ssc().sc().longAccumulator();
return rdd -> {
long count = rdd.count(); //count once to avoid triggering the Spark action twice
if (printf != null)
System.out.printf(printf, count);
if (count == 0L) {
stopAcc.add(1L);
if (stopAcc.value() >= amount)
jsc.stop();
} else
stopAcc.reset();
};
}
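A minimal wiring sketch for the function returned by create (the stream source and variable names are illustrative): registering it with foreachRDD makes the streaming context stop itself after the given number of consecutive empty micro-batches.

//Illustrative usage; jsc is an existing JavaStreamingContext, and this runs inside
//a method declared to throw InterruptedException.
JavaDStream<String> lines = jsc.socketTextStream("localhost", 9999);
VoidFunction<JavaRDD<String>> stopOnIdle = create(jsc, 3, "Batch size: %d%n"); //stop after 3 empty batches
lines.foreachRDD(stopOnIdle);
jsc.start();
jsc.awaitTermination();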
Example 15: main
import org.apache.spark.util.LongAccumulator; //import the required package/class
public static void main(String[] args) throws InterruptedException, IOException {
SparkConf sc = new SparkConf().setAppName("POC-OffsetsToZK");
try (JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(60000))) {
LongAccumulator stopCondition = jsc.ssc().sc().longAccumulator();
JavaPairDStream<String, String> stream = dealWithOffsets(jsc);
final ParseXML parseXML = new ParseXML();
JavaPairDStream<String,ExampleXML> records = stream.mapToPair(
tuple -> new Tuple2<>(tuple._1(), parseXML.call(tuple._2())));
Configuration conf = new Configuration();
BigQueryConfiguration.configureBigQueryOutput(conf, BQ_EXAMPLE_TABLE, BQ_EXAMPLE_SCHEMA);
conf.set("mapreduce.job.outputformat.class", BigQueryOutputFormat.class.getName());
records.foreachRDD(rdd -> {
System.out.printf("Amount of XMLs: %d\n", rdd.count());
if (rdd.count() > 0L) {
stopCondition.reset();
long time = System.currentTimeMillis();
rdd.mapToPair(new PrepToBQ()).saveAsNewAPIHadoopDataset(conf);
System.out.printf("Sent to BQ in %fs\n", (System.currentTimeMillis() - time) / 1000f);
} else {
stopCondition.add(1L);
if (stopCondition.value() >= 2L)
jsc.stop();
}
});
jsc.start();
jsc.awaitTermination();
}
}