本文整理汇总了Java中org.apache.spark.api.java.JavaPairRDD.collect方法的典型用法代码示例。如果您正苦于以下问题：Java JavaPairRDD.collect方法的具体用法？Java JavaPairRDD.collect怎么用？Java JavaPairRDD.collect使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.spark.api.java.JavaPairRDD的用法示例。
在下文中一共展示了JavaPairRDD.collect方法的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testParseBlockCollection
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Test of parseBlockCollection method, of class BlockFilteringAdvanced.
 *
 * <p>Parses four dummy block lines and checks that the collected
 * (key, IntArrayList) pairs equal the expected list, element by element and in
 * the same order.
 */
@Test
public void testParseBlockCollection() {
    System.out.println("parseBlockCollection");

    // NOTE(review): each line looks like "<blockId>\t<ids>#...;<ids>#..." — confirm
    // the exact format against BlockFilteringAdvanced.parseBlockCollection.
    List<String> dummyBlocks = new ArrayList<>();
    dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
    dummyBlocks.add("1\t3#4#5#;-1#-5#");
    dummyBlocks.add("2\t5#;-5#");
    dummyBlocks.add("3\t5#;");
    JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);

    BlockFilteringAdvanced instance = new BlockFilteringAdvanced();
    JavaPairRDD<Integer, IntArrayList> result = instance.parseBlockCollection(blockingInput);

    // The expected pairs are compared directly; there is no need to push them
    // through an RDD and collect them back.
    List<Tuple2<Integer, IntArrayList>> expResultList = new ArrayList<>();
    expResultList.add(new Tuple2<>(0, new IntArrayList(new int[]{1, 2, 3, 4, 5, -1, -2, -3, -4, -5})));
    expResultList.add(new Tuple2<>(1, new IntArrayList(new int[]{3, 4, 5, -1, -5})));
    expResultList.add(new Tuple2<>(2, new IntArrayList(new int[]{5, -5})));
    expResultList.add(new Tuple2<>(3, new IntArrayList(new int[]{5})));

    List<Tuple2<Integer, IntArrayList>> resultList = result.collect();

    System.out.println("Result: " + Arrays.toString(resultList.toArray()));
    System.out.println("Expect: " + Arrays.toString(expResultList.toArray()));

    // JUnit convention: expected value first, actual value second.
    assertEquals(expResultList, resultList);
}
示例2: testRun
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Test of run method, of class BlockFilteringAdvanced.
 *
 * <p>Runs the filter on four dummy block lines and checks the collected pairs
 * (compared as sets, so ordering is ignored) as well as the final value of the
 * accumulator passed to {@code run}.
 */
@Test
public void testRun() {
    System.out.println("run");

    List<String> dummyBlocks = new ArrayList<>();
    dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
    dummyBlocks.add("1\t3#4#5#;-1#-5#");
    dummyBlocks.add("2\t5#;-5#");
    dummyBlocks.add("3\t5#;");
    JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);

    LongAccumulator blockAssignments = jsc.sc().longAccumulator();
    BlockFilteringAdvanced instance = new BlockFilteringAdvanced();
    JavaPairRDD<Integer, IntArrayList> result = instance.run(blockingInput, blockAssignments);

    List<Tuple2<Integer, IntArrayList>> expResult = new ArrayList<>();
    expResult.add(new Tuple2<>(1, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(2, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(3, new IntArrayList(new int[]{1, 0})));
    expResult.add(new Tuple2<>(4, new IntArrayList(new int[]{1, 0})));
    expResult.add(new Tuple2<>(5, new IntArrayList(new int[]{2, 1})));
    expResult.add(new Tuple2<>(-1, new IntArrayList(new int[]{1, 0})));
    expResult.add(new Tuple2<>(-2, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(-3, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(-4, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(-5, new IntArrayList(new int[]{2, 1})));

    // collect() is the action that runs the job; the accumulator is only read
    // after it has completed.
    List<Tuple2<Integer, IntArrayList>> resultList = result.collect();

    System.out.println("Result: " + Arrays.toString(resultList.toArray()));
    System.out.println("Expect: " + Arrays.toString(expResult.toArray()));

    // Expected first, actual second; sets make the comparison order-insensitive.
    assertEquals(new HashSet<>(expResult), new HashSet<>(resultList));
    assertEquals(15L, (long) blockAssignments.value());
}
示例3: collect
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Null-tolerant wrapper around {@link JavaPairRDD#collect()}.
 *
 * @param rdd the pair RDD to collect; may be {@code null}
 * @return the result of {@code rdd.collect()}, or an empty list when
 *         {@code rdd} is {@code null}
 */
private static Collection<Tuple2<String,String>> collect(JavaPairRDD<String,String> rdd) {
    if (rdd != null) {
        return rdd.collect();
    }
    return Collections.emptyList();
}
示例4: testGetMapOutputWJS
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Test of getMapOutputWJS method, of class CNPMapPhase.
 *
 * <p>Builds the input by chaining BlockFilteringAdvanced.run and
 * BlocksFromEntityIndex.run over four dummy block lines, appends one extra
 * block that must not influence the outcome, and compares the collected output
 * of {@code CNPMapPhase.getMapOutputWJS} with the expected pairs as sets
 * (ordering is ignored).
 */
@Test
public void testGetMapOutputWJS() {
    System.out.println("getMapOutputWJS");
    System.out.println("blocks from entity index");

    List<String> dummyBlocks = new ArrayList<>();
    dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
    dummyBlocks.add("1\t3#4#5#;-1#-5#");
    dummyBlocks.add("2\t5#;-5#");
    dummyBlocks.add("3\t5#;");
    JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);

    LongAccumulator blockAssignments = jsc.sc().longAccumulator();
    BlockFilteringAdvanced blockFiltering = new BlockFilteringAdvanced();
    JavaPairRDD<Integer, IntArrayList> entityIndex = blockFiltering.run(blockingInput, blockAssignments);

    BlocksFromEntityIndex bfei = new BlocksFromEntityIndex();
    LongAccumulator cleanBlocksAccum = jsc.sc().longAccumulator();
    LongAccumulator numComparisons = jsc.sc().longAccumulator();
    JavaPairRDD<Integer, IntArrayList> filteredBlocks = bfei.run(entityIndex, cleanBlocksAccum, numComparisons);

    // Copy the collected blocks into a mutable list so an extra block can be appended.
    List<Tuple2<Integer, IntArrayList>> tweakedBlocks = new ArrayList<>(filteredBlocks.collect());
    tweakedBlocks.add(new Tuple2<>(-1, new IntArrayList(new int[]{-100}))); //this should not alter the results
    filteredBlocks = jsc.parallelizePairs(tweakedBlocks);

    JavaPairRDD<Integer, IntArrayList> result = CNPMapPhase.getMapOutputWJS(filteredBlocks);

    List<Tuple2<Integer, IntArrayList>> expResult = new ArrayList<>();
    expResult.add(new Tuple2<>(4, new IntArrayList(new int[]{4, -2, -1, -4, -3})));
    expResult.add(new Tuple2<>(1, new IntArrayList(new int[]{4, -2, -1, -4, -3})));
    expResult.add(new Tuple2<>(3, new IntArrayList(new int[]{4, -2, -1, -4, -3})));
    expResult.add(new Tuple2<>(2, new IntArrayList(new int[]{4, -2, -1, -4, -3})));
    expResult.add(new Tuple2<>(-2, new IntArrayList(new int[]{4, 4, 1, 3, 2})));
    expResult.add(new Tuple2<>(-1, new IntArrayList(new int[]{4, 4, 1, 3, 2})));
    expResult.add(new Tuple2<>(-4, new IntArrayList(new int[]{4, 4, 1, 3, 2})));
    expResult.add(new Tuple2<>(-3, new IntArrayList(new int[]{4, 4, 1, 3, 2})));
    expResult.add(new Tuple2<>(4, new IntArrayList(new int[]{3, -1, -5})));
    expResult.add(new Tuple2<>(3, new IntArrayList(new int[]{3, -1, -5})));
    expResult.add(new Tuple2<>(5, new IntArrayList(new int[]{3, -1, -5})));
    expResult.add(new Tuple2<>(-1, new IntArrayList(new int[]{2, 4, 3, 5})));
    expResult.add(new Tuple2<>(-5, new IntArrayList(new int[]{2, 4, 3, 5})));
    expResult.add(new Tuple2<>(5, new IntArrayList(new int[]{1, -5})));
    expResult.add(new Tuple2<>(-5, new IntArrayList(new int[]{1, 5})));

    List<Tuple2<Integer, IntArrayList>> resultList = result.collect();

    System.out.println("Result: " + Arrays.toString(resultList.toArray()));
    System.out.println("Expect: " + Arrays.toString(expResult.toArray()));

    // Expected first, actual second. Only the order of the pairs is ignored;
    // the element order inside each IntArrayList still has to match.
    assertEquals(new HashSet<>(expResult), new HashSet<>(resultList));
}
示例5: testRun
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Test of run method, of class BlocksFromEntityIndex.
 *
 * <p>Builds an entity index with BlockFilteringAdvanced.run, feeds it to
 * {@code BlocksFromEntityIndex.run}, and checks both accumulators and the
 * collected blocks. The contents of each block are sorted before comparison
 * and the blocks are compared as sets, so neither the element order inside a
 * block nor the order of the blocks matters.
 */
@Test
public void testRun() {
    System.out.println("blocks from entity index");

    List<String> dummyBlocks = new ArrayList<>();
    dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
    dummyBlocks.add("1\t3#4#5#;-1#-5#");
    dummyBlocks.add("2\t5#;-5#");
    dummyBlocks.add("3\t5#;");
    JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);

    LongAccumulator blockAssignments = jsc.sc().longAccumulator();
    BlockFilteringAdvanced blockFiltering = new BlockFilteringAdvanced();
    JavaPairRDD<Integer, IntArrayList> entityIndex = blockFiltering.run(blockingInput, blockAssignments);

    BlocksFromEntityIndex instance = new BlocksFromEntityIndex();
    LongAccumulator cleanBlocksAccum = jsc.sc().longAccumulator();
    LongAccumulator numComparisons = jsc.sc().longAccumulator();
    JavaPairRDD<Integer, IntArrayList> result = instance.run(entityIndex, cleanBlocksAccum, numComparisons);

    List<Tuple2<Integer, IntArrayList>> expResult = new ArrayList<>();
    expResult.add(new Tuple2<>(0, new IntArrayList(new int[]{1, 2, 3, 4, -1, -2, -3, -4})));
    expResult.add(new Tuple2<>(1, new IntArrayList(new int[]{3, 4, 5, -1, -5})));
    expResult.add(new Tuple2<>(2, new IntArrayList(new int[]{5, -5})));

    // collect() runs the job; the accumulators are only read afterwards.
    List<Tuple2<Integer, IntArrayList>> resultList = result.collect();

    // Normalize the element order inside every block on both sides.
    expResult.forEach(block -> Collections.sort(block._2()));
    resultList.forEach(block -> Collections.sort(block._2()));

    System.out.println("Result: " + Arrays.toString(resultList.toArray()));
    System.out.println("Expect: " + Arrays.toString(expResult.toArray()));

    // JUnit convention: expected value first, actual value second.
    assertEquals(3L, (long) cleanBlocksAccum.value());
    assertEquals(23L, (long) numComparisons.value());
    assertEquals(new HashSet<>(expResult), new HashSet<>(resultList));
}
示例6: testGetEntityBlocksAdvanced
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Test of getEntityBlocksAdvanced method, of class BlockFilteringAdvanced.
 *
 * <p>The method under test is looked up with {@code getDeclaredMethod} and
 * made accessible, then invoked reflectively on the parsed dummy blocks. The
 * collected pairs are compared with the expected ones as sets, so ordering is
 * ignored.
 *
 * @throws java.lang.IllegalAccessException if the reflective invocation is denied access
 * @throws java.lang.reflect.InvocationTargetException if the invoked method itself throws
 * @throws java.lang.NoSuchMethodException if {@code getEntityBlocksAdvanced(JavaPairRDD)} is not declared
 */
@Test
public void testGetEntityBlocksAdvanced() throws IllegalAccessException, IllegalArgumentException, InvocationTargetException, NoSuchMethodException {
    System.out.println("getEntityBlocksAdvanced");

    List<String> dummyBlocks = new ArrayList<>();
    dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
    dummyBlocks.add("1\t3#4#5#;-1#-5#");
    dummyBlocks.add("2\t5#;-5#");
    dummyBlocks.add("3\t5#;");
    JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);

    BlockFilteringAdvanced instance = new BlockFilteringAdvanced();
    JavaPairRDD<Integer, IntArrayList> parsedBlocks = instance.parseBlockCollection(blockingInput);

    Method method = BlockFilteringAdvanced.class.getDeclaredMethod("getEntityBlocksAdvanced", JavaPairRDD.class);
    method.setAccessible(true);

    // Method.invoke returns Object, so the cast is unavoidable here.
    // NOTE(review): the element types are taken from the target method's
    // signature, which is not visible in this file — confirm.
    @SuppressWarnings("unchecked")
    JavaPairRDD<Integer, Tuple2<Integer, Integer>> result =
            (JavaPairRDD<Integer, Tuple2<Integer, Integer>>) method.invoke(instance, parsedBlocks);

    List<Tuple2<Integer, Tuple2<Integer, Integer>>> expResult = new ArrayList<>();
    expResult.add(new Tuple2<>(1, new Tuple2<>(0, 5)));
    expResult.add(new Tuple2<>(2, new Tuple2<>(0, 5)));
    expResult.add(new Tuple2<>(3, new Tuple2<>(0, 5)));
    expResult.add(new Tuple2<>(4, new Tuple2<>(0, 5)));
    expResult.add(new Tuple2<>(5, new Tuple2<>(0, 5)));
    expResult.add(new Tuple2<>(-1, new Tuple2<>(0, 5)));
    expResult.add(new Tuple2<>(-2, new Tuple2<>(0, 5)));
    expResult.add(new Tuple2<>(-3, new Tuple2<>(0, 5)));
    expResult.add(new Tuple2<>(-4, new Tuple2<>(0, 5)));
    expResult.add(new Tuple2<>(-5, new Tuple2<>(0, 5)));
    expResult.add(new Tuple2<>(3, new Tuple2<>(1, 3)));
    expResult.add(new Tuple2<>(4, new Tuple2<>(1, 3)));
    expResult.add(new Tuple2<>(5, new Tuple2<>(1, 3)));
    expResult.add(new Tuple2<>(-1, new Tuple2<>(1, 3)));
    expResult.add(new Tuple2<>(-5, new Tuple2<>(1, 3)));
    expResult.add(new Tuple2<>(5, new Tuple2<>(2, 1)));
    expResult.add(new Tuple2<>(-5, new Tuple2<>(2, 1)));
    // No entry is expected for block 3 ("5#;"): a pair (5, (3, 0)) was
    // considered by the original author and marked as a "null result".

    List<Tuple2<Integer, Tuple2<Integer, Integer>>> resultList = result.collect();

    System.out.println("Result: " + Arrays.toString(resultList.toArray()));
    System.out.println("Expect: " + Arrays.toString(expResult.toArray()));

    // Expected first, actual second; sets make the comparison order-insensitive.
    assertEquals(new HashSet<>(expResult), new HashSet<>(resultList));
}
示例7: testGetEntityIndex
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Test of getEntityIndex method, of class BlockFilteringAdvanced.
 *
 * <p>Both {@code getEntityBlocksAdvanced} and {@code getEntityIndex} are
 * looked up with {@code getDeclaredMethod}, made accessible and invoked
 * reflectively, the output of the first feeding the second. The collected
 * pairs are compared with the expected ones as sets (ordering is ignored) and
 * the accumulator passed to {@code getEntityIndex} is checked afterwards.
 *
 * @throws NoSuchMethodException if one of the two methods is not declared with the looked-up signature
 * @throws IllegalAccessException if a reflective invocation is denied access
 * @throws InvocationTargetException if an invoked method itself throws
 */
@Test
public void testGetEntityIndex() throws NoSuchMethodException, IllegalAccessException, IllegalArgumentException, InvocationTargetException {
    System.out.println("getEntityIndex");

    List<String> dummyBlocks = new ArrayList<>();
    dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
    dummyBlocks.add("1\t3#4#5#;-1#-5#");
    dummyBlocks.add("2\t5#;-5#");
    dummyBlocks.add("3\t5#;");
    JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);

    BlockFilteringAdvanced instance = new BlockFilteringAdvanced();
    JavaPairRDD<Integer, IntArrayList> parsedBlocks = instance.parseBlockCollection(blockingInput);

    Method entityBlocksMethod = BlockFilteringAdvanced.class.getDeclaredMethod("getEntityBlocksAdvanced", JavaPairRDD.class);
    entityBlocksMethod.setAccessible(true);
    // Method.invoke returns Object, so the cast is unavoidable here.
    @SuppressWarnings("unchecked")
    JavaPairRDD<Integer, Tuple2<Integer, Integer>> entityBlocks =
            (JavaPairRDD<Integer, Tuple2<Integer, Integer>>) entityBlocksMethod.invoke(instance, parsedBlocks);

    Method entityIndexMethod = BlockFilteringAdvanced.class.getDeclaredMethod("getEntityIndex", JavaPairRDD.class, LongAccumulator.class);
    entityIndexMethod.setAccessible(true);
    LongAccumulator blockAssignments = jsc.sc().longAccumulator();
    @SuppressWarnings("unchecked")
    JavaPairRDD<Integer, IntArrayList> result =
            (JavaPairRDD<Integer, IntArrayList>) entityIndexMethod.invoke(instance, entityBlocks, blockAssignments);

    // NOTE(review): the original author left a disabled formula here,
    // MAX_BLOCKS = floor(3*numBlocks/4 + 1), tagged "preprocessing". The
    // expected values below presumably follow from that cap — confirm against
    // BlockFilteringAdvanced.getEntityIndex.
    List<Tuple2<Integer, IntArrayList>> expResult = new ArrayList<>();
    expResult.add(new Tuple2<>(-2, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(4, new IntArrayList(new int[]{1, 0})));
    expResult.add(new Tuple2<>(-1, new IntArrayList(new int[]{1, 0})));
    expResult.add(new Tuple2<>(-5, new IntArrayList(new int[]{2, 1})));
    expResult.add(new Tuple2<>(-4, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(1, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(-3, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(3, new IntArrayList(new int[]{1, 0})));
    expResult.add(new Tuple2<>(5, new IntArrayList(new int[]{2, 1})));
    expResult.add(new Tuple2<>(2, new IntArrayList(new int[]{0})));

    // collect() runs the job; the accumulator is only read afterwards.
    List<Tuple2<Integer, IntArrayList>> resultList = result.collect();

    System.out.println("Result: " + Arrays.toString(resultList.toArray()));
    System.out.println("Expect: " + Arrays.toString(expResult.toArray()));

    // Expected first, actual second; sets make the comparison order-insensitive.
    assertEquals(new HashSet<>(expResult), new HashSet<>(resultList));
    assertEquals(15L, (long) blockAssignments.value());
}