当前位置: 首页>>代码示例>>Java>>正文


Java JavaPairRDD.collect方法代码示例

本文整理汇总了Java中org.apache.spark.api.java.JavaPairRDD.collect方法的典型用法代码示例。如果您正苦于以下问题:Java JavaPairRDD.collect方法的具体用法?Java JavaPairRDD.collect怎么用?Java JavaPairRDD.collect使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.spark.api.java.JavaPairRDD的用法示例。


在下文中一共展示了JavaPairRDD.collect方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testParseBlockCollection

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Test of parseBlockCollection method, of class BlockFilteringAdvanced.
 * <p>
 * Parses four raw block lines and compares the collected pairs, in order,
 * against a hand-built list of (block id, entity ids) tuples.
 */
@Test
public void testParseBlockCollection() {
    System.out.println("parseBlockCollection");

    // Raw input: one block per line, "<blockId>\t<ids>#...;<ids>#...".
    List<String> dummyBlocks = new ArrayList<>();
    dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
    dummyBlocks.add("1\t3#4#5#;-1#-5#");
    dummyBlocks.add("2\t5#;-5#");
    dummyBlocks.add("3\t5#;");
    JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);

    BlockFilteringAdvanced instance = new BlockFilteringAdvanced();
    JavaPairRDD<Integer, IntArrayList> result = instance.parseBlockCollection(blockingInput);

    // Expected parse result: the ids of both sides merged into a single list per block.
    List<Tuple2<Integer, IntArrayList>> dummyBlocksParsed = new ArrayList<>();
    dummyBlocksParsed.add(new Tuple2<>(0, new IntArrayList(new int[]{1,2,3,4,5,-1,-2,-3,-4,-5})));
    dummyBlocksParsed.add(new Tuple2<>(1, new IntArrayList(new int[]{3,4,5,-1,-5})));
    dummyBlocksParsed.add(new Tuple2<>(2, new IntArrayList(new int[]{5,-5})));
    dummyBlocksParsed.add(new Tuple2<>(3, new IntArrayList(new int[]{5})));
    JavaPairRDD<Integer, IntArrayList> expResult = jsc.parallelizePairs(dummyBlocksParsed);

    List<Tuple2<Integer, IntArrayList>> resultList = result.collect();
    List<Tuple2<Integer, IntArrayList>> expResultList = expResult.collect();
    System.out.println("Result: "+Arrays.toString(resultList.toArray()));
    System.out.println("Expect: "+Arrays.toString(expResultList.toArray()));

    // Expected value first, so a failure message labels the two sides correctly
    // (assumes JUnit's assertEquals(expected, actual) signature).
    assertEquals(expResultList, resultList);
}
 
开发者ID:vefthym,项目名称:MinoanER,代码行数:29,代码来源:BlockFilteringAdvancedTest.java

示例2: testRun

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Test of run method, of class BlockFilteringAdvanced.
 * <p>
 * Runs the filter over four raw block lines and checks, ignoring order, the
 * collected pairs against a hand-built expectation. Also checks that the
 * accumulator passed to {@code run} ends up at 15.
 */
@Test
public void testRun() {
    // The original printed "getEntityBlocksAdvanced" here, a copy-paste leftover
    // from another test; the label now names the method actually under test.
    System.out.println("run");

    List<String> dummyBlocks = new ArrayList<>();
    dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
    dummyBlocks.add("1\t3#4#5#;-1#-5#");
    dummyBlocks.add("2\t5#;-5#");
    dummyBlocks.add("3\t5#;");
    JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
    LongAccumulator blockAssignments = jsc.sc().longAccumulator();

    BlockFilteringAdvanced instance = new BlockFilteringAdvanced();
    JavaPairRDD<Integer, IntArrayList> result = instance.run(blockingInput, blockAssignments);

    List<Tuple2<Integer, IntArrayList>> expResult = new ArrayList<>();
    expResult.add(new Tuple2<>(1, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(2, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(3, new IntArrayList(new int[]{1,0})));
    expResult.add(new Tuple2<>(4, new IntArrayList(new int[]{1,0})));
    expResult.add(new Tuple2<>(5, new IntArrayList(new int[]{2,1})));
    expResult.add(new Tuple2<>(-1, new IntArrayList(new int[]{1,0})));
    expResult.add(new Tuple2<>(-2, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(-3, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(-4, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(-5, new IntArrayList(new int[]{2,1})));

    JavaPairRDD<Integer, IntArrayList> expResultRDD = jsc.parallelizePairs(expResult);

    List<Tuple2<Integer, IntArrayList>> resultList = result.collect();
    List<Tuple2<Integer, IntArrayList>> expResultList = expResultRDD.collect();

    System.out.println("Result: "+Arrays.toString(resultList.toArray()));
    System.out.println("Expect: "+Arrays.toString(expResultList.toArray()));

    // Compared as sets because the order of the collected pairs is not asserted.
    // Expected value first (assumes JUnit's assertEquals(expected, actual)).
    assertEquals(new HashSet<>(expResultList), new HashSet<>(resultList));
    assertEquals(15L, (long) blockAssignments.value());
}
 
开发者ID:vefthym,项目名称:MinoanER,代码行数:42,代码来源:BlockFilteringAdvancedTest.java

示例3: collect

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Null-tolerant wrapper around {@code JavaPairRDD.collect()}.
 *
 * @param rdd pair RDD to collect; may be {@code null}
 * @return the collected pairs, or an empty list when {@code rdd} is {@code null}
 */
private static Collection<Tuple2<String,String>> collect(JavaPairRDD<String,String> rdd) {
  if (rdd != null) {
    return rdd.collect();
  }
  // No RDD supplied: hand back an empty collection instead of null.
  return Collections.emptyList();
}
 
开发者ID:oncewang,项目名称:oryx2,代码行数:8,代码来源:MockBatchUpdate.java

示例4: testGetMapOutputWJS

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Test of getMapOutputWJS method, of class CNPMapPhase.
 * <p>
 * Builds the input by chaining BlockFilteringAdvanced.run and
 * BlocksFromEntityIndex.run over four raw block lines and appending one extra
 * block. The pairs returned by CNPMapPhase.getMapOutputWJS are then compared,
 * ignoring order, against a hand-built expectation.
 */
@Test
public void testGetMapOutputWJS() {
    System.out.println("getMapOutputWJS");

    System.out.println("blocks from entity index");
    List<String> dummyBlocks = new ArrayList<>();
    dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
    dummyBlocks.add("1\t3#4#5#;-1#-5#");
    dummyBlocks.add("2\t5#;-5#");
    dummyBlocks.add("3\t5#;");
    JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
    LongAccumulator blockAssignments = jsc.sc().longAccumulator();

    // Step 1: raw blocks -> entity index.
    BlockFilteringAdvanced blockFiltering = new BlockFilteringAdvanced();
    JavaPairRDD<Integer, IntArrayList> entityIndex = blockFiltering.run(blockingInput, blockAssignments);

    // Step 2: entity index -> filtered blocks.
    BlocksFromEntityIndex bfei = new BlocksFromEntityIndex();
    LongAccumulator cleanBlocksAccum = jsc.sc().longAccumulator();
    LongAccumulator numComparisons = jsc.sc().longAccumulator();
    JavaPairRDD<Integer, IntArrayList> filteredBlocks = bfei.run(entityIndex, cleanBlocksAccum, numComparisons);

    // Copy into a mutable list so an extra block can be appended before re-parallelizing.
    List<Tuple2<Integer, IntArrayList>> tweakedBlocks = new ArrayList<>(filteredBlocks.collect());
    tweakedBlocks.add(new Tuple2<>(-1, new IntArrayList(new int[]{-100}))); //this should not alter the results
    filteredBlocks = jsc.parallelizePairs(tweakedBlocks);

    JavaPairRDD<Integer, IntArrayList> result = CNPMapPhase.getMapOutputWJS(filteredBlocks);

    List<Tuple2<Integer, IntArrayList>> expResult = new ArrayList<>();
    expResult.add(new Tuple2<>(4, new IntArrayList(new int[]{4, -2, -1, -4, -3})));
    expResult.add(new Tuple2<>(1, new IntArrayList(new int[]{4, -2, -1, -4, -3})));
    expResult.add(new Tuple2<>(3, new IntArrayList(new int[]{4, -2, -1, -4, -3})));
    expResult.add(new Tuple2<>(2, new IntArrayList(new int[]{4, -2, -1, -4, -3})));
    expResult.add(new Tuple2<>(-2, new IntArrayList(new int[]{4, 4, 1, 3, 2})));
    expResult.add(new Tuple2<>(-1, new IntArrayList(new int[]{4, 4, 1, 3, 2})));
    expResult.add(new Tuple2<>(-4, new IntArrayList(new int[]{4, 4, 1, 3, 2})));
    expResult.add(new Tuple2<>(-3, new IntArrayList(new int[]{4, 4, 1, 3, 2})));
    expResult.add(new Tuple2<>(4, new IntArrayList(new int[]{3, -1, -5})));
    expResult.add(new Tuple2<>(3, new IntArrayList(new int[]{3, -1, -5})));
    expResult.add(new Tuple2<>(5, new IntArrayList(new int[]{3, -1, -5})));
    expResult.add(new Tuple2<>(-1, new IntArrayList(new int[]{2, 4, 3, 5})));
    expResult.add(new Tuple2<>(-5, new IntArrayList(new int[]{2, 4, 3, 5})));
    expResult.add(new Tuple2<>(5, new IntArrayList(new int[]{1, -5})));
    expResult.add(new Tuple2<>(-5, new IntArrayList(new int[]{1, 5})));

    JavaPairRDD<Integer, IntArrayList> expResultRDD = jsc.parallelizePairs(expResult);

    List<Tuple2<Integer, IntArrayList>> resultList = result.collect();
    List<Tuple2<Integer, IntArrayList>> expResultList = expResultRDD.collect();

    // NOTE(review): the inner lists are compared as-is (an earlier sorting step was
    // disabled), so this presumably relies on a deterministic element order inside
    // each IntArrayList - confirm.

    System.out.println("Result: "+Arrays.toString(resultList.toArray()));
    System.out.println("Expect: "+Arrays.toString(expResultList.toArray()));

    // Compared as sets because the order of the collected pairs is not asserted.
    // Expected value first (assumes JUnit's assertEquals(expected, actual)).
    assertEquals(new HashSet<>(expResultList), new HashSet<>(resultList));
}
 
开发者ID:vefthym,项目名称:MinoanER,代码行数:62,代码来源:EntityBasedCNPMapPhaseTest.java

示例5: testRun

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Test of run method, of class BlocksFromEntityIndex.
 * <p>
 * Feeds the output of BlockFilteringAdvanced.run into BlocksFromEntityIndex.run
 * and checks the two accumulators (3 and 23) as well as the resulting blocks.
 * The blocks are compared ignoring both the order of the pairs and the order of
 * the ids inside each block.
 */
@Test
public void testRun() {
    System.out.println("blocks from entity index");
    List<String> dummyBlocks = new ArrayList<>();
    dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
    dummyBlocks.add("1\t3#4#5#;-1#-5#");
    dummyBlocks.add("2\t5#;-5#");
    dummyBlocks.add("3\t5#;");
    JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
    LongAccumulator blockAssignments = jsc.sc().longAccumulator();

    BlockFilteringAdvanced blockFiltering = new BlockFilteringAdvanced();
    JavaPairRDD<Integer, IntArrayList> entityIndex = blockFiltering.run(blockingInput, blockAssignments);

    BlocksFromEntityIndex instance = new BlocksFromEntityIndex();
    LongAccumulator cleanBlocksAccum = jsc.sc().longAccumulator();
    LongAccumulator numComparisons = jsc.sc().longAccumulator();
    JavaPairRDD<Integer, IntArrayList> result = instance.run(entityIndex, cleanBlocksAccum, numComparisons);

    List<Tuple2<Integer, IntArrayList>> expResult = new ArrayList<>();
    expResult.add(new Tuple2<>(0, new IntArrayList(new int[]{1,2,3,4,-1,-2,-3,-4})));
    expResult.add(new Tuple2<>(1, new IntArrayList(new int[]{3,4,5,-1,-5})));
    expResult.add(new Tuple2<>(2, new IntArrayList(new int[]{5,-5})));

    JavaPairRDD<Integer, IntArrayList> expResultRDD = jsc.parallelizePairs(expResult);

    List<Tuple2<Integer, IntArrayList>> resultList = result.collect();
    List<Tuple2<Integer, IntArrayList>> expResultList = expResultRDD.collect();

    // Sort the ids inside every block on both sides so that the comparison
    // does not depend on the order of ids within a block.
    expResultList.forEach(listItem -> Collections.sort(listItem._2()));
    resultList.forEach(listItem -> Collections.sort(listItem._2()));

    System.out.println("Result: "+Arrays.toString(resultList.toArray()));
    System.out.println("Expect: "+Arrays.toString(expResultList.toArray()));

    // Expected value first (assumes JUnit's assertEquals(expected, actual)).
    assertEquals(3L, (long) cleanBlocksAccum.value());
    assertEquals(23L, (long) numComparisons.value());
    assertEquals(new HashSet<>(expResultList), new HashSet<>(resultList));
}
 
开发者ID:vefthym,项目名称:MinoanER,代码行数:45,代码来源:BlocksFromEntityIndexTest.java

示例6: testGetEntityBlocksAdvanced

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Test of getEntityBlocksAdvanced method, of class BlockFilteringAdvanced.
 * <p>
 * The method is looked up with getDeclaredMethod and made accessible before it
 * is invoked on the parsed blocks. The collected (key, (a, b)) pairs are
 * compared, ignoring order, against a hand-built expectation.
 *
 * @throws java.lang.IllegalAccessException
 * @throws java.lang.IllegalArgumentException
 * @throws java.lang.reflect.InvocationTargetException
 * @throws java.lang.NoSuchMethodException
 */
@Test
public void testGetEntityBlocksAdvanced() throws IllegalAccessException, IllegalArgumentException, InvocationTargetException, NoSuchMethodException {
    System.out.println("getEntityBlocksAdvanced");

    List<String> dummyBlocks = new ArrayList<>();
    dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
    dummyBlocks.add("1\t3#4#5#;-1#-5#");
    dummyBlocks.add("2\t5#;-5#");
    dummyBlocks.add("3\t5#;");
    JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
    BlockFilteringAdvanced instance = new BlockFilteringAdvanced();
    JavaPairRDD<Integer, IntArrayList> parsedBlocks = instance.parseBlockCollection(blockingInput);

    // Look the method up reflectively and open it up for invocation.
    Method method = BlockFilteringAdvanced.class.getDeclaredMethod("getEntityBlocksAdvanced", JavaPairRDD.class);
    method.setAccessible(true);

    // Method.invoke returns Object, so the generic cast cannot be checked at runtime.
    @SuppressWarnings("unchecked")
    JavaPairRDD<Integer, Tuple2<Integer, Integer>> result =
            (JavaPairRDD<Integer, Tuple2<Integer, Integer>>) method.invoke(instance, parsedBlocks);

    List<Tuple2<Integer, Tuple2<Integer, Integer>>> expResult = new ArrayList<>();
    // Pairs stemming from block 0.
    expResult.add(new Tuple2<>(1, new Tuple2<>(0,5)));
    expResult.add(new Tuple2<>(2, new Tuple2<>(0,5)));
    expResult.add(new Tuple2<>(3, new Tuple2<>(0,5)));
    expResult.add(new Tuple2<>(4, new Tuple2<>(0,5)));
    expResult.add(new Tuple2<>(5, new Tuple2<>(0,5)));
    expResult.add(new Tuple2<>(-1, new Tuple2<>(0,5)));
    expResult.add(new Tuple2<>(-2, new Tuple2<>(0,5)));
    expResult.add(new Tuple2<>(-3, new Tuple2<>(0,5)));
    expResult.add(new Tuple2<>(-4, new Tuple2<>(0,5)));
    expResult.add(new Tuple2<>(-5, new Tuple2<>(0,5)));
    // Pairs stemming from block 1.
    expResult.add(new Tuple2<>(3, new Tuple2<>(1,3)));
    expResult.add(new Tuple2<>(4, new Tuple2<>(1,3)));
    expResult.add(new Tuple2<>(5, new Tuple2<>(1,3)));
    expResult.add(new Tuple2<>(-1, new Tuple2<>(1,3)));
    expResult.add(new Tuple2<>(-5, new Tuple2<>(1,3)));
    // Pairs stemming from block 2.
    expResult.add(new Tuple2<>(5, new Tuple2<>(2,1)));
    expResult.add(new Tuple2<>(-5, new Tuple2<>(2,1)));
    // Block 3 ("3\t5#;") contributes no expected pair; the original marked
    // (5, (3,0)) as a "null result".

    JavaPairRDD<Integer, Tuple2<Integer, Integer>> expResultRDD = jsc.parallelizePairs(expResult);

    List<Tuple2<Integer, Tuple2<Integer, Integer>>> resultList = result.collect();
    List<Tuple2<Integer, Tuple2<Integer, Integer>>> expResultList = expResultRDD.collect();

    System.out.println("Result: "+Arrays.toString(resultList.toArray()));
    System.out.println("Expect: "+Arrays.toString(expResultList.toArray()));

    // Compared as sets because the order of the collected pairs is not asserted.
    // Expected value first (assumes JUnit's assertEquals(expected, actual)).
    assertEquals(new HashSet<>(expResultList), new HashSet<>(resultList));
}
 
开发者ID:vefthym,项目名称:MinoanER,代码行数:56,代码来源:BlockFilteringAdvancedTest.java

示例7: testGetEntityIndex

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Test of getEntityIndex method, of class BlockFilteringAdvanced.
 * <p>
 * Both getEntityBlocksAdvanced and getEntityIndex are looked up with
 * getDeclaredMethod, made accessible and invoked in sequence. The collected
 * pairs are compared, ignoring order, against a hand-built expectation, and
 * the accumulator passed to getEntityIndex is expected to end up at 15.
 *
 * @throws java.lang.NoSuchMethodException
 * @throws java.lang.IllegalAccessException
 * @throws java.lang.IllegalArgumentException
 * @throws java.lang.reflect.InvocationTargetException
 */
@Test
public void testGetEntityIndex() throws NoSuchMethodException, IllegalAccessException, IllegalArgumentException, InvocationTargetException {
    System.out.println("getEntityIndex");

    List<String> dummyBlocks = new ArrayList<>();
    dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
    dummyBlocks.add("1\t3#4#5#;-1#-5#");
    dummyBlocks.add("2\t5#;-5#");
    dummyBlocks.add("3\t5#;");
    JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
    BlockFilteringAdvanced instance = new BlockFilteringAdvanced();
    JavaPairRDD<Integer, IntArrayList> parsedBlocks = instance.parseBlockCollection(blockingInput);

    // Stage 1: produce the input of getEntityIndex through getEntityBlocksAdvanced.
    Method method1 = BlockFilteringAdvanced.class.getDeclaredMethod("getEntityBlocksAdvanced", JavaPairRDD.class);
    method1.setAccessible(true);
    // Method.invoke returns Object, so the generic cast cannot be checked at runtime.
    @SuppressWarnings("unchecked")
    JavaPairRDD<Integer, Tuple2<Integer, Integer>> entityBlocks =
            (JavaPairRDD<Integer, Tuple2<Integer, Integer>>) method1.invoke(instance, parsedBlocks);

    // Stage 2: the method under test.
    Method method2 = BlockFilteringAdvanced.class.getDeclaredMethod("getEntityIndex", JavaPairRDD.class, LongAccumulator.class);
    method2.setAccessible(true);
    LongAccumulator blockAssignments = jsc.sc().longAccumulator();
    @SuppressWarnings("unchecked")
    JavaPairRDD<Integer, IntArrayList> result =
            (JavaPairRDD<Integer, IntArrayList>) method2.invoke(instance, entityBlocks, blockAssignments);

    // Reference formula kept from the original (disabled there):
    // MAX_BLOCKS = floor(3*numBlocks/4 + 1)

    List<Tuple2<Integer, IntArrayList>> expResult = new ArrayList<>();
    expResult.add(new Tuple2<>(-2, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(4, new IntArrayList(new int[]{1,0})));
    expResult.add(new Tuple2<>(-1, new IntArrayList(new int[]{1,0})));
    expResult.add(new Tuple2<>(-5, new IntArrayList(new int[]{2,1})));
    expResult.add(new Tuple2<>(-4, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(1, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(-3, new IntArrayList(new int[]{0})));
    expResult.add(new Tuple2<>(3, new IntArrayList(new int[]{1,0})));
    expResult.add(new Tuple2<>(5, new IntArrayList(new int[]{2,1})));
    expResult.add(new Tuple2<>(2, new IntArrayList(new int[]{0})));

    JavaPairRDD<Integer, IntArrayList> expResultRDD = jsc.parallelizePairs(expResult);

    List<Tuple2<Integer, IntArrayList>> resultList = result.collect();
    List<Tuple2<Integer, IntArrayList>> expResultList = expResultRDD.collect();

    System.out.println("Result: "+Arrays.toString(resultList.toArray()));
    System.out.println("Expect: "+Arrays.toString(expResultList.toArray()));

    // Compared as sets because the order of the collected pairs is not asserted.
    // Expected value first (assumes JUnit's assertEquals(expected, actual)).
    assertEquals(new HashSet<>(expResultList), new HashSet<>(resultList));
    assertEquals(15L, (long) blockAssignments.value());
}
 
开发者ID:vefthym,项目名称:MinoanER,代码行数:50,代码来源:BlockFilteringAdvancedTest.java


注:本文中的org.apache.spark.api.java.JavaPairRDD.collect方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。