

Java JavaPairRDD.partitionBy Method Code Examples

This article collects typical usage examples of the org.apache.spark.api.java.JavaPairRDD.partitionBy method in Java. If you are wondering what JavaPairRDD.partitionBy does, how to call it, or what real-world uses look like, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.spark.api.java.JavaPairRDD.


Three code examples of the JavaPairRDD.partitionBy method are shown below, ordered by popularity.
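Before the full examples, here is a minimal sketch of the basic call pattern: partitionBy takes an org.apache.spark.Partitioner and returns a new pair RDD whose records are redistributed according to it. The local master setting, sample data, and partition counts below are illustrative assumptions, not taken from the examples that follow.

import java.util.Arrays;

import org.apache.spark.HashPartitioner;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class PartitionBySketch {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setMaster("local").setAppName("PartitionBySketch");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    JavaPairRDD<String, Integer> pairRdd = jsc.parallelizePairs(
        Arrays.asList(new Tuple2<>("a", 1), new Tuple2<>("b", 2), new Tuple2<>("c", 3)));

    // partitionBy returns a new pair RDD whose records are redistributed
    // according to the given Partitioner (here a HashPartitioner with 2 partitions).
    JavaPairRDD<String, Integer> repartitioned = pairRdd.partitionBy(new HashPartitioner(2));

    System.out.println(repartitioned.getNumPartitions()); // 2
    jsc.stop();
  }
}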

Example 1: parallizeUsers

import org.apache.spark.api.java.JavaPairRDD; // import the package/class the method depends on
public JavaRDD<String> parallizeUsers(Map<String, Double> userDocs) {

    // build a list of (user, value) tuples so it can be parallelized
    List<Tuple2<String, Double>> list = new ArrayList<>();
    for (String user : userDocs.keySet()) {
      list.add(new Tuple2<String, Double>(user, userDocs.get(user)));
    }

    // assign users to groups with a greedy partitioning heuristic
    ThePartitionProblemSolver solution = new KGreedyPartitionSolver();
    Map<String, Integer> userGroups = solution.solve(userDocs, this.partition);

    // repartition the pair RDD so each user lands in its assigned group
    JavaPairRDD<String, Double> pairRdd = spark.sc.parallelizePairs(list);
    JavaPairRDD<String, Double> userPairRDD = pairRdd.partitionBy(new logPartitioner(userGroups, this.partition));

    // return the user keys of the repartitioned RDD
    return userPairRDD.keys();
  }
 
Developer: apache, Project: incubator-sdap-mudrod, Lines: 19, Source: LogAbstract.java
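The logPartitioner used above is not shown in this excerpt. As a rough idea of what such a key-to-partition mapper could look like, here is a minimal sketch of a custom Partitioner that routes each user key to the group index computed by the solver; the field names and the fallback behavior are assumptions for illustration, not the actual mudrod implementation.

import java.util.Map;

import org.apache.spark.Partitioner;

// Hypothetical sketch: routes each user key to the partition index
// that the greedy solver assigned to it.
public class logPartitioner extends Partitioner {

  private final Map<String, Integer> userGroups; // key -> partition index
  private final int numPartitions;

  public logPartitioner(Map<String, Integer> userGroups, int numPartitions) {
    this.userGroups = userGroups;
    this.numPartitions = numPartitions;
  }

  @Override
  public int numPartitions() {
    return numPartitions;
  }

  @Override
  public int getPartition(Object key) {
    // fall back to partition 0 if a key was not assigned by the solver
    Integer group = userGroups.get(key.toString());
    return group == null ? 0 : group;
  }
}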

Example 2: main

import org.apache.spark.api.java.JavaPairRDD; // import the package/class the method depends on
public static void main(String[] args) {
	System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
	SparkConf conf = new SparkConf().setMaster("local").setAppName("Partitioning");
	JavaSparkContext jsc = new JavaSparkContext(conf);

	// create a pair RDD of eight (Integer, String) tuples spread over 3 partitions
	JavaPairRDD<Integer, String> pairRdd = jsc.parallelizePairs(
			Arrays.asList(new Tuple2<Integer, String>(1, "A"), new Tuple2<Integer, String>(2, "B"),
					new Tuple2<Integer, String>(3, "C"), new Tuple2<Integer, String>(4, "D"),
					new Tuple2<Integer, String>(5, "E"), new Tuple2<Integer, String>(6, "F"),
					new Tuple2<Integer, String>(7, "G"), new Tuple2<Integer, String>(8, "H")), 3);

	// RangePartitioner is a Scala API, so convert the Java pair RDD to a Scala RDD first
	RDD<Tuple2<Integer, String>> rdd = JavaPairRDD.toRDD(pairRdd);

	System.out.println(pairRdd.getNumPartitions());

	// alternative: hash-based partitioning
	// JavaPairRDD<Integer, String> hashPartitioned = pairRdd.partitionBy(new HashPartitioner(2));
	// System.out.println(hashPartitioned.getNumPartitions());

	// range-partition the keys into 4 ordered partitions
	RangePartitioner rangePartitioner = new RangePartitioner(4, rdd, true,
			scala.math.Ordering.Int$.MODULE$, scala.reflect.ClassTag$.MODULE$.apply(Integer.class));

	JavaPairRDD<Integer, String> rangePartitioned = pairRdd.partitionBy(rangePartitioner);

	// print which partition each key ended up in
	JavaRDD<String> mapPartitionsWithIndex = rangePartitioned.mapPartitionsWithIndex((index, tupleIterator) -> {
		List<String> list = new ArrayList<>();
		while (tupleIterator.hasNext()) {
			list.add("Partition number:" + index + ",key:" + tupleIterator.next()._1());
		}
		return list.iterator();
	}, true);

	System.out.println(mapPartitionsWithIndex.collect());
}
 
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines: 50, Source: Partitioning.java

Example 3: main

import org.apache.spark.api.java.JavaPairRDD; // import the package/class the method depends on
public static void main(String[] args) {
	System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
	SparkConf conf = new SparkConf().setMaster("local").setAppName("Partitioning");
	JavaSparkContext jsc = new JavaSparkContext(conf);

	// create a pair RDD of (country, continent) tuples spread over 3 partitions
	JavaPairRDD<String, String> pairRdd = jsc.parallelizePairs(
			Arrays.asList(new Tuple2<String, String>("India", "Asia"), new Tuple2<String, String>("Germany", "Europe"),
					new Tuple2<String, String>("Japan", "Asia"), new Tuple2<String, String>("France", "Europe")), 3);

	// repartition with a user-defined Partitioner implementation
	JavaPairRDD<String, String> customPartitioned = pairRdd.partitionBy(new CustomPartitioner());

	System.out.println(customPartitioned.getNumPartitions());

	// print which partition each key ended up in
	JavaRDD<String> mapPartitionsWithIndex = customPartitioned.mapPartitionsWithIndex((index, tupleIterator) -> {
		List<String> list = new ArrayList<>();
		while (tupleIterator.hasNext()) {
			list.add("Partition number:" + index + ",key:" + tupleIterator.next()._1());
		}
		return list.iterator();
	}, true);

	System.out.println(mapPartitionsWithIndex.collect());
}
 
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines: 30, Source: CustomPartitionerExample.java
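The CustomPartitioner class used above is not shown in this excerpt. A custom partitioner only needs to extend org.apache.spark.Partitioner and implement numPartitions() and getPartition(key). The sketch below is a hypothetical illustration (the two-partition, first-letter routing rule is an assumption, not the book's actual implementation).

import org.apache.spark.Partitioner;

// Hypothetical sketch of a custom Partitioner: any deterministic key -> partition
// mapping will do. Here keys starting with A-J go to partition 0, the rest to 1.
public class CustomPartitioner extends Partitioner {

  @Override
  public int numPartitions() {
    return 2;
  }

  @Override
  public int getPartition(Object key) {
    String k = key.toString();
    return (!k.isEmpty() && Character.toUpperCase(k.charAt(0)) <= 'J') ? 0 : 1;
  }
}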


Note: The org.apache.spark.api.java.JavaPairRDD.partitionBy examples in this article were compiled by 纯净天空 (Vimsky) from open-source code hosted on platforms such as GitHub/MSDocs. The snippets are selected from open-source projects and remain under their original authors' copyright; consult each project's license before distributing or reusing them, and do not republish without permission.