当前位置: 首页>>代码示例>>Java>>正文


Java JavaSparkContext.parallelizePairs方法代码示例

本文整理汇总了Java中org.apache.spark.api.java.JavaSparkContext.parallelizePairs方法的典型用法代码示例。如果您正苦于以下问题：Java JavaSparkContext.parallelizePairs方法的具体用法？Java JavaSparkContext.parallelizePairs怎么用？Java JavaSparkContext.parallelizePairs使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.spark.api.java.JavaSparkContext的用法示例。


在下文中一共展示了JavaSparkContext.parallelizePairs方法的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: main

import org.apache.spark.api.java.JavaSparkContext; //导入方法依赖的package包/类
/**
 * Demonstrates a map-side join using a broadcast variable.
 *
 * <p>A pair RDD of (user id, city id) is enriched with city names by collecting
 * the (city id, city name) pair RDD into a map, broadcasting that map, and
 * looking each city id up inside a {@code map} transformation. The resulting
 * (user id, city id, city name) triples are collected and printed to stdout.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {

	SparkSession sparkSession = SparkSession.builder().master("local").appName("My App")
			.config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse").getOrCreate();

	// Stop the session even when a job throws, so the local Spark context
	// created above is always released.
	try {
		JavaSparkContext jsc = new JavaSparkContext(sparkSession.sparkContext());

		JavaPairRDD<String, String> userIdToCityId = jsc.parallelizePairs(
				Arrays.asList(new Tuple2<>("1", "101"), new Tuple2<>("2", "102"),
						new Tuple2<>("3", "107"), new Tuple2<>("4", "103"),
						new Tuple2<>("11", "101"), new Tuple2<>("12", "102"),
						new Tuple2<>("13", "107"), new Tuple2<>("14", "103")));

		JavaPairRDD<String, String> cityIdToCityName = jsc.parallelizePairs(
				Arrays.asList(new Tuple2<>("101", "India"), new Tuple2<>("102", "UK"),
						new Tuple2<>("103", "Germany"), new Tuple2<>("107", "USA")));

		// The lookup side is turned into a plain map and broadcast, so the join
		// below is a per-record map lookup rather than an RDD join.
		Broadcast<Map<String, String>> citiesBroadcasted = jsc.broadcast(cityIdToCityName.collectAsMap());

		JavaRDD<Tuple3<String, String, String>> joined = userIdToCityId.map(
				v1 -> new Tuple3<String, String, String>(v1._1(), v1._2(), citiesBroadcasted.value().get(v1._2())));

		System.out.println(joined.collect());
	} finally {
		sparkSession.stop();
	}

}
 
开发者ID:PacktPublishing,项目名称:Apache-Spark-2x-for-Java-Developers,代码行数:26,代码来源:MapSideJoinBroadcast.java

示例2: main

import org.apache.spark.api.java.JavaSparkContext; //导入方法依赖的package包/类
/**
 * Repartitions a small pair RDD with {@code CustomPartitioner} and prints where
 * each key ended up.
 *
 * <p>Prints the number of partitions after {@code partitionBy}, then a list of
 * {@code "Partition number:<index>,key:<key>"} strings, one per record.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
	System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
	SparkConf conf = new SparkConf().setMaster("local").setAppName("Partitioning");

	// JavaSparkContext is Closeable; try-with-resources guarantees the context
	// is shut down even if one of the jobs below fails.
	try (JavaSparkContext jsc = new JavaSparkContext(conf)) {

		// Start with 3 partitions; partitionBy below redistributes the records.
		JavaPairRDD<String, String> pairRdd = jsc.parallelizePairs(
				Arrays.asList(new Tuple2<>("India", "Asia"), new Tuple2<>("Germany", "Europe"),
						new Tuple2<>("Japan", "Asia"), new Tuple2<>("France", "Europe")),
				3);

		JavaPairRDD<String, String> customPartitioned = pairRdd.partitionBy(new CustomPartitioner());

		System.out.println(customPartitioned.getNumPartitions());

		// Tag every key with the index of the partition that holds it.
		JavaRDD<String> mapPartitionsWithIndex = customPartitioned.mapPartitionsWithIndex((index, tupleIterator) -> {

			List<String> list = new ArrayList<>();

			while (tupleIterator.hasNext()) {
				list.add("Partition number:" + index + ",key:" + tupleIterator.next()._1());
			}

			return list.iterator();
		}, true);

		System.out.println(mapPartitionsWithIndex.collect());
	}
}
 
开发者ID:PacktPublishing,项目名称:Apache-Spark-2x-for-Java-Developers,代码行数:30,代码来源:CustomPartitionerExample.java

示例3: main

import org.apache.spark.api.java.JavaSparkContext; //导入方法依赖的package包/类
/**
 * Repartitions a pair RDD of (Integer, String) with a {@code RangePartitioner}
 * and prints where each key ended up.
 *
 * <p>Prints the partition count of the initial RDD, then a list of
 * {@code "Partition number:<index>,key:<key>"} strings, one per record, for the
 * range-partitioned RDD.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
		System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
		SparkConf conf = new SparkConf().setMaster("local").setAppName("Partitioning");

		// JavaSparkContext is Closeable; try-with-resources guarantees the context
		// is shut down even if one of the jobs below fails.
		try (JavaSparkContext jsc = new JavaSparkContext(conf)) {

			JavaPairRDD<Integer, String> pairRdd = jsc.parallelizePairs(
					Arrays.asList(new Tuple2<>(1, "A"), new Tuple2<>(2, "B"),
							new Tuple2<>(3, "C"), new Tuple2<>(4, "D"),
							new Tuple2<>(5, "E"), new Tuple2<>(6, "F"),
							new Tuple2<>(7, "G"), new Tuple2<>(8, "H")), 3);

			// RangePartitioner is constructed from the Scala RDD, not the Java wrapper.
			RDD<Tuple2<Integer, String>> rdd = JavaPairRDD.toRDD(pairRdd);

			System.out.println(pairRdd.getNumPartitions());

			// Alternative: pairRdd.partitionBy(new HashPartitioner(2)) for hash partitioning.

			// The Scala implicit parameters (key ordering and key ClassTag) have to
			// be passed explicitly from Java.
			// NOTE(review): the raw RangePartitioner type is presumably deliberate,
			// since the Scala Int ordering may not type-check as Ordering<Integer>
			// from Java; confirm before adding type arguments.
			RangePartitioner rangePartitioner = new RangePartitioner(4, rdd, true, scala.math.Ordering.Int$.MODULE$ , scala.reflect.ClassTag$.MODULE$.apply(Integer.class));

			JavaPairRDD<Integer, String> rangePartitioned = pairRdd.partitionBy(rangePartitioner);

			// Tag every key with the index of the partition that holds it.
			JavaRDD<String> mapPartitionsWithIndex = rangePartitioned.mapPartitionsWithIndex((index, tupleIterator) -> {

				List<String> list = new ArrayList<>();

				while (tupleIterator.hasNext()) {
					list.add("Partition number:" + index + ",key:" + tupleIterator.next()._1());
				}

				return list.iterator();
			}, true);

			System.out.println(mapPartitionsWithIndex.collect());
		}
	}
 
开发者ID:PacktPublishing,项目名称:Apache-Spark-2x-for-Java-Developers,代码行数:50,代码来源:Partitioning.java


注:本文中的org.apache.spark.api.java.JavaSparkContext.parallelizePairs方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。