This article collects typical usage examples of the Java method org.apache.spark.sql.SparkSession.sparkContext. If you are wondering what SparkSession.sparkContext does, how to call it, or want to see it in real code, the hand-picked method examples below may help. You can also read more about the enclosing class, org.apache.spark.sql.SparkSession.
The following presents 4 code examples of SparkSession.sparkContext, sorted by popularity by default.
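All four examples share the same core pattern: sparkContext() exposes the session's underlying Scala SparkContext, which is then wrapped in a JavaSparkContext to reach the Java RDD and streaming APIs. As a warm-up, here is a minimal, self-contained sketch of just that pattern; the master URL and application name are placeholders for local testing, not taken from the examples below.

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

public class SparkContextFromSession {
    public static void main(String[] args) {
        // Build (or reuse) a SparkSession; "local[*]" is a placeholder master for local testing
        SparkSession spark = SparkSession.builder()
                .master("local[*]")
                .appName("sparkContext example")
                .getOrCreate();

        // sparkContext() returns the session's underlying Scala SparkContext;
        // wrapping it in a JavaSparkContext gives access to the Java-friendly RDD API
        JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

        System.out.println("Application id: " + jsc.sc().applicationId());

        jsc.close();
    }
}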
Example 1: main
import org.apache.spark.sql.SparkSession; // import the package/class this method depends on
public static void main(String[] args) throws InterruptedException {
    System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");

    SparkSession sparkSession = SparkSession.builder().master("local[*]").appName("Stateful Streaming Example")
            .config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse").getOrCreate();

    // Wrap the session's SparkContext to create a streaming context with a 1-second batch interval
    JavaStreamingContext jssc = new JavaStreamingContext(new JavaSparkContext(sparkSession.sparkContext()),
            Durations.milliseconds(1000));
    JavaReceiverInputDStream<String> inStream = jssc.socketTextStream("10.204.136.223", 9999);
    jssc.checkpoint("C:\\Users\\sgulati\\spark-checkpoint");

    // Deserialize each incoming JSON line into a FlightDetails object
    JavaDStream<FlightDetails> flightDetailsStream = inStream.map(x -> {
        ObjectMapper mapper = new ObjectMapper();
        return mapper.readValue(x, FlightDetails.class);
    });

    // Key the stream by flight id so state can be tracked per flight
    JavaPairDStream<String, FlightDetails> flightDetailsPairStream = flightDetailsStream
            .mapToPair(f -> new Tuple2<String, FlightDetails>(f.getFlightId(), f));

    // State function: accumulate FlightDetails per flight, drop the state once the flight
    // has landed, and emit the running average for that flight
    Function3<String, Optional<FlightDetails>, State<List<FlightDetails>>, Tuple2<String, Double>> mappingFunc = (
            flightId, curFlightDetail, state) -> {
        List<FlightDetails> details = state.exists() ? state.get() : new ArrayList<>();

        boolean isLanded = false;
        if (curFlightDetail.isPresent()) {
            details.add(curFlightDetail.get());
            if (curFlightDetail.get().isLanded()) {
                isLanded = true;
            }
        }

        // Note: despite the variable name, this averages the temperature readings
        Double avgSpeed = details.stream().mapToDouble(f -> f.getTemperature()).average().orElse(0.0);

        if (isLanded) {
            state.remove();
        } else {
            state.update(details);
        }
        return new Tuple2<String, Double>(flightId, avgSpeed);
    };

    // Apply the state function with a 5-minute state timeout
    JavaMapWithStateDStream<String, FlightDetails, List<FlightDetails>, Tuple2<String, Double>> streamWithState = flightDetailsPairStream
            .mapWithState(StateSpec.function(mappingFunc).timeout(Durations.minutes(5)));

    streamWithState.print();
    jssc.start();
    jssc.awaitTermination();
}
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Source file: StateFulProcessingExample.java
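The FlightDetails class used by the streaming examples is not shown on this page. Judging only from the getters the code calls (getFlightId, getTemperature, isLanded), a plain Jackson-friendly POJO along the following lines would be compatible; the actual class in the book's repository may have more fields, so treat this as an inferred sketch.

// Hypothetical reconstruction of the FlightDetails POJO used above; the fields are
// assumptions inferred from the getters the examples call, not the book's actual class.
public class FlightDetails implements java.io.Serializable {
    private String flightId;
    private double temperature;
    private boolean landed;

    public FlightDetails() { } // no-arg constructor required by Jackson

    public String getFlightId() { return flightId; }
    public void setFlightId(String flightId) { this.flightId = flightId; }

    public double getTemperature() { return temperature; }
    public void setTemperature(double temperature) { this.temperature = temperature; }

    public boolean isLanded() { return landed; }
    public void setLanded(boolean landed) { this.landed = landed; }
}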
Example 2: main
import org.apache.spark.sql.SparkSession; // import the package/class this method depends on
public static void main(String[] args) {
    SparkSession sparkSession = SparkSession.builder().master("local").appName("My App")
            .config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse").getOrCreate();

    JavaSparkContext jsc = new JavaSparkContext(sparkSession.sparkContext());

    // Pair RDD of (userId, cityId)
    JavaPairRDD<String, String> userIdToCityId = jsc.parallelizePairs(
            Arrays.asList(new Tuple2<String, String>("1", "101"), new Tuple2<String, String>("2", "102"),
                    new Tuple2<String, String>("3", "107"), new Tuple2<String, String>("4", "103"),
                    new Tuple2<String, String>("11", "101"), new Tuple2<String, String>("12", "102"),
                    new Tuple2<String, String>("13", "107"), new Tuple2<String, String>("14", "103")));

    // Pair RDD of (cityId, cityName) -- the small side that will be broadcast
    JavaPairRDD<String, String> cityIdToCityName = jsc.parallelizePairs(
            Arrays.asList(new Tuple2<String, String>("101", "India"), new Tuple2<String, String>("102", "UK"),
                    new Tuple2<String, String>("103", "Germany"), new Tuple2<String, String>("107", "USA")));

    // Collect the small RDD to the driver and broadcast it to every executor
    Broadcast<Map<String, String>> citiesBroadcasted = jsc.broadcast(cityIdToCityName.collectAsMap());

    // Map-side join: look up the city name in the broadcast map, so no shuffle is needed
    JavaRDD<Tuple3<String, String, String>> joined = userIdToCityId.map(
            v1 -> new Tuple3<String, String, String>(v1._1(), v1._2(), citiesBroadcasted.value().get(v1._2())));

    System.out.println(joined.collect());
}
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Source file: MapSideJoinBroadcast.java
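For contrast, the same lookup could be done with a regular shuffle-based join instead of a broadcast variable; broadcasting the small city table keeps the join map-side and avoids shuffling the larger user RDD. The sketch below reuses the RDDs defined in Example 2 and is only meant to show the alternative that the broadcast approach replaces.

// Shuffle-based alternative: key the user RDD by city id, then join against the city-name RDD.
JavaPairRDD<String, Tuple2<String, String>> joinedByShuffle = userIdToCityId
        .mapToPair(t -> new Tuple2<>(t._2(), t._1()))   // (cityId, userId)
        .join(cityIdToCityName);                        // (cityId, (userId, cityName))

System.out.println(joinedByShuffle.collect());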
Example 3: pushUdf
import org.apache.spark.sql.SparkSession; // import the package/class this method depends on
/**
 * Pushes an "in_valueset" UDF that uses the given {@link BroadcastableValueSets}
 * for its content.
 *
 * @param spark the spark session
 * @param valueSets the value sets to use in the UDF
 */
public static synchronized void pushUdf(SparkSession spark, BroadcastableValueSets valueSets) {
    // Wrap the session's SparkContext so the value sets can be broadcast to the executors
    JavaSparkContext ctx = new JavaSparkContext(spark.sparkContext());
    Broadcast<BroadcastableValueSets> broadcast = ctx.broadcast(valueSets);

    // Register the UDF against the session so it is callable from Spark SQL
    spark.udf()
        .register("in_valueset",
            new InValuesetUdf(broadcast),
            DataTypes.BooleanType);

    // Push the broadcast variable (valueSetStack is a field of the enclosing class)
    valueSetStack.push(broadcast);
}
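Once pushUdf has registered the function, in_valueset can be called from Spark SQL against the same session. The snippet below is only a hedged sketch of such a call: the observations table, its columns, and the 'hypertension' reference name are hypothetical, and the first argument is assumed to be a coded column in whatever shape the UDF expects.

// Hypothetical usage: table, column names, and the value-set reference are placeholders,
// and "spark" is assumed to be the same SparkSession passed to pushUdf above.
Dataset<Row> matching = spark.sql(
        "SELECT subject, code FROM observations WHERE in_valueset(code, 'hypertension')");
matching.show();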
Example 4: main
import org.apache.spark.sql.SparkSession; // import the package/class this method depends on
public static void main(String[] args) throws InterruptedException {
    System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");

    SparkSession sparkSession = SparkSession.builder().master("local[*]").appName("stateless Streaming Example")
            .config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse").getOrCreate();

    // Wrap the session's SparkContext to create a streaming context with a 1-second batch interval
    JavaStreamingContext jssc = new JavaStreamingContext(new JavaSparkContext(sparkSession.sparkContext()),
            Durations.milliseconds(1000));
    JavaReceiverInputDStream<String> inStream = jssc.socketTextStream("10.204.136.223", 9999);

    // Deserialize each incoming JSON line into a FlightDetails object
    JavaDStream<FlightDetails> flightDetailsStream = inStream.map(x -> {
        ObjectMapper mapper = new ObjectMapper();
        return mapper.readValue(x, FlightDetails.class);
    });

    //flightDetailsStream.print();
    //flightDetailsStream.foreachRDD((VoidFunction<JavaRDD<FlightDetails>>) rdd -> rdd.saveAsTextFile("hdfs://namenode:port/path"));

    // Sliding window: 5 minutes of data, recomputed every minute
    JavaDStream<FlightDetails> window = flightDetailsStream.window(Durations.minutes(5), Durations.minutes(1));

    // Average temperature per flight over the window: map to (flightId, temperature),
    // pair each value with a count of 1, sum values and counts, then divide
    JavaPairDStream<String, Double> transformedWindow = window
            .mapToPair(f -> new Tuple2<String, Double>(f.getFlightId(), f.getTemperature()))
            .mapValues(t -> new Tuple2<Double, Integer>(t, 1))
            .reduceByKey((t1, t2) -> new Tuple2<Double, Integer>(t1._1() + t2._1(), t1._2() + t2._2()))
            .mapValues(t -> t._1() / t._2());

    transformedWindow.cache();
    transformedWindow.print();

    jssc.start();
    jssc.awaitTermination();
}
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Source file: StateLessProcessingExample.java