This article collects typical usage examples of the Java method org.apache.spark.streaming.api.java.JavaDStream.mapToPair. If you are asking what JavaDStream.mapToPair does, how to use it, or where to find examples of it, the curated code samples below should help. You can also explore further usage examples of the class this method belongs to, org.apache.spark.streaming.api.java.JavaDStream.
The following lists 14 code examples of JavaDStream.mapToPair, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
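Before the collected examples, here is a minimal, self-contained sketch of the basic mapToPair pattern. It is an orientation aid only, not taken from any of the projects below; it assumes Spark Streaming 2.x with Java 8 lambdas, and the class name MapToPairSketch and the localhost:9999 socket source are placeholders.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

import scala.Tuple2;

public class MapToPairSketch {
    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("MapToPairSketch");
        JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(5));

        // Any JavaDStream<String> works here; a socket source is used purely for illustration.
        JavaReceiverInputDStream<String> lines = ssc.socketTextStream("localhost", 9999);
        JavaDStream<String> words = lines.flatMap(line -> Arrays.asList(line.split(" ")).iterator());

        // mapToPair applies a PairFunction<T, K, V> to each element and returns a
        // JavaPairDStream<K, V>; here every word is keyed with an initial count of 1.
        JavaPairDStream<String, Integer> pairs =
                words.mapToPair((PairFunction<String, String, Integer>) word -> new Tuple2<String, Integer>(word, 1));

        // Once the stream is keyed, pair-only operations such as reduceByKey become available.
        JavaPairDStream<String, Integer> counts = pairs.reduceByKey((a, b) -> a + b);
        counts.print();

        ssc.start();
        ssc.awaitTermination();
    }
}

The essential signature to remember: mapToPair takes a PairFunction<T, K, V> whose call method returns a Tuple2<K, V>, turning a JavaDStream<T> into a JavaPairDStream<K, V> on which keyed operations such as reduceByKey, groupByKey, and mapWithState become available. Each example below follows this pattern.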
Example 1: main
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args)
{
    SparkConf conf = new SparkConf();
    conf.setAppName("Wordcount Background");
    conf.setMaster("local");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(15));

    JavaDStream<String> lines = ssc.textFileStream("/home/rahul/DATASET");
    JavaDStream<String> words = lines.flatMap(WORDS_EXTRACTOR);
    // mapToPair keys each word with an initial count of 1
    JavaPairDStream<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
    JavaPairDStream<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
    counter.print();

    ssc.start();
    ssc.awaitTermination();

    /*JavaRDD<String> file = context.textFile("/home/rahul/Desktop/palestine.txt");
    JavaRDD<String> words = file.flatMap(WORDS_EXTRACTOR);
    JavaPairRDD<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
    JavaPairRDD<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
    counter.saveAsTextFile("/home/rahul/Desktop/wc");
    context.close();*/
}
Example 2: run
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public void run() throws IOException {
    SparkConf conf = new SparkConf();
    conf.setAppName(getAppName());
    conf.set(SPARK_SERIALIZER, ORG_APACHE_SPARK_SERIALIZER_KRYO_SERIALIZER);
    JavaSparkUtil.packProjectJars(conf);
    setupSparkConf(conf);

    JavaStreamingContext ssc = new JavaStreamingContext(conf, getDuration());
    List<JavaDStream<T>> streamsList = getStreamsList(ssc);

    // Union all the streams if there is more than 1 stream
    JavaDStream<T> streams = unionStreams(ssc, streamsList);

    // mapToPair keys each element by the row id of the RowMutation it converts to
    JavaPairDStream<String, RowMutation> pairDStream = streams.mapToPair(new PairFunction<T, String, RowMutation>() {
        public Tuple2<String, RowMutation> call(T t) {
            RowMutation rowMutation = convert(t);
            return new Tuple2<String, RowMutation>(rowMutation.getRowId(), rowMutation);
        }
    });

    pairDStream.foreachRDD(getFunction());
    ssc.start();
    ssc.awaitTermination();
}
Example 3: publishToNats
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
protected void publishToNats(final String subject1, final String subject2, final int partitionsNb) {
    final JavaDStream<String> lines = ssc.textFileStream(tempDir.getAbsolutePath()).repartition(partitionsNb);

    // mapToPair tags each line with the NATS subject it should be published under
    JavaPairDStream<String, String> stream1 =
        lines.mapToPair((PairFunction<String, String, String>) str -> {
            return new Tuple2<String, String>(subject1, str);
        });
    JavaPairDStream<String, String> stream2 =
        lines.mapToPair((PairFunction<String, String, String>) str -> {
            return new Tuple2<String, String>(subject2, str);
        });
    final JavaPairDStream<String, String> stream = stream1.union(stream2);

    if (logger.isDebugEnabled()) {
        stream.print();
    }

    SparkToNatsConnectorPool
        .newPool()
        .withNatsURL(NATS_SERVER_URL)
        .withConnectionTimeout(Duration.ofSeconds(2))
        .publishToNatsAsKeyValue(stream);
}
Developer: Logimethods, Project: nats-connector-spark, Lines: 24, Source: KeyValueSparkToStandardNatsConnectorLifecycleTest.java
Example 4: testEsRDDWriteWithDynamicMapping
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
@Test
public void testEsRDDWriteWithDynamicMapping() throws Exception {
    Map<String, Object> doc1 = new HashMap<>();
    doc1.put("number", 3);
    doc1.put("one", null);
    Set<String> values = new HashSet<>();
    values.add("2");
    doc1.put("two", values);
    doc1.put("three", ".");

    Map<String, Object> doc2 = new HashMap<>();
    doc2.put("number", 4);
    doc2.put("OTP", "Otopeni");
    doc2.put("SFO", "San Fran");

    List<Map<String, Object>> docs = new ArrayList<>();
    docs.add(doc1);
    docs.add(doc2);

    String target = wrapIndex("spark-streaming-test-scala-dyn-id-write/data");

    JavaRDD<Map<String, Object>> batch = sc.parallelize(docs);
    Queue<JavaRDD<Map<String, Object>>> rddQueue = new LinkedList<>();
    rddQueue.add(batch);
    JavaDStream<Map<String, Object>> dstream = ssc.queueStream(rddQueue);

    // mapToPair keys each document with the Integer id produced by ExtractIDFunction
    JavaPairDStream<Integer, Map<String, Object>> metaDstream = dstream.mapToPair(new ExtractIDFunction());
    JavaEsSparkStreaming.saveToEsWithMeta(metaDstream, target, cfg);

    ssc.start();
    TimeUnit.SECONDS.sleep(2);
    ssc.stop(false, true);

    assertEquals(2, JavaEsSpark.esRDD(sc, target).count());
    assertTrue(RestUtils.exists(target + "/3"));
    assertTrue(RestUtils.exists(target + "/4"));
    assertThat(RestUtils.get(target + "/_search?"), containsString("SFO"));
}
Example 5: main
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args) throws InterruptedException {
    System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");

    SparkSession sparkSession = SparkSession.builder().master("local[*]").appName("Stateful Streaming Example")
            .config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse").getOrCreate();

    JavaStreamingContext jssc = new JavaStreamingContext(new JavaSparkContext(sparkSession.sparkContext()),
            Durations.milliseconds(1000));
    JavaReceiverInputDStream<String> inStream = jssc.socketTextStream("10.204.136.223", 9999);
    jssc.checkpoint("C:\\Users\\sgulati\\spark-checkpoint");

    JavaDStream<FlightDetails> flightDetailsStream = inStream.map(x -> {
        ObjectMapper mapper = new ObjectMapper();
        return mapper.readValue(x, FlightDetails.class);
    });

    // mapToPair keys each FlightDetails record by its flight id
    JavaPairDStream<String, FlightDetails> flightDetailsPairStream = flightDetailsStream
            .mapToPair(f -> new Tuple2<String, FlightDetails>(f.getFlightId(), f));

    Function3<String, Optional<FlightDetails>, State<List<FlightDetails>>, Tuple2<String, Double>> mappingFunc = (
            flightId, curFlightDetail, state) -> {
        List<FlightDetails> details = state.exists() ? state.get() : new ArrayList<>();

        boolean isLanded = false;
        if (curFlightDetail.isPresent()) {
            details.add(curFlightDetail.get());
            if (curFlightDetail.get().isLanded()) {
                isLanded = true;
            }
        }
        Double avgSpeed = details.stream().mapToDouble(f -> f.getTemperature()).average().orElse(0.0);

        if (isLanded) {
            state.remove();
        } else {
            state.update(details);
        }
        return new Tuple2<String, Double>(flightId, avgSpeed);
    };

    JavaMapWithStateDStream<String, FlightDetails, List<FlightDetails>, Tuple2<String, Double>> streamWithState = flightDetailsPairStream
            .mapWithState(StateSpec.function(mappingFunc).timeout(Durations.minutes(5)));

    streamWithState.print();
    jssc.start();
    jssc.awaitTermination();
}
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines: 52, Source: StateFulProcessingExample.java
Example 6: performQuery
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
/**
 * Method to perform the query given an input JavaDStream of JSON
 */
public void performQuery(JavaDStream<MapWritable> input)
{
    logger.info("Performing query: ");

    // Process non-overlapping windows of data of duration windowLength seconds
    // If we are using queue streams, there is no need to window
    if (!useQueueStream)
    {
        input = input.window(Durations.seconds(windowLength), Durations.seconds(windowLength));
    }

    // Extract the selectors for each dataElement based upon the query type
    // and perform a keyed hash of the selectors
    JavaPairDStream<Integer, List<BigInteger>> selectorHashToDocRDD = input.mapToPair(new HashSelectorsAndPartitionData(bVars));

    // Group by hashed selector (row) -- can combine with the line above, separating for testing and benchmarking...
    JavaPairDStream<Integer, Iterable<List<BigInteger>>> selectorGroupRDD = selectorHashToDocRDD.groupByKey();

    // Calculate the encrypted row values for each row, emit <colNum, colVal> for each row
    JavaPairDStream<Long, BigInteger> encRowRDD = selectorGroupRDD.flatMapToPair(new EncRowCalc(accum, bVars));

    // Multiply the column values by colNum: emit <colNum, finalColVal> and write the final result object
    encryptedColumnCalc(encRowRDD);

    // Start the streaming computation
    start();
}
Example 7: processPOIData
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
/**
 * Method to get the vehicles which are in radius of POI and their distance from POI.
 *
 * @param nonFilteredIotDataStream original IoT data stream
 * @param broadcastPOIValues variable containing POI coordinates, route and vehicle types to monitor.
 */
public void processPOIData(JavaDStream<IoTData> nonFilteredIotDataStream, Broadcast<Tuple3<POIData, String, String>> broadcastPOIValues) {

    // Filter by routeId, vehicleType and in POI range
    JavaDStream<IoTData> iotDataStreamFiltered = nonFilteredIotDataStream
            .filter(iot -> (iot.getRouteId().equals(broadcastPOIValues.value()._2())
                    && iot.getVehicleType().contains(broadcastPOIValues.value()._3())
                    && GeoDistanceCalculator.isInPOIRadius(Double.valueOf(iot.getLatitude()),
                            Double.valueOf(iot.getLongitude()), broadcastPOIValues.value()._1().getLatitude(),
                            broadcastPOIValues.value()._1().getLongitude(),
                            broadcastPOIValues.value()._1().getRadius())));

    // mapToPair pairs each filtered record with the broadcast POI
    JavaPairDStream<IoTData, POIData> poiDStreamPair = iotDataStreamFiltered
            .mapToPair(iot -> new Tuple2<>(iot, broadcastPOIValues.value()._1()));

    // Transform to dstream of POITrafficData
    JavaDStream<POITrafficData> trafficDStream = poiDStreamPair.map(poiTrafficDataFunc);

    // Map Cassandra table columns
    Map<String, String> columnNameMappings = new HashMap<String, String>();
    columnNameMappings.put("vehicleId", "vehicleid");
    columnNameMappings.put("distance", "distance");
    columnNameMappings.put("vehicleType", "vehicletype");
    columnNameMappings.put("timeStamp", "timestamp");

    // Call CassandraStreamingJavaUtil function to save in DB
    javaFunctions(trafficDStream)
            .writerBuilder("traffickeyspace", "poi_traffic", CassandraJavaUtil.mapToRow(POITrafficData.class, columnNameMappings))
            .withConstantTTL(120) // keep data for 2 minutes
            .saveToCassandra();
}
Example 8: getJavaPairDStream
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static JavaPairDStream<String, String> getJavaPairDStream(final File tempDir, final JavaStreamingContext ssc, final String subject1) {
    final JavaDStream<String> lines = ssc.textFileStream(tempDir.getAbsolutePath());
    // mapToPair keys each line by "<subject1>.<line>" so it can be published as a key/value pair
    JavaPairDStream<String, String> keyValues = lines.mapToPair((PairFunction<String, String, String>) str -> {
        return new Tuple2<String, String>(subject1 + "." + str, str);
    });
    return keyValues;
}
Example 9: testEsRDDWriteWithDynamicMapping
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
@Test
public void testEsRDDWriteWithDynamicMapping() throws Exception {
    Map<String, Object> doc1 = new HashMap<>();
    doc1.put("number", 3);
    doc1.put("one", null);
    Set<String> values = new HashSet<>();
    values.add("2");
    doc1.put("two", values);
    doc1.put("three", ".");

    Map<String, Object> doc2 = new HashMap<>();
    doc2.put("number", 4);
    doc2.put("OTP", "Otopeni");
    doc2.put("SFO", "San Fran");

    List<Map<String, Object>> docs = new ArrayList<>();
    docs.add(doc1);
    docs.add(doc2);

    String target = wrapIndex("spark-test-scala-dyn-id-write/data");

    JavaRDD<Map<String, Object>> batch = sc.parallelize(docs);
    Queue<JavaRDD<Map<String, Object>>> rddQueue = new LinkedList<>();
    rddQueue.add(batch);
    JavaDStream<Map<String, Object>> dstream = ssc.queueStream(rddQueue);

    // mapToPair keys each document with the Integer id produced by ExtractIDFunction
    JavaPairDStream<Integer, Map<String, Object>> metaDstream = dstream.mapToPair(new ExtractIDFunction());
    JavaEsSparkStreaming.saveToEsWithMeta(metaDstream, target, cfg);

    ssc.start();
    TimeUnit.SECONDS.sleep(2);
    ssc.stop(false, true);

    assertEquals(2, JavaEsSpark.esRDD(sc, target).count());
    assertTrue(RestUtils.exists(target + "/3"));
    assertTrue(RestUtils.exists(target + "/4"));
    assertThat(RestUtils.get(target + "/_search?"), containsString("SFO"));
}
Example 10: main
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
    final Pattern SPACE = Pattern.compile(" ");

    SparkConf conf = new SparkConf().setAppName("Big Apple").setMaster("local[2]");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(1));

    JavaDStream<String> lines = ssc.textFileStream("src/main/resources/stream");
    lines.print();

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x)).iterator();
        }
    });

    // mapToPair pairs each word with an initial count of 1
    JavaPairDStream<String, Integer> wordsDstream = words.mapToPair(
            new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<String, Integer>(s, 1);
                }
            });
    wordsDstream.print();

    Function2<Integer, Integer, Integer> reduceFunc = new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    };

    // Reduce over a 30-second window, sliding every 10 seconds
    JavaPairDStream<String, Integer> windowedWordCounts = wordsDstream.reduceByKeyAndWindow(reduceFunc, Durations.seconds(30), Durations.seconds(10));
    windowedWordCounts.print();

    ssc.start();
    ssc.awaitTermination();
}
Example 11: process
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
@Override
public void process(JavaPairInputDStream<String, String> messages) {
    // DStream: a sequence of RDDs that represents a stream of data
    // JavaDStream<String> lines = messages.map(
    //     new Function<Tuple2<String, String>, String>() {
    //         private static final long serialVersionUID = 9174430087884353818L;
    //
    //         @Override
    //         public String call(Tuple2<String, String> tuple2) {
    //             return tuple2._2();
    //         }
    //     }).cache();
    // JavaPairDStream<String, Integer> wordCounts = lines.mapToPair(
    //     new PairFunction<String, String, Integer>() {
    //
    //         private static final long serialVersionUID = -5361351005611686720L;
    //
    //         @Override
    //         public Tuple2<String, Integer> call(String s)
    //                 throws Exception {
    //             return new Tuple2<String, Integer>(s, 1);
    //         }
    //
    //     }).reduceByKey(new Function2<Integer, Integer, Integer>() {
    //         private static final long serialVersionUID = 1597536134161007070L;
    //
    //         @Override
    //         public Integer call(Integer count1, Integer count2)
    //                 throws Exception {
    //             return count1 + count2;
    //         }
    //     });

    // Using lambdas in Java 8
    // take only the values
    JavaDStream<String> lines = messages.map(tuple2 -> tuple2._2());
    // JavaDStream<Integer> ints = messages.map(tuple2 ->
    //     Integer.parseInt(tuple2._2()));

    // mapToPair pairs each value with 1; reduceByKey then sums the counts per key
    JavaPairDStream<String, Integer> wordCounts = lines
            .mapToPair(line -> new Tuple2<String, Integer>(line, 1))
            .reduceByKey((val1, val2) -> val1 + val2);
    wordCounts.print();

    // we can specify the window and the sliding interval
    // lines.window(windowDuration)
    // wordCounts.print();
}
Example 12: networkWordCount
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
private static final void networkWordCount() {
    /* StreamingContext with two threads and batch interval of 1 second */
    final SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount");
    final JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(1));

    /* Create a DStream that will connect to localhost:9999 */
    final JavaReceiverInputDStream<String> lines = jssc.socketTextStream("localhost", 9999);

    /* Split each line into words */
    final JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public final Iterable<String> call(final String x) {
            return Arrays.asList(x.split(" "));
        }
    });

    /* Count each word in each batch: mapToPair pairs each word with 1 */
    final JavaPairDStream<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
        private static final long serialVersionUID = 1L;

        @Override
        public final Tuple2<String, Integer> call(final String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    });
    final JavaPairDStream<String, Integer> wordCounts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
        private static final long serialVersionUID = 1L;

        @Override
        public final Integer call(final Integer i1, final Integer i2) {
            return i1 + i2;
        }
    });

    /* Print the first ten elements of each RDD generated in this DStream to the console */
    wordCounts.print();

    jssc.start();            // Start the computation
    jssc.awaitTermination(); // Wait for the computation to terminate
    jssc.close();
}
Example 13: testEsRDDWriteWithDynamicMapMapping
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
@Test
public void testEsRDDWriteWithDynamicMapMapping() throws Exception {
    Map<String, Object> doc1 = new HashMap<>();
    doc1.put("id", 5);
    doc1.put("version", "3");
    doc1.put("one", null);
    Set<String> values = new HashSet<>();
    values.add("2");
    doc1.put("two", values);
    doc1.put("three", ".");

    Map<String, Object> doc2 = new HashMap<>();
    doc2.put("id", 6);
    doc1.put("version", "5");
    doc2.put("OTP", "Otopeni");
    doc2.put("SFO", "San Fran");

    List<Map<String, Object>> docs = new ArrayList<>();
    docs.add(doc1);
    docs.add(doc2);

    String target = wrapIndex("spark-streaming-test-scala-dyn-id-write-map/data");

    JavaRDD<Map<String, Object>> batch = sc.parallelize(docs);
    Queue<JavaRDD<Map<String, Object>>> rddQueue = new LinkedList<>();
    rddQueue.add(batch);
    JavaDStream<Map<String, Object>> dstream = ssc.queueStream(rddQueue);

    // mapToPair keys each document with the Map<Metadata, Object> produced by ExtractMetaMap
    JavaPairDStream<Map<Metadata, Object>, Map<String, Object>> metaDstream = dstream.mapToPair(new ExtractMetaMap());
    JavaEsSparkStreaming.saveToEsWithMeta(metaDstream, target, cfg);

    ssc.start();
    TimeUnit.SECONDS.sleep(2);
    ssc.stop(false, true);

    assertEquals(2, JavaEsSpark.esRDD(sc, target).count());
    assertTrue(RestUtils.exists(target + "/5"));
    assertTrue(RestUtils.exists(target + "/6"));
    assertThat(RestUtils.get(target + "/_search?"), containsString("SFO"));
}
Example 14: testEsRDDWriteWithDynamicMapMapping
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
@Test
public void testEsRDDWriteWithDynamicMapMapping() throws Exception {
    Map<String, Object> doc1 = new HashMap<>();
    doc1.put("id", 5);
    doc1.put("version", "3");
    doc1.put("one", null);
    Set<String> values = new HashSet<>();
    values.add("2");
    doc1.put("two", values);
    doc1.put("three", ".");

    Map<String, Object> doc2 = new HashMap<>();
    doc2.put("id", 6);
    doc1.put("version", "5");
    doc2.put("OTP", "Otopeni");
    doc2.put("SFO", "San Fran");

    List<Map<String, Object>> docs = new ArrayList<>();
    docs.add(doc1);
    docs.add(doc2);

    String target = wrapIndex("spark-test-scala-dyn-id-write-map/data");

    JavaRDD<Map<String, Object>> batch = sc.parallelize(docs);
    Queue<JavaRDD<Map<String, Object>>> rddQueue = new LinkedList<>();
    rddQueue.add(batch);
    JavaDStream<Map<String, Object>> dstream = ssc.queueStream(rddQueue);

    // mapToPair keys each document with the Map<Metadata, Object> produced by ExtractMetaMap
    JavaPairDStream<Map<Metadata, Object>, Map<String, Object>> metaDstream = dstream.mapToPair(new ExtractMetaMap());
    JavaEsSparkStreaming.saveToEsWithMeta(metaDstream, target, cfg);

    ssc.start();
    TimeUnit.SECONDS.sleep(2);
    ssc.stop(false, true);

    assertEquals(2, JavaEsSpark.esRDD(sc, target).count());
    assertTrue(RestUtils.exists(target + "/5"));
    assertTrue(RestUtils.exists(target + "/6"));
    assertThat(RestUtils.get(target + "/_search?"), containsString("SFO"));
}