This article collects typical usage examples of the Java method org.apache.spark.streaming.api.java.JavaDStream.map. If you are wondering what JavaDStream.map does, how to use it, or where to find examples, the curated code samples below may help. You can also explore further usage examples of the enclosing class, org.apache.spark.streaming.api.java.JavaDStream.
The following shows 5 code examples of JavaDStream.map, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
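Before the curated examples, here is a minimal, self-contained sketch of what JavaDStream.map does: it applies a function to every record of a DStream and returns a new DStream of the results. The socket source, host, and port below are illustrative assumptions, not part of the examples that follow.

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class JavaDStreamMapSketch {
    public static void main(String[] args) throws Exception {
        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("JavaDStreamMapSketch");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(1));
        // Source stream of text lines (assumed: a local socket on port 9999)
        JavaReceiverInputDStream<String> lines = jssc.socketTextStream("localhost", 9999);
        // map: transform each String record into its length, yielding a JavaDStream<Integer>
        JavaDStream<Integer> lengths = lines.map(String::length);
        lengths.print();
        jssc.start();
        jssc.awaitTermination();
    }
}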
Example 1: processTotalTrafficData
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class on which this method depends
/**
 * Method to get total traffic counts for the different types of vehicles on each route.
 *
 * @param filteredIotDataStream IoT data stream
 */
public void processTotalTrafficData(JavaDStream<IoTData> filteredIotDataStream) {
    // We need the vehicle count grouped by routeId and vehicleType
    JavaPairDStream<AggregateKey, Long> countDStreamPair = filteredIotDataStream
            .mapToPair(iot -> new Tuple2<>(new AggregateKey(iot.getRouteId(), iot.getVehicleType()), 1L))
            .reduceByKey((a, b) -> a + b);
    // Keep state for the running total count
    JavaMapWithStateDStream<AggregateKey, Long, Long, Tuple2<AggregateKey, Long>> countDStreamWithStatePair = countDStreamPair
            .mapWithState(StateSpec.function(totalSumFunc).timeout(Durations.seconds(3600))); // maintain state for one hour
    // Transform to a DStream of TotalTrafficData
    JavaDStream<Tuple2<AggregateKey, Long>> countDStream = countDStreamWithStatePair.map(tuple2 -> tuple2);
    JavaDStream<TotalTrafficData> trafficDStream = countDStream.map(totalTrafficDataFunc);
    // Map bean properties to Cassandra table columns
    Map<String, String> columnNameMappings = new HashMap<String, String>();
    columnNameMappings.put("routeId", "routeid");
    columnNameMappings.put("vehicleType", "vehicletype");
    columnNameMappings.put("totalCount", "totalcount");
    columnNameMappings.put("timeStamp", "timestamp");
    columnNameMappings.put("recordDate", "recorddate");
    // Call the CassandraStreamingJavaUtil function to save to the DB
    javaFunctions(trafficDStream).writerBuilder("traffickeyspace", "total_traffic",
            CassandraJavaUtil.mapToRow(TotalTrafficData.class, columnNameMappings)).saveToCassandra();
}
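The example above references two helpers, totalSumFunc and totalTrafficDataFunc, defined elsewhere in its class. The sketch below shows a plausible shape for both; the hypothetical holder class, the TotalTrafficData setters, the date formatting, and the Spark 2.x org.apache.spark.api.java.Optional type are assumptions, not taken verbatim from the original project.

import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.spark.api.java.Optional;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function3;
import org.apache.spark.streaming.State;

import scala.Tuple2;

public class TotalTrafficFunctions {

    // Stateful running-total function used with mapWithState: adds the count from the
    // current batch to whatever total is already held in state for the key.
    static final Function3<AggregateKey, Optional<Long>, State<Long>, Tuple2<AggregateKey, Long>> totalSumFunc =
            (key, currentCount, state) -> {
                long batchCount = currentCount.isPresent() ? currentCount.get() : 0L;
                long totalCount = batchCount + (state.exists() ? state.get() : 0L);
                state.update(totalCount);
                return new Tuple2<>(key, totalCount);
            };

    // Converts an aggregated (key, count) pair into the TotalTrafficData entity saved to Cassandra.
    static final Function<Tuple2<AggregateKey, Long>, TotalTrafficData> totalTrafficDataFunc = tuple -> {
        TotalTrafficData trafficData = new TotalTrafficData();
        trafficData.setRouteId(tuple._1().getRouteId());
        trafficData.setVehicleType(tuple._1().getVehicleType());
        trafficData.setTotalCount(tuple._2());
        trafficData.setTimeStamp(new Date());
        trafficData.setRecordDate(new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
        return trafficData;
    };
}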
Example 2: start
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class on which this method depends
public void start() {
    final JavaStreamingContext context = new JavaStreamingContext(conf, checkpointInterval);
    // for graceful shutdown of the application ...
    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            System.out.println("Shutting down streaming app...");
            context.stop(true, true);
            System.out.println("Shutdown of streaming app complete.");
        }
    });
    JKinesisReceiver receiver = new JKinesisReceiver(appName, streamName,
            endpointUrl, regionName,
            checkpointInterval,
            InitialPositionInStream.LATEST);
    JavaDStream<String> dstream = context.receiverStream(receiver);
    JavaDStream<EventRecord> recs = dstream.map(new EventRecordMapFunc());
    recs.print();
    // persist the DStream to Cassandra
    javaFunctions(recs)
            .writerBuilder("canary", "eventrecord", mapToRow(EventRecord.class))
            .saveToCassandra();
    System.out.println("Start Spark Stream Processing...");
    context.start();
    context.awaitTermination();
}
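Here, map wraps a named Function implementation rather than a lambda. The EventRecordMapFunc used above is defined elsewhere in the project; a minimal sketch is shown below, assuming a comma-separated input line and illustrative EventRecord setters (id, timestamp, payload) that are not taken from the original source.

import org.apache.spark.api.java.function.Function;

// Hypothetical mapping function: parses one text record from the Kinesis stream
// into an EventRecord bean that the Cassandra writer can persist.
public class EventRecordMapFunc implements Function<String, EventRecord> {
    private static final long serialVersionUID = 1L;

    @Override
    public EventRecord call(String line) {
        // Assumed format: id,timestamp,payload
        String[] parts = line.split(",", 3);
        EventRecord record = new EventRecord();
        record.setId(parts[0]);
        record.setTimestamp(Long.parseLong(parts[1]));
        record.setPayload(parts[2]);
        return record;
    }
}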
Example 3: main
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class on which this method depends
public static void main(String[] args) throws Exception {
    // read Spark and Cassandra properties and create SparkConf
    Properties prop = PropertyFileReader.readPropertyFile();
    SparkConf conf = new SparkConf()
            .setAppName(prop.getProperty("com.iot.app.spark.app.name"))
            .setMaster(prop.getProperty("com.iot.app.spark.master"))
            .set("spark.cassandra.connection.host", prop.getProperty("com.iot.app.cassandra.host"))
            .set("spark.cassandra.connection.port", prop.getProperty("com.iot.app.cassandra.port"))
            .set("spark.cassandra.connection.keep_alive_ms", prop.getProperty("com.iot.app.cassandra.keep_alive"));
    // batch interval of 5 seconds for the incoming stream
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));
    // add checkpoint directory
    jssc.checkpoint(prop.getProperty("com.iot.app.spark.checkpoint.dir"));
    // read and set Kafka properties
    Map<String, String> kafkaParams = new HashMap<String, String>();
    kafkaParams.put("zookeeper.connect", prop.getProperty("com.iot.app.kafka.zookeeper"));
    kafkaParams.put("metadata.broker.list", prop.getProperty("com.iot.app.kafka.brokerlist"));
    String topic = prop.getProperty("com.iot.app.kafka.topic");
    Set<String> topicsSet = new HashSet<String>();
    topicsSet.add(topic);
    // create direct Kafka stream
    JavaPairInputDStream<String, IoTData> directKafkaStream = KafkaUtils.createDirectStream(
            jssc,
            String.class,
            IoTData.class,
            StringDecoder.class,
            IoTDataDecoder.class,
            kafkaParams,
            topicsSet
    );
    logger.info("Starting Stream Processing");
    // We need the non-filtered stream for the POI traffic data calculation
    JavaDStream<IoTData> nonFilteredIotDataStream = directKafkaStream.map(tuple -> tuple._2());
    // We need the filtered stream for the total and windowed traffic data calculations
    JavaPairDStream<String, IoTData> iotDataPairStream = nonFilteredIotDataStream
            .mapToPair(iot -> new Tuple2<String, IoTData>(iot.getVehicleId(), iot))
            .reduceByKey((a, b) -> a);
    // Check whether a vehicle id has already been processed
    JavaMapWithStateDStream<String, IoTData, Boolean, Tuple2<IoTData, Boolean>> iotDStreamWithStatePairs = iotDataPairStream
            .mapWithState(StateSpec.function(processedVehicleFunc).timeout(Durations.seconds(3600))); // maintain state for one hour
    // Filter out already-processed vehicle ids and keep the un-processed ones
    JavaDStream<Tuple2<IoTData, Boolean>> filteredIotDStreams = iotDStreamWithStatePairs.map(tuple2 -> tuple2)
            .filter(tuple -> tuple._2.equals(Boolean.FALSE));
    // Get the stream of IoTData
    JavaDStream<IoTData> filteredIotDataStream = filteredIotDStreams.map(tuple -> tuple._1);
    // cache the stream as it is used in both the total and the window-based computation
    filteredIotDataStream.cache();
    // process data
    IoTTrafficDataProcessor iotTrafficProcessor = new IoTTrafficDataProcessor();
    iotTrafficProcessor.processTotalTrafficData(filteredIotDataStream);
    iotTrafficProcessor.processWindowTrafficData(filteredIotDataStream);
    // POI data
    POIData poiData = new POIData();
    poiData.setLatitude(33.877495);
    poiData.setLongitude(-95.50238);
    poiData.setRadius(30); // 30 km
    // broadcast variables: we will monitor vehicles of type "Truck" on "Route-37"
    Broadcast<Tuple3<POIData, String, String>> broadcastPOIValues =
            jssc.sparkContext().broadcast(new Tuple3<>(poiData, "Route-37", "Truck"));
    // call method to process the stream
    iotTrafficProcessor.processPOIData(nonFilteredIotDataStream, broadcastPOIValues);
    // start context
    jssc.start();
    jssc.awaitTermination();
}
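The mapWithState call above depends on processedVehicleFunc, which is defined elsewhere in the project. A plausible sketch is shown below, assuming the Spark 2.x org.apache.spark.api.java.Optional type and a hypothetical holder class: the first time a vehicle id is seen it is emitted with FALSE (not yet processed) and remembered in state; later occurrences are emitted with TRUE and removed by the .filter step above.

import org.apache.spark.api.java.Optional;
import org.apache.spark.api.java.function.Function3;
import org.apache.spark.streaming.State;

import scala.Tuple2;

public class VehicleStateFunctions {

    // Marks each vehicle id the first time it appears; later batches find the state
    // entry and flag the record as already processed.
    static final Function3<String, Optional<IoTData>, State<Boolean>, Tuple2<IoTData, Boolean>> processedVehicleFunc =
            (vehicleId, iot, state) -> {
                Tuple2<IoTData, Boolean> vehicle = new Tuple2<>(iot.get(), Boolean.FALSE);
                if (state.exists()) {
                    // seen before within the state timeout window: flag as already processed
                    vehicle = new Tuple2<>(iot.get(), Boolean.TRUE);
                } else {
                    state.update(Boolean.TRUE);
                }
                return vehicle;
            };
}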
Example 4: testStaticSparkToNatsWithConnectionLifecycle
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class on which this method depends
@Test(timeout = 20000)
public void testStaticSparkToNatsWithConnectionLifecycle() throws Exception {
    startStreamingServer(clusterID, false);
    long poolSize = SparkToNatsStreamingConnectorPool.poolSize();
    final List<Integer> data = UnitTestUtilities.getData();
    final String subject1 = "subject1";
    final String subject2 = "subject2";
    final int partitionsNb = 3;
    final JavaDStream<String> lines = ssc.textFileStream(tempDir.getAbsolutePath()).repartition(partitionsNb);
    final JavaDStream<Integer> integers = lines.map(str -> Integer.parseInt(str));
    final Properties properties = new Properties();
    properties.setProperty(PROP_URL, STAN_URL);
    SparkToNatsConnectorPool
            .newStreamingPool(clusterID)
            .withProperties(properties)
            .withConnectionTimeout(Duration.ofSeconds(2))
            .withSubjects(DEFAULT_SUBJECT, subject1, subject2)
            .publishToNats(integers);
    ssc.start();
    TimeUnit.SECONDS.sleep(1);
    final NatsStreamingSubscriber ns1 = UnitTestUtilities.getNatsStreamingSubscriber(data, subject1, clusterID, getUniqueClientName() + "_SUB1", STAN_URL);
    final NatsStreamingSubscriber ns2 = UnitTestUtilities.getNatsStreamingSubscriber(data, subject2, clusterID, getUniqueClientName() + "_SUB1", STAN_URL);
    writeTmpFile(data);
    // wait for the subscribers to complete
    ns1.waitForCompletion();
    ns2.waitForCompletion();
    TimeUnit.MILLISECONDS.sleep(100);
    assertEquals("The connections Pool size should be the same as the number of Spark partitions",
            poolSize + partitionsNb, SparkToNatsStreamingConnectorPool.poolSize());
    final NatsStreamingSubscriber ns1p = UnitTestUtilities.getNatsStreamingSubscriber(data, subject1, clusterID, getUniqueClientName() + "_SUB1", STAN_URL);
    final NatsStreamingSubscriber ns2p = UnitTestUtilities.getNatsStreamingSubscriber(data, subject2, clusterID, getUniqueClientName() + "_SUB1", STAN_URL);
    writeTmpFile(data);
    // wait for the subscribers to complete
    ns1p.waitForCompletion();
    ns2p.waitForCompletion();
    TimeUnit.MILLISECONDS.sleep(100);
    assertEquals("The connections Pool size should be the same as the number of Spark partitions",
            poolSize + partitionsNb, SparkToNatsStreamingConnectorPool.poolSize());
    ssc.stop();
    ssc = null;
    logger.debug("Spark Context Stopped");
    TimeUnit.SECONDS.sleep(5);
    logger.debug("After 5 sec delay");
    assertTrue("The poolSize() of " + SparkToNatsStreamingConnectorPool.connectorsPoolMap + " should have been reverted to its original value",
            SparkToNatsStreamingConnectorPool.poolSize() == poolSize);
}
Developer: Logimethods; Project: nats-connector-spark; Lines of code: 62; Source file: SparkToNatsStreamingConnectorLifecycleTest.java
Example 5: main
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class on which this method depends
public static void main(String[] args) {
    StreamNumberServer.startNumberGeneratorServer(9999);
    // Create a local StreamingContext with two working threads and a batch interval of 1 second
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("ConfigurableFilterApp");
    try (JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(1))) {
        JavaReceiverInputDStream<String> lines = jssc.socketTextStream("localhost", 9999);
        JavaDStream<SensorData> values = lines.map(line -> SensorData.fromString(line));
        values = values.map(new CfgFunction());
        values.print();
        jssc.start();            // Start the computation
        jssc.awaitTermination(); // Wait for the computation to terminate
    }
}
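This example maps twice over the same stream: first from raw text lines to SensorData, then through CfgFunction, a configurable per-record transformation. Both types are defined elsewhere in the project; the sketch below is only a guess at their shape, assuming a simple "id;value" line format and a hard-coded cap in place of the real configurable behaviour.

import java.io.Serializable;

import org.apache.spark.api.java.function.Function;

// Hypothetical sensor record parsed from one text line.
public class SensorData implements Serializable {
    private static final long serialVersionUID = 1L;

    String id;
    double value;

    // Assumed line format: "id;value"
    public static SensorData fromString(String line) {
        String[] parts = line.split(";");
        SensorData data = new SensorData();
        data.id = parts[0];
        data.value = Double.parseDouble(parts[1]);
        return data;
    }

    @Override
    public String toString() {
        return id + "=" + value;
    }
}

// Hypothetical configurable transformation; here it simply caps the value at a fixed maximum.
class CfgFunction implements Function<SensorData, SensorData> {
    private static final long serialVersionUID = 1L;

    private final double maxValue = 100.0;

    @Override
    public SensorData call(SensorData data) {
        data.value = Math.min(data.value, maxValue);
        return data;
    }
}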