

Java JavaDStream.map Method Code Examples

This article collects typical usage examples of the Java method org.apache.spark.streaming.api.java.JavaDStream.map. If you are wondering what JavaDStream.map does, how to use it, or where to find concrete examples, the curated snippets below should help. You can also explore further usage examples of the enclosing class, org.apache.spark.streaming.api.java.JavaDStream.


Five code examples of the JavaDStream.map method are shown below, sorted by popularity by default.
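For orientation before the examples: JavaDStream.map applies a function to every record of every RDD in the stream and returns a new JavaDStream with one output element per input element. The following is a minimal self-contained sketch, not taken from the projects below; the host, port, and batch interval are arbitrary illustration values.

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public final class MapSketch {
	public static void main(String[] args) throws InterruptedException {
		SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("MapSketch");
		JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(1));
		JavaDStream<String> lines = jssc.socketTextStream("localhost", 9999);
		// map is a one-to-one transformation: each input line yields exactly one output value
		JavaDStream<Integer> lengths = lines.map(String::length);
		lengths.print();
		jssc.start();
		jssc.awaitTermination();
	}
}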

Example 1: processTotalTrafficData

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class required by this method
/**
 * Method to get total traffic counts of different types of vehicles for each route.
 * 
 * @param filteredIotDataStream IoT data stream
 */
public void processTotalTrafficData(JavaDStream<IoTData> filteredIotDataStream) {

	// Count vehicles grouped by routeId and vehicleType
	JavaPairDStream<AggregateKey, Long> countDStreamPair = filteredIotDataStream
			.mapToPair(iot -> new Tuple2<>(new AggregateKey(iot.getRouteId(), iot.getVehicleType()), 1L))
			.reduceByKey((a, b) -> a + b);
	
	// Keep running state for the total count
	JavaMapWithStateDStream<AggregateKey, Long, Long, Tuple2<AggregateKey, Long>> countDStreamWithStatePair = countDStreamPair
			.mapWithState(StateSpec.function(totalSumFunc).timeout(Durations.seconds(3600))); // maintain state for one hour

	// Transform to a DStream of TotalTrafficData
	JavaDStream<Tuple2<AggregateKey, Long>> countDStream = countDStreamWithStatePair.map(tuple2 -> tuple2); // identity map: JavaMapWithStateDStream -> JavaDStream
	JavaDStream<TotalTrafficData> trafficDStream = countDStream.map(totalTrafficDataFunc);

	// Map bean fields to Cassandra table columns
	Map<String, String> columnNameMappings = new HashMap<String, String>();
	columnNameMappings.put("routeId", "routeid");
	columnNameMappings.put("vehicleType", "vehicletype");
	columnNameMappings.put("totalCount", "totalcount");
	columnNameMappings.put("timeStamp", "timestamp");
	columnNameMappings.put("recordDate", "recorddate");

	// Save to Cassandra via CassandraStreamingJavaUtil
	javaFunctions(trafficDStream).writerBuilder("traffickeyspace", "total_traffic",
			CassandraJavaUtil.mapToRow(TotalTrafficData.class, columnNameMappings)).saveToCassandra();
}
 
Developer: baghelamit | Project: iot-traffic-monitor | Lines: 33 | Source: IoTTrafficDataProcessor.java
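The snippet references totalSumFunc without defining it. The following is a plausible sketch, an assumption rather than the project's actual code, based on the Spark 2.x mapWithState contract, where StateSpec.function takes a Function3 of key, new value, and state:

import org.apache.spark.api.java.Optional;
import org.apache.spark.api.java.function.Function3;
import org.apache.spark.streaming.State;
import scala.Tuple2;

// Adds each batch's per-key count to the running total kept in Spark state.
private static final Function3<AggregateKey, Optional<Long>, State<Long>, Tuple2<AggregateKey, Long>> totalSumFunc =
	(key, currentSum, state) -> {
		long totalSum = currentSum.or(0L) + (state.exists() ? state.get() : 0L);
		state.update(totalSum);              // persist the new running total
		return new Tuple2<>(key, totalSum);  // emit (key, cumulative count)
	};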

Example 2: start

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class required by this method
public void start() {
    final JavaStreamingContext context = new JavaStreamingContext(conf, checkpointInterval);

    // for graceful shutdown of the application ...
    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            System.out.println("Shutting down streaming app...");
            context.stop(true, true);
            System.out.println("Shutdown of streaming app complete.");
        }
    });

    JKinesisReceiver receiver = new JKinesisReceiver(appName, streamName,
                                                     endpointUrl, regionName,
                                                     checkpointInterval,
                                                     InitialPositionInStream.LATEST);

    JavaDStream<String> dstream = context.receiverStream(receiver);

    JavaDStream<EventRecord> recs = dstream.map(new EventRecordMapFunc());

    recs.print();

    // persist the DStream to Cassandra
    javaFunctions(recs)
        .writerBuilder("canary", "eventrecord", mapToRow(EventRecord.class))
        .saveToCassandra();


    System.out.println("Start Spark Stream Processing...");

    context.start();
    context.awaitTermination();

}
 
Developer: lenards | Project: spark-cstar-canaries | Lines: 37 | Source: Consumer.java
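EventRecordMapFunc is referenced but not shown. Since it is passed to map on a JavaDStream<String>, it must be a serializable org.apache.spark.api.java.function.Function<String, EventRecord>. A hypothetical sketch follows; the fromJson factory is an assumption, not the project's actual API:

import org.apache.spark.api.java.function.Function;

// Deserializes each raw record received from Kinesis (a String here) into an EventRecord bean.
public class EventRecordMapFunc implements Function<String, EventRecord> {
	@Override
	public EventRecord call(String record) throws Exception {
		// fromJson is a hypothetical factory; substitute the project's real parsing logic.
		return EventRecord.fromJson(record);
	}
}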

Example 3: main

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class required by this method
public static void main(String[] args) throws Exception {
 //read Spark and Cassandra properties and create SparkConf
 Properties prop = PropertyFileReader.readPropertyFile();		
 SparkConf conf = new SparkConf()
		 .setAppName(prop.getProperty("com.iot.app.spark.app.name"))
		 .setMaster(prop.getProperty("com.iot.app.spark.master"))
		 .set("spark.cassandra.connection.host", prop.getProperty("com.iot.app.cassandra.host"))
		 .set("spark.cassandra.connection.port", prop.getProperty("com.iot.app.cassandra.port"))
		 .set("spark.cassandra.connection.keep_alive_ms", prop.getProperty("com.iot.app.cassandra.keep_alive"));		 
 //batch interval of 5 seconds for incoming stream		 
 JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));	
 //add check point directory
 jssc.checkpoint(prop.getProperty("com.iot.app.spark.checkpoint.dir"));
 
 //read and set Kafka properties
 Map<String, String> kafkaParams = new HashMap<String, String>();
 kafkaParams.put("zookeeper.connect", prop.getProperty("com.iot.app.kafka.zookeeper"));
 kafkaParams.put("metadata.broker.list", prop.getProperty("com.iot.app.kafka.brokerlist"));
 String topic = prop.getProperty("com.iot.app.kafka.topic");
 Set<String> topicsSet = new HashSet<String>();
 topicsSet.add(topic);
 //create direct kafka stream
 JavaPairInputDStream<String, IoTData> directKafkaStream = KafkaUtils.createDirectStream(
	        jssc,
	        String.class,
	        IoTData.class,
	        StringDecoder.class,
	        IoTDataDecoder.class,
	        kafkaParams,
	        topicsSet
	    );
 logger.info("Starting Stream Processing");
 
 // We need the non-filtered stream for the POI traffic calculation
 JavaDStream<IoTData> nonFilteredIotDataStream = directKafkaStream.map(tuple -> tuple._2());
 
 // We need a de-duplicated stream for the total and window-based traffic calculations
 JavaPairDStream<String,IoTData> iotDataPairStream = nonFilteredIotDataStream.mapToPair(iot -> new Tuple2<String,IoTData>(iot.getVehicleId(),iot)).reduceByKey((a, b) -> a); // keep one record per vehicleId within each batch

 // Check vehicle Id is already processed
 JavaMapWithStateDStream<String, IoTData, Boolean, Tuple2<IoTData,Boolean>> iotDStreamWithStatePairs = iotDataPairStream
					.mapWithState(StateSpec.function(processedVehicleFunc).timeout(Durations.seconds(3600)));//maintain state for one hour

 // Filter out already-processed vehicle ids and keep unprocessed ones
 JavaDStream<Tuple2<IoTData,Boolean>> filteredIotDStreams = iotDStreamWithStatePairs.map(tuple2 -> tuple2)
					.filter(tuple -> tuple._2.equals(Boolean.FALSE));

 // Get stream of IoTdata
 JavaDStream<IoTData> filteredIotDataStream = filteredIotDStreams.map(tuple -> tuple._1);
 
 //cache stream as it is used in total and window based computation
 filteredIotDataStream.cache();
 	 
 //process data
 IoTTrafficDataProcessor iotTrafficProcessor = new IoTTrafficDataProcessor();
 iotTrafficProcessor.processTotalTrafficData(filteredIotDataStream);
 iotTrafficProcessor.processWindowTrafficData(filteredIotDataStream);

 //poi data
 POIData poiData = new POIData();
 poiData.setLatitude(33.877495);
 poiData.setLongitude(-95.50238);
 poiData.setRadius(30);//30 km
 
 //broadcast variables. We will monitor vehicles on Route 37 which are of type Truck
 Broadcast<Tuple3<POIData, String, String>> broadcastPOIValues = jssc.sparkContext().broadcast(new Tuple3<>(poiData,"Route-37","Truck"));
 // call method to process the POI data stream
 iotTrafficProcessor.processPOIData(nonFilteredIotDataStream,broadcastPOIValues);
 
 //start context
 jssc.start();            
 jssc.awaitTermination();  
}
 
Developer: baghelamit | Project: iot-traffic-monitor | Lines: 74 | Source: IoTDataProcessor.java
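Like Example 1, this snippet relies on an externally defined state function, processedVehicleFunc. Given the downstream filter that keeps tuples whose Boolean is FALSE, a plausible sketch (again an assumption about the project's code, using the Spark 2.x Function3 signature) marks a vehicleId as processed after its first appearance:

import org.apache.spark.api.java.Optional;
import org.apache.spark.api.java.function.Function3;
import org.apache.spark.streaming.State;
import scala.Tuple2;

// Emits (record, wasAlreadyProcessed): the first sight of a vehicleId yields FALSE,
// then the state is flipped so later batches produce TRUE and get filtered out.
private static final Function3<String, Optional<IoTData>, State<Boolean>, Tuple2<IoTData, Boolean>> processedVehicleFunc =
	(vehicleId, iot, state) -> {
		Tuple2<IoTData, Boolean> result = new Tuple2<>(iot.get(), state.exists());
		state.update(Boolean.TRUE);
		return result;
	};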

Example 4: testStaticSparkToNatsWithConnectionLifecycle

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class required by this method
@Test(timeout=20000)
public void testStaticSparkToNatsWithConnectionLifecycle() throws Exception {  
   	startStreamingServer(clusterID, false);

   	long poolSize = SparkToNatsStreamingConnectorPool.poolSize();
	
	final List<Integer> data = UnitTestUtilities.getData();
	final String subject1 = "subject1";
	final String subject2 = "subject2";
	final int partitionsNb = 3;
	final JavaDStream<String> lines = ssc.textFileStream(tempDir.getAbsolutePath()).repartition(partitionsNb);
	final JavaDStream<Integer> integers = lines.map(str -> Integer.parseInt(str));
	
	final Properties properties = new Properties();
	properties.setProperty(PROP_URL, STAN_URL);
	SparkToNatsConnectorPool
		.newStreamingPool(clusterID)
		.withProperties(properties)
		.withConnectionTimeout(Duration.ofSeconds(2))
		.withSubjects(DEFAULT_SUBJECT, subject1, subject2)
		.publishToNats(integers);
	
	ssc.start();

	TimeUnit.SECONDS.sleep(1);

	final NatsStreamingSubscriber ns1 = UnitTestUtilities.getNatsStreamingSubscriber(data, subject1, clusterID, getUniqueClientName() + "_SUB1", STAN_URL);
	final NatsStreamingSubscriber ns2 = UnitTestUtilities.getNatsStreamingSubscriber(data, subject2, clusterID, getUniqueClientName() + "_SUB1", STAN_URL);
	writeTmpFile(data);
	// wait for the subscribers to complete.
	ns1.waitForCompletion();
	ns2.waitForCompletion();
	
	TimeUnit.MILLISECONDS.sleep(100);
	assertEquals("The connections Pool size should be the same as the number of Spark partitions", 
			poolSize + partitionsNb, SparkToNatsStreamingConnectorPool.poolSize());
			
	final NatsStreamingSubscriber ns1p = UnitTestUtilities.getNatsStreamingSubscriber(data, subject1, clusterID, getUniqueClientName() + "_SUB1", STAN_URL);
	final NatsStreamingSubscriber ns2p = UnitTestUtilities.getNatsStreamingSubscriber(data, subject2, clusterID, getUniqueClientName() + "_SUB1", STAN_URL);
	writeTmpFile(data);
	// wait for the subscribers to complete.
	ns1p.waitForCompletion();
	ns2p.waitForCompletion();
	TimeUnit.MILLISECONDS.sleep(100);
	assertEquals("The connections Pool size should be the same as the number of Spark partitions", 
			poolSize + partitionsNb, SparkToNatsStreamingConnectorPool.poolSize());

	ssc.stop();
	ssc = null;
	
	logger.debug("Spark Context Stopped");
	
	TimeUnit.SECONDS.sleep(5);
	logger.debug("After 5 sec delay");
	
	assertTrue("The poolSize() of " + SparkToNatsStreamingConnectorPool.connectorsPoolMap + " should have been reverted to its original value",
			SparkToNatsStreamingConnectorPool.poolSize() == poolSize);
}
 
Developer: Logimethods | Project: nats-connector-spark | Lines: 62 | Source: SparkToNatsStreamingConnectorLifecycleTest.java
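One caveat on the lines.map(str -> Integer.parseInt(str)) step: a single malformed line throws NumberFormatException and fails the batch, which is acceptable in a controlled test but not in a production job. A defensive variant, sketched here under the assumption of Spark 2.x (where FlatMapFunction returns an Iterator), drops unparsable lines instead:

import java.util.Collections;
import org.apache.spark.streaming.api.java.JavaDStream;

// Emit zero or one Integer per line, silently skipping lines that fail to parse.
JavaDStream<Integer> integers = lines.flatMap(str -> {
	try {
		return Collections.singletonList(Integer.parseInt(str.trim())).iterator();
	} catch (NumberFormatException e) {
		return Collections.<Integer>emptyIterator();
	}
});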

Example 5: main

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class required by this method
public static void main(String[] s) {
	StreamNumberServer.startNumberGeneratorServer(9999);

	// Create a local StreamingContext with two working thread and batch interval of 1 second
	SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("ConfigurableFilterApp");
	try (JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(1))) {

		JavaReceiverInputDStream<String> lines = jssc.socketTextStream("localhost", 9999);

		JavaDStream<SensorData> values = lines.map(line -> SensorData.fromString(line));

		values = values.map(new CfgFunction());
		
		values.print();
		
		jssc.start();              // Start the computation
		jssc.awaitTermination();   // Wait for the computation to terminate
	} 
}
 
Developer: smarcu | Project: spark-streaming-example | Lines: 21 | Source: SparkStreamingExample.java
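CfgFunction is not shown in the snippet; since it is passed to map over a JavaDStream<SensorData> and the result is assigned back to the same variable, it must implement Function<SensorData, SensorData>. A hypothetical sketch follows; the clamping rule and the getValue/setValue accessors are invented for illustration:

import org.apache.spark.api.java.function.Function;

// A configurable per-record transformation; here it clamps a reading, standing in
// for whatever rule the real app loads from configuration.
public class CfgFunction implements Function<SensorData, SensorData> {
	private final double maxValue = 100.0; // hypothetical configured limit

	@Override
	public SensorData call(SensorData data) throws Exception {
		// getValue/setValue are assumed accessors on SensorData.
		if (data.getValue() > maxValue) {
			data.setValue(maxValue);
		}
		return data;
	}
}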


Note: The org.apache.spark.streaming.api.java.JavaDStream.map examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets come from open-source projects contributed by their original authors, who retain copyright; consult each project's license before reusing or redistributing the code, and do not republish without permission.