当前位置: 首页>>代码示例>>Java>>正文


Java JavaStreamingContext.checkpoint方法代码示例

本文整理汇总了Java中org.apache.spark.streaming.api.java.JavaStreamingContext.checkpoint方法的典型用法代码示例。如果您正苦于以下问题:Java JavaStreamingContext.checkpoint方法的具体用法?Java JavaStreamingContext.checkpoint怎么用?Java JavaStreamingContext.checkpoint使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.spark.streaming.api.java.JavaStreamingContext的用法示例。


在下文中一共展示了JavaStreamingContext.checkpoint方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: checkpoint

import org.apache.spark.streaming.api.java.JavaStreamingContext; //导入方法依赖的package包/类
private void checkpoint(JavaStreamingContext jssc, CheckpointDir checkpointDir) {
  Path rootCheckpointPath = checkpointDir.getRootCheckpointDir();
  Path sparkCheckpointPath = checkpointDir.getSparkCheckpointDir();
  Path beamCheckpointPath = checkpointDir.getBeamCheckpointDir();

  try {
    FileSystem fileSystem =
        rootCheckpointPath.getFileSystem(jssc.sparkContext().hadoopConfiguration());
    if (!fileSystem.exists(rootCheckpointPath)) {
      fileSystem.mkdirs(rootCheckpointPath);
    }
    if (!fileSystem.exists(sparkCheckpointPath)) {
      fileSystem.mkdirs(sparkCheckpointPath);
    }
    if (!fileSystem.exists(beamCheckpointPath)) {
      fileSystem.mkdirs(beamCheckpointPath);
    }
  } catch (IOException e) {
    throw new RuntimeException("Failed to create checkpoint dir", e);
  }

  jssc.checkpoint(sparkCheckpointPath.toString());
}
 
开发者ID:apache,项目名称:beam,代码行数:24,代码来源:SparkRunnerStreamingContextFactory.java

示例2: processMQTT

import org.apache.spark.streaming.api.java.JavaStreamingContext; //导入方法依赖的package包/类
/**
 * This will start the spark stream that is reading from the MQTT queue
 *
 * @param broker     - MQTT broker url
 * @param topic      - MQTT topic name
 * @param numSeconds - Number of seconds between batch size
 */
public void processMQTT(final String broker, final String topic, final int numSeconds) {

    LOG.info("************ SparkStreamingMQTTOutside.processMQTT start");

    // Create the spark application and set the name to MQTT
    SparkConf sparkConf = new SparkConf().setAppName("MQTT");

    // Create the spark streaming context with a 'numSeconds' second batch size
    jssc = new JavaStreamingContext(sparkConf, Durations.seconds(numSeconds));
    jssc.checkpoint(checkpointDirectory);

    LOG.info("************ SparkStreamingMQTTOutside.processMQTT about to read the MQTTUtils.createStream");
    //2. MQTTUtils to collect MQTT messages
    JavaReceiverInputDStream<String> messages = MQTTUtils.createStream(jssc, broker, topic);

    LOG.info("************ SparkStreamingMQTTOutside.processMQTT about to do foreachRDD");
    //process the messages on the queue and save them to the database
    messages.foreachRDD(new SaveRDD());

    LOG.info("************ SparkStreamingMQTTOutside.processMQTT prior to context.strt");
    // Start the context
    jssc.start();
    jssc.awaitTermination();
}
 
开发者ID:splicemachine,项目名称:splice-community-sample-code,代码行数:32,代码来源:SparkStreamingMQTT.java

示例3: sparkInit

import org.apache.spark.streaming.api.java.JavaStreamingContext; //导入方法依赖的package包/类
public void sparkInit() {
  PropertyConfigurator.configure(SparkTwitterStreaming.class.getClassLoader().getResource("log4j.properties"));
  //		note: import org.apache.log4j.Logger;
  //		note: import org.apache.log4j.Level;
  //		Logger.getLogger("org").setLevel(Level.WARN);
  //		Logger.getLogger("akka").setLevel(Level.WARN);
  // Set spark streaming info
  ssc = new JavaStreamingContext(
    "local[2]", "SparkTwitterStreamingJava", 
    new Duration(1000), System.getenv("SPARK_HOME"), 
    JavaStreamingContext.jarOfClass(SparkTwitterStreaming.class));

  //	HDFS directory for checkpointing
  /*
   * checkpoint saves the RDD to an HDFS file
   * http://apache-spark-user-list.1001560.n3.nabble.com/checkpoint-and-not-running-out-of-disk-space-td1525.html
   * dfs.namenode.checkpoint.dir -> hdfs-site.xml
   */
  //		String checkpointDir = TutorialHelper.getHdfsUrl() + "/checkpoint/";

  String checkpointDir = "file:///Users/feiyu/workspace/Hadoop/hdfs/namesecondary/checkpoint";
  ssc.checkpoint(checkpointDir);
}
 
开发者ID:faustineinsun,项目名称:WiseCrowdRec,代码行数:24,代码来源:SparkTwitterStreaming.java

示例4: setup

import org.apache.spark.streaming.api.java.JavaStreamingContext; //导入方法依赖的package包/类
@Override
public void setup() {
	// Create a StreamingContext with a SparkConf configuration
	SparkConf sparkConf = new SparkConf(false)
			.setAppName("JaiSpark")
			.setSparkHome("target/sparkhome")
			.setMaster("local")
			.set("spark.executor.memory", "128m")
			.set("spark.local.dir",
					new File("target/sparkhome/tmp").getAbsolutePath())
			.set("spark.cores.max", "2").set("spark.akka.threads", "2")
			.set("spark.akka.timeout", "60").set("spark.logConf", "true")
			.set("spark.cleaner.delay", "3700")
			.set("spark.cleaner.ttl", "86400")
			.set("spark.shuffle.spill", "false")
			.set("spark.driver.host", "localhost")
			.set("spark.driver.port", "43214");
	jssc = new JavaStreamingContext(sparkConf, new Duration(5000));

	String checkpointDir = hadoopClusterService.getHDFSUri()
			+ "/sparkcheckpoint";
	jssc.checkpoint(checkpointDir);
	startFlumeStream();
}
 
开发者ID:jaibeermalik,项目名称:searchanalytics-bigdata,代码行数:25,代码来源:SparkStreamServiceImpl.java

示例5: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; //导入方法依赖的package包/类
public static void main(String[] args) throws Exception {
 System.setProperty("hadoop.home.dir", "E:\\hadoop");

   SparkConf sparkConf = new SparkConf().setAppName("WordCountSocketEx").setMaster("local[*]");
   JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1));
   streamingContext.checkpoint("E:\\hadoop\\checkpoint");
// Initial state RDD input to mapWithState
   @SuppressWarnings("unchecked")
   List<Tuple2<String, Integer>> tuples =Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1));
   JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples);
   
   JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream( "10.0.75.1", Integer.parseInt("9000"), StorageLevels.MEMORY_AND_DISK_SER);
   
   JavaDStream<String> words = StreamingLines.flatMap( str -> Arrays.asList(str.split(" ")).iterator() );
  
   JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str-> new Tuple2<>(str, 1)).reduceByKey((count1,count2) ->count1+count2 );
  


  // Update the cumulative count function
  Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc =
      new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {
        @Override
        public Tuple2<String, Integer> call(String word, Optional<Integer> one,
            State<Integer> state) {
          int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
          Tuple2<String, Integer> output = new Tuple2<>(word, sum);
          state.update(sum);
          return output;
        }
      };

  // DStream made of get cumulative counts that get updated in every batch
  JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordCounts.mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

  stateDstream.print();
  streamingContext.start();
  streamingContext.awaitTermination();
}
 
开发者ID:PacktPublishing,项目名称:Apache-Spark-2x-for-Java-Developers,代码行数:40,代码来源:WordCountSocketStateful.java

示例6: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; //导入方法依赖的package包/类
public static void main(String[] args) throws InterruptedException {

		System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");

		SparkSession sparkSession = SparkSession.builder().master("local[*]").appName("Stateful Streaming Example")
				.config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse").getOrCreate();

		JavaStreamingContext jssc= new JavaStreamingContext(new JavaSparkContext(sparkSession.sparkContext()),
				Durations.milliseconds(1000));
		JavaReceiverInputDStream<String> inStream = jssc.socketTextStream("10.204.136.223", 9999);
		jssc.checkpoint("C:\\Users\\sgulati\\spark-checkpoint");

		JavaDStream<FlightDetails> flightDetailsStream = inStream.map(x -> {
			ObjectMapper mapper = new ObjectMapper();
			return mapper.readValue(x, FlightDetails.class);
		});
		
		

		JavaPairDStream<String, FlightDetails> flightDetailsPairStream = flightDetailsStream
				.mapToPair(f -> new Tuple2<String, FlightDetails>(f.getFlightId(), f));

		Function3<String, Optional<FlightDetails>, State<List<FlightDetails>>, Tuple2<String, Double>> mappingFunc = (
				flightId, curFlightDetail, state) -> {
			List<FlightDetails> details = state.exists() ? state.get() : new ArrayList<>();

			boolean isLanded = false;

			if (curFlightDetail.isPresent()) {
				details.add(curFlightDetail.get());
				if (curFlightDetail.get().isLanded()) {
					isLanded = true;
				}
			}
			Double avgSpeed = details.stream().mapToDouble(f -> f.getTemperature()).average().orElse(0.0);

			if (isLanded) {
				state.remove();
			} else {
				state.update(details);
			}
			return new Tuple2<String, Double>(flightId, avgSpeed);
		};

		JavaMapWithStateDStream<String, FlightDetails, List<FlightDetails>, Tuple2<String, Double>> streamWithState = flightDetailsPairStream
				.mapWithState(StateSpec.function(mappingFunc).timeout(Durations.minutes(5)));
		
		streamWithState.print();
		jssc.start();
		jssc.awaitTermination();
	}
 
开发者ID:PacktPublishing,项目名称:Apache-Spark-2x-for-Java-Developers,代码行数:52,代码来源:StateFulProcessingExample.java

示例7: createContext

import org.apache.spark.streaming.api.java.JavaStreamingContext; //导入方法依赖的package包/类
protected static JavaStreamingContext createContext(String ip, int port, String checkpointDirectory) {
	SparkConf sparkConf = new SparkConf().setAppName("WordCountRecoverableEx").setMaster("local[*]");
	JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1));
	streamingContext.checkpoint(checkpointDirectory);
	// Initial state RDD input to mapWithState
	@SuppressWarnings("unchecked")
	List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1));
	JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples);

	JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream(ip,port, StorageLevels.MEMORY_AND_DISK_SER);

	JavaDStream<String> words = StreamingLines.flatMap(str -> Arrays.asList(str.split(" ")).iterator());

	JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str -> new Tuple2<>(str, 1))
			.reduceByKey((count1, count2) -> count1 + count2);

	// Update the cumulative count function
	Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {
		@Override
		public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) {
			int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
			Tuple2<String, Integer> output = new Tuple2<>(word, sum);
			state.update(sum);
			return output;
		}
	};

	// DStream made of get cumulative counts that get updated in every batch
	JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordCounts
			.mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

	stateDstream.print();
	return streamingContext;
}
 
开发者ID:PacktPublishing,项目名称:Apache-Spark-2x-for-Java-Developers,代码行数:35,代码来源:WordCountRecoverableEx.java

示例8: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; //导入方法依赖的package包/类
public static void main(String[] args) throws Exception {
        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("SparkJoinTest");
        JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(1));
        ssc.checkpoint("checkpoint");

        JavaReceiverInputDStream<String> lines = ssc.socketTextStream("127.0.0.1", 9999);
        JavaPairDStream<String, Long> nameStream = lines.flatMap(new FlatMapFunction<String, String>() {
            public Iterable<String> call(String l) throws Exception {
                return Arrays.asList(l.split(" "));
            }
        }).mapToPair(new PairFunction<String, String, Long>() {
            public Tuple2<String, Long> call(String w) throws Exception {
                return new Tuple2<>(w, 1L);
            }
        }).window(Durations.seconds(30), Durations.seconds(10));

        JavaReceiverInputDStream<String> lines2 = ssc.socketTextStream("127.0.0.1", 9998);
        JavaPairDStream<String, Long> nameAgeStream = lines2.mapToPair(new PairFunction<String, String, Long>() {
            @Override
            public Tuple2<String, Long> call(String s) throws Exception {
                String[] list = s.split(" ");
                String name = list[0];
                long age = 0L;
                if (list.length > 1)
                    age = Long.parseLong(list[1]);
                return new Tuple2<String, Long>(name, age);
            }
        }).window(Durations.seconds(11), Durations.seconds(11));

//        nameStream.print();
//        nameAgeStream.print();
        JavaPairDStream<String, Tuple2<Long, Long>> joinedStream = nameStream.join(nameAgeStream);
        joinedStream.print();

        ssc.start();
        ssc.awaitTermination();
    }
 
开发者ID:wangyangjun,项目名称:StreamBench,代码行数:38,代码来源:SparkJoinTest.java

示例9: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; //导入方法依赖的package包/类
public static void main(String[] args) {
   	//Window Specific property if Hadoop is not instaalled or HADOOP_HOME is not set
	 System.setProperty("hadoop.home.dir", "E:\\hadoop");
   	//Logger rootLogger = LogManager.getRootLogger();
  		//rootLogger.setLevel(Level.WARN); 
       SparkConf conf = new SparkConf().setAppName("KafkaExample").setMaster("local[*]");
       
    
       JavaSparkContext sc = new JavaSparkContext(conf);
       JavaStreamingContext streamingContext = new JavaStreamingContext(sc, Durations.minutes(2));
       streamingContext.checkpoint("E:\\hadoop\\checkpoint");
       Logger rootLogger = LogManager.getRootLogger();
  		rootLogger.setLevel(Level.WARN); 
  		
  	 List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 10), new Tuple2<>("world", 10));
    JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples);
		    

    JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream( "10.0.75.1", Integer.parseInt("9000"), StorageLevels.MEMORY_AND_DISK_SER);
    
    JavaDStream<String> words = StreamingLines.flatMap( str -> Arrays.asList(str.split(" ")).iterator() );
   
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str-> new Tuple2<>(str, 1)).reduceByKey((count1,count2) ->count1+count2 );
   
    wordCounts.print();
    wordCounts.window(Durations.minutes(8)).countByValue()
      .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
    wordCounts.window(Durations.minutes(8),Durations.minutes(2)).countByValue()
      .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
    wordCounts.window(Durations.minutes(12),Durations.minutes(8)).countByValue()
      .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
    wordCounts.window(Durations.minutes(2),Durations.minutes(2)).countByValue()
      .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
    wordCounts.window(Durations.minutes(12),Durations.minutes(12)).countByValue()
      .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
      
    //comment these two operation to make it run
    wordCounts.window(Durations.minutes(5),Durations.minutes(2)).countByValue()
      .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
    wordCounts.window(Durations.minutes(10),Durations.minutes(1)).countByValue()
      .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
      
       streamingContext.start();
       try {
		streamingContext.awaitTermination();
	} catch (InterruptedException e) {
		// TODO Auto-generated catch block
		e.printStackTrace();
	}
}
 
开发者ID:PacktPublishing,项目名称:Apache-Spark-2x-for-Java-Developers,代码行数:51,代码来源:WindowBatchInterval.java

示例10: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; //导入方法依赖的package包/类
public static void main(String[] args) {
  	//Window Specific property if Hadoop is not instaalled or HADOOP_HOME is not set
 System.setProperty("hadoop.home.dir", "E:\\hadoop");
  	//Logger rootLogger = LogManager.getRootLogger();
 		//rootLogger.setLevel(Level.WARN); 
      SparkConf conf = new SparkConf().setAppName("KafkaExample").setMaster("local[*]");    
      JavaSparkContext sc = new JavaSparkContext(conf);
      JavaStreamingContext streamingContext = new JavaStreamingContext(sc, Durations.minutes(2));
      streamingContext.checkpoint("E:\\hadoop\\checkpoint");
      Logger rootLogger = LogManager.getRootLogger();
 		rootLogger.setLevel(Level.WARN); 
      Map<String, Object> kafkaParams = new HashMap<>();
      kafkaParams.put("bootstrap.servers", "10.0.75.1:9092");
      kafkaParams.put("key.deserializer", StringDeserializer.class);
      kafkaParams.put("value.deserializer", StringDeserializer.class);
      kafkaParams.put("group.id", "use_a_separate_group_id_for_each_strea");
      kafkaParams.put("auto.offset.reset", "latest");
     // kafkaParams.put("enable.auto.commit", false);

      Collection<String> topics = Arrays.asList("mytopic", "anothertopic");

      final JavaInputDStream<ConsumerRecord<String, String>> stream = KafkaUtils.createDirectStream(streamingContext,LocationStrategies.PreferConsistent(),
      				ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));

      JavaPairDStream<String, String> pairRDD = stream.mapToPair(record-> new Tuple2<>(record.key(), record.value()));
     
      pairRDD.foreachRDD(pRDD-> { pRDD.foreach(tuple-> System.out.println(new Date()+" :: Kafka msg key ::"+tuple._1() +" the val is ::"+tuple._2()));});
     
      JavaDStream<String> tweetRDD = pairRDD.map(x-> x._2()).map(new TweetText());
      
      tweetRDD.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" :: "+x)));
      
     JavaDStream<String> hashtagRDD = tweetRDD.flatMap(twt-> Arrays.stream(twt.split(" ")).filter(str-> str.contains("#")).collect(Collectors.toList()).iterator() );
 
      hashtagRDD.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(x)));
      
      JavaPairDStream<String, Long> cntByVal = hashtagRDD.countByValue();
      
      cntByVal.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The count tag is ::"+x._1() +" and the val is ::"+x._2())));
      
     /* hashtagRDD.window(Durations.seconds(60), Durations.seconds(30))
                .countByValue()
               .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
      
     hashtagRDD.countByValueAndWindow(Durations.seconds(60), Durations.seconds(30))
               .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println("The window&count tag is ::"+x._1() +" and the val is ::"+x._2())));
      */
     hashtagRDD.window(Durations.minutes(8)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
     hashtagRDD.window(Durations.minutes(8),Durations.minutes(2)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
     hashtagRDD.window(Durations.minutes(12),Durations.minutes(8)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
     hashtagRDD.window(Durations.minutes(2),Durations.minutes(2)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
     hashtagRDD.window(Durations.minutes(12),Durations.minutes(12)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
     
     /*hashtagRDD.window(Durations.minutes(5),Durations.minutes(2)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));*/
     /* hashtagRDD.window(Durations.minutes(10),Durations.minutes(1)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));*/
     
      streamingContext.start();
      try {
	streamingContext.awaitTermination();
} catch (InterruptedException e) {
	// TODO Auto-generated catch block
	e.printStackTrace();
}
  }
 
开发者ID:PacktPublishing,项目名称:Apache-Spark-2x-for-Java-Developers,代码行数:72,代码来源:KafkaExample.java

示例11: setUp

import org.apache.spark.streaming.api.java.JavaStreamingContext; //导入方法依赖的package包/类
@Before
public void setUp() {
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("TestingSparkStreaming");
    jssc = new JavaStreamingContext(conf, new Duration(1000));
    jssc.checkpoint("checkpoint");
}
 
开发者ID:agapic,项目名称:Twitch-Streamer,代码行数:7,代码来源:LocalSparkStreaming.java

示例12: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; //导入方法依赖的package包/类
public static void main(String[] args) throws Exception {
 //read Spark and Cassandra properties and create SparkConf
 Properties prop = PropertyFileReader.readPropertyFile();		
 SparkConf conf = new SparkConf()
		 .setAppName(prop.getProperty("com.iot.app.spark.app.name"))
		 .setMaster(prop.getProperty("com.iot.app.spark.master"))
		 .set("spark.cassandra.connection.host", prop.getProperty("com.iot.app.cassandra.host"))
		 .set("spark.cassandra.connection.port", prop.getProperty("com.iot.app.cassandra.port"))
		 .set("spark.cassandra.connection.keep_alive_ms", prop.getProperty("com.iot.app.cassandra.keep_alive"));		 
 //batch interval of 5 seconds for incoming stream		 
 JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));	
 //add check point directory
 jssc.checkpoint(prop.getProperty("com.iot.app.spark.checkpoint.dir"));
 
 //read and set Kafka properties
 Map<String, String> kafkaParams = new HashMap<String, String>();
 kafkaParams.put("zookeeper.connect", prop.getProperty("com.iot.app.kafka.zookeeper"));
 kafkaParams.put("metadata.broker.list", prop.getProperty("com.iot.app.kafka.brokerlist"));
 String topic = prop.getProperty("com.iot.app.kafka.topic");
 Set<String> topicsSet = new HashSet<String>();
 topicsSet.add(topic);
 //create direct kafka stream
 JavaPairInputDStream<String, IoTData> directKafkaStream = KafkaUtils.createDirectStream(
	        jssc,
	        String.class,
	        IoTData.class,
	        StringDecoder.class,
	        IoTDataDecoder.class,
	        kafkaParams,
	        topicsSet
	    );
 logger.info("Starting Stream Processing");
 
 //We need non filtered stream for poi traffic data calculation
 JavaDStream<IoTData> nonFilteredIotDataStream = directKafkaStream.map(tuple -> tuple._2());
 
 //We need filtered stream for total and traffic data calculation
 JavaPairDStream<String,IoTData> iotDataPairStream = nonFilteredIotDataStream.mapToPair(iot -> new Tuple2<String,IoTData>(iot.getVehicleId(),iot)).reduceByKey((a, b) -> a );

 // Check vehicle Id is already processed
 JavaMapWithStateDStream<String, IoTData, Boolean, Tuple2<IoTData,Boolean>> iotDStreamWithStatePairs = iotDataPairStream
					.mapWithState(StateSpec.function(processedVehicleFunc).timeout(Durations.seconds(3600)));//maintain state for one hour

 // Filter processed vehicle ids and keep un-processed
 JavaDStream<Tuple2<IoTData,Boolean>> filteredIotDStreams = iotDStreamWithStatePairs.map(tuple2 -> tuple2)
					.filter(tuple -> tuple._2.equals(Boolean.FALSE));

 // Get stream of IoTdata
 JavaDStream<IoTData> filteredIotDataStream = filteredIotDStreams.map(tuple -> tuple._1);
 
 //cache stream as it is used in total and window based computation
 filteredIotDataStream.cache();
 	 
 //process data
 IoTTrafficDataProcessor iotTrafficProcessor = new IoTTrafficDataProcessor();
 iotTrafficProcessor.processTotalTrafficData(filteredIotDataStream);
 iotTrafficProcessor.processWindowTrafficData(filteredIotDataStream);

 //poi data
 POIData poiData = new POIData();
 poiData.setLatitude(33.877495);
 poiData.setLongitude(-95.50238);
 poiData.setRadius(30);//30 km
 
 //broadcast variables. We will monitor vehicles on Route 37 which are of type Truck
 Broadcast<Tuple3<POIData, String, String>> broadcastPOIValues = jssc.sparkContext().broadcast(new Tuple3<>(poiData,"Route-37","Truck"));
 //call method  to process stream
 iotTrafficProcessor.processPOIData(nonFilteredIotDataStream,broadcastPOIValues);
 
 //start context
 jssc.start();            
 jssc.awaitTermination();  
}
 
开发者ID:baghelamit,项目名称:iot-traffic-monitor,代码行数:74,代码来源:IoTDataProcessor.java

示例13: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; //导入方法依赖的package包/类
public static void main(String[] args) throws Exception {
        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("Stateful Network Word Count");
        JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(1));
        ssc.checkpoint("checkpoint");

        ssc.addStreamingListener(new PerformanceStreamingListener());


        JavaReceiverInputDStream<String> lines = ssc.socketTextStream("127.0.0.1", 9999);

        JavaPairDStream<String, Long> wordCounts = lines.flatMap(new FlatMapFunction<String, String>() {
            public Iterable<String> call(String l) throws Exception {
                return Arrays.asList(l.split(" "));
            }
        }).mapToPair(new PairFunction<String, String, Long>() {
            public Tuple2<String, Long> call(String w) throws Exception {
                return new Tuple2<>(w, 1L);
            }
        })
                .reduceByKey(new Function2<Long, Long, Long>() {
                    @Override
                    public Long call(Long aLong, Long aLong2) throws Exception {
                        return aLong + aLong2;
                    }
                })
                .updateStateByKey(new Function2<List<Long>, Optional<Long>, Optional<Long>>() {
                    public Optional<Long> call(List<Long> values, Optional<Long> state) throws Exception {
                        if (values == null || values.isEmpty()) {
                            return state;
                        }
                        long sum = 0L;
                        for (Long v : values) {
                            sum += v;
                        }

                        return Optional.of(state.or(0L) + sum);
                    }
                });
//                .updateStateByKey(new Function2<List<Iterable<Long>>, Optional<Long>, Optional<Long>>() {
//                    @Override
//                    public Optional<Long> call(List<Iterable<Long>> iterables, Optional<Long> longOptional) throws Exception {
//                        if (iterables == null || iterables.isEmpty()) {
//                            return longOptional;
//                        }
//                        long sum = 0L;
//                        for (Iterable<Long> iterable : iterables) {
//                            for(Long l : iterable)
//                                sum += l;
//                        }
//                        return Optional.of(longOptional.or(0L) + sum);
//                    }
//                });

        wordCounts.print();
        wordCounts.foreach(new Function2<JavaPairRDD<String, Long>, Time, Void>() {
            @Override
            public Void call(JavaPairRDD<String, Long> stringLongJavaPairRDD, Time time) throws Exception {
                return null;
            }
        });
        ssc.start();
        ssc.awaitTermination();
    }
 
开发者ID:wangyangjun,项目名称:StreamBench,代码行数:64,代码来源:StreamingWordCount.java

示例14: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; //导入方法依赖的package包/类
public static void main(String[] args) throws IOException {
  Flags.setFromCommandLineArgs(THE_OPTIONS, args);

  // Startup the Spark Conf.
  SparkConf conf = new SparkConf()
      .setAppName("A Databricks Reference Application: Logs Analysis with Spark");
  JavaStreamingContext jssc = new JavaStreamingContext(conf,
      Flags.getInstance().getSlideInterval());

  // Checkpointing must be enabled to use the updateStateByKey function & windowed operations.
  jssc.checkpoint(Flags.getInstance().getCheckpointDirectory());

  // This methods monitors a directory for new files to read in for streaming.
  JavaDStream<String> logData = jssc.textFileStream(Flags.getInstance().getLogsDirectory());

  JavaDStream<ApacheAccessLog> accessLogsDStream
    = logData.map(new Functions.ParseFromLogLine()).cache();

  final LogAnalyzerTotal logAnalyzerTotal = new LogAnalyzerTotal();
  final LogAnalyzerWindowed logAnalyzerWindowed = new LogAnalyzerWindowed();

  // Process the DStream which gathers stats for all of time.
  logAnalyzerTotal.processAccessLogs(Flags.getInstance().getOutputDirectory(), accessLogsDStream);

  // Calculate statistics for the last time interval.
  logAnalyzerWindowed.processAccessLogs(Flags.getInstance().getOutputDirectory(), accessLogsDStream);

  // Render the output each time there is a new RDD in the accessLogsDStream.
  final Renderer renderer = new Renderer();
  accessLogsDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {
      public Void call(JavaRDD<ApacheAccessLog> rdd) {
        // Call this to output the stats.
        try {
          renderer.render(logAnalyzerTotal.getLogStatistics(),
                          logAnalyzerWindowed.getLogStatistics());
        } catch (Exception e) {
        }
        return null;
      }
    });

  // Start the streaming server.
  jssc.start();              // Start the computation
  jssc.awaitTermination();   // Wait for the computation to terminate
}
 
开发者ID:holdenk,项目名称:learning-spark-examples,代码行数:46,代码来源:LogAnalyzerAppMain.java


注:本文中的org.apache.spark.streaming.api.java.JavaStreamingContext.checkpoint方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。