

Java JavaInputDStream.foreachRDD Method Code Examples

This article collects typical usage examples of the Java method org.apache.spark.streaming.api.java.JavaInputDStream.foreachRDD. If you are wondering what JavaInputDStream.foreachRDD does, how to call it, or where to find working examples, the curated code samples below should help. You can also explore other usage examples of the enclosing class, org.apache.spark.streaming.api.java.JavaInputDStream.


Four code examples of JavaInputDStream.foreachRDD are shown below, ordered by popularity.
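
Before the examples, a quick orientation: foreachRDD is an output operation on a DStream. The function passed to it runs on the driver once per micro-batch and receives that batch's RDD; any RDD actions inside the function execute on the executors. The minimal sketch below shows the basic call shape; the broker address, topic name, and group id are placeholders.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;

public class ForeachRDDSketch {
    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf().setAppName("foreachRDD-sketch").setMaster("local[2]");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(10));

        Map<String, Object> kafkaParams = new HashMap<>();
        kafkaParams.put("bootstrap.servers", "localhost:9092"); // placeholder
        kafkaParams.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        kafkaParams.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        kafkaParams.put("group.id", "sketch-group"); // placeholder

        JavaInputDStream<ConsumerRecord<String, String>> stream = KafkaUtils.createDirectStream(
                jssc, LocationStrategies.PreferConsistent(),
                ConsumerStrategies.<String, String>Subscribe(
                        Collections.singletonList("events"), kafkaParams)); // placeholder topic

        // Output operation: this lambda runs on the driver once per batch.
        stream.foreachRDD(rdd -> System.out.println("records in batch: " + rdd.count()));

        jssc.start();
        jssc.awaitTermination();
    }
}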

Example 1: start

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the package/class this method depends on
public void start() {
    SparkConf sparkConf = getSparkConf();
    streamingContext = new JavaStreamingContext(sparkConf,
            Durations.seconds(Long.parseLong(config.getStreamingBatchIntervalInSec())));
    JavaInputDStream<MessageAndMetadata<String, byte[]>> dStream = buildInputDStream(streamingContext);
    JavaPairDStream<String, byte[]> pairDStream = dStream.mapToPair(km -> new Tuple2<>(km.key(), km.message()));

    pairDStream.foreachRDD(new ProcessStreamingData<>(config)); // process data
    dStream.foreachRDD(new UpdateOffsetsFn<>(config.getKafkaGroupId(), config.getZkOffsetManager()));
    streamingContext.start();
}
 
Author: ameyamk | Project: spark-streaming-direct-kafka | Source: StreamingEngine.java
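
ProcessStreamingData and UpdateOffsetsFn above are classes from the spark-streaming-direct-kafka project and are not reproduced here. As a rough, hypothetical sketch of the shape such a per-batch handler takes (the real class may differ), a serializable VoidFunction is enough:

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.VoidFunction;

// Hypothetical sketch only: the real ProcessStreamingData in
// spark-streaming-direct-kafka may look quite different.
public class ProcessBatchSketch implements VoidFunction<JavaPairRDD<String, byte[]>> {
    @Override
    public void call(JavaPairRDD<String, byte[]> rdd) {
        if (rdd.isEmpty()) {
            return; // nothing to do for an empty micro-batch
        }
        rdd.foreachPartition(records -> {
            // open one connection/writer per partition, then drain the iterator
            records.forEachRemaining(kv -> {
                String key = kv._1();
                byte[] payload = kv._2();
                // hand key/payload to the downstream sink here (elided)
            });
        });
    }
}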

Example 2: start

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the package/class this method depends on
public synchronized void start() {
  String id = getID();
  if (id != null) {
    log.info("Starting Speed Layer {}", id);
  }

  streamingContext = buildStreamingContext();
  log.info("Creating message stream from topic");
  JavaInputDStream<ConsumerRecord<K,M>> kafkaDStream = buildInputDStream(streamingContext);
  JavaPairDStream<K,M> pairDStream =
      kafkaDStream.mapToPair(mAndM -> new Tuple2<>(mAndM.key(), mAndM.value()));

  KafkaConsumer<String,U> consumer = new KafkaConsumer<>(
      ConfigUtils.keyValueToProperties(
          "group.id", "OryxGroup-" + getLayerName() + "-" + UUID.randomUUID(),
          "bootstrap.servers", updateTopicBroker,
          "max.partition.fetch.bytes", maxMessageSize,
          "key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer",
          "value.deserializer", updateDecoderClass.getName(),
          // Do start from the beginning of the update queue
          "auto.offset.reset", "earliest"
      ));
  consumer.subscribe(Collections.singletonList(updateTopic));
  consumerIterator = new ConsumeDataIterator<>(consumer);

  modelManager = loadManagerInstance();
  Configuration hadoopConf = streamingContext.sparkContext().hadoopConfiguration();
  new Thread(LoggingCallable.log(() -> {
    try {
      modelManager.consume(consumerIterator, hadoopConf);
    } catch (Throwable t) {
      log.error("Error while consuming updates", t);
      close();
    }
  }).asRunnable(), "OryxSpeedLayerUpdateConsumerThread").start();

  pairDStream.foreachRDD(new SpeedLayerUpdate<>(modelManager, updateBroker, updateTopic));

  // Must use the raw Kafka stream to get offsets
  kafkaDStream.foreachRDD(new UpdateOffsetsFn<>(getGroupID(), getInputTopicLockMaster()));

  log.info("Starting Spark Streaming");

  streamingContext.start();
}
 
Author: oncewang | Project: oryx2 | Source: SpeedLayer.java
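
UpdateOffsetsFn is Oryx's own class for persisting consumed offsets; the cast to HasOffsetRanges only succeeds on RDDs from the raw Kafka stream, which is why the code above applies it to kafkaDStream rather than pairDStream. A hypothetical sketch of the underlying pattern, with storeOffsets as a stand-in for the external store (ZooKeeper, judging by the lock-master argument above):

import java.util.HashMap;
import java.util.Map;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.streaming.kafka010.HasOffsetRanges;
import org.apache.spark.streaming.kafka010.OffsetRange;

// Hypothetical sketch of the pattern behind UpdateOffsetsFn; storeOffsets
// is a stand-in, not the real Oryx implementation.
public class UpdateOffsetsSketch<T> implements VoidFunction<JavaRDD<T>> {
  private final String groupId;

  public UpdateOffsetsSketch(String groupId) {
    this.groupId = groupId;
  }

  @Override
  public void call(JavaRDD<T> rdd) {
    // This cast only succeeds on RDDs produced by the raw Kafka stream.
    OffsetRange[] ranges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
    Map<Integer, Long> newOffsets = new HashMap<>();
    for (OffsetRange range : ranges) {
      // keyed by partition, assuming a single input topic
      newOffsets.put(range.partition(), range.untilOffset());
    }
    storeOffsets(groupId, newOffsets);
  }

  private void storeOffsets(String groupId, Map<Integer, Long> offsets) {
    // persist to ZooKeeper, a database, etc. (elided)
  }
}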

Example 3: start

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the package/class this method depends on
public synchronized void start() { // synchronized so only one caller can start the layer at a time
  String id = getID();
  if (id != null) {
    log.info("Starting Batch Layer {}", id);
  }

  streamingContext = buildStreamingContext();
  JavaSparkContext sparkContext = streamingContext.sparkContext(); // obtain the underlying Spark context
  Configuration hadoopConf = sparkContext.hadoopConfiguration();

  // Set the checkpoint directory
  Path checkpointPath = new Path(new Path(modelDirString), ".checkpoint");
  log.info("Setting checkpoint dir to {}", checkpointPath);
  sparkContext.setCheckpointDir(checkpointPath.toString());

  // Read the Kafka topic into a Spark direct stream
  log.info("Creating message stream from topic");
  JavaInputDStream<ConsumerRecord<K,M>> kafkaDStream = buildInputDStream(streamingContext);
  JavaPairDStream<K,M> pairDStream =
      kafkaDStream.mapToPair(mAndM -> new Tuple2<>(mAndM.key(), mAndM.value()));

  Class<K> keyClass = getKeyClass();
  Class<M> messageClass = getMessageClass();

  // Process each batch of Kafka messages that Spark reads
  pairDStream.foreachRDD(
      new BatchUpdateFunction<>(getConfig(),
                                keyClass,
                                messageClass,
                                keyWritableClass,
                                messageWritableClass,
                                dataDirString,
                                modelDirString,
                                loadUpdateInstance(),
                                streamingContext));

  // "Inline" saveAsNewAPIHadoopFiles to be able to skip saving empty RDDs
  // Write the Kafka data that Spark reads to HDFS, batch by batch
  pairDStream.foreachRDD(new SaveToHDFSFunction<>(
      dataDirString + "/oryx",
      "data",
      keyClass,
      messageClass,
      keyWritableClass,
      messageWritableClass,
      hadoopConf));

  // Must use the raw Kafka stream to get offsets
  kafkaDStream.foreachRDD(new UpdateOffsetsFn<>(getGroupID(), getInputTopicLockMaster()));

  if (maxDataAgeHours != NO_MAX_AGE) {
    pairDStream.foreachRDD(new DeleteOldDataFn<>(hadoopConf,
                                                 dataDirString,
                                                 Pattern.compile("-(\\d+)\\."),
                                                 maxDataAgeHours));
  }
  if (maxModelAgeHours != NO_MAX_AGE) {
    pairDStream.foreachRDD(new DeleteOldDataFn<>(hadoopConf,
                                                 modelDirString,
                                                 Pattern.compile("(\\d+)"),
                                                 maxModelAgeHours));
  }

  log.info("Starting Spark Streaming");

  streamingContext.start();
}
 
Author: oncewang | Project: oryx2 | Source: BatchLayer.java
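
The "inline saveAsNewAPIHadoopFiles" comment refers to writing each batch to Hadoop manually so that empty batches can be skipped, which DStream.saveAsNewAPIHadoopFiles does not do. A hedged sketch of that idea, with types and file naming simplified (not the actual SaveToHDFSFunction):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.spark.api.java.JavaPairRDD;

// Hypothetical sketch: save one Hadoop file per batch but skip empty RDDs.
// Types and file naming are simplified relative to the real SaveToHDFSFunction.
final class SaveBatchSketch {
  static void saveIfNonEmpty(JavaPairRDD<Text, Text> writables,
                             String dirPrefix,
                             long batchTimeMs,
                             Configuration hadoopConf) {
    if (writables.isEmpty()) {
      return; // DStream.saveAsNewAPIHadoopFiles would still write a file here
    }
    String path = dirPrefix + "-" + batchTimeMs + ".seq";
    writables.saveAsNewAPIHadoopFile(path, Text.class, Text.class,
        SequenceFileOutputFormat.class, hadoopConf);
  }
}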

Example 4: processRuleUpdate

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the package/class this method depends on
private static void processRuleUpdate(JavaStreamingContext jssc, String brokers, Set<String> topicsSet,
		final AnalyticsEngineManager engineManager) {
	Map<String, Object> kafkaParams = new HashMap<>();
	kafkaParams.put("bootstrap.servers", brokers);
	// Rate limiting ("spark.streaming.kafka.maxRatePerPartition") is a Spark
	// setting and must go on the SparkConf when the context is created,
	// not into the Kafka consumer properties.
	kafkaParams.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
	kafkaParams.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
	kafkaParams.put("group.id", "MyAnalyticsEngineConsumerGroup1");
	kafkaParams.put("enable.auto.commit", false); // offsets are committed manually below
	kafkaParams.put("auto.offset.reset", "earliest");

	System.out.println("Initiate kafka messages for rules....");
	// Create direct kafka stream with brokers and topics
	ConsumerStrategy<String, String> consumerStrategy = ConsumerStrategies.Subscribe(topicsSet, kafkaParams);
	JavaInputDStream<ConsumerRecord<String, String>> streams = KafkaUtils.createDirectStream(jssc,
			LocationStrategies.PreferConsistent(), consumerStrategy);

	System.out.println("Waiting for kafka messages of rules....");

	// Get the data. collect() below pulls each batch to the driver; that is
	// acceptable here because rule updates are low-volume and the rule engine
	// lives in driver memory, but it would not scale for large topics.
	streams.foreachRDD(rdd -> {
		rdd.collect().forEach(consumerRecord -> {
			String key = consumerRecord.key();
			long offset = consumerRecord.offset();
			int partition = consumerRecord.partition();
			String topic = consumerRecord.topic();
			String value = consumerRecord.value();
			System.out.println("consumerRecord:" + consumerRecord.toString());
			System.out.println("[ruleupdate]key:" + key + ", value:" + value);

			engineManager.getEngine().addRule(key, value);
		});

		OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
		// some time later, after outputs have completed
		((CanCommitOffsets) streams.inputDStream()).commitAsync(offsetRanges);
	});

	System.out.println("Prepare rule validation....");

}
 
Author: osswangxining | Project: another-rule-based-analytics-on-spark | Source: AnalyticsEngine.java
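
Example 4 follows the offset-handling pattern from the Spark Kafka 0.10 integration guide: read the batch's offset ranges at the top of foreachRDD, produce the output, then commitAsync against the stream's own consumer. A condensed sketch of that pattern follows, using executor-side foreachPartition instead of collect() for topics that are not small; the record handling is left as a placeholder.

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.kafka010.CanCommitOffsets;
import org.apache.spark.streaming.kafka010.HasOffsetRanges;
import org.apache.spark.streaming.kafka010.OffsetRange;

final class CommitAfterOutputSketch {
  static void wire(JavaInputDStream<ConsumerRecord<String, String>> stream) {
    stream.foreachRDD(rdd -> {
      // 1. Capture this batch's offset ranges from the raw Kafka RDD.
      OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
      // 2. Produce the batch's output on the executors.
      rdd.foreachPartition(records ->
          records.forEachRemaining(r -> { /* handle r.key() / r.value() */ }));
      // 3. Commit only after the output has been produced.
      ((CanCommitOffsets) stream.inputDStream()).commitAsync(offsetRanges);
    });
  }
}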


Note: The org.apache.spark.streaming.api.java.JavaInputDStream.foreachRDD examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects and copyright remains with their original authors; consult each project's License before distributing or reusing the code, and do not republish without permission.