This article collects typical usage of the Java method org.apache.spark.streaming.api.java.JavaInputDStream.foreachRDD. If you are wondering what JavaInputDStream.foreachRDD does, how to call it, or want to see it used in context, the curated code examples below may help. You can also explore the containing class, org.apache.spark.streaming.api.java.JavaInputDStream, further.
Four code examples of JavaInputDStream.foreachRDD are shown below, ordered by popularity by default.
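Before the individual examples, here is a minimal, self-contained sketch of the usual pattern: build a direct Kafka stream with KafkaUtils.createDirectStream, then call foreachRDD to register an output operation that runs once per micro-batch against that batch's RDD. The broker address, topic name, and group id below are placeholders, not values taken from the examples that follow.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;

public class ForeachRDDSketch {
    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf().setAppName("foreachRDD-sketch").setMaster("local[2]");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

        Map<String, Object> kafkaParams = new HashMap<>();
        kafkaParams.put("bootstrap.servers", "localhost:9092"); // placeholder broker
        kafkaParams.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        kafkaParams.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        kafkaParams.put("group.id", "example-group"); // placeholder consumer group

        // Direct stream over a placeholder topic; each batch interval produces one RDD.
        JavaInputDStream<ConsumerRecord<String, String>> stream = KafkaUtils.createDirectStream(
            jssc,
            LocationStrategies.PreferConsistent(),
            ConsumerStrategies.<String, String>Subscribe(
                Collections.singletonList("example-topic"), kafkaParams));

        // foreachRDD is an output operation: the lambda runs on the driver once per batch.
        stream.foreachRDD(rdd -> {
            if (!rdd.isEmpty()) {
                System.out.println("Records in this batch: " + rdd.count());
            }
        });

        jssc.start();
        jssc.awaitTermination();
    }
}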
Example 1: start
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the package/class this method depends on
public void start() {
    SparkConf sparkConf = getSparkConf();
    streamingContext = new JavaStreamingContext(sparkConf,
        Durations.seconds(Long.parseLong(config.getStreamingBatchIntervalInSec())));
    JavaInputDStream<MessageAndMetadata<String, byte[]>> dStream = buildInputDStream(streamingContext);
    JavaPairDStream<String, byte[]> pairDStream = dStream.mapToPair(km -> new Tuple2<>(km.key(), km.message()));
    pairDStream.foreachRDD(new ProcessStreamingData<>(config)); // process data
    dStream.foreachRDD(new UpdateOffsetsFn<>(config.getKafkaGroupId(), config.getZkOffsetManager()));
    streamingContext.start();
}
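ProcessStreamingData and UpdateOffsetsFn in Example 1 are project-specific classes not shown on this page. To illustrate the shape foreachRDD expects on the pair stream, here is a hypothetical stand-in implementing VoidFunction over the batch RDD; it is a sketch, not the real ProcessStreamingData.

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.VoidFunction;

// Hypothetical stand-in for ProcessStreamingData: any serializable VoidFunction
// over the batch's pair RDD can be handed to foreachRDD.
public class LogBatchSizeFn implements VoidFunction<JavaPairRDD<String, byte[]>> {
    @Override
    public void call(JavaPairRDD<String, byte[]> rdd) {
        // Called on the driver once per batch; RDD actions here trigger work on the executors.
        if (!rdd.isEmpty()) {
            System.out.println("Processing batch with " + rdd.count() + " records");
        }
    }
}

It would be registered the same way as ProcessStreamingData above: pairDStream.foreachRDD(new LogBatchSizeFn());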
Example 2: start
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the package/class this method depends on
public synchronized void start() {
    String id = getID();
    if (id != null) {
        log.info("Starting Speed Layer {}", id);
    }
    streamingContext = buildStreamingContext();
    log.info("Creating message stream from topic");
    JavaInputDStream<ConsumerRecord<K,M>> kafkaDStream = buildInputDStream(streamingContext);
    JavaPairDStream<K,M> pairDStream =
        kafkaDStream.mapToPair(mAndM -> new Tuple2<>(mAndM.key(), mAndM.value()));
    KafkaConsumer<String,U> consumer = new KafkaConsumer<>(
        ConfigUtils.keyValueToProperties(
            "group.id", "OryxGroup-" + getLayerName() + "-" + UUID.randomUUID(),
            "bootstrap.servers", updateTopicBroker,
            "max.partition.fetch.bytes", maxMessageSize,
            "key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer",
            "value.deserializer", updateDecoderClass.getName(),
            // Do start from the beginning of the update queue
            "auto.offset.reset", "earliest"
        ));
    consumer.subscribe(Collections.singletonList(updateTopic));
    consumerIterator = new ConsumeDataIterator<>(consumer);
    modelManager = loadManagerInstance();
    Configuration hadoopConf = streamingContext.sparkContext().hadoopConfiguration();
    new Thread(LoggingCallable.log(() -> {
        try {
            modelManager.consume(consumerIterator, hadoopConf);
        } catch (Throwable t) {
            log.error("Error while consuming updates", t);
            close();
        }
    }).asRunnable(), "OryxSpeedLayerUpdateConsumerThread").start();
    pairDStream.foreachRDD(new SpeedLayerUpdate<>(modelManager, updateBroker, updateTopic));
    // Must use the raw Kafka stream to get offsets
    kafkaDStream.foreachRDD(new UpdateOffsetsFn<>(getGroupID(), getInputTopicLockMaster()));
    log.info("Starting Spark Streaming");
    streamingContext.start();
}
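The comment "Must use the raw Kafka stream to get offsets" matters because only the RDDs produced directly by the Kafka input stream are backed by a KafkaRDD carrying offset metadata; after mapToPair that information is no longer reachable. A minimal sketch of reading those offsets from the raw stream follows (UpdateOffsetsFn itself, which in these examples persists the offsets externally via the ZooKeeper-related parameters, is not reproduced here):

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.kafka010.HasOffsetRanges;
import org.apache.spark.streaming.kafka010.OffsetRange;

public final class OffsetLogging {
    // Works only on the raw stream: there, rdd.rdd() is a KafkaRDD implementing HasOffsetRanges.
    static void logOffsets(JavaInputDStream<ConsumerRecord<String, String>> kafkaDStream) {
        kafkaDStream.foreachRDD(rdd -> {
            OffsetRange[] ranges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
            for (OffsetRange range : ranges) {
                System.out.println(range.topic() + "-" + range.partition()
                    + ": " + range.fromOffset() + " -> " + range.untilOffset());
            }
        });
    }
}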
Example 3: start
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the package/class this method depends on
public synchronized void start() { // synchronized: only one thread may run this at a time
    String id = getID();
    if (id != null) {
        log.info("Starting Batch Layer {}", id);
    }
    streamingContext = buildStreamingContext();
    JavaSparkContext sparkContext = streamingContext.sparkContext(); // Spark initialization
    Configuration hadoopConf = sparkContext.hadoopConfiguration();
    // Set the checkpoint path
    Path checkpointPath = new Path(new Path(modelDirString), ".checkpoint");
    log.info("Setting checkpoint dir to {}", checkpointPath);
    sparkContext.setCheckpointDir(checkpointPath.toString());
    // Spark reads from the Kafka topic
    log.info("Creating message stream from topic");
    JavaInputDStream<ConsumerRecord<K,M>> kafkaDStream = buildInputDStream(streamingContext);
    JavaPairDStream<K,M> pairDStream =
        kafkaDStream.mapToPair(mAndM -> new Tuple2<>(mAndM.key(), mAndM.value()));
    Class<K> keyClass = getKeyClass();
    Class<M> messageClass = getMessageClass();
    // Process every Kafka record Spark reads in each batch
    pairDStream.foreachRDD(
        new BatchUpdateFunction<>(getConfig(),
                                  keyClass,
                                  messageClass,
                                  keyWritableClass,
                                  messageWritableClass,
                                  dataDirString,
                                  modelDirString,
                                  loadUpdateInstance(),
                                  streamingContext));
    // "Inline" saveAsNewAPIHadoopFiles to be able to skip saving empty RDDs
    // Write the Kafka data Spark reads to HDFS, batch by batch
    pairDStream.foreachRDD(new SaveToHDFSFunction<>(
        dataDirString + "/oryx",
        "data",
        keyClass,
        messageClass,
        keyWritableClass,
        messageWritableClass,
        hadoopConf));
    // Must use the raw Kafka stream to get offsets
    kafkaDStream.foreachRDD(new UpdateOffsetsFn<>(getGroupID(), getInputTopicLockMaster()));
    if (maxDataAgeHours != NO_MAX_AGE) {
        pairDStream.foreachRDD(new DeleteOldDataFn<>(hadoopConf,
                                                     dataDirString,
                                                     Pattern.compile("-(\\d+)\\."),
                                                     maxDataAgeHours));
    }
    if (maxModelAgeHours != NO_MAX_AGE) {
        pairDStream.foreachRDD(new DeleteOldDataFn<>(hadoopConf,
                                                     modelDirString,
                                                     Pattern.compile("(\\d+)"),
                                                     maxModelAgeHours));
    }
    log.info("Starting Spark Streaming");
    streamingContext.start();
}
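"Inline" saveAsNewAPIHadoopFiles here means saving each batch manually inside foreachRDD so that empty batches can be skipped, something the built-in DStream.saveAsNewAPIHadoopFiles call does not do. Below is a hypothetical sketch of that idea; the output path, Writable types, and time-based naming are assumptions for illustration, not the actual SaveToHDFSFunction.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.spark.streaming.api.java.JavaPairDStream;

public final class SaveNonEmptyBatches {
    // Hypothetical: save each non-empty batch under a time-stamped path.
    static void save(JavaPairDStream<Text, Text> dstream, String dirPrefix, Configuration hadoopConf) {
        // The two-argument foreachRDD also passes the batch Time, handy for unique output paths.
        dstream.foreachRDD((rdd, time) -> {
            if (rdd.isEmpty()) {
                return; // skip empty batches; saveAsNewAPIHadoopFiles would still write output for them
            }
            String path = dirPrefix + "-" + time.milliseconds();
            rdd.saveAsNewAPIHadoopFile(path, Text.class, Text.class,
                SequenceFileOutputFormat.class, hadoopConf);
        });
    }
}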
Example 4: processRuleUpdate
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the package/class this method depends on
private static void processRuleUpdate(JavaStreamingContext jssc, String brokers, Set<String> topicsSet,
                                      final AnalyticsEngineManager engineManager) {
    Map<String, Object> kafkaParams = new HashMap<String, Object>();
    kafkaParams.put("metadata.broker.list", brokers);
    kafkaParams.put("bootstrap.servers", brokers);
    kafkaParams.put("spark.streaming.kafka.maxRatePerPartition", "100");
    kafkaParams.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    kafkaParams.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    kafkaParams.put("group.id", "MyAnalyticsEngineConsumerGroup1");
    kafkaParams.put("enable.auto.commit", false);
    kafkaParams.put("auto.offset.reset", "earliest");
    System.out.println("Initiate kafka messages for rules....");
    // Create direct kafka stream with brokers and topics
    ConsumerStrategy<String, String> consumerStrategy = ConsumerStrategies.Subscribe(topicsSet, kafkaParams);
    JavaInputDStream<ConsumerRecord<String, String>> streams = KafkaUtils.createDirectStream(jssc,
        LocationStrategies.PreferConsistent(), consumerStrategy);
    System.out.println("Waiting for kafka messages of rules....");
    // Get the data
    streams.foreachRDD(rdd -> {
        rdd.collect().forEach(consumerRecord -> {
            String key = consumerRecord.key();
            long offset = consumerRecord.offset();
            int partition = consumerRecord.partition();
            String topic = consumerRecord.topic();
            String value = consumerRecord.value();
            System.out.println("consumerRecord:" + consumerRecord.toString());
            System.out.println("[ruleupdate]key:" + key + ", value:" + value);
            engineManager.getEngine().addRule(key, value);
        });
        OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
        // some time later, after outputs have completed
        ((CanCommitOffsets) streams.inputDStream()).commitAsync(offsetRanges);
    });
    System.out.println("Prepare rule validation....");
}
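Because enable.auto.commit is false, Example 4 commits offsets itself with commitAsync once the batch has been processed. A small variant of the same pattern, assuming an equivalent streams handle, uses the callback overload of commitAsync to log whether the commit actually succeeded:

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.kafka010.CanCommitOffsets;
import org.apache.spark.streaming.kafka010.HasOffsetRanges;
import org.apache.spark.streaming.kafka010.OffsetRange;

public final class CommitWithCallback {
    // Same commit pattern as Example 4, but with the callback overload of commitAsync.
    static void commitAfterProcessing(JavaInputDStream<ConsumerRecord<String, String>> streams) {
        streams.foreachRDD(rdd -> {
            OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
            // ... process the batch here, then commit once outputs have completed ...
            ((CanCommitOffsets) streams.inputDStream()).commitAsync(offsetRanges, (offsets, exception) -> {
                if (exception != null) {
                    System.err.println("Offset commit failed: " + exception);
                } else {
                    System.out.println("Committed offsets: " + offsets);
                }
            });
        });
    }
}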