This article collects typical usage examples of the Java class org.apache.spark.streaming.api.java.JavaInputDStream. If you are wondering what JavaInputDStream is for and how to use it, the curated examples below should help.
JavaInputDStream belongs to the org.apache.spark.streaming.api.java package. 14 code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java code examples.
Example 1: providesKafkaInputStream
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
@Provides
JavaInputDStream<ConsumerRecord<String, RawRating>> providesKafkaInputStream(JavaStreamingContext streamingContext) {
Map<String, Object> kafkaParams = new HashMap<>();
kafkaParams.put("bootstrap.servers", "localhost:9092");
kafkaParams.put("key.deserializer", StringDeserializer.class);
kafkaParams.put("value.deserializer", JsonDeserializer.class);
kafkaParams.put("serializedClass", RawRating.class);
kafkaParams.put("group.id", "rating_stream");
kafkaParams.put("auto.offset.reset", "latest");
kafkaParams.put("enable.auto.commit", false);
Collection<String> topics = Arrays.asList("topicA", "topicB");
return KafkaUtils.createDirectStream(
streamingContext,
LocationStrategies.PreferConsistent(),
ConsumerStrategies.<String, RawRating>Subscribe(topics, kafkaParams)
);
}
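A minimal consumption sketch for the provider above; the surrounding wiring is assumed (only providesKafkaInputStream comes from this example, and RawRating is its project-specific value type):
void runStream(JavaStreamingContext streamingContext,
               JavaInputDStream<ConsumerRecord<String, RawRating>> stream) throws InterruptedException {
    // Print each record, then start the context so batches begin to flow.
    stream.foreachRDD(rdd -> rdd.foreach(record ->
        System.out.println("key=" + record.key() + ", rating=" + record.value())));
    streamingContext.start();
    streamingContext.awaitTermination();
}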
Example 2: create
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
@Override
@SuppressWarnings("unchecked")
public JavaStreamingContext create() {
sparkConf.set("spark.streaming.kafka.maxRatePerPartition", String.valueOf(maxRatePerPartition));
JavaStreamingContext result = new JavaStreamingContext(sparkConf, new Duration(duration));
Map<String, String> props = new HashMap<>();
if (!autoOffsetValue.isEmpty()) {
props.put(AbstractStreamingBinding.AUTO_OFFSET_RESET, autoOffsetValue);
}
logMessage("topic list " + topic, isRunningInMesos);
logMessage("Auto offset reset is set to " + autoOffsetValue, isRunningInMesos);
props.putAll(extraKafkaConfigs);
for (Map.Entry<String, String> map : props.entrySet()) {
logMessage(Utils.format("Adding extra kafka config, {}:{}", map.getKey(), map.getValue()), isRunningInMesos);
}
props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
JavaPairInputDStream<byte[], byte[]> dStream;
if (offsetHelper.isSDCCheckPointing()) {
JavaInputDStream stream =
KafkaUtils.createDirectStream(
result,
byte[].class,
byte[].class,
Tuple2.class,
props,
MaprStreamsOffsetManagerImpl.get().getOffsetForDStream(topic, numberOfPartitions),
MESSAGE_HANDLER_FUNCTION
);
ClassTag<byte[]> byteClassTag = scala.reflect.ClassTag$.MODULE$.apply(byte[].class);
dStream = JavaPairInputDStream.fromInputDStream(stream.inputDStream(), byteClassTag, byteClassTag);
} else {
dStream =
KafkaUtils.createDirectStream(result, byte[].class, byte[].class,
props, new HashSet<>(Arrays.asList(topic.split(","))));
}
Driver$.MODULE$.foreach(dStream.dstream(), MaprStreamsOffsetManagerImpl.get());
return result;
}
Example 3: main
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
public static void main(String[] args) {
SparkConf sc = new SparkConf()
.setMaster("local[2]") // local mode with 2 threads
.setAppName("RealtimeSpeedCalculator");
JavaStreamingContext streamingContext = new JavaStreamingContext(sc, new Duration(60 * 1000L));
// Kafka configuration
Map<String, Object> kafkaParams = new HashMap<>();
kafkaParams.put("bootstrap.servers", "10.128.184.199:9121");
kafkaParams.put("key.deserializer", StringDeserializer.class);
kafkaParams.put("value.deserializer", StringDeserializer.class);
kafkaParams.put("group.id", 0);
kafkaParams.put("auto.offset.reset", "latest");
kafkaParams.put("enable.auto.commit", false);
Collection<String> topics = Arrays.asList("topic-taxi");
JavaInputDStream<ConsumerRecord<String, String>> stream =
KafkaUtils.createDirectStream(
streamingContext,
LocationStrategies.PreferConsistent(),
ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams)
);
stream.map(record -> {
System.out.println("#############");
return record.value();
}).count();
// streamingContext.start();
}
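Note that count() is only a transformation and streamingContext.start() is left commented out, so the job above builds a processing graph but never runs a batch. A minimal completion sketch, reusing the same stream and streamingContext:
// Assumed completion: register an output operation, then actually start the context.
stream.map(ConsumerRecord::value).count().print();
streamingContext.start();
try {
    streamingContext.awaitTermination();
} catch (InterruptedException e) {
    Thread.currentThread().interrupt();
}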
Example 4: StreamingJob
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
@Inject
public StreamingJob(JavaStreamingContext streamingContext,
JavaInputDStream<ConsumerRecord<String, RawRating>> inputDStream,
CassandraIo<RawRating> ratingsIo
) {
this.streamingContext = streamingContext;
this.inputDStream = inputDStream;
this.ratingsIo = ratingsIo;
}
Example 5: createDirectStream
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
/**
* Creates a direct Kafka stream subscribed to the configured topics.
*
* @param <K> key type of the consumed records
* @param <V> value type of the consumed records
* @return the direct input stream of Kafka consumer records
*/
public <K extends Object, V extends Object> JavaInputDStream<ConsumerRecord<K, V>> createDirectStream() {
JavaInputDStream<ConsumerRecord<K, V>> directKafkaStream
= KafkaUtils.
createDirectStream(jsc, LocationStrategies.PreferConsistent(),
ConsumerStrategies.Subscribe(topics, kafkaParams));
return directKafkaStream;
}
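A possible call site for this helper; the enclosing class name (KafkaStreamSource), its constructor, and the jsc/topics/kafkaParams fields it relies on are assumptions, only createDirectStream() is taken from the example above:
// Hypothetical usage; K and V are inferred from the assignment target.
KafkaStreamSource source = new KafkaStreamSource(jsc, topics, kafkaParams);
JavaInputDStream<ConsumerRecord<String, String>> stream = source.createDirectStream();
stream.foreachRDD(rdd -> System.out.println("records in this batch: " + rdd.count()));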
Example 6: start
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
public void start() {
SparkConf sparkConf = getSparkConf();
streamingContext = new JavaStreamingContext(sparkConf,
Durations.seconds(Long.parseLong(config.getStreamingBatchIntervalInSec())));
JavaInputDStream<MessageAndMetadata<String, byte[]>> dStream = buildInputDStream(streamingContext);
JavaPairDStream<String, byte[]> pairDStream = dStream.mapToPair(km -> new Tuple2<>(km.key(), km.message()));
pairDStream.foreachRDD(new ProcessStreamingData<>(config)); // process data
dStream.foreachRDD(new UpdateOffsetsFn<>(config.getKafkaGroupId(), config.getZkOffsetManager()));
streamingContext.start();
}
Example 7: buildInputDStream
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
public JavaInputDStream<MessageAndMetadata<String,byte[]>> buildInputDStream(
JavaStreamingContext streamingContext) {
HashMap<String, String> kafkaParams = config.getKafkaParams();
// Ugly compiler-pleasing acrobatics:
@SuppressWarnings("unchecked")
Class<MessageAndMetadata<String, byte[]>> streamClass =
(Class<MessageAndMetadata<String, byte[]>>) (Class<?>) MessageAndMetadata.class;
if (!KafkaManager.topicExists(config.getZkKafka(), config.getTopic())) {
throw new RuntimeException("Topic does not exist on server");
}
Map<TopicAndPartition, Long> seedOffsetsMap = KafkaManager.getOffsets(config.getZkKafka(),
config.getZkOffsetManager(), config.getKafkaGroupId(), config.getTopic(), config.getKafkaParams());
// TODO: try generics, instead of hardcoded values
JavaInputDStream<MessageAndMetadata<String, byte[]>> dStream = org.apache.spark.streaming.kafka.KafkaUtils.createDirectStream(
streamingContext,
String.class, // change as necessary
byte[].class, // change as necessary
StringDecoder.class,
DefaultDecoder.class,
streamClass,
kafkaParams,
seedOffsetsMap,
Functions.<MessageAndMetadata<String, byte[]>>identity());
return dStream;
}
Example 8: testEsRDDWriteIndexCreationDisabled
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
@Test
public void testEsRDDWriteIndexCreationDisabled() throws Exception {
ExpectingToThrow expecting = expectingToThrow(EsHadoopIllegalArgumentException.class).from(ssc);
Map<String, Object> doc1 = new HashMap<>();
doc1.put("one", null);
Set<String> values = new HashSet<>();
values.add("2");
doc1.put("two", values);
doc1.put("three", ".");
Map<String, Object> doc2 = new HashMap<>();
doc2.put("OTP", "Otopeni");
doc2.put("SFO", "San Fran");
List<Map<String, Object>> docs = new ArrayList<>();
docs.add(doc1);
docs.add(doc2);
String target = wrapIndex("spark-test-nonexisting/scala-basic-write");
Map<String, String> localConf = new HashMap<>(cfg);
localConf.put(ES_INDEX_AUTO_CREATE, "no");
JavaRDD<Map<String, Object>> batch = sc.parallelize(docs);
Queue<JavaRDD<Map<String, Object>>> rddQueue = new LinkedList<>();
rddQueue.add(batch);
JavaInputDStream<Map<String, Object>> dstream = ssc.queueStream(rddQueue, true);
// apply closure
JavaEsSparkStreaming.saveToEs(dstream, target, localConf);
ssc.start();
TimeUnit.SECONDS.sleep(2); // Let the processing happen
ssc.stop(false, true);
assertTrue(!RestUtils.exists(target));
expecting.assertExceptionFound();
}
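The second argument to queueStream(rddQueue, true) tells Spark to take exactly one queued RDD per batch, which is what lets this test drive a single failing write. A standalone sketch of the same test-source idea without the Elasticsearch pieces (sc and ssc are assumed to be the test's JavaSparkContext and JavaStreamingContext):
// queueStream as a controllable test source: one queued RDD is consumed per batch interval.
Queue<JavaRDD<String>> queue = new LinkedList<>();
queue.add(sc.parallelize(Arrays.asList("first", "second", "third")));
JavaInputDStream<String> testStream = ssc.queueStream(queue, true);
testStream.print();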
Example 9: main
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
public static void main(String[] args) {
//Windows-specific property if Hadoop is not installed or HADOOP_HOME is not set
System.setProperty("hadoop.home.dir", "E:\\hadoop");
//Logger rootLogger = LogManager.getRootLogger();
//rootLogger.setLevel(Level.WARN);
SparkConf conf = new SparkConf().setAppName("KafkaExample").setMaster("local[*]");
JavaSparkContext sc = new JavaSparkContext(conf);
JavaStreamingContext streamingContext = new JavaStreamingContext(sc, Durations.minutes(2));
streamingContext.checkpoint("E:\\hadoop\\checkpoint");
Logger rootLogger = LogManager.getRootLogger();
rootLogger.setLevel(Level.WARN);
Map<String, Object> kafkaParams = new HashMap<>();
kafkaParams.put("bootstrap.servers", "10.0.75.1:9092");
kafkaParams.put("key.deserializer", StringDeserializer.class);
kafkaParams.put("value.deserializer", StringDeserializer.class);
kafkaParams.put("group.id", "use_a_separate_group_id_for_each_strea");
kafkaParams.put("auto.offset.reset", "latest");
// kafkaParams.put("enable.auto.commit", false);
Collection<String> topics = Arrays.asList("mytopic", "anothertopic");
final JavaInputDStream<ConsumerRecord<String, String>> stream = KafkaUtils.createDirectStream(streamingContext,LocationStrategies.PreferConsistent(),
ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));
JavaPairDStream<String, String> pairRDD = stream.mapToPair(record-> new Tuple2<>(record.key(), record.value()));
pairRDD.foreachRDD(pRDD-> { pRDD.foreach(tuple-> System.out.println(new Date()+" :: Kafka msg key ::"+tuple._1() +" the val is ::"+tuple._2()));});
JavaDStream<String> tweetRDD = pairRDD.map(x-> x._2()).map(new TweetText());
tweetRDD.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" :: "+x)));
JavaDStream<String> hashtagRDD = tweetRDD.flatMap(twt-> Arrays.stream(twt.split(" ")).filter(str-> str.contains("#")).collect(Collectors.toList()).iterator() );
hashtagRDD.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(x)));
JavaPairDStream<String, Long> cntByVal = hashtagRDD.countByValue();
cntByVal.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The count tag is ::"+x._1() +" and the val is ::"+x._2())));
/* hashtagRDD.window(Durations.seconds(60), Durations.seconds(30))
.countByValue()
.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
hashtagRDD.countByValueAndWindow(Durations.seconds(60), Durations.seconds(30))
.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println("The window&count tag is ::"+x._1() +" and the val is ::"+x._2())));
*/
hashtagRDD.window(Durations.minutes(8)).countByValue()
.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
hashtagRDD.window(Durations.minutes(8),Durations.minutes(2)).countByValue()
.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
hashtagRDD.window(Durations.minutes(12),Durations.minutes(8)).countByValue()
.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
hashtagRDD.window(Durations.minutes(2),Durations.minutes(2)).countByValue()
.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
hashtagRDD.window(Durations.minutes(12),Durations.minutes(12)).countByValue()
.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
/*hashtagRDD.window(Durations.minutes(5),Durations.minutes(2)).countByValue()
.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));*/
/* hashtagRDD.window(Durations.minutes(10),Durations.minutes(1)).countByValue()
.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));*/
streamingContext.start();
try {
streamingContext.awaitTermination();
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
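One constraint worth keeping in mind for the windowed counts above: both the window length and the slide interval must be integer multiples of the batch interval (2 minutes here), otherwise Spark Streaming rejects the window with a requirement error. One more valid combination as a sketch, reusing hashtagRDD from above:
// 10-minute window recomputed every 4 minutes; both are multiples of the 2-minute batch interval.
hashtagRDD.window(Durations.minutes(10), Durations.minutes(4)).countByValue()
    .foreachRDD(tRDD -> tRDD.foreach(x -> System.out.println(new Date()+" ::The window count tag is ::"+x._1()+" and the val is ::"+x._2())));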
Example 10: start
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
public synchronized void start() {
String id = getID();
if (id != null) {
log.info("Starting Speed Layer {}", id);
}
streamingContext = buildStreamingContext();
log.info("Creating message stream from topic");
JavaInputDStream<ConsumerRecord<K,M>> kafkaDStream = buildInputDStream(streamingContext);
JavaPairDStream<K,M> pairDStream =
kafkaDStream.mapToPair(mAndM -> new Tuple2<>(mAndM.key(), mAndM.value()));
KafkaConsumer<String,U> consumer = new KafkaConsumer<>(
ConfigUtils.keyValueToProperties(
"group.id", "OryxGroup-" + getLayerName() + "-" + UUID.randomUUID(),
"bootstrap.servers", updateTopicBroker,
"max.partition.fetch.bytes", maxMessageSize,
"key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer",
"value.deserializer", updateDecoderClass.getName(),
// Do start from the beginning of the update queue
"auto.offset.reset", "earliest"
));
consumer.subscribe(Collections.singletonList(updateTopic));
consumerIterator = new ConsumeDataIterator<>(consumer);
modelManager = loadManagerInstance();
Configuration hadoopConf = streamingContext.sparkContext().hadoopConfiguration();
new Thread(LoggingCallable.log(() -> {
try {
modelManager.consume(consumerIterator, hadoopConf);
} catch (Throwable t) {
log.error("Error while consuming updates", t);
close();
}
}).asRunnable(), "OryxSpeedLayerUpdateConsumerThread").start();
pairDStream.foreachRDD(new SpeedLayerUpdate<>(modelManager, updateBroker, updateTopic));
// Must use the raw Kafka stream to get offsets
kafkaDStream.foreachRDD(new UpdateOffsetsFn<>(getGroupID(), getInputTopicLockMaster()));
log.info("Starting Spark Streaming");
streamingContext.start();
}
Example 11: buildInputDStream
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
protected final JavaInputDStream<ConsumerRecord<K,M>> buildInputDStream(
JavaStreamingContext streamingContext) {
Preconditions.checkArgument(
KafkaUtils.topicExists(inputTopicLockMaster, inputTopic),
"Topic %s does not exist; did you create it?", inputTopic);
if (updateTopic != null && updateTopicLockMaster != null) {
Preconditions.checkArgument(
KafkaUtils.topicExists(updateTopicLockMaster, updateTopic),
"Topic %s does not exist; did you create it?", updateTopic);
}
String groupID = getGroupID();
// TODO can we get rid of use of the old API in fillInLatestOffsets?
Map<String,String> oldKafkaParams = new HashMap<>();
oldKafkaParams.put("zookeeper.connect", inputTopicLockMaster); // needed for SimpleConsumer later
oldKafkaParams.put("group.id", groupID);
// Don't re-consume old messages from input by default
oldKafkaParams.put("auto.offset.reset", "largest"); // becomes "latest" in Kafka 0.9+
oldKafkaParams.put("metadata.broker.list", inputBroker);
// Newer version of metadata.broker.list:
oldKafkaParams.put("bootstrap.servers", inputBroker);
Map<String,Object> kafkaParams = new HashMap<>();
kafkaParams.put("zookeeper.connect", inputTopicLockMaster); // needed for SimpleConsumer later
kafkaParams.put("group.id", groupID);
// Don't re-consume old messages from input by default
kafkaParams.put("auto.offset.reset", "latest"); // Ignored by Kafka 0.10 Spark integration
kafkaParams.put("bootstrap.servers", inputBroker);
kafkaParams.put("key.deserializer", keyDecoderClass.getName());
kafkaParams.put("value.deserializer", messageDecoderClass.getName());
Map<Pair<String,Integer>,Long> offsets =
KafkaUtils.getOffsets(inputTopicLockMaster, groupID, inputTopic);
KafkaUtils.fillInLatestOffsets(offsets, oldKafkaParams);
log.info("Initial offsets: {}", offsets);
Map<TopicPartition,Long> kafkaOffsets = new HashMap<>(offsets.size());
offsets.forEach((tAndP, offset) -> kafkaOffsets.put(
new TopicPartition(tAndP.getFirst(), tAndP.getSecond()), offset));
LocationStrategy locationStrategy = LocationStrategies.PreferConsistent();
ConsumerStrategy<K,M> consumerStrategy = ConsumerStrategies.Subscribe(
Collections.singleton(inputTopic), kafkaParams, kafkaOffsets);
return org.apache.spark.streaming.kafka010.KafkaUtils.createDirectStream(
streamingContext,
locationStrategy,
consumerStrategy);
}
Example 12: start
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
public synchronized void start() { // synchronized so only one thread can start the layer at a time
String id = getID();
if (id != null) {
log.info("Starting Batch Layer {}", id);
}
streamingContext = buildStreamingContext();
JavaSparkContext sparkContext = streamingContext.sparkContext(); // get the underlying Spark context
Configuration hadoopConf = sparkContext.hadoopConfiguration();
// set the checkpoint directory path
Path checkpointPath = new Path(new Path(modelDirString), ".checkpoint");
log.info("Setting checkpoint dir to {}", checkpointPath);
sparkContext.setCheckpointDir(checkpointPath.toString());
// have Spark read from the Kafka topic
log.info("Creating message stream from topic");
JavaInputDStream<ConsumerRecord<K,M>> kafkaDStream = buildInputDStream(streamingContext);
JavaPairDStream<K,M> pairDStream =
kafkaDStream.mapToPair(mAndM -> new Tuple2<>(mAndM.key(), mAndM.value()));
Class<K> keyClass = getKeyClass();
Class<M> messageClass = getMessageClass();
// process each Kafka record read by Spark
pairDStream.foreachRDD(
new BatchUpdateFunction<>(getConfig(),
keyClass,
messageClass,
keyWritableClass,
messageWritableClass,
dataDirString,
modelDirString,
loadUpdateInstance(),
streamingContext));
// "Inline" saveAsNewAPIHadoopFiles to be able to skip saving empty RDDs
// Spark reads the Kafka data and writes it to HDFS, processing each record
pairDStream.foreachRDD(new SaveToHDFSFunction<>(
dataDirString + "/oryx",
"data",
keyClass,
messageClass,
keyWritableClass,
messageWritableClass,
hadoopConf));
// Must use the raw Kafka stream to get offsets
kafkaDStream.foreachRDD(new UpdateOffsetsFn<>(getGroupID(), getInputTopicLockMaster()));
if (maxDataAgeHours != NO_MAX_AGE) {
pairDStream.foreachRDD(new DeleteOldDataFn<>(hadoopConf,
dataDirString,
Pattern.compile("-(\\d+)\\."),
maxDataAgeHours));
}
if (maxModelAgeHours != NO_MAX_AGE) {
pairDStream.foreachRDD(new DeleteOldDataFn<>(hadoopConf,
modelDirString,
Pattern.compile("(\\d+)"),
maxModelAgeHours));
}
log.info("Starting Spark Streaming");
streamingContext.start();
}
Example 13: processRuleUpdate
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
private static void processRuleUpdate(JavaStreamingContext jssc, String brokers, Set<String> topicsSet,
final AnalyticsEngineManager engineManager) {
Map<String, Object> kafkaParams = new HashMap<String, Object>();
kafkaParams.put("metadata.broker.list", brokers);
kafkaParams.put("bootstrap.servers", brokers);
kafkaParams.put("spark.streaming.kafka.maxRatePerPartition", "100");
kafkaParams.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
kafkaParams.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
kafkaParams.put("group.id", "MyAnalyticsEngineConsumerGroup1");
kafkaParams.put("enable.auto.commit", false);
kafkaParams.put("auto.offset.reset", "earliest");
System.out.println("Initiate kafka messages for rules....");
// Create direct kafka stream with brokers and topics
ConsumerStrategy<String, String> consumerStrategy = ConsumerStrategies.Subscribe(topicsSet, kafkaParams);
JavaInputDStream<ConsumerRecord<String, String>> streams = KafkaUtils.createDirectStream(jssc,
LocationStrategies.PreferConsistent(), consumerStrategy);
System.out.println("Waiting for kafka messages of rules....");
// Get the data
streams.foreachRDD(rdd -> {
rdd.collect().forEach(consumerRecord -> {
String key = consumerRecord.key();
long offset = consumerRecord.offset();
int partition = consumerRecord.partition();
String topic = consumerRecord.topic();
String value = consumerRecord.value();
System.out.println("consumerRecord:" + consumerRecord.toString());
System.out.println("[ruleupdate]key:" + key + ", value:" + value);
engineManager.getEngine().addRule(key, value);
});
OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
// some time later, after outputs have completed
((CanCommitOffsets) streams.inputDStream()).commitAsync(offsetRanges);
});
System.out.println("Prepare rule validation....");
}
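commitAsync as used above gives at-least-once delivery: offsets are committed only after the batch's records have been handled. If commit failures should be surfaced, the same call also accepts a Kafka OffsetCommitCallback; a sketch of that variant, meant to replace the commitAsync line inside the foreachRDD above:
// Assumed drop-in replacement for the commitAsync call; logs commit failures instead of ignoring them.
((CanCommitOffsets) streams.inputDStream()).commitAsync(offsetRanges,
    (committedOffsets, exception) -> {
        if (exception != null) {
            System.err.println("Offset commit failed: " + exception);
        }
    });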
Example 14: create
import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
@Override
@SuppressWarnings("unchecked")
public JavaStreamingContext create() {
sparkConf.set("spark.streaming.kafka.maxRatePerPartition", String.valueOf(maxRatePerPartition));
JavaStreamingContext result = new JavaStreamingContext(sparkConf, new Duration(duration));
Map<String, String> props = new HashMap<>();
props.putAll(extraKafkaConfigs);
for (Map.Entry<String, String> map : props.entrySet()) {
logMessage(Utils.format("Adding extra kafka config, {}:{}", map.getKey(), map.getValue()), isRunningInMesos);
}
props.put("metadata.broker.list", metaDataBrokerList);
props.put(GROUP_ID_KEY, groupId);
if (!autoOffsetValue.isEmpty()) {
autoOffsetValue = getConfigurableAutoOffsetResetIfNonEmpty(autoOffsetValue);
props.put(AUTO_OFFSET_RESET, autoOffsetValue);
}
logMessage("Meta data broker list " + metaDataBrokerList, isRunningInMesos);
logMessage("Topic is " + topic, isRunningInMesos);
logMessage("Auto offset reset is set to " + autoOffsetValue, isRunningInMesos);
JavaPairInputDStream<byte[], byte[]> dStream;
if (offsetHelper.isSDCCheckPointing()) {
JavaInputDStream<Tuple2<byte[], byte[]>> stream =
KafkaUtils.createDirectStream(
result,
byte[].class,
byte[].class,
DefaultDecoder.class,
DefaultDecoder.class,
(Class<Tuple2<byte[], byte[]>>) ((Class)(Tuple2.class)),
props,
KafkaOffsetManagerImpl.get().getOffsetForDStream(topic, numberOfPartitions),
MESSAGE_HANDLER_FUNCTION
);
ClassTag<byte[]> byteClassTag = scala.reflect.ClassTag$.MODULE$.apply(byte[].class);
dStream = JavaPairInputDStream.fromInputDStream(stream.inputDStream(), byteClassTag, byteClassTag);
} else {
dStream =
KafkaUtils.createDirectStream(result, byte[].class, byte[].class, DefaultDecoder.class, DefaultDecoder.class,
props, new HashSet<>(Arrays.asList(topic.split(","))));
}
Driver$.MODULE$.foreach(dStream.dstream(), KafkaOffsetManagerImpl.get());
return result;
}