

Java JavaInputDStream Class Code Examples

This article collects typical usage examples of the Java class org.apache.spark.streaming.api.java.JavaInputDStream. If you are wondering what JavaInputDStream is for, how to use it, or what working examples look like, the selected code samples below may help.


The JavaInputDStream class belongs to the org.apache.spark.streaming.api.java package. Fourteen code examples of the class are shown below, sorted by popularity by default.
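Before the individual examples, here is a minimal, self-contained sketch of the JavaInputDStream lifecycle that most of the samples below share: create a JavaStreamingContext, obtain the stream from the Kafka 0.10 direct API, transform it, then start the context. The broker address, topic name, and group id are placeholders and are not taken from any of the projects below.

import java.util.*;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;

public class JavaInputDStreamQuickStart {
    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("JavaInputDStreamQuickStart");
        JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(10));

        Map<String, Object> kafkaParams = new HashMap<>();
        kafkaParams.put("bootstrap.servers", "localhost:9092"); // placeholder broker address
        kafkaParams.put("key.deserializer", StringDeserializer.class);
        kafkaParams.put("value.deserializer", StringDeserializer.class);
        kafkaParams.put("group.id", "quickstart-group");        // placeholder group id
        kafkaParams.put("auto.offset.reset", "latest");
        kafkaParams.put("enable.auto.commit", false);

        // JavaInputDStream is the handle returned by the Kafka 0.10 direct API
        JavaInputDStream<ConsumerRecord<String, String>> stream = KafkaUtils.createDirectStream(
                ssc,
                LocationStrategies.PreferConsistent(),
                ConsumerStrategies.<String, String>Subscribe(Collections.singletonList("some-topic"), kafkaParams));

        // Transform the records and trigger an action per micro-batch
        stream.map(ConsumerRecord::value)
              .foreachRDD(rdd -> System.out.println("Batch size: " + rdd.count()));

        ssc.start();
        ssc.awaitTermination();
    }
}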

Example 1: providesKafkaInputStream

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
@Provides
JavaInputDStream<ConsumerRecord<String, RawRating>> providesKafkaInputStream(JavaStreamingContext streamingContext) {
    Map<String, Object> kafkaParams = new HashMap<>();
    kafkaParams.put("bootstrap.servers", "localhost:9092");
    kafkaParams.put("key.deserializer", StringDeserializer.class);
    kafkaParams.put("value.deserializer", JsonDeserializer.class);
    kafkaParams.put("serializedClass", RawRating.class);
    kafkaParams.put("group.id", "rating_stream");
    kafkaParams.put("auto.offset.reset", "latest");
    kafkaParams.put("enable.auto.commit", false);
    Collection<String> topics = Arrays.asList("topicA", "topicB");

    return KafkaUtils.createDirectStream(
            streamingContext,
            LocationStrategies.PreferConsistent(),
            ConsumerStrategies.<String, RawRating>Subscribe(topics, kafkaParams)
    );
}
 
Developer: cosminseceleanu, Project: movie-recommender, Lines: 19, Source file: SparkModule.java
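As a usage note for Example 1: since enable.auto.commit is false, a consumer of the provided stream would typically commit offsets itself. The fragment below is hypothetical (the consume() wrapper and process() call are placeholders, not part of movie-recommender); it only illustrates the common foreachRDD plus commitAsync pattern against the stream returned above.

// Hypothetical consumer of the stream provided above. Requires:
// import org.apache.spark.streaming.kafka010.CanCommitOffsets;
// import org.apache.spark.streaming.kafka010.HasOffsetRanges;
// import org.apache.spark.streaming.kafka010.OffsetRange;
void consume(JavaInputDStream<ConsumerRecord<String, RawRating>> stream) {
    stream.foreachRDD(rdd -> {
        // Capture offsets from the raw Kafka RDD before any transformation
        OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
        rdd.foreach(record -> process(record.value())); // process() is a placeholder
        // enable.auto.commit is false, so commit the offsets once the batch is handled
        ((CanCommitOffsets) stream.inputDStream()).commitAsync(offsetRanges);
    });
}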

Example 2: create

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
@Override
@SuppressWarnings("unchecked")
public JavaStreamingContext create() {
  sparkConf.set("spark.streaming.kafka.maxRatePerPartition", String.valueOf(maxRatePerPartition));
  JavaStreamingContext result = new JavaStreamingContext(sparkConf, new Duration(duration));
  Map<String, String> props = new HashMap<>();
  if (!autoOffsetValue.isEmpty()) {
    props.put(AbstractStreamingBinding.AUTO_OFFSET_RESET, autoOffsetValue);
  }
  logMessage("topic list " + topic, isRunningInMesos);
  logMessage("Auto offset reset is set to " + autoOffsetValue, isRunningInMesos);
  props.putAll(extraKafkaConfigs);
  for (Map.Entry<String, String> map : props.entrySet()) {
    logMessage(Utils.format("Adding extra kafka config, {}:{}", map.getKey(), map.getValue()), isRunningInMesos);
  }
  props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
  props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
  props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
  JavaPairInputDStream<byte[], byte[]> dStream;
  if (offsetHelper.isSDCCheckPointing()) {
    JavaInputDStream stream =
        KafkaUtils.createDirectStream(
            result,
            byte[].class,
            byte[].class,
            Tuple2.class,
            props,
            MaprStreamsOffsetManagerImpl.get().getOffsetForDStream(topic, numberOfPartitions),
            MESSAGE_HANDLER_FUNCTION
        );
    ClassTag<byte[]> byteClassTag = scala.reflect.ClassTag$.MODULE$.apply(byte[].class);
    dStream = JavaPairInputDStream.fromInputDStream(stream.inputDStream(), byteClassTag, byteClassTag);
  } else {
    dStream =
        KafkaUtils.createDirectStream(result, byte[].class, byte[].class,
            props, new HashSet<>(Arrays.asList(topic.split(","))));
  }
  Driver$.MODULE$.foreach(dStream.dstream(), MaprStreamsOffsetManagerImpl.get());
  return result;
}
 
Developer: streamsets, Project: datacollector, Lines: 41, Source file: MapRStreamingBinding.java

Example 3: main

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
public static void main(String[] args) {
        SparkConf sc = new SparkConf()
                .setMaster("local[2]") // local mode with 2 threads
                .setAppName("RealtimeSpeedCalculator");

        JavaStreamingContext streamingContext = new JavaStreamingContext(sc, new Duration(60 * 1000L));

        // Kafka configuration
        Map<String, Object> kafkaParams = new HashMap<>();
        kafkaParams.put("bootstrap.servers", "10.128.184.199:9121");
        kafkaParams.put("key.deserializer", StringDeserializer.class);
        kafkaParams.put("value.deserializer", StringDeserializer.class);
        kafkaParams.put("group.id", 0);
        kafkaParams.put("auto.offset.reset", "latest");
        kafkaParams.put("enable.auto.commit", false);

        Collection<String> topics = Arrays.asList("topic-taxi");
        JavaInputDStream<ConsumerRecord<String, String>> stream =
                KafkaUtils.createDirectStream(
                        streamingContext,
                        LocationStrategies.PreferConsistent(),
                        ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams)
                );

        stream.map(record -> {
            System.out.println("#############");
            return record.value();
        }).count();

//        streamingContext.start();
    }
 
Developer: wang1365, Project: spark-traffic, Lines: 32, Source file: StreamingApplication.java

Example 4: StreamingJob

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
@Inject
public StreamingJob(JavaStreamingContext streamingContext,
                    JavaInputDStream<ConsumerRecord<String, RawRating>> inputDStream,
                    CassandraIo<RawRating> ratingsIo
) {
    this.streamingContext = streamingContext;
    this.inputDStream = inputDStream;
    this.ratingsIo = ratingsIo;
}
 
Developer: cosminseceleanu, Project: movie-recommender, Lines: 10, Source file: StreamingJob.java

Example 5: createDirectStream

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
/**
 * Creates a direct Kafka stream for the given key and value types.
 *
 * @param <K> key type of the consumed records
 * @param <V> value type of the consumed records
 * @return a JavaInputDStream of ConsumerRecords from the configured topics
 */
public <K extends Object, V extends Object> JavaInputDStream<ConsumerRecord<K, V>> createDirectStream() {
  JavaInputDStream<ConsumerRecord<K, V>> directKafkaStream
      = KafkaUtils.
          createDirectStream(jsc, LocationStrategies.PreferConsistent(),
              ConsumerStrategies.Subscribe(topics, kafkaParams));
  return directKafkaStream;
}
 
Developer: hopshadoop, Project: hops-util, Lines: 14, Source file: SparkConsumer.java

Example 6: start

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
public void start() {
    SparkConf sparkConf = getSparkConf();
    streamingContext = new JavaStreamingContext(sparkConf,
            Durations.seconds(Long.parseLong(config.getStreamingBatchIntervalInSec())));
    JavaInputDStream<MessageAndMetadata<String, byte[]>> dStream = buildInputDStream(streamingContext);
    JavaPairDStream<String, byte[]> pairDStream = dStream.mapToPair(km -> new Tuple2<>(km.key(), km.message()));

    pairDStream.foreachRDD(new ProcessStreamingData<>(config)); // process data
    dStream.foreachRDD(new UpdateOffsetsFn<>(config.getKafkaGroupId(), config.getZkOffsetManager()));
    streamingContext.start();
}
 
Developer: ameyamk, Project: spark-streaming-direct-kafka, Lines: 12, Source file: StreamingEngine.java

Example 7: buildInputDStream

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
public JavaInputDStream<MessageAndMetadata<String,byte[]>> buildInputDStream(
        JavaStreamingContext streamingContext) {

    HashMap<String, String> kafkaParams = config.getKafkaParams();

    // Ugly compiler-pleasing acrobatics:
    @SuppressWarnings("unchecked")
    Class<MessageAndMetadata<String, byte[]>> streamClass =
            (Class<MessageAndMetadata<String, byte[]>>) (Class<?>) MessageAndMetadata.class;

    if (!KafkaManager.topicExists(config.getZkKafka(), config.getTopic())) {
        throw new RuntimeException("Topic does not exist on server");
    }

    Map<TopicAndPartition, Long> seedOffsetsMap = KafkaManager.getOffsets(config.getZkKafka(),
            config.getZkOffsetManager(), config.getKafkaGroupId(), config.getTopic(), config.getKafkaParams());

    // TODO: try generics, instead of hardcoded values
    JavaInputDStream<MessageAndMetadata<String, byte[]>> dStream = org.apache.spark.streaming.kafka.KafkaUtils.createDirectStream(
            streamingContext,
            String.class,  // change as necessary
            byte[].class,  // change as necessary
            StringDecoder.class,
            DefaultDecoder.class,
            streamClass,
            kafkaParams,
            seedOffsetsMap,
            Functions.<MessageAndMetadata<String, byte[]>>identity());
    return dStream;
}
 
Developer: ameyamk, Project: spark-streaming-direct-kafka, Lines: 31, Source file: AbstractSparkLayer.java

Example 8: testEsRDDWriteIndexCreationDisabled

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
@Test
public void testEsRDDWriteIndexCreationDisabled() throws Exception {
    ExpectingToThrow expecting = expectingToThrow(EsHadoopIllegalArgumentException.class).from(ssc);

    Map<String, Object> doc1 = new HashMap<>();
    doc1.put("one", null);
    Set<String> values = new HashSet<>();
    values.add("2");
    doc1.put("two", values);
    doc1.put("three", ".");

    Map<String, Object> doc2 = new HashMap<>();
    doc2.put("OTP", "Otopeni");
    doc2.put("SFO", "San Fran");

    List<Map<String, Object>> docs = new ArrayList<>();
    docs.add(doc1);
    docs.add(doc2);

    String target = wrapIndex("spark-test-nonexisting/scala-basic-write");

    Map<String, String> localConf = new HashMap<>(cfg);
    localConf.put(ES_INDEX_AUTO_CREATE, "no");

    JavaRDD<Map<String, Object>> batch = sc.parallelize(docs);
    Queue<JavaRDD<Map<String, Object>>> rddQueue = new LinkedList<>();
    rddQueue.add(batch);
    JavaInputDStream<Map<String, Object>> dstream = ssc.queueStream(rddQueue, true);
    // apply closure
    JavaEsSparkStreaming.saveToEs(dstream, target, localConf);
    ssc.start();
    TimeUnit.SECONDS.sleep(2); // Let the processing happen
    ssc.stop(false, true);

    assertTrue(!RestUtils.exists(target));
    expecting.assertExceptionFound();
}
 
Developer: elastic, Project: elasticsearch-hadoop, Lines: 38, Source file: AbstractJavaEsSparkStreamingTest.java
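A side note on Example 8: it obtains a JavaInputDStream without Kafka at all, by backing the stream with a queue of pre-built RDDs via queueStream, which is convenient in unit tests where no broker is available. Below is a minimal, self-contained sketch of that testing technique; the class name, sample data, and timeouts are illustrative only.

import java.util.*;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class QueueStreamExample {
    public static void main(String[] args) throws InterruptedException {
        JavaStreamingContext ssc = new JavaStreamingContext(
                new SparkConf().setMaster("local[2]").setAppName("QueueStreamExample"),
                Durations.seconds(1));

        // Pre-build one RDD and enqueue it; more RDDs can be added while the stream runs
        Queue<JavaRDD<String>> rddQueue = new LinkedList<>();
        rddQueue.add(ssc.sparkContext().parallelize(Arrays.asList("a", "b", "c")));

        // oneAtATime = true: each batch interval consumes at most one RDD from the queue
        JavaInputDStream<String> dstream = ssc.queueStream(rddQueue, true);
        dstream.foreachRDD(rdd -> System.out.println("Got " + rdd.count() + " records"));

        ssc.start();
        ssc.awaitTerminationOrTimeout(5000);
        ssc.stop(false, true);
    }
}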

Example 9: main

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
public static void main(String[] args) {
      // Windows-specific property if Hadoop is not installed or HADOOP_HOME is not set
      System.setProperty("hadoop.home.dir", "E:\\hadoop");
      SparkConf conf = new SparkConf().setAppName("KafkaExample").setMaster("local[*]");
      JavaSparkContext sc = new JavaSparkContext(conf);
      JavaStreamingContext streamingContext = new JavaStreamingContext(sc, Durations.minutes(2));
      streamingContext.checkpoint("E:\\hadoop\\checkpoint");
      Logger rootLogger = LogManager.getRootLogger();
      rootLogger.setLevel(Level.WARN);
      Map<String, Object> kafkaParams = new HashMap<>();
      kafkaParams.put("bootstrap.servers", "10.0.75.1:9092");
      kafkaParams.put("key.deserializer", StringDeserializer.class);
      kafkaParams.put("value.deserializer", StringDeserializer.class);
      kafkaParams.put("group.id", "use_a_separate_group_id_for_each_strea");
      kafkaParams.put("auto.offset.reset", "latest");
     // kafkaParams.put("enable.auto.commit", false);

      Collection<String> topics = Arrays.asList("mytopic", "anothertopic");

      final JavaInputDStream<ConsumerRecord<String, String>> stream = KafkaUtils.createDirectStream(streamingContext,LocationStrategies.PreferConsistent(),
      				ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));

      JavaPairDStream<String, String> pairRDD = stream.mapToPair(record-> new Tuple2<>(record.key(), record.value()));
     
      pairRDD.foreachRDD(pRDD-> { pRDD.foreach(tuple-> System.out.println(new Date()+" :: Kafka msg key ::"+tuple._1() +" the val is ::"+tuple._2()));});
     
      JavaDStream<String> tweetRDD = pairRDD.map(x-> x._2()).map(new TweetText());
      
      tweetRDD.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" :: "+x)));
      
     JavaDStream<String> hashtagRDD = tweetRDD.flatMap(twt-> Arrays.stream(twt.split(" ")).filter(str-> str.contains("#")).collect(Collectors.toList()).iterator() );
 
      hashtagRDD.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(x)));
      
      JavaPairDStream<String, Long> cntByVal = hashtagRDD.countByValue();
      
      cntByVal.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The count tag is ::"+x._1() +" and the val is ::"+x._2())));
      
     /* hashtagRDD.window(Durations.seconds(60), Durations.seconds(30))
                .countByValue()
               .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
      
     hashtagRDD.countByValueAndWindow(Durations.seconds(60), Durations.seconds(30))
               .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println("The window&count tag is ::"+x._1() +" and the val is ::"+x._2())));
      */
     hashtagRDD.window(Durations.minutes(8)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
     hashtagRDD.window(Durations.minutes(8),Durations.minutes(2)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
     hashtagRDD.window(Durations.minutes(12),Durations.minutes(8)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
     hashtagRDD.window(Durations.minutes(2),Durations.minutes(2)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
     hashtagRDD.window(Durations.minutes(12),Durations.minutes(12)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
     
     /*hashtagRDD.window(Durations.minutes(5),Durations.minutes(2)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));*/
     /* hashtagRDD.window(Durations.minutes(10),Durations.minutes(1)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));*/
     
      streamingContext.start();
      try {
        streamingContext.awaitTermination();
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
  }
 
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines: 72, Source file: KafkaExample.java

Example 10: start

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
public synchronized void start() {
  String id = getID();
  if (id != null) {
    log.info("Starting Speed Layer {}", id);
  }

  streamingContext = buildStreamingContext();
  log.info("Creating message stream from topic");
  JavaInputDStream<ConsumerRecord<K,M>> kafkaDStream = buildInputDStream(streamingContext);
  JavaPairDStream<K,M> pairDStream =
      kafkaDStream.mapToPair(mAndM -> new Tuple2<>(mAndM.key(), mAndM.value()));

  KafkaConsumer<String,U> consumer = new KafkaConsumer<>(
      ConfigUtils.keyValueToProperties(
          "group.id", "OryxGroup-" + getLayerName() + "-" + UUID.randomUUID(),
          "bootstrap.servers", updateTopicBroker,
          "max.partition.fetch.bytes", maxMessageSize,
          "key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer",
          "value.deserializer", updateDecoderClass.getName(),
          // Do start from the beginning of the update queue
          "auto.offset.reset", "earliest"
      ));
  consumer.subscribe(Collections.singletonList(updateTopic));
  consumerIterator = new ConsumeDataIterator<>(consumer);

  modelManager = loadManagerInstance();
  Configuration hadoopConf = streamingContext.sparkContext().hadoopConfiguration();
  new Thread(LoggingCallable.log(() -> {
    try {
      modelManager.consume(consumerIterator, hadoopConf);
    } catch (Throwable t) {
      log.error("Error while consuming updates", t);
      close();
    }
  }).asRunnable(), "OryxSpeedLayerUpdateConsumerThread").start();

  pairDStream.foreachRDD(new SpeedLayerUpdate<>(modelManager, updateBroker, updateTopic));

  // Must use the raw Kafka stream to get offsets
  kafkaDStream.foreachRDD(new UpdateOffsetsFn<>(getGroupID(), getInputTopicLockMaster()));

  log.info("Starting Spark Streaming");

  streamingContext.start();
}
 
Developer: oncewang, Project: oryx2, Lines: 46, Source file: SpeedLayer.java

Example 11: buildInputDStream

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
protected final JavaInputDStream<ConsumerRecord<K,M>> buildInputDStream(
    JavaStreamingContext streamingContext) {

  Preconditions.checkArgument(
      KafkaUtils.topicExists(inputTopicLockMaster, inputTopic),
      "Topic %s does not exist; did you create it?", inputTopic);
  if (updateTopic != null && updateTopicLockMaster != null) {
    Preconditions.checkArgument(
        KafkaUtils.topicExists(updateTopicLockMaster, updateTopic),
        "Topic %s does not exist; did you create it?", updateTopic);
  }

  String groupID = getGroupID();

  // TODO can we get rid of use of the old API in fillInLatestOffsets?
  Map<String,String> oldKafkaParams = new HashMap<>();
  oldKafkaParams.put("zookeeper.connect", inputTopicLockMaster); // needed for SimpleConsumer later
  oldKafkaParams.put("group.id", groupID);
  // Don't re-consume old messages from input by default
  oldKafkaParams.put("auto.offset.reset", "largest"); // becomes "latest" in Kafka 0.9+
  oldKafkaParams.put("metadata.broker.list", inputBroker);
  // Newer version of metadata.broker.list:
  oldKafkaParams.put("bootstrap.servers", inputBroker);

  Map<String,Object> kafkaParams = new HashMap<>();
  kafkaParams.put("zookeeper.connect", inputTopicLockMaster); // needed for SimpleConsumer later
  kafkaParams.put("group.id", groupID);
  // Don't re-consume old messages from input by default
  kafkaParams.put("auto.offset.reset", "latest"); // Ignored by Kafka 0.10 Spark integration
  kafkaParams.put("bootstrap.servers", inputBroker);
  kafkaParams.put("key.deserializer", keyDecoderClass.getName());
  kafkaParams.put("value.deserializer", messageDecoderClass.getName());

  Map<Pair<String,Integer>,Long> offsets =
      KafkaUtils.getOffsets(inputTopicLockMaster, groupID, inputTopic);
  KafkaUtils.fillInLatestOffsets(offsets, oldKafkaParams);
  log.info("Initial offsets: {}", offsets);

  Map<TopicPartition,Long> kafkaOffsets = new HashMap<>(offsets.size());
  offsets.forEach((tAndP, offset) -> kafkaOffsets.put(
      new TopicPartition(tAndP.getFirst(), tAndP.getSecond()), offset));

  LocationStrategy locationStrategy = LocationStrategies.PreferConsistent();
  ConsumerStrategy<K,M> consumerStrategy = ConsumerStrategies.Subscribe(
      Collections.singleton(inputTopic), kafkaParams, kafkaOffsets);
  return org.apache.spark.streaming.kafka010.KafkaUtils.createDirectStream(
      streamingContext,
      locationStrategy,
      consumerStrategy);
}
 
Developer: oncewang, Project: oryx2, Lines: 51, Source file: AbstractSparkLayer.java

Example 12: start

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
public synchronized void start() { // synchronized so only one thread starts the layer
  String id = getID();
  if (id != null) {
    log.info("Starting Batch Layer {}", id);
  }

  streamingContext = buildStreamingContext();
  JavaSparkContext sparkContext = streamingContext.sparkContext(); // obtain the underlying Spark context
  Configuration hadoopConf = sparkContext.hadoopConfiguration();

  // set the checkpoint path
  Path checkpointPath = new Path(new Path(modelDirString), ".checkpoint");
  log.info("Setting checkpoint dir to {}", checkpointPath);
  sparkContext.setCheckpointDir(checkpointPath.toString());

  // read the Kafka topic into a Spark input stream
  log.info("Creating message stream from topic");
  JavaInputDStream<ConsumerRecord<K,M>> kafkaDStream = buildInputDStream(streamingContext);
  JavaPairDStream<K,M> pairDStream =
      kafkaDStream.mapToPair(mAndM -> new Tuple2<>(mAndM.key(), mAndM.value()));

  Class<K> keyClass = getKeyClass();
  Class<M> messageClass = getMessageClass();

  // process each Kafka record that Spark reads
  pairDStream.foreachRDD(
      new BatchUpdateFunction<>(getConfig(),
                                keyClass,
                                messageClass,
                                keyWritableClass,
                                messageWritableClass,
                                dataDirString,
                                modelDirString,
                                loadUpdateInstance(),
                                streamingContext));

  // "Inline" saveAsNewAPIHadoopFiles to be able to skip saving empty RDDs
  // write the Kafka data read by Spark to HDFS, batch by batch
  pairDStream.foreachRDD(new SaveToHDFSFunction<>(
      dataDirString + "/oryx",
      "data",
      keyClass,
      messageClass,
      keyWritableClass,
      messageWritableClass,
      hadoopConf));

  // Must use the raw Kafka stream to get offsets
  kafkaDStream.foreachRDD(new UpdateOffsetsFn<>(getGroupID(), getInputTopicLockMaster()));

  if (maxDataAgeHours != NO_MAX_AGE) {
    pairDStream.foreachRDD(new DeleteOldDataFn<>(hadoopConf,
                                                 dataDirString,
                                                 Pattern.compile("-(\\d+)\\."),
                                                 maxDataAgeHours));
  }
  if (maxModelAgeHours != NO_MAX_AGE) {
    pairDStream.foreachRDD(new DeleteOldDataFn<>(hadoopConf,
                                                 modelDirString,
                                                 Pattern.compile("(\\d+)"),
                                                 maxModelAgeHours));
  }

  log.info("Starting Spark Streaming");

  streamingContext.start();
}
 
Developer: oncewang, Project: oryx2, Lines: 68, Source file: BatchLayer.java

Example 13: processRuleUpdate

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
private static void processRuleUpdate(JavaStreamingContext jssc, String brokers, Set<String> topicsSet,
		final AnalyticsEngineManager engineManager) {
	Map<String, Object> kafkaParams = new HashMap<String, Object>();
	kafkaParams.put("metadata.broker.list", brokers);
	kafkaParams.put("bootstrap.servers", brokers);
	kafkaParams.put("spark.streaming.kafka.maxRatePerPartition", "100");
	kafkaParams.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
	kafkaParams.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
	kafkaParams.put("group.id", "MyAnalyticsEngineConsumerGroup1");
	kafkaParams.put("enable.auto.commit", false);
	kafkaParams.put("auto.offset.reset", "earliest");

	System.out.println("Initiate kafka messages for rules....");
	// Create direct kafka stream with brokers and topics
	ConsumerStrategy<String, String> consumerStrategy = ConsumerStrategies.Subscribe(topicsSet, kafkaParams);
	JavaInputDStream<ConsumerRecord<String, String>> streams = KafkaUtils.createDirectStream(jssc,
			LocationStrategies.PreferConsistent(), consumerStrategy);

	System.out.println("Waiting for kafka messages of rules....");

	// Get the data
	streams.foreachRDD(rdd -> {
		rdd.collect().forEach(consumerRecord -> {
			String key = consumerRecord.key();
			long offset = consumerRecord.offset();
			int partition = consumerRecord.partition();
			String topic = consumerRecord.topic();
			String value = consumerRecord.value();
			System.out.println("consumerRecord:" + consumerRecord.toString());
			System.out.println("[ruleupdate]key:" + key + ", value:" + value);

			engineManager.getEngine().addRule(key, value);
		});

		OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
		// some time later, after outputs have completed
		((CanCommitOffsets) streams.inputDStream()).commitAsync(offsetRanges);
	});

	System.out.println("Prepare rule validation....");

}
 
Developer: osswangxining, Project: another-rule-based-analytics-on-spark, Lines: 43, Source file: AnalyticsEngine.java

Example 14: create

import org.apache.spark.streaming.api.java.JavaInputDStream; // import the required package/class
@Override
@SuppressWarnings("unchecked")
public JavaStreamingContext create() {
  sparkConf.set("spark.streaming.kafka.maxRatePerPartition", String.valueOf(maxRatePerPartition));
  JavaStreamingContext result = new JavaStreamingContext(sparkConf, new Duration(duration));
  Map<String, String> props = new HashMap<>();
  props.putAll(extraKafkaConfigs);
  for (Map.Entry<String, String> map : props.entrySet()) {
    logMessage(Utils.format("Adding extra kafka config, {}:{}", map.getKey(), map.getValue()), isRunningInMesos);
  }
  props.put("metadata.broker.list", metaDataBrokerList);
  props.put(GROUP_ID_KEY, groupId);
  if (!autoOffsetValue.isEmpty()) {
    autoOffsetValue = getConfigurableAutoOffsetResetIfNonEmpty(autoOffsetValue);
    props.put(AUTO_OFFSET_RESET, autoOffsetValue);
  }
  logMessage("Meta data broker list " + metaDataBrokerList, isRunningInMesos);
  logMessage("Topic is " + topic, isRunningInMesos);
  logMessage("Auto offset reset is set to " + autoOffsetValue, isRunningInMesos);
  JavaPairInputDStream<byte[], byte[]> dStream;
  if (offsetHelper.isSDCCheckPointing()) {
    JavaInputDStream<Tuple2<byte[], byte[]>> stream =
        KafkaUtils.createDirectStream(
            result,
            byte[].class,
            byte[].class,
            DefaultDecoder.class,
            DefaultDecoder.class,
            (Class<Tuple2<byte[], byte[]>>) ((Class)(Tuple2.class)),
            props,
            KafkaOffsetManagerImpl.get().getOffsetForDStream(topic, numberOfPartitions),
            MESSAGE_HANDLER_FUNCTION
        );
    ClassTag<byte[]> byteClassTag = scala.reflect.ClassTag$.MODULE$.apply(byte[].class);
    dStream = JavaPairInputDStream.fromInputDStream(stream.inputDStream(), byteClassTag, byteClassTag);
  } else {
    dStream =
        KafkaUtils.createDirectStream(result, byte[].class, byte[].class, DefaultDecoder.class, DefaultDecoder.class,
            props, new HashSet<>(Arrays.asList(topic.split(","))));
  }
  Driver$.MODULE$.foreach(dStream.dstream(), KafkaOffsetManagerImpl.get());
  return result;
}
 
Developer: streamsets, Project: datacollector, Lines: 44, Source file: SparkStreamingBinding.java


Note: The org.apache.spark.streaming.api.java.JavaInputDStream examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors. For distribution and use, please refer to each project's license; do not reproduce without permission.