当前位置: 首页>>代码示例>>Scala>>正文


Scala KafkaUtils类代码示例

本文整理汇总了Scala中org.apache.spark.streaming.kafka.KafkaUtils的典型用法代码示例。如果您正苦于以下问题:Scala KafkaUtils类的具体用法?Scala KafkaUtils怎么用?Scala KafkaUtils使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了KafkaUtils类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: StreamingApp

//设置package包名称以及导入依赖的类
package spark.test

import data.processing.avro.AvroDecoder
import kafka.serializer.StringDecoder
import kafka.serializer.DefaultDecoder
import org.apache.spark._
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka.KafkaUtils


object StreamingApp {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Simple Streaming Application")
    val ssc = new StreamingContext(conf, Seconds(1))

    val topicsSet = "test".split(",").toSet
    val kafkaParams = Map[String, String]("metadata.broker.list" -> "localhost:9092")

    val directKafkaStream = KafkaUtils.createDirectStream[String, Array[Byte], StringDecoder, DefaultDecoder](
      ssc, kafkaParams, topicsSet
    )



    directKafkaStream.foreachRDD(rdd =>
      rdd.foreachPartition(partitionOfRecords => {
        val avroDecoder = new AvroDecoder("/event-record.json")
        partitionOfRecords.map(m => (m._1, avroDecoder.decode(m._2))).foreach(m => println(m))
    }))


    ssc.start()
    ssc.awaitTermination()
  }
} 
开发者ID:ipogudin,项目名称:data-processing-examples,代码行数:36,代码来源:StreamingApp.scala

示例2: KafkaPayload

//设置package包名称以及导入依赖的类
package tools

import kafka.serializer.DefaultDecoder
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.kafka.KafkaUtils

case class KafkaPayload(value: Array[Byte])

class KafkaDStreamSource(config: Map[String, String]) {

  def createSource(ssc: StreamingContext, topic: String): DStream[KafkaPayload] = {
    val kafkaParams = config
    val kafkaTopics = Set(topic)

    KafkaUtils.
      createDirectStream[Array[Byte], Array[Byte], DefaultDecoder, DefaultDecoder](
      ssc,
      kafkaParams,
      kafkaTopics).
      map(dStream => KafkaPayload(dStream._2))
  }

}

object KafkaDStreamSource {
  def apply(config: Map[String, String]): KafkaDStreamSource = new KafkaDStreamSource(config)
} 
开发者ID:Antwnis,项目名称:kafka-streaming-examples,代码行数:29,代码来源:KafkaDStreamSource.scala

示例3: StationJourneyCountCustomApp

//设置package包名称以及导入依赖的类
package org.apress.prospark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD.rddToOrderedRDDFunctions
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions
import org.apache.spark.streaming.kafka.KafkaUtils
import kafka.serializer.StringDecoder
import org.apache.spark.storage.StorageLevel

object StationJourneyCountCustomApp {

  def main(args: Array[String]) {
    if (args.length != 7) {
      System.err.println(
        "Usage: StationJourneyCountApp <appname> <brokerUrl> <topic> <consumerGroupId> <zkQuorum> <checkpointDir> <outputPath>")
      System.exit(1)
    }

    val Seq(appName, brokerUrl, topic, consumerGroupId, zkQuorum, checkpointDir, outputPath) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)
      //.set("spark.streaming.receiver.writeAheadLog.enable", "true")

    val ssc = new StreamingContext(conf, Seconds(10))
    ssc.checkpoint(checkpointDir)

    val topics = Map[String, Int](
      topic -> 1)
    val params = Map[String, String](
      "zookeeper.connect" -> zkQuorum,
      "group.id" -> consumerGroupId,
      "bootstrap.servers" -> brokerUrl)
    KafkaUtils.createStream[String, String, StringDecoder, StringDecoder](ssc, params, topics, StorageLevel.MEMORY_ONLY_SER).map(_._2)
      .map(rec => rec.split(","))
      .map(rec => ((rec(3), rec(7)), 1))
      .reduceByKey(_ + _)
      .repartition(1)
      .map(rec => (rec._2, rec._1))
      .transform(rdd => rdd.sortByKey(ascending = false))
      .saveAsTextFiles(outputPath)

    ssc.start()
    ssc.awaitTermination()
  }

} 
开发者ID:ZubairNabi,项目名称:prosparkstreaming,代码行数:52,代码来源:L5-14KafkaCustomConf.scala

示例4: createStream

//设置package包名称以及导入依赖的类
package it.agilelab.bigdata.wasp.consumers.readers

import it.agilelab.bigdata.wasp.core.WaspSystem
import it.agilelab.bigdata.wasp.core.WaspSystem._
import it.agilelab.bigdata.wasp.core.kafka.CheckOrCreateTopic
import it.agilelab.bigdata.wasp.core.logging.WaspLogger
import it.agilelab.bigdata.wasp.core.models.{DefaultConfiguration, TopicModel}
import it.agilelab.bigdata.wasp.core.utils.{AvroToJsonUtil, ConfigManager, JsonToByteArrayUtil}
import kafka.serializer.{DefaultDecoder, StringDecoder}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.kafka.KafkaUtils



  //TODO: check warning (not understood)
  def createStream(group: String, topic: TopicModel)(implicit ssc: StreamingContext): DStream[String] = {
    val kafkaConfig = ConfigManager.getKafkaConfig

    val kafkaConfigMap: Map[String, String] = Map(
      "zookeeper.connect" -> kafkaConfig.zookeeper.toString,
      "zookeeper.connection.timeout.ms" -> kafkaConfig.zookeeper.timeout.getOrElse(DefaultConfiguration.timeout).toString
    )


    if (??[Boolean](WaspSystem.getKafkaAdminActor, CheckOrCreateTopic(topic.name, topic.partitions, topic.replicas))) {
      val receiver = KafkaUtils.createStream[String, Array[Byte], StringDecoder, DefaultDecoder](
        ssc,
        kafkaConfigMap + ("group.id" -> group),
        Map(topic.name -> 3),
        StorageLevel.MEMORY_AND_DISK_2
      )

      topic.topicDataType match {
        case "avro" => receiver.map(x => (x._1, AvroToJsonUtil.avroToJson(x._2))).map(_._2)
        case "json" => receiver.map(x => (x._1, JsonToByteArrayUtil.byteArrayToJson(x._2))).map(_._2)
        case _ => receiver.map(x => (x._1, AvroToJsonUtil.avroToJson(x._2))).map(_._2)
      }

    } else {
      logger.error(s"Topic not found on Kafka: $topic")
      throw new Exception(s"Topic not found on Kafka: $topic")
    }
  }
} 
开发者ID:agile-lab-dev,项目名称:wasp,代码行数:47,代码来源:KafkaReader.scala

示例5: StationJourneyCountDirectApp

//设置package包名称以及导入依赖的类
package org.apress.prospark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD.rddToOrderedRDDFunctions
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions
import kafka.serializer.StringDecoder
import org.apache.spark.streaming.kafka.KafkaUtils

object StationJourneyCountDirectApp {

  def main(args: Array[String]) {
    if (args.length != 7) {
      System.err.println(
        "Usage: StationJourneyCountApp <appname> <brokerUrl> <topic> <consumerGroupId> <zkQuorum> <checkpointDir> <outputPath>")
      System.exit(1)
    }

    val Seq(appName, brokerUrl, topic, consumerGroupId, zkQuorum, checkpointDir, outputPath) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(10))
    ssc.checkpoint(checkpointDir)

    val topics = Set(topic)
    val params = Map[String, String](
      "zookeeper.connect" -> zkQuorum,
      "group.id" -> consumerGroupId,
      "bootstrap.servers" -> brokerUrl)
    KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, params, topics).map(_._2)
      .map(rec => rec.split(","))
      .map(rec => ((rec(3), rec(7)), 1))
      .reduceByKey(_ + _)
      .repartition(1)
      .map(rec => (rec._2, rec._1))
      .transform(rdd => rdd.sortByKey(ascending = false))
      .saveAsTextFiles(outputPath)

    ssc.start()
    ssc.awaitTermination()
  }

} 
开发者ID:ZubairNabi,项目名称:prosparkstreaming,代码行数:49,代码来源:L5-15KafkaDirect.scala

示例6: StationJourneyCountApp

//设置package包名称以及导入依赖的类
package org.apress.prospark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD.rddToOrderedRDDFunctions
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions
import org.apache.spark.streaming.kafka.KafkaUtils

object StationJourneyCountApp {

  def main(args: Array[String]) {
    if (args.length != 7) {
      System.err.println(
        "Usage: StationJourneyCountApp <appname> <brokerUrl> <topic> <consumerGroupId> <zkQuorum> <checkpointDir> <outputPath>")
      System.exit(1)
    }

    val Seq(appName, brokerUrl, topic, consumerGroupId, zkQuorum, checkpointDir, outputPath) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)
    //.set("spark.streaming.receiver.writeAheadLog.enable", "true")

    val ssc = new StreamingContext(conf, Seconds(10))
    ssc.checkpoint(checkpointDir)

    val topics = Map[String, Int](
      topic -> 1)
    KafkaUtils.createStream(ssc, zkQuorum, consumerGroupId, topics, StorageLevel.MEMORY_ONLY_SER).map(_._2)
      .map(rec => rec.split(","))
      .map(rec => ((rec(3), rec(7)), 1))
      .reduceByKey(_ + _)
      .repartition(1)
      .map(rec => (rec._2, rec._1))
      .transform(rdd => rdd.sortByKey(ascending = false))
      .saveAsTextFiles(outputPath)

    ssc.start()
    ssc.awaitTermination()
  }

} 
开发者ID:ZubairNabi,项目名称:prosparkstreaming,代码行数:47,代码来源:L5-13Kafka.scala

示例7: SparkJob

//设置package包名称以及导入依赖的类
package de.codecentric.dcos_intro.spark


import de.codecentric.dcos_intro.{Tweet, TweetDecoder}
import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import com.datastax.spark.connector.streaming._


object SparkJob {

  def main(args: Array[String]) {

    val consumerTopic = args(0)
    val sparkConf = new SparkConf()
      .setAppName(getClass.getName)
      .set("spark.cassandra.connection.host", s"${args(1)}")
      .set("spark.cassandra.connection.port", s"${args(2)}")
    val consumerProperties = Map("bootstrap.servers" -> args(3), "auto.offset.reset" -> "smallest")
    val ssc = new StreamingContext(sparkConf, Seconds(1))

    val kafkaStream = KafkaUtils.createDirectStream[String, Tweet, StringDecoder, TweetDecoder](
      ssc,
      consumerProperties,
      Set(consumerTopic)
    )

    kafkaStream.map(tuple => tuple._2).saveToCassandra("dcos", "tweets")

    ssc.start()
    ssc.awaitTermination()
    ssc.stop()
  }
} 
开发者ID:ftrossbach,项目名称:intro-to-dcos,代码行数:37,代码来源:SparkJob.scala

示例8: ApplicationContext

//设置package包名称以及导入依赖的类
package com.playing.contexts

import com.playing.utils.SparkConfig
import kafka.serializer.StringDecoder
import org.apache.spark.streaming.kafka.KafkaUtils


object ApplicationContext {

  def main(args: Array[String]): Unit = {
    val Array(brokers, topics) = args

    // Create context with 2 second batch interval
    val ssc = SparkConfig.ssc

    // Create direct kafka stream with brokers and topics
    val topicsSet = topics.split(",").toSet
    val kafkaParams = Map[String, String]("metadata.broker.list" -> brokers)
    val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      ssc, kafkaParams, topicsSet)

    // Get the lines, split them into words, count the words and print
    val lines = messages.map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L)).reduceByKey(_ + _)
    wordCounts.print()

    // Start the computation
    ssc.start()
    ssc.awaitTermination()
  }

} 
开发者ID:anand-singh,项目名称:playing-spark-streaming,代码行数:34,代码来源:ApplicationContext.scala

示例9: WeKafka

//设置package包名称以及导入依赖的类
package com.tuyoo.kafka

import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream
import org.apache.hadoop.hdfs.server.common.Storage
import org.apache.spark.storage.StorageLevel

class WeKafka extends Serializable{
   def getKafkaDStream(args:Array[String],ssc: StreamingContext):DStream[(String,String)]={
   val Array(zkQuorum, group, topics, numThreads) = args
    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    val numInputDStreams = 10
//    val kafkaDStreams=(1 to numInputDStreams).map(_=>KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).persist(StorageLevel.MEMORY_AND_DISK).map(_._2).persist(StorageLevel.MEMORY_AND_DISK)
//    .map(_.replaceAll("\u0000", "")).persist(StorageLevel.MEMORY_AND_DISK))
//    ssc.union(kafkaDStreams)
    KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).persist(StorageLevel.MEMORY_AND_DISK)//.map(_._2).persist(StorageLevel.MEMORY_AND_DISK)
    //.map(_.replaceAll("\u0000", "")).persist(StorageLevel.MEMORY_AND_DISK)
  }
} 
开发者ID:lf328359263,项目名称:spark-redis,代码行数:21,代码来源:WeKafka.scala

示例10: SparkStreamingOnKafkaReceiver

//设置package包名称以及导入依赖的类
package com.jjzhk.sparkexamples.streaming

import org.apache.spark.SparkConf
import org.apache.spark.api.java.StorageLevels
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Durations, StreamingContext}


object SparkStreamingOnKafkaReceiver {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("SparkStreamingOnKafkaReceiver")
    val sc = new StreamingContext(conf, Durations.seconds(30))
    val topicMap = Map[String, Int]("HelloKafka" -> 1) // keytopic, value???????
    val lines = KafkaUtils.createStream(sc, "Master:2181,Worker1:2181,Worker2:2181", "MyFirstConsumerGroup", topicMap,
      StorageLevels.MEMORY_AND_DISK_SER_2)

    val words = lines.flatMap(_._2.split(" ")).map((_, 1))

    val wordCounts = words.reduceByKey(_+_)
    wordCounts.print()

    sc.start()
    sc.awaitTermination()
  }
} 
开发者ID:JJZHK,项目名称:MySpark,代码行数:26,代码来源:SparkStreamingOnKafkaReceiver.scala

示例11: InputManager

//设置package包名称以及导入依赖的类
package iomanager

import com.typesafe.config.Config
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

import scala.collection.JavaConversions._

object InputManager {

  
  def createInputStream(ssc: StreamingContext, config: Config): DStream[Set[Int]] = {

    val windowDuration = Seconds(config.getInt("input.observationWindow"))
    val topics = config.getStringList("input.kafka.topics")
    val brokers = config.getStringList("input.kafka.brokers").reduce(_ + "," + _)
    val group = config.getString("input.kafka.group")

    val eventIndex = ssc.sparkContext.broadcast(config.getStringList("eventIndex")
      .map(_.split(",")).map(x => x.last -> x.head.toInt).toMap)
    val topicMap = topics.map((_, 2)).toMap
    val eventsStream =
      KafkaUtils.createStream(ssc, brokers, group, topicMap, StorageLevel.MEMORY_ONLY_2)
        .transform((data)=>{
          data.map(_._2).filter(eventIndex.value.contains)
            .map(eventIndex.value.get(_).get).distinct.map(x=>Set(x))
        })
      .reduce(_++_)
      .persist(StorageLevel.MEMORY_ONLY_2)

    eventsStream
      .window(windowDuration)
      .persist(StorageLevel.MEMORY_ONLY_2)
  }
} 
开发者ID:jandion,项目名称:SparkOFP,代码行数:38,代码来源:InputManager.scala

示例12: KafkaSource

//设置package包名称以及导入依赖的类
package com.ippontech.kafka

import com.ippontech.kafka.stores.OffsetsStore
import com.typesafe.scalalogging.slf4j.LazyLogging
import kafka.message.MessageAndMetadata
import kafka.serializer.Decoder
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka.KafkaUtils

import scala.reflect.ClassTag

object KafkaSource extends LazyLogging {

  def kafkaStream[K: ClassTag, V: ClassTag, KD <: Decoder[K] : ClassTag, VD <: Decoder[V] : ClassTag]
  (ssc: StreamingContext, kafkaParams: Map[String, String], offsetsStore: OffsetsStore, topic: String): InputDStream[(K, V)] = {

    val topics = Set(topic)

    val storedOffsets = offsetsStore.readOffsets(topic)
    val kafkaStream = storedOffsets match {
      case None =>
        // start from the latest offsets
        KafkaUtils.createDirectStream[K, V, KD, VD](ssc, kafkaParams, topics)
      case Some(fromOffsets) =>
        // start from previously saved offsets
        val messageHandler = (mmd: MessageAndMetadata[K, V]) => (mmd.key, mmd.message)
        KafkaUtils.createDirectStream[K, V, KD, VD, (K, V)](ssc, kafkaParams, fromOffsets, messageHandler)
    }

    // save the offsets
    kafkaStream.foreachRDD(rdd => offsetsStore.saveOffsets(topic, rdd))

    kafkaStream
  }

  // Kafka input stream
  def kafkaStream[K: ClassTag, V: ClassTag, KD <: Decoder[K] : ClassTag, VD <: Decoder[V] : ClassTag]
  (ssc: StreamingContext, brokers: String, offsetsStore: OffsetsStore, topic: String): InputDStream[(K, V)] =
    kafkaStream(ssc, Map("metadata.broker.list" -> brokers), offsetsStore, topic)

} 
开发者ID:ippontech,项目名称:spark-kafka-source,代码行数:43,代码来源:KafkaSource.scala

示例13: DataConsumer

//设置package包名称以及导入依赖的类
package com.safak

import kafka.serializer.StringDecoder
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}


object DataConsumer {
  def main(args: Array[String]) {
    if (args.length < 2) {
      System.err.println(
        s"""
           |Usage: DirectKafkaWordCount <brokers> <topics>
           |  <brokers> is a list of one or more Kafka brokers
           |  <topics> is a list of one or more kafka topics to consume from
           |
        """.stripMargin)
      System.exit(1)
    }

    val Array(brokers, topics) = args

    val sparkConf = new SparkConf()
      .setAppName("StreamingApp")
    val ssc = new StreamingContext(sparkConf, Seconds(2))

    // Create direct kafka stream with brokers and topics
    val topicsSet = topics.split(",").toSet
    val kafkaParams = Map[String, String]("metadata.broker.list" -> brokers)

    // Create a new stream which can decode byte arrays.
    val messageStream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      ssc,
      kafkaParams,
      topicsSet)

    messageStream.foreachRDD(rdd => {

      if (!rdd.isEmpty()) {
        val rdd2 = rdd.map(_._2)
        println("New Batch")
        println("Rdd Count: " + rdd2.count())
        rdd2.collect().foreach(println)
      }
    });

    ssc.start()
    ssc.awaitTermination()
  }

} 
开发者ID:sskapci,项目名称:spark_consumer,代码行数:53,代码来源:Application.scala

示例14: Run

//设置package包名称以及导入依赖的类
import Schemas.{Sales_v2, Shipments_v1}
import io.confluent.kafka.serializers.KafkaAvroDecoder
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.SparkConf
import redis.embedded.RedisServer

object Run extends App {

  // Start embedded Redis Server
  val redisServer = new RedisServer(6379)
  redisServer.start()

  val sparkConf = new SparkConf()
    .setAppName("e-commerce-demo-inventory")
    .setMaster("local[2]")
  val ssc = new StreamingContext(sparkConf, Seconds(4))

  val kafkaParams = Map[String, String](
    "auto.offset.reset" -> "smallest",
    "zookeeper.connect" -> "cloudera.landoop.com:22181",
    "group.id" -> "group111112",
    "metadata.broker.list" -> "cloudera.landoop.com:29092",
    "schema.registry.url" -> "http://cloudera.landoop.com:28081")

  val salesTopic = Set("generator-sales")
  val salesStream = KafkaUtils.createDirectStream[Object, Object, KafkaAvroDecoder, KafkaAvroDecoder](ssc, kafkaParams, salesTopic)

  val shipmentsTopic = Set("generator-shipments")
  val shipmentsStream = KafkaUtils.createDirectStream[Object, Object, KafkaAvroDecoder, KafkaAvroDecoder](ssc, kafkaParams, shipmentsTopic)

  val sales = salesStream.map[Sales_v2](AvroConverter.getSale(_))
  val shipment = salesStream.map[Shipments_v1](AvroConverter.getShipment(_))

  sales.map(x => 1).reduce(_ + _)
  sales.print()

  shipment.map(x => 1).reduce(_ + _)
    .print()

  sys.ShutdownHookThread {
    println("Gracefully stopping Spark Streaming Application")
    ssc.stop(stopSparkContext = true, stopGracefully = true)
    redisServer.stop()
    println("Application stopped")
  }

  ssc.start()
  ssc.awaitTermination()

} 
开发者ID:Antwnis,项目名称:kafka-streaming-examples,代码行数:52,代码来源:Run.scala

示例15: KafkaDStreamSource

//设置package包名称以及导入依赖的类
package org.yuboxu.spark

import kafka.serializer.{StringDecoder, DefaultDecocder}
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.kafka.KafkaUtils

class KafkaDStreamSource(config: Map[String, String]) {
  def createSource(ssc: StreamingContext, topic: String): DStream[KafkaPayload] = {
    val kafkaParams = config
    val kafkaTopics = Set(topic)

    KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      // spark streaming context
      ssc,
      // kafka configuration parameters
      kafkaParams,
      // names of the topics to consume
      kafkaTopics).map(dstream => KafkaPayload(Option(dstream._1), dstream._2)
    )
  }
}

object KafkaDStreamSource {
  def apply(config: Map[String, String]): KafkaDStreamSource = new KafkaDStreamSource(config)
} 
开发者ID:CCA1,项目名称:Web-Popularity-Application,代码行数:27,代码来源:KafkaDStreamSource.scala


注:本文中的org.apache.spark.streaming.kafka.KafkaUtils类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。