本文整理汇总了Scala中org.apache.kafka.common.serialization.StringDeserializer类的典型用法代码示例。如果您正苦于以下问题:Scala StringDeserializer类的具体用法?Scala StringDeserializer怎么用?Scala StringDeserializer使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了StringDeserializer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: ProcessingKafkaApplication
//设置package包名称以及导入依赖的类
package com.packt.chapter8
import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerSettings, Subscriptions}
import akka.stream.{ActorMaterializer, ClosedShape}
import akka.stream.scaladsl.{Flow, GraphDSL, RunnableGraph, Sink, Source}
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer}
import scala.concurrent.duration._
object ProcessingKafkaApplication extends App {
implicit val actorSystem = ActorSystem("SimpleStream")
implicit val actorMaterializer = ActorMaterializer()
val bootstrapServers = "localhost:9092"
val kafkaTopic = "akka_streams_topic"
val partition = 0
val subscription = Subscriptions.assignment(new TopicPartition(kafkaTopic, partition))
val consumerSettings = ConsumerSettings(actorSystem, new ByteArrayDeserializer, new StringDeserializer)
.withBootstrapServers(bootstrapServers)
.withGroupId("akka_streams_group")
.withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
val producerSettings = ProducerSettings(actorSystem, new ByteArraySerializer, new StringSerializer)
.withBootstrapServers(bootstrapServers)
val runnableGraph = RunnableGraph.fromGraph(GraphDSL.create() { implicit builder =>
import GraphDSL.Implicits._
val tickSource = Source.tick(0 seconds, 5 seconds, "Hello from Akka Streams using Kafka!")
val kafkaSource = Consumer.plainSource(consumerSettings, subscription)
val kafkaSink = Producer.plainSink(producerSettings)
val printlnSink = Sink.foreach(println)
val mapToProducerRecord = Flow[String].map(elem => new ProducerRecord[Array[Byte], String](kafkaTopic, elem))
val mapFromConsumerRecord = Flow[ConsumerRecord[Array[Byte], String]].map(record => record.value())
tickSource ~> mapToProducerRecord ~> kafkaSink
kafkaSource ~> mapFromConsumerRecord ~> printlnSink
ClosedShape
})
runnableGraph.run()
}
示例2: KafkaUtility
//设置package包名称以及导入依赖的类
package com.knoldus.streaming.kafka
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
object KafkaUtility {
//TODO It should read from config
private val kafkaParams = Map(
"bootstrap.servers" -> "localhost:9092",
"key.deserializer" -> classOf[StringDeserializer],
"value.deserializer" -> classOf[StringDeserializer],
"auto.offset.reset" -> "earliest",
"group.id" -> "tweet-consumer"
)
private val preferredHosts = LocationStrategies.PreferConsistent
def createDStreamFromKafka(ssc: StreamingContext, topics: List[String]): InputDStream[ConsumerRecord[String, String]] =
KafkaUtils.createDirectStream[String, String](
ssc,
preferredHosts,
ConsumerStrategies.Subscribe[String, String](topics.distinct, kafkaParams)
)
}
示例3: Consumer
//设置package包名称以及导入依赖的类
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.mllib.classification.SVMModel
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.sql.SparkSession
object Consumer {
def main(args: Array[String]): Unit = {
val kafkaParams = Map[String, Object](
"bootstrap.servers" -> "localhost:9092",
"key.deserializer" -> classOf[StringDeserializer],
"value.deserializer" -> classOf[StringDeserializer],
"group.id" -> "use_a_separate_group_id_for_each_stream",
"auto.offset.reset" -> "latest",
"enable.auto.commit" -> (false: java.lang.Boolean)
)
val topics = Array("streaming")
val sparkConf = new SparkConf().setMaster("local[8]").setAppName("KafkaTest")
val streamingContext = new StreamingContext(sparkConf, Seconds(1))
// Create a input direct stream
val kafkaStream = KafkaUtils.createDirectStream[String, String](
streamingContext,
PreferConsistent,
Subscribe[String, String](topics, kafkaParams)
)
val sc = SparkSession.builder().master("local[8]").appName("KafkaTest").getOrCreate()
val model = SVMModel.load(sc.sparkContext, "/home/xiaoyu/model")
val result = kafkaStream.map(record => (record.key, record.value))
result.foreachRDD(
patient => {
patient.collect().toBuffer.foreach(
(x: (Any, String)) => {
val features = x._2.split(',').map(x => x.toDouble).tail
println(model.predict(Vectors.dense(features)))
}
)
}
)
streamingContext.start()
streamingContext.awaitTermination()
}
}
示例4: Settings
//设置package包名称以及导入依赖的类
package com.scalaio.kafka.consumer
import akka.actor.ActorSystem
import akka.kafka.ConsumerMessage.CommittableMessage
import akka.kafka.scaladsl.Consumer
import akka.kafka.{ConsumerSettings, ProducerSettings, Subscriptions}
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Sink
import com.scalaio.kafka.consumer.Settings.consumerSettings
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer}
import scala.concurrent.Future
object Settings {
def consumerSettings(implicit system: ActorSystem) =
ConsumerSettings(system, new ByteArrayDeserializer, new StringDeserializer)
.withBootstrapServers("localhost:9092")
.withGroupId("CommittableSourceConsumer")
.withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
def producerSettings(implicit system: ActorSystem) =
ProducerSettings(system, new ByteArraySerializer, new StringSerializer)
.withBootstrapServers("localhost:9092")
}
object CommittableSource extends App {
type KafkaMessage = CommittableMessage[Array[Byte], String]
implicit val system = ActorSystem("CommittableSourceConsumerMain")
implicit val materializer = ActorMaterializer()
implicit val ec = system.dispatcher
// explicit commit
Consumer
.committableSource(consumerSettings, Subscriptions.topics("topic1"))
.mapAsync(1) { msg =>
BusinessController.handleMessage(msg.record.value)
.flatMap(response => msg.committableOffset.commitScaladsl())
.recoverWith { case e => msg.committableOffset.commitScaladsl() }
}
.runWith(Sink.ignore)
}
object BusinessController {
type Service[A, B] = A => Future[B]
val handleMessage: Service[String, String] =
(message) => Future.successful(message.toUpperCase)
}
示例5: ReadyKafkaProducer
//设置package包名称以及导入依赖的类
package com.bencassedy.readykafka.producer
import java.util.Properties
import java.util.concurrent.TimeUnit
import org.apache.kafka.clients.producer.{ProducerRecord, KafkaProducer}
import org.apache.kafka.common.serialization.{StringSerializer, StringDeserializer}
class ReadyKafkaProducer {
case class KafkaProducerConfigs(brokerList: String = "127.0.0.1:9092") {
val properties = new Properties()
properties.put("bootstrap.servers", brokerList)
properties.put("key.serializer", classOf[StringSerializer])
properties.put("value.serializer", classOf[StringSerializer])
// properties.put("serializer.class", classOf[StringDeserializer])
// properties.put("batch.size", 16384)
// properties.put("linger.ms", 1)
// properties.put("buffer.memory", 33554432)
}
val producer = new KafkaProducer[String, String](KafkaProducerConfigs().properties)
def produce(topic: String, messages: Iterable[String]): Unit = {
messages.foreach { m =>
producer.send(new ProducerRecord[String, String](topic, m))
}
producer.close(100L, TimeUnit.MILLISECONDS)
}
}
示例6: SimpleConsumer
//设置package包名称以及导入依赖的类
package com.landoop.kafka.ws.core.operations
import java.util.Properties
import com.landoop.kafka.ws.KafkaConstants
import io.confluent.kafka.serializers.KafkaAvroDeserializer
import kafka.utils.VerifiableProperties
import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
import org.apache.kafka.common.serialization.StringDeserializer
object SimpleConsumer {
// Get a simple string consumer
def getConsumer[T](kafkaBrokers: String, maxRecords: Option[Int] = None): KafkaConsumer[T, T] = {
val props = new Properties()
props.put(KafkaConstants.BOOTSTRAP_SERVER, kafkaBrokers)
props.put(KafkaConstants.KEY_DESERIALIZER, "org.apache.kafka.common.serialization.StringDeserializer")
props.put(KafkaConstants.VALUE_DESERIALIZER, "org.apache.kafka.common.serialization.StringDeserializer")
maxRecords.find(_ > 0).map { max =>
props.put(KafkaConstants.MAX_POLL_RECORDS, max.toString)
}
val vProps = new VerifiableProperties(props)
val consumer = new KafkaConsumer[T, T](props)
consumer
}
def createNewConsumerWithConsumerGroup(kafkaBrokers: String, group: String): KafkaConsumer[String, String] = {
assert(group.length > 1, "Invalid group length")
val properties = new Properties()
properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, kafkaBrokers)
properties.put(ConsumerConfig.GROUP_ID_CONFIG, group)
properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false")
properties.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000")
properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, (new StringDeserializer).getClass.getName)
properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, (new StringDeserializer).getClass.getName)
new KafkaConsumer(properties)
}
def getStringAvroConsumer[T](kafkaBrokers: String, schemaRegistry: String, maxRecords: Option[Int] = None): KafkaConsumer[String, T] = {
val props = new Properties()
props.put(KafkaConstants.BOOTSTRAP_SERVER, kafkaBrokers)
props.put(KafkaConstants.KEY_DESERIALIZER, "org.apache.kafka.common.serialization.StringDeserializer")
props.put(KafkaConstants.VALUE_DESERIALIZER, classOf[KafkaAvroDeserializer].getCanonicalName)
props.put(KafkaConstants.SCHEMA_REGISTRY_URL, schemaRegistry)
maxRecords.find(_ > 0).map { max =>
props.put(KafkaConstants.MAX_POLL_RECORDS, max.toString)
}
val vProps = new VerifiableProperties(props)
val consumer = new KafkaConsumer[String, T](props)
consumer
}
}
示例7: StreamConsumer
//设置package包名称以及导入依赖的类
package consumers
import akka.Done
import akka.actor.ActorSystem
import akka.kafka.scaladsl.Consumer
import akka.kafka.{ConsumerSettings, Subscriptions}
import akka.stream.scaladsl.Sink
import akka.stream.{ActorMaterializer, ActorMaterializerSettings}
import cats.data.Xor
import com.typesafe.config.ConfigFactory
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.clients.consumer.internals.PartitionAssignor.Subscription
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, StringDeserializer}
import io.circe._
import io.circe.generic.auto._
import cats.data.Xor.{Left, Right}
import model.Employee
import scala.concurrent.Future
object StreamConsumer extends App{
implicit val actorSystem = ActorSystem("consumer-actors", ConfigFactory.load())
implicit val materializer = ActorMaterializer(ActorMaterializerSettings(actorSystem))
lazy val consumerSettings = ConsumerSettings(actorSystem, new ByteArrayDeserializer, new StringDeserializer)
.withBootstrapServers("localhost:9092")
.withGroupId("group13")
.withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")//"latest")
.withProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true")
.withProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000")
lazy val subscription = Subscriptions.topics("raw-data-1")
lazy val db = new Processor()
Consumer.plainSource(consumerSettings, subscription)
.mapAsync(4){
db.processMessage
}
.runWith(Sink.ignore)
}
class Processor {
def processMessage(record: ConsumerRecord[Array[Byte], String]): Future[Done] ={
println(s"DB.save: ${record.value()}")
Option(record.value()).foreach{ jsonString =>
val mayBeEmp: Xor[Error, Employee] = jawn.decode[Employee](jsonString)
mayBeEmp match {
case Left(error) => println(error)
case Right(emp) => println(s"employee name: ${emp.name}")
}
}
Future.successful(Done) }
}
示例8: TopicHandler
//设置package包名称以及导入依赖的类
package org.hpi.esb.datavalidator.kafka
import akka.actor.ActorSystem
import akka.kafka.scaladsl.Consumer
import akka.kafka.{ConsumerSettings, Subscriptions}
import akka.stream.scaladsl.Source
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer
import org.hpi.esb.util.OffsetManagement
case class TopicHandler(topicName: String, numberOfMessages: Long, topicSource: Source[ConsumerRecord[String, String], Consumer.Control])
object TopicHandler {
def create(topicName: String, system: ActorSystem): TopicHandler = {
val uuid = java.util.UUID.randomUUID.toString
val consumerSettings: ConsumerSettings[String, String] = ConsumerSettings(system, new StringDeserializer, new StringDeserializer)
.withBootstrapServers("192.168.30.208:9092,192.168.30.207:9092,192.168.30.141:9092")
.withGroupId(uuid)
.withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
.withProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, Int.MaxValue.toString)
.withProperty(ConsumerConfig.FETCH_MAX_BYTES_CONFIG, Int.MaxValue.toString)
.withProperty(ConsumerConfig.FETCH_MIN_BYTES_CONFIG, "20485000")
val partition = 0
val topicSource = createSource(consumerSettings, topicName, partition)
val numberOfMessages = OffsetManagement.getNumberOfMessages(topicName, partition)
new TopicHandler(topicName, numberOfMessages, topicSource)
}
def createSource(consumerSettings: ConsumerSettings[String, String], topicName: String, partition: Int) = {
val subscription = Subscriptions.assignmentWithOffset(
new TopicPartition(topicName, partition) -> 0L
)
Consumer.plainSource(consumerSettings, subscription)
}
}
示例9: EventDeserialiser
//设置package包名称以及导入依赖的类
package serialisation
import java.util
import model.Event
import org.apache.kafka.common.serialization.{Deserializer, StringDeserializer}
import org.json4s.native.Serialization
import org.json4s.native.Serialization.read
import org.json4s.{Formats, NoTypeHints}
class EventDeserialiser extends Deserializer[Event] {
implicit val formats: Formats = Serialization.formats(NoTypeHints)
val stringDeserialiser = new StringDeserializer
override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {
stringDeserialiser.configure(configs, isKey)
}
override def deserialize(topic: String, data: Array[Byte]): Event = {
val stringValue = stringDeserialiser.deserialize(topic, data)
read[Event](stringValue)
}
override def close(): Unit = {
stringDeserialiser.close()
}
}
示例10: KafkaTest
//设置package包名称以及导入依赖的类
package com.blazedb.spark
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.{Duration, StreamingContext}
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
object KafkaTest {
val kafkaParams = Map[String, Object](
"bootstrap.servers" -> "localhost:9092,anotherhost:9092",
"key.deserializer" -> classOf[StringDeserializer],
"value.deserializer" -> classOf[StringDeserializer],
"group.id" -> "use_a_separate_group_id_for_each_stream",
"auto.offset.reset" -> "latest",
"enable.auto.commit" -> (false: java.lang.Boolean)
)
val sess = SparkSession.builder.master("local").getOrCreate
val sc = sess.sparkContext
val ssc = new StreamingContext(sc, Duration(1000L))
val topics = Array("topicA", "topicB")
val stream = KafkaUtils.createDirectStream[String, String](
ssc,
PreferConsistent,
Subscribe[String, String](topics, kafkaParams)
)
stream.map(record => (record.key, record.value))
}
示例11: Main
//设置package包名称以及导入依赖的类
import akka.actor.ActorSystem
import akka.kafka.{ConsumerSettings, Subscriptions}
import akka.kafka.scaladsl.Consumer
import akka.stream.ActorMaterializer
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, StringDeserializer}
import scala.concurrent.Future
object Main {
def main(args: Array[String]): Unit = {
implicit val system = ActorSystem.apply("akka-stream-kafka")
implicit val materializer = ActorMaterializer()
val consumerSettings = ConsumerSettings(system, new ByteArrayDeserializer, new StringDeserializer)
.withBootstrapServers("localhost:9092;localhost:9093")
.withGroupId("group1")
.withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
Consumer.committableSource(consumerSettings, Subscriptions.topics("topic1"))
.mapAsync(1)(msg => {
msg.committableOffset.commitScaladsl
Future.successful(msg)
})
.runForeach(msg => println(s"partition: ${msg.record.partition}; value: ${msg.record.value}"))
}
}
示例12: ReactiveKafkaSingleConsumerMultipleProducerScala
//设置package包名称以及导入依赖的类
package org.rgcase.reactivekafka
import akka.actor.ActorSystem
import akka.kafka.ConsumerMessage.{ CommittableMessage, CommittableOffsetBatch }
import akka.kafka.ProducerMessage.Message
import akka.kafka.scaladsl.{ Consumer, Producer }
import akka.kafka.{ ConsumerSettings, ProducerSettings, Subscriptions }
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.{ Flow, Sink }
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.{ ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer }
class ReactiveKafkaSingleConsumerMultipleProducerScala extends App {
implicit val system = ActorSystem("reactivekafkascala")
implicit val mat = ActorMaterializer()
val consumerSettings = ConsumerSettings(system, new ByteArrayDeserializer, new StringDeserializer)
.withBootstrapServers("localhost:9092")
.withGroupId("group1")
.withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
val producerSettings = ProducerSettings(system, new ByteArraySerializer, new StringSerializer)
.withBootstrapServers("localhost:9093")
val kafkaSource =
Consumer.committableSource(consumerSettings, Subscriptions.topics("sourcetopic"))
def toProducerMessage(topic: String) = (msg: CommittableMessage[Array[Byte], String]) ?
Message[Array[Byte], String, CommittableMessage[Array[Byte], String]](new ProducerRecord(topic, msg.record.value), msg)
val producerFlow1 =
Flow.fromFunction(toProducerMessage("targettopic1")).via(Producer.flow(producerSettings)).map(_.message.passThrough)
val producerFlow2 =
Flow.fromFunction(toProducerMessage("targettopic2")).via(Producer.flow(producerSettings)).map(_.message.passThrough)
val producerFlow3 =
Flow.fromFunction(toProducerMessage("targettopic3")).via(Producer.flow(producerSettings)).map(_.message.passThrough)
kafkaSource
.via(producerFlow1)
.via(producerFlow2)
.via(producerFlow3)
.batch(max = 20, first ? CommittableOffsetBatch.empty.updated(first.committableOffset)) { (batch, elem) ?
batch.updated(elem.committableOffset)
}.mapAsync(3)(_.commitScaladsl())
.runWith(Sink.ignore)
}
开发者ID:rgcase,项目名称:testplayground,代码行数:52,代码来源:ReactiveKafkaSingleConsumerMultipleProducerScala.scala
示例13: config
//设置package包名称以及导入依赖的类
package com.example
import akka.actor.Actor
import akka.event.LoggingAdapter
import cakesolutions.kafka.akka.KafkaConsumerActor.Subscribe
import cakesolutions.kafka.akka.{ConsumerRecords, KafkaConsumerActor, KafkaProducerActor, ProducerRecords}
import cakesolutions.kafka.{KafkaProducer, KafkaProducerRecord}
import com.example.PingPongProtocol.PingPongMessage
import com.typesafe.config.Config
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import scala.util.Random
trait KafkaConfig{
def config:Config
def log: LoggingAdapter
def randomString(len: Int= 5): String = Random.alphanumeric.take(len).mkString("")
}
trait PingPongConsumer extends KafkaConfig{
this: Actor =>
//for pattern matching in our receive method
val msgExtractor = ConsumerRecords.extractor[java.lang.String, PingPongMessage]
val kafkaConsumerActor = context.actorOf(
KafkaConsumerActor.props(config,new StringDeserializer(), new JsonDeserializer[PingPongMessage], self),
"PingKafkaConsumerActor"
)
def subscribe(topics: List[String]) =
kafkaConsumerActor ! Subscribe.AutoPartition(topics)
}
trait PingPongProducer extends KafkaConfig{
this: Actor =>
val kafkaProducerConf = KafkaProducer.Conf(
bootstrapServers = config.getString("bootstrap.servers"),
keySerializer = new StringSerializer(),
valueSerializer = new JsonSerializer[PingPongMessage])
val kafkaProducerActor = context.actorOf(KafkaProducerActor.props( kafkaProducerConf))
def submitMsg(topics: List[String], msg: PingPongMessage) = {
log.info(s"Placing $msg on ${topics.mkString(",")}")
topics.foreach(topic => kafkaProducerActor ! ProducerRecords(List(KafkaProducerRecord(topic, randomString(3), msg))))
}
}
示例14: SimplestStreaming
//设置package包名称以及导入依赖的类
package tech.elephant.spark
import grizzled.slf4j.Logger
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, StringDeserializer}
import org.apache.spark.streaming.kafka010.ConsumerStrategies._
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.spark.streaming.kafka010.LocationStrategies._
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SQLContext, SparkSession}
object SimplestStreaming {
val logger = Logger[this.type]
def main(args: Array[String]): Unit = {
val spark = SparkSession
.builder()
.appName("Simplest streaming (spark 2.0) from Kafka SSL")
.enableHiveSupport()
.getOrCreate()
val sparkContext = spark.sparkContext
val streamingContext = new StreamingContext(sparkContext, Seconds(10))
// expects jaas.conf, appropriate keytab, and kafka.client.truststore.jks passed in as part of spark-submit
val kafkaParams = Map[String, Object](
"bootstrap.servers" -> "<fqdn of kafka broker>:9092",
"key.deserializer" -> classOf[StringDeserializer],
"value.deserializer" -> classOf[StringDeserializer],
"group.id" -> "test1",
"auto.offset.reset" -> "latest",
"enable.auto.commit" -> (false: java.lang.Boolean),
"security.protocol" -> "SASL_SSL",
"ssl.truststore.location" -> "./kafka.client.truststore.jks",
"ssl.truststore.password" -> "change-me-to-something-safe"
)
val topic = Set("simplest")
val stream = KafkaUtils.createDirectStream[String, String](
streamingContext,
PreferConsistent,
Subscribe[String, String](topic, kafkaParams)
)
stream.foreachRDD { rdd =>
// Get the singleton instance of SparkSession
val spark = SparkSession.builder.config(rdd.sparkContext.getConf).getOrCreate()
import spark.implicits._
val df = rdd.map( consumerRecord => {
consumerRecord.value()
}).toDF()
df.show()
}
// start the computation
streamingContext.start()
streamingContext.awaitTermination()
}
}
示例15: RsvpStreaming
//设置package包名称以及导入依赖的类
package com.github.mmolimar.asks.streaming
import java.util.UUID
import com.github.mmolimar.askss.common.implicits._
import com.typesafe.scalalogging.LazyLogging
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
object RsvpStreaming extends App with LazyLogging {
val filter = config.getString("spark.filter").toLowerCase.split(",").toList
val ssc = new StreamingContext(buildSparkConfig, Seconds(5))
//TODO
kafkaStream(ssc)
.map(_.value())
.map(_.toEvent)
.filter(rsvp => {
filter.exists(rsvp.event.get.event_name.contains(_))
})
.print()
ssc.start()
ssc.awaitTermination()
def buildSparkConfig(): SparkConf = {
new SparkConf()
.setMaster(config.getString("spark.master"))
.setAppName("RsvpStreaming")
.set("spark.streaming.ui.retainedBatches", "5")
.set("spark.streaming.backpressure.enabled", "true")
.set("spark.sql.parquet.compression.codec", "snappy")
.set("spark.sql.parquet.mergeSchema", "true")
.set("spark.sql.parquet.binaryAsString", "true")
}
def kafkaStream(ssc: StreamingContext): InputDStream[ConsumerRecord[String, String]] = {
val topics = Set(config.getString("kafka.topic"))
val kafkaParams = Map[String, Object](
"metadata.broker.list" -> config.getString("kafka.brokerList"),
"enable.auto.commit" -> config.getBoolean("kafka.autoCommit").toString,
"auto.offset.reset" -> config.getString("kafka.autoOffset"),
ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> config.getString("kafka.brokerList"),
ConsumerConfig.GROUP_ID_CONFIG -> s"consumer-${UUID.randomUUID}",
"key.deserializer" -> classOf[StringDeserializer],
"value.deserializer" -> classOf[StringDeserializer]
)
val consumerStrategy = ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
KafkaUtils.createDirectStream[String, String](ssc, LocationStrategies.PreferConsistent, consumerStrategy)
}
}