This article collects typical usage examples of the Scala class kafka.serializer.StringDecoder. If you are wondering what exactly StringDecoder does, how to use it, or what working code looks like, the curated class examples below may help.
Nine code examples of the StringDecoder class are shown below, sorted by popularity by default.
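Note that StringDecoder belongs to the old Kafka 0.8 consumer API, and the KafkaUtils.createDirectStream[K, V, KD, VD] signature used throughout these examples comes from Spark's legacy spark-streaming-kafka-0-8 connector, which was removed in Spark 3. A minimal build.sbt sketch, assuming Spark 2.x on Scala 2.11 (the version numbers are illustrative, not taken from the examples):

// build.sbt -- minimal sketch; versions are assumptions, adjust to your environment
scalaVersion := "2.11.12"

val sparkVersion = "2.2.0"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-streaming" % sparkVersion % "provided",
  // Legacy 0.8 connector providing KafkaUtils.createDirectStream with explicit decoder types
  "org.apache.spark" %% "spark-streaming-kafka-0-8" % sparkVersion
)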
Example 1: StreamingApp
// Set the package name and import the required classes
package spark.test

import data.processing.avro.AvroDecoder
import kafka.serializer.StringDecoder
import kafka.serializer.DefaultDecoder
import org.apache.spark._
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka.KafkaUtils

object StreamingApp {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Simple Streaming Application")
    val ssc = new StreamingContext(conf, Seconds(1))

    val topicsSet = "test".split(",").toSet
    val kafkaParams = Map[String, String]("metadata.broker.list" -> "localhost:9092")

    // Direct stream with String keys and raw byte-array values
    val directKafkaStream = KafkaUtils.createDirectStream[String, Array[Byte], StringDecoder, DefaultDecoder](
      ssc, kafkaParams, topicsSet
    )

    directKafkaStream.foreachRDD(rdd =>
      rdd.foreachPartition(partitionOfRecords => {
        // One Avro decoder per partition, created on the executor side
        val avroDecoder = new AvroDecoder("/event-record.json")
        partitionOfRecords.map(m => (m._1, avroDecoder.decode(m._2))).foreach(m => println(m))
      }))

    ssc.start()
    ssc.awaitTermination()
  }
}
Example 2: KafkaStreaming
// Set the package name and import the required classes
package org.myorganization.spark.streaming

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{StreamingContext, Seconds}
import org.apache.spark.streaming.kafka._
import kafka.serializer.StringDecoder

object KafkaStreaming {
  def main(args: Array[String]): Unit = {
    val (batchDuration, topics, bootstrapServers) = getParams(args)

    val conf = new SparkConf().setAppName("gpKafkaStreaming")
    val sc = new SparkContext(conf)
    val ssc = new StreamingContext(sc, Seconds(batchDuration))

    val topicsSet = topics.split(",").toSet
    val kafkaParams = Map[String, String]("bootstrap.servers" -> bootstrapServers, "auto.offset.reset" -> "smallest")
    val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topicsSet)

    // Take the message value, split it on whitespace, keep fields 0 and 6,
    // then retain only the lines whose seventh field matches the logger pattern
    val data = messages.map(_._2)
    val loggerSerializerLogs = data.map(_.split("""\s+"""))
      .filter(x => x.length > 6)
      .map(x => (x(0), x(6)))
      .filter(filterLogLines)
      .map(x => x._1)

    val logCounts = loggerSerializerLogs.map(x => (x, 1L)).reduceByKey(_ + _)
    logCounts.print(10)

    ssc.start()
    ssc.awaitTermination()
  }

  def filterLogLines(line: (String, String)): Boolean = {
    val pattern = """logger.+"""
    line._2.matches(pattern)
  }

  def getParams(args: Array[String]): (Int, String, String) = {
    if (args.length != 3) {
      System.err.println(s"""
        |Usage: spark-kafka.sh <sampling-period> <topics> <bootstrap-servers>
        | <sampling-period> is the duration of each batch (in seconds)
        | <topics> is a list of one or more kafka topics to consume from
        | <bootstrap-servers> is a list of one or more Kafka bootstrap servers
        |
        """.stripMargin)
      System.exit(1)
    }
    (args(0).toInt, args(1), args(2))
  }
}
Example 3: DirectKafkaWordCount
// Set the package name and import the required classes
package example.spark

import kafka.serializer.StringDecoder
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.SparkConf

object DirectKafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 2) {
      System.err.println(s"""
        |Usage: DirectKafkaWordCount <brokers> <topics>
        | <brokers> is a list of one or more Kafka brokers
        | <topics> is a list of one or more kafka topics to consume from
        |
        """.stripMargin)
      System.exit(1)
    }

    val Array(brokers, topics) = args

    // Create context with a 2-second batch interval
    // (no need to create a SparkContext explicitly; StreamingContext builds one from the conf)
    val sparkConf = new SparkConf().setAppName("DirectKafkaWordCount").setMaster("local[4]")
    val ssc = new StreamingContext(sparkConf, Seconds(2))

    // Create a direct Kafka stream with the given brokers and topics
    val topicsSet = topics.split(",").toSet
    val kafkaParams = Map[String, String]("metadata.broker.list" -> brokers)
    val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topicsSet)

    // Get the lines, split them into words, count the words and print
    val lines = messages.map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L)).reduceByKey(_ + _)
    wordCounts.print()

    // Start the computation
    ssc.start()
    ssc.awaitTermination()
  }
}
Example 4: createStream
// Set the package name and import the required classes
package it.agilelab.bigdata.wasp.consumers.readers

import it.agilelab.bigdata.wasp.core.WaspSystem
import it.agilelab.bigdata.wasp.core.WaspSystem._
import it.agilelab.bigdata.wasp.core.kafka.CheckOrCreateTopic
import it.agilelab.bigdata.wasp.core.logging.WaspLogger
import it.agilelab.bigdata.wasp.core.models.{DefaultConfiguration, TopicModel}
import it.agilelab.bigdata.wasp.core.utils.{AvroToJsonUtil, ConfigManager, JsonToByteArrayUtil}
import kafka.serializer.{DefaultDecoder, StringDecoder}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.kafka.KafkaUtils

// Note: only the createStream method is shown here; the enclosing reader object
// (which also provides `logger`) is omitted from the listing.
//TODO: check warning (not understood)
def createStream(group: String, topic: TopicModel)(implicit ssc: StreamingContext): DStream[String] = {
  val kafkaConfig = ConfigManager.getKafkaConfig
  val kafkaConfigMap: Map[String, String] = Map(
    "zookeeper.connect" -> kafkaConfig.zookeeper.toString,
    "zookeeper.connection.timeout.ms" -> kafkaConfig.zookeeper.timeout.getOrElse(DefaultConfiguration.timeout).toString
  )

  // Ask the Kafka admin actor to verify (or create) the topic before attaching a receiver
  if (??[Boolean](WaspSystem.getKafkaAdminActor, CheckOrCreateTopic(topic.name, topic.partitions, topic.replicas))) {
    val receiver = KafkaUtils.createStream[String, Array[Byte], StringDecoder, DefaultDecoder](
      ssc,
      kafkaConfigMap + ("group.id" -> group),
      Map(topic.name -> 3),
      StorageLevel.MEMORY_AND_DISK_2
    )

    // Decode the byte-array payload according to the topic's declared data type
    topic.topicDataType match {
      case "avro" => receiver.map(x => (x._1, AvroToJsonUtil.avroToJson(x._2))).map(_._2)
      case "json" => receiver.map(x => (x._1, JsonToByteArrayUtil.byteArrayToJson(x._2))).map(_._2)
      case _ => receiver.map(x => (x._1, AvroToJsonUtil.avroToJson(x._2))).map(_._2)
    }
  } else {
    logger.error(s"Topic not found on Kafka: $topic")
    throw new Exception(s"Topic not found on Kafka: $topic")
  }
}
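For orientation, a hypothetical way to wire this reader into a job is sketched below; the consumer group name is illustrative, and the TopicModel instance is assumed to come from WASP's model layer (it is not part of the listing above):

// Hypothetical usage sketch -- group name is illustrative, `topic` is an existing TopicModel
def startReader(topic: TopicModel)(implicit ssc: StreamingContext): Unit = {
  val jsonStream: DStream[String] = createStream("wasp-consumer-group", topic)
  jsonStream.print()
}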
Example 5: DirectKafkaWordCount
// Set the package name and import the required classes
package example.spark

import kafka.serializer.StringDecoder
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.SparkConf

object DirectKafkaWordCount {
  def main(args: Array[String]) {
    if (args.length < 2) {
      System.err.println(s"""
        |Usage: DirectKafkaWordCount <brokers> <topics>
        | <brokers> is a list of one or more Kafka brokers
        | <topics> is a list of one or more kafka topics to consume from
        |
        """.stripMargin)
      System.exit(1)
    }

    val Array(brokers, topics) = args

    // Create context with a 2-second batch interval
    val sparkConf = new SparkConf().setAppName("DirectKafkaWordCount").setMaster("local[4]")
    val ssc = new StreamingContext(sparkConf, Seconds(2))

    // Create a direct Kafka stream with the given brokers and topics
    val topicsSet = topics.split(",").toSet
    val kafkaParams = Map[String, String]("metadata.broker.list" -> brokers)
    val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topicsSet)

    // Get the lines, split them into words, count the words and print
    val lines = messages.map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L)).reduceByKey(_ + _)
    wordCounts.print()

    // Start the computation
    ssc.start()
    ssc.awaitTermination()
  }
}
Example 6: SparkJob
// Set the package name and import the required classes
package de.codecentric.dcos_intro.spark

import de.codecentric.dcos_intro.{Tweet, TweetDecoder}
import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import com.datastax.spark.connector.streaming._

object SparkJob {
  def main(args: Array[String]) {
    val consumerTopic = args(0)
    val sparkConf = new SparkConf()
      .setAppName(getClass.getName)
      .set("spark.cassandra.connection.host", s"${args(1)}")
      .set("spark.cassandra.connection.port", s"${args(2)}")
    val consumerProperties = Map("bootstrap.servers" -> args(3), "auto.offset.reset" -> "smallest")
    val ssc = new StreamingContext(sparkConf, Seconds(1))

    // String keys decoded with StringDecoder, values decoded into Tweet objects by a custom TweetDecoder
    val kafkaStream = KafkaUtils.createDirectStream[String, Tweet, StringDecoder, TweetDecoder](
      ssc,
      consumerProperties,
      Set(consumerTopic)
    )

    // Persist the tweets into the dcos.tweets Cassandra table
    kafkaStream.map(tuple => tuple._2).saveToCassandra("dcos", "tweets")

    ssc.start()
    ssc.awaitTermination()
    ssc.stop()
  }
}
Example 7: ApplicationContext
// Set the package name and import the required classes
package com.playing.contexts

import com.playing.utils.SparkConfig
import kafka.serializer.StringDecoder
import org.apache.spark.streaming.kafka.KafkaUtils

object ApplicationContext {
  def main(args: Array[String]): Unit = {
    val Array(brokers, topics) = args

    // Obtain the streaming context from the shared SparkConfig helper
    val ssc = SparkConfig.ssc

    // Create a direct Kafka stream with the given brokers and topics
    val topicsSet = topics.split(",").toSet
    val kafkaParams = Map[String, String]("metadata.broker.list" -> brokers)
    val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      ssc, kafkaParams, topicsSet)

    // Get the lines, split them into words, count the words and print
    val lines = messages.map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1L)).reduceByKey(_ + _)
    wordCounts.print()

    // Start the computation
    ssc.start()
    ssc.awaitTermination()
  }
}
Example 8: KafkaSourcePythonHelper
// Set the package name and import the required classes
package com.ippontech.kafka

import com.ippontech.kafka.stores.{OffsetsStore, ZooKeeperOffsetsStore}
import kafka.serializer.StringDecoder
import org.apache.spark.streaming.api.java.{JavaDStream, JavaStreamingContext}

object KafkaSourcePythonHelper {

  // Build a (key, value) stream using an externally supplied offsets store
  def kafkaStream(jssc: JavaStreamingContext, brokers: String, offsetsStore: OffsetsStore,
                  topic: String): JavaDStream[(String, String)] = {
    val dstream = KafkaSource.kafkaStream[String, String, StringDecoder, StringDecoder](jssc.ssc, brokers, offsetsStore, topic)
    val jdstream = new JavaDStream(dstream)
    jdstream
  }

  // Convenience overload that tracks offsets in ZooKeeper at the given path
  def kafkaStream(jssc: JavaStreamingContext, brokers: String, zkHosts: String, zkPath: String,
                  topic: String): JavaDStream[(String, String)] = {
    val offsetsStore = new ZooKeeperOffsetsStore(zkHosts, zkPath)
    val dstream = KafkaSource.kafkaStream[String, String, StringDecoder, StringDecoder](jssc.ssc, brokers, offsetsStore, topic)
    val jdstream = new JavaDStream(dstream)
    jdstream
  }
}
Example 9: KafkaDStreamSource
// Set the package name and import the required classes
package org.yuboxu.spark

import kafka.serializer.{DefaultDecoder, StringDecoder}
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.kafka.KafkaUtils

class KafkaDStreamSource(config: Map[String, String]) {
  def createSource(ssc: StreamingContext, topic: String): DStream[KafkaPayload] = {
    val kafkaParams = config
    val kafkaTopics = Set(topic)

    KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      // Spark streaming context
      ssc,
      // Kafka configuration parameters
      kafkaParams,
      // names of the topics to consume
      kafkaTopics
    ).map(dstream => KafkaPayload(Option(dstream._1), dstream._2))
  }
}

object KafkaDStreamSource {
  def apply(config: Map[String, String]): KafkaDStreamSource = new KafkaDStreamSource(config)
}
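KafkaPayload itself is not part of the listing above. A minimal definition that would satisfy the map() call (String keys and values, matching the two StringDecoders), together with a hypothetical way to instantiate the source, is sketched below; the class shape, broker address, and topic name are assumptions rather than code from the original project:

// Assumed payload type: the real project may define additional fields
case class KafkaPayload(key: Option[String], value: String)

// Hypothetical wiring of the source into a streaming job
object KafkaDStreamSourceExample {
  def build(ssc: StreamingContext): DStream[KafkaPayload] = {
    val source = KafkaDStreamSource(Map(
      "metadata.broker.list" -> "localhost:9092",
      "auto.offset.reset"    -> "smallest"
    ))
    source.createSource(ssc, "events")
  }
}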