This article collects typical usage examples of the org.apache.spark.streaming.Seconds class in Scala. If you are wondering what the Seconds class does, how to use it, or what working examples look like, the curated class examples below should help.
The following sections present 15 code examples of the Seconds class, sorted by popularity by default.
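Before the examples, here is a minimal sketch (not taken from any example below; the object name, host, and port are illustrative) of how Seconds is typically used: it builds a Duration that sets the batch interval of a StreamingContext, and the same constructor also expresses window lengths and slide intervals.
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object SecondsSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("SecondsSketch")
    // Seconds(5) builds a 5-second Duration used here as the batch interval
    val ssc = new StreamingContext(conf, Seconds(5))
    val lines = ssc.socketTextStream("localhost", 9999)
    // Seconds also expresses the window length (30s) and slide interval (10s)
    val counts = lines.flatMap(_.split(" "))
      .map(word => (word, 1))
      .reduceByKeyAndWindow(_ + _, Seconds(30), Seconds(10))
    counts.print()
    ssc.start()
    ssc.awaitTermination()
  }
}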
Example 1: KMeansClusteringApp
// Set up the package name and import the required classes
package org.apress.prospark
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.StreamingKMeans
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD
import org.apache.spark.rdd.RDD.doubleRDDToDoubleRDDFunctions
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
object KMeansClusteringApp {
def main(args: Array[String]) {
if (args.length != 4) {
System.err.println(
"Usage: KMeansClusteringApp <appname> <batchInterval> <hostname> <port>")
System.exit(1)
}
val Seq(appName, batchInterval, hostname, port) = args.toSeq
val conf = new SparkConf()
.setAppName(appName)
.setJars(SparkContext.jarOfClass(this.getClass).toSeq)
val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))
val substream = ssc.socketTextStream(hostname, port.toInt)
.filter(!_.contains("NaN"))
.map(_.split(" "))
.filter(f => f(1) != "0")
val orientationStream = substream
.map(f => Seq(1, 4, 5, 6, 10, 11, 12, 20, 21, 22, 26, 27, 28, 36, 37, 38, 42, 43, 44).map(i => f(i)).toArray)
.map(arr => arr.map(_.toDouble))
.filter(f => f(0) == 1.0 || f(0) == 2.0 || f(0) == 3.0)
.map(f => LabeledPoint(f(0), Vectors.dense(f.slice(1, f.length))))
val test = orientationStream.transform(rdd => rdd.randomSplit(Array(0.3, 0.7))(0))
val train = orientationStream.transformWith(test, (r1: RDD[LabeledPoint], r2: RDD[LabeledPoint]) => r1.subtract(r2)).cache()
val model = new StreamingKMeans()
.setK(3)
.setDecayFactor(0)
.setRandomCenters(18, 0.0)
model.trainOn(train.map(v => v.features))
val prediction = model.predictOnValues(test.map(v => (v.label, v.features)))
prediction.print() // register an output operation so the predictions are actually materialized
ssc.start()
ssc.awaitTermination()
}
}
Example 2: Consumer
// Set up the package name and import the required classes
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.mllib.classification.SVMModel
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.sql.SparkSession
object Consumer {
def main(args: Array[String]): Unit = {
val kafkaParams = Map[String, Object](
"bootstrap.servers" -> "localhost:9092",
"key.deserializer" -> classOf[StringDeserializer],
"value.deserializer" -> classOf[StringDeserializer],
"group.id" -> "use_a_separate_group_id_for_each_stream",
"auto.offset.reset" -> "latest",
"enable.auto.commit" -> (false: java.lang.Boolean)
)
val topics = Array("streaming")
val sparkConf = new SparkConf().setMaster("local[8]").setAppName("KafkaTest")
val streamingContext = new StreamingContext(sparkConf, Seconds(1))
// Create an input direct stream
val kafkaStream = KafkaUtils.createDirectStream[String, String](
streamingContext,
PreferConsistent,
Subscribe[String, String](topics, kafkaParams)
)
val sc = SparkSession.builder().master("local[8]").appName("KafkaTest").getOrCreate()
val model = SVMModel.load(sc.sparkContext, "/home/xiaoyu/model")
val result = kafkaStream.map(record => (record.key, record.value))
result.foreachRDD(
patient => {
patient.collect().toBuffer.foreach(
(x: (Any, String)) => {
val features = x._2.split(',').map(x => x.toDouble).tail
println(model.predict(Vectors.dense(features)))
}
)
}
)
streamingContext.start()
streamingContext.awaitTermination()
}
}
Example 3: StatefulWordcount
// Set up the package name and import the required classes
package com.test.spark
import org.apache.spark.SparkConf
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.kafka010.ConsumerStrategies
import org.apache.spark.streaming.kafka010.LocationStrategies
import org.apache.spark.streaming.kafka010.KafkaUtils
object StatefulWordcount extends App {
val conf = new SparkConf().setAppName("Stateful Wordcount").setMaster("local[2]")
val ssc = new StreamingContext(conf, Seconds(10))
val kafkaParams = Map[String, String]("bootstrap.servers" -> "localhost:9092", "key.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer", "value.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer", "group.id" -> "mygroup", "auto.offset.reset" -> "earliest")
val topics = Set("widas")
val inputKafkaStream = KafkaUtils.createDirectStream(ssc, LocationStrategies.PreferConsistent, ConsumerStrategies.Subscribe[String, String](topics, kafkaParams))
val words = inputKafkaStream.transform { rdd =>
rdd.flatMap(record => (record.value().toString.split(" ")))
}
val wordpairs = words.map(word => (word, 1))
ssc.checkpoint("/Users/nagainelu/bigdata/jobs/WordCount_checkpoint")
val updateFunc = (values: Seq[Int], state: Option[Int]) => {
val currentCount = values.foldLeft(0)(_ + _)
val previousCount = state.getOrElse(0)
Some(currentCount + previousCount)
}
val wordCounts = wordpairs.reduceByKey(_ + _).updateStateByKey(updateFunc)
wordCounts.print()
ssc.start()
ssc.awaitTermination()
}
Example 4: T01
// Set up the package name and import the required classes
package streaming
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
object T01 {
def main(args: Array[String]) {
val conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount")
val ssc = new StreamingContext(conf, Seconds(1))
val lines = ssc.socketTextStream("localhost", 9999)
val words = lines.flatMap(_.split(" "))
// Count each word in each batch
val pairs = words.map(word => (word, 1))
val wordCounts = pairs.reduceByKey(_ + _)
// Print the first ten elements of each RDD generated in this DStream to the console
wordCounts.print()
ssc.start() // Start the computation
ssc.awaitTermination() // Wait for the computation to terminate
// nc -lk 9999
// ./bin/run-example streaming.T01 localhost 9999
}
}
Example 5: Predict
// Set up the package name and import the required classes
package com.databricks.apps.twitterClassifier
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.KMeansModel
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.streaming.twitter._
import org.apache.spark.streaming.{Seconds, StreamingContext}
object Predict extends App {
import SparkSetup._
val options = PredictOptions.parse(args)
val ssc = new StreamingContext(sc, Seconds(options.intervalInSecs))
Predictor.doIt(options, sc, ssc)
}
object Predictor {
def doIt(options: PredictOptions, sc: SparkContext, ssc: StreamingContext) {
println("Initializing the KMeans model...")
val model: KMeansModel = new KMeansModel(sc.objectFile[Vector](options.modelDirectory.getCanonicalPath).collect)
println("Materializing Twitter stream...")
TwitterUtils.createStream(ssc, maybeTwitterAuth)
.map(_.getText)
.foreachRDD { rdd =>
rdd.filter(t => model.predict(featurize(t)) == options.clusterNumber)
.foreach(print) // register DStream as an output stream and materialize it
}
println("Initialization complete, starting streaming computation.")
ssc.start()
ssc.awaitTermination()
}
}
Example 6: StreamingWordCount
// Set up the package name and import the required classes
package org.examples.scala.examples
import org.apache.spark.streaming.{Seconds, StreamingContext}
import StreamingContext._
import org.apache.spark._
import org.apache.spark.SparkContext._
object StreamingWordCount {
def run(args: Array[String]) {
if (args.length < 2) {
System.err.println("Usage: BasicStreamingExample <master> <output>")
System.exit(1)
}
val Array(master, output) = args.take(2)
val conf = new SparkConf().setMaster(master).setAppName("BasicStreamingExample")
val ssc = new StreamingContext(conf, Seconds(30))
val lines = ssc.socketTextStream("localhost" , 7777)
val words = lines.flatMap(_.split(" "))
val wc = words.map(x => (x, 1)).reduceByKey((x, y) => x + y)
wc.saveAsTextFiles(output)
wc.print
println("pandas: sscstart")
ssc.start()
println("pandas: awaittermination")
ssc.awaitTermination()
println("pandas: done!")
}
}
Example 7: KafkaStreaming
// Set up the package name and import the required classes
package org.myorganization.spark.streaming
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{StreamingContext, Seconds}
import org.apache.spark.streaming.kafka._
import kafka.serializer.StringDecoder
object KafkaStreaming {
def main(args: Array[String]): Unit = {
val (batchDuration, topics, bootstrapServers) = getParams(args)
val conf = new SparkConf().setAppName("gpKafkaStreaming")
val sc = new SparkContext(conf)
val ssc = new StreamingContext(sc, Seconds(batchDuration))
val topicsSet = topics.split(",").toSet
val kafkaParams = Map[String, String]("bootstrap.servers" -> bootstrapServers, "auto.offset.reset" -> "smallest")
val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topicsSet)
val data = messages.map(_._2)
val loggerSerializerLogs = data.map(_.split("""\s+"""))
.filter(x => x.length > 6)
.map(x => (x(0), x(6)))
.filter(filterLogLines)
.map(x => x._1)
val logCounts = loggerSerializerLogs.map(x => (x, 1L)).reduceByKey(_ + _)
logCounts.print(10)
ssc.start()
ssc.awaitTermination()
}
def filterLogLines(line: Tuple2[String, String]): Boolean = {
val pattern = """logger.+"""
line._2.matches(pattern)
}
def getParams(args: Array[String]): Tuple3[Int, String, String] = {
if (args.length != 3) {
System.err.println(s"""
|Usage: spark-kafka.sh <sampling-period> <topics> <bootstrap-servers>
| <sampling-period> is the duration of each batch (in seconds)
| <topics> is a list of one or more kafka topics to consume from
| <bootstrap-servers> is a list of one or more Kafka bootstrap servers
|
""".stripMargin)
System.exit(1)
}
Tuple3[Int, String, String](args(0).toInt, args(1), args(2))
}
}
Example 8: SimpleApp
// Set up the package name and import the required classes
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
object SimpleApp {
def main(args: Array[String]) : Unit = {
val conf = new SparkConf().setAppName("Simple Application").set("spark.driver.allowMultipleContexts", "true")
val sc = new SparkContext(conf)
val ssc = new StreamingContext(sc, Seconds(5)) // reuse the existing SparkContext rather than creating a second one from the conf
val lines = ssc.socketTextStream("localhost", 9999)
val words = lines.flatMap(_.toLowerCase.split(" "))
words.foreachRDD { rdd =>
val sqlContext = SQLContext.getOrCreate(rdd.sparkContext)
import sqlContext.implicits._
val wordsDataFrame = rdd.toDF("words")
wordsDataFrame.registerTempTable("allwords")
val wcdf = sqlContext.sql("select words,count(*) as total from allwords group by words")
wcdf.show()
import org.elasticsearch.spark.sql._
wcdf.saveToEs("wordcount/wc")
}
ssc.start() // Start the computation
ssc.awaitTermination() // Wait for the computation to terminate
}
}
Example 9: Main
// Set up the package name and import the required classes
import Fqueue.{FqueueReceiver, FqueueSender}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
object Main {
private def sendData() = {
val fqueuSender = new FqueueSender("localhost:18740", 4, 4000)
fqueuSender.connect()
while (true) {
val ret = fqueuSender.enQueue("track_BOdao2015*", "123")
Thread.sleep(1000)
}
fqueuSender.stop()
}
private def getData() = {
val fqueueReceiver = new FqueueReceiver("localhost:18740", 4, 4000)
fqueueReceiver.connect()
val data = fqueueReceiver.deQueue("track_BOdao2015*")
println(data.getOrElse("null"))
fqueueReceiver.stop()
}
def main(args: Array[String]) {
new Thread("fqueue sender") {
override def run() { sendData() }
}.start()
val config = new SparkConf().setAppName("testfqueue").setMaster("local[2]")
val ssc = new StreamingContext(config, Seconds(5))
val lines = ssc.receiverStream(new FqueueStreamingReceiver("localhost:18740", 4, 4000))
lines.print()
ssc.start()
ssc.awaitTermination()
}
}
Example 10: SparkStreamKinesis
// Set up the package name and import the required classes
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream.LATEST
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK_2
import org.apache.spark.streaming.kinesis._
import org.apache.spark.streaming.{Duration, Seconds, StreamingContext}
object SparkStreamKinesis{
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setAppName("Spark Kinesis").setMaster("local[4]")
val ssc = new StreamingContext(conf, Seconds(1))
println("Spark Streaming")
val kinesisStream = KinesisUtils.createStream(ssc, "sparrow-ci",
"sparrow-ci",
"kinesis.us-east-1.amazonaws.com",
"us-east-1",
LATEST,
Duration(2000),
MEMORY_AND_DISK_2)
kinesisStream.print()
kinesisStream.map(new String(_)) // decode each Array[Byte] record into a String (flatMap here would split it into individual characters)
.foreachRDD(_.collect().foreach(print))
ssc.start()
ssc.awaitTermination()
}
}
Example 11: LogAnalyzerWindowed
// Set up the package name and import the required classes
package com.databricks.apps.logs
import scala.math.Ordering
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.dstream.DStream
class LogAnalyzerWindowed(val windowLength: Long, val slideInterval: Long) extends AnalyzeFunctions with Serializable {
import LogStatistics.EMPTY_LOG_STATISTICS
var logStatistics = EMPTY_LOG_STATISTICS
def processAccessLogs(accessLogsDStream: DStream[ApacheAccessLog]): Unit = {
val windowDStream: DStream[ApacheAccessLog] = accessLogsDStream
.window(Seconds(windowLength), Seconds(slideInterval))
windowDStream.foreachRDD(accessLogs => {
if (accessLogs.count() == 0) {
logStatistics = EMPTY_LOG_STATISTICS
} else {
logStatistics = LogStatistics(contentSizeStats(accessLogs).get,
responseCodeCount(accessLogs).take(100).toMap,
filterIPAddress(ipAddressCount(accessLogs)).take(100),
endpointCount(accessLogs).top(10)(Ordering.by[(String, Long), Long](_._2)).toMap)
}
})
}
def getLogStatistics: LogStatistics = logStatistics
}
Example 12: LogAnalyzerStreamingImportDirectory
// Set up the package name and import the required classes
package com.databricks.apps.logs.chapter2
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}
import com.databricks.apps.logs.{ApacheAccessLog, LogAnalyzerRDD}
object LogAnalyzerStreamingImportDirectory extends App {
val WINDOW_LENGTH = Seconds(30)
val SLIDE_INTERVAL = Seconds(10)
val spark = SparkSession
.builder()
.appName("Log Analyzer Import Streaming HDFS")
.getOrCreate()
val streamingContext = new StreamingContext(spark.sparkContext, SLIDE_INTERVAL)
val directory = args(0)
// This method monitors a directory for new files to read in for streaming.
val logData: DStream[String] = streamingContext.textFileStream(directory)
val accessLogsDStream: DStream[ApacheAccessLog] = logData.map(ApacheAccessLog.parseLogLine)
val windowDStream: DStream[ApacheAccessLog] = accessLogsDStream.window(WINDOW_LENGTH, SLIDE_INTERVAL)
val logAnalyzerRDD = LogAnalyzerRDD(spark)
windowDStream.foreachRDD(accessLogs => {
if (accessLogs.count() == 0) {
println("No access logs received in this time interval")
} else {
val logStatistics = logAnalyzerRDD.processRdd(accessLogs)
logStatistics.printToStandardOut()
}
})
// Start the streaming server.
streamingContext.start() // Start the computation
streamingContext.awaitTermination() // Wait for the computation to terminate
}
Author: krish121, Project: Spark-reference-applications, Lines: 42, Source: LogAnalyzerStreamingImportDirectory.scala
Example 13: DashboardController
// Set up the package name and import the required classes
package controllers
import javax.inject.{Inject, Singleton}
import akka.actor.ActorSystem
import akka.stream.Materializer
import org.apache.spark.streaming.Seconds
import play.api.Configuration
import play.api.inject.ApplicationLifecycle
import play.api.libs.streams.ActorFlow
import play.api.mvc._
import services.{SparkService, StreamActor}
@Singleton()
class DashboardController @Inject() ()(implicit system: ActorSystem, materializer: Materializer, configuration : Configuration, lifecycle: ApplicationLifecycle) extends Controller {
val sparkService = SparkService.getInstance(configuration, lifecycle)
def index = Action {
Ok(views.html.index())
}
def stream = WebSocket.accept[String, String] { request =>
val filters = Seq("Euro")
val tweets = sparkService.getTwitterStream(filters)
val stream = tweets
// .map { tweet =>
// tweet.getText
// }
// .flatMap { text => text.split("\\s") }
// .filter(_.startsWith("#"))
.map { tweet =>
Option(tweet.getPlace).map(_.getCountry)
}
.filter(_.isDefined)
.map(_.get)
.map { element => (element, 1)}
.reduceByKeyAndWindow(_ + _, Seconds(60))
.transform( count => count.sortBy(_._2, false))
ActorFlow.actorRef(out => StreamActor.props(out, stream))
}
}
Example 14: EnrichmentInAStream
// Set up the package name and import the required classes
package com.malaska.spark.training.streaming.dstream
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.{Seconds, StreamingContext}
object EnrichmentInAStream {
def main(args: Array[String]): Unit = {
val host = args(0)
val port = args(1)
val checkpointFolder = args(2)
val isLocal = true
val sparkSession = if (isLocal) {
SparkSession.builder
.master("local")
.appName("my-spark-app")
.config("spark.some.config.option", "config-value")
.config("spark.driver.host","127.0.0.1")
.config("spark.sql.parquet.compression.codec", "gzip")
.enableHiveSupport()
.getOrCreate()
} else {
SparkSession.builder
.appName("my-spark-app")
.config("spark.some.config.option", "config-value")
.enableHiveSupport()
.getOrCreate()
}
val ssc = new StreamingContext(sparkSession.sparkContext, Seconds(1)) // reuse the SparkContext from the SparkSession instead of building a second one from its conf
ssc.checkpoint(checkpointFolder)
val lines = ssc.socketTextStream(host, port.toInt)
val words = lines.flatMap(_.split(" "))
words.foreachRDD(rdd => rdd.foreachPartition(wordIt => {
//make connection to storage layer
// May use static connection
wordIt.foreach(word => {
word.toUpperCase
//write to storage location
})
}))
ssc.start()
ssc.awaitTermination()
}
}
Example 15: CountingInAStreamExpBatchCounting
// Set up the package name and import the required classes
package com.malaska.spark.training.streaming.dstream
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.{Seconds, StreamingContext}
object CountingInAStreamExpBatchCounting {
Logger.getLogger("org").setLevel(Level.OFF)
Logger.getLogger("akka").setLevel(Level.OFF)
def main(args:Array[String]): Unit = {
val host = args(0)
val port = args(1)
val checkpointFolder = args(2)
val isLocal = true
val sparkSession = if (isLocal) {
SparkSession.builder
.appName("my-spark-app")
.config("spark.some.config.option", "config-value")
.config("spark.driver.host","127.0.0.1")
.config("spark.sql.parquet.compression.codec", "gzip")
.enableHiveSupport()
.master("local[3]")
.getOrCreate()
} else {
SparkSession.builder
.appName("my-spark-app")
.config("spark.some.config.option", "config-value")
.enableHiveSupport()
.getOrCreate()
}
val ssc = new StreamingContext(sparkSession.sparkContext, Seconds(2))
ssc.checkpoint(checkpointFolder)
val lines = ssc.socketTextStream(host, port.toInt)
val words = lines.flatMap(line => line.toLowerCase.split(" "))
val wordCounts = words.map(word => (word, 1))
.reduceByKey((a,b) => a + b)
wordCounts.foreachRDD(rdd => {
println("{")
val localCollection = rdd.collect()
println(" size:" + localCollection.length)
localCollection.foreach(r => println(" " + r))
println("}")
})
ssc.start()
ssc.awaitTermination()
}
}