当前位置: 首页>>代码示例>>Scala>>正文


Scala OutputMode类代码示例

本文整理汇总了Scala中org.apache.spark.sql.streaming.OutputMode的典型用法代码示例。如果您正苦于以下问题：Scala OutputMode类的具体用法？Scala OutputMode怎么用？Scala OutputMode使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了OutputMode类的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: HttpTextSinkProvider

//设置package包名称以及导入依赖的类
package org.apache.spark.sql.execution.streaming

import org.apache.spark.annotation.InterfaceStability
import org.apache.spark.internal.Logging
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.sources.DataSourceRegister
import org.apache.spark.sql.sources.StreamSinkProvider
import org.apache.spark.sql.streaming.OutputMode

import Params._

/**
 * Stream sink provider that reports the short name "httpText" and builds an
 * [[HttpTextSink]] from the data source options.
 *
 * The `getString` / `getBool` accessors used on the options map are not
 * defined on a plain `Map`; presumably they are supplied by the `Params._`
 * import (confirm against that object).
 */
class HttpTextSinkProvider extends StreamSinkProvider with DataSourceRegister {
	/** Short alias for this data source. */
	def shortName(): String = "httpText"

	/**
	 * Builds the HTTP sink from the options map.
	 *
	 * Options read: "httpServletUrl", "topic" and "useGzipCompress" (the latter
	 * is looked up with `true` as its second argument).
	 * `sqlContext`, `partitionColumns` and `outputMode` are not used.
	 */
	def createSink(
		sqlContext: SQLContext,
		parameters: Map[String, String],
		partitionColumns: Seq[String],
		outputMode: OutputMode): Sink = {
		val servletUrl = parameters.getString("httpServletUrl")
		val topicName = parameters.getString("topic")
		val gzipEnabled = parameters.getBool("useGzipCompress", true)
		new HttpTextSink(servletUrl, topicName, gzipEnabled)
	}
}

/**
 * Sink that forwards every micro-batch to an HTTP endpoint through an
 * `HttpTextSender`.
 *
 * @param httpPostURL     URL handed to the `HttpTextSender`
 * @param topic           topic name passed along with every batch
 * @param useGzipCompress flag passed to `sendTextArray` on every send
 */
class HttpTextSink(httpPostURL: String, topic: String, useGzipCompress: Boolean) extends Sink with Logging {
	val sender = new HttpTextSender(httpPostURL)
	/** Maximum number of send attempts per batch. */
	val RETRY_TIMES = 5
	/** Base back-off in milliseconds; multiplied by the attempt number. */
	val SLEEP_TIME = 100

	/**
	 * Collects the batch on the driver and sends the first column of every row,
	 * cast to `String`, to the HTTP server.
	 *
	 * A failed attempt is retried up to `RETRY_TIMES` times in total, sleeping
	 * `SLEEP_TIME * attempt` milliseconds between attempts. The exception from
	 * the last attempt is rethrown to the caller.
	 *
	 * Only non-fatal exceptions are retried: fatal JVM errors (e.g.
	 * `OutOfMemoryError`) propagate immediately instead of being retried.
	 */
	override def addBatch(batchId: Long, data: DataFrame): Unit = {
		var success = false
		var retried = 0
		while (!success && retried < RETRY_TIMES) {
			retried += 1
			try {
				// send data to the HTTP server
				sender.sendTextArray(topic, batchId, data.collect().map { _.get(0).asInstanceOf[String] }, useGzipCompress)
				success = true
			}
			catch {
				case scala.util.control.NonFatal(e) =>
					logWarning("failed to send", e)
					// Out of attempts: surface the last failure to the caller.
					if (retried >= RETRY_TIMES) {
						throw e
					}
					val sleepTime = SLEEP_TIME * retried
					logWarning(s"will retry to send after ${sleepTime}ms")
					Thread.sleep(sleepTime)
			}
		}
	}
}
开发者ID:bluejoe2008,项目名称:spark-http-stream,代码行数:57,代码来源:HttpTextSink.scala

示例2: CustomSinkProvider

//设置package包名称以及导入依赖的类
package com.knockdata.spark.highcharts

import com.knockdata.spark.highcharts.model.Highcharts
import org.apache.spark.sql._
import org.apache.spark.sql.execution.streaming.Sink
import org.apache.spark.sql.sources.StreamSinkProvider
import org.apache.spark.sql.streaming.OutputMode

/**
 * Stream sink provider whose sink rebuilds a Highcharts chart from every
 * incoming batch and publishes the plot data through a `ZeppelinContextHolder`.
 *
 * The collaborators are looked up in `Registry` under keys derived from the
 * "chartId" option: "<chartId>-z", "<chartId>-seriesHolder" and
 * "<chartId>-outputMode".
 */
class CustomSinkProvider extends StreamSinkProvider {
  def createSink(
      sqlContext: SQLContext,
      parameters: Map[String, String],
      partitionColumns: Seq[String],
      outputMode: OutputMode): Sink = new Sink {

    /** Recomputes the chart for `data` and re-runs the target paragraph. */
    override def addBatch(batchId: Long, data: DataFrame): Unit = {
      // Both options are required; a missing key fails the batch.
      val chartId = parameters("chartId")
      val paragraphId = parameters("chartParagraphId")
      println(s"batchId: $batchId, chartId: $chartId, chartParagraphId: $paragraphId")

      val zeppelin = Registry.get(s"$chartId-z").asInstanceOf[ZeppelinContextHolder]
      val holder = Registry.get(s"$chartId-seriesHolder").asInstanceOf[SeriesHolder]
      // Named differently from the `outputMode` parameter to avoid shadowing it.
      val customMode = Registry.get(s"$chartId-outputMode").asInstanceOf[CustomOutputMode]

      // The holder derives its series from whatever DataFrame it currently holds.
      holder.dataFrame = data
      val computed = holder.result
      val (normalSeriesList, drilldownSeriesList) = customMode.result(computed._1, computed._2)

      val plotData =
        new Highcharts(normalSeriesList, holder.chartId)
          .drilldown(drilldownSeriesList)
          .plotData

      zeppelin.put(paragraphId, plotData)
      println(s"run $paragraphId")
      zeppelin.run(paragraphId)
    }
  }
}
开发者ID:knockdata,项目名称:spark-highcharts,代码行数:46,代码来源:CustomSinkProvider.scala

示例3: StructuredStreamingKafka

//设置package包名称以及导入依赖的类
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import org.apache.spark.sql.streaming.OutputMode

/**
 * Structured Streaming job that counts Kafka records per 10-second window.
 *
 * Reads topic "data" from the broker at localhost:9092, groups the records by
 * a 10-second window over the `timestamp` column, and writes one record per
 * window to topic "stats": the key is the formatted window end, the value is
 * the count as a string.
 *
 * An explicit `main` is used instead of `extends App`; the Spark documentation
 * warns that subclasses of `scala.App` may not work correctly.
 */
object StructuredStreamingKafka {

  def main(args: Array[String]): Unit = {
    Logger.getLogger("org").setLevel(Level.ERROR)

    val spark = SparkSession
      .builder
      .appName("StructuredStreamingKafka")
      .master("local[*]")
      .getOrCreate()

    import spark.implicits._

    val windowCounts = spark
      .readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "localhost:9092")
      .option("subscribe", "data")
      .option("startingOffsets", "earliest")
      .option("failOnDataLoss", "false")
      .load()
      .groupBy(window($"timestamp", "10 seconds"))
      .count()

    windowCounts
      .selectExpr(
        // HH = hour of day (0-23). The 12-hour `hh` without an AM/PM marker
        // would map e.g. 01:00 and 13:00 to the same key.
        "date_format(window.end, \"y-MM-dd HH:mm:ss\") AS key",
        "CAST(count AS STRING) AS value")
      .writeStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "localhost:9092")
      .option("topic", "stats")
      .option("checkpointLocation", "checkpoints")
      .outputMode(OutputMode.Update())
      .start()
      .awaitTermination()
  }
}
开发者ID:lightbend-reference-architectures,项目名称:structured-streaming-kafka,代码行数:41,代码来源:StructuredStreamingKafka.scala

示例4: CustomSinkProvider

//设置package包名称以及导入依赖的类
package com.rockiey.kafka

import org.apache.spark.sql._
import org.apache.spark.sql.execution.streaming.Sink
import org.apache.spark.sql.sources.StreamSinkProvider
import org.apache.spark.sql.streaming.OutputMode

/**
 * Stream sink provider for debugging: its sink prints the schema, a preview
 * and the row count of every batch to standard output.
 */
class CustomSinkProvider extends StreamSinkProvider {

  /** Prints schema, preview rows and row count of `batch`, in that order. */
  private def dump(batch: DataFrame): Unit = {
    batch.printSchema()
    batch.show()
    val rowCount = batch.count()
    println(s"count $rowCount")
  }

  /** Returns a sink that only prints each batch; all arguments are ignored. */
  def createSink(
      sqlContext: SQLContext,
      parameters: Map[String, String],
      partitionColumns: Seq[String],
      outputMode: OutputMode): Sink = new Sink {
    override def addBatch(batchId: Long, data: DataFrame): Unit = dump(data)
  }
}
开发者ID:rockie-yang,项目名称:explore-spark-kafka,代码行数:24,代码来源:CustomSinkProvider.scala

示例5: ClickHouseSinkProvider

//设置package包名称以及导入依赖的类
package io.clickhouse.ext.spark.streaming

import io.clickhouse.ext.ClickHouseUtils
import org.apache.spark.internal.Logging
import org.apache.spark.sql.{Encoders, SQLContext}
import org.apache.spark.sql.sources.StreamSinkProvider
import org.apache.spark.sql.streaming.OutputMode
import scala.reflect.{ClassTag, classTag}
import scala.reflect.runtime.universe.TypeTag

/**
 * Base class for stream sink providers that write rows of type `T` to
 * ClickHouse.
 *
 * Subclasses supply the connection and table settings. `createSink` issues a
 * "create table if not exists" statement derived from the product encoder
 * schema of `T` and then returns a `ClickHouseSink[T]`.
 *
 * @tparam T product type (e.g. a case class) whose encoder schema defines the
 *           table columns
 */
abstract class ClickHouseSinkProvider[T <: Product: ClassTag](implicit tag: TypeTag[T]) extends StreamSinkProvider with Serializable with Logging {

  /** Candidate servers as (host, port) pairs; only the first one is used. */
  def clickHouseServers: Seq[(String, Int)]
  /** Database that holds the target table. */
  def dbName: String
  /** Target table; when `None`, the simple class name of `T` is used. */
  def tableName: Option[String] = None
  /** Column name passed to the DDL builder and to the sink. */
  def eventDateColumnName: String
  /** Column names passed to the DDL builder as index columns. */
  def indexColumns: Seq[String]
  /** Row-to-date function handed to the sink. */
  def partitionFunc: (org.apache.spark.sql.Row) => java.sql.Date

  /**
   * Ensures the target table exists and builds the sink.
   *
   * `sqlContext`, `parameters`, `partitionColumns` and `outputMode` are not
   * used. Any failure while executing the DDL propagates to the caller; the
   * connection is closed either way.
   */
  override def createSink(
                           sqlContext: SQLContext,
                           parameters: Map[String, String],
                           partitionColumns: Seq[String],
                           outputMode: OutputMode): ClickHouseSink[T] = {

    val typeEncoder = Encoders.product[T]
    val schema = typeEncoder.schema
    // Fall back to the class name of T instead of failing on an unset tableName.
    val _tableName = tableName.getOrElse(classTag[T].runtimeClass.getSimpleName)

    val createTableSql = ClickHouseUtils.createTableIfNotExistsSql(
      schema,
      dbName,
      _tableName,
      eventDateColumnName,
      indexColumns
    )
    log.info("create new table sql:")
    log.info(createTableSql)

    val connection = ClickHouseUtils.createConnection(getConnectionString())
    try {
      connection.createStatement().execute(createTableSql)
    } finally {
      connection.close()
    }
    // Logged only after the DDL succeeded; a failure above skips this line.
    log.info(s"ClickHouse table ${dbName}.${_tableName} created")

    log.info("Creating ClickHouse sink")
    new ClickHouseSink[T](dbName, _tableName, eventDateColumnName)(getConnectionString)(partitionFunc)
  }

  /**
   * Returns the first configured server.
   *
   * @throws NoSuchElementException if `clickHouseServers` is empty
   */
  def getConnectionString(): (String, Int) =
    clickHouseServers.headOption.getOrElse(
      throw new NoSuchElementException(
        "clickHouseServers is empty: at least one (host, port) pair is required"))

}
开发者ID:DmitryBe,项目名称:spark-streaming-clickhouse,代码行数:55,代码来源:ClickHouseSinkProvider.scala

示例6: KafkaSourceExample

//设置package包名称以及导入依赖的类
import org.apache.spark.sql.streaming.{OutputMode, Trigger}
// Streams Kafka topic "topic1" to the console sink every 10 seconds.
// Relies on `spark` and the `$"..."` column syntax already being in scope
// (e.g. inside spark-shell).
val fromKafkaTopic1ToConsole = {
  val records = spark
    .readStream
    .format("kafka")
    .option("subscribe", "topic1")
    .option("kafka.bootstrap.servers", "localhost:9092")
    // latest, earliest or JSON with {"topicA":{"part":offset,"p1":-1},"topicB":{"0":-2}}
    .option("startingoffsets", "earliest")
    .load()
    // deserialize records
    .select($"key".cast("string"), $"value".cast("string"))
    .as[(String, String)]

  records
    .writeStream
    .trigger(Trigger.ProcessingTime("10 seconds"))
    .queryName("from-kafka-to-console")
    .outputMode(OutputMode.Append())
    .format("console")
    .start()
}

// ...after some time
fromKafkaTopic1ToConsole.stop()
开发者ID:jaceklaskowski,项目名称:spark-structured-streaming-book,代码行数:20,代码来源:KafkaSourceExample.scala


注:本文中的org.apache.spark.sql.streaming.OutputMode类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。