This article collects typical usage examples of the Scala class org.apache.spark.sql.catalyst.encoders.ExpressionEncoder. If you are wondering what ExpressionEncoder does, how to use it, or what working code looks like, the curated class examples below should help.
Three code examples of the ExpressionEncoder class are shown below, sorted by popularity by default.
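Before the examples, here is a minimal round-trip sketch of what ExpressionEncoder does (Spark 2.x API, where the encoder still exposes toRow/fromRow directly; later versions split these into createSerializer/createDeserializer; the Point case class is purely illustrative): it serializes a JVM object into Catalyst's internal row format and, once resolved and bound to a schema, deserializes it back.

import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder

case class Point(x: Int, y: Int)

val enc = ExpressionEncoder[Point]()       // derived by reflection from the case class
val internalRow = enc.toRow(Point(1, 2))   // object -> Catalyst InternalRow
val bound = enc.resolveAndBind()           // bind to the encoder's own schema
val back = bound.fromRow(internalRow)      // InternalRow -> Point(1, 2)

All three examples below build on this object-to-InternalRow round trip.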
Example 1: ClickHouseSink
// Package declaration and the imported dependencies
package io.clickhouse.ext.spark.streaming

import io.clickhouse.ext.ClickHouseUtils
import io.clickhouse.ext.tools.Utils
import org.apache.spark.internal.Logging
import org.apache.spark.sql.{DataFrame, Encoders}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.execution.streaming.Sink
import scala.reflect.{ClassTag, classTag}
import scala.reflect.runtime.universe.TypeTag

class ClickHouseSink[T <: Product: ClassTag](dbName: String, tableName: String, eventDataColumn: String)
                    (getConnectionString: () => (String, Int)) // -> (host, port)
                    (partitionFunc: (org.apache.spark.sql.Row) => java.sql.Date)
                    (implicit tag: TypeTag[T]) extends Sink with Serializable with Logging {

  override def addBatch(batchId: Long, data: DataFrame): Unit = {
    val res = data.queryExecution.toRdd.mapPartitions { iter =>
      // Build the encoder on the executor; ExpressionEncoder is the concrete
      // implementation behind Encoders.product.
      val stateUpdateEncoder = Encoders.product[T]
      val schema = stateUpdateEncoder.schema
      val exprEncoder = stateUpdateEncoder.asInstanceOf[ExpressionEncoder[T]]
      if (iter.nonEmpty) {
        val clickHouseHostPort = getConnectionString()
        Utils.using(ClickHouseUtils.createConnection(clickHouseHostPort)) { connection =>
          val insertStatement = ClickHouseUtils.prepareInsertStatement(connection, dbName, tableName, eventDataColumn)(schema)
          // Resolve and bind the encoder once per partition rather than per row.
          val boundEncoder = exprEncoder.resolveAndBind(
            schema.map(f => AttributeReference(f.name, f.dataType, f.nullable, f.metadata)())
          )
          iter.foreach { internalRow =>
            // Decode the InternalRow back into the case class T, then into an external Row.
            val caseClassInstance = boundEncoder.fromRow(internalRow)
            val row = org.apache.spark.sql.Row.fromTuple(caseClassInstance)
            ClickHouseUtils.batchAdd(schema, row)(insertStatement)(partitionFunc)
          }
          val inserted = insertStatement.executeBatch().sum
          log.info(s"inserted $inserted -> (${clickHouseHostPort._1}:${clickHouseHostPort._2})")
          Iterator(inserted)
        } // end: close connection
      } else {
        Iterator.empty
      }
    } // end: mapPartitions
    val insertedCount = res.collect().sum
    log.info(s"Batch $batchId's inserted total: $insertedCount")
  }
}
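A hypothetical usage sketch for the sink above. The Event case class, the events_db/events names, the localhost:8123 endpoint, and someEventDataFrame are all illustrative placeholders, not part of the original library:

case class Event(id: Long, name: String, eventDate: java.sql.Date)

val sink = new ClickHouseSink[Event]("events_db", "events", "eventDate")(
  () => ("localhost", 8123)                     // host/port resolver
)(
  row => row.getAs[java.sql.Date]("eventDate")  // partition rows by event date
)

// Structured Streaming normally invokes this once per micro-batch;
// calling it directly also works for a one-off DataFrame.
sink.addBatch(batchId = 0L, data = someEventDataFrame) // someEventDataFrame: hypothetical DataFrame[Event]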
Example 2: KarpsStubs
// Package declaration and the imported dependencies
package org.apache.spark.sql

import org.apache.spark.SparkContext
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.execution.{QueryExecution, SQLExecution}

// Lives inside the org.apache.spark.sql package so it can reach
// private[sql] members such as Dataset.exprEnc and Column.expr.
object KarpsStubs {
  def withExecutionId[T](sc: SparkContext, executionId: String)(body: => T): T = {
    SQLExecution.withExecutionId(sc, executionId)(body)
  }

  def withNewExecutionId[T](
      sparkSession: SparkSession,
      queryExecution: QueryExecution)(body: => T): T = {
    SQLExecution.withNewExecutionId(sparkSession, queryExecution)(body)
  }

  // Returns the DataFrame's encoder, resolved and bound to the plan's output,
  // so fromRow can turn InternalRows back into external Rows.
  def getBoundEncoder(df: DataFrame): ExpressionEncoder[Row] = {
    df.exprEnc.resolveAndBind(df.logicalPlan.output,
      df.sparkSession.sessionState.analyzer)
  }

  def getExpression(c: Column): Expression = c.expr

  def makeColumn(exp: Expression): Column = Column.apply(exp)
}
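A short sketch of what getBoundEncoder enables (hypothetical driver-side code; assumes Spark 2.x, where ExpressionEncoder still exposes fromRow): decoding a plan's raw InternalRows back into external Rows.

import org.apache.spark.sql.{KarpsStubs, Row, SparkSession}

val spark = SparkSession.builder.master("local").getOrCreate()
val df = spark.range(3).toDF("id")

val boundEnc = KarpsStubs.getBoundEncoder(df)
// Collect the InternalRows, then decode them on the driver.
val rows: Array[Row] = df.queryExecution.toRdd.collect().map(boundEnc.fromRow)
rows.foreach(println) // [0], [1], [2]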
Example 3: BenchmarkTestData
// Package declaration and the imported dependencies
package com.datawizards.sparklocal.performance

import com.datawizards.sparklocal.TestModel.Person
import com.datawizards.sparklocal.performance.BenchmarkModel.{InputDataSets, InputRDDs}
import com.datawizards.sparklocal.session.{ExecutionEngine, SparkSessionAPI}
import com.datawizards.sparklocal.implicits._
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.scalacheck.Arbitrary
import org.scalacheck.Shapeless._
import scala.reflect.ClassTag
import scala.reflect.runtime.universe.TypeTag

object BenchmarkTestData {
  lazy val dataSets10Elements: InputDataSets[Person] = createInputDataSets(people10Elements)
  lazy val dataSets100Elements: InputDataSets[Person] = createInputDataSets(people100Elements)
  lazy val dataSets1000Elements: InputDataSets[Person] = createInputDataSets(people1000Elements)
  lazy val dataSets100000Elements: InputDataSets[Person] = createInputDataSets(people100000Elements)

  lazy val rdds10Elements: InputRDDs[Person] = createInputRDDs(people10Elements)
  lazy val rdds100Elements: InputRDDs[Person] = createInputRDDs(people100Elements)
  lazy val rdds1000Elements: InputRDDs[Person] = createInputRDDs(people1000Elements)
  lazy val rdds100000Elements: InputRDDs[Person] = createInputRDDs(people100000Elements)

  def createInputDataSets[T: ClassTag: TypeTag](data: Seq[T]): InputDataSets[T] = {
    // Derive an ExpressionEncoder for T via reflection; createDataset needs it implicitly.
    implicit val encoder = ExpressionEncoder[T]()
    InputDataSets(
      scalaEagerImpl = scalaEagerSession.createDataset(data),
      scalaLazyImpl = scalaLazySession.createDataset(data),
      scalaParallelImpl = scalaParallelSession.createDataset(data),
      scalaParallelLazyImpl = scalaParallelLazySession.createDataset(data),
      sparkImpl = sparkSession.createDataset(data)
    )
  }

  private def createInputRDDs[T: ClassTag](data: Seq[T]): InputRDDs[T] =
    InputRDDs(
      scalaEagerImpl = scalaEagerSession.createRDD(data),
      scalaLazyImpl = scalaLazySession.createRDD(data),
      scalaParallelImpl = scalaParallelSession.createRDD(data),
      scalaParallelLazyImpl = scalaParallelLazySession.createRDD(data),
      sparkImpl = sparkSession.createRDD(data)
    )

  private lazy val scalaEagerSession = SparkSessionAPI.builder(ExecutionEngine.ScalaEager).master("local").getOrCreate()
  private lazy val scalaLazySession = SparkSessionAPI.builder(ExecutionEngine.ScalaLazy).master("local").getOrCreate()
  private lazy val scalaParallelSession = SparkSessionAPI.builder(ExecutionEngine.ScalaParallel).master("local").getOrCreate()
  private lazy val scalaParallelLazySession = SparkSessionAPI.builder(ExecutionEngine.ScalaParallelLazy).master("local").getOrCreate()
  private lazy val sparkSession = SparkSessionAPI.builder(ExecutionEngine.Spark).master("local").getOrCreate()

  // Random test data generated from scalacheck's derived Arbitrary[Person] instance.
  private lazy val peopleGenerator = implicitly[Arbitrary[Person]].arbitrary
  private lazy val people10Elements = for (_ <- 1 to 10) yield peopleGenerator.sample.get
  private lazy val people100Elements = for (_ <- 1 to 100) yield peopleGenerator.sample.get
  private lazy val people1000Elements = for (_ <- 1 to 1000) yield peopleGenerator.sample.get
  private lazy val people100000Elements = for (_ <- 1 to 100000) yield peopleGenerator.sample.get
}
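The same derivation pattern, reduced to a minimal standalone helper (a sketch with illustrative names, using plain Spark without the sparklocal wrappers): ExpressionEncoder[T]() builds the implicit Encoder that createDataset requires for an arbitrary product type.

import org.apache.spark.sql.{Dataset, SparkSession}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import scala.reflect.runtime.universe.TypeTag

def toDataset[T <: Product : TypeTag](spark: SparkSession, data: Seq[T]): Dataset[T] = {
  // ExpressionEncoder.apply derives serializer/deserializer expressions from T's fields
  implicit val encoder = ExpressionEncoder[T]()
  spark.createDataset(data)
}

// e.g. toDataset(spark, people) for any Seq of case class instances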