本文整理汇总了 Scala 中 org.apache.spark.sql.catalyst.expressions.AttributeReference 类的典型用法代码示例。如果您正苦于以下问题:Scala AttributeReference 类的具体用法?Scala AttributeReference 怎么用?Scala AttributeReference 使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 AttributeReference 类的 2 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Scala 代码示例。
示例1: ClickHouseSink
//设置package包名称以及导入依赖的类
package io.clickhouse.ext.spark.streaming
import io.clickhouse.ext.ClickHouseUtils
import io.clickhouse.ext.tools.Utils
import org.apache.spark.internal.Logging
import org.apache.spark.sql.{DataFrame, Encoders}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.execution.streaming.Sink
import scala.reflect.{ClassTag, classTag}
import scala.reflect.runtime.universe.TypeTag
/**
 * Structured Streaming [[Sink]] that writes each micro-batch into a ClickHouse table.
 *
 * Every non-empty partition of the batch opens its own connection, decodes the
 * internal rows back into instances of `T`, queues them on a prepared insert
 * statement and executes that statement as a single JDBC batch.
 *
 * @tparam T                  case class whose product encoder supplies the schema used both for
 *                            decoding rows and for building the insert statement
 * @param dbName              forwarded to `ClickHouseUtils.prepareInsertStatement`
 * @param tableName           forwarded to `ClickHouseUtils.prepareInsertStatement`
 * @param eventDataColumn     forwarded to `ClickHouseUtils.prepareInsertStatement`
 * @param getConnectionString called once per non-empty partition; yields `(host, port)`
 * @param partitionFunc       forwarded to `ClickHouseUtils.batchAdd` together with each row
 *                            (presumably derives the partition date of the row -- confirm
 *                            against `ClickHouseUtils`)
 */
class ClickHouseSink[T <: Product: ClassTag](dbName: String, tableName: String, eventDataColumn: String)
                                            (getConnectionString: () => (String, Int)) // -> (host, port)
                                            (partitionFunc: (org.apache.spark.sql.Row) => java.sql.Date)
                                            (implicit tag: TypeTag[T]) extends Sink with Serializable with Logging {

  /**
   * Writes one micro-batch to ClickHouse and logs how many rows were inserted.
   *
   * @param batchId identifier of the micro-batch; only used in the log message
   * @param data    the micro-batch to persist
   */
  override def addBatch(batchId: Long, data: DataFrame): Unit = {
    val res = data.queryExecution.toRdd.mapPartitions { iter =>
      val stateUpdateEncoder = Encoders.product[T]
      val schema = stateUpdateEncoder.schema
      val exprEncoder = stateUpdateEncoder.asInstanceOf[ExpressionEncoder[T]]

      if (iter.nonEmpty) {
        // Resolving and binding the encoder does not depend on the row, so do it
        // once per partition instead of once per row.
        val boundEncoder = exprEncoder.resolveAndBind(
          schema.map(f => AttributeReference(f.name, f.dataType, f.nullable, f.metadata)())
        )

        val clickHouseHostPort = getConnectionString()
        Utils.using(ClickHouseUtils.createConnection(clickHouseHostPort)) { connection =>
          val insertStatement =
            ClickHouseUtils.prepareInsertStatement(connection, dbName, tableName, eventDataColumn)(schema)

          iter.foreach { internalRow =>
            val caseClassInstance = boundEncoder.fromRow(internalRow)
            val row = org.apache.spark.sql.Row.fromTuple(caseClassInstance)
            ClickHouseUtils.batchAdd(schema, row)(insertStatement)(partitionFunc)
          }

          // NOTE(review): JDBC drivers may report negative status codes from
          // executeBatch(); the sum is only a row count if this driver returns
          // per-statement update counts -- confirm.
          val inserted = insertStatement.executeBatch().sum
          log.info(s"inserted $inserted -> (${clickHouseHostPort._1}:${clickHouseHostPort._2})")

          // The count is computed eagerly, so nothing touches the connection after it is closed.
          Iterator.single(inserted)
        } // end: close connection
      } else {
        Iterator.empty
      }
    } // end: mapPartition

    val insertedCount = res.collect().sum
    log.info(s"Batch $batchId's inserted total: $insertedCount")
  }
}
示例2: NaiveOrderResolution
//设置package包名称以及导入依赖的类
package by.skaryna.rules
import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression, NamedExpression, SortOrder}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, Sort}
import org.apache.spark.sql.catalyst.rules.Rule
/**
 * Logical-plan rule that removes an inner `Sort` sitting directly below a
 * `Project` when the outer `Sort` orders by the same column under a new name.
 *
 * The rewrite only fires when both sorts have exactly one sort order, the same
 * `global` flag and the same direction, and the outer sort column is an alias
 * (defined in the projection) of the inner sort expression.
 */
object NaiveOrderResolution extends Rule[LogicalPlan] {

  /**
   * Rewrites `Sort(Project(Sort(child)))` into `Sort(Project(child))` wherever the
   * conditions described on the object hold; other nodes are left untouched.
   */
  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    case Sort(Seq(order1), global1, Project(projectList, Sort(Seq(order2), global2, child2)))
      if global1 == global2 && order1.direction == order2.direction &&
        isOrderColumnRenamed(projectList, order1.child, order2.child) =>
      // Keep the outer sort and the projection, skip the redundant inner sort.
      val prunedProject = Project(projectList, child2)
      Sort(Seq(order1), global1, prunedProject)
  }

  /**
   * Tells whether `orderExpr1` refers to an alias of `orderExpr2` declared in `projectList`.
   *
   * @param projectList expressions of the projection between the two sorts
   * @param orderExpr1  sort expression of the outer sort
   * @param orderExpr2  sort expression of the inner sort
   * @return `true` when `orderExpr1` is an [[AttributeReference]] and some [[Alias]] in
   *         `projectList` wraps `orderExpr2` and carries the same `exprId` as that
   *         reference; `false` otherwise
   */
  private[skaryna] def isOrderColumnRenamed(projectList: Seq[NamedExpression],
      orderExpr1: Expression, orderExpr2: Expression): Boolean =
    orderExpr1 match {
      case outerRef: AttributeReference =>
        // Short-circuits on the first matching alias.
        projectList.exists {
          case alias: Alias => alias.child == orderExpr2 && alias.exprId == outerRef.exprId
          case _ => false
        }
      case _ => false
    }
}