This article collects typical usage examples of the Scala class org.apache.spark.sql.catalyst.plans.logical.Sort. If you are unsure what the Sort class does or how to use it, the curated examples below should help.
Two code examples involving the Sort class are shown below.
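For reference, Sort is a unary node in Catalyst's logical plan. The following is a minimal sketch of its shape, abridged from the Spark 2.x catalyst sources; it is an internal API, so the exact definition can differ between Spark versions:

case class Sort(
    order: Seq[SortOrder],    // sort expressions, e.g. SortOrder(col, Ascending)
    global: Boolean,          // true = total ordering, false = per-partition sort
    child: LogicalPlan) extends UnaryNode {
  override def output: Seq[Attribute] = child.output  // sorting preserves columns
}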
Example 1: NaiveOrderResolution
// Package declaration and imported dependencies
package by.skaryna.rules

import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression, NamedExpression, SortOrder}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, Sort}
import org.apache.spark.sql.catalyst.rules.Rule

// Optimizer rule that removes a redundant inner Sort from the pattern
// Sort -> Project -> Sort when the Project merely renames the sort column,
// so both sorts order the data identically.
object NaiveOrderResolution extends Rule[LogicalPlan] {
  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    case Sort(Seq(order1), global1, Project(projectList, Sort(Seq(order2), global2, child2)))
        if global1 == global2 && order1.direction == order2.direction &&
          isOrderColumnRenamed(projectList, order1.child, order2.child) =>
      // The inner Sort is redundant: keep the Project, drop the Sort beneath it.
      val prunedProject = Project(projectList, child2)
      Sort(Seq(order1), global1, prunedProject)
  }

  // True if the outer sort expression is just a renamed (aliased) version of the
  // inner one, i.e. some Alias in the projection maps orderExpr2 to the attribute
  // referenced by orderExpr1.
  private[skaryna] def isOrderColumnRenamed(projectList: Seq[NamedExpression],
                                            orderExpr1: Expression,
                                            orderExpr2: Expression): Boolean = {
    val aliasMatch = projectList.collect {
      case alias: Alias =>
        alias.child == orderExpr2 && orderExpr1.isInstanceOf[AttributeReference] &&
          alias.exprId == orderExpr1.asInstanceOf[AttributeReference].exprId
    }
    aliasMatch.contains(true)
  }
}
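To see the rewrite in isolation, here is a small sketch that builds the matched plan shape by hand and applies the rule directly. It leans on Spark's internal catalyst DSL (org.apache.spark.sql.catalyst.dsl), which Spark's own optimizer tests use to construct plans; the demo object and the key/key2 names are illustrative additions, not part of the original project:

package by.skaryna.rules

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation

// Builds Sort(key2) <- Project(key AS key2) <- Sort(key) <- relation
// and applies the rule, which should drop the inner Sort.
object NaiveOrderResolutionDemo extends App {
  val relation = LocalRelation('key.long)
  val key = relation.output.head
  val keyAsKey2 = key.as("key2")

  val plan = relation
    .orderBy(key.asc)                    // inner Sort on "key"
    .select(keyAsKey2)                   // Project renaming "key" to "key2"
    .orderBy(keyAsKey2.toAttribute.asc)  // outer Sort on the renamed column

  println(NaiveOrderResolution(plan))    // the inner Sort should be pruned
}

Printing the transformed plan should show a single Sort above the Project, which mirrors what the test below asserts against the optimized plan.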
Example 2: NaiveOrderResolutionTest
// Package declaration and imported dependencies
package by.skaryna.rules

import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, Sort}
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.LongType
import org.scalatest.{BeforeAndAfter, FlatSpec}

class NaiveOrderResolutionTest extends FlatSpec with BeforeAndAfter {
  private var spark: SparkSession = _

  before {
    // Inject the optimizer rule into a fresh local session for each test.
    spark = SparkSession.builder
      .master("local[*]")
      .withExtensions(extensions => extensions.injectOptimizerRule(session => NaiveOrderResolution))
      .config("spark.ui.enabled", "false")
      .getOrCreate()
  }

  after {
    if (spark != null) {
      spark.stop()
    }
  }

  "NaiveOrderResolution" should "avoid unnecessary sorting" in {
    // sort -> rename -> sort: the second sort orders by the same (renamed) column.
    val df = generateDataFrame(10)
    val sorted = df.sort("key")
    val renamed = sorted.withColumnRenamed("key", "key2")
    val sortedAgain = renamed.sort("key2")
    assert(checkOptimizedPlan(sortedAgain.queryExecution.optimizedPlan))
  }

  // After the rule fires, a single Sort should remain on top of the two
  // stacked Projects; the redundant inner Sort is gone.
  private def checkOptimizedPlan(logicalPlan: LogicalPlan): Boolean = logicalPlan match {
    case Sort(_, _, Project(_, Project(_, _))) => true
    case _ => false
  }

  private def generateDataFrame(cnt: Int): DataFrame = {
    val ids = spark.sqlContext.range(0, cnt)
    ids.withColumn("key", (rand() * 1000000).cast(LongType))
  }
}
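As a usage note: besides Builder.withExtensions, Spark 2.2+ can pick up the same rule through the spark.sql.extensions configuration key, which names a class of type SparkSessionExtensions => Unit with a no-arg constructor. A minimal sketch follows; the class name NaiveOrderResolutionExtensions is a hypothetical addition, not part of the original project:

package by.skaryna.rules

import org.apache.spark.sql.SparkSessionExtensions

// Hypothetical injector class, referenced by name from configuration, e.g.
//   spark-submit --conf spark.sql.extensions=by.skaryna.rules.NaiveOrderResolutionExtensions
class NaiveOrderResolutionExtensions extends (SparkSessionExtensions => Unit) {
  override def apply(extensions: SparkSessionExtensions): Unit = {
    extensions.injectOptimizerRule(_ => NaiveOrderResolution)
  }
}

This keeps the rule wiring out of application code, so jobs can opt in per deployment through configuration alone.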