

Scala Window Class Code Examples

This article collects typical usage examples of the org.apache.spark.sql.expressions.Window class in Scala. If you are wondering what the Window class is for and how to use it, the selected examples below should help.


Three code examples of the Window class are shown below, ordered by popularity by default.
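Before turning to the examples, here is a minimal sketch (the object name, column names and data are illustrative only, not taken from the projects below) of how the Window class is typically used: Window.partitionBy / orderBy / rowsBetween build a WindowSpec, which is then passed to an aggregate or ranking function via .over(...).

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.sum

object WindowBasicsExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("WindowBasics").master("local[*]").getOrCreate()
    import spark.implicits._

    // Illustrative sales data.
    val sales = Seq(
      ("shopA", "2017-01-01", 10.0),
      ("shopA", "2017-01-02", 20.0),
      ("shopB", "2017-01-01", 5.0)
    ).toDF("shop", "date", "amount")

    // Per-shop running total: partition by shop, order by date, and use a frame
    // from the first row of the partition up to the current row.
    // (Window.unboundedPreceding / Window.currentRow require Spark 2.1+;
    //  rowsBetween(Long.MinValue, 0) is the older equivalent.)
    val runningWindow = Window
      .partitionBy($"shop")
      .orderBy($"date")
      .rowsBetween(Window.unboundedPreceding, Window.currentRow)

    sales.withColumn("running_total", sum($"amount").over(runningWindow)).show()

    spark.stop()
  }
}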

Example 1: DrawdownCalculator

// Package declaration and imported dependencies
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions.max

object DrawdownCalculator {
  // Returns the maximum drawdown percentage of the given (date, price) series.
  def calculate(sparkSession: SparkSession, df: DataFrame): Double = {

    // Running window over every row from the start of the series up to the current row.
    val windowUptoCurrentRow = Window.orderBy("date").rowsBetween(Long.MinValue, 0)
    // Highest price seen so far (the running peak).
    val dfWithRollingMaxPrice = df.withColumn("rolling_max_price",
                                              max(df("price")).over(windowUptoCurrentRow))

    // Largest peak-to-trough drop observed so far.
    val dfWithRollingDrawdowns = dfWithRollingMaxPrice.withColumn("rolling_dd",
      max(dfWithRollingMaxPrice("rolling_max_price") - dfWithRollingMaxPrice("price")).over(windowUptoCurrentRow))

    dfWithRollingDrawdowns.createOrReplaceTempView("DrawdownCalculation")

    // Order by drawdown percentage descending so the largest (worst) drawdown comes first.
    val dfWithOrderedDrawndowns = sparkSession.sql("SELECT date, price, rolling_dd, rolling_max_price, " +
                                                   "(rolling_dd / rolling_max_price) as drawdown_pct " +
                                                   "FROM DrawdownCalculation ORDER BY drawdown_pct DESC")

    dfWithOrderedDrawndowns.show()

    // Read the drawdown figures from the first (worst) row; only the percentage is returned.
    val worstRow = dfWithOrderedDrawndowns.first()
    val rollingDrawdown = worstRow.getDouble(2)
    val rollingMaxPrice = worstRow.getDouble(3)
    val maxDrawdownPct = worstRow.getDouble(4)

    maxDrawdownPct
  }
} 
Author: tibkiss | Project: spark-risk-explorer | Lines: 30 | Source: DrawdownCalculator.scala
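A minimal, hypothetical way to invoke the calculator, assuming a local SparkSession and a small in-memory price series (the column names date and price match what calculate expects; the data is made up for illustration):

import org.apache.spark.sql.SparkSession

object DrawdownCalculatorExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("DrawdownExample").master("local[*]").getOrCreate()
    import spark.implicits._

    // Hypothetical price series: the peak of 120 followed by the drop to 90
    // gives a maximum drawdown of (120 - 90) / 120 = 0.25.
    val prices = Seq(
      ("2017-01-02", 100.0),
      ("2017-01-03", 120.0),
      ("2017-01-04", 90.0),
      ("2017-01-05", 110.0)
    ).toDF("date", "price")

    val maxDrawdownPct = DrawdownCalculator.calculate(spark, prices)
    println(s"Max drawdown: $maxDrawdownPct")

    spark.stop()
  }
}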

Example 2: naturalKeyColumns

// Package declaration and imported dependencies
package org.alghimo.spark.dimensionalModelling

import org.apache.spark.sql._
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{lit, max, row_number}


  // Excerpt from a dimensional-modelling trait (EnrichedDimensionOps.scala): type aliases such as
  // Dimensions, EnrichedDimensions, DIM and ENRICHED_DIM, as well as members like dimensionTable,
  // surrogateKeyColumn, timestampColumn and the various *ColumnName fields, are defined elsewhere
  // in the trait and omitted here.

  // Natural (business) key columns that uniquely identify a dimension row.
  def naturalKeyColumns: Seq[String]

  // Converts enriched dimension rows into dimension rows, assigning new surrogate keys
  // above the current maximum and initialising the slowly-changing-dimension columns.
  def enrichedDimensionsToDimensions(enrichedDims: EnrichedDimensions, refreshDimensionTable: Boolean = false): Dimensions = {
    val dimensions = dimensionTable(refreshDimensionTable)
    // Highest surrogate key currently present in the dimension table.
    val maxSk = dimensions.select(max(surrogateKeyColumn)).as[Long].collect().head
    // Single-partition window; row_number() yields a dense sequence 1, 2, 3, ...
    val rankWindow = Window.partitionBy().orderBy(naturalKeyColumns.head, naturalKeyColumns.tail:_*)

    enrichedDims
      .withColumn(surrogateKeyColumnName, lit(maxSk) + (row_number() over rankWindow))
      .withColumn(startTimestampColumnName, timestampColumn)
      .withColumn(endTimestampColumnName, lit(null).cast("timestamp"))
      .withColumn(isCurrentColumnName, lit(true))
      .selectExpr(dimensions.columns:_*)
      .as[DIM]
  }

  // Deduplicates events so that only the most recent row per natural key survives.
  def keepOnlyMostRecentEvents(enrichedDimensions: EnrichedDimensions): EnrichedDimensions = {
    // One partition per natural key, newest event first.
    val naturalKeyWindow = Window.partitionBy(naturalKeyColumns.map(new Column(_)):_*).orderBy(timestampColumn.desc)

    enrichedDimensions
      .withColumn("row_num", row_number() over naturalKeyWindow)
      .filter("row_num = 1")
      .drop("row_num")
      .as[ENRICHED_DIM]
  }
} 
Author: alghimo | Project: spark-dimensional-modelling | Lines: 34 | Source: EnrichedDimensionOps.scala
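The keep-the-latest-row-per-key pattern from keepOnlyMostRecentEvents can be tried on its own; the snippet below is a self-contained sketch, and the object name, column names and data are made up for illustration:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.row_number

object LatestEventPerKeyExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("LatestEventPerKey").master("local[*]").getOrCreate()
    import spark.implicits._

    // Hypothetical customer events: two rows for customer 1, one for customer 2.
    val events = Seq(
      (1L, "old address", "2017-05-01 10:00:00"),
      (1L, "new address", "2017-06-01 10:00:00"),
      (2L, "only address", "2017-05-15 09:30:00")
    ).toDF("customer_id", "address", "event_ts")

    // Partition by the natural key and keep only the newest event in each partition.
    val latestPerKey = Window.partitionBy($"customer_id").orderBy($"event_ts".desc)

    events
      .withColumn("row_num", row_number().over(latestPerKey))
      .filter($"row_num" === 1)
      .drop("row_num")
      .show() // customer 1 keeps only "new address"

    spark.stop()
  }
}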

Example 3: SparkSQLSearch

// Package declaration and imported dependencies
package com.jjzhk.sparkexamples.sql.search

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._


object SparkSQLSearch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("SparkSQLSearch").master("spark://Master:7077").enableHiveSupport().getOrCreate()
    import spark.implicits._
    spark.sql("use hive")
    // Count searches per (date, item) and keep only the five most-searched items for each date.
    val dfuv = spark.sql("select date, item, count(*) as uv from searchinfo group by date, item order by date")
      .withColumn("number", row_number().over(Window.partitionBy($"date").orderBy($"date", $"uv".desc)))
      .filter($"number".leq(5))
    // Join against the item table to attach readable item names.
    val dfitems = spark.sql("select * from items")
    dfuv.join(dfitems, Seq("item")).select("date", "itemname", "uv").show(200)
  }
} 
Author: JJZHK | Project: MySpark | Lines: 20 | Source: SparkSQLSearch.scala
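The example above needs a Hive metastore and a standalone cluster. The sketch below reproduces the same top-N-per-group pattern locally against a made-up in-memory dataset; the object name, column names and data are illustrative only:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{count, lit, row_number}

object TopItemsPerDateExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("TopItemsPerDate").master("local[*]").getOrCreate()
    import spark.implicits._

    // Hypothetical search log: one row per search event.
    val searches = Seq(
      ("2017-08-01", "phone"),
      ("2017-08-01", "phone"),
      ("2017-08-01", "laptop"),
      ("2017-08-02", "laptop")
    ).toDF("date", "item")

    // Rank items within each date by how often they were searched.
    val perDateRank = Window.partitionBy($"date").orderBy($"cnt".desc)

    searches
      .groupBy($"date", $"item")
      .agg(count(lit(1)).as("cnt"))
      .withColumn("number", row_number().over(perDateRank))
      .filter($"number" <= 2) // keep the top 2 items per date
      .show()

    spark.stop()
  }
}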


Note: the org.apache.spark.sql.expressions.Window examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs; the snippets were selected from open-source projects contributed by their respective developers. Copyright of the source code remains with the original authors; please consult each project's License before distributing or using the code, and do not reproduce this article without permission.