当前位置: 首页>>代码示例>>Scala>>正文


Scala col函数代码示例

本文整理汇总了Scala中org.apache.spark.sql.functions.col函数的典型用法代码示例。如果您正苦于以下问题:Scala col函数的具体用法?Scala col怎么用?Scala col使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。


在下文中一共展示了col函数的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: DateTimeColumn

//设置package包名称以及导入依赖的类
package me.danielpes.spark.datetime

import org.apache.spark.sql.Column
import org.apache.spark.sql.types.{DataType, DateType, TimestampType}
import org.apache.spark.sql.functions.{col, udf}

/**
 * Wraps a Spark [[Column]] of date or timestamp values so that a `Period` can be added to,
 * or subtracted from, every value in the column.
 *
 * `Period` and `RichDate` are not defined in this file; presumably they live in the same
 * package (verify against the project sources).
 *
 * @param col      the wrapped column
 * @param dataType Spark SQL type of the values in `col`; the arithmetic operators only
 *                 support `DateType` and `TimestampType`
 */
class DateTimeColumn(val col: Column, dataType: DataType = TimestampType) {

  /**
   * Builds a column whose values are the wrapped values shifted forward by `p`.
   *
   * @param p the period to add to each value
   * @return a new column produced by a UDF applied to the wrapped column
   * @throws IllegalArgumentException if `dataType` is neither `DateType` nor `TimestampType`
   */
  def +(p: Period): Column = dataType match {
    case _: DateType => udf((d: java.sql.Date) => new RichDate(d) + p).apply(col)
    case _: TimestampType => udf((ts: java.sql.Timestamp) => new RichDate(ts) + p).apply(col)
    // Fail with a descriptive message instead of an opaque scala.MatchError.
    case other =>
      throw new IllegalArgumentException(
        s"Unsupported data type for date-time arithmetic: $other (expected DateType or TimestampType)")
  }

  /** Shifts the wrapped values backward by `p`; implemented as adding the negated period. */
  def -(p: Period): Column = this.+(-p)

  override def toString: String = s"{column: ${col.toString}, type: ${dataType.toString}}"
}

/**
 * Factory methods for [[DateTimeColumn]], accepting either an existing `Column` or a column
 * name, and either a Spark `DataType` or its string name ("date" / "timestamp").
 */
object DateTimeColumn {

  /** Wraps an existing column; the type defaults to `TimestampType`. */
  def apply(col: Column, dataType: DataType = TimestampType): DateTimeColumn =
    new DateTimeColumn(col, dataType)

  /** Wraps an existing column whose type is given by name ("date" or "timestamp"). */
  def apply(col: Column, typeString: String): DateTimeColumn =
    new DateTimeColumn(col, typeFromString(typeString))

  /** Wraps the column named `cName`, treated as `TimestampType`. */
  def apply(cName: String): DateTimeColumn =
    new DateTimeColumn(col(cName), TimestampType)

  /** Wraps the column named `cName` with an explicit data type. */
  def apply(cName: String, dataType: DataType): DateTimeColumn =
    new DateTimeColumn(col(cName), dataType)

  /** Wraps the column named `cName` whose type is given by name ("date" or "timestamp"). */
  def apply(cName: String, typeString: String): DateTimeColumn =
    new DateTimeColumn(col(cName), typeFromString(typeString))

  /**
   * Maps a type name to the corresponding Spark SQL type.
   *
   * @throws IllegalArgumentException if `s` is not exactly "date" or "timestamp"
   */
  private def typeFromString(s: String): DataType = s match {
    case "date" => DateType
    case "timestamp" => TimestampType
    // Previously an unknown name surfaced as an opaque scala.MatchError.
    case other =>
      throw new IllegalArgumentException(
        s"Unsupported type string '$other': expected 'date' or 'timestamp'")
  }
}
开发者ID:danielpes,项目名称:spark-datetime-lite,代码行数:32,代码来源:DateTimeColumn.scala

示例2: WordCounter

//设置package包名称以及导入依赖的类
package com.koverse.example.spark

import org.apache.spark.rdd.RDD
import com.koverse.sdk.data.SimpleRecord
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.functions.lower

/**
 * Counts word occurrences in a text field, for either an RDD of records or a DataFrame.
 *
 * @param textFieldName      name of the record field / DataFrame column holding the text
 * @param tokenizationString pattern handed to `String.split` to break the text into tokens
 */
class WordCounter(
    textFieldName: String,
    tokenizationString: String) extends java.io.Serializable {

  /**
   * Tokenizes the text field of every record, lower-cases and trims each token, and counts
   * how often each resulting word occurs.
   *
   * Records whose text field is null contribute no tokens (previously they caused a
   * NullPointerException).
   *
   * @param inputRecordsRdd records to read the text field from
   * @return one record per distinct word, with a "word" field and a "count" field
   */
  def count(inputRecordsRdd: RDD[SimpleRecord]): RDD[SimpleRecord] = {

    // Option(...) turns a null field value into "no tokens" instead of an NPE on toString.
    val tokens = inputRecordsRdd.flatMap { record =>
      Option(record.get(textFieldName)).toList
        .flatMap(value => value.toString.split(tokenizationString))
    }

    // (word, 1) pairs summed per word => RDD[(String, Int)]
    val wordCountRdd = tokens
      .map(token => (token.toLowerCase.trim, 1))
      .reduceByKey(_ + _)

    // Turn each (word, count) tuple into an output record.
    wordCountRdd.map { case (word, occurrences) =>
      val record = new SimpleRecord()
      record.put("word", word)
      record.put("count", occurrences)
      record
    }
  }

  /**
   * DataFrame variant: splits the text column into one row per token, lower-cases the
   * tokens and counts rows per distinct lower-cased word.
   *
   * Unlike the RDD variant, tokens are not trimmed here. Rows whose text column is null
   * contribute no tokens (previously the split function threw a NullPointerException).
   *
   * NOTE(review): `DataFrame.explode` is deprecated in newer Spark releases; it is kept
   * here because replacing it could change the tokenization results.
   *
   * @param inputDataFrame frame containing the text column
   * @return a frame grouped by the "lowerWord" column with the group sizes from `count()`
   */
  def count(inputDataFrame: DataFrame): DataFrame = {

    // Take the column that contains the text and tokenize it, one output row per token.
    val wordDF = inputDataFrame.explode(textFieldName, "word") { (text: String) =>
      Option(text).toList.flatMap(_.split(tokenizationString))
    }

    wordDF.select(lower(col("word")).as("lowerWord"))
          .groupBy("lowerWord")
          .count()
  }

}
开发者ID:Koverse,项目名称:koverse-spark-examples,代码行数:44,代码来源:WordCounter.scala

示例3: FeatureFuTransformer(节选,类声明未包含在片段中)

//设置package包名称以及导入依赖的类
package spark.feature

import org.apache.spark.ml.Transformer
import org.apache.spark.ml.attribute.AttributeGroup
import org.apache.spark.ml.param.{ParamMap, _}
import org.apache.spark.ml.util._
import org.apache.spark.sql.functions.{col, udf}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, UserDefinedFunction}


  /** Stores `value` under the `function` param via `set` and returns whatever `set` returns. */
  def setFunction(value: String => Double) = {
    set(function, value)
  }
  /** Returns the value currently held by the `function` param. */
  def getFunction() = {
    $(function)
  }

  /**
   * Appends the output column to `dataset`.
   *
   * The expression text held by the `expr` param is placed in a temporary column; then, for
   * each input column in order, every literal occurrence of that column's name in the
   * expression text is replaced by the row's value of that column. The fully substituted
   * text is finally passed to the `function` param to produce the output value. Temporary
   * columns are named "0", "1", ... and are dropped as soon as they are superseded.
   *
   * NOTE(review): substitution is plain `String.replace`, so an input column name that is
   * a substring of another name or of already-substituted text would also be replaced —
   * confirm that the column names in use cannot collide.
   *
   * @param dataset input frame; the substitution UDF takes the input column values as Double
   * @return `dataset` plus the column named by `outputCol`, carrying the schema metadata
   * @throws IllegalArgumentException if `inputCols` is empty, or if `transformSchema` rejects
   *                                  the schema
   */
  override def transform(dataset: DataFrame): DataFrame = {
    val outputSchema = transformSchema(dataset.schema)
    val metadata = outputSchema($(outputCol)).metadata

    // The seeding step below needs one existing column to hang the UDF on.
    require($(inputCols).nonEmpty, "inputCols must contain at least one column name")

    // Seed temporary column "0" with the raw expression text; the UDF ignores its argument
    // and only exists to produce the same string for every row.
    val dummy = udf { x: Any => $(expr) }
    val seeded = dataset.select(col("*"), dummy(col($(inputCols).head)).as("0"))

    // UDF replacing every occurrence of `name` in the expression text with the row's value.
    def subst(name: String) =
      udf((exp: String, elem: Double) => exp.replace(name, elem.toString))

    // Step i reads temporary column i, writes column i + 1 and drops column i.
    val substituted = $(inputCols).zipWithIndex.foldLeft(seeded) { case (data, (v, i)) =>
      data.select(col("*"), subst(v)(data(i.toString), data(v)).as((i + 1).toString))
        .drop(i.toString)
    }

    // After the fold the fully substituted text lives in the column named inputCols.length.
    val resultColumn = $(inputCols).length.toString
    val eval = udf($(function))
    substituted
      .select(col("*"), eval(substituted(resultColumn)).as($(outputCol), metadata))
      .drop(resultColumn)
  }


  /**
   * Derives the output schema: the input schema with one extra field appended, built from
   * an `AttributeGroup` named after `outputCol` and sized by `numFeatures`.
   *
   * @param schema schema of the incoming dataset
   * @return `schema` with the output field appended
   * @throws IllegalArgumentException if a field with the output name already exists
   */
  override def transformSchema(schema: StructType): StructType = {
    // TODO: Assertions on inputCols
    val outputField = new AttributeGroup($(outputCol), $(numFeatures)).toStructField()
    val alreadyPresent = schema.fieldNames.contains(outputField.name)
    require(!alreadyPresent, s"Column ${outputField.name} already exists.")
    StructType(schema.fields :+ outputField)
  }

  /** Copies this transformer by delegating to `defaultCopy` with the extra params. */
  override def copy(extra: ParamMap): FeatureFuTransformer = {
    defaultCopy(extra)
  }
} 
开发者ID:laxmanjangley,项目名称:FFrame,代码行数:38,代码来源:FeatureFuTransformer.scala


注:本文中的org.apache.spark.sql.functions.col类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。