当前位置: 首页>>代码示例>>Scala>>正文


Scala Encoder类代码示例

本文整理汇总了Scala中org.apache.spark.sql.Encoder的典型用法代码示例。如果您正苦于以下问题:Scala Encoder类的具体用法?Scala Encoder怎么用?Scala Encoder使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Encoder类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: GladLogLikelihoodAggregator

//设置package包名称以及导入依赖的类
package com.enriquegrodrigo.spark.crowd.aggregators 

import com.enriquegrodrigo.spark.crowd.types.GladPartial
import com.enriquegrodrigo.spark.crowd.types.GladParams
import com.enriquegrodrigo.spark.crowd.utils.Functions
import org.apache.spark.sql.{Encoder,Encoders}
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.broadcast.Broadcast

import scala.math.log

/** Aggregator that accumulates the GLAD log-likelihood over the annotations of one example.
  *
  * The buffer carries the running log-likelihood sum (`agg`) and the example's class-1
  * probability estimate (`classProb`, sentinel -1 until a row has been seen).
  *
  * Fix: `finish` previously declared `agg`, `w0` and `w1` and never used them,
  * recomputing `params.value.w(...)` inline; the dead locals are gone and the
  * weight lookups are now done once.
  */
private[crowd] class GladLogLikelihoodAggregator(params: Broadcast[GladParams]) 
  extends Aggregator[GladPartial, GladLogLikelihoodAggregatorBuffer, Double]{

  /** Neutral buffer: zero accumulated likelihood; -1 marks classProb as "not yet set". */
  def zero: GladLogLikelihoodAggregatorBuffer = GladLogLikelihoodAggregatorBuffer(0,-1)

  /** Folds one annotation into the buffer.
    *
    * Adds the expected log-likelihood contribution of this annotation under both
    * class hypotheses, weighted by the current estimate `a.est`.
    * NOTE(review): classProb is overwritten with this row's estimate each time —
    * assumes all rows reaching one aggregation group share the same example; confirm at call site.
    */
  def reduce(b: GladLogLikelihoodAggregatorBuffer, a: GladPartial) : GladLogLikelihoodAggregatorBuffer = {
    val alphaVal = params.value.alpha(a.annotator.toInt)
    val betaVal = a.beta
    // Probability that the annotator labels correctly for this example.
    val sig = Functions.sigmoid(alphaVal*betaVal) 
    val p0 = 1-a.est
    val p1 = a.est
    // Likelihood of the observed label under class 0 / class 1 respectively.
    val k0 = if (a.value == 0) sig else 1-sig 
    val k1 = if (a.value == 1) sig else 1-sig 
    GladLogLikelihoodAggregatorBuffer(b.agg + Functions.prodlog(p0,k0) 
                                          + Functions.prodlog(p1,k1), p1) 
  }

  /** Combines two partial buffers: sums the likelihood terms and keeps the first
    * classProb that is not the -1 sentinel. */
  def merge(b1: GladLogLikelihoodAggregatorBuffer, b2: GladLogLikelihoodAggregatorBuffer) : GladLogLikelihoodAggregatorBuffer = { 
    GladLogLikelihoodAggregatorBuffer(b1.agg + b2.agg, if (b1.classProb == -1) b2.classProb else b1.classProb)
  }

  /** Adds the class-prior terms (weights `w`) to the accumulated annotation likelihood. */
  def finish(reduction: GladLogLikelihoodAggregatorBuffer) = {
    val w0 = params.value.w(0)
    val w1 = params.value.w(1)
    reduction.agg + Functions.prodlog(1 - reduction.classProb, w0) +
      Functions.prodlog(reduction.classProb, w1)
  }

  /** Buffer is a case class, so the product encoder applies. */
  def bufferEncoder: Encoder[GladLogLikelihoodAggregatorBuffer] = Encoders.product[GladLogLikelihoodAggregatorBuffer]

  def outputEncoder: Encoder[Double] = Encoders.scalaDouble
}

private[crowd] case class GladLogLikelihoodAggregatorBuffer(agg: Double, classProb: Double) 
开发者ID:enriquegrodrigo,项目名称:spark-crowd,代码行数:49,代码来源:GladLogLikelihoodAggregator.scala

示例2: dimensionTableName

//设置package包名称以及导入依赖的类
package org.alghimo.spark.dimensionalModelling

import org.apache.spark.sql.{Column, Dataset, Encoder, Encoders}


  def dimensionTableName: String

  /** Name of the temporary staging table: same database, "tmp_" prefix on the table part.
    * Still fails with a MatchError when dimensionTableName is not exactly "db.table". */
  def tmpDimensionTableName: String = {
    val parts = dimensionTableName.split('.')
    val Array(db, table) = parts
    s"$db.tmp_$table"
  }

  def maxPartitionsInDimensionTable: Int = 400

  /** Loads the dimension table as a typed Dataset.
    *
    * @param refresh when true, invalidates the catalog's cached metadata for the table first.
    */
  def dimensionTable(refresh: Boolean = false): Dataset[DIM] = {
    if (refresh) spark.catalog.refreshTable(dimensionTableName)
    spark.table(dimensionTableName).as[DIM]
  }

  // Rows whose "is current" flag holds (SQL filter on the column named by isCurrentColumnName).
  def currentDimensions(refresh: Boolean = false): Dataset[DIM] = dimensionTable(refresh).filter(s"${isCurrentColumnName}")
  // Complement of currentDimensions: historical (non-current) rows.
  def notCurrentDimensions(refresh: Boolean = false): Dataset[DIM] = dimensionTable(refresh).filter(s"NOT ${isCurrentColumnName}")

  /** Persists `ds` as the dimension table and returns the freshly re-read table.
    *
    * @param ds           dataset to persist
    * @param useTempTable when true, `ds` is first materialized into a staging table and
    *                     re-read from there; this breaks the logical lineage so the final
    *                     overwrite does not read from the very table it is rewriting.
    * @return the dimension table re-read after the write (catalog refreshed)
    */
  def save(ds: Dataset[DIM], useTempTable: Boolean = true): Dataset[DIM] = {
    println("Saving dimensions..")
    val toSave = if (useTempTable) {
      // Materialize into the staging table, then read back from it.
      ds
        .coalesce(maxPartitionsInDimensionTable)
        .write
        .mode("overwrite")
        .saveAsTable(tmpDimensionTableName)

      spark.table(tmpDimensionTableName)
    } else {
      ds
    }

    // Final overwrite of the real dimension table.
    toSave
      .coalesce(maxPartitionsInDimensionTable)
      .write
      .mode("overwrite")
      .saveAsTable(dimensionTableName)

    // Staging table is only needed for the duration of the write.
    if (useTempTable) {
      spark.sql(s"DROP TABLE ${tmpDimensionTableName} PURGE")
    }

    dimensionTable(refresh = true)
  }
} 
开发者ID:alghimo,项目名称:spark-dimensional-modelling,代码行数:54,代码来源:DimensionTableProvider.scala

示例3: enrichedWithDimensionEncoder

//设置package包名称以及导入依赖的类
package org.alghimo.spark.dimensionalModelling

import org.apache.spark.sql.{Dataset, Encoder}


/** Provides the tuple/Dataset conversions between (enriched, dimension) and (dimension, enriched) pairings.
  *
  * Fix: the second and fourth conversions previously had the SAME parameter/result types as the
  * conversion directly above them (two identical implicit conversions -> ambiguous implicits),
  * while their names claimed the inverse direction. They now actually convert in the inverse
  * direction, matching their names.
  */
trait EnrichedDimensionWithDimensionProvider[ENRICHED_DIM <: (Product with Serializable), DIM <: (Product with Serializable)] {
  type EnrichedWithDimension = (ENRICHED_DIM, DIM)
  type DimensionWithEnriched = (DIM, ENRICHED_DIM)
  type JoinEnrichedWithDimension = Dataset[EnrichedWithDimension]
  type JoinDimensionWithEnriched = Dataset[DimensionWithEnriched]

  // Encoders must be supplied by the concrete provider; needed by the Dataset.map conversions below.
  implicit def enrichedWithDimensionEncoder: Encoder[(ENRICHED_DIM, DIM)]
  implicit def dimensionWithEnrichedEncoder: Encoder[(DIM, ENRICHED_DIM)]

  implicit def enrichedWithDimensionToDimensionWithEnriched(e: EnrichedWithDimension): DimensionWithEnriched = e.swap
  implicit def dimensionWithEnrichedToEnrichedWithDimension(d: DimensionWithEnriched): EnrichedWithDimension = d.swap
  implicit def joinEnrichedWithDimensionToJoinDimensionWithEnriched(e: JoinEnrichedWithDimension): JoinDimensionWithEnriched = e.map(_.swap)
  implicit def joinDimensionWithEnrichedToJoinEnrichedWithDimension(d: JoinDimensionWithEnriched): JoinEnrichedWithDimension = d.map(_.swap)
} 
开发者ID:alghimo,项目名称:spark-dimensional-modelling,代码行数:20,代码来源:EnrichedDimensionWithDimensionProvider.scala

示例4: toKeyValueGroupedDataSet

//设置package包名称以及导入依赖的类
package com.datawizards.sparklocal.dataset

import org.apache.spark.sql.{Encoder, KeyValueGroupedDataset, SparkSession}

import scala.reflect.ClassTag

/** Backend-agnostic facade over Spark's KeyValueGroupedDataset.
  *
  * Encoder parameters default to null so that pure-Scala backends can ignore them;
  * the Spark backend supplies real encoders (NOTE(review): inferred from the null
  * defaults visible here — confirm against the implementations).
  */
trait KeyValueGroupedDataSetAPI[K, V] {
  // Lazily obtains (or creates) the active SparkSession.
  protected lazy val spark: SparkSession = SparkSession.builder().getOrCreate()
  // Bridges this abstraction back to a real Spark KeyValueGroupedDataset.
  private[sparklocal] def toKeyValueGroupedDataSet(implicit encK: Encoder[K], encT: Encoder[V], encKT: Encoder[(K, V)]): KeyValueGroupedDataset[K, V]

  // Number of elements per key.
  def count(): DataSetAPI[(K, Long)]
  // Maps each whole group to a single value.
  def mapGroups[U: ClassTag](f: (K, Iterator[V]) => U)
                            (implicit enc: Encoder[U]=null): DataSetAPI[U]
  // Pairwise reduction of each group's values.
  def reduceGroups(f: (V, V) => V): DataSetAPI[(K, V)]
  // Transforms values while keeping the grouping keys.
  def mapValues[W: ClassTag](func: V => W)
                            (implicit enc: Encoder[W]=null): KeyValueGroupedDataSetAPI[K, W]
  // Maps each group to zero or more output values.
  def flatMapGroups[U: ClassTag](f: (K, Iterator[V]) => TraversableOnce[U])
                                (implicit enc: Encoder[U]=null): DataSetAPI[U]
  // Distinct grouping keys.
  def keys: DataSetAPI[K]
  // Joins the groups of two datasets by key and maps each co-grouped pair of iterators.
  def cogroup[U: ClassTag, R: ClassTag](other: KeyValueGroupedDataSetAPI[K, U])
                                       (f: (K, Iterator[V], Iterator[U]) => TraversableOnce[R])
                                       (implicit
                                          encK: Encoder[K]=null,
                                          encV: Encoder[V]=null,
                                          encU: Encoder[U]=null,
                                          encR: Encoder[R]=null,
                                          encKV: Encoder[(K,V)]=null,
                                          encKU: Encoder[(K,U)]=null
                                       ): DataSetAPI[R]
} 
开发者ID:piotr-kalanski,项目名称:spark-local,代码行数:31,代码来源:KeyValueGroupedDataSetAPI.scala

示例5: DataSetAPIScalaLazyImpl

//设置package包名称以及导入依赖的类
package com.datawizards.sparklocal.impl.scala.`lazy`.dataset

import com.datawizards.sparklocal.dataset.{DataSetAPI, KeyValueGroupedDataSetAPI}
import com.datawizards.sparklocal.impl.scala.dataset.DataSetAPIScalaBase
import com.datawizards.sparklocal.rdd.RDDAPI
import org.apache.spark.sql.Encoder

import scala.collection.{GenIterable, SeqView}
import scala.reflect.ClassTag

object DataSetAPIScalaLazyImpl {
  /** Wraps any GenIterable in a lazily evaluated, SeqView-backed dataset. */
  private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]): DataSetAPIScalaBase[U] = {
    val view = it.toSeq.seq.view
    new DataSetAPIScalaLazyImpl(view)
  }
}

/** Pure-Scala dataset backend whose internal collection is a lazy SeqView. */
class DataSetAPIScalaLazyImpl[T: ClassTag](private[sparklocal] val data: SeqView[T, Seq[T]]) extends DataSetAPIScalaBase[T] {
  override type InternalCollection = SeqView[T, Seq[T]]

  // Delegates wrapping to the companion so every derived dataset stays lazy.
  override private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]): DataSetAPIScalaBase[U] =
    DataSetAPIScalaLazyImpl.create(it)

  override protected def union(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] = {
    val rhs = dsScala.data.toSeq
    create(data.union(rhs))
  }

  override protected def intersect(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] = {
    val rhs = dsScala.data.toSeq
    create(data.intersect(rhs))
  }

  override protected def diff(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] = {
    val rhs = dsScala.data.toSeq
    create(data.diff(rhs))
  }

  override def distinct(): DataSetAPI[T] = create(data.distinct)

  // Grouping materializes a Map; the grouped view is handled by the lazy KV implementation.
  override def groupByKey[K: ClassTag](func: (T) => K)(implicit enc: Encoder[K]): KeyValueGroupedDataSetAPI[K, T] =
    new KeyValueGroupedDataSetAPIScalaLazyImpl(data.groupBy(func))

  override def rdd(): RDDAPI[T] = RDDAPI(data)
}
开发者ID:piotr-kalanski,项目名称:spark-local,代码行数:39,代码来源:DataSetAPIScalaLazyImpl.scala

示例6: implicits

//设置package包名称以及导入依赖的类
package com.datawizards.sparklocal.impl.scala.session

import com.datawizards.sparklocal.accumulator.{AccumulatorV2API, CollectionAccumulatorAPI, DoubleAccumulatorAPI, LongAccumulatorAPI}
import com.datawizards.sparklocal.broadcast.BroadcastAPI
import com.datawizards.sparklocal.impl.scala.accumulator.{CollectionAccumulatorAPIScalaImpl, DoubleAccumulatorAPIScalaImpl, LongAccumulatorAPIScalaImpl}
import com.datawizards.sparklocal.impl.scala.broadcast.BroadcastAPIScalaImpl
import com.datawizards.sparklocal.session.SparkSessionAPI
import org.apache.spark.sql.Encoder

import scala.reflect.ClassTag

/** Pure-Scala session backend: broadcasts and accumulators are plain local objects. */
trait SparkSessionAPIScalaBase extends SparkSessionAPI {

  /** Encoders are irrelevant for the local backend, so a null stand-in is supplied implicitly. */
  object implicits {
    implicit def enc[T]: Encoder[T] = null
  }

  // A "broadcast" is just the value itself in-process.
  override def broadcast[T: ClassTag](value: T): BroadcastAPI[T] = new BroadcastAPIScalaImpl[T](value)

  override def longAccumulator: LongAccumulatorAPI = new LongAccumulatorAPIScalaImpl()

  override def longAccumulator(name: String): LongAccumulatorAPI = new LongAccumulatorAPIScalaImpl(Some(name))

  override def doubleAccumulator: DoubleAccumulatorAPI = new DoubleAccumulatorAPIScalaImpl()

  override def doubleAccumulator(name: String): DoubleAccumulatorAPI = new DoubleAccumulatorAPIScalaImpl(Some(name))

  override def collectionAccumulator[T]: CollectionAccumulatorAPI[T] = new CollectionAccumulatorAPIScalaImpl[T]()

  override def collectionAccumulator[T](name: String): CollectionAccumulatorAPI[T] = new CollectionAccumulatorAPIScalaImpl[T](Some(name))

  // Registration is a no-op locally: accumulators need no driver bookkeeping.
  override def register(acc: AccumulatorV2API[_, _], name: String): Unit = ()
}
开发者ID:piotr-kalanski,项目名称:spark-local,代码行数:42,代码来源:SparkSessionAPIScalaBase.scala

示例7: KeyValueGroupedDataSetAPIScalaParallelLazyImpl

//设置package包名称以及导入依赖的类
package com.datawizards.sparklocal.impl.scala.parallellazy.dataset

import com.datawizards.sparklocal.dataset.KeyValueGroupedDataSetAPI
import com.datawizards.sparklocal.impl.scala.dataset.{DataSetAPIScalaBase, KeyValueGroupedDataSetAPIScalaBase}
import org.apache.spark.sql.Encoder

import scala.collection.{GenIterable, GenSeq}
import scala.reflect.ClassTag

/** Grouped view for the parallel-lazy backend; groups live in a plain Map of GenSeq. */
class KeyValueGroupedDataSetAPIScalaParallelLazyImpl[K: ClassTag, T: ClassTag](private[sparklocal] val data: Map[K, GenSeq[T]]) extends KeyValueGroupedDataSetAPIScalaBase[K, T] {
  override type InternalCollection = Map[K, GenSeq[T]]

  // Derived flat datasets are built by the companion of the parallel-lazy implementation.
  override private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]=null): DataSetAPIScalaBase[U] =
    DataSetAPIScalaParallelLazyImpl.create(it)

  // Applies func to every value of every group, keeping the keys.
  override def mapValues[W: ClassTag](func: (T) => W)
                                     (implicit enc: Encoder[W]=null): KeyValueGroupedDataSetAPI[K, W] =
    new KeyValueGroupedDataSetAPIScalaParallelLazyImpl(data.mapValues(_.map(func)))

}
开发者ID:piotr-kalanski,项目名称:spark-local,代码行数:23,代码来源:KeyValueGroupedDataSetAPIScalaParallelLazyImpl.scala

示例8: DataSetAPIScalaParallelLazyImpl

//设置package包名称以及导入依赖的类
package com.datawizards.sparklocal.impl.scala.parallellazy.dataset

import com.datawizards.sparklocal.dataset.{DataSetAPI, KeyValueGroupedDataSetAPI}
import com.datawizards.sparklocal.impl.scala.dataset.DataSetAPIScalaBase
import com.datawizards.sparklocal.impl.scala.parallellazy.ParallelLazySeq
import com.datawizards.sparklocal.rdd.RDDAPI
import org.apache.spark.sql.Encoder

import scala.collection.GenIterable
import scala.reflect.ClassTag

object DataSetAPIScalaParallelLazyImpl {
  /** Wraps any GenIterable in a parallel, lazily evaluated dataset. */
  private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]): DataSetAPIScalaBase[U] = {
    val parallel = it.toSeq.par
    new DataSetAPIScalaParallelLazyImpl(new ParallelLazySeq(parallel))
  }
}

/** Dataset backend combining parallel collections with lazy evaluation (ParallelLazySeq). */
class DataSetAPIScalaParallelLazyImpl[T: ClassTag](private[sparklocal] val data: ParallelLazySeq[T]) extends DataSetAPIScalaBase[T] {
  override type InternalCollection = ParallelLazySeq[T]

  override private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]): DataSetAPIScalaBase[U] =
    DataSetAPIScalaParallelLazyImpl.create(it)

  // Internal shortcut: wrap an already-parallel-lazy sequence without re-copying.
  private def create[U: ClassTag](data: ParallelLazySeq[U]): DataSetAPIScalaBase[U] =
    new DataSetAPIScalaParallelLazyImpl(data)

  override protected def union(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] = {
    val rhs = dsScala.data.toSeq
    create(data.union(rhs))
  }

  override protected def intersect(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] = {
    val rhs = dsScala.data.toSeq
    create(data.intersect(rhs))
  }

  override protected def diff(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] = {
    val rhs = dsScala.data.toSeq
    create(data.diff(rhs))
  }

  override def distinct(): DataSetAPI[T] = create(data.distinct)

  override def groupByKey[K: ClassTag](func: (T) => K)(implicit enc: Encoder[K]): KeyValueGroupedDataSetAPI[K, T] =
    new KeyValueGroupedDataSetAPIScalaParallelLazyImpl(data.groupBy(func))

  override def rdd(): RDDAPI[T] = RDDAPI(data)
}
开发者ID:piotr-kalanski,项目名称:spark-local,代码行数:43,代码来源:DataSetAPIScalaParallelLazyImpl.scala

示例9: DataSetAPIScalaEagerImpl

//设置package包名称以及导入依赖的类
package com.datawizards.sparklocal.impl.scala.eager.dataset

import com.datawizards.sparklocal.dataset.{DataSetAPI, KeyValueGroupedDataSetAPI}
import com.datawizards.sparklocal.impl.scala.dataset.DataSetAPIScalaBase
import com.datawizards.sparklocal.rdd.RDDAPI
import org.apache.spark.sql.Encoder

import scala.collection.GenIterable
import scala.reflect.ClassTag

object DataSetAPIScalaEagerImpl {
  /** Wraps any GenIterable in an eagerly evaluated, Seq-backed dataset. */
  private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]): DataSetAPIScalaBase[U] = {
    val sequential = it.toSeq.seq
    new DataSetAPIScalaEagerImpl(sequential)
  }
}

/** Eager pure-Scala dataset backend: operations evaluate immediately on a plain Seq. */
class DataSetAPIScalaEagerImpl[T: ClassTag](private[sparklocal] val data: Seq[T]) extends DataSetAPIScalaBase[T] {
  override type InternalCollection = Seq[T]

  override private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]): DataSetAPIScalaBase[U] =
    DataSetAPIScalaEagerImpl.create(it)

  override protected def union(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] = {
    val rhs = dsScala.data.toSeq
    create(data.union(rhs))
  }

  override protected def intersect(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] = {
    val rhs = dsScala.data.toSeq
    create(data.intersect(rhs))
  }

  override protected def diff(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] = {
    val rhs = dsScala.data.toSeq
    create(data.diff(rhs))
  }

  override def distinct(): DataSetAPI[T] = create(data.distinct)

  override def groupByKey[K: ClassTag](func: (T) => K)(implicit enc: Encoder[K]): KeyValueGroupedDataSetAPI[K, T] =
    new KeyValueGroupedDataSetAPIScalaEagerImpl(data.groupBy(func))

  override def rdd(): RDDAPI[T] = RDDAPI(data)
}
开发者ID:piotr-kalanski,项目名称:spark-local,代码行数:39,代码来源:DataSetAPIScalaEagerImpl.scala

示例10: DataSetAPIScalaParallelImpl

//设置package包名称以及导入依赖的类
package com.datawizards.sparklocal.impl.scala.parallel.dataset

import com.datawizards.sparklocal.dataset.{DataSetAPI, KeyValueGroupedDataSetAPI}
import com.datawizards.sparklocal.impl.scala.`lazy`.dataset.DataSetAPIScalaLazyImpl
import com.datawizards.sparklocal.impl.scala.dataset.DataSetAPIScalaBase
import com.datawizards.sparklocal.rdd.RDDAPI
import org.apache.spark.sql.Encoder

import scala.collection.GenIterable
import scala.collection.parallel.ParSeq
import scala.reflect.ClassTag

object DataSetAPIScalaParallelImpl {
  /** Wraps any GenIterable in a ParSeq-backed, eagerly parallel dataset. */
  private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]): DataSetAPIScalaBase[U] = {
    val parallel = it.toSeq.par
    new DataSetAPIScalaParallelImpl(parallel)
  }
}

/** Parallel pure-Scala dataset backend: operations run on a ParSeq. */
class DataSetAPIScalaParallelImpl[T: ClassTag](private[sparklocal] val data: ParSeq[T]) extends DataSetAPIScalaBase[T] {
  override type InternalCollection = ParSeq[T]

  override private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]): DataSetAPIScalaBase[U] =
    DataSetAPIScalaParallelImpl.create(it)

  // Internal shortcut: wrap an already-parallel sequence without re-copying.
  private def create[U: ClassTag](data: ParSeq[U]): DataSetAPIScalaBase[U] =
    new DataSetAPIScalaParallelImpl(data)

  override protected def union(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] = {
    val rhs = dsScala.data.toSeq
    create(data.union(rhs))
  }

  override protected def intersect(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] = {
    val rhs = dsScala.data.toSeq
    create(data.intersect(rhs))
  }

  override protected def diff(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] = {
    val rhs = dsScala.data.toSeq
    create(data.diff(rhs))
  }

  override def distinct(): DataSetAPI[T] = create(data.distinct)

  override def groupByKey[K: ClassTag](func: (T) => K)(implicit enc: Encoder[K]): KeyValueGroupedDataSetAPI[K, T] =
    new KeyValueGroupedDataSetAPIScalaParallelImpl(data.groupBy(func))

  override def rdd(): RDDAPI[T] = RDDAPI(data)
}
开发者ID:piotr-kalanski,项目名称:spark-local,代码行数:44,代码来源:DataSetAPIScalaParallelImpl.scala

示例11: KeyValueGroupedDataSetAPISparkImpl

//设置package包名称以及导入依赖的类
package com.datawizards.sparklocal.impl.spark.dataset

import com.datawizards.sparklocal.dataset.{DataSetAPI, KeyValueGroupedDataSetAPI}
import org.apache.spark.sql.{Encoder, KeyValueGroupedDataset}

import scala.reflect.ClassTag

/** Spark-backed grouped dataset: every operation delegates straight to KeyValueGroupedDataset. */
class KeyValueGroupedDataSetAPISparkImpl[K: ClassTag, T: ClassTag](private[dataset] val data: KeyValueGroupedDataset[K, T]) extends KeyValueGroupedDataSetAPI[K, T] {

  /** Re-wraps a derived Spark grouping in the same API facade. */
  private def create[U: ClassTag](data: KeyValueGroupedDataset[K,U]) = new KeyValueGroupedDataSetAPISparkImpl(data)

  // The wrapped value IS the Spark grouping; encoders are already baked in.
  override private[sparklocal] def toKeyValueGroupedDataSet(implicit encK: Encoder[K], encT: Encoder[T], encKT: Encoder[(K, T)]) = data

  override def count(): DataSetAPI[(K, Long)] = DataSetAPI(data.count())

  override def keys: DataSetAPI[K] = DataSetAPI(data.keys)

  override def mapValues[W: ClassTag](func: (T) => W)
                                     (implicit enc: Encoder[W]=null): KeyValueGroupedDataSetAPI[K, W] =
    create(data.mapValues(func))

  override def mapGroups[U: ClassTag](f: (K, Iterator[T]) => U)
                                     (implicit enc: Encoder[U]=null): DataSetAPI[U] =
    DataSetAPI(data.mapGroups(f))

  override def reduceGroups(f: (T, T) => T): DataSetAPI[(K, T)] = DataSetAPI(data.reduceGroups(f))

  override def flatMapGroups[U: ClassTag](f: (K, Iterator[T]) => TraversableOnce[U])
                                         (implicit enc: Encoder[U]=null): DataSetAPI[U] =
    DataSetAPI(data.flatMapGroups(f))

  // Converts the other side back to a Spark grouping, then co-groups natively.
  override def cogroup[U: ClassTag, R: ClassTag](other: KeyValueGroupedDataSetAPI[K, U])
                                                (f: (K, Iterator[T], Iterator[U]) => TraversableOnce[R])
                                                (implicit
                                                 encK: Encoder[K]=null,
                                                 encT: Encoder[T]=null,
                                                 encU: Encoder[U]=null,
                                                 encR: Encoder[R]=null,
                                                 encKT: Encoder[(K,T)]=null,
                                                 encKU: Encoder[(K,U)]=null
                                                ): DataSetAPI[R] =
    DataSetAPI(data.cogroup(other.toKeyValueGroupedDataSet)(f))
}
开发者ID:piotr-kalanski,项目名称:spark-local,代码行数:49,代码来源:KeyValueGroupedDataSetAPISparkImpl.scala

示例12: GladAlphaAggregator

//设置package包名称以及导入依赖的类
package com.enriquegrodrigo.spark.crowd.aggregators 

import com.enriquegrodrigo.spark.crowd.types.GladPartial
import com.enriquegrodrigo.spark.crowd.types.GladParams
import com.enriquegrodrigo.spark.crowd.utils.Functions
import org.apache.spark.sql.{Encoder,Encoders}
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.broadcast.Broadcast

/** Gradient-ascent step for one annotator's reliability parameter (alpha) in GLAD.
  *
  * Fix: `reduce` declared an unused local `aest` and recomputed `alpha(a.annotator)`
  * and `a.beta` inside the sigmoid instead of reusing the locals it had just bound.
  */
private[crowd] class GladAlphaAggregator(params: Broadcast[GladParams], learningRate: Double) 
  extends Aggregator[GladPartial, GladAlphaAggregatorBuffer, Double]{

  /** Neutral buffer: zero gradient, alpha sentinel -1 (meaning "not yet seen"). */
  def zero: GladAlphaAggregatorBuffer = GladAlphaAggregatorBuffer(0,-1) 
  
  /** Accumulates one annotation's gradient contribution: (p - sigmoid(alpha*beta)) * beta. */
  def reduce(b: GladAlphaAggregatorBuffer, a: GladPartial) : GladAlphaAggregatorBuffer = {
    val al = params.value.alpha(a.annotator)
    val bet = a.beta
    val sigmoidValue = Functions.sigmoid(al*bet)
    // Probability mass the current estimate puts on the observed label.
    val p = if (a.value == 1) a.est else (1-a.est)
    val term = (p - sigmoidValue)*bet
    GladAlphaAggregatorBuffer(b.agg + term, al)
  }

  /** Sums gradients; keeps the first alpha that is not the -1 sentinel. */
  def merge(b1: GladAlphaAggregatorBuffer, b2: GladAlphaAggregatorBuffer) : GladAlphaAggregatorBuffer = { 
    GladAlphaAggregatorBuffer(b1.agg + b2.agg, if (b1.alpha == -1) b2.alpha else b1.alpha )
  }

  /** One gradient-ascent update: alpha + learningRate * accumulated gradient. */
  def finish(reduction: GladAlphaAggregatorBuffer) = {
    reduction.alpha + learningRate * reduction.agg
  }

  def bufferEncoder: Encoder[GladAlphaAggregatorBuffer] = Encoders.product[GladAlphaAggregatorBuffer]

  def outputEncoder: Encoder[Double] = Encoders.scalaDouble
}

private[crowd] case class GladAlphaAggregatorBuffer(agg: Double, alpha: Double) 
开发者ID:enriquegrodrigo,项目名称:spark-crowd,代码行数:40,代码来源:GladAlphaAggregator.scala

示例13: MulticlassMVAggregator

//设置package包名称以及导入依赖的类
package com.enriquegrodrigo.spark.crowd.aggregators 

import com.enriquegrodrigo.spark.crowd.types.MulticlassAnnotation
import org.apache.spark.sql.{Encoder,Encoders}
import org.apache.spark.sql.expressions.Aggregator

/** Majority-voting aggregator for multiclass annotations: picks the most-voted class per example.
  *
  * Fix: `sumKey` rewritten from a manual Option match to the idiomatic
  * `getOrElse` form (identical behavior). Note it is not called anywhere
  * in this aggregator — kept for interface compatibility.
  */
private[crowd] class MulticlassMVAggregator(nClasses: Int) extends Aggregator[MulticlassAnnotation, MulticlassMVPartial, Int]{

  /** Adds `pair`'s value into `map` under `pair`'s key (missing keys count as 0). */
  def sumKey(map: Map[Int,Long], pair: (Int,Long)) = {
      val (key, value) = pair
      map.updated(key, map.getOrElse(key, 0L) + value)
  }

  /** Neutral buffer: one zeroed vote counter per class, zero annotations seen. */
  def zero: MulticlassMVPartial = MulticlassMVPartial(Vector.fill(nClasses)(0),0)

  /** Increments the vote counter of the annotated class. */
  def reduce(b: MulticlassMVPartial, a: MulticlassAnnotation) : MulticlassMVPartial = {
    MulticlassMVPartial(b.aggVect.updated(a.value, b.aggVect(a.value) + 1), b.count + 1) 
  }

  /** Element-wise sum of vote vectors plus total counts. */
  def merge(b1: MulticlassMVPartial, b2: MulticlassMVPartial) : MulticlassMVPartial = { 
    MulticlassMVPartial(b1.aggVect.zip(b2.aggVect).map(x => x._1 + x._2), b1.count + b2.count) 
  }

  /** Index of the class with the most votes (first one on ties). */
  def finish(reduction: MulticlassMVPartial) = {
      reduction.aggVect.indexOf(reduction.aggVect.max)
  }


  def bufferEncoder: Encoder[MulticlassMVPartial] = Encoders.product[MulticlassMVPartial]

  def outputEncoder: Encoder[Int] = Encoders.scalaInt
} 
开发者ID:enriquegrodrigo,项目名称:spark-crowd,代码行数:38,代码来源:MulticlassMVAggregator.scala

示例14: GladBetaAggregator

//设置package包名称以及导入依赖的类
package com.enriquegrodrigo.spark.crowd.aggregators 

import com.enriquegrodrigo.spark.crowd.types.GladPartial
import com.enriquegrodrigo.spark.crowd.types.GladParams
import com.enriquegrodrigo.spark.crowd.utils.Functions
import org.apache.spark.sql.{Encoder,Encoders}
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.broadcast.Broadcast

/** Gradient-ascent step for one example's difficulty parameter (beta) in GLAD.
  *
  * Fix: `reduce` declared an unused local `aest` and recomputed `alpha(a.annotator)`
  * and `a.beta` instead of reusing the locals `al`/`bet` it had just bound.
  */
private[crowd] class GladBetaAggregator(params: Broadcast[GladParams], learningRate: Double) 
  extends Aggregator[GladPartial, GladBetaAggregatorBuffer, Double]{

  /** Neutral buffer: zero gradient, beta sentinel -1 (meaning "not yet seen"). */
  def zero: GladBetaAggregatorBuffer = GladBetaAggregatorBuffer(0,-1) 
  
  /** Accumulates one annotation's gradient contribution: (p - sigmoid(alpha*beta)) * alpha. */
  def reduce(b: GladBetaAggregatorBuffer, a: GladPartial) : GladBetaAggregatorBuffer = {
    val al = params.value.alpha(a.annotator)
    val bet = a.beta
    val sigmoidValue = Functions.sigmoid(al*bet)
    // Probability mass the current estimate puts on the observed label.
    val p = if (a.value == 1) a.est else (1-a.est)
    val term = (p - sigmoidValue)*al
    GladBetaAggregatorBuffer(b.agg + term, bet)
  }

  /** Sums gradients; keeps the first beta that is not the -1 sentinel. */
  def merge(b1: GladBetaAggregatorBuffer, b2: GladBetaAggregatorBuffer) : GladBetaAggregatorBuffer = { 
    GladBetaAggregatorBuffer(b1.agg + b2.agg, if (b1.beta == -1) b2.beta else b1.beta)
  }

  /** One gradient-ascent update: beta + learningRate * accumulated gradient. */
  def finish(reduction: GladBetaAggregatorBuffer) = {
    reduction.beta + learningRate * reduction.agg
  }

  def bufferEncoder: Encoder[GladBetaAggregatorBuffer] = Encoders.product[GladBetaAggregatorBuffer]

  def outputEncoder: Encoder[Double] = Encoders.scalaDouble
}

private[crowd] case class GladBetaAggregatorBuffer(agg: Double, beta: Double) 
开发者ID:enriquegrodrigo,项目名称:spark-crowd,代码行数:40,代码来源:GladBetaAggregator.scala

示例15: BinarySoftMVAggregator

//设置package包名称以及导入依赖的类
package com.enriquegrodrigo.spark.crowd.aggregators 

import org.apache.spark.sql.{Encoder, Encoders}
import com.enriquegrodrigo.spark.crowd.types.BinaryAnnotation
import org.apache.spark.sql.expressions.Aggregator

  /** Soft majority voting for binary annotations: returns the fraction of positive votes. */
  private[crowd] class BinarySoftMVAggregator extends Aggregator[BinaryAnnotation, BinaryMVPartial, Double]{

    /** Neutral buffer: no votes, zero count. */
    def zero: BinaryMVPartial = BinaryMVPartial(0,0)

    /** Adds one annotation's value to the vote sum and bumps the count. */
    def reduce(b: BinaryMVPartial, a: BinaryAnnotation) : BinaryMVPartial = {
      BinaryMVPartial(b.aggValue + a.value, b.count + 1)
    }

    /** Component-wise sum of two partial buffers. */
    def merge(b1: BinaryMVPartial, b2: BinaryMVPartial) : BinaryMVPartial = {
      BinaryMVPartial(b1.aggValue + b2.aggValue, b1.count + b2.count)
    }

    /** Fraction of positive votes; rejects empty groups.
      * Both operands are forced to Double so the division is floating-point. */
    def finish(reduction: BinaryMVPartial) = {
      val positives: Double = reduction.aggValue
      val total: Double = reduction.count
      if (total == 0) throw new IllegalArgumentException()
      else positives / total
    }

    def bufferEncoder: Encoder[BinaryMVPartial] = Encoders.product[BinaryMVPartial]

    def outputEncoder: Encoder[Double] = Encoders.scalaDouble
  }
开发者ID:enriquegrodrigo,项目名称:spark-crowd,代码行数:31,代码来源:BinarySoftMVAggregator.scala


注:本文中的org.apache.spark.sql.Encoder类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。