This article collects typical usage examples of the Scala class org.apache.spark.Partitioner. If you are unsure what the Partitioner class is for, how to use it, or what real usage looks like, the curated class examples below may help.
The following shows 13 code examples of the Partitioner class, sorted by popularity by default.
Example 1: ImageSectorPartitioner
// Package declaration and imported dependencies
package org.pfcoperez.sparkmandelbrot.partitioners
import org.apache.spark.Partitioner
import org.pfcoperez.geometry.Primitives2D.{PixelFrame, Pixel, sector}
class ImageSectorPartitioner(val sectorSize: (Int, Int), val pixelFrame: PixelFrame) extends Partitioner {
override def numPartitions: Int = {
// Area of one sector, computed in Long to avoid Int overflow
val sectorArea: Long = sectorSize._1.toLong * sectorSize._2.toLong
(pixelFrame.area.toDouble/sectorArea.toDouble).ceil.toInt
}
override def getPartition(key: Any): Int = key match {
case (x: Int, y: Int) =>
val asLongPairSectorSize = (sectorSize._1.toLong, sectorSize._2.toLong)
val p = Pixel(x, y)
sector(p, asLongPairSectorSize)(pixelFrame).toInt
}
override def equals(obj: scala.Any): Boolean = obj match {
case that: ImageSectorPartitioner =>
that.sectorSize == sectorSize && that.pixelFrame == pixelFrame
case _ => false
}
}
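A hedged usage sketch: the helper name groupBySector, the 512x512 sector size, and the already-constructed frame and pixels values are assumptions (PixelFrame construction is not shown in the example). With an RDD keyed by (x, y) pixel coordinates, the partitioner can be handed to partitionBy so that each partition holds one rectangular sector of the image.
import org.apache.spark.rdd.RDD
import org.pfcoperez.geometry.Primitives2D.PixelFrame
import org.pfcoperez.sparkmandelbrot.partitioners.ImageSectorPartitioner

// Hypothetical helper: `frame` and `pixels` stand in for values produced elsewhere.
def groupBySector(pixels: RDD[((Int, Int), Int)], frame: PixelFrame): RDD[((Int, Int), Int)] =
  pixels.partitionBy(new ImageSectorPartitioner((512, 512), frame)) // 512x512 pixel sectors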
Example 2: GridPartitioner
// Package declaration and imported dependencies
package se.kth.climate.fast.math
import org.apache.spark.Partitioner
class GridPartitioner(
val rows: Int,
val cols: Int,
val rowsPerPart: Int,
val colsPerPart: Int) extends Partitioner {
require(rows > 0)
require(cols > 0)
require(rowsPerPart > 0)
require(colsPerPart > 0)
private val rowPartitions = math.ceil(rows * 1.0 / rowsPerPart).toInt
private val colPartitions = math.ceil(cols * 1.0 / colsPerPart).toInt
override val numPartitions: Int = rowPartitions * colPartitions
// Required override: Partitioner declares getPartition abstract. The row-major
// block mapping below follows Spark MLlib's GridPartitioner, which this class mirrors.
override def getPartition(key: Any): Int = key match {
case (i: Int, j: Int) =>
require(0 <= i && i < rows && 0 <= j && j < cols, s"Index ($i, $j) out of range")
i / rowsPerPart + j / colsPerPart * rowPartitions
case _ =>
throw new IllegalArgumentException(s"Unrecognized key: $key")
}
}
// Factory that derives block sizes from a suggested partition count; kept in a
// companion object so it can be called before any instance exists.
object GridPartitioner {
def apply(rows: Int, cols: Int, suggestedNumPartitions: Int): GridPartitioner = {
require(suggestedNumPartitions > 0)
val scale = 1.0 / math.sqrt(suggestedNumPartitions)
val rowsPerPart = math.round(math.max(scale * rows, 1.0)).toInt
val colsPerPart = math.round(math.max(scale * cols, 1.0)).toInt
new GridPartitioner(rows, cols, rowsPerPart, colsPerPart)
}
}
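A hedged usage sketch, assuming grid cells keyed by (row, column) indices; partitionGrid and the 100x100 example data are hypothetical. The factory picks block sizes so the resulting partition count roughly matches the suggestion.
import org.apache.spark.SparkContext
import se.kth.climate.fast.math.GridPartitioner

// Hypothetical example data: a 100 x 100 grid of cell values keyed by (i, j).
def partitionGrid(sc: SparkContext) = {
  val cells = sc.parallelize(for (i <- 0 until 100; j <- 0 until 100) yield ((i, j), i * j))
  cells.partitionBy(GridPartitioner(rows = 100, cols = 100, suggestedNumPartitions = 8))
}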
Example 3: HashAwarePartitioner
// Package declaration and imported dependencies
package org.apache.spark.lineage
import org.apache.spark.Partitioner
import org.apache.spark.util.Utils
class HashAwarePartitioner(partitions: Int) extends Partitioner {
def numPartitions = partitions
def getPartition(key: Any): Int = Utils.nonNegativeMod(key.asInstanceOf[Int], numPartitions)
override def equals(other: Any): Boolean = other match {
case h: HashAwarePartitioner =>
h.numPartitions == numPartitions
case _ =>
false
}
}
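This is essentially Spark's HashPartitioner specialized to Int keys: getPartition casts the key with asInstanceOf[Int], so non-Int keys would throw. A minimal usage sketch (the bucketById helper and sample data are assumptions):
import org.apache.spark.SparkContext
import org.apache.spark.lineage.HashAwarePartitioner

// Hedged sketch: keys must already be Ints, since getPartition casts the key directly.
def bucketById(sc: SparkContext) = {
  val byId = sc.parallelize(Seq(1 -> "a", 2 -> "b", 17 -> "c"))
  byId.partitionBy(new HashAwarePartitioner(4))
}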
Example 4: LocalityAwarePartitioner
// Package declaration and imported dependencies
package org.apache.spark.lineage
import org.apache.spark.Partitioner
import org.apache.spark.lineage.LineageContext._
import org.apache.spark.util.PackIntIntoLong
class LocalityAwarePartitioner(partitions: Int) extends Partitioner {
def numPartitions = partitions
def getPartition(key: Any): Int = key.asInstanceOf[RecordId]._1
override def equals(other: Any): Boolean = other match {
case h: LocalityAwarePartitioner =>
h.numPartitions == numPartitions
case _ =>
false
}
}
Example 5: SimpleCustomPartitioner
// Package declaration and imported dependencies
package com.malaska.spark.training.partitioning
import org.apache.spark.Partitioner
import org.apache.spark.sql.SparkSession
object SimpleCustomPartitioner {
def main(args:Array[String]): Unit = {
val jsonPath = args(0)
val partitions = args(1).toInt
val sparkSession = SparkSession.builder
.master("local")
.appName("my-spark-app")
.config("spark.some.config.option", "config-value")
.getOrCreate()
val jsonDf = sparkSession.read.json(jsonPath)
val partitionedRdd = jsonDf.rdd.map(row => {
val group = row.getAs[String]("group")
val time = row.getAs[Long]("time")
val value = row.getAs[Long]("value")
((group, time), value) // the key is a (group, time) tuple nested inside the outer tuple
}).repartitionAndSortWithinPartitions(new SimpleCustomPartitioner(partitions))
val pairRdd = jsonDf.rdd.map(row => {
val group = row.getAs[String]("group")
val time = row.getAs[Long]("time")
val value = row.getAs[Long]("value")
((group, time), value) // the key is a (group, time) tuple nested inside the outer tuple
})
// Demonstrates the numPartitions and Partitioner overloads of reduceByKey (results unused here):
pairRdd.reduceByKey(_ + _, 100)
pairRdd.reduceByKey(new SimpleCustomPartitioner(partitions), _ + _)
partitionedRdd.collect().foreach(r => {
println(r)
})
sparkSession.stop()
}
}
class SimpleCustomPartitioner(numOfParts:Int) extends Partitioner {
override def numPartitions: Int = numOfParts
override def getPartition(key: Any): Int = {
val k = key.asInstanceOf[(String, Long)]
Math.abs(k._1.hashCode) % numPartitions
}
}
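Worth noting: the partitioner hashes only the group part of the (group, time) key, so all records of a group land in the same partition, and repartitionAndSortWithinPartitions then sorts by the full key; within each partition the data is therefore ordered by time per group (a secondary sort). A minimal sketch of consuming that order, reusing partitionedRdd from the main method above (the tab-separated output format is an assumption):
// Each partition can be streamed in (group, time) order without a global sort.
val perGroupSeries = partitionedRdd.mapPartitions { iter =>
  iter.map { case ((group, time), value) => s"$group\t$time\t$value" }
}
perGroupSeries.take(10).foreach(println)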
Example 6: AppleCustomPartitioner
// Package declaration and imported dependencies
package com.malaska.spark.training.partitioning
import java.util.Random
import org.apache.spark.Partitioner
class AppleCustomPartitioner(numOfParts:Int) extends Partitioner {
override def numPartitions: Int = numOfParts
val random = new Random() // a single Random instance reused for salting the hot key
override def getPartition(key: Any): Int = {
val k = key.asInstanceOf[(String, Long)]
val ticker = k._1
if (ticker.equals("apple")) {
val saltedTicker = ticker + random.nextInt(9)
Math.abs(saltedTicker.hashCode) % numPartitions
} else {
Math.abs(ticker.hashCode) % numPartitions
}
}
}
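One caveat: salting only inside getPartition means two records with the identical key ("apple", t) can be routed to different partitions. That spreads the write load of a skewed key, but it breaks the assumption behind reduceByKey and groupByKey that equal keys meet in one partition. The more common skew remedy is to salt the key itself and aggregate twice; a minimal sketch under the assumption of an RDD of ((ticker, time), value) pairs (sumWithSalt is a hypothetical helper, not part of the example):
import scala.util.Random
import org.apache.spark.rdd.RDD

def sumWithSalt(rdd: RDD[((String, Long), Long)]): RDD[((String, Long), Long)] = {
  val salted = rdd.map { case ((ticker, time), v) =>
    val salt = if (ticker == "apple") Random.nextInt(9) else 0  // spread the hot key
    ((ticker, time, salt), v)
  }
  salted.reduceByKey(_ + _)                                      // partial sums per salt value
    .map { case ((ticker, time, _), v) => ((ticker, time), v) }  // drop the salt
    .reduceByKey(_ + _)                                          // final sums per original key
}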
Example 7: GainChartPartitioner
// Package declaration and imported dependencies
package com.paypal.risk.smunf.gainchart
import com.paypal.risk.smunf.util.CommonFunction
import org.apache.spark.Partitioner
class GainChartPartitioner(partitions: Int) extends Partitioner {
def numPartitions = partitions
def getPartition(key: Any): Int = {
val value = key match {
case x: GainChartIndicator => x.hashCode
case x: GainChartRecord => x.indicator.hashCode
case null => 0
case _ => key.hashCode
}
CommonFunction.nonNegativeMod(value, numPartitions)
}
override def equals(other: Any): Boolean = other match {
case h: GainChartPartitioner => h.numPartitions == numPartitions
case _ => false
}
override def hashCode: Int = numPartitions
}
Example 8: VariablePartitioner
// Package declaration and imported dependencies
package com.paypal.risk.smunf.util
import org.apache.spark.Partitioner
class VariablePartitioner(partitions: Int, useLabel: Boolean) extends Partitioner{
def numPartitions = partitions
def getPartition(key: Any): Int = {
val value = key match {
case x: VariableRecord => x.id.hashCode + { if (useLabel) x.label.hashCode else 0 }
case null => 0
case _ => key.hashCode
}
CommonFunction.nonNegativeMod(value, numPartitions)
}
override def equals(other: Any): Boolean = other match {
case h: VariablePartitioner => h.numPartitions == numPartitions
case _ => false
}
override def hashCode: Int = numPartitions
}
Example 9: SizePartitioner
// Package declaration and imported dependencies
package com.paypal.risk.smunf.math.stats
import com.paypal.risk.smunf.util.CommonFunction
import org.apache.spark.Partitioner
class SizePartitioner(partitions: Int) extends Partitioner {
def numPartitions = partitions
def getPartition(key: Any): Int = {
val value = key match {
case x: Int => x
case null => 0
case _ => key.hashCode
}
CommonFunction.nonNegativeMod(value, numPartitions)
}
override def equals(other: Any): Boolean = other match {
case h: SizePartitioner => h.numPartitions == numPartitions
case _ => false
}
override def hashCode: Int = numPartitions
}
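Examples 7 through 9 follow the same pattern: map a domain value to an Int, wrap it with nonNegativeMod, and override equals and hashCode so that Spark can recognize two RDDs partitioned by equal partitioners as co-partitioned and skip a shuffle when they are joined. A generic sketch of that effect (coPartitionedJoin is a hypothetical helper; it works with any Partitioner whose equals behaves like these):
import scala.reflect.ClassTag
import org.apache.spark.Partitioner
import org.apache.spark.rdd.RDD

// Two RDDs partitioned by equal partitioners join without a second shuffle.
def coPartitionedJoin[K: ClassTag, A: ClassTag, B: ClassTag](
    left: RDD[(K, A)], right: RDD[(K, B)], p: Partitioner): RDD[(K, (A, B))] = {
  val l = left.partitionBy(p)
  val r = right.partitionBy(p)
  // l.partitioner == r.partitioner (equals compares numPartitions above),
  // so join can reuse the existing layout on both sides.
  l.join(r)
}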
Example 10: PairRDDFunctionsAPI
// Package declaration and imported dependencies
package com.datawizards.sparklocal.rdd
import org.apache.spark.Partitioner
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import scala.collection.{GenIterable, GenMap, Map}
import scala.reflect.ClassTag
trait PairRDDFunctionsAPI[K, V] {
protected lazy val spark: SparkSession = SparkSession.builder().getOrCreate()
protected def parallelize[That: ClassTag](d: Seq[That]): RDD[That] = spark.sparkContext.parallelize(d)
protected def parallelize[That: ClassTag](d: GenIterable[That]): RDD[That] = parallelize(d.toList)
def mapValues[U: ClassTag](f: (V) => U): RDDAPI[(K, U)]
def keys: RDDAPI[K]
def values: RDDAPI[V]
def flatMapValues[U: ClassTag](f: (V) => TraversableOnce[U]): RDDAPI[(K, U)]
def countByKey(): GenMap[K, Long]
def reduceByKey(func: (V, V) => V): RDDAPI[(K, V)]
def reduceByKey(func: (V, V) => V, numPartitions: Int): RDDAPI[(K, V)]
def reduceByKey(partitioner: Partitioner, func: (V, V) => V): RDDAPI[(K, V)]
def reduceByKeyLocally(func: (V, V) => V): Map[K, V]
def groupByKey(): RDDAPI[(K, GenIterable[V])]
def groupByKey(numPartitions: Int): RDDAPI[(K, GenIterable[V])]
def groupByKey(partitioner: Partitioner): RDDAPI[(K, GenIterable[V])]
def foldByKey(zeroValue: V)(func: (V, V) => V): RDDAPI[(K, V)]
def foldByKey(zeroValue: V, numPartitions: Int)(func: (V, V) => V): RDDAPI[(K, V)]
def foldByKey(zeroValue: V, partitioner: Partitioner)(func: (V, V) => V): RDDAPI[(K, V)]
def join[W: ClassTag](other: RDDAPI[(K, W)]): RDDAPI[(K, (V, W))]
def join[W: ClassTag](other: RDDAPI[(K, W)], numPartitions: Int): RDDAPI[(K, (V, W))]
def join[W: ClassTag](other: RDDAPI[(K, W)], partitioner: Partitioner): RDDAPI[(K, (V, W))]
def leftOuterJoin[W: ClassTag](other: RDDAPI[(K, W)]): RDDAPI[(K, (V, Option[W]))]
def leftOuterJoin[W: ClassTag](other: RDDAPI[(K, W)], numPartitions: Int): RDDAPI[(K, (V, Option[W]))]
def leftOuterJoin[W: ClassTag](other: RDDAPI[(K, W)], partitioner: Partitioner): RDDAPI[(K, (V, Option[W]))]
def rightOuterJoin[W: ClassTag](other: RDDAPI[(K, W)]): RDDAPI[(K, (Option[V], W))]
def rightOuterJoin[W: ClassTag](other: RDDAPI[(K, W)], numPartitions: Int): RDDAPI[(K, (Option[V], W))]
def rightOuterJoin[W: ClassTag](other: RDDAPI[(K, W)], partitioner: Partitioner): RDDAPI[(K, (Option[V], W))]
def fullOuterJoin[W: ClassTag](other: RDDAPI[(K, W)]): RDDAPI[(K, (Option[V], Option[W]))]
def fullOuterJoin[W: ClassTag](other: RDDAPI[(K, W)], numPartitions: Int): RDDAPI[(K, (Option[V], Option[W]))]
def fullOuterJoin[W: ClassTag](other: RDDAPI[(K, W)], partitioner: Partitioner): RDDAPI[(K, (Option[V], Option[W]))]
def cogroup[W1: ClassTag, W2: ClassTag, W3: ClassTag](other1: RDDAPI[(K, W1)], other2: RDDAPI[(K, W2)], other3: RDDAPI[(K, W3)], partitioner: Partitioner): RDDAPI[(K, (GenIterable[V], GenIterable[W1], GenIterable[W2], GenIterable[W3]))]
def cogroup[W: ClassTag](other: RDDAPI[(K, W)], partitioner: Partitioner): RDDAPI[(K, (GenIterable[V], GenIterable[W]))]
def cogroup[W1: ClassTag, W2: ClassTag](other1: RDDAPI[(K, W1)], other2: RDDAPI[(K, W2)], partitioner: Partitioner): RDDAPI[(K, (GenIterable[V], GenIterable[W1], GenIterable[W2]))]
def cogroup[W1: ClassTag, W2: ClassTag, W3: ClassTag](other1: RDDAPI[(K, W1)], other2: RDDAPI[(K, W2)], other3: RDDAPI[(K, W3)]): RDDAPI[(K, (GenIterable[V], GenIterable[W1], GenIterable[W2], GenIterable[W3]))]
def cogroup[W: ClassTag](other: RDDAPI[(K, W)]): RDDAPI[(K, (GenIterable[V], GenIterable[W]))]
def cogroup[W1: ClassTag, W2: ClassTag](other1: RDDAPI[(K, W1)], other2: RDDAPI[(K, W2)]): RDDAPI[(K, (GenIterable[V], GenIterable[W1], GenIterable[W2]))]
def cogroup[W: ClassTag](other: RDDAPI[(K, W)], numPartitions: Int): RDDAPI[(K, (GenIterable[V], GenIterable[W]))]
def cogroup[W1: ClassTag, W2: ClassTag](other1: RDDAPI[(K, W1)], other2: RDDAPI[(K, W2)], numPartitions: Int): RDDAPI[(K, (GenIterable[V], GenIterable[W1], GenIterable[W2]))]
def cogroup[W1: ClassTag, W2: ClassTag, W3: ClassTag](other1: RDDAPI[(K, W1)], other2: RDDAPI[(K, W2)], other3: RDDAPI[(K, W3)], numPartitions: Int): RDDAPI[(K, (GenIterable[V], GenIterable[W1], GenIterable[W2], GenIterable[W3]))]
def collectAsMap(): GenMap[K, V]
def subtractByKey[W: ClassTag](other: RDDAPI[(K, W)]): RDDAPI[(K, V)]
def subtractByKey[W: ClassTag](other: RDDAPI[(K, W)], numPartitions: Int): RDDAPI[(K, V)]
def subtractByKey[W: ClassTag](other: RDDAPI[(K, W)], p: Partitioner): RDDAPI[(K, V)]
def aggregateByKey[U: ClassTag](zeroValue: U)(seqOp: (U, V) => U, combOp: (U, U) => U): RDDAPI[(K, U)]
def aggregateByKey[U: ClassTag](zeroValue: U, partitioner: Partitioner)(seqOp: (U, V) => U, combOp: (U, U) => U): RDDAPI[(K, U)]
def aggregateByKey[U: ClassTag](zeroValue: U, numPartitions: Int)(seqOp: (U, V) => U, combOp: (U, U) => U): RDDAPI[(K, U)]
def partitionBy(partitioner: Partitioner): RDDAPI[(K, V)]
}
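The trait mirrors the Partitioner-accepting overloads of Spark's own PairRDDFunctions. For orientation, a minimal sketch of the corresponding calls against plain Spark rather than the RDDAPI wrapper (the demo function and sample data are assumptions), using the built-in HashPartitioner:
import org.apache.spark.{HashPartitioner, SparkContext}

// The Partitioner overloads let the caller pin the shuffle layout explicitly.
def demo(sc: SparkContext): Unit = {
  val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))
  val part = new HashPartitioner(4)
  val sums = pairs.reduceByKey(part, _ + _)   // aggregate with an explicit partitioner
  val groups = pairs.groupByKey(part)         // co-partitioned with sums
  val joined = sums.join(groups, part)        // same partitioner on both sides: no extra shuffle
  joined.collect().foreach(println)
}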
Example 11: GlobalDataPartitioner
// Package declaration and imported dependencies
package mljoin
import org.apache.spark.Partitioner
class GlobalDataPartitioner (numParts:Int, B_i_data_hash: Data2 => Long) extends Partitioner {
def getPartition(key: Any): Int = {
key match {
case null => 0
case _ => {
val tmp:Data2 = key.asInstanceOf[Data2]
nonNegativeMod(B_i_data_hash(tmp), numParts)
}
}
}
def numPartitions: Int = numParts
def nonNegativeMod(x: Long, mod: Int): Int = {
val rawMod = (x % mod).toInt
rawMod + (if (rawMod < 0) mod else 0)
}
}
Example 12: GlobalModelPartitioner
// Package declaration and imported dependencies
package mljoin
import org.apache.spark.Partitioner
class GlobalModelPartitioner (numParts:Int, B_i_model_hash: Model2 => Long) extends Partitioner {
def getPartition(key: Any): Int = {
key match {
case null => 0
case _ => {
val tmp:Model2 = key.asInstanceOf[Model2]
nonNegativeMod(B_i_model_hash(tmp), numParts)
}
}
}
def numPartitions: Int = numParts
def nonNegativeMod(x: Long, mod: Int): Int = {
val rawMod = (x % mod).toInt
rawMod + (if (rawMod < 0) mod else 0)
}
}
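Both partitioners carry their own nonNegativeMod because Scala's % operator keeps the sign of the dividend, so a negative hash would otherwise yield a negative, invalid partition id. A small worked example (the concrete values are illustrative):
val mod = 3
val raw = (-7L % mod).toInt                   // -1: plain % keeps the sign of the dividend
val fixed = raw + (if (raw < 0) mod else 0)   //  2: shifted into the valid range [0, mod)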
Example 13: SymbolPartitioner
// Package declaration and imported dependencies
package per.harenp.Hedgehog
import org.apache.spark.Partitioner
class SymbolPartitioner extends Partitioner
{
val numParts = 500
def actualSymbol(key: Any) = {
val fullPath = key.toString
val namePart = fullPath.split("/").last
namePart.split(Array('-','.')).head
}
override def numPartitions: Int = numParts
override def getPartition(key: Any): Int = {
val code = actualSymbol(key).hashCode % numPartitions
if (code < 0)
code + numPartitions
else
code
}
// Java equals method to let Spark compare our Partitioner objects
override def equals(other: Any): Boolean = other match {
case sp: SymbolPartitioner =>
sp.numPartitions == numPartitions
case _ =>
false
}
}
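A hedged usage sketch: assuming the keys are file paths such as "data/AAPL-2016.csv" (a hypothetical layout), actualSymbol extracts "AAPL", so every file for one ticker lands in the same of the 500 partitions. The loadBySymbol helper and the path argument are assumptions, not part of the example.
import org.apache.spark.SparkContext
import per.harenp.Hedgehog.SymbolPartitioner

def loadBySymbol(sc: SparkContext, path: String) = {
  val files = sc.wholeTextFiles(path)          // RDD[(filePath, fileContents)]
  files.partitionBy(new SymbolPartitioner())   // group all files of one symbol together
}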