This article collects typical usage examples of the Scala class org.apache.spark.broadcast.Broadcast. If you have been wondering what the Broadcast class does, how it is used, or what real Broadcast code looks like, the curated class code examples below may help.
15 code examples of the Broadcast class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Scala code examples.
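Before the examples, here is a minimal self-contained sketch of the Broadcast API itself, as background for the code that follows (the object name, lookup table, and values are purely illustrative and not taken from the examples): a broadcast variable is created with SparkContext.broadcast, read on executors through .value, and its executor copies are released with unpersist.
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.broadcast.Broadcast

object BroadcastSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("broadcast-sketch").setMaster("local[*]"))

    // A small read-only lookup table, shipped once per executor instead of once per task.
    val countryNames: Broadcast[Map[String, String]] =
      sc.broadcast(Map("ES" -> "Spain", "DE" -> "Germany"))

    val codes = sc.parallelize(Seq("ES", "DE", "FR"))

    // Executors read the broadcast value through .value.
    val resolved = codes.map(code => countryNames.value.getOrElse(code, "unknown")).collect()
    resolved.foreach(println)

    // Release the executor copies once the variable is no longer needed.
    countryNames.unpersist()
    sc.stop()
  }
}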
Example 1: GladLogLikelihoodAggregator
// Set the package name and import the dependent classes
package com.enriquegrodrigo.spark.crowd.aggregators
import com.enriquegrodrigo.spark.crowd.types.GladPartial
import com.enriquegrodrigo.spark.crowd.types.GladParams
import com.enriquegrodrigo.spark.crowd.utils.Functions
import org.apache.spark.sql.{Encoder,Encoders}
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.broadcast.Broadcast
import scala.math.log
private[crowd] class GladLogLikelihoodAggregator(params: Broadcast[GladParams])
extends Aggregator[GladPartial, GladLogLikelihoodAggregatorBuffer, Double]{
def zero: GladLogLikelihoodAggregatorBuffer = GladLogLikelihoodAggregatorBuffer(0,-1)
def reduce(b: GladLogLikelihoodAggregatorBuffer, a: GladPartial) : GladLogLikelihoodAggregatorBuffer = {
val alphaVal = params.value.alpha(a.annotator.toInt)
val betaVal = a.beta
val sig = Functions.sigmoid(alphaVal*betaVal)
val p0 = 1-a.est
val p1 = a.est
val k0 = if (a.value == 0) sig else 1-sig
val k1 = if (a.value == 1) sig else 1-sig
GladLogLikelihoodAggregatorBuffer(b.agg + Functions.prodlog(p0,k0)
+ Functions.prodlog(p1,k1), p1)
}
def merge(b1: GladLogLikelihoodAggregatorBuffer, b2: GladLogLikelihoodAggregatorBuffer) : GladLogLikelihoodAggregatorBuffer = {
GladLogLikelihoodAggregatorBuffer(b1.agg + b2.agg, if (b1.classProb == -1) b2.classProb else b1.classProb)
}
def finish(reduction: GladLogLikelihoodAggregatorBuffer) = {
val w0 = params.value.w(0)
val w1 = params.value.w(1)
reduction.agg + Functions.prodlog(1 - reduction.classProb, w0) +
Functions.prodlog(reduction.classProb, w1)
}
def bufferEncoder: Encoder[GladLogLikelihoodAggregatorBuffer] = Encoders.product[GladLogLikelihoodAggregatorBuffer]
def outputEncoder: Encoder[Double] = Encoders.scalaDouble
}
private[crowd] case class GladLogLikelihoodAggregatorBuffer(agg: Double, classProb: Double)
Example 2: PDF
// Set the package name and import the dependent classes
package org.hammerlab.coverage.two_sample
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.hammerlab.coverage
import org.hammerlab.coverage.histogram.JointHistogram.Depth
import spire.algebra.Monoid
case class PDF[C: Monoid](rdd: RDD[((Depth, Depth), C)],
filtersBroadcast: Broadcast[(Set[Depth], Set[Depth])],
maxDepth1: Depth,
maxDepth2: Depth)
extends coverage.PDF[C]
with CanDownSampleRDD[C]
case class CDF[C: Monoid](rdd: RDD[((Depth, Depth), C)],
filtersBroadcast: Broadcast[(Set[Depth], Set[Depth])])
extends coverage.CDF[C]
with CanDownSampleRDD[C]
Example 3: rdd
// Set the package name and import the dependent classes
package org.hammerlab.coverage.two_sample
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.hammerlab.coverage.histogram.JointHistogram.Depth
trait CanDownSampleRDD[V] {
def rdd: RDD[((Depth, Depth), V)]
def filtersBroadcast: Broadcast[(Set[Depth], Set[Depth])]
@transient lazy val filtered = filterDistribution(filtersBroadcast)
private def filterDistribution(filtersBroadcast: Broadcast[(Set[Int], Set[Int])]): Array[((Depth, Depth), V)] =
(for {
((d1, d2), value) ← rdd
(d1Filter, d2Filter) = filtersBroadcast.value
if d1Filter(d1) && d2Filter(d2)
} yield
(d1, d2) → value
)
.collect
.sortBy(_._1)
}
Example 4: Configuration
// Set the package name and import the dependent classes
package org.hammerlab.hadoop
import java.io.{ ObjectInputStream, ObjectOutputStream }
import com.esotericsoftware.kryo.Kryo
import org.apache.hadoop.conf
import org.apache.hadoop.conf.{ Configuration ⇒ HadoopConfiguration }
import org.apache.spark.SparkContext
import org.apache.spark.broadcast.Broadcast
import org.hammerlab.hadoop.kryo.WritableSerializer
import org.hammerlab.kryo.serializeAs
class Configuration(@transient var value: HadoopConfiguration)
extends Serializable {
private def writeObject(out: ObjectOutputStream): Unit = {
value.write(out)
}
private def readObject(in: ObjectInputStream): Unit = {
value = new HadoopConfiguration(false)
value.readFields(in)
}
}
object Configuration {
def apply(loadDefaults: Boolean = true): Configuration =
new HadoopConfiguration(loadDefaults)
def apply(conf: HadoopConfiguration): Configuration =
new Configuration(conf)
implicit def wrapConfiguration(conf: HadoopConfiguration): Configuration =
apply(conf)
implicit def unwrapConfiguration(conf: Configuration): HadoopConfiguration =
conf.value
implicit def unwrapConfigurationBroadcast(confBroadcast: Broadcast[Configuration]): Configuration =
confBroadcast.value
implicit def sparkContextToHadoopConfiguration(sc: SparkContext): Configuration =
sc.hadoopConfiguration
implicit class ConfWrapper(val conf: HadoopConfiguration) extends AnyVal {
def serializable: Configuration =
Configuration(conf)
}
def register(kryo: Kryo): Unit = {
kryo.register(
classOf[conf.Configuration],
new WritableSerializer[conf.Configuration]
)
kryo.register(
classOf[Configuration],
serializeAs[Configuration, conf.Configuration]
)
}
}
Example 5: ShortestPathLengthsFromCSV
// Set the package name and import the dependent classes
package ml.sparkling.graph.examples
import ml.sparkling.graph.api.operators.algorithms.shortestpaths.ShortestPathsTypes
import ml.sparkling.graph.api.operators.algorithms.shortestpaths.ShortestPathsTypes._
import ml.sparkling.graph.operators.algorithms.shortestpaths.ShortestPathsAlgorithm
import ml.sparkling.graph.operators.algorithms.shortestpaths.pathprocessors.fastutils.FastUtilWithDistance.DataMap
import ml.sparkling.graph.operators.predicates.AllPathPredicate
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.graphx.{Graph, VertexId}
import scala.collection.JavaConversions._
object ShortestPathLengthsFromCSV extends ExampleApp {
def body()={
val shortestPaths = if (bucketSize == -1L)
ShortestPathsAlgorithm.computeShortestPathsLengths(partitionedGraph,AllPathPredicate,treatAsUndirected)
else
ShortestPathsAlgorithm.computeShortestPathsLengthsIterative(partitionedGraph,(g:Graph[_,_])=>bucketSize,treatAsUndirected)
val size: Broadcast[VertexId] =ctx.broadcast(partitionedGraph.numVertices)
partitionedGraph.outerJoinVertices(shortestPaths.vertices)(Util.dataTransformFunction(size) _).vertices.values.saveAsTextFile(out)
ctx.stop()
}
}
private object Util{
def dataTransformFunction(size: Broadcast[VertexId])(vId: VertexId,oldValue: String,pathsOption: Option[_ >: DataMap <: JMap[JLong, JDouble]])={
pathsOption.flatMap((paths)=>{
var entries=paths.entrySet().toList.sortBy(_.getKey)
val out=new StringBuilder()
out++=s"${oldValue},"
var a = 0L
while (a < size.value) {
if (entries.size > 0 && a == entries.head.getKey) {
out ++= s"${entries.head.getValue},"
entries = entries.drop(1)
}
else {
out ++= "0,"
}
a += 1L
}
out.setLength(out.length - 1)
Option(out.toString())
}).getOrElse(oldValue)
}
}
Example 6: TFIDF
// Set the package name and import the dependent classes
package io.elegans.exercises
import org.apache.spark.broadcast.Broadcast
class TFIDF {
def getTfIDFAnnotatedVector(annotated_sentence: Map[String, Long],
dictionary: => Broadcast[Map[String, (Long, Long)]], num_of_documents: Long) :
Map[String, Tuple4[Long, Long, Long, Double]] = {
val freq_annotations = annotated_sentence
val max_term_raw_freq: Long = 0 //TODO: get the max raw frequency from the list of terms
val tfidf_annotated_terms: Map[String, Tuple4[Long, Long, Long, Double]] = // map on terms of the sentence
freq_annotations.map(term => { // maps on terms annotations
val term_string : String = "" //TODO: complete with the term string
val term_raw_freq : Long = 0 //TODO: complete with term raw frequency in sentence
val dictionary_term: Tuple2[Long, Long] = (0: Long, 0: Long) //TODO: get the pair (unique_id, num_docs_with_terms) from dictionary
val term_id : Long = 0 //TODO: the term unique id
val num_docs_with_terms : Long = 0 //TODO: the term num_docs_with_terms
val term_tfidf : Double = 0 //TODO: the tfidf
val dictionary_term_with_tfidf = (term_string, (term_raw_freq, term_id, num_docs_with_terms, term_tfidf))
dictionary_term_with_tfidf
})
tfidf_annotated_terms
}
}
Example 7: SKGGenerator
// Set the package name and import the dependent classes
package kr.acon.generator.skg
import scala.Iterator
import org.apache.spark.SparkContext
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import it.unimi.dsi.fastutil.longs.LongOpenHashBigSet
import kr.acon.generator.BaseGenerator
import kr.acon.util.Util.RangePartitionFromDegreeRDD
object SKGGenerator extends BaseGenerator {
override val appName = "TrillionG: A Trillion-scale Synthetic Graph Generator using a Recursive Vector Model"
implicit class RecVecGenClass(self: RDD[Long]) extends Serializable {
def doRecVecGen(bskg: Broadcast[_ <: SKG], rng: Long) = {
self.mapPartitions {
case partitions =>
val skg = bskg.value
partitions.flatMap {
case u =>
val random = new SKG.randomClass(rng + u)
val degree = skg.getDegree(u, random)
if (degree < 1)
Iterator.empty
else {
val recVec = skg.getRecVec(u)
val sigmas = skg.getSigmas(recVec)
val p = recVec.last
val adjacency = new LongOpenHashBigSet(degree)
var i = 0
while (i < degree) {
adjacency.add(skg.determineEdge(u, recVec, sigmas, random))
i += 1
}
Iterator((u, adjacency))
}
}
}
}
}
override def run(sc: SparkContext): RDD[(Long, LongOpenHashBigSet)] = {
val verticesRDD = largeVertices(sc)
val bskg = sc.broadcast(SKG.constructFrom(parser))
val degreeRDD = verticesRDD.map(vid => (vid, bskg.value.getExpectedDegree(vid)))
val partitionedVertices = degreeRDD.rangePartition(parser.machine, parser.n, parser.e)
val edges = partitionedVertices.doRecVecGen(bskg, parser.rng)
edges
}
}
Example 8: TransformContext
// Set the package name and import the dependent classes
package com.microsoft.partnercatalyst.fortis.spark.transformcontext
import com.microsoft.partnercatalyst.fortis.spark.dto.{BlacklistedTerm, SiteSettings}
import com.microsoft.partnercatalyst.fortis.spark.transforms.image.ImageAnalyzer
import com.microsoft.partnercatalyst.fortis.spark.transforms.language.LanguageDetector
import com.microsoft.partnercatalyst.fortis.spark.transforms.locations.LocationsExtractorFactory
import com.microsoft.partnercatalyst.fortis.spark.transforms.sentiment.SentimentDetectorAuth
import com.microsoft.partnercatalyst.fortis.spark.transforms.topic.KeywordExtractor
import org.apache.spark.broadcast.Broadcast
case class TransformContext(
siteSettings: SiteSettings = null,
langToKeywordExtractor: Broadcast[Map[String, KeywordExtractor]] = null,
blacklist: Broadcast[Seq[BlacklistedTerm]] = null,
locationsExtractorFactory: Broadcast[LocationsExtractorFactory] = null,
// The following objects have small serialized forms, so we don't bother to broadcast them
// (instead, they're serialized into each task that uses them); a minimal sketch of this trade-off follows the example.
imageAnalyzer: ImageAnalyzer = null,
languageDetector: LanguageDetector = null,
sentimentDetectorAuth: SentimentDetectorAuth = null
)
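The comment in TransformContext points at a general rule of thumb: small read-only objects can simply be captured by the task closure, while larger lookup structures are worth broadcasting. Below is a minimal sketch of the two paths; the object name, the loadBigTable helper, and the data are hypothetical and only illustrate the pattern, not the project above.
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

object ClosureVsBroadcast {
  // Hypothetical loader for a large read-only lookup table.
  def loadBigTable(): Map[String, Int] = Map("spark" -> 1, "broadcast" -> 2)

  def run(sc: SparkContext, words: RDD[String]): Unit = {
    // Small object: captured by the task closure and serialized into every task.
    val stopWords = Set("the", "a", "of")
    val kept = words.filter(w => !stopWords.contains(w))

    // Large table: broadcast once per executor, read on executors through .value.
    val table = sc.broadcast(loadBigTable())
    val scored = kept.map(w => table.value.getOrElse(w, 0))
    println(scored.count())
  }
}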
Example 9: DawidSkenePartialModel
// Set the package name and import the dependent classes
package com.enriquegrodrigo.spark.crowd.types
import org.apache.spark.sql.Dataset
import org.apache.spark.broadcast.Broadcast
private[crowd] case class DawidSkenePartialModel(dataset: Dataset[DawidSkenePartial], params: Broadcast[DawidSkeneParams],
logLikelihood: Double, improvement: Double, nClasses: Int,
nAnnotators: Long) {
def modify(nDataset: Dataset[DawidSkenePartial] =dataset,
nParams: Broadcast[DawidSkeneParams] =params,
nLogLikelihood: Double =logLikelihood,
nImprovement: Double =improvement,
nNClasses: Int =nClasses,
nNAnnotators: Long =nAnnotators) =
new DawidSkenePartialModel(nDataset, nParams,
nLogLikelihood,
nImprovement,
nNClasses,
nNAnnotators)
}
Example 10: GladPartialModel
// Set the package name and import the dependent classes
package com.enriquegrodrigo.spark.crowd.types
import org.apache.spark.sql.Dataset
import org.apache.spark.broadcast.Broadcast
private[crowd] case class GladPartialModel(dataset: Dataset[GladPartial], params: Broadcast[GladParams],
logLikelihood: Double, improvement: Double,
nAnnotators: Int) {
def modify(nDataset: Dataset[GladPartial] =dataset,
nParams: Broadcast[GladParams] =params,
nLogLikelihood: Double =logLikelihood,
nImprovement: Double =improvement,
nNAnnotators: Int =nAnnotators) =
new GladPartialModel(nDataset, nParams,
nLogLikelihood,
nImprovement,
nNAnnotators)
}
Example 11: RaykarBinaryPartialModel
// Set the package name and import the dependent classes
package com.enriquegrodrigo.spark.crowd.types
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.DataFrame
import org.apache.spark.broadcast.Broadcast
private[crowd] case class RaykarBinaryPartialModel(dataset: DataFrame, annotatorData: Dataset[BinaryAnnotation],
mu: Dataset[BinarySoftLabel], dataStatistics: Dataset[RaykarBinaryStatistics],
params: Broadcast[RaykarBinaryParams], logLikelihood: Double,
improvement: Double, nAnnotators: Int, nFeatures: Int) {
def modify(nDataset: DataFrame =dataset,
nAnnotatorData: Dataset[BinaryAnnotation] =annotatorData,
nMu: Dataset[BinarySoftLabel] =mu,
nDataStatistics: Dataset[RaykarBinaryStatistics] = dataStatistics,
nParams: Broadcast[RaykarBinaryParams] =params,
nLogLikelihood: Double =logLikelihood,
nImprovement: Double =improvement,
nNAnnotators: Int =nAnnotators,
nNFeatures: Int =nFeatures) =
new RaykarBinaryPartialModel(nDataset, nAnnotatorData, nMu, nDataStatistics,
nParams, nLogLikelihood, nImprovement, nNAnnotators, nNFeatures)
}
Example 12: KajinoPartialModel
// Set the package name and import the dependent classes
package com.enriquegrodrigo.spark.crowd.types
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.DataFrame
import org.apache.spark.broadcast.Broadcast
private[crowd] case class KajinoPartialModel(dataset: DataFrame, annotatorData: Dataset[BinaryAnnotation],
w0: Broadcast[Array[Double]], w: Array[Array[Double]], priors: Broadcast[KajinoPriors],
variation: Double, nAnnotators: Int, nFeatures: Int) {
def modify(nDataset: DataFrame =dataset,
nAnnotatorData: Dataset[BinaryAnnotation] =annotatorData,
nW0: Broadcast[Array[Double]] =w0,
nW: Array[Array[Double]] = w,
nPriors: Broadcast[KajinoPriors] =priors,
nVariation: Double =variation,
nNAnnotators: Int =nAnnotators,
nNFeatures: Int =nFeatures) =
new KajinoPartialModel(nDataset, nAnnotatorData, nW0, nW, nPriors,
nVariation, nNAnnotators, nNFeatures)
}
Example 13: GladAlphaAggregator
// Set the package name and import the dependent classes
package com.enriquegrodrigo.spark.crowd.aggregators
import com.enriquegrodrigo.spark.crowd.types.GladPartial
import com.enriquegrodrigo.spark.crowd.types.GladParams
import com.enriquegrodrigo.spark.crowd.utils.Functions
import org.apache.spark.sql.{Encoder,Encoders}
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.broadcast.Broadcast
private[crowd] class GladAlphaAggregator(params: Broadcast[GladParams], learningRate: Double)
extends Aggregator[GladPartial, GladAlphaAggregatorBuffer, Double]{
def zero: GladAlphaAggregatorBuffer = GladAlphaAggregatorBuffer(0,-1)
def reduce(b: GladAlphaAggregatorBuffer, a: GladPartial) : GladAlphaAggregatorBuffer = {
val alpha = params.value.alpha
val al = alpha(a.annotator)
val bet = a.beta
val aest = a.est
val sigmoidValue = Functions.sigmoid(alpha(a.annotator)*a.beta)
val p = if (a.value == 1) a.est else (1-a.est)
val term = (p - sigmoidValue)*bet
GladAlphaAggregatorBuffer(b.agg + term, al)
}
def merge(b1: GladAlphaAggregatorBuffer, b2: GladAlphaAggregatorBuffer) : GladAlphaAggregatorBuffer = {
GladAlphaAggregatorBuffer(b1.agg + b2.agg, if (b1.alpha == -1) b2.alpha else b1.alpha )
}
def finish(reduction: GladAlphaAggregatorBuffer) = {
reduction.alpha + learningRate * reduction.agg
}
def bufferEncoder: Encoder[GladAlphaAggregatorBuffer] = Encoders.product[GladAlphaAggregatorBuffer]
def outputEncoder: Encoder[Double] = Encoders.scalaDouble
}
private[crowd] case class GladAlphaAggregatorBuffer(agg: Double, alpha: Double)
Example 14: GladBetaAggregator
// Set the package name and import the dependent classes
package com.enriquegrodrigo.spark.crowd.aggregators
import com.enriquegrodrigo.spark.crowd.types.GladPartial
import com.enriquegrodrigo.spark.crowd.types.GladParams
import com.enriquegrodrigo.spark.crowd.utils.Functions
import org.apache.spark.sql.{Encoder,Encoders}
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.broadcast.Broadcast
private[crowd] class GladBetaAggregator(params: Broadcast[GladParams], learningRate: Double)
extends Aggregator[GladPartial, GladBetaAggregatorBuffer, Double]{
def zero: GladBetaAggregatorBuffer = GladBetaAggregatorBuffer(0,-1)
def reduce(b: GladBetaAggregatorBuffer, a: GladPartial) : GladBetaAggregatorBuffer = {
val alpha = params.value.alpha
val al = alpha(a.annotator)
val bet = a.beta
val aest = a.est
val sigmoidValue = Functions.sigmoid(alpha(a.annotator)*a.beta)
val p = if (a.value == 1) a.est else (1-a.est)
val term = (p - sigmoidValue)*alpha(a.annotator)
GladBetaAggregatorBuffer(b.agg + term, a.beta)
}
def merge(b1: GladBetaAggregatorBuffer, b2: GladBetaAggregatorBuffer) : GladBetaAggregatorBuffer = {
GladBetaAggregatorBuffer(b1.agg + b2.agg, if (b1.beta == -1) b2.beta else b1.beta)
}
def finish(reduction: GladBetaAggregatorBuffer) = {
reduction.beta + learningRate * reduction.agg
}
def bufferEncoder: Encoder[GladBetaAggregatorBuffer] = Encoders.product[GladBetaAggregatorBuffer]
def outputEncoder: Encoder[Double] = Encoders.scalaDouble
}
private[crowd] case class GladBetaAggregatorBuffer(agg: Double, beta: Double)
Example 15: RaykarBinaryStatisticsAggregator
// Set the package name and import the dependent classes
package com.enriquegrodrigo.spark.crowd.aggregators
import com.enriquegrodrigo.spark.crowd.types.RaykarBinaryPartial
import com.enriquegrodrigo.spark.crowd.types.RaykarBinaryParams
import com.enriquegrodrigo.spark.crowd.utils.Functions
import org.apache.spark.sql.{Encoder,Encoders}
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.broadcast.Broadcast
private[crowd] class RaykarBinaryStatisticsAggregator(params: Broadcast[RaykarBinaryParams])
extends Aggregator[RaykarBinaryPartial, RaykarBinaryStatisticsAggregatorBuffer, (Double,Double)]{
def zero: RaykarBinaryStatisticsAggregatorBuffer = RaykarBinaryStatisticsAggregatorBuffer(1,1) //Binary
def reduce(b: RaykarBinaryStatisticsAggregatorBuffer, a: RaykarBinaryPartial) : RaykarBinaryStatisticsAggregatorBuffer = {
val alphaValue = params.value.alpha(a.annotator)
val alphaTerm = if (a.value == 1) alphaValue else 1-alphaValue
val betaValue = params.value.beta(a.annotator)
val betaTerm = if (a.value == 0) betaValue else 1-betaValue
RaykarBinaryStatisticsAggregatorBuffer(b.a * alphaTerm, b.b * betaTerm)
}
def merge(b1: RaykarBinaryStatisticsAggregatorBuffer, b2: RaykarBinaryStatisticsAggregatorBuffer) : RaykarBinaryStatisticsAggregatorBuffer = {
RaykarBinaryStatisticsAggregatorBuffer(b1.a * b2.a, b1.b*b2.b)
}
def finish(reduction: RaykarBinaryStatisticsAggregatorBuffer) = {
(reduction.a,reduction.b)
}
def bufferEncoder: Encoder[RaykarBinaryStatisticsAggregatorBuffer] = Encoders.product[RaykarBinaryStatisticsAggregatorBuffer]
def outputEncoder: Encoder[(Double,Double)] = Encoders.product[(Double,Double)]
}
private[crowd] case class RaykarBinaryStatisticsAggregatorBuffer(a: Double, b: Double)