本文整理汇总了 Scala 中 org.apache.spark.serializer.KryoSerializer 类的典型用法代码示例。如果您正苦于以下问题：Scala KryoSerializer 类的具体用法？Scala KryoSerializer 怎么用？Scala KryoSerializer 使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了KryoSerializer类的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: Utils
//设置package包名称以及导入依赖的类
package com.larry.da.jobs.idmap
import org.apache.commons.io.output.ByteArrayOutputStream
import org.apache.hadoop.io.{NullWritable, BytesWritable}
import org.apache.spark.rdd.RDD
import org.apache.spark.serializer.KryoSerializer
import scala.reflect.ClassTag
/**
 * Helpers for the id-mapping jobs: a base-32 text codec for 64-bit ids and a
 * Kryo-based way of writing an RDD to a Hadoop sequence file.
 */
object Utils {

  /** Alphabet of the base-32 encoding; a character's position is its 5-bit digit value. */
  val keys: String = "0123456789ABCDEFHIJKMNPRSTUVWXYZ"

  /** Reverse lookup of [[keys]]: character -> digit value (0 to 31). */
  val keyDic: Map[Char, Int] = keys.zipWithIndex.toMap

  // 13 digits of 5 bits each (65 bits) are enough to hold every bit of a 64-bit Long.
  private val DigitCount = 13
  private val BitsPerDigit = 5
  private val DigitMask = 31L

  /**
   * Encodes a 64-bit id as 13 base-32 characters (most significant digit first)
   * followed by the literal suffix "u".
   *
   * @param uid the id to encode; all 64 bits are used, so negative values are fine
   * @return a 14-character string
   */
  def compressAguid(uid: Long): String = {
    val digits = (DigitCount - 1 to 0 by -1).map { position =>
      // Unsigned shift so the sign bit is treated as an ordinary data bit.
      keys(((uid >>> (BitsPerDigit * position)) & DigitMask).toInt)
    }
    digits.mkString + "u"
  }

  /**
   * Decodes a string produced by [[compressAguid]] back into the 64-bit id.
   * Only the first 13 characters are read, so the trailing "u" is ignored.
   *
   * @param uid the encoded id
   * @return the decoded id
   * @throws NoSuchElementException if `uid` is empty or one of its first 13
   *                                characters is not part of [[keys]]
   */
  def unCompressAguid(uid: String): Long = {
    val digits = uid.take(DigitCount).map { c =>
      keyDic.getOrElse(
        c,
        throw new NoSuchElementException(s"unCompressAguid: '$c' is not a valid id character in \"$uid\"")
      )
    }
    if (digits.isEmpty) throw new NoSuchElementException("unCompressAguid: empty id")
    digits.foldLeft(0L)((acc, digit) => (acc << BitsPerDigit) | digit)
  }

  /**
   * Writes `rdd` to `path` as a sequence file of Kryo-serialized blocks.
   * Records are packed into arrays of up to 10 elements; each array becomes one
   * `BytesWritable` value, and the key is always `NullWritable`.
   *
   * @param rdd  the data to persist
   * @param path destination passed to `saveAsSequenceFile`
   */
  def saveAsObjectFile[T: ClassTag](rdd: RDD[T], path: String): Unit = {
    val kryoSerializer = new KryoSerializer(rdd.context.getConf)
    val recordsPerBlock = 10
    rdd
      .mapPartitions { iter =>
        // Creating a Kryo instance runs all class registrations (including the
        // user's registrator), so build it once per partition rather than once
        // per block.
        val kryo = kryoSerializer.newKryo()
        iter.grouped(recordsPerBlock).map { group =>
          val bao = new ByteArrayOutputStream()
          val output = kryoSerializer.newKryoOutput()
          output.setOutputStream(bao)
          kryo.writeClassAndObject(output, group.toArray)
          // close() flushes the Kryo buffer into `bao` before the bytes are read.
          output.close()
          // The key field of the sequence file is deliberately unused.
          (NullWritable.get(), new BytesWritable(bao.toByteArray))
        }
      }
      .saveAsSequenceFile(path)
  }
}
示例2: Kryo
//设置package包名称以及导入依赖的类
package org.hammerlab.spark.confs
import org.apache.spark.serializer.{ KryoRegistrator, KryoSerializer }
import org.hammerlab.spark.SparkConfBase
/**
 * Mix-in for a `SparkConfBase` that registers Kryo serialization settings.
 *
 * The `sparkConf(...)` calls below run while the trait is being initialized, so
 * the three hooks should be overridden with `def` (not `val`); an overriding
 * `val` would not be initialized yet when it is read here.
 */
trait Kryo {
  self: SparkConfBase =>

  /** Value written to `spark.kryo.registrationRequired`. */
  def registrationRequired: Boolean = true

  /** Value written to `spark.kryo.referenceTracking`. */
  def referenceTracking: Boolean = false

  /** Optional registrator class; `null` (the default) means no registrator is configured. */
  def registrar: Class[_ <: KryoRegistrator] = null

  sparkConf(
    "spark.serializer" -> classOf[KryoSerializer].getCanonicalName,
    "spark.kryo.referenceTracking" -> referenceTracking.toString,
    "spark.kryo.registrationRequired" -> registrationRequired.toString
  )

  // Option(...) turns the null default into None so the key is only set when a
  // registrator was actually supplied.
  Option(registrar)
    .foreach(
      clz =>
        sparkConf(
          "spark.kryo.registrator" -> clz.getCanonicalName
        )
    )
}
示例3: ConfsTest
//设置package包名称以及导入依赖的类
package org.hammerlab.spark
import org.apache.spark.serializer.KryoSerializer
import org.hammerlab.test.Suite
/**
 * Checks that mixing the `confs` traits into a `SparkConfBase` produces a
 * configuration containing the settings they contribute.
 */
class ConfsTest
  extends Suite
    with SparkConfBase
    with confs.Kryo
    with confs.DynamicAllocation
    with confs.EventLog
    with confs.Speculation {

  test("make SparkContext") {
    val conf = makeSparkConf

    // Previously this line called get(key, default) and dropped the result, so
    // nothing was verified; assert the value instead.
    conf.get("spark.serializer") should be(classOf[KryoSerializer].getCanonicalName)
    conf.get("spark.dynamicAllocation.enabled") should be("true")
    conf.get("spark.eventLog.enabled") should be("true")
    // Keeps the original default so an unset key is still accepted; only a
    // conflicting value fails.
    conf.get("spark.speculation", "true") should be("true")
  }
}
示例4: HttpRddIOTest
//设置package包名称以及导入依赖的类
import java.util.Date
import org.apache.spark.SparkConf
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.sql.execution.streaming.BufferedTextCollector
import org.apache.spark.sql.execution.streaming.HttpTextReceiver
import org.apache.spark.sql.execution.streaming.HttpTextSender
import org.apache.spark.sql.execution.streaming.TextConsolePrinter
import org.junit.Test
import org.junit.Assert
/**
 * Round-trip test: sends an array of tuples over HTTP with Kryo serialization
 * and checks that the receiver collected the same rows.
 */
class HttpRddIOTest {
  /** Sample rows covering each primitive column type plus a Date. */
  val LINES1 = Array[(String, Int, Boolean, Char, Float, Double, Long, Date)](
    ("hello1", 1, true, 'a', 0.1f, 0.1d, 1L, new Date(10000)),
    ("hello2", 2, false, 'b', 0.2f, 0.2d, 2L, new Date(20000)),
    ("hello3", 3, true, 'c', 0.3f, 0.3d, 3L, new Date(30000)))

  @Test
  def testHttpRddIO(): Unit = {
    // Starts an HTTP server with a receiver servlet.
    val receiver = HttpTextReceiver.startReceiver(new SparkConf(), "receiver1", "/xxxx", 8080)
    val buffer = new BufferedTextCollector()
    try {
      receiver.addListener(new TextConsolePrinter())
      receiver.addListener(buffer)

      val sender = new HttpTextSender("http://localhost:8080/xxxx")
      val kryoSerializer = new KryoSerializer(new SparkConf())
      sender.sendObjectArray(kryoSerializer, "topic-1", 1, LINES1)
    } finally {
      // Always stop the receiver so a failed send does not leave port 8080 bound.
      receiver.stop()
    }

    val data = buffer.dump().map(_._1).toArray
    Assert.assertArrayEquals(LINES1.asInstanceOf[Array[Object]], data.asInstanceOf[Array[Object]])
  }
}
示例5: TestEnvironment
//设置package包名称以及导入依赖的类
package astraea.spark
import geotrellis.spark.testkit.{TestEnvironment => GeoTrellisTestEnvironment}
import org.apache.spark.SparkContext
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.sql.{SQLContext, SparkSession}
import org.scalatest._
import scala.util.Properties
/**
 * Test mix-in that builds a local `SparkSession` and exposes its
 * `SparkContext` / `SQLContext` to suites.
 */
trait TestEnvironment extends GeoTrellisTestEnvironment { self: Suite with BeforeAndAfterAll =>

  /** Local session shared by the suite; created eagerly when the trait is initialized. */
  val _spark: SparkSession = {
    // Temporary JVM-wide overrides, only wanted while the session starts up.
    System.setProperty("spark.driver.port", "0")
    System.setProperty("spark.hostPort", "0")
    System.setProperty("spark.ui.enabled", "false")

    try {
      val session = SparkSession.builder()
        .master("local")
        .appName("Test Context")
        .getOrCreate()

      // Shortcut out of using Kryo serialization if we want to test against
      // java serialization.
      if (Properties.envOrNone("GEOTRELLIS_USE_JAVA_SER").isEmpty) {
        // NOTE(review): SparkContext.getConf is documented as returning a copy
        // of the configuration, so these settings may never reach the session
        // created above. Confirm whether they should be passed to the builder
        // instead.
        val conf = session.sparkContext.getConf
        conf
          .set("spark.serializer", classOf[KryoSerializer].getName)
          .set("spark.kryoserializer.buffer.max", "500m")
          .set("spark.kryo.registrationRequired", "false")
        setKryoRegistrator(conf)
      }

      session
    } finally {
      // Clear the overrides even when session creation fails, so they cannot
      // leak into other code running in the same JVM.
      System.clearProperty("spark.driver.port")
      System.clearProperty("spark.hostPort")
      System.clearProperty("spark.ui.enabled")
    }
  }

  /** The `SparkContext` of [[_spark]]. */
  override implicit def sc: SparkContext = _spark.sparkContext

  /** The `SQLContext` of [[_spark]], resolved on first use. */
  lazy val sql: SQLContext = _spark.sqlContext
}
示例6: SparkSupport
//设置package包名称以及导入依赖的类
package geotrellis.util
import geotrellis.config.Dataset
import geotrellis.spark.io.hadoop.formats.TemporalGeoTiffInputFormat
import geotrellis.spark.io.kryo.KryoRegistrator
import geotrellis.spark.util.SparkUtils
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.serializer.KryoSerializer
/** Mix-in for code that needs a SparkContext together with the Hadoop configuration obtained from SparkUtils. */
trait SparkSupport {
// Abstract: must be supplied by the implementing class. Declared implicit so it
// can satisfy implicit SparkContext parameters.
implicit val sc: SparkContext
// Evaluated on first access (lazy) and marked @transient so it is left out of
// Java serialization.
@transient lazy val conf = SparkUtils.hadoopConfiguration
}
/** Factory and configuration helpers for the SparkContext used by the integration tests. */
object SparkSupport {

  /**
   * Creates a Kryo-enabled SparkContext and applies the temporal GeoTiff settings to it.
   *
   * @param timeTag    value handed to `TemporalGeoTiffInputFormat.setTimeTag`
   * @param timeFormat value handed to `TemporalGeoTiffInputFormat.setTimeFormat`
   * @return the newly created context
   */
  def sparkContext(timeTag: String = "ISO_TIME", timeFormat: String = "yyyy-MM-dd'T'HH:mm:ss"): SparkContext = {
    val settings = new SparkConf()
      .setAppName("GeoTrellis Integration Tests")
      .set("spark.serializer", classOf[KryoSerializer].getName)
      .set("spark.kryo.registrator", classOf[KryoRegistrator].getName)
      .setJars(SparkContext.jarOfObject(this).toList)
    val context = new SparkContext(settings)
    configureTime(timeTag, timeFormat)(context)
  }

  /**
   * Stores the time tag and time format in the Hadoop configuration of `sc`.
   *
   * @return the same `sc`, to allow chaining
   */
  def configureTime(timeTag: String, timeFormat: String)(implicit sc: SparkContext): SparkContext = {
    val hadoopConf = sc.hadoopConfiguration
    TemporalGeoTiffInputFormat.setTimeTag(hadoopConf, timeTag)
    TemporalGeoTiffInputFormat.setTimeFormat(hadoopConf, timeFormat)
    sc
  }

  /**
   * Same as the two-string overload, but reads the optional values from the
   * dataset's key index method; a value that is absent is left untouched.
   *
   * @return the same `sc`, to allow chaining
   */
  def configureTime(dataset: Dataset)(implicit sc: SparkContext): SparkContext = {
    for (tag <- dataset.output.keyIndexMethod.timeTag)
      TemporalGeoTiffInputFormat.setTimeTag(sc.hadoopConfiguration, tag)
    for (format <- dataset.output.keyIndexMethod.timeFormat)
      TemporalGeoTiffInputFormat.setTimeFormat(sc.hadoopConfiguration, format)
    sc
  }
}
示例7: SparkConnector
//设置package包名称以及导入依赖的类
package org.custom.etl.utils
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.SQLContext
import org.apache.spark.serializer.KryoSerializer
/**
 * Process-wide Spark handles for the ETL job. The contexts are created when
 * this object is first accessed.
 */
object SparkConnector {
  /** Local 4-thread configuration with Kryo serialization enabled. */
  val conf: SparkConf = new SparkConf().setMaster("local[4]").setAppName("CUSTOM-ETL-SPARK2ES")
  // conf.set("es.nodes", "localhost")
  // conf.set("es.port", "9200")
  // conf.set("es.index.auto.create", "true")
  // Derive the class name from the imported class so a typo or rename is
  // caught at compile time instead of at job start-up.
  conf.set("spark.serializer", classOf[KryoSerializer].getName)
  // conf.set("es.nodes.wan.only", "true")

  /** Spark context built from [[conf]]. */
  val sc: SparkContext = new SparkContext(conf)

  /** SQL context bound to [[sc]]. */
  val sqlContext: SQLContext = new SQLContext(sc)
}
示例8: SparkJobRunner
//设置package包名称以及导入依赖的类
package eu.shiftforward.adstax.spark
import org.apache.spark.SparkConf
import org.apache.spark.serializer.KryoSerializer
/** Command-line entry point that instantiates and runs a `SparkJob` by class name. */
object SparkJobRunner {

  /**
   * Expects `--job <fully.qualified.ClassName>` somewhere in `args`; everything
   * after the class name is forwarded to the job. Prints a message and does
   * nothing else when the flag or its value is missing.
   */
  def main(args: Array[String]): Unit = {
    val flagPos = args.indexOf("--job")
    val jobNameGiven = flagPos != -1 && flagPos + 1 < args.length

    if (!jobNameGiven) {
      println("No job specified")
    } else {
      val jobClassName = args(flagPos + 1)
      val job = getClass.getClassLoader
        .loadClass(jobClassName)
        .newInstance()
        .asInstanceOf[SparkJob]

      val sparkConf = new SparkConf()
        .setAppName(job.name)
        .set("es.index.read.missing.as.empty", "yes")
        .set("spark.serializer", classOf[KryoSerializer].getName)

      // Implicit so that it is available to the job's run method.
      implicit val context = new AdStaxSparkContext(sparkConf)

      val jobArgs = args.drop(flagPos + 2)
      try job.run(jobArgs)
      finally context.stop()
    }
  }
}