Scala KryoSerializer类代码示例

本文整理汇总了Scala中org.apache.spark.serializer.KryoSerializer类的典型用法代码示例。如果您正苦于以下问题：Scala KryoSerializer类的具体用法？Scala KryoSerializer怎么用？Scala KryoSerializer使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了KryoSerializer类的8个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: Utils

//设置package包名称以及导入依赖的类
package com.larry.da.jobs.idmap

import org.apache.commons.io.output.ByteArrayOutputStream
import org.apache.hadoop.io.{NullWritable, BytesWritable}
import org.apache.spark.rdd.RDD
import org.apache.spark.serializer.KryoSerializer

import scala.reflect.ClassTag


/**
 * Helpers for the id-map jobs: a base-32 text codec for 64-bit ids and a
 * Kryo-backed way of writing an RDD to a Hadoop sequence file.
 */
object Utils {

  /** Base-32 alphabet; a character's position in this string is its 5-bit value. */
  val keys: String = "0123456789ABCDEFHIJKMNPRSTUVWXYZ"

  /** Reverse lookup of [[keys]]: alphabet character -> 5-bit value. */
  val keyDic: Map[Char, Int] = keys.zipWithIndex.toMap

  /**
   * Encodes a 64-bit id as 13 base-32 characters (most significant first)
   * followed by the marker character 'u'.
   *
   * @param uid id to encode; all 64 bits are treated as unsigned
   * @return a 14-character string
   */
  def compressAguid(uid: Long): String = {
    // 13 groups of 5 bits cover the 64 bits; the leading group only carries
    // the top 4 bits because the shift is unsigned.
    val digits = (12 to 0 by -1).map(group => keys(((uid >>> (5 * group)) & 31).toInt))
    digits.mkString + "u"
  }

  /**
   * Decodes the first 13 characters of `uid` back into a 64-bit id. Anything
   * after the 13th character (such as the trailing 'u') is ignored.
   *
   * @param uid text produced by [[compressAguid]]
   * @return the decoded id
   * @throws NoSuchElementException if `uid` is empty or contains a character
   *                                outside [[keys]]
   */
  def unCompressAguid(uid: String): Long = {
    val digits = uid.take(13).map(c => keyDic(c))
    // Read the head first so an empty input fails with NoSuchElementException.
    val first = digits.head.toLong
    digits.tail.foldLeft(first)((acc, digit) => (acc << 5) | digit)
  }

  /**
   * Writes `rdd` to `path` as a sequence file of Kryo-serialized arrays.
   *
   * Elements are batched into arrays of up to 10; each array becomes one
   * record whose key is `NullWritable` and whose value holds the Kryo bytes.
   *
   * @param rdd  data to save
   * @param path output location handed to `saveAsSequenceFile`
   */
  def saveAsObjectFile[T: ClassTag](rdd: RDD[T], path: String): Unit = {
    val kryoSerializer = new KryoSerializer(rdd.context.getConf)

    rdd.mapPartitions { iter =>
      // Creating a Kryo instance runs the registrator, so build it once per
      // partition instead of once per 10-element batch.
      val kryo = kryoSerializer.newKryo()

      iter.grouped(10).map { batch =>
        val splitArray = batch.toArray

        // Convert the batch to bytes.
        val bao = new ByteArrayOutputStream()
        val output = kryoSerializer.newKryoOutput()
        output.setOutputStream(bao)
        kryo.writeClassAndObject(output, splitArray)
        output.close()
        // Reset explicitly so every record is self-contained even if a
        // registrator switched Kryo's auto-reset off.
        kryo.reset()

        // The key field of the sequence file is not used.
        (NullWritable.get(), new BytesWritable(bao.toByteArray))
      }
    }.saveAsSequenceFile(path)
  }

}

开发者ID:larry88，项目名称:spark_da，代码行数:55，代码来源:Utils.scala

示例2: Kryo

//设置package包名称以及导入依赖的类
package org.hammerlab.spark.confs

import org.apache.spark.serializer.{ KryoRegistrator, KryoSerializer }
import org.hammerlab.spark.SparkConfBase

/**
 * Mix-in for [[SparkConfBase]] that registers Kryo-related Spark settings
 * through `sparkConf` when the trait is initialised.
 *
 * Override the three members below to change the registered values.
 */
trait Kryo {
  self: SparkConfBase =>

  /** Value registered for `spark.kryo.registrationRequired`. */
  def registrationRequired: Boolean = true

  /** Value registered for `spark.kryo.referenceTracking`. */
  def referenceTracking: Boolean = false

  /**
   * Registrator class to register under `spark.kryo.registrator`.
   * `null` (the default) means no registrator is registered.
   */
  def registrar: Class[_ <: KryoRegistrator] = null

  sparkConf(
    "spark.serializer" -> classOf[KryoSerializer].getCanonicalName,
    "spark.kryo.referenceTracking" -> referenceTracking.toString,
    "spark.kryo.registrationRequired" -> registrationRequired.toString
  )

  // Option(...) maps the null default to None, so the registrator key is
  // only registered when a subclass overrides `registrar`.
  Option(registrar)
    .foreach(
      clz =>
        sparkConf(
          "spark.kryo.registrator" -> clz.getCanonicalName
        )
    )
}

开发者ID:hammerlab，项目名称:spark-util，代码行数:27，代码来源:Kryo.scala

示例3: ConfsTest

//设置package包名称以及导入依赖的类
package org.hammerlab.spark

import org.apache.spark.serializer.KryoSerializer
import org.hammerlab.test.Suite

/**
 * Checks that the conf mix-ins contribute their settings to the `SparkConf`
 * returned by `makeSparkConf`.
 */
class ConfsTest
  extends Suite
    with SparkConfBase
    with confs.Kryo
    with confs.DynamicAllocation
    with confs.EventLog
    with confs.Speculation {
  test("make SparkContext") {
    val conf = makeSparkConf

    // Assert on the stored value; `get(key, default)` on its own verifies nothing.
    conf.get("spark.serializer") should be(classOf[KryoSerializer].getCanonicalName)
    conf.get("spark.dynamicAllocation.enabled") should be("true")
    conf.get("spark.eventLog.enabled") should be("true")
    // NOTE(review): confs.Speculation is not visible here, so the "true" default
    // is kept; this only fails if speculation was explicitly set to another value.
    conf.get("spark.speculation", "true") should be("true")
  }
}

开发者ID:hammerlab，项目名称:spark-util，代码行数:22，代码来源:ConfsTest.scala

示例4: HttpRddIOTest

//设置package包名称以及导入依赖的类
import java.util.Date

import org.apache.spark.SparkConf
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.sql.execution.streaming.BufferedTextCollector
import org.apache.spark.sql.execution.streaming.HttpTextReceiver
import org.apache.spark.sql.execution.streaming.HttpTextSender
import org.apache.spark.sql.execution.streaming.TextConsolePrinter
import org.junit.Test
import org.junit.Assert

/**
 * Round-trip test: sends an array of tuples to a local HTTP receiver with
 * Kryo and checks that the collected rows equal what was sent.
 */
class HttpRddIOTest {
	// Sample rows covering String, Int, Boolean, Char, Float, Double, Long and Date fields.
	val LINES1 = Array[(String, Int, Boolean, Char, Float, Double, Long, Date)](("hello1", 1, true, 'a', 0.1f, 0.1d, 1L, new Date(10000)),
		("hello2", 2, false, 'b', 0.2f, 0.2d, 2L, new Date(20000)), ("hello3", 3, true, 'c', 0.3f, 0.3d, 3L, new Date(30000)))

	@Test
	def testHttpRddIO(): Unit = {
		// Starts an HTTP server with a receiver servlet.
		val receiver = HttpTextReceiver.startReceiver(new SparkConf(), "receiver1", "/xxxx", 8080)
		val buffer = new BufferedTextCollector()

		try {
			receiver.addListener(new TextConsolePrinter())
			receiver.addListener(buffer)

			val sender = new HttpTextSender("http://localhost:8080/xxxx")
			val kryoSerializer = new KryoSerializer(new SparkConf())
			sender.sendObjectArray(kryoSerializer, "topic-1", 1, LINES1)
		} finally {
			// Always stop the receiver so a failed send does not leave port 8080 bound.
			receiver.stop()
		}

		val data = buffer.dump().map(_._1).toArray
		Assert.assertArrayEquals(LINES1.asInstanceOf[Array[Object]], data.asInstanceOf[Array[Object]])
	}
}

开发者ID:bluejoe2008，项目名称:spark-http-stream，代码行数:33，代码来源:HttpRddIOTest.scala

示例5: TestEnvironment

//设置package包名称以及导入依赖的类
package astraea.spark

import geotrellis.spark.testkit.{TestEnvironment => GeoTrellisTestEnvironment}

import org.apache.spark.SparkContext
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.sql.{SQLContext, SparkSession}
import org.scalatest._

import scala.util.Properties

/**
 * Test mix-in that provides a local [[SparkSession]] and exposes its
 * `SparkContext` and `SQLContext` to the suite.
 *
 * Kryo serialization is configured unless the `GEOTRELLIS_USE_JAVA_SER`
 * environment variable is set.
 */
trait TestEnvironment extends GeoTrellisTestEnvironment { self: Suite with BeforeAndAfterAll =>
  val _spark: SparkSession = {
    System.setProperty("spark.driver.port", "0")
    System.setProperty("spark.hostPort", "0")
    System.setProperty("spark.ui.enabled", "false")

    try {
      // Build the configuration BEFORE creating the session:
      // SparkContext.getConf returns a copy, so settings applied to it after
      // the session exists never reach the running context.
      val conf = new org.apache.spark.SparkConf()

      // Shortcut out of using Kryo serialization if we want to test against
      // java serialization.
      if (Properties.envOrNone("GEOTRELLIS_USE_JAVA_SER").isEmpty) {
        conf
          .set("spark.serializer", classOf[KryoSerializer].getName)
          .set("spark.kryoserializer.buffer.max", "500m")
          .set("spark.kryo.registrationRequired", "false")
        // Defined outside this block; called with the conf as in the original code.
        setKryoRegistrator(conf)
      }

      SparkSession.builder()
        .master("local")
        .appName("Test Context")
        .config(conf)
        .getOrCreate()
    } finally {
      // Clear the temporary properties even when session creation fails.
      System.clearProperty("spark.driver.port")
      System.clearProperty("spark.hostPort")
      System.clearProperty("spark.ui.enabled")
    }
  }

  /** Spark context backing `_spark`. */
  override implicit def sc: SparkContext = _spark.sparkContext

  /** SQL context of `_spark`, obtained on first use. */
  lazy val sql: SQLContext = _spark.sqlContext
}

开发者ID:s22s，项目名称:avro2spark，代码行数:45，代码来源:TestEnvironment.scala

示例6: SparkSupport

//设置package包名称以及导入依赖的类
package geotrellis.util

import geotrellis.config.Dataset
import geotrellis.spark.io.hadoop.formats.TemporalGeoTiffInputFormat
import geotrellis.spark.io.kryo.KryoRegistrator
import geotrellis.spark.util.SparkUtils

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.serializer.KryoSerializer

/** Mix-in for code that needs an implicit Spark context and a Hadoop configuration. */
trait SparkSupport {
  // Supplied by the implementing class; implicit so it can satisfy
  // implicit SparkContext parameters.
  implicit val sc: SparkContext

  // Taken from SparkUtils.hadoopConfiguration on first access; @transient
  // excludes the field from Java serialization of the enclosing instance.
  @transient lazy val conf = SparkUtils.hadoopConfiguration
}

/** Factory and time-configuration helpers for the integration-test Spark context. */
object SparkSupport {

  /**
   * Creates a Kryo-enabled `SparkContext` for the integration tests and
   * applies the given time tag and format to its Hadoop configuration.
   *
   * @param timeTag    value passed to `TemporalGeoTiffInputFormat.setTimeTag`
   * @param timeFormat value passed to `TemporalGeoTiffInputFormat.setTimeFormat`
   * @return the newly created, configured context
   */
  def sparkContext(timeTag: String = "ISO_TIME", timeFormat: String = "yyyy-MM-dd'T'HH:mm:ss"): SparkContext = {
    val sparkConf = new SparkConf()
      .setAppName("GeoTrellis Integration Tests")
      .set("spark.serializer", classOf[KryoSerializer].getName)
      .set("spark.kryo.registrator", classOf[KryoRegistrator].getName)
      .setJars(SparkContext.jarOfObject(this).toList)
    val context = new SparkContext(sparkConf)

    configureTime(timeTag, timeFormat)(context)
  }

  /**
   * Sets the time tag and time format on the Hadoop configuration of `sc`.
   *
   * @return `sc`, so calls can be chained
   */
  def configureTime(timeTag: String, timeFormat: String)(implicit sc: SparkContext): SparkContext = {
    val hadoopConf = sc.hadoopConfiguration
    TemporalGeoTiffInputFormat.setTimeTag(hadoopConf, timeTag)
    TemporalGeoTiffInputFormat.setTimeFormat(hadoopConf, timeFormat)

    sc
  }

  /**
   * Applies the dataset's optional time tag and time format, where present,
   * to the Hadoop configuration of `sc`.
   *
   * @return `sc`, so calls can be chained
   */
  def configureTime(dataset: Dataset)(implicit sc: SparkContext): SparkContext = {
    for (tag <- dataset.output.keyIndexMethod.timeTag) {
      TemporalGeoTiffInputFormat.setTimeTag(sc.hadoopConfiguration, tag)
    }
    for (format <- dataset.output.keyIndexMethod.timeFormat) {
      TemporalGeoTiffInputFormat.setTimeFormat(sc.hadoopConfiguration, format)
    }

    sc
  }
}

开发者ID:geotrellis，项目名称:geotrellis-integration-tests-tool，代码行数:43，代码来源:SparkSupport.scala

示例7: SparkConnector

//设置package包名称以及导入依赖的类
package org.custom.etl.utils

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.SQLContext
import org.apache.spark.serializer.KryoSerializer

/** Holds the Spark configuration, context and SQL context used by the ETL job. */
object SparkConnector {

  // Elasticsearch settings, currently disabled:
  //   conf.set("es.nodes", "localhost")
  //   conf.set("es.port", "9200")
  //   conf.set("es.index.auto.create", "true")
  //   conf.set("es.nodes.wan.only", "true")

  /** Local four-thread configuration with Kryo as the serializer. */
  val conf = new SparkConf()
    .setMaster("local[4]")
    .setAppName("CUSTOM-ETL-SPARK2ES")
    .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  /** Spark context created from `conf`. */
  val sc = new SparkContext(conf)

  /** SQL context bound to `sc`. */
  val sqlContext = new SQLContext(sc)

}

开发者ID:OElesin，项目名称:custom-etl-spark2es，代码行数:21，代码来源:SparkConnector.scala

示例8: SparkJobRunner

//设置package包名称以及导入依赖的类
package eu.shiftforward.adstax.spark

import org.apache.spark.SparkConf
import org.apache.spark.serializer.KryoSerializer


/** Command-line entry point that instantiates a `SparkJob` by class name and runs it. */
object SparkJobRunner {

  /**
   * Runs the job named after the `--job` flag.
   *
   * Prints "No job specified" and does nothing else when the flag is absent
   * or has no value. Arguments following the class name are passed to the job.
   *
   * @param args command-line arguments, expected to contain `--job <class name>`
   */
  def main(args: Array[String]): Unit = {
    val ji = args.indexOf("--job")
    if (ji == -1 || ji + 1 >= args.length) {
      println("No job specified")
    } else {
      // Class.newInstance() is deprecated; go through the no-arg constructor.
      val job = getClass.getClassLoader
        .loadClass(args(ji + 1))
        .getDeclaredConstructor()
        .newInstance()
        .asInstanceOf[SparkJob]
      val conf = new SparkConf()
        .setAppName(job.name)
        .set("es.index.read.missing.as.empty", "yes")
        .set("spark.serializer", classOf[KryoSerializer].getName)
      implicit val context: AdStaxSparkContext = new AdStaxSparkContext(conf)
      try {
        job.run(args.drop(ji + 2))
      } finally {
        // Stop the context whether or not the job succeeded.
        context.stop()
      }
    }
  }
}

开发者ID:ShiftForward，项目名称:adstax-sdk-scala，代码行数:27，代码来源:SparkJobRunner.scala

注：本文中的org.apache.spark.serializer.KryoSerializer类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。