

Scala SharedSparkContext Class Code Examples

This article collects typical usage examples of the com.holdenkarau.spark.testing.SharedSparkContext class in Scala. If you have been wondering how to use the SharedSparkContext class, or are looking for concrete examples of it in practice, the curated code samples below should help.


The following presents 15 code examples of the SharedSparkContext class, sorted by popularity by default.
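Before diving into the examples, here is a minimal sketch of how SharedSparkContext is typically wired into a project. The trait starts a local SparkContext before the suite runs, exposes it as sc, and stops it after all tests finish. The sbt version string, package, and class names below are illustrative placeholders, not taken from the examples on this page; match the spark-testing-base version to your Spark release.

// build.sbt -- the version string follows the "<sparkVersion>_<libraryVersion>" convention; the value here is only an example
libraryDependencies += "com.holdenkarau" %% "spark-testing-base" % "2.4.5_0.14.0" % "test"
// Spark test suites are usually not run in parallel within a single JVM
parallelExecution in Test := false

// src/test/scala/example/MinimalSharedSparkContextTest.scala (hypothetical path and package)
package example

import org.scalatest.FunSuite
import com.holdenkarau.spark.testing.SharedSparkContext

class MinimalSharedSparkContextTest extends FunSuite with SharedSparkContext {
  test("the shared SparkContext can run a simple job") {
    val rdd = sc.parallelize(1 to 10)
    assert(rdd.reduce(_ + _) === 55)
  }
}

All fifteen examples below follow the same pattern: mix SharedSparkContext into a ScalaTest suite and use the provided sc directly.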

Example 1: ColumnsTest

// Package declaration and required imports
package com.drakeconsulting.big_data_maker

import org.scalatest.FunSuite
import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.types.{StructField, StringType, LongType, DoubleType}

class ColumnsTest extends FunSuite with SharedSparkContext {
  val numLoops = 100

  test("test StringConstant") {
    val s1 = new StringConstant("f1", "abc")
    assert("abc" === s1.getValue(1))
    assert(StructField("f1", StringType, false) == s1.getStructField)
  }

  test("test RandomLong") {
    val s1 = new RandomLong("f1", 666666L)
    for (x <- 1 to numLoops) {
      assert(s1.getValue(1) >= 0)
      assert(s1.getValue(1) <= 666666L)
    }
    assert(StructField("f1", LongType, false) == s1.getStructField)
  }

  test("test RandomDouble") {
    val s1 = new RandomDouble("f1", 666666.00)
    for (x <- 1 to numLoops) {
      assert(s1.getValue(1) >= 0)
      assert(s1.getValue(1) <= 666666.00)
    }
    assert(StructField("f1", DoubleType, false) == s1.getStructField)
  }

  test("test Categorical") {
    val list = List("a", "b", "c", "d")
    val s1 = new Categorical("f1", list)
    for (x <- 1 to numLoops) {
      val v = s1.getValue(1)
      assert(list.exists(key => v.contains(key)))
    }
    assert(StructField("f1", StringType, false) == s1.getStructField)
  }
} 
Author: dondrake, Project: BigDataMaker, Lines: 45, Source: TestColumns.scala

Example 2: BigDataMakerTest

// Package declaration and required imports
package com.drakeconsulting.big_data_maker

import org.scalatest.FunSuite
import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.SQLContext

class BigDataMakerTest extends FunSuite with SharedSparkContext {
  test("first") {
    val sqlContext = new SQLContext(sc)
    val bd = new BigData(sqlContext, "/tmp/b", 5, 100)
    bd.addColumn(new StringConstant("f1", "abc"))
    bd.addColumn(new StringConstant("f2", "def"))

    val df = bd._createDataFrame
    df.show
    assert(500 === df.count)
    assert(2 === df.columns.length)
  }

  test("col names") {
    val sqlContext = new SQLContext(sc)
    val bd = new BigData(sqlContext, "/tmp/b", 5, 100)
    bd.addColumn(new StringConstant("f1", "abc"))
    bd.addColumn(new StringConstant("", "def"))

    assert("f1" === bd.cols(0).name)
    assert("f_1" === bd.cols(1).name)
  }
} 
Author: dondrake, Project: BigDataMaker, Lines: 30, Source: TestBigDataMaker.scala

Example 3: AppTest

// Package declaration and required imports
package com.github.dongjinleekr.spark

import org.scalatest.FunSuite
import com.holdenkarau.spark.testing.{RDDComparisons, SharedSparkContext}

/*
* see: http://blog.cloudera.com/blog/2015/09/making-apache-spark-testing-easy-with-spark-testing-base/
* see: https://github.com/holdenk/spark-testing-base/wiki/SharedSparkContext
* */
class AppTest extends FunSuite with SharedSparkContext {
	test("test initializing spark context") {
		val list = List(1, 2, 3, 4)
		val rdd = sc.parallelize(list)

		assert(rdd.count === list.length)
	}
} 
Author: dongjinleekr, Project: spark-sbt-quickstart, Lines: 18, Source: AppTest.scala

Example 4: FunctionalSyntaxOWLExpressionsRDDBuilderTest

// Package declaration and required imports
package net.sansa_stack.owl.spark.rdd

import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.FunSuite


class FunctionalSyntaxOWLExpressionsRDDBuilderTest extends FunSuite with SharedSparkContext {
  var _rdd: OWLExpressionsRDD = null

  def rdd = {
    if (_rdd == null) {
      _rdd = FunctionalSyntaxOWLExpressionsRDDBuilder.build(
        sc, "src/test/resources/ont_functional.owl")
      _rdd.cache()
    }

    _rdd
  }

  test("There should be three annotation lines with full URIs") {
    val res = rdd.filter(line => line.startsWith("Annotation(")).collect()
    val expected = List(
      "Annotation(<http://ex.com/foo#hasName> \"Name\")",
      "Annotation(<http://ex.com/bar#hasTitle> \"Title\")",
      """Annotation(<http://ex.com/default#description> "A longer
description running over
several lines")""")

    assert(res.length == 3)
    for (e <- expected) {
      assert(res.contains(e))
    }
  }

  
//  test("There should be an import statement") {
//    val res = rdd.filter(line => line.startsWith("Import")).collect()
//    assert(res.length == 1)
//    assert(res(0) == "Import(<http://www.example.com/my/2.0>)")
//  }

  test("There should not be any empty lines") {
    val res = rdd.filter(line => line.trim.isEmpty).collect()
    assert(res.length == 0)
  }

  test("There should not be any comment lines") {
    val res = rdd.filter(line => line.trim.startsWith("#")).collect()
    assert(res.length == 0)
  }

  test("There should be a DisjointObjectProperties axiom") {
    val res = rdd.filter(line => line.trim.startsWith("DisjointObjectProperties")).collect()
    assert(res.length == 1)
  }

  test("The total number of axioms should be correct") {
    val total = 70 // = 71 minus the commented-out Import(...) line
    assert(rdd.count() == total)
  }
} 
Author: SANSA-Stack, Project: SANSA-OWL, Lines: 62, Source: FunctionalSyntaxOWLExpressionsRDDBuilderTest.scala

Example 5: FunctionalSyntaxOWLExpressionsDatasetBuilderTest

// Package declaration and required imports
package net.sansa_stack.owl.spark.dataset

import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.SparkSession
import org.scalatest.FunSuite


class FunctionalSyntaxOWLExpressionsDatasetBuilderTest extends FunSuite with SharedSparkContext {
  lazy val spark = SparkSession.builder().appName(sc.appName).master(sc.master).getOrCreate()
  var _dataset: OWLExpressionsDataset = null
  def dataset: OWLExpressionsDataset = {
    if (_dataset == null) {
      _dataset = FunctionalSyntaxOWLExpressionsDatasetBuilder.build(
        spark, "src/test/resources/ont_functional.owl")
      _dataset.cache()
    }
    _dataset
  }

  test("There should be three annotation lines with full URIs") {
    val res = dataset.filter(line => line.startsWith("Annotation(")).collectAsList()
    val expected = List(
      "Annotation(<http://ex.com/foo#hasName> \"Name\")",
      "Annotation(<http://ex.com/bar#hasTitle> \"Title\")",
      """Annotation(<http://ex.com/default#description> "A longer
description running over
several lines")""")
    assert(res.size() == 3)
    for (e <- expected) {
      assert(res.contains(e))
    }
  }

  
  //  test("There should be an import statement") {
  //    val res = rdd.filter(line => line.startsWith("Import")).collect()
  //    assert(res.length == 1)
  //    assert(res(0) == "Import(<http://www.example.com/my/2.0>)")
  //  }

  test("There should not be any empty lines") {
    val res = dataset.filter(line => line.trim.isEmpty)
    assert(res.count() == 0)
  }

  test("There should not be any comment lines") {
    val res = dataset.filter(line => line.trim.startsWith("#"))
    assert(res.count() == 0)
  }

  test("There should be a DisjointObjectProperties axiom") {
    val res = dataset.filter(line => line.trim.startsWith("DisjointObjectProperties"))
    assert(res.count() == 1)
  }

  test("The total number of axioms should be correct") {
    val total = 70 // = 71 minus the commented-out Import(...) line
    assert(dataset.count() == total)
  }
} 
Author: SANSA-Stack, Project: SANSA-OWL, Lines: 61, Source: FunctionalSyntaxOWLExpressionsDatasetBuilderTest.scala

Example 6: JoinTest

// Package declaration and required imports
package com.highperformancespark.examples.goldilocks

import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.rdd.RDD
import org.scalatest.FunSuite


class JoinTest extends FunSuite with SharedSparkContext {
  test("Hash join"){
    val keySet = "a, b, c, d, e, f, g".split(",")
    val smallRDD = sc.parallelize(keySet.map(letter => (letter, letter.hashCode)))
    val largeRDD: RDD[(String, Double)] =
      sc.parallelize(keySet.flatMap{ letter =>
        Range(1, 50).map(i => (letter, letter.hashCode() / i.toDouble))})
    val result: RDD[(String, (Double, Int))] =
      RDDJoinExamples.manualBroadCastHashJoin(
        largeRDD, smallRDD)
    val nativeJoin: RDD[(String, (Double, Int))] = largeRDD.join(smallRDD)

    assert(result.subtract(nativeJoin).count == 0)
  }
} 
Author: gourimahapatra, Project: high-performance-spark, Lines: 23, Source: JoinTest.scala

Example 7: WordCountTest

// Package declaration and required imports
package com.highperformancespark.examples.wordcount


import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.FunSuite

class WordCountTest extends FunSuite with SharedSparkContext {
  test("word count with Stop Words Removed"){
    val wordRDD = sc.parallelize(Seq(
      "How happy was the panda? You ask.",
      "Panda is the most happy panda in all the #$!?ing land!"))

    val stopWords: Set[String] = Set("a", "the", "in", "was", "there", "she", "he")
    val illegalTokens: Array[Char] = "#$%?!.".toCharArray

    val wordCounts = WordCount.withStopWordsFiltered(
      wordRDD, illegalTokens, stopWords)
    val wordCountsAsMap = wordCounts.collectAsMap()
    assert(!wordCountsAsMap.contains("the"))
    assert(!wordCountsAsMap.contains("?"))
    assert(!wordCountsAsMap.contains("#$!?ing"))
    assert(wordCountsAsMap.contains("ing"))
    assert(wordCountsAsMap.get("panda").get.equals(3))
  }
} 
Author: gourimahapatra, Project: high-performance-spark, Lines: 26, Source: WordCountTest.scala

Example 8: SparkCassRDDFunctionsSpec

// Package declaration and required imports
package com.github.jparkie.spark.cassandra.rdd

import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.{ MustMatchers, WordSpec }

class SparkCassRDDFunctionsSpec extends WordSpec with MustMatchers with SharedSparkContext {
  "Package com.github.jparkie.spark.cassandra.rdd" must {
    "lift RDD into SparkCassRDDFunctions" in {
      val testRDD = sc.parallelize(1 to 25)
        .map(currentNumber => (currentNumber.toLong, s"Hello World: $currentNumber!"))

      // If internalSparkContext is available, RDD was lifted.
      testRDD.internalSparkContext
    }
  }
} 
Author: jparkie, Project: Spark2Cassandra, Lines: 17, Source: SparkCassRDDFunctionsSpec.scala

Example 9: SparkCassDataFrameFunctionsSpec

// Package declaration and required imports
package com.github.jparkie.spark.cassandra.sql

import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.SQLContext
import org.scalatest.{ MustMatchers, WordSpec }

class SparkCassDataFrameFunctionsSpec extends WordSpec with MustMatchers with SharedSparkContext {
  "Package com.github.jparkie.spark.cassandra.sql" must {
    "lift DataFrame into SparkCassDataFrameFunctions" in {
      val sqlContext = new SQLContext(sc)

      import sqlContext.implicits._

      val testRDD = sc.parallelize(1 to 25)
        .map(currentNumber => (currentNumber.toLong, s"Hello World: $currentNumber!"))
      val testDataFrame = testRDD.toDF("test_key", "test_value")

      // If internalSparkContext is available, RDD was lifted.
      testDataFrame.internalSparkContext
    }
  }
} 
Author: jparkie, Project: Spark2Cassandra, Lines: 23, Source: SparkCassDataFrameFunctionsSpec.scala

Example 10: TransformationTestWithSparkTestingBase

// Package declaration and required imports
package com.chapter16.SparkTesting

import org.scalatest.Assertions._
import org.apache.spark.rdd.RDD
import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.FunSuite

class TransformationTestWithSparkTestingBase extends FunSuite with SharedSparkContext {
  def tokenize(line: RDD[String]) = {
    line.map(x => x.split(' ')).collect()
  }

  test("works, obviously!") {
    assert(1 == 1)
  }

  test("Words counting") {
    assert(sc.parallelize("Hello world My name is Reza".split("\\W")).map(_ + 1).count == 6)
  }

  test("Testing RDD transformations using a shared Spark Context") {
    val input = List("Testing", "RDD transformations", "using a shared", "Spark Context")
    val expected = Array(Array("Testing"), Array("RDD", "transformations"), Array("using", "a", "shared"), Array("Spark", "Context"))
    val transformed = tokenize(sc.parallelize(input))
    assert(transformed === expected)
  }
} 
Author: PacktPublishing, Project: Scala-and-Spark-for-Big-Data-Analytics, Lines: 28, Source: TransformationTestWithSparkTestingBase.scala

Example 11: WordCountTest

// Package declaration and required imports
package $organization$.$name$



import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.FunSuite

class WordCountTest extends FunSuite with SharedSparkContext {
  test("word count with Stop Words Removed"){
    val linesRDD = sc.parallelize(Seq(
      "How happy was the panda? You ask.",
      "Panda is the most happy panda in all the#!?ing land!"))

    val stopWords: Set[String] = Set("a", "the", "in", "was", "there", "she", "he")
    val splitTokens: Array[Char] = "#%?!. ".toCharArray

    val wordCounts = WordCount.withStopWordsFiltered(
      linesRDD, splitTokens, stopWords)
    val wordCountsAsMap = wordCounts.collectAsMap()
    assert(!wordCountsAsMap.contains("the"))
    assert(!wordCountsAsMap.contains("?"))
    assert(!wordCountsAsMap.contains("#!?ing"))
    assert(wordCountsAsMap.contains("ing"))
    assert(wordCountsAsMap.get("panda").get.equals(3))
  }
} 
Author: holdenk, Project: sparkProjectTemplate.g8, Lines: 27, Source: WordCountTest.scala

Example 12: AppTest

// Package declaration and required imports
package com.github.dongjinleekr.spark.dataset

import org.scalatest.FunSuite
import com.holdenkarau.spark.testing.{RDDComparisons, SharedSparkContext}

/*
* see: http://blog.cloudera.com/blog/2015/09/making-apache-spark-testing-easy-with-spark-testing-base/
* see: https://github.com/holdenk/spark-testing-base/wiki/SharedSparkContext
* */
class AppTest extends FunSuite with SharedSparkContext {
	test("test initializing spark context") {
		val list = List(1, 2, 3, 4)
		val rdd = sc.parallelize(list)

		assert(rdd.count === list.length)
	}
} 
Author: dongjinleekr, Project: spark-dataset, Lines: 18, Source: AppTest.scala

Example 13: PackageSpec

// Package declaration and required imports
package com.github.jparkie.spark.elasticsearch.sql

import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.SQLContext
import org.scalatest.{ MustMatchers, WordSpec }

class PackageSpec extends WordSpec with MustMatchers with SharedSparkContext {
  "Package com.github.jparkie.spark.elasticsearch.sql" must {
    "lift DataFrame into SparkEsDataFrameFunctions" in {

      val sqlContext = new SQLContext(sc)

      val inputData = Seq(
        ("TEST_VALUE_1", 1),
        ("TEST_VALUE_2", 2),
        ("TEST_VALUE_3", 3)
      )

      val outputDataFrame = sqlContext.createDataFrame(inputData)
        .toDF("key", "value")

      // If sparkContext is available, DataFrame was lifted into SparkEsDataFrameFunctions.
      outputDataFrame.sparkContext
    }
  }
} 
Author: jparkie, Project: Spark2Elasticsearch, Lines: 27, Source: PackageSpec.scala

Example 14: MapperSpec

// Package declaration and required imports
package com.github.log0ymxm.mapper

import org.scalatest._
import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.{ SparkSession, Row }
import org.apache.spark.mllib.linalg.distributed.{ CoordinateMatrix, IndexedRow, IndexedRowMatrix, MatrixEntry }
import org.apache.spark.mllib.linalg.{ DenseVector, Vector, Vectors }

class MapperSpec extends FunSuite with SharedSparkContext {

  test("simple mapper on noisy circle") {
    val spark = SparkSession.builder().getOrCreate()

    val fileLoc = getClass.getClassLoader.getResource("circles.csv").getPath()
    val circle = spark.read
      .option("header", false)
      .option("inferSchema", true)
      .csv(fileLoc)

    assert(circle.count == 400)

    val indexedRDD = circle.rdd.zipWithIndex.map {
      case (Row(x: Double, y: Double), i) =>
        val v: Vector = new DenseVector(Array(x, y))
        IndexedRow(i, v)
    }
    val matrix = new IndexedRowMatrix(indexedRDD)
    val similarities = matrix.toCoordinateMatrix
      .transpose()
      .toIndexedRowMatrix()
      .columnSimilarities()
    val distances = new CoordinateMatrix(
      similarities
        .entries
        .map((entry) => new MatrixEntry(entry.i, entry.j, 1 - entry.value))
    )

    val filtration = new IndexedRowMatrix(indexedRDD.map({ row =>
      IndexedRow(row.index, new DenseVector(Array(
        Vectors.norm(row.vector, 2)
      )))
    }))

    //Mapper.writeAsJson(graph, "mapper-vis/circle-graph.json")
    val graph = Mapper.mapper(sc, distances, filtration, 100, 2.0)

    assert(graph.vertices.count == 160)
    assert(graph.edges.count == 327)
  }
} 
Author: log0ymxm, Project: spark-mapper, Lines: 51, Source: MapperSpec.scala

Example 15: CoverSpec

// Package declaration and required imports
package com.github.log0ymxm.mapper

import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.mllib.linalg.distributed.{ IndexedRow, IndexedRowMatrix }

import org.scalatest._
import com.holdenkarau.spark.testing.SharedSparkContext

class CoverSpec extends FunSuite with SharedSparkContext {
  test("cover") {
    val rdd = sc.parallelize((0 to 10).toSeq)
    val filtration = new IndexedRowMatrix(
      rdd.map({ x =>
        new IndexedRow(x, new DenseVector(Array(x * 2, scala.math.sin(x))))
      })
    )

    val cover = new Cover(filtration, 4, 0.5)

    assert(cover.numCoverSegments == 16)
    assert(cover.filterRanges(0) == NumericBoundary(0.0, 20.0))
    assert(cover.filterRanges(1).lower >= -1.0)
    assert(cover.filterRanges(1).upper <= 1.0)

    assert(cover.coverAssignment(new DenseVector(Array(8.33, 0.5))) == List(CoverSegmentKey(6), CoverSegmentKey(7)))

  }
} 
Author: log0ymxm, Project: spark-mapper, Lines: 29, Source: CoverSpec.scala


Note: the com.holdenkarau.spark.testing.SharedSparkContext examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by their respective developers; copyright of the source code remains with the original authors, and redistribution and use are subject to each project's license. Do not republish without permission.