This page collects typical usage examples of the Scala class com.holdenkarau.spark.testing.SharedSparkContext. If you are wondering what SharedSparkContext does, or how and where to use it, the selected class-level code examples below should help.
The sections that follow show 15 code examples of the SharedSparkContext class, ordered roughly by popularity.
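All of the examples mix the SharedSparkContext trait into a ScalaTest suite; the trait starts a local SparkContext once per suite and exposes it as sc. To compile and run them you need the spark-testing-base library on the test classpath. A minimal sbt sketch follows; the version string is only illustrative and must be replaced with the spark-testing-base release that matches your Spark version.

// build.sbt sketch -- "2.4.5_0.14.0" is an example version, not a requirement;
// pick the spark-testing-base release matching your Spark version.
libraryDependencies += "com.holdenkarau" %% "spark-testing-base" % "2.4.5_0.14.0" % Test

// Each suite starts its own local SparkContext, so the spark-testing-base
// documentation recommends not running test suites in parallel.
parallelExecution in Test := false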
Example 1: ColumnsTest
// Package declaration and required imports
package com.drakeconsulting.big_data_maker

import org.scalatest.FunSuite
import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.types.{StructField, StringType, LongType, DoubleType}

class ColumnsTest extends FunSuite with SharedSparkContext {
  val numLoops = 100

  test("test StringConstant") {
    val s1 = new StringConstant("f1", "abc")
    assert("abc" === s1.getValue(1))
    assert(StructField("f1", StringType, false) == s1.getStructField)
  }

  test("test RandomLong") {
    val s1 = new RandomLong("f1", 666666L)
    for (x <- 1 to numLoops) {
      assert(s1.getValue(1) >= 0)
      assert(s1.getValue(1) <= 666666L)
    }
    assert(StructField("f1", LongType, false) == s1.getStructField)
  }

  test("test RandomDouble") {
    val s1 = new RandomDouble("f1", 666666.00)
    for (x <- 1 to numLoops) {
      assert(s1.getValue(1) >= 0)
      assert(s1.getValue(1) <= 666666.00)
    }
    assert(StructField("f1", DoubleType, false) == s1.getStructField)
  }

  test("test Categorical") {
    val list = List("a", "b", "c", "d")
    val s1 = new Categorical("f1", list)
    for (x <- 1 to numLoops) {
      val v = s1.getValue(1)
      assert(list.exists(key => v.contains(key)))
    }
    assert(StructField("f1", StringType, false) == s1.getStructField)
  }
}
Example 2: BigDataMakerTest
// Package declaration and required imports
package com.drakeconsulting.big_data_maker

import org.scalatest.FunSuite
import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.SQLContext

class BigDataMakerTest extends FunSuite with SharedSparkContext {
  test("first") {
    val sqlContext = new SQLContext(sc)
    val bd = new BigData(sqlContext, "/tmp/b", 5, 100)
    bd.addColumn(new StringConstant("f1", "abc"))
    bd.addColumn(new StringConstant("f2", "def"))
    val df = bd._createDataFrame
    df.show
    assert(500 === df.count)
    assert(2 === df.columns.length)
  }

  test("col names") {
    val sqlContext = new SQLContext(sc)
    val bd = new BigData(sqlContext, "/tmp/b", 5, 100)
    bd.addColumn(new StringConstant("f1", "abc"))
    bd.addColumn(new StringConstant("", "def"))
    assert("f1" === bd.cols(0).name)
    assert("f_1" === bd.cols(1).name)
  }
}
Example 3: AppTest
// Package declaration and required imports
package com.github.dongjinleekr.spark

import org.scalatest.FunSuite
import com.holdenkarau.spark.testing.{RDDComparisons, SharedSparkContext}

/*
 * see: http://blog.cloudera.com/blog/2015/09/making-apache-spark-testing-easy-with-spark-testing-base/
 * see: https://github.com/holdenk/spark-testing-base/wiki/SharedSparkContext
 * */
class AppTest extends FunSuite with SharedSparkContext {
  test("test initializing spark context") {
    val list = List(1, 2, 3, 4)
    val rdd = sc.parallelize(list)
    assert(rdd.count === list.length)
  }
}
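The RDDComparisons import above is not actually used in this suite; the Cloudera blog post linked in the comment uses it to compare entire RDDs. A hedged sketch of that blog-era usage is shown below; the exact method name has varied across spark-testing-base releases, so treat this as illustrative rather than the library's current API:

  test("two RDDs with the same elements compare as equal") {
    val expected = sc.parallelize(Seq(1, 2, 3, 4))
    val result = sc.parallelize(Seq(4, 3, 2, 1))
    // compare returns None when no difference is found between the two RDDs
    assert(None === RDDComparisons.compare(expected, result))
  }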
Example 4: FunctionalSyntaxOWLExpressionsRDDBuilderTest
// Package declaration and required imports
package net.sansa_stack.owl.spark.rdd

import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.FunSuite

class FunctionalSyntaxOWLExpressionsRDDBuilderTest extends FunSuite with SharedSparkContext {
  var _rdd: OWLExpressionsRDD = null

  def rdd = {
    if (_rdd == null) {
      _rdd = FunctionalSyntaxOWLExpressionsRDDBuilder.build(
        sc, "src/test/resources/ont_functional.owl")
      _rdd.cache()
    }
    _rdd
  }

  test("There should be three annotation lines with full URIs") {
    val res = rdd.filter(line => line.startsWith("Annotation(")).collect()
    val expected = List(
      "Annotation(<http://ex.com/foo#hasName> \"Name\")",
      "Annotation(<http://ex.com/bar#hasTitle> \"Title\")",
      """Annotation(<http://ex.com/default#description> "A longer
description running over
several lines")""")
    assert(res.length == 3)
    for (e <- expected) {
      assert(res.contains(e))
    }
  }

  // test("There should be an import statement") {
  //   val res = rdd.filter(line => line.startsWith("Import")).collect()
  //   assert(res.length == 1)
  //   assert(res(0) == "Import(<http://www.example.com/my/2.0>)")
  // }

  test("There should not be any empty lines") {
    val res = rdd.filter(line => line.trim.isEmpty).collect()
    assert(res.length == 0)
  }

  test("There should not be any comment lines") {
    val res = rdd.filter(line => line.trim.startsWith("#")).collect()
    assert(res.length == 0)
  }

  test("There should be a DisjointObjectProperties axiom") {
    val res = rdd.filter(line => line.trim.startsWith("DisjointObjectProperties")).collect()
    assert(res.length == 1)
  }

  test("The total number of axioms should be correct") {
    val total = 70 // = 71 - the commented-out Import(...)
    assert(rdd.count() == total)
  }
}
Example 5: FunctionalSyntaxOWLExpressionsDatasetBuilderTest
// Package declaration and required imports
package net.sansa_stack.owl.spark.dataset

import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.SparkSession
import org.scalatest.FunSuite

class FunctionalSyntaxOWLExpressionsDatasetBuilderTest extends FunSuite with SharedSparkContext {
  lazy val spark = SparkSession.builder().appName(sc.appName).master(sc.master).getOrCreate()
  var _dataset: OWLExpressionsDataset = null

  def dataset: OWLExpressionsDataset = {
    if (_dataset == null) {
      _dataset = FunctionalSyntaxOWLExpressionsDatasetBuilder.build(
        spark, "src/test/resources/ont_functional.owl")
      _dataset.cache()
    }
    _dataset
  }

  test("There should be three annotation lines with full URIs") {
    val res = dataset.filter(line => line.startsWith("Annotation(")).collectAsList()
    val expected = List(
      "Annotation(<http://ex.com/foo#hasName> \"Name\")",
      "Annotation(<http://ex.com/bar#hasTitle> \"Title\")",
      """Annotation(<http://ex.com/default#description> "A longer
description running over
several lines")""")
    assert(res.size() == 3)
    for (e <- expected) {
      assert(res.contains(e))
    }
  }

  // test("There should be an import statement") {
  //   val res = rdd.filter(line => line.startsWith("Import")).collect()
  //   assert(res.length == 1)
  //   assert(res(0) == "Import(<http://www.example.com/my/2.0>)")
  // }

  test("There should not be any empty lines") {
    val res = dataset.filter(line => line.trim.isEmpty)
    assert(res.count() == 0)
  }

  test("There should not be any comment lines") {
    val res = dataset.filter(line => line.trim.startsWith("#"))
    assert(res.count() == 0)
  }

  test("There should be a DisjointObjectProperties axiom") {
    val res = dataset.filter(line => line.trim.startsWith("DisjointObjectProperties"))
    assert(res.count() == 1)
  }

  test("The total number of axioms should be correct") {
    val total = 70 // = 71 - the commented-out Import(...)
    assert(dataset.count() == total)
  }
}
Author: SANSA-Stack, project: SANSA-OWL, lines of code: 61, source file: FunctionalSyntaxOWLExpressionsDatasetBuilderTest.scala
Example 6: JoinTest
// Package declaration and required imports
package com.highperformancespark.examples.goldilocks

import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.rdd.RDD
import org.scalatest.FunSuite

class JoinTest extends FunSuite with SharedSparkContext {
  test("Hash join") {
    val keySet = "a, b, c, d, e, f, g".split(",")
    val smallRDD = sc.parallelize(keySet.map(letter => (letter, letter.hashCode)))
    val largeRDD: RDD[(String, Double)] =
      sc.parallelize(keySet.flatMap{ letter =>
        Range(1, 50).map(i => (letter, letter.hashCode() / i.toDouble))})
    val result: RDD[(String, (Double, Int))] =
      RDDJoinExamples.manualBroadCastHashJoin(
        largeRDD, smallRDD)
    val nativeJoin: RDD[(String, (Double, Int))] = largeRDD.join(smallRDD)
    assert(result.subtract(nativeJoin).count == 0)
  }
}
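RDDJoinExamples.manualBroadCastHashJoin belongs to the project under test and is not reproduced on this page. As a rough illustration of the technique the test verifies (the name and signature below are assumptions, not the project's actual code), a manual broadcast hash join collects the small RDD to the driver, broadcasts it, and probes it on the map side, avoiding a shuffle:

import scala.reflect.ClassTag
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

// Illustrative sketch only: join a large pair RDD against a small one by
// broadcasting the small side as an in-memory map and probing it per record.
def manualBroadcastHashJoin[K : ClassTag, V1 : ClassTag, V2 : ClassTag](
    sc: SparkContext,
    bigRDD: RDD[(K, V1)],
    smallRDD: RDD[(K, V2)]): RDD[(K, (V1, V2))] = {
  val smallSide = sc.broadcast(smallRDD.collectAsMap())
  bigRDD.flatMap { case (k, v1) =>
    // keep only the keys present on the broadcast (small) side, as an inner join does
    smallSide.value.get(k).map(v2 => (k, (v1, v2))).toList
  }
}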
Example 7: WordCountTest
// Package declaration and required imports
package com.highperformancespark.examples.wordcount

import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.FunSuite

class WordCountTest extends FunSuite with SharedSparkContext {
  test("word count with Stop Words Removed") {
    val wordRDD = sc.parallelize(Seq(
      "How happy was the panda? You ask.",
      "Panda is the most happy panda in all the #$!?ing land!"))
    val stopWords: Set[String] = Set("a", "the", "in", "was", "there", "she", "he")
    val illegalTokens: Array[Char] = "#$%?!.".toCharArray
    val wordCounts = WordCount.withStopWordsFiltered(
      wordRDD, illegalTokens, stopWords)
    val wordCountsAsMap = wordCounts.collectAsMap()
    assert(!wordCountsAsMap.contains("the"))
    assert(!wordCountsAsMap.contains("?"))
    assert(!wordCountsAsMap.contains("#$!?ing"))
    assert(wordCountsAsMap.contains("ing"))
    assert(wordCountsAsMap.get("panda").get.equals(3))
  }
}
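WordCount.withStopWordsFiltered comes from the High Performance Spark example project and is not shown on this page. A minimal sketch of the idea being tested is given below; the name, signature, and details such as lowercasing are assumptions chosen to stay consistent with the assertions above, not the book's actual implementation:

import org.apache.spark.rdd.RDD

// Illustrative sketch: tokenize each line on the given separator characters
// plus whitespace, drop empty tokens and stop words, then count the rest.
def withStopWordsFiltered(lines: RDD[String],
                          separators: Array[Char],
                          stopWords: Set[String]): RDD[(String, Int)] = {
  val tokens = lines.flatMap(_.split(separators ++ Array(' ', '\t')).map(_.trim.toLowerCase))
  tokens
    .filter(token => token.nonEmpty && !stopWords.contains(token))
    .map(word => (word, 1))
    .reduceByKey(_ + _)
}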
Example 8: SparkCassRDDFunctionsSpec
// Package declaration and required imports
package com.github.jparkie.spark.cassandra.rdd

import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.{ MustMatchers, WordSpec }

class SparkCassRDDFunctionsSpec extends WordSpec with MustMatchers with SharedSparkContext {
  "Package com.github.jparkie.spark.cassandra.rdd" must {
    "lift RDD into SparkCassRDDFunctions" in {
      val testRDD = sc.parallelize(1 to 25)
        .map(currentNumber => (currentNumber.toLong, s"Hello World: $currentNumber!"))

      // If internalSparkContext is available, RDD was lifted.
      testRDD.internalSparkContext
    }
  }
}
Example 9: SparkCassDataFrameFunctionsSpec
// Package declaration and required imports
package com.github.jparkie.spark.cassandra.sql

import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.SQLContext
import org.scalatest.{ MustMatchers, WordSpec }

class SparkCassDataFrameFunctionsSpec extends WordSpec with MustMatchers with SharedSparkContext {
  "Package com.github.jparkie.spark.cassandra.sql" must {
    "lift DataFrame into SparkCassDataFrameFunctions" in {
      val sqlContext = new SQLContext(sc)
      import sqlContext.implicits._

      val testRDD = sc.parallelize(1 to 25)
        .map(currentNumber => (currentNumber.toLong, s"Hello World: $currentNumber!"))
      val testDataFrame = testRDD.toDF("test_key", "test_value")

      // If internalSparkContext is available, RDD was lifted.
      testDataFrame.internalSparkContext
    }
  }
}
Example 10: TransformationTestWithSparkTestingBase
// Package declaration and required imports
package com.chapter16.SparkTesting

import org.scalatest.Assertions._
import org.apache.spark.rdd.RDD
import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.FunSuite

class TransformationTestWithSparkTestingBase extends FunSuite with SharedSparkContext {
  def tokenize(line: RDD[String]) = {
    line.map(x => x.split(' ')).collect()
  }

  test("works, obviously!") {
    assert(1 == 1)
  }

  test("Words counting") {
    assert(sc.parallelize("Hello world My name is Reza".split("\\W")).map(_ + 1).count == 6)
  }

  test("Testing RDD transformations using a shared Spark Context") {
    val input = List("Testing", "RDD transformations", "using a shared", "Spark Context")
    val expected = Array(Array("Testing"), Array("RDD", "transformations"), Array("using", "a", "shared"), Array("Spark", "Context"))
    val transformed = tokenize(sc.parallelize(input))
    assert(transformed === expected)
  }
}
Author: PacktPublishing, project: Scala-and-Spark-for-Big-Data-Analytics, lines of code: 28, source file: TransformationTestWithSparkTestingBase.scala
Example 11: WordCountTest
// Package declaration and required imports
package $organization$.$name$

import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.FunSuite

class WordCountTest extends FunSuite with SharedSparkContext {
  test("word count with Stop Words Removed") {
    val linesRDD = sc.parallelize(Seq(
      "How happy was the panda? You ask.",
      "Panda is the most happy panda in all the#!?ing land!"))
    val stopWords: Set[String] = Set("a", "the", "in", "was", "there", "she", "he")
    val splitTokens: Array[Char] = "#%?!. ".toCharArray
    val wordCounts = WordCount.withStopWordsFiltered(
      linesRDD, splitTokens, stopWords)
    val wordCountsAsMap = wordCounts.collectAsMap()
    assert(!wordCountsAsMap.contains("the"))
    assert(!wordCountsAsMap.contains("?"))
    assert(!wordCountsAsMap.contains("#!?ing"))
    assert(wordCountsAsMap.contains("ing"))
    assert(wordCountsAsMap.get("panda").get.equals(3))
  }
}
Example 12: AppTest
// Package declaration and required imports
package com.github.dongjinleekr.spark.dataset

import org.scalatest.FunSuite
import com.holdenkarau.spark.testing.{RDDComparisons, SharedSparkContext}

/*
 * see: http://blog.cloudera.com/blog/2015/09/making-apache-spark-testing-easy-with-spark-testing-base/
 * see: https://github.com/holdenk/spark-testing-base/wiki/SharedSparkContext
 * */
class AppTest extends FunSuite with SharedSparkContext {
  test("test initializing spark context") {
    val list = List(1, 2, 3, 4)
    val rdd = sc.parallelize(list)
    assert(rdd.count === list.length)
  }
}
Example 13: PackageSpec
// Package declaration and required imports
package com.github.jparkie.spark.elasticsearch.sql

import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.SQLContext
import org.scalatest.{ MustMatchers, WordSpec }

class PackageSpec extends WordSpec with MustMatchers with SharedSparkContext {
  "Package com.github.jparkie.spark.elasticsearch.sql" must {
    "lift DataFrame into SparkEsDataFrameFunctions" in {
      val sqlContext = new SQLContext(sc)

      val inputData = Seq(
        ("TEST_VALUE_1", 1),
        ("TEST_VALUE_2", 2),
        ("TEST_VALUE_3", 3)
      )
      val outputDataFrame = sqlContext.createDataFrame(inputData)
        .toDF("key", "value")

      // If sparkContext is available, DataFrame was lifted into SparkEsDataFrameFunctions.
      outputDataFrame.sparkContext
    }
  }
}
Example 14: MapperSpec
// Package declaration and required imports
package com.github.log0ymxm.mapper

import org.scalatest._
import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.{ SparkSession, Row }
import org.apache.spark.mllib.linalg.distributed.{ CoordinateMatrix, IndexedRow, IndexedRowMatrix, MatrixEntry }
import org.apache.spark.mllib.linalg.{ DenseVector, Vector, Vectors }

class MapperSpec extends FunSuite with SharedSparkContext {
  test("simple mapper on noisy circle") {
    val spark = SparkSession.builder().getOrCreate()
    val fileLoc = getClass.getClassLoader.getResource("circles.csv").getPath()

    val circle = spark.read
      .option("header", false)
      .option("inferSchema", true)
      .csv(fileLoc)
    assert(circle.count == 400)

    val indexedRDD = circle.rdd.zipWithIndex.map {
      case (Row(x: Double, y: Double), i) =>
        val v: Vector = new DenseVector(Array(x, y))
        IndexedRow(i, v)
    }
    val matrix = new IndexedRowMatrix(indexedRDD)

    val similarities = matrix.toCoordinateMatrix
      .transpose()
      .toIndexedRowMatrix()
      .columnSimilarities()

    val distances = new CoordinateMatrix(
      similarities
        .entries
        .map((entry) => new MatrixEntry(entry.i, entry.j, 1 - entry.value))
    )

    val filtration = new IndexedRowMatrix(indexedRDD.map({ row =>
      IndexedRow(row.index, new DenseVector(Array(
        Vectors.norm(row.vector, 2)
      )))
    }))

    //Mapper.writeAsJson(graph, "mapper-vis/circle-graph.json")
    val graph = Mapper.mapper(sc, distances, filtration, 100, 2.0)
    assert(graph.vertices.count == 160)
    assert(graph.edges.count == 327)
  }
}
Example 15: CoverSpec
// Package declaration and required imports
package com.github.log0ymxm.mapper

import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.mllib.linalg.distributed.{ IndexedRow, IndexedRowMatrix }
import org.scalatest._
import com.holdenkarau.spark.testing.SharedSparkContext

class CoverSpec extends FunSuite with SharedSparkContext {
  test("cover") {
    val rdd = sc.parallelize((0 to 10).toSeq)
    val filtration = new IndexedRowMatrix(
      rdd.map({ x =>
        new IndexedRow(x, new DenseVector(Array(x * 2, scala.math.sin(x))))
      })
    )
    val cover = new Cover(filtration, 4, 0.5)

    assert(cover.numCoverSegments == 16)
    assert(cover.filterRanges(0) == NumericBoundary(0.0, 20.0))
    assert(cover.filterRanges(1).lower >= -1.0)
    assert(cover.filterRanges(1).upper <= 1.0)
    assert(cover.coverAssignment(new DenseVector(Array(8.33, 0.5))) == List(CoverSegmentKey(6), CoverSegmentKey(7)))
  }
}