This article collects typical usage examples of the Scala class org.apache.spark.api.java.JavaSparkContext. If you are unsure what the JavaSparkContext class is for, how to use it, or what real-world code looks like, the curated examples below may help.
Three code examples involving the JavaSparkContext class are shown, ordered by popularity by default.
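Before the examples, here is a minimal sketch, not taken from any of them, of how JavaSparkContext relates to the regular Scala SparkContext: it is a thin Java-friendly wrapper that can be created from an existing SparkContext and unwrapped again via its sc field. The app name and local master below are placeholder assumptions.

import java.util.Arrays
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.{SparkConf, SparkContext}

object JavaSparkContextSketch {
  def main(args: Array[String]): Unit = {
    // placeholder local configuration, only for illustration
    val conf = new SparkConf().setAppName("javacontext-sketch").setMaster("local[*]")
    val sc = new SparkContext(conf)
    // wrap the Scala context so Java-API libraries can use it
    val jsc = new JavaSparkContext(sc)
    // parallelize on the Java API takes a java.util.List and returns a JavaRDD
    println(jsc.parallelize(Arrays.asList(1, 2, 3)).count())
    // unwrap back to the Scala context when needed
    val scalaCtx: SparkContext = jsc.sc
    scalaCtx.stop()
  }
}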
Example 1: NamedContext
// Package declaration and imported dependencies
package io.hydrosphere.mist.worker

import java.io.File

import io.hydrosphere.mist.api.{CentralLoggingConf, RuntimeJobInfo, SetupConfiguration}
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.streaming.Duration
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable

class NamedContext(
  val sparkContext: SparkContext,
  val namespace: String,
  streamingDuration: Duration = Duration(40 * 1000),
  loggingConf: Option[CentralLoggingConf] = None
) {

  private val jars = mutable.Buffer.empty[String]

  def addJar(jarPath: String): Unit = {
    val jarAbsolutePath = new File(jarPath).getAbsolutePath
    if (!jars.contains(jarAbsolutePath)) {
      sparkContext.addJar(jarPath)
      jars += jarAbsolutePath
    }
  }

  def setupConfiguration(jobId: String): SetupConfiguration = {
    SetupConfiguration(
      context = sparkContext,
      streamingDuration = streamingDuration,
      info = RuntimeJobInfo(jobId, namespace),
      loggingConf = loggingConf
    )
  }

  //TODO: can we call that inside python directly using setupConfiguration?
  // python support
  def sparkConf: SparkConf = sparkContext.getConf

  // python support
  def javaContext: JavaSparkContext = new JavaSparkContext(sparkContext)

  // python support
  def sqlContext: SQLContext = new SQLContext(sparkContext)

  // python support
  def hiveContext: HiveContext = new HiveContext(sparkContext)

  def stop(): Unit = {
    sparkContext.stop()
  }
}
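As a quick illustration of how the class above might be driven, here is a hedged usage sketch; the local master, the "demo" namespace, and the small parallelize call are placeholder assumptions rather than part of the original example.

import java.util.Arrays
import org.apache.spark.{SparkConf, SparkContext}
import io.hydrosphere.mist.worker.NamedContext

object NamedContextUsage {
  def main(args: Array[String]): Unit = {
    // placeholder local Spark context
    val sc = new SparkContext(new SparkConf().setAppName("named-context-demo").setMaster("local[*]"))
    // rely on the defaults for streamingDuration and loggingConf
    val named = new NamedContext(sc, namespace = "demo")
    // javaContext hands back a JavaSparkContext wrapper over the same SparkContext
    val jsc = named.javaContext
    println(jsc.parallelize(Arrays.asList(1, 2, 3)).count())
    named.stop()
  }
}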
Example 2: GeoSparkEval
// Package declaration and imported dependencies
package eu.br.bigsea.benchmark

import org.datasyslab.geospark._
import org.datasyslab.geospark.spatialRDD._
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.SparkConf

object GeoSparkEval {

  def main(args: Array[String]) {
    val inputFile = args(0)

    // spark configuration
    val conf = new SparkConf().
      setAppName("GeoSpark Evaluation").
      setMaster("local[*]")
    val sc = new JavaSparkContext(conf) // GeoSpark's spatial RDDs are built on the Java API :/

    val points = new PointRDD(
      sc,
      inputFile,
      1,      // offset of the line for coordinates
      "csv",  // file format
      100)    // number of partitions

    // we can access the RDD of points directly
    println(points.getRawPointRDD.count)
    println(points.getRawPointRDD.first)

    // JSON serializer crashes :/
    // points.saveAsGeoJSON ("file:///tmp/onibus.geojson")

    sc.stop()
  }
}
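Since the GeoSpark example has to work through the Java API, a short interop sketch may be useful: JavaSparkContext hands back JavaRDDs, and .rdd unwraps them to plain Scala RDDs. The file path below is a placeholder assumption.

import org.apache.spark.SparkConf
import org.apache.spark.api.java.{JavaRDD, JavaSparkContext}
import org.apache.spark.rdd.RDD

object JavaRddInterop {
  def main(args: Array[String]): Unit = {
    val jsc = new JavaSparkContext(new SparkConf().setAppName("interop-sketch").setMaster("local[*]"))
    // textFile on the Java API returns a JavaRDD[String]
    val javaLines: JavaRDD[String] = jsc.textFile("/tmp/points.csv") // placeholder path
    // .rdd unwraps to the underlying Scala RDD, so the usual Scala transformations apply
    val scalaLines: RDD[String] = javaLines.rdd
    println(scalaLines.filter(_.nonEmpty).count())
    jsc.stop()
  }
}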
Example 3: GetSearchCount
// Package declaration and imported dependencies
package mad_nectarine.spark

import java.util.Properties

import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.{MapWritable, Text}
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.{Logging, SparkConf}
import org.elasticsearch.hadoop.mr.EsInputFormat

object GetSearchCount extends Logging {

  def main(args: Array[String]) {

    // validate args
    if (args.length < 1) {
      throw new IllegalArgumentException("search word is required")
    }

    // create spark conf
    val sparkConf = new SparkConf()
    sparkConf.setAppName("mad_nectarine.GetTweetsSearchCount")
    val context = new JavaSparkContext(sparkConf)

    try {
      // load config
      System.out.println("executing... [load config]")
      val fs = FileSystem.get(context.hadoopConfiguration())
      val propertiesStream = fs.open(new Path("hdfs:///tmp/spark.to-words.properties"))
      val properties = new Properties()
      properties.load(propertiesStream)

      // create es conf
      System.out.println("executing... [create es conf]")
      val esConf = new JobConf()
      esConf.set("es.nodes", properties.getProperty("logic.search-count.nodes"))
      esConf.set("es.resource", properties.getProperty("logic.search-count.resource"))
      var query = properties.getProperty("logic.search-count.query").replace("@@search_word", args(0))
      query = query.replace("\\r\\n", "")
      query = query.replace("\\n", "")
      query = query.replace("\\r", "")
      System.out.println(s"query is ${query}")
      esConf.set("es.query", query)

      // load data from elasticsearch
      System.out.println("executing... [load data from elasticsearch]")
      val esRDD = context.hadoopRDD(esConf,
        classOf[EsInputFormat[Text, MapWritable]],
        classOf[Text],
        classOf[MapWritable]
      )
      System.out.println("Count of records found is " + esRDD.count())
    } finally {
      context.stop()
    }
  }
}
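The example above only counts the matching documents; the following is a speculative sketch of how one could peek at individual fields of the (Text, MapWritable) pairs returned by hadoopRDD. The helper name, the field parameter, and the choice to sample ten documents are illustrative assumptions.

import org.apache.hadoop.io.{MapWritable, Text}
import org.apache.spark.api.java.JavaPairRDD

object InspectEsRecords {
  // hypothetical helper: extract one field from each Elasticsearch document
  def fieldValues(esRDD: JavaPairRDD[Text, MapWritable], field: String): Array[String] = {
    esRDD.rdd                              // unwrap to a Scala RDD[(Text, MapWritable)]
      .map { case (_, doc) =>
        Option(doc.get(new Text(field)))   // MapWritable lookup; missing fields become None
          .map(_.toString)
          .getOrElse("")
      }
      .take(10)                            // sample a few values for inspection
  }
}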