

Scala JavaSparkContext Class Code Examples

This article collects typical usage examples of org.apache.spark.api.java.JavaSparkContext in Scala. If you have been wondering what the JavaSparkContext class is for, how to use it, or what real code that uses it looks like, the curated class examples below may help.


Three code examples of the JavaSparkContext class are shown below, ordered by popularity.
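
Before the project examples, here is a minimal sketch of the most common pattern: creating a JavaSparkContext from a SparkConf, or wrapping an existing Scala SparkContext. The app name and master below are placeholders, not taken from the projects that follow.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.api.java.JavaSparkContext

object JavaSparkContextDemo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("JavaSparkContextDemo") // placeholder app name
      .setMaster("local[*]")              // placeholder master

    // Create a Java-friendly context directly from the configuration...
    val jsc = new JavaSparkContext(conf)

    // ...or wrap an existing Scala SparkContext; both views share the
    // same underlying context and cluster resources.
    val sc: SparkContext = jsc.sc
    val wrapped = JavaSparkContext.fromSparkContext(sc)

    jsc.stop()
  }
}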

Example 1: NamedContext

// Package declaration and imported dependencies
package io.hydrosphere.mist.worker

import java.io.File

import io.hydrosphere.mist.api.{CentralLoggingConf, RuntimeJobInfo, SetupConfiguration}
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.streaming.Duration
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable

class NamedContext(
  val sparkContext: SparkContext,
  val namespace: String,
  streamingDuration: Duration = Duration(40 * 1000),
  loggingConf: Option[CentralLoggingConf] = None
) {

  private val jars = mutable.Buffer.empty[String]

  def addJar(jarPath: String): Unit = {
    // Deduplicate by absolute path so the same jar is registered only once.
    val jarAbsolutePath = new File(jarPath).getAbsolutePath
    if (!jars.contains(jarAbsolutePath)) {
      sparkContext.addJar(jarPath)
      jars += jarAbsolutePath
    }
  }

  def setupConfiguration(jobId: String): SetupConfiguration = {
    SetupConfiguration(
      context = sparkContext,
      streamingDuration = streamingDuration,
      info = RuntimeJobInfo(jobId, namespace),
      loggingConf = loggingConf
    )
  }

  //TODO: can we call that inside python directly using setupConfiguration?
  // python support
  def sparkConf: SparkConf = sparkContext.getConf

  // python support
  def javaContext: JavaSparkContext = new JavaSparkContext(sparkContext)

  // python support
  def sqlContext: SQLContext = new SQLContext(sparkContext)

  // python support
  def hiveContext: HiveContext = new HiveContext(sparkContext)

  def stop(): Unit = {
    sparkContext.stop()
  }

} 
Developer ID: Hydrospheredata, Project: mist, Lines: 58, Source file: NamedContext.scala
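
A hypothetical usage sketch (not part of the mist project) of how NamedContext hands a JavaSparkContext to Java- or Python-facing callers; the app name, master, and namespace are made up for illustration:

import org.apache.spark.{SparkConf, SparkContext}
import io.hydrosphere.mist.worker.NamedContext

val conf = new SparkConf().setAppName("mist-worker-demo").setMaster("local[*]")
val named = new NamedContext(new SparkContext(conf), namespace = "demo")

// javaContext wraps the existing SparkContext rather than starting a
// second one, so both views share the same cluster resources.
val javaCtx = named.javaContext
assert(javaCtx.sc eq named.sparkContext)

named.stop()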

Example 2: GeoSparkEval

// Package declaration and imported dependencies
package eu.br.bigsea.benchmark

import org.datasyslab.geospark._
import org.datasyslab.geospark.spatialRDD._

import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.SparkConf

object GeoSparkEval {
  def main(args: Array[String]) {

    val inputFile = args(0)

    // spark configuration
    val conf = new SparkConf()
      .setAppName("GeoSpark Evaluation")
      .setMaster("local[*]")

    val sc = new JavaSparkContext(conf) // GeoSpark exposes a Java-style API :/
    val points = new PointRDD (
      sc,
      inputFile,
      1,     // offset of the line for coordinates
      "csv", // file format
      100)   // number of partitions

    // we can access the RDD of points directly
    println(points.getRawPointRDD.count)
    println(points.getRawPointRDD.first)

    // JSON serializer crashes :/
    // points.saveAsGeoJSON ("file:///tmp/onibus.geojson") 

    sc.stop()
  }
} 
Developer ID: eubr-bigsea, Project: spark-spatial, Lines: 38, Source file: GeoSparkEval.scala
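
Because GeoSpark exposes the loaded points through the Java API, a natural follow-up is unwrapping the JavaRDD into a Scala RDD for idiomatic transformations. A sketch that reuses `points` from the example above and assumes this GeoSpark version stores JTS Point objects:

import com.vividsolutions.jts.geom.Point
import org.apache.spark.rdd.RDD

// JavaRDD.rdd unwraps to the underlying Scala RDD, so the usual Scala
// operators become available without further conversion.
val scalaPoints: RDD[Point] = points.getRawPointRDD.rdd
val xs = scalaPoints.map(_.getX)
println(s"mean x coordinate: ${xs.sum / xs.count}")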

Example 3: GetSearchCount

// Package declaration and imported dependencies
package mad_nectarine.spark

import java.util.Properties
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.{MapWritable, Text}
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.{Logging, SparkConf}
import org.elasticsearch.hadoop.mr.EsInputFormat

object GetSearchCount extends Logging {
  def main(args: Array[String]) {

    //validate args
    if (args.length < 1) {
      throw new IllegalArgumentException("search word is required")
    }

    //create spark conf
    val sparkConf = new SparkConf()
    sparkConf.setAppName("mad_nectarine.GetTweetsSearchCount")
    val context = new JavaSparkContext(sparkConf)

    try {
      //load config
      System.out.println("executing... [load config]")
      val fs = FileSystem.get(context.hadoopConfiguration())
      val propertiesStream = fs.open(new Path("hdfs:///tmp/spark.to-words.properties"))
      val properties = new Properties()
      properties.load(propertiesStream)

      //create es conf
      System.out.println("executing... [create es conf]")
      val esConf = new JobConf()
      esConf.set("es.nodes", properties.getProperty("logic.search-count.nodes"))
      esConf.set("es.resource", properties.getProperty("logic.search-count.resource"))
      val query = properties.getProperty("logic.search-count.query")
        .replace("@@search_word", args(0))
        .replace("\\r\\n", "")
        .replace("\\n", "")
        .replace("\\r", "")
      System.out.println(s"query is ${query}")
      esConf.set("es.query", query)

      //load data from elasticsearch
      System.out.println("executing... [load data from elasticsearch]")
      val esRDD = context.hadoopRDD(esConf,
        classOf[EsInputFormat[Text, MapWritable]],
        classOf[Text],
        classOf[MapWritable]
      )
      System.out.println("Count of records founds is " + esRDD.count())

    } finally{
      context.stop()
    }
  }
} 
Developer ID: mad-nectarine, Project: nlp-test.spark.to-words, Lines: 58, Source file: GetSearchCount.scala
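
A hypothetical continuation of the example above: unwrapping the JavaPairRDD returned by hadoopRDD into a Scala RDD and pulling a field out of each Elasticsearch document. The field name "text" is a placeholder and must match the actual index mapping:

import org.apache.hadoop.io.{MapWritable, Text}

// JavaPairRDD.rdd unwraps to RDD[(Text, MapWritable)]: the key is the
// Elasticsearch document id and the value holds the document fields.
val docs = esRDD.rdd
val texts = docs.flatMap { case (_, doc) =>
  Option(doc.get(new Text("text"))).map(_.toString) // skip docs without the field
}
texts.take(10).foreach(println)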


Note: The org.apache.spark.api.java.JavaSparkContext class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by the community; copyright remains with the original authors. Please consult each project's License before distributing or reusing the code. Do not repost without permission.