当前位置: 首页>>代码示例>>Scala>>正文


Scala SparkException类代码示例

本文整理汇总了Scala中org.apache.spark.SparkException的典型用法代码示例。如果您正苦于以下问题:Scala SparkException类的具体用法?Scala SparkException怎么用?Scala SparkException使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了SparkException类的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: LocalIndexToString

//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors

import io.hydrosphere.spark_ml_serving._
import org.apache.spark.SparkException
import org.apache.spark.ml.feature.IndexToString

class LocalIndexToString(override val sparkTransformer: IndexToString) extends LocalTransformer[IndexToString] {
  /** Local (non-Spark) IndexToString: maps each numeric index back to its string label. */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case None => localData
      case Some(inputColumn) =>
        val labels = sparkTransformer.getLabels
        // Resolve an index to its label; out-of-range indices are a hard error.
        def lookup(index: Double): String = {
          val i = index.toInt
          if (i >= 0 && i < labels.length) labels(i)
          else throw new SparkException(s"Unseen index: $index ??")
        }
        val mapped = inputColumn.data.map {
          case d: Double => lookup(d)
          case d => throw new IllegalArgumentException(s"Unknown data to index: $d")
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, mapped))
    }
  }
}

object LocalIndexToString extends LocalModel[IndexToString] {
  /** Rebuilds an IndexToString from serialized metadata via its private String-uid constructor. */
  override def load(metadata: Metadata, data: Map[String, Any]): IndexToString = {
    val constructor = classOf[IndexToString].getDeclaredConstructor(classOf[String])
    constructor.setAccessible(true)
    val transformer = constructor.newInstance(metadata.uid)
    // Each setter returns the transformer itself, so the last call yields the result.
    transformer.setLabels(metadata.paramMap("labels").asInstanceOf[List[String]].to[Array])
    transformer.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    transformer.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
  }

  override implicit def getTransformer(transformer: IndexToString): LocalTransformer[IndexToString] = new LocalIndexToString(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:43,代码来源:LocalIndexToString.scala

示例2: App3PublishFailAndRecover

//设置package包名称以及导入依赖的类
package com.buransky.understandingSparkStreamingState

import net.manub.embeddedkafka.EmbeddedKafka._
import BaseApp._
import org.apache.spark.SparkException


object App3PublishFailAndRecover extends BaseApp {
  /**
   * Demonstrates mapWithState failure and recovery:
   *  1. publishes rounds of test messages to embedded Kafka,
   *  2. triggers a simulated failure on message "c" and checks that the
   *     expected SparkException surfaces,
   *  3. restarts the streaming context with failures disabled to show the
   *     state recovers from the checkpoint.
   */
  override def main(args: Array[String]): Unit = {
    withRunningKafka {
      // NOTE(review): `0 to args.length` is inclusive, so this publishes
      // args.length + 1 rounds — presumably intended so at least one round
      // runs even with no args; confirm.
      for (i <- 0 to args.length) {
        publishStringMessageToKafka(kafkaTopic, "a")
        publishStringMessageToKafka(kafkaTopic, "b")
        publishStringMessageToKafka(kafkaTopic, "c")
        publishStringMessageToKafka(kafkaTopic, "d")
      }

      // First step is to simulate a failure ...
      BaseApp.failOn = "c"
      BaseApp.murder = false
      try {
        withSsc() { inputStream =>
          inputStream.mapWithState(stringStateSpec)
        }
      }
      catch {
        // Option(..) guards against a null cause: the previous
        // `ex.getCause.getMessage` threw an NPE inside this catch whenever the
        // SparkException carried no cause, masking the real failure.
        case ex: SparkException if Option(ex.getCause).exists(_.getMessage == "Fail!") => // This is expected
        case other: Throwable =>
          log.error("WTF", other)
          throw other
      }

      // ... second step is to try to recover from it.
      BaseApp.failOn = ""
      withSsc() { inputStream =>
        inputStream.mapWithState(stringStateSpec)
      }
    }
  }
}
开发者ID:RadoBuransky,项目名称:understanding-spark-streaming-state,代码行数:41,代码来源:App3PublishFailAndRecover.scala

示例3: LocalStringIndexerModel

//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors

import io.hydrosphere.spark_ml_serving._
import org.apache.spark.SparkException
import org.apache.spark.ml.feature.StringIndexerModel

import scala.collection.mutable

class LocalStringIndexerModel(override val sparkTransformer: StringIndexerModel) extends LocalTransformer[StringIndexerModel] {
  /** Local StringIndexerModel: encodes each string label as its numeric (Double) index. */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case None => localData
      case Some(inputColumn) =>
        // Label -> index table; Double values mirror Spark's output column type.
        val labelToIndex: Map[String, Double] =
          sparkTransformer.labels.zipWithIndex.map { case (lbl, i) => lbl -> i.toDouble }.toMap
        val encode = (label: String) =>
          labelToIndex.getOrElse(label, throw new SparkException(s"Unseen label: $label."))
        val encoded = inputColumn.data.map(value => encode(value.asInstanceOf[String]))
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, encoded))
    }
  }
}

object LocalStringIndexerModel extends LocalModel[StringIndexerModel] {
  /** Reconstructs a StringIndexerModel from serialized metadata and its labels payload. */
  override def load(metadata: Metadata, data: Map[String, Any]): StringIndexerModel = {
    val labels = data("labels").asInstanceOf[List[String]].to[Array]
    val model = new StringIndexerModel(metadata.uid, labels)
    model
      .setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
      .setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
      .setHandleInvalid(metadata.paramMap("handleInvalid").asInstanceOf[String])
  }

  override implicit def getTransformer(transformer: StringIndexerModel): LocalTransformer[StringIndexerModel] = new LocalStringIndexerModel(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:50,代码来源:LocalStringIndexerModel.scala

示例4: ContentToHash

//设置package包名称以及导入依赖的类
package com.xuzq.hotNews

import org.apache.spark.SparkException
import org.apache.spark.unsafe.hash.Murmur3_x86_32._
import org.apache.spark.unsafe.types.UTF8String


class ContentToHash {

  // Seed used for all murmur3 hashing in this class.
  val seed = 42

  /** Hashes a word to a non-negative bucket index in [0, mod). */
  def getHashCode(word: String, mod: Int): Int =
    nonNegativeMod(murmur3Hash(word), mod)  // explicit `return` removed: last expression is the result

  /** Mathematically non-negative modulus (Java's `%` can return negatives for negative x). */
  def nonNegativeMod(x: Int, mod: Int): Int = {
    val rawMod = x % mod
    rawMod + (if (rawMod < 0) mod else 0)
  }

  /**
   * Murmur3 hash of a term, dispatching on runtime type; strings are hashed
   * over their UTF-8 bytes.
   *
   * @throws SparkException for input types the algorithm does not support
   */
  def murmur3Hash(term: Any): Int = {
    term match {
      case null => seed
      case b: Boolean => hashInt(if (b) 1 else 0, seed)
      case b: Byte => hashInt(b, seed)
      case s: Short => hashInt(s, seed)
      case i: Int => hashInt(i, seed)
      case l: Long => hashLong(l, seed)
      case f: Float => hashInt(java.lang.Float.floatToIntBits(f), seed)
      case d: Double => hashLong(java.lang.Double.doubleToLongBits(d), seed)
      case s: String =>
        val utf8 = UTF8String.fromString(s)
        hashUnsafeBytes(utf8.getBaseObject, utf8.getBaseOffset, utf8.numBytes(), seed)
      case _ => throw new SparkException("HashingTF with murmur3 algorithm does not " +
        s"support type ${term.getClass.getCanonicalName} of input data.")
    }
  }
}
开发者ID:ZanderXu,项目名称:HotNews,代码行数:39,代码来源:ContentToHash.scala

示例5: SimhashIndexing

//设置package包名称以及导入依赖的类
package io.gzet.story

import java.net.URL

import com.datastax.spark.connector._
import io.gzet.story.model.Article
import io.gzet.story.util.SimhashUtils._
import io.gzet.story.util.{HtmlFetcher, Tokenizer}
import io.gzet.utils.spark.gdelt.GKGParser
import org.apache.lucene.analysis.en.EnglishAnalyzer
import org.apache.spark.{Logging, SparkConf, SparkContext, SparkException}

import scala.util.Try

object SimhashIndexing extends SimpleConfig with Logging {

  // Pipeline: parse GDELT GKG records -> collect valid article URLs -> fetch
  // HTML -> tokenize -> simhash each article -> persist to Cassandra.
  def main(args: Array[String]) = {

    val sc = new SparkContext(new SparkConf().setAppName("GDELT Indexing"))

    if (args.isEmpty)
      throw new SparkException("usage: <gdeltInputDir>")

    // Parse raw GDELT GKG lines into case-class records.
    val gdeltInputDir = args.head
    val gkgRDD = sc.textFile(gdeltInputDir)
      .map(GKGParser.toJsonGKGV2)
      .map(GKGParser.toCaseClass2)

    // Keep only syntactically valid, distinct document URLs ("NA" fails the URL parse).
    val urlRDD = gkgRDD.map(g => g.documentId.getOrElse("NA"))
      .filter(url => Try(new URL(url)).isSuccess)
      .distinct()
      .repartition(partitions)

    // One HtmlFetcher per partition to amortize connection setup across URLs.
    val contentRDD = urlRDD.mapPartitions({ it =>
      val html = new HtmlFetcher(gooseConnectionTimeout, gooseSocketTimeout)
      it map html.fetch
    })

    // Tokenize article bodies; drop articles too short to hash meaningfully.
    val corpusRDD = contentRDD.mapPartitions({ it =>
      val analyzer = new EnglishAnalyzer()
      it.map(content => (content, Tokenizer.lucene(content.body, analyzer)))
    }).filter({ case (content, corpus) =>
      corpus.length > minWords
    })

    //CREATE TABLE gzet.articles ( hash int PRIMARY KEY, url text, title text, body text );
    // NOTE(review): the CREATE TABLE comment orders columns (hash, url, title, body)
    // but the arguments here are (simhash, body, title, url) — verify Article's
    // declared field order matches before trusting the stored rows.
    corpusRDD.mapValues(_.mkString(" ").simhash).map({ case (content, simhash) =>
      Article(simhash, content.body, content.title, content.url)
    }).saveToCassandra(cassandraKeyspace, cassandraTable)

  }

}
开发者ID:PacktPublishing,项目名称:Mastering-Spark-for-Data-Science,代码行数:54,代码来源:SimhashIndexing.scala

示例6: LocalIndexToString

//设置package包名称以及导入依赖的类
package io.hydrosphere.mist.api.ml.preprocessors

import io.hydrosphere.mist.api.ml._
import org.apache.spark.SparkException
import org.apache.spark.ml.feature.IndexToString

class LocalIndexToString(override val sparkTransformer: IndexToString) extends LocalTransformer[IndexToString] {
  /** Applies IndexToString without Spark: turns Double indices back into labels. */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(col) =>
        val labels = sparkTransformer.getLabels
        // Out-of-range indices fail loudly rather than producing a bogus label.
        val toLabel: Double => String = { index =>
          val position = index.toInt
          if (position >= 0 && position < labels.length) labels(position)
          else throw new SparkException(s"Unseen index: $index ??")
        }
        val converted = col.data.map {
          case d: Double => toLabel(d)
          case d => throw new IllegalArgumentException(s"Unknown data to index: $d")
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, converted))
      case None => localData
    }
  }
}

object LocalIndexToString extends LocalModel[IndexToString] {
  /** Instantiates IndexToString reflectively (its String-uid constructor is not public). */
  override def load(metadata: Metadata, data: Map[String, Any]): IndexToString = {
    val uidCtor = classOf[IndexToString].getDeclaredConstructor(classOf[String])
    uidCtor.setAccessible(true)
    val instance = uidCtor.newInstance(metadata.uid)
    instance
      .setLabels(metadata.paramMap("labels").asInstanceOf[List[String]].to[Array])
      .setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
      .setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
  }

  override implicit def getTransformer(transformer: IndexToString): LocalTransformer[IndexToString] = new LocalIndexToString(transformer)
}
开发者ID:Hydrospheredata,项目名称:mist,代码行数:43,代码来源:LocalIndexToString.scala

示例7: LocalStringIndexerModel

//设置package包名称以及导入依赖的类
package io.hydrosphere.mist.api.ml.preprocessors

import io.hydrosphere.mist.api.ml._
import org.apache.spark.SparkException
import org.apache.spark.ml.feature.StringIndexerModel

import scala.collection.mutable

class LocalStringIndexerModel(override val sparkTransformer: StringIndexerModel) extends LocalTransformer[StringIndexerModel] {
  /** Applies StringIndexerModel without Spark: each string label becomes its Double index. */
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(col) =>
        // Immutable label -> index table (Double to match Spark's output type).
        val index: Map[String, Double] =
          sparkTransformer.labels.zipWithIndex.map { case (lbl, i) => lbl -> i.toDouble }.toMap
        val encoded = col.data.map { feature =>
          val label = feature.asInstanceOf[String]
          index.getOrElse(label, throw new SparkException(s"Unseen label: $label."))
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, encoded))
      case None => localData
    }
  }
}

object LocalStringIndexerModel extends LocalModel[StringIndexerModel] {
  /** Deserializes a StringIndexerModel from saved metadata and its labels payload. */
  override def load(metadata: Metadata, data: Map[String, Any]): StringIndexerModel = {
    val labels = data("labels").asInstanceOf[List[String]].to[Array]
    val params = metadata.paramMap
    new StringIndexerModel(metadata.uid, labels)
      .setInputCol(params("inputCol").asInstanceOf[String])
      .setOutputCol(params("outputCol").asInstanceOf[String])
      .setHandleInvalid(params("handleInvalid").asInstanceOf[String])
  }

  override implicit def getTransformer(transformer: StringIndexerModel): LocalTransformer[StringIndexerModel] = new LocalStringIndexerModel(transformer)
}
开发者ID:Hydrospheredata,项目名称:mist,代码行数:50,代码来源:LocalStringIndexerModel.scala

示例8: StructTypeToMleap

//设置package包名称以及导入依赖的类
package org.apache.spark.ml.mleap.converter

import com.truecar.mleap.runtime.types
import org.apache.spark.SparkException
import org.apache.spark.mllib.linalg.VectorUDT
import org.apache.spark.sql.types._


case class StructTypeToMleap(schema: StructType) {
  /** Converts this Spark SQL schema into the equivalent MLeap runtime schema. */
  def toMleap: types.StructType =
    types.StructType(schema.fields.map(toMleapField))

  // Maps one Spark field to its MLeap counterpart, failing on unsupported types.
  private def toMleapField(field: StructField): types.StructField = {
    val sparkType = field.dataType
    val sparkTypeName = sparkType.typeName
    val mleapType = sparkType match {
      case _: NumericType | BooleanType => types.DoubleType
      case _: StringType => types.StringType
      case _: VectorUDT => types.VectorType
      case at: ArrayType if at.elementType == StringType => types.StringArrayType
      case _ => throw new SparkException(s"unsupported MLeap datatype: $sparkTypeName")
    }
    types.StructField(field.name, mleapType)
  }
}
开发者ID:TrueCar,项目名称:mleap,代码行数:28,代码来源:StructTypeToMleap.scala


注:本文中的org.apache.spark.SparkException类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。