This article collects typical usage examples of the Scala class org.apache.spark.sql.types.DoubleType. If you are wondering what DoubleType is for and how to use it, the curated class examples below should help.
Five code examples of the DoubleType class are shown below, sorted by popularity by default.
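Before the examples themselves, here is a minimal standalone sketch (written for this article, not taken from the examples below) of the most common use of DoubleType: declaring a double-valued column in a DataFrame schema. The field names are arbitrary.

import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}

// A schema with a non-nullable string key and a nullable double value;
// the same StructField(..., DoubleType, ...) pattern appears in Examples 3 and 4 below
val schema = StructType(Seq(
  StructField("id", StringType, nullable = false),
  StructField("score", DoubleType, nullable = true)))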
Example 1: ColumnsTest
// Package declaration and imported dependencies
package com.drakeconsulting.big_data_maker
import org.scalatest.FunSuite
import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.types.{StructField, StringType, LongType, DoubleType}
class ColumnsTest extends FunSuite with SharedSparkContext {
  val numLoops = 100

  test("test StringConstant") {
    val s1 = new StringConstant("f1", "abc")
    assert("abc" === s1.getValue(1))
    assert(StructField("f1", StringType, false) == s1.getStructField)
  }

  test("test RandomLong") {
    val s1 = new RandomLong("f1", 666666L)
    for (x <- 1 to numLoops) {
      assert(s1.getValue(1) >= 0)
      assert(s1.getValue(1) <= 666666L)
    }
    assert(StructField("f1", LongType, false) == s1.getStructField)
  }

  test("test RandomDouble") {
    val s1 = new RandomDouble("f1", 666666.00)
    for (x <- 1 to numLoops) {
      assert(s1.getValue(1) >= 0)
      assert(s1.getValue(1) <= 666666.00)
    }
    assert(StructField("f1", DoubleType, false) == s1.getStructField)
  }

  test("test Categorical") {
    val list = List("a", "b", "c", "d")
    val s1 = new Categorical("f1", list)
    for (x <- 1 to numLoops) {
      val v = s1.getValue(1)
      assert(list.exists(key => v.contains(key)))
    }
    assert(StructField("f1", StringType, false) == s1.getStructField)
  }
}
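The tests above only assert on each generator's StructField in isolation. As a rough illustration (the field names below are made up, and the generator classes are assumed to have exactly the constructors and getStructField accessors the tests exercise), the fields can be assembled into a full schema like this:

import org.apache.spark.sql.types.StructType

// Hypothetical assembly of the generators' fields into one schema
val schema = StructType(Seq(
  new StringConstant("name", "abc").getStructField,
  new RandomLong("count", 666666L).getStructField,
  new RandomDouble("price", 666666.00).getStructField))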
Example 2: Titanic
// Package declaration and imported dependencies
package fr.ippon.spark.ml
import org.apache.spark.sql.types.DoubleType
import org.apache.spark.sql.{functions, Column, DataFrame, SQLContext}
object Titanic {
  // Load a Titanic CSV file into a DataFrame
  def dataframeFromTitanicFile(sqlc: SQLContext, file: String): DataFrame = sqlc.read
    .format("com.databricks.spark.csv")
    .option("header", "true")
    .option("inferSchema", "true")
    .load(file)

  // Compute the mean age over the given column
  def calcMeanAge(df: DataFrame, inputCol: String): Double = df
    .agg(functions.avg(df(inputCol)))
    .head
    .getDouble(0)

  // Keep the age when present, otherwise substitute the replacement value (e.g. the mean age)
  def fillMissingAge(df: DataFrame, inputCol: String, outputCol: String, replacementValue: Double): DataFrame = {
    val ageValue: (Any) => Double = age => age match {
      case age: Double => age
      case _ => replacementValue
    }
    // Spark 1.x-style callUDF(function, returnType, column)
    df.withColumn(outputCol, functions.callUDF(ageValue, DoubleType, df(inputCol)))
  }
}
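A hedged usage sketch chaining the three helpers above; the file name, the existing sqlContext, and the "Age" column name are assumptions about the caller's environment and the Titanic CSV layout, not part of the original example:

// sqlContext is an existing SQLContext; "titanic.csv" and "Age" are placeholders
val df = Titanic.dataframeFromTitanicFile(sqlContext, "titanic.csv")
val meanAge = Titanic.calcMeanAge(df, "Age")
val cleaned = Titanic.fillMissingAge(df, "Age", "Age_cleaned", meanAge)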
Example 3: ScorePredictor
// Package declaration and imported dependencies
package org.wikimedia.research.recommendation.job.translation
import java.io.File
import org.apache.log4j.{LogManager, Logger}
import org.apache.spark.ml.regression.RandomForestRegressionModel
import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession}
import scala.collection.parallel.mutable.ParArray
object ScorePredictor {
  val log: Logger = LogManager.getLogger(ScorePredictor.getClass)

  def predictScores(spark: SparkSession,
                    modelsInputDir: File,
                    predictionsOutputDir: Option[File],
                    sites: ParArray[String],
                    featureData: DataFrame): Unit = {
    log.info("Scoring items")
    val predictions: Array[DataFrame] = sites.map(target => {
      try {
        log.info("Scoring for " + target)
        log.info("Getting work data for " + target)
        val workData: DataFrame = Utils.getWorkData(spark, featureData, target, exists = false)
        log.info("Loading model for " + target)
        val model = RandomForestRegressionModel.load(
          new File(modelsInputDir, target).getAbsolutePath)
        log.info("Scoring data for " + target)
        val predictions = model
          .setPredictionCol(target)
          .transform(workData)
          .select("id", target)
        predictions
      } catch {
        case unknown: Throwable =>
          log.error("Score for " + target + " failed", unknown)
          // Fall back to an empty DataFrame with the expected schema so the outer join below still works
          val schema = StructType(Seq(
            StructField("id", StringType, nullable = false),
            StructField(target, DoubleType, nullable = true)))
          spark.createDataFrame(spark.sparkContext.emptyRDD[Row], schema)
      }
    }).toArray

    val predictedScores = predictions.reduce((left, right) => left.join(right, Seq("id"), "outer"))

    log.info("Saving predictions")
    predictionsOutputDir.foreach(o =>
      predictedScores.coalesce(1)
        .write
        .mode(SaveMode.ErrorIfExists)
        .option("header", value = true)
        .option("compression", "bzip2")
        .csv(new File(o, "allPredictions").getAbsolutePath))
  }
}
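A possible invocation of predictScores, shown only to clarify the expected argument types; the SparkSession, directories, site codes, and featureData DataFrame are all placeholders assumed to exist in the caller:

import java.io.File
import scala.collection.parallel.mutable.ParArray

// spark (SparkSession) and featureData (DataFrame with an "id" column plus feature columns) are assumed to exist
val sites: ParArray[String] = Array("enwiki", "frwiki").par
ScorePredictor.predictScores(
  spark,
  new File("/path/to/models"),        // one RandomForestRegressionModel directory per site
  Some(new File("/path/to/output")),  // predictions are written only if this Option is non-empty
  sites,
  featureData)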
Example 4: SparkTermCandidatesWeighter
// Package declaration and imported dependencies
package ru.ispras.atr.rank
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.functions.desc
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}
import ru.ispras.atr.datamodel.{DSDataset, TermCandidate}
import ru.ispras.atr.features.FeatureConfig
abstract class SparkTermCandidatesWeighter(docsToShow: Int) extends TermCandidatesWeighter {
  val termDFName = "Term"

  def allFeatures: Seq[FeatureConfig]

  def convert2FeatureSpace(candidates: Seq[TermCandidate], dataset: DSDataset): Seq[Seq[Double]] = {
    val resByFeatures: Seq[Seq[Double]] = allFeatures.map(f => {
      // Iterate by features first: it lets us estimate the time per feature and may be faster due to caching
      log.info(s"Initializing feature ${f.id}...")
      val featureComputer = f.build(candidates, dataset)
      log.info(s"Computing feature ${f.id}...")
      featureComputer.compute(candidates)
    })
    log.info(s"${allFeatures.size} features have been computed")
    resByFeatures.transpose
  }

  def convertToDF(termNames: Seq[String], featureNames: Seq[String], resByTerms: Seq[Seq[Double]]): DataFrame = {
    val header = StructField(termDFName, StringType) +: featureNames.map(f => StructField(f, DoubleType))
    val schema = StructType(header)
    val rows = termNames.zip(resByTerms).map(a => Row.fromSeq(a._1 +: a._2))
    val rowsRDD: RDD[Row] = SparkConfigs.sc.parallelize(rows)
    val df = SparkConfigs.sqlc.createDataFrame(rowsRDD, schema)
    df
  }

  def weightAndSort(candidates: Seq[TermCandidate], dataset: DSDataset): Iterable[(String, Double)] = {
    val featureValues = convert2FeatureSpace(candidates, dataset)
    val initDF = convertToDF(candidates.map(_.verboseRepr(docsToShow)), allFeatures.map(_.id), featureValues)
    val weightedDF = weight(initDF)
    val termNamesDF = weightedDF.select(termDFName, id).sort(desc(id))
    val weightColId: String = id // for serialization
    val termColId: String = termDFName
    val terms = termNamesDF.rdd.map(r => (r.getAs[String](termColId), r.getAs[Double](weightColId))).collect()
    terms
  }

  def weight(df: DataFrame): DataFrame
}

object SparkConfigs {
  val sparkConf = new SparkConf()
    .setAppName("ATR Evaluation System")
    .setMaster("local[16]")
    .set("spark.driver.memory", "1g")
  val sc = new SparkContext(sparkConf)
  val sqlc = new HiveContext(sc)
}
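Distilled from convertToDF above, a standalone sketch of the same pattern: a StringType term column plus one DoubleType column per feature, then ranking by one of the feature columns. The feature names and values are invented for illustration; the SparkConfigs object is the one defined in this example.

import org.apache.spark.sql.Row
import org.apache.spark.sql.functions.desc
import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}

// Illustrative data only; in SparkTermCandidatesWeighter these come from the feature computers
val featureNames = Seq("cvalue", "tfidf")
val schema = StructType(StructField("Term", StringType) +: featureNames.map(f => StructField(f, DoubleType)))
val rows = Seq(Row("neural network", 0.9, 0.7), Row("spark", 0.4, 0.8))
val df = SparkConfigs.sqlc.createDataFrame(SparkConfigs.sc.parallelize(rows), schema)
df.sort(desc("cvalue")).show()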
Example 5: DebugRowOpsSuite
// Package declaration and imported dependencies
package org.tensorframes
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{DoubleType, StructType}
import org.scalatest.FunSuite
import org.tensorframes.impl.{DebugRowOpsImpl, ScalarDoubleType}
import org.tensorframes.dsl._
class DebugRowOpsSuite
  extends FunSuite with TensorFramesTestSparkContext with GraphScoping with Logging {
  lazy val sql = sqlContext
  import ColumnInformation.structField
  import Shape.Unknown

  testGraph("Simple identity") {
    val rows = Array(Row(1.0))
    val input = StructType(Array(structField("x", ScalarDoubleType, Shape(Unknown))))
    val p2 = placeholder[Double](1) named "x"
    val out = identity(p2) named "y"
    val outputSchema = StructType(Array(structField("y", ScalarDoubleType, Shape(Unknown))))
    val (g, _) = TestUtilities.analyzeGraph(out)
    logDebug(g.toString)
    val res = DebugRowOpsImpl.performMap(rows, input, Array(0), g, outputSchema)
    assert(res === Array(Row(1.0, 1.0)))
  }

  testGraph("Simple add") {
    val rows = Array(Row(1.0))
    val input = StructType(Array(structField("x", ScalarDoubleType, Shape(Unknown))))
    val p2 = placeholder[Double](1) named "x"
    val out = p2 + p2 named "y"
    val outputSchema = StructType(Array(structField("y", ScalarDoubleType, Shape(Unknown))))
    val (g, _) = TestUtilities.analyzeGraph(out)
    logDebug(g.toString)
    val res = DebugRowOpsImpl.performMap(rows, input, Array(0), g, outputSchema)
    assert(res === Array(Row(2.0, 1.0)))
  }
}
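For readers who only want the DoubleType-level behaviour without setting up TensorFrames, a rough plain-Spark analogue of the "Simple add" case (this is not TensorFrames code, only an illustration of the expected rows) could look like this:

// sqlContext is an existing SQLContext; plain Spark analogue of doubling the "x" column into "y"
import sqlContext.implicits._
val df = Seq(Tuple1(1.0)).toDF("x")
val out = df.select(($"x" + $"x").alias("y"), $"x")
// out.collect() === Array(Row(2.0, 1.0)), matching the assertion above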