本文整理汇总了Scala中org.apache.spark.mllib.feature.StandardScaler类的典型用法代码示例。如果您正苦于以下问题:Scala StandardScaler类的具体用法?Scala StandardScaler怎么用?Scala StandardScaler使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了StandardScaler类的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: StandardScalarSample
//设置package包名称以及导入依赖的类
import org.apache.spark.mllib.feature.{StandardScaler, StandardScalerModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Demonstrates feature standardization with MLlib's `StandardScaler` on a LibSVM data set.
 *
 * Two scalers are fitted on the feature vectors: one built with the default constructor and one
 * built with `withMean = true, withStd = true`. The first transformed record of each is printed.
 */
object StandardScalarSample {

  /** LibSVM file that is loaded when no path is given on the command line. */
  private val DefaultDataPath =
    "/home/ubuntu/work/spark-1.6.0-bin-hadoop2.6/data/mllib/sample_libsvm_data.txt"

  /**
   * Runs the sample on a local Spark master.
   *
   * @param args optional; when present, `args(0)` replaces the built-in LibSVM input path
   */
  def main(args: Array[String]): Unit = {
    // NOTE(review): the app name looks copied from a Word2Vec sample; kept unchanged on purpose.
    val conf = new SparkConf().setMaster("local").setAppName("Word2Vector")
    val sc = new SparkContext(conf)
    try {
      val dataPath = args.headOption.getOrElse(DefaultDataPath)
      val data = MLUtils.loadLibSVMFile(sc, dataPath)
      val features = data.map(_.features)

      val scaler1 = new StandardScaler().fit(features)
      val scaler2 = new StandardScaler(withMean = true, withStd = true).fit(features)
      // scaler3 is an identical model to scaler2, and will produce identical transformations.
      // It is built only to show that a model can be reconstructed from its std/mean vectors.
      val scaler3 = new StandardScalerModel(scaler2.std, scaler2.mean)

      // data1 will be unit variance.
      val data1 = data.map(x => (x.label, scaler1.transform(x.features)))
      println(data1.first())

      // Without converting the features into dense vectors, transformation with zero mean will
      // raise an exception on sparse vectors.
      // data2 will be unit variance and zero mean.
      val data2 = data.map(x => (x.label, scaler2.transform(Vectors.dense(x.features.toArray))))
      println(data2.first())
    } finally {
      // Always release the context, even when loading or fitting fails.
      sc.stop()
    }
  }
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-with-Spark-Second-Edition,代码行数:28,代码来源:StandardScalarSample.scala
示例2: StandardScalarSample
//设置package包名称以及导入依赖的类
import org.apache.spark.mllib.feature.{StandardScaler, StandardScalerModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Standardizes the features of the sample LibSVM data set shipped with Spark.
 *
 * The data file is resolved relative to `org.sparksamples.Util.SPARK_HOME`. One scaler is fitted
 * with the default `StandardScaler` constructor and one with `withMean = true, withStd = true`;
 * the first transformed record of each is printed.
 */
object StandardScalarSample {

  /**
   * Runs the sample on a local Spark master.
   *
   * @param args ignored
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("Word2Vector")
    val sc = new SparkContext(conf)
    try {
      val data = MLUtils.loadLibSVMFile(sc,
        org.sparksamples.Util.SPARK_HOME + "/data/mllib/sample_libsvm_data.txt")
      val featureVectors = data.map(_.features)

      val scaler1 = new StandardScaler().fit(featureVectors)
      val scaler2 = new StandardScaler(withMean = true, withStd = true).fit(featureVectors)
      // scaler3 is an identical model to scaler2, and will produce identical transformations.
      // It only illustrates rebuilding a model from existing std/mean vectors.
      val scaler3 = new StandardScalerModel(scaler2.std, scaler2.mean)

      // data1 will be unit variance.
      val data1 = data.map(point => (point.label, scaler1.transform(point.features)))
      println(data1.first())

      // Without converting the features into dense vectors, transformation with zero mean will
      // raise an exception on sparse vectors.
      // data2 will be unit variance and zero mean.
      val data2 = data.map { point =>
        (point.label, scaler2.transform(Vectors.dense(point.features.toArray)))
      }
      println(data2.first())
    } finally {
      // Stop the context on every exit path so the local Spark resources are released.
      sc.stop()
    }
  }
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-with-Spark-Second-Edition,代码行数:29,代码来源:StandardScalarSample.scala
示例3: PreprocessingApp
//设置package包名称以及导入依赖的类
package org.apress.prospark
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.feature.StandardScaler
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
/**
 * Streaming job that reads space-separated text records from a socket, keeps four numeric
 * columns (indices 2, 4, 5 and 6) and fits a `StandardScaler` on every non-empty micro-batch.
 */
object PreprocessingApp {

  /**
   * Entry point.
   *
   * Prints a usage message and exits with status 1 unless exactly four arguments are given.
   *
   * @param args `<appname> <batchInterval> <hostname> <port>`; `batchInterval` is converted with
   *             `toInt` and passed to `Seconds(...)`, `port` is converted with `toInt`
   */
  def main(args: Array[String]): Unit = {
    if (args.length != 4) {
      System.err.println(
        "Usage: PreprocessingApp <appname> <batchInterval> <hostname> <port>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, hostname, port) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)
    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    val substream = ssc.socketTextStream(hostname, port.toInt)
      .filter(!_.contains("NaN"))
      .map(_.split(" "))
      // Fields up to index 6 are read below, so blank or truncated lines are dropped here
      // instead of failing the whole batch with an ArrayIndexOutOfBoundsException.
      .filter(f => f.length > 6 && f(1) != "0")

    substream
      .map(f => Array(f(2), f(4), f(5), f(6)))
      .map(f => f.map(v => v.toDouble))
      .map(f => Vectors.dense(f))
      .foreachRDD { rdd =>
        // Intervals with no input yield empty RDDs, and the scaler cannot derive
        // mean/variance from zero samples, so such batches are skipped.
        if (!rdd.isEmpty()) {
          val scalerModel = new StandardScaler().fit(rdd)
          // NOTE(review): transform on an RDD is lazy and scaledRDD is never consumed, so no
          // scaled output is produced yet; attach an action or output operation here.
          val scaledRDD = scalerModel.transform(rdd)
        }
      }

    ssc.start()
    ssc.awaitTermination()
  }
}