本文整理汇总了Scala中org.apache.spark.mllib.feature.IDF类的典型用法代码示例。如果您正苦于以下问题:Scala IDF类的具体用法?Scala IDF怎么用?Scala IDF使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了IDF类的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: TfIdfSample
// Declare the package and import the required classes
package org.sparksamples.featureext
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.feature.HashingTF
import org.apache.spark.mllib.feature.IDF
/**
 * Sample driver that builds TF-IDF vectors for the lines of a text file using
 * Spark MLlib's `HashingTF` and `IDF`.
 *
 * Each line of the input file is treated as one document and is tokenised by
 * splitting on single spaces.
 */
object TfIdfSample {

  /**
   * Runs the sample on a local Spark master and prints the term-frequency and
   * TF-IDF vectors to standard output.
   *
   * @param args optional command-line arguments; when present, `args(0)` is
   *             used as the input file path, otherwise the hard-coded default
   *             path below is used
   */
  def main(args: Array[String]): Unit = {
    //TODO replace the default with a path specific to your machine, or pass one as args(0)
    val file = args.headOption
      .getOrElse("/home/ubuntu/work/spark-1.6.0-bin-hadoop2.6//README.md")
    val spConfig = (new SparkConf).setMaster("local").setAppName("SparkApp")
    val sc = new SparkContext(spConfig)
    try {
      val documents: RDD[Seq[String]] = sc.textFile(file).map(_.split(" ").toSeq)
      // println (not print) so the count does not run into the first vector printed below.
      println("Documents Size:" + documents.count)

      val hashingTF = new HashingTF()
      val tf = hashingTF.transform(documents)
      // Mark tf for caching BEFORE the first action on it: it is consumed three
      // times (printing, IDF.fit and IDF.transform) and would otherwise be
      // recomputed from the input file each time.
      tf.cache()
      tf.foreach(vector => println(vector))

      val idf = new IDF().fit(tf)
      val tfidf = idf.transform(tf)
      println("tfidf size : " + tfidf.count)
      tfidf.foreach(vector => println(vector))
    } finally {
      // Always release the SparkContext, even when a job above fails.
      sc.stop()
    }
  }
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-with-Spark-Second-Edition,代码行数:30,代码来源:TfIdfSample.scala
示例2: TfIdfSample
// Declare the package and import the required classes
package org.sparksamples.featureext
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.feature.HashingTF
import org.apache.spark.mllib.feature.IDF
import org.sparksamples.Util
/**
 * Sample driver that builds TF-IDF vectors for the lines of a text file using
 * Spark MLlib's `HashingTF` and `IDF`.
 *
 * Each line of the input file is treated as one document and is tokenised by
 * splitting on single spaces.
 */
object TfIdfSample {

  /**
   * Runs the sample on a local Spark master and prints the term-frequency and
   * TF-IDF vectors to standard output.
   *
   * @param args optional command-line arguments; when present, `args(0)` is
   *             used as the input file path, otherwise the file defaults to
   *             `README.md` under `Util.SPARK_HOME`
   */
  def main(args: Array[String]): Unit = {
    //TODO adjust Util.SPARK_HOME for your machine, or pass an input path as args(0)
    val file = args.headOption.getOrElse(Util.SPARK_HOME + "/README.md")
    val spConfig = (new SparkConf).setMaster("local").setAppName("SparkApp")
    val sc = new SparkContext(spConfig)
    try {
      val documents: RDD[Seq[String]] = sc.textFile(file).map(_.split(" ").toSeq)
      // println (not print) so the count does not run into the first vector printed below.
      println("Documents Size:" + documents.count)

      val hashingTF = new HashingTF()
      val tf = hashingTF.transform(documents)
      // Mark tf for caching BEFORE the first action on it: it is consumed three
      // times (printing, IDF.fit and IDF.transform) and would otherwise be
      // recomputed from the input file each time.
      tf.cache()
      tf.foreach(vector => println(vector))

      val idf = new IDF().fit(tf)
      val tfidf = idf.transform(tf)
      println("tfidf size : " + tfidf.count)
      tfidf.foreach(vector => println(vector))
    } finally {
      // Always release the SparkContext, even when a job above fails.
      sc.stop()
    }
  }
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-with-Spark-Second-Edition,代码行数:31,代码来源:TfIdfSample.scala