本文整理汇总了Scala中org.apache.spark.mllib.feature.Word2Vec类的典型用法代码示例。如果您正苦于以下问题:Scala Word2Vec类的具体用法?Scala Word2Vec怎么用?Scala Word2Vec使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Word2Vec类的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: ConvertWordsToVectors
//设置package包名称以及导入依赖的类
package org.sparksamples.featureext
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.feature.Word2Vec
/**
 * Trains a Word2Vec model on a whitespace-tokenized text file and prints every
 * learned word vector as `word-->v1 v2 ...`.
 */
object ConvertWordsToVectors {

  /** Corpus used when no path is supplied on the command line. */
  private val DefaultFile =
    "/home/ubuntu/work/rajdeepd-spark-ml/spark-ml/Chapter_04/data/text8_10000"

  /**
   * Entry point.
   *
   * @param args optional; `args(0)` overrides the input file path. With no
   *             arguments the original hard-coded path is used.
   */
  def main(args: Array[String]): Unit = {
    val file = args.headOption.getOrElse(DefaultFile)
    val conf = new SparkConf().setMaster("local").setAppName("Word2Vector")
    val sc = new SparkContext(conf)
    try {
      // Each input line becomes one "sentence": a sequence of space-separated tokens.
      val input = sc.textFile(file).map(line => line.split(" ").toSeq)
      val word2vec = new Word2Vec()
      val model = word2vec.fit(input)
      val vectors = model.getVectors
      vectors.foreach { case (word, vector) =>
        println(s"${word}-->${vector.mkString(" ")}")
      }
    } finally {
      // Always release the SparkContext, even if reading or training fails.
      sc.stop()
    }
  }
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-with-Spark-Second-Edition,代码行数:18,代码来源:ConvertWordsToVectors.scala
示例2: Word2VecMllib
// Import the required classes (this example declares no package)
import org.apache.spark.SparkContext
import org.apache.spark.mllib.linalg.{SparseVector => SV}
/**
 * Trains a Word2Vec model on the `alt.atheism` training documents of the
 * 20 Newsgroups data set and prints the 5 nearest synonyms of "philosophers".
 */
object Word2VecMllib {

  /**
   * Entry point.
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    import org.apache.spark.mllib.feature.Word2Vec

    val sc = new SparkContext("local[2]", "Word2Vector App")
    try {
      val path = "./data/20news-bydate-train/alt.atheism/*"
      // wholeTextFiles yields (fileName, content) pairs; only the content is needed.
      val text = sc.wholeTextFiles(path).map { case (_, content) => content }
      // TFIDFExtraction.tokenize is defined elsewhere in the project;
      // presumably it returns the document's tokens as a sequence of strings.
      val tokens = text.map(doc => TFIDFExtraction.tokenize(doc))
      val word2vec = new Word2Vec()
      //word2vec.setSeed(42) // we do this to generate the same results each time
      val word2vecModel = word2vec.fit(tokens)
      word2vecModel.findSynonyms("philosophers", 5).foreach(println)
    } finally {
      // Stop the context even when reading, tokenizing or training throws.
      sc.stop()
    }
  }
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-with-Spark-Second-Edition,代码行数:28,代码来源:Word2VecMllib.scala
示例3: Word2VecExample
//设置package包名称以及导入依赖的类
package org.apache.spark.examples.mllib
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.feature.Word2VecModel
import org.apache.spark.mllib.feature.Word2Vec
/**
 * Trains a Word2Vec model on the text8 corpus and prints the 40 words most
 * similar to "china" together with their cosine similarity.
 */
object Word2VecExample {

  /**
   * Entry point.
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("Word2VecExample")
    val sc = new SparkContext(sparkConf)
    try {
      // Each input line becomes one sequence of space-separated tokens.
      val input = sc.textFile("../data/mllib/text8").map(line => line.split(" ").toSeq)
      val word2vec = new Word2Vec()
      // fit() trains a model from the RDD of token sequences.
      val model = word2vec.fit(input)
      val synonyms = model.findSynonyms("china", 40)
      for ((synonym, cosineSimilarity) <- synonyms) {
        // Print each synonym with its similarity score.
        println(s"$synonym $cosineSimilarity")
      }
      // Save and load model
      //model.save(sc, "myModelPath")
      //val sameModel = Word2VecModel.load(sc, "myModelPath")
    } finally {
      // Release the SparkContext whether or not the job succeeded.
      sc.stop()
    }
  }
}
示例4: word2vec
//设置package包名称以及导入依赖的类
package spark.mltest
import org.apache.spark.mllib.feature.Word2Vec
import org.apache.spark.{SparkConf, SparkContext}
/**
* Created by I311352 on 4/5/2017.
*/
/**
 * Small Word2Vec experiment: previews the text8 corpus, then trains a model on
 * a single in-memory sentence and prints the vector for "to" and the 5 nearest
 * synonyms of "a".
 */
object word2vec extends App {
  // Machine-specific location of the text8 corpus (Windows path).
  val text8 = "C:\\Users\\i311352\\Downloads\\text8"
  // Target directory for the (currently disabled) model save below.
  val output = "data/model"
  val conf = new SparkConf().setAppName("Spark Word2Vec").setMaster("local[2]")
  val sc = new SparkContext(conf)

  try {
    val input = sc.textFile(text8).map(line => line.split(" ").toSeq)
    println("line data" + input.take(10).toList)

    val input2 = sc.parallelize(
      List("Assigns a group ID to all the jobs started by this thread until the group ID is set to a")
        .map(line => line.split(" ").toSeq))
    println("line data" + input2.take(10).toList)

    val trainer = new Word2Vec()
    // Keep words that occur only once; the training corpus is a single sentence.
    trainer.setMinCount(1)
    val model = trainer.fit(input2)
    println(model.transform("to"))

    val synonyms = model.findSynonyms("a", 5)
    for ((synonym, cosineSimilarity) <- synonyms) {
      println(s"$synonym $cosineSimilarity")
    }
    //model.save(sc, output)
  } finally {
    // Always release the SparkContext, e.g. when the text8 path does not exist.
    sc.stop()
  }
}
示例5: ConvertWordsToVectors
//设置package包名称以及导入依赖的类
package org.sparksamples.featureext
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.feature.Word2Vec
/**
 * Trains a Word2Vec model on a whitespace-tokenized text file and prints every
 * learned word vector as `word-->v1 v2 ...`.
 */
object ConvertWordsToVectors {

  /** Corpus used when no path is supplied on the command line. */
  private val DefaultFile =
    "/home/ubuntu/work/ml-resources/spark-ml/Chapter_04/data/text8_10000"

  /**
   * Entry point.
   *
   * @param args optional; `args(0)` overrides the input file path. With no
   *             arguments the original hard-coded path is used.
   */
  def main(args: Array[String]): Unit = {
    val file = args.headOption.getOrElse(DefaultFile)
    val conf = new SparkConf().setMaster("local").setAppName("Word2Vector")
    val sc = new SparkContext(conf)
    try {
      // Each input line becomes one "sentence": a sequence of space-separated tokens.
      val input = sc.textFile(file).map(line => line.split(" ").toSeq)
      val word2vec = new Word2Vec()
      val model = word2vec.fit(input)
      val vectors = model.getVectors
      vectors.foreach { case (word, vector) =>
        println(s"${word}-->${vector.mkString(" ")}")
      }
    } finally {
      // Always release the SparkContext, even if reading or training fails.
      sc.stop()
    }
  }
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-with-Spark-Second-Edition,代码行数:18,代码来源:ConvertWordsToVectors.scala