This post collects typical usage examples of the Scala class org.apache.spark.mllib.feature.Word2VecModel. If you are unsure what the Word2VecModel class does or how to use it, the curated examples below may help.
Three code examples of the Word2VecModel class are shown, ordered by popularity.
Example 1: run
// Package declaration and imported dependencies
package AbstractExactor
import KeywordExactor.PropertyExtractor
import org.apache.spark.mllib.feature.Word2VecModel
import org.apache.spark.{SparkConf, SparkContext}

// The enclosing object declaration was dropped by the example extractor; restored here so the code compiles.
object AbstractExactor {
def run(graphName: String,
vectorSize: Int,
sentenceList: Array[(Int, Array[String])],
keySentenceNum: Int,
iterator: Int,
word2vecModel: Word2VecModel,
df: Float): List[(String, Float)] = {
    // Build the sentence graph from the input sentences and the word2vec model
val constructTextGraph = new ConstructSentenceGraph(graphName, vectorSize, sentenceList, word2vecModel)
val textGraph = constructTextGraph.constructGraph
    // Rank sentences with TextRank and extract the key sentences
val keywordExtractor = new PropertyExtractor(textGraph, keySentenceNum)
val result = keywordExtractor.textrank(iterator, df).sortBy(_._1)
result
}
  def main(args: Array[String]): Unit = {
val conf = new SparkConf().setAppName("test").setMaster("local")
val sc = new SparkContext(conf)
    val data = sc.textFile("/Users/li/workshop/MyRepository/TextRank/src/main/resources/2.txt")
      .flatMap(_.split("。")) // split into sentences on the Chinese full stop (garbled to "?" in the original), then into words
      .collect.map(x => x.split(" "))
val dataIndex = data.zipWithIndex.map(x=>(x._2, x._1))
    dataIndex.foreach(x => println((x._1, x._2.mkString(""))))
// val path = "hdfs://61.147.114.85:9000/home/liyu/word2vec/model2/10_100_5_102017-02-08-word2VectorModel"
val path = "/Users/li/workshop/DataSet/word2vec/model-10-100-20/2016-08-16-word2VectorModel/"
val model = Word2VecModel.load(sc, path)
    val da = model.findSynonyms("???", 2) // the query word was garbled in the original source; substitute any in-vocabulary Chinese term
da.foreach(x => println(x))
val result = run("jiji", 100, dataIndex, 2, 100, model, 0.9F)
println(result)
    // Print the original sentences for the extracted key-sentence indices
val index = result.map(x=> x._1)
for (elem <- index) {
print(dataIndex(elem.toInt)._2.mkString(""))
}
}
}
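
Example 1 loads a pre-trained model from disk, so a Word2VecModel must have been trained and saved beforehand. Below is a minimal sketch of producing one with the mllib API; the corpus path, output path, object name, and parameter values are placeholders rather than anything from the original post:

// Sketch: train and persist a Word2VecModel like the one run() loads.
import org.apache.spark.mllib.feature.Word2Vec
import org.apache.spark.{SparkConf, SparkContext}

object TrainWord2VecSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("train-w2v").setMaster("local"))
    // One sentence per line, tokens separated by spaces, matching the input format used above
    val corpus = sc.textFile("corpus.txt").map(_.split(" ").toSeq)
    val model = new Word2Vec()
      .setVectorSize(100) // must match the vectorSize later passed to run()
      .setMinCount(5)
      .fit(corpus)
    model.save(sc, "word2VectorModel") // placeholder output path
    sc.stop()
  }
}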
Example 2: process_bayes
// Package declaration and imported dependencies
package twc.predict
import breeze.linalg.Vector
import com.kunyandata.nlpsuit.util.{KunyanConf, TextPreprocessing}
import com.kunyandata.nlpsuit.wordExtraction.TextRank
import org.apache.spark.mllib.feature.Word2VecModel

// The enclosing object declaration was dropped by the example extractor; "PredictBayes" is a placeholder name.
object PredictBayes {
  // Segment the document, extract (keyword, weight) pairs with TextRank, and build a weighted document vector
  def process_bayes(doc: String, w2vModel: Word2VecModel, modelSize: Int, kunyan: KunyanConf, stopwords: Array[String]): Array[Double] = {
val docSeg = TextPreprocessing.process(doc, stopwords, kunyan)
    // Extract candidate keywords with TextRank and keep only words of length >= 2
    val keywords = TextRank.run("k", 10, docSeg.toList, 20, 50, 0.85f)
    val keywordsFilter = keywords.toArray.filter(word => word._1.length >= 2)
val result = doc2vecWithModel_weight_beyes(keywordsFilter, w2vModel, modelSize)
result
}
  // Map each weighted keyword to its word2vec vector, scale it by the TextRank weight, and sum into a document vector
  private def doc2vecWithModel_weight_beyes(doc: Array[(String, Float)], model: Word2VecModel, modelSize: Int): Array[Double] = {
var resultTemp = new Array[Double](modelSize)
var wordTemp = new Array[Double](modelSize)
    doc.foreach(word => {
      // Out-of-vocabulary words fall back to the zero vector
      wordTemp = try {
        model.transform(word._1).toArray
      } catch {
        case _: Exception => Vector.zeros[Double](modelSize).toArray
      }
for (i <- resultTemp.indices){
resultTemp(i) += wordTemp(i) * word._2
}
})
    // Shift every component by +70, presumably to keep all features non-negative as Naive Bayes requires
    val docVec = resultTemp.map(_ + 70)
    docVec
}
}
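
A hedged sketch of how the vector returned by process_bayes might be scored with mllib's NaiveBayes. The model path, the placeholder features array, and the object name are assumptions, and the KunyanConf/stopword setup needed to call process_bayes itself is omitted because that API belongs to the third-party kunyandata library:

import org.apache.spark.mllib.classification.NaiveBayesModel
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.{SparkConf, SparkContext}

object ScoreWithBayesSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("score").setMaster("local"))
    // Stand-in for the Array[Double] produced by process_bayes (components shifted to be non-negative)
    val features: Array[Double] = Array.fill(100)(70.0)
    val nb = NaiveBayesModel.load(sc, "naiveBayesModel") // placeholder path to a previously trained model
    println(s"predicted label: ${nb.predict(Vectors.dense(features))}")
    sc.stop()
  }
}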
Example 3: Word2VecExample
// Package declaration and imported dependencies
package org.apache.spark.examples.mllib
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.feature.Word2VecModel
import org.apache.spark.mllib.feature.Word2Vec
object Word2VecExample {
  def main(args: Array[String]): Unit = {
val sparkConf = new SparkConf().setMaster("local[2]").setAppName("Word2VecExample")
val sc = new SparkContext(sparkConf)
val input = sc.textFile("../data/mllib/text8").map(line => line.split(" ").toSeq)
val word2vec = new Word2Vec()
    // fit() trains the model on the tokenized corpus and returns a Word2VecModel
val model = word2vec.fit(input)
val synonyms = model.findSynonyms("china", 40)
for ((synonym, cosineSimilarity) <- synonyms) {
      // Print each synonym together with its cosine similarity to the query word
println(s"$synonym $cosineSimilarity")
}
    // Save and load the model
    //model.save(sc, "myModelPath")
    //val sameModel = Word2VecModel.load(sc, "myModelPath")

    sc.stop()
  }
}
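
Beyond findSynonyms, the trained model also exposes its raw word vectors through getVectors. A small sketch, assuming the model value from the example above is still in scope:

// Inspect the learned word vectors (assumes `model` from Word2VecExample)
val vectors: Map[String, Array[Float]] = model.getVectors
vectors.get("china").foreach { vec =>
  println(s"vector dimension: ${vec.length}") // 100 by default
  println(vec.take(5).mkString(", "))         // first few components
}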