本文整理汇总了Java中org.apache.spark.mllib.feature.Word2VecModel类的典型用法代码示例。如果您正苦于以下问题:Java Word2VecModel类的具体用法?Java Word2VecModel怎么用?Java Word2VecModel使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
Word2VecModel类属于org.apache.spark.mllib.feature包,在下文中一共展示了Word2VecModel类的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import org.apache.spark.mllib.feature.Word2VecModel; //导入依赖的package包/类
/**
 * Trains an MLlib Word2Vec model over a CSV of tweets and saves it to disk.
 *
 * <p>Reads the tweet file via {@code TwitterUtils.loadTwitterData}, splits each line on
 * single spaces into token lists, fits a 10-dimensional Word2Vec model, prints the
 * vocabulary size, and persists the model under a timestamped path.
 *
 * @param args unused command-line arguments
 */
public static void main(String[] args) {
    String logFile = "/home/anoukh/SentimentAnalysis/New Files/distinctTweetChunk.csv"; // Should be some file on your system
    SparkConf conf = new SparkConf().setAppName("TwitterSentiment").setMaster("local")
            .set("spark.executor.memory", "8G")
            .set("spark.driver.maxResultSize", "16G");
    // try-with-resources: JavaSparkContext implements Closeable, so the context is
    // stopped even if loading or training throws (the original leaked it).
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
        JavaRDD<String> tweetText = TwitterUtils.loadTwitterData(sc, logFile);
        // JavaRDD<String> tweetText = sc.textFile(logFile).cache();
        // NOTE(review): collect() pulls the entire dataset onto the driver — debug
        // output only; remove for large inputs.
        List<String> collectedList = tweetText.collect();
        for (String value : collectedList) {
            System.out.println(value);
        }
        // Tokenize each tweet on single spaces. Parameterized List<String> replaces the
        // raw List used originally (removes unchecked warnings, same runtime behavior).
        JavaRDD<List<String>> splittedTokens = tweetText.map(new Function<String, List<String>>() {
            @Override
            public List<String> call(String s) {
                ArrayList<String> list = new ArrayList<String>();
                Collections.addAll(list, s.split(" "));
                return list;
            }
        });
        Word2Vec word2vec = new Word2Vec().setVectorSize(10);
        Word2VecModel model = word2vec.fit(splittedTokens);
        System.out.println(model.getVectors().size());
        // Timestamp suffix avoids "output path already exists" failures on re-runs.
        model.save(sc.sc(), "uniqueTweet.model" + System.currentTimeMillis());
    }
}
示例2: run
import org.apache.spark.mllib.feature.Word2VecModel; //导入依赖的package包/类
/**
 * Streams job-title messages from Kafka, classifies each one with an H2O model built
 * from {@code craigslistJobTitles} (a field defined elsewhere in this class — TODO
 * confirm its contents), and prints the predictions.
 *
 * <p>Flow: read Kafka/Spark settings from {@code conf}, start Spark streaming plus an
 * H2O context, build the classification and Word2Vec models, then consume a direct
 * Kafka stream and classify every non-empty message until termination.
 *
 * @param conf composite configuration holding Kafka broker/topic/offset settings and
 *             Spark master/serializer/stream-duration settings
 */
private void run(CompositeConfiguration conf) {
    // Kafka props
    String kafkaBrokers = conf.getString("metadata.broker.list");
    String topics = conf.getString("consumer.topic");
    String fromOffset = conf.getString("auto.offset.reset");
    // Spark props
    String sparkMaster = conf.getString("spark.master");
    String sparkSerDe = conf.getString("spark.serializer");
    long sparkStreamDuration = conf.getLong("stream.duration");
    SparkConf sparkConf = new SparkConf().setAppName("Kafka Spark ES Flow with Java API").setMaster(sparkMaster).set("spark.serializer",
            sparkSerDe);
    JavaSparkContext sp = new JavaSparkContext(sparkConf);
    JavaStreamingContext jssc = new JavaStreamingContext(sp, Durations.seconds(sparkStreamDuration));
    SQLContext sqlContext = new SQLContext(sp);
    H2OContext h2oContext = new H2OContext(sp.sc());
    h2oContext.start();
    // Declared as Set/Map (program to interfaces); concrete HashSet/HashMap unchanged.
    Set<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
    Map<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("metadata.broker.list", kafkaBrokers);
    kafkaParams.put("auto.offset.reset", fromOffset);
    CraigslistJobTitlesApp staticApp = new CraigslistJobTitlesApp(craigslistJobTitles, sp.sc(), sqlContext, h2oContext);
    try {
        // Train both the H2O classifier and its companion Word2Vec model up front.
        final Tuple2<Model<?, ?, ?>, Word2VecModel> tModel = staticApp.buildModels(craigslistJobTitles, "initialModel");
        // final Tuple2<Model<?, ?, ?>, Word2VecModel> tModel = importModels(h2oModelFolder, word2VecModelFolder, sp.sc());
        // final Model<?, ?, ?> tModel1 = importH2OModel(h2oModelFolder1);
        final String modelId = tModel._1()._key.toString();
        final Word2VecModel w2vModel = tModel._2();
        // exportModels(tModel._1(), w2vModel, sp.sc());
        // Create direct kafka stream with brokers and topics
        JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class, String.class,
                StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);
        // Classify incoming messages: take the Kafka value, skip empties, classify,
        // then render as `"title" = [scores...]` and print each micro-batch.
        messages.map(message -> message._2()).filter(str -> !str.isEmpty())
                .map(jobTitle -> staticApp.classify(jobTitle, modelId, w2vModel))
                .map(pred -> new StringBuilder(100).append('\"').append(pred._1()).append("\" = ").append(Arrays.toString(pred._2())))
                .print();
        // messages.map(mesage -> mesage._2()).filter(str -> !str.isEmpty())
        // .map(jobTitle -> tModel1.score(new H2OFrame(jobTitle)))
        // .map(pred -> pred._names)
        // .print();
        jssc.start();
        jssc.awaitTermination();
    } catch (Exception e) {
        // Example code: surface the failure; a real service would log and rethrow.
        e.printStackTrace();
    } finally {
        // stop() also shuts down the underlying SparkContext by default.
        jssc.stop();
        staticApp.shutdown();
    }
}
示例3: importModels
import org.apache.spark.mllib.feature.Word2VecModel; //导入依赖的package包/类
/**
 * Loads the previously exported H2O model and Spark Word2Vec model from disk.
 *
 * @param h2oModelFolder       directory holding the exported H2O model
 * @param word2VecModelFolder  directory holding the saved Word2Vec model
 * @param sc                   Spark context used by {@code Word2VecModel.load}
 * @return the two models paired as (H2O model, Word2Vec model)
 */
private Tuple2<Model<?, ?, ?>, Word2VecModel> importModels(String h2oModelFolder, String word2VecModelFolder, SparkContext sc) {
    Model<?, ?, ?> h2oModel = importH2OModel(h2oModelFolder);
    Word2VecModel w2vModel = Word2VecModel.load(sc, word2VecModelFolder);
    return new Tuple2<>(h2oModel, w2vModel);
}
示例4: exportModels
import org.apache.spark.mllib.feature.Word2VecModel; //导入依赖的package包/类
/**
 * Persists both models to disk so a later run can restore them via
 * {@code importModels}.
 *
 * @param h2oModel             H2O model to export (wildcard type replaces the raw
 *                             {@code Model}, matching {@code importModels})
 * @param h2oModelFolder       destination directory for the H2O model
 * @param w2vModel             Spark Word2Vec model to save
 * @param word2VecModelFolder  destination directory for the Word2Vec model
 * @param sc                   Spark context used by {@code Word2VecModel.save}
 */
private void exportModels(Model<?, ?, ?> h2oModel, String h2oModelFolder, Word2VecModel w2vModel, String word2VecModelFolder, SparkContext sc) {
    exportH2OModel(h2oModel, h2oModelFolder);
    w2vModel.save(sc, word2VecModelFolder);
}