本文整理汇总了Java中org.deeplearning4j.models.word2vec.wordstore.VocabCache.vocabWords方法的典型用法代码示例。如果您正苦于以下问题:Java VocabCache.vocabWords方法的具体用法?Java VocabCache.vocabWords怎么用?Java VocabCache.vocabWords使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.deeplearning4j.models.word2vec.wordstore.VocabCache
的用法示例。
在下文中一共展示了VocabCache.vocabWords方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: buildShallowVocabCache
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入方法依赖的package包/类
/**
 * This method builds a shallow vocabulary and its Huffman tree.
 * Each id present in the counter becomes a ShallowSequenceElement whose frequency
 * is the counter value; Huffman codes and indexes are then applied to the cache.
 *
 * @param counter per-element frequency counter keyed by element id
 * @return vocab cache of shallow elements with Huffman indexes applied
 */
protected VocabCache<ShallowSequenceElement> buildShallowVocabCache(Counter<Long> counter) {
// TODO: need simplified cache here, that will operate on Long instead of string labels
VocabCache<ShallowSequenceElement> vocabCache = new AbstractCache<>();
for (Long id : counter.keySet()) {
ShallowSequenceElement shallowElement = new ShallowSequenceElement(counter.getCount(id), id);
vocabCache.addToken(shallowElement);
}
// building huffman tree over the shallow elements, then assigning indexes back into the cache
Huffman huffman = new Huffman(vocabCache.vocabWords());
huffman.build();
huffman.applyIndexes(vocabCache);
return vocabCache;
}
示例2: writeWordVectors
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入方法依赖的package包/类
/**
 * This method saves paragraph vectors to the given output stream in a CSV-like
 * text format: one element per line, prefixed with "L" for labels or "E" for
 * ordinary words, followed by the (whitespace-escaped) label and the vector values.
 *
 * @param vectors paragraph vectors to serialize
 * @param stream output stream to write to; closed by this method
 */
@Deprecated
public static void writeWordVectors(ParagraphVectors vectors, OutputStream stream) {
    try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(stream, "UTF-8"))) {
        /*
        This method acts similarly to w2v csv serialization, except of additional tag for labels
        */
        VocabCache<VocabWord> vocabCache = vectors.getVocab();
        for (VocabWord word : vocabCache.vocabWords()) {
            StringBuilder builder = new StringBuilder();
            // "L" marks label elements, "E" marks ordinary word elements
            builder.append(word.isLabel() ? "L" : "E").append(" ");
            builder.append(word.getLabel().replaceAll(" ", whitespaceReplacement)).append(" ");
            INDArray vector = vectors.getWordVectorMatrix(word.getLabel());
            for (int j = 0; j < vector.length(); j++) {
                builder.append(vector.getDouble(j));
                if (j < vector.length() - 1) {
                    builder.append(" ");
                }
            }
            writer.write(builder.append("\n").toString());
        }
        // no explicit flush()/close() needed: try-with-resources closes the writer,
        // and BufferedWriter.close() flushes any buffered output first
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
示例3: importVocabulary
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入方法依赖的package包/类
/**
 * Imports all elements from the VocabCache passed as argument into this cache.
 * Every element is routed through {@link #addToken}, which handles the case
 * where the element already exists; the documents counter is then increased
 * by the source cache's total document count.
 *
 * @param vocabCache source vocabulary to copy elements from
 */
public void importVocabulary(@NonNull VocabCache<T> vocabCache) {
    vocabCache.vocabWords().forEach(this::addToken);

    // carry over the number of documents seen by the source cache
    this.documentsCounter.addAndGet(vocabCache.totalNumberOfDocs());
}
示例4: importVocabulary
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入方法依赖的package包/类
/**
 * Imports all elements of the given cache into this vocabulary.
 * Words not yet present are registered in the token and vocab maps; the
 * frequency and total-occurrence counters are accumulated for every word,
 * whether it was already present or not.
 *
 * @param vocabCache source vocabulary to merge into this one
 */
@Override
public void importVocabulary(VocabCache<VocabWord> vocabCache) {
    for (VocabWord word : vocabCache.vocabWords()) {
        if (!vocabs.containsKey(word.getLabel())) {
            tokens.put(word.getLabel(), word);
            vocabs.put(word.getLabel(), word);
        }
        // frequency accumulation happens for both new and existing words;
        // the original duplicated this call in both branches of the if/else
        wordFrequencies.incrementCount(word.getLabel(), (float) word.getElementFrequency());
        totalWordOccurrences.addAndGet((long) word.getElementFrequency());
    }
}
示例5: testSyn0AfterFirstIteration
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入方法依赖的package包/类
/**
 * Spark pipeline smoke test: builds a vocab cache and word-list RDD from the
 * corpus, constructs the Huffman tree over the vocabulary, and wires
 * FirstIterationFunction over the zipped (sentence, cumulative count) RDD.
 * NOTE(review): the resulting pointSyn0Vec RDD is never materialized or
 * asserted on, so this test only verifies that the pipeline composes without
 * throwing — consider adding a count()/collect() plus assertions.
 */
@Test
public void testSyn0AfterFirstIteration() throws Exception {
JavaSparkContext sc = getContext();
JavaRDD<String> corpusRDD = getCorpusRDD(sc);
// word2vec.setRemoveStop(false);
// tokenizer configuration is broadcast so executors can build the pipeline locally
Broadcast<Map<String, Object>> broadcastTokenizerVarMap = sc.broadcast(word2vec.getTokenizerVarMap());
TextPipeline pipeline = new TextPipeline(corpusRDD, broadcastTokenizerVarMap);
pipeline.buildVocabCache();
pipeline.buildVocabWordListRDD();
VocabCache<VocabWord> vocabCache = pipeline.getVocabCache();
// Huffman tree is required before syn0 training can assign codes/points
Huffman huffman = new Huffman(vocabCache.vocabWords());
huffman.build();
// Get total word count and put into word2vec variable map
Map<String, Object> word2vecVarMap = word2vec.getWord2vecVarMap();
word2vecVarMap.put("totalWordCount", pipeline.getTotalWordCount());
double[] expTable = word2vec.getExpTable();
JavaRDD<AtomicLong> sentenceCountRDD = pipeline.getSentenceCountRDD();
JavaRDD<List<VocabWord>> vocabWordListRDD = pipeline.getVocabWordListRDD();
// cumulative sentence counts let each partition know its global word offset
CountCumSum countCumSum = new CountCumSum(sentenceCountRDD);
JavaRDD<Long> sentenceCountCumSumRDD = countCumSum.buildCumSum();
JavaPairRDD<List<VocabWord>, Long> vocabWordListSentenceCumSumRDD =
vocabWordListRDD.zip(sentenceCountCumSumRDD);
Broadcast<Map<String, Object>> word2vecVarMapBroadcast = sc.broadcast(word2vecVarMap);
Broadcast<double[]> expTableBroadcast = sc.broadcast(expTable);
FirstIterationFunction firstIterationFunction = new FirstIterationFunction(word2vecVarMapBroadcast,
expTableBroadcast, pipeline.getBroadCastVocabCache());
// lazily defined; never evaluated — see NOTE(review) above
JavaRDD<Pair<VocabWord, INDArray>> pointSyn0Vec = vocabWordListSentenceCumSumRDD
.mapPartitions(firstIterationFunction).map(new MapToPairFunction());
}
示例6: testHasMoreObjects1
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入方法依赖的package包/类
/**
 * Writes two co-occurrence pairs to a temporary binary file and verifies
 * that the reader hands both of them back as non-null objects.
 */
@Test
public void testHasMoreObjects1() throws Exception {
    File file = File.createTempFile("tmp", "tmp");
    file.deleteOnExit();

    // three-word vocabulary backing the co-occurrence pairs
    VocabCache<VocabWord> cache = new AbstractCache.Builder<VocabWord>().build();
    VocabWord human = new VocabWord(1.0, "human");
    VocabWord animal = new VocabWord(2.0, "animal");
    VocabWord unknown = new VocabWord(3.0, "unknown");
    cache.addToken(human);
    cache.addToken(animal);
    cache.addToken(unknown);

    // Huffman indexes are needed so elements can be serialized by index
    Huffman tree = new Huffman(cache.vocabWords());
    tree.build();
    tree.applyIndexes(cache);

    BinaryCoOccurrenceWriter<VocabWord> coWriter = new BinaryCoOccurrenceWriter<>(file);

    CoOccurrenceWeight<VocabWord> first = new CoOccurrenceWeight<>();
    first.setElement1(human);
    first.setElement2(animal);
    first.setWeight(3.14159265);
    coWriter.writeObject(first);

    CoOccurrenceWeight<VocabWord> second = new CoOccurrenceWeight<>();
    second.setElement1(animal);
    second.setElement2(unknown);
    second.setWeight(0.197);
    coWriter.writeObject(second);

    coWriter.finish();

    // read everything back; both stored pairs must come out non-null
    BinaryCoOccurrenceReader<VocabWord> coReader = new BinaryCoOccurrenceReader<>(file, cache, null);

    CoOccurrenceWeight<VocabWord> received = coReader.nextObject();
    log.info("Object received: " + received);
    assertNotEquals(null, received);

    received = coReader.nextObject();
    log.info("Object received: " + received);
    assertNotEquals(null, received);
}
示例7: testHasMoreObjects2
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入方法依赖的package包/类
@Test
public void testHasMoreObjects2() throws Exception {
File tempFile = File.createTempFile("tmp", "tmp");
tempFile.deleteOnExit();
VocabCache<VocabWord> vocabCache = new AbstractCache.Builder<VocabWord>().build();
VocabWord word1 = new VocabWord(1.0, "human");
VocabWord word2 = new VocabWord(2.0, "animal");
VocabWord word3 = new VocabWord(3.0, "unknown");
vocabCache.addToken(word1);
vocabCache.addToken(word2);
vocabCache.addToken(word3);
Huffman huffman = new Huffman(vocabCache.vocabWords());
huffman.build();
huffman.applyIndexes(vocabCache);
BinaryCoOccurrenceWriter<VocabWord> writer = new BinaryCoOccurrenceWriter<>(tempFile);
CoOccurrenceWeight<VocabWord> object1 = new CoOccurrenceWeight<>();
object1.setElement1(word1);
object1.setElement2(word2);
object1.setWeight(3.14159265);
writer.writeObject(object1);
CoOccurrenceWeight<VocabWord> object2 = new CoOccurrenceWeight<>();
object2.setElement1(word2);
object2.setElement2(word3);
object2.setWeight(0.197);
writer.writeObject(object2);
CoOccurrenceWeight<VocabWord> object3 = new CoOccurrenceWeight<>();
object3.setElement1(word1);
object3.setElement2(word3);
object3.setWeight(0.001);
writer.writeObject(object3);
writer.finish();
BinaryCoOccurrenceReader<VocabWord> reader = new BinaryCoOccurrenceReader<>(tempFile, vocabCache, null);
CoOccurrenceWeight<VocabWord> r1 = reader.nextObject();
log.info("Object received: " + r1);
assertNotEquals(null, r1);
r1 = reader.nextObject();
log.info("Object received: " + r1);
assertNotEquals(null, r1);
r1 = reader.nextObject();
log.info("Object received: " + r1);
assertNotEquals(null, r1);
}