当前位置: 首页>>代码示例>>Java>>正文


Java VocabCache.vocabWords方法代码示例

本文整理汇总了Java中org.deeplearning4j.models.word2vec.wordstore.VocabCache.vocabWords方法的典型用法代码示例。如果您正苦于以下问题:Java VocabCache.vocabWords方法的具体用法?Java VocabCache.vocabWords怎么用?Java VocabCache.vocabWords使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.deeplearning4j.models.word2vec.wordstore.VocabCache的用法示例。


在下文中一共展示了VocabCache.vocabWords方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: buildShallowVocabCache

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入方法依赖的package包/类
/**
 * This method builds shadow vocabulary and huffman tree
 *
 * @param counter
 * @return
 */
/**
 * Builds a shallow vocabulary cache plus its Huffman coding tree from element counts.
 *
 * @param counter per-element frequency counts, keyed by numeric element id
 * @return vocab cache of {@link ShallowSequenceElement} with Huffman indexes applied
 */
protected VocabCache<ShallowSequenceElement> buildShallowVocabCache(Counter<Long> counter) {

    // TODO: need simplified cache here, that will operate on Long instead of string labels
    VocabCache<ShallowSequenceElement> cache = new AbstractCache<>();
    for (Long elementId : counter.keySet()) {
        cache.addToken(new ShallowSequenceElement(counter.getCount(elementId), elementId));
    }

    // derive Huffman codes from the frequencies and write the indexes back into the cache
    Huffman tree = new Huffman(cache.vocabWords());
    tree.build();
    tree.applyIndexes(cache);

    return cache;
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:23,代码来源:SparkSequenceVectors.java

示例2: writeWordVectors

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入方法依赖的package包/类
/**
 * This method saves paragraph vectors to the given output stream.
 *
 * @param vectors
 * @param stream
 */
@Deprecated
/**
 * This method saves paragraph vectors to the given output stream in a CSV-like
 * text form. Each line is: ("L"|"E") label v0 v1 ... vN, where "L" marks label
 * elements and "E" marks regular elements.
 *
 * @param vectors paragraph vectors to serialize
 * @param stream  destination stream; closed when this method returns
 */
@Deprecated
public static void writeWordVectors(ParagraphVectors vectors, OutputStream stream) {

    try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(stream, "UTF-8"))) {
        /*
        This method acts similarly to w2v csv serialization, except for the additional tag for labels
         */

        VocabCache<VocabWord> vocabCache = vectors.getVocab();
        for (VocabWord word : vocabCache.vocabWords()) {
            StringBuilder builder = new StringBuilder();

            builder.append(word.isLabel() ? "L" : "E").append(" ");
            builder.append(word.getLabel().replaceAll(" ", whitespaceReplacement)).append(" ");

            INDArray vector = vectors.getWordVectorMatrix(word.getLabel());
            for (int j = 0; j < vector.length(); j++) {
                builder.append(vector.getDouble(j));
                if (j < vector.length() - 1) {
                    builder.append(" ");
                }
            }

            writer.write(builder.append("\n").toString());
        }
        // No explicit flush()/close() here: try-with-resources closes the writer,
        // and BufferedWriter.close() flushes pending output first. The previous
        // explicit close() caused a redundant double close.
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:39,代码来源:WordVectorSerializer.java

示例3: importVocabulary

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入方法依赖的package包/类
/**
 * This method imports all elements from VocabCache passed as argument
 * If element already exists,
 *
 * @param vocabCache
 */
/**
 * Imports every element from the supplied VocabCache into this cache via
 * {@link #addToken}, then accumulates the source's document count.
 *
 * @param vocabCache source cache whose elements are merged into this one
 */
public void importVocabulary(@NonNull VocabCache<T> vocabCache) {
    vocabCache.vocabWords().forEach(this::addToken);
    this.documentsCounter.addAndGet(vocabCache.totalNumberOfDocs());
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:14,代码来源:AbstractCache.java

示例4: importVocabulary

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入方法依赖的package包/类
@Override
public void importVocabulary(VocabCache<VocabWord> vocabCache) {
    for (VocabWord word : vocabCache.vocabWords()) {
        if (vocabs.containsKey(word.getLabel())) {
            wordFrequencies.incrementCount(word.getLabel(), (float) word.getElementFrequency());
        } else {
            tokens.put(word.getLabel(), word);
            vocabs.put(word.getLabel(), word);
            wordFrequencies.incrementCount(word.getLabel(), (float) word.getElementFrequency());
        }
        totalWordOccurrences.addAndGet((long) word.getElementFrequency());
    }
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:14,代码来源:InMemoryLookupCache.java

示例5: testSyn0AfterFirstIteration

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入方法依赖的package包/类
@Test
public void testSyn0AfterFirstIteration() throws Exception {
    JavaSparkContext sc = getContext();
    JavaRDD<String> corpusRDD = getCorpusRDD(sc);
    //  word2vec.setRemoveStop(false);
    Broadcast<Map<String, Object>> broadcastTokenizerVarMap = sc.broadcast(word2vec.getTokenizerVarMap());

    TextPipeline pipeline = new TextPipeline(corpusRDD, broadcastTokenizerVarMap);
    pipeline.buildVocabCache();
    pipeline.buildVocabWordListRDD();
    VocabCache<VocabWord> vocabCache = pipeline.getVocabCache();
    Huffman huffman = new Huffman(vocabCache.vocabWords());
    huffman.build();

    // Get total word count and put into word2vec variable map
    Map<String, Object> word2vecVarMap = word2vec.getWord2vecVarMap();
    word2vecVarMap.put("totalWordCount", pipeline.getTotalWordCount());
    double[] expTable = word2vec.getExpTable();

    JavaRDD<AtomicLong> sentenceCountRDD = pipeline.getSentenceCountRDD();
    JavaRDD<List<VocabWord>> vocabWordListRDD = pipeline.getVocabWordListRDD();

    CountCumSum countCumSum = new CountCumSum(sentenceCountRDD);
    JavaRDD<Long> sentenceCountCumSumRDD = countCumSum.buildCumSum();

    JavaPairRDD<List<VocabWord>, Long> vocabWordListSentenceCumSumRDD =
                    vocabWordListRDD.zip(sentenceCountCumSumRDD);

    Broadcast<Map<String, Object>> word2vecVarMapBroadcast = sc.broadcast(word2vecVarMap);
    Broadcast<double[]> expTableBroadcast = sc.broadcast(expTable);

    FirstIterationFunction firstIterationFunction = new FirstIterationFunction(word2vecVarMapBroadcast,
                    expTableBroadcast, pipeline.getBroadCastVocabCache());
    JavaRDD<Pair<VocabWord, INDArray>> pointSyn0Vec = vocabWordListSentenceCumSumRDD
                    .mapPartitions(firstIterationFunction).map(new MapToPairFunction());
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:37,代码来源:TextPipelineTest.java

示例6: testHasMoreObjects1

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入方法依赖的package包/类
@Test
public void testHasMoreObjects1() throws Exception {
    File tempFile = File.createTempFile("tmp", "tmp");
    tempFile.deleteOnExit();

    VocabCache<VocabWord> vocabCache = new AbstractCache.Builder<VocabWord>().build();

    VocabWord word1 = new VocabWord(1.0, "human");
    VocabWord word2 = new VocabWord(2.0, "animal");
    VocabWord word3 = new VocabWord(3.0, "unknown");

    vocabCache.addToken(word1);
    vocabCache.addToken(word2);
    vocabCache.addToken(word3);

    Huffman huffman = new Huffman(vocabCache.vocabWords());
    huffman.build();
    huffman.applyIndexes(vocabCache);


    BinaryCoOccurrenceWriter<VocabWord> writer = new BinaryCoOccurrenceWriter<>(tempFile);

    CoOccurrenceWeight<VocabWord> object1 = new CoOccurrenceWeight<>();
    object1.setElement1(word1);
    object1.setElement2(word2);
    object1.setWeight(3.14159265);

    writer.writeObject(object1);

    CoOccurrenceWeight<VocabWord> object2 = new CoOccurrenceWeight<>();
    object2.setElement1(word2);
    object2.setElement2(word3);
    object2.setWeight(0.197);

    writer.writeObject(object2);

    writer.finish();

    BinaryCoOccurrenceReader<VocabWord> reader = new BinaryCoOccurrenceReader<>(tempFile, vocabCache, null);


    CoOccurrenceWeight<VocabWord> r1 = reader.nextObject();
    log.info("Object received: " + r1);
    assertNotEquals(null, r1);

    r1 = reader.nextObject();
    log.info("Object received: " + r1);
    assertNotEquals(null, r1);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:50,代码来源:BinaryCoOccurrenceReaderTest.java

示例7: testHasMoreObjects2

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入方法依赖的package包/类
@Test
public void testHasMoreObjects2() throws Exception {
    File tempFile = File.createTempFile("tmp", "tmp");
    tempFile.deleteOnExit();

    VocabCache<VocabWord> vocabCache = new AbstractCache.Builder<VocabWord>().build();

    VocabWord word1 = new VocabWord(1.0, "human");
    VocabWord word2 = new VocabWord(2.0, "animal");
    VocabWord word3 = new VocabWord(3.0, "unknown");

    vocabCache.addToken(word1);
    vocabCache.addToken(word2);
    vocabCache.addToken(word3);

    Huffman huffman = new Huffman(vocabCache.vocabWords());
    huffman.build();
    huffman.applyIndexes(vocabCache);


    BinaryCoOccurrenceWriter<VocabWord> writer = new BinaryCoOccurrenceWriter<>(tempFile);

    CoOccurrenceWeight<VocabWord> object1 = new CoOccurrenceWeight<>();
    object1.setElement1(word1);
    object1.setElement2(word2);
    object1.setWeight(3.14159265);

    writer.writeObject(object1);

    CoOccurrenceWeight<VocabWord> object2 = new CoOccurrenceWeight<>();
    object2.setElement1(word2);
    object2.setElement2(word3);
    object2.setWeight(0.197);

    writer.writeObject(object2);

    CoOccurrenceWeight<VocabWord> object3 = new CoOccurrenceWeight<>();
    object3.setElement1(word1);
    object3.setElement2(word3);
    object3.setWeight(0.001);

    writer.writeObject(object3);

    writer.finish();

    BinaryCoOccurrenceReader<VocabWord> reader = new BinaryCoOccurrenceReader<>(tempFile, vocabCache, null);


    CoOccurrenceWeight<VocabWord> r1 = reader.nextObject();
    log.info("Object received: " + r1);
    assertNotEquals(null, r1);

    r1 = reader.nextObject();
    log.info("Object received: " + r1);
    assertNotEquals(null, r1);

    r1 = reader.nextObject();
    log.info("Object received: " + r1);
    assertNotEquals(null, r1);

}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:62,代码来源:BinaryCoOccurrenceReaderTest.java


注:本文中的org.deeplearning4j.models.word2vec.wordstore.VocabCache.vocabWords方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。