当前位置: 首页>>代码示例>>Java>>正文


Java VocabCache类代码示例

本文整理汇总了Java中org.deeplearning4j.models.word2vec.wordstore.VocabCache的典型用法代码示例。如果您正苦于以下问题:Java VocabCache类的具体用法?Java VocabCache怎么用?Java VocabCache使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


VocabCache类属于org.deeplearning4j.models.word2vec.wordstore包,在下文中一共展示了VocabCache类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: fromPair

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Builds a read-only {@link Word2Vec} instance backed by the supplied
 * lookup table and vocabulary.
 *
 * @param pair pair holding the lookup table (first) and the vocab (second)
 * @return a Word2Vec model wired to the given table and vocab
 */
public static Word2Vec fromPair(Pair<InMemoryLookupTable, VocabCache> pair) {
    Word2Vec model = new Word2Vec();
    model.setLookupTable(pair.getFirst());
    model.setVocab(pair.getSecond());
    model.setModelUtils(new BasicModelUtils());
    return model;
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:15,代码来源:WordVectorSerializer.java

示例2: writeSequenceVectors

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * This method saves the specified SequenceVectors model to the target OutputStream.
 *
 * @param vectors SequenceVectors model
 * @param factory SequenceElementFactory implementation for your objects
 * @param stream target output stream (closed when serialization completes)
 * @param <T> element type held by the model
 * @throws IOException if writing to the stream fails
 */
public static <T extends SequenceElement> void writeSequenceVectors(@NonNull SequenceVectors<T> vectors,
                @NonNull SequenceElementFactory<T> factory, @NonNull OutputStream stream) throws IOException {
    WeightLookupTable<T> lookupTable = vectors.getLookupTable();
    VocabCache<T> vocabCache = vectors.getVocab();

    // try-with-resources guarantees the writer is flushed and closed even if
    // serialization of an element throws (the original leaked it on exception)
    try (PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(stream, "UTF-8")))) {
        // first, the VectorsConfiguration as encoded JSON
        // NOTE(review): write() appends no line terminator — confirm the matching
        // reader expects the configuration and the first element on one line
        writer.write(vectors.getConfiguration().toEncodedJson());

        // then one ElementPair (element JSON + raw vector values) per line
        for (int x = 0; x < vocabCache.numWords(); x++) {
            T element = vocabCache.elementAtIndex(x);
            String json = factory.serialize(element);
            // dup() detaches the row from the lookup table before reading its data
            double[] vector = lookupTable.vector(element.getLabel()).dup().data().asDouble();
            ElementPair pair = new ElementPair(json, vector);
            writer.println(pair.toEncodedJson());
        }
        writer.flush();
    }
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:32,代码来源:WordVectorSerializer.java

示例3: readVocabCache

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * This method reads a vocab cache from the provided InputStream.
 * Please note: it reads only vocab content, so it's suitable mostly for
 * BagOfWords/TF-IDF vectorizers.
 *
 * @param stream UTF-8 stream with one serialized VocabWord per line
 * @return vocab cache populated with the deserialized words
 * @throws IOException if reading from the stream fails
 */
public static VocabCache<VocabWord> readVocabCache(@NonNull InputStream stream) throws IOException {
    // the reader is intentionally not closed: the stream is owned by the caller
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
    AbstractCache<VocabWord> cache = new AbstractCache.Builder<VocabWord>().build();
    VocabWordFactory wordFactory = new VocabWordFactory();

    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
        VocabWord word = wordFactory.deserialize(line);
        cache.addToken(word);
        cache.addWordToIndex(word.getIndex(), word.getLabel());
    }

    return cache;
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:25,代码来源:WordVectorSerializer.java

示例4: configure

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Wires this DM instance to the given vocab/lookup table, configures the
 * underlying CBOW delegate, and caches the lookup table's weight matrices.
 *
 * @param vocabCache vocabulary to train against
 * @param lookupTable weight lookup table (must be an InMemoryLookupTable)
 * @param configuration training hyper-parameters
 */
@Override
public void configure(@NonNull VocabCache<T> vocabCache, @NonNull WeightLookupTable<T> lookupTable,
                @NonNull VectorsConfiguration configuration) {
    this.vocabCache = vocabCache;
    this.lookupTable = lookupTable;
    this.configuration = configuration;

    // DM delegates word-level training to CBOW — keep it configured in lock-step
    cbow.configure(vocabCache, lookupTable, configuration);

    this.window = configuration.getWindow();
    this.useAdaGrad = configuration.isUseAdaGrad();
    this.negative = configuration.getNegative();
    this.sampling = configuration.getSampling();

    // hoisted: the original repeated this unchecked cast five times
    InMemoryLookupTable<T> inMemoryTable = (InMemoryLookupTable<T>) lookupTable;
    this.syn0 = inMemoryTable.getSyn0();
    this.syn1 = inMemoryTable.getSyn1();
    this.syn1Neg = inMemoryTable.getSyn1Neg();
    this.expTable = inMemoryTable.getExpTable();
    this.table = inMemoryTable.getTable();
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:21,代码来源:DM.java

示例5: weights

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Builds a weight matrix from the given word -> vector map, placing each
 * vector at the row given by the word's vocab index.
 *
 * Entries are silently skipped when the vector length does not match the
 * table's layer size, or when the word's vocab index is negative or falls
 * outside the matrix (>= data.size()).
 *
 * @param glove lookup table providing the expected layer size
 * @param data word -> raw float vector map
 * @param vocab vocabulary used to resolve row indexes
 * @return matrix of shape (data.size(), layerSize) with matching rows filled
 */
private static INDArray weights(GloveWeightLookupTable glove, Map<String, float[]> data, VocabCache vocab) {
    INDArray ret = Nd4j.create(data.size(), glove.layerSize());

    for (Map.Entry<String, float[]> entry : data.entrySet()) {
        String key = entry.getKey();
        INDArray row = Nd4j.create(Nd4j.createBuffer(entry.getValue()));
        if (row.length() != glove.layerSize())
            continue;
        // hoisted: indexOf was previously evaluated up to three times per entry
        int index = vocab.indexOf(key);
        if (index < 0 || index >= data.size())
            continue;
        ret.putRow(index, row);
    }
    return ret;
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:17,代码来源:GloveWeightLookupTable.java

示例6: getSyn0Vector

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
public INDArray getSyn0Vector(Integer wordIndex, VocabCache<VocabWord> vocabCache) {
    if (!workers.contains(Thread.currentThread().getId()))
        workers.add(Thread.currentThread().getId());

    VocabWord word = vocabCache.elementAtIndex(wordIndex);

    if (!indexSyn0VecMap.containsKey(word)) {
        synchronized (this) {
            if (!indexSyn0VecMap.containsKey(word)) {
                indexSyn0VecMap.put(word, getRandomSyn0Vec(vectorLength.get(), wordIndex));
            }
        }
    }

    return indexSyn0VecMap.get(word);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:17,代码来源:VocabHolder.java

示例7: testGlove

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * End-to-end sanity check: trains Glove on the raw_sentences corpus and
 * verifies that "week" appears among the nearest neighbours of "day".
 */
@Test
public void testGlove() throws Exception {
    Glove glove = new Glove(true, 5, 100);

    String corpusPath = new ClassPathResource("raw_sentences.txt").getFile().getAbsolutePath();
    JavaRDD<String> corpus = sc.textFile(corpusPath)
                    .map(new Function<String, String>() {
                        @Override
                        public String call(String sentence) throws Exception {
                            // normalize case before training
                            return sentence.toLowerCase();
                        }
                    });

    Pair<VocabCache<VocabWord>, GloveWeightLookupTable> trained = glove.train(corpus);
    WordVectors vectors = WordVectorSerializer
                    .fromPair(new Pair<>((InMemoryLookupTable) trained.getSecond(), (VocabCache) trained.getFirst()));

    Collection<String> nearest = vectors.wordsNearest("day", 20);
    assertTrue(nearest.contains("week"));
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:19,代码来源:GloveTest.java

示例8: buildShallowVocabCache

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Builds a shallow vocabulary (elements keyed by numeric id) from the given
 * frequency counter and applies Huffman-tree indexes to it.
 *
 * @param counter per-element-id frequency counter
 * @return vocab cache of ShallowSequenceElement with Huffman indexes applied
 */
protected VocabCache<ShallowSequenceElement> buildShallowVocabCache(Counter<Long> counter) {

    // TODO: need simplified cache here, that will operate on Long instead of string labels
    VocabCache<ShallowSequenceElement> cache = new AbstractCache<>();
    for (Long elementId : counter.keySet()) {
        cache.addToken(new ShallowSequenceElement(counter.getCount(elementId), elementId));
    }

    // build the Huffman tree and write the resulting indexes back into the vocab
    Huffman huffman = new Huffman(cache.vocabWords());
    huffman.build();
    huffman.applyIndexes(cache);

    return cache;
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:23,代码来源:SparkSequenceVectors.java

示例9: writeWordVectors

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * This method writes word vectors to the given OutputStream.
 * Please note: this method doesn't load the whole vocab/lookupTable into memory,
 * so it's able to process large vocabularies served over network.
 *
 * @param lookupTable lookup table providing vocab and vectors
 * @param stream target output stream (closed when serialization completes)
 * @param <T> element type held by the table
 * @throws IOException if writing to the stream fails
 */
public static <T extends SequenceElement> void writeWordVectors(WeightLookupTable<T> lookupTable,
                OutputStream stream) throws IOException {
    VocabCache<T> vocabCache = lookupTable.getVocabCache();

    // try-with-resources closes (and flushes) the writer even if a vector
    // lookup throws mid-way (the original leaked it on exception)
    try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(stream, "UTF-8"))) {
        // saving header as "NUM_WORDS VECTOR_SIZE NUM_DOCS"
        String header = vocabCache.numWords() + " " + lookupTable.layerSize() + " " + vocabCache.totalNumberOfDocs();
        log.debug("Saving header: {}", header);
        writer.println(header);

        // one line per element: base64-encoded label followed by the vector values
        for (int x = 0; x < vocabCache.numWords(); x++) {
            T element = vocabCache.elementAtIndex(x);

            StringBuilder builder = new StringBuilder(encodeB64(element.getLabel()));
            INDArray vec = lookupTable.vector(element.getLabel());
            // leading-separator form yields identical output to the original's
            // conditional trailing-separator loop
            for (int i = 0; i < vec.length(); i++) {
                builder.append(" ").append(vec.getDouble(i));
            }
            writer.println(builder.toString());
        }
        writer.flush();
    }
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:38,代码来源:WordVectorSerializer.java

示例10: writeVocabCache

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * This method saves vocab cache to the provided OutputStream.
 * Please note: it saves only vocab content, so it's suitable mostly for
 * BagOfWords/TF-IDF vectorizers.
 *
 * @param vocabCache vocabulary to serialize, one word's JSON per line
 * @param stream target output stream (closed when serialization completes)
 * @throws IOException if writing to the stream fails
 */
public static void writeVocabCache(@NonNull VocabCache<VocabWord> vocabCache, @NonNull OutputStream stream)
                throws IOException {
    // try-with-resources guarantees the writer is flushed and closed even if
    // toJSON() throws (the original leaked it on exception)
    try (PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(stream, "UTF-8")))) {
        for (int x = 0; x < vocabCache.numWords(); x++) {
            VocabWord word = vocabCache.elementAtIndex(x);
            writer.println(word.toJSON());
        }
        writer.flush();
    }
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:21,代码来源:WordVectorSerializer.java

示例11: configure

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
@Override
public void configure(@NonNull VocabCache<T> vocabCache, @NonNull WeightLookupTable<T> lookupTable,
                @NonNull VectorsConfiguration configuration) {
    this.vocabCache = vocabCache;
    this.lookupTable = lookupTable;
    this.configuration = configuration;

    this.window = configuration.getWindow();
    this.useAdaGrad = configuration.isUseAdaGrad();
    this.negative = configuration.getNegative();
    this.sampling = configuration.getSampling();

    if (configuration.getNegative() > 0) {
        if (((InMemoryLookupTable<T>) lookupTable).getSyn1Neg() == null) {
            logger.info("Initializing syn1Neg...");
            ((InMemoryLookupTable<T>) lookupTable).setUseHS(configuration.isUseHierarchicSoftmax());
            ((InMemoryLookupTable<T>) lookupTable).setNegative(configuration.getNegative());
            ((InMemoryLookupTable<T>) lookupTable).resetWeights(false);
        }
    }


    this.syn0 = new DeviceLocalNDArray(((InMemoryLookupTable<T>) lookupTable).getSyn0());
    this.syn1 = new DeviceLocalNDArray(((InMemoryLookupTable<T>) lookupTable).getSyn1());
    this.syn1Neg = new DeviceLocalNDArray(((InMemoryLookupTable<T>) lookupTable).getSyn1Neg());
    this.expTable = new DeviceLocalNDArray(Nd4j.create(((InMemoryLookupTable<T>) lookupTable).getExpTable()));
    this.table = new DeviceLocalNDArray(((InMemoryLookupTable<T>) lookupTable).getTable());
    this.variableWindows = configuration.getVariableWindows();
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:30,代码来源:CBOW.java

示例12: configure

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * SkipGram initialization over given vocabulary and WeightLookupTable.
 * Lazily initializes negative-sampling weights if needed and caches
 * device-local copies of the lookup table's buffers.
 *
 * @param vocabCache vocabulary to train against
 * @param lookupTable weight lookup table (must be an InMemoryLookupTable)
 * @param configuration training hyper-parameters
 */
@Override
public void configure(@NonNull VocabCache<T> vocabCache, @NonNull WeightLookupTable<T> lookupTable,
                @NonNull VectorsConfiguration configuration) {
    this.vocabCache = vocabCache;
    this.lookupTable = lookupTable;
    this.configuration = configuration;

    // hoisted: the original repeated this unchecked cast eight times
    InMemoryLookupTable<T> inMemoryTable = (InMemoryLookupTable<T>) lookupTable;

    // lazily initialize syn1Neg when negative sampling is requested but the
    // table was built without it
    if (configuration.getNegative() > 0 && inMemoryTable.getSyn1Neg() == null) {
        log.info("Initializing syn1Neg...");
        inMemoryTable.setUseHS(configuration.isUseHierarchicSoftmax());
        inMemoryTable.setNegative(configuration.getNegative());
        inMemoryTable.resetWeights(false);
    }

    // wrap device-local copies of the lookup-table buffers
    this.expTable = new DeviceLocalNDArray(Nd4j.create(inMemoryTable.getExpTable()));
    this.syn0 = new DeviceLocalNDArray(inMemoryTable.getSyn0());
    this.syn1 = new DeviceLocalNDArray(inMemoryTable.getSyn1());
    this.syn1Neg = new DeviceLocalNDArray(inMemoryTable.getSyn1Neg());
    this.table = new DeviceLocalNDArray(inMemoryTable.getTable());

    this.window = configuration.getWindow();
    this.useAdaGrad = configuration.isUseAdaGrad();
    this.negative = configuration.getNegative();
    this.sampling = configuration.getSampling();
    this.variableWindows = configuration.getVariableWindows();

    this.vectorLength = configuration.getLayersSize();
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:40,代码来源:SkipGram.java

示例13: configure

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Wires this DBOW instance to the given vocab/lookup table and delegates the
 * same configuration to the underlying SkipGram algorithm.
 *
 * @param vocabCache vocabulary to train against
 * @param lookupTable weight lookup table
 * @param configuration training hyper-parameters
 */
@Override
public void configure(@NonNull VocabCache<T> vocabCache, @NonNull WeightLookupTable<T> lookupTable,
                @NonNull VectorsConfiguration configuration) {
    this.vocabCache = vocabCache;
    this.lookupTable = lookupTable;
    this.configuration = configuration;

    this.window = configuration.getWindow();
    this.useAdaGrad = configuration.isUseAdaGrad();
    this.negative = configuration.getNegative();

    // DBOW rides on top of SkipGram — keep it configured in lock-step
    skipGram.configure(vocabCache, lookupTable, configuration);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:14,代码来源:DBOW.java

示例14: InMemoryLookupTable

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Creates an in-memory lookup table over the given vocabulary.
 *
 * @param vocab vocabulary backing this table
 * @param vectorLength dimensionality of each vector
 * @param useAdaGrad whether AdaGrad learning-rate adaptation is enabled
 * @param lr initial learning rate
 * @param gen random generator used for weight initialization
 * @param negative negative-sampling rate
 */
public InMemoryLookupTable(VocabCache<T> vocab, int vectorLength, boolean useAdaGrad, double lr, Random gen,
                double negative) {
    this.vocab = vocab;
    this.vectorLength = vectorLength;
    this.rng = gen;
    this.negative = negative;
    this.useAdaGrad = useAdaGrad;
    this.lr.set(lr);

    // precompute the sigmoid approximation table
    initExpTable();

    // AdaGrad state is only allocated when the feature is enabled
    if (useAdaGrad) {
        initAdaGrad();
    }
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:15,代码来源:InMemoryLookupTable.java

示例15: ASCIICoOccurrenceReader

import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Creates a reader over the given ASCII co-occurrence file.
 *
 * @param file co-occurrence file to read
 * @param vocabCache vocabulary used to resolve elements
 * @throws RuntimeException if the underlying line iterator cannot be built
 */
public ASCIICoOccurrenceReader(@NonNull File file, @NonNull VocabCache<T> vocabCache) {
    this.file = file;
    this.vocabCache = vocabCache;
    try {
        // prefetching iterator reads ahead of the consumer for throughput
        iterator = new PrefetchingSentenceIterator.Builder(new BasicLineIterator(file)).build();
    } catch (Exception e) {
        // wrap checked construction failures; cause is preserved
        throw new RuntimeException(e);
    }
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:10,代码来源:ASCIICoOccurrenceReader.java


注:本文中的org.deeplearning4j.models.word2vec.wordstore.VocabCache类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。