当前位置: 首页>>代码示例>>Java>>正文


Java WordVectors.getWordVectorMatrix方法代码示例

本文整理汇总了Java中org.deeplearning4j.models.embeddings.wordvectors.WordVectors.getWordVectorMatrix方法的典型用法代码示例。如果您正苦于以下问题：Java WordVectors.getWordVectorMatrix方法的具体用法？Java WordVectors.getWordVectorMatrix怎么用？Java WordVectors.getWordVectorMatrix使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.deeplearning4j.models.embeddings.wordvectors.WordVectors的用法示例。


在下文中一共展示了WordVectors.getWordVectorMatrix方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testWriteWordVectorsFromWord2Vec

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Loads the model referenced by {@code binaryFile}, writes it out as text to
 * {@code pathToWriteto}, reloads that text file and checks that the vectors for two
 * words match the ones held by the originally loaded model.
 *
 * <p>The test is currently disabled via {@code @Ignore}.
 *
 * @throws IOException propagated from loading or writing the model files
 */
@Test
@Ignore
public void testWriteWordVectorsFromWord2Vec() throws IOException {
    WordVectors vec = WordVectorSerializer.loadGoogleModel(binaryFile, true);
    WordVectorSerializer.writeWordVectors((Word2Vec) vec, pathToWriteto);

    WordVectors wordVectors = WordVectorSerializer.loadTxtVectors(new File(pathToWriteto));
    INDArray wordVector1 = wordVectors.getWordVectorMatrix("Morgan_Freeman");
    INDArray wordVector2 = wordVectors.getWordVectorMatrix("JA_Montalbano");

    // Reloaded vectors must equal the ones in the source model.
    assertEquals(vec.getWordVectorMatrix("Morgan_Freeman"), wordVector1);
    assertEquals(vec.getWordVectorMatrix("JA_Montalbano"), wordVector2);

    // assertEquals (expected first) reports the actual length on failure,
    // unlike assertTrue(x == 300).
    assertEquals(300, wordVector1.length());
    assertEquals(300, wordVector2.length());

    // Expected value goes first so failure messages are not inverted.
    assertEquals(0.044423, wordVector1.getDouble(0), 1e-3);
    assertEquals(0.051964, wordVector2.getDouble(0), 1e-3);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:17,代码来源:WordVectorSerializerTest.java

示例2: testStaticLoaderArchive

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Checks ZIP file loading as a static model: the vector for "night" obtained through
 * {@code loadStaticModel} must equal the one obtained through {@code readWord2Vec}
 * for the same file.
 *
 * @throws Exception propagated from resource lookup or model loading
 */
@Test
public void testStaticLoaderArchive() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final File archive = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();

    final WordVectors liveModel = WordVectorSerializer.readWord2Vec(archive);
    final WordVectors staticModel = WordVectorSerializer.loadStaticModel(archive);

    final INDArray expected = liveModel.getWordVectorMatrix("night");
    final INDArray actual = staticModel.getWordVectorMatrix("night");

    // The reference vector has to exist, otherwise the comparison proves nothing.
    assertNotEquals(null, expected);
    assertEquals(expected, actual);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:21,代码来源:WordVectorSerializerTest.java

示例3: testUnifiedLoaderArchive1

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Loads the same archive through {@code readWord2Vec} and through
 * {@code readWord2VecModel(file, false)}, then checks that both return the same vector
 * for "night" and that the lookup table of the second model exposes neither syn1 nor
 * syn1Neg.
 *
 * @throws Exception propagated from resource lookup or model loading
 */
@Test
public void testUnifiedLoaderArchive1() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final File archive = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();

    final WordVectors liveModel = WordVectorSerializer.readWord2Vec(archive);
    final WordVectors unifiedModel = WordVectorSerializer.readWord2VecModel(archive, false);

    final INDArray expected = liveModel.getWordVectorMatrix("night");
    final INDArray actual = unifiedModel.getWordVectorMatrix("night");

    assertNotEquals(null, expected);
    assertEquals(expected, actual);

    // With the flag set to false neither auxiliary weight matrix is expected to be present.
    assertEquals(null, ((InMemoryLookupTable) unifiedModel.lookupTable()).getSyn1());
    assertEquals(null, ((InMemoryLookupTable) unifiedModel.lookupTable()).getSyn1Neg());
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:19,代码来源:WordVectorSerializerTest.java

示例4: testUnifiedLoaderArchive2

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Loads the same archive through {@code readWord2Vec} and through
 * {@code readWord2VecModel(file, true)}, then checks that both return the same vector
 * for "night" and that the lookup table of the second model has a non-null syn1.
 *
 * @throws Exception propagated from resource lookup or model loading
 */
@Test
public void testUnifiedLoaderArchive2() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final File archive = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();

    final WordVectors liveModel = WordVectorSerializer.readWord2Vec(archive);
    final WordVectors unifiedModel = WordVectorSerializer.readWord2VecModel(archive, true);

    final INDArray expected = liveModel.getWordVectorMatrix("night");
    final INDArray actual = unifiedModel.getWordVectorMatrix("night");

    assertNotEquals(null, expected);
    assertEquals(expected, actual);

    // With the flag set to true the syn1 matrix is expected to have been restored.
    assertNotEquals(null, ((InMemoryLookupTable) unifiedModel.lookupTable()).getSyn1());
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:18,代码来源:WordVectorSerializerTest.java

示例5: testUnifiedLoaderText

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Checks CSV file loading via the unified loader: the vector for "Morgan_Freeman" read
 * through {@code readWord2VecModel(textFile, true)} must equal the one read through
 * {@code loadTxtVectors}, and the resulting lookup table must have no syn1.
 *
 * @throws Exception propagated from model loading
 */
@Test
public void testUnifiedLoaderText() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final WordVectors liveModel = WordVectorSerializer.loadTxtVectors(textFile);
    final WordVectors unifiedModel = WordVectorSerializer.readWord2VecModel(textFile, true);

    final INDArray expected = liveModel.getWordVectorMatrix("Morgan_Freeman");
    final INDArray actual = unifiedModel.getWordVectorMatrix("Morgan_Freeman");

    assertNotEquals(null, expected);
    assertEquals(expected, actual);

    // The EXTENDED model is requested, but the file carries no syn1/huffman info,
    // so the loader should silently fall back to the simplified model.
    assertEquals(null, ((InMemoryLookupTable) unifiedModel.lookupTable()).getSyn1());
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:22,代码来源:WordVectorSerializerTest.java

示例6: windows

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Tokenizes a string and builds windows of size {@code windowSize} from the tokens
 * that have a vector.
 *
 * <p>Tokens for which {@code vectors.getWordVectorMatrix(token)} returns {@code null}
 * are dropped before the windows are built. The actual window construction is
 * delegated to {@code windows(List, int)}.
 *
 * @param words the text to tokenize and construct windows from
 * @param tokenizerFactory tokenizer factory used to split {@code words}
 * @param windowSize the window size to generate
 * @param vectors word vectors used to decide which tokens are kept
 * @return the list of windows for the retained tokens
 * @throws IllegalStateException if no token survives the filtering
 */
public static List<Window> windows(String words, @NonNull TokenizerFactory tokenizerFactory, int windowSize,
                WordVectors vectors) {
    final List<String> knownTokens = new ArrayList<>();

    for (Tokenizer source = tokenizerFactory.create(words); source.hasMoreTokens();) {
        final String candidate = source.nextToken();

        // Without an UNK word defined there is no vector to fall back on, so skip the token.
        if (vectors.getWordVectorMatrix(candidate) == null) {
            continue;
        }
        knownTokens.add(candidate);
    }

    if (knownTokens.isEmpty()) {
        throw new IllegalStateException("No tokens found for windows");
    }

    return windows(knownTokens, windowSize);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:26,代码来源:Windows.java

示例7: testStaticLoaderBinary

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Checks binary file loading as a static model: the vector for "Morgan_Freeman"
 * obtained through {@code loadStaticModel} must equal the one obtained through
 * {@code loadGoogleModel} for the same file.
 *
 * @throws Exception propagated from model loading
 */
@Test
public void testStaticLoaderBinary() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final WordVectors liveModel = WordVectorSerializer.loadGoogleModel(binaryFile, true);
    final WordVectors staticModel = WordVectorSerializer.loadStaticModel(binaryFile);

    final INDArray expected = liveModel.getWordVectorMatrix("Morgan_Freeman");
    final INDArray actual = staticModel.getWordVectorMatrix("Morgan_Freeman");

    // The reference vector has to exist, otherwise the comparison proves nothing.
    assertNotEquals(null, expected);
    assertEquals(expected, actual);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:20,代码来源:WordVectorSerializerTest.java

示例8: testStaticLoaderText

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Checks CSV file loading as a static model: the vector for "Morgan_Freeman" obtained
 * through {@code loadStaticModel} must equal the one obtained through
 * {@code loadTxtVectors} for the same file.
 *
 * @throws Exception propagated from model loading
 */
@Test
public void testStaticLoaderText() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final WordVectors liveModel = WordVectorSerializer.loadTxtVectors(textFile);
    final WordVectors staticModel = WordVectorSerializer.loadStaticModel(textFile);

    final INDArray expected = liveModel.getWordVectorMatrix("Morgan_Freeman");
    final INDArray actual = staticModel.getWordVectorMatrix("Morgan_Freeman");

    // The reference vector has to exist, otherwise the comparison proves nothing.
    assertNotEquals(null, expected);
    assertEquals(expected, actual);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:19,代码来源:WordVectorSerializerTest.java

示例9: testUnifiedLoaderBinary

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Checks binary file loading via the unified loader: the vector for "Morgan_Freeman"
 * read through {@code readWord2VecModel(binaryFile, false)} must equal the one read
 * through {@code loadGoogleModel}.
 *
 * @throws Exception propagated from model loading
 */
@Test
public void testUnifiedLoaderBinary() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final WordVectors liveModel = WordVectorSerializer.loadGoogleModel(binaryFile, true);
    final WordVectors unifiedModel = WordVectorSerializer.readWord2VecModel(binaryFile, false);

    final INDArray expected = liveModel.getWordVectorMatrix("Morgan_Freeman");
    final INDArray actual = unifiedModel.getWordVectorMatrix("Morgan_Freeman");

    // The reference vector has to exist, otherwise the comparison proves nothing.
    assertNotEquals(null, expected);
    assertEquals(expected, actual);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:20,代码来源:WordVectorSerializerTest.java

示例10: testIndexPersistence

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Trains a Word2Vec model on {@code /big/raw_sentences.txt}, writes its vectors to a
 * temporary file, reloads them with {@code loadTxtVectors} and checks that the
 * document count and every vocabulary word's vector match the trained model.
 *
 * @throws Exception propagated from resource lookup, training or (de)serialization
 */
@Test
public void testIndexPersistence() throws Exception {
    final File corpus = new ClassPathResource("/big/raw_sentences.txt").getFile();
    final SentenceIterator sentences = UimaSentenceIterator.createWithPath(corpus.getAbsolutePath());

    // Tokens are produced by splitting each line on white space.
    final TokenizerFactory tokenizers = new DefaultTokenizerFactory();
    tokenizers.setTokenPreProcessor(new CommonPreprocessor());

    final Word2Vec trained = new Word2Vec.Builder()
                    .minWordFrequency(5)
                    .iterations(1)
                    .epochs(1)
                    .layerSize(100)
                    .stopWords(new ArrayList<String>())
                    .useAdaGrad(false)
                    .negativeSample(5)
                    .seed(42)
                    .windowSize(5)
                    .iterate(sentences)
                    .tokenizerFactory(tokenizers)
                    .build();

    trained.fit();

    final VocabCache originalVocab = trained.getVocab();

    final File vectorsFile = File.createTempFile("temp", "w2v");
    vectorsFile.deleteOnExit();

    WordVectorSerializer.writeWordVectors(trained, vectorsFile);

    final WordVectors restored = WordVectorSerializer.loadTxtVectors(vectorsFile);
    final VocabCache restoredVocab = restored.vocab();

    assertEquals(originalVocab.totalNumberOfDocs(), restoredVocab.totalNumberOfDocs());

    // Every word known to the trained model must map to the same vector after reload.
    for (VocabWord entry : trained.getVocab().vocabWords()) {
        final String label = entry.getLabel();
        final INDArray before = trained.getWordVectorMatrix(label);
        final INDArray after = restored.getWordVectorMatrix(label);

        assertEquals(before, after);
    }
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:35,代码来源:WordVectorSerializerTest.java

示例11: testOutputStream

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Trains a Word2Vec model on {@code /big/raw_sentences.txt}, writes its vectors through
 * an output stream, reloads them with {@code loadTxtVectors} and checks that the vector
 * for "day" is unchanged. Afterwards the full model is written with
 * {@code writeWord2VecModel} and read back with {@code readWord2VecModel}; that last
 * step has no assertions and only has to complete without throwing.
 *
 * @throws Exception propagated from resource lookup, training or (de)serialization
 */
@Test
public void testOutputStream() throws Exception {
    File file = File.createTempFile("tmp_ser", "ssa");
    file.deleteOnExit();

    File inputFile = new ClassPathResource("/big/raw_sentences.txt").getFile();
    SentenceIterator iter = new BasicLineIterator(inputFile);
    // Split on white spaces in the line to get words
    TokenizerFactory t = new DefaultTokenizerFactory();
    t.setTokenPreProcessor(new CommonPreprocessor());

    InMemoryLookupCache cache = new InMemoryLookupCache(false);
    WeightLookupTable table = new InMemoryLookupTable.Builder().vectorLength(100).useAdaGrad(false).negative(5.0)
                    .cache(cache).lr(0.025f).build();

    Word2Vec vec = new Word2Vec.Builder().minWordFrequency(5).iterations(1).epochs(1).layerSize(100)
                    .lookupTable(table).stopWords(new ArrayList<String>()).useAdaGrad(false).negativeSample(5)
                    .vocabCache(cache).seed(42)
                    .windowSize(5).iterate(iter).tokenizerFactory(t).build();

    assertEquals(new ArrayList<String>(), vec.getStopWords());
    vec.fit();

    INDArray day1 = vec.getWordVectorMatrix("day");

    // Close the stream deterministically so the file handle is released and all data is
    // on disk before the file is read back; the test must not rely on the callee to do it.
    try (FileOutputStream out = new FileOutputStream(file)) {
        WordVectorSerializer.writeWordVectors(vec, out);
    }

    WordVectors vec2 = WordVectorSerializer.loadTxtVectors(file);

    INDArray day2 = vec2.getWordVectorMatrix("day");

    assertEquals(day1, day2);

    File tempFile = File.createTempFile("tetsts", "Fdfs");
    tempFile.deleteOnExit();

    WordVectorSerializer.writeWord2VecModel(vec, tempFile);

    // Smoke check only: reading the full model back must not throw.
    WordVectorSerializer.readWord2VecModel(tempFile);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:42,代码来源:WordVectorSerializerTest.java


注:本文中的org.deeplearning4j.models.embeddings.wordvectors.WordVectors.getWordVectorMatrix方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。