当前位置: 首页>>代码示例>>Java>>正文


Java InMemoryLookupTable类代码示例

本文整理汇总了Java中org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable的典型用法代码示例。如果您正苦于以下问题:Java InMemoryLookupTable类的具体用法?Java InMemoryLookupTable怎么用?Java InMemoryLookupTable使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


InMemoryLookupTable类属于org.deeplearning4j.models.embeddings.inmemory包,在下文中一共展示了InMemoryLookupTable类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: main

import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; //导入依赖的package包/类
/**
 * Trains a Word2Vec model on a raw-text corpus, writes the resulting vectors to
 * {@code word2vec.txt}, and prints the words nearest to "man" plus the similarity
 * between "cruise" and "voyage".
 *
 * @param args optional; {@code args[0]} is the path of the corpus file. When no argument is
 *             given the original default {@code c:/raw_sentences.txt} is used, so existing
 *             invocations keep working.
 * @throws Exception if loading the corpus, fitting the model or writing the vectors fails
 */
public static void main(String[] args) throws Exception {

    // Take the corpus path from the command line when supplied; the hard-coded Windows
    // path is kept only as a backward-compatible fallback.
    String filePath = (args != null && args.length > 0) ? args[0] : "c:/raw_sentences.txt";

    log.info("Load & Vectorize Sentences....");
    // Sentence source backed by the corpus file
    SentenceIterator iter = UimaSentenceIterator.createWithPath(filePath);
    // Tokenizer that turns each sentence into words; tokens go through CommonPreprocessor
    TokenizerFactory t = new DefaultTokenizerFactory();
    t.setTokenPreProcessor(new CommonPreprocessor());

    // The same cache instance is shared by the lookup table and the Word2Vec builder below
    InMemoryLookupCache cache = new InMemoryLookupCache();
    WeightLookupTable table = new InMemoryLookupTable.Builder()
            .vectorLength(100)
            .useAdaGrad(false)
            .cache(cache)
            .lr(0.025f).build();

    log.info("Building model....");
    Word2Vec vec = new Word2Vec.Builder()
            .minWordFrequency(5).iterations(1)
            .layerSize(100).lookupTable(table)
            .stopWords(new ArrayList<String>())
            .vocabCache(cache).seed(42)
            .windowSize(5).iterate(iter).tokenizerFactory(t).build();

    log.info("Fitting Word2Vec model....");
    vec.fit();

    log.info("Writing word vectors to text file....");
    WordVectorSerializer.writeWordVectors(vec, "word2vec.txt");

    log.info("Closest Words:");
    Collection<String> lst = vec.wordsNearest("man", 5);
    System.out.println(lst);
    double cosSim = vec.similarity("cruise", "voyage");
    System.out.println(cosSim);
}
 
开发者ID:PacktPublishing,项目名称:Java-Data-Science-Cookbook,代码行数:41,代码来源:Word2VecRawTextExample.java

示例2: predict

import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; //导入依赖的package包/类
/**
 * Scores a signature (name plus inputs) against the known labels.
 * <p>
 * The signature is rendered to text via {@code signatureToText}, wrapped in a
 * {@link LabelledDocument}, converted to a vector by {@code MeansBuilder} and then handed to
 * {@code LabelSeeker}. One document may match several labels, each with its own score.
 *
 * @param name   name used to build the signature text
 * @param source not read by this method
 * @param inputs inputs used to build the signature text
 * @return the label/score pairs produced by the label seeker
 */
public List<Pair<String, Double>> predict(@NotNull String name, @NotNull SourceSegment source, @NotNull List<var> inputs) {

    // Both helpers work directly on the paragraph-vectors lookup table.
    final MeansBuilder vectorBuilder = new MeansBuilder(
            (InMemoryLookupTable<VocabWord>) paragraphVectors.getLookupTable(),
            tokenizerFactory);
    final LabelSeeker labelSeeker = new LabelSeeker(
            iterator.getLabelsSource().getLabels(),
            (InMemoryLookupTable<VocabWord>) paragraphVectors.getLookupTable());

    // Wrap the textual signature so it can be vectorised like any other document.
    final LabelledDocument signatureDoc = new LabelledDocument();
    signatureDoc.setContent(signatureToText(name, inputs));

    final INDArray signatureVector = vectorBuilder.documentAsVector(signatureDoc);
    return labelSeeker.getScores(signatureVector);
}
 
开发者ID:sillelien,项目名称:dollar,代码行数:21,代码来源:ParagraphVectorsClassifierExample.java

示例3: testWriteWordVectors

import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; //导入依赖的package包/类
/**
 * Loads the model from {@code binaryFile}, writes its lookup table and vocabulary out as text
 * to {@code pathToWriteto}, reloads that text file and checks the vectors of two known words.
 *
 * @throws IOException if loading or writing the vectors fails
 */
@Test
@Ignore
public void testWriteWordVectors() throws IOException {
    WordVectors vec = WordVectorSerializer.loadGoogleModel(binaryFile, true);
    InMemoryLookupTable lookupTable = (InMemoryLookupTable) vec.lookupTable();
    InMemoryLookupCache lookupCache = (InMemoryLookupCache) vec.vocab();
    WordVectorSerializer.writeWordVectors(lookupTable, lookupCache, pathToWriteto);

    WordVectors wordVectors = WordVectorSerializer.loadTxtVectors(new File(pathToWriteto));
    double[] wordVector1 = wordVectors.getWordVector("Morgan_Freeman");
    double[] wordVector2 = wordVectors.getWordVector("JA_Montalbano");

    // assertEquals(expected, actual) reports the real length on failure, unlike assertTrue(a == b)
    assertEquals(300, wordVector1.length);
    assertEquals(300, wordVector2.length);
    // Expected value goes first so failure messages are not inverted; direct array access
    // replaces the boxing round-trip through Doubles.asList.
    assertEquals(0.044423, wordVector1[0], 1e-3);
    assertEquals(0.051964, wordVector2[0], 1e-3);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:17,代码来源:WordVectorSerializerTest.java

示例4: testFromTableAndVocab

import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; //导入依赖的package包/类
/**
 * Loads the model from {@code textFile}, rebuilds a {@link WordVectors} instance from its
 * lookup table and vocabulary via {@code fromTableAndVocab}, and checks the vectors of two
 * known words.
 *
 * @throws IOException if loading the vectors fails
 */
@Test
@Ignore
public void testFromTableAndVocab() throws IOException {

    WordVectors vec = WordVectorSerializer.loadGoogleModel(textFile, false);
    InMemoryLookupTable lookupTable = (InMemoryLookupTable) vec.lookupTable();
    InMemoryLookupCache lookupCache = (InMemoryLookupCache) vec.vocab();

    WordVectors wordVectors = WordVectorSerializer.fromTableAndVocab(lookupTable, lookupCache);
    double[] wordVector1 = wordVectors.getWordVector("Morgan_Freeman");
    double[] wordVector2 = wordVectors.getWordVector("JA_Montalbano");

    // assertEquals(expected, actual) reports the real length on failure, unlike assertTrue(a == b)
    assertEquals(300, wordVector1.length);
    assertEquals(300, wordVector2.length);
    // Expected value goes first so failure messages are not inverted; direct array access
    // replaces the boxing round-trip through Doubles.asList.
    assertEquals(0.044423, wordVector1[0], 1e-3);
    assertEquals(0.051964, wordVector2[0], 1e-3);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:17,代码来源:WordVectorSerializerTest.java

示例5: testUnifiedLoaderArchive1

import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; //导入依赖的package包/类
/**
 * Reads the same archive through {@code readWord2Vec} and through
 * {@code readWord2VecModel(..., false)}, checks that both give the same vector for "night",
 * and checks that the model read with {@code false} exposes neither syn1 nor syn1Neg.
 *
 * @throws Exception if the archive cannot be resolved or read
 */
@Test
public void testUnifiedLoaderArchive1() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final File archive = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();

    final WordVectors liveModel = WordVectorSerializer.readWord2Vec(archive);
    final WordVectors unifiedModel = WordVectorSerializer.readWord2VecModel(archive, false);

    final INDArray liveVector = liveModel.getWordVectorMatrix("night");
    final INDArray unifiedVector = unifiedModel.getWordVectorMatrix("night");

    assertNotEquals(null, liveVector);
    assertEquals(liveVector, unifiedVector);

    // Neither weight array is expected to be present on this model.
    final Object syn1 = ((InMemoryLookupTable) unifiedModel.lookupTable()).getSyn1();
    assertEquals(null, syn1);
    final Object syn1Neg = ((InMemoryLookupTable) unifiedModel.lookupTable()).getSyn1Neg();
    assertEquals(null, syn1Neg);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:19,代码来源:WordVectorSerializerTest.java

示例6: testUnifiedLoaderArchive2

import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; //导入依赖的package包/类
/**
 * Reads the same archive through {@code readWord2Vec} and through
 * {@code readWord2VecModel(..., true)}, checks that both give the same vector for "night",
 * and checks that the model read with {@code true} exposes a non-null syn1.
 *
 * @throws Exception if the archive cannot be resolved or read
 */
@Test
public void testUnifiedLoaderArchive2() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final File archive = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();

    final WordVectors liveModel = WordVectorSerializer.readWord2Vec(archive);
    final WordVectors unifiedModel = WordVectorSerializer.readWord2VecModel(archive, true);

    final INDArray liveVector = liveModel.getWordVectorMatrix("night");
    final INDArray unifiedVector = unifiedModel.getWordVectorMatrix("night");

    assertNotEquals(null, liveVector);
    assertEquals(liveVector, unifiedVector);

    // syn1 must have been restored for this model.
    final Object syn1 = ((InMemoryLookupTable) unifiedModel.lookupTable()).getSyn1();
    assertNotEquals(null, syn1);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:18,代码来源:WordVectorSerializerTest.java

示例7: testUnifiedLoaderText

import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; //导入依赖的package包/类
/**
 * Verifies that the unified loader can read the text vectors file ({@code textFile}) and
 * yields the same vector as {@code loadTxtVectors} for a known word.
 *
 * @throws Exception if the file cannot be read
 */
@Test
public void testUnifiedLoaderText() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final WordVectors liveModel = WordVectorSerializer.loadTxtVectors(textFile);
    final WordVectors unifiedModel = WordVectorSerializer.readWord2VecModel(textFile, true);

    final String probeWord = "Morgan_Freeman";
    final INDArray liveVector = liveModel.getWordVectorMatrix(probeWord);
    final INDArray unifiedVector = unifiedModel.getWordVectorMatrix(probeWord);

    assertNotEquals(null, liveVector);
    assertEquals(liveVector, unifiedVector);

    // The EXTENDED model is requested, but the file carries no syn1/huffman info, so the
    // loader should silently fall back to the simplified model (hence no syn1).
    final Object syn1 = ((InMemoryLookupTable) unifiedModel.lookupTable()).getSyn1();
    assertEquals(null, syn1);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:22,代码来源:WordVectorSerializerTest.java

示例8: fromPair

import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; //导入依赖的package包/类
/**
 * Builds a {@link Word2Vec} instance from a lookup table / vocabulary pair.
 *
 * @param pair
 *            pair whose first element is the lookup table and whose second element is the
 *            vocabulary cache
 * @return a new Word2Vec wired to the given table and vocabulary, using {@link BasicModelUtils}
 */
public static Word2Vec fromPair(Pair<InMemoryLookupTable, VocabCache> pair) {
    final Word2Vec model = new Word2Vec();

    final InMemoryLookupTable table = pair.getFirst();
    model.setLookupTable(table);

    final VocabCache vocabulary = pair.getSecond();
    model.setVocab(vocabulary);

    model.setModelUtils(new BasicModelUtils());
    return model;
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:15,代码来源:WordVectorSerializer.java

示例9: configure

import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; //导入依赖的package包/类
/**
 * Binds this algorithm to the given vocabulary, lookup table and configuration.
 * <p>
 * The three arguments are stored and also forwarded to the {@code cbow} delegate. The window,
 * AdaGrad flag, negative and sampling values are copied from the configuration, and the
 * syn0/syn1/syn1Neg, expTable and table references are taken from the lookup table.
 *
 * @param vocabCache    vocabulary to train against
 * @param lookupTable   weight table; cast to {@link InMemoryLookupTable} inside this method
 * @param configuration source of the hyper-parameters
 */
@Override
public void configure(@NonNull VocabCache<T> vocabCache, @NonNull WeightLookupTable<T> lookupTable,
                @NonNull VectorsConfiguration configuration) {
    this.vocabCache = vocabCache;
    this.lookupTable = lookupTable;
    this.configuration = configuration;

    cbow.configure(vocabCache, lookupTable, configuration);

    this.window = configuration.getWindow();
    this.useAdaGrad = configuration.isUseAdaGrad();
    this.negative = configuration.getNegative();
    this.sampling = configuration.getSampling();

    // Cast once and reuse it for every weight-array accessor below.
    final InMemoryLookupTable<T> inMemoryTable = (InMemoryLookupTable<T>) lookupTable;
    this.syn0 = inMemoryTable.getSyn0();
    this.syn1 = inMemoryTable.getSyn1();
    this.syn1Neg = inMemoryTable.getSyn1Neg();
    this.expTable = inMemoryTable.getExpTable();
    this.table = inMemoryTable.getTable();
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:21,代码来源:DM.java

示例10: Word2VecParam

import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; //导入依赖的package包/类
/**
 * Creates a parameter holder; every argument is stored unchanged in the field of the same name.
 */
public Word2VecParam(boolean useAdaGrad, double negative, int numWords, INDArray table, int window,
                AtomicLong nextRandom, double alpha, double minAlpha, int totalWords, int lastChecked,
                Broadcast<AtomicLong> wordCount, InMemoryLookupTable weights, int vectorLength,
                Broadcast<double[]> expTable) {
    // Weights and lookup structures
    this.weights = weights;
    this.vectorLength = vectorLength;
    this.table = table;
    this.expTable = expTable;

    // Hyper-parameters
    this.useAdaGrad = useAdaGrad;
    this.negative = negative;
    this.window = window;
    this.alpha = alpha;
    this.minAlpha = minAlpha;

    // Counters and random state
    this.numWords = numWords;
    this.totalWords = totalWords;
    this.lastChecked = lastChecked;
    this.wordCount = wordCount;
    this.nextRandom = nextRandom;
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:20,代码来源:Word2VecParam.java

示例11: testGlove

import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; //导入依赖的package包/类
/**
 * Trains GloVe on the lower-cased lines of {@code raw_sentences.txt}, wraps the result as
 * word vectors and checks that "week" is among the 20 words nearest to "day".
 *
 * @throws Exception if the corpus cannot be resolved or training fails
 */
@Test
public void testGlove() throws Exception {
    final Glove model = new Glove(true, 5, 100);

    final String corpusPath = new ClassPathResource("raw_sentences.txt").getFile().getAbsolutePath();
    final JavaRDD<String> lowerCased = sc.textFile(corpusPath).map(new Function<String, String>() {
        @Override
        public String call(String line) throws Exception {
            return line.toLowerCase();
        }
    });

    final Pair<VocabCache<VocabWord>, GloveWeightLookupTable> trained = model.train(lowerCased);

    // fromPair expects (lookup table, vocabulary), i.e. the reverse of the order train() returns.
    final Pair<InMemoryLookupTable, VocabCache> tableAndVocab =
                    new Pair<>((InMemoryLookupTable) trained.getSecond(), (VocabCache) trained.getFirst());
    final WordVectors vectors = WordVectorSerializer.fromPair(tableAndVocab);

    final Collection<String> nearest = vectors.wordsNearest("day", 20);
    assertTrue(nearest.contains("week"));
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:19,代码来源:GloveTest.java

示例12: useExistingWordVectors

import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; //导入依赖的package包/类
/**
 * Registers a pre-built WordVectors model (Word2Vec or GloVe) to be used by Par2Hier.
 * The existing model will be transferred into the new model before training starts.
 *
 * PLEASE NOTE: a non-normalized model is recommended here.
 *
 * @param vec existing WordVectors model; its lookup table must expose syn1 or syn1Neg
 * @return this builder
 * @throws ND4JIllegalStateException if the model has neither syn1 nor syn1Neg
 */
@Override
@SuppressWarnings("unchecked")
public Builder useExistingWordVectors(@NonNull WordVectors vec) {
  final InMemoryLookupTable<VocabWord> existingTable = (InMemoryLookupTable<VocabWord>) vec.lookupTable();

  // At least one of the two weight arrays has to be present; syn1Neg is only consulted
  // when syn1 is missing.
  final boolean hasWeights = existingTable.getSyn1() != null || existingTable.getSyn1Neg() != null;
  if (!hasWeights) {
    throw new ND4JIllegalStateException("Model being passed as existing has no syn1/syn1Neg available");
  }

  this.existingVectors = vec;
  return this;
}
 
开发者ID:tteofili,项目名称:par2hier,代码行数:21,代码来源:Par2Hier.java

示例13: checkUnlabeledData

import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; //导入依赖的package包/类
/**
 * Scores every document under the {@code paravec/unlabeled} classpath folder against the
 * known labels and logs the result.
 * <p>
 * Assumes the model has already been built. For many domains it is normal for one document
 * to fall into several labels at once, each with a different weight.
 *
 * @throws FileNotFoundException if the unlabeled resource folder cannot be resolved
 */
void checkUnlabeledData() throws FileNotFoundException {
    // Source of the unlabeled documents.
    final ClassPathResource unlabeledFolder = new ClassPathResource("paravec/unlabeled");
    final FileLabelAwareIterator unlabeledDocs = new FileLabelAwareIterator.Builder()
            .addSourceFolder(unlabeledFolder.getFile())
            .build();

    // Helpers that vectorise a document and score it against each label.
    final MeansBuilder vectorBuilder = new MeansBuilder(
            (InMemoryLookupTable<VocabWord>) paragraphVectors.getLookupTable(),
            tokenizerFactory);
    final LabelSeeker labelSeeker = new LabelSeeker(
            iterator.getLabelsSource().getLabels(),
            (InMemoryLookupTable<VocabWord>) paragraphVectors.getLookupTable());

    while (unlabeledDocs.hasNextDocument()) {
        final LabelledDocument doc = unlabeledDocs.nextDocument();
        final INDArray docVector = vectorBuilder.documentAsVector(doc);
        final List<Pair<String, Double>> labelScores = labelSeeker.getScores(docVector);

        // doc.getLabel() is used here only as a title, to show which document is being
        // reported, instead of printing the whole document name.
        final String header = "Document '" + doc.getLabel() + "' falls into the following categories: ";
        log.info(header);
        for (Pair<String, Double> labelScore : labelScores) {
            final String line = "        " + labelScore.getFirst() + ": " + labelScore.getSecond();
            log.info(line);
        }
    }
}
 
开发者ID:sillelien,项目名称:dollar,代码行数:41,代码来源:ParagraphVectorsClassifierExample.java

示例14: writeWordVectors

import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; //导入依赖的package包/类
/**
 * Writes the word vectors to the given path as UTF-8 text, one word per line: the word
 * (with spaces replaced by {@code whitespaceReplacement}) followed by its vector components,
 * all separated by single spaces. Note that this assumes an in memory cache.
 * <p>
 * Indices for which the cache returns no word are skipped. The target file is overwritten.
 *
 * @param lookupTable
 *            table supplying the vectors; its syn0 row count bounds the iteration
 * @param cache
 *            vocabulary used to map row indices to words
 * @param path
 *            the path to write
 * @throws IOException
 *             if the file cannot be opened or written
 */
@Deprecated
public static void writeWordVectors(InMemoryLookupTable lookupTable, InMemoryLookupCache cache, String path)
                throws IOException {
    // try-with-resources closes (and thereby flushes) the writer even when a lookup or a
    // write fails midway, so the file handle no longer leaks on error.
    try (BufferedWriter write = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(new File(path), false), "UTF-8"))) {
        for (int i = 0; i < lookupTable.getSyn0().rows(); i++) {
            String word = cache.wordAtIndex(i);
            if (word == null) {
                continue;
            }
            StringBuilder sb = new StringBuilder();
            sb.append(word.replaceAll(" ", whitespaceReplacement));
            sb.append(" ");
            INDArray wordVector = lookupTable.vector(word);
            for (int j = 0; j < wordVector.length(); j++) {
                sb.append(wordVector.getDouble(j));
                // separator between components, but not after the last one
                if (j < wordVector.length() - 1) {
                    sb.append(" ");
                }
            }
            sb.append("\n");
            write.write(sb.toString());
        }
    }
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:39,代码来源:WordVectorSerializer.java

示例15: configure

import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; //导入依赖的package包/类
/**
 * Binds this algorithm to the given vocabulary, lookup table and configuration.
 * <p>
 * The window, AdaGrad flag, negative and sampling values are copied from the configuration.
 * When the configured negative value is positive and the table has no syn1Neg yet, the table
 * is given the HS flag and negative value and {@code resetWeights(false)} is called on it.
 * The table's arrays are then wrapped in {@code DeviceLocalNDArray} instances.
 *
 * @param vocabCache    vocabulary to train against
 * @param lookupTable   weight table; cast to {@link InMemoryLookupTable} inside this method
 * @param configuration source of the hyper-parameters
 */
@Override
public void configure(@NonNull VocabCache<T> vocabCache, @NonNull WeightLookupTable<T> lookupTable,
                @NonNull VectorsConfiguration configuration) {
    this.vocabCache = vocabCache;
    this.lookupTable = lookupTable;
    this.configuration = configuration;

    this.window = configuration.getWindow();
    this.useAdaGrad = configuration.isUseAdaGrad();
    this.negative = configuration.getNegative();
    this.sampling = configuration.getSampling();

    // Cast once and reuse it everywhere below.
    final InMemoryLookupTable<T> inMemoryTable = (InMemoryLookupTable<T>) lookupTable;

    final boolean negativeRequested = configuration.getNegative() > 0;
    if (negativeRequested && inMemoryTable.getSyn1Neg() == null) {
        logger.info("Initializing syn1Neg...");
        inMemoryTable.setUseHS(configuration.isUseHierarchicSoftmax());
        inMemoryTable.setNegative(configuration.getNegative());
        inMemoryTable.resetWeights(false);
    }

    this.syn0 = new DeviceLocalNDArray(inMemoryTable.getSyn0());
    this.syn1 = new DeviceLocalNDArray(inMemoryTable.getSyn1());
    this.syn1Neg = new DeviceLocalNDArray(inMemoryTable.getSyn1Neg());
    this.expTable = new DeviceLocalNDArray(Nd4j.create(inMemoryTable.getExpTable()));
    this.table = new DeviceLocalNDArray(inMemoryTable.getTable());
    this.variableWindows = configuration.getVariableWindows();
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:30,代码来源:CBOW.java


注:本文中的org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。