当前位置: 首页>>代码示例>>Java>>正文


Java WordVectorSerializer类代码示例

本文整理汇总了Java中org.deeplearning4j.models.embeddings.loader.WordVectorSerializer的典型用法代码示例。如果您正苦于以下问题:Java WordVectorSerializer类的具体用法?Java WordVectorSerializer怎么用?Java WordVectorSerializer使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


WordVectorSerializer类属于org.deeplearning4j.models.embeddings.loader包,在下文中一共展示了WordVectorSerializer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testWriteWordVectorsFromWord2Vec

import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; //导入依赖的package包/类
@Test
@Ignore
public void testWriteWordVectorsFromWord2Vec() throws IOException {
    WordVectors vec = WordVectorSerializer.loadGoogleModel(binaryFile, true);
    WordVectorSerializer.writeWordVectors((Word2Vec) vec, pathToWriteto);

    WordVectors wordVectors = WordVectorSerializer.loadTxtVectors(new File(pathToWriteto));
    INDArray wordVector1 = wordVectors.getWordVectorMatrix("Morgan_Freeman");
    INDArray wordVector2 = wordVectors.getWordVectorMatrix("JA_Montalbano");
    assertEquals(vec.getWordVectorMatrix("Morgan_Freeman"), wordVector1);
    assertEquals(vec.getWordVectorMatrix("JA_Montalbano"), wordVector2);
    assertTrue(wordVector1.length() == 300);
    assertTrue(wordVector2.length() == 300);
    assertEquals(wordVector1.getDouble(0), 0.044423, 1e-3);
    assertEquals(wordVector2.getDouble(0), 0.051964, 1e-3);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:17,代码来源:WordVectorSerializerTest.java

示例2: testFindNamesFromText

import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; //导入依赖的package包/类
@Ignore
@Test
public void testFindNamesFromText() throws IOException {
    SentenceIterator iter = new BasicLineIterator("src/test/resources/chineseName.txt");

    log.info("load is right!");
    TokenizerFactory tokenizerFactory = new ChineseTokenizerFactory();
    //tokenizerFactory.setTokenPreProcessor(new ChineseTokenizer());

    //Generates a word-vector from the dataset stored in resources folder
    Word2Vec vec = new Word2Vec.Builder().minWordFrequency(2).iterations(5).layerSize(100).seed(42)
                    .learningRate(0.1).windowSize(20).iterate(iter).tokenizerFactory(tokenizerFactory).build();
    vec.fit();
    WordVectorSerializer.writeWordVectors(vec, new File("src/test/resources/chineseNameWordVector.txt"));

    //trains a model that can find out all names from news(Suffix txt),It uses word vector generated
    // WordVectors wordVectors;

    //test model,Whether the model find out name from unknow text;

}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:22,代码来源:ChineseTokenizerTest.java

示例3: main

import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; //导入依赖的package包/类
public static void main(String[] args) throws Exception {

        // Gets Path to Text file
        String filePath = "c:/raw_sentences.txt";

        log.info("Load & Vectorize Sentences....");
        // Strip white space before and after for each line
        SentenceIterator iter = UimaSentenceIterator.createWithPath(filePath);
        // Split on white spaces in the line to get words
        TokenizerFactory t = new DefaultTokenizerFactory();
        t.setTokenPreProcessor(new CommonPreprocessor());

        InMemoryLookupCache cache = new InMemoryLookupCache();
        WeightLookupTable table = new InMemoryLookupTable.Builder()
                .vectorLength(100)
                .useAdaGrad(false)
                .cache(cache)
                .lr(0.025f).build();

        log.info("Building model....");
        Word2Vec vec = new Word2Vec.Builder()
                .minWordFrequency(5).iterations(1)
                .layerSize(100).lookupTable(table)
                .stopWords(new ArrayList<String>())
                .vocabCache(cache).seed(42)
                .windowSize(5).iterate(iter).tokenizerFactory(t).build();

        log.info("Fitting Word2Vec model....");
        vec.fit();

        log.info("Writing word vectors to text file....");
        // Write word
        WordVectorSerializer.writeWordVectors(vec, "word2vec.txt");

        log.info("Closest Words:");
        Collection<String> lst = vec.wordsNearest("man", 5); 
        System.out.println(lst);
        double cosSim = vec.similarity("cruise", "voyage");
        System.out.println(cosSim);
    }
 
开发者ID:PacktPublishing,项目名称:Java-Data-Science-Cookbook,代码行数:41,代码来源:Word2VecRawTextExample.java

示例4: loadWordEmbeddings

import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; //导入依赖的package包/类
public static void loadWordEmbeddings(String wePath, String weType) {
    try {
        switch (weType) {
            case "Google":
                WordEmbeddingRelatedness.wordVectors = WordVectorSerializer.loadGoogleModel(new File(wePath), true);
                break;
            case "Glove":
                WordEmbeddingRelatedness.wordVectors = WordVectorSerializer.loadTxtVectors(new File(wePath));
                break;
            default:
                System.out.println("Word Embeddings type is invalid! " + weType + " is not a valid type. Please use Google or Glove model.");
                System.exit(0);
        }
    } catch (IOException e) {
        System.out.println("Could not find Word Embeddings file in " + wePath);
    }
}
 
开发者ID:butnaruandrei,项目名称:ShotgunWSD,代码行数:18,代码来源:WordEmbeddingRelatedness.java

示例5: main

import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; //导入依赖的package包/类
/**
 * args[0] input: word2vecファイル名
 * args[1] input: sentimentモデル名
 * args[2] input: test親フォルダ名
 *
 * @param args
 * @throws Exception
 */
public static void main (final String[] args) throws Exception {
  if (args[0]==null || args[1]==null || args[2]==null)
    System.exit(1);

  WordVectors wvec = WordVectorSerializer.loadTxtVectors(new File(args[0]));
  MultiLayerNetwork model = ModelSerializer.restoreMultiLayerNetwork(args[1],false);

  DataSetIterator test = new AsyncDataSetIterator(
      new SentimentRecurrentIterator(args[2],wvec,100,300,false),1);
  Evaluation evaluation = new Evaluation();
  while(test.hasNext()) {
    DataSet t = test.next();
    INDArray features = t.getFeatures();
    INDArray lables = t.getLabels();
    INDArray inMask = t.getFeaturesMaskArray();
    INDArray outMask = t.getLabelsMaskArray();
    INDArray predicted = model.output(features,false,inMask,outMask);
    evaluation.evalTimeSeries(lables,predicted,outMask);
  }
  System.out.println(evaluation.stats());
}
 
开发者ID:keigohtr,项目名称:sentiment-rnn,代码行数:30,代码来源:SentimentRecurrentTestCmd.java

示例6: start

import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; //导入依赖的package包/类
public void start() throws Exception {
    if (serializeFile().exists()) {
        try {
            log.info("Loading from " + serializeFile().getAbsolutePath());
            paragraphVectors = WordVectorSerializer.readParagraphVectors(serializeFile());
        } catch (Exception e) {
            log.debug(e.getMessage(), e);
            makeParagraphVectors();
        }
    } else {
        makeParagraphVectors();
    }
}
 
开发者ID:sillelien,项目名称:dollar,代码行数:14,代码来源:ParagraphVectorsClassifierExample.java

示例7: testWord2VecGoogleModelUptraining

import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; //导入依赖的package包/类
@Ignore
@Test
public void testWord2VecGoogleModelUptraining() throws Exception {
    long time1 = System.currentTimeMillis();
    Word2Vec vec = WordVectorSerializer.readWord2VecModel(
                    new File("C:\\Users\\raver\\Downloads\\GoogleNews-vectors-negative300.bin.gz"), false);
    long time2 = System.currentTimeMillis();
    log.info("Model loaded in {} msec", time2 - time1);
    SentenceIterator iter = new BasicLineIterator(inputFile.getAbsolutePath());
    // Split on white spaces in the line to get words
    TokenizerFactory t = new DefaultTokenizerFactory();
    t.setTokenPreProcessor(new CommonPreprocessor());

    vec.setTokenizerFactory(t);
    vec.setSentenceIterator(iter);
    vec.getConfiguration().setUseHierarchicSoftmax(false);
    vec.getConfiguration().setNegative(5.0);
    vec.setElementsLearningAlgorithm(new CBOW<VocabWord>());

    vec.fit();
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:22,代码来源:Word2VecTests.java

示例8: testWriteWordVectors

import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; //导入依赖的package包/类
@Test
@Ignore
public void testWriteWordVectors() throws IOException {
    WordVectors vec = WordVectorSerializer.loadGoogleModel(binaryFile, true);
    InMemoryLookupTable lookupTable = (InMemoryLookupTable) vec.lookupTable();
    InMemoryLookupCache lookupCache = (InMemoryLookupCache) vec.vocab();
    WordVectorSerializer.writeWordVectors(lookupTable, lookupCache, pathToWriteto);

    WordVectors wordVectors = WordVectorSerializer.loadTxtVectors(new File(pathToWriteto));
    double[] wordVector1 = wordVectors.getWordVector("Morgan_Freeman");
    double[] wordVector2 = wordVectors.getWordVector("JA_Montalbano");
    assertTrue(wordVector1.length == 300);
    assertTrue(wordVector2.length == 300);
    assertEquals(Doubles.asList(wordVector1).get(0), 0.044423, 1e-3);
    assertEquals(Doubles.asList(wordVector2).get(0), 0.051964, 1e-3);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:17,代码来源:WordVectorSerializerTest.java

示例9: testFromTableAndVocab

import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; //导入依赖的package包/类
@Test
@Ignore
public void testFromTableAndVocab() throws IOException {

    WordVectors vec = WordVectorSerializer.loadGoogleModel(textFile, false);
    InMemoryLookupTable lookupTable = (InMemoryLookupTable) vec.lookupTable();
    InMemoryLookupCache lookupCache = (InMemoryLookupCache) vec.vocab();

    WordVectors wordVectors = WordVectorSerializer.fromTableAndVocab(lookupTable, lookupCache);
    double[] wordVector1 = wordVectors.getWordVector("Morgan_Freeman");
    double[] wordVector2 = wordVectors.getWordVector("JA_Montalbano");
    assertTrue(wordVector1.length == 300);
    assertTrue(wordVector2.length == 300);
    assertEquals(Doubles.asList(wordVector1).get(0), 0.044423, 1e-3);
    assertEquals(Doubles.asList(wordVector2).get(0), 0.051964, 1e-3);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:17,代码来源:WordVectorSerializerTest.java

示例10: testStaticLoaderArchive

import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; //导入依赖的package包/类
/**
 * This method tests ZIP file loading as static model
 *
 * @throws Exception
 */
@Test
public void testStaticLoaderArchive() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    File w2v = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();

    WordVectors vectorsLive = WordVectorSerializer.readWord2Vec(w2v);
    WordVectors vectorsStatic = WordVectorSerializer.loadStaticModel(w2v);

    INDArray arrayLive = vectorsLive.getWordVectorMatrix("night");
    INDArray arrayStatic = vectorsStatic.getWordVectorMatrix("night");

    assertNotEquals(null, arrayLive);
    assertEquals(arrayLive, arrayStatic);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:21,代码来源:WordVectorSerializerTest.java

示例11: testUnifiedLoaderArchive1

import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; //导入依赖的package包/类
@Test
public void testUnifiedLoaderArchive1() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    File w2v = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();

    WordVectors vectorsLive = WordVectorSerializer.readWord2Vec(w2v);
    WordVectors vectorsUnified = WordVectorSerializer.readWord2VecModel(w2v, false);

    INDArray arrayLive = vectorsLive.getWordVectorMatrix("night");
    INDArray arrayStatic = vectorsUnified.getWordVectorMatrix("night");

    assertNotEquals(null, arrayLive);
    assertEquals(arrayLive, arrayStatic);

    assertEquals(null, ((InMemoryLookupTable) vectorsUnified.lookupTable()).getSyn1());
    assertEquals(null, ((InMemoryLookupTable) vectorsUnified.lookupTable()).getSyn1Neg());
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:19,代码来源:WordVectorSerializerTest.java

示例12: testUnifiedLoaderArchive2

import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; //导入依赖的package包/类
@Test
public void testUnifiedLoaderArchive2() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    File w2v = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();

    WordVectors vectorsLive = WordVectorSerializer.readWord2Vec(w2v);
    WordVectors vectorsUnified = WordVectorSerializer.readWord2VecModel(w2v, true);

    INDArray arrayLive = vectorsLive.getWordVectorMatrix("night");
    INDArray arrayStatic = vectorsUnified.getWordVectorMatrix("night");

    assertNotEquals(null, arrayLive);
    assertEquals(arrayLive, arrayStatic);

    assertNotEquals(null, ((InMemoryLookupTable) vectorsUnified.lookupTable()).getSyn1());
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:18,代码来源:WordVectorSerializerTest.java

示例13: testUnifiedLoaderText

import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; //导入依赖的package包/类
/**
 * This method tests CSV file loading via unified loader
 *
 * @throws Exception
 */
@Test
public void testUnifiedLoaderText() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    WordVectors vectorsLive = WordVectorSerializer.loadTxtVectors(textFile);
    WordVectors vectorsUnified = WordVectorSerializer.readWord2VecModel(textFile, true);

    INDArray arrayLive = vectorsLive.getWordVectorMatrix("Morgan_Freeman");
    INDArray arrayStatic = vectorsUnified.getWordVectorMatrix("Morgan_Freeman");

    assertNotEquals(null, arrayLive);
    assertEquals(arrayLive, arrayStatic);

    // we're trying EXTENDED model, but file doesn't have syn1/huffman info, so it should be silently degraded to simplified model
    assertEquals(null, ((InMemoryLookupTable) vectorsUnified.lookupTable()).getSyn1());
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:22,代码来源:WordVectorSerializerTest.java

示例14: testGoogleModelForInference

import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; //导入依赖的package包/类
@Ignore
@Test
public void testGoogleModelForInference() throws Exception {
    WordVectors googleVectors = WordVectorSerializer.loadGoogleModelNonNormalized(
                    new File("/ext/GoogleNews-vectors-negative300.bin.gz"), true, false);

    TokenizerFactory t = new DefaultTokenizerFactory();
    t.setTokenPreProcessor(new CommonPreprocessor());

    ParagraphVectors pv =
                    new ParagraphVectors.Builder().tokenizerFactory(t).iterations(10).useHierarchicSoftmax(false)
                                    .trainWordVectors(false).iterations(10).useExistingWordVectors(googleVectors)
                                    .negativeSample(10).sequenceLearningAlgorithm(new DM<VocabWord>()).build();

    INDArray vec1 = pv.inferVector("This text is pretty awesome");
    INDArray vec2 = pv.inferVector("Fantastic process of crazy things happening inside just for history purposes");

    log.info("vec1/vec2: {}", Transforms.cosineSim(vec1, vec2));
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:20,代码来源:ParagraphVectorsTest.java

示例15: testGlove

import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; //导入依赖的package包/类
@Test
public void testGlove() throws Exception {
    Glove glove = new Glove(true, 5, 100);
    JavaRDD<String> corpus = sc.textFile(new ClassPathResource("raw_sentences.txt").getFile().getAbsolutePath())
                    .map(new Function<String, String>() {
                        @Override
                        public String call(String s) throws Exception {
                            return s.toLowerCase();
                        }
                    });


    Pair<VocabCache<VocabWord>, GloveWeightLookupTable> table = glove.train(corpus);
    WordVectors vectors = WordVectorSerializer
                    .fromPair(new Pair<>((InMemoryLookupTable) table.getSecond(), (VocabCache) table.getFirst()));
    Collection<String> words = vectors.wordsNearest("day", 20);
    assertTrue(words.contains("week"));
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:19,代码来源:GloveTest.java


注:本文中的org.deeplearning4j.models.embeddings.loader.WordVectorSerializer类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。