本文整理汇总了Java中org.deeplearning4j.models.embeddings.wordvectors.WordVectors.getWordVectorMatrix方法的典型用法代码示例。如果您正苦于以下问题：Java WordVectors.getWordVectorMatrix方法的具体用法？Java WordVectors.getWordVectorMatrix怎么用？Java WordVectors.getWordVectorMatrix使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.deeplearning4j.models.embeddings.wordvectors.WordVectors的用法示例。
在下文中一共展示了WordVectors.getWordVectorMatrix方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testWriteWordVectorsFromWord2Vec
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Loads a model with {@code loadGoogleModel}, writes it out with {@code writeWordVectors},
 * reloads it with {@code loadTxtVectors}, and checks that the vectors of two known words
 * are unchanged by the round trip.
 *
 * @throws IOException propagated from the serializer calls
 */
@Test
@Ignore
public void testWriteWordVectorsFromWord2Vec() throws IOException {
    WordVectors vec = WordVectorSerializer.loadGoogleModel(binaryFile, true);
    WordVectorSerializer.writeWordVectors((Word2Vec) vec, pathToWriteto);

    WordVectors wordVectors = WordVectorSerializer.loadTxtVectors(new File(pathToWriteto));
    INDArray wordVector1 = wordVectors.getWordVectorMatrix("Morgan_Freeman");
    INDArray wordVector2 = wordVectors.getWordVectorMatrix("JA_Montalbano");

    // The originally loaded model is the reference; the reloaded vectors are under test.
    assertEquals(vec.getWordVectorMatrix("Morgan_Freeman"), wordVector1);
    assertEquals(vec.getWordVectorMatrix("JA_Montalbano"), wordVector2);

    // assertEquals (rather than assertTrue on ==) reports the actual length on failure.
    assertEquals(300, wordVector1.length());
    assertEquals(300, wordVector2.length());

    // JUnit argument order is (expected, actual, delta).
    assertEquals(0.044423, wordVector1.getDouble(0), 1e-3);
    assertEquals(0.051964, wordVector2.getDouble(0), 1e-3);
}
示例2: testStaticLoaderArchive
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Checks ZIP file loading as a static model: the archive is read once with
 * {@code readWord2Vec} and once with {@code loadStaticModel}, and both models
 * must return the same vector for the word "night".
 *
 * @throws Exception propagated from resource lookup or model loading
 */
@Test
public void testStaticLoaderArchive() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final File archive = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();
    final WordVectors liveModel = WordVectorSerializer.readWord2Vec(archive);
    final WordVectors staticModel = WordVectorSerializer.loadStaticModel(archive);

    final INDArray liveVector = liveModel.getWordVectorMatrix("night");
    final INDArray staticVector = staticModel.getWordVectorMatrix("night");

    // The reference vector must exist before it is compared with the static one.
    assertNotEquals(null, liveVector);
    assertEquals(liveVector, staticVector);
}
示例3: testUnifiedLoaderArchive1
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Reads the archive with {@code readWord2Vec} and with
 * {@code readWord2VecModel(file, false)}, then checks that both return the same
 * vector for "night" and that the second model's lookup table carries neither
 * syn1 nor syn1Neg.
 *
 * @throws Exception propagated from resource lookup or model loading
 */
@Test
public void testUnifiedLoaderArchive1() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final File archive = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();
    final WordVectors liveModel = WordVectorSerializer.readWord2Vec(archive);
    final WordVectors unifiedModel = WordVectorSerializer.readWord2VecModel(archive, false);

    final INDArray liveVector = liveModel.getWordVectorMatrix("night");
    final INDArray unifiedVector = unifiedModel.getWordVectorMatrix("night");

    assertNotEquals(null, liveVector);
    assertEquals(liveVector, unifiedVector);

    // With the flag set to false, both syn1 and syn1Neg are expected to be absent.
    final InMemoryLookupTable unifiedTable = (InMemoryLookupTable) unifiedModel.lookupTable();
    assertEquals(null, unifiedTable.getSyn1());
    assertEquals(null, unifiedTable.getSyn1Neg());
}
示例4: testUnifiedLoaderArchive2
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Reads the archive with {@code readWord2Vec} and with
 * {@code readWord2VecModel(file, true)}, then checks that both return the same
 * vector for "night" and that the second model's lookup table has a non-null syn1.
 *
 * @throws Exception propagated from resource lookup or model loading
 */
@Test
public void testUnifiedLoaderArchive2() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final File archive = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();
    final WordVectors liveModel = WordVectorSerializer.readWord2Vec(archive);
    final WordVectors unifiedModel = WordVectorSerializer.readWord2VecModel(archive, true);

    final INDArray liveVector = liveModel.getWordVectorMatrix("night");
    final INDArray unifiedVector = unifiedModel.getWordVectorMatrix("night");

    assertNotEquals(null, liveVector);
    assertEquals(liveVector, unifiedVector);

    // With the flag set to true, syn1 is expected to be present.
    assertNotEquals(null, ((InMemoryLookupTable) unifiedModel.lookupTable()).getSyn1());
}
示例5: testUnifiedLoaderText
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Checks CSV file loading via the unified loader: the text file is read with
 * {@code loadTxtVectors} and with {@code readWord2VecModel(file, true)}, and both
 * models must return the same vector for "Morgan_Freeman".
 *
 * @throws Exception propagated from model loading
 */
@Test
public void testUnifiedLoaderText() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final WordVectors liveModel = WordVectorSerializer.loadTxtVectors(textFile);
    final WordVectors unifiedModel = WordVectorSerializer.readWord2VecModel(textFile, true);

    final INDArray liveVector = liveModel.getWordVectorMatrix("Morgan_Freeman");
    final INDArray unifiedVector = unifiedModel.getWordVectorMatrix("Morgan_Freeman");

    assertNotEquals(null, liveVector);
    assertEquals(liveVector, unifiedVector);

    // The EXTENDED model is requested, but the file has no syn1/huffman info,
    // so the loader should silently degrade to the simplified model.
    assertEquals(null, ((InMemoryLookupTable) unifiedModel.lookupTable()).getSyn1());
}
示例6: windows
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Tokenizes the input and builds a list of windows of size windowSize from the
 * tokens that have a word vector. Tokens for which
 * {@code vectors.getWordVectorMatrix(token)} returns {@code null} are skipped.
 * Note that padding for each window is created as well.
 *
 * @param words the words to tokenize and construct windows from
 * @param tokenizerFactory tokenizer factory to use
 * @param windowSize the window size to generate
 * @param vectors word vectors used to decide which tokens are kept
 * @return the list of windows for the tokenized string
 * @throws IllegalStateException if no token is kept
 */
public static List<Window> windows(String words, @NonNull TokenizerFactory tokenizerFactory, int windowSize,
                WordVectors vectors) {
    List<String> knownTokens = new ArrayList<>();

    for (Tokenizer tokenizer = tokenizerFactory.create(words); tokenizer.hasMoreTokens();) {
        String candidate = tokenizer.nextToken();

        // if we don't have UNK word defined - we have to skip this word
        if (vectors.getWordVectorMatrix(candidate) == null) {
            continue;
        }
        knownTokens.add(candidate);
    }

    if (knownTokens.isEmpty()) {
        throw new IllegalStateException("No tokens found for windows");
    }

    return windows(knownTokens, windowSize);
}
示例7: testStaticLoaderBinary
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Checks binary file loading as a static model: the binary file is read with
 * {@code loadGoogleModel} and with {@code loadStaticModel}, and both models must
 * return the same vector for "Morgan_Freeman".
 *
 * @throws Exception propagated from model loading
 */
@Test
public void testStaticLoaderBinary() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final WordVectors liveModel = WordVectorSerializer.loadGoogleModel(binaryFile, true);
    final WordVectors staticModel = WordVectorSerializer.loadStaticModel(binaryFile);

    final INDArray liveVector = liveModel.getWordVectorMatrix("Morgan_Freeman");
    final INDArray staticVector = staticModel.getWordVectorMatrix("Morgan_Freeman");

    // The reference vector must exist before it is compared with the static one.
    assertNotEquals(null, liveVector);
    assertEquals(liveVector, staticVector);
}
示例8: testStaticLoaderText
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Checks CSV file loading as a static model: the text file is read with
 * {@code loadTxtVectors} and with {@code loadStaticModel}, and both models must
 * return the same vector for "Morgan_Freeman".
 *
 * @throws Exception propagated from model loading
 */
@Test
public void testStaticLoaderText() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final WordVectors liveModel = WordVectorSerializer.loadTxtVectors(textFile);
    final WordVectors staticModel = WordVectorSerializer.loadStaticModel(textFile);

    final INDArray liveVector = liveModel.getWordVectorMatrix("Morgan_Freeman");
    final INDArray staticVector = staticModel.getWordVectorMatrix("Morgan_Freeman");

    // The reference vector must exist before it is compared with the static one.
    assertNotEquals(null, liveVector);
    assertEquals(liveVector, staticVector);
}
示例9: testUnifiedLoaderBinary
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Checks binary file loading via the unified loader: the binary file is read with
 * {@code loadGoogleModel} and with {@code readWord2VecModel(file, false)}, and both
 * models must return the same vector for "Morgan_Freeman".
 *
 * @throws Exception propagated from model loading
 */
@Test
public void testUnifiedLoaderBinary() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final WordVectors liveModel = WordVectorSerializer.loadGoogleModel(binaryFile, true);
    final WordVectors unifiedModel = WordVectorSerializer.readWord2VecModel(binaryFile, false);

    final INDArray liveVector = liveModel.getWordVectorMatrix("Morgan_Freeman");
    final INDArray unifiedVector = unifiedModel.getWordVectorMatrix("Morgan_Freeman");

    // The reference vector must exist before it is compared with the unified one.
    assertNotEquals(null, liveVector);
    assertEquals(liveVector, unifiedVector);
}
示例10: testIndexPersistence
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Trains a small Word2Vec model on the raw-sentences resource, writes it to a
 * temporary file with {@code writeWordVectors}, reloads it with
 * {@code loadTxtVectors}, and checks that the document count and every vocabulary
 * word's vector are the same in both models.
 *
 * @throws Exception propagated from resource lookup, training or serialization
 */
@Test
public void testIndexPersistence() throws Exception {
    File corpus = new ClassPathResource("/big/raw_sentences.txt").getFile();
    SentenceIterator sentences = UimaSentenceIterator.createWithPath(corpus.getAbsolutePath());

    // Split on white spaces in the line to get words
    TokenizerFactory tokenizerFactory = new DefaultTokenizerFactory();
    tokenizerFactory.setTokenPreProcessor(new CommonPreprocessor());

    Word2Vec vec = new Word2Vec.Builder()
                    .minWordFrequency(5)
                    .iterations(1)
                    .epochs(1)
                    .layerSize(100)
                    .stopWords(new ArrayList<String>())
                    .useAdaGrad(false)
                    .negativeSample(5)
                    .seed(42)
                    .windowSize(5)
                    .iterate(sentences)
                    .tokenizerFactory(tokenizerFactory)
                    .build();
    vec.fit();

    VocabCache originalVocab = vec.getVocab();

    File serialized = File.createTempFile("temp", "w2v");
    serialized.deleteOnExit();
    WordVectorSerializer.writeWordVectors(vec, serialized);

    WordVectors vec2 = WordVectorSerializer.loadTxtVectors(serialized);
    VocabCache restoredVocab = vec2.vocab();

    assertEquals(originalVocab.totalNumberOfDocs(), restoredVocab.totalNumberOfDocs());

    // Every word of the trained vocabulary must map to the same vector after reload.
    for (VocabWord entry : vec.getVocab().vocabWords()) {
        String label = entry.getLabel();
        INDArray trainedVector = vec.getWordVectorMatrix(label);
        INDArray restoredVector = vec2.getWordVectorMatrix(label);
        assertEquals(trainedVector, restoredVector);
    }
}
示例11: testOutputStream
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Trains a small Word2Vec model, writes it through the {@code OutputStream}
 * overload of {@code writeWordVectors}, reloads it with {@code loadTxtVectors},
 * and checks that the vector for "day" is unchanged. It then writes the model with
 * {@code writeWord2VecModel} and reads it back to check that this does not throw.
 *
 * @throws Exception propagated from resource lookup, training or serialization
 */
@Test
public void testOutputStream() throws Exception {
    File file = File.createTempFile("tmp_ser", "ssa");
    file.deleteOnExit();

    File inputFile = new ClassPathResource("/big/raw_sentences.txt").getFile();
    SentenceIterator iter = new BasicLineIterator(inputFile);
    // Split on white spaces in the line to get words
    TokenizerFactory t = new DefaultTokenizerFactory();
    t.setTokenPreProcessor(new CommonPreprocessor());

    // The same cache instance is handed to both the lookup table and the Word2Vec builder.
    InMemoryLookupCache cache = new InMemoryLookupCache(false);
    WeightLookupTable table = new InMemoryLookupTable.Builder().vectorLength(100).useAdaGrad(false).negative(5.0)
                    .cache(cache).lr(0.025f).build();

    Word2Vec vec = new Word2Vec.Builder().minWordFrequency(5).iterations(1).epochs(1).layerSize(100)
                    .lookupTable(table).stopWords(new ArrayList<String>()).useAdaGrad(false).negativeSample(5)
                    .vocabCache(cache).seed(42)
                    .windowSize(5).iterate(iter).tokenizerFactory(t).build();

    assertEquals(new ArrayList<String>(), vec.getStopWords());
    vec.fit();

    INDArray day1 = vec.getWordVectorMatrix("day");

    // The test opened the stream, so it closes it too, even if the write throws.
    try (FileOutputStream out = new FileOutputStream(file)) {
        WordVectorSerializer.writeWordVectors(vec, out);
    }

    WordVectors vec2 = WordVectorSerializer.loadTxtVectors(file);
    INDArray day2 = vec2.getWordVectorMatrix("day");
    assertEquals(day1, day2);

    File tempFile = File.createTempFile("tetsts", "Fdfs");
    tempFile.deleteOnExit();
    WordVectorSerializer.writeWord2VecModel(vec, tempFile);

    // Only checks that the written model can be read back without throwing.
    WordVectorSerializer.readWord2VecModel(tempFile);
}