本文整理汇总了Java中org.deeplearning4j.models.embeddings.wordvectors.WordVectors类的典型用法代码示例。如果您正苦于以下问题：Java WordVectors类的具体用法？Java WordVectors怎么用？Java WordVectors使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
WordVectors类属于org.deeplearning4j.models.embeddings.wordvectors包，在下文中一共展示了WordVectors类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testWriteWordVectorsFromWord2Vec
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入依赖的package包/类
/**
 * Loads a model with {@code loadGoogleModel}, writes it out with
 * {@code writeWordVectors}, reloads it with {@code loadTxtVectors} and checks that
 * the reloaded vectors for two words equal the ones from the original model.
 *
 * <p>The test is {@code @Ignore}d; it reads the {@code binaryFile} and
 * {@code pathToWriteto} fixtures that are declared outside this method.
 *
 * @throws IOException propagated from the serializer calls
 */
@Test
@Ignore
public void testWriteWordVectorsFromWord2Vec() throws IOException {
    WordVectors vec = WordVectorSerializer.loadGoogleModel(binaryFile, true);
    WordVectorSerializer.writeWordVectors((Word2Vec) vec, pathToWriteto);

    WordVectors wordVectors = WordVectorSerializer.loadTxtVectors(new File(pathToWriteto));
    INDArray wordVector1 = wordVectors.getWordVectorMatrix("Morgan_Freeman");
    INDArray wordVector2 = wordVectors.getWordVectorMatrix("JA_Montalbano");

    assertEquals(vec.getWordVectorMatrix("Morgan_Freeman"), wordVector1);
    assertEquals(vec.getWordVectorMatrix("JA_Montalbano"), wordVector2);

    // assertEquals (instead of assertTrue on ==) reports both values when the check fails.
    assertEquals(300, wordVector1.length());
    assertEquals(300, wordVector2.length());

    // Expected value goes first so that failure messages are labelled correctly.
    assertEquals(0.044423, wordVector1.getDouble(0), 1e-3);
    assertEquals(0.051964, wordVector2.getDouble(0), 1e-3);
}
示例2: getDataSetIterator
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入依赖的package包/类
/**
 * Builds a {@code CnnSentenceDataSetIterator} over the review files found below
 * {@code DATA_PATH}.
 *
 * <p>Files are listed from {@code aclImdb/train/pos} and {@code aclImdb/train/neg}
 * (or the {@code aclImdb/test/...} equivalents) and registered under the labels
 * {@code "Positive"} and {@code "Negative"}. Word-vector normalization is switched off
 * on the builder.
 *
 * @param DATA_PATH         base directory that contains the {@code aclImdb} folder
 * @param isTraining        {@code true} to read the {@code train} folder, {@code false} for {@code test}
 * @param wordVectors       word vectors handed to the iterator builder
 * @param minibatchSize     minibatch size handed to the iterator builder
 * @param maxSentenceLength maximum sentence length handed to the iterator builder
 * @param rng               random source handed to the {@code FileLabeledSentenceProvider}
 * @return the configured iterator
 * @throws NullPointerException if either review directory cannot be listed
 */
public static DataSetIterator getDataSetIterator(String DATA_PATH, boolean isTraining, WordVectors wordVectors, int minibatchSize,
        int maxSentenceLength, Random rng) {
    String path = FilenameUtils.concat(DATA_PATH, (isTraining ? "aclImdb/train/" : "aclImdb/test/"));
    File filePositive = new File(FilenameUtils.concat(path, "pos"));
    File fileNegative = new File(FilenameUtils.concat(path, "neg"));

    // File.listFiles() returns null when the path is not a directory or cannot be read.
    // Fail with the offending path in the message instead of an anonymous NPE inside Arrays.asList.
    File[] positiveFiles = java.util.Objects.requireNonNull(filePositive.listFiles(),
            "Cannot list review directory: " + filePositive);
    File[] negativeFiles = java.util.Objects.requireNonNull(fileNegative.listFiles(),
            "Cannot list review directory: " + fileNegative);

    Map<String, List<File>> reviewFilesMap = new HashMap<>();
    reviewFilesMap.put("Positive", Arrays.asList(positiveFiles));
    reviewFilesMap.put("Negative", Arrays.asList(negativeFiles));

    LabeledSentenceProvider sentenceProvider = new FileLabeledSentenceProvider(reviewFilesMap, rng);

    return new CnnSentenceDataSetIterator.Builder()
            .sentenceProvider(sentenceProvider)
            .wordVectors(wordVectors)
            .minibatchSize(minibatchSize)
            .maxSentenceLength(maxSentenceLength)
            .useNormalizedWordVectors(false)
            .build();
}
示例3: SentimentExampleIterator
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入依赖的package包/类
/**
 * Creates an iterator over the review files stored below {@code dataDirectory}.
 *
 * <p>The vector size is taken from the vector of the vocabulary word at index 0. A
 * {@code DefaultTokenizerFactory} with a {@code CommonPreprocessor} is installed as
 * tokenizer.
 *
 * @param dataDirectory  directory that contains the {@code aclImdb} folder of the IMDB review data set
 * @param wordVectors    WordVectors object
 * @param batchSize      size of each minibatch for training
 * @param truncateLength stored as the truncation length; presumably reviews exceeding it are
 *                       truncated - confirm where the field is read
 * @param train          if true: use the {@code train} folder. If false: use the {@code test} folder.
 * @throws IOException declared by the signature; nothing visible in this constructor throws it
 */
public SentimentExampleIterator(String dataDirectory, WordVectors wordVectors, int batchSize, int truncateLength, boolean train) throws IOException {
    this.batchSize = batchSize;

    // Any vocabulary word will do for probing the vector dimensionality; index 0 is used.
    String probeWord = wordVectors.vocab().wordAtIndex(0);
    this.vectorSize = wordVectors.getWordVector(probeWord).length;

    String split = train ? "train" : "test";
    String positivePath = FilenameUtils.concat(dataDirectory, "aclImdb/" + split + "/pos/") + "/";
    String negativePath = FilenameUtils.concat(dataDirectory, "aclImdb/" + split + "/neg/") + "/";
    positiveFiles = new File(positivePath).listFiles();
    negativeFiles = new File(negativePath).listFiles();

    this.wordVectors = wordVectors;
    this.truncateLength = truncateLength;

    tokenizerFactory = new DefaultTokenizerFactory();
    tokenizerFactory.setTokenPreProcessor(new CommonPreprocessor());
}
示例4: RnnTextEmbeddingDataSetIterator
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入依赖的package包/类
/**
 * Stores the supplied collaborators and derives the vector size from the vector of
 * the vocabulary word at index 0. The token pre-processor is installed on the given
 * tokenizer factory.
 *
 * @param data             Instances with documents and labels
 * @param wordVectors      WordVectors object
 * @param tokenFact        Tokenizer factory; {@code tpp} is set on it
 * @param tpp              Token pre processor
 * @param stopWords        Stop word object
 * @param sentenceProvider Labeled sentence provider stored on the iterator
 * @param batchSize        Size of each minibatch for training
 * @param truncateLength   stored as the truncation length; presumably documents exceeding it are
 *                         truncated - confirm where the field is read
 */
public RnnTextEmbeddingDataSetIterator(
        Instances data,
        WordVectors wordVectors,
        TokenizerFactory tokenFact,
        TokenPreProcess tpp,
        AbstractStopwords stopWords,
        LabeledSentenceProvider sentenceProvider,
        int batchSize,
        int truncateLength) {
    // Probe the dimensionality with the first vocabulary word.
    String probeWord = wordVectors.vocab().wordAtIndex(0);
    this.vectorSize = wordVectors.getWordVector(probeWord).length;

    // Configure the factory before publishing it on the instance.
    tokenFact.setTokenPreProcessor(tpp);
    this.tokenizerFactory = tokenFact;

    this.data = data;
    this.wordVectors = wordVectors;
    this.stopWords = stopWords;
    this.sentenceProvider = sentenceProvider;
    this.batchSize = batchSize;
    this.truncateLength = truncateLength;
}
示例5: SentimentRecurrentIterator
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入依赖的package包/类
/**
 * Creates an iterator over review files stored in
 * {@code <dataDirectory>/(train|test)/positive} and
 * {@code <dataDirectory>/(train|test)/negative}.
 *
 * <p>The vector size is read from the lookup table's layer size, the random generator
 * is seeded with 1, and a {@code DefaultTokenizerFactory} with a
 * {@code LowCasePreProcessor} is installed as tokenizer.
 *
 * @param dataDirectory  the directory holding the {@code train} and {@code test} folders
 * @param wordVectors    WordVectors object
 * @param batchSize      size of each minibatch for training
 * @param truncateLength stored as the truncation length; presumably reviews exceeding it are
 *                       truncated - confirm where the field is read
 * @param train          if true: use the {@code train} folder. If false: use the {@code test} folder.
 * @throws IOException if the positive or negative review directory cannot be listed
 */
public SentimentRecurrentIterator(String dataDirectory, WordVectors wordVectors, int batchSize, int truncateLength, boolean train) throws IOException {
    this.batchSize = batchSize;
    this.vectorSize = wordVectors.lookupTable().layerSize();

    String split = train ? "train" : "test";
    File p = new File(FilenameUtils.concat(dataDirectory, split + "/positive/") + "/");
    File n = new File(FilenameUtils.concat(dataDirectory, split + "/negative/") + "/");

    // File.listFiles() returns null for a missing or unreadable directory; without this check
    // the .length reads below would fail with an NPE that does not name the directory.
    positiveFiles = p.listFiles();
    if (positiveFiles == null) {
        throw new IOException("Cannot list positive review directory: " + p);
    }
    negativeFiles = n.listFiles();
    if (negativeFiles == null) {
        throw new IOException("Cannot list negative review directory: " + n);
    }

    numPositives = positiveFiles.length;
    numNegatives = negativeFiles.length;
    numTotals = numPositives + numNegatives;

    // Fixed seed, as in the original implementation.
    rnd = new Random(1);

    this.wordVectors = wordVectors;
    this.truncateLength = truncateLength;

    tokenizerFactory = new DefaultTokenizerFactory();
    tokenizerFactory.setTokenPreProcessor(new LowCasePreProcessor());
}
示例6: main
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入依赖的package包/类
/**
 * Evaluates a stored sentiment model on a test set and prints the evaluation statistics.
 *
 * <ul>
 *   <li>args[0] input: word2vec file name</li>
 *   <li>args[1] input: sentiment model name</li>
 *   <li>args[2] input: test parent folder name</li>
 * </ul>
 *
 * <p>Exits with status 1 when fewer than three arguments are supplied.
 *
 * @param args command-line arguments as listed above
 * @throws Exception propagated from model loading, iteration or evaluation
 */
public static void main(final String[] args) throws Exception {
    // Check the argument count first: indexing args[0..2] on a shorter array throws
    // ArrayIndexOutOfBoundsException before any null check could run.
    if (args == null || args.length < 3 || args[0] == null || args[1] == null || args[2] == null) {
        System.err.println("Usage: <word2vec file> <sentiment model file> <test parent folder>");
        System.exit(1);
    }

    WordVectors wvec = WordVectorSerializer.loadTxtVectors(new File(args[0]));
    MultiLayerNetwork model = ModelSerializer.restoreMultiLayerNetwork(args[1], false);

    DataSetIterator test = new AsyncDataSetIterator(
            new SentimentRecurrentIterator(args[2], wvec, 100, 300, false), 1);

    Evaluation evaluation = new Evaluation();
    while (test.hasNext()) {
        DataSet t = test.next();
        INDArray features = t.getFeatures();
        INDArray labels = t.getLabels();
        INDArray inMask = t.getFeaturesMaskArray();
        INDArray outMask = t.getLabelsMaskArray();
        INDArray predicted = model.output(features, false, inMask, outMask);
        evaluation.evalTimeSeries(labels, predicted, outMask);
    }

    System.out.println(evaluation.stats());
}
示例7: testWriteWordVectors
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入依赖的package包/类
/**
 * Loads a model with {@code loadGoogleModel}, writes its lookup table and vocabulary
 * with {@code writeWordVectors}, reloads the result with {@code loadTxtVectors} and
 * checks the length and first component of two word vectors.
 *
 * <p>The test is {@code @Ignore}d; it reads the {@code binaryFile} and
 * {@code pathToWriteto} fixtures that are declared outside this method.
 *
 * @throws IOException propagated from the serializer calls
 */
@Test
@Ignore
public void testWriteWordVectors() throws IOException {
    WordVectors vec = WordVectorSerializer.loadGoogleModel(binaryFile, true);
    InMemoryLookupTable lookupTable = (InMemoryLookupTable) vec.lookupTable();
    InMemoryLookupCache lookupCache = (InMemoryLookupCache) vec.vocab();
    WordVectorSerializer.writeWordVectors(lookupTable, lookupCache, pathToWriteto);

    WordVectors wordVectors = WordVectorSerializer.loadTxtVectors(new File(pathToWriteto));
    double[] wordVector1 = wordVectors.getWordVector("Morgan_Freeman");
    double[] wordVector2 = wordVectors.getWordVector("JA_Montalbano");

    // assertEquals (instead of assertTrue on ==) reports both values when the check fails.
    assertEquals(300, wordVector1.length);
    assertEquals(300, wordVector2.length);

    // Expected value first; index the array directly rather than boxing it into a list.
    assertEquals(0.044423, wordVector1[0], 1e-3);
    assertEquals(0.051964, wordVector2[0], 1e-3);
}
示例8: testFromTableAndVocab
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入依赖的package包/类
/**
 * Loads a model with {@code loadGoogleModel}, rebuilds a {@code WordVectors} instance
 * from its lookup table and vocabulary via {@code fromTableAndVocab}, and checks the
 * length and first component of two word vectors.
 *
 * <p>The test is {@code @Ignore}d; it reads the {@code textFile} fixture that is
 * declared outside this method.
 *
 * @throws IOException propagated from the serializer calls
 */
@Test
@Ignore
public void testFromTableAndVocab() throws IOException {
    WordVectors vec = WordVectorSerializer.loadGoogleModel(textFile, false);
    InMemoryLookupTable lookupTable = (InMemoryLookupTable) vec.lookupTable();
    InMemoryLookupCache lookupCache = (InMemoryLookupCache) vec.vocab();

    WordVectors wordVectors = WordVectorSerializer.fromTableAndVocab(lookupTable, lookupCache);
    double[] wordVector1 = wordVectors.getWordVector("Morgan_Freeman");
    double[] wordVector2 = wordVectors.getWordVector("JA_Montalbano");

    // assertEquals (instead of assertTrue on ==) reports both values when the check fails.
    assertEquals(300, wordVector1.length);
    assertEquals(300, wordVector2.length);

    // Expected value first; index the array directly rather than boxing it into a list.
    assertEquals(0.044423, wordVector1[0], 1e-3);
    assertEquals(0.051964, wordVector2[0], 1e-3);
}
示例9: testStaticLoaderArchive
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入依赖的package包/类
/**
 * Tests ZIP file loading as a static model: the vector for "night" obtained through
 * {@code loadStaticModel} must equal the one obtained through {@code readWord2Vec}.
 *
 * @throws Exception propagated from resource lookup or model loading
 */
@Test
public void testStaticLoaderArchive() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final File modelArchive = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();

    // Load the same archive through both code paths before querying either of them.
    final WordVectors liveModel = WordVectorSerializer.readWord2Vec(modelArchive);
    final WordVectors staticModel = WordVectorSerializer.loadStaticModel(modelArchive);

    final INDArray liveNight = liveModel.getWordVectorMatrix("night");
    final INDArray staticNight = staticModel.getWordVectorMatrix("night");

    assertNotEquals(null, liveNight);
    assertEquals(liveNight, staticNight);
}
示例10: testUnifiedLoaderArchive1
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入依赖的package包/类
/**
 * Loads the same archive with {@code readWord2Vec} and with
 * {@code readWord2VecModel(file, false)}, checks that both return the same vector for
 * "night", and that the unified model's lookup table has neither syn1 nor syn1Neg.
 *
 * @throws Exception propagated from resource lookup or model loading
 */
@Test
public void testUnifiedLoaderArchive1() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final File modelArchive = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();

    final WordVectors liveModel = WordVectorSerializer.readWord2Vec(modelArchive);
    final WordVectors unifiedModel = WordVectorSerializer.readWord2VecModel(modelArchive, false);

    final INDArray liveNight = liveModel.getWordVectorMatrix("night");
    final INDArray unifiedNight = unifiedModel.getWordVectorMatrix("night");

    assertNotEquals(null, liveNight);
    assertEquals(liveNight, unifiedNight);

    // The lookup table is fetched once and inspected for both weight matrices.
    final InMemoryLookupTable unifiedTable = (InMemoryLookupTable) unifiedModel.lookupTable();
    assertEquals(null, unifiedTable.getSyn1());
    assertEquals(null, unifiedTable.getSyn1Neg());
}
示例11: testUnifiedLoaderArchive2
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入依赖的package包/类
/**
 * Loads the same archive with {@code readWord2Vec} and with
 * {@code readWord2VecModel(file, true)}, checks that both return the same vector for
 * "night", and that the unified model's lookup table has a non-null syn1.
 *
 * @throws Exception propagated from resource lookup or model loading
 */
@Test
public void testUnifiedLoaderArchive2() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final File modelArchive = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();

    final WordVectors liveModel = WordVectorSerializer.readWord2Vec(modelArchive);
    final WordVectors unifiedModel = WordVectorSerializer.readWord2VecModel(modelArchive, true);

    final INDArray liveNight = liveModel.getWordVectorMatrix("night");
    final INDArray unifiedNight = unifiedModel.getWordVectorMatrix("night");

    assertNotEquals(null, liveNight);
    assertEquals(liveNight, unifiedNight);

    final InMemoryLookupTable unifiedTable = (InMemoryLookupTable) unifiedModel.lookupTable();
    assertNotEquals(null, unifiedTable.getSyn1());
}
示例12: testUnifiedLoaderText
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入依赖的package包/类
/**
 * Tests CSV file loading via the unified loader: {@code readWord2VecModel(textFile, true)}
 * must return the same vector for "Morgan_Freeman" as {@code loadTxtVectors(textFile)}.
 *
 * @throws Exception propagated from model loading
 */
@Test
public void testUnifiedLoaderText() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    final WordVectors liveModel = WordVectorSerializer.loadTxtVectors(textFile);
    final WordVectors unifiedModel = WordVectorSerializer.readWord2VecModel(textFile, true);

    final INDArray liveVector = liveModel.getWordVectorMatrix("Morgan_Freeman");
    final INDArray unifiedVector = unifiedModel.getWordVectorMatrix("Morgan_Freeman");

    assertNotEquals(null, liveVector);
    assertEquals(liveVector, unifiedVector);

    // The EXTENDED model was requested, but the file carries no syn1/huffman info,
    // so the loader is expected to silently fall back to the simplified model.
    final InMemoryLookupTable unifiedTable = (InMemoryLookupTable) unifiedModel.lookupTable();
    assertEquals(null, unifiedTable.getSyn1());
}
示例13: windows
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入依赖的package包/类
/**
 * Tokenizes {@code words}, keeps only the tokens for which {@code vectors} returns a
 * vector, and delegates to {@code windows(List, int)} to build the windows.
 * Note that padding for each window is created as well.
 *
 * @param words            the words to tokenize and construct windows from
 * @param tokenizerFactory tokenizer factory to use
 * @param windowSize       the window size to generate
 * @param vectors          word vectors used to decide which tokens are kept
 * @return the list of windows for the tokenized string
 * @throws IllegalStateException if no token has a vector
 */
public static List<Window> windows(String words, @NonNull TokenizerFactory tokenizerFactory, int windowSize,
        WordVectors vectors) {
    final List<String> knownTokens = new ArrayList<>();

    for (Tokenizer source = tokenizerFactory.create(words); source.hasMoreTokens();) {
        final String candidate = source.nextToken();

        // Without an UNK word defined there is no vector for this token, so it is skipped.
        if (vectors.getWordVectorMatrix(candidate) == null) {
            continue;
        }
        knownTokens.add(candidate);
    }

    if (knownTokens.isEmpty()) {
        throw new IllegalStateException("No tokens found for windows");
    }

    return windows(knownTokens, windowSize);
}
示例14: testGoogleModelForInference
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入依赖的package包/类
/**
 * Builds a {@code ParagraphVectors} model on top of existing Google News word vectors,
 * infers vectors for two sentences and logs their cosine similarity.
 *
 * <p>The test is {@code @Ignore}d and makes no assertions; it reads the model from the
 * hard-coded path {@code /ext/GoogleNews-vectors-negative300.bin.gz}.
 *
 * @throws Exception propagated from model loading or inference
 */
@Ignore
@Test
public void testGoogleModelForInference() throws Exception {
    WordVectors googleVectors = WordVectorSerializer.loadGoogleModelNonNormalized(
            new File("/ext/GoogleNews-vectors-negative300.bin.gz"), true, false);

    TokenizerFactory t = new DefaultTokenizerFactory();
    t.setTokenPreProcessor(new CommonPreprocessor());

    // iterations(10) was previously set twice in this chain; the redundant call is dropped.
    ParagraphVectors pv = new ParagraphVectors.Builder()
            .tokenizerFactory(t)
            .iterations(10)
            .useHierarchicSoftmax(false)
            .trainWordVectors(false)
            .useExistingWordVectors(googleVectors)
            .negativeSample(10)
            .sequenceLearningAlgorithm(new DM<VocabWord>())
            .build();

    INDArray vec1 = pv.inferVector("This text is pretty awesome");
    INDArray vec2 = pv.inferVector("Fantastic process of crazy things happening inside just for history purposes");

    log.info("vec1/vec2: {}", Transforms.cosineSim(vec1, vec2));
}
示例15: testGlove
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入依赖的package包/类
/**
 * Trains {@code Glove} on the lower-cased lines of {@code raw_sentences.txt} and checks
 * that "week" is among the 20 words nearest to "day".
 *
 * @throws Exception propagated from resource lookup or training
 */
@Test
public void testGlove() throws Exception {
    final Glove model = new Glove(true, 5, 100);

    final String corpusPath = new ClassPathResource("raw_sentences.txt").getFile().getAbsolutePath();
    final JavaRDD<String> lowerCasedLines = sc.textFile(corpusPath).map(new Function<String, String>() {
        @Override
        public String call(String line) throws Exception {
            return line.toLowerCase();
        }
    });

    final Pair<VocabCache<VocabWord>, GloveWeightLookupTable> trained = model.train(lowerCasedLines);

    // Same casts as before, just named: the serializer receives (lookup table, vocabulary).
    final InMemoryLookupTable lookupTable = (InMemoryLookupTable) trained.getSecond();
    final VocabCache vocabulary = (VocabCache) trained.getFirst();
    final WordVectors vectors = WordVectorSerializer.fromPair(new Pair<>(lookupTable, vocabulary));

    final Collection<String> nearestToDay = vectors.wordsNearest("day", 20);
    assertTrue(nearestToDay.contains("week"));
}