This article collects typical usage examples of the Java method org.deeplearning4j.models.embeddings.wordvectors.WordVectors.getWordVector. If you have been asking yourself how WordVectors.getWordVector is used, what it is for, or where to find examples of it, the curated code samples here may help. You can also explore further usage examples of the class the method belongs to, org.deeplearning4j.models.embeddings.wordvectors.WordVectors.
Below are 12 code examples of WordVectors.getWordVector, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code samples.
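Before the examples themselves, a minimal sketch of the method under discussion: getWordVector(String word) returns the embedding of a word as a plain double[], and may return null for out-of-vocabulary words (depending on the model's unknown-word handling), so guarding the lookup with hasWord is advisable. The vector file path below is a placeholder, not something taken from the examples.

import java.io.File;
import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer;
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors;

public class GetWordVectorDemo {
    public static void main(String[] args) throws Exception {
        // Load pre-trained vectors in word2vec text format (placeholder path).
        WordVectors vec = WordVectorSerializer.loadTxtVectors(new File("/path/to/vectors.txt"));
        String word = "day";
        if (vec.hasWord(word)) {
            // getWordVector returns the raw embedding as a double array.
            double[] embedding = vec.getWordVector(word);
            System.out.println(word + ": " + embedding.length + " dimensions");
        }
    }
}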
Example 1: SentimentExampleIterator
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; // import the package/class this method depends on
/**
 * @param dataDirectory the directory of the IMDB review data set
 * @param wordVectors WordVectors object
 * @param batchSize Size of each minibatch for training
 * @param truncateLength If reviews exceed this length (in tokens), only the first truncateLength tokens are used
 * @param train If true: return the training data. If false: return the testing data.
 */
public SentimentExampleIterator(String dataDirectory, WordVectors wordVectors, int batchSize, int truncateLength, boolean train) throws IOException {
    this.batchSize = batchSize;
    this.vectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length;
    File p = new File(FilenameUtils.concat(dataDirectory, "aclImdb/" + (train ? "train" : "test") + "/pos/") + "/");
    File n = new File(FilenameUtils.concat(dataDirectory, "aclImdb/" + (train ? "train" : "test") + "/neg/") + "/");
    positiveFiles = p.listFiles();
    negativeFiles = n.listFiles();
    this.wordVectors = wordVectors;
    this.truncateLength = truncateLength;
    tokenizerFactory = new DefaultTokenizerFactory();
    tokenizerFactory.setTokenPreProcessor(new CommonPreprocessor());
}
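A possible way to wire Example 1 up, as a hedged sketch: the paths, batch size, and truncation length below are placeholder values, and SentimentExampleIterator is assumed to implement DataSetIterator, as in the deeplearning4j examples it originates from.

WordVectors wordVectors = WordVectorSerializer.loadGoogleModel(new File("/path/to/GoogleNews-vectors-negative300.bin.gz"), true);
SentimentExampleIterator trainIter = new SentimentExampleIterator("/path/to/data", wordVectors, 32, 256, true);
while (trainIter.hasNext()) {
    DataSet batch = trainIter.next(); // features shaped [batchSize, vectorSize, truncateLength]
    // ... fit a network on the batch
}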
Example 2: RnnTextEmbeddingDataSetIterator
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; // import the package/class this method depends on
/**
 * @param data Instances with documents and labels
 * @param wordVectors WordVectors object
 * @param tokenFact Tokenizer factory
 * @param tpp Token pre processor
 * @param stopWords Stop word object
 * @param sentenceProvider Labeled sentence provider
 * @param batchSize Size of each minibatch for training
 * @param truncateLength If documents exceed this length (in tokens), only the first truncateLength tokens are used
 */
public RnnTextEmbeddingDataSetIterator(
        Instances data,
        WordVectors wordVectors,
        TokenizerFactory tokenFact,
        TokenPreProcess tpp,
        AbstractStopwords stopWords,
        LabeledSentenceProvider sentenceProvider,
        int batchSize,
        int truncateLength) {
    this.batchSize = batchSize;
    this.vectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length;
    this.data = data;
    this.wordVectors = wordVectors;
    this.truncateLength = truncateLength;
    this.tokenizerFactory = tokenFact;
    this.tokenizerFactory.setTokenPreProcessor(tpp);
    this.stopWords = stopWords;
    this.sentenceProvider = sentenceProvider;
}
Example 3: testWriteWordVectors
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; // import the package/class this method depends on
@Test
@Ignore
public void testWriteWordVectors() throws IOException {
    WordVectors vec = WordVectorSerializer.loadGoogleModel(binaryFile, true);
    InMemoryLookupTable lookupTable = (InMemoryLookupTable) vec.lookupTable();
    InMemoryLookupCache lookupCache = (InMemoryLookupCache) vec.vocab();
    WordVectorSerializer.writeWordVectors(lookupTable, lookupCache, pathToWriteto);
    WordVectors wordVectors = WordVectorSerializer.loadTxtVectors(new File(pathToWriteto));
    double[] wordVector1 = wordVectors.getWordVector("Morgan_Freeman");
    double[] wordVector2 = wordVectors.getWordVector("JA_Montalbano");
    assertTrue(wordVector1.length == 300);
    assertTrue(wordVector2.length == 300);
    assertEquals(Doubles.asList(wordVector1).get(0), 0.044423, 1e-3);
    assertEquals(Doubles.asList(wordVector2).get(0), 0.051964, 1e-3);
}
Example 4: testFromTableAndVocab
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; // import the package/class this method depends on
@Test
@Ignore
public void testFromTableAndVocab() throws IOException {
    WordVectors vec = WordVectorSerializer.loadGoogleModel(textFile, false);
    InMemoryLookupTable lookupTable = (InMemoryLookupTable) vec.lookupTable();
    InMemoryLookupCache lookupCache = (InMemoryLookupCache) vec.vocab();
    WordVectors wordVectors = WordVectorSerializer.fromTableAndVocab(lookupTable, lookupCache);
    double[] wordVector1 = wordVectors.getWordVector("Morgan_Freeman");
    double[] wordVector2 = wordVectors.getWordVector("JA_Montalbano");
    assertTrue(wordVector1.length == 300);
    assertTrue(wordVector2.length == 300);
    assertEquals(Doubles.asList(wordVector1).get(0), 0.044423, 1e-3);
    assertEquals(Doubles.asList(wordVector2).get(0), 0.051964, 1e-3);
}
Example 5: getSenseEmbedding
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; // import the package/class this method depends on
/**
 * @param wordVector The Word Embeddings dictionary
 * @param synset The synset whose sense embedding is computed
 * @param word The word the synset is associated with
 * @param senseComputation The strategy used to combine the sense-bag embeddings into one vector
 * @return The sense embedding of a synset
 */
public static double[] getSenseEmbedding(WordVectors wordVector, Synset synset, String word, SenseComputation senseComputation) {
    if (wordEmbeddings.containsKey(synset)) {
        return ArrayUtils.toPrimitive(wordEmbeddings.get(synset));
    }
    String[] words = getSenseBag(synset, word);
    double[] senseEmbedding, tmpEmbedding;
    Double[] tmpEmbedding2, tmpSenseEmbedding;
    ArrayList<Double[]> senseEmbeddings = new ArrayList<>();
    // For each word in the sense bag, get the corresponding word embedding and store it in an array
    for (String w : words) {
        if (w != null) {
            if (wordVector.hasWord(w)) {
                tmpEmbedding = wordVector.getWordVector(w);
                tmpEmbedding2 = new Double[tmpEmbedding.length];
                for (int i = 0; i < tmpEmbedding.length; i++) {
                    tmpEmbedding2[i] = tmpEmbedding[i];
                }
                senseEmbeddings.add(tmpEmbedding2);
            }
        }
    }
    senseEmbedding = senseComputation.compute(senseEmbeddings);
    tmpSenseEmbedding = new Double[senseEmbedding.length];
    for (int i = 0; i < tmpSenseEmbedding.length; i++) {
        tmpSenseEmbedding[i] = senseEmbedding[i];
    }
    wordEmbeddings.put(synset, tmpSenseEmbedding);
    return senseEmbedding;
}
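For context, SenseComputation is a project-specific strategy object; the example above only shows that it turns a list of word embeddings into a single vector. A hypothetical averaging implementation could look like the sketch below; the interface name and the compute signature are inferred from the call site above, not taken from the project's source.

import java.util.List;

// Hypothetical: averages the sense-bag embeddings element-wise.
public class AveragingSenseComputation implements SenseComputation {
    @Override
    public double[] compute(List<Double[]> embeddings) {
        if (embeddings.isEmpty()) {
            return new double[0];
        }
        int dim = embeddings.get(0).length;
        double[] mean = new double[dim];
        // Sum all embeddings element-wise, then divide by the count.
        for (Double[] e : embeddings) {
            for (int i = 0; i < dim; i++) {
                mean[i] += e[i];
            }
        }
        for (int i = 0; i < dim; i++) {
            mean[i] /= embeddings.size();
        }
        return mean;
    }
}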
Example 6: testTextCnnTextFilesRegression
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; // import the package/class this method depends on
@Test
public void testTextCnnTextFilesRegression() throws Exception {
    CnnTextFilesEmbeddingInstanceIterator cnnTextIter = new CnnTextFilesEmbeddingInstanceIterator();
    cnnTextIter.setTrainBatchSize(64);
    cnnTextIter.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors());
    cnnTextIter.setTextsLocation(DatasetLoader.loadAngerFilesDir());
    clf.setInstanceIterator(cnnTextIter);
    cnnTextIter.initialize();
    final WordVectors wordVectors = cnnTextIter.getWordVectors();
    int vectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length;
    ConvolutionLayer conv1 = new ConvolutionLayer();
    conv1.setKernelSize(new int[] {3, vectorSize});
    conv1.setNOut(10);
    conv1.setStride(new int[] {1, vectorSize});
    conv1.setConvolutionMode(ConvolutionMode.Same);
    ConvolutionLayer conv2 = new ConvolutionLayer();
    conv2.setKernelSize(new int[] {2, vectorSize});
    conv2.setNOut(10);
    conv2.setStride(new int[] {1, vectorSize});
    conv2.setConvolutionMode(ConvolutionMode.Same);
    GlobalPoolingLayer gpl = new GlobalPoolingLayer();
    OutputLayer out = new OutputLayer();
    out.setLossFn(new LossMSE());
    out.setActivationFn(new ActivationIdentity());
    clf.setLayers(conv1, conv2, gpl, out);
    clf.setCacheMode(CacheMode.MEMORY);
    final Instances data = DatasetLoader.loadAngerMeta();
    TestUtil.holdout(clf, data);
}
Example 7: testTextCnnTextFilesClassification
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; // import the package/class this method depends on
@Test
public void testTextCnnTextFilesClassification() throws Exception {
    CnnTextFilesEmbeddingInstanceIterator cnnTextIter = new CnnTextFilesEmbeddingInstanceIterator();
    cnnTextIter.setTrainBatchSize(64);
    cnnTextIter.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors());
    cnnTextIter.setTextsLocation(DatasetLoader.loadAngerFilesDir());
    clf.setInstanceIterator(cnnTextIter);
    cnnTextIter.initialize();
    final WordVectors wordVectors = cnnTextIter.getWordVectors();
    int vectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length;
    ConvolutionLayer conv1 = new ConvolutionLayer();
    conv1.setKernelSize(new int[] {4, vectorSize});
    conv1.setNOut(10);
    conv1.setStride(new int[] {1, vectorSize});
    conv1.setConvolutionMode(ConvolutionMode.Same);
    conv1.setDropOut(0.2);
    conv1.setActivationFn(new ActivationReLU());
    ConvolutionLayer conv2 = new ConvolutionLayer();
    conv2.setKernelSize(new int[] {3, vectorSize});
    conv2.setNOut(10);
    conv2.setStride(new int[] {1, vectorSize});
    conv2.setConvolutionMode(ConvolutionMode.Same);
    conv2.setDropOut(0.2);
    conv2.setActivationFn(new ActivationReLU());
    GlobalPoolingLayer gpl = new GlobalPoolingLayer();
    gpl.setDropOut(0.33);
    OutputLayer out = new OutputLayer();
    clf.setLayers(conv1, conv2, gpl, out);
    clf.setCacheMode(CacheMode.MEMORY);
    final Instances data = DatasetLoader.loadAngerMetaClassification();
    TestUtil.holdout(clf, data);
}
Example 8: getNMostSimilarByVector
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; // import the package/class this method depends on
public ArrayList<String> getNMostSimilarByVector(int n, String esw, WordVectors ven, double[] v) {
    // Target-language vector lookup
    ArrayList<String> candidates = new ArrayList<String>();
    ArrayList<Term> arr = new ArrayList<Term>();
    int numbEnglishWords = ven.vocab().numWords();
    // Tracks the highest similarity seen; reported below
    double similarity = 0.0;
    for (int i = 0; i < numbEnglishWords; i++) {
        String w = ven.vocab().wordAtIndex(i);
        double[] wordVector_en = ven.getWordVector(w);
        double simAux = cosineSimilarity(v, wordVector_en);
        if (simAux > similarity) {
            similarity = simAux;
        }
        Term t = new Term(w, simAux);
        arr.add(t);
    }
    Collections.sort(arr, new Comparator<Term>() {
        @Override
        public int compare(Term t1, Term t2) {
            // Sort from max to min
            return Double.compare(t2.score, t1.score);
        }
    });
    System.out.println("-----Closest Words to Spanish word " + esw + " in English: ");
    for (int i = 0; i < n && i < arr.size(); i++) {
        String term = arr.get(i).term;
        candidates.add(term);
        System.out.println(term);
    }
    System.out.println("--Score: " + similarity);
    return candidates;
}
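cosineSimilarity is a helper of the surrounding class, not part of the WordVectors API. For reference, the standard formula it presumably implements looks like the sketch below (not necessarily the project's exact code):

private static double cosineSimilarity(double[] a, double[] b) {
    double dot = 0.0, normA = 0.0, normB = 0.0;
    for (int i = 0; i < a.length; i++) {
        dot += a[i] * b[i];    // dot product
        normA += a[i] * a[i];  // squared norm of a
        normB += b[i] * b[i];  // squared norm of b
    }
    return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}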
Example 9: testGetMatrix
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; // import the package/class this method depends on
@Test
public void testGetMatrix() throws IOException {
    // Reload source and target vectors
    WordVectors ves = WordVectorSerializer.loadTxtVectors(new File(sourceVector));
    System.out.println("Source vector loaded");
    WordVectors ven = WordVectorSerializer.loadTxtVectors(new File(targetVector));
    System.out.println("Target vector loaded");
    // Load the source/target training pairs from the dictionary
    VectorTranslation mapper = new VectorTranslation(dictionaryFile, dictionaryLength, columns);
    DoubleMatrix translationMatrix = mapper.calculateTranslationMatrix(ves, ven);
    // Example: Spanish -> English
    String[] terms1 = {
        "ser",
        "haber",
        "espacio",
        "mostrar",
        "asesino",
        "intimidad",
        // Hey, I know the numbers, too!
        "dos", "tres", "cuatro", "sesenta",
        "honradez",
        "banquero",
        "medios",
        "deporte",
        "decidido"
    };
    for (String term : terms1) {
        DoubleMatrix vsource = new DoubleMatrix(ves.getWordVector(term));
        double[] vtargetestimated = translationMatrix.mmul(vsource).transpose().toArray();
        mapper.getNMostSimilarByVector(n, term, ven, vtargetestimated);
    }
}
Example 10: testLoaderBinary
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; // import the package/class this method depends on
@Test
public void testLoaderBinary() throws IOException {
    WordVectors vec = WordVectorSerializer.loadGoogleModel(binaryFile, true);
    assertEquals(vec.vocab().numWords(), 30);
    assertTrue(vec.vocab().hasToken("Morgan_Freeman"));
    assertTrue(vec.vocab().hasToken("JA_Montalbano"));
    double[] wordVector1 = vec.getWordVector("Morgan_Freeman");
    double[] wordVector2 = vec.getWordVector("JA_Montalbano");
    assertTrue(wordVector1.length == 300);
    assertTrue(wordVector2.length == 300);
    assertEquals(Doubles.asList(wordVector1).get(0), 0.044423, 1e-3);
    assertEquals(Doubles.asList(wordVector2).get(0), 0.051964, 1e-3);
}
Example 11: testTextCnnClassification
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; // import the package/class this method depends on
@Test
public void testTextCnnClassification() throws Exception {
    CnnTextEmbeddingInstanceIterator cnnTextIter = new CnnTextEmbeddingInstanceIterator();
    cnnTextIter.setTrainBatchSize(128);
    cnnTextIter.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors());
    clf.setInstanceIterator(cnnTextIter);
    cnnTextIter.initialize();
    final WordVectors wordVectors = cnnTextIter.getWordVectors();
    int vectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length;
    ConvolutionLayer conv1 = new ConvolutionLayer();
    conv1.setKernelSize(new int[] {4, vectorSize});
    conv1.setNOut(10);
    conv1.setStride(new int[] {1, vectorSize});
    conv1.setConvolutionMode(ConvolutionMode.Same);
    conv1.setDropOut(0.2);
    conv1.setActivationFn(new ActivationReLU());
    BatchNormalization bn1 = new BatchNormalization();
    ConvolutionLayer conv2 = new ConvolutionLayer();
    conv2.setKernelSize(new int[] {3, vectorSize});
    conv2.setNOut(10);
    conv2.setStride(new int[] {1, vectorSize});
    conv2.setConvolutionMode(ConvolutionMode.Same);
    conv2.setDropOut(0.2);
    conv2.setActivationFn(new ActivationReLU());
    BatchNormalization bn2 = new BatchNormalization();
    ConvolutionLayer conv3 = new ConvolutionLayer();
    conv3.setKernelSize(new int[] {2, vectorSize});
    conv3.setNOut(10);
    conv3.setStride(new int[] {1, vectorSize});
    conv3.setConvolutionMode(ConvolutionMode.Same);
    conv3.setDropOut(0.2);
    conv3.setActivationFn(new ActivationReLU());
    BatchNormalization bn3 = new BatchNormalization();
    GlobalPoolingLayer gpl = new GlobalPoolingLayer();
    gpl.setDropOut(0.33);
    OutputLayer out = new OutputLayer();
    // clf.setLayers(conv1, bn1, conv2, bn2, conv3, bn3, gpl, out);
    clf.setLayers(conv1, conv2, conv3, gpl, out);
    // clf.setNumEpochs(50);
    clf.setCacheMode(CacheMode.MEMORY);
    final EpochListener l = new EpochListener();
    l.setN(1);
    clf.setIterationListener(l);
    clf.setEarlyStopping(new EarlyStopping(10, 15));
    clf.setDebug(true);
    // NNC
    NeuralNetConfiguration nnc = new NeuralNetConfiguration();
    nnc.setLearningRate(0.01);
    nnc.setUseRegularization(true);
    nnc.setL2(1e-3);
    clf.setNeuralNetConfiguration(nnc);
    // Data
    final Instances data = DatasetLoader.loadImdb();
    data.randomize(new Random(42));
    RemovePercentage rp = new RemovePercentage();
    rp.setInputFormat(data);
    rp.setPercentage(98);
    final Instances dataFiltered = Filter.useFilter(data, rp);
    TestUtil.holdout(clf, dataFiltered);
}
Example 12: testTextCnnRegression
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; // import the package/class this method depends on
@Test
public void testTextCnnRegression() throws Exception {
    CnnTextEmbeddingInstanceIterator cnnTextIter = new CnnTextEmbeddingInstanceIterator();
    cnnTextIter.setTrainBatchSize(64);
    cnnTextIter.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors());
    clf.setInstanceIterator(cnnTextIter);
    cnnTextIter.initialize();
    final WordVectors wordVectors = cnnTextIter.getWordVectors();
    int vectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length;
    ConvolutionLayer conv1 = new ConvolutionLayer();
    conv1.setKernelSize(new int[] {3, vectorSize});
    conv1.setNOut(10);
    conv1.setStride(new int[] {1, vectorSize});
    conv1.setConvolutionMode(ConvolutionMode.Same);
    ConvolutionLayer conv2 = new ConvolutionLayer();
    conv2.setKernelSize(new int[] {2, vectorSize});
    conv2.setNOut(10);
    conv2.setStride(new int[] {1, vectorSize});
    conv2.setConvolutionMode(ConvolutionMode.Same);
    GlobalPoolingLayer gpl = new GlobalPoolingLayer();
    OutputLayer out = new OutputLayer();
    out.setLossFn(new LossMSE());
    out.setActivationFn(new ActivationIdentity());
    clf.setLayers(conv1, conv2, gpl, out);
    // clf.setNumEpochs(200);
    clf.setCacheMode(CacheMode.MEMORY);
    final EpochListener l = new EpochListener();
    l.setN(20);
    clf.setIterationListener(l);
    clf.setDebug(true);
    clf.setEarlyStopping(new EarlyStopping(0, 10));
    final Instances data = DatasetLoader.loadAnger();
    NeuralNetConfiguration nnc = new NeuralNetConfiguration();
    nnc.setLearningRate(0.01);
    nnc.setUseRegularization(true);
    nnc.setL2(0.00001);
    clf.setNeuralNetConfiguration(nnc);
    TestUtil.holdout(clf, data);
}