当前位置: 首页>>代码示例>>Java>>正文


Java WordVectors.getWordVector方法代码示例

本文整理汇总了Java中org.deeplearning4j.models.embeddings.wordvectors.WordVectors.getWordVector方法的典型用法代码示例。如果您正苦于以下问题:Java WordVectors.getWordVector方法的具体用法?Java WordVectors.getWordVector怎么用?Java WordVectors.getWordVector使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.deeplearning4j.models.embeddings.wordvectors.WordVectors的用法示例。


在下文中一共展示了WordVectors.getWordVector方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: SentimentExampleIterator

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Builds an iterator over the IMDB review data set (positive/negative folders).
 *
 * @param dataDirectory the directory of the IMDB review data set
 * @param wordVectors WordVectors object supplying the word embeddings
 * @param batchSize Size of each minibatch for training
 * @param truncateLength reviews longer than this are truncated
 * @param train If true: return the training data. If false: return the testing data.
 */
public SentimentExampleIterator(String dataDirectory, WordVectors wordVectors, int batchSize, int truncateLength, boolean train) throws IOException {
	this.wordVectors = wordVectors;
	this.batchSize = batchSize;
	this.truncateLength = truncateLength;
	// Embedding dimensionality is read off the first word in the vocabulary.
	this.vectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length;

	// Pick the train or test split and list its positive/negative review files.
	String split = train ? "train" : "test";
	File posDir = new File(FilenameUtils.concat(dataDirectory, "aclImdb/" + split + "/pos/") + "/");
	File negDir = new File(FilenameUtils.concat(dataDirectory, "aclImdb/" + split + "/neg/") + "/");
	positiveFiles = posDir.listFiles();
	negativeFiles = negDir.listFiles();

	tokenizerFactory = new DefaultTokenizerFactory();
	tokenizerFactory.setTokenPreProcessor(new CommonPreprocessor());
}
 
开发者ID:IsaacChanghau,项目名称:NeuralNetworksLite,代码行数:24,代码来源:SentimentExampleIterator.java

示例2: RnnTextEmbeddingDataSetIterator

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Creates an iterator that turns labeled documents into sequences of word
 * embeddings for RNN training.
 *
 * @param data Instances with documents and labels
 * @param wordVectors WordVectors object supplying the word embeddings
 * @param tokenFact Tokenizer factory
 * @param tpp Token pre processor applied to every token
 * @param stopWords Stop word object
 * @param sentenceProvider Provider of the labeled sentences to iterate over
 * @param batchSize Size of each minibatch for training
 * @param truncateLength Length at which longer documents are cut off
 */
public RnnTextEmbeddingDataSetIterator(
    Instances data,
    WordVectors wordVectors,
    TokenizerFactory tokenFact,
    TokenPreProcess tpp,
    AbstractStopwords stopWords,
    LabeledSentenceProvider sentenceProvider,
    int batchSize,
    int truncateLength) {
  this.batchSize = batchSize;
  // Embedding dimensionality is read off the first word in the vocabulary.
  this.vectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length;

  this.data = data;

  this.wordVectors = wordVectors;
  this.truncateLength = truncateLength;

  this.tokenizerFactory = tokenFact;
  this.tokenizerFactory.setTokenPreProcessor(tpp);
  this.stopWords = stopWords;
  this.sentenceProvider = sentenceProvider;
}
 
开发者ID:Waikato,项目名称:wekaDeeplearning4j,代码行数:32,代码来源:RnnTextEmbeddingDataSetIterator.java

示例3: testWriteWordVectors

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
// Round-trip test: load the binary Google model, write it out as text vectors,
// reload the text file, and verify two known word vectors survive intact.
@Test
@Ignore
public void testWriteWordVectors() throws IOException {
    WordVectors vec = WordVectorSerializer.loadGoogleModel(binaryFile, true);
    InMemoryLookupTable lookupTable = (InMemoryLookupTable) vec.lookupTable();
    InMemoryLookupCache lookupCache = (InMemoryLookupCache) vec.vocab();
    WordVectorSerializer.writeWordVectors(lookupTable, lookupCache, pathToWriteto);

    // Reload and check dimensionality (300) and the first component of each
    // vector to within 1e-3 of the values in the original model.
    WordVectors wordVectors = WordVectorSerializer.loadTxtVectors(new File(pathToWriteto));
    double[] wordVector1 = wordVectors.getWordVector("Morgan_Freeman");
    double[] wordVector2 = wordVectors.getWordVector("JA_Montalbano");
    assertTrue(wordVector1.length == 300);
    assertTrue(wordVector2.length == 300);
    assertEquals(Doubles.asList(wordVector1).get(0), 0.044423, 1e-3);
    assertEquals(Doubles.asList(wordVector2).get(0), 0.051964, 1e-3);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:17,代码来源:WordVectorSerializerTest.java

示例4: testFromTableAndVocab

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
// Verifies that a WordVectors instance rebuilt from a lookup table plus vocab
// cache returns the same vectors as the model it was extracted from.
@Test
@Ignore
public void testFromTableAndVocab() throws IOException {

    WordVectors vec = WordVectorSerializer.loadGoogleModel(textFile, false);
    InMemoryLookupTable lookupTable = (InMemoryLookupTable) vec.lookupTable();
    InMemoryLookupCache lookupCache = (InMemoryLookupCache) vec.vocab();

    // Rebuild from the extracted parts and check two known word vectors:
    // dimensionality 300 and first component within 1e-3.
    WordVectors wordVectors = WordVectorSerializer.fromTableAndVocab(lookupTable, lookupCache);
    double[] wordVector1 = wordVectors.getWordVector("Morgan_Freeman");
    double[] wordVector2 = wordVectors.getWordVector("JA_Montalbano");
    assertTrue(wordVector1.length == 300);
    assertTrue(wordVector2.length == 300);
    assertEquals(Doubles.asList(wordVector1).get(0), 0.044423, 1e-3);
    assertEquals(Doubles.asList(wordVector2).get(0), 0.051964, 1e-3);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:17,代码来源:WordVectorSerializerTest.java

示例5: getSenseEmbedding

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Computes the sense embedding of a synset by aggregating the word embeddings
 * of the words in its sense bag, caching the result per synset.
 *
 * @param wordVector The Word Embeddings dictionary
 * @param synset The synset whose sense embedding is requested
 * @param word The word used (together with the synset) to build the sense bag
 * @param senseComputation Strategy that aggregates the collected word embeddings
 * @return The sense embedding of a synset
 */
public static double[] getSenseEmbedding(WordVectors wordVector, Synset synset, String word, SenseComputation senseComputation) {
    // NOTE(review): the cache is keyed by synset only, but the sense bag also
    // depends on `word` — confirm a synset is never queried with different
    // words, otherwise a stale embedding could be served.
    if (wordEmbeddings.containsKey(synset)) {
        return ArrayUtils.toPrimitive(wordEmbeddings.get(synset));
    }

    String[] words = getSenseBag(synset, word);

    // Collect the embedding of every sense-bag word known to the dictionary,
    // boxing each vector for the Double[]-based computation interface.
    ArrayList<Double[]> senseEmbeddings = new ArrayList<>();
    for (String w : words) {
        if (w != null && wordVector.hasWord(w)) {
            double[] embedding = wordVector.getWordVector(w);
            Double[] boxed = new Double[embedding.length];
            for (int i = 0; i < embedding.length; i++) {
                boxed[i] = embedding[i];
            }
            senseEmbeddings.add(boxed);
        }
    }

    double[] senseEmbedding = senseComputation.compute(senseEmbeddings);

    // Cache a boxed copy so later calls for this synset are a map lookup.
    Double[] cached = new Double[senseEmbedding.length];
    for (int i = 0; i < cached.length; i++) {
        cached[i] = senseEmbedding[i];
    }
    wordEmbeddings.put(synset, cached);

    return senseEmbedding;
}
 
开发者ID:butnaruandrei,项目名称:ShotgunWSD,代码行数:44,代码来源:SenseEmbedding.java

示例6: testTextCnnTextFilesRegression

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
// Regression smoke test: two parallel convolution branches over word-embedding
// representations of text files, global pooling, and an MSE/identity output.
@Test
public void testTextCnnTextFilesRegression() throws Exception {
  CnnTextFilesEmbeddingInstanceIterator cnnTextIter = new CnnTextFilesEmbeddingInstanceIterator();
  cnnTextIter.setTrainBatchSize(64);
  cnnTextIter.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors());
  cnnTextIter.setTextsLocation(DatasetLoader.loadAngerFilesDir());
  clf.setInstanceIterator(cnnTextIter);

  cnnTextIter.initialize();
  final WordVectors wordVectors = cnnTextIter.getWordVectors();
  // Kernel width and stride are set to the embedding dimensionality so each
  // convolution window spans whole word vectors.
  int vectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length;

  ConvolutionLayer conv1 = new ConvolutionLayer();
  conv1.setKernelSize(new int[] {3, vectorSize});
  conv1.setNOut(10);
  conv1.setStride(new int[] {1, vectorSize});
  conv1.setConvolutionMode(ConvolutionMode.Same);

  ConvolutionLayer conv2 = new ConvolutionLayer();
  conv2.setKernelSize(new int[] {2, vectorSize});
  conv2.setNOut(10);
  conv2.setStride(new int[] {1, vectorSize});
  conv2.setConvolutionMode(ConvolutionMode.Same);

  GlobalPoolingLayer gpl = new GlobalPoolingLayer();

  // MSE loss + identity activation: regression output, not classification.
  OutputLayer out = new OutputLayer();
  out.setLossFn(new LossMSE());
  out.setActivationFn(new ActivationIdentity());

  clf.setLayers(conv1, conv2, gpl, out);
  clf.setCacheMode(CacheMode.MEMORY);
  final Instances data = DatasetLoader.loadAngerMeta();
  TestUtil.holdout(clf, data);
}
 
开发者ID:Waikato,项目名称:wekaDeeplearning4j,代码行数:36,代码来源:Dl4jMlpTest.java

示例7: testTextCnnTextFilesClassification

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
// Classification smoke test: CNN over word-embedding representations of text
// files with dropout and ReLU activations, default (classification) output.
@Test
public void testTextCnnTextFilesClassification() throws Exception {
  CnnTextFilesEmbeddingInstanceIterator cnnTextIter = new CnnTextFilesEmbeddingInstanceIterator();
  cnnTextIter.setTrainBatchSize(64);
  cnnTextIter.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors());
  cnnTextIter.setTextsLocation(DatasetLoader.loadAngerFilesDir());
  clf.setInstanceIterator(cnnTextIter);

  cnnTextIter.initialize();
  final WordVectors wordVectors = cnnTextIter.getWordVectors();
  // Kernel width and stride are set to the embedding dimensionality so each
  // convolution window spans whole word vectors.
  int vectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length;

  ConvolutionLayer conv1 = new ConvolutionLayer();
  conv1.setKernelSize(new int[] {4, vectorSize});
  conv1.setNOut(10);
  conv1.setStride(new int[] {1, vectorSize});
  conv1.setConvolutionMode(ConvolutionMode.Same);
  conv1.setDropOut(0.2);
  conv1.setActivationFn(new ActivationReLU());

  ConvolutionLayer conv2 = new ConvolutionLayer();
  conv2.setKernelSize(new int[] {3, vectorSize});
  conv2.setNOut(10);
  conv2.setStride(new int[] {1, vectorSize});
  conv2.setConvolutionMode(ConvolutionMode.Same);
  conv2.setDropOut(0.2);
  conv2.setActivationFn(new ActivationReLU());

  GlobalPoolingLayer gpl = new GlobalPoolingLayer();
  gpl.setDropOut(0.33);

  OutputLayer out = new OutputLayer();

  clf.setLayers(conv1, conv2, gpl, out);
  clf.setCacheMode(CacheMode.MEMORY);
  final Instances data = DatasetLoader.loadAngerMetaClassification();
  TestUtil.holdout(clf, data);
}
 
开发者ID:Waikato,项目名称:wekaDeeplearning4j,代码行数:39,代码来源:Dl4jMlpTest.java

示例8: getNMostSimilarByVector

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
/**
 * Finds the n target-language (English) words whose embeddings are most
 * similar (by cosine similarity) to the given vector, printing them as a side
 * effect together with the single best score.
 *
 * @param n   number of candidate translations to return
 * @param esw the Spanish source word (used only for the printed header)
 * @param ven target-language word vectors to search
 * @param v   the (estimated) target-space vector to match against
 * @return the n closest English words, best first
 */
public ArrayList<String> getNMostSimilarByVector(int n, String esw, WordVectors ven, double[] v) {
	//Target language vectors lookup
	ArrayList<String> candidates = new ArrayList<String>();
	ArrayList<Term> arr = new ArrayList<Term>();
	int numbEnglishWords = ven.vocab().numWords();
	// Primitive double instead of boxed Double: no per-iteration autoboxing.
	double similarity = 0.0; // best score seen, reported at the end
	for (int i = 0; i < numbEnglishWords; i++) {
		String w = ven.vocab().wordAtIndex(i);
		double[] wordVector_en = ven.getWordVector(w);
		double simAux = cosineSimilarity(v, wordVector_en);
		if (simAux > similarity) {
			similarity = simAux;
		}
		arr.add(new Term(w, simAux));
	}
	Collections.sort(arr, new Comparator<Term>() {
		@Override
		public int compare(Term t1, Term t2) {
			// Sort from max to min; Double.compare avoids the deprecated
			// new Double(...) boxing constructors of the original.
			return Double.compare(t2.score, t1.score);
		}
	});
	System.out.println("-----Closest Words to spanish word " + esw + " in English: ");
	for (int i=0; i <n && i < arr.size(); i++) {
		String term = arr.get(i).term;
		candidates.add(term);
		System.out.println(term);
	}
	System.out.println("--Score: " + similarity);
	return candidates;
}
 
开发者ID:josemanuelgp,项目名称:word2vec_vector-translation-java,代码行数:33,代码来源:VectorTranslation.java

示例9: testGetMatrix

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
// End-to-end check of the vector-translation approach: learn a linear mapping
// from the Spanish to the English embedding space from a dictionary, then map
// a list of Spanish terms and print their nearest English neighbours.
@Test
public void testGetMatrix() throws IOException {
	// Reload target and source vectors
	WordVectors ves = WordVectorSerializer.loadTxtVectors(new File(sourceVector));
	//Load source and target training set from dictionary
	System.out.println("Source vector loaded");
	WordVectors ven = WordVectorSerializer.loadTxtVectors(new File(targetVector));
	System.out.println("Target vector loaded");
	VectorTranslation mapper = new VectorTranslation(dictionaryFile, dictionaryLength, columns);
	DoubleMatrix translationMatrix = mapper.calculateTranslationMatrix(ves, ven);
	//Example Spanish -> English
	String[] terms1 = {
			"ser",
			"haber",
			"espacio",
			"mostrar",
			"asesino",
			"intimidad",
			// Hey, I know the numbers, too!
			"dos", "tres", "cuatro", "sesenta",
			"honradez",
			"banquero",
			"medios",
			"deporte",
			"decidido"
	};
	// Map each Spanish vector into English space via the learned matrix and
	// look up the n most similar English words for it.
	for (String term : terms1) {
		DoubleMatrix vsource = new DoubleMatrix(ves.getWordVector(term));
        double [] vtargetestimated = translationMatrix.mmul(vsource).transpose().toArray();
        mapper.getNMostSimilarByVector(n, term, ven, vtargetestimated);
	}
}
 
开发者ID:josemanuelgp,项目名称:word2vec_vector-translation-java,代码行数:33,代码来源:VectorTranslationTest.java

示例10: testLoaderBinary

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
// Loads the binary Google News model fixture and verifies vocabulary size,
// token presence, vector dimensionality (300), and two known vector values.
@Test
public void testLoaderBinary() throws IOException {
    WordVectors vec = WordVectorSerializer.loadGoogleModel(binaryFile, true);
    assertEquals(vec.vocab().numWords(), 30);
    assertTrue(vec.vocab().hasToken("Morgan_Freeman"));
    assertTrue(vec.vocab().hasToken("JA_Montalbano"));
    double[] wordVector1 = vec.getWordVector("Morgan_Freeman");
    double[] wordVector2 = vec.getWordVector("JA_Montalbano");
    assertTrue(wordVector1.length == 300);
    assertTrue(wordVector2.length == 300);
    assertEquals(Doubles.asList(wordVector1).get(0), 0.044423, 1e-3);
    assertEquals(Doubles.asList(wordVector2).get(0), 0.051964, 1e-3);
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:14,代码来源:WordVectorSerializerTest.java

示例11: testTextCnnClassification

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
// Classification test on a 2% sample of IMDB: three parallel convolution
// branches (kernel heights 4/3/2) with dropout and ReLU, global pooling,
// early stopping, and L2-regularized SGD. Batch-norm layers are built but
// currently left out of the network (see the commented-out setLayers call).
@Test
public void testTextCnnClassification() throws Exception {
  CnnTextEmbeddingInstanceIterator cnnTextIter = new CnnTextEmbeddingInstanceIterator();
  cnnTextIter.setTrainBatchSize(128);
  cnnTextIter.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors());
  clf.setInstanceIterator(cnnTextIter);

  cnnTextIter.initialize();
  final WordVectors wordVectors = cnnTextIter.getWordVectors();
  // Kernel width and stride are set to the embedding dimensionality so each
  // convolution window spans whole word vectors.
  int vectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length;

  ConvolutionLayer conv1 = new ConvolutionLayer();
  conv1.setKernelSize(new int[] {4, vectorSize});
  conv1.setNOut(10);
  conv1.setStride(new int[] {1, vectorSize});
  conv1.setConvolutionMode(ConvolutionMode.Same);
  conv1.setDropOut(0.2);
  conv1.setActivationFn(new ActivationReLU());

  BatchNormalization bn1 = new BatchNormalization();

  ConvolutionLayer conv2 = new ConvolutionLayer();
  conv2.setKernelSize(new int[] {3, vectorSize});
  conv2.setNOut(10);
  conv2.setStride(new int[] {1, vectorSize});
  conv2.setConvolutionMode(ConvolutionMode.Same);
  conv2.setDropOut(0.2);
  conv2.setActivationFn(new ActivationReLU());

  BatchNormalization bn2 = new BatchNormalization();

  ConvolutionLayer conv3 = new ConvolutionLayer();
  conv3.setKernelSize(new int[] {2, vectorSize});
  conv3.setNOut(10);
  conv3.setStride(new int[] {1, vectorSize});
  conv3.setConvolutionMode(ConvolutionMode.Same);
  conv3.setDropOut(0.2);
  conv3.setActivationFn(new ActivationReLU());

  BatchNormalization bn3 = new BatchNormalization();

  GlobalPoolingLayer gpl = new GlobalPoolingLayer();
  gpl.setDropOut(0.33);

  OutputLayer out = new OutputLayer();

  //    clf.setLayers(conv1, bn1, conv2, bn2, conv3, bn3, gpl, out);
  clf.setLayers(conv1, conv2, conv3, gpl, out);
  //    clf.setNumEpochs(50);
  clf.setCacheMode(CacheMode.MEMORY);
  final EpochListener l = new EpochListener();
  l.setN(1);
  clf.setIterationListener(l);

  clf.setEarlyStopping(new EarlyStopping(10, 15));
  clf.setDebug(true);

  // NNC
  NeuralNetConfiguration nnc = new NeuralNetConfiguration();
  nnc.setLearningRate(0.01);
  nnc.setUseRegularization(true);
  nnc.setL2(1e-3);
  clf.setNeuralNetConfiguration(nnc);

  // Data: shuffle IMDB deterministically, then keep only 2% of it so the
  // test stays tractable.
  final Instances data = DatasetLoader.loadImdb();
  data.randomize(new Random(42));
  RemovePercentage rp = new RemovePercentage();
  rp.setInputFormat(data);
  rp.setPercentage(98);
  final Instances dataFiltered = Filter.useFilter(data, rp);

  TestUtil.holdout(clf, dataFiltered);
}
 
开发者ID:Waikato,项目名称:wekaDeeplearning4j,代码行数:75,代码来源:Dl4jMlpTest.java

示例12: testTextCnnRegression

import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; //导入方法依赖的package包/类
// Regression test on the anger data set: two parallel convolution branches
// (kernel heights 3/2), global pooling, MSE/identity output, early stopping,
// and L2-regularized SGD.
@Test
public void testTextCnnRegression() throws Exception {
  CnnTextEmbeddingInstanceIterator cnnTextIter = new CnnTextEmbeddingInstanceIterator();
  cnnTextIter.setTrainBatchSize(64);
  cnnTextIter.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors());
  clf.setInstanceIterator(cnnTextIter);

  cnnTextIter.initialize();
  final WordVectors wordVectors = cnnTextIter.getWordVectors();
  // Kernel width and stride are set to the embedding dimensionality so each
  // convolution window spans whole word vectors.
  int vectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length;

  ConvolutionLayer conv1 = new ConvolutionLayer();
  conv1.setKernelSize(new int[] {3, vectorSize});
  conv1.setNOut(10);
  conv1.setStride(new int[] {1, vectorSize});
  conv1.setConvolutionMode(ConvolutionMode.Same);

  ConvolutionLayer conv2 = new ConvolutionLayer();
  conv2.setKernelSize(new int[] {2, vectorSize});
  conv2.setNOut(10);
  conv2.setStride(new int[] {1, vectorSize});
  conv2.setConvolutionMode(ConvolutionMode.Same);

  GlobalPoolingLayer gpl = new GlobalPoolingLayer();

  // MSE loss + identity activation: regression output, not classification.
  OutputLayer out = new OutputLayer();
  out.setLossFn(new LossMSE());
  out.setActivationFn(new ActivationIdentity());

  clf.setLayers(conv1, conv2, gpl, out);
  //    clf.setNumEpochs(200);
  clf.setCacheMode(CacheMode.MEMORY);
  final EpochListener l = new EpochListener();
  l.setN(20);
  clf.setIterationListener(l);
  clf.setDebug(true);
  clf.setEarlyStopping(new EarlyStopping(0, 10));
  final Instances data = DatasetLoader.loadAnger();

  NeuralNetConfiguration nnc = new NeuralNetConfiguration();
  nnc.setLearningRate(0.01);
  nnc.setUseRegularization(true);
  nnc.setL2(0.00001);
  clf.setNeuralNetConfiguration(nnc);
  TestUtil.holdout(clf, data);
}
 
开发者ID:Waikato,项目名称:wekaDeeplearning4j,代码行数:47,代码来源:Dl4jMlpTest.java


注:本文中的org.deeplearning4j.models.embeddings.wordvectors.WordVectors.getWordVector方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。