本文整理汇总了Java中org.deeplearning4j.models.word2vec.wordstore.VocabCache类的典型用法代码示例。如果您正苦于以下问题：Java VocabCache类的具体用法？Java VocabCache怎么用？Java VocabCache使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
VocabCache类属于org.deeplearning4j.models.word2vec.wordstore包，在下文中一共展示了VocabCache类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: fromPair
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Wraps an existing lookup table and vocabulary into a {@code Word2Vec} instance.
 *
 * @param pair
 *            lookup table (first element) and vocabulary (second element) to wrap
 * @return a {@code Word2Vec} whose lookup table and vocab are taken from the pair and whose
 *         model utils are a fresh {@code BasicModelUtils}
 */
public static Word2Vec fromPair(Pair<InMemoryLookupTable, VocabCache> pair) {
    Word2Vec model = new Word2Vec();

    InMemoryLookupTable table = pair.getFirst();
    model.setLookupTable(table);

    VocabCache cache = pair.getSecond();
    model.setVocab(cache);

    model.setModelUtils(new BasicModelUtils());
    return model;
}
示例2: writeSequenceVectors
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * This method saves specified SequenceVectors model to target OutputStream as UTF-8 text.
 * <p>
 * The encoded VectorsConfiguration is written first, followed by one encoded
 * {@code ElementPair} (serialized element plus its vector as doubles) per vocabulary entry,
 * in vocabulary index order. The stream is closed when this method returns, whether it
 * completes normally or throws.
 *
 * @param vectors SequenceVectors model
 * @param factory SequenceElementFactory implementation for your objects
 * @param stream Target output stream; closed by this method
 * @param <T> type of the sequence elements stored in the model
 * @throws IOException if the UTF-8 writer cannot be created or the underlying stream
 *         reported a write error
 */
public static <T extends SequenceElement> void writeSequenceVectors(@NonNull SequenceVectors<T> vectors,
        @NonNull SequenceElementFactory<T> factory, @NonNull OutputStream stream) throws IOException {
    WeightLookupTable<T> lookupTable = vectors.getLookupTable();
    VocabCache<T> vocabCache = vectors.getVocab();

    // try-with-resources: the writer (and the wrapped stream) is released even when
    // serialization of an element throws half-way through.
    try (PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(stream, "UTF-8")))) {
        // at first line we save VectorsConfiguration
        // NOTE(review): no line separator is emitted after the configuration (unchanged from the
        // original) - confirm the reader side expects exactly this layout.
        writer.write(vectors.getConfiguration().toEncodedJson());

        // now we have elements one by one
        for (int x = 0; x < vocabCache.numWords(); x++) {
            T element = vocabCache.elementAtIndex(x);
            String json = factory.serialize(element);
            double[] vector = lookupTable.vector(element.getLabel()).dup().data().asDouble();
            ElementPair pair = new ElementPair(json, vector);
            writer.println(pair.toEncodedJson());
        }

        // PrintWriter never throws on write failures; checkError() flushes and reports them.
        if (writer.checkError()) {
            throw new IOException("Failed to write SequenceVectors model to the output stream");
        }
    }
}
示例3: readVocabCache
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Restores a vocabulary from the provided InputStream, reading it as UTF-8 text with one
 * serialized VocabWord per line.
 * Please note: it reads only vocab content, so it's suitable mostly for BagOfWords/TF-IDF vectorizers
 *
 * @param stream source of the serialized vocabulary; this method does not close it
 * @return cache containing every word that was read, each added as a token and registered
 *         under the index stored in the word itself
 * @throws IOException if the stream cannot be read
 */
public static VocabCache<VocabWord> readVocabCache(@NonNull InputStream stream) throws IOException {
    BufferedReader input = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
    VocabWordFactory wordFactory = new VocabWordFactory();
    AbstractCache<VocabWord> cache = new AbstractCache.Builder<VocabWord>().build();

    for (String serialized = input.readLine(); serialized != null; serialized = input.readLine()) {
        VocabWord restored = wordFactory.deserialize(serialized);
        cache.addToken(restored);
        cache.addWordToIndex(restored.getIndex(), restored.getLabel());
    }

    return cache;
}
示例4: configure
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Stores the vocabulary, lookup table and configuration, configures the nested {@code cbow}
 * instance with the same arguments, and then caches training parameters and the weight
 * tables of the lookup table in local fields.
 *
 * @param vocabCache vocabulary to train over
 * @param lookupTable weight storage; it is cast to {@code InMemoryLookupTable}, so any other
 *        implementation fails with a ClassCastException
 * @param configuration source of window, AdaGrad, negative and sampling settings
 */
@Override
public void configure(@NonNull VocabCache<T> vocabCache, @NonNull WeightLookupTable<T> lookupTable,
        @NonNull VectorsConfiguration configuration) {
    this.vocabCache = vocabCache;
    this.lookupTable = lookupTable;
    this.configuration = configuration;

    cbow.configure(vocabCache, lookupTable, configuration);

    this.window = configuration.getWindow();
    this.useAdaGrad = configuration.isUseAdaGrad();
    this.negative = configuration.getNegative();
    this.sampling = configuration.getSampling();

    // One cast instead of one per accessor.
    InMemoryLookupTable<T> inMemoryTable = (InMemoryLookupTable<T>) lookupTable;
    this.syn0 = inMemoryTable.getSyn0();
    this.syn1 = inMemoryTable.getSyn1();
    this.syn1Neg = inMemoryTable.getSyn1Neg();
    this.expTable = inMemoryTable.getExpTable();
    this.table = inMemoryTable.getTable();
}
示例5: weights
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Assembles a weight matrix from a word-to-vector map, placing each vector in the row given
 * by the word's index in the vocabulary.
 * <p>
 * The matrix has {@code data.size()} rows and {@code glove.layerSize()} columns. An entry is
 * skipped when its vector length differs from the layer size, or when the word's vocabulary
 * index is negative or not below {@code data.size()}.
 *
 * @param glove lookup table supplying the layer size
 * @param data word label mapped to its vector
 * @param vocab vocabulary used to resolve the row index of each word
 * @return the populated matrix; rows of skipped entries keep their initial values
 */
private static INDArray weights(GloveWeightLookupTable glove, Map<String, float[]> data, VocabCache vocab) {
    INDArray ret = Nd4j.create(data.size(), glove.layerSize());
    for (Map.Entry<String, float[]> entry : data.entrySet()) {
        INDArray row = Nd4j.create(Nd4j.createBuffer(entry.getValue()));
        if (row.length() != glove.layerSize()) {
            continue;
        }

        // Resolve the vocabulary index once; the original looked it up for every check
        // and again for the final putRow.
        int index = vocab.indexOf(entry.getKey());
        if (index < 0 || index >= data.size()) {
            continue;
        }

        ret.putRow(index, row);
    }
    return ret;
}
示例6: getSyn0Vector
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Returns the syn0 vector for the word at the given vocabulary index, creating a random one
 * on first request.
 * <p>
 * The id of the calling thread is added to {@code workers} if it is not there yet. Creation of
 * a missing vector is done inside a block synchronized on this instance, after re-checking
 * that no entry exists.
 *
 * @param wordIndex index of the word in the vocabulary
 * @param vocabCache vocabulary used to resolve the index to a word
 * @return the vector stored in {@code indexSyn0VecMap} for that word
 */
public INDArray getSyn0Vector(Integer wordIndex, VocabCache<VocabWord> vocabCache) {
    if (!workers.contains(Thread.currentThread().getId())) {
        workers.add(Thread.currentThread().getId());
    }

    VocabWord key = vocabCache.elementAtIndex(wordIndex);

    // Fast path: vector already present, no locking needed.
    if (indexSyn0VecMap.containsKey(key)) {
        return indexSyn0VecMap.get(key);
    }

    synchronized (this) {
        // Re-check under the lock before creating the vector.
        if (!indexSyn0VecMap.containsKey(key)) {
            indexSyn0VecMap.put(key, getRandomSyn0Vec(vectorLength.get(), wordIndex));
        }
    }
    return indexSyn0VecMap.get(key);
}
示例7: testGlove
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Trains Glove on the lower-cased lines of raw_sentences.txt (loaded as a JavaRDD) and checks
 * that "week" is among the 20 words nearest to "day".
 */
@Test
public void testGlove() throws Exception {
    Glove glove = new Glove(true, 5, 100);

    String corpusPath = new ClassPathResource("raw_sentences.txt").getFile().getAbsolutePath();
    Function<String, String> toLowerCase = new Function<String, String>() {
        @Override
        public String call(String s) throws Exception {
            return s.toLowerCase();
        }
    };
    JavaRDD<String> corpus = sc.textFile(corpusPath).map(toLowerCase);

    Pair<VocabCache<VocabWord>, GloveWeightLookupTable> trained = glove.train(corpus);

    // fromPair expects (lookup table, vocab), i.e. the trained pair in reverse order.
    InMemoryLookupTable lookupTable = (InMemoryLookupTable) trained.getSecond();
    VocabCache vocab = (VocabCache) trained.getFirst();
    WordVectors vectors = WordVectorSerializer.fromPair(new Pair<>(lookupTable, vocab));

    Collection<String> nearest = vectors.wordsNearest("day", 20);
    assertTrue(nearest.contains("week"));
}
示例8: buildShallowVocabCache
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
* This method builds shadow vocabulary and huffman tree
*
* @param counter
* @return
*/
protected VocabCache<ShallowSequenceElement> buildShallowVocabCache(Counter<Long> counter) {
// TODO: need simplified cache here, that will operate on Long instead of string labels
VocabCache<ShallowSequenceElement> vocabCache = new AbstractCache<>();
for (Long id : counter.keySet()) {
ShallowSequenceElement shallowElement = new ShallowSequenceElement(counter.getCount(id), id);
vocabCache.addToken(shallowElement);
}
// building huffman tree
Huffman huffman = new Huffman(vocabCache.vocabWords());
huffman.build();
huffman.applyIndexes(vocabCache);
return vocabCache;
}
示例9: writeWordVectors
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
* This mehod writes word vectors to the given OutputStream.
* Please note: this method doesn't load whole vocab/lookupTable into memory, so it's able to process large vocabularies served over network.
*
* @param lookupTable
* @param stream
* @param <T>
* @throws IOException
*/
public static <T extends SequenceElement> void writeWordVectors(WeightLookupTable<T> lookupTable,
OutputStream stream) throws IOException {
VocabCache<T> vocabCache = lookupTable.getVocabCache();
PrintWriter writer = new PrintWriter(new OutputStreamWriter(stream, "UTF-8"));
// saving header as "NUM_WORDS VECTOR_SIZE NUM_DOCS"
String str = vocabCache.numWords() + " " + lookupTable.layerSize() + " " + vocabCache.totalNumberOfDocs();
log.debug("Saving header: {}", str);
writer.println(str);
// saving vocab content
for (int x = 0; x < vocabCache.numWords(); x++) {
T element = vocabCache.elementAtIndex(x);
StringBuilder builder = new StringBuilder();
builder.append(encodeB64(element.getLabel())).append(" ");
INDArray vec = lookupTable.vector(element.getLabel());
for (int i = 0; i < vec.length(); i++) {
builder.append(vec.getDouble(i));
if (i < vec.length() - 1)
builder.append(" ");
}
writer.println(builder.toString());
}
writer.flush();
writer.close();
}
示例10: writeVocabCache
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * This method saves vocab cache to provided OutputStream as UTF-8 text, one JSON-serialized
 * word per line, in vocabulary index order.
 * Please note: it saves only vocab content, so it's suitable mostly for BagOfWords/TF-IDF vectorizers
 * <p>
 * The stream is closed when this method returns, whether it completes normally or throws.
 *
 * @param vocabCache vocabulary to save
 * @param stream target output stream; closed by this method
 * @throws IOException if the UTF-8 writer cannot be created or the underlying stream
 *         reported a write error
 */
public static void writeVocabCache(@NonNull VocabCache<VocabWord> vocabCache, @NonNull OutputStream stream)
        throws IOException {
    // try-with-resources: the writer (and the wrapped stream) is released even when
    // serializing a word throws half-way through.
    try (PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(stream, "UTF-8")))) {
        for (int x = 0; x < vocabCache.numWords(); x++) {
            VocabWord word = vocabCache.elementAtIndex(x);
            writer.println(word.toJSON());
        }

        // PrintWriter never throws on write failures; checkError() flushes and reports them.
        if (writer.checkError()) {
            throw new IOException("Failed to write vocab cache to the output stream");
        }
    }
}
示例11: configure
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Stores the vocabulary, lookup table and configuration, copies the training parameters from
 * the configuration, and wraps the lookup table's weight arrays in {@code DeviceLocalNDArray}
 * holders.
 * <p>
 * When the configured negative value is positive and the lookup table has no syn1Neg yet, the
 * table is told the hierarchic-softmax and negative settings and its weights are reset with
 * {@code resetWeights(false)} before the arrays are wrapped.
 *
 * @param vocabCache vocabulary to train over
 * @param lookupTable weight storage; it is cast to {@code InMemoryLookupTable}, so any other
 *        implementation fails with a ClassCastException
 * @param configuration source of the training parameters
 */
@Override
public void configure(@NonNull VocabCache<T> vocabCache, @NonNull WeightLookupTable<T> lookupTable,
        @NonNull VectorsConfiguration configuration) {
    this.vocabCache = vocabCache;
    this.lookupTable = lookupTable;
    this.configuration = configuration;

    this.window = configuration.getWindow();
    this.useAdaGrad = configuration.isUseAdaGrad();
    this.negative = configuration.getNegative();
    this.sampling = configuration.getSampling();

    // One cast instead of one per accessor.
    InMemoryLookupTable<T> inMemoryTable = (InMemoryLookupTable<T>) lookupTable;

    if (configuration.getNegative() > 0 && inMemoryTable.getSyn1Neg() == null) {
        logger.info("Initializing syn1Neg...");
        inMemoryTable.setUseHS(configuration.isUseHierarchicSoftmax());
        inMemoryTable.setNegative(configuration.getNegative());
        inMemoryTable.resetWeights(false);
    }

    this.syn0 = new DeviceLocalNDArray(inMemoryTable.getSyn0());
    this.syn1 = new DeviceLocalNDArray(inMemoryTable.getSyn1());
    this.syn1Neg = new DeviceLocalNDArray(inMemoryTable.getSyn1Neg());
    this.expTable = new DeviceLocalNDArray(Nd4j.create(inMemoryTable.getExpTable()));
    this.table = new DeviceLocalNDArray(inMemoryTable.getTable());

    this.variableWindows = configuration.getVariableWindows();
}
示例12: configure
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * SkipGram initialization over given vocabulary and WeightLookupTable.
 * <p>
 * When the configured negative value is positive and the lookup table has no syn1Neg yet, the
 * table is told the hierarchic-softmax and negative settings and its weights are reset with
 * {@code resetWeights(false)}. The table's weight arrays are then wrapped in
 * {@code DeviceLocalNDArray} holders and the training parameters are copied from the
 * configuration.
 *
 * @param vocabCache vocabulary to train over
 * @param lookupTable weight storage; it is cast to {@code InMemoryLookupTable}, so any other
 *        implementation fails with a ClassCastException
 * @param configuration source of the training parameters
 */
@Override
public void configure(@NonNull VocabCache<T> vocabCache, @NonNull WeightLookupTable<T> lookupTable,
        @NonNull VectorsConfiguration configuration) {
    this.vocabCache = vocabCache;
    this.lookupTable = lookupTable;
    this.configuration = configuration;

    // One cast instead of one per accessor.
    InMemoryLookupTable<T> inMemoryTable = (InMemoryLookupTable<T>) lookupTable;

    if (configuration.getNegative() > 0 && inMemoryTable.getSyn1Neg() == null) {
        log.info("Initializing syn1Neg...");
        inMemoryTable.setUseHS(configuration.isUseHierarchicSoftmax());
        inMemoryTable.setNegative(configuration.getNegative());
        inMemoryTable.resetWeights(false);
    }

    this.expTable = new DeviceLocalNDArray(Nd4j.create(inMemoryTable.getExpTable()));
    this.syn0 = new DeviceLocalNDArray(inMemoryTable.getSyn0());
    this.syn1 = new DeviceLocalNDArray(inMemoryTable.getSyn1());
    this.syn1Neg = new DeviceLocalNDArray(inMemoryTable.getSyn1Neg());
    this.table = new DeviceLocalNDArray(inMemoryTable.getTable());

    this.window = configuration.getWindow();
    this.useAdaGrad = configuration.isUseAdaGrad();
    this.negative = configuration.getNegative();
    this.sampling = configuration.getSampling();
    this.variableWindows = configuration.getVariableWindows();
    this.vectorLength = configuration.getLayersSize();
}
示例13: configure
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Stores the vocabulary, lookup table and configuration, copies the window, AdaGrad and
 * negative settings from the configuration, and finally configures the nested
 * {@code skipGram} instance with the same three arguments.
 *
 * @param vocabCache vocabulary to train over
 * @param lookupTable weight storage handed on to {@code skipGram}
 * @param configuration source of the training parameters
 */
@Override
public void configure(@NonNull VocabCache<T> vocabCache, @NonNull WeightLookupTable<T> lookupTable,
        @NonNull VectorsConfiguration configuration) {
    // collaborators
    this.vocabCache = vocabCache;
    this.lookupTable = lookupTable;
    this.configuration = configuration;

    // parameters copied from the configuration
    this.window = configuration.getWindow();
    this.useAdaGrad = configuration.isUseAdaGrad();
    this.negative = configuration.getNegative();

    skipGram.configure(vocabCache, lookupTable, configuration);
}
示例14: InMemoryLookupTable
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Creates a lookup table over the given vocabulary, stores the supplied settings, initializes
 * the exp table and, when AdaGrad is requested, the AdaGrad state.
 *
 * @param vocab vocabulary backing this table
 * @param vectorLength length of each vector
 * @param useAdaGrad whether {@code initAdaGrad()} is run
 * @param lr value stored into the {@code lr} holder
 * @param gen random generator kept as {@code rng}
 * @param negative negative setting stored as-is
 */
public InMemoryLookupTable(VocabCache<T> vocab, int vectorLength, boolean useAdaGrad, double lr, Random gen,
        double negative) {
    this.vocab = vocab;
    this.rng = gen;

    this.vectorLength = vectorLength;
    this.negative = negative;
    this.useAdaGrad = useAdaGrad;
    this.lr.set(lr);

    // Run the initializers only after every field above has been assigned.
    initExpTable();
    if (useAdaGrad) {
        initAdaGrad();
    }
}
示例15: ASCIICoOccurrenceReader
import org.deeplearning4j.models.word2vec.wordstore.VocabCache; //导入依赖的package包/类
/**
 * Creates a reader over the given file, iterating its lines through a
 * {@code PrefetchingSentenceIterator} built on top of a {@code BasicLineIterator}.
 *
 * @param file file to read
 * @param vocabCache vocabulary kept by this reader
 * @throws RuntimeException wrapping any exception raised while the iterators are created
 */
public ASCIICoOccurrenceReader(@NonNull File file, @NonNull VocabCache<T> vocabCache) {
    this.vocabCache = vocabCache;
    this.file = file;

    try {
        BasicLineIterator lineSource = new BasicLineIterator(file);
        this.iterator = new PrefetchingSentenceIterator.Builder(lineSource).build();
    } catch (Exception e) {
        // Constructor declares no checked exceptions, so failures are rethrown unchecked.
        throw new RuntimeException(e);
    }
}