本文整理汇总了Java中org.deeplearning4j.models.paragraphvectors.ParagraphVectors类的典型用法代码示例。如果您正苦于以下问题:Java ParagraphVectors类的具体用法?Java ParagraphVectors怎么用?Java ParagraphVectors使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
ParagraphVectors类属于org.deeplearning4j.models.paragraphvectors包,在下文中一共展示了ParagraphVectors类的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: makeParagraphVectors
import org.deeplearning4j.models.paragraphvectors.ParagraphVectors; //导入依赖的package包/类
/**
 * Trains a ParagraphVectors model over the labelled corpus found under
 * {@code TYPE_LEARNING_DIR/corpus} and stores it in the {@code paragraphVectors} field.
 *
 * @throws Exception if corpus iteration or training fails
 */
void makeParagraphVectors() throws Exception {
    // Make sure the learning directory exists before we point the iterator at it.
    File learningDir = TYPE_LEARNING_DIR;
    learningDir.mkdirs();

    // One label per document, derived from the folder layout under "corpus".
    iterator = new FileLabelAwareIterator.Builder()
            .addSourceFolder(new File(learningDir, "corpus"))
            .build();

    tokenizerFactory = new DefaultTokenizerFactory();
    tokenizerFactory.setTokenPreProcessor(new CommonPreprocessor());

    // Training configuration: word vectors are trained together with the
    // paragraph (document) vectors.
    paragraphVectors = new ParagraphVectors.Builder()
            .learningRate(0.025)    // initial learning rate
            .minLearningRate(0.001) // lower bound for learning-rate decay
            .batchSize(1000)
            .epochs(5)
            .iterate(iterator)
            .trainWordVectors(true)
            .tokenizerFactory(tokenizerFactory)
            .build();

    paragraphVectors.fit();
}
示例2: Par2Hier
import org.deeplearning4j.models.paragraphvectors.ParagraphVectors; //导入依赖的package包/类
/**
 * Builds a Par2Hier model on top of an already-trained {@link ParagraphVectors}
 * instance, reusing its vocabulary, lookup table, iterator and tokenizer
 * instead of rebuilding them.
 *
 * @param paragraphVectors trained source model whose state is reused
 * @param smoothing        smoothing method to apply
 * @param k                smoothing parameter (NOTE(review): exact semantics of
 *                         k are defined by Par2HierUtils.Method — confirm there)
 */
public Par2Hier(ParagraphVectors paragraphVectors, Par2HierUtils.Method smoothing, int k) {
    // Copy all reusable state from the trained source model first.
    this.labelsSource = paragraphVectors.getLabelsSource();
    this.labelAwareIterator = paragraphVectors.getLabelAwareIterator();
    this.lookupTable = paragraphVectors.getLookupTable();
    this.vocab = paragraphVectors.getVocab();
    this.tokenizerFactory = paragraphVectors.getTokenizerFactory();
    // Then record the Par2Hier-specific settings.
    this.smoothing = smoothing;
    this.k = k;
}
示例3: loadParagraphVectors
import org.deeplearning4j.models.paragraphvectors.ParagraphVectors; //导入依赖的package包/类
// Restores a previously serialized ParagraphVectors model from PARAGRAPHVECTORMODELPATH.
// Returns null when deserialization fails (the IOException is only printed).
private static ParagraphVectors loadParagraphVectors() {
ParagraphVectors paragraphVectors = null;
try {
paragraphVectors = WordVectorSerializer.readParagraphVectors((PARAGRAPHVECTORMODELPATH));
// The tokenizer factory is not part of the serialized model, so re-create it.
TokenizerFactory t = new DefaultTokenizerFactory();
t.setTokenPreProcessor(new CommonPreprocessor());
paragraphVectors.setTokenizerFactory(t);
paragraphVectors.getConfiguration().setIterations(10); // inference iterations set to 10 (previous comment incorrectly said 1)
} catch (IOException e) {
e.printStackTrace(); // NOTE(review): failure is swallowed and the method falls through to return null
}
return paragraphVectors;
}
示例4: trainParagraghVecModel
import org.deeplearning4j.models.paragraphvectors.ParagraphVectors; //导入依赖的package包/类
/**
 * Trains a ParagraphVectors model from the bundled training corpus
 * (one document per line) and serializes it to {@code locationToSave}.
 *
 * @param locationToSave path the trained model is written to
 * @throws FileNotFoundException if the classpath corpus cannot be resolved
 */
public void trainParagraghVecModel(String locationToSave) throws FileNotFoundException {
    ClassPathResource trainingResource = new ClassPathResource("/paragraphVectors/paragraphVectorTraining.txt");
    SentenceIterator sentences = new BasicLineIterator(trainingResource.getFile());

    AbstractCache<VocabWord> vocabCache = new AbstractCache<>();

    TokenizerFactory tokenizer = new DefaultTokenizerFactory();
    tokenizer.setTokenPreProcessor(new CommonPreprocessor());

    // No LabelAwareIterator is available here, so a LabelsSource generates a
    // synthetic label ("DOC_<n>") for every document/line of the corpus.
    LabelsSource docLabels = new LabelsSource("DOC_");

    ParagraphVectors model = new ParagraphVectors.Builder()
            .minWordFrequency(1)
            .iterations(100)
            .epochs(1)
            .layerSize(50)
            .learningRate(0.02)
            .labelsSource(docLabels)
            .windowSize(5)
            .iterate(sentences)
            .trainWordVectors(true)
            .vocabCache(vocabCache)
            .tokenizerFactory(tokenizer)
            .sampling(0)
            .build();

    model.fit();

    // Persist so the model can later be restored with readParagraphVectors().
    WordVectorSerializer.writeParagraphVectors(model, locationToSave);
}
示例5: writeWordVectors
import org.deeplearning4j.models.paragraphvectors.ParagraphVectors; //导入依赖的package包/类
/**
 * Saves paragraph vectors to the given file.
 *
 * @param vectors model whose vectors are written
 * @param path    destination file
 * @deprecated prefer {@code writeParagraphVectors(ParagraphVectors, File)}
 */
@Deprecated
public static void writeWordVectors(@NonNull ParagraphVectors vectors, @NonNull File path) {
    // Both resources are closed in reverse declaration order by try-with-resources.
    try (FileOutputStream fileStream = new FileOutputStream(path);
         BufferedOutputStream buffered = new BufferedOutputStream(fileStream)) {
        writeWordVectors(vectors, buffered);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
示例6: writeParagraphVectors
import org.deeplearning4j.models.paragraphvectors.ParagraphVectors; //导入依赖的package包/类
/**
 * Saves the given ParagraphVectors model into a compressed zip file.
 *
 * @param vectors model to persist
 * @param file    destination zip file
 */
public static void writeParagraphVectors(ParagraphVectors vectors, File file) {
    // Closing the buffered stream also closes the underlying FileOutputStream.
    try (BufferedOutputStream stream = new BufferedOutputStream(new FileOutputStream(file))) {
        writeParagraphVectors(vectors, stream);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
示例7: readParagraphVectorsFromText
import org.deeplearning4j.models.paragraphvectors.ParagraphVectors; //导入依赖的package包/类
/**
 * Restores a ParagraphVectors model previously serialized in text form.
 *
 * <p>Deprecation note: please consider using {@code readParagraphVectors()} instead.
 *
 * @param file file containing the previously serialized model
 * @return the restored model
 * @deprecated use {@code readParagraphVectors()} instead
 */
@Deprecated
public static ParagraphVectors readParagraphVectorsFromText(@NonNull File file) {
    try (BufferedInputStream input = new BufferedInputStream(new FileInputStream(file))) {
        // Delegate to the stream-based overload; the stream is closed on exit.
        return readParagraphVectorsFromText(input);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
示例8: main
import org.deeplearning4j.models.paragraphvectors.ParagraphVectors; //导入依赖的package包/类
/**
 * Demo entry point: trains paragraph vectors over raw_sentences.txt (one
 * sentence per line, labelled "LINE_<n>") and prints similarity scores for
 * several similar and dissimilar sentence pairs.
 */
public static void main(String[] args) throws Exception {
    ClassPathResource srcFile = new ClassPathResource("/raw_sentences.txt");
    SentenceIterator sentences = new BasicLineIterator(srcFile.getFile());

    TokenizerFactory tokenizer = new DefaultTokenizerFactory();
    tokenizer.setTokenPreProcessor(new CommonPreprocessor());

    ParagraphVectors vec = new ParagraphVectors.Builder()
            .minWordFrequency(1)
            .iterations(5)
            .epochs(1)
            .layerSize(100)
            .learningRate(0.025)
            .labelsSource(new LabelsSource("LINE_"))
            .windowSize(5)
            .iterate(sentences)
            .trainWordVectors(false)
            .tokenizerFactory(tokenizer)
            .sampling(0)
            .build();
    vec.fit();

    // Pairs expected to score high (similar sentences)...
    printSimilarity(vec, "LINE_9835", "LINE_12492",
            "Comparing lines 9836 & 12493 ('This is my house .'/'This is my world .') Similarity = ");
    printSimilarity(vec, "LINE_3720", "LINE_16392",
            "Comparing lines 3721 & 16393 ('This is my way .'/'This is my work .') Similarity = ");
    printSimilarity(vec, "LINE_6347", "LINE_3720",
            "Comparing lines 6348 & 3721 ('This is my case .'/'This is my way .') Similarity = ");
    // ...and pairs expected to score low (dissimilar sentences).
    printSimilarity(vec, "LINE_3720", "LINE_9852",
            "Comparing lines 3721 & 9853 ('This is my way .'/'We now have one .') Similarity = ");
    printSimilarity(vec, "LINE_3720", "LINE_3719",
            "Comparing lines 3721 & 3720 ('This is my way .'/'At first he says no .') Similarity = ");
}

/** Prints the similarity between two labelled lines, prefixed by {@code message}. */
private static void printSimilarity(ParagraphVectors vec, String labelA, String labelB, String message) {
    out.println(message + vec.similarity(labelA, labelB));
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-End-to-Endguide-for-Java-developers,代码行数:46,代码来源:ClassifyBySimilarity.java
示例9: main
import org.deeplearning4j.models.paragraphvectors.ParagraphVectors; //导入依赖的package包/类
/**
 * Demo entry point: trains paragraph vectors over a labelled corpus
 * (paravec/labeled, one category per subfolder), then scores each unlabelled
 * document (paravec/unlabeled) against every known category label.
 */
public static void main(String[] args) throws Exception {
    // Labelled training documents: label = subfolder name.
    ClassPathResource labeledResource = new ClassPathResource("paravec/labeled");
    iter = new FileLabelAwareIterator.Builder()
            .addSourceFolder(labeledResource.getFile())
            .build();

    tFact = new DefaultTokenizerFactory();
    tFact.setTokenPreProcessor(new CommonPreprocessor());

    // Train paragraph + word vectors over the labelled corpus.
    pVect = new ParagraphVectors.Builder()
            .learningRate(0.025)
            .minLearningRate(0.001)
            .batchSize(1000)
            .epochs(20)
            .iterate(iter)
            .trainWordVectors(true)
            .tokenizerFactory(tFact)
            .build();
    pVect.fit();

    // Documents to classify.
    ClassPathResource unlabeledResource = new ClassPathResource("paravec/unlabeled");
    FileLabelAwareIterator unlabeledDocs = new FileLabelAwareIterator.Builder()
            .addSourceFolder(unlabeledResource.getFile())
            .build();

    MeansBuilder meansBuilder = new MeansBuilder(
            (InMemoryLookupTable<VocabWord>) pVect.getLookupTable(),
            tFact);
    LabelSeeker seeker = new LabelSeeker(iter.getLabelsSource().getLabels(),
            (InMemoryLookupTable<VocabWord>) pVect.getLookupTable());

    // Represent each unlabelled document as a vector and score it per label.
    while (unlabeledDocs.hasNextDocument()) {
        LabelledDocument document = unlabeledDocs.nextDocument();
        INDArray centroid = meansBuilder.documentAsVector(document);
        List<Pair<String, Double>> scores = seeker.getScores(centroid);
        out.println("Document '" + document.getLabel() + "' falls into the following categories: ");
        for (Pair<String, Double> score : scores) {
            out.println("        " + score.getFirst() + ": " + score.getSecond());
        }
    }
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-End-to-Endguide-for-Java-developers,代码行数:49,代码来源:ParagraphVectorsClassifierExample.java
示例10: testParaVecSerialization1
import org.deeplearning4j.models.paragraphvectors.ParagraphVectors; //导入依赖的package包/类
@Test
public void testParaVecSerialization1() throws Exception {
// Round-trip test: build a synthetic ParagraphVectors model in memory,
// serialize it with writeParagraphVectors(), read it back, and verify that
// weights, vocabulary, label flags, points and codes all survive.
VectorsConfiguration configuration = new VectorsConfiguration();
// Distinctive non-default values so a failed restore is easy to spot.
configuration.setIterations(14123);
configuration.setLayersSize(156);
// Random weight matrices: 100 vocab entries x layer size.
INDArray syn0 = Nd4j.rand(100, configuration.getLayersSize());
INDArray syn1 = Nd4j.rand(100, configuration.getLayersSize());
AbstractCache<VocabWord> cache = new AbstractCache.Builder<VocabWord>().build();
for (int i = 0; i < 100; i++) {
VocabWord word = new VocabWord((float) i, "word_" + i);
// Random Huffman-style points/codes of random length (1..19 entries).
List<Integer> points = new ArrayList<>();
List<Byte> codes = new ArrayList<>();
int num = org.apache.commons.lang3.RandomUtils.nextInt(1, 20);
for (int x = 0; x < num; x++) {
points.add(org.apache.commons.lang3.RandomUtils.nextInt(1, 100000));
codes.add(org.apache.commons.lang3.RandomUtils.nextBytes(10)[0]);
}
// Mark roughly 30% of entries as document labels.
if (RandomUtils.nextInt(10) < 3) {
word.markAsLabel(true);
}
word.setIndex(i);
word.setPoints(points);
word.setCodes(codes);
cache.addToken(word);
cache.addWordToIndex(i, word.getLabel());
}
// Lookup table backed by the synthetic cache and the random weights above.
InMemoryLookupTable<VocabWord> lookupTable =
(InMemoryLookupTable<VocabWord>) new InMemoryLookupTable.Builder<VocabWord>()
.vectorLength(configuration.getLayersSize()).cache(cache).build();
lookupTable.setSyn0(syn0);
lookupTable.setSyn1(syn1);
ParagraphVectors originalVectors =
new ParagraphVectors.Builder(configuration).vocabCache(cache).lookupTable(lookupTable).build();
File tempFile = File.createTempFile("paravec", "tests");
tempFile.deleteOnExit();
// Serialize and immediately restore.
WordVectorSerializer.writeParagraphVectors(originalVectors, tempFile);
ParagraphVectors restoredVectors = WordVectorSerializer.readParagraphVectors(tempFile);
InMemoryLookupTable<VocabWord> restoredLookupTable =
(InMemoryLookupTable<VocabWord>) restoredVectors.getLookupTable();
AbstractCache<VocabWord> restoredVocab = (AbstractCache<VocabWord>) restoredVectors.getVocab();
// Weight matrices must match exactly.
assertEquals(restoredLookupTable.getSyn0(), lookupTable.getSyn0());
assertEquals(restoredLookupTable.getSyn1(), lookupTable.getSyn1());
// Per-word checks: label flag, surface form, frequency, points and codes.
for (int i = 0; i < cache.numWords(); i++) {
assertEquals(cache.elementAtIndex(i).isLabel(), restoredVocab.elementAtIndex(i).isLabel());
assertEquals(cache.wordAtIndex(i), restoredVocab.wordAtIndex(i));
assertEquals(cache.elementAtIndex(i).getElementFrequency(),
restoredVocab.elementAtIndex(i).getElementFrequency(), 0.1f);
List<Integer> originalPoints = cache.elementAtIndex(i).getPoints();
List<Integer> restoredPoints = restoredVocab.elementAtIndex(i).getPoints();
assertEquals(originalPoints.size(), restoredPoints.size());
for (int x = 0; x < originalPoints.size(); x++) {
assertEquals(originalPoints.get(x), restoredPoints.get(x));
}
List<Byte> originalCodes = cache.elementAtIndex(i).getCodes();
List<Byte> restoredCodes = restoredVocab.elementAtIndex(i).getCodes();
assertEquals(originalCodes.size(), restoredCodes.size());
for (int x = 0; x < originalCodes.size(); x++) {
assertEquals(originalCodes.get(x), restoredCodes.get(x));
}
}
}
示例11: testBiggerParavecLoader
import org.deeplearning4j.models.paragraphvectors.ParagraphVectors; //导入依赖的package包/类
// Manual smoke test: loads a large pre-trained model to exercise the reader,
// hence @Ignore — it is not meant to run in CI.
// NOTE(review): the hardcoded Windows path only exists on the original
// author's machine; the loaded model is intentionally unused.
@Ignore
@Test
public void testBiggerParavecLoader() throws Exception {
ParagraphVectors vectors =
WordVectorSerializer.readParagraphVectors("C:\\Users\\raver\\Downloads\\10kNews.zip");
}
示例12: readParagraphVectors
import org.deeplearning4j.models.paragraphvectors.ParagraphVectors; //导入依赖的package包/类
/**
 * Restores a ParagraphVectors model previously saved with {@code writeParagraphVectors()}.
 *
 * <p>The zip is first read as a Word2Vec model and converted to a ParagraphVectors
 * instance; if the archive contains a {@code labels.txt} entry, the listed tokens
 * are re-marked as document labels in the restored vocabulary.
 *
 * @param file zip file produced by {@code writeParagraphVectors()}
 * @return the restored model, with label information re-applied when available
 * @throws IOException if the file cannot be read
 */
public static ParagraphVectors readParagraphVectors(File file) throws IOException {
    File tmpFileL = File.createTempFile("paravec", "l");
    tmpFileL.deleteOnExit();
    Word2Vec w2v = readWord2Vec(file);
    // "Convert" the Word2Vec model to a ParaVec model, keeping its trained state.
    ParagraphVectors vectors = new ParagraphVectors.Builder(w2v.getConfiguration()).vocabCache(w2v.getVocab())
                    .lookupTable(w2v.getLookupTable()).resetModel(false).build();
    // FIX: the ZipFile and its entry stream were previously never closed (resource
    // leak; on Windows the leaked handle also keeps the file locked). Both are now
    // managed by try-with-resources.
    try (ZipFile zipFile = new ZipFile(file)) {
        // Optionally restore label information from labels.txt inside the archive.
        ZipEntry labels = zipFile.getEntry("labels.txt");
        if (labels != null) {
            try (InputStream stream = zipFile.getInputStream(labels)) {
                // Files.copy does not close the source stream, hence the inner try.
                Files.copy(stream, Paths.get(tmpFileL.getAbsolutePath()), StandardCopyOption.REPLACE_EXISTING);
            }
            try (BufferedReader reader = new BufferedReader(new FileReader(tmpFileL))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    // Each line is a Base64-encoded label; flag the matching vocab token.
                    VocabWord word = vectors.getVocab().tokenFor(decodeB64(line.trim()));
                    if (word != null) {
                        word.markAsLabel(true);
                    }
                }
            }
        }
    }
    vectors.extractLabels();
    return vectors;
}