This article collects typical usage examples of the Java class org.deeplearning4j.nn.conf.layers.GravesLSTM. If you are wondering what GravesLSTM is for or how to use it in practice, the curated class code examples below may help.
The GravesLSTM class belongs to the org.deeplearning4j.nn.conf.layers package. A total of 15 code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
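Before the examples, here is a minimal sketch of constructing a single GravesLSTM layer on its own; the nIn/nOut values are placeholders and are not taken from any of the examples below.
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.nd4j.linalg.activations.Activation;

// Minimal sketch: one Graves-style LSTM layer with 100 inputs and 50 hidden units, tanh activation.
GravesLSTM lstm = new GravesLSTM.Builder()
        .nIn(100)
        .nOut(50)
        .activation(Activation.TANH)
        .build();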
Example 1: makeLayer
import org.deeplearning4j.nn.conf.layers.GravesLSTM; // import the required package/class
public static FeedForwardLayer makeLayer(Config layerConfig){
Type layerType = Type.valueOf(layerConfig.getString("type"));
switch (layerType) {
case GravesLSTM:
return new GravesLSTM.Builder()
.activation(layerConfig.getString("activation"))
.nIn(layerConfig.getInt("nIn"))
.nOut(layerConfig.getInt("nOut")).build();
case RnnOutputLayer:
return new RnnOutputLayer.Builder()
.activation(layerConfig.getString("activation"))
.lossFunction(LossFunctions.LossFunction.valueOf(layerConfig.getString("lossFunction")))
.nIn(layerConfig.getInt("nIn"))
.nOut(layerConfig.getInt("nOut")).build();
default:
throw new RuntimeException("UNAVAILABLE LAYER TYPE CONFIG.");
}
}
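The sketch below shows one way the layers returned by makeLayer might be assembled into a network; the layerConfigs list, its Config entries, and the key names are assumptions for illustration, not part of the original example.
// Hypothetical: layerConfigs is a List<Config>, each entry describing one layer as expected by makeLayer.
NeuralNetConfiguration.ListBuilder list = new NeuralNetConfiguration.Builder()
        .seed(12345)
        .list();
for (int i = 0; i < layerConfigs.size(); i++) {
    list.layer(i, makeLayer(layerConfigs.get(i)));
}
MultiLayerNetwork net = new MultiLayerNetwork(list.build());
net.init();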
Example 2: getNetworkConf
import org.deeplearning4j.nn.conf.layers.GravesLSTM; // import the required package/class
private MultiLayerConfiguration getNetworkConf(boolean useTBPTT) {
MultiLayerConfiguration.Builder builder =
new NeuralNetConfiguration.Builder()
.updater(new AdaGrad(0.1)).l2(0.0025)
.stepFunction(new NegativeDefaultStepFunction())
.list()
.layer(0, new GravesLSTM.Builder().weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0.0, 0.01)).nIn(nIn)
.nOut(layerSize).activation(Activation.TANH).build())
.layer(1, new OutputLayer.Builder(
LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nIn(layerSize)
.nOut(nIn).activation(Activation.SOFTMAX)
.build())
.inputPreProcessor(1, new RnnToFeedForwardPreProcessor()).backprop(true)
.pretrain(false);
if (useTBPTT) {
builder.backpropType(BackpropType.TruncatedBPTT);
builder.tBPTTBackwardLength(window / 3);
builder.tBPTTForwardLength(window / 3);
}
return builder.build();
}
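A hedged usage sketch for the configuration above; the listener frequency is arbitrary, and nIn, layerSize, and window are fields assumed from the surrounding test class.
MultiLayerNetwork net = new MultiLayerNetwork(getNetworkConf(true)); // true enables truncated BPTT
net.init();
net.setListeners(new ScoreIterationListener(10));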
Example 3: getConfiguration
import org.deeplearning4j.nn.conf.layers.GravesLSTM; // import the required package/class
@Override
protected MultiLayerConfiguration getConfiguration()
{
final int[] hiddenLayerNodes = parameters.getHiddeLayerNodes();
final int nLayers = hiddenLayerNodes.length + 1;
final ListBuilder list = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.iterations(parameters.getIterations()).learningRate(parameters.getLearningRate()).rmsDecay(0.95)
.seed(parameters.getSeed()).regularization(true).l2(0.001).list(nLayers).pretrain(false).backprop(true);
for (int i = 0; i < nLayers; i++)
{
int nIn;
if (i == 0)
{
nIn = parameters.getInputSize();
}
else
{
nIn = hiddenLayerNodes[i - 1];
}
if (i < nLayers - 1)
{
final GravesLSTM layer = new GravesLSTM.Builder().nIn(nIn).nOut(hiddenLayerNodes[i])
.updater(Updater.RMSPROP).activation("tanh").weightInit(WeightInit.DISTRIBUTION)
.dist(new UniformDistribution(-0.08, 0.08)).build();
list.layer(i, layer);
}
else
{
final RnnOutputLayer outputLayer = new RnnOutputLayer.Builder(LossFunction.MCXENT).activation("softmax")
.updater(Updater.RMSPROP).nIn(nIn).nOut(parameters.getOutputSize()) // nIn equals the size of the last hidden layer
.weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-0.08, 0.08)).build();
list.layer(i, outputLayer);
}
}
return list.build();
}
Example 4: main
import org.deeplearning4j.nn.conf.layers.GravesLSTM; // import the required package/class
public static void main(String[] args) throws Exception {
getModelData();
System.out.println("Total memory = " + Runtime.getRuntime().totalMemory());
int batchSize = 50;
int vectorSize = 300;
int nEpochs = 5;
int truncateReviewsToLength = 300;
MultiLayerConfiguration sentimentNN = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
.updater(Updater.RMSPROP)
.regularization(true).l2(1e-5)
.weightInit(WeightInit.XAVIER)
.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(1.0)
.learningRate(0.0018)
.list()
.layer(0, new GravesLSTM.Builder().nIn(vectorSize).nOut(200)
.activation("softsign").build())
.layer(1, new RnnOutputLayer.Builder().activation("softmax")
.lossFunction(LossFunctions.LossFunction.MCXENT).nIn(200).nOut(2).build())
.pretrain(false).backprop(true).build();
MultiLayerNetwork net = new MultiLayerNetwork(sentimentNN);
net.init();
net.setListeners(new ScoreIterationListener(1));
WordVectors wordVectors = WordVectorSerializer.loadGoogleModel(new File(GNEWS_VECTORS_PATH), true, false);
DataSetIterator trainData = new AsyncDataSetIterator(new SentimentExampleIterator(EXTRACT_DATA_PATH, wordVectors, batchSize, truncateReviewsToLength, true), 1);
DataSetIterator testData = new AsyncDataSetIterator(new SentimentExampleIterator(EXTRACT_DATA_PATH, wordVectors, 100, truncateReviewsToLength, false), 1);
for (int i = 0; i < nEpochs; i++) {
net.fit(trainData);
trainData.reset();
Evaluation evaluation = new Evaluation();
while (testData.hasNext()) {
DataSet t = testData.next();
INDArray dataFeatures = t.getFeatureMatrix();
INDArray dataLabels = t.getLabels();
INDArray inMask = t.getFeaturesMaskArray();
INDArray outMask = t.getLabelsMaskArray();
INDArray predicted = net.output(dataFeatures, false, inMask, outMask);
evaluation.evalTimeSeries(dataLabels, predicted, outMask);
}
testData.reset();
System.out.println(evaluation.stats());
}
}
Developer: PacktPublishing, Project: Machine-Learning-End-to-Endguide-for-Java-developers, Lines: 54, Source: DL4JSentimentAnalysisExample.java
Example 5: buildLstmNetworks
import org.deeplearning4j.nn.conf.layers.GravesLSTM; // import the required package/class
public static MultiLayerNetwork buildLstmNetworks(int nIn, int nOut) {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.seed(seed)
.iterations(iterations)
.learningRate(learningRate)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.XAVIER)
.updater(Updater.RMSPROP)
.regularization(true)
.l2(1e-4)
.list()
.layer(0, new GravesLSTM.Builder()
.nIn(nIn)
.nOut(lstmLayer1Size)
.activation(Activation.TANH)
.gateActivationFunction(Activation.HARDSIGMOID)
.dropOut(dropoutRatio)
.build())
.layer(1, new GravesLSTM.Builder()
.nIn(lstmLayer1Size)
.nOut(lstmLayer2Size)
.activation(Activation.TANH)
.gateActivationFunction(Activation.HARDSIGMOID)
.dropOut(dropoutRatio)
.build())
.layer(2, new DenseLayer.Builder()
.nIn(lstmLayer2Size)
.nOut(denseLayerSize)
.activation(Activation.RELU)
.build())
.layer(3, new RnnOutputLayer.Builder()
.nIn(denseLayerSize)
.nOut(nOut)
.activation(Activation.IDENTITY)
.lossFunction(LossFunctions.LossFunction.MSE)
.build())
.backpropType(BackpropType.TruncatedBPTT)
.tBPTTForwardLength(truncatedBPTTLength)
.tBPTTBackwardLength(truncatedBPTTLength)
.pretrain(false)
.backprop(true)
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
net.setListeners(new ScoreIterationListener(100));
return net;
}
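A hedged usage sketch for buildLstmNetworks; featureCount, nEpochs, trainIterator, and nextStepFeatures are hypothetical names, and the iterator is assumed to yield 3D time-series DataSets shaped [miniBatch, featureCount, timeSteps].
MultiLayerNetwork net = buildLstmNetworks(featureCount, 1); // regression with a single output per time step
for (int epoch = 0; epoch < nEpochs; epoch++) {
    net.fit(trainIterator);
    trainIterator.reset();
}
INDArray prediction = net.rnnTimeStep(nextStepFeatures); // one-step-ahead forecast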
Example 6: main
import org.deeplearning4j.nn.conf.layers.GravesLSTM; // import the required package/class
public static void main(String[] args) throws Exception {
downloadData();
int batchSize = 50;
int vectorSize = 300;
int nEpochs = 5;
int truncateReviewsToLength = 300;
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
.updater(Updater.RMSPROP)
.regularization(true).l2(1e-5)
.weightInit(WeightInit.XAVIER)
.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(1.0)
.learningRate(0.0018)
.list()
.layer(0, new GravesLSTM.Builder().nIn(vectorSize).nOut(200)
.activation("softsign").build())
.layer(1, new RnnOutputLayer.Builder().activation("softmax")
.lossFunction(LossFunctions.LossFunction.MCXENT).nIn(200).nOut(2).build())
.pretrain(false)
.backprop(true)
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
net.setListeners(new ScoreIterationListener(1));
WordVectors wordVectors = WordVectorSerializer.loadGoogleModel(new File(WORD_VECTORS_PATH), true, false);
DataSetIterator train = new AsyncDataSetIterator(new SentimentExampleIterator(DATA_PATH,wordVectors,batchSize,truncateReviewsToLength,true),1);
DataSetIterator test = new AsyncDataSetIterator(new SentimentExampleIterator(DATA_PATH,wordVectors,100,truncateReviewsToLength,false),1);
System.out.println("Starting training");
for( int i=0; i<nEpochs; i++ ){
net.fit(train);
train.reset();
System.out.println("Epoch " + i + " complete. Starting evaluation:");
Evaluation evaluation = new Evaluation();
while(test.hasNext()){
DataSet t = test.next();
INDArray features = t.getFeatureMatrix();
INDArray labels = t.getLabels();
INDArray inMask = t.getFeaturesMaskArray();
INDArray outMask = t.getLabelsMaskArray();
INDArray predicted = net.output(features,false,inMask,outMask);
evaluation.evalTimeSeries(labels,predicted,outMask);
}
test.reset();
System.out.println(evaluation.stats());
}
}
Example 7: createComputationGraph
import org.deeplearning4j.nn.conf.layers.GravesLSTM; // import the required package/class
/**
* Configure and initialize the computation graph. This is done once in the
* beginning to prepare the computation graph for training.
*/
public static ComputationGraph createComputationGraph (Map<String, Double> dict) {
final NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
.iterations(1)
.learningRate(LEARNING_RATE)
.rmsDecay(RMS_DECAY)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.miniBatch(true)
.updater(Updater.RMSPROP)
.weightInit(WeightInit.XAVIER)
.gradientNormalization(GradientNormalization.RenormalizeL2PerLayer);
final ComputationGraphConfiguration.GraphBuilder graphBuilder = builder.graphBuilder()
.addInputs("inputLine", "decoderInput")
.setInputTypes(InputType.recurrent(dict.size()), InputType.recurrent(dict.size()))
.addLayer("embeddingEncoder",
new EmbeddingLayer.Builder()
.nIn(dict.size())
.nOut(EMBEDDING_WIDTH)
.build(),
"inputLine")
.addLayer("encoder",
new GravesLSTM.Builder()
.nIn(EMBEDDING_WIDTH)
.nOut(HIDDEN_LAYER_WIDTH)
.activation(Activation.TANH)
.gateActivationFunction(Activation.HARDSIGMOID)
.build(),
"embeddingEncoder")
.addVertex("thoughtVector",
new LastTimeStepVertex("inputLine"),
"encoder")
.addVertex("dup",
new DuplicateToTimeSeriesVertex("decoderInput"),
"thoughtVector")
.addVertex("merge",
new MergeVertex(),
"decoderInput",
"dup")
.addLayer("decoder",
new GravesLSTM.Builder()
.nIn(dict.size() + HIDDEN_LAYER_WIDTH)
.nOut(HIDDEN_LAYER_WIDTH)
.activation(Activation.TANH)
.gateActivationFunction(Activation.HARDSIGMOID) // always be a (hard) sigmoid function
.build(),
"merge")
.addLayer("output",
new RnnOutputLayer.Builder()
.nIn(HIDDEN_LAYER_WIDTH)
.nOut(dict.size())
.activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT) // multi-class cross entropy
.build(),
"decoder")
.setOutputs("output")
.backpropType(BackpropType.Standard) // why not BackpropType.TruncatedBPTT
.tBPTTForwardLength(TBPTT_SIZE)
.tBPTTBackwardLength(TBPTT_SIZE)
.pretrain(false)
.backprop(true);
ComputationGraph net = new ComputationGraph(graphBuilder.build());
net.init();
return net;
}
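A hedged training sketch for the encoder-decoder graph above; dict and trainIterator are assumptions, with trainIterator expected to be a MultiDataSetIterator providing the "inputLine" and "decoderInput" features and the "output" labels.
ComputationGraph seq2seq = createComputationGraph(dict);
seq2seq.setListeners(new ScoreIterationListener(100));
for (int epoch = 0; epoch < numEpochs; epoch++) {
    seq2seq.fit(trainIterator);
    trainIterator.reset();
}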
Example 8: main
import org.deeplearning4j.nn.conf.layers.GravesLSTM; // import the required package/class
public static void main( String[] args ) throws Exception {
int lstmLayerSize = 200; //Number of units in each GravesLSTM layer
int miniBatchSize = 32; //Size of mini batch to use when training
int examplesPerEpoch = 50 * miniBatchSize; //i.e., how many examples to learn on between generating samples
int exampleLength = 100; //Length of each training example
int numEpochs = 30; //Total number of training + sample generation epochs
int nSamplesToGenerate = 4; //Number of samples to generate after each training epoch
int nCharactersToSample = 300; //Length of each sample to generate
String generationInitialization = null; //Optional character initialization; a random character is used if null
// The above is used to 'prime' the LSTM with a character sequence to continue/complete.
// Initialization characters must all be in CharacterIterator.getMinimalCharacterSet() by default
Random rng = new Random(12345);
//Get a DataSetIterator that handles vectorization of text into something we can use to train
// our GravesLSTM network.
CharacterIterator iter = getShakespeareIterator(miniBatchSize,exampleLength,examplesPerEpoch);
int nOut = iter.totalOutcomes();
//Set up network configuration:
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
.learningRate(0.1)
.rmsDecay(0.95)
.seed(12345)
.regularization(true)
.l2(0.001)
.list(3)
.layer(0, new GravesLSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize)
.updater(Updater.RMSPROP)
.activation("tanh").weightInit(WeightInit.DISTRIBUTION)
.dist(new UniformDistribution(-0.08, 0.08)).build())
.layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize)
.updater(Updater.RMSPROP)
.activation("tanh").weightInit(WeightInit.DISTRIBUTION)
.dist(new UniformDistribution(-0.08, 0.08)).build())
.layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation("softmax") //MCXENT + softmax for classification
.updater(Updater.RMSPROP)
.nIn(lstmLayerSize).nOut(nOut).weightInit(WeightInit.DISTRIBUTION)
.dist(new UniformDistribution(-0.08, 0.08)).build())
.pretrain(false).backprop(true)
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
net.setListeners(new ScoreIterationListener(1));
//Print the number of parameters in the network (and for each layer)
Layer[] layers = net.getLayers();
int totalNumParams = 0;
for( int i=0; i<layers.length; i++ ){
int nParams = layers[i].numParams();
System.out.println("Number of parameters in layer " + i + ": " + nParams);
totalNumParams += nParams;
}
System.out.println("Total number of network parameters: " + totalNumParams);
//Do training, and then generate and print samples from network
for( int i=0; i<numEpochs; i++ ){
net.fit(iter);
System.out.println("--------------------");
System.out.println("Completed epoch " + i );
System.out.println("Sampling characters from network given initialization \""+ (generationInitialization == null ? "" : generationInitialization) +"\"");
String[] samples = sampleCharactersFromNetwork(generationInitialization,net,iter,rng,nCharactersToSample,nSamplesToGenerate);
for( int j=0; j<samples.length; j++ ){
System.out.println("----- Sample " + j + " -----");
System.out.println(samples[j]);
System.out.println();
}
iter.reset(); //Reset iterator for another epoch
}
System.out.println("\n\nExample complete");
}
Example 9: main
import org.deeplearning4j.nn.conf.layers.GravesLSTM; // import the required package/class
public static void main( String[] args ) throws Exception {
int lstmLayerSize = 200; //Number of units in each GravesLSTM layer
int miniBatchSize = 32; //Size of mini batch to use when training
int examplesPerEpoch = 50 * miniBatchSize; //i.e., how many examples to learn on between generating samples
int exampleLength = 100; //Length of each training example
int numEpochs = 30; //Total number of training + sample generation epochs
int nSamplesToGenerate = 4; //Number of samples to generate after each training epoch
int nCharactersToSample = 130; //Length of each sample to generate
String generationInitialization = null; //Optional character initialization; a random character is used if null
// The above is used to 'prime' the LSTM with a character sequence to continue/complete.
// Initialization characters must all be in CharacterIterator.getMinimalCharacterSet() by default
Random rng = new Random(12345);
//Get a DataSetIterator that handles vectorization of text into something we can use to train
// our GravesLSTM network.
CharacterIterator iter = getShakespeareIterator(miniBatchSize,exampleLength,examplesPerEpoch);
int nOut = iter.totalOutcomes();
//Set up network configuration:
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
.learningRate(0.1)
.rmsDecay(0.95)
.seed(12345)
.regularization(true)
.l2(0.001)
.list(3)
.layer(0, new GravesLSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize)
.updater(Updater.RMSPROP)
.activation("tanh").weightInit(WeightInit.DISTRIBUTION)
.dist(new UniformDistribution(-0.08, 0.08)).build())
.layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize)
.updater(Updater.RMSPROP)
.activation("tanh").weightInit(WeightInit.DISTRIBUTION)
.dist(new UniformDistribution(-0.08, 0.08)).build())
.layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation("softmax") //MCXENT + softmax for classification
.updater(Updater.RMSPROP)
.nIn(lstmLayerSize).nOut(nOut).weightInit(WeightInit.DISTRIBUTION)
.dist(new UniformDistribution(-0.08, 0.08)).build())
.pretrain(false).backprop(true)
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
net.setListeners(new ScoreIterationListener(1));
//Print the number of parameters in the network (and for each layer)
Layer[] layers = net.getLayers();
int totalNumParams = 0;
for( int i=0; i<layers.length; i++ ){
int nParams = layers[i].numParams();
System.out.println("Number of parameters in layer " + i + ": " + nParams);
totalNumParams += nParams;
}
System.out.println("Total number of network parameters: " + totalNumParams);
String[] initStrings = { "diary", "gozer", "are", "I", "dear" };
//Do training, and then generate and print samples from network
for( int i=0; i<numEpochs; i++ ){
net.fit(iter);
System.out.println("--------------------");
System.out.println("Completed epoch " + i );
System.out.println("Sampling characters from network given initialization \""+ (generationInitialization == null ? "" : generationInitialization) +"\"");
String[] samples = sampleCharactersFromNetwork( initStrings[ i % initStrings.length ] ,net,iter,rng,nCharactersToSample,nSamplesToGenerate);
for( int j=0; j<samples.length; j++ ){
System.out.println("----- Sample " + j + " -----");
System.out.println( "Init String: " + initStrings[ i % initStrings.length ] );
System.out.println(samples[j]);
System.out.println();
}
iter.reset(); //Reset iterator for another epoch
}
System.out.println("\n\nExample complete");
}
Developer: jpatanooga, Project: dl4j-rnn-timeseries-examples, Lines: 79, Source: LSTM_GenericTimeseriesClassification.java
Example 10: main
import org.deeplearning4j.nn.conf.layers.GravesLSTM; // import the required package/class
public static void main( String[] args ) throws Exception {
int lstmLayerSize = 200; //Number of units in each GravesLSTM layer
int miniBatchSize = 32; //Size of mini batch to use when training
int exampleLength = 1000; //Length of each training example sequence to use. This could certainly be increased
int tbpttLength = 50; //Length for truncated backpropagation through time. i.e., do parameter updates every 50 characters
int numEpochs = 1; //Total number of training epochs
int generateSamplesEveryNMinibatches = 10; //How frequently to generate samples from the network? 1000 characters / 50 tbptt length: 20 parameter updates per minibatch
int nSamplesToGenerate = 4; //Number of samples to generate after each training epoch
int nCharactersToSample = 300; //Length of each sample to generate
String generationInitialization = null; //Optional character initialization; a random character is used if null
// The above is used to 'prime' the LSTM with a character sequence to continue/complete.
// Initialization characters must all be in CharacterIterator.getMinimalCharacterSet() by default
Random rng = new Random(12345);
//Get a DataSetIterator that handles vectorization of text into something we can use to train
// our GravesLSTM network.
CharacterIterator iter = getShakespeareIterator(miniBatchSize,exampleLength);
int nOut = iter.totalOutcomes();
//Set up network configuration:
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
.learningRate(0.1)
.rmsDecay(0.95)
.seed(12345)
.regularization(true)
.l2(0.001)
.weightInit(WeightInit.XAVIER)
.updater(Updater.RMSPROP)
.list()
.layer(0, new GravesLSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize)
.activation("tanh").build())
.layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize)
.activation("tanh").build())
.layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation("softmax") //MCXENT + softmax for classification
.nIn(lstmLayerSize).nOut(nOut).build())
.backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength)
.pretrain(false).backprop(true)
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
net.setListeners(new ScoreIterationListener(1));
//Print the number of parameters in the network (and for each layer)
Layer[] layers = net.getLayers();
int totalNumParams = 0;
for( int i=0; i<layers.length; i++ ){
int nParams = layers[i].numParams();
System.out.println("Number of parameters in layer " + i + ": " + nParams);
totalNumParams += nParams;
}
System.out.println("Total number of network parameters: " + totalNumParams);
//Do training, and then generate and print samples from network
int miniBatchNumber = 0;
for( int i=0; i<numEpochs; i++ ){
while(iter.hasNext()){
DataSet ds = iter.next();
net.fit(ds);
if(++miniBatchNumber % generateSamplesEveryNMinibatches == 0){
System.out.println("--------------------");
System.out.println("Completed " + miniBatchNumber + " minibatches of size " + miniBatchSize + "x" + exampleLength + " characters" );
System.out.println("Sampling characters from network given initialization \"" + (generationInitialization == null ? "" : generationInitialization) + "\"");
String[] samples = sampleCharactersFromNetwork(generationInitialization,net,iter,rng,nCharactersToSample,nSamplesToGenerate);
for( int j=0; j<samples.length; j++ ){
System.out.println("----- Sample " + j + " -----");
System.out.println(samples[j]);
System.out.println();
}
}
}
iter.reset(); //Reset iterator for another epoch
}
System.out.println("\n\nExample complete");
}
Example 11: LSTMTrainer
import org.deeplearning4j.nn.conf.layers.GravesLSTM; // import the required package/class
/**
* Constructor
* @param trainingSet Text file containing several ABC music files
* @throws IOException
*/
public LSTMTrainer(String trainingSet, int seed) throws IOException {
lstmLayerSize_ = 200; // original 200
batchSize_ = 32; // original 32
truncatedBackPropThroughTimeLength_ = 50;
nbEpochs_ = 100;
learningRate_ = 0.04; // 0.1 original // best 0.05 3epochs
generateSamplesEveryNMinibatches_ = 200;
generationInitialization_ = "X";
seed_ = seed;
random_ = new Random(seed);
output_ = null;
trainingSetIterator_ = new ABCIterator(trainingSet, Charset.forName("ASCII"), batchSize_, random_);
charToInt_ = trainingSetIterator_.getCharToInt();
intToChar_ = trainingSetIterator_.getIntToChar();
exampleLength_ = trainingSetIterator_.getExampleLength();
int nOut = trainingSetIterator_.totalOutcomes();
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
.learningRate(learningRate_)
.rmsDecay(0.95) // 0.95 original
.seed(seed_)
.regularization(true) // true original
.l2(0.001)
.weightInit(WeightInit.XAVIER)
.updater(Updater.RMSPROP)
.list()
.layer(0, new GravesLSTM.Builder().nIn(trainingSetIterator_.inputColumns()).nOut(lstmLayerSize_)
.activation("tanh").build())
.layer(1, new GravesLSTM.Builder().nIn(lstmLayerSize_).nOut(lstmLayerSize_)
.activation("tanh").build())
.layer(2, new GravesLSTM.Builder().nIn(lstmLayerSize_).nOut(lstmLayerSize_)
.activation("tanh").build())
.layer(3, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation("softmax")
.nIn(lstmLayerSize_).nOut(nOut).build())
.backpropType(BackpropType.TruncatedBPTT)
.tBPTTForwardLength(truncatedBackPropThroughTimeLength_)
.tBPTTBackwardLength(truncatedBackPropThroughTimeLength_)
.pretrain(false).backprop(true)
.build();
lstmNet_ = new MultiLayerNetwork(conf);
lstmNet_.init();
//lstmNet_.setListeners(new ScoreIterationListener(1));
//lstmNet_.setListeners(new HistogramIterationListener(1));
UIServer uiServer = UIServer.getInstance();
StatsStorage statsStorage = new InMemoryStatsStorage();
uiServer.attach(statsStorage);
lstmNet_.setListeners(new StatsListener(statsStorage));
if (ExecutionParameters.verbose) {
Layer[] layers = lstmNet_.getLayers();
int totalNumParams = 0;
for (int i = 0; i < layers.length; i++) {
int nParams = layers[i].numParams();
System.out.println("Number of parameters in layer " + i + ": " + nParams);
totalNumParams += nParams;
}
System.out.println("Total number of network parameters: " + totalNumParams);
}
}
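A hedged construction sketch for the trainer above; the corpus path is a placeholder, and any training or generation methods of LSTMTrainer fall outside this excerpt.
// Placeholder path to a text file containing several ABC tunes.
LSTMTrainer trainer = new LSTMTrainer("data/abc_corpus.txt", 12345);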
Example 12: main
import org.deeplearning4j.nn.conf.layers.GravesLSTM; // import the required package/class
/**
* args[0] input: word2vec file name
* args[1] input: parent directory containing the train/test data
* args[2] output: output directory name
*
* @param args
* @throws Exception
*/
public static void main (final String[] args) throws Exception {
if (args.length < 3) // guard against missing arguments instead of indexing into args
System.exit(1);
WordVectors wvec = WordVectorSerializer.loadTxtVectors(new File(args[0]));
int numInputs = wvec.lookupTable().layerSize();
int numOutputs = 2; // FIXME positive or negative
int batchSize = 16;//100;
int testBatch = 64;
int nEpochs = 5000;
int thresEpochs = 10;
double minImprovement = 1e-5;
int listenfreq = 10;
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.seed(7485)
//.updater(Updater.RMSPROP)
.updater(Updater.ADADELTA)
//.learningRate(0.001) //RMSPROP
//.rmsDecay(0.90) //RMSPROP
.rho(0.95) //ADADELTA
.epsilon(1e-5) //1e-8 //ALL
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.XAVIER)
.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
.gradientNormalizationThreshold(1.0)
//.regularization(true)
//.l2(1e-5)
.list()
.layer(0, new GravesLSTM.Builder()
.nIn(numInputs).nOut(numInputs)
.activation("softsign")
.build())
.layer(1, new RnnOutputLayer.Builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation("softmax")
.nIn(numInputs).nOut(numOutputs)
.build())
.pretrain(false).backprop(true).build();
MultiLayerNetwork model = new MultiLayerNetwork(conf);
model.init();
model.setListeners(new ScoreIterationListener(listenfreq));
//model.setListeners(new HistogramIterationListener(listenfreq)); //FIXME error occur
LOG.info("Starting training");
DataSetIterator train = new AsyncDataSetIterator(
new SentimentRecurrentIterator(args[1],wvec,batchSize,300,true),2);
DataSetIterator test = new AsyncDataSetIterator(
new SentimentRecurrentIterator(args[1],wvec,testBatch,300,false),2);
EarlyStoppingModelSaver<MultiLayerNetwork> saver = new LocalFileModelSaver(args[2]);//new InMemoryModelSaver<>();
EarlyStoppingConfiguration<MultiLayerNetwork> esConf = new EarlyStoppingConfiguration.Builder<MultiLayerNetwork>()
.epochTerminationConditions(
new MaxEpochsTerminationCondition(nEpochs),
new ScoreImprovementEpochTerminationCondition(thresEpochs,minImprovement))
.scoreCalculator(new DataSetLossCalculator(test, true))
.modelSaver(saver)
.build();
IEarlyStoppingTrainer<MultiLayerNetwork> trainer = new EarlyStoppingTrainer(esConf,model,train);
EarlyStoppingResult<MultiLayerNetwork> result = trainer.fit();
LOG.info("Termination reason: " + result.getTerminationReason());
LOG.info("Termination details: " + result.getTerminationDetails());
LOG.info("Total epochs: " + result.getTotalEpochs());
LOG.info("Best epoch number: " + result.getBestModelEpoch());
LOG.info("Score at best epoch: " + result.getBestModelScore());
//LOG.info("Save model");
//MultiLayerNetwork best = result.getBestModel();
//ModelSerializer.writeModel(best, new FileOutputStream(args[2]+"/sentiment.rnn.es.model"), true);
}
Example 13: main
import org.deeplearning4j.nn.conf.layers.GravesLSTM; // import the required package/class
/**
* args[0] input: word2vec file name
* args[1] input: parent directory containing the train/test data
* args[2] output: trained model file name
*
* @param args
* @throws Exception
*/
public static void main (final String[] args) throws Exception {
if (args.length < 3) // guard against missing arguments instead of indexing into args
System.exit(1);
WordVectors wvec = WordVectorSerializer.loadTxtVectors(new File(args[0]));
int numInputs = wvec.lookupTable().layerSize();
int numOutputs = 2; // FIXME positive or negative
int batchSize = 16;//100;
int testBatch = 64;
int nEpochs = 5000;
int listenfreq = 10;
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.seed(7485)
.updater(Updater.RMSPROP) //ADADELTA
.learningRate(0.001) //RMSPROP
.rmsDecay(0.90) //RMSPROP
//.rho(0.95) //ADADELTA
.epsilon(1e-8) //ALL
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.weightInit(WeightInit.XAVIER)
.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
.gradientNormalizationThreshold(1.0)
//.regularization(true)
//.l2(1e-5)
.list()
.layer(0, new GravesLSTM.Builder()
.nIn(numInputs).nOut(numInputs)
.activation("softsign")
.build())
.layer(1, new RnnOutputLayer.Builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation("softmax")
.nIn(numInputs).nOut(numOutputs)
.build())
.pretrain(false).backprop(true).build();
MultiLayerNetwork model = new MultiLayerNetwork(conf);
model.init();
model.setListeners(new ScoreIterationListener(listenfreq));
LOG.info("Starting training");
DataSetIterator train = new AsyncDataSetIterator(
new SentimentRecurrentIterator(args[1],wvec,batchSize,300,true),2);
DataSetIterator test = new AsyncDataSetIterator(
new SentimentRecurrentIterator(args[1],wvec,testBatch,300,false),2);
for( int i=0; i<nEpochs; i++ ){
model.fit(train);
train.reset();
LOG.info("Epoch " + i + " complete. Starting evaluation:");
Evaluation evaluation = new Evaluation();
while(test.hasNext()) {
DataSet t = test.next();
INDArray features = t.getFeatures();
INDArray labels = t.getLabels();
INDArray inMask = t.getFeaturesMaskArray();
INDArray outMask = t.getLabelsMaskArray();
INDArray predicted = model.output(features,false,inMask,outMask);
evaluation.evalTimeSeries(labels,predicted,outMask);
}
test.reset();
LOG.info(evaluation.stats());
LOG.info("Save model");
ModelSerializer.writeModel(model, new FileOutputStream(args[2]), true);
}
}
Example 14: testRWInit
import org.deeplearning4j.nn.conf.layers.GravesLSTM; // import the required package/class
@Test
public void testRWInit() {
for (boolean rwInit : new boolean[]{false, true}) {
for (int i = 0; i < 3; i++) {
NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder()
.weightInit(new UniformDistribution(0, 1))
.list();
if(rwInit) {
switch (i) {
case 0:
b.layer(new LSTM.Builder().nIn(10).nOut(10)
.weightInitRecurrent(new UniformDistribution(2, 3))
.build());
break;
case 1:
b.layer(new GravesLSTM.Builder().nIn(10).nOut(10)
.weightInitRecurrent(new UniformDistribution(2, 3))
.build());
break;
case 2:
b.layer(new SimpleRnn.Builder().nIn(10).nOut(10)
.weightInitRecurrent(new UniformDistribution(2, 3)).build());
break;
default:
throw new RuntimeException();
}
} else {
switch (i) {
case 0:
b.layer(new LSTM.Builder().nIn(10).nOut(10).build());
break;
case 1:
b.layer(new GravesLSTM.Builder().nIn(10).nOut(10).build());
break;
case 2:
b.layer(new SimpleRnn.Builder().nIn(10).nOut(10).build());
break;
default:
throw new RuntimeException();
}
}
MultiLayerNetwork net = new MultiLayerNetwork(b.build());
net.init();
INDArray rw = net.getParam("0_RW");
double min = rw.minNumber().doubleValue();
double max = rw.maxNumber().doubleValue();
if(rwInit){
assertTrue(String.valueOf(min), min >= 2.0);
assertTrue(String.valueOf(max), max <= 3.0);
} else {
assertTrue(String.valueOf(min), min >= 0.0);
assertTrue(String.valueOf(max), max <= 1.0);
}
}
}
}
Example 15: testSerialization
import org.deeplearning4j.nn.conf.layers.GravesLSTM; // import the required package/class
@Test
public void testSerialization() throws Exception {
for(WorkspaceMode wsm : WorkspaceMode.values()) {
log.info("*** Starting workspace mode: " + wsm);
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder()
.activation(Activation.TANH)
.weightInit(WeightInit.XAVIER)
.trainingWorkspaceMode(wsm)
.inferenceWorkspaceMode(wsm)
.updater(new Adam())
.list()
.layer(new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).build()))
.layer(new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).build()))
.layer(new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE)
.nIn(10).nOut(10).build())
.build();
MultiLayerNetwork net1 = new MultiLayerNetwork(conf1);
net1.init();
INDArray in = Nd4j.rand(new int[]{3, 10, 5});
INDArray labels = Nd4j.rand(new int[]{3, 10, 5});
net1.fit(in, labels);
byte[] bytes;
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
ModelSerializer.writeModel(net1, baos, true);
bytes = baos.toByteArray();
}
MultiLayerNetwork net2 = ModelSerializer.restoreMultiLayerNetwork(new ByteArrayInputStream(bytes), true);
in = Nd4j.rand(new int[]{3, 10, 5});
labels = Nd4j.rand(new int[]{3, 10, 5});
INDArray out1 = net1.output(in);
INDArray out2 = net2.output(in);
assertEquals(out1, out2);
net1.setInput(in);
net2.setInput(in);
net1.setLabels(labels);
net2.setLabels(labels);
net1.computeGradientAndScore();
net2.computeGradientAndScore();
assertEquals(net1.score(), net2.score(), 1e-6);
assertEquals(net1.gradient().gradient(), net2.gradient().gradient());
}
}