This article collects typical usage examples of the Java method org.deeplearning4j.nn.conf.NeuralNetConfiguration.addVariable. If you are wondering how NeuralNetConfiguration.addVariable is used in Java, or are looking for concrete examples of it, the hand-picked code samples below may help. You can also explore further usage examples of the enclosing class, org.deeplearning4j.nn.conf.NeuralNetConfiguration.
The following section presents 14 code examples of the NeuralNetConfiguration.addVariable method.
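Before the examples, here is a minimal, self-contained sketch of the pattern they all share: a NeuralNetConfiguration is built for a layer, and addVariable registers each parameter key that the layer's parameter map will contain. The single DenseLayer, the Sgd updater, and the literal keys "W" and "b" are illustrative assumptions; in normal use the layer's ParamInitializer makes these calls, as the examples below show.

import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.nd4j.linalg.learning.config.Sgd;

public class AddVariableSketch {
    public static void main(String[] args) {
        // Minimal single-layer configuration; the layer sizes and updater are placeholders.
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .updater(new Sgd(0.1))
                .layer(new DenseLayer.Builder().nIn(4).nOut(3).build())
                .build();

        // Register the parameter keys for this layer. This is normally done by the
        // layer's ParamInitializer (Examples 1-7 and 10-12); the optimizer tests
        // (Examples 8, 9, 13, 14) call it by hand instead.
        conf.addVariable("W");
        conf.addVariable("b");

        System.out.println(conf.variables()); // [W, b]
    }
}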
Example 1: init
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; // import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = super.init(conf, paramsView, initializeParams);
    org.deeplearning4j.nn.conf.layers.BasePretrainNetwork layerConf =
                    (org.deeplearning4j.nn.conf.layers.BasePretrainNetwork) conf.getLayer();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();
    int nWeightParams = nIn * nOut;
    INDArray visibleBiasView = paramsView.get(NDArrayIndex.point(0),
                    NDArrayIndex.interval(nWeightParams + nOut, nWeightParams + nOut + nIn));
    params.put(VISIBLE_BIAS_KEY, createVisibleBias(conf, visibleBiasView, initializeParams));
    conf.addVariable(VISIBLE_BIAS_KEY);
    return params;
}
Example 2: init
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; // import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    SeparableConvolution2D layer = (SeparableConvolution2D) conf.getLayer();
    if (layer.getKernelSize().length != 2) throw new IllegalArgumentException("Filter size must be == 2");
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    SeparableConvolution2D layerConf = (SeparableConvolution2D) conf.getLayer();
    int depthWiseParams = numDepthWiseParams(layerConf);
    int biasParams = numBiasParams(layerConf);
    INDArray depthWiseWeightView = paramsView.get(
                    NDArrayIndex.point(0), NDArrayIndex.interval(biasParams, biasParams + depthWiseParams));
    INDArray pointWiseWeightView = paramsView.get(
                    NDArrayIndex.point(0), NDArrayIndex.interval(biasParams + depthWiseParams, numParams(conf)));
    params.put(DEPTH_WISE_WEIGHT_KEY, createDepthWiseWeightMatrix(conf, depthWiseWeightView, initializeParams));
    conf.addVariable(DEPTH_WISE_WEIGHT_KEY);
    params.put(POINT_WISE_WEIGHT_KEY, createPointWiseWeightMatrix(conf, pointWiseWeightView, initializeParams));
    conf.addVariable(POINT_WISE_WEIGHT_KEY);
    if (layer.hasBias()) {
        INDArray biasView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, biasParams));
        params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
        conf.addVariable(BIAS_KEY);
    }
    return params;
}
Example 3: init
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; // import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    SimpleRnn c = (SimpleRnn) conf.getLayer();
    int nIn = c.getNIn();
    int nOut = c.getNOut();
    Map<String, INDArray> m;
    if (initializeParams) {
        Distribution dist = Distributions.createDistribution(c.getDist());
        m = getSubsets(paramsView, nIn, nOut, false);
        INDArray w = WeightInitUtil.initWeights(nIn, nOut, new int[]{nIn, nOut}, c.getWeightInit(), dist, 'f', m.get(WEIGHT_KEY));
        m.put(WEIGHT_KEY, w);
        WeightInit rwInit;
        Distribution rwDist;
        if (c.getWeightInitRecurrent() != null) {
            rwInit = c.getWeightInitRecurrent();
            rwDist = Distributions.createDistribution(c.getDistRecurrent());
        } else {
            rwInit = c.getWeightInit();
            rwDist = dist;
        }
        INDArray rw = WeightInitUtil.initWeights(nOut, nOut, new int[]{nOut, nOut}, rwInit, rwDist, 'f', m.get(RECURRENT_WEIGHT_KEY));
        m.put(RECURRENT_WEIGHT_KEY, rw);
    } else {
        m = getSubsets(paramsView, nIn, nOut, true);
    }
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(RECURRENT_WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
    return m;
}
Example 4: init
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; // import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer layerConf =
                    (org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer) conf.getLayer();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut(); // also equal to numClasses
    int wEndOffset = nIn * nOut;
    int bEndOffset = wEndOffset + nOut;
    int cEndOffset = bEndOffset + nIn * nOut;
    INDArray weightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, wEndOffset));
    INDArray biasView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(wEndOffset, bEndOffset));
    INDArray centerLossView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(bEndOffset, cEndOffset))
                    .reshape('c', nOut, nIn);
    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
    params.put(CENTER_KEY, createCenterLossMatrix(conf, centerLossView, initializeParams));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
    conf.addVariable(CENTER_KEY);
    return params;
}
Example 5: init
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; // import the package/class this method depends on
/**
 * Initialize the parameters
 *
 * @param conf             the configuration
 * @param paramsView       a view of the full network (backprop) parameters
 * @param initializeParams if true: initialize the parameters according to the configuration. If false: don't modify the
 *                         values in the paramsView array (but do select out the appropriate subset, reshape etc as required)
 * @return Map of parameters keyed by type (view of the 'paramsView' array)
 */
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    if (!(conf.getLayer() instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer))
        throw new IllegalArgumentException("unsupported layer type: " + conf.getLayer().getClass().getName());
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    int length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                        "Expected params view of length " + length + ", got length " + paramsView.length());
    org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
                    (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();
    int nIn = layerConf.getNIn();
    int nWeightParams = nIn;
    INDArray weightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nWeightParams));
    INDArray biasView = paramsView.get(NDArrayIndex.point(0),
                    NDArrayIndex.interval(nWeightParams, nWeightParams + nIn));
    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
    return params;
}
Example 6: init
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; // import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    if (!(conf.getLayer() instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer))
        throw new IllegalArgumentException("unsupported layer type: " + conf.getLayer().getClass().getName());
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    int length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                        "Expected params view of length " + length + ", got length " + paramsView.length());
    org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
                    (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();
    int nWeightParams = nIn * nOut;
    INDArray weightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nWeightParams));
    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    conf.addVariable(WEIGHT_KEY);
    if (hasBias(layerConf)) {
        INDArray biasView = paramsView.get(NDArrayIndex.point(0),
                        NDArrayIndex.interval(nWeightParams, nWeightParams + nOut));
        params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
        conf.addVariable(BIAS_KEY);
    }
    return params;
}
Example 7: init
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; // import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    // TODO setup for RNN
    BatchNormalization layer = (BatchNormalization) conf.getLayer();
    int nOut = layer.getNOut();
    int meanOffset = 0;
    if (!layer.isLockGammaBeta()) { //No gamma/beta parameters when gamma/beta are locked
        INDArray gammaView = paramView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nOut));
        INDArray betaView = paramView.get(NDArrayIndex.point(0), NDArrayIndex.interval(nOut, 2 * nOut));
        params.put(GAMMA, createGamma(conf, gammaView, initializeParams));
        conf.addVariable(GAMMA);
        params.put(BETA, createBeta(conf, betaView, initializeParams));
        conf.addVariable(BETA);
        meanOffset = 2 * nOut;
    }
    INDArray globalMeanView =
                    paramView.get(NDArrayIndex.point(0), NDArrayIndex.interval(meanOffset, meanOffset + nOut));
    INDArray globalVarView = paramView.get(NDArrayIndex.point(0),
                    NDArrayIndex.interval(meanOffset + nOut, meanOffset + 2 * nOut));
    if (initializeParams) {
        globalMeanView.assign(0);
        globalVarView.assign(1);
    }
    params.put(GLOBAL_MEAN, globalMeanView);
    conf.addVariable(GLOBAL_MEAN);
    params.put(GLOBAL_VAR, globalVarView);
    conf.addVariable(GLOBAL_VAR);
    return params;
}
Example 8: testSphereFnMultipleStepsHelper
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; // import the package/class this method depends on
private static void testSphereFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
                int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];
    for (int i = 0; i <= nOptIter; i++) {
        Random rng = new DefaultRandom(12345L);
        org.nd4j.linalg.api.rng.distribution.Distribution dist =
                        new org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(rng, -10, 10);
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                        .maxNumLineSearchIterations(maxNumLineSearchIter).updater(new Sgd(0.1))
                        .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
        conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here
        Model m = new SphereFunctionModel(100, dist, conf);
        if (i == 0) {
            m.computeGradientAndScore();
            scores[0] = m.score(); //Before optimization
        } else {
            ConvexOptimizer opt = getOptimizer(oa, conf, m);
            for (int j = 0; j < 100; j++) {
                opt.optimize();
            }
            m.computeGradientAndScore();
            scores[i] = m.score();
            assertTrue(!Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]));
        }
    }
    if (PRINT_OPT_RESULTS) {
        System.out.println("Multiple optimization iterations (" + nOptIter
                        + " opt. iter.) score vs iteration, maxNumLineSearchIter=" + maxNumLineSearchIter + ": "
                        + oa);
        System.out.println(Arrays.toString(scores));
    }
    for (int i = 1; i < scores.length; i++) {
        assertTrue(scores[i] <= scores[i - 1]);
    }
    assertTrue(scores[scores.length - 1] < 1.0); //Very easy function, expect score ~= 0 with any reasonable number of steps/numLineSearchIter
}
Example 9: testRastriginFnMultipleStepsHelper
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; // import the package/class this method depends on
private static void testRastriginFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
                int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];
    for (int i = 0; i <= nOptIter; i++) {
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                        .maxNumLineSearchIterations(maxNumLineSearchIter).miniBatch(false)
                        .updater(new AdaGrad(1e-2))
                        .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
        conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here
        Model m = new RastriginFunctionModel(10, conf);
        int nParams = m.numParams();
        if (i == 0) {
            m.computeGradientAndScore();
            scores[0] = m.score(); //Before optimization
        } else {
            ConvexOptimizer opt = getOptimizer(oa, conf, m);
            opt.getUpdater().setStateViewArray((Layer) m, Nd4j.create(new int[] {1, nParams}, 'c'), true);
            opt.optimize();
            m.computeGradientAndScore();
            scores[i] = m.score();
            assertTrue(!Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]));
        }
    }
    if (PRINT_OPT_RESULTS) {
        System.out.println("Rastrigin: Multiple optimization iterations (" + nOptIter
                        + " opt. iter.) score vs iteration, maxNumLineSearchIter=" + maxNumLineSearchIter + ": "
                        + oa);
        System.out.println(Arrays.toString(scores));
    }
    for (int i = 1; i < scores.length; i++) {
        if (i == 1) {
            assertTrue(scores[i] <= scores[i - 1]); //Require at least one step of improvement
        } else {
            assertTrue(scores[i] <= scores[i - 1]);
        }
    }
}
Example 10: init
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; // import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM layerConf =
                    (org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();
    Distribution dist = Distributions.createDistribution(layerConf.getDist());
    int nL = layerConf.getNOut(); //i.e., n neurons in this layer
    int nLast = layerConf.getNIn(); //i.e., n neurons in previous layer
    conf.addVariable(INPUT_WEIGHT_KEY_FORWARDS);
    conf.addVariable(RECURRENT_WEIGHT_KEY_FORWARDS);
    conf.addVariable(BIAS_KEY_FORWARDS);
    conf.addVariable(INPUT_WEIGHT_KEY_BACKWARDS);
    conf.addVariable(RECURRENT_WEIGHT_KEY_BACKWARDS);
    conf.addVariable(BIAS_KEY_BACKWARDS);
    int nParamsInput = nLast * (4 * nL);
    int nParamsRecurrent = nL * (4 * nL + 3);
    int nBias = 4 * nL;
    int rwFOffset = nParamsInput;
    int bFOffset = rwFOffset + nParamsRecurrent;
    int iwROffset = bFOffset + nBias;
    int rwROffset = iwROffset + nParamsInput;
    int bROffset = rwROffset + nParamsRecurrent;
    INDArray iwF = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, rwFOffset));
    INDArray rwF = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(rwFOffset, bFOffset));
    INDArray bF = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(bFOffset, iwROffset));
    INDArray iwR = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(iwROffset, rwROffset));
    INDArray rwR = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(rwROffset, bROffset));
    INDArray bR = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(bROffset, bROffset + nBias));
    if (initializeParams) {
        bF.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL)},
                        Nd4j.ones(1, nL).muli(forgetGateInit)); //Order: input, forget, output, input modulation, i.e., IFOG
        bR.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL)},
                        Nd4j.ones(1, nL).muli(forgetGateInit));
    }
    /*The above line initializes the forget gate biases to specified value.
     * See Sutskever PhD thesis, pg19:
     * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
     * which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
     * not done, it will be harder to learn long range dependencies because the smaller values of the forget
     * gates will create a vanishing gradients problem."
     * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
     */
    if (initializeParams) {
        //As per standard LSTM
        int fanIn = nL;
        int fanOut = nLast + nL;
        int[] inputWShape = new int[] {nLast, 4 * nL};
        int[] recurrentWShape = new int[] {nL, 4 * nL + 3};
        params.put(INPUT_WEIGHT_KEY_FORWARDS, WeightInitUtil.initWeights(fanIn, fanOut, inputWShape,
                        layerConf.getWeightInit(), dist, iwF));
        params.put(RECURRENT_WEIGHT_KEY_FORWARDS, WeightInitUtil.initWeights(fanIn, fanOut, recurrentWShape,
                        layerConf.getWeightInit(), dist, rwF));
        params.put(BIAS_KEY_FORWARDS, bF);
        params.put(INPUT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.initWeights(fanIn, fanOut, inputWShape,
                        layerConf.getWeightInit(), dist, iwR));
        params.put(RECURRENT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.initWeights(fanIn, fanOut, recurrentWShape,
                        layerConf.getWeightInit(), dist, rwR));
        params.put(BIAS_KEY_BACKWARDS, bR);
    } else {
        params.put(INPUT_WEIGHT_KEY_FORWARDS, WeightInitUtil.reshapeWeights(new int[] {nLast, 4 * nL}, iwF));
        params.put(RECURRENT_WEIGHT_KEY_FORWARDS, WeightInitUtil.reshapeWeights(new int[] {nL, 4 * nL + 3}, rwF));
        params.put(BIAS_KEY_FORWARDS, bF);
        params.put(INPUT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.reshapeWeights(new int[] {nLast, 4 * nL}, iwR));
        params.put(RECURRENT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.reshapeWeights(new int[] {nL, 4 * nL + 3}, rwR));
        params.put(BIAS_KEY_BACKWARDS, bR);
    }
    return params;
}
Example 11: init
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; // import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.LSTM layerConf = (org.deeplearning4j.nn.conf.layers.LSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();
    Distribution dist = Distributions.createDistribution(layerConf.getDist());
    int nL = layerConf.getNOut(); //i.e., n neurons in this layer
    int nLast = layerConf.getNIn(); //i.e., n neurons in previous layer
    conf.addVariable(INPUT_WEIGHT_KEY);
    conf.addVariable(RECURRENT_WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
    int length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                        "Expected params view of length " + length + ", got length " + paramsView.length());
    int nParamsIn = nLast * (4 * nL);
    int nParamsRecurrent = nL * (4 * nL);
    int nBias = 4 * nL;
    INDArray inputWeightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nParamsIn));
    INDArray recurrentWeightView = paramsView.get(NDArrayIndex.point(0),
                    NDArrayIndex.interval(nParamsIn, nParamsIn + nParamsRecurrent));
    INDArray biasView = paramsView.get(NDArrayIndex.point(0),
                    NDArrayIndex.interval(nParamsIn + nParamsRecurrent, nParamsIn + nParamsRecurrent + nBias));
    if (initializeParams) {
        int fanIn = nL;
        int fanOut = nLast + nL;
        int[] inputWShape = new int[] {nLast, 4 * nL};
        int[] recurrentWShape = new int[] {nL, 4 * nL};
        WeightInit rwInit;
        Distribution rwDist;
        if (layerConf.getWeightInitRecurrent() != null) {
            rwInit = layerConf.getWeightInitRecurrent();
            rwDist = Distributions.createDistribution(layerConf.getDistRecurrent());
        } else {
            rwInit = layerConf.getWeightInit();
            rwDist = dist;
        }
        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.initWeights(fanIn, fanOut, inputWShape,
                        layerConf.getWeightInit(), dist, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY, WeightInitUtil.initWeights(fanIn, fanOut, recurrentWShape,
                        rwInit, rwDist, recurrentWeightView));
        biasView.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL)},
                        Nd4j.valueArrayOf(1, nL, forgetGateInit)); //Order: input, forget, output, input modulation, i.e., IFOG
        /*The above line initializes the forget gate biases to specified value.
         * See Sutskever PhD thesis, pg19:
         * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
         * which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
         * not done, it will be harder to learn long range dependencies because the smaller values of the forget
         * gates will create a vanishing gradients problem."
         * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
         */
        params.put(BIAS_KEY, biasView);
    } else {
        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.reshapeWeights(new int[] {nLast, 4 * nL}, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY,
                        WeightInitUtil.reshapeWeights(new int[] {nL, 4 * nL}, recurrentWeightView));
        params.put(BIAS_KEY, biasView);
    }
    return params;
}
Example 12: init
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; // import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.GravesLSTM layerConf =
                    (org.deeplearning4j.nn.conf.layers.GravesLSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();
    Distribution dist = Distributions.createDistribution(layerConf.getDist());
    int nL = layerConf.getNOut(); //i.e., n neurons in this layer
    int nLast = layerConf.getNIn(); //i.e., n neurons in previous layer
    conf.addVariable(INPUT_WEIGHT_KEY);
    conf.addVariable(RECURRENT_WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
    int length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                        "Expected params view of length " + length + ", got length " + paramsView.length());
    int nParamsIn = nLast * (4 * nL);
    int nParamsRecurrent = nL * (4 * nL + 3);
    int nBias = 4 * nL;
    INDArray inputWeightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nParamsIn));
    INDArray recurrentWeightView = paramsView.get(NDArrayIndex.point(0),
                    NDArrayIndex.interval(nParamsIn, nParamsIn + nParamsRecurrent));
    INDArray biasView = paramsView.get(NDArrayIndex.point(0),
                    NDArrayIndex.interval(nParamsIn + nParamsRecurrent, nParamsIn + nParamsRecurrent + nBias));
    if (initializeParams) {
        int fanIn = nL;
        int fanOut = nLast + nL;
        int[] inputWShape = new int[] {nLast, 4 * nL};
        int[] recurrentWShape = new int[] {nL, 4 * nL + 3};
        WeightInit rwInit;
        Distribution rwDist;
        if (layerConf.getWeightInitRecurrent() != null) {
            rwInit = layerConf.getWeightInitRecurrent();
            rwDist = Distributions.createDistribution(layerConf.getDistRecurrent());
        } else {
            rwInit = layerConf.getWeightInit();
            rwDist = dist;
        }
        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.initWeights(fanIn, fanOut, inputWShape,
                        layerConf.getWeightInit(), dist, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY, WeightInitUtil.initWeights(fanIn, fanOut, recurrentWShape,
                        rwInit, rwDist, recurrentWeightView));
        biasView.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL)},
                        Nd4j.valueArrayOf(1, nL, forgetGateInit)); //Order: input, forget, output, input modulation, i.e., IFOG
        /*The above line initializes the forget gate biases to specified value.
         * See Sutskever PhD thesis, pg19:
         * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
         * which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
         * not done, it will be harder to learn long range dependencies because the smaller values of the forget
         * gates will create a vanishing gradients problem."
         * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
         */
        params.put(BIAS_KEY, biasView);
    } else {
        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.reshapeWeights(new int[] {nLast, 4 * nL}, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY,
                        WeightInitUtil.reshapeWeights(new int[] {nL, 4 * nL + 3}, recurrentWeightView));
        params.put(BIAS_KEY, biasView);
    }
    return params;
}
Example 13: testSphereFnOptHelper
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; // import the package/class this method depends on
public void testSphereFnOptHelper(OptimizationAlgorithm oa, int numLineSearchIter, int nDimensions) {
    if (PRINT_OPT_RESULTS)
        System.out.println("---------\n Alg= " + oa + ", nIter= " + numLineSearchIter + ", nDimensions= "
                        + nDimensions);
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().maxNumLineSearchIterations(numLineSearchIter)
                    .updater(new Sgd(1e-2))
                    .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
    conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here
    Random rng = new DefaultRandom(12345L);
    org.nd4j.linalg.api.rng.distribution.Distribution dist =
                    new org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(rng, -10, 10);
    Model m = new SphereFunctionModel(nDimensions, dist, conf);
    m.computeGradientAndScore();
    double scoreBefore = m.score();
    assertTrue(!Double.isNaN(scoreBefore) && !Double.isInfinite(scoreBefore));
    if (PRINT_OPT_RESULTS) {
        System.out.println("Before:");
        System.out.println(scoreBefore);
        System.out.println(m.params());
    }
    ConvexOptimizer opt = getOptimizer(oa, conf, m);
    opt.setupSearchState(m.gradientAndScore());
    for (int i = 0; i < 100; i++) {
        opt.optimize();
    }
    m.computeGradientAndScore();
    double scoreAfter = m.score();
    assertTrue(!Double.isNaN(scoreAfter) && !Double.isInfinite(scoreAfter));
    if (PRINT_OPT_RESULTS) {
        System.out.println("After:");
        System.out.println(scoreAfter);
        System.out.println(m.params());
    }
    //Expected behaviour after optimization:
    //(a) score is better (lower) after optimization.
    //(b) Parameters are closer to minimum after optimization (TODO)
    assertTrue("Score did not improve after optimization (b= " + scoreBefore + " ,a= " + scoreAfter + ")",
                    scoreAfter < scoreBefore);
}
Example 14: testRosenbrockFnMultipleStepsHelper
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; // import the package/class this method depends on
private static void testRosenbrockFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
                int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];
    for (int i = 0; i <= nOptIter; i++) {
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                        .maxNumLineSearchIterations(maxNumLineSearchIter)
                        .updater(new Sgd(1e-1))
                        .stepFunction(new org.deeplearning4j.nn.conf.stepfunctions.NegativeDefaultStepFunction())
                        .layer(new DenseLayer.Builder().nIn(1).nOut(1).build())
                        .build();
        conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here
        Model m = new RosenbrockFunctionModel(100, conf);
        if (i == 0) {
            m.computeGradientAndScore();
            scores[0] = m.score(); //Before optimization
        } else {
            ConvexOptimizer opt = getOptimizer(oa, conf, m);
            opt.optimize();
            m.computeGradientAndScore();
            scores[i] = m.score();
            assertTrue("NaN or infinite score: " + scores[i],
                            !Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]));
        }
    }
    if (PRINT_OPT_RESULTS) {
        System.out.println("Rosenbrock: Multiple optimization iterations ( " + nOptIter
                        + " opt. iter.) score vs iteration, maxNumLineSearchIter= " + maxNumLineSearchIter + ": "
                        + oa);
        System.out.println(Arrays.toString(scores));
    }
    for (int i = 1; i < scores.length; i++) {
        if (i == 1) {
            assertTrue(scores[i] < scores[i - 1]); //Require at least one step of improvement
        } else {
            assertTrue(scores[i] <= scores[i - 1]);
        }
    }
}
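As a closing note, the init(...) methods shown above are not usually called directly. The sketch below illustrates, under stated assumptions, how they are typically reached: building a MultiLayerConfiguration and calling MultiLayerNetwork.init() runs each layer's ParamInitializer, which is where addVariable registers the parameter keys. The two-layer network, layer sizes, loss function, and printed key names are illustrative assumptions, not something taken from the examples above.

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class ParamInitializerUsage {
    public static void main(String[] args) {
        // A small two-layer configuration; sizes and loss function are placeholders.
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(3).nOut(2).build())
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        // init() allocates the flattened parameter view and runs each layer's
        // ParamInitializer.init(...), which in turn calls addVariable for its keys.
        net.init();

        // Parameter keys registered per layer, e.g. [0_W, 0_b, 1_W, 1_b]
        System.out.println(net.paramTable().keySet());
    }
}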