

Java NeuralNetConfiguration.addVariable Method Code Examples

This article collects typical usage examples of the Java method org.deeplearning4j.nn.conf.NeuralNetConfiguration.addVariable. If you have been asking yourself what NeuralNetConfiguration.addVariable does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples of org.deeplearning4j.nn.conf.NeuralNetConfiguration, the class this method belongs to.


The following presents 14 code examples of NeuralNetConfiguration.addVariable, ordered roughly by popularity.
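
Before diving into the examples, here is a minimal, self-contained sketch of the basic call pattern. It mirrors the test helpers in Examples 8, 13, and 14 below: a ParamInitializer normally registers each parameter name via addVariable during init, but code that bypasses the initializers (such as these optimizer tests) can call it directly on the built configuration. The class name AddVariableSketch is ours; everything else follows the examples on this page.

import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.nd4j.linalg.learning.config.Sgd;

public class AddVariableSketch {
    public static void main(String[] args) {
        //Build a single-layer configuration, as in the test examples below
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                        .updater(new Sgd(1e-2))
                        .layer(new DenseLayer.Builder().nIn(1).nOut(1).build())
                        .build();

        //Register a parameter name with the configuration; normally done by
        //the layer's ParamInitializer (see Examples 1-7 and 10-12 below)
        conf.addVariable("W");
    }
}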

Example 1: init

import org.deeplearning4j.nn.conf.NeuralNetConfiguration; //import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = super.init(conf, paramsView, initializeParams);

    org.deeplearning4j.nn.conf.layers.BasePretrainNetwork layerConf =
                    (org.deeplearning4j.nn.conf.layers.BasePretrainNetwork) conf.getLayer();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();
    int nWeightParams = nIn * nOut;

    INDArray visibleBiasView = paramsView.get(NDArrayIndex.point(0),
                    NDArrayIndex.interval(nWeightParams + nOut, nWeightParams + nOut + nIn));
    params.put(VISIBLE_BIAS_KEY, createVisibleBias(conf, visibleBiasView, initializeParams));
    conf.addVariable(VISIBLE_BIAS_KEY);

    return params;
}
 
Developer ID: deeplearning4j, Project: deeplearning4j, Lines of code: 18, Source file: PretrainParamInitializer.java
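
The interval arithmetic above implies a flattened parameter layout of [weights | hidden bias | visible bias] in the 1 x N params view. A standalone sketch with hypothetical sizes (nIn = 3, nOut = 2, chosen purely for illustration) makes the offsets concrete:

public class PretrainLayoutSketch {
    public static void main(String[] args) {
        int nIn = 3, nOut = 2;                       //hypothetical layer sizes
        int nWeightParams = nIn * nOut;              //6 weight entries
        int visibleBiasStart = nWeightParams + nOut; //weights, then the hidden bias from super.init
        int visibleBiasEnd = visibleBiasStart + nIn;
        //Layout of the 1 x (nIn*nOut + nOut + nIn) view:
        //[0, 6) -> weights, [6, 8) -> hidden bias, [8, 11) -> visible bias
        System.out.println("visible bias view = [" + visibleBiasStart + ", " + visibleBiasEnd + ")");
    }
}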

Example 2: init

import org.deeplearning4j.nn.conf.NeuralNetConfiguration; //import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    SeparableConvolution2D layer = (SeparableConvolution2D) conf.getLayer();
    if (layer.getKernelSize().length != 2) throw new IllegalArgumentException("Filter size must be == 2");

    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    SeparableConvolution2D layerConf = (SeparableConvolution2D) conf.getLayer();

    int depthWiseParams = numDepthWiseParams(layerConf);
    int biasParams = numBiasParams(layerConf);

    INDArray depthWiseWeightView = paramsView.get(
            NDArrayIndex.point(0), NDArrayIndex.interval(biasParams, biasParams + depthWiseParams));
    INDArray pointWiseWeightView = paramsView.get(
            NDArrayIndex.point(0), NDArrayIndex.interval(biasParams + depthWiseParams, numParams(conf)));

    params.put(DEPTH_WISE_WEIGHT_KEY, createDepthWiseWeightMatrix(conf, depthWiseWeightView, initializeParams));
    conf.addVariable(DEPTH_WISE_WEIGHT_KEY);
    params.put(POINT_WISE_WEIGHT_KEY, createPointWiseWeightMatrix(conf, pointWiseWeightView, initializeParams));
    conf.addVariable(POINT_WISE_WEIGHT_KEY);

    if(layer.hasBias()){
        INDArray biasView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, biasParams));
        params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
        conf.addVariable(BIAS_KEY);
    }

    return params;
}
 
Developer ID: deeplearning4j, Project: deeplearning4j, Lines of code: 30, Source file: SeparableConvolutionParamInitializer.java
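
Note the ordering here: unlike most of the initializers on this page, the bias (when present) occupies the start of the view, followed by the depth-wise and then the point-wise weights. A sketch with placeholder counts (the real values come from numBiasParams and numDepthWiseParams, which are defined in SeparableConvolutionParamInitializer but not shown above) illustrates the slicing:

public class SeparableConvLayoutSketch {
    public static void main(String[] args) {
        //Placeholder counts standing in for numBiasParams(layerConf) and
        //numDepthWiseParams(layerConf); the actual formulas are not shown above
        int biasParams = 3, depthWiseParams = 27, pointWiseParams = 18;
        int numParams = biasParams + depthWiseParams + pointWiseParams;
        //Bias first, then depth-wise weights, then point-wise weights:
        System.out.println("bias: [0, " + biasParams + ")");
        System.out.println("depth-wise: [" + biasParams + ", " + (biasParams + depthWiseParams) + ")");
        System.out.println("point-wise: [" + (biasParams + depthWiseParams) + ", " + numParams + ")");
    }
}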

Example 3: init

import org.deeplearning4j.nn.conf.NeuralNetConfiguration; //import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    SimpleRnn c = (SimpleRnn)conf.getLayer();
    int nIn = c.getNIn();
    int nOut = c.getNOut();

    Map<String,INDArray> m;

    if (initializeParams) {
        Distribution dist = Distributions.createDistribution(c.getDist());

        m = getSubsets(paramsView, nIn, nOut, false);
        INDArray w = WeightInitUtil.initWeights(nIn, nOut, new int[]{nIn, nOut}, c.getWeightInit(), dist, 'f', m.get(WEIGHT_KEY));
        m.put(WEIGHT_KEY, w);

        WeightInit rwInit;
        Distribution rwDist;
        if (c.getWeightInitRecurrent() != null) {
            rwInit = c.getWeightInitRecurrent();
            rwDist = Distributions.createDistribution(c.getDistRecurrent());
        } else {
            rwInit = c.getWeightInit();
            rwDist = dist;
        }

        INDArray rw = WeightInitUtil.initWeights(nOut, nOut, new int[]{nOut, nOut}, rwInit, rwDist, 'f', m.get(RECURRENT_WEIGHT_KEY));
        m.put(RECURRENT_WEIGHT_KEY, rw);
    } else {
        m = getSubsets(paramsView, nIn, nOut, true);
    }

    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(RECURRENT_WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);

    return m;
}
 
Developer ID: deeplearning4j, Project: deeplearning4j, Lines of code: 38, Source file: SimpleRnnParamInitializer.java

Example 4: init

import org.deeplearning4j.nn.conf.NeuralNetConfiguration; //import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer layerConf =
                    (org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer) conf.getLayer();

    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut(); // also equal to numClasses

    int wEndOffset = nIn * nOut;
    int bEndOffset = wEndOffset + nOut;
    int cEndOffset = bEndOffset + nIn * nOut;

    INDArray weightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, wEndOffset));
    INDArray biasView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(wEndOffset, bEndOffset));
    INDArray centerLossView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(bEndOffset, cEndOffset))
                    .reshape('c', nOut, nIn);

    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
    params.put(CENTER_KEY, createCenterLossMatrix(conf, centerLossView, initializeParams));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
    conf.addVariable(CENTER_KEY);

    return params;
}
 
Developer ID: deeplearning4j, Project: deeplearning4j, Lines of code: 29, Source file: CenterLossParamInitializer.java
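
The three end offsets define a layout of [weights | bias | class centers], where the center block holds one nIn-dimensional center per class (hence the reshape to nOut x nIn). A sketch with hypothetical sizes (nIn = 4, nOut = 3, for illustration only) works through the arithmetic:

public class CenterLossLayoutSketch {
    public static void main(String[] args) {
        int nIn = 4, nOut = 3;                    //hypothetical; nOut is also the number of classes
        int wEndOffset = nIn * nOut;              //12: weight matrix
        int bEndOffset = wEndOffset + nOut;       //15: bias vector
        int cEndOffset = bEndOffset + nIn * nOut; //27: one nIn-dimensional center per class
        System.out.println("W: [0, " + wEndOffset + "), b: [" + wEndOffset + ", " + bEndOffset
                        + "), centers: [" + bEndOffset + ", " + cEndOffset + ")");
    }
}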

Example 5: init

import org.deeplearning4j.nn.conf.NeuralNetConfiguration; //import the package/class this method depends on
/**
 * Initialize the parameters
 *
 * @param conf             the configuration
 * @param paramsView       a view of the full network (backprop) parameters
 * @param initializeParams if true: initialize the parameters according to the configuration. If false: don't modify the
 *                         values in the paramsView array (but do select out the appropriate subset, reshape etc as required)
 * @return Map of parameters keyed by type (view of the 'paramsView' array)
 */
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    if (!(conf.getLayer() instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer))
        throw new IllegalArgumentException("unsupported layer type: " + conf.getLayer().getClass().getName());

    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    int length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                "Expected params view of length " + length + ", got length " + paramsView.length());

    org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
            (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();
    int nIn = layerConf.getNIn();

    int nWeightParams = nIn; //element-wise layer: one weight per input (the bias view below is also length nIn)
    INDArray weightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nWeightParams));
    INDArray biasView = paramsView.get(NDArrayIndex.point(0),
            NDArrayIndex.interval(nWeightParams, nWeightParams + nIn));


    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);

    return params;
}
 
Developer ID: deeplearning4j, Project: deeplearning4j, Lines of code: 39, Source file: ElementWiseParamInitializer.java

Example 6: init

import org.deeplearning4j.nn.conf.NeuralNetConfiguration; //import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    if (!(conf.getLayer() instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer))
        throw new IllegalArgumentException("unsupported layer type: " + conf.getLayer().getClass().getName());

    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    int length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                        "Expected params view of length " + length + ", got length " + paramsView.length());

    org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
                    (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();

    int nWeightParams = nIn * nOut;
    INDArray weightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nWeightParams));

    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    conf.addVariable(WEIGHT_KEY);


    if(hasBias(layerConf)){
        INDArray biasView = paramsView.get(NDArrayIndex.point(0),
                NDArrayIndex.interval(nWeightParams, nWeightParams + nOut));
        params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
        conf.addVariable(BIAS_KEY);
    }

    return params;
}
 
Developer ID: deeplearning4j, Project: deeplearning4j, Lines of code: 34, Source file: DefaultParamInitializer.java

Example 7: init

import org.deeplearning4j.nn.conf.NeuralNetConfiguration; //import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    // TODO setup for RNN
    BatchNormalization layer = (BatchNormalization) conf.getLayer();
    int nOut = layer.getNOut();

    int meanOffset = 0;
    if (!layer.isLockGammaBeta()) { //No gamma/beta parameters when gamma/beta are locked
        INDArray gammaView = paramView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nOut));
        INDArray betaView = paramView.get(NDArrayIndex.point(0), NDArrayIndex.interval(nOut, 2 * nOut));

        params.put(GAMMA, createGamma(conf, gammaView, initializeParams));
        conf.addVariable(GAMMA);
        params.put(BETA, createBeta(conf, betaView, initializeParams));
        conf.addVariable(BETA);

        meanOffset = 2 * nOut;
    }

    INDArray globalMeanView =
                    paramView.get(NDArrayIndex.point(0), NDArrayIndex.interval(meanOffset, meanOffset + nOut));
    INDArray globalVarView = paramView.get(NDArrayIndex.point(0),
                    NDArrayIndex.interval(meanOffset + nOut, meanOffset + 2 * nOut));

    if (initializeParams) {
        globalMeanView.assign(0);
        globalVarView.assign(1);
    }

    params.put(GLOBAL_MEAN, globalMeanView);
    conf.addVariable(GLOBAL_MEAN);
    params.put(GLOBAL_VAR, globalVarView);
    conf.addVariable(GLOBAL_VAR);

    return params;
}
 
Developer ID: deeplearning4j, Project: deeplearning4j, Lines of code: 38, Source file: BatchNormalizationParamInitializer.java
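
The meanOffset logic gives two possible layouts, depending on whether gamma/beta are locked. A sketch with a hypothetical nOut = 5 prints both:

public class BatchNormLayoutSketch {
    public static void main(String[] args) {
        int nOut = 5; //hypothetical channel count
        for (boolean lockGammaBeta : new boolean[] {false, true}) {
            //gamma and beta (nOut entries each) are only present when not locked
            int meanOffset = lockGammaBeta ? 0 : 2 * nOut;
            System.out.println("lockGammaBeta=" + lockGammaBeta
                            + ": global mean [" + meanOffset + ", " + (meanOffset + nOut)
                            + "), global var [" + (meanOffset + nOut) + ", " + (meanOffset + 2 * nOut) + ")");
        }
    }
}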

Example 8: testSphereFnMultipleStepsHelper

import org.deeplearning4j.nn.conf.NeuralNetConfiguration; //import the package/class this method depends on
private static void testSphereFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
                int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];

    for (int i = 0; i <= nOptIter; i++) {
        Random rng = new DefaultRandom(12345L);
        org.nd4j.linalg.api.rng.distribution.Distribution dist =
                        new org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(rng, -10, 10);
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                        .maxNumLineSearchIterations(maxNumLineSearchIter).updater(new Sgd(0.1))
                        .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
        conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here

        Model m = new SphereFunctionModel(100, dist, conf);
        if (i == 0) {
            m.computeGradientAndScore();
            scores[0] = m.score(); //Before optimization
        } else {
            ConvexOptimizer opt = getOptimizer(oa, conf, m);
            for( int j=0; j<100; j++ ) {
                opt.optimize();
            }
            m.computeGradientAndScore();
            scores[i] = m.score();
            assertTrue(!Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]));
        }
    }

    if (PRINT_OPT_RESULTS) {
        System.out.println("Multiple optimization iterations (" + nOptIter
                        + " opt. iter.) score vs iteration, maxNumLineSearchIter=" + maxNumLineSearchIter + ": "
                        + oa);
        System.out.println(Arrays.toString(scores));
    }

    for (int i = 1; i < scores.length; i++) {
        assertTrue(scores[i] <= scores[i - 1]);
    }
    assertTrue(scores[scores.length - 1] < 1.0); //Very easy function, expect score ~= 0 with any reasonable number of steps/numLineSearchIter
}
 
Developer ID: deeplearning4j, Project: deeplearning4j, Lines of code: 41, Source file: TestOptimizers.java

Example 9: testRastriginFnMultipleStepsHelper

import org.deeplearning4j.nn.conf.NeuralNetConfiguration; //import the package/class this method depends on
private static void testRastriginFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
                int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];

    for (int i = 0; i <= nOptIter; i++) {
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                        .maxNumLineSearchIterations(maxNumLineSearchIter).miniBatch(false)
                        .updater(new AdaGrad(1e-2))
                        .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
        conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here

        Model m = new RastriginFunctionModel(10, conf);
        int nParams = m.numParams();
        if (i == 0) {
            m.computeGradientAndScore();
            scores[0] = m.score(); //Before optimization
        } else {
            ConvexOptimizer opt = getOptimizer(oa, conf, m);
            opt.getUpdater().setStateViewArray((Layer) m, Nd4j.create(new int[] {1, nParams}, 'c'), true);
            opt.optimize();
            m.computeGradientAndScore();
            scores[i] = m.score();
            assertTrue(!Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]));
        }
    }

    if (PRINT_OPT_RESULTS) {
        System.out.println("Rastrigin: Multiple optimization iterations (" + nOptIter
                        + " opt. iter.) score vs iteration, maxNumLineSearchIter=" + maxNumLineSearchIter + ": "
                        + oa);
        System.out.println(Arrays.toString(scores));
    }
    for (int i = 1; i < scores.length; i++) {
        assertTrue(scores[i] <= scores[i - 1]); //Scores should be non-increasing across iterations
    }
}
 
Developer ID: deeplearning4j, Project: deeplearning4j, Lines of code: 41, Source file: TestOptimizers.java

Example 10: init

import org.deeplearning4j.nn.conf.NeuralNetConfiguration; //import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM layerConf =
                    (org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();

    Distribution dist = Distributions.createDistribution(layerConf.getDist());

    int nL = layerConf.getNOut(); //i.e., n neurons in this layer
    int nLast = layerConf.getNIn(); //i.e., n neurons in previous layer

    conf.addVariable(INPUT_WEIGHT_KEY_FORWARDS);
    conf.addVariable(RECURRENT_WEIGHT_KEY_FORWARDS);
    conf.addVariable(BIAS_KEY_FORWARDS);
    conf.addVariable(INPUT_WEIGHT_KEY_BACKWARDS);
    conf.addVariable(RECURRENT_WEIGHT_KEY_BACKWARDS);
    conf.addVariable(BIAS_KEY_BACKWARDS);

    int nParamsInput = nLast * (4 * nL);
    int nParamsRecurrent = nL * (4 * nL + 3);
    int nBias = 4 * nL;

    int rwFOffset = nParamsInput;
    int bFOffset = rwFOffset + nParamsRecurrent;
    int iwROffset = bFOffset + nBias;
    int rwROffset = iwROffset + nParamsInput;
    int bROffset = rwROffset + nParamsRecurrent;

    INDArray iwF = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, rwFOffset));
    INDArray rwF = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(rwFOffset, bFOffset));
    INDArray bF = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(bFOffset, iwROffset));
    INDArray iwR = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(iwROffset, rwROffset));
    INDArray rwR = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(rwROffset, bROffset));
    INDArray bR = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(bROffset, bROffset + nBias));

    if (initializeParams) {
        bF.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL)},
                        Nd4j.ones(1, nL).muli(forgetGateInit)); //Order: input, forget, output, input modulation, i.e., IFOG
        bR.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL)},
                        Nd4j.ones(1, nL).muli(forgetGateInit));
    }
    /*The above line initializes the forget gate biases to specified value.
     * See Sutskever PhD thesis, pg19:
     * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
     *  which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
     *  not done, it will be harder to learn long range dependencies because the smaller values of the forget
     *  gates will create a vanishing gradients problem."
     *  http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
     */

    if (initializeParams) {
        //As per standard LSTM
        int fanIn = nL;
        int fanOut = nLast + nL;
        int[] inputWShape = new int[] {nLast, 4 * nL};
        int[] recurrentWShape = new int[] {nL, 4 * nL + 3};

        params.put(INPUT_WEIGHT_KEY_FORWARDS, WeightInitUtil.initWeights(fanIn, fanOut, inputWShape,
                        layerConf.getWeightInit(), dist, iwF));
        params.put(RECURRENT_WEIGHT_KEY_FORWARDS, WeightInitUtil.initWeights(fanIn, fanOut, recurrentWShape,
                        layerConf.getWeightInit(), dist, rwF));
        params.put(BIAS_KEY_FORWARDS, bF);
        params.put(INPUT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.initWeights(fanIn, fanOut, inputWShape,
                        layerConf.getWeightInit(), dist, iwR));
        params.put(RECURRENT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.initWeights(fanIn, fanOut, recurrentWShape,
                        layerConf.getWeightInit(), dist, rwR));
        params.put(BIAS_KEY_BACKWARDS, bR);
    } else {
        params.put(INPUT_WEIGHT_KEY_FORWARDS, WeightInitUtil.reshapeWeights(new int[] {nLast, 4 * nL}, iwF));
        params.put(RECURRENT_WEIGHT_KEY_FORWARDS, WeightInitUtil.reshapeWeights(new int[] {nL, 4 * nL + 3}, rwF));
        params.put(BIAS_KEY_FORWARDS, bF);
        params.put(INPUT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.reshapeWeights(new int[] {nLast, 4 * nL}, iwR));
        params.put(RECURRENT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.reshapeWeights(new int[] {nL, 4 * nL + 3}, rwR));
        params.put(BIAS_KEY_BACKWARDS, bR);
    }

    return params;
}
 
Developer ID: deeplearning4j, Project: deeplearning4j, Lines of code: 81, Source file: GravesBidirectionalLSTMParamInitializer.java
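
The five offsets chain a forwards block and a backwards block, each consisting of input weights, recurrent weights, and biases. The recurrent matrices have 4*nL+3 columns per unit: four gates plus, in this Graves-style LSTM, three extra columns that we take to be the peephole weights. A sketch with hypothetical sizes (nLast = 3, nL = 4) reproduces the arithmetic:

public class BidirectionalLstmLayoutSketch {
    public static void main(String[] args) {
        int nLast = 3, nL = 4;                    //hypothetical input / layer sizes
        int nParamsInput = nLast * (4 * nL);      //48: input weights, 4 gates per unit
        int nParamsRecurrent = nL * (4 * nL + 3); //76: recurrent weights, +3 columns per unit
        int nBias = 4 * nL;                       //16
        int rwFOffset = nParamsInput;             //forwards recurrent weights start here
        int bFOffset = rwFOffset + nParamsRecurrent;
        int iwROffset = bFOffset + nBias;         //backwards block starts here
        int rwROffset = iwROffset + nParamsInput;
        int bROffset = rwROffset + nParamsRecurrent;
        System.out.println("total params = " + (bROffset + nBias)); //280 = 2 * (48 + 76 + 16)
    }
}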

Example 11: init

import org.deeplearning4j.nn.conf.NeuralNetConfiguration; //import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.LSTM layerConf = (org.deeplearning4j.nn.conf.layers.LSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();

    Distribution dist = Distributions.createDistribution(layerConf.getDist());

    int nL = layerConf.getNOut(); //i.e., n neurons in this layer
    int nLast = layerConf.getNIn(); //i.e., n neurons in previous layer

    conf.addVariable(INPUT_WEIGHT_KEY);
    conf.addVariable(RECURRENT_WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);

    int length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                        "Expected params view of length " + length + ", got length " + paramsView.length());

    int nParamsIn = nLast * (4 * nL);
    int nParamsRecurrent = nL * (4 * nL);
    int nBias = 4 * nL;
    INDArray inputWeightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nParamsIn));
    INDArray recurrentWeightView = paramsView.get(NDArrayIndex.point(0),
                    NDArrayIndex.interval(nParamsIn, nParamsIn + nParamsRecurrent));
    INDArray biasView = paramsView.get(NDArrayIndex.point(0),
                    NDArrayIndex.interval(nParamsIn + nParamsRecurrent, nParamsIn + nParamsRecurrent + nBias));

    if (initializeParams) {
        int fanIn = nL;
        int fanOut = nLast + nL;
        int[] inputWShape = new int[] {nLast, 4 * nL};
        int[] recurrentWShape = new int[] {nL, 4 * nL};

        WeightInit rwInit;
        Distribution rwDist;
        if(layerConf.getWeightInitRecurrent() != null){
            rwInit = layerConf.getWeightInitRecurrent();
            rwDist = Distributions.createDistribution(layerConf.getDistRecurrent());
        } else {
            rwInit = layerConf.getWeightInit();
            rwDist = dist;
        }

        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.initWeights(fanIn, fanOut, inputWShape,
                        layerConf.getWeightInit(), dist, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY, WeightInitUtil.initWeights(fanIn, fanOut, recurrentWShape,
                        rwInit, rwDist, recurrentWeightView));
        biasView.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL)},
                        Nd4j.valueArrayOf(1, nL, forgetGateInit)); //Order: input, forget, output, input modulation, i.e., IFOG
        /*The above line initializes the forget gate biases to specified value.
         * See Sutskever PhD thesis, pg19:
         * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
         *  which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
         *  not done, it will be harder to learn long range dependencies because the smaller values of the forget
         *  gates will create a vanishing gradients problem."
         *  http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
         */
        params.put(BIAS_KEY, biasView);
    } else {
        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.reshapeWeights(new int[] {nLast, 4 * nL}, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY,
                        WeightInitUtil.reshapeWeights(new int[] {nL, 4 * nL}, recurrentWeightView));
        params.put(BIAS_KEY, biasView);
    }

    return params;
}
 
Developer ID: deeplearning4j, Project: deeplearning4j, Lines of code: 70, Source file: LSTMParamInitializer.java

Example 12: init

import org.deeplearning4j.nn.conf.NeuralNetConfiguration; //import the package/class this method depends on
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.GravesLSTM layerConf =
                    (org.deeplearning4j.nn.conf.layers.GravesLSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();

    Distribution dist = Distributions.createDistribution(layerConf.getDist());

    int nL = layerConf.getNOut(); //i.e., n neurons in this layer
    int nLast = layerConf.getNIn(); //i.e., n neurons in previous layer

    conf.addVariable(INPUT_WEIGHT_KEY);
    conf.addVariable(RECURRENT_WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);

    int length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                        "Expected params view of length " + length + ", got length " + paramsView.length());

    int nParamsIn = nLast * (4 * nL);
    int nParamsRecurrent = nL * (4 * nL + 3);
    int nBias = 4 * nL;
    INDArray inputWeightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nParamsIn));
    INDArray recurrentWeightView = paramsView.get(NDArrayIndex.point(0),
                    NDArrayIndex.interval(nParamsIn, nParamsIn + nParamsRecurrent));
    INDArray biasView = paramsView.get(NDArrayIndex.point(0),
                    NDArrayIndex.interval(nParamsIn + nParamsRecurrent, nParamsIn + nParamsRecurrent + nBias));

    if (initializeParams) {
        int fanIn = nL;
        int fanOut = nLast + nL;
        int[] inputWShape = new int[] {nLast, 4 * nL};
        int[] recurrentWShape = new int[] {nL, 4 * nL + 3};

        WeightInit rwInit;
        Distribution rwDist;
        if(layerConf.getWeightInitRecurrent() != null){
            rwInit = layerConf.getWeightInitRecurrent();
            rwDist = Distributions.createDistribution(layerConf.getDistRecurrent());
        } else {
            rwInit = layerConf.getWeightInit();
            rwDist = dist;
        }

        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.initWeights(fanIn, fanOut, inputWShape,
                        layerConf.getWeightInit(), dist, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY, WeightInitUtil.initWeights(fanIn, fanOut, recurrentWShape,
                        rwInit, rwDist, recurrentWeightView));
        biasView.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL)},
                        Nd4j.valueArrayOf(1, nL, forgetGateInit)); //Order: input, forget, output, input modulation, i.e., IFOG
        /*The above line initializes the forget gate biases to specified value.
         * See Sutskever PhD thesis, pg19:
         * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
         *  which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
         *  not done, it will be harder to learn long range dependencies because the smaller values of the forget
         *  gates will create a vanishing gradients problem."
         *  http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
         */
        params.put(BIAS_KEY, biasView);
    } else {
        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.reshapeWeights(new int[] {nLast, 4 * nL}, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY,
                        WeightInitUtil.reshapeWeights(new int[] {nL, 4 * nL + 3}, recurrentWeightView));
        params.put(BIAS_KEY, biasView);
    }

    return params;
}
 
Developer ID: deeplearning4j, Project: deeplearning4j, Lines of code: 71, Source file: GravesLSTMParamInitializer.java

Example 13: testSphereFnOptHelper

import org.deeplearning4j.nn.conf.NeuralNetConfiguration; //import the package/class this method depends on
public void testSphereFnOptHelper(OptimizationAlgorithm oa, int numLineSearchIter, int nDimensions) {

        if (PRINT_OPT_RESULTS)
            System.out.println("---------\n Alg= " + oa + ", nIter= " + numLineSearchIter + ", nDimensions= "
                            + nDimensions);

        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().maxNumLineSearchIterations(numLineSearchIter)
                        .updater(new Sgd(1e-2))
                        .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
        conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here

        Random rng = new DefaultRandom(12345L);
        org.nd4j.linalg.api.rng.distribution.Distribution dist =
                        new org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(rng, -10, 10);
        Model m = new SphereFunctionModel(nDimensions, dist, conf);
        m.computeGradientAndScore();
        double scoreBefore = m.score();
        assertTrue(!Double.isNaN(scoreBefore) && !Double.isInfinite(scoreBefore));
        if (PRINT_OPT_RESULTS) {
            System.out.println("Before:");
            System.out.println(scoreBefore);
            System.out.println(m.params());
        }

        ConvexOptimizer opt = getOptimizer(oa, conf, m);

        opt.setupSearchState(m.gradientAndScore());
        for( int i=0; i<100; i++ ) {
            opt.optimize();
        }
        m.computeGradientAndScore();
        double scoreAfter = m.score();

        assertTrue(!Double.isNaN(scoreAfter) && !Double.isInfinite(scoreAfter));
        if (PRINT_OPT_RESULTS) {
            System.out.println("After:");
            System.out.println(scoreAfter);
            System.out.println(m.params());
        }

        //Expected behaviour after optimization:
        //(a) score is better (lower) after optimization.
        //(b) Parameters are closer to minimum after optimization (TODO)
        assertTrue("Score did not improve after optimization (b= " + scoreBefore + " ,a= " + scoreAfter + ")",
                        scoreAfter < scoreBefore);
    }
 
Developer ID: deeplearning4j, Project: deeplearning4j, Lines of code: 47, Source file: TestOptimizers.java

Example 14: testRosenbrockFnMultipleStepsHelper

import org.deeplearning4j.nn.conf.NeuralNetConfiguration; //import the package/class this method depends on
private static void testRosenbrockFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
                int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];

    for (int i = 0; i <= nOptIter; i++) {
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                        .maxNumLineSearchIterations(maxNumLineSearchIter)
                        .updater(new Sgd(1e-1))
                        .stepFunction(new org.deeplearning4j.nn.conf.stepfunctions.NegativeDefaultStepFunction())
                        .layer(new DenseLayer.Builder().nIn(1).nOut(1).build())
                        .build();
        conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here

        Model m = new RosenbrockFunctionModel(100, conf);
        if (i == 0) {
            m.computeGradientAndScore();
            scores[0] = m.score(); //Before optimization
        } else {
            ConvexOptimizer opt = getOptimizer(oa, conf, m);
            opt.optimize();
            m.computeGradientAndScore();
            scores[i] = m.score();
            assertTrue("NaN or infinite score: " + scores[i],
                            !Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]));
        }
    }

    if (PRINT_OPT_RESULTS) {
        System.out.println("Rosenbrock: Multiple optimization iterations ( " + nOptIter
                        + " opt. iter.) score vs iteration, maxNumLineSearchIter= " + maxNumLineSearchIter + ": "
                        + oa);
        System.out.println(Arrays.toString(scores));
    }
    for (int i = 1; i < scores.length; i++) {
        if (i == 1) {
            assertTrue(scores[i] < scores[i - 1]); //Require at least one step of improvement
        } else {
            assertTrue(scores[i] <= scores[i - 1]);
        }
    }
}
 
Developer ID: deeplearning4j, Project: deeplearning4j, Lines of code: 42, Source file: TestOptimizers.java


Note: The org.deeplearning4j.nn.conf.NeuralNetConfiguration.addVariable method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by various developers, and copyright in the source code remains with the original authors. Please consult the corresponding project's License before distributing or using this code. Do not reproduce without permission.