

Python GradientDescent.add_updates Method Code Examples

This article collects typical usage examples of the Python method blocks.algorithms.GradientDescent.add_updates, drawn from open-source projects. If you are unsure what GradientDescent.add_updates does or how to call it, the curated examples below should help. You can also explore other usage examples of blocks.algorithms.GradientDescent.


The following presents 15 code examples of GradientDescent.add_updates, ordered roughly by popularity.
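To orient the reader before the collected examples: add_updates attaches extra (shared variable, new value) update pairs to a GradientDescent instance, and Blocks applies them in the same Theano function call as the parameter updates themselves. Below is a minimal, self-contained sketch. It assumes a Blocks version whose GradientDescent accepts a parameters= argument (older releases use params=, as several examples below do); the variable names are illustrative only, not taken from any of the examples.

import numpy
from theano import tensor
from blocks.algorithms import GradientDescent, Scale
from blocks.utils import shared_floatx

W = shared_floatx(numpy.zeros((2, 2)))   # the parameter being trained
counter = shared_floatx(0)               # auxiliary state, not a parameter
x = tensor.matrix('x')
cost = (x.dot(W) ** 2).sum()

algorithm = GradientDescent(cost=cost, parameters=[W], step_rule=Scale(0.01))
# Each pair is (shared variable, expression for its new value); the updates
# run alongside every gradient step.
algorithm.add_updates([(counter, counter + 1)])
algorithm.initialize()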

Example 1: run

# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def run(discriminative_regularization=True):
    streams = create_celeba_streams(training_batch_size=100,
                                    monitoring_batch_size=500,
                                    include_targets=False)
    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams[:3]

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    rval = create_training_computation_graphs(discriminative_regularization)
    cg, bn_cg, variance_parameters = rval
    pop_updates = list(
        set(get_batch_normalization_updates(bn_cg, allow_duplicates=True)))
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    model = Model(bn_cg.outputs[0])
    selector = Selector(
        find_bricks(
            model.top_bricks,
            lambda brick: brick.name in ('encoder_convnet', 'encoder_mlp',
                                         'decoder_convnet', 'decoder_mlp')))
    parameters = list(selector.get_parameters().values()) + variance_parameters

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_cg.outputs[0],
                                parameters=parameters,
                                step_rule=step_rule)
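    # Run the population-statistics EMA updates alongside each gradient step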
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    monitored_quantities_list = []
    for graph in [bn_cg, cg]:
        cost, kl_term, reconstruction_term = graph.outputs
        cost.name = 'nll_upper_bound'
        avg_kl_term = kl_term.mean(axis=0)
        avg_kl_term.name = 'avg_kl_term'
        avg_reconstruction_term = -reconstruction_term.mean(axis=0)
        avg_reconstruction_term.name = 'avg_reconstruction_term'
        monitored_quantities_list.append(
            [cost, avg_kl_term, avg_reconstruction_term])
    train_monitoring = DataStreamMonitoring(
        monitored_quantities_list[0], train_monitor_stream, prefix="train",
        updates=extra_updates, after_epoch=False, before_first_epoch=False,
        every_n_epochs=5)
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities_list[1], valid_monitor_stream, prefix="valid",
        after_epoch=False, before_first_epoch=False, every_n_epochs=5)

    # Prepare checkpoint
    save_path = 'celeba_vae_{}regularization.zip'.format(
        '' if discriminative_regularization else 'no_')
    checkpoint = Checkpoint(save_path, every_n_epochs=5, use_cpickle=True)

    extensions = [Timing(), FinishAfter(after_n_epochs=75), train_monitoring,
                  valid_monitoring, checkpoint, Printing(), ProgressBar()]
    main_loop = MainLoop(data_stream=main_loop_stream,
                         algorithm=algorithm, extensions=extensions)
    main_loop.run()
Developer: anirudh9119, Project: discgen, Lines of code: 62, Source: train_celeba_vae.py

Example 2: train_rnnrbm

# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def train_rnnrbm(train, rnnrbm, epochs=1000, test=None, bokeh=True,
                 load_path=None):
    cdk = theano.shared(10)
    lr = theano.shared(float32(0.004))

    cost, v_sample = rnnrbm.cost(examples=x, mask=x_mask, k=cdk)

    error_rate = MismulitclassificationRate().apply(x, v_sample[-1], x_mask)
    error_rate.name = "error on note as a whole"
    mistake_rate = MismulitmistakeRate().apply(x, v_sample[-1], x_mask)
    mistake_rate.name = "single error within note"
    cost.name = 'rbm_cost'

    model = Model(cost)
    cg = ComputationGraph([cost])
    step_rule = CompositeRule(
        [RemoveNotFinite(), StepClipping(30.0), Adam(learning_rate=lr), StepClipping(6.0),
         RemoveNotFinite()])  # Scale(0.01)
    gradients = dict(equizip(cg.parameters, T.grad(cost, cg.parameters, consider_constant=[v_sample])))
    algorithm = GradientDescent(step_rule=step_rule, gradients=gradients, cost=cost,
                                params=cg.parameters)
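    # Also apply the graph's own updates (e.g. RNG updates from the RBM's
    # Gibbs sampling) at each training step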
    algorithm.add_updates(cg.updates)
    extensions = [
        SharedVariableModifier(parameter=cdk,
                               function=lambda n, v: rnnrbm_cdk[n] if rnnrbm_cdk.get(n) else v),
        SharedVariableModifier(parameter=lr,
                               function=lambda n, v: float32(0.78 * v) if n % (200 * 5) == 0 else v),
        FinishAfter(after_n_epochs=epochs),
        TrainingDataMonitoring(
            [cost, error_rate, mistake_rate, ],  # hidden_states, debug_val, param_nans,
            # aggregation.mean(algorithm.total_gradient_norm)],  #+ params,
            prefix="train",
            after_epoch=False, every_n_batches=40),
        Timing(),
        Printing(),
        ProgressBar()]
    if test is not None:
        extensions.append(DataStreamMonitoring(
            [cost, error_rate, mistake_rate],
            data_stream=test,
            updates=cg.updates,
            prefix="test", after_epoch=False, every_n_batches=40))
    if bokeh:
        extensions.append(Plot(
            'Training RNN-RBM',
            channels=[
                ['train_error on note as a whole', 'train_single error within note',
                 'test_error on note as a whole',
                 'test_single error within note'],
                ['train_final_cost'],
                # ['train_total_gradient_norm'],
            ]))

    main_loop = MainLoop(algorithm=algorithm,
                         data_stream=train,
                         model=model,
                         extensions=extensions
                         )
    return main_loop
Developer: makistsantekidis, Project: rnnrbm, Lines of code: 61, Source: main.py

Example 3: test_gradient_descent_finds_inputs_additional_updates

# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def test_gradient_descent_finds_inputs_additional_updates():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    n = shared_floatx(1)
    m = tensor.scalar('m')
    algorithm = GradientDescent(gradients=OrderedDict([(W, W + 1)]))
    algorithm.add_updates([(n, n + m)])
    algorithm.initialize()
    assert m in algorithm.inputs
Developer: Beronx86, Project: blocks, Lines of code: 10, Source: test_algorithms.py

Example 4: create_main_loop

# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def create_main_loop(dataset, nvis, nhid, num_epochs, debug_level=0, lrate=1e-3):
    seed = 188229
    n_inference_steps = 6
    num_examples = dataset.num_examples
    batch_size = num_examples

    train_loop_stream = Flatten(
        DataStream.default_stream(
            dataset=dataset,
            iteration_scheme=SequentialScheme(dataset.num_examples, batch_size)  # Repeat(
            # , n_inference_steps)
            #            ShuffledScheme(dataset.num_examples, batch_size), n_inference_steps))
        ),
        which_sources=("features",),
    )

    model_brick = FivEM(
        nvis=nvis,
        nhid=nhid,
        epsilon=0.01,
        batch_size=batch_size,
        weights_init=IsotropicGaussian(0.1),
        biases_init=Constant(0),
        noise_scaling=1,
        debug=debug_level,
        lateral_x=False,
        lateral_h=False,
        n_inference_steps=n_inference_steps,
    )
    model_brick.initialize()

    x = tensor.matrix("features")

    cost = model_brick.cost(x)
    computation_graph = ComputationGraph([cost])
    model = Model(cost)
    # step_rule = Adam(learning_rate=2e-5, beta1=0.1, beta2=0.001, epsilon=1e-8,
    #                 decay_factor=(1 - 1e-8))
    step_rule = Momentum(learning_rate=lrate, momentum=0.95)
    # step_rule = AdaDelta()
    # step_rule = RMSProp(learning_rate=0.01)
    # step_rule = AdaGrad(learning_rate=1e-4)
    algorithm = GradientDescent(cost=cost, params=computation_graph.parameters, step_rule=step_rule)
    algorithm.add_updates(computation_graph.updates)

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        TrainingDataMonitoring([cost] + computation_graph.auxiliary_variables, after_batch=False, after_epoch=True),
        #                       every_n_epochs=1),
        Printing(after_epoch=True, after_batch=False),  # every_n_epochs=1,
        # Checkpoint(path="./fivem.zip",every_n_epochs=10,after_training=True)
    ]
    main_loop = MainLoop(model=model, data_stream=train_loop_stream, algorithm=algorithm, extensions=extensions)
    return main_loop
Developer: pombredanne, Project: neuroml, Lines of code: 57, Source: train_stdp_learner.py

Example 5: run

# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def run():
    streams = create_celeba_streams(training_batch_size=100,
                                    monitoring_batch_size=500,
                                    include_targets=True)
    main_loop_stream = streams[0]
    train_monitor_stream = streams[1]
    valid_monitor_stream = streams[2]

    cg, bn_dropout_cg = create_training_computation_graphs()

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    pop_updates = get_batch_normalization_updates(bn_dropout_cg)
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_dropout_cg.outputs[0],
                                parameters=bn_dropout_cg.parameters,
                                step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    cost = bn_dropout_cg.outputs[0]
    cost.name = 'cost'
    train_monitoring = DataStreamMonitoring(
        [cost], train_monitor_stream, prefix="train",
        before_first_epoch=False, after_epoch=False, after_training=True,
        updates=extra_updates)

    cost, accuracy = cg.outputs
    cost.name = 'cost'
    accuracy.name = 'accuracy'
    monitored_quantities = [cost, accuracy]
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities, valid_monitor_stream, prefix="valid",
        before_first_epoch=False, after_epoch=False, every_n_epochs=5)

    # Prepare checkpoint
    checkpoint = Checkpoint(
        'celeba_classifier.zip', every_n_epochs=5, use_cpickle=True)

    extensions = [Timing(), FinishAfter(after_n_epochs=50), train_monitoring,
                  valid_monitoring, checkpoint, Printing(), ProgressBar()]
    main_loop = MainLoop(data_stream=main_loop_stream, algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
Developer: anirudh9119, Project: discgen, Lines of code: 51, Source: train_celeba_classifier.py

Example 6: main

# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def main(num_epochs=50, batch_normalized=True, alpha=0.1):
    """Run the example.

    Parameters
    ----------
    num_epochs : int, optional
        Number of epochs for which to train.

    batch_normalized : bool, optional
        Batch-normalize the training graph. Defaults to `True`.

    alpha : float, optional
        Weight to apply to a new sample when calculating running
        averages for population statistics (1 - alpha weight is
        given to the existing average).

    """
    if batch_normalized:
        # Add an extra keyword argument that only BatchNormalizedMLP takes,
        # in order to speed things up at the cost of a bit of extra memory.
        mlp_class = BatchNormalizedMLP
        extra_kwargs = {'conserve_memory': False}
    else:
        mlp_class = MLP
        extra_kwargs = {}
    mlp = mlp_class([Logistic(), Logistic(), Logistic(), Softmax()],
                    [2, 5, 5, 5, 3],
                    weights_init=IsotropicGaussian(0.2),
                    biases_init=Constant(0.), **extra_kwargs)
    mlp.initialize()

    # Generate a dataset with 3 spiral arms, using 8000 examples for
    # training and 2000 for testing.
    dataset = Spiral(num_examples=10000, classes=3,
                     sources=['features', 'label'],
                     noise=0.05)
    train_stream = DataStream(dataset,
                              iteration_scheme=ShuffledScheme(examples=8000,
                                                              batch_size=20))
    test_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(
                                 examples=list(range(8000, 10000)),
                                 batch_size=2000))

    # Build a cost graph; this contains BatchNormalization bricks that will
    # by default run in inference mode.
    features = tensor.matrix('features')
    label = tensor.lvector('label')
    prediction = mlp.apply(features)
    cost = CategoricalCrossEntropy().apply(label, prediction)
    misclass = MisclassificationRate().apply(label, prediction)
    misclass.name = 'misclass'  # The default name for this is annoyingly long
    original_cg = ComputationGraph([cost, misclass])

    if batch_normalized:
        cg = apply_batch_normalization(original_cg)
        # Add updates for population parameters
        pop_updates = get_batch_normalization_updates(cg)
        extra_updates = [(p, m * alpha + p * (1 - alpha))
                         for p, m in pop_updates]
    else:
        cg = original_cg
        extra_updates = []

    algorithm = GradientDescent(step_rule=Adam(0.001),
                                cost=cg.outputs[0],
                                parameters=cg.parameters)
    algorithm.add_updates(extra_updates)

    main_loop = MainLoop(algorithm=algorithm,
                         data_stream=train_stream,
                         # Use the original cost and misclass variables so
                         # that we monitor the (original) inference-mode graph.
                         extensions=[DataStreamMonitoring([cost, misclass],
                                                          train_stream,
                                                          prefix='train'),
                                     DataStreamMonitoring([cost, misclass],
                                                          test_stream,
                                                          prefix='test'),
                                     Printing(),
                                     FinishAfter(after_n_epochs=num_epochs)])
    main_loop.run()
    return main_loop
Developer: Beronx86, Project: blocks-examples, Lines of code: 85, Source: __init__.py

Example 7: train

# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]

#......... (part of the code omitted here) .........

    if weight_noise > 0:
        weights = VariableFilter(roles=[WEIGHT])(cg_train.variables)
        cg_train = apply_noise(cg_train, weights, weight_noise)
        cost_train = cg_train.outputs[0].copy(name='cost_train')

    model = Model(cost_train)

    learning_rate = float(learning_rate)
    clipping = StepClipping(threshold=np.cast[floatX](clipping))
    if algorithm == 'adam':
        adam = Adam(learning_rate=learning_rate)
        learning_rate = adam.learning_rate
        step_rule = CompositeRule([adam, clipping])
    elif algorithm == 'rms_prop':
        rms_prop = RMSProp(learning_rate=learning_rate)
        learning_rate = rms_prop.learning_rate
        step_rule = CompositeRule([clipping, rms_prop])
    elif algorithm == 'momentum':
        sgd_momentum = Momentum(learning_rate=learning_rate, momentum=momentum)
        learning_rate = sgd_momentum.learning_rate
        step_rule = CompositeRule([clipping, sgd_momentum])
    elif algorithm == 'sgd':
        sgd = Scale(learning_rate=learning_rate)
        learning_rate = sgd.learning_rate
        step_rule = CompositeRule([clipping, sgd])
    else:
        raise NotImplementedError
    algorithm = GradientDescent(step_rule=step_rule,
                                cost=cost_train,
                                parameters=cg_train.parameters)
                                # theano_func_kwargs={"mode": theano.compile.MonitorMode(post_func=detect_nan)})

    algorithm.add_updates(init_updates)

    def cond_number(x):
        _, _, sing_vals = T.nlinalg.svd(x, True, True)
        sing_mags = abs(sing_vals)
        return T.max(sing_mags) / T.min(sing_mags)
    def rms(x):
        return (x*x).mean().sqrt()

    whysplode_cond = []
    whysplode_rms = []
    for i, p in enumerate(init_updates):
        v = p.get_value()
        # Condition numbers only make sense for matrices (2-D parameters);
        # the original compared the shape tuple to the integer 2, which is
        # always False.
        if len(v.shape) == 2:
            whysplode_cond.append(cond_number(p).copy('ini%d:%s_cond(%s)' % (i, p.name, "x".join(map(str, v.shape)))))
        whysplode_rms.append(rms(p).copy('ini%d:%s_rms(%s)' % (i, p.name, "x".join(map(str, v.shape)))))
    for i, p in enumerate(cg_train.parameters):
        v = p.get_value()
        if len(v.shape) == 2:
            whysplode_cond.append(cond_number(p).copy('ini%d:%s_cond(%s)' % (i, p.name, "x".join(map(str, v.shape)))))
        whysplode_rms.append(rms(p).copy('ini%d:%s_rms(%s)' % (i, p.name, "x".join(map(str, v.shape)))))

    observed_vars = [cost_train, cost, bpc, perp, learning_rate,
                     aggregation.mean(algorithm.total_gradient_norm).copy("gradient_norm_mean")] # + whysplode_rms

    parameters = model.get_parameter_dict()
    for name, param in parameters.iteritems():
        observed_vars.append(param.norm(2).copy(name=name + "_norm"))
        observed_vars.append(
            algorithm.gradients[param].norm(2).copy(name=name + "_grad_norm"))
    
    train_monitor = TrainingDataMonitoring(
        variables=observed_vars,
Developer: teganmaharaj, Project: zoneout, Lines of code: 70, Source: zoneout_word_ptb.py

Example 8: main

# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def main(save_to, num_epochs,
         regularization=0.0001, subset=None, num_batches=None,
         batch_size=None, histogram=None, resume=False):
    output_size = 10
    convnet = create_res_net()

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    test_probs = convnet.apply(x)
    test_cost = (CategoricalCrossEntropy().apply(y.flatten(), test_probs)
            .copy(name='cost'))
    test_error_rate = (MisclassificationRate().apply(y.flatten(), test_probs)
                  .copy(name='error_rate'))
    test_confusion = (ConfusionMatrix().apply(y.flatten(), test_probs)
                  .copy(name='confusion'))
    test_confusion.tag.aggregation_scheme = Sum(test_confusion)

    test_cg = ComputationGraph([test_cost, test_error_rate])

    # Apply dropout to all layer outputs except final softmax
    # dropout_vars = VariableFilter(
    #         roles=[OUTPUT], bricks=[Convolutional],
    #         theano_name_regex="^conv_[25]_apply_output$")(test_cg.variables)
    # drop_cg = apply_dropout(test_cg, dropout_vars, 0.5)

    # Apply 0.2 dropout to the pre-averaging layer
    # dropout_vars_2 = VariableFilter(
    #         roles=[OUTPUT], bricks=[Convolutional],
    #         theano_name_regex="^conv_8_apply_output$")(test_cg.variables)
    # train_cg = apply_dropout(test_cg, dropout_vars_2, 0.2)

    # Apply 0.2 dropout to the input, as in the paper
    # train_cg = apply_dropout(test_cg, [x], 0.2)
    # train_cg = drop_cg
    # train_cg = apply_batch_normalization(test_cg)

    # train_cost, train_error_rate, train_components = train_cg.outputs

    with batch_normalization(convnet):
        train_probs = convnet.apply(x)
    train_cost = (CategoricalCrossEntropy().apply(y.flatten(), train_probs)
                .copy(name='cost'))
    train_components = (ComponentwiseCrossEntropy().apply(y.flatten(),
                train_probs).copy(name='components'))
    train_error_rate = (MisclassificationRate().apply(y.flatten(),
                train_probs).copy(name='error_rate'))
    train_cg = ComputationGraph([train_cost,
                train_error_rate, train_components])
    population_updates = get_batch_normalization_updates(train_cg)
    bn_alpha = 0.9
    extra_updates = [(p, p * bn_alpha + m * (1 - bn_alpha))
                for p, m in population_updates]

    # Apply regularization to the cost
    biases = VariableFilter(roles=[BIAS])(train_cg.parameters)
    weights = VariableFilter(roles=[WEIGHT])(train_cg.variables)
    l2_norm = sum([(W ** 2).sum() for W in weights])
    l2_norm.name = 'l2_norm'
    l2_regularization = regularization * l2_norm
    l2_regularization.name = 'l2_regularization'
    test_cost = test_cost + l2_regularization
    test_cost.name = 'cost_with_regularization'

    # Training version of cost
    train_cost_without_regularization = train_cost
    train_cost_without_regularization.name = 'cost_without_regularization'
    train_cost = train_cost + regularization * l2_norm
    train_cost.name = 'cost_with_regularization'

    cifar10_train = CIFAR10(("train",))
    cifar10_train_stream = RandomPadCropFlip(
        NormalizeBatchLevels(DataStream.default_stream(
            cifar10_train, iteration_scheme=ShuffledScheme(
                cifar10_train.num_examples, batch_size)),
        which_sources=('features',)),
        (32, 32), pad=4, which_sources=('features',))

    test_batch_size = 500
    cifar10_test = CIFAR10(("test",))
    cifar10_test_stream = NormalizeBatchLevels(DataStream.default_stream(
        cifar10_test,
        iteration_scheme=ShuffledScheme(
            cifar10_test.num_examples, test_batch_size)),
        which_sources=('features',))

    momentum = Momentum(0.01, 0.9)

    # Create a step rule that doubles the learning rate of biases, like Caffe.
    # scale_bias = Restrict(Scale(2), biases)
    # step_rule = CompositeRule([scale_bias, momentum])

    # from theano.compile.nanguardmode import NanGuardMode

    # Train with simple SGD
    algorithm = GradientDescent(
        cost=train_cost, parameters=train_cg.parameters,
        step_rule=momentum)
    algorithm.add_updates(extra_updates)
#......... (part of the code omitted here) .........
Developer: davidbau, Project: net-intent, Lines of code: 103, Source: rrun.py

Example 9: main

# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def main(nvis, nhid, encoding_lstm_dim, decoding_lstm_dim, T=1):
    x = tensor.matrix('features')

    # Construct and initialize model
    encoding_mlp = MLP([Tanh()], [None, None])
    decoding_mlp = MLP([Tanh()], [None, None])
    encoding_lstm = LSTM(dim=encoding_lstm_dim)
    decoding_lstm = LSTM(dim=decoding_lstm_dim)
    draw = DRAW(nvis=nvis, nhid=nhid, T=T, encoding_mlp=encoding_mlp,
                decoding_mlp=decoding_mlp, encoding_lstm=encoding_lstm,
                decoding_lstm=decoding_lstm, biases_init=Constant(0),
                weights_init=Orthogonal())
    draw.push_initialization_config()
    encoding_lstm.weights_init = IsotropicGaussian(std=0.001)
    decoding_lstm.weights_init = IsotropicGaussian(std=0.001)
    draw.initialize()

    # Compute cost
    cost = -draw.log_likelihood_lower_bound(x).mean()
    cost.name = 'nll_upper_bound'
    model = Model(cost)

    # Datasets and data streams
    mnist_train = BinarizedMNIST('train')
    train_loop_stream = ForceFloatX(DataStream(
        dataset=mnist_train,
        iteration_scheme=SequentialScheme(mnist_train.num_examples, 100)))
    train_monitor_stream = ForceFloatX(DataStream(
        dataset=mnist_train,
        iteration_scheme=SequentialScheme(mnist_train.num_examples, 500)))
    mnist_valid = BinarizedMNIST('valid')
    valid_monitor_stream = ForceFloatX(DataStream(
        dataset=mnist_valid,
        iteration_scheme=SequentialScheme(mnist_valid.num_examples, 500)))
    mnist_test = BinarizedMNIST('test')
    test_monitor_stream = ForceFloatX(DataStream(
        dataset=mnist_test,
        iteration_scheme=SequentialScheme(mnist_test.num_examples, 500)))

    # Get parameters and monitoring channels
    computation_graph = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(computation_graph.variables)
    monitoring_channels = dict([
        ('avg_' + channel.tag.name, channel.mean()) for channel in
        VariableFilter(name='.*term$')(computation_graph.auxiliary_variables)])
    for name, channel in monitoring_channels.items():
        channel.name = name
    monitored_quantities = monitoring_channels.values() + [cost]

    # Training loop
    step_rule = RMSProp(learning_rate=1e-3, decay_rate=0.95)
    algorithm = GradientDescent(cost=cost, params=params, step_rule=step_rule)
    algorithm.add_updates(computation_graph.updates)
    main_loop = MainLoop(
        model=model, data_stream=train_loop_stream, algorithm=algorithm,
        extensions=[
            Timing(),
            SerializeMainLoop('vae.pkl', save_separately=['model']),
            FinishAfter(after_n_epochs=200),
            DataStreamMonitoring(
                monitored_quantities, train_monitor_stream, prefix="train",
                updates=computation_graph.updates),
            DataStreamMonitoring(
                monitored_quantities, valid_monitor_stream, prefix="valid",
                updates=computation_graph.updates),
            DataStreamMonitoring(
                monitored_quantities, test_monitor_stream, prefix="test",
                updates=computation_graph.updates),
            ProgressBar(),
            Printing()])
    main_loop.run()
Developer: pombredanne, Project: research, Lines of code: 73, Source: train.py

Example 10: ComputationGraph

# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
                                              batch_size=m)))

# note: the discriminator takes 2m samples; the first m are generated, the next m come from the data

# observables.append(cost_discriminator)

generator_cost = theano.shared(value=np.array(0., dtype=np.float32), name='g_cost')
discriminator_cost = theano.shared(value=np.array(0., dtype=np.float32), name='d_cost')

generator_step_norm = theano.shared(value=np.array(0., dtype=np.float32), name='g_step_norm')
generator_grad_norm = theano.shared(value=np.array(0., dtype=np.float32), name='g_grad_norm')
discriminator_step_norm = theano.shared(value=np.array(0., dtype=np.float32), name='d_step_norm')
discriminator_grad_norm = theano.shared(value=np.array(0., dtype=np.float32), name='d_grad_norm')

discriminator_descent.add_updates([
    (discriminator_cost, ComputationGraph(cost_discriminator).outputs[0]),
    (discriminator_step_norm, discriminator_descent.total_step_norm),
    (discriminator_grad_norm, discriminator_descent.total_gradient_norm)])

generator_descent.add_updates([
    (generator_cost, ComputationGraph(cost_generator).outputs[0]),
    (generator_step_norm, generator_descent.total_step_norm),
    (generator_grad_norm, generator_descent.total_gradient_norm)])

observables = []
observables.append(generator_cost)
observables.append(discriminator_cost)
observables.append(generator_step_norm)
observables.append(generator_grad_norm)
observables.append(discriminator_step_norm)
observables.append(discriminator_grad_norm)
Developer: Bjornwolf, Project: language-model, Lines of code: 33, Source: gan_train.py

Example 11: train_model

# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def train_model(cost, cross_entropy, updates,
                train_stream, valid_stream, args, gate_values=None):

    step_rule = learning_algorithm(args)
    cg = ComputationGraph(cost)

    # ADD REGULARIZATION
    # WEIGHT NOISE
    weight_noise = args.weight_noise
    if weight_noise > 0:
        weights = VariableFilter(roles=[WEIGHT])(cg.variables)
        cg_train = apply_noise(cg, weights, weight_noise)
        cost = cg_train.outputs[0]
    cost.name = "cost_with_weight_noise"
    cg = ComputationGraph(cost)

    logger.info(cg.parameters)

    algorithm = GradientDescent(cost=cost, step_rule=step_rule,
                                params=cg.parameters)
    algorithm.add_updates(updates)

    # extensions to be added
    extensions = []
    if args.load_path is not None:
        extensions.append(Load(args.load_path))

    outputs = [
        variable for variable in cg.variables if variable.name == "presoft"]

    if args.generate:
        extensions.append(TextGenerationExtension(
            outputs=outputs,
            generation_length=args.generated_text_lenght,
            initial_text_length=args.initial_text_length,
            every_n_batches=args.monitoring_freq,
            ploting_path=os.path.join(args.save_path, 'prob_plot.png'),
            softmax_sampling=args.softmax_sampling,
            dataset=args.dataset,
            updates=updates,
            interactive_mode=args.interactive_mode))
    extensions.extend([
        TrainingDataMonitoring([cost], prefix='train',
                               every_n_batches=args.monitoring_freq,
                               after_epoch=True),
        DataStreamMonitoring([cost, cross_entropy],
                             valid_stream, args.mini_batch_size_valid,
                             state_updates=updates,
                             prefix='valid',
                             before_first_epoch=not(args.visualize_gates),
                             every_n_batches=args.monitoring_freq),
        ResetStates([v for v, _ in updates], every_n_batches=100),
        ProgressBar()])
    # Creating directory for saving model.
    if not args.interactive_mode:
        if not os.path.exists(args.save_path):
            os.makedirs(args.save_path)
        else:
            raise Exception('Directory already exists')
    early_stopping = EarlyStopping('valid_cross_entropy',
                                   args.patience, args.save_path,
                                   every_n_batches=args.monitoring_freq)

    # Visualizing extensions
    if args.interactive_mode:
        extensions.append(InteractiveMode())
    if args.visualize_gates and (gate_values is not None):
        if args.rnn_type == "lstm":
            extensions.append(VisualizeGateLSTM(gate_values, updates,
                                                args.dataset,
                                                ploting_path=None))
        elif args.rnn_type == "soft":
            extensions.append(VisualizeGateSoft(gate_values, updates,
                                                args.dataset,
                                                ploting_path=None))
        else:
            assert(False)

    extensions.append(early_stopping)
    extensions.append(Printing(every_n_batches=args.monitoring_freq))

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=extensions
    )
    main_loop.run()
Developer: pjadzinsky, Project: RNN_Experiments, Lines of code: 90, Source: train.py

Example 12: train_model

# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def train_model(cost, unregularized_cost, updates,
                train_stream, valid_stream, args, gate_values=None):

    step_rule = learning_algorithm(args)
    cg = ComputationGraph(cost)

    # ADD REGULARIZATION
    # WEIGHT NOISE
    weight_noise = args.weight_noise
    if weight_noise > 0:
        weights = VariableFilter(roles=[WEIGHT])(cg.variables)
        cg_train = apply_noise(cg, weights, weight_noise)
        cost = cg_train.outputs[0]
    cost.name = "cost_with_weight_noise"
    cg = ComputationGraph(cost)

    logger.info(cg.parameters)

    # Define algorithm
    algorithm = GradientDescent(cost=cost, step_rule=step_rule,
                                parameters=cg.parameters)
    # Add the updates to carry the hidden state
    algorithm.add_updates(updates)

    # Extensions to be added
    extensions = []

    # Load from a dumped model
    if args.load_path is not None:
        extensions.append(Load(args.load_path))

    # Generation extension
    if args.generate:
        extensions.append(TextGenerationExtension(
            cost=cost,
            generation_length=args.generated_text_lenght,
            initial_text_length=args.initial_text_length,
            every_n_batches=1,
            ploting_path=os.path.join(args.save_path, 'prob_plot.png'),
            softmax_sampling=args.softmax_sampling,
            dataset=args.dataset,
            updates=updates,
            interactive_mode=args.interactive_mode))

    # Training and Validation score monitoring
    extensions.extend([
        TrainingDataMonitoring([cost], prefix='train',
                               every_n_batches=args.monitoring_freq),
        DataStreamMonitoring([cost, unregularized_cost],
                             valid_stream, args.mini_batch_size_valid,
                             args.dataset,
                             state_updates=updates,
                             prefix='valid',
                             before_first_epoch=(args.visualize == "nothing"),
                             every_n_batches=args.monitoring_freq)])

    # Creating directory for saving model.
    if not args.interactive_mode:
        if not os.path.exists(args.save_path):
            os.makedirs(args.save_path)
        elif 'test' in args.save_path:
            print "Rewriting in " + args.save_path
        else:
            raise Exception('Directory already exists')

    # Early stopping
    extensions.append(EarlyStopping('valid_' + unregularized_cost.name,
                                    args.patience, args.save_path,
                                    every_n_batches=args.monitoring_freq))

    # Printing
    extensions.append(ProgressBar())
    extensions.append(Printing(every_n_batches=args.monitoring_freq))

    # Reset the initial states
    if args.dataset == "sine":
        reset_frequency = 1
    else:
        reset_frequency = 100
    extensions.append(ResetStates([v for v, _ in updates],
                                  every_n_batches=reset_frequency))

    # Visualizing extensions
    if args.interactive_mode:
        extensions.append(InteractiveMode())

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=extensions
    )
    main_loop.run()
Developer: anirudh9119, Project: RNN_Experiments, Lines of code: 95, Source: train.py

Example 13: Model

# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
    # Build datastream
    train_stream = datastream.setup_datastream(config.dataset,
                                               config.num_seqs,
                                               config.seq_len,
                                               config.seq_div_size)

    # Build model
    m = config.Model(config)

    # Train the model
    cg = Model(m.sgd_cost)
    algorithm = GradientDescent(cost=m.sgd_cost,
                                step_rule=config.step_rule,
                                parameters=cg.parameters)

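    # Carry the model's recurrent state updates along with each gradient
    # step (the ResetStates extension below clears them at epoch boundaries)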
    algorithm.add_updates(m.states)

    monitor_vars = list(set(v for p in m.monitor_vars for v in p))
    extensions = [
            ProgressBar(),
            TrainingDataMonitoring(
                monitor_vars,
                prefix='train', every_n_batches=config.monitor_freq),
            Printing(every_n_batches=config.monitor_freq, after_epoch=False),

            ResetStates([v for v, _ in m.states], after_epoch=True)
    ]
    if plot_avail:
        plot_channels = [['train_' + v.name for v in p] for p in m.monitor_vars]
        extensions.append(
            Plot(document='text_'+model_name,
Developer: Alexis211, Project: text_rnn, Lines of code: 33, Source: train.py

Example 14: train

# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def train(ladder, batch_size=100, labeled_samples=100,
          unlabeled_samples=50000, valid_set_size=10000,
          num_epochs=150, valid_batch_size=100, lrate_decay=0.67,
          save_path='results/mnist_100_full0'):
    # Setting Logger
    log_path = os.path.join(save_path, 'log.txt')
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    logger.info('Logging into %s' % log_path)

    # Training
    all_params = ComputationGraph([ladder.costs.total]).parameters
    logger.info('Found the following parameters: %s' % str(all_params))

    training_algorithm = GradientDescent(
        cost=ladder.costs.total, params=all_params,
        step_rule=Adam(learning_rate=ladder.lr))

    # Fetch all batch normalization updates. They are in the clean path.
    # In addition to actual training, also do BN variable approximations
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    training_algorithm.add_updates(bn_updates)

    monitored_variables = [
        ladder.costs.class_corr, ladder.costs.class_clean,
        ladder.error, training_algorithm.total_gradient_norm,
        ladder.costs.total] + ladder.costs.denois.values()

    data = get_mnist_data_dict(unlabeled_samples=unlabeled_samples,
                               valid_set_size=valid_set_size)

    train_data_stream = make_datastream(
        data.train, data.train_ind, batch_size,
        n_labeled=labeled_samples,
        n_unlabeled=unlabeled_samples)

    valid_data_stream = make_datastream(
        data.valid, data.valid_ind, valid_batch_size,
        n_labeled=len(data.valid_ind),
        n_unlabeled=len(data.valid_ind))

    train_monitoring = TrainingDataMonitoring(
        variables=monitored_variables,
        prefix="train",
        after_epoch=True)

    valid_monitoring = DataStreamMonitoring(
        variables=monitored_variables,
        data_stream=valid_data_stream,
        prefix="valid",
        after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=Model(ladder.costs.total),
        extensions=[
            train_monitoring,
            valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams(None, all_params, save_path, after_epoch=True),
            SaveLog(save_path, after_training=True),
            LRDecay(lr=ladder.lr,
                    decay_first=num_epochs * lrate_decay,
                    decay_last=num_epochs,
                    after_epoch=True),
            Printing()])
    main_loop.run()
Developer: codeaudit, Project: ladder_network, Lines of code: 71, Source: main.py

Example 15: main

# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]

#......... (part of the code omitted here) .........
    bn_cost.name = 'cost'

    bn_error_rate = MisclassificationRate().apply(y, bn_p[-1])
    bn_error_rate.name = 'error_rate'

    # ------------------------------------------------------------
    bn_cg = ComputationGraph([bn_cost, bn_error_rate])

    # Prepare algorithm
    algorithm = GradientDescent(
        cost=bn_cg.outputs[0],
        on_unused_sources='ignore',
        parameters=bn_cg.parameters,
        step_rule=CompositeRule([
            RemoveNotFinite(),
            StepClipping(10.),
            Adam(learning_rate)
        ])
    )

    pop_updates = get_batch_normalization_updates(bn_cg)
    update_params = [conv1_bn.population_mean, conv1_bn.population_stdev, conv2_bn.population_mean, conv2_bn.population_stdev, conv3_bn.population_mean,
                     conv3_bn.population_stdev, conv4_bn.population_mean, conv4_bn.population_stdev, conv5_bn.population_mean, conv5_bn.population_stdev,
                     conv6_bn.population_mean, conv6_bn.population_stdev, conv_mlp_bn.population_mean, conv_mlp_bn.population_stdev,
                     loc_mlp_bn.population_mean, loc_mlp_bn.population_stdev, classification_mlp1_bn.population_mean, classification_mlp1_bn.population_stdev,
                     classification_mlp2_bn.population_mean, classification_mlp2_bn.population_stdev]
    update_values = [m_c1_bn, s_c1_bn, m_c2_bn, s_c2_bn, m_c3_bn, s_c3_bn, m_c4_bn, s_c4_bn, m_c5_bn, s_c5_bn, m_c6_bn, s_c6_bn, m_c_bn, s_c_bn, m_l_bn, s_l_bn,
                     m_cl1_bn, s_cl1_bn, m_cl2_bn, s_cl2_bn]

    pop_updates.extend([(p, m) for p, m in zip(update_params, update_values)])

    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate)) for p, m in pop_updates]
    algorithm.add_updates(extra_updates)

    # ------------------------------------------------------------------------
    # Setup monitors

    p, l = edram.calculate_test(x, x_coarse)
    cost_where, cost_y = compute_cost(p, wr, y, l)
    cost = cost_y + cost_where
    cost = cost.sum(axis=0)
    cost = cost.mean()
    cost.name = 'cost'

    error_rate = MisclassificationRate().apply(y, p[-1])
    error_rate.name = 'error_rate'
    monitors = [cost, error_rate]

    plotting_extensions = []
    # Live plotting...
    if live_plotting:
        plot_channels = [
            ['train_cost', 'test_cost'],
            ['train_error_rate', 'test_error_rate'],
        ]
        plotting_extensions = [
            Plot(subdir, channels=plot_channels, server_url='http://155.69.150.60:80/')
        ]

    # ------------------------------------------------------------

    mnist_cluttered_train = MNISTCluttered(which_sets=['train'], sources=('features', 'locations', 'labels'))
    mnist_cluttered_test = MNISTCluttered(which_sets=['test'], sources=('features', 'locations', 'labels'))

    main_loop = MainLoop(
Developer: ablavatski, Project: EDRAM, Lines of code: 70, Source: train_mnist_cluttered.py


Note: The blocks.algorithms.GradientDescent.add_updates examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub/MSDocs. The snippets were selected from open-source projects contributed by their original authors, who retain copyright; for distribution and use, refer to each project's license. Do not reproduce without permission.