

Python algorithms.GradientDescent Class Code Examples

This article collects typical usage examples of the Python class blocks.algorithms.GradientDescent. If you are wondering how the GradientDescent class is used in practice, or what working GradientDescent code looks like, the hand-picked examples below should help.


The following presents 15 code examples of the GradientDescent class, sorted by popularity by default.
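
Before reading the longer examples, a minimal usage sketch may help. It is not taken from any of the projects below; it assumes Blocks and Theano are installed, and the parameter `W` and the learning rate are purely illustrative:

import numpy
from theano import tensor
from blocks.algorithms import GradientDescent, Scale
from blocks.utils import shared_floatx

# A single shared parameter and a scalar cost to minimize.
W = shared_floatx(numpy.array([[1., 2.], [3., 4.]]))
cost = tensor.sum(W ** 2)

# GradientDescent compiles a Theano function that applies one step of the
# chosen step rule to all parameters each time a batch is processed.
algorithm = GradientDescent(cost=cost, parameters=[W],
                            step_rule=Scale(learning_rate=0.1))
algorithm.initialize()           # compile the Theano update function
algorithm.process_batch(dict())  # one update step; this toy cost needs no inputs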

Example 1: run

def run(discriminative_regularization=True):
    streams = create_celeba_streams(training_batch_size=100,
                                    monitoring_batch_size=500,
                                    include_targets=False)
    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams[:3]

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    rval = create_training_computation_graphs(discriminative_regularization)
    cg, bn_cg, variance_parameters = rval
    pop_updates = list(
        set(get_batch_normalization_updates(bn_cg, allow_duplicates=True)))
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    model = Model(bn_cg.outputs[0])
    selector = Selector(
        find_bricks(
            model.top_bricks,
            lambda brick: brick.name in ('encoder_convnet', 'encoder_mlp',
                                         'decoder_convnet', 'decoder_mlp')))
    parameters = list(selector.get_parameters().values()) + variance_parameters

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_cg.outputs[0],
                                parameters=parameters,
                                step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    monitored_quantities_list = []
    for graph in [bn_cg, cg]:
        cost, kl_term, reconstruction_term = graph.outputs
        cost.name = 'nll_upper_bound'
        avg_kl_term = kl_term.mean(axis=0)
        avg_kl_term.name = 'avg_kl_term'
        avg_reconstruction_term = -reconstruction_term.mean(axis=0)
        avg_reconstruction_term.name = 'avg_reconstruction_term'
        monitored_quantities_list.append(
            [cost, avg_kl_term, avg_reconstruction_term])
    train_monitoring = DataStreamMonitoring(
        monitored_quantities_list[0], train_monitor_stream, prefix="train",
        updates=extra_updates, after_epoch=False, before_first_epoch=False,
        every_n_epochs=5)
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities_list[1], valid_monitor_stream, prefix="valid",
        after_epoch=False, before_first_epoch=False, every_n_epochs=5)

    # Prepare checkpoint
    save_path = 'celeba_vae_{}regularization.zip'.format(
        '' if discriminative_regularization else 'no_')
    checkpoint = Checkpoint(save_path, every_n_epochs=5, use_cpickle=True)

    extensions = [Timing(), FinishAfter(after_n_epochs=75), train_monitoring,
                  valid_monitoring, checkpoint, Printing(), ProgressBar()]
    main_loop = MainLoop(data_stream=main_loop_stream,
                         algorithm=algorithm, extensions=extensions)
    main_loop.run()
Contributor: anirudh9119 | Project: discgen | Lines: 60 | Source: train_celeba_vae.py

Example 2: train_rnnrbm

def train_rnnrbm(train, rnnrbm, epochs=1000, test=None, bokeh=True,
                 load_path=None):
    cdk = theano.shared(10)
    lr = theano.shared(float32(0.004))

    cost, v_sample = rnnrbm.cost(examples=x, mask=x_mask, k=cdk)

    error_rate = MismulitclassificationRate().apply(x, v_sample[-1], x_mask)
    error_rate.name = "error on note as a whole"
    mistake_rate = MismulitmistakeRate().apply(x, v_sample[-1], x_mask)
    mistake_rate.name = "single error within note"
    cost.name = 'rbm_cost'

    model = Model(cost)
    cg = ComputationGraph([cost])
    step_rule = CompositeRule(
        [RemoveNotFinite(), StepClipping(30.0), Adam(learning_rate=lr), StepClipping(6.0),
         RemoveNotFinite()])  # Scale(0.01)
    gradients = dict(equizip(cg.parameters, T.grad(cost, cg.parameters, consider_constant=[v_sample])))
    algorithm = GradientDescent(step_rule=step_rule, gradients=gradients, cost=cost,
                                params=cg.parameters)
    algorithm.add_updates(cg.updates)
    extensions = [
        SharedVariableModifier(parameter=cdk,
                               function=lambda n, v: rnnrbm_cdk[n] if rnnrbm_cdk.get(n) else v),
        SharedVariableModifier(parameter=lr,
                               function=lambda n, v: float32(0.78 * v) if n % (200 * 5) == 0 else v),
        FinishAfter(after_n_epochs=epochs),
        TrainingDataMonitoring(
            [cost, error_rate, mistake_rate, ],  # hidden_states, debug_val, param_nans,
            # aggregation.mean(algorithm.total_gradient_norm)],  #+ params,
            prefix="train",
            after_epoch=False, every_n_batches=40),
        Timing(),
        Printing(),
        ProgressBar()]
    if test is not None:
        extensions.append(DataStreamMonitoring(
            [cost, error_rate, mistake_rate],
            data_stream=test,
            updates=cg.updates,
            prefix="test", after_epoch=False, every_n_batches=40))
    if bokeh:
        extensions.append(Plot(
            'Training RNN-RBM',
            channels=[
                ['train_error on note as a whole', 'train_single error within note',
                 'test_error on note as a whole',
                 'test_single error within note'],
                ['train_final_cost'],
                # ['train_total_gradient_norm'],
            ]))

    main_loop = MainLoop(algorithm=algorithm,
                         data_stream=train,
                         model=model,
                         extensions=extensions
                         )
    return main_loop
Contributor: makistsantekidis | Project: rnnrbm | Lines: 59 | Source: main.py

Example 3: test_gradient_descent_finds_inputs_additional_updates

def test_gradient_descent_finds_inputs_additional_updates():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    n = shared_floatx(1)
    m = tensor.scalar('m')
    algorithm = GradientDescent(gradients=OrderedDict([(W, W + 1)]))
    algorithm.add_updates([(n, n + m)])
    algorithm.initialize()
    assert m in algorithm.inputs
Contributor: Beronx86 | Project: blocks | Lines: 8 | Source: test_algorithms.py

Example 4: test_gradient_descent

def test_gradient_descent():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    W_start_value = W.get_value()
    cost = tensor.sum(W ** 2)

    algorithm = GradientDescent(cost=cost, parameters=[W])
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch(dict())
    assert_allclose(W.get_value(), -0.5 * W_start_value)
Contributor: Beronx86 | Project: blocks | Lines: 10 | Source: test_algorithms.py

Example 5: create_main_loop

def create_main_loop(dataset, nvis, nhid, num_epochs, debug_level=0, lrate=1e-3):
    seed = 188229
    n_inference_steps = 6
    num_examples = dataset.num_examples
    batch_size = num_examples

    train_loop_stream = Flatten(
        DataStream.default_stream(
            dataset=dataset,
            iteration_scheme=SequentialScheme(dataset.num_examples, batch_size)  # Repeat(
            # , n_inference_steps)
            #            ShuffledScheme(dataset.num_examples, batch_size), n_inference_steps))
        ),
        which_sources=("features",),
    )

    model_brick = FivEM(
        nvis=nvis,
        nhid=nhid,
        epsilon=0.01,
        batch_size=batch_size,
        weights_init=IsotropicGaussian(0.1),
        biases_init=Constant(0),
        noise_scaling=1,
        debug=debug_level,
        lateral_x=False,
        lateral_h=False,
        n_inference_steps=n_inference_steps,
    )
    model_brick.initialize()

    x = tensor.matrix("features")

    cost = model_brick.cost(x)
    computation_graph = ComputationGraph([cost])
    model = Model(cost)
    # step_rule = Adam(learning_rate=2e-5, beta1=0.1, beta2=0.001, epsilon=1e-8,
    #                 decay_factor=(1 - 1e-8))
    step_rule = Momentum(learning_rate=lrate, momentum=0.95)
    # step_rule = AdaDelta()
    # step_rule = RMSProp(learning_rate=0.01)
    # step_rule = AdaGrad(learning_rate=1e-4)
    algorithm = GradientDescent(cost=cost, params=computation_graph.parameters, step_rule=step_rule)
    algorithm.add_updates(computation_graph.updates)

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        TrainingDataMonitoring([cost] + computation_graph.auxiliary_variables, after_batch=False, after_epoch=True),
        #                       every_n_epochs=1),
        Printing(after_epoch=True, after_batch=False),  # every_n_epochs=1,
        # Checkpoint(path="./fivem.zip",every_n_epochs=10,after_training=True)
    ]
    main_loop = MainLoop(model=model, data_stream=train_loop_stream, algorithm=algorithm, extensions=extensions)
    return main_loop
Contributor: pombredanne | Project: neuroml | Lines: 55 | Source: train_stdp_learner.py

Example 6: _test

def _test(f):
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    W_start_value = W.get_value()
    cost = tensor.sum(W ** 2)
    gradients = OrderedDict()
    gradients[W] = tensor.grad(cost, W)
    algorithm = GradientDescent(gradients=f(gradients))
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch(dict())
    assert_allclose(W.get_value(), -0.5 * W_start_value)
Contributor: leomauro | Project: blocks | Lines: 11 | Source: test_algorithms.py

Example 7: test_theano_profile_for_sgd_function

def test_theano_profile_for_sgd_function():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    W_start_value = W.get_value()
    cost = tensor.sum(W ** 2)

    algorithm = GradientDescent(
        cost=cost, parameters=[W], theano_func_kwargs={'profile': True})
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch(dict())
    assert_allclose(W.get_value(), -0.5 * W_start_value)
    assert isinstance(algorithm._function.profile, ProfileStats)
Contributor: Beronx86 | Project: blocks | Lines: 12 | Source: test_algorithms.py

Example 8: run

def run():
    streams = create_celeba_streams(training_batch_size=100,
                                    monitoring_batch_size=500,
                                    include_targets=True)
    main_loop_stream = streams[0]
    train_monitor_stream = streams[1]
    valid_monitor_stream = streams[2]

    cg, bn_dropout_cg = create_training_computation_graphs()

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    pop_updates = get_batch_normalization_updates(bn_dropout_cg)
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_dropout_cg.outputs[0],
                                parameters=bn_dropout_cg.parameters,
                                step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    cost = bn_dropout_cg.outputs[0]
    cost.name = 'cost'
    train_monitoring = DataStreamMonitoring(
        [cost], train_monitor_stream, prefix="train",
        before_first_epoch=False, after_epoch=False, after_training=True,
        updates=extra_updates)

    cost, accuracy = cg.outputs
    cost.name = 'cost'
    accuracy.name = 'accuracy'
    monitored_quantities = [cost, accuracy]
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities, valid_monitor_stream, prefix="valid",
        before_first_epoch=False, after_epoch=False, every_n_epochs=5)

    # Prepare checkpoint
    checkpoint = Checkpoint(
        'celeba_classifier.zip', every_n_epochs=5, use_cpickle=True)

    extensions = [Timing(), FinishAfter(after_n_epochs=50), train_monitoring,
                  valid_monitoring, checkpoint, Printing(), ProgressBar()]
    main_loop = MainLoop(data_stream=main_loop_stream, algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
Contributor: anirudh9119 | Project: discgen | Lines: 49 | Source: train_celeba_classifier.py

Example 9: test_gradient_descent_spurious_sources

def test_gradient_descent_spurious_sources():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    W_start_value = W.get_value()
    cost = tensor.sum(W ** 2)

    algorithm = GradientDescent(cost=cost, parameters=[W])
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    with assert_raises(ValueError):
        algorithm.process_batch(dict(example_id='test'))

    algorithm = GradientDescent(cost=cost, parameters=[W],
                                on_unused_sources='ignore')
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch(dict(example_id='test'))
    assert_allclose(W.get_value(), -0.5 * W_start_value)
Contributor: mila-udem | Project: blocks | Lines: 17 | Source: test_algorithms.py

Example 10: train_model

def train_model(cost, unregularized_cost, updates,
                train_stream, valid_stream, args, gate_values=None):

    step_rule = learning_algorithm(args)
    cg = ComputationGraph(cost)

    # ADD REGULARIZATION
    # WEIGHT NOISE
    weight_noise = args.weight_noise
    if weight_noise > 0:
        weights = VariableFilter(roles=[WEIGHT])(cg.variables)
        cg_train = apply_noise(cg, weights, weight_noise)
        cost = cg_train.outputs[0]
    cost.name = "cost_with_weight_noise"
    cg = ComputationGraph(cost)

    logger.info(cg.parameters)

    # Define algorithm
    algorithm = GradientDescent(cost=cost, step_rule=step_rule,
                                parameters=cg.parameters)
    # Add the updates to carry the hidden state
    algorithm.add_updates(updates)

    # Extensions to be added
    extensions = []

    # Load from a dumped model
    if args.load_path is not None:
        extensions.append(Load(args.load_path))

    # Generation extension
    if args.generate:
        extensions.append(TextGenerationExtension(
            cost=cost,
            generation_length=args.generated_text_lenght,
            initial_text_length=args.initial_text_length,
            every_n_batches=1,
            ploting_path=os.path.join(args.save_path, 'prob_plot.png'),
            softmax_sampling=args.softmax_sampling,
            dataset=args.dataset,
            updates=updates,
            interactive_mode=args.interactive_mode))

    # Training and Validation score monitoring
    extensions.extend([
        TrainingDataMonitoring([cost], prefix='train',
                               every_n_batches=args.monitoring_freq),
        DataStreamMonitoring([cost, unregularized_cost],
                             valid_stream, args.mini_batch_size_valid,
                             args.dataset,
                             state_updates=updates,
                             prefix='valid',
                             before_first_epoch=(args.visualize == "nothing"),
                             every_n_batches=args.monitoring_freq)])

    # Creating directory for saving model.
    if not args.interactive_mode:
        if not os.path.exists(args.save_path):
            os.makedirs(args.save_path)
        elif 'test' in args.save_path:
            print "Rewriting in " + args.save_path
        else:
            raise Exception('Directory already exists')

    # Early stopping
    extensions.append(EarlyStopping('valid_' + unregularized_cost.name,
                                    args.patience, args.save_path,
                                    every_n_batches=args.monitoring_freq))

    # Printing
    extensions.append(ProgressBar())
    extensions.append(Printing(every_n_batches=args.monitoring_freq))

    # Reset the initial states
    if args.dataset == "sine":
        reset_frequency = 1
    else:
        reset_frequency = 100
    extensions.append(ResetStates([v for v, _ in updates],
                                  every_n_batches=reset_frequency))

    # Visualizing extensions
    if args.interactive_mode:
        extensions.append(InteractiveMode())

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=extensions
    )
    main_loop.run()
Contributor: anirudh9119 | Project: RNN_Experiments | Lines: 93 | Source: train.py

Example 11: main

def main(nvis, nhid, encoding_lstm_dim, decoding_lstm_dim, T=1):
    x = tensor.matrix('features')

    # Construct and initialize model
    encoding_mlp = MLP([Tanh()], [None, None])
    decoding_mlp = MLP([Tanh()], [None, None])
    encoding_lstm = LSTM(dim=encoding_lstm_dim)
    decoding_lstm = LSTM(dim=decoding_lstm_dim)
    draw = DRAW(nvis=nvis, nhid=nhid, T=T, encoding_mlp=encoding_mlp,
                decoding_mlp=decoding_mlp, encoding_lstm=encoding_lstm,
                decoding_lstm=decoding_lstm, biases_init=Constant(0),
                weights_init=Orthogonal())
    draw.push_initialization_config()
    encoding_lstm.weights_init = IsotropicGaussian(std=0.001)
    decoding_lstm.weights_init = IsotropicGaussian(std=0.001)
    draw.initialize()

    # Compute cost
    cost = -draw.log_likelihood_lower_bound(x).mean()
    cost.name = 'nll_upper_bound'
    model = Model(cost)

    # Datasets and data streams
    mnist_train = BinarizedMNIST('train')
    train_loop_stream = ForceFloatX(DataStream(
        dataset=mnist_train,
        iteration_scheme=SequentialScheme(mnist_train.num_examples, 100)))
    train_monitor_stream = ForceFloatX(DataStream(
        dataset=mnist_train,
        iteration_scheme=SequentialScheme(mnist_train.num_examples, 500)))
    mnist_valid = BinarizedMNIST('valid')
    valid_monitor_stream = ForceFloatX(DataStream(
        dataset=mnist_valid,
        iteration_scheme=SequentialScheme(mnist_valid.num_examples, 500)))
    mnist_test = BinarizedMNIST('test')
    test_monitor_stream = ForceFloatX(DataStream(
        dataset=mnist_test,
        iteration_scheme=SequentialScheme(mnist_test.num_examples, 500)))

    # Get parameters and monitoring channels
    computation_graph = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(computation_graph.variables)
    monitoring_channels = dict([
        ('avg_' + channel.tag.name, channel.mean()) for channel in
        VariableFilter(name='.*term$')(computation_graph.auxiliary_variables)])
    for name, channel in monitoring_channels.items():
        channel.name = name
    monitored_quantities = monitoring_channels.values() + [cost]

    # Training loop
    step_rule = RMSProp(learning_rate=1e-3, decay_rate=0.95)
    algorithm = GradientDescent(cost=cost, params=params, step_rule=step_rule)
    algorithm.add_updates(computation_graph.updates)
    main_loop = MainLoop(
        model=model, data_stream=train_loop_stream, algorithm=algorithm,
        extensions=[
            Timing(),
            SerializeMainLoop('vae.pkl', save_separately=['model']),
            FinishAfter(after_n_epochs=200),
            DataStreamMonitoring(
                monitored_quantities, train_monitor_stream, prefix="train",
                updates=computation_graph.updates),
            DataStreamMonitoring(
                monitored_quantities, valid_monitor_stream, prefix="valid",
                updates=computation_graph.updates),
            DataStreamMonitoring(
                monitored_quantities, test_monitor_stream, prefix="test",
                updates=computation_graph.updates),
            ProgressBar(),
            Printing()])
    main_loop.run()
Contributor: pombredanne | Project: research | Lines: 71 | Source: train.py

Example 12: CategoricalCrossEntropy

        s.set_value(sqrt(init_var).astype(floatX))

cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
cost.name = 'cost'
error_rate = MisclassificationRate().apply(y.flatten(), probs)
error_rate.name = 'error_rate'

cg = ComputationGraph([cost])
    
parameters = cg.parameters
# add gradient descent to M,S
if normalization == 'bn2':
    for m,s,var in statistics_list:
        parameters.extend([m,s])

algorithm = GradientDescent(
    cost=cost, parameters=parameters, step_rule=Adam(0.01))

#update the M and S with batch statistics
alpha = 0.1
updates = []
if normalization == 'bn2':
    for m,s,var in statistics_list:
        updates.append((m, cast(alpha*m + (1-alpha)*var.mean(axis=0), floatX)))
        updates.append((s, cast(alpha*s + (1-alpha)*var.std(axis=0) , floatX)))

algorithm.add_updates(updates)
# Since this line wont work with the extension to include parameters
# in the gradient descent. Here's an extension that will do the job.

from blocks.extensions import SimpleExtension
from theano import function
Contributor: anirudh9119 | Project: SpeechSyn | Lines: 32 | Source: bn2.py
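
The excerpt above stops right after the imports, before the extension it announces. A rough sketch of what such an extension could look like follows; this is an assumption for illustration, not the original bn2.py code, and the class name UpdateStatistics and the `updates` argument are invented here:

class UpdateStatistics(SimpleExtension):
    """Apply a list of (shared_variable, new_value) updates after every batch."""
    def __init__(self, updates, **kwargs):
        kwargs.setdefault('after_batch', True)
        super(UpdateStatistics, self).__init__(**kwargs)
        # Compile a Theano function whose only effect is applying the updates.
        self.apply_updates = function([], [], updates=updates)

    def do(self, which_callback, *args):
        self.apply_updates()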

Example 13: list

for param in discriminator_cg.parameters:
    param.name += '_d'

both = list(set(dsamples_cg.parameters) & set(generator_cg.parameters))
indices = []
for (i, par) in enumerate(dsamples_cg.parameters):
    if par in generator_cg.parameters:
        indices.append(i)

good_params = [dsamples_cg.parameters[i] for i in indices]
print 'tests'
for param in dsamples_cg.parameters:
    print param.name

discriminator_descent = GradientDescent(cost=cost_discriminator, 
                                        parameters=discriminator_cg.parameters,
                                        step_rule=RMSProp(learning_rate=0.01, decay_rate=0.97))
print filter(lambda x: x.name[-2:] == '_g', dsamples_cg.parameters)
generator_descent = GradientDescent(cost=cost_generator, 
                                    parameters=filter(lambda x: x.name[-2:] == '_g', 
                                                      dsamples_cg.parameters),
                                    # parameters=good_params,
                                    # parameters=dsamples_cg.parameters,
                                    step_rule=RMSProp(learning_rate=1., decay_rate=0.97))

generator_descent.total_step_norm.name = 'generator_total_step_norm'
generator_descent.total_gradient_norm.name = 'generator_total_gradient_norm'
discriminator_descent.total_step_norm.name = 'discriminator_total_step_norm'
discriminator_descent.total_gradient_norm.name = 'discriminator_total_gradient_norm'
from fuel.datasets import MNIST
mnist = MNIST(("train",))
Contributor: Bjornwolf | Project: language-model | Lines: 31 | Source: gan_train.py

Example 14: main

def main():
    nclasses = 27

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--seed", type=int, default=1)
    parser.add_argument("--length", type=int, default=180)
    parser.add_argument("--num-epochs", type=int, default=100)
    parser.add_argument("--batch-size", type=int, default=64)
    parser.add_argument("--learning-rate", type=float, default=1e-3)
    parser.add_argument("--epsilon", type=float, default=1e-5)
    parser.add_argument("--num-hidden", type=int, default=1000)
    parser.add_argument("--baseline", action="store_true")
    parser.add_argument("--initialization", choices="identity glorot orthogonal uniform".split(), default="identity")
    parser.add_argument("--initial-gamma", type=float, default=1e-1)
    parser.add_argument("--initial-beta", type=float, default=0)
    parser.add_argument("--cluster", action="store_true")
    parser.add_argument("--activation", choices=list(activations.keys()), default="tanh")
    parser.add_argument("--optimizer", choices="sgdmomentum adam rmsprop", default="rmsprop")
    parser.add_argument("--continue-from")
    parser.add_argument("--evaluate")
    parser.add_argument("--dump-hiddens")
    args = parser.parse_args()

    np.random.seed(args.seed)
    blocks.config.config.default_seed = args.seed

    if args.continue_from:
        from blocks.serialization import load

        main_loop = load(args.continue_from)
        main_loop.run()
        sys.exit(0)

    graphs, extensions, updates = construct_graphs(args, nclasses)

    ### optimization algorithm definition
    if args.optimizer == "adam":
        optimizer = Adam(learning_rate=args.learning_rate)
    elif args.optimizer == "rmsprop":
        optimizer = RMSProp(learning_rate=args.learning_rate, decay_rate=0.9)
    elif args.optimizer == "sgdmomentum":
        optimizer = Momentum(learning_rate=args.learning_rate, momentum=0.99)
    step_rule = CompositeRule([StepClipping(1.0), optimizer])
    algorithm = GradientDescent(
        cost=graphs["training"].outputs[0], parameters=graphs["training"].parameters, step_rule=step_rule
    )
    algorithm.add_updates(updates["training"])
    model = Model(graphs["training"].outputs[0])
    extensions = extensions["training"] + extensions["inference"]

    # step monitor
    step_channels = []
    step_channels.extend(
        [
            algorithm.steps[param].norm(2).copy(name="step_norm:%s" % name)
            for name, param in model.get_parameter_dict().items()
        ]
    )
    step_channels.append(algorithm.total_step_norm.copy(name="total_step_norm"))
    step_channels.append(algorithm.total_gradient_norm.copy(name="total_gradient_norm"))
    step_channels.extend(graphs["training"].outputs)
    logger.warning("constructing training data monitor")
    extensions.append(TrainingDataMonitoring(step_channels, prefix="iteration", after_batch=True))

    # parameter monitor
    extensions.append(
        DataStreamMonitoring(
            [param.norm(2).copy(name="parameter.norm:%s" % name) for name, param in model.get_parameter_dict().items()],
            data_stream=None,
            after_epoch=True,
        )
    )

    validation_interval = 500
    # performance monitor
    for situation in "training inference".split():
        if situation == "inference" and not args.evaluate:
            # save time when we don't need the inference graph
            continue

        for which_set in "train valid test".split():
            logger.warning("constructing %s %s monitor" % (which_set, situation))
            channels = list(graphs[situation].outputs)
            extensions.append(
                DataStreamMonitoring(
                    channels,
                    prefix="%s_%s" % (which_set, situation),
                    every_n_batches=validation_interval,
                    data_stream=get_stream(
                        which_set=which_set, batch_size=args.batch_size, num_examples=10000, length=args.length
                    ),
                )
            )

    extensions.extend(
        [
            TrackTheBest("valid_training_error_rate", "best_valid_training_error_rate"),
            DumpBest("best_valid_training_error_rate", "best.zip"),
#......... part of the code omitted here .........
Contributor: cooijmanstim | Project: recurrent-batch-normalization | Lines: 101 | Source: text8.py

Example 15: main


#......... part of the code omitted here .........
    train_cost_without_regularization.name = 'cost_without_regularization'
    train_cost = train_cost + l2_regularization + train_nit_regularization
    train_cost.name = 'cost_with_regularization'

    cifar10_train = CIFAR10(("train",))
    cifar10_train_stream = RandomPadCropFlip(
        NormalizeBatchLevels(DataStream.default_stream(
            cifar10_train, iteration_scheme=ShuffledScheme(
                cifar10_train.num_examples, batch_size)),
        which_sources=('features',)),
        (32, 32), pad=4, which_sources=('features',))

    test_batch_size = 128
    cifar10_test = CIFAR10(("test",))
    cifar10_test_stream = NormalizeBatchLevels(DataStream.default_stream(
        cifar10_test,
        iteration_scheme=ShuffledScheme(
            cifar10_test.num_examples, test_batch_size)),
        which_sources=('features',))

    momentum = Momentum(0.01, 0.9)

    # Create a step rule that doubles the learning rate of biases, like Caffe.
    # scale_bias = Restrict(Scale(2), biases)
    # step_rule = CompositeRule([scale_bias, momentum])

    # Create a step rule that reduces the learning rate of noise
    scale_mask = Restrict(noise_step_rule, mask_parameters)
    step_rule = CompositeRule([scale_mask, momentum])

    # from theano.compile.nanguardmode import NanGuardMode

    # Train with simple SGD
    algorithm = GradientDescent(
        cost=train_cost, parameters=trainable_parameters,
        step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    #,
    #    theano_func_kwargs={
    #        'mode': NanGuardMode(
    #            nan_is_error=True, inf_is_error=True, big_is_error=True)})

    exp_name = save_to.replace('.%d', '')

    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs,
                              after_n_batches=num_batches),
                  EpochSchedule(momentum.learning_rate, [
                      (0, 0.01),     # Warm up with 0.01 learning rate
                      (50, 0.1),     # Then go back to 0.1
                      (100, 0.01),
                      (150, 0.001)
                      # (83, 0.01),  # Follow the schedule in the paper
                      # (125, 0.001)
                  ]),
                  EpochSchedule(noise_step_rule.learning_rate, [
                      (0, 1e-2),
                      (2, 1e-1),
                      (4, 1)
                      # (0, 1e-6),
                      # (2, 1e-5),
                      # (4, 1e-4)
Contributor: davidbau | Project: net-intent | Lines: 67 | Source: nrrun.py


Note: The blocks.algorithms.GradientDescent class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub/MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors. For distribution and use, please refer to the license of the corresponding project; do not reproduce without permission.