This page collects typical usage examples of the Python method blocks.algorithms.GradientDescent.add_updates. If you are unsure what GradientDescent.add_updates does, how to call it, or when to use it, the curated code examples below should help. You can also explore further usage examples of the containing class, blocks.algorithms.GradientDescent.
The following presents 15 code examples of the GradientDescent.add_updates method, sorted by popularity by default.
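Before the examples, here is a minimal, self-contained sketch of the pattern they all share: add_updates attaches extra Theano updates (pairs of shared variable and new value, or an update dictionary) to the training function that GradientDescent compiles, so quantities such as batch-normalization population statistics or recurrent hidden states are refreshed on every processed batch. This sketch is not taken from the examples below; the toy cost and the step_counter variable are invented for illustration, and it assumes a Blocks version whose constructor accepts parameters= (older snippets below use params= instead).

import numpy
import theano
from theano import tensor
from blocks.algorithms import GradientDescent, Scale
from blocks.utils import shared_floatx

# Toy cost: squared distance between an input vector and a shared parameter.
x = tensor.vector('x')
w = shared_floatx(numpy.zeros(3), name='w')
cost = ((x - w) ** 2).sum()

# Extra state to refresh on every gradient step (illustrative only),
# e.g. an iteration counter or population statistics for batch norm.
step_counter = theano.shared(numpy.int64(0), name='step_counter')

algorithm = GradientDescent(cost=cost, parameters=[w],
                            step_rule=Scale(learning_rate=0.1))
# add_updates() appends (shared_variable, new_value) pairs to the updates
# applied by the compiled training function.
algorithm.add_updates([(step_counter, step_counter + 1)])
algorithm.initialize()

batch = {'x': numpy.ones(3, dtype=theano.config.floatX)}
algorithm.process_batch(batch)   # takes a gradient step AND bumps the counter
print(step_counter.get_value())  # -> 1

The same mechanism is what the examples below use to fold batch-normalization population updates, scan updates from a ComputationGraph, or monitoring shared variables into a single training step.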
Example 1: run
# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def run(discriminative_regularization=True):
    streams = create_celeba_streams(training_batch_size=100,
                                    monitoring_batch_size=500,
                                    include_targets=False)
    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams[:3]

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    rval = create_training_computation_graphs(discriminative_regularization)
    cg, bn_cg, variance_parameters = rval
    pop_updates = list(
        set(get_batch_normalization_updates(bn_cg, allow_duplicates=True)))
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    model = Model(bn_cg.outputs[0])
    selector = Selector(
        find_bricks(
            model.top_bricks,
            lambda brick: brick.name in ('encoder_convnet', 'encoder_mlp',
                                         'decoder_convnet', 'decoder_mlp')))
    parameters = list(selector.get_parameters().values()) + variance_parameters

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_cg.outputs[0],
                                parameters=parameters,
                                step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    monitored_quantities_list = []
    for graph in [bn_cg, cg]:
        cost, kl_term, reconstruction_term = graph.outputs
        cost.name = 'nll_upper_bound'
        avg_kl_term = kl_term.mean(axis=0)
        avg_kl_term.name = 'avg_kl_term'
        avg_reconstruction_term = -reconstruction_term.mean(axis=0)
        avg_reconstruction_term.name = 'avg_reconstruction_term'
        monitored_quantities_list.append(
            [cost, avg_kl_term, avg_reconstruction_term])
    train_monitoring = DataStreamMonitoring(
        monitored_quantities_list[0], train_monitor_stream, prefix="train",
        updates=extra_updates, after_epoch=False, before_first_epoch=False,
        every_n_epochs=5)
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities_list[1], valid_monitor_stream, prefix="valid",
        after_epoch=False, before_first_epoch=False, every_n_epochs=5)

    # Prepare checkpoint
    save_path = 'celeba_vae_{}regularization.zip'.format(
        '' if discriminative_regularization else 'no_')
    checkpoint = Checkpoint(save_path, every_n_epochs=5, use_cpickle=True)

    extensions = [Timing(), FinishAfter(after_n_epochs=75), train_monitoring,
                  valid_monitoring, checkpoint, Printing(), ProgressBar()]
    main_loop = MainLoop(data_stream=main_loop_stream,
                         algorithm=algorithm, extensions=extensions)
    main_loop.run()
Example 2: train_rnnrbm
# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def train_rnnrbm(train, rnnrbm, epochs=1000, test=None, bokeh=True,
                 load_path=None):
    cdk = theano.shared(10)
    lr = theano.shared(float32(0.004))

    cost, v_sample = rnnrbm.cost(examples=x, mask=x_mask, k=cdk)
    error_rate = MismulitclassificationRate().apply(x, v_sample[-1], x_mask)
    error_rate.name = "error on note as a whole"
    mistake_rate = MismulitmistakeRate().apply(x, v_sample[-1], x_mask)
    mistake_rate.name = "single error within note"
    cost.name = 'rbm_cost'
    model = Model(cost)
    cg = ComputationGraph([cost])
    step_rule = CompositeRule(
        [RemoveNotFinite(), StepClipping(30.0), Adam(learning_rate=lr),
         StepClipping(6.0), RemoveNotFinite()])  # Scale(0.01)
    gradients = dict(equizip(cg.parameters,
                             T.grad(cost, cg.parameters,
                                    consider_constant=[v_sample])))
    algorithm = GradientDescent(step_rule=step_rule, gradients=gradients,
                                cost=cost, params=cg.parameters)
    algorithm.add_updates(cg.updates)

    extensions = [
        SharedVariableModifier(parameter=cdk,
                               function=lambda n, v: rnnrbm_cdk[n] if rnnrbm_cdk.get(n) else v),
        SharedVariableModifier(parameter=lr,
                               function=lambda n, v: float32(0.78 * v) if n % (200 * 5) == 0 else v),
        FinishAfter(after_n_epochs=epochs),
        TrainingDataMonitoring(
            [cost, error_rate, mistake_rate, ],  # hidden_states, debug_val, param_nans,
            # aggregation.mean(algorithm.total_gradient_norm)], #+ params,
            prefix="train",
            after_epoch=False, every_n_batches=40),
        Timing(),
        Printing(),
        ProgressBar()]

    if test is not None:
        extensions.append(DataStreamMonitoring(
            [cost, error_rate, mistake_rate],
            data_stream=test,
            updates=cg.updates,
            prefix="test", after_epoch=False, every_n_batches=40))
    if bokeh:
        extensions.append(Plot(
            'Training RNN-RBM',
            channels=[
                ['train_error on note as a whole',
                 'train_single error within note',
                 'test_error on note as a whole',
                 'test_single error within note'],
                ['train_final_cost'],
                # ['train_total_gradient_norm'],
            ]))

    main_loop = MainLoop(algorithm=algorithm,
                         data_stream=train,
                         model=model,
                         extensions=extensions)
    return main_loop
Example 3: test_gradient_descent_finds_inputs_additional_updates
# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def test_gradient_descent_finds_inputs_additional_updates():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    n = shared_floatx(1)
    m = tensor.scalar('m')
    algorithm = GradientDescent(gradients=OrderedDict([(W, W + 1)]))
    algorithm.add_updates([(n, n + m)])
    algorithm.initialize()
    assert m in algorithm.inputs
Example 4: create_main_loop
# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def create_main_loop(dataset, nvis, nhid, num_epochs, debug_level=0,
                     lrate=1e-3):
    seed = 188229
    n_inference_steps = 6
    num_examples = dataset.num_examples
    batch_size = num_examples
    train_loop_stream = Flatten(
        DataStream.default_stream(
            dataset=dataset,
            iteration_scheme=SequentialScheme(dataset.num_examples, batch_size)  # Repeat(
            # , n_inference_steps)
            # ShuffledScheme(dataset.num_examples, batch_size), n_inference_steps))
        ),
        which_sources=("features",),
    )
    model_brick = FivEM(
        nvis=nvis,
        nhid=nhid,
        epsilon=0.01,
        batch_size=batch_size,
        weights_init=IsotropicGaussian(0.1),
        biases_init=Constant(0),
        noise_scaling=1,
        debug=debug_level,
        lateral_x=False,
        lateral_h=False,
        n_inference_steps=n_inference_steps,
    )
    model_brick.initialize()

    x = tensor.matrix("features")
    cost = model_brick.cost(x)
    computation_graph = ComputationGraph([cost])
    model = Model(cost)

    # step_rule = Adam(learning_rate=2e-5, beta1=0.1, beta2=0.001, epsilon=1e-8,
    #                  decay_factor=(1 - 1e-8))
    step_rule = Momentum(learning_rate=lrate, momentum=0.95)
    # step_rule = AdaDelta()
    # step_rule = RMSProp(learning_rate=0.01)
    # step_rule = AdaGrad(learning_rate=1e-4)
    algorithm = GradientDescent(cost=cost, params=computation_graph.parameters,
                                step_rule=step_rule)
    algorithm.add_updates(computation_graph.updates)

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        TrainingDataMonitoring([cost] + computation_graph.auxiliary_variables,
                               after_batch=False, after_epoch=True),
        # every_n_epochs=1),
        Printing(after_epoch=True, after_batch=False),  # every_n_epochs=1,
        # Checkpoint(path="./fivem.zip",every_n_epochs=10,after_training=True)
    ]
    main_loop = MainLoop(model=model, data_stream=train_loop_stream,
                         algorithm=algorithm, extensions=extensions)
    return main_loop
Example 5: run
# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def run():
    streams = create_celeba_streams(training_batch_size=100,
                                    monitoring_batch_size=500,
                                    include_targets=True)
    main_loop_stream = streams[0]
    train_monitor_stream = streams[1]
    valid_monitor_stream = streams[2]

    cg, bn_dropout_cg = create_training_computation_graphs()

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    pop_updates = get_batch_normalization_updates(bn_dropout_cg)
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_dropout_cg.outputs[0],
                                parameters=bn_dropout_cg.parameters,
                                step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    cost = bn_dropout_cg.outputs[0]
    cost.name = 'cost'
    train_monitoring = DataStreamMonitoring(
        [cost], train_monitor_stream, prefix="train",
        before_first_epoch=False, after_epoch=False, after_training=True,
        updates=extra_updates)

    cost, accuracy = cg.outputs
    cost.name = 'cost'
    accuracy.name = 'accuracy'
    monitored_quantities = [cost, accuracy]
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities, valid_monitor_stream, prefix="valid",
        before_first_epoch=False, after_epoch=False, every_n_epochs=5)

    # Prepare checkpoint
    checkpoint = Checkpoint(
        'celeba_classifier.zip', every_n_epochs=5, use_cpickle=True)

    extensions = [Timing(), FinishAfter(after_n_epochs=50), train_monitoring,
                  valid_monitoring, checkpoint, Printing(), ProgressBar()]
    main_loop = MainLoop(data_stream=main_loop_stream, algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
Example 6: main
# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def main(num_epochs=50, batch_normalized=True, alpha=0.1):
    """Run the example.

    Parameters
    ----------
    num_epochs : int, optional
        Number of epochs for which to train.
    batch_normalized : bool, optional
        Batch-normalize the training graph. Defaults to `True`.
    alpha : float, optional
        Weight to apply to a new sample when calculating running
        averages for population statistics (1 - alpha weight is
        given to the existing average).

    """
    if batch_normalized:
        # Add an extra keyword argument that only BatchNormalizedMLP takes,
        # in order to speed things up at the cost of a bit of extra memory.
        mlp_class = BatchNormalizedMLP
        extra_kwargs = {'conserve_memory': False}
    else:
        mlp_class = MLP
        extra_kwargs = {}
    mlp = mlp_class([Logistic(), Logistic(), Logistic(), Softmax()],
                    [2, 5, 5, 5, 3],
                    weights_init=IsotropicGaussian(0.2),
                    biases_init=Constant(0.), **extra_kwargs)
    mlp.initialize()

    # Generate a dataset with 3 spiral arms, using 8000 examples for
    # training and 2000 for testing.
    dataset = Spiral(num_examples=10000, classes=3,
                     sources=['features', 'label'],
                     noise=0.05)
    train_stream = DataStream(dataset,
                              iteration_scheme=ShuffledScheme(examples=8000,
                                                              batch_size=20))
    test_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(
                                 examples=list(range(8000, 10000)),
                                 batch_size=2000))

    # Build a cost graph; this contains BatchNormalization bricks that will
    # by default run in inference mode.
    features = tensor.matrix('features')
    label = tensor.lvector('label')
    prediction = mlp.apply(features)
    cost = CategoricalCrossEntropy().apply(label, prediction)
    misclass = MisclassificationRate().apply(label, prediction)
    misclass.name = 'misclass'  # The default name for this is annoyingly long
    original_cg = ComputationGraph([cost, misclass])

    if batch_normalized:
        cg = apply_batch_normalization(original_cg)
        # Add updates for population parameters
        pop_updates = get_batch_normalization_updates(cg)
        extra_updates = [(p, m * alpha + p * (1 - alpha))
                         for p, m in pop_updates]
    else:
        cg = original_cg
        extra_updates = []

    algorithm = GradientDescent(step_rule=Adam(0.001),
                                cost=cg.outputs[0],
                                parameters=cg.parameters)
    algorithm.add_updates(extra_updates)

    main_loop = MainLoop(algorithm=algorithm,
                         data_stream=train_stream,
                         # Use the original cost and misclass variables so
                         # that we monitor the (original) inference-mode graph.
                         extensions=[DataStreamMonitoring([cost, misclass],
                                                          train_stream,
                                                          prefix='train'),
                                     DataStreamMonitoring([cost, misclass],
                                                          test_stream,
                                                          prefix='test'),
                                     Printing(),
                                     FinishAfter(after_n_epochs=num_epochs)])
    main_loop.run()
    return main_loop
Example 7: train
# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
#......... (part of the code is omitted here) .........
    if weight_noise > 0:
        weights = VariableFilter(roles=[WEIGHT])(cg_train.variables)
        cg_train = apply_noise(cg_train, weights, weight_noise)
    cost_train = cg_train.outputs[0].copy(name='cost_train')
    model = Model(cost_train)

    learning_rate = float(learning_rate)
    clipping = StepClipping(threshold=np.cast[floatX](clipping))
    if algorithm == 'adam':
        adam = Adam(learning_rate=learning_rate)
        learning_rate = adam.learning_rate
        step_rule = CompositeRule([adam, clipping])
    elif algorithm == 'rms_prop':
        rms_prop = RMSProp(learning_rate=learning_rate)
        learning_rate = rms_prop.learning_rate
        step_rule = CompositeRule([clipping, rms_prop])
    elif algorithm == 'momentum':
        sgd_momentum = Momentum(learning_rate=learning_rate, momentum=momentum)
        learning_rate = sgd_momentum.learning_rate
        step_rule = CompositeRule([clipping, sgd_momentum])
    elif algorithm == 'sgd':
        sgd = Scale(learning_rate=learning_rate)
        learning_rate = sgd.learning_rate
        step_rule = CompositeRule([clipping, sgd])
    else:
        raise NotImplementedError
    algorithm = GradientDescent(step_rule=step_rule,
                                cost=cost_train,
                                parameters=cg_train.parameters)
    # theano_func_kwargs={"mode": theano.compile.MonitorMode(post_func=detect_nan)})
    algorithm.add_updates(init_updates)

    def cond_number(x):
        _, _, sing_vals = T.nlinalg.svd(x, True, True)
        sing_mags = abs(sing_vals)
        return T.max(sing_mags) / T.min(sing_mags)

    def rms(x):
        return (x*x).mean().sqrt()

    whysplode_cond = []
    whysplode_rms = []
    for i, p in enumerate(init_updates):
        v = p.get_value()
        if p.get_value().shape == 2:
            whysplode_cond.append(cond_number(p).copy('ini%d:%s_cond(%s)' % (i, p.name, "x".join(map(str, p.get_value().shape)))))
            whysplode_rms.append(rms(p).copy('ini%d:%s_rms(%s)' % (i, p.name, "x".join(map(str, p.get_value().shape)))))
    for i, p in enumerate(cg_train.parameters):
        v = p.get_value()
        if p.get_value().shape == 2:
            whysplode_cond.append(cond_number(p).copy('ini%d:%s_cond(%s)' % (i, p.name, "x".join(map(str, p.get_value().shape)))))
            whysplode_rms.append(rms(p).copy('ini%d:%s_rms(%s)' % (i, p.name, "x".join(map(str, p.get_value().shape)))))

    observed_vars = [cost_train, cost, bpc, perp, learning_rate,
                     aggregation.mean(algorithm.total_gradient_norm).copy("gradient_norm_mean")]  # + whysplode_rms

    parameters = model.get_parameter_dict()
    for name, param in parameters.iteritems():
        observed_vars.append(param.norm(2).copy(name=name + "_norm"))
        observed_vars.append(
            algorithm.gradients[param].norm(2).copy(name=name + "_grad_norm"))

    train_monitor = TrainingDataMonitoring(
        variables=observed_vars,
Example 8: main
# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def main(save_to, num_epochs,
         regularization=0.0001, subset=None, num_batches=None,
         batch_size=None, histogram=None, resume=False):
    output_size = 10
    convnet = create_res_net()

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    test_probs = convnet.apply(x)
    test_cost = (CategoricalCrossEntropy().apply(y.flatten(), test_probs)
                 .copy(name='cost'))
    test_error_rate = (MisclassificationRate().apply(y.flatten(), test_probs)
                       .copy(name='error_rate'))
    test_confusion = (ConfusionMatrix().apply(y.flatten(), test_probs)
                      .copy(name='confusion'))
    test_confusion.tag.aggregation_scheme = Sum(test_confusion)

    test_cg = ComputationGraph([test_cost, test_error_rate])

    # Apply dropout to all layer outputs except final softmax
    # dropout_vars = VariableFilter(
    #     roles=[OUTPUT], bricks=[Convolutional],
    #     theano_name_regex="^conv_[25]_apply_output$")(test_cg.variables)
    # drop_cg = apply_dropout(test_cg, dropout_vars, 0.5)

    # Apply 0.2 dropout to the pre-averaging layer
    # dropout_vars_2 = VariableFilter(
    #     roles=[OUTPUT], bricks=[Convolutional],
    #     theano_name_regex="^conv_8_apply_output$")(test_cg.variables)
    # train_cg = apply_dropout(test_cg, dropout_vars_2, 0.2)

    # Apply 0.2 dropout to the input, as in the paper
    # train_cg = apply_dropout(test_cg, [x], 0.2)
    # train_cg = drop_cg
    # train_cg = apply_batch_normalization(test_cg)

    # train_cost, train_error_rate, train_components = train_cg.outputs
    with batch_normalization(convnet):
        train_probs = convnet.apply(x)
    train_cost = (CategoricalCrossEntropy().apply(y.flatten(), train_probs)
                  .copy(name='cost'))
    train_components = (ComponentwiseCrossEntropy().apply(y.flatten(),
                        train_probs).copy(name='components'))
    train_error_rate = (MisclassificationRate().apply(y.flatten(),
                        train_probs).copy(name='error_rate'))
    train_cg = ComputationGraph([train_cost,
                                 train_error_rate, train_components])
    population_updates = get_batch_normalization_updates(train_cg)
    bn_alpha = 0.9
    extra_updates = [(p, p * bn_alpha + m * (1 - bn_alpha))
                     for p, m in population_updates]

    # Apply regularization to the cost
    biases = VariableFilter(roles=[BIAS])(train_cg.parameters)
    weights = VariableFilter(roles=[WEIGHT])(train_cg.variables)
    l2_norm = sum([(W ** 2).sum() for W in weights])
    l2_norm.name = 'l2_norm'
    l2_regularization = regularization * l2_norm
    l2_regularization.name = 'l2_regularization'
    test_cost = test_cost + l2_regularization
    test_cost.name = 'cost_with_regularization'

    # Training version of cost
    train_cost_without_regularization = train_cost
    train_cost_without_regularization.name = 'cost_without_regularization'
    train_cost = train_cost + regularization * l2_norm
    train_cost.name = 'cost_with_regularization'

    cifar10_train = CIFAR10(("train",))
    cifar10_train_stream = RandomPadCropFlip(
        NormalizeBatchLevels(DataStream.default_stream(
            cifar10_train, iteration_scheme=ShuffledScheme(
                cifar10_train.num_examples, batch_size)),
            which_sources=('features',)),
        (32, 32), pad=4, which_sources=('features',))

    test_batch_size = 500
    cifar10_test = CIFAR10(("test",))
    cifar10_test_stream = NormalizeBatchLevels(DataStream.default_stream(
        cifar10_test,
        iteration_scheme=ShuffledScheme(
            cifar10_test.num_examples, test_batch_size)),
        which_sources=('features',))

    momentum = Momentum(0.01, 0.9)

    # Create a step rule that doubles the learning rate of biases, like Caffe.
    # scale_bias = Restrict(Scale(2), biases)
    # step_rule = CompositeRule([scale_bias, momentum])
    # from theano.compile.nanguardmode import NanGuardMode

    # Train with simple SGD
    algorithm = GradientDescent(
        cost=train_cost, parameters=train_cg.parameters,
        step_rule=momentum)
    algorithm.add_updates(extra_updates)
#......... (the rest of the code is omitted here) .........
Example 9: main
# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def main(nvis, nhid, encoding_lstm_dim, decoding_lstm_dim, T=1):
    x = tensor.matrix('features')

    # Construct and initialize model
    encoding_mlp = MLP([Tanh()], [None, None])
    decoding_mlp = MLP([Tanh()], [None, None])
    encoding_lstm = LSTM(dim=encoding_lstm_dim)
    decoding_lstm = LSTM(dim=decoding_lstm_dim)
    draw = DRAW(nvis=nvis, nhid=nhid, T=T, encoding_mlp=encoding_mlp,
                decoding_mlp=decoding_mlp, encoding_lstm=encoding_lstm,
                decoding_lstm=decoding_lstm, biases_init=Constant(0),
                weights_init=Orthogonal())
    draw.push_initialization_config()
    encoding_lstm.weights_init = IsotropicGaussian(std=0.001)
    decoding_lstm.weights_init = IsotropicGaussian(std=0.001)
    draw.initialize()

    # Compute cost
    cost = -draw.log_likelihood_lower_bound(x).mean()
    cost.name = 'nll_upper_bound'
    model = Model(cost)

    # Datasets and data streams
    mnist_train = BinarizedMNIST('train')
    train_loop_stream = ForceFloatX(DataStream(
        dataset=mnist_train,
        iteration_scheme=SequentialScheme(mnist_train.num_examples, 100)))
    train_monitor_stream = ForceFloatX(DataStream(
        dataset=mnist_train,
        iteration_scheme=SequentialScheme(mnist_train.num_examples, 500)))
    mnist_valid = BinarizedMNIST('valid')
    valid_monitor_stream = ForceFloatX(DataStream(
        dataset=mnist_valid,
        iteration_scheme=SequentialScheme(mnist_valid.num_examples, 500)))
    mnist_test = BinarizedMNIST('test')
    test_monitor_stream = ForceFloatX(DataStream(
        dataset=mnist_test,
        iteration_scheme=SequentialScheme(mnist_test.num_examples, 500)))

    # Get parameters and monitoring channels
    computation_graph = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(computation_graph.variables)
    monitoring_channels = dict([
        ('avg_' + channel.tag.name, channel.mean()) for channel in
        VariableFilter(name='.*term$')(computation_graph.auxiliary_variables)])
    for name, channel in monitoring_channels.items():
        channel.name = name
    monitored_quantities = monitoring_channels.values() + [cost]

    # Training loop
    step_rule = RMSProp(learning_rate=1e-3, decay_rate=0.95)
    algorithm = GradientDescent(cost=cost, params=params, step_rule=step_rule)
    algorithm.add_updates(computation_graph.updates)
    main_loop = MainLoop(
        model=model, data_stream=train_loop_stream, algorithm=algorithm,
        extensions=[
            Timing(),
            SerializeMainLoop('vae.pkl', save_separately=['model']),
            FinishAfter(after_n_epochs=200),
            DataStreamMonitoring(
                monitored_quantities, train_monitor_stream, prefix="train",
                updates=computation_graph.updates),
            DataStreamMonitoring(
                monitored_quantities, valid_monitor_stream, prefix="valid",
                updates=computation_graph.updates),
            DataStreamMonitoring(
                monitored_quantities, test_monitor_stream, prefix="test",
                updates=computation_graph.updates),
            ProgressBar(),
            Printing()])
    main_loop.run()
Example 10: ComputationGraph
# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
                                  batch_size=m)))
# note: the discriminator takes 2m samples; the first m are generated,
# the next m come from the data
# observables.append(cost_discriminator)

generator_cost = theano.shared(value=np.array(0., dtype=np.float32), name='g_cost')
discriminator_cost = theano.shared(value=np.array(0., dtype=np.float32), name='d_cost')
generator_step_norm = theano.shared(value=np.array(0., dtype=np.float32), name='g_step_norm')
generator_grad_norm = theano.shared(value=np.array(0., dtype=np.float32), name='g_grad_norm')
discriminator_step_norm = theano.shared(value=np.array(0., dtype=np.float32), name='d_step_norm')
discriminator_grad_norm = theano.shared(value=np.array(0., dtype=np.float32), name='d_grad_norm')

discriminator_descent.add_updates([
    (discriminator_cost, ComputationGraph(cost_discriminator).outputs[0]),
    (discriminator_step_norm, discriminator_descent.total_step_norm),
    (discriminator_grad_norm, discriminator_descent.total_gradient_norm)])
generator_descent.add_updates([
    (generator_cost, ComputationGraph(cost_generator).outputs[0]),
    (generator_step_norm, generator_descent.total_step_norm),
    (generator_grad_norm, generator_descent.total_gradient_norm)])

observables = []
observables.append(generator_cost)
observables.append(discriminator_cost)
observables.append(generator_step_norm)
observables.append(generator_grad_norm)
observables.append(discriminator_step_norm)
observables.append(discriminator_grad_norm)
Example 11: train_model
# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def train_model(cost, cross_entropy, updates,
                train_stream, valid_stream, args, gate_values=None):
    step_rule = learning_algorithm(args)
    cg = ComputationGraph(cost)

    # ADD REGULARIZATION
    # WEIGHT NOISE
    weight_noise = args.weight_noise
    if weight_noise > 0:
        weights = VariableFilter(roles=[WEIGHT])(cg.variables)
        cg_train = apply_noise(cg, weights, weight_noise)
        cost = cg_train.outputs[0]
        cost.name = "cost_with_weight_noise"
        cg = ComputationGraph(cost)

    logger.info(cg.parameters)

    algorithm = GradientDescent(cost=cost, step_rule=step_rule,
                                params=cg.parameters)
    algorithm.add_updates(updates)

    # extensions to be added
    extensions = []
    if args.load_path is not None:
        extensions.append(Load(args.load_path))

    outputs = [
        variable for variable in cg.variables if variable.name == "presoft"]

    if args.generate:
        extensions.append(TextGenerationExtension(
            outputs=outputs,
            generation_length=args.generated_text_lenght,
            initial_text_length=args.initial_text_length,
            every_n_batches=args.monitoring_freq,
            ploting_path=os.path.join(args.save_path, 'prob_plot.png'),
            softmax_sampling=args.softmax_sampling,
            dataset=args.dataset,
            updates=updates,
            interactive_mode=args.interactive_mode))

    extensions.extend([
        TrainingDataMonitoring([cost], prefix='train',
                               every_n_batches=args.monitoring_freq,
                               after_epoch=True),
        DataStreamMonitoring([cost, cross_entropy],
                             valid_stream, args.mini_batch_size_valid,
                             state_updates=updates,
                             prefix='valid',
                             before_first_epoch=not(args.visualize_gates),
                             every_n_batches=args.monitoring_freq),
        ResetStates([v for v, _ in updates], every_n_batches=100),
        ProgressBar()])

    # Creating directory for saving model.
    if not args.interactive_mode:
        if not os.path.exists(args.save_path):
            os.makedirs(args.save_path)
        else:
            raise Exception('Directory already exists')

    early_stopping = EarlyStopping('valid_cross_entropy',
                                   args.patience, args.save_path,
                                   every_n_batches=args.monitoring_freq)

    # Visualizing extensions
    if args.interactive_mode:
        extensions.append(InteractiveMode())
    if args.visualize_gates and (gate_values is not None):
        if args.rnn_type == "lstm":
            extensions.append(VisualizeGateLSTM(gate_values, updates,
                                                args.dataset,
                                                ploting_path=None))
        elif args.rnn_type == "soft":
            extensions.append(VisualizeGateSoft(gate_values, updates,
                                                args.dataset,
                                                ploting_path=None))
        else:
            assert(False)

    extensions.append(early_stopping)
    extensions.append(Printing(every_n_batches=args.monitoring_freq))

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=extensions
    )
    main_loop.run()
Example 12: train_model
# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def train_model(cost, unregularized_cost, updates,
                train_stream, valid_stream, args, gate_values=None):
    step_rule = learning_algorithm(args)
    cg = ComputationGraph(cost)

    # ADD REGULARIZATION
    # WEIGHT NOISE
    weight_noise = args.weight_noise
    if weight_noise > 0:
        weights = VariableFilter(roles=[WEIGHT])(cg.variables)
        cg_train = apply_noise(cg, weights, weight_noise)
        cost = cg_train.outputs[0]
        cost.name = "cost_with_weight_noise"
        cg = ComputationGraph(cost)
    logger.info(cg.parameters)

    # Define algorithm
    algorithm = GradientDescent(cost=cost, step_rule=step_rule,
                                parameters=cg.parameters)
    # Add the updates to carry the hidden state
    algorithm.add_updates(updates)

    # Extensions to be added
    extensions = []

    # Load from a dumped model
    if args.load_path is not None:
        extensions.append(Load(args.load_path))

    # Generation extension
    if args.generate:
        extensions.append(TextGenerationExtension(
            cost=cost,
            generation_length=args.generated_text_lenght,
            initial_text_length=args.initial_text_length,
            every_n_batches=1,
            ploting_path=os.path.join(args.save_path, 'prob_plot.png'),
            softmax_sampling=args.softmax_sampling,
            dataset=args.dataset,
            updates=updates,
            interactive_mode=args.interactive_mode))

    # Training and Validation score monitoring
    extensions.extend([
        TrainingDataMonitoring([cost], prefix='train',
                               every_n_batches=args.monitoring_freq),
        DataStreamMonitoring([cost, unregularized_cost],
                             valid_stream, args.mini_batch_size_valid,
                             args.dataset,
                             state_updates=updates,
                             prefix='valid',
                             before_first_epoch=(args.visualize == "nothing"),
                             every_n_batches=args.monitoring_freq)])

    # Creating directory for saving model.
    if not args.interactive_mode:
        if not os.path.exists(args.save_path):
            os.makedirs(args.save_path)
        elif 'test' in args.save_path:
            print "Rewriting in " + args.save_path
        else:
            raise Exception('Directory already exists')

    # Early stopping
    extensions.append(EarlyStopping('valid_' + unregularized_cost.name,
                                    args.patience, args.save_path,
                                    every_n_batches=args.monitoring_freq))

    # Printing
    extensions.append(ProgressBar())
    extensions.append(Printing(every_n_batches=args.monitoring_freq))

    # Reset the initial states
    if args.dataset == "sine":
        reset_frequency = 1
    else:
        reset_frequency = 100
    extensions.append(ResetStates([v for v, _ in updates],
                                  every_n_batches=reset_frequency))

    # Visualizing extensions
    if args.interactive_mode:
        extensions.append(InteractiveMode())

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=extensions
    )
    main_loop.run()
Example 13: Model
# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
# Build datastream
train_stream = datastream.setup_datastream(config.dataset,
                                           config.num_seqs,
                                           config.seq_len,
                                           config.seq_div_size)

# Build model
m = config.Model(config)

# Train the model
cg = Model(m.sgd_cost)
algorithm = GradientDescent(cost=m.sgd_cost,
                            step_rule=config.step_rule,
                            parameters=cg.parameters)
algorithm.add_updates(m.states)

monitor_vars = list(set(v for p in m.monitor_vars for v in p))
extensions = [
    ProgressBar(),
    TrainingDataMonitoring(
        monitor_vars,
        prefix='train', every_n_batches=config.monitor_freq),
    Printing(every_n_batches=config.monitor_freq, after_epoch=False),
    ResetStates([v for v, _ in m.states], after_epoch=True)
]

if plot_avail:
    plot_channels = [['train_' + v.name for v in p] for p in m.monitor_vars]
    extensions.append(
        Plot(document='text_'+model_name,
Example 14: train
# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
def train(ladder, batch_size=100, labeled_samples=100,
          unlabeled_samples=50000, valid_set_size=10000,
          num_epochs=150, valid_batch_size=100, lrate_decay=0.67,
          save_path='results/mnist_100_full0'):
    # Setting Logger
    log_path = os.path.join(save_path, 'log.txt')
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    logger.info('Logging into %s' % log_path)

    # Training
    all_params = ComputationGraph([ladder.costs.total]).parameters
    logger.info('Found the following parameters: %s' % str(all_params))

    training_algorithm = GradientDescent(
        cost=ladder.costs.total, params=all_params,
        step_rule=Adam(learning_rate=ladder.lr))
    # Fetch all batch normalization updates. They are in the clean path.
    # In addition to actual training, also do BN variable approximations.
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    training_algorithm.add_updates(bn_updates)

    monitored_variables = [
        ladder.costs.class_corr, ladder.costs.class_clean,
        ladder.error, training_algorithm.total_gradient_norm,
        ladder.costs.total] + ladder.costs.denois.values()

    data = get_mnist_data_dict(unlabeled_samples=unlabeled_samples,
                               valid_set_size=valid_set_size)

    train_data_stream = make_datastream(
        data.train, data.train_ind, batch_size,
        n_labeled=labeled_samples,
        n_unlabeled=unlabeled_samples)
    valid_data_stream = make_datastream(
        data.valid, data.valid_ind, valid_batch_size,
        n_labeled=len(data.valid_ind),
        n_unlabeled=len(data.valid_ind))

    train_monitoring = TrainingDataMonitoring(
        variables=monitored_variables,
        prefix="train",
        after_epoch=True)
    valid_monitoring = DataStreamMonitoring(
        variables=monitored_variables,
        data_stream=valid_data_stream,
        prefix="valid",
        after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=Model(ladder.costs.total),
        extensions=[
            train_monitoring,
            valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams(None, all_params, save_path, after_epoch=True),
            SaveLog(save_path, after_training=True),
            LRDecay(lr=ladder.lr,
                    decay_first=num_epochs * lrate_decay,
                    decay_last=num_epochs,
                    after_epoch=True),
            Printing()])
    main_loop.run()
Example 15: main
# Required import: from blocks.algorithms import GradientDescent [as alias]
# Or: from blocks.algorithms.GradientDescent import add_updates [as alias]
#......... (part of the code is omitted here) .........
    bn_cost.name = 'cost'
    bn_error_rate = MisclassificationRate().apply(y, bn_p[-1])
    bn_error_rate.name = 'error_rate'

    # ------------------------------------------------------------
    bn_cg = ComputationGraph([bn_cost, bn_error_rate])

    # Prepare algorithm
    algorithm = GradientDescent(
        cost=bn_cg.outputs[0],
        on_unused_sources='ignore',
        parameters=bn_cg.parameters,
        step_rule=CompositeRule([
            RemoveNotFinite(),
            StepClipping(10.),
            Adam(learning_rate)
        ])
    )

    pop_updates = get_batch_normalization_updates(bn_cg)
    update_params = [conv1_bn.population_mean, conv1_bn.population_stdev,
                     conv2_bn.population_mean, conv2_bn.population_stdev,
                     conv3_bn.population_mean, conv3_bn.population_stdev,
                     conv4_bn.population_mean, conv4_bn.population_stdev,
                     conv5_bn.population_mean, conv5_bn.population_stdev,
                     conv6_bn.population_mean, conv6_bn.population_stdev,
                     conv_mlp_bn.population_mean, conv_mlp_bn.population_stdev,
                     loc_mlp_bn.population_mean, loc_mlp_bn.population_stdev,
                     classification_mlp1_bn.population_mean,
                     classification_mlp1_bn.population_stdev,
                     classification_mlp2_bn.population_mean,
                     classification_mlp2_bn.population_stdev]
    update_values = [m_c1_bn, s_c1_bn, m_c2_bn, s_c2_bn, m_c3_bn, s_c3_bn,
                     m_c4_bn, s_c4_bn, m_c5_bn, s_c5_bn, m_c6_bn, s_c6_bn,
                     m_c_bn, s_c_bn, m_l_bn, s_l_bn,
                     m_cl1_bn, s_cl1_bn, m_cl2_bn, s_cl2_bn]
    pop_updates.extend([(p, m) for p, m in zip(update_params, update_values)])

    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]
    algorithm.add_updates(extra_updates)

    # ------------------------------------------------------------------------
    # Setup monitors
    p, l = edram.calculate_test(x, x_coarse)
    cost_where, cost_y = compute_cost(p, wr, y, l)
    cost = cost_y + cost_where
    cost = cost.sum(axis=0)
    cost = cost.mean()
    cost.name = 'cost'

    error_rate = MisclassificationRate().apply(y, p[-1])
    error_rate.name = 'error_rate'
    monitors = [cost, error_rate]

    plotting_extensions = []
    # Live plotting...
    if live_plotting:
        plot_channels = [
            ['train_cost', 'test_cost'],
            ['train_error_rate', 'test_error_rate'],
        ]
        plotting_extensions = [
            Plot(subdir, channels=plot_channels,
                 server_url='http://155.69.150.60:80/')
        ]

    # ------------------------------------------------------------
    mnist_cluttered_train = MNISTCluttered(which_sets=['train'],
                                           sources=('features', 'locations', 'labels'))
    mnist_cluttered_test = MNISTCluttered(which_sets=['test'],
                                          sources=('features', 'locations', 'labels'))

    main_loop = MainLoop(