This article collects and summarizes typical usage examples of the Python method blocks.model.Model.get_parameter_dict. If you are wondering what Model.get_parameter_dict does, how to call it, or how it is used in practice, the curated code examples below should help; the containing class blocks.model.Model is also worth a look for further context.
The following 15 code examples of Model.get_parameter_dict are sorted by popularity by default.
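Before diving into the examples, here is a minimal, self-contained sketch (the MLP sizes and initializations are illustrative, not taken from any example below) of what get_parameter_dict returns: an ordered dictionary mapping canonical parameter paths such as '/mlp/linear_0.W' to Theano shared variables.

from theano import tensor
from blocks.bricks import MLP, Tanh
from blocks.initialization import IsotropicGaussian, Constant
from blocks.model import Model

x = tensor.matrix('x')
mlp = MLP([Tanh()], [10, 10],
          weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
mlp.initialize()
model = Model(mlp.apply(x))

# Keys are canonical brick paths, values are Theano shared variables.
for name, param in model.get_parameter_dict().items():
    print("%s %s" % (name, param.get_value().shape))
# prints e.g. '/mlp/linear_0.W (10, 10)' and '/mlp/linear_0.b (10,)'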
Example 1: test_model_handles_brickless_parameteres
# Required imports: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_parameter_dict [as alias]
def test_model_handles_brickless_parameteres():
    x = tensor.matrix('x')
    v = shared_floatx(numpy.zeros((10, 10)), name='V')
    add_role(v, PARAMETER)
    y = x.dot(v)
    model = Model(y)
    assert list(model.get_parameter_dict().items()) == [('V', v)]
Example 2: evaluate
# Required imports: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_parameter_dict [as alias]
def evaluate(model, load_path):
    with open(load_path + '/trained_params_best.npz') as f:
        loaded = np.load(f)
        blocks_model = Model(model.cost)
        params_dicts = blocks_model.get_parameter_dict()
        params_names = params_dicts.keys()
        for param_name in params_names:
            param = params_dicts[param_name]
            # '/f_6_.W' --> 'f_6_.W'
            slash_index = param_name.find('/')
            param_name = param_name[slash_index + 1:]
            assert param.get_value().shape == loaded[param_name].shape
            param.set_value(loaded[param_name])

    train_data_stream, valid_data_stream = get_cmv_v2_streams(100)
    # T x B x F
    data = train_data_stream.get_epoch_iterator().next()
    cg = ComputationGraph(model.cost)
    f = theano.function(cg.inputs, [model.location, model.scale],
                        on_unused_input='ignore',
                        allow_input_downcast=True)
    res = f(data[1], data[0])
    for i in range(10):
        visualize_attention(data[0][:, i, :],
                            res[0][:, i, :], res[1][:, i, :], prefix=str(i))
Example 3: evaluate
# Required imports: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_parameter_dict [as alias]
def evaluate(model, load_path):
    with open(load_path + '/trained_params_best.npz') as f:
        loaded = np.load(f)
        blocks_model = Model(model)
        params_dicts = blocks_model.get_parameter_dict()
        params_names = params_dicts.keys()
        for param_name in params_names:
            param = params_dicts[param_name]
            assert param.get_value().shape == loaded[param_name].shape
            param.set_value(loaded[param_name])
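Examples 2, 3, 6, 7, 11 and 14 all repeat the same loading pattern: open an .npz archive, strip the leading '/' from each canonical parameter name, check shapes, and copy the values in. The sketch below factors that pattern into a reusable helper; the function name, docstring, and the strict flag are my own additions, not part of any original example.

import numpy as np
from blocks.model import Model

def load_npz_parameters(model_output, npz_path, strict=True):
    """Copy parameters from an .npz archive into a blocks Model.

    Archive keys are assumed to be canonical names without the
    leading '/', e.g. 'f_6_.W' for '/f_6_.W'.
    """
    blocks_model = Model(model_output)
    with open(npz_path, 'rb') as f:
        loaded = np.load(f)
        for name, param in blocks_model.get_parameter_dict().items():
            key = name[name.find('/') + 1:]  # '/f_6_.W' --> 'f_6_.W'
            if param.get_value().shape != loaded[key].shape:
                if strict:
                    raise ValueError("shape mismatch for %s" % name)
                continue  # skip mismatched parameters, as in Example 11
            param.set_value(loaded[key])
    return blocks_model

With this helper, Example 3 reduces to load_npz_parameters(model, load_path + '/trained_params_best.npz').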
Example 4: main
# Required imports: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_parameter_dict [as alias]
def main():
    import configurations
    from stream import DStream
    logger = logging.getLogger(__name__)
    cfig = getattr(configurations, 'get_config_penn')()

    rnnlm = Rnnlm(cfig['vocabsize'], cfig['nemb'], cfig['nhids'])
    rnnlm.weights_init = IsotropicGaussian(0.1)
    rnnlm.biases_init = Constant(0.)
    rnnlm.push_initialization_config()
    rnnlm.generator.transition.weights_init = Orthogonal()

    sentence = tensor.lmatrix('sentence')
    sentence_mask = tensor.matrix('sentence_mask')
    batch_cost = rnnlm.cost(sentence, sentence_mask).sum()
    batch_size = sentence.shape[1].copy(name='batch_size')
    cost = aggregation.mean(batch_cost, batch_size)
    cost.name = "sequence_log_likelihood"
    logger.info("Cost graph is built")

    model = Model(cost)
    parameters = model.get_parameter_dict()
    logger.info("Parameters:\n" +
                pprint.pformat(
                    [(key, value.get_value().shape) for key, value
                     in parameters.items()],
                    width=120))
    for brick in model.get_top_bricks():
        brick.initialize()

    cg = ComputationGraph(cost)
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=CompositeRule([StepClipping(10.0), Scale(0.01)]))

    gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
    step_norm = aggregation.mean(algorithm.total_step_norm)
    monitored_vars = [cost, gradient_norm, step_norm]

    train_monitor = TrainingDataMonitoring(variables=monitored_vars,
                                           after_batch=True,
                                           before_first_epoch=True,
                                           prefix='tra')
    extensions = [train_monitor, Timing(), Printing(after_batch=True),
                  FinishAfter(after_n_epochs=1000),
                  Printing(every_n_batches=1)]

    train_stream = DStream(datatype='train', config=cfig)
    main_loop = MainLoop(model=model,
                         data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
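One small, hypothetical extension to the logging step in Example 4 (not in the original code): because get_parameter_dict exposes every shared variable, the total parameter count of the model falls out in two lines. This assumes the model and logger objects defined above.

import numpy

# Sum the element counts of all shared variables registered with the Model.
n_params = sum(numpy.prod(param.get_value().shape)
               for param in model.get_parameter_dict().values())
logger.info("Total number of parameters: %d" % n_params)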
Example 5: test_model
# Required imports: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_parameter_dict [as alias]
def test_model():
    x = tensor.matrix('x')
    mlp1 = MLP([Tanh(), Tanh()], [10, 20, 30], name="mlp1")
    mlp2 = MLP([Tanh()], [30, 40], name="mlp2")
    h1 = mlp1.apply(x)
    h2 = mlp2.apply(h1)
    model = Model(h2)
    assert model.get_top_bricks() == [mlp1, mlp2]
    # The order of parameters returned is deterministic but
    # not sensible.
    assert list(model.get_parameter_dict().items()) == [
        ('/mlp2/linear_0.b', mlp2.linear_transformations[0].b),
        ('/mlp1/linear_1.b', mlp1.linear_transformations[1].b),
        ('/mlp1/linear_0.b', mlp1.linear_transformations[0].b),
        ('/mlp1/linear_0.W', mlp1.linear_transformations[0].W),
        ('/mlp1/linear_1.W', mlp1.linear_transformations[1].W),
        ('/mlp2/linear_0.W', mlp2.linear_transformations[0].W)]

    # Test getting and setting parameter values
    mlp3 = MLP([Tanh()], [10, 10])
    mlp3.allocate()
    model3 = Model(mlp3.apply(x))
    parameter_values = {
        '/mlp/linear_0.W': 2 * numpy.ones((10, 10),
                                          dtype=theano.config.floatX),
        '/mlp/linear_0.b': 3 * numpy.ones(10, dtype=theano.config.floatX)}
    model3.set_parameter_values(parameter_values)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[0].get_value() == 2)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[1].get_value() == 3)
    got_parameter_values = model3.get_parameter_values()
    assert len(got_parameter_values) == len(parameter_values)
    for name, value in parameter_values.items():
        assert_allclose(value, got_parameter_values[name])

    # Test exception is raised if parameter shapes don't match
    def helper():
        parameter_values = {
            '/mlp/linear_0.W': 2 * numpy.ones((11, 11),
                                              dtype=theano.config.floatX),
            '/mlp/linear_0.b': 3 * numpy.ones(11,
                                              dtype=theano.config.floatX)}
        model3.set_parameter_values(parameter_values)
    assert_raises(ValueError, helper)

    # Test name conflict handling
    mlp4 = MLP([Tanh()], [10, 10])

    def helper():
        Model(mlp4.apply(mlp3.apply(x)))
    assert_raises(ValueError, helper)
Example 6: evaluate
# Required imports: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_parameter_dict [as alias]
def evaluate(model, load_path, configs):
    with open(load_path + "trained_params_best.npz") as f:
        loaded = np.load(f)
        blocks_model = Model(model.cost)
        params_dicts = blocks_model.get_parameter_dict()
        params_names = params_dicts.keys()
        for param_name in params_names:
            param = params_dicts[param_name]
            # '/f_6_.W' --> 'f_6_.W'
            slash_index = param_name.find("/")
            param_name = param_name[slash_index + 1:]
            assert param.get_value().shape == loaded[param_name].shape
            param.set_value(loaded[param_name])

    inps = ComputationGraph(model.error_rate).inputs
    eval_function = theano.function(inps,
                                    [model.error_rate, model.probabilities])
    _, vds = configs["get_streams"](100)
    data = vds.get_epoch_iterator().next()
    print "Valid_ER: " + str(eval_function(data[0], data[2], data[1])[0])
    return eval_function
Example 7: evaluate
# Required imports: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_parameter_dict [as alias]
def evaluate(ladder, load_path):
    with open(load_path + '/trained_params_best.npz') as f:
        loaded = np.load(f)
        model = Model(ladder.costs.total)
        params_dicts = model.get_parameter_dict()
        params_names = params_dicts.keys()
        for param_name in params_names:
            param = params_dicts[param_name]
            # '/f_6_.W' --> 'f_6_.W'
            slash_index = param_name.find('/')
            param_name = param_name[slash_index + 1:]
            assert param.get_value().shape == loaded[param_name].shape
            param.set_value(loaded[param_name])

    test_data_stream, test_data_stream = get_mixed_streams(10000)
    test_data = test_data_stream.get_epoch_iterator().next()
    test_data_input = test_data[10]
    test_data_target = test_data[0]

    print 'Compiling ...'
    cg = ComputationGraph([ladder.costs.total])
    eval_ = theano.function(cg.inputs, ladder.error)
    print 'Test_set_Error: ' + str(eval_(test_data_input, test_data_target))
    import ipdb
    ipdb.set_trace()
Example 8: main
# Required imports: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_parameter_dict [as alias]
def main():
    nclasses = 27

    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--seed", type=int, default=1)
    parser.add_argument("--length", type=int, default=180)
    parser.add_argument("--num-epochs", type=int, default=100)
    parser.add_argument("--batch-size", type=int, default=64)
    parser.add_argument("--learning-rate", type=float, default=1e-3)
    parser.add_argument("--epsilon", type=float, default=1e-5)
    parser.add_argument("--num-hidden", type=int, default=1000)
    parser.add_argument("--baseline", action="store_true")
    parser.add_argument("--initialization", choices="identity glorot orthogonal uniform".split(), default="identity")
    parser.add_argument("--initial-gamma", type=float, default=1e-1)
    parser.add_argument("--initial-beta", type=float, default=0)
    parser.add_argument("--cluster", action="store_true")
    parser.add_argument("--activation", choices=list(activations.keys()), default="tanh")
    # note: .split() restored here; without it argparse would accept any
    # substring of the choices string
    parser.add_argument("--optimizer", choices="sgdmomentum adam rmsprop".split(), default="rmsprop")
    parser.add_argument("--continue-from")
    parser.add_argument("--evaluate")
    parser.add_argument("--dump-hiddens")
    args = parser.parse_args()

    np.random.seed(args.seed)
    blocks.config.config.default_seed = args.seed

    if args.continue_from:
        from blocks.serialization import load
        main_loop = load(args.continue_from)
        main_loop.run()
        sys.exit(0)

    graphs, extensions, updates = construct_graphs(args, nclasses)

    ### optimization algorithm definition
    if args.optimizer == "adam":
        optimizer = Adam(learning_rate=args.learning_rate)
    elif args.optimizer == "rmsprop":
        optimizer = RMSProp(learning_rate=args.learning_rate, decay_rate=0.9)
    elif args.optimizer == "sgdmomentum":
        optimizer = Momentum(learning_rate=args.learning_rate, momentum=0.99)
    step_rule = CompositeRule([StepClipping(1.0), optimizer])
    algorithm = GradientDescent(
        cost=graphs["training"].outputs[0],
        parameters=graphs["training"].parameters,
        step_rule=step_rule)
    algorithm.add_updates(updates["training"])
    model = Model(graphs["training"].outputs[0])
    extensions = extensions["training"] + extensions["inference"]

    # step monitor
    step_channels = []
    step_channels.extend([
        algorithm.steps[param].norm(2).copy(name="step_norm:%s" % name)
        for name, param in model.get_parameter_dict().items()])
    step_channels.append(algorithm.total_step_norm.copy(name="total_step_norm"))
    step_channels.append(algorithm.total_gradient_norm.copy(name="total_gradient_norm"))
    step_channels.extend(graphs["training"].outputs)
    logger.warning("constructing training data monitor")
    extensions.append(TrainingDataMonitoring(
        step_channels, prefix="iteration", after_batch=True))

    # parameter monitor
    extensions.append(DataStreamMonitoring(
        [param.norm(2).copy(name="parameter.norm:%s" % name)
         for name, param in model.get_parameter_dict().items()],
        data_stream=None, after_epoch=True))

    validation_interval = 500
    # performance monitor
    for situation in "training inference".split():
        if situation == "inference" and not args.evaluate:
            # save time when we don't need the inference graph
            continue
        for which_set in "train valid test".split():
            logger.warning("constructing %s %s monitor" % (which_set, situation))
            channels = list(graphs[situation].outputs)
            extensions.append(DataStreamMonitoring(
                channels,
                prefix="%s_%s" % (which_set, situation),
                every_n_batches=validation_interval,
                data_stream=get_stream(which_set=which_set,
                                       batch_size=args.batch_size,
                                       num_examples=10000,
                                       length=args.length)))

    extensions.extend([
        TrackTheBest("valid_training_error_rate", "best_valid_training_error_rate"),
        DumpBest("best_valid_training_error_rate", "best.zip"),
#.........some code omitted here.........
Example 9: main
# Required imports: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_parameter_dict [as alias]
#.........some code omitted here.........
        batch_cost,
        features.shape[1])
    cost.name = "sequence_log_likelihood"
    char_cost = aggregation.mean(
        batch_cost, features_mask.sum())
    char_cost.name = 'character_log_likelihood'
    ppl = 2 ** (cost / numpy.log(2))
    ppl.name = 'ppl'
    bits_per_char = char_cost / tensor.log(2)
    bits_per_char.name = 'bits_per_char'
    length = features.shape[0]
    length.name = 'length'

    model = Model(batch_cost)
    if load_params:
        params = load_parameter_values(save_path)
        model.set_parameter_values(params)

    if mode == "train":
        # Give an idea of what's going on.
        logger.info("Parameters:\n" +
                    pprint.pformat(
                        [(key, value.get_value().shape) for key, value
                         in Selector(generator).get_parameters().items()],
                        width=120))

        train_stream = train_dataset.get_example_stream()
        train_stream = Mapping(train_stream, _truncate)
        train_stream = Batch(train_stream,
                             iteration_scheme=ConstantScheme(batch_size))
        train_stream = Padding(train_stream)
        train_stream = Mapping(train_stream, _transpose)

        parameters = model.get_parameter_dict()
        maxnorm_subjects = VariableFilter(roles=[WEIGHT])(parameters.values())
        algorithm = GradientDescent(
            cost=batch_cost,
            parameters=parameters.values(),
            step_rule=CompositeRule([StepClipping(1000.),
                                     AdaDelta(epsilon=1e-8)  # , Restrict(VariableClipping(1.0, axis=0), maxnorm_subjects)
                                     ]))
        ft = features[:6, 0]
        ft.name = 'feature_example'

        observables = [cost, ppl, char_cost, length, bits_per_char]
        for name, param in parameters.items():
            num_elements = numpy.product(param.get_value().shape)
            norm = param.norm(2) / num_elements ** 0.5
            grad_norm = algorithm.gradients[param].norm(2) / num_elements ** 0.5
            step_norm = algorithm.steps[param].norm(2) / num_elements ** 0.5
            stats = tensor.stack(norm, grad_norm, step_norm,
                                 step_norm / grad_norm)
            stats.name = name + '_stats'
            observables.append(stats)

        track_the_best_bpc = TrackTheBest('valid_bits_per_char')
        root_path, extension = os.path.splitext(save_path)

        this_step_monitoring = TrainingDataMonitoring(
            observables + [ft], prefix="this_step", after_batch=True)
        average_monitoring = TrainingDataMonitoring(
            observables + [algorithm.total_step_norm,
                           algorithm.total_gradient_norm],
            prefix="average",
            every_n_batches=10)
        valid_monitoring = DataStreamMonitoring(
            observables, prefix="valid",
            every_n_batches=1500, before_training=False,
Example 10: LSTMModel
# Required imports: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_parameter_dict [as alias]
model = LSTMModel(len(vocabs['word']), n_mem, len(vocabs['rel']))
cg = ComputationGraph(model.cost)
bricks_model = Model(model.cost)
for brick in bricks_model.get_top_bricks():
    brick.initialize()
model.lookup.W.set_value(
    vocabs['word'].get_embeddings().astype(theano.config.floatX))

if dropout:
    pass
    # logger.info('Applying dropout of {}'.format(dropout))
    # lstm_dropout = [v for v in cg.intermediary_variables
    #                 if v.name in {'W_cell_to_in', 'W_cell_to_out'}]
    # cg = apply_dropout(cg, lstm_dropout, drop_prob=dropout)

# summary of what's going on
parameters = bricks_model.get_parameter_dict()
logger.info("Parameters:\n" +
            pprint.pformat(
                [(key, value.get_value().shape, value.get_value().mean())
                 for key, value in parameters.items()],
                width=120))

algorithm = GradientDescent(cost=model.cost, parameters=cg.parameters,
                            step_rule=Adam())

# Fetch variables useful for debugging
observables = [model.cost, model.acc,
               algorithm.total_step_norm, algorithm.total_gradient_norm]
for name, parameter in parameters.items():
    observables.append(parameter.norm(2).copy(name=name + "_norm"))
    observables.append(
        algorithm.gradients[parameter].norm(2).copy(name=name + "_grad_norm"))

train_monitor = TrainingDataMonitoring(variables=observables, prefix="train",
                                       after_batch=True)
Example 11: CompositeRule
# Required imports: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_parameter_dict [as alias]
step_rule = CompositeRule([clipping, rms_prop, rm_non_finite])
algorithm = GradientDescent(
    cost=cost,
    parameters=params,
    step_rule=step_rule)

# train_stream, valid_stream = get_seq_mnist_streams(
#     h_dim, batch_size, update_prob)
train_stream = get_stream('train', batch_size, h_dim, False)
train_stream_evaluation = get_stream('train', batch_size, h_dim, True)
valid_stream = get_stream('valid', batch_size, h_dim, True)

if load_path:
    with open(load_path + '/trained_params_best.npz') as f:
        loaded = np.load(f)
        params_dicts = model.get_parameter_dict()
        params_names = params_dicts.keys()
        for param_name in params_names:
            param = params_dicts[param_name]
            # '/f_6_.W' --> 'f_6_.W'
            slash_index = param_name.find('/')
            param_name = param_name[slash_index + 1:]
            if param.get_value().shape == loaded[param_name].shape:
                print param
                param.set_value(loaded[param_name])
            else:
                print param_name

f = theano.function([x, drops, is_for_test, y], error_rate)
data_train = train_stream.get_epoch_iterator(as_dict=True).next()
data_train_eval = train_stream_evaluation.get_epoch_iterator(
    as_dict=True).next()
Example 12: main
# Required imports: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_parameter_dict [as alias]
def main(mode, save_path, num_batches, data_path=None):
    reverser = WordReverser(100, len(char2code), name="reverser")

    if mode == "train":
        # Data processing pipeline
        dataset_options = dict(dictionary=char2code, level="character",
                               preprocess=_lower)
        if data_path:
            dataset = TextFile(data_path, **dataset_options)
        else:
            dataset = OneBillionWord("training", [99], **dataset_options)
        data_stream = dataset.get_example_stream()
        data_stream = Filter(data_stream, _filter_long)
        data_stream = Mapping(data_stream, reverse_words,
                              add_sources=("targets",))
        data_stream = Batch(data_stream, iteration_scheme=ConstantScheme(10))
        data_stream = Padding(data_stream)
        data_stream = Mapping(data_stream, _transpose)

        # Initialization settings
        reverser.weights_init = IsotropicGaussian(0.1)
        reverser.biases_init = Constant(0.0)
        reverser.push_initialization_config()
        reverser.encoder.weights_init = Orthogonal()
        reverser.generator.transition.weights_init = Orthogonal()

        # Build the cost computation graph
        chars = tensor.lmatrix("features")
        chars_mask = tensor.matrix("features_mask")
        targets = tensor.lmatrix("targets")
        targets_mask = tensor.matrix("targets_mask")
        batch_cost = reverser.cost(
            chars, chars_mask, targets, targets_mask).sum()
        batch_size = chars.shape[1].copy(name="batch_size")
        cost = aggregation.mean(batch_cost, batch_size)
        cost.name = "sequence_log_likelihood"
        logger.info("Cost graph is built")

        # Give an idea of what's going on
        model = Model(cost)
        parameters = model.get_parameter_dict()
        logger.info("Parameters:\n" +
                    pprint.pformat(
                        [(key, value.get_value().shape) for key, value
                         in parameters.items()],
                        width=120))

        # Initialize parameters
        for brick in model.get_top_bricks():
            brick.initialize()

        # Define the training algorithm.
        cg = ComputationGraph(cost)
        algorithm = GradientDescent(
            cost=cost, parameters=cg.parameters,
            step_rule=CompositeRule([StepClipping(10.0), Scale(0.01)]))

        # Fetch variables useful for debugging
        generator = reverser.generator
        (energies,) = VariableFilter(
            applications=[generator.readout.readout],
            name_regex="output")(cg.variables)
        (activations,) = VariableFilter(
            applications=[generator.transition.apply],
            name=generator.transition.apply.states[0])(cg.variables)
        max_length = chars.shape[0].copy(name="max_length")
        cost_per_character = aggregation.mean(
            batch_cost, batch_size * max_length).copy(
                name="character_log_likelihood")
        min_energy = energies.min().copy(name="min_energy")
        max_energy = energies.max().copy(name="max_energy")
        mean_activation = abs(activations).mean().copy(
            name="mean_activation")
        observables = [
            cost, min_energy, max_energy, mean_activation,
            batch_size, max_length, cost_per_character,
            algorithm.total_step_norm, algorithm.total_gradient_norm]
        for name, parameter in parameters.items():
            observables.append(parameter.norm(2)
                               .copy(name=name + "_norm"))
            observables.append(algorithm.gradients[parameter].norm(2)
                               .copy(name=name + "_grad_norm"))

        # Construct the main loop and start training!
        average_monitoring = TrainingDataMonitoring(
            observables, prefix="average", every_n_batches=10)
        main_loop = MainLoop(
            model=model,
            data_stream=data_stream,
            algorithm=algorithm,
            extensions=[
                Timing(),
                TrainingDataMonitoring(observables, after_batch=True),
                average_monitoring,
                FinishAfter(after_n_batches=num_batches)
                # This shows a way to handle NaN emerging during
                # training: simply finish it.
                .add_condition(["after_batch"], _is_nan),
                # Saving the model and the log separately is convenient,
                # because loading the whole pickle takes quite some time.
                Checkpoint(save_path, every_n_batches=500,
#.........some code omitted here.........
Example 13: train
# Required imports: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_parameter_dict [as alias]
#.........some code omitted here.........
        step_rule = CompositeRule([clipping, sgd])
    else:
        raise NotImplementedError
    algorithm = GradientDescent(step_rule=step_rule,
                                cost=cost_train,
                                parameters=cg_train.parameters)
    # theano_func_kwargs={"mode": theano.compile.MonitorMode(post_func=detect_nan)})
    algorithm.add_updates(init_updates)

    def cond_number(x):
        _, _, sing_vals = T.nlinalg.svd(x, True, True)
        sing_mags = abs(sing_vals)
        return T.max(sing_mags) / T.min(sing_mags)

    def rms(x):
        return (x * x).mean().sqrt()

    whysplode_cond = []
    whysplode_rms = []
    for i, p in enumerate(init_updates):
        v = p.get_value()
        # note: compare the rank here; the original compared the shape tuple
        # itself to 2, which is always false
        if len(v.shape) == 2:
            whysplode_cond.append(cond_number(p).copy('ini%d:%s_cond(%s)' % (i, p.name, "x".join(map(str, v.shape)))))
            whysplode_rms.append(rms(p).copy('ini%d:%s_rms(%s)' % (i, p.name, "x".join(map(str, v.shape)))))
    for i, p in enumerate(cg_train.parameters):
        v = p.get_value()
        if len(v.shape) == 2:
            whysplode_cond.append(cond_number(p).copy('ini%d:%s_cond(%s)' % (i, p.name, "x".join(map(str, v.shape)))))
            whysplode_rms.append(rms(p).copy('ini%d:%s_rms(%s)' % (i, p.name, "x".join(map(str, v.shape)))))

    observed_vars = [cost_train, cost, bpc, perp, learning_rate,
                     aggregation.mean(algorithm.total_gradient_norm).copy("gradient_norm_mean")]  # + whysplode_rms

    parameters = model.get_parameter_dict()
    for name, param in parameters.iteritems():
        observed_vars.append(param.norm(2).copy(name=name + "_norm"))
        observed_vars.append(
            algorithm.gradients[param].norm(2).copy(name=name + "_grad_norm"))

    train_monitor = TrainingDataMonitoring(
        variables=observed_vars,
        prefix="train", after_epoch=True
    )

    dev_inits = [p.clone() for p in init_updates]
    cg_dev = ComputationGraph([cost, bpc, perp] + init_updates.values()
                              ).replace(zip(init_updates.keys(), dev_inits))
    dev_cost, dev_bpc, dev_perp = cg_dev.outputs[:3]
    dev_init_updates = OrderedDict(zip(dev_inits, cg_dev.outputs[3:]))
    dev_monitor = DataStreamMonitoring(
        variables=[dev_cost, dev_bpc, dev_perp],
        data_stream=valid_stream, prefix="dev",
        updates=dev_init_updates
    )

    # noone does this
    if 'load_path' in kwargs:
        with open(kwargs['load_path']) as f:
            loaded = np.load(f)
            model = Model(cost_train)
            params_dicts = model.get_parameter_dict()
            params_names = params_dicts.keys()
            for param_name in params_names:
                param = params_dicts[param_name]
                # '/f_6_.W' --> 'f_6_.W'
                slash_index = param_name.find('/')
Example 14: evaluate
# Required imports: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_parameter_dict [as alias]
def evaluate(model, load_path, plot):
    with open(load_path + 'trained_params_best.npz') as f:
        loaded = np.load(f)
        blocks_model = Model(model.cost)
        params_dicts = blocks_model.get_parameter_dict()
        params_names = params_dicts.keys()
        for param_name in params_names:
            param = params_dicts[param_name]
            # '/f_6_.W' --> 'f_6_.W'
            slash_index = param_name.find('/')
            param_name = param_name[slash_index + 1:]
            assert param.get_value().shape == loaded[param_name].shape
            param.set_value(loaded[param_name])

    if plot:
        train_data_stream, valid_data_stream = get_streams(20)
        # T x B x F
        data = train_data_stream.get_epoch_iterator().next()
        cg = ComputationGraph(model.cost)
        f = theano.function(cg.inputs, [model.location, model.scale],
                            on_unused_input='ignore',
                            allow_input_downcast=True)
        res = f(data[1], data[0])
        for i in range(10):
            visualize_attention(data[0][:, i, :],
                                res[0][:, i, :], res[1][:, i, :],
                                image_shape=(512, 512), prefix=str(i))

        plot_curves(path=load_path,
                    to_be_plotted=['train_categoricalcrossentropy_apply_cost',
                                   'valid_categoricalcrossentropy_apply_cost'],
                    yaxis='Cross Entropy',
                    titles=['train', 'valid'],
                    main_title='CE')

        plot_curves(path=load_path,
                    to_be_plotted=['train_learning_rate',
                                   'train_learning_rate'],
                    yaxis='lr',
                    titles=['train', 'train'],
                    main_title='lr')

        plot_curves(path=load_path,
                    to_be_plotted=['train_total_gradient_norm',
                                   'valid_total_gradient_norm'],
                    yaxis='GradientNorm',
                    titles=['train', 'valid'],
                    main_title='GradientNorm')

        for grad in ['_total_gradient_norm',
                     '_total_gradient_norm',
                     '_/lstmattention.W_patch_grad_norm',
                     '_/lstmattention.W_state_grad_norm',
                     '_/lstmattention.initial_cells_grad_norm',
                     '_/lstmattention.initial_location_grad_norm',
                     '_/lstmattention/lstmattention_mlp/linear_0.W_grad_norm',
                     '_/lstmattention/lstmattention_mlp/linear_1.W_grad_norm',
                     '_/mlp/linear_0.W_grad_norm',
                     '_/mlp/linear_1.W_grad_norm']:
            plot_curves(path=load_path,
                        to_be_plotted=['train' + grad,
                                       'valid' + grad],
                        yaxis='GradientNorm',
                        titles=['train',
                                'valid'],
                        main_title=grad.replace(
                            "_", "").replace("/", "").replace(".", ""))

        plot_curves(path=load_path,
                    to_be_plotted=[
                        'train_misclassificationrate_apply_error_rate',
                        'valid_misclassificationrate_apply_error_rate'],
                    yaxis='Error rate',
                    titles=['train', 'valid'],
                    main_title='Error')
        print 'plot printed'
Example 15: RMSProp
# Required imports: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_parameter_dict [as alias]
    RMSProp(learning_rate=args.learning_rate, decay_rate=0.5),
])
algorithm = GradientDescent(cost=graphs["training"].outputs[0],
                            parameters=graphs["training"].parameters,
                            step_rule=step_rule)
algorithm.add_updates(updates["training"])
model = Model(graphs["training"].outputs[0])
extensions = extensions["training"] + extensions["inference"]

# step monitor (after epoch to limit the log size)
step_channels = []
step_channels.extend([
    algorithm.steps[param].norm(2).copy(name="step_norm:%s" % name)
    for name, param in model.get_parameter_dict().items()])
step_channels.append(algorithm.total_step_norm.copy(name="total_step_norm"))
step_channels.append(algorithm.total_gradient_norm.copy(name="total_gradient_norm"))
step_channels.extend(graphs["training"].outputs)
logger.warning("constructing training data monitor")
extensions.append(TrainingDataMonitoring(
    step_channels, prefix="iteration", after_batch=False))

# parameter monitor
extensions.append(DataStreamMonitoring(
    [param.norm(2).copy(name="parameter.norm:%s" % name)
     for name, param in model.get_parameter_dict().items()],
    data_stream=None, after_epoch=True))

# performance monitor
for situation in "training".split():  # add inference