This page collects typical usage examples of the OrderedDict.values method from theano.compat.python2x in Python. If you are wondering what OrderedDict.values does, or how to use it, the hand-picked code examples below should help. You can also read more about the containing class, theano.compat.python2x.OrderedDict.
Fourteen code examples of OrderedDict.values are shown below, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the site recommend better Python examples.
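Before the full examples, here is a minimal sketch of the method itself. theano.compat.python2x.OrderedDict is simply Theano's backport of collections.OrderedDict for older Python interpreters, so .values() returns the stored values in insertion order. The try/except fallback below is an assumption for newer Theano releases that no longer ship this compat module.

# Minimal sketch: OrderedDict.values() preserves insertion order.
try:
    from theano.compat.python2x import OrderedDict
except ImportError:
    from collections import OrderedDict  # drop-in equivalent

monitors = OrderedDict()
monitors['mean'] = 0.5
monitors['std'] = 0.1

print(list(monitors.values()))  # [0.5, 0.1] -- same order as insertion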
Example 1: main

# Required import: from theano.compat.python2x import OrderedDict [as alias]
# Or: from theano.compat.python2x.OrderedDict import values [as alias]

def main():
    var = theano.shared(T.zeros(shape=(88, 100), dtype=theano.config.floatX).eval(), name='W')
    updates = [(var, add_uniform(input=var, noise_level=.02))]

    stats = get_stats(var)
    l1 = stats.pop('l1')
    l2 = stats.pop('l2')
    min = stats.pop('min')
    max = stats.pop('max')
    var = stats.pop('var')
    std = stats.pop('std')
    mean = stats.pop('mean')

    mean_monitor = Monitor('mean', mean, train=True, valid=True, out_service=FileService('outs/mean.txt'))
    var_monitor = Monitor('var', var, out_service=FileService('outs/var.txt'))

    w_channel = MonitorsChannel('W', monitors=mean_monitor)
    stat_channel = MonitorsChannel('stats', monitors=[var_monitor])

    monitors = [w_channel, stat_channel]

    train_collapsed_raw = collapse_channels(monitors, train=True)
    train_collapsed = OrderedDict([(item[0], item[1]) for item in train_collapsed_raw])
    train_services = OrderedDict([(item[0], item[2]) for item in train_collapsed_raw])

    valid_collapsed_raw = collapse_channels(monitors, valid=True)
    valid_collapsed = OrderedDict([(item[0], item[1]) for item in valid_collapsed_raw])
    valid_services = OrderedDict([(item[0], item[2]) for item in valid_collapsed_raw])

    log.debug('compiling...')
    f = theano.function(inputs=[], outputs=train_collapsed.values(), updates=updates)
    f2 = theano.function(inputs=[], outputs=valid_collapsed.values(), updates=updates)
    log.debug('done')

    t1 = time.time()

    for epoch in range(10):
        t = time.time()
        log.debug(epoch)

        vals = f()
        m = OrderedDict(zip(train_collapsed.keys(), vals))
        for name, service in train_services.items():
            if name in m:
                service.write(m[name], TRAIN)

        log.debug('----- ' + make_time_units_string(time.time() - t))

    for epoch in range(10):
        t = time.time()
        log.debug(epoch)

        vals = f2()
        m = OrderedDict(zip(valid_collapsed.keys(), vals))
        for name, service in valid_services.items():
            if name in m:
                service.write(m[name], VALID)

        log.debug('----- ' + make_time_units_string(time.time() - t))

    log.debug("TOTAL TIME " + make_time_units_string(time.time() - t1))
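The core pattern in this example -- keep named Theano expressions in an OrderedDict, compile the dict's .values() as one function, then zip the results back onto .keys() -- works with plain Theano as well. Below is a minimal, self-contained sketch of that pattern; the expression names and shapes are made up for illustration, and the OpenDeep helpers such as collapse_channels are not used.

import numpy
import theano
import theano.tensor as T
from collections import OrderedDict  # equivalent to theano.compat.python2x.OrderedDict

W = theano.shared(numpy.zeros((8, 10), dtype=theano.config.floatX), name='W')

# Named monitor expressions; insertion order is the order of the outputs.
monitors = OrderedDict()
monitors['mean'] = T.mean(W)
monitors['std'] = T.std(W)

# Compile one function whose outputs follow the dict's ordering.
monitor_fn = theano.function(inputs=[], outputs=list(monitors.values()))

# Because OrderedDict keeps keys and values aligned, zipping the keys with
# the returned list recovers a name -> value mapping.
results = OrderedDict(zip(monitors.keys(), monitor_fn()))
print(results)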
Example 2: main

# Required import: from theano.compat.python2x import OrderedDict [as alias]
# Or: from theano.compat.python2x.OrderedDict import values [as alias]

def main():
    w = theano.shared(T.zeros(shape=(88, 100), dtype=theano.config.floatX).eval(), name='W')
    updates = [(w, add_uniform(input=w, noise_level=.02))]

    stats = get_stats(w)
    l1 = stats.pop('l1')
    l2 = stats.pop('l2')
    min = stats.pop('min')
    max = stats.pop('max')
    var = stats.pop('var')
    std = stats.pop('std')
    mean = stats.pop('mean')

    mean_monitor = Monitor('mean', mean, train=True, valid=True)
    stat_monitor = Monitor('max', max)

    w_channel = MonitorsChannel('W', monitors=mean_monitor)
    stat_channel = MonitorsChannel('stats', monitors=[stat_monitor])

    monitors = [w_channel, stat_channel]

    train_collapsed = collapse_channels(monitors, train=True)
    train_collapsed = OrderedDict([(name, expression) for name, expression, _ in train_collapsed])
    valid_collapsed = collapse_channels(monitors, valid=True)
    valid_collapsed = OrderedDict([(name, expression) for name, expression, _ in valid_collapsed])

    plot = Plot(bokeh_doc_name='test_plots', monitor_channels=monitors, open_browser=True)

    log.debug('compiling...')
    f = theano.function(inputs=[], outputs=list(train_collapsed.values()), updates=updates)
    f2 = theano.function(inputs=[], outputs=list(valid_collapsed.values()), updates=updates)
    log.debug('done')

    t1 = time.time()

    for epoch in range(100):
        t = time.time()
        log.debug(epoch)

        vals = f()
        m = OrderedDict(zip(train_collapsed.keys(), vals))
        plot.update_plots(epoch, m)
        time.sleep(0.02)

        log.debug('----- ' + make_time_units_string(time.time() - t))

    for epoch in range(100):
        t = time.time()
        log.debug(epoch)

        vals = f2()
        m = OrderedDict(zip(valid_collapsed.keys(), vals))
        plot.update_plots(epoch, m)
        time.sleep(0.02)

        log.debug('----- ' + make_time_units_string(time.time() - t))

    log.debug("TOTAL TIME " + make_time_units_string(time.time() - t1))
Example 3: get_gradients

# Required import: from theano.compat.python2x import OrderedDict [as alias]
# Or: from theano.compat.python2x.OrderedDict import values [as alias]

def get_gradients(self, model, data, ** kwargs):
    cost = self.expr(model=model, data=data, **kwargs)
    params = list(model.get_params())
    grads = T.grad(cost, params, disconnected_inputs='ignore')
    gradients = OrderedDict(izip(params, grads))

    if self.gradient_clipping:
        norm_gs = 0.
        for grad in gradients.values():
            norm_gs += (grad ** 2).sum()
        not_finite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))
        norm_gs = T.sqrt(norm_gs)
        norm_gs = T.switch(T.ge(norm_gs, self.max_magnitude),
                           self.max_magnitude / norm_gs,
                           1.)

        for param, grad in gradients.items():
            gradients[param] = T.switch(not_finite,
                                        .1 * param,
                                        grad * norm_gs)

    updates = OrderedDict()

    return gradients, updates
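Example 3 rescales all gradients when their combined L2 norm exceeds max_magnitude, and falls back to a small fraction of each parameter when the norm is not finite. A plain NumPy sketch of that clipping rule is shown below; the names mirror the example, but nothing here depends on Theano or on the surrounding cost class.

import numpy as np

def clip_gradients(params, grads, max_magnitude=5.0):
    """Rescale grads so their global L2 norm is at most max_magnitude.

    params and grads are parallel lists of NumPy arrays, mirroring the
    parameter/gradient pairs in the example above.
    """
    norm = np.sqrt(sum((g ** 2).sum() for g in grads))
    if not np.isfinite(norm):
        # Same fallback as the Theano version: use a damped copy of each parameter.
        return [0.1 * p for p in params]
    if norm >= max_magnitude:
        return [g * (max_magnitude / norm) for g in grads]
    return grads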
Example 4: get_params

# Required import: from theano.compat.python2x import OrderedDict [as alias]
# Or: from theano.compat.python2x.OrderedDict import values [as alias]

def get_params(self):
    """
    This returns the list of theano shared variables that will be trained by the :class:`Optimizer`.
    These parameters are used in the gradient.

    This includes all of the parameters in every model in the Prototype, without duplication.

    Returns
    -------
    dict(str: SharedVariable)
        Dictionary of {string_name: theano shared variables} to be trained with an :class:`Optimizer`.
        These are the parameters to be trained.
    """
    params = OrderedDict()
    model_index = 0
    for model in self.models:
        if isinstance(model, Model):
            model_params = model.get_params()
            # append the parameters only if they aren't already in the list!
            for name, param in model_params.items():
                if param not in list(params.values()):
                    name = model._classname + '_%d_' % model_index + name
                    params[name] = param
            model_index += 1
    return params
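The method above merges parameters from several models while skipping shared variables that were already collected, and prefixes each name with the owning model's class name and index so the keys stay unique. A standalone sketch of that deduplication idea, using plain dicts instead of OpenDeep models and an explicit identity check, is given below.

from collections import OrderedDict

def merge_params(models):
    """Collect {unique_name: param} across models, skipping duplicates by identity."""
    params = OrderedDict()
    for model_index, model in enumerate(models):
        for name, param in model.items():          # each 'model' is just a name -> param dict here
            if not any(param is existing for existing in params.values()):
                params['model_%d_%s' % (model_index, name)] = param
    return params

# Two toy "models" that share the same embedding parameter object:
embedding = [1.0, 2.0, 3.0]
encoder = OrderedDict([('embed', embedding), ('W', [0.1, 0.2])])
decoder = OrderedDict([('embed', embedding), ('V', [0.3, 0.4])])

print(list(merge_params([encoder, decoder]).keys()))
# ['model_0_embed', 'model_0_W', 'model_1_V'] -- the shared 'embed' appears only once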
Example 5: __init__

# Required import: from theano.compat.python2x import OrderedDict [as alias]
# Or: from theano.compat.python2x.OrderedDict import values [as alias]

def __init__(self, valid=None, invalid=None, valid_equivalent=None):
    '''
    Check if variables can be expressed without using variables in invalid.

    init_valid_equivalent provides a dictionary mapping some invalid
    variables to valid ones that can be used instead.
    '''
    if valid is None:
        valid = []
    if invalid is None:
        invalid = []
    if valid_equivalent is None:
        valid_equivalent = OrderedDict()

    # Nodes that are valid to have in the graph computing outputs
    self.valid = set(valid)

    # Nodes that are NOT valid to have in the graph computing outputs
    self.invalid = set(invalid)

    # Mapping from invalid variables to equivalent valid ones.
    self.valid_equivalent = valid_equivalent.copy()
    self.valid.update(valid_equivalent.values())
    self.invalid.update(valid_equivalent.keys())
Example 6: get_lr_scalers

# Required import: from theano.compat.python2x import OrderedDict [as alias]
# Or: from theano.compat.python2x.OrderedDict import values [as alias]

def get_lr_scalers(self):
    rval = OrderedDict()

    params = self.get_params()

    for layer in self.layers[:-1]:
        contrib = layer.get_lr_scalers()

        assert isinstance(contrib, OrderedDict)
        # No two layers can contend to scale a parameter
        assert not any([key in rval for key in contrib])
        # Don't try to scale anything that's not a parameter
        assert all([key in params for key in contrib])

        rval.update(contrib)

    for layer in self.layers[-1:]:
        contrib = layer.get_lr_scalers()

        assert isinstance(contrib, OrderedDict)
        # No two layers can contend to scale a parameter
        assert not any([key in rval for key in contrib])
        # Don't try to scale anything that's not a parameter
        assert all([key in params for key in contrib])

        rval.update(contrib)

    assert all([isinstance(val, float) for val in rval.values()])

    return rval
Example 7: get_funcs

# Required import: from theano.compat.python2x import OrderedDict [as alias]
# Or: from theano.compat.python2x.OrderedDict import values [as alias]

def get_funcs(self, learning_rate, grads, inp, cost, errors, lr_scalers=None):
    """
    Provides the updates for learning with gradient descent + momentum.

    Parameters
    ----------
    learning_rate : float
        Learning rate coefficient.
    grads : dict
        A dictionary mapping from the model's parameters to their
        gradients.
    lr_scalers : dict
        A dictionary mapping from the model's parameters to a learning
        rate multiplier.
    """
    gshared = OrderedDict({p: sharedX(p.get_value() * 0.,
                                      name='%s_grad' % p.name)
                           for p, g in grads.iteritems()})

    gsup = [(gs, g) for gs, g in zip(gshared.values(), grads.values())]
    get_norms = lambda x: T.sqrt(sum(map(lambda y: (y**2).sum(), x)))
    gnorm = get_norms(grads.values())
    pnorm = get_norms(grads.keys())
    f_grad_shared = theano.function(inp,
                                    [cost, errors, gnorm, pnorm],
                                    updates=gsup)

    updates = OrderedDict()

    for param, grad in gshared.items():
        vel = sharedX(param.get_value() * 0.)
        assert param.dtype == vel.dtype
        assert grad.dtype == param.dtype
        if param.name is not None:
            vel.name = 'vel_' + param.name

        scaled_lr = learning_rate * lr_scalers.get(param, 1.)
        updates[vel] = self.momentum * vel - scaled_lr * grad

        inc = updates[vel]
        if self.nesterov_momentum:
            inc = self.momentum * inc - scaled_lr * grad
            assert inc.dtype == vel.dtype

        updates[param] = param + inc

    f_update = theano.function([learning_rate],
                               [],
                               updates=updates,
                               on_unused_input='ignore')

    return f_grad_shared, f_update
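The update rule compiled above is classical momentum with an optional Nesterov correction: the velocity is decayed and pushed along the negative gradient, and the parameter moves by the (possibly corrected) increment. Here is a small NumPy sketch of just that arithmetic, outside of Theano and the surrounding class; the default hyperparameter values are illustrative only.

import numpy as np

def momentum_step(param, grad, vel, learning_rate=0.01, momentum=0.9, nesterov=False):
    """Return the updated (param, vel) pair for one momentum step."""
    vel = momentum * vel - learning_rate * grad
    inc = vel
    if nesterov:
        # Nesterov variant used in the example: look ahead along the new velocity.
        inc = momentum * vel - learning_rate * grad
    return param + inc, vel

p = np.zeros(3)
g = np.array([1.0, -2.0, 0.5])
v = np.zeros(3)
p, v = momentum_step(p, g, v, nesterov=True)
print(p, v)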
Example 8: get_updates

# Required import: from theano.compat.python2x import OrderedDict [as alias]
# Or: from theano.compat.python2x.OrderedDict import values [as alias]

def get_updates(self, grads):
    """
    .. todo::

        WRITEME
    """
    updates = OrderedDict()
    g_tt = OrderedDict()
    cnt = sharedX(0, 'counter')

    for p, g in grads.items():
        lr_scaler = self.lr_scalers.get(str(p), 1.)
        m = sharedX(p.get_value() * 0.)
        v = sharedX(p.get_value() * 0.)
        b1 = self.b1 * self.lambd**cnt
        m_t = b1 * m + (1 - b1) * g
        v_t = self.b2 * v + (1 - self.b2) * g**2
        m_t_hat = m_t / (1. - self.b1**(cnt + 1))
        v_t_hat = v_t / (1. - self.b2**(cnt + 1))
        g_t = m_t_hat / (T.sqrt(v_t_hat) + self.e)
        p_t = p - lr_scaler * self.lr * g_t
        g_tt[p] = g_t
        updates[m] = m_t
        updates[v] = v_t
        updates[p] = p_t

    if self.post_clip:
        g_norm = sum([T.sqr(x/self.batch_size).sum()
                      for x in g_tt.values()])
        not_finite = T.or_(T.isnan(g_norm), T.isinf(g_norm))
        g_norm = T.sqrt(g_norm)
        scaler = self.scaler / T.maximum(self.scaler, g_norm)
        for p, g in g_tt.items():
            lr_scaler = self.lr_scalers.get(str(p), 1.)
            p_t = p - lr_scaler * self.lr * g * scaler
            updates[p] = p_t

    updates[cnt] = cnt + 1

    return updates
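Example 8 is the Adam update with bias-corrected first and second moment estimates (plus an optional post-hoc norm clip). The bias-corrected core of the rule, stripped of the Theano shared-variable bookkeeping and of the decaying b1 schedule, looks like the NumPy sketch below; lr, b1, b2 and eps default to commonly used values and are not taken from the example's configuration.

import numpy as np

def adam_step(param, grad, m, v, t, lr=1e-3, b1=0.9, b2=0.999, eps=1e-8):
    """One Adam update. t is the 1-based step counter; returns (param, m, v)."""
    m = b1 * m + (1 - b1) * grad            # first moment estimate
    v = b2 * v + (1 - b2) * grad ** 2       # second moment estimate
    m_hat = m / (1 - b1 ** t)               # bias correction
    v_hat = v / (1 - b2 ** t)
    param = param - lr * m_hat / (np.sqrt(v_hat) + eps)
    return param, m, v

p, m, v = np.zeros(2), np.zeros(2), np.zeros(2)
for t in range(1, 4):
    g = np.array([0.5, -1.0])
    p, m, v = adam_step(p, g, m, v, t)
print(p)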
Example 9: Optimizer

# Required import: from theano.compat.python2x import OrderedDict [as alias]
# Or: from theano.compat.python2x.OrderedDict import values [as alias]
# ......... part of the code is omitted here .........

# It tells how to update the params each training epoch
gradient_updates = self.get_updates(gradients)

# Combine the updates from the model also if applicable
train_updates = self.model.get_updates()
if train_updates:
    train_updates.update(gradient_updates)
else:
    train_updates = gradient_updates

# Compile the training function!
log.info('Compiling f_learn %d/%d function for model %s...', i + 1, len(train_costs),
         str(type(self.model)))
t = time.time()

f_learn = function(inputs=[data_idx, data_end_idx],
                   updates=train_updates,
                   outputs=train_cost,
                   givens=train_givens,
                   name='f_learn_%d' % i)

log.info('f_learn compilation took %s', make_time_units_string(time.time() - t))
self.train_functions.append(f_learn)

# grab the expression(s) to use to monitor different model values during training
log.debug("Compiling monitor functions...")
monitor_t = time.time()
self.monitors = OrderedDict(self.model.get_monitors())
self.monitor_names = self.monitors.keys()
if len(self.monitors.keys()) > 0:
    self.train_monitor_function = function(
        inputs=[data_idx, data_end_idx],
        updates=self.model.get_updates(),
        outputs=self.monitors.values(),
        givens=train_givens,
        name="train_monitor_function"
    )
if len(self.monitors.keys()) > 0:
    self.valid_monitor_function = function(
        inputs=[data_idx, data_end_idx],
        updates=self.model.get_updates(),
        outputs=self.monitors.values(),
        givens=valid_givens,
        name="valid_monitor_function"
    )
if len(self.monitors.keys()) > 0:
    self.test_monitor_function = function(
        inputs=[data_idx, data_end_idx],
        updates=self.model.get_updates(),
        outputs=self.monitors.values(),
        givens=test_givens,
        name="test_monitor_function"
    )
log.debug("Compilation done. Took %s", make_time_units_string(time.time() - monitor_t))

self.noise_switches = raise_to_list(self.model.get_noise_switch())

##################
# start training #
##################
# make sure to deal with a list of train_cost functions - for layer-wise pretraining!
# this list of training functions was created during __init__()
start_time = time.time()
for func_i, train_function in enumerate(self.train_functions):
    log.info("-----------TRAINING %s function %d/%d FOR %d EPOCHS (continue_training=%s)-----------",
             str(type(self.model)), func_i + 1, len(self.train_functions), self.n_epoch, str(continue_training))
Example 10: Optimizer

# Required import: from theano.compat.python2x import OrderedDict [as alias]
# Or: from theano.compat.python2x.OrderedDict import values [as alias]

class Optimizer(object):
    """
    Default interface for an optimizer implementation - this provides the necessary parameter updates when
    training a model on a dataset using an online stochastic process. The base framework for performing
    stochastic gradient descent.
    """
    def __init__(self, dataset, loss=None, model=None,
                 epochs=1000, batch_size=100, min_batch_size=1,
                 save_freq=10, stop_threshold=None, stop_patience=50,
                 learning_rate=1e-3, lr_decay=None, lr_decay_factor=None,
                 grad_clip=None, hard_clip=False,
                 **kwargs):
        """
        Initialize the Optimizer.

        Parameters
        ----------
        dataset : Dataset
            The :class:`opendeep.data.Dataset` to use when training the Model.
        loss : Loss
            The :class:`opendeep.optimization.loss.Loss` function to compare the model to a 'target' result.
        model : Model
            The :class:`opendeep.models.Model` to train. Needed if the Optimizer isn't being passed to a
            Model's .train() method.
        epochs : int
            How many training iterations over the dataset to go.
        batch_size : int
            How many examples from the training dataset to use in parallel.
        min_batch_size : int
            The minimum number of examples required at a time (for things like time series, this would be > 1).
        save_freq : int, optional
            How many epochs to train between each new save of the Model's parameters.
        stop_threshold : float, optional
            The factor by how much the best validation training score needs to improve to determine early stopping.
        stop_patience : int, optional
            The patience or number of epochs to wait after the stop_threshold has been reached before stopping.
        learning_rate : float
            The multiplicative amount to adjust parameters based on their gradient values.
        lr_decay : str
            The decay function to use for changing the learning rate over epochs. See
            `opendeep.utils.decay` for classes of decay and documentation.
        lr_decay_factor : float
            The amount of decay to use for the ``lr_decay`` type of decay.
        grad_clip : float, optional
            Whether to clip gradients. This will clip the norm of the gradients either with a hard cutoff or
            rescaling.
        hard_clip : bool
            Whether to use a hard cutoff or rescaling for clipping gradients.
        """
        log.info("Initializing optimizer %s", str(self.__class__.__name__))

        # Deal with early stopping None initializations (no early stopping).
        if not stop_threshold:
            stop_threshold = numpy.inf
        if not save_freq:
            save_freq = 1000000
        if not stop_patience:
            stop_patience = 1

        # Put all init parameters in self.args so we can log the initial configuration.
        self.args = locals().copy()
        self.args.pop('self')
        kwargs = self.args.pop('kwargs')
        self.args = add_kwargs_to_dict(kwargs, self.args)
        # log the arguments
        log.info("Optimizer config args: %s", str(self.args))

        # if the optimizer wasn't initialized with a Model (train() being called from the model class itself),
        # just return. (This seems kinda hacky but hey, people wanted .train() to happen from Model and there
        # wasn't really a better way unless the epoch looping logic was in that method for Model. That wasn't
        # the best option because other methods besides stochastic ones can exist for optimizers in the future.)
        # TODO: fix this up - feels like a hack just to make model.train() work...
        if not model:
            return
        # Otherwise, things are proceeding as normal. Carry on...

        assert isinstance(model, Model), "Optimizer input model needs to be a Model class! " \
                                         "Found %s" % str(model.__class__.__name__)
        assert isinstance(dataset, Dataset), "Optimizer input dataset needs to be a Dataset class! " \
                                             "Found %s" % str(dataset.__class__.__name__)

        # deal with loss expression/targets
        if loss is not None:
            assert isinstance(loss, Loss), "Optimizer input loss needs to be a Loss class! " \
                                           "Found %s" % str(loss.__class__.__name__)
        if isinstance(loss, Loss):
            self.loss_targets = loss.get_targets()
            self.loss_expression = loss.get_loss()
        else:
            assert model.get_loss() is not None, "No Loss specified, and the model does not have one implemented."
            if isinstance(model.get_loss(), tuple):
                self.loss_targets = raise_to_list(model.get_loss()[0])
                self.loss_expression = model.get_loss()[1]
            else:
                self.loss_targets = None
                self.loss_expression = model.get_loss()

        model_inputs = raise_to_list(model.get_inputs())
        n_model_inputs = len(model_inputs)

        model_targets = self.loss_targets or []
        for input in model_inputs:
            if input in model_targets:
# ......... part of the code is omitted here .........
Example 11: RMSProp

# Required import: from theano.compat.python2x import OrderedDict [as alias]
# Or: from theano.compat.python2x.OrderedDict import values [as alias]

class RMSProp(LearningRule):
    """
    Implements the RMSProp learning rule as described by Hinton in `lecture 6
    <http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf>`
    of the Coursera Neural Networks for Machine Learning course.

    In short, Hinton suggests "[the] magnitude of the gradient can be very
    different for different weights and can change during learning. This
    makes it hard to choose a global learning rate." RMSProp solves this
    problem by "[dividing] the learning rate for a weight by a running
    average of the magnitudes of recent gradients for that weight."

    Parameters
    ----------
    decay : float, optional
        Decay constant similar to that used in AdaDelta and Momentum methods.
    max_scaling : float, optional
        Restrict the RMSProp gradient scaling coefficient to values
        below `max_scaling`.

    Notes
    -----
    An instance of this LearningRule should only be used with one
    TrainingAlgorithm, and its get_updates method should be called
    only once. This is required in order to make the monitoring
    channels correctly report the moving averages.
    """

    def __init__(self, decay=0.9, max_scaling=1e5):
        assert 0. <= decay < 1.
        assert max_scaling > 0
        self.decay = sharedX(decay, 'decay')
        self.epsilon = 1. / max_scaling
        self.mean_square_grads = OrderedDict()

    @wraps(LearningRule.add_channels_to_monitor)
    def add_channels_to_monitor(self, monitor, monitoring_dataset):
        """
        The channels added are the min, mean, and max of the
        mean_square_grad of each parameter.
        """
        channel_mapping = {
            '_min': T.min,
            '_max': T.max,
            '_mean': T.mean
        }

        for mean_square_grad in self.mean_square_grads.values():
            for suffix, op in channel_mapping.items():
                monitor.add_channel(
                    name=(mean_square_grad.name + suffix),
                    ipt=None,
                    val=op(mean_square_grad),
                    data_specs=(NullSpace(), ''),
                    dataset=monitoring_dataset)
        return

    @wraps(LearningRule.get_updates)
    def get_updates(self, learning_rate, grads, lr_scalers=None):
        """
        Notes
        -----
        This method has the side effect of storing the moving average
        of the square gradient in `self.mean_square_grads`. This is
        necessary in order for the monitoring channels to be able
        to track the value of these moving averages.
        Therefore, this method should only get called once for each
        instance of RMSProp.
        """
        updates = OrderedDict()
        for param in grads:

            # mean_squared_grad := E[g^2]_{t-1}
            mean_square_grad = sharedX(param.get_value() * 0.)

            if param.name is None:
                raise ValueError("Model parameters must be named.")
            mean_square_grad.name = 'mean_square_grad_' + param.name

            if param.name in self.mean_square_grads:
                warnings.warn("Calling get_updates more than once on the "
                              "gradients of `%s` may make monitored values "
                              "incorrect." % param.name)
            # Store variable in self.mean_square_grads for monitoring.
            self.mean_square_grads[param.name] = mean_square_grad

            # Accumulate gradient
            new_mean_squared_grad = (self.decay * mean_square_grad +
                                     (1 - self.decay) * T.sqr(grads[param]))

            # Compute update
            scaled_lr = lr_scalers.get(param, 1.) * learning_rate
            rms_grad_t = T.sqrt(new_mean_squared_grad)
            rms_grad_t = T.maximum(rms_grad_t, self.epsilon)
            delta_x_t = - scaled_lr * grads[param] / rms_grad_t

            # Apply update
# ......... part of the code is omitted here .........
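Written out as plain arithmetic, the RMSProp rule kept by this class maintains a per-parameter running average of squared gradients and divides each step by its (clamped) square root. The NumPy sketch below shows that update on its own; epsilon plays the role of 1 / max_scaling from the constructor, and the default values are illustrative.

import numpy as np

def rmsprop_step(param, grad, mean_square_grad, learning_rate=0.01, decay=0.9, epsilon=1e-5):
    """One RMSProp step; returns (updated_param, updated_mean_square_grad)."""
    mean_square_grad = decay * mean_square_grad + (1 - decay) * grad ** 2
    rms_grad = np.maximum(np.sqrt(mean_square_grad), epsilon)   # clamp the scaling coefficient
    param = param - learning_rate * grad / rms_grad
    return param, mean_square_grad

p = np.array([1.0, -1.0])
ms = np.zeros(2)
for _ in range(3):
    g = np.array([0.2, -0.4])
    p, ms = rmsprop_step(p, g, ms)
print(p)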
Example 12: Monitor

# Required import: from theano.compat.python2x import OrderedDict [as alias]
# Or: from theano.compat.python2x.OrderedDict import values [as alias]

class Monitor(object):
    """
    A class for monitoring Models while they are being trained.

    A monitor object records the number of minibatches and number of
    examples the model has trained, as well as any number of "channels"
    that track quantities of interest (examples: the objective
    function, measures of hidden unit activity, reconstruction error,
    sum of squared second derivatives, average norm of the weight
    vectors, etc.)

    Parameters
    ----------
    model : `pylearn2.models.model.Model`
    """

    def __init__(self, model):
        self.training_succeeded = False
        self.model = model
        self.channels = OrderedDict()
        self._num_batches_seen = 0
        self._examples_seen = 0
        self._epochs_seen = 0
        self._datasets = []
        self._iteration_mode = []
        self._batch_size = []
        self._num_batches = []
        self._dirty = True
        self._rng_seed = []
        self.names_to_del = ['theano_function_mode']
        self.t0 = time.time()
        self.theano_function_mode = None

        # Initialize self._nested_data_specs, self._data_specs_mapping,
        # and self._flat_data_specs
        self._build_data_specs()

    def _build_data_specs(self):
        """
        Computes a nested data_specs for input and all channels

        Also computes the mapping to flatten it. This function is
        called from redo_theano.
        """
        # Ask the model what it needs
        m_space, m_source = self.model.get_monitoring_data_specs()
        input_spaces = [m_space]
        input_sources = [m_source]
        for channel in self.channels.values():
            space = channel.data_specs[0]
            assert isinstance(space, Space)
            input_spaces.append(space)
            input_sources.append(channel.data_specs[1])

        nested_space = CompositeSpace(input_spaces)
        nested_source = tuple(input_sources)

        self._nested_data_specs = (nested_space, nested_source)
        self._data_specs_mapping = DataSpecsMapping(self._nested_data_specs)

        flat_space = self._data_specs_mapping.flatten(nested_space,
                                                      return_tuple=True)
        flat_source = self._data_specs_mapping.flatten(nested_source,
                                                       return_tuple=True)
        self._flat_data_specs = (CompositeSpace(flat_space), flat_source)

    def set_theano_function_mode(self, mode):
        """
        .. todo::

            WRITEME

        Parameters
        ----------
        mode : theano.compile.Mode
            Theano functions for the monitoring channels will be
            compiled and run using this mode.
        """
        if self.theano_function_mode != mode:
            self._dirty = True
        self.theano_function_mode = mode

    def add_dataset(self, dataset, mode='sequential', batch_size=None,
                    num_batches=None, seed=None):
        """
        Determines the data used to calculate the values of each channel.

        Parameters
        ----------
        dataset : object
            A `pylearn2.datasets.Dataset` object.
        mode : str or object, optional
            Iteration mode; see the docstring of the `iterator` method
            on `pylearn2.datasets.Dataset` for details.
        batch_size : int, optional
            The size of an individual batch. Optional if `mode` is
            'sequential' and `num_batches` is specified (batch size
            will be calculated based on full dataset size).
        num_batches : int, optional
            The total number of batches. Unnecessary if `mode` is
# ......... part of the code is omitted here .........
Example 13: Monitor

# Required import: from theano.compat.python2x import OrderedDict [as alias]
# Or: from theano.compat.python2x.OrderedDict import values [as alias]

class Monitor(object):
    """
    A class for monitoring Models while they are being trained.

    A monitor object records the number of minibatches and number of examples
    the model has trained, as well as any number of "channels" that track
    quantities of interest (examples: the objective function, measures of
    hidden unit activity, reconstruction error, sum of squared second
    derivatives, average norm of the weight vectors, etc.)
    """

    def __init__(self, model):
        """
        Makes a monitor for `model`. Assumes the model has not been
        trained at all yet.

        Parameters
        ----------
        model : pylearn2.models.model.Model instance
        """
        self.training_succeeded = False
        self.model = model
        self.channels = OrderedDict()
        self._num_batches_seen = 0
        self._examples_seen = 0
        self._epochs_seen = 0
        self._datasets = []
        self._iteration_mode = []
        self._batch_size = []
        self._num_batches = []
        self._dirty = True
        self._rng_seed = []
        self.names_to_del = ['theano_function_mode']
        self.t0 = time.time()

        # Determine whether the model should use topological or vector form of
        # examples. If the model acts on a space with more than the batch index
        # and channel dimension, the model has topological dimensions, so the
        # topological view of the data should be used.
        vector = model.get_input_space().make_theano_batch(name='monitoring_input')
        if isinstance(vector.type, theano.sparse.SparseType):
            self.topo = False
        else:
            self.topo = len(vector.type.broadcastable) > 2

        self.require_label = False
        self.theano_function_mode = None

    def set_theano_function_mode(self, mode):
        if self.theano_function_mode != mode:
            self._dirty = True
        self.theano_function_mode = mode

    def add_dataset(self, dataset, mode='sequential', batch_size=None,
                    num_batches=None, seed=None):
        """
        Determines the data used to calculate the values of each channel.

        Parameters
        ----------
        dataset : object
            A `pylearn2.datasets.Dataset` object.
        mode : str or object, optional
            Iteration mode; see the docstring of the `iterator` method
            on `pylearn2.datasets.Dataset` for details.
        batch_size : int, optional
            The size of an individual batch. Optional if `mode` is
            'sequential' and `num_batches` is specified (batch size
            will be calculated based on full dataset size).
        num_batches : int, optional
            The total number of batches. Unnecessary if `mode` is
            'sequential' and `batch_size` is specified (number of
            batches will be calculated based on full dataset size).
        """
        # The user can omit using lists if only one dataset is set
        if not isinstance(dataset, list):
            dataset = [dataset]
        if not isinstance(mode, list):
            mode = [mode]
        if not isinstance(batch_size, list):
            batch_size = [batch_size]
        if not isinstance(num_batches, list):
            num_batches = [num_batches]
        if seed is None:
            seed = [None] * len(dataset)
        if not isinstance(seed, list):
            seed = [seed]

        if len(mode) != len(dataset):
            raise ValueError("Received " + str(len(dataset)) + " datasets but " + str(len(mode)) + " modes.")
        if any([len(l) != len(dataset) for l in [batch_size, seed]]):
            raise ValueError("make sure each dataset has its iteration " +
                             "batch size and number of batches.")

        for (d, m, b, n, sd) in safe_izip(dataset, mode, batch_size, num_batches, seed):
            try:
                it = d.iterator(mode=m, batch_size=b,
                                num_batches=n,
                                topo=self.topo,
                                targets=self.require_label,
                                rng=sd)
            except ValueError as exc:
                raise ValueError("invalid iteration parameters in "
                                 "Monitor.add_dataset: " + str(exc))
# ......... part of the code is omitted here .........
Example 14: Optimizer

# Required import: from theano.compat.python2x import OrderedDict [as alias]
# Or: from theano.compat.python2x.OrderedDict import values [as alias]

class Optimizer(object):
    """
    Default interface for an optimizer implementation - this provides the necessary parameter updates when
    training a model on a dataset using an online stochastic process.
    """
    def __init__(self, model, dataset,
                 n_epoch=1000, batch_size=100, minimum_batch_size=1,
                 save_frequency=10, early_stop_threshold=.9995, early_stop_length=30,
                 learning_rate=1e-3, lr_decay='exponential', lr_factor=1,
                 **kwargs):
        """
        Initialize the Optimizer.

        Parameters
        ----------
        model : Model
            The Model to train.
        dataset : Dataset
            The Dataset to use when training the Model.
        n_epoch : int
            How many training iterations over the dataset to go.
        batch_size : int
            How many examples from the training dataset to use in parallel.
        minimum_batch_size : int
            The minimum number of examples required at a time (for things like time series, this would be > 1).
        save_frequency : int
            How many epochs to train between each new save of the Model's parameters.
        early_stop_threshold : float
            The factor by how much the best validation training score needs to improve to determine early stopping.
        early_stop_length : int
            The patience or number of epochs to wait after the early_stop_threshold has been reached before stopping.
        learning_rate : float
            The multiplicative amount to adjust parameters based on their gradient values.
        lr_decay : str
            The type of decay function to use for changing the learning rate over epochs. See
            `opendeep.utils.decay` for options.
        lr_factor : float
            The amount to use for the decay function when changing the learning rate over epochs. See
            `opendeep.utils.decay` for its effect for given decay functions.
        """
        log.info("Initializing optimizer %s", str(type(self)))

        if early_stop_threshold is None:
            early_stop_threshold = 1.
        if save_frequency is None:
            save_frequency = 1000000
        if early_stop_length is None:
            early_stop_length = 100

        self.args = locals().copy()
        self.args.pop('self')
        kwargs = self.args.pop('kwargs')
        self.args = add_kwargs_to_dict(kwargs, self.args)
        # log the arguments
        log.info("optimizer config args: %s", str(self.args))

        assert isinstance(model, Model), "Optimizer input model needs to be an opendeep Model class!"
        assert isinstance(dataset, Dataset), "Optimizer input dataset needs to be an opendeep Dataset class!"
        self.model = model
        self.dataset = dataset

        # Learning rate - how drastic of a step do the parameters change
        self.learning_rate = sharedX(learning_rate, 'learning_rate')
        self.lr_scalers = self.model.get_lr_scalers()
        if lr_decay:
            self.learning_rate_decay = get_decay_function(lr_decay,
                                                          self.learning_rate,
                                                          self.learning_rate.get_value(),
                                                          lr_factor)
        else:
            self.learning_rate_decay = False

        self.noise_switches = raise_to_list(self.model.get_noise_switch())
        self.batch_size = batch_size
        self.minimum_batch_size = minimum_batch_size
        self.n_epoch = n_epoch
        self.save_frequency = save_frequency
        self.early_stop_threshold = early_stop_threshold
        self.early_stop_length = early_stop_length

    def _get_batch_indices(self, data_lengths):
        """
        Computes the tuples of (start_index, end_index) that represent the appropriate slices of the concatenated
        dataset with regards to the given data_lengths. This allows for lists of data lengths to represent sequences,
        so that the concatenated batches returned do not overstep the start of a new sequence.

        Parameters
        ----------
        data_lengths : list(int) or int
            List of num_examples for each dataset (the length of the datasets - this is a list in the case of
            sequences).

        Returns
        -------
        list((int, int))
            List of tuples (start, end) representing the batch slices for the total dataset if it were concatenated.
        """
        batch_indices = []
        start_idx = 0
        for len in raise_to_list(data_lengths):
# ......... part of the code is omitted here .........
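The docstring of _get_batch_indices describes slicing a concatenated dataset into (start, end) batch tuples that never cross a sequence boundary; the body is cut off above. As a rough illustration of that idea only (not the OpenDeep implementation), a standalone version could look like the following.

def batch_slices(data_lengths, batch_size):
    """Yield (start, end) index pairs over the concatenated data, never crossing a sequence boundary."""
    slices = []
    start = 0
    for length in data_lengths:
        end_of_sequence = start + length
        while start < end_of_sequence:
            end = min(start + batch_size, end_of_sequence)
            slices.append((start, end))
            start = end
    return slices

print(batch_slices([5, 7], batch_size=3))
# [(0, 3), (3, 5), (5, 8), (8, 11), (11, 12)] -- batches stop at the boundary between sequences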