本文整理匯總了Python中pylearn2.compat.OrderedDict類的典型用法代碼示例。如果您正苦於以下問題:Python OrderedDict類的具體用法?Python OrderedDict怎麼用?Python OrderedDict使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了OrderedDict類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點贊,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: get_gradients
def get_gradients(self, model, data, ** kwargs):
    """
    Combine the gradients and updates of every sub-cost into one result.

    Each cost in `self.costs` is asked for its gradients on its own part
    of `data`. The gradients are scaled by the matching entry of
    `self.coeffs` and summed per parameter.

    Parameters
    ----------
    model : object
        Passed to every sub-cost; must provide `get_params()`.
    data : object
        Batch of data, split per cost with the mapping returned by
        `self.get_composite_specs_and_mapping(model)`.
    **kwargs
        Forwarded unchanged to each sub-cost's `get_gradients`.

    Returns
    -------
    grads : OrderedDict
        Maps each parameter to the weighted sum of its gradients.
    updates : OrderedDict
        Union of the update dictionaries returned by the sub-costs.

    Raises
    ------
    ValueError
        If a sub-cost returns a gradient for a variable that is not in
        `model.get_params()`.
    """
    _, mapping = self.get_composite_specs_and_mapping(model)
    nested_data = mapping.nest(data)

    indiv_results = [
        cost.get_gradients(model, cost_data, ** kwargs)
        for cost, cost_data in safe_zip(self.costs, nested_data)
    ]

    grads = OrderedDict()
    updates = OrderedDict()
    params = model.get_params()

    for coeff, (g, u) in zip(self.coeffs, indiv_results):
        # Validate the whole dictionary before accumulating anything from it
        for param in g:
            if param not in params:
                raise ValueError("A shared variable (" +
                                 str(param) +
                                 ") that is not a parameter appeared "
                                 "in a cost gradient dictionary.")
        for param in g:
            assert param.ndim == g[param].ndim
            v = coeff * g[param]
            if param not in grads:
                grads[param] = v
            else:
                grads[param] = grads[param] + v
            assert grads[param].ndim == param.ndim
        # Updates may neither collide with earlier ones nor target a parameter
        assert not any(state in updates for state in u)
        assert not any(state in params for state in u)
        updates.update(u)

    return grads, updates
示例2: get_monitoring_channels
def get_monitoring_channels(self, model, data, ** kwargs):
    """
    Collect the monitoring channels of every sub-cost.

    Besides the channels each cost reports itself, one channel per cost is
    added for the value of `cost.expr(...)` whenever that value is not
    None. It is keyed `term_<index>` with `_<name>` appended when the
    value has a non-None `name`.

    Parameters
    ----------
    model : object
        Passed through to every sub-cost.
    data : object
        Batch of data; validated against `self.get_data_specs(model)[0]`
        and then split per cost.
    **kwargs
        Forwarded to `get_monitoring_channels` and `expr` of each cost.

    Returns
    -------
    rval : OrderedDict
        Channel name to channel value.
    """
    self.get_data_specs(model)[0].validate(data)
    collected = OrderedDict()
    _, mapping = self.get_composite_specs_and_mapping(model)
    per_cost_data = mapping.nest(data)

    for index, sub_cost in enumerate(self.costs):
        sub_data = per_cost_data[index]
        try:
            collected.update(
                sub_cost.get_monitoring_channels(model, sub_data, **kwargs))
        except TypeError:
            reraise_as(Exception('SumOfCosts.get_monitoring_channels '
                                 'encountered TypeError while calling {0}'
                                 '.get_monitoring_channels'.format(
                                     type(sub_cost))))

        term = sub_cost.expr(model, sub_data, ** kwargs)
        if term is None:
            continue
        term_name = getattr(term, 'name', None)
        suffix = '' if term_name is None else '_' + term_name
        collected['term_' + str(index) + suffix] = term

    return collected
示例3: get_gradients
def get_gradients(self, model, data, **kwargs):
    """
    Differentiate `self._cost` with respect to the model parameters.

    Parameters
    ----------
    model : object
        Must provide `get_params()`.
    data : object
        Passed to `self._cost`.
    **kwargs
        Forwarded to `self._cost`.

    Returns
    -------
    gradients : OrderedDict
        Maps each parameter to its gradient expression.
    updates : OrderedDict
        The updates reported by `self.sampler.updates()`.
    """
    objective = self._cost(model, data, **kwargs)
    param_list = list(model.get_params())

    # The sampler's particles are passed as constants for differentiation
    grad_list = T.grad(objective, param_list,
                       disconnected_inputs='ignore',
                       consider_constant=[self.sampler.particles])
    gradients = OrderedDict(izip(param_list, grad_list))

    updates = OrderedDict()
    updates.update(self.sampler.updates())

    return gradients, updates
示例4: __init__
def __init__(self, dim, layer_name, irange, indices=None,
             init_bias=0., svd=True, nonlinearity=tensor.tanh):
    """
    Store the constructor arguments and initialise layer bookkeeping.

    Every argument is saved as an instance attribute of the same name.
    `rnn_friendly` is set to True and `_scan_updates` starts as an empty
    OrderedDict before the base-class constructor runs.
    """
    self.rnn_friendly = True
    self._scan_updates = OrderedDict()
    # Written straight into the instance dictionary, one entry per argument
    vars(self).update(
        dim=dim,
        layer_name=layer_name,
        irange=irange,
        indices=indices,
        init_bias=init_bias,
        svd=svd,
        nonlinearity=nonlinearity,
    )
    super(Recurrent, self).__init__()
示例5: __init__
def __init__(self, base_learning_rule, decay=0.9):
    """
    Wrap another learning rule.

    Parameters
    ----------
    base_learning_rule : object
        Stored as `self.base`. If it has a `momentum` attribute, that
        attribute is mirrored on this instance.
    decay : float, optional
        Stored as `self.decay`.
    """
    rule = base_learning_rule
    self.base = rule
    # hack to allow MomentumAdjustor to access momentum value
    if hasattr(rule, 'momentum'):
        self.momentum = rule.momentum
    self.mean_updates = OrderedDict()
    self.decay = decay
示例6: __init__
def __init__(self, dim, layer_name, irange, indices=None,
             init_bias=0., nonlinearity=tensor.tanh,
             weight_noise=False, **kwargs):
    """
    Store the constructor arguments and initialise layer bookkeeping.

    Every named argument, and the remaining `kwargs` dictionary itself, is
    saved as an instance attribute of the same name. `noise_std_dev` is
    popped from `kwargs` (default .075) into `self._std_dev`, which is
    reset to None when `weight_noise` is false.
    """
    self._std_dev = kwargs.pop('noise_std_dev', .075)
    self.rnn_friendly = True
    self._scan_updates = OrderedDict()
    # Written straight into the instance dictionary, one entry per argument
    vars(self).update(
        dim=dim,
        layer_name=layer_name,
        irange=irange,
        indices=indices,
        init_bias=init_bias,
        nonlinearity=nonlinearity,
        weight_noise=weight_noise,
        kwargs=kwargs,
    )
    super(Recurrent, self).__init__()
    if not self.weight_noise:
        self._std_dev = None
示例7: get_lr_scalers
def get_lr_scalers(self):
    """
    Merge the learning-rate scalers of all layers into one dictionary.

    The hidden layers are visited first, in order, then the visible
    layer.

    Returns
    -------
    rval : OrderedDict
        Maps parameters to their learning-rate scaler. The assertions
        require that no parameter is scaled by two layers, that every key
        is one of `self.get_params()`, and that every value is a float.
    """
    known_params = self.get_params()
    merged = OrderedDict()

    for layer in self.hidden_layers + [self.visible_layer]:
        layer_scalers = layer.get_lr_scalers()

        # No two layers can contend to scale a parameter
        assert not any(key in merged for key in layer_scalers)

        # Don't try to scale anything that's not a parameter
        assert all(key in known_params for key in layer_scalers)

        merged.update(layer_scalers)

    assert all(isinstance(scale, float) for scale in merged.values())

    return merged
示例8: __init__
def __init__(self, decrease_rate=0.5, increase_rate=1.2,
             min_rate=1e-6, max_rate=50):
    """
    Store the rate hyper-parameters.

    Parameters
    ----------
    decrease_rate : float, optional
        Must be below 1. Wrapped with `sharedX` under the name
        'decrease_rate'.
    increase_rate : float, optional
        Must be above 1. Wrapped with `sharedX` under the name
        'increase_rate'.
    min_rate : float, optional
        Stored unchanged as `self.min_rate`.
    max_rate : float, optional
        Stored unchanged as `self.max_rate`.
    """
    assert increase_rate > 1.
    assert decrease_rate < 1.

    self.min_rate, self.max_rate = min_rate, max_rate
    self.decrease_rate = sharedX(decrease_rate, 'decrease_rate')
    self.increase_rate = sharedX(increase_rate, 'increase_rate')
    self.zeros = OrderedDict()
示例9: __init__
def __init__(self, model):
    """
    Create a monitor attached to `model`.

    All counters start at zero, every list-valued attribute starts empty,
    and the monitor is marked dirty. The data specs are built last, once
    every other attribute exists.

    Parameters
    ----------
    model : object
        Stored as `self.model`.
    """
    self.model = model
    self.training_succeeded = False
    self.channels = OrderedDict()

    self._num_batches_seen = self._examples_seen = self._epochs_seen = 0

    # Each of these attributes gets its own, distinct empty list
    for attr in ('_datasets', '_iteration_mode', '_batch_size',
                 '_num_batches', '_rng_seed'):
        setattr(self, attr, [])

    self._dirty = True
    self.names_to_del = ['theano_function_mode']
    self.t0 = time.time()
    self.theano_function_mode = None

    # Initialize self._nested_data_specs, self._data_specs_mapping,
    # and self._flat_data_specs
    self._build_data_specs()
示例10: get_lr_scalers
def get_lr_scalers(self, model_idx=-1):
    """
    Merge the learning-rate scalers of every model in `self.models`.

    Parameters
    ----------
    model_idx : int, optional
        Accepted but not used by this method.

    Returns
    -------
    scaler : OrderedDict
        Union of the per-model scalers; on a key clash the later model
        wins.
    """
    combined = OrderedDict()
    for sub_model in self.models:
        sub_scalers = sub_model.get_lr_scalers()
        combined.update(sub_scalers)
    return combined
示例11: DROP_RPROP
class DROP_RPROP(LearningRule):
def __init__(self, decrease_rate=0.5, increase_rate=1.2,
             min_rate=1e-6, max_rate=50):
    """
    Store the rate hyper-parameters of the learning rule.

    Parameters
    ----------
    decrease_rate : float, optional
        Must be below 1. Wrapped with `sharedX` under the name
        'decrease_rate'.
    increase_rate : float, optional
        Must be above 1. Wrapped with `sharedX` under the name
        'increase_rate'.
    min_rate : float, optional
        Lower clip bound used for the step sizes in `get_updates`.
    max_rate : float, optional
        Upper clip bound used for the step sizes in `get_updates`.
    """
    assert increase_rate > 1.
    assert decrease_rate < 1.

    self.min_rate, self.max_rate = min_rate, max_rate
    self.decrease_rate = sharedX(decrease_rate, 'decrease_rate')
    self.increase_rate = sharedX(increase_rate, 'increase_rate')
    # Filled by get_updates: parameter -> shared array of zeros
    self.zeros = OrderedDict()
def add_channels_to_monitor(self, monitor, monitoring_dataset):
    """
    Register this rule's monitoring channels.

    One channel is added for each of the two rate variables, followed by
    one channel per entry of `self.zeros` reporting the sum of that
    variable.

    Parameters
    ----------
    monitor : object
        Receives the `add_channel` calls.
    monitoring_dataset : object
        Passed as `dataset` to every channel.
    """
    rate_channels = (
        ('rprop_decrease_rate', self.decrease_rate),
        ('rprop_increase_rate', self.increase_rate),
    )
    for channel_name, rate in rate_channels:
        monitor.add_channel(
            channel_name,
            ipt=None,
            val=rate,
            dataset=monitoring_dataset,
            data_specs=(NullSpace(), '')
        )

    for zero_var in self.zeros.values():
        monitor.add_channel(
            zero_var.name,
            ipt=None,
            val=T.sum(zero_var),
            dataset=monitoring_dataset,
            data_specs=(NullSpace(), '')
        )
def get_updates(self, learning_rate, grads, lr_scalers=None,
global_error=None,masks=None):
"""
Build the symbolic step-size updates for every parameter in `grads`.

Only the first part of this method is visible in this excerpt; the
remainder, including the return statement, is omitted.

Parameters
----------
learning_rate : object
Must provide `get_value()`. Its value seeds a parameter's step size
when `lr_scalers` has no entry for that parameter.
grads : dict
Maps each parameter to its gradient expression.
lr_scalers : dict, optional
Per-parameter initial step sizes.
NOTE(review): the default of None is dereferenced unconditionally
below (`lr_scalers.get`), so callers must pass a dictionary.
global_error : optional
Not referenced in the visible part of the method.
masks : dict, optional
Maps a layer name to a mask; consulted for parameters whose name ends
in `_W`.
NOTE(review): the default of None fails on `layer_name in masks` for
any such parameter.
"""
updates = OrderedDict()
# NOTE(review): `iteritems` only exists on Python 2 dictionaries.
for param, grad in grads.iteritems():
# Create required shared variables
lr = lr_scalers.get(param, learning_rate.get_value())
# Per-element step size, initialised to `lr` everywhere
delta = sharedX(
np.zeros_like(param.get_value()) + lr,
borrow=True
)
previous_grad = sharedX(
np.zeros_like(param.get_value()),
borrow=True
)
zeros = sharedX(
np.zeros_like(param.get_value()),
borrow=True
)
# NOTE(review): `param.name` is used here before the `is not None` check
# further down, so an unnamed parameter fails inside `re.sub`.
layer_name = re.sub('_W$','',param.name)
if re.match(r'.*_W$',param.name) and layer_name in masks:
mask = masks[layer_name]
masked_grad = T.gt(T.dot(mask.T,T.dot(mask,grad)),0.)
else:
masked_grad = 1. #T.ones_like(grad)
# Name variables according to the parameter name
if param.name is not None:
delta.name = 'delta_'+param.name
zeros.name = 'zeros_' + param.name
previous_grad.name = 'previous_grad_' + param.name
# Remembered so add_channels_to_monitor can report on it
self.zeros[param] = zeros
# Sign of `temp` tells whether the gradient kept or flipped its sign
temp = grad * previous_grad
# Where grad != 0: keep, shrink or grow delta according to the sign of
# `temp`, then clip to [min_rate, max_rate]. Elsewhere delta is unchanged.
delta_inc = T.switch(
T.neq(grad,0.),
T.clip(
T.switch(
T.eq(temp, 0.),
delta,
T.switch(
T.lt(temp, 0.),
delta*self.decrease_rate,
delta*self.increase_rate
)
),
self.min_rate,
self.max_rate
),
delta
)
previous_grad_inc = T.switch(
T.gt(masked_grad,0.),
T.switch(
T.gt(temp,0.),
grad,
#......... part of the code is omitted here .........
示例12: RMSProp
class RMSProp(LearningRule):
"""
Implements the RMSProp learning rule.
The RMSProp learning rule is described by Hinton in `lecture 6
<http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf>`
of the Coursera Neural Networks for Machine Learning course.
In short, Hinton suggests "[the] magnitude of the gradient can be very
different for different weights and can change during learning. This
makes it hard to choose a global learning rate." RMSProp solves this
problem by "[dividing] the learning rate for a weight by a running
average of the magnitudes of recent gradients for that weight."
Parameters
----------
decay : float, optional
Decay constant similar to that used in AdaDelta and Momentum methods.
max_scaling: float, optional
Restrict the RMSProp gradient scaling coefficient to values
below `max_scaling`.
Notes
-----
An instance of this LearningRule should only be used with one
TrainingAlgorithm, and its get_updates method should be called
only once. This is required in order to make the monitoring
channels correctly report the moving averages.
"""
def __init__(self, decay=0.9, max_scaling=1e5):
    """
    Store the RMSProp hyper-parameters.

    Parameters
    ----------
    decay : float, optional
        Must satisfy 0 <= decay < 1. Wrapped with `sharedX` under the
        name 'decay'.
    max_scaling : float, optional
        Must be positive. Its reciprocal is stored as `self.epsilon`.
    """
    assert 0. <= decay < 1.
    assert max_scaling > 0

    self.epsilon = 1. / max_scaling
    self.decay = sharedX(decay, 'decay')
    self.mean_square_grads = OrderedDict()
@wraps(LearningRule.add_channels_to_monitor)
def add_channels_to_monitor(self, monitor, monitoring_dataset):
    """
    Add three channels per tracked variable: the min, the max and the
    mean of each entry of `self.mean_square_grads`.
    """
    reducers = {
        '_min': T.min,
        '_max': T.max,
        '_mean': T.mean
    }

    for tracked in self.mean_square_grads.values():
        for suffix, reduce_op in reducers.items():
            channel_name = tracked.name + suffix
            monitor.add_channel(
                name=channel_name,
                ipt=None,
                val=reduce_op(tracked),
                data_specs=(NullSpace(), ''),
                dataset=monitoring_dataset)
def get_updates(self, learning_rate, grads, lr_scalers=None):
"""
Provides the symbolic (theano) description of the updates needed to
perform this learning rule. See Notes for side-effects.
Parameters
----------
learning_rate : float
Learning rate coefficient.
grads : dict
A dictionary mapping from the model's parameters to their
gradients.
lr_scalers : dict
A dictionary mapping from the model's parameters to a learning
rate multiplier.
Returns
-------
updates : OrderedDict
A dictionary mapping from the old model parameters, to their new
values after a single iteration of the learning rule.
Notes
-----
This method has the side effect of storing the moving average
of the square gradient in `self.mean_square_grads`. This is
necessary in order for the monitoring channels to be able
to track the value of these moving averages.
Therefore, this method should only get called once for each
instance of RMSProp.
"""
updates = OrderedDict()
for param in grads:
# mean_squared_grad := E[g^2]_{t-1}
# Starts as zeros with the same shape as the parameter's value
mean_square_grad = sharedX(param.get_value() * 0.)
if param.name is None:
#......... part of the code is omitted here .........
示例13: Monitor
class Monitor(object):
"""
A class for monitoring Models while they are being trained.
A monitor object records the number of minibatches and number of
examples the model has trained, as well as any number of "channels"
that track quantities of interest (examples: the objective
function, measures of hidden unit activity, reconstruction error,
sum of squared second derivatives, average norm of the weight
vectors, etc.)
Parameters
----------
model : `pylearn2.models.model.Model`
The model to monitor.
Attributes
----------
on_channel_conflict : string
Behavior when a channel is created twice. Set to `error` by the
constructor.
`error` : the behavior when there is a conflict on creating a
channel twice
`copy_history` : the behavior when creating a new channel and
transferring the history of old_monitor
`overwrite` : the behavior when creating a new channel without
taking old_monitor into account
"""
def __init__(self, model):
    """
    Create a monitor attached to `model`.

    All counters start at zero, every list-valued attribute starts empty,
    the monitor is marked dirty and `on_channel_conflict` is 'error'. The
    data specs are built last, once every other attribute exists.

    Parameters
    ----------
    model : object
        Stored as `self.model`.
    """
    self.model = model
    self.training_succeeded = False
    self.channels = OrderedDict()

    self._num_batches_seen = self._examples_seen = self._epochs_seen = 0

    # Each of these attributes gets its own, distinct empty list
    for attr in ('_datasets', '_iteration_mode', '_batch_size',
                 '_num_batches', '_rng_seed'):
        setattr(self, attr, [])

    self._dirty = True
    self.names_to_del = ['theano_function_mode']
    self.t0 = time.time()
    self.theano_function_mode = None
    self.on_channel_conflict = 'error'

    # Initialize self._nested_data_specs, self._data_specs_mapping,
    # and self._flat_data_specs
    self._build_data_specs()
def _build_data_specs(self):
    """
    Compute the nested data specs covering the model and all channels,
    together with the mapping that flattens them.

    Sets `self._nested_data_specs`, `self._data_specs_mapping` and
    `self._flat_data_specs`. Called by the constructor; the original
    documentation also states it is called from redo_theano.
    """
    # Ask the model what it needs
    model_space, model_source = self.model.get_monitoring_data_specs()
    spaces = [model_space]
    sources = [model_source]

    # Then add what every channel needs, in channel order
    for channel in self.channels.values():
        channel_space = channel.data_specs[0]
        assert isinstance(channel_space, Space)
        spaces.append(channel_space)
        sources.append(channel.data_specs[1])

    nested_space = CompositeSpace(spaces)
    nested_source = tuple(sources)
    self._nested_data_specs = (nested_space, nested_source)

    mapping = DataSpecsMapping(self._nested_data_specs)
    self._data_specs_mapping = mapping

    flat_space = mapping.flatten(nested_space, return_tuple=True)
    flat_source = mapping.flatten(nested_source, return_tuple=True)
    self._flat_data_specs = (CompositeSpace(flat_space), flat_source)
def set_theano_function_mode(self, mode):
    """
    Change the mode used for the monitoring channels' Theano functions.

    When `mode` differs from the current one, it is stored and the
    monitor is marked dirty; otherwise nothing changes.

    Parameters
    ----------
    mode : theano.compile.Mode
        Theano functions for the monitoring channels will be
        compiled and run using this mode.
    """
    if self.theano_function_mode == mode:
        pass
    else:
        self._dirty = True
        self.theano_function_mode = mode
def add_dataset(self, dataset, mode='sequential', batch_size=None,
num_batches=None, seed=None):
"""
Determines the data used to calculate the values of each channel.
Parameters
----------
#.........這裏部分代碼省略.........
示例14: Recurrent
class Recurrent(Layer):
"""
A recurrent neural network layer using the hyperbolic tangent
activation function, passing on all hidden states or a selection
of them to the next layer.
The hidden state is initialized to zeros.
Parameters
----------
dim : int
The number of elements in the hidden layer
layer_name : str
The name of the layer. All layers in an MLP must have a unique name.
irange : float
The input-to-hidden weight matrix is initialized with weights in
the uniform interval (-irange, irange). The hidden-to-hidden
matrix is not drawn this way: it is the first factor returned by
the QR decomposition of a random matrix drawn with `rng.randn`.
indices : slice, list of integers or integer, optional
If specified this layer will return only the given hidden
states. If an integer is given, it will not return a
SequenceSpace. Otherwise, it will return a SequenceSpace of
fixed length. Note that a SequenceSpace of fixed length
can be flattened by using the FlattenerLayer.
Note: For now only [-1] is supported.
init_bias : float, optional
Set an initial bias to be added at each time step. Defaults to 0.
nonlinearity : theano.function, optional
Defaults to `tensor.tanh`.
weight_noise : bool, optional
Additive Gaussian noise applied to parameters
noise_std_dev : float, optional
Keyword-only; read from `**kwargs` with a default of .075 and stored
as `_std_dev`. Reset to None when `weight_noise` is false.
"""
def __init__(self, dim, layer_name, irange, indices=None,
             init_bias=0., nonlinearity=tensor.tanh,
             weight_noise=False, **kwargs):
    """
    Store the constructor arguments and initialise layer bookkeeping.

    Every named argument, and the remaining `kwargs` dictionary itself, is
    saved as an instance attribute of the same name. `noise_std_dev` is
    popped from `kwargs` (default .075) into `self._std_dev`, which is
    reset to None when `weight_noise` is false.
    """
    self._std_dev = kwargs.pop('noise_std_dev', .075)
    self.rnn_friendly = True
    self._scan_updates = OrderedDict()
    # Written straight into the instance dictionary, one entry per argument
    vars(self).update(
        dim=dim,
        layer_name=layer_name,
        irange=irange,
        indices=indices,
        init_bias=init_bias,
        nonlinearity=nonlinearity,
        weight_noise=weight_noise,
        kwargs=kwargs,
    )
    super(Recurrent, self).__init__()
    if not self.weight_noise:
        self._std_dev = None
@wraps(Layer.set_input_space)
def set_input_space(self, space):
    # Validates the input space, derives the output space from
    # `self.indices`, and creates the layer's three parameters (W, U, b).
    #
    # Raises ValueError when `space` is not a sequence of vectors, when
    # more than one index is requested, or when `self.irange` is None.
    is_sequence = isinstance(space, (SequenceSpace, SequenceDataSpace))
    if not is_sequence or not isinstance(space.space, VectorSpace):
        raise ValueError("Recurrent layer needs a SequenceSpace("
                         "VectorSpace) or SequenceDataSpace(VectorSpace) "
                         "as input but received %s instead"
                         % (space,))

    self.input_space = space

    if self.indices is not None:
        # NOTE(review): `len` is applied directly, so an integer `indices`
        # fails here with a TypeError.
        if len(self.indices) > 1:
            raise ValueError("Only indices = [-1] is supported right now")
        assert self.indices == [-1], "Only indices = [-1] works now"
        self.output_space = VectorSpace(dim=self.dim)
    elif isinstance(self.input_space, SequenceSpace):
        self.output_space = SequenceSpace(VectorSpace(dim=self.dim))
    elif isinstance(self.input_space, SequenceDataSpace):
        self.output_space = SequenceDataSpace(VectorSpace(dim=self.dim))

    # Initialize the parameters
    rng = self.mlp.rng
    if self.irange is None:
        raise ValueError("Recurrent layer requires an irange value in "
                         "order to initialize its weight matrices")

    input_dim = self.input_space.dim

    # W is the input-to-hidden matrix
    W = rng.uniform(-self.irange, self.irange, (input_dim, self.dim))

    # U is the hidden-to-hidden transition matrix: the Q factor of the QR
    # decomposition of a random matrix
    U = rng.randn(self.dim, self.dim)
    U, _ = scipy.linalg.qr(U)

    # b is the bias
    b = np.zeros((self.dim,))

    self._params = [
        sharedX(W, name=(self.layer_name + '_W')),
        sharedX(U, name=(self.layer_name + '_U')),
        sharedX(b + self.init_bias,
                name=(self.layer_name + '_b'))
    ]
#.........這裏部分代碼省略.........
示例15: __init__
def __init__(self, objective, params, inputs=None,
param_constrainers=None, max_iter=-1,
lr_scalers=None, verbose=0, tol=None,
init_alpha=None, min_init_alpha=1e-3,
reset_alpha=True, conjugate=False,
reset_conjugate=True, gradients=None,
gradient_updates=None, line_search_mode=None,
accumulate=False, theano_function_mode=None):
# Builds the gradient and objective functions for `params` and sets up a
# cached copy of every parameter. Only the first part of the constructor
# is visible in this excerpt; the remainder is omitted.

# Copy every constructor argument onto the instance under its own name
self.__dict__.update(locals())
del self.self
# Pick the default step sizes; they depend on the line search mode
if line_search_mode is None:
if init_alpha is None:
init_alpha = (.001, .005, .01, .05, .1)
else:
assert line_search_mode == 'exhaustive'
if init_alpha is None:
init_alpha = (.5, 1.)
self.init_alpha = tuple([float(elem) for elem in init_alpha])
# NOTE(review): these defaults only rebind the local names; the
# attributes `self.inputs` and `self.param_constrainers` copied above
# keep whatever was passed in, including None.
if inputs is None:
inputs = []
if param_constrainers is None:
param_constrainers = []
obj = objective
self.verbose = verbose
param_to_grad_sym = OrderedDict()
param_to_grad_shared = OrderedDict()
updates = OrderedDict()
if self.gradient_updates is not None:
updates.update(self.gradient_updates)
self.params = [param for param in params]
# For each parameter: take the supplied gradient if there is one,
# otherwise differentiate the objective, and schedule the result to be
# written into a dedicated shared variable.
for param in params:
if self.gradients is not None and param in self.gradients:
g = self.gradients[param]
else:
g = grad(objective, param)
param_to_grad_sym[param] = g
if param.name is not None:
param_name = param.name
else:
param_name = 'anon_param'
grad_name = 'BatchGradientDescent.grad_' + param_name
# Zero-filled storage with the same shape as the parameter's value
grad_shared = sharedX(param.get_value() * 0., name=grad_name)
param_to_grad_shared[param] = grad_shared
updates[grad_shared] = g
self.param_to_grad_shared = param_to_grad_shared
# `t1` is only defined when verbose, and only read under the same test
if self.verbose:
logger.info('batch gradient class compiling gradient function')
t1 = time.time()
if self.accumulate:
self._compute_grad = Accumulator(inputs, updates=updates)
else:
self._compute_grad = function(
inputs,
updates=updates,
mode=self.theano_function_mode,
name='BatchGradientDescent._compute_grad')
if self.verbose:
t2 = time.time()
logger.info('done. Took {0}'.format(t2-t1))
if self.verbose:
logger.info('batch gradient class compiling objective function')
if self.accumulate:
self.obj = Accumulator(inputs, obj)
else:
self.obj = function(inputs, obj, mode=self.theano_function_mode,
name='BatchGradientDescent.obj')
if self.verbose:
logger.info('done')
self.param_to_cache = OrderedDict()
alpha = T.scalar(name='alpha')
alpha.tag.test_value = np.cast[alpha.dtype](.01)
cache_updates = OrderedDict()
goto_updates = OrderedDict()
# Give every parameter a shared variable holding a copy of its value,
# and schedule an update that refreshes that copy from the parameter
for param in params:
if param.name is None:
param_name = 'anon_param'
else:
param_name = param.name
cache_name = 'BatchGradientDescent.param_to_cache[%s]' % param_name
self.param_to_cache[param] = sharedX(param.get_value(borrow=False),
name=cache_name)
cache_updates[self.param_to_cache[param]] = param
cached = self.param_to_cache[param]
g = self.param_to_grad_shared[param]
if lr_scalers is not None and param in lr_scalers:
#......... part of the code is omitted here .........