本文整理汇总了Python中theano.tensor.minimum函数的典型用法代码示例。如果您正苦于以下问题：Python minimum函数的具体用法？Python minimum怎么用？Python minimum使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了minimum函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get
def get(self, y_p, i, g):
    """Assemble the symbolic attention inputs for attention component `i`.

    :param y_p: symbolic tensor used as the pre-activation; it is first
      projected with `W_att_re` / `b_att_re` when `attrs['template']`
      differs from `self.layer.unit.n_out`.
    :param i: index passed to `self.item(name, i)` to select the
      per-component variables.
    :param g: when greater than zero, the last entry of
      `self.glimpses[i]` is added to the pre-activation.
    :return: tuple `(B, C, I, h_p, W_att_in, b_att_in)` where `h_p` is
      `tanh` of the assembled pre-activation.

    NOTE(review): the nesting of the `if` bodies below was reconstructed
    from the statement order -- confirm against the upstream file.
    """
    W_att_re = self.item("W_att_re", i)
    b_att_re = self.item("b_att_re", i)
    B = self.item("B", i)
    C = self.item("C", i)
    I = self.item("I", i)
    # Beam width: |attrs['beam']|, capped by the first dimension of C.
    beam_size = T.minimum(numpy.int32(abs(self.attrs['beam'])), C.shape[0])
    # Start offset of the beam window, clamped to be >= 0 and cast to int32.
    loc = T.cast(T.maximum(T.minimum(T.sum(I,axis=0) * self.n / self.bound - beam_size / 2, T.sum(I,axis=0) - beam_size), 0),'int32')
    if self.attrs['beam'] > 0:
        # Keep only the entries flagged (> 0) by the custom variable P_<i>
        # at `loc`, then reshape I, C and B down to `beam_size` rows.
        beam_idx = (self.custom_vars[('P_%d' % i)][loc].dimshuffle(1,0).flatten() > 0).nonzero()
        I = I.reshape((I.shape[0]*I.shape[1],))[beam_idx].reshape((beam_size,I.shape[1]))
        C = C.reshape((C.shape[0]*C.shape[1],C.shape[2]))[beam_idx].reshape((beam_size,C.shape[1],C.shape[2]))
        B = B.reshape((B.shape[0]*B.shape[1],B.shape[2]))[beam_idx].reshape((beam_size,B.shape[1],B.shape[2]))
    # Project y_p only when the template size differs from the unit output size.
    if self.attrs['template'] != self.layer.unit.n_out:
        z_p = T.dot(y_p, W_att_re) + b_att_re
    else:
        z_p = y_p
    if self.attrs['momentum'] == 'conv1d':
        from theano.tensor.nnet import conv
        att = self.item('att', i)
        F = self.item("F", i)
        # Full-mode convolution of `att` with the filters F, cropped by
        # half the filter length on each side, then projected with U.
        # NOTE(review): the slice bounds rely on `/` acting as integer
        # division -- confirm for the Python/Theano version in use.
        v = T.dot(T.sum(conv.conv2d(border_mode='full',
                                    input=att.dimshuffle(1, 'x', 0, 'x'),
                                    filters=F).dimshuffle(2,3,0,1),axis=1)[F.shape[2]/2:-F.shape[2]/2+1],self.item("U",i))
        # Mask with I and normalise over the first axis before weighting C.
        v = I * v / v.sum(axis=0,keepdims=True)
        z_p += T.sum(C * v,axis=0)
    if g > 0:
        z_p += self.glimpses[i][-1]
    h_p = T.tanh(z_p)
    return B, C, I, h_p, self.item("W_att_in", i), self.item("b_att_in", i)
示例2: advanced_indexing
def advanced_indexing(volume, *indices_list, **kwargs):
    """ Index `volume` with one index tensor per leading axis.

    Theano<=0.9 only supports advanced indexing along the first
    dimension, so the per-axis indices are clipped to the valid range,
    folded into one flat index and applied to `volume` reshaped to two
    dimensions (all leading axes merged, last axis kept).

    An optional `strides` keyword argument supplies the per-axis
    multipliers; when it is absent they are derived from the shape of
    `volume`.

    Notes
    -----
    Assuming `volume` is C contiguous.
    """
    n_axes = len(indices_list)
    float_x = theano.config.floatX

    strides = kwargs.get("strides")
    if strides is None:
        leading_dims = T.cast(volume.shape[:n_axes], dtype=float_x)
        reversed_strides = T.concatenate(
            [T.ones((1,)), T.cumprod(leading_dims[::-1])[:-1]], axis=0)
        strides = reversed_strides[::-1]

    dims = T.cast(volume.shape, dtype=float_x)

    # The last index is added unscaled; every earlier axis is clipped
    # and weighted by its stride.
    flat = T.maximum(0, T.minimum(indices_list[-1], dims[n_axes - 1] - 1))
    for axis, raw_index in enumerate(indices_list[:-1]):
        in_range = T.maximum(0, T.minimum(raw_index, dims[axis] - 1))
        flat += in_range * strides[axis]

    flat = T.cast(flat, dtype="int32")
    return volume.reshape((-1, volume.shape[-1]))[flat]
示例3: call
def call(self, X):
    """Attenuate every frame outside a box that is enlarged by `self.scale`.

    `X` must be a list `[frame, position]`. Columns 0..3 of `position`
    are read as the two box corners (x0, y0, x1, y1) on a [-1, 1] grid.
    Pixels inside the enlarged box keep their value; all others are
    multiplied by `self.alpha`. The result has the same shape as `frame`.
    """
    if not (type(X) is list and len(X) == 2):
        raise Exception("SquareAttention must be called on a list of two tensors. Got: " + str(X))
    frame, position = X

    # Merge all leading axes (e.g. time) so each row is one frame / one box.
    frame_shape = K.shape(frame)
    chans, height, width = frame_shape[-3:]
    box_dim = K.shape(position)[-1]
    flat_frames = K.reshape(frame, (-1, chans, height, width))
    box = K.reshape(position, (-1, box_dim))

    # Grow the box on every side; both margins are computed from the
    # original corners before any corner is moved.
    margin_x = THT.abs_(box[:, 2] - box[:, 0]) * self.scale / 2.0
    margin_y = THT.abs_(box[:, 3] - box[:, 1]) * self.scale / 2.0
    box = THT.maximum(THT.set_subtensor(box[:, 0], box[:, 0] - margin_x), -1.0)
    box = THT.minimum(THT.set_subtensor(box[:, 2], box[:, 2] + margin_x), 1.0)
    box = THT.maximum(THT.set_subtensor(box[:, 1], box[:, 1] - margin_y), -1.0)
    box = THT.minimum(THT.set_subtensor(box[:, 3], box[:, 3] + margin_y), 1.0)

    def column(idx):
        # One box coordinate per row, broadcastable against a pixel grid.
        return box[:, idx].dimshuffle(0, 'x')

    grid_x = Data.linspace(-1.0, 1.0, width)
    grid_y = Data.linspace(-1.0, 1.0, height)
    inside_x = THT.gt(grid_x, column(0)) * THT.le(grid_x, column(2))
    inside_y = THT.gt(grid_y, column(1)) * THT.le(grid_y, column(3))

    # 1 inside the box, `alpha` everywhere else.
    mask = inside_y.dimshuffle(0, 1, 'x') * inside_x.dimshuffle(0, 'x', 1)
    mask = mask + self.alpha - THT.gt(mask, 0.) * self.alpha

    attended = flat_frames * mask.dimshuffle(0, 'x', 1, 2)
    # Restore the original leading axes.
    return K.reshape(attended, frame_shape)
示例4: lp_norm
def lp_norm(self, n, k, r, c, z):
    '''
    Write the learned-norm pooling value of one window into `z`.

    Lp = ( 1/n * sum(|x_i|^p, 1..n))^(1/p) where p = 1 + ln(1+e^P)

    The window is taken from `self.y[n, k]`, starting at
    `(r * stride[0], c * stride[1])` with extent `self.pool_size`, and
    is then cropped using the image size, the padding and the
    `x_m2d` / `x_m1d` attributes.

    :param n: first index into `self.y` and `z`
    :param k: second index into `self.y` and `z`
    :param r: output row; also selects the window's first row
    :param c: output column; also selects the window's first column
    :param z: tensor that receives the result at `[n, k, r, c]`
    :return: `z` with entry `[n, k, r, c]` set to the pooled value
    '''
    pool_rows, pool_cols = self.pool_size
    stride_rows, stride_cols = self.stride
    pad_rows = self.pad[0]
    pad_cols = self.pad[1]

    # Row extent of the window, cropped at both ends.
    top = r * stride_rows
    bottom = T.minimum(top + pool_rows, self.img_rows)
    top = T.maximum(top, pad_rows)
    bottom = T.minimum(bottom, self.x_m2d + pad_rows)

    # Column extent of the window, cropped the same way.
    left = c * stride_cols
    right = T.minimum(left + pool_cols, self.img_cols)
    left = T.maximum(left, pad_cols)
    right = T.minimum(right, self.x_m1d + pad_cols)

    window = T.flatten(self.y[n, k, top:bottom, left:right], 1)
    mean_power = T.mean(T.pow(T.abs_(window), 1 + T.log(1 + T.exp(self.P))))
    pooled = T.pow(mean_power, 1 / (1 + T.log(1 + T.exp(self.P))))
    return T.set_subtensor(z[n, k, r, c], pooled)
示例5: _output
def _output(self, input, *args, **kwargs):
    """Scale the negative part of `input` by `alpha` on the affected channels.

    Computes `x + min(0, x) * (alpha - 1)`, which leaves positive values
    unchanged and multiplies negative values by `alpha`. When only the
    first `self.affected_channels` channels (axis 1) are affected, the
    remaining channels are passed through untouched.
    """
    slope_delta = (self.alpha - 1).reshape(self.filter_shape)

    def leak(values):
        return values + T.minimum(0, values) * slope_delta

    n_affected = self.affected_channels
    if n_affected == self.n_channel:
        return leak(input)

    head = input[:, :n_affected]
    tail = input[:, n_affected:]
    return T.concatenate([leak(head), tail], axis=1)
示例6: _interpolate
def _interpolate(im, x, y, out_height, out_width):
    """Bilinearly sample `im` at the normalised coordinates `(x, y)`.

    `im` is unpacked as `[num_batch, height, width, channels]`. `x` and
    `y` are clipped to [-1, 1] and mapped to [0, width - 1] and
    [0, height - 1]. `out_height * out_width` is the number of samples
    taken per batch entry. Returns one row of `channels` values per
    sample, in `theano.config.floatX`.
    """
    num_batch, height, width, channels = im.shape
    float_x = theano.config.floatX
    height_f = T.cast(height, float_x)
    width_f = T.cast(width, float_x)

    # Clip to [-1, 1], then rescale to pixel coordinates.
    x = T.clip(x, -1, 1)
    y = T.clip(y, -1, 1)
    x = (x + 1) / 2 * (width_f - 1)
    y = (y + 1) / 2 * (height_f - 1)

    # Corners of the surrounding 2x2 neighbourhood: float copies feed
    # the weights, int64 copies feed the lookup. The upper corner is
    # capped so the lookup never leaves the image.
    x_lo_f = T.floor(x)
    y_lo_f = T.floor(y)
    x_hi_f = x_lo_f + 1
    y_hi_f = y_lo_f + 1
    x_lo = T.cast(x_lo_f, 'int64')
    y_lo = T.cast(y_lo_f, 'int64')
    x_hi = T.cast(T.minimum(x_hi_f, width_f - 1), 'int64')
    y_hi = T.cast(T.minimum(y_hi_f, height_f - 1), 'int64')

    # Pixels are looked up in the flattened [num_batch*height*width,
    # channels] image, so every index needs its batch offset.
    row_stride = width
    image_stride = width*height
    batch_offset = T.repeat(
        T.arange(num_batch, dtype='int64')*image_stride, out_height*out_width)
    row_lo = batch_offset + y_lo*row_stride
    row_hi = batch_offset + y_hi*row_stride

    im_flat = im.reshape((-1, channels))
    corner_pixels = [
        im_flat[row_lo + x_lo],
        im_flat[row_hi + x_lo],
        im_flat[row_lo + x_hi],
        im_flat[row_hi + x_hi],
    ]
    corner_weights = [
        (x_hi_f-x) * (y_hi_f-y),
        (x_hi_f-x) * (y-y_lo_f),
        (x-x_lo_f) * (y_hi_f-y),
        (x-x_lo_f) * (y-y_lo_f),
    ]

    output = T.sum(
        [weight.dimshuffle(0, 'x')*pixels
         for weight, pixels in zip(corner_weights, corner_pixels)],
        axis=0)
    assert str(output.dtype) == theano.config.floatX, str(output.dtype)
    return output
示例7: _interpolate
def _interpolate(im, x, y, out_height, out_width, dtype = 'float32'):
    """Bilinearly sample `im` at `(x, y)`, leaving out-of-range samples at zero.

    `im` is unpacked as `[num_batch, height, width, channels]`. Only
    samples with both `x` and `y` inside [0, 1] are interpolated; the
    coordinates are scaled by `width - 1` / `height - 1` directly.
    `out_height * out_width` is the number of samples per batch entry
    and `dtype` is the float type used for the image dimensions.

    Returns a zero tensor shaped like the flattened image with the
    interpolated values written at the rows of the in-range samples.
    """
    # *_f are floats
    num_batch, height, width, channels = im.shape
    height_f = T.cast(height, dtype = dtype)
    width_f = T.cast(width, dtype = dtype)
    # positions of the samples whose coordinates lie inside [0, 1]
    idx = ((x >= 0) & (x <= 1) & (y >= 0) & (y <= 1)).nonzero()[0]
    # scale coordinates from [0, 1] to [0, width/height - 1]; unlike the
    # [-1, 1] convention, no shift by one and halving is applied here
    x = x * (width_f - 1)
    y = y * (height_f - 1)
    # obtain indices of the 2x2 pixel neighborhood surrounding the coordinates;
    # we need those in floatX for interpolation and in int64 for indexing. for
    # indexing, we need to take care they do not extend past the image.
    x0_f = T.floor(x)
    y0_f = T.floor(y)
    x1_f = x0_f + 1
    y1_f = y0_f + 1
    x0 = T.cast(x0_f, 'int64')
    y0 = T.cast(y0_f, 'int64')
    x1 = T.cast(T.minimum(x1_f, width_f - 1), 'int64')
    y1 = T.cast(T.minimum(y1_f, height_f - 1), 'int64')
    # The input is [num_batch, height, width, channels]. We do the lookup in
    # the flattened input, i.e [num_batch*height*width, channels]. We need
    # to offset all indices to match the flat version
    dim2 = width
    dim1 = width*height
    base = T.repeat(
        T.arange(num_batch, dtype='int64')*dim1, out_height*out_width)
    base_y0 = base + y0*dim2
    base_y1 = base + y1*dim2
    idx_a = base_y0 + x0
    idx_b = base_y1 + x0
    idx_c = base_y0 + x1
    idx_d = base_y1 + x1
    # use indices to lookup pixels, restricted to the in-range samples
    im_flat = im.reshape((-1, channels))
    Ia = im_flat[idx_a[idx]]
    Ib = im_flat[idx_b[idx]]
    Ic = im_flat[idx_c[idx]]
    Id = im_flat[idx_d[idx]]
    # calculate interpolated values (again only for the in-range samples)
    wa = ((x1_f-x) * (y1_f-y)).dimshuffle(0, 'x')[idx, :]
    wb = ((x1_f-x) * (y-y0_f)).dimshuffle(0, 'x')[idx, :]
    wc = ((x-x0_f) * (y1_f-y)).dimshuffle(0, 'x')[idx, :]
    wd = ((x-x0_f) * (y-y0_f)).dimshuffle(0, 'x')[idx, :]
    output = T.sum([wa*Ia, wb*Ib, wc*Ic, wd*Id], axis=0)
    # NOTE(review): `out` has one row per input pixel while `idx` counts
    # sample points, so this appears to assume out_height*out_width equals
    # height*width -- confirm with the caller.
    out = T.zeros_like(im_flat)
    return T.set_subtensor(out[idx, :], output)
示例8: _interpolate
def _interpolate(im, x, y, out_height, out_width, num_b):
    """Bilinearly sample `im` at `(x, y)`, doing the index arithmetic in floatX.

    `im` is unpacked as `[_, height, width, channels]`; the batch size is
    taken from `num_b` rather than from `im.shape`. `x` and `y` are
    clipped to [-1, 1] and mapped to [0, width - 1] / [0, height - 1].
    `out_height * out_width` is the number of samples per batch entry.
    Returns one row of `channels` values per sample.
    """
    _, height, width, channels = im.shape
    # *_f are floats
    height_f = T.cast(height, theano.config.floatX)
    width_f = T.cast(width, theano.config.floatX)
    # clip coordinates to [-1, 1]
    x = T.clip(x, -1, 1)
    y = T.clip(y, -1, 1)
    # scale coordinates from [-1, 1] to [0, width/height - 1]
    x = (x + 1) / 2 * (width_f - 1)
    y = (y + 1) / 2 * (height_f - 1)
    # obtain indices of the 2x2 pixel neighborhood surrounding the coordinates;
    # we need those in floatX for interpolation and in int64 for indexing. for
    # indexing, we need to take care they do not extend past the image.
    x0_f = T.floor(x)
    y0_f = T.floor(y)
    x1_f = x0_f + 1
    y1_f = y0_f + 1
    # KMYI: we cast only at the end to maximize GPU usage
    # (so x0/y0/x1/y1 and the flat indices below stay floating point)
    x0 = T.floor(x0_f)
    y0 = T.floor(y0_f)
    x1 = T.floor(T.minimum(x1_f, width_f - 1))
    y1 = T.floor(T.minimum(y1_f, height_f - 1))
    dim2 = width_f
    dim1 = width_f * height_f
    # per-sample offset of the batch entry inside the flattened image
    base = T.repeat(
        T.arange(num_b,
                 dtype=theano.config.floatX) * dim1,
        out_height * out_width)
    base_y0 = base + y0 * dim2
    base_y1 = base + y1 * dim2
    idx_a = base_y0 + x0
    idx_b = base_y1 + x0
    idx_c = base_y0 + x1
    idx_d = base_y1 + x1
    # use indices to lookup pixels for all samples
    # NOTE(review): the flat indices are floats until this cast; if floatX
    # is float32, values above 2**24 are not exactly representable --
    # confirm that num_b*height*width stays below that.
    im_flat = im.reshape((-1, channels))
    Ia = im_flat[T.cast(idx_a, 'int64')]
    Ib = im_flat[T.cast(idx_b, 'int64')]
    Ic = im_flat[T.cast(idx_c, 'int64')]
    Id = im_flat[T.cast(idx_d, 'int64')]
    # calculate interpolated values
    wa = ((x1_f - x) * (y1_f - y)).dimshuffle(0, 'x')
    wb = ((x1_f - x) * (y - y0_f)).dimshuffle(0, 'x')
    wc = ((x - x0_f) * (y1_f - y)).dimshuffle(0, 'x')
    wd = ((x - x0_f) * (y - y0_f)).dimshuffle(0, 'x')
    output = T.sum([wa * Ia, wb * Ib, wc * Ic, wd * Id], axis=0)
    return output
示例9: _log_add_3
def _log_add_3(log_a, log_b, log_c):
    """Theano expression for log(a+b+c) given log(a), log(b), log(c).

    The largest of the three inputs is factored out, so both
    exponentials are taken of non-positive differences.
    """
    ab_low = T.minimum(log_a, log_b)
    ab_high = T.maximum(log_a, log_b)
    # Split {max(a, b), c} into the overall maximum and the other value.
    top = T.maximum(ab_high, log_c)
    other = T.minimum(ab_high, log_c)
    remainder = T.exp(ab_low - top) + T.exp(other - top)
    return top + T.log1p(remainder)
示例10: past_weight_grad_step
def past_weight_grad_step(xs, es, kp_x, kd_x, kp_e, kd_e, shape, dws=None):
    """
    Do an efficient update of the weights given the two spike-update.
    (This still runs very slowly!)

    Shared variables created here hold, for the inputs and for the errors,
    a time counter (`tx_last` / `te_last`, starting at 1) and a value
    (`x_last` / `e_last`, starting at 0); their updates are registered
    through `add_update`.

    :param xs: An (n_in) vector
    :param es: An (n_out) vector
    :param kp_x: combined with kd_x into the ratio rx = kd_x/(kp_x+kd_x)
    :param kd_x: see kp_x; also divides the incoming `xs` values
    :param kp_e: combined with kd_e into the ratio re = kd_e/(kp_e+kd_e)
    :param kd_e: see kp_e; also divides the incoming `es` values
    :param shape: (n_in, n_out)
    :param dws: optional tensor to accumulate into; zeros of `shape` if None
    :return: `dws` with the contributions of this step added
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_in, n_out = shape
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)
    tx_last = create_shared_variable(np.zeros(n_in)+1)
    te_last = create_shared_variable(np.zeros(n_out)+1)
    x_last = create_shared_variable(np.zeros(n_in))
    e_last = create_shared_variable(np.zeros(n_out))
    # A nonzero entry counts as a spike; only those rows/columns are touched.
    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    x_spike_ixs, = tt.nonzero(x_spikes)
    e_spike_ixs, = tt.nonzero(e_spikes)
    if dws is None:
        dws = tt.zeros(shape)
    # Contribution of the rows belonging to inputs that spiked.
    # NOTE(review): `geoseries_sum` is defined elsewhere; presumably the sum
    # of a geometric series in re*rx over t_start..t_end -- confirm.
    t_last = tt.minimum(tx_last[x_spike_ixs, None], te_last) # (n_x_spikes, n_out)
    dws = tt.inc_subtensor(dws[x_spike_ixs, :], x_last[x_spike_ixs, None]*e_last
        * rx**(tx_last[x_spike_ixs, None]-t_last)
        * re**(te_last[None, :]-t_last)
        * geoseries_sum(re*rx, t_end=t_last, t_start=1)
        )
    # Refresh the input state first: the error-side pass below uses the
    # refreshed values (`new_x_last`, `new_tx_last`).
    new_x_last = tt.set_subtensor(x_last[x_spike_ixs], x_last[x_spike_ixs]*rx**tx_last[x_spike_ixs]+ xs[x_spike_ixs]/as_floatx(kd_x))
    new_tx_last = tt.switch(x_spikes, 0, tx_last)
    # Contribution of the columns belonging to errors that spiked.
    t_last = tt.minimum(new_tx_last[:, None], te_last[e_spike_ixs]) # (n_in, n_e_spikes)
    dws = tt.inc_subtensor(dws[:, e_spike_ixs], new_x_last[:, None]*e_last[e_spike_ixs]
        * rx**(new_tx_last[:, None]-t_last)
        * re**(te_last[None, e_spike_ixs]-t_last)
        * geoseries_sum(re*rx, t_end=t_last, t_start=1)
        )
    add_update(x_last, new_x_last)
    add_update(e_last, tt.set_subtensor(e_last[e_spike_ixs], e_last[e_spike_ixs]*re**te_last[e_spike_ixs]+ es[e_spike_ixs]/as_floatx(kd_e)))
    # Both counters end at 1 for entries that spiked and grow by one otherwise.
    add_update(tx_last, new_tx_last+1)
    add_update(te_last, tt.switch(e_spikes, 1, te_last+1))
    return dws
示例11: perform
def perform(self, x):
    """Clamp `x` element-wise between two parameter vectors.

    `self.params[0]` is the upper bound and `self.params[1]` the lower
    bound. Both are broadcast along the last axis of `x`: over two
    leading axes when `x` has three dimensions, over one otherwise.
    """
    upper, lower = self.params[0], self.params[1]
    pattern = ('x', 'x', 0) if x.ndim==3 else ('x', 0)
    lower = lower.dimshuffle(*pattern)
    upper = upper.dimshuffle(*pattern)
    return T.minimum(T.maximum(lower, x), upper)
示例12: clip_boxes
def clip_boxes(boxes, im_shape):
    """
    Clip boxes to image boundaries.

    Columns of `boxes` repeat as (x1, y1, x2, y2) every four entries.
    x values are limited to [0, im_shape[1] - 1] and y values to
    [0, im_shape[0] - 1]. Returns the clipped boxes.
    """
    max_x = im_shape[1] - 1
    max_y = im_shape[0] - 1
    # offset 0 -> x1, 1 -> y1, 2 -> x2, 3 -> y2
    for offset, upper in enumerate((max_x, max_y, max_x, max_y)):
        coords = boxes[:, offset::4]
        boxes = T.set_subtensor(coords, T.maximum(T.minimum(coords, upper), 0))
    return boxes
示例13: create_activation
def create_activation(activation):
    '''Given an activation description, return a callable that implements it.

    Several activations can be chained with ``+``; they are applied from
    left to right. Single names are looked up case-insensitively.

    Parameters
    ----------
    activation : string
        A string description of an activation function to use.

    Returns
    -------
    activation : callable(float) -> float
        A callable activation function. It carries a
        ``__theanets_name__`` attribute describing it.

    Raises
    ------
    KeyError
        If a name is not a known activation.
    '''
    def compose(inner, outer):
        # Apply `inner` first, then `outer`; the name reads outer(inner).
        chained = lambda z: outer(inner(z))
        chained.__theanets_name__ = '%s(%s)' % (
            outer.__theanets_name__, inner.__theanets_name__)
        return chained

    parts = activation.split('+')
    if len(parts) > 1:
        return functools.reduce(
            compose, (create_activation(part) for part in parts))

    options = {
        'tanh': TT.tanh,
        'linear': lambda z: z,
        'logistic': TT.nnet.sigmoid,
        'sigmoid': TT.nnet.sigmoid,
        'softplus': TT.nnet.softplus,
        'softmax': softmax,

        # rectification
        'relu': lambda z: TT.maximum(0, z),
        'trel': lambda z: TT.maximum(0, TT.minimum(1, z)),
        'trec': lambda z: TT.maximum(1, z),
        'tlin': lambda z: z * (abs(z) > 1),

        # modifiers
        'rect:max': lambda z: TT.minimum(1, z),
        'rect:min': lambda z: TT.maximum(0, z),

        # normalization
        'norm:dc': lambda z: z - z.mean(axis=-1, keepdims=True),
        'norm:max': lambda z: z / TT.maximum(TT.cast(1e-7, FLOAT), abs(z).max(axis=-1, keepdims=True)),
        'norm:std': lambda z: z / TT.maximum(TT.cast(1e-7, FLOAT), TT.std(z, axis=-1, keepdims=True)),
        'norm:z': lambda z: (z - z.mean(axis=-1, keepdims=True)) / TT.maximum(TT.cast(1e-7, FLOAT), z.std(axis=-1, keepdims=True)),
    }

    # Label every option with its key so composed names can be built.
    for label, fn in options.items():
        fn.__theanets_name__ = label

    try:
        return options[activation.lower()]
    except KeyError:
        raise KeyError('unknown activation {}'.format(activation))
示例14: __step
def __step(img, prev_bbox, state, timestep):
    """Run one step: predict a box, build its masks, and refit the classifier.

    Relies on many names from the enclosing scope (convolution and GRU
    weights, `cls_f` / `cls_b`, `featmaps` / `probmaps`, `batch_size`,
    `img_row`, `img_col`, ...).

    Returns a tuple `(bbox, gru_h, timestep + 1, mask, bbox_mask)` and a
    dict mapping `cls_f`, `cls_b`, `featmaps` and `probmaps` to their new
    values (presumably the outputs/updates pair expected by
    `theano.scan` -- TODO confirm).
    """
    # Convolutional features of the image, flattened per sample.
    conv1 = conv2d(img, conv1_filters, subsample=(conv1_stride, conv1_stride), border_mode='half')
    act1 = NN.relu(conv1)
    flat1 = TT.reshape(act1, (-1, conv1_output_dim))
    # Gated recurrent update on [features, previous box]: z and r are the
    # two sigmoid gates, gru_h_ the candidate state.
    gru_in = TT.concatenate([flat1, prev_bbox], axis=1)
    gru_z = NN.sigmoid(TT.dot(gru_in, Wz) + TT.dot(state, Uz) + bz)
    gru_r = NN.sigmoid(TT.dot(gru_in, Wr) + TT.dot(state, Ur) + br)
    gru_h_ = TT.tanh(TT.dot(gru_in, Wg) + TT.dot(gru_r * state, Ug) + bg)
    gru_h = (1 - gru_z) * state + gru_z * gru_h_
    # Box in [-1, 1] (tanh output), converted to a centre and size in pixels.
    bbox = TT.tanh(TT.dot(gru_h, W_fc2) + b_fc2)
    bbox_cx = ((bbox[:, 2] + bbox[:, 0]) / 2 + 1) / 2 * img_row
    bbox_cy = ((bbox[:, 3] + bbox[:, 1]) / 2 + 1) / 2 * img_col
    bbox_w = TT.abs_(bbox[:, 2] - bbox[:, 0]) / 2 * img_row
    bbox_h = TT.abs_(bbox[:, 3] - bbox[:, 1]) / 2 * img_col
    x = TT.arange(img_row, dtype=T.config.floatX)
    y = TT.arange(img_col, dtype=T.config.floatX)
    # Per-axis soft masks: (half size - distance to centre), limited to
    # the range [1e-4, 1].
    mx = TT.maximum(TT.minimum(-TT.abs_(x.dimshuffle('x', 0) - bbox_cx.dimshuffle(0, 'x')) + bbox_w.dimshuffle(0, 'x') / 2., 1), 1e-4)
    my = TT.maximum(TT.minimum(-TT.abs_(y.dimshuffle('x', 0) - bbox_cy.dimshuffle(0, 'x')) + bbox_h.dimshuffle(0, 'x') / 2., 1), 1e-4)
    bbox_mask = mx.dimshuffle(0, 1, 'x') * my.dimshuffle(0, 'x', 1)
    new_cls1_f = cls_f
    new_cls1_b = cls_b
    mask = act1 * bbox_mask.dimshuffle(0, 'x', 1, 2)
    # Store this step's masked features and box mask at index `timestep`;
    # gradients are cut through the stored maps.
    new_featmaps = TG.disconnected_grad(TT.set_subtensor(featmaps[:, timestep], mask))
    new_featmaps.name = 'new_featmaps'
    new_probmaps = TG.disconnected_grad(TT.set_subtensor(probmaps[:, timestep], bbox_mask))
    new_probmaps.name = 'new_probmaps'
    # Training data: every map stored up to and including this step.
    train_featmaps = TG.disconnected_grad(new_featmaps[:, :timestep+1].reshape(((timestep + 1) * batch_size, conv1_nr_filters, img_row, img_col)))
    train_featmaps.name = 'train_featmaps'
    train_probmaps = TG.disconnected_grad(new_probmaps[:, :timestep+1])
    train_probmaps.name = 'train_probmaps'
    # Five gradient-descent steps (step size 0.1) on the classifier
    # filters and biases, using binary cross-entropy against the box masks.
    for _ in range(0, 5):
        # NOTE(review): the reshape gives the convolution output a third
        # axis of size batch_size, i.e. presumably one classifier channel
        # per sample -- confirm.
        train_convmaps = conv2d(train_featmaps, new_cls1_f, subsample=(cls1_stride, cls1_stride), border_mode='half').reshape((batch_size, timestep + 1, batch_size, img_row, img_col))
        train_convmaps.name = 'train_convmaps'
        # For sample b and step t, pick channel b.
        train_convmaps_selected = train_convmaps[TT.arange(batch_size).repeat(timestep+1), TT.tile(TT.arange(timestep+1), batch_size), TT.arange(batch_size).repeat(timestep+1)].reshape((batch_size, timestep+1, img_row, img_col))
        train_convmaps_selected.name = 'train_convmaps_selected'
        train_predmaps = NN.sigmoid(train_convmaps_selected + new_cls1_b.dimshuffle(0, 'x', 'x', 'x'))
        train_loss = NN.binary_crossentropy(train_predmaps, train_probmaps).mean()
        train_grad_cls1_f, train_grad_cls1_b = T.grad(train_loss, [new_cls1_f, new_cls1_b])
        new_cls1_f -= train_grad_cls1_f * 0.1
        new_cls1_b -= train_grad_cls1_b * 0.1
    return (bbox, gru_h, timestep + 1, mask, bbox_mask), {cls_f: TG.disconnected_grad(new_cls1_f), cls_b: TG.disconnected_grad(new_cls1_b), featmaps: TG.disconnected_grad(new_featmaps), probmaps: TG.disconnected_grad(new_probmaps)}
示例15: get_constraint_updates
def get_constraint_updates(self):
    """Build the updates that bring the parameters back inside their constraints.

    In order:

    * if `flags['scalar_lambd']` is set, `lambd` is replaced by its mean
      in every entry;
    * if `flags['wv_norm']` is 'unit' or 'max_unit', the columns of `Wv`
      are rescaled to unit norm (for 'max_unit' only when their norm
      exceeds one);
    * `scalar_norms` is kept at or above 1.0;
    * every parameter named in `clip_max` / `clip_min` is clipped to
      that bound.

    The clipping steps operate on the update already recorded for a
    parameter, if any, so they refine earlier constraints instead of
    replacing them.

    Returns
    -------
    OrderedDict
        Maps each constrained shared variable to its new symbolic value.

    Raises
    ------
    AssertionError
        If a key of `clip_max` / `clip_min` is not the name of one of
        `self.params()`.
    """
    constraint_updates = OrderedDict()

    if self.flags['scalar_lambd']:
        constraint_updates[self.lambd] = T.mean(self.lambd) * T.ones_like(self.lambd)

    # constrain filters to have unit norm
    if self.flags['wv_norm'] in ('unit', 'max_unit'):
        wv = constraint_updates.get(self.Wv, self.Wv)
        wv_norm = T.sqrt(T.sum(wv**2, axis=0))
        if self.flags['wv_norm'] == 'unit':
            constraint_updates[self.Wv] = wv / wv_norm
        elif self.flags['wv_norm'] == 'max_unit':
            constraint_updates[self.Wv] = wv / wv_norm * T.minimum(wv_norm, 1.0)

    # NOTE(review): placed at function level (applied regardless of
    # `wv_norm`); the original nesting is ambiguous -- confirm.
    constraint_updates[self.scalar_norms] = T.maximum(1.0, self.scalar_norms)

    param_names = [param.name for param in self.params()]

    # `.items()` is used instead of `.iteritems()` so this runs on both
    # Python 2 and Python 3.

    ## clip parameters to maximum values (if applicable)
    for (k, v) in self.clip_max.items():
        assert k in param_names
        param = getattr(self, k)
        # Start from the pending update, if any, so it is not discarded.
        current = constraint_updates.get(param, param)
        # The value itself is the lower bound, so only the upper bound
        # `v` has an effect.
        constraint_updates[param] = T.clip(current, current, v)

    ## clip parameters to minimum values (if applicable)
    for (k, v) in self.clip_min.items():
        assert k in param_names
        param = getattr(self, k)
        current = constraint_updates.get(param, param)
        # The value itself is the upper bound, so only the lower bound
        # `v` has an effect.
        constraint_updates[param] = T.clip(current, v, current)

    return constraint_updates