本文整理汇总了 Python 中 theano.tensor.sgn 函数的典型用法代码示例。如果您正苦于以下问题：Python sgn 函数的具体用法？Python sgn 怎么用？Python sgn 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 sgn 函数的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: mean_field_fancy_step
def mean_field_fancy_step(self, V, P, Mu):
    """Perform one mean-field step and pick a per-element damping factor.

    Returns a tuple ``(Q, Mu1, fancy_damp)`` where ``Q`` is
    ``self.Q_from_A`` applied to the full activation, ``Mu1`` is the full
    activation divided by ``self.gamma`` and ``fancy_damp`` is the damping
    coefficient chosen element-wise below.
    """
    # Interaction term: P*Mu pushed through the beta-scaled W^T and back through W.
    interaction = T.dot(T.dot(P * Mu, self.W.T * self.beta), self.W)
    data_term = T.dot(self.beta * (V - self.b), self.W)

    # Contribution to the activation that depends on the current (P, Mu).
    iA = self.w * P * Mu - interaction
    full_A = iA + data_term + self.a

    Mu1 = full_A / self.gamma
    Q = self.Q_from_A(full_A)
    iMu = iA / self.gamma

    gap = Mu - iMu
    direction = T.sgn(gap)
    magnitude = 1e-10 + abs(gap)  # small constant keeps the division finite

    # If this is negative we are amplifying, so the default damping is used;
    # if it is positive we are flipping, so max(0, Lambda) is used instead.
    discriminant = direction * Mu / magnitude
    Lambda = self.tau * discriminant - direction * iMu / magnitude

    use_default = discriminant <= 0
    fancy_damp = (use_default * self.s_default_damping_factor
                  + (1. - use_default) * T.maximum(0., Lambda))
    return Q, Mu1, fancy_damp
示例2: irprop_minus_updates
def irprop_minus_updates(params, grads):
    """Build iRprop- style ``(param, new_value)`` update pairs.

    :param params: sequence of parameters to update
    :param grads: sequence of gradients, one per entry of ``params``
    :return: list of ``(param, updated_param)`` tuples

    The step-size selection is expressed with ``T.switch`` so that it is part
    of the symbolic graph. A plain Python ``if`` on the result of
    ``T.gt``/``T.lt`` is decided once while the graph is being built, not
    element-wise at run time.
    """
    # iRprop- hyper-parameters.
    positiveStep = 1.2
    negativeStep = 0.5
    maxStep = 1.
    minStep = math.exp(-6)

    # One initial step size per parameter.
    deltas = 0.1 * numpy.ones(len(params))
    # NOTE(review): the step sizes are plain numbers and the "previous
    # gradient" is taken from ``params`` itself, so nothing here is carried
    # over between training iterations. A complete iRprop- presumably needs
    # persistent (shared) state for both -- confirm with the caller.
    last_params = params

    updates = []
    for param, gparam, delta, last_gparam in zip(params, grads, deltas, last_params):
        # Sign agreement between the current and the "previous" gradient.
        change = T.sgn(gparam * last_gparam)
        grown = T.minimum(delta * positiveStep, maxStep)
        shrunk = T.maximum(delta * negativeStep, minStep)
        delta = T.switch(T.gt(change, 0), grown,
                         T.switch(T.lt(change, 0), shrunk, delta))
        # Move against the gradient sign by the selected step size.
        updates.append((param, param - T.sgn(gparam) * delta))
    return updates
示例3: get_updates
def get_updates(self, v):
    """Assemble the update dictionary for one contrastive-divergence step.

    :param v: start of the Gibbs chain passed to ``self.CD``
    :return: ``OrderedDict`` holding the updates from ``self.CD``, the updates
        from ``self.learning_rate`` and one entry per parameter
    """
    # Contrastive divergence.
    chain_end, updates_CD = self.CD(self, chain_start=v, cdk=self.CDk)

    # [Expected] negative log-likelihood.
    data_energy = T.mean(self.free_energy(v), axis=0)
    model_energy = T.mean(self.free_energy(chain_end), axis=0)
    cost = data_energy - model_energy

    # L2 regularization is added to the cost.
    if isinstance(self.regularize, L2Regularization):
        cost = cost + self.regularization

    # Automatic differentiation; the gradient must not flow through the
    # Gibbs sampling, hence ``consider_constant``.
    gparams = T.grad(cost, self.parameters, consider_constant=[chain_end])
    gradients = dict(zip(self.parameters, gparams))

    # Learning rates for all parameters given their gradients.
    lr, updates_lr = self.learning_rate(gradients)

    updates = OrderedDict()
    updates.update(updates_CD)  # updates coming from CD
    updates.update(updates_lr)  # updates coming from the learning-rate object

    # Gradient step for every parameter.
    for param, gparam in gradients.items():
        updates[param] = param - lr[param] * gparam

    # L1 regularization: shrink the proposed b and W towards zero by
    # lr * decay and clamp at zero.
    if isinstance(self.regularize, L1Regularization):
        for target in (self.b, self.W):
            proposed = updates[target]
            shrinkage = lr[target] * self.regularize.decay
            updates[target] = T.sgn(proposed) * T.maximum(abs(proposed) - shrinkage, 0)

    return updates
示例4: irprop_plus_trainer
def irprop_plus_trainer(x, y, w, parameters, loss, random_stream,
                        positive_step=1.2, negative_step=0.5, max_step=1., min_step=1e-6):
    """Build the shared state and updates of the IRPROP+ batch trainer.

    For details see http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.21.3428

    :param positive_step: factor applied to the step size when the new and
        the stored derivative have the same sign
    :param negative_step: factor applied to the step size otherwise
    :param min_step: lower bound for the step size
    :param max_step: upper bound for the step size
    :return: ``(shareds, updates)`` -- the shared variables created here and
        the list of ``[variable, new_value]`` pairs
    """
    loss_value = loss(x, y, w)
    prev_loss_value = theano.shared(1e10)
    shareds = [prev_loss_value]
    updates = []

    for name, param in parameters.items():
        old_derivative = theano.shared(param.get_value() * 0.)
        delta = theano.shared(param.get_value() * 0. + 1e-3)
        new_derivative = T.grad(loss_value, param)
        agreement = new_derivative * old_derivative

        # Where the derivative changed sign, prepare to step back by delta,
        # but only apply that when the loss got worse.
        backtrack = T.where(agreement < 0, delta * T.sgn(old_derivative), 0)
        shift = ifelse(loss_value > prev_loss_value, backtrack, 0. * param)
        # unfortunately we can't do it this way: param += shift

        # Adapt the step size and keep it inside [min_step, max_step].
        new_delta = T.clip(
            T.where(agreement > 0, delta * positive_step, delta * negative_step),
            min_step, max_step)

        # After a sign change the stored derivative is reset to zero.
        new_old_derivative = T.where(agreement < 0, 0, new_derivative)

        updates.extend([
            [param, param + shift - new_delta * T.sgn(new_derivative)],
            [delta, new_delta],
            [old_derivative, new_old_derivative],
        ])
        shareds += [old_derivative, delta]

    updates.append([prev_loss_value, loss_value])
    return shareds, updates
示例5: irprop_plus_trainer
def irprop_plus_trainer(x, y, w, parameters, loss, random_stream,
                        positive_step=1.2, negative_step=0.5, max_step=1., min_step=1e-6):
    """IRPROP+ trainer, see http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.17.1332

    :param parameters: mapping from name to shared parameter
    :param loss: callable ``loss(x, y, w)`` returning the symbolic loss
    :param positive_step: factor applied to the step size when the new and
        the stored derivative have the same sign
    :param negative_step: factor applied to the step size otherwise
    :param min_step: lower bound for the step size
    :param max_step: upper bound for the step size
    :return: ``(shareds, updates)`` -- the shared variables created here and
        the list of ``[variable, new_value]`` pairs
    """
    loss_value = loss(x, y, w)
    prev_loss_value = theano.shared(1e10)
    shareds = []
    updates = []
    # ``items()`` instead of the Python-2-only ``iteritems()`` so the loop
    # runs on both Python 2 and Python 3.
    for name, param in parameters.items():
        old_derivative = theano.shared(param.get_value() * 0.)
        delta = theano.shared(param.get_value() * 0. + 1e-3)
        new_derivative = T.grad(loss_value, param)
        # Where the derivative changed sign, prepare to step back by delta.
        shift_if_bad_step = T.where(new_derivative * old_derivative < 0, delta * T.sgn(old_derivative), 0)
        # NOTE: the original author flagged the conditional shift below as
        # not working; the reason is not recorded.
        shift = ifelse(loss_value > prev_loss_value, shift_if_bad_step, 0. * param)
        # unfortunately we can't do it this way: param += shift
        new_delta = T.where(new_derivative * old_derivative > 0, delta * positive_step, delta * negative_step)
        new_delta = T.clip(new_delta, min_step, max_step)
        updates.append([param, param + shift - new_delta * T.sgn(new_derivative)])
        updates.append([delta, new_delta])
        # After a sign change the stored derivative is reset to zero.
        new_old_derivative = T.where(new_derivative * old_derivative < 0, 0, new_derivative)
        updates.append([old_derivative, new_old_derivative])
        # NOTE(review): prev_loss_value is appended once per parameter, so it
        # appears repeatedly in ``shareds``; kept as-is for callers.
        shareds.extend([old_derivative, delta, prev_loss_value])
    updates.append([prev_loss_value, loss_value])
    return shareds, updates
示例6: rprop
def rprop(param,learning_rate,gparam,mask,updates,current_cost,previous_cost,
          eta_plus=1.2,eta_minus=0.5,max_delta=50, min_delta=10e-6):
    """Return the RProp-updated expression for ``param``.

    Three shared variables are created (previous gradient, step size and
    last increment) and their updates are appended to ``updates`` in place.
    ``current_cost`` and ``previous_cost`` are accepted but not used here.

    :return: ``param + inc * mask`` where ``inc`` is the signed step
    """
    shape = param.shape.eval()
    previous_grad = sharedX(numpy.ones(shape), borrow=True)
    delta = sharedX(learning_rate * numpy.ones(shape), borrow=True)
    previous_inc = sharedX(numpy.zeros(shape), borrow=True)

    zero = T.zeros_like(param)
    change = previous_grad * gparam

    # Conditions shared by the three selections below.
    same_sign = T.gt(change, 0.)
    sign_flipped = T.lt(change, 0.)
    inactive = T.eq(mask * gparam, 0.)

    # Step size: unchanged for a zero gradient, grown when the sign is kept,
    # shrunk when it flips, then clipped to [min_delta, max_delta].
    adapted = T.switch(
        T.eq(gparam, 0.),
        delta,
        T.switch(same_sign,
                 delta * eta_plus,
                 T.switch(sign_flipped, delta * eta_minus, delta)))
    new_delta = T.clip(adapted, min_delta, max_delta)

    # Stored gradient: kept where masked/zero, zeroed after a sign flip.
    new_previous_grad = T.switch(
        inactive,
        previous_grad,
        T.switch(same_sign,
                 gparam,
                 T.switch(sign_flipped, zero, gparam)))

    # Increment: no move where masked/zero or after a sign flip.
    step = - T.sgn(gparam) * new_delta
    inc = T.switch(
        inactive,
        zero,
        T.switch(same_sign,
                 step,
                 T.switch(sign_flipped, zero, step)))

    updates.append((previous_grad, new_previous_grad))
    updates.append((delta, new_delta))
    updates.append((previous_inc, inc))
    return param + inc * mask
示例7: Update
def Update(params, gradients, velocities):
    """Build momentum updates from sign-preserving, magnitude-clipped gradients.

    Returns a list of ``(variable, new_value)`` pairs: first one per velocity,
    then one per parameter. As side effects the entries of ``velocities`` are
    replaced in place by the new velocity expressions, and the module-level
    ``LEARNING_RATE`` is multiplied by ``LEARNING_RATE_DECAY``.
    """
    global LEARNING_RATE

    # NOTE(review): the velocity updates clip |g| to [0.0001, 9.8] while the
    # velocities used for the parameter step below clip to [0.5, 9.8];
    # confirm which lower bound is intended.
    param_updates = [
        (vel, vel * MOMENTUM - LEARNING_RATE * T.sgn(grad) * T.clip(T.abs_(grad), 0.0001, 9.8))
        for grad, vel in zip(gradients, velocities)
    ]

    for idx, grad in enumerate(gradients):
        velocities[idx] = (velocities[idx] * MOMENTUM
                           - LEARNING_RATE * T.sgn(grad) * T.clip(T.abs_(grad), 0.5, 9.8))

    for par, vel in zip(params, velocities):
        param_updates.append((par, par + vel))

    LEARNING_RATE = LEARNING_RATE * LEARNING_RATE_DECAY
    return param_updates
示例8: get_updates
def get_updates(self, params, cost):
    """Build RProp updates for ``params`` with respect to ``cost``.

    For every parameter two shared variables are created: the gradient seen
    on the previous step (initialised to ones) and the current step size
    (initialised to ``self.lr``).

    :param params: shared parameters to optimise
    :param cost: symbolic scalar to differentiate
    :return: list of ``(variable, new_value)`` pairs covering the parameters,
        the gradient histories and the step sizes, in that order
    """
    grads_rprop = []
    grads_history = []
    grads_rprop_new = []

    grads = T.grad(cost, params)
    for param, grad in zip(params, grads):
        # The original also evaluated ``param.shape`` for each parameter into
        # a list that was never read; that dead work has been dropped.

        # Gradient history used by RProp.
        grad_hist = theano.shared(param.get_value() * 0.0 + 1.0,
                                  name="rpop_hist_%s" % param)
        grads_history.append(grad_hist)

        # Variable in which the RProp step size is stored.
        grad_rprop = theano.shared(param.get_value() * 0.0 + self.lr,
                                   name="rprop_%s" % param)
        grads_rprop.append(grad_rprop)

        # New step size: multiplied by ``self.plus`` where the gradient kept
        # its sign, by ``self.minus`` everywhere else (including a zero
        # product).
        rprop_sign = T.sgn(grad_hist * grad)
        grad_rprop_new = grad_rprop * (
            T.eq(rprop_sign, 1) * self.plus
            + T.neq(rprop_sign, 1) * self.minus
        )
        grads_rprop_new.append(grad_rprop_new)

    updates = [
        # Update parameters according to the RProp update rule.
        (p, p - rg * T.sgn(g))
        for p, g, rg in zip(params, grads, grads_rprop_new)
    ] + [
        # Save the current gradient for the next step.
        (hg, g) for hg, g in zip(grads_history, grads)
    ] + [
        # Save the new RProp step sizes.
        (rg, rg_new) for rg, rg_new in zip(grads_rprop, grads_rprop_new)
    ]

    return updates
示例9: symGivens2
def symGivens2(a, b):
    """
    Stable symmetric Givens rotation plus reflection.

    Parameters
        a: (theano scalar) first element of a two-vector [a; b]
        b: (theano scalar) second element of a two-vector [a; b]
    Returns
        c  cosine(theta), where theta is the implicit angle of
           rotation (counter-clockwise) in a plane-rotation
        s  sine(theta)
        d  two-norm of [a; b]

    Description:
        This method gives c and s such that
            [ c  s ][a] = [d],
            [ s -c ][b]   [0]
        where d = two norm of vector [a, b],
            c = a / sqrt(a^2 + b^2) = a / d,
            s = b / sqrt(a^2 + b^2) = b / d.
        The implementation guards against overflow in computing
        sqrt(a^2 + b^2).

    SEE ALSO:
        (1) Algorithm 4.9, stable *unsymmetric* Givens
        rotations in Golub and van Loan's book Matrix
        Computations, 3rd edition.
        (2) MATLAB's function PLANEROT.

    Observations:
        Implementing this function as a single op in C might improve speed
        considerably ..
    """
    zero = constantX(0)
    one = constantX(1)

    a_is_zero = T.eq(a, zero)
    b_is_zero = T.eq(b, zero)
    b_dominates = T.gt(abs(b), abs(a))

    # Normalisers written in terms of the ratio of the two components.
    norm_b_over = T.sqrt(one + (a / b) ** 2)  # selected when |b| > |a|
    norm_a_over = T.sqrt(one + (b / a) ** 2)  # selected otherwise

    # Cosine.
    c_b_zero = T.switch(a_is_zero, one, T.sgn(a))
    c_b_large = (a / b) * T.sgn(b) / norm_b_over
    c_a_large = T.sgn(a) / norm_a_over
    c_b_nonzero = T.switch(a_is_zero, zero,
                           T.switch(b_dominates, c_b_large, c_a_large))
    c = T.switch(b_is_zero, c_b_zero, c_b_nonzero)

    # Sine.
    s_b_large = T.sgn(b) / norm_b_over
    s_a_large = (b / a) * T.sgn(a) / norm_a_over
    s = T.switch(
        b_is_zero,
        zero,
        T.switch(a_is_zero, T.sgn(b),
                 T.switch(b_dominates, s_b_large, s_a_large)),
    )

    # Two-norm.
    d_b_large = b / (T.sgn(b) / norm_b_over)
    d_a_large = a / (T.sgn(a) / norm_a_over)
    d = T.switch(
        b_is_zero,
        abs(a),
        T.switch(a_is_zero, abs(b),
                 T.switch(b_dominates, d_b_large, d_a_large)),
    )
    return c, s, d
示例10: relevance_conv_z
def relevance_conv_z(out_relevances, inputs, weights, bias=None):
    """Propagate relevances backwards through a convolution.

    The output relevances are divided by the stabilised convolution output,
    convolved back with the transposed and flipped weights in ``"full"``
    mode and multiplied by the inputs. When ``bias`` is given, its share of
    the relevance is spread back through an all-ones kernel and added.
    """
    def _swap_and_flip(kernel):
        # Swap the first two axes and reverse both trailing axes.
        return kernel.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1]

    denominators = conv2d(inputs, weights)
    if bias is not None:
        denominators = denominators + bias.dimshuffle("x", 0, "x", "x")

    # Stabilise: prevent division by 0 and division by small numbers.
    eps = 1e-4
    denominators = denominators + T.sgn(denominators) * eps
    denominators = denominators + T.eq(denominators, 0) * eps
    normed_relevances = out_relevances / denominators

    # Up-convolution back to the input space.
    in_relevances = conv2d(normed_relevances, _swap_and_flip(weights), border_mode="full")
    in_relevances_proper = in_relevances * inputs

    if bias is not None:
        bias_relevance = bias.dimshuffle("x", 0, "x", "x") * normed_relevances
        # Divide the bias by the weight size before convolving back
        # (the original author was unsure whether this normalisation is correct).
        weight_size = T.prod(weights.shape[1:]).astype(theano.config.floatX)
        fraction_bias = bias_relevance / weight_size
        bias_rel_in = conv2d(
            fraction_bias, _swap_and_flip(T.ones_like(weights)), border_mode="full"
        )
        in_relevances_proper = in_relevances_proper + bias_rel_in

    return in_relevances_proper
示例11: fd3
def fd3(mlp, fdm, params, globalLR1, globalLR2, momentParam1, momentParam2):
    """Assemble the T1 and T2 updates of the finite-difference step.

    :return: ``(updateT1 + updateT2, debugs)`` where ``debugs`` holds the
        second element of every pair returned by ``update_fun``
    """
    cost1 = mlp.classError1 + mlp.penalty
    gradT1reg = T.grad(cost1, mlp.paramsT2)

    # Take the optimiser from Adam when requested.
    opt2 = adam() if params.opt2 in ['adam'] else None

    # Update W - (1) + (3)
    updateT1 = [
        (param, param + uC1 - uC2)
        for param, uC1, uC2 in zip(mlp.paramsT1, fdm.updateC1T1, fdm.updateC2T1)
    ]

    # Compute grad T2 of C1, update T2 - [(4) - (2)] / lr1
    updateT2 = []
    onlyT2param = []
    for param, grad, gT2 in zip(mlp.paramsT2, gradT1reg, fdm.gradC1T2):
        # NOTE(review): ``step`` is not defined in this function; presumably
        # a module-level value -- confirm.
        grad_proxi = (grad - gT2)/step*globalLR1
        if params.T2onlySGN:
            # Keep only the sign of the finite-difference estimate.
            grad_proxi = T.sgn(grad_proxi)
        tempUp, tempPair, _ = update_fun(param, T.reshape(grad_proxi, param.shape), None,
                                         'T2', {}, opt2, params,
                                         globalLR1, globalLR2, momentParam1, momentParam2)
        updateT2.extend(tempUp)
        onlyT2param.extend(tempPair)

    debugs = [check for (_, check) in onlyT2param]
    return updateT1 + updateT2, debugs
示例12: irprop_minus_trainer
def irprop_minus_trainer(x, y, w, parameters, loss, random_stream,
                         positive_step=1.2, negative_step=0.5, max_step=1., min_step=1e-6):
    """Build the shared state and updates of the IRPROP- batch trainer.

    For details see http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.21.3428 .
    This is default trainer, very stable for classification.

    :param positive_step: factor applied to the step size when the new and
        the stored derivative have the same sign
    :param negative_step: factor applied to the step size otherwise
    :param min_step: lower bound for the step size
    :param max_step: upper bound for the step size
    :return: ``(shareds, updates)`` -- the shared variables created here and
        the list of ``[variable, new_value]`` pairs
    """
    loss_value = loss(x, y, w)
    shareds = []
    updates = []

    for name, param in parameters.items():
        old_derivative = theano.shared(param.get_value() * 0.)
        delta = theano.shared(param.get_value() * 0. + 1e-3)
        shareds += [old_derivative, delta]

        new_derivative = T.grad(loss_value, param)
        agreement = new_derivative * old_derivative

        # Adapt the step size and keep it inside [min_step, max_step].
        new_delta = T.clip(
            T.where(agreement > 0, delta * positive_step, delta * negative_step),
            min_step, max_step)

        # After a sign change the stored derivative is reset to zero.
        new_old_derivative = T.where(agreement < 0, 0, new_derivative)

        updates.extend([
            [param, param - new_delta * T.sgn(new_derivative)],
            [delta, new_delta],
            [old_derivative, new_old_derivative],
        ])

    return shareds, updates
示例13: __abs__
def __abs__(self, other=None):
    """Return a copy of this layer whose output is ``abs(self.out)``.

    ``other`` is ignored; it now defaults to ``None`` so that the built-in
    ``abs(layer)``, which calls ``__abs__`` with no extra argument, works
    while existing two-argument calls keep working.

    If the copy carries ``grads``, each entry is multiplied by the sign of
    the output *before* the absolute value was taken. The original code took
    the sign after the reassignment, i.e. of a non-negative quantity, so it
    could never flip a gradient. (Presumably ``grads`` holds derivatives of
    ``out`` -- confirm.)
    """
    assert hasattr(self, 'out'), 'all layers need a default output'
    new_obj = utils.copy(self)
    # Capture the pre-abs output: its sign is the derivative of abs().
    pre_abs_out = new_obj.out
    new_obj.out = abs(pre_abs_out)
    if hasattr(new_obj, 'grads'):
        new_obj.grads = [TT.sgn(pre_abs_out) * x for x in new_obj.grads]
    return new_obj
示例14: get_state
def get_state(self):
    """Return the parent state extended with normalised stimulus responses.

    Adds ``'stim_response_x'`` and ``'stim_response_t'`` to the dictionary
    returned by the superclass.
    """
    st = super(LatentTypeWithTuningCurve, self).get_state()
    temporal = self.stim_resp_t

    # The filters are non-identifiable: negating both the temporal and the
    # spatial filter gives the same net effect. By convention, choose the
    # sign that results in the most positive temporal filter.
    sign = T.sgn(T.sum(temporal, axis=0))
    # NOTE(review): the result of addbroadcast is discarded here (and for Z
    # below), so these calls presumably have no effect on ``sign``/``Z`` --
    # confirm whether the return value was meant to be assigned.
    T.addbroadcast(sign, 0)

    # Similarly, a constant can be traded between the spatial and temporal
    # pieces. By convention, set the temporal filter to norm 1.
    Z = T.sqrt(T.sum(temporal**2, axis=0))
    T.addbroadcast(Z, 0)

    # Normalised temporal response.
    stim_resp_t = sign*(1.0/Z)*temporal

    # Spatial response carries the sign and the norm; reshape when 2-D.
    stim_resp_x = sign*Z*self.stim_resp_x
    if self.spatial_ndim == 2:
        stim_resp_x = T.reshape(stim_resp_x,
                                self.spatial_shape + (self.R,))

    st.update({'stim_response_x' : stim_resp_x,
               'stim_response_t' : stim_resp_t})
    return st
示例15: irprop_star_trainer
def irprop_star_trainer(x, y, w, parameters, loss, random_stream,
                        positive_step=1.2, negative_step=0.5, max_step=1., min_step=1e-6):
    """IRPROP* trainer (own experimental modification, not recommended for usage).

    For every parameter the flattened gradient is combined pairwise into an
    ``n x n`` sum matrix and an ``n x n`` difference matrix; each matrix gets
    its own step-size and stored-derivative shared variables, and the signed
    steps are summed over axis 1 and subtracted from the parameter.

    :return: ``(shareds, updates)`` -- the shared variables created here and
        the list of ``[variable, new_value]`` pairs
    """
    loss_value = loss(x, y, w)
    shareds = []
    updates = []

    for name, param in parameters.items():
        n = numpy.prod(param.get_value().shape).astype(int)
        flat_grad = T.grad(loss_value, param).flatten()
        as_column = flat_grad.reshape([n, 1])
        as_row = flat_grad.reshape([1, n])

        new_param = param
        for new_derivative in (as_column + as_row, as_column - as_row):
            delta = theano.shared(numpy.zeros([n, n], dtype=floatX) + 1e-3)
            old_derivative = theano.shared(numpy.zeros([n, n], dtype=floatX))
            agreement = new_derivative * old_derivative

            # Adapt the step size and keep it inside [min_step, max_step].
            new_delta = T.clip(
                T.where(agreement > 0, delta * positive_step, delta * negative_step),
                min_step, max_step)
            updates.append([delta, new_delta])

            # After a sign change the stored derivative is reset to zero.
            updates.append([old_derivative, T.where(agreement < 0, 0, new_derivative)])

            signed_steps = new_delta * T.sgn(new_derivative)
            new_param = new_param - signed_steps.sum(axis=1).reshape(param.shape)
            shareds += [old_derivative, delta]

        updates.append([param, new_param])

    return shareds, updates