This article collects typical usage examples of the isinf function from Python's theano.tensor module. If you have been wondering what exactly isinf does, how to call it, and where it is useful, the curated code samples here should help.
The 15 code examples below show isinf in real projects, ordered roughly by popularity.
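Before the examples, a minimal standalone sketch (assuming only a standard Theano installation) of what isinf itself computes: an elementwise mask marking +inf and -inf entries.

import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')
f = theano.function([x], T.isinf(x))
print(f(np.array([1.0, np.inf, -np.inf, np.nan], dtype=theano.config.floatX)))
# prints a 0/1 mask (boolean in newer Theano versions): [0 1 1 0]
# note that nan is *not* inf; code that wants both must also call T.isnan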
Example 1: updates
def updates(self, cost):
grad = T.grad(cost, self.param)
grad2 = hessian_diagonal(cost, self.param, grad=grad)
# calculate memory constants
tau_rec = 1.0 / self.tau
tau_inv_rec = 1.0 - tau_rec
# new moving average of gradient
g_avg_new = tau_inv_rec * self.g_avg + tau_rec * grad
# new moving average of squared gradient
v_avg_new = tau_inv_rec * self.v_avg + tau_rec * grad**2
# new moving average of hessian diagonal
h_avg_new = tau_inv_rec * self.h_avg + tau_rec * T.abs_(grad2)
rate_unsafe = (g_avg_new ** 2) / (v_avg_new * h_avg_new)
rate = T.switch(T.isinf(rate_unsafe) | T.isnan(rate_unsafe), self.learning_rate, rate_unsafe)
tau_unsafe = (1 - (g_avg_new ** 2) / v_avg_new) * self.tau + 1
tau_new = T.switch(T.isnan(tau_unsafe) | T.isinf(tau_unsafe), self.tau, tau_unsafe)
return [(self.g_avg, g_avg_new),
(self.v_avg, v_avg_new),
(self.h_avg, h_avg_new),
(self.tau, tau_new),
(self.last_grad, grad),
(self.last_grad2, grad2),
(self.last_rate, rate),
(self.param, self.param - rate * grad)]
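The isinf idiom in Example 1 is the T.switch fallback guarding rate_unsafe and tau_unsafe. Below is a self-contained sketch of just that guard; the fallback value 0.1 is a hypothetical stand-in for self.learning_rate, and the inputs are made up.

import numpy as np
import theano
import theano.tensor as T

num = T.vector('num')
den = T.vector('den')
fallback = np.float32(0.1)  # hypothetical stand-in for self.learning_rate

rate_unsafe = num / den  # x/0 yields inf, 0/0 yields nan
rate = T.switch(T.isinf(rate_unsafe) | T.isnan(rate_unsafe), fallback, rate_unsafe)

f = theano.function([num, den], rate)
print(f(np.array([1., 0., 2.], dtype=theano.config.floatX),
        np.array([0., 0., 4.], dtype=theano.config.floatX)))
# -> [0.1 0.1 0.5]: the inf and nan entries fall back to the default rate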
Example 2: get_updates
def get_updates(self, loss, lr, max_norm=1, beta1=0.9, beta2=0.999,
epsilon=1e-8, grads=None):
# Gradients
if grads is None:
grads = tensor.grad(loss, self.trainables)
# Clipping
norm = tensor.sqrt(sum([tensor.sqr(g).sum() for g in grads]))
m = theanotools.clipping_multiplier(norm, max_norm)
grads = [m*g for g in grads]
# Safeguard against numerical instability
new_cond = tensor.or_(tensor.or_(tensor.isnan(norm), tensor.isinf(norm)),
tensor.or_(norm < 0, norm > 1e10))
grads = [tensor.switch(new_cond, np.float32(0), g) for g in grads]
# Safeguard against numerical instability
#cond = tensor.or_(norm < 0, tensor.or_(tensor.isnan(norm), tensor.isinf(norm)))
#grads = [tensor.switch(cond, np.float32(0), g) for g in grads]
# New values
t = self.time + 1
lr_t = lr*tensor.sqrt(1. - beta2**t)/(1. - beta1**t)
means_t = [beta1*m + (1. - beta1)*g for g, m in zip(grads, self.means)]
vars_t = [beta2*v + (1. - beta2)*tensor.sqr(g) for g, v in zip(grads, self.vars)]
steps = [lr_t*m_t/(tensor.sqrt(v_t) + epsilon)
for m_t, v_t in zip(means_t, vars_t)]
# Updates
updates = [(x, x - step) for x, step in zip(self.trainables, steps)]
updates += [(m, m_t) for m, m_t in zip(self.means, means_t)]
updates += [(v, v_t) for v, v_t in zip(self.vars, vars_t)]
updates += [(self.time, t)]
return norm, grads, updates
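theanotools.clipping_multiplier in Example 2 is project-specific. A generic stand-in that reproduces the same clip-then-sanitize flow might look like the sketch below; the 1e-8 guard against division by zero is an assumption, not part of the original.

import numpy as np
import theano
import theano.tensor as T

g1, g2 = T.vector('g1'), T.vector('g2')
grads = [g1, g2]
max_norm = np.float32(1.0)

norm = T.sqrt(sum(T.sqr(g).sum() for g in grads))
multiplier = T.minimum(np.float32(1.), max_norm / (norm + 1e-8))  # assumed clipping rule
grads = [multiplier * g for g in grads]

# zero every gradient whenever the global norm is not a sane finite number
bad = T.or_(T.or_(T.isnan(norm), T.isinf(norm)), T.or_(norm < 0, norm > 1e10))
safe = [T.switch(bad, np.float32(0), g) for g in grads]

f = theano.function([g1, g2], safe)
print(f(np.array([3., 4.], dtype=theano.config.floatX),
        np.array([0., 0.], dtype=theano.config.floatX)))
# global norm is 5, so both gradients are scaled down by ~1/5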
Example 3: adamgc_
def adamgc_(cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8, max_magnitude=5.0, infDecay=0.1):
updates = []
grads = T.grad(cost, params)
norm = norm_gs(params, grads)
sqrtnorm = T.sqrt(norm)
not_finite = T.or_(T.isnan(sqrtnorm), T.isinf(sqrtnorm))
adj_norm_gs = T.switch(T.ge(sqrtnorm, max_magnitude), max_magnitude / sqrtnorm, 1.0)
i = shared(floatX(0.0))
i_t = i + 1.0
fix1 = 1.0 - (1.0 - b1) ** i_t
fix2 = 1.0 - (1.0 - b2) ** i_t
lr_t = lr * (T.sqrt(fix2) / fix1)
for p, g in zip(params, grads):
g = T.switch(not_finite, infDecay * p, g * adj_norm_gs)
m = shared(p.get_value() * 0.0)
v = shared(p.get_value() * 0.0)
m_t = (b1 * g) + ((1.0 - b1) * m)
v_t = (b2 * T.sqr(g)) + ((1.0 - b2) * v)
g_t = m_t / (T.sqrt(v_t) + e)
p_t = p - (lr_t * g_t)
# e_t = shared(p.get_value() * 0.)
# de_t = (srnd.normal(p.shape, std = 0.05, dtype=theano.config.floatX)*p_t - e_t)*0.05 #*p_t
# p_t = p_t + de_t
# updates.append((e_t, e_t + de_t))
updates.append((m, m_t))
updates.append((v, v_t))
updates.append((p, p_t))
updates.append((i, i_t))
return updates, norm
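Examples 3 and 10 recover from a non-finite gradient norm by replacing every gradient with infDecay * p; combined with the usual update p - lr * g, this shrinks the parameters slightly instead of corrupting them with nan. A minimal sketch of the trick in isolation (Examples 4, 6, 7, 8, 9, 12, and 14 below hard-code the decay as 0.1):

import numpy as np
import theano
import theano.tensor as T

p = T.vector('p')
g = T.vector('g')
infDecay = np.float32(0.1)

norm = T.sqrt(T.sqr(g).sum())
not_finite = T.or_(T.isnan(norm), T.isinf(norm))
g_safe = T.switch(not_finite, infDecay * p, g)

f = theano.function([p, g], g_safe)
print(f(np.array([1., -2.], dtype=theano.config.floatX),
        np.array([np.nan, 3.], dtype=theano.config.floatX)))
# -> [0.1 -0.2]: a shrink direction replaces the poisoned gradient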
Example 4: exe
def exe(self, mainloop):
"""
.. todo::
WRITEME
"""
grads = mainloop.grads
"""
for p, g in grads.items():
grads[p] = g / self.batch_size
g_norm = 0.
for g in grads.values():
g_norm += (g**2).sum()
"""
g_norm = 0.
for p, g in grads.items():
g /= self.batch_size
grads[p] = g
g_norm += (g**2).sum()
not_finite = T.or_(T.isnan(g_norm), T.isinf(g_norm))
g_norm = T.sqrt(g_norm)
scaler = self.scaler / T.maximum(self.scaler, g_norm)
for p, g in grads.items():
grads[p] = T.switch(not_finite, 0.1 * p, g * scaler)
mainloop.grads = grads
Example 5: recurrence
def recurrence(log_p_curr, log_p_prev, skip_mask=None):
if skip_mask is None:
skip_mask = T.ones_like(log_p_curr[:, 1:-2:2])
# normalise and bring back to p space
k = T.max(log_p_prev, axis=1, keepdims=True)
norm_p_prev = T.switch(
T.isinf(log_p_prev), 0, T.exp(log_p_prev - k)) # set -inf to 0
# previous
_result = norm_p_prev
# add shift of previous
_result = T.inc_subtensor(_result[:, 1:], norm_p_prev[:, :-1])
# add skips of previous
_result = T.inc_subtensor(_result[:, 3::2],
T.switch(skip_mask, norm_p_prev[:, 1:-2:2], 0))
# current
# log(p) should be 0 for first 2 terms
result = T.switch(
T.eq(_result, 0),
-np.inf,
log_p_curr + T.log(_result) + k
)
return result
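The isinf use in Example 5 is a standard log-space stabilisation for CTC-style recurrences: -inf is the log of an exact zero probability, so it is mapped straight to 0 rather than through exp, which would produce nan whenever the row maximum k is itself -inf. A minimal sketch of just that step:

import numpy as np
import theano
import theano.tensor as T

log_p = T.matrix('log_p')
k = T.max(log_p, axis=1, keepdims=True)
norm_p = T.switch(T.isinf(log_p), 0., T.exp(log_p - k))  # -inf -> exact 0

f = theano.function([log_p], norm_p)
print(f(np.array([[0., -np.inf, np.log(0.5)]], dtype=theano.config.floatX)))
# -> [[1. 0. 0.5]]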
Example 6: get_gradients
def get_gradients(self, model, data, ** kwargs):
cost = self.expr(model=model, data=data, **kwargs)
params = list(model.get_params())
grads = T.grad(cost, params, disconnected_inputs='ignore')
gradients = OrderedDict(izip(params, grads))
if self.gradient_clipping:
norm_gs = 0.
for grad in gradients.values():
norm_gs += (grad ** 2).sum()
not_finite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))
norm_gs = T.sqrt(norm_gs)
norm_gs = T.switch(T.ge(norm_gs, self.max_magnitude),
self.max_magnitude / norm_gs,
1.)
for param, grad in gradients.items():
gradients[param] = T.switch(not_finite,
.1 * param,
grad * norm_gs)
updates = OrderedDict()
return gradients, updates
Example 7: exe
def exe(self, mainloop):
"""
.. todo::
WRITEME
"""
grads = mainloop.grads
g_norm = 0.
for p, g in grads.items():
g /= T.cast(self.batch_size, dtype=theano.config.floatX)
grads[p] = g
g_norm += (g**2).sum()
if self.check_nan:
not_finite = T.or_(T.isnan(g_norm), T.isinf(g_norm))
g_norm = T.sqrt(g_norm)
scaler = self.scaler / T.maximum(self.scaler, g_norm)
if self.check_nan:
for p, g in grads.items():
grads[p] = T.switch(not_finite, 0.1 * p, g * scaler)
else:
for p, g in grads.items():
grads[p] = g * scaler
mainloop.grads = grads
Example 8: compute_updates
def compute_updates(self, training_cost, params):
updates = []
grads = T.grad(training_cost, params)
grads = OrderedDict(zip(params, grads))
# Clip stuff
c = numpy.float32(self.cutoff)
clip_grads = []
norm_gs = T.sqrt(sum(T.sum(g ** 2) for p, g in grads.items()))
normalization = T.switch(T.ge(norm_gs, c), c / norm_gs, np.float32(1.))
notfinite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))
for p, g in grads.items():
clip_grads.append((p, T.switch(notfinite, numpy.float32(.1) * p, g * normalization)))
grads = OrderedDict(clip_grads)
if self.updater == 'adagrad':
updates = Adagrad(grads, self.lr)
elif self.updater == 'sgd':
raise Exception("Sgd not implemented!")
elif self.updater == 'adadelta':
updates = Adadelta(grads)
elif self.updater == 'rmsprop':
updates = RMSProp(grads, self.lr)
elif self.updater == 'adam':
updates = Adam(grads)
else:
raise Exception("Updater not understood!")
return updates
Example 9: minimize
def minimize(self, loss, momentum, rescale):
super(RMSPropOptimizer, self).minimize(loss)
grads = self.gradparams
grad_norm = sum(map(lambda x: T.sqr(x).sum(), grads))  # squared norm; the sqrt is applied after the finiteness check below
not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
grad_norm = T.sqrt(grad_norm)
scaling_num = rescale
scaling_den = T.maximum(rescale, grad_norm)
# Magic constants
combination_coeff = 0.9
minimum_grad = 1E-4
updates = []
params = self.params
for n, (param, grad) in enumerate(zip(params, grads)):
grad = T.switch(not_finite, 0.1 * param,
grad * (scaling_num / scaling_den))
old_square = self.running_square_[n]
new_square = combination_coeff * old_square + (
1. - combination_coeff) * T.sqr(grad)
old_avg = self.running_avg_[n]
new_avg = combination_coeff * old_avg + (
1. - combination_coeff) * grad
rms_grad = T.sqrt(new_square - new_avg ** 2)
rms_grad = T.maximum(rms_grad, minimum_grad)
memory = self.memory_[n]
update = momentum * memory - self.lr * grad / rms_grad
update2 = momentum * momentum * memory - (
1 + momentum) * self.lr * grad / rms_grad
updates.append((old_square, new_square))
updates.append((old_avg, new_avg))
updates.append((memory, update))
updates.append((param, param + update2))
return updates
Example 10: adamgc
def adamgc(cost, params, lr=0.0002, b1=0.1, b2=0.001, e=1e-8, max_magnitude=5.0, infDecay=0.1):
updates = []
grads = T.grad(cost, params)
norm = norm_gs(params, grads)
sqrtnorm = T.sqrt(norm)
not_finite = T.or_(T.isnan(sqrtnorm), T.isinf(sqrtnorm))
adj_norm_gs = T.switch(T.ge(sqrtnorm, max_magnitude), max_magnitude / sqrtnorm, 1.)
i = shared(floatX(0.))
i_t = i + 1.
fix1 = 1. - (1. - b1)**i_t
fix2 = 1. - (1. - b2)**i_t
lr_t = lr * (T.sqrt(fix2) / fix1)
for p, g in zip(params, grads):
g = T.switch(not_finite, infDecay * p, g * adj_norm_gs)
m = shared(p.get_value() * 0.)
v = shared(p.get_value() * 0.)
m_t = (b1 * g) + ((1. - b1) * m)
v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
g_t = m_t / (T.sqrt(v_t) + e)
p_t = p - (lr_t * g_t)
updates.append((m, m_t))
updates.append((v, v_t))
updates.append((p, p_t))
updates.append((i, i_t))
return updates, norm
Example 11: surface_pts
def surface_pts(self, rayField):
rf = self.w2o(rayField)
distance = self.distance(rayField)
stabilized = T.switch(T.isinf(distance), 1000, distance)
return rf.origin + (stabilized.dimshuffle(0, 1, 'x') * rf.rays)  # 'rays' was undefined in the excerpt; presumably the transformed rays rf.rays
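Example 11's isinf guard caps the hit distance of rays that miss every surface (distance = inf) at a large finite sentinel, so the point computation origin + distance * direction stays finite. Isolated as a runnable sketch (the sentinel 1000 comes from the example; the shapes are assumptions):

import numpy as np
import theano
import theano.tensor as T

distance = T.matrix('distance')  # assumed (H, W) per-pixel hit distances
stabilized = T.switch(T.isinf(distance), 1000, distance)

f = theano.function([distance], stabilized)
print(f(np.array([[2.5, np.inf]], dtype=theano.config.floatX)))
# -> [[2.5 1000.]]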
Example 12: updates
def updates(self, cost, params, learning_rate = 0.1, momentum= 0.95, rescale=5.):
grads = T.grad(cost, params)
grad_norm = sum(map(lambda x: T.sqr(x).sum(), grads))  # squared norm; the sqrt is applied after the finiteness check below
not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
grad_norm = T.sqrt(grad_norm)
scaling_num = rescale
scaling_den = T.maximum(rescale, grad_norm)
# Magic constants
combination_coeff = 0.9
minimum_grad = 1e-4
updates = []
for n, (param, grad) in enumerate(zip(params, grads)):
grad = T.switch(not_finite, 0.1 * param,
grad * (scaling_num / scaling_den))
old_square = self.running_square_[n]
new_square = combination_coeff * old_square + (
1. - combination_coeff) * T.sqr(grad)
old_avg = self.running_avg_[n]
new_avg = combination_coeff * old_avg + (
1. - combination_coeff) * grad
rms_grad = T.sqrt(new_square - new_avg ** 2)
rms_grad = T.maximum(rms_grad, minimum_grad)
memory = self.memory_[n]
update = momentum * memory - learning_rate * grad / rms_grad
update2 = momentum * momentum * memory - (
1 + momentum) * learning_rate * grad / rms_grad
updates.append((old_square, new_square))
updates.append((old_avg, new_avg))
updates.append((memory, update))
updates.append((param, param + update2))
return updates
Example 13: compute_step
def compute_step(self, parameter, previous_step):
step_sum = tensor.sum(previous_step)
not_finite = (tensor.isnan(step_sum) +
tensor.isinf(step_sum))
step = tensor.switch(
not_finite > 0, (1 - self.scaler) * parameter, previous_step)
return step, []
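Example 13 checks an entire tensor with a single scalar: nan and inf both propagate through a sum, so testing T.sum(step) covers every element at the cost of one reduction. A minimal sketch of that detection trick:

import numpy as np
import theano
import theano.tensor as T

step = T.matrix('step')
step_sum = T.sum(step)
not_finite = T.isnan(step_sum) + T.isinf(step_sum)  # nonzero if anything is bad

f = theano.function([step], not_finite)
print(f(np.array([[1., 2.], [3., np.nan]], dtype=theano.config.floatX)))  # 1
print(f(np.array([[1., 2.], [3., 4.]], dtype=theano.config.floatX)))      # 0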
Example 14: graves_rmsprop_updates
def graves_rmsprop_updates(self, params, grads, learning_rate=1e-4, alpha=0.9, epsilon=1e-4, chi=0.95):
"""
Alex Graves' RMSProp [1]_.
.. math ::
n_{i} &= \chi * n_i-1 + (1 - \chi) * grad^{2}\\
g_{i} &= \chi * g_i-1 + (1 - \chi) * grad\\
\Delta_{i} &= \alpha * Delta_{i-1} - learning_rate * grad /
sqrt(n_{i} - g_{i}^{2} + \epsilon)\\
w_{i} &= w_{i-1} + \Delta_{i}
References
----------
.. [1] Graves, Alex.
"Generating Sequences With Recurrent Neural Networks", p.23
arXiv:1308.0850
"""
updates = []
grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grads)))
not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
for n, (param, grad) in enumerate(zip(params, grads)):
grad = T.switch(not_finite, 0.1 * param, grad)
old_square = self.running_square_[n]
old_avg = self.running_avg_[n]
old_memory = self.memory_[n]
new_square = chi * old_square + (1. - chi) * grad ** 2
new_avg = chi * old_avg + (1. - chi) * grad
new_memory = alpha * old_memory - learning_rate * grad / T.sqrt(new_square - \
new_avg ** 2 + epsilon)
updates.append((old_square, new_square))
updates.append((old_avg, new_avg))
updates.append((old_memory, new_memory))
updates.append((param, param + new_memory))
return updates
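To make the docstring's recurrence concrete, here is one scalar step of it in plain numpy; all starting values and the gradient are made up for illustration.

import numpy as np

chi, alpha, learning_rate, epsilon = 0.95, 0.9, 1e-4, 1e-4
n, g_avg, delta, w, grad = 0.0, 0.0, 0.0, 1.0, 0.5  # hypothetical state

n = chi * n + (1. - chi) * grad ** 2      # running second moment
g_avg = chi * g_avg + (1. - chi) * grad   # running first moment
delta = alpha * delta - learning_rate * grad / np.sqrt(n - g_avg ** 2 + epsilon)
w = w + delta                             # parameter step
print(w)  # slightly below 1.0, since the gradient is positive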
Example 15: shade
def shade(self, shape, lights, camera):
# See: http://en.wikipedia.org/wiki/Phong_reflection_model#Description
# Since our material params are 1d we calculate bw shadings first and
# convert to color after
light = lights[0]
material = shape.material
normals = shape.normals(camera.rays)
ambient_light = material.ka
# diffuse (lambertian)
diffuse_shadings = material.kd*T.tensordot(normals, -light.normed_dir(), 1)
# specular
rm = 2.0*(T.tensordot(normals, -light.normed_dir(), 1).dimshuffle(
0, 1, 'x'))*normals + light.normed_dir()
specular_shadings = material.ks*(T.tensordot(rm, camera.look_at, 1) ** material.shininess)
# phong
phong_shadings = ambient_light + diffuse_shadings + specular_shadings
colorized = phong_shadings.dimshuffle(0, 1, 'x') * material.color.dimshuffle('x', 'x', 0) * light.intensity.dimshuffle('x', 'x', 0)
clipped = T.clip(colorized, 0, 1)
distances = shape.distance(camera.rays)
return broadcasted_switch(T.isinf(distances), [0., 0., 0.], clipped)
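broadcasted_switch in Example 15 is a helper from the surrounding project; a generic equivalent with plain T.switch paints a background colour wherever the ray-hit distance is inf (the shapes below are assumptions based on the dimshuffles above):

import numpy as np
import theano
import theano.tensor as T

distances = T.matrix('distances')  # (H, W); inf means the ray hit nothing
shaded = T.tensor3('shaded')       # (H, W, 3) clipped Phong colours

miss = T.isinf(distances).dimshuffle(0, 1, 'x')  # (H, W, 1), broadcasts over RGB
image = T.switch(miss, 0., shaded)               # black background on misses

f = theano.function([distances, shaded], image)
d = np.array([[1.0, np.inf]], dtype=theano.config.floatX)
c = np.ones((1, 2, 3), dtype=theano.config.floatX)
print(f(d, c))  # the second pixel becomes [0. 0. 0.]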