This article collects typical usage examples of the Python method theano.tensor.isinf. If you are wondering what tensor.isinf does, how to call it, or what it looks like in real code, the curated examples below should help. You can also browse further usage examples from the module theano.tensor, to which this method belongs.

The following 15 code examples of tensor.isinf are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
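Before turning to the collected examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what tensor.isinf returns: an elementwise 0/1 mask that flags +inf and -inf but not NaN, which is why the snippets below usually pair it with tensor.isnan.

import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')
# isinf/isnan produce elementwise 0/1 masks; or_ combines them into a
# single "not finite" mask.
check = theano.function([x], [T.isinf(x), T.or_(T.isnan(x), T.isinf(x))])

inf_mask, not_finite = check(np.array([1.0, np.inf, -np.inf, np.nan],
                                      dtype=theano.config.floatX))
print(inf_mask)     # [0 1 1 0]
print(not_finite)   # [0 1 1 1]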
Example 1: sgdmgc

# Required import: from theano import tensor [as alias]
# Or: from theano.tensor import isinf [as alias]
def sgdmgc(cost, params, lr=1.0, alpha=0.1, max_magnitude=5.0, infDecay=0.1):
    """SGD with momentum and gradient clipping."""
    grads = T.grad(cost=cost, wrt=params)
    updates = []

    norm = norm_gs(params, grads)
    sqrtnorm = T.sqrt(norm)
    not_finite = T.or_(T.isnan(sqrtnorm), T.isinf(sqrtnorm))
    adj_norm_gs = T.switch(T.ge(sqrtnorm, max_magnitude), max_magnitude / sqrtnorm, 1.)

    for p, g in zip(params, grads):
        v = shared(p.get_value() * 0.)
        # If the gradient norm is NaN/inf, fall back to a decayed copy of the
        # parameter; otherwise rescale the gradient so its norm stays bounded.
        g = T.switch(not_finite, infDecay * p, g * adj_norm_gs)
        v_new = v * (1.0 - alpha) - alpha * lr * g
        updates.append((v, v_new))
        updates.append((p, p + v_new))
    return updates, norm
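A hedged usage sketch for the function above (cost, params, and inputs are placeholders for whatever the surrounding model defines; sgdmgc itself also relies on the project's own norm_gs and shared helpers):

# Hypothetical wiring: `inputs` is the list of symbolic model inputs,
# `cost` a scalar Theano expression, `params` a list of shared variables.
updates, grad_norm = sgdmgc(cost, params, lr=0.01, alpha=0.1)
train_step = theano.function(inputs, [cost, grad_norm], updates=updates)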
Example 2: gradient_descent

# Required import: from theano import tensor [as alias]
# Or: from theano.tensor import isinf [as alias]
def gradient_descent(self, params, gparams, learning_rate):
    """Momentum GD with gradient clipping."""
    # grad = T.grad(loss, self.params)
    self.momentum_velocity_ = [0.] * len(gparams)
    grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), gparams)))
    updates = OrderedDict()
    not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
    scaling_den = T.maximum(5.0, grad_norm)
    for n, (param, grad) in enumerate(zip(params, gparams)):
        # Fall back to 0.1 * param when the norm is NaN/inf, otherwise clip
        # the gradient to a global norm of 5.
        grad = T.switch(not_finite, 0.1 * param,
                        grad * (5.0 / scaling_den))
        velocity = self.momentum_velocity_[n]
        update_step = self.momentum * velocity - learning_rate * grad
        self.momentum_velocity_[n] = update_step
        updates[param] = param + update_step
    return updates

##################### calculate total loss #######################
# only loss D
Example 3: gradient_descent

# Required import: from theano import tensor [as alias]
# Or: from theano.tensor import isinf [as alias]
def gradient_descent(self, loss):
    """Momentum GD with gradient clipping."""
    grads = T.grad(loss, self.params)
    self.momentum_velocity_ = [0.] * len(grads)
    grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grads)))
    updates = OrderedDict()
    not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
    scaling_den = T.maximum(5.0, grad_norm)
    for n, (param, grad) in enumerate(zip(self.params, grads)):
        grad = T.switch(not_finite, 0.1 * param,
                        grad * (5.0 / scaling_den))
        velocity = self.momentum_velocity_[n]
        update_step = self.momentum * velocity - self.learning_rate * grad
        self.momentum_velocity_[n] = update_step
        updates[param] = param + update_step
    return updates
Example 4: isvalid

# Required import: from theano import tensor [as alias]
# Or: from theano.tensor import isinf [as alias]
def isvalid(x):
    # True only when every element of x is finite (no NaN, no +/-inf).
    return T.all(T.logical_not(T.logical_or(T.isnan(x), T.isinf(x))))
Example 5: gradient_clipping

# Required import: from theano import tensor [as alias]
# Or: from theano.tensor import isinf [as alias]
def gradient_clipping(grads, tparams, clip_c=10):
    g2 = 0.
    for g in grads:
        g2 += (g ** 2).sum()
    g2 = tensor.sqrt(g2)
    not_finite = tensor.or_(tensor.isnan(g2), tensor.isinf(g2))
    new_grads = []
    for p, g in zip(tparams.values(), grads):
        new_grads.append(tensor.switch(g2 > clip_c,
                                       g * (clip_c / g2),
                                       g))
    # Also return a "norm is NaN/inf" flag and a "clipping was triggered" flag.
    return new_grads, not_finite, tensor.lt(clip_c, g2)
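One way the three return values might be consumed (an assumed wiring, not taken from the original project): the rescaled gradients feed a plain SGD update, while the two flags can be returned for monitoring exploding or non-finite gradients.

# `tparams` is assumed to be an OrderedDict of shared parameters and
# `grads` the matching list of symbolic gradients; `inputs` and `cost`
# are placeholders for the model-specific pieces.
new_grads, not_finite, was_clipped = gradient_clipping(grads, tparams, clip_c=10)
updates = [(p, p - 0.01 * g) for p, g in zip(tparams.values(), new_grads)]
train_step = theano.function(inputs, [cost, not_finite, was_clipped],
                             updates=updates)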
Example 6: compute_updates

# Required import: from theano import tensor [as alias]
# Or: from theano.tensor import isinf [as alias]
def compute_updates(self, training_cost, params):
    updates = []

    grads = T.grad(training_cost, params)
    grads = OrderedDict(zip(params, grads))

    # Clip stuff
    c = numpy.float32(self.cutoff)
    clip_grads = []

    norm_gs = T.sqrt(sum(T.sum(g ** 2) for p, g in grads.items()))
    normalization = T.switch(T.ge(norm_gs, c), c / norm_gs, np.float32(1.))
    notfinite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))

    for p, g in grads.items():
        clip_grads.append((p, T.switch(notfinite, numpy.float32(.1) * p, g * normalization)))

    grads = OrderedDict(clip_grads)

    if self.updater == 'adagrad':
        updates = Adagrad(grads, self.lr)
    elif self.updater == 'sgd':
        raise Exception("Sgd not implemented!")
    elif self.updater == 'adadelta':
        updates = Adadelta(grads)
    elif self.updater == 'rmsprop':
        updates = RMSProp(grads, self.lr)
    elif self.updater == 'adam':
        updates = Adam(grads)
    else:
        raise Exception("Updater not understood!")

    return updates
Example 7: sgdgc

# Required import: from theano import tensor [as alias]
# Or: from theano.tensor import isinf [as alias]
def sgdgc(cost, params, lr=1.0, max_magnitude=5.0, infDecay=0.1):
    """SGD with gradient clipping."""
    grads = T.grad(cost=cost, wrt=params)
    updates = []

    norm = norm_gs(params, grads)
    sqrtnorm = T.sqrt(norm)
    # not_finite = T.or_(T.isnan(sqrtnorm), T.isinf(sqrtnorm))
    adj_norm_gs = T.switch(T.ge(sqrtnorm, max_magnitude), max_magnitude / sqrtnorm, 1.)

    for p, g in zip(params, grads):
        # g = T.switch(not_finite, infDecay * p, g * adj_norm_gs)
        updates.append((p, p - lr * g * adj_norm_gs))
    return updates, norm
Example 8: adamgc_

# Required import: from theano import tensor [as alias]
# Or: from theano.tensor import isinf [as alias]
def adamgc_(cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8, max_magnitude=5.0, infDecay=0.1):
    updates = []
    grads = T.grad(cost, params)

    norm = norm_gs(params, grads)
    sqrtnorm = T.sqrt(norm)
    not_finite = T.or_(T.isnan(sqrtnorm), T.isinf(sqrtnorm))
    adj_norm_gs = T.switch(T.ge(sqrtnorm, max_magnitude), max_magnitude / sqrtnorm, 1.)

    i = shared(floatX(0.))
    i_t = i + 1.
    fix1 = 1. - (1. - b1) ** i_t
    fix2 = 1. - (1. - b2) ** i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)

    for p, g in zip(params, grads):
        g = T.switch(not_finite, infDecay * p, g * adj_norm_gs)
        m = shared(p.get_value() * 0.)
        v = shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)

        # e_t = shared(p.get_value() * 0.)
        # de_t = (srnd.normal(p.shape, std=0.05, dtype=theano.config.floatX) * p_t - e_t) * 0.05  # *p_t
        # p_t = p_t + de_t
        # updates.append((e_t, e_t + de_t))

        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))
    return updates, norm
Example 9: adamgc

# Required import: from theano import tensor [as alias]
# Or: from theano.tensor import isinf [as alias]
def adamgc(cost, params, lr=0.0002, b1=0.1, b2=0.001, e=1e-8, max_magnitude=5.0, infDecay=0.1):
    updates = []
    grads = T.grad(cost, params)

    norm = norm_gs(params, grads)
    sqrtnorm = T.sqrt(norm)
    not_finite = T.or_(T.isnan(sqrtnorm), T.isinf(sqrtnorm))
    adj_norm_gs = T.switch(T.ge(sqrtnorm, max_magnitude), max_magnitude / sqrtnorm, 1.)

    i = shared(floatX(0.))
    i_t = i + 1.
    fix1 = 1. - (1. - b1) ** i_t
    fix2 = 1. - (1. - b2) ** i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)

    for p, g in zip(params, grads):
        # Sanitize/clip the gradient, then apply the standard Adam moment updates.
        g = T.switch(not_finite, infDecay * p, g * adj_norm_gs)
        m = shared(p.get_value() * 0.)
        v = shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)

        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))
    return updates, norm

#--------------------------------------------------------------------------------------------------
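As with the SGD variants above, a hedged usage sketch (cost, params, and inputs are placeholders; adamgc also depends on the project's norm_gs, shared, and floatX helpers):

updates, grad_norm = adamgc(cost, params, lr=0.0002)
train_step = theano.function(inputs, [cost, grad_norm], updates=updates)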
Example 10: compute_step

# Required import: from theano import tensor [as alias]
# Or: from theano.tensor import isinf [as alias]
def compute_step(self, parameter, previous_step):
    step_sum = tensor.sum(previous_step)
    not_finite = (tensor.isnan(step_sum) +
                  tensor.isinf(step_sum))
    # If the proposed step contains NaN/inf, shrink the parameter toward zero
    # instead of applying the step.
    step = tensor.switch(
        not_finite > 0, (1 - self.scaler) * parameter, previous_step)
    return step, []
Example 11: updates

# Required import: from theano import tensor [as alias]
# Or: from theano.tensor import isinf [as alias]
def updates(self, params, grads, learning_rate, momentum, rescale=5.):
    grad_norm = tensor.sqrt(sum(map(lambda x: tensor.sqr(x).sum(), grads)))
    not_finite = tensor.or_(tensor.isnan(grad_norm),
                            tensor.isinf(grad_norm))
    grad_norm = tensor.sqrt(grad_norm)
    scaling_num = rescale
    scaling_den = tensor.maximum(rescale, grad_norm)
    # Magic constants
    combination_coeff = 0.9
    minimum_grad = 1E-4
    updates = []
    for n, (param, grad) in enumerate(zip(params, grads)):
        grad = tensor.switch(not_finite, 0.1 * param,
                             grad * (scaling_num / scaling_den))
        old_square = self.running_square_[n]
        new_square = combination_coeff * old_square + (
            1. - combination_coeff) * tensor.sqr(grad)
        old_avg = self.running_avg_[n]
        new_avg = combination_coeff * old_avg + (
            1. - combination_coeff) * grad
        rms_grad = tensor.sqrt(new_square - new_avg ** 2)
        rms_grad = tensor.maximum(rms_grad, minimum_grad)
        memory = self.memory_[n]
        # Nesterov-style momentum: `update` is stored, `update2` is applied.
        update = momentum * memory - learning_rate * grad / rms_grad
        update2 = momentum * momentum * memory - (
            1 + momentum) * learning_rate * grad / rms_grad
        updates.append((old_square, new_square))
        updates.append((old_avg, new_avg))
        updates.append((memory, update))
        updates.append((param, param + update2))
    return updates
Example 12: remove_nans

# Required import: from theano import tensor [as alias]
# Or: from theano.tensor import isinf [as alias]
def remove_nans(x):
    # Replace NaN and +/-inf entries with 0; leave finite entries untouched.
    return T.switch(T.isnan(x) + T.isinf(x), 0, x)
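A quick self-contained check of the helper, assuming only Theano and NumPy:

x = T.vector('x')
clean = theano.function([x], remove_nans(x))
print(clean(np.array([1.0, np.nan, np.inf, -2.0], dtype=theano.config.floatX)))
# -> [ 1.  0.  0. -2.]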
Example 13: compute_activation

# Required import: from theano import tensor [as alias]
# Or: from theano.tensor import isinf [as alias]
def compute_activation(scaled_time_vector):
    # Keep only the finite entries, then take the log of their sum.
    compare = tt.isinf(scaled_time_vector)
    subvector = scaled_time_vector[(1 - compare).nonzero()]
    activation_from_time = tt.log(subvector.sum())
    return activation_from_time
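A quick numerical check (a self-contained sketch; only the function above plus Theano and NumPy are assumed): the +inf entries are filtered out before the log of the sum is taken.

import numpy as np
import theano
import theano.tensor as tt

t = tt.vector('t')
activation = theano.function([t], compute_activation(t))
print(activation(np.array([0.5, 2.0, np.inf], dtype=theano.config.floatX)))
# log(0.5 + 2.0) ≈ 0.9163; the inf entry is ignored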
Example 14: compute_updates

# Required import: from theano import tensor [as alias]
# Or: from theano.tensor import isinf [as alias]
def compute_updates(self, training_cost, params):
    updates = []

    grads = T.grad(training_cost, params)
    grads = OrderedDict(zip(params, grads))

    # Gradient clipping
    c = numpy.float32(self.cutoff)
    clip_grads = []

    norm_gs = T.sqrt(sum(T.sum(g ** 2) for p, g in grads.items()))
    normalization = T.switch(T.ge(norm_gs, c), c / norm_gs, np.float32(1.))
    notfinite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))

    for p, g in grads.items():
        clip_grads.append((p, T.switch(notfinite, numpy.float32(.1) * p, g * normalization)))

    grads = OrderedDict(clip_grads)

    if self.W_emb in grads:
        if self.initialize_from_pretrained_word_embeddings and self.fix_pretrained_word_embeddings:
            assert not self.fix_encoder_parameters
            # Keep pretrained word embeddings fixed
            logger.debug("Will use mask to fix pretrained word embeddings")
            grads[self.W_emb] = grads[self.W_emb] * self.W_emb_pretrained_mask
        elif self.fix_encoder_parameters:
            # If 'fix_encoder_parameters' is on, the word embeddings are excluded from the parameter training set
            logger.debug("Will fix word embeddings to initial embeddings or embeddings from resumed model")
        else:
            logger.debug("Will train all word embeddings")

    optimizer_variables = []
    if self.updater == 'adagrad':
        updates = Adagrad(grads, self.lr)
    elif self.updater == 'sgd':
        raise Exception("Sgd not implemented!")
    elif self.updater == 'adadelta':
        updates = Adadelta(grads)
    elif self.updater == 'rmsprop':
        updates = RMSProp(grads, self.lr)
    elif self.updater == 'adam':
        updates, optimizer_variables = Adam(grads, self.lr)
    else:
        raise Exception("Updater not understood!")

    return updates, optimizer_variables

# Batch training function.
Example 15: rmsprop

# Required import: from theano import tensor [as alias]
# Or: from theano.tensor import isinf [as alias]
def rmsprop(cost, params, learning_rate, momentum=0.5, rescale=5.):
    grads = T.grad(cost=cost, wrt=params)
    running_square_ = [theano.shared(np.zeros_like(p.get_value(), dtype=p.dtype),
                                     broadcastable=p.broadcastable)
                       for p in params]
    running_avg_ = [theano.shared(np.zeros_like(p.get_value(), dtype=p.dtype),
                                  broadcastable=p.broadcastable)
                    for p in params]
    memory_ = [theano.shared(np.zeros_like(p.get_value(), dtype=p.dtype),
                             broadcastable=p.broadcastable)
               for p in params]
    grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grads)))
    not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
    grad_norm = T.sqrt(grad_norm)
    scaling_num = rescale
    scaling_den = T.maximum(rescale, grad_norm)
    # Magic constants
    combination_coeff = 0.9
    minimum_grad = 1E-4
    updates = []
    for n, (param, grad) in enumerate(zip(params, grads)):
        grad = T.switch(not_finite, 0.1 * param,
                        grad * (scaling_num / scaling_den))
        old_square = running_square_[n]
        new_square = combination_coeff * old_square + (
            1. - combination_coeff) * T.sqr(grad)
        old_avg = running_avg_[n]
        new_avg = combination_coeff * old_avg + (
            1. - combination_coeff) * grad
        rms_grad = T.sqrt(new_square - new_avg ** 2)
        rms_grad = T.maximum(rms_grad, minimum_grad)
        memory = memory_[n]
        update = momentum * memory - learning_rate * grad / rms_grad
        update2 = momentum * momentum * memory - (
            1 + momentum) * learning_rate * grad / rms_grad
        updates.append((old_square, new_square))
        updates.append((old_avg, new_avg))
        updates.append((memory, update))
        updates.append((param, param + update2))
    return updates
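Finally, a hedged usage sketch for the standalone rmsprop above (cost, params, and inputs are again placeholders for the model-specific pieces):

updates = rmsprop(cost, params, learning_rate=1e-3, momentum=0.9)
train_step = theano.function(inputs, cost, updates=updates)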