本文整理汇总了Python中theano.tensor.or_方法的典型用法代码示例。如果您正苦于以下问题:Python tensor.or_方法的具体用法?Python tensor.or_怎么用?Python tensor.or_使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块theano.tensor的用法示例。
在下文中一共展示了tensor.or_方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: sgdmgc
# 需要导入模块: from theano import tensor [as 别名]
# 或者: from theano.tensor import or_ [as 别名]
def sgdmgc(cost, params, lr=1.0, alpha=0.1, max_magnitude=5.0, infDecay=0.1):
    """Build SGD-with-momentum updates with global gradient rescaling.

    The gradients of ``cost`` with respect to ``params`` are multiplied by
    ``max_magnitude / sqrt(norm)`` whenever ``sqrt(norm) >= max_magnitude``,
    where ``norm`` is the value returned by ``norm_gs(params, grads)``
    (presumably the squared global gradient norm -- TODO confirm against
    ``norm_gs``). When that square root is NaN or infinite, every gradient
    is replaced by ``infDecay * p`` instead.

    Returns a tuple ``(updates, norm)`` where ``updates`` is a list of
    ``(variable, new_expression)`` pairs covering one freshly created
    velocity variable per parameter plus the parameter itself.
    """
    gradients = T.grad(cost=cost, wrt=params)
    norm = norm_gs(params, gradients)
    root_norm = T.sqrt(norm)

    is_bad = T.or_(T.isnan(root_norm), T.isinf(root_norm))
    exceeds_limit = T.ge(root_norm, max_magnitude)
    shrink = T.switch(exceeds_limit, max_magnitude / root_norm, 1.)

    updates = []
    for param, raw_grad in zip(params, gradients):
        # Velocity starts as an all-zero copy of the parameter's current value.
        velocity = shared(param.get_value() * 0.)
        safe_grad = T.switch(is_bad, infDecay * param, raw_grad * shrink)
        next_velocity = velocity * (1.0 - alpha) - alpha * lr * safe_grad
        updates.extend([
            (velocity, next_velocity),
            (param, param + next_velocity),
        ])
    return updates, norm
示例2: gradient_descent
# 需要导入模块: from theano import tensor [as 别名]
# 或者: from theano.tensor import or_ [as 别名]
def gradient_descent(self, params, gparams, learning_rate):
    """Build gradient-descent updates with global-norm rescaling.

    ``gparams`` are the gradients matching ``params`` (they are supplied by
    the caller, not computed here). Each gradient is multiplied by
    ``5.0 / max(5.0, grad_norm)`` where ``grad_norm`` is the L2 norm over
    all gradients; if that norm is NaN or infinite the gradient is replaced
    by ``0.1 * param``.

    Returns an ``OrderedDict`` mapping every parameter to its updated
    expression. As a side effect ``self.momentum_velocity_`` is overwritten
    with the per-parameter update steps.
    """
    # NOTE(review): the velocities are reset to plain 0. on every call and
    # are never part of the returned updates, so the ``self.momentum`` term
    # below always multiplies 0. -- confirm whether persistent momentum
    # state was intended.
    self.momentum_velocity_ = [0.] * len(gparams)

    squared_sums = [T.sqr(gp).sum() for gp in gparams]
    grad_norm = T.sqrt(sum(squared_sums))
    is_bad = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
    denom = T.maximum(5.0, grad_norm)

    updates = OrderedDict()
    for idx, (param, raw_grad) in enumerate(zip(params, gparams)):
        clipped = T.switch(is_bad, 0.1 * param, raw_grad * (5.0 / denom))
        previous = self.momentum_velocity_[idx]
        step = self.momentum * previous - learning_rate * clipped
        self.momentum_velocity_[idx] = step
        updates[param] = param + step
    return updates
##################### calculate total loss #######################
# only loss D
示例3: gradient_descent
# 需要导入模块: from theano import tensor [as 别名]
# 或者: from theano.tensor import or_ [as 别名]
def gradient_descent(self, loss):
    """Build gradient-descent updates for ``self.params`` with norm rescaling.

    The gradients of ``loss`` with respect to ``self.params`` are multiplied
    by ``5.0 / max(5.0, grad_norm)`` where ``grad_norm`` is the L2 norm over
    all gradients; if that norm is NaN or infinite each gradient is replaced
    by ``0.1 * param``. The step uses ``self.momentum`` and
    ``self.learning_rate``.

    Returns an ``OrderedDict`` mapping every parameter to its updated
    expression. As a side effect ``self.momentum_velocity_`` is overwritten
    with the per-parameter update steps.
    """
    gradients = T.grad(loss, self.params)

    # NOTE(review): the velocities are reset to plain 0. on every call and
    # are never part of the returned updates, so the ``self.momentum`` term
    # below always multiplies 0. -- confirm whether persistent momentum
    # state was intended.
    self.momentum_velocity_ = [0.] * len(gradients)

    squared_sums = [T.sqr(g).sum() for g in gradients]
    grad_norm = T.sqrt(sum(squared_sums))
    is_bad = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
    denom = T.maximum(5.0, grad_norm)

    updates = OrderedDict()
    for idx, (param, raw_grad) in enumerate(zip(self.params, gradients)):
        clipped = T.switch(is_bad, 0.1 * param, raw_grad * (5.0 / denom))
        previous = self.momentum_velocity_[idx]
        step = self.momentum * previous - self.learning_rate * clipped
        self.momentum_velocity_[idx] = step
        updates[param] = param + step
    return updates
示例4: gradient_clipping
# 需要导入模块: from theano import tensor [as 别名]
# 或者: from theano.tensor import or_ [as 别名]
def gradient_clipping(grads, tparams, clip_c=10):
    """Rescale gradients so that their global L2 norm does not exceed ``clip_c``.

    Returns a 3-tuple:

    * the list of (possibly rescaled) gradients -- a gradient is multiplied
      by ``clip_c / norm`` when ``norm > clip_c`` and left unchanged
      otherwise;
    * a symbolic flag that is true when the norm is NaN or infinite;
    * a symbolic flag that is true when ``clip_c < norm``.

    ``tparams`` is only used to pair its values with ``grads``; the values
    themselves are not read.
    """
    # Start from a float zero, exactly like an explicit accumulator would.
    squared_total = sum(((g ** 2).sum() for g in grads), 0.)
    grad_norm = tensor.sqrt(squared_total)
    not_finite = tensor.or_(tensor.isnan(grad_norm), tensor.isinf(grad_norm))

    clipped = [
        tensor.switch(grad_norm > clip_c, g * (clip_c / grad_norm), g)
        for _, g in zip(tparams.values(), grads)
    ]
    return clipped, not_finite, tensor.lt(clip_c, grad_norm)
示例5: compute_updates
# 需要导入模块: from theano import tensor [as 别名]
# 或者: from theano.tensor import or_ [as 别名]
def compute_updates(self, training_cost, params):
    """Clip the gradients of ``training_cost`` and hand them to an optimizer.

    Gradients with respect to ``params`` are rescaled by
    ``cutoff / norm`` when their global L2 norm reaches ``self.cutoff``;
    when the norm is NaN or infinite each gradient is replaced by
    ``0.1 * p``. The resulting ``OrderedDict`` (parameter -> gradient) is
    passed to the helper selected by ``self.updater``.

    Returns whatever the selected helper (``Adagrad``, ``Adadelta``,
    ``RMSProp`` or ``Adam``) returns.

    Raises ``Exception`` for ``'sgd'`` (not implemented) and for any
    unrecognised ``self.updater`` value.
    """
    raw_grads = T.grad(training_cost, params)
    by_param = OrderedDict(zip(params, raw_grads))

    # Global-norm clipping
    cutoff = numpy.float32(self.cutoff)
    total_norm = T.sqrt(sum(T.sum(g ** 2) for g in by_param.values()))
    scale = T.switch(T.ge(total_norm, cutoff), cutoff / total_norm, np.float32(1.))
    is_bad = T.or_(T.isnan(total_norm), T.isinf(total_norm))
    clipped = OrderedDict(
        (p, T.switch(is_bad, numpy.float32(.1) * p, g * scale))
        for p, g in by_param.items()
    )

    kind = self.updater
    if kind == 'adagrad':
        return Adagrad(clipped, self.lr)
    if kind == 'sgd':
        raise Exception("Sgd not implemented!")
    if kind == 'adadelta':
        return Adadelta(clipped)
    if kind == 'rmsprop':
        return RMSProp(clipped, self.lr)
    if kind == 'adam':
        return Adam(clipped)
    raise Exception("Updater not understood!")
示例6: sgdgc
# 需要导入模块: from theano import tensor [as 别名]
# 或者: from theano.tensor import or_ [as 别名]
def sgdgc(cost, params, lr=1.0, max_magnitude=5.0, infDecay=0.1):
    """Build plain SGD updates with global gradient rescaling.

    The gradients of ``cost`` with respect to ``params`` are multiplied by
    ``max_magnitude / sqrt(norm)`` whenever ``sqrt(norm) >= max_magnitude``,
    where ``norm`` is the value returned by ``norm_gs(params, grads)``
    (presumably the squared global gradient norm -- TODO confirm against
    ``norm_gs``).

    ``infDecay`` is accepted but unused: this function applies no
    NaN/Inf guard to the gradients.

    Returns a tuple ``(updates, norm)`` where ``updates`` is a list of
    ``(parameter, new_expression)`` pairs.
    """
    gradients = T.grad(cost=cost, wrt=params)
    norm = norm_gs(params, gradients)
    root_norm = T.sqrt(norm)
    exceeds_limit = T.ge(root_norm, max_magnitude)
    shrink = T.switch(exceeds_limit, max_magnitude / root_norm, 1.)

    updates = [
        (param, param - lr * grad * shrink)
        for param, grad in zip(params, gradients)
    ]
    return updates, norm
示例7: adamgc_
# 需要导入模块: from theano import tensor [as 别名]
# 或者: from theano.tensor import or_ [as 别名]
def adamgc_(cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8, max_magnitude=5.0, infDecay=0.1):
    """Build Adam-style updates with global gradient rescaling.

    Gradients are multiplied by ``max_magnitude / sqrt(norm)`` whenever
    ``sqrt(norm) >= max_magnitude`` (``norm`` comes from ``norm_gs``;
    presumably the squared global gradient norm -- TODO confirm). If that
    square root is NaN or infinite, each gradient is replaced by
    ``infDecay * p``.

    The moment estimates follow ``m_t = b1 * g + (1 - b1) * m`` and
    ``v_t = b2 * g**2 + (1 - b2) * v``, i.e. ``b1``/``b2`` weight the *new*
    gradient. ``e`` is added to ``sqrt(v_t)`` in the denominator.

    Returns a tuple ``(updates, norm)`` where ``updates`` is a list of
    ``(variable, new_expression)`` pairs for both moments, each parameter
    and the step counter.
    """
    gradients = T.grad(cost, params)
    norm = norm_gs(params, gradients)
    root_norm = T.sqrt(norm)
    is_bad = T.or_(T.isnan(root_norm), T.isinf(root_norm))
    shrink = T.switch(T.ge(root_norm, max_magnitude), max_magnitude / root_norm, 1.)

    # Step counter and the bias-corrected learning rate derived from it.
    step = shared(floatX(0.))
    next_step = step + 1.
    correction1 = 1. - (1. - b1) ** next_step
    correction2 = 1. - (1. - b2) ** next_step
    lr_t = lr * (T.sqrt(correction2) / correction1)

    updates = []
    for param, raw_grad in zip(params, gradients):
        grad = T.switch(is_bad, infDecay * param, raw_grad * shrink)
        first_moment = shared(param.get_value() * 0.)
        second_moment = shared(param.get_value() * 0.)
        next_first = (b1 * grad) + ((1. - b1) * first_moment)
        next_second = (b2 * T.sqr(grad)) + ((1. - b2) * second_moment)
        direction = next_first / (T.sqrt(next_second) + e)
        next_param = param - (lr_t * direction)
        updates.extend([
            (first_moment, next_first),
            (second_moment, next_second),
            (param, next_param),
        ])
    updates.append((step, next_step))
    return updates, norm
示例8: adamgc
# 需要导入模块: from theano import tensor [as 别名]
# 或者: from theano.tensor import or_ [as 别名]
def adamgc(cost, params, lr=0.0002, b1=0.1, b2=0.001, e=1e-8, max_magnitude=5.0, infDecay=0.1):
    """Build Adam-style updates with global gradient rescaling.

    Gradients are multiplied by ``max_magnitude / sqrt(norm)`` whenever
    ``sqrt(norm) >= max_magnitude`` (``norm`` comes from ``norm_gs``;
    presumably the squared global gradient norm -- TODO confirm). If that
    square root is NaN or infinite, each gradient is replaced by
    ``infDecay * p``.

    The moment estimates follow ``m_t = b1 * g + (1 - b1) * m`` and
    ``v_t = b2 * g**2 + (1 - b2) * v``, i.e. ``b1``/``b2`` weight the *new*
    gradient. ``e`` is added to ``sqrt(v_t)`` in the denominator.

    Returns a tuple ``(updates, norm)`` where ``updates`` is a list of
    ``(variable, new_expression)`` pairs for both moments, each parameter
    and the step counter.
    """
    gradients = T.grad(cost, params)
    norm = norm_gs(params, gradients)
    root_norm = T.sqrt(norm)
    is_bad = T.or_(T.isnan(root_norm), T.isinf(root_norm))
    over_limit = T.ge(root_norm, max_magnitude)
    shrink = T.switch(over_limit, max_magnitude / root_norm, 1.)

    # Step counter and the bias-corrected learning rate derived from it.
    counter = shared(floatX(0.))
    counter_next = counter + 1.
    fix_first = 1. - (1. - b1) ** counter_next
    fix_second = 1. - (1. - b2) ** counter_next
    lr_t = lr * (T.sqrt(fix_second) / fix_first)

    updates = []
    for param, raw_grad in zip(params, gradients):
        grad = T.switch(is_bad, infDecay * param, raw_grad * shrink)
        mean_state = shared(param.get_value() * 0.)
        var_state = shared(param.get_value() * 0.)
        mean_next = (b1 * grad) + ((1. - b1) * mean_state)
        var_next = (b2 * T.sqr(grad)) + ((1. - b2) * var_state)
        scaled_step = mean_next / (T.sqrt(var_next) + e)
        updates += [
            (mean_state, mean_next),
            (var_state, var_next),
            (param, param - (lr_t * scaled_step)),
        ]
    updates.append((counter, counter_next))
    return updates, norm
#--------------------------------------------------------------------------------------------------
示例9: __init__
# 需要导入模块: from theano import tensor [as 别名]
# 或者: from theano.tensor import or_ [as 别名]
def __init__(self, low=0.0, high=1.0):
    """Set up the symbolic pdf, nll, cdf and ppf expressions.

    Parameters
    ----------
    * `low` [float]:
        The lower bound.

    * `high` [float]:
        The upper bound.

    Each expression is stored on the instance (``pdf_``, ``nll_``,
    ``cdf_``, ``ppf_``) and then registered through ``self._make`` under
    the names ``"pdf"``, ``"nll"``, ``"cdf"`` and ``"ppf"``.
    """
    super(Uniform, self).__init__(low=low, high=high)

    # pdf: zero when X < low or X >= high, constant density otherwise
    outside = T.or_(T.lt(self.X, self.low), T.ge(self.X, self.high))
    density = 1. / (self.high - self.low)
    self.pdf_ = T.switch(outside, 0., density).ravel()
    self._make(self.pdf_, "pdf")

    # -log pdf: infinite when X < low or X >= high
    outside = T.or_(T.lt(self.X, self.low), T.ge(self.X, self.high))
    log_width = T.log(self.high - self.low)
    self.nll_ = T.switch(outside, np.inf, log_width).ravel()
    self._make(self.nll_, "nll")

    # cdf: 0 below `low`, linear ramp up to `high`, 1 from `high` on
    below_low = T.lt(self.X, self.low)
    ramp = T.switch(
        T.lt(self.X, self.high),
        (self.X - self.low) / (self.high - self.low),
        1.)
    self.cdf_ = T.switch(below_low, 0., ramp).ravel()
    self._make(self.cdf_, "cdf")

    # ppf: affine map of the probability `self.p` onto [low, high]
    self.ppf_ = self.p * (self.high - self.low) + self.low
    self._make(self.ppf_, "ppf", args=[self.p])
示例10: updates
# 需要导入模块: from theano import tensor [as 别名]
# 或者: from theano.tensor import or_ [as 别名]
def updates(self, params, grads, learning_rate, momentum, rescale=5.):
    """Build RMSProp-style updates with momentum and gradient-norm rescaling.

    Every gradient is multiplied by ``rescale / max(rescale, grad_norm)``
    where ``grad_norm`` is the L2 norm over all ``grads``, so the rescaled
    global norm never exceeds ``rescale``. If the norm is NaN or infinite,
    each gradient is replaced by ``0.1 * param``.

    Per-parameter state is read from ``self.running_square_``,
    ``self.running_avg_`` and ``self.memory_`` (indexed in the same order
    as ``params``).

    Returns a list of ``(variable, new_expression)`` pairs covering the
    running square, running average, memory and the parameter itself.
    """
    # Global L2 norm. The square root is taken exactly once so that
    # `rescale` is compared against the norm itself.
    grad_norm = tensor.sqrt(sum(map(lambda x: tensor.sqr(x).sum(), grads)))
    not_finite = tensor.or_(tensor.isnan(grad_norm),
                            tensor.isinf(grad_norm))
    scaling_num = rescale
    scaling_den = tensor.maximum(rescale, grad_norm)
    # Magic constants
    combination_coeff = 0.9
    minimum_grad = 1E-4
    updates = []
    for n, (param, grad) in enumerate(zip(params, grads)):
        grad = tensor.switch(not_finite, 0.1 * param,
                             grad * (scaling_num / scaling_den))
        # Exponential moving averages of grad**2 and grad.
        old_square = self.running_square_[n]
        new_square = combination_coeff * old_square + (
            1. - combination_coeff) * tensor.sqr(grad)
        old_avg = self.running_avg_[n]
        new_avg = combination_coeff * old_avg + (
            1. - combination_coeff) * grad
        # Running standard deviation, floored at `minimum_grad`.
        rms_grad = tensor.sqrt(new_square - new_avg ** 2)
        rms_grad = tensor.maximum(rms_grad, minimum_grad)
        memory = self.memory_[n]
        # `update` is stored as the new memory; `update2` is the step
        # actually applied to the parameter.
        update = momentum * memory - learning_rate * grad / rms_grad
        update2 = momentum * momentum * memory - (
            1 + momentum) * learning_rate * grad / rms_grad
        updates.append((old_square, new_square))
        updates.append((old_avg, new_avg))
        updates.append((memory, update))
        updates.append((param, param + update2))
    return updates
示例11: get_output_for
# 需要导入模块: from theano import tensor [as 别名]
# 或者: from theano.tensor import or_ [as 别名]
def get_output_for(self, input, deterministic=False, **kwargs):
    """Return a retain mask combined with ``self.previous_mask``.

    When ``deterministic`` is true or ``self.p == 0`` the result is
    ``ones_like(self.retain)`` cast to ``input.dtype``. Otherwise a
    binomial mask with success probability ``self.retain`` is sampled from
    ``self._srng`` and OR-ed with ``self.previous_mask``.

    The mask itself is returned: ``input`` only supplies the dtype, the
    number of dimensions and (if needed) the shape, and no rescaling by the
    retain probability is applied.
    """
    if deterministic or self.p == 0:
        return T.ones_like(self.retain, dtype=input.dtype)

    # Prefer the non-symbolic shape; fall back to the symbolic one as soon
    # as any dimension is unknown.
    shape = self.input_shape
    if any(dim is None for dim in shape):
        shape = input.shape

    # Axes listed in `shared_axes` get size 1 in the sampled mask.
    if self.shared_axes:
        wrapped_axes = tuple(axis + input.ndim if axis < 0 else axis
                             for axis in self.shared_axes)
        shape = tuple(1 if idx in wrapped_axes else dim
                      for idx, dim in enumerate(shape))

    sampled = self._srng.binomial(shape, p=self.retain, dtype=input.dtype)
    combined = T.or_(sampled, self.previous_mask)

    if self.shared_axes:
        # Mark the size-1 axes as broadcastable.
        pattern = tuple(bool(dim == 1) for dim in shape)
        combined = T.patternbroadcast(combined, pattern)
    return combined
示例12: compute_updates
# 需要导入模块: from theano import tensor [as 别名]
# 或者: from theano.tensor import or_ [as 别名]
def compute_updates(self, training_cost, params):
    """Clip gradients, optionally mask word embeddings, and pick an optimizer.

    Gradients of ``training_cost`` with respect to ``params`` are rescaled
    by ``cutoff / norm`` when their global L2 norm reaches ``self.cutoff``;
    when the norm is NaN or infinite each gradient is replaced by
    ``0.1 * p``. If ``self.W_emb`` is among the parameters and pretrained
    embeddings are to be kept fixed, its gradient is multiplied by
    ``self.W_emb_pretrained_mask``.

    Returns ``(updates, optimizer_variables)``. ``optimizer_variables`` is
    an empty list unless ``self.updater == 'adam'``, in which case it is
    the second value returned by ``Adam``.

    Raises ``Exception`` for ``'sgd'`` (not implemented) and for any
    unrecognised ``self.updater`` value.
    """
    raw_grads = T.grad(training_cost, params)
    by_param = OrderedDict(zip(params, raw_grads))

    # Gradient clipping
    cutoff = numpy.float32(self.cutoff)
    total_norm = T.sqrt(sum(T.sum(g ** 2) for g in by_param.values()))
    scale = T.switch(T.ge(total_norm, cutoff), cutoff / total_norm, np.float32(1.))
    is_bad = T.or_(T.isnan(total_norm), T.isinf(total_norm))
    clipped = OrderedDict(
        (p, T.switch(is_bad, numpy.float32(.1) * p, g * scale))
        for p, g in by_param.items()
    )

    if self.W_emb in clipped:
        if self.initialize_from_pretrained_word_embeddings and self.fix_pretrained_word_embeddings:
            assert not self.fix_encoder_parameters
            # Keep pretrained word embeddings fixed
            logger.debug("Will use mask to fix pretrained word embeddings")
            clipped[self.W_emb] = clipped[self.W_emb] * self.W_emb_pretrained_mask
        elif self.fix_encoder_parameters:
            # If 'fix_encoder_parameters' is on, the word embeddings will be excluded from parameter training set
            logger.debug("Will fix word embeddings to initial embeddings or embeddings from resumed model")
        else:
            logger.debug("Will train all word embeddings")

    optimizer_variables = []
    kind = self.updater
    if kind == 'adagrad':
        updates = Adagrad(clipped, self.lr)
    elif kind == 'sgd':
        raise Exception("Sgd not implemented!")
    elif kind == 'adadelta':
        updates = Adadelta(clipped)
    elif kind == 'rmsprop':
        updates = RMSProp(clipped, self.lr)
    elif kind == 'adam':
        updates, optimizer_variables = Adam(clipped, self.lr)
    else:
        raise Exception("Updater not understood!")
    return updates, optimizer_variables
# Batch training function.
示例13: rmsprop
# 需要导入模块: from theano import tensor [as 别名]
# 或者: from theano.tensor import or_ [as 别名]
def rmsprop(cost, params, learning_rate, momentum=0.5, rescale=5.):
    """Build RMSProp-style updates with momentum and gradient-norm rescaling.

    Every gradient of ``cost`` with respect to ``params`` is multiplied by
    ``rescale / max(rescale, grad_norm)`` where ``grad_norm`` is the L2
    norm over all gradients, so the rescaled global norm never exceeds
    ``rescale``. If the norm is NaN or infinite, each gradient is replaced
    by ``0.1 * param``.

    Three zero-initialised shared variables (running square, running
    average, memory) are created per parameter, with the parameter's dtype
    and broadcastable pattern.

    Returns a list of ``(variable, new_expression)`` pairs covering those
    three state variables and the parameter itself.
    """
    def _zero_state(p):
        # Zero-filled shared variable shaped like the parameter's value.
        return theano.shared(np.zeros_like(p.get_value(), dtype=p.dtype),
                             broadcastable=p.broadcastable)

    grads = T.grad(cost=cost, wrt=params)
    running_square_ = [_zero_state(p) for p in params]
    running_avg_ = [_zero_state(p) for p in params]
    memory_ = [_zero_state(p) for p in params]

    # Global L2 norm. The square root is taken exactly once so that
    # `rescale` is compared against the norm itself.
    grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grads)))
    not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
    scaling_num = rescale
    scaling_den = T.maximum(rescale, grad_norm)
    # Magic constants
    combination_coeff = 0.9
    minimum_grad = 1E-4
    updates = []
    for n, (param, grad) in enumerate(zip(params, grads)):
        grad = T.switch(not_finite, 0.1 * param,
                        grad * (scaling_num / scaling_den))
        # Exponential moving averages of grad**2 and grad.
        old_square = running_square_[n]
        new_square = combination_coeff * old_square + (
            1. - combination_coeff) * T.sqr(grad)
        old_avg = running_avg_[n]
        new_avg = combination_coeff * old_avg + (
            1. - combination_coeff) * grad
        # Running standard deviation, floored at `minimum_grad`.
        rms_grad = T.sqrt(new_square - new_avg ** 2)
        rms_grad = T.maximum(rms_grad, minimum_grad)
        memory = memory_[n]
        # `update` is stored as the new memory; `update2` is the step
        # actually applied to the parameter.
        update = momentum * memory - learning_rate * grad / rms_grad
        update2 = momentum * momentum * memory - (
            1 + momentum) * learning_rate * grad / rms_grad
        updates.append((old_square, new_square))
        updates.append((old_avg, new_avg))
        updates.append((memory, update))
        updates.append((param, param + update2))
    return updates
示例14: compute_updates
# 需要导入模块: from theano import tensor [as 别名]
# 或者: from theano.tensor import or_ [as 别名]
def compute_updates(self, training_cost, params):
    """Clip gradients, optionally mask word embeddings, and pick an optimizer.

    Gradients of ``training_cost`` with respect to ``params`` are rescaled
    by ``cutoff / norm`` when their global L2 norm reaches ``self.cutoff``;
    when the norm is NaN or infinite each gradient is replaced by
    ``0.1 * p``. When pretrained embeddings are to be kept fixed, the
    gradient stored under ``self.W_emb`` is multiplied by
    ``self.W_emb_pretrained_mask``.

    Returns whatever the helper selected by ``self.updater`` (``Adagrad``,
    ``Adadelta``, ``RMSProp`` or ``Adam``) returns.

    Raises ``Exception`` for ``'sgd'`` (not implemented) and for any
    unrecognised ``self.updater`` value.
    """
    raw_grads = T.grad(training_cost, params)
    by_param = OrderedDict(zip(params, raw_grads))

    # Gradient clipping
    cutoff = numpy.float32(self.cutoff)
    total_norm = T.sqrt(sum(T.sum(g ** 2) for g in by_param.values()))
    scale = T.switch(T.ge(total_norm, cutoff), cutoff / total_norm, np.float32(1.))
    is_bad = T.or_(T.isnan(total_norm), T.isinf(total_norm))
    clipped = OrderedDict(
        (p, T.switch(is_bad, numpy.float32(.1) * p, g * scale))
        for p, g in by_param.items()
    )

    if self.initialize_from_pretrained_word_embeddings and self.fix_pretrained_word_embeddings:
        assert not self.fix_encoder_parameters
        # Keep pretrained word embeddings fixed
        logger.debug("Will use mask to fix pretrained word embeddings")
        clipped[self.W_emb] = clipped[self.W_emb] * self.W_emb_pretrained_mask
    elif self.fix_encoder_parameters:
        # If 'fix_encoder_parameters' is on, the word embeddings will be excluded from parameter training set
        logger.debug("Will fix word embeddings to initial embeddings or embeddings from resumed model")
    else:
        logger.debug("Will train all word embeddings")

    kind = self.updater
    if kind == 'adagrad':
        return Adagrad(clipped, self.lr)
    if kind == 'sgd':
        raise Exception("Sgd not implemented!")
    if kind == 'adadelta':
        return Adadelta(clipped)
    if kind == 'rmsprop':
        return RMSProp(clipped, self.lr)
    if kind == 'adam':
        return Adam(clipped, self.lr)
    raise Exception("Updater not understood!")
# Batch training function.