This article collects typical usage examples of the theano.tensor.or_ function in Python. If you have been wondering what exactly or_ does, how to call it, and what real code that uses it looks like, the examples selected here may help.
The 15 code examples of the or_ function below are sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
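Before the examples, here is a minimal sketch of what tensor.or_ computes: it is the elementwise logical/bitwise OR, most often applied to the int8 masks that Theano comparison operators return. The variable names below are illustrative.

import numpy as np
import theano
import theano.tensor as T

x = T.dvector('x')
# 1 where x lies outside [0, 1), 0 elsewhere
outside = T.or_(T.lt(x, 0.), T.ge(x, 1.))
f = theano.function([x], outside)
print(f(np.array([-0.5, 0.25, 1.5])))  # -> [1 0 1]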
Example 1: get_updates
def get_updates(self, loss, lr, max_norm=1, beta1=0.9, beta2=0.999,
                epsilon=1e-8, grads=None):
    # Gradients
    if grads is None:
        grads = tensor.grad(loss, self.trainables)

    # Clipping
    norm = tensor.sqrt(sum([tensor.sqr(g).sum() for g in grads]))
    m = theanotools.clipping_multiplier(norm, max_norm)
    grads = [m*g for g in grads]

    # Safeguard against numerical instability: zero all gradients whenever
    # the norm is NaN, infinite, negative, or implausibly large
    new_cond = tensor.or_(tensor.or_(tensor.isnan(norm), tensor.isinf(norm)),
                          tensor.or_(norm < 0, norm > 1e10))
    grads = [tensor.switch(new_cond, np.float32(0), g) for g in grads]

    # New values
    t = self.time + 1
    lr_t = lr*tensor.sqrt(1. - beta2**t)/(1. - beta1**t)
    means_t = [beta1*m + (1. - beta1)*g for g, m in zip(grads, self.means)]
    vars_t = [beta2*v + (1. - beta2)*tensor.sqr(g) for g, v in zip(grads, self.vars)]
    steps = [lr_t*m_t/(tensor.sqrt(v_t) + epsilon)
             for m_t, v_t in zip(means_t, vars_t)]

    # Updates
    updates = [(x, x - step) for x, step in zip(self.trainables, steps)]
    updates += [(m, m_t) for m, m_t in zip(self.means, means_t)]
    updates += [(v, v_t) for v, v_t in zip(self.vars, vars_t)]
    updates += [(self.time, t)]

    return norm, grads, updates
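The pattern above, guarding an optimizer against a non-finite or exploding gradient norm, recurs in several examples below. A minimal standalone sketch of just that guard, under assumed toy inputs:

import numpy as np
import theano
import theano.tensor as tensor

g = tensor.dvector('g')
norm = tensor.sqrt(tensor.sqr(g).sum())
# True when the norm is NaN, infinite, negative, or implausibly large
bad = tensor.or_(tensor.or_(tensor.isnan(norm), tensor.isinf(norm)),
                 tensor.or_(norm < 0, norm > 1e10))
safe_g = tensor.switch(bad, 0., g)
f = theano.function([g], safe_g)
print(f(np.array([3., 4.])))      # unchanged: [ 3.  4.]
print(f(np.array([np.inf, 4.])))  # zeroed:    [ 0.  0.]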
Example 2: __init__
def __init__(self, random_state=None, low=0.0, high=1.0):
    super(Uniform, self).__init__(low=low, high=high,
                                  random_state=random_state,
                                  optimizer=None)

    # pdf
    self.pdf_ = T.switch(
        T.or_(T.lt(self.X, self.low), T.ge(self.X, self.high)),
        0.,
        1. / (self.high - self.low)).ravel()
    self.make_(self.pdf_, "pdf")

    # -log pdf
    self.nnlf_ = T.switch(
        T.or_(T.lt(self.X, self.low), T.ge(self.X, self.high)),
        np.inf,
        T.log(self.high - self.low)).ravel()
    self.make_(self.nnlf_, "nnlf")

    # cdf
    self.cdf_ = T.switch(
        T.lt(self.X, self.low),
        0.,
        T.switch(
            T.lt(self.X, self.high),
            (self.X - self.low) / (self.high - self.low),
            1.)).ravel()
    self.make_(self.cdf_, "cdf")

    # ppf
    self.ppf_ = self.p * (self.high - self.low) + self.low
    self.make_(self.ppf_, "ppf", args=[self.p])
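A hedged sketch of how the support test above behaves once compiled, with low and high fixed to toy constants rather than the class's shared variables:

import numpy as np
import theano
import theano.tensor as T

X = T.dvector('X')
low, high = 0.0, 2.0
# Density is 0 outside [low, high), 1/(high - low) inside
pdf = T.switch(T.or_(T.lt(X, low), T.ge(X, high)),
               0.,
               1. / (high - low))
f = theano.function([X], pdf)
print(f(np.array([-1.0, 1.0, 3.0])))  # -> [ 0.   0.5  0. ]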
Example 3: get_train
def get_train(U_Ot, U_R, lenW, n_facts):
    def phi_x1(x_t, L):
        return T.concatenate([L[x_t].reshape((-1,)), zeros((2*lenW,)), zeros((3,))], axis=0)
    def phi_x2(x_t, L):
        return T.concatenate([zeros((lenW,)), L[x_t].reshape((-1,)), zeros((lenW,)), zeros((3,))], axis=0)
    def phi_y(x_t, L):
        return T.concatenate([zeros((2*lenW,)), L[x_t].reshape((-1,)), zeros((3,))], axis=0)
    def phi_t(x_t, y_t, yp_t, L):
        return T.concatenate([zeros((3*lenW,)), T.stack(T.switch(T.lt(x_t, y_t), 1, 0), T.switch(T.lt(x_t, yp_t), 1, 0), T.switch(T.lt(y_t, yp_t), 1, 0))], axis=0)
    def s_Ot(xs, y_t, yp_t, L):
        result, updates = theano.scan(
            lambda x_t, t: T.dot(T.dot(T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1, -1)), phi_x2(x_t, L).reshape((1, -1))), U_Ot.T),
                                 T.dot(U_Ot, (phi_y(y_t, L) - phi_y(yp_t, L) + phi_t(x_t, y_t, yp_t, L)))),
            sequences=[xs, T.arange(T.shape(xs)[0])])
        return result.sum()
    def sR(xs, y_t, L, V):
        result, updates = theano.scan(
            lambda x_t, t: T.dot(T.dot(T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1, -1)), phi_x2(x_t, L).reshape((1, -1))), U_R.T),
                                 T.dot(U_R, phi_y(y_t, V))),
            sequences=[xs, T.arange(T.shape(xs)[0])])
        return result.sum()

    x_t = T.iscalar('x_t')
    m = [x_t] + [T.iscalar('m_o%d' % i) for i in xrange(n_facts)]
    f = [T.iscalar('f%d_t' % i) for i in xrange(n_facts)]
    r_t = T.iscalar('r_t')
    gamma = T.scalar('gamma')
    L = T.fmatrix('L')  # list of messages
    V = T.fmatrix('V')  # vocab
    r_args = T.stack(*m)

    cost_arr = [0] * 2 * (len(m) - 1)
    updates_arr = [0] * 2 * (len(m) - 1)
    for i in xrange(len(m) - 1):
        # Skip the true supporting fact f[i] and the last row of L;
        # every other row contributes a hinge margin
        cost_arr[2*i], updates_arr[2*i] = theano.scan(
            lambda f_bar, t: T.switch(T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)[0] - 1)), 0, T.largest(gamma - s_Ot(T.stack(*m[:i+1]), f[i], t, L), 0)),
            sequences=[L, T.arange(T.shape(L)[0])])
        cost_arr[2*i+1], updates_arr[2*i+1] = theano.scan(
            lambda f_bar, t: T.switch(T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)[0] - 1)), 0, T.largest(gamma + s_Ot(T.stack(*m[:i+1]), t, f[i], L), 0)),
            sequences=[L, T.arange(T.shape(L)[0])])

    cost1, u1 = theano.scan(
        lambda r_bar, t: T.switch(T.eq(r_t, t), 0, T.largest(gamma - sR(r_args, r_t, L, V) + sR(r_args, t, L, V), 0)),
        sequences=[V, T.arange(T.shape(V)[0])])
    cost = cost1.sum()
    for c in cost_arr:
        cost += c.sum()

    g_uo, g_ur = T.grad(cost, [U_Ot, U_R])

    train = theano.function(
        inputs=[r_t, gamma, L, V] + m + f,
        outputs=[cost],
        updates=[(U_Ot, U_Ot - alpha*g_uo), (U_R, U_R - alpha*g_ur)])
    return train
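Inside the scans above, T.or_ excludes two candidate rows from the margin loss: the true supporting fact and the terminating row of L. An illustrative scalar version of that switch, with all inputs reduced to toy scalars (names here are hypothetical):

import theano
import theano.tensor as T

t = T.iscalar('t')          # candidate index
f_true = T.iscalar('f')     # index of the true supporting fact
last = T.iscalar('last')    # index of the terminating row
hinge = T.dscalar('hinge')  # a precomputed margin term
loss = T.switch(T.or_(T.eq(t, f_true), T.eq(t, last)), 0., T.largest(hinge, 0.))
fn = theano.function([t, f_true, last, hinge], loss)
print(fn(2, 2, 9, 0.7))  # -> 0.0 (candidate is the true fact)
print(fn(3, 2, 9, 0.7))  # -> 0.7 (margin violation is counted)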
Example 4: get_output_for
def get_output_for(self, input, deterministic=False, **kwargs):
    if deterministic or self.p == 0:
        return T.ones_like(self.retain, dtype=input.dtype)
    else:
        # Use a non-symbolic shape for the dropout mask if possible
        mask_shape = self.input_shape
        if any(s is None for s in mask_shape):
            mask_shape = input.shape

        # Apply dropout, respecting shared axes
        if self.shared_axes:
            shared_axes = tuple(a if a >= 0 else a + input.ndim
                                for a in self.shared_axes)
            mask_shape = tuple(1 if a in shared_axes else s
                               for a, s in enumerate(mask_shape))
        mask = self._srng.binomial(mask_shape, p=self.retain,
                                   dtype=input.dtype)
        # Keep a unit if either the new mask or the previous mask keeps it
        mask = T.or_(mask, self.previous_mask)
        if self.shared_axes:
            bcast = tuple(bool(s == 1) for s in mask_shape)
            mask = T.patternbroadcast(mask, bcast)
        return mask
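A small sketch of the mask-combination step above: T.or_ merges a freshly sampled binary mask with a previously fixed one, so a unit stays active if either mask keeps it. The RandomStreams setup here is illustrative, not the layer's actual _srng:

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

srng = RandomStreams(seed=42)
new_mask = srng.binomial((5,), p=0.5, dtype='int8')  # fresh Bernoulli mask
previous_mask = T.bvector('previous_mask')
combined = T.or_(new_mask, previous_mask)
f = theano.function([previous_mask], combined)
print(f(np.array([1, 0, 0, 1, 0], dtype='int8')))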
Example 5: compute_cost_log_in_parallel
def compute_cost_log_in_parallel(original_rnn_outputs, labels, func, x_ends, y_ends):
    # Forbid skip transitions into blanks (label 0) and into repeated labels:
    # the mask is 0 where allowed and -inf where forbidden (log domain)
    mask = T.log(1 - T.or_(T.eq(labels, T.zeros_like(labels)), T.eq(labels, shift_matrix(labels, 2))))

    initial_state = T.log(T.zeros_like(labels))
    initial_state = T.set_subtensor(initial_state[:, 0], 0)

    def select_probabilities(rnn_outputs, label):
        return rnn_outputs[:, label]

    rnn_outputs, _ = theano.map(select_probabilities, [original_rnn_outputs, labels])
    rnn_outputs = T.log(rnn_outputs.dimshuffle((1, 0, 2)))

    def forward_step(probabilities, last_probabilities):
        all_forward_probabilities = T.stack(
            last_probabilities + probabilities,
            log_shift_matrix(last_probabilities, 1) + probabilities,
            log_shift_matrix(last_probabilities, 2) + probabilities + mask,
        )
        result = func(all_forward_probabilities, 0)
        return result

    forward_probabilities, _ = theano.scan(fn=forward_step, sequences=rnn_outputs, outputs_info=initial_state)
    forward_probabilities = forward_probabilities.dimshuffle((1, 0, 2))

    def compute_cost(forward_probabilities, x_end, y_end):
        return -func(forward_probabilities[x_end - 1, y_end - 2:y_end])

    return theano.map(compute_cost, [forward_probabilities, x_ends, y_ends])[0]
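An illustrative sketch of the mask construction at the top of this example: T.or_ flags label positions that are blank (label 0) or repeat the label two steps earlier, and T.log(1 - flag) maps allowed positions to 0 and forbidden ones to -inf for log-space accumulation. The helper shift_matrix is replaced here by a plain second input:

import numpy as np
import theano
import theano.tensor as T

labels = T.imatrix('labels')
shifted = T.imatrix('shifted')  # stands in for shift_matrix(labels, 2)
mask = T.log(1 - T.or_(T.eq(labels, T.zeros_like(labels)),
                       T.eq(labels, shifted)))
f = theano.function([labels, shifted], mask)
print(f(np.array([[0, 3, 3]], dtype='int32'),
        np.array([[9, 9, 3]], dtype='int32')))  # -> [[-inf 0. -inf]]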
Example 6: truncated_normal
def truncated_normal(size, avg, std, lbound, ubound, theano_rng, dtype):
    def phi(x):
        erfarg = (x - avg) / (std * SQRT2)
        rval = 0.5 * (1. + T.erf(erfarg))
        return rval.astype(dtype)

    def phi_inv(phi_x):
        erfinv_input = T.clip(2. * phi_x - 1., -1.+1e-6, 1.-1e-6)
        rval = avg + std * SQRT2 * T.erfinv(erfinv_input)
        return rval.astype(dtype)

    # center lower and upper bounds based on mean
    u = theano_rng.uniform(size=size, dtype=dtype)
    cdf_range = phi(ubound) - phi(lbound)
    sample = phi_inv(phi(lbound) + u * cdf_range)

    # if avg >> ubound, return ubound
    # if avg << lbound, return lbound
    # else return phi_inv(phi(lbound) + u * [phi(ubound) - phi(lbound)])
    rval = T.switch(
        T.or_(sample < lbound, sample > ubound),
        T.switch(avg >= ubound, ubound, lbound),
        sample)
    return rval
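A toy instance of the fallback at the end of this function: when numerical error pushes the inverse-CDF sample outside [lbound, ubound], T.or_ detects it and the outer T.switch snaps the sample to a bound chosen by where the mean sits. The constants below are illustrative:

import numpy as np
import theano
import theano.tensor as T

sample = T.dvector('sample')
avg, lbound, ubound = 0.0, -1.0, 1.0
rval = T.switch(T.or_(sample < lbound, sample > ubound),
                T.switch(avg >= ubound, ubound, lbound),
                sample)
f = theano.function([sample], rval)
print(f(np.array([-2.0, 0.3, 5.0])))  # -> [-1.   0.3 -1. ]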
Example 7: adamgc_
def adamgc_(cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8, max_magnitude=5.0, infDecay=0.1):
    updates = []
    grads = T.grad(cost, params)

    norm = norm_gs(params, grads)
    sqrtnorm = T.sqrt(norm)
    not_finite = T.or_(T.isnan(sqrtnorm), T.isinf(sqrtnorm))
    adj_norm_gs = T.switch(T.ge(sqrtnorm, max_magnitude), max_magnitude / sqrtnorm, 1.0)

    i = shared(floatX(0.0))
    i_t = i + 1.0
    fix1 = 1.0 - (1.0 - b1) ** i_t
    fix2 = 1.0 - (1.0 - b2) ** i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)

    for p, g in zip(params, grads):
        # Fall back to a parameter decay when the gradient norm is not finite
        g = T.switch(not_finite, infDecay * p, g * adj_norm_gs)
        m = shared(p.get_value() * 0.0)
        v = shared(p.get_value() * 0.0)
        m_t = (b1 * g) + ((1.0 - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1.0 - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        # e_t = shared(p.get_value() * 0.)
        # de_t = (srnd.normal(p.shape, std=0.05, dtype=theano.config.floatX)*p_t - e_t)*0.05
        # p_t = p_t + de_t
        # updates.append((e_t, e_t + de_t))
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))

    return updates, norm
Example 8: theano_metrics
def theano_metrics(y_pred, y_true, n_classes, void_labels):
    """
    Returns the intersection I and union U (to compute the Jaccard index I/U) and the accuracy.

    :param y_pred: tensor of predictions, shape (b*0*1, c) with c = n_classes
    :param y_true: ground truth, shape (b,0,1) or (b,c,0,1) with c=1
    :param n_classes: int
    :param void_labels: list of indices of void labels
    :return: tensors I and U of size (n_classes), and scalar acc
    """
    # Put y_pred and y_true under the same shape
    y_true = T.flatten(y_true)
    y_pred = T.argmax(y_pred, axis=1)

    # We use not_void in case the prediction falls in the void class of the ground truth
    for i in range(len(void_labels)):
        if i == 0:
            not_void = T.neq(y_true, void_labels[i])
        else:
            not_void = not_void * T.neq(y_true, void_labels[i])

    I = T.zeros(n_classes)
    U = T.zeros(n_classes)

    for i in range(n_classes):
        y_true_i = T.eq(y_true, i)
        y_pred_i = T.eq(y_pred, i)
        I = T.set_subtensor(I[i], T.sum(y_true_i * y_pred_i))
        U = T.set_subtensor(U[i], T.sum(T.or_(y_true_i, y_pred_i) * not_void))

    accuracy = T.sum(I) / T.sum(not_void)
    return I, U, accuracy
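A compact sketch of the per-class union computed above: T.or_ builds the pixelwise union mask between ground truth and prediction for one class, from which the Jaccard index follows. Toy labels, class index 1:

import numpy as np
import theano
import theano.tensor as T

y_true = T.ivector('y_true')
y_pred = T.ivector('y_pred')
i = 1  # class index
y_true_i = T.eq(y_true, i)
y_pred_i = T.eq(y_pred, i)
intersection = T.sum(y_true_i * y_pred_i)
union = T.sum(T.or_(y_true_i, y_pred_i))
f = theano.function([y_true, y_pred], [intersection, union])
print(f(np.array([1, 1, 0], dtype='int32'),
        np.array([1, 0, 1], dtype='int32')))  # intersection 1, union 3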
Example 9: exe
def exe(self, mainloop):
    """
    Normalize gradients by batch size, rescale them when their global norm
    exceeds self.scaler, and fall back to a parameter decay when the norm
    is not finite.
    """
    grads = mainloop.grads

    g_norm = 0.
    for p, g in grads.items():
        g /= self.batch_size
        grads[p] = g
        g_norm += (g**2).sum()

    not_finite = T.or_(T.isnan(g_norm), T.isinf(g_norm))
    g_norm = T.sqrt(g_norm)
    scaler = self.scaler / T.maximum(self.scaler, g_norm)

    for p, g in grads.items():
        grads[p] = T.switch(not_finite, 0.1 * p, g * scaler)

    mainloop.grads = grads
Example 10: mcmc
def mcmc(ll, *frvs):
    full_observations = dict(observations)
    full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, frvs)]))

    loglik = -full_log_likelihood(full_observations)

    proposals = free_RVs_prop
    H = tensor.add(*[tensor.sum(tensor.sqr(p)) for p in proposals])/2. + loglik

    # -- this should be an inner loop
    g = []
    g.append(tensor.grad(loglik, frvs))
    proposals = [(p - epsilon*gg[0]/2.) for p, gg in zip(proposals, g)]

    rvsp = [(rvs + epsilon*rvp) for rvs, rvp in zip(frvs, proposals)]

    full_observations = dict(observations)
    full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, rvsp)]))
    new_loglik = -full_log_likelihood(full_observations)

    gnew = []
    gnew.append(tensor.grad(new_loglik, rvsp))
    proposals = [(p - epsilon*gn[0]/2.) for p, gn in zip(proposals, gnew)]
    # --

    Hnew = tensor.add(*[tensor.sum(tensor.sqr(p)) for p in proposals])/2. + new_loglik

    dH = Hnew - H
    accept = tensor.or_(dH < 0., U < tensor.exp(-dH))

    return [tensor.switch(accept, -new_loglik, ll)] + \
           [tensor.switch(accept, p, f) for p, f in zip(rvsp, frvs)], \
           {}, theano.scan_module.until(accept)
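The acceptance rule above is the standard Metropolis test written with tensor.or_: take the proposal when the Hamiltonian decreases, or with probability exp(-dH) otherwise. A scalar sketch with dH and the uniform draw U as plain inputs:

import theano
import theano.tensor as tensor

dH = tensor.dscalar('dH')
U = tensor.dscalar('U')  # a uniform(0, 1) draw
accept = tensor.or_(dH < 0., U < tensor.exp(-dH))
f = theano.function([dH, U], accept)
print(f(-0.5, 0.9))  # -> 1: energy decreased, always accept
print(f(2.0, 0.5))   # -> 0: exp(-2) is about 0.14 < 0.5, reject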
Example 11: graves_rmsprop_updates
def graves_rmsprop_updates(self, params, grads, learning_rate=1e-4, alpha=0.9, epsilon=1e-4, chi=0.95):
    """
    Alex Graves' RMSProp [1]_.

    .. math ::

        n_{i} &= \chi * n_{i-1} + (1 - \chi) * grad^{2}\\
        g_{i} &= \chi * g_{i-1} + (1 - \chi) * grad\\
        \Delta_{i} &= \alpha * \Delta_{i-1} - learning\_rate * grad /
                      \sqrt{n_{i} - g_{i}^{2} + \epsilon}\\
        w_{i} &= w_{i-1} + \Delta_{i}

    References
    ----------
    .. [1] Graves, Alex.
       "Generating Sequences With Recurrent Neural Networks", p. 23,
       arXiv:1308.0850.
    """
    updates = []
    grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grads)))
    not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))

    for n, (param, grad) in enumerate(zip(params, grads)):
        grad = T.switch(not_finite, 0.1 * param, grad)
        old_square = self.running_square_[n]
        old_avg = self.running_avg_[n]
        old_memory = self.memory_[n]
        new_square = chi * old_square + (1. - chi) * grad ** 2
        new_avg = chi * old_avg + (1. - chi) * grad
        new_memory = alpha * old_memory - learning_rate * grad / T.sqrt(
            new_square - new_avg ** 2 + epsilon)
        updates.append((old_square, new_square))
        updates.append((old_avg, new_avg))
        updates.append((old_memory, new_memory))
        updates.append((param, param + new_memory))
    return updates
Example 12: exe
def exe(self, mainloop):
    """
    Normalize gradients by batch size and rescale them when their global
    norm exceeds self.scaler; optionally guard against non-finite norms.
    """
    grads = mainloop.grads

    g_norm = 0.
    for p, g in grads.items():
        g /= T.cast(self.batch_size, dtype=theano.config.floatX)
        grads[p] = g
        g_norm += (g**2).sum()

    if self.check_nan:
        not_finite = T.or_(T.isnan(g_norm), T.isinf(g_norm))

    g_norm = T.sqrt(g_norm)
    scaler = self.scaler / T.maximum(self.scaler, g_norm)

    if self.check_nan:
        for p, g in grads.items():
            grads[p] = T.switch(not_finite, 0.1 * p, g * scaler)
    else:
        for p, g in grads.items():
            grads[p] = g * scaler

    mainloop.grads = grads
Example 13: tnormal_icdf
def tnormal_icdf(size, avg, std, lbound, ubound, theano_rng, dtype):
    """
    Alternative Method:
    sample = -Phi_inv(Phi(-lbound)*(1-u) + Phi(-ubound)*u)
    """
    def Phi(x):
        erfarg = (x - avg) / (std * SQRT2)
        rval = 0.5 * (1. + T.erf(erfarg))
        return rval.astype(dtype)

    def Phi_inv(y, eps=3e-8):
        """ eps was calibrated for cublas.erfinv using float32 """
        temp = 2. * y - 1.
        erfinv_input = T.clip(temp, -1+eps, 1-eps)
        rval = avg + std * SQRT2 * T.erfinv(erfinv_input)
        return rval.astype(dtype)

    # center lower and upper bounds based on mean
    u = theano_rng.uniform(size=size, dtype=dtype)

    # Inverse CDF method. When the method becomes numerically unstable, we
    # simply return the bounds based on whether avg < lbound or ubound < avg.
    cdf_range = Phi(ubound) - Phi(lbound)
    sample = T.switch(
        T.or_(
            T.lt(cdf_range, 3e-8),
            T.gt(cdf_range, 1-3e-8)),
        T.switch(
            T.lt(avg, lbound),
            lbound,
            ubound),
        Phi_inv(Phi(lbound) + u * cdf_range))

    return sample
Example 14: minimize
def minimize(self, loss, momentum, rescale):
    super(RMSPropOptimizer, self).minimize(loss)
    grads = self.gradparams

    # Squared global gradient norm; the square root is taken only after
    # the finiteness check
    grad_norm = sum(map(lambda x: T.sqr(x).sum(), grads))
    not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
    grad_norm = T.sqrt(grad_norm)
    scaling_num = rescale
    scaling_den = T.maximum(rescale, grad_norm)

    # Magic constants
    combination_coeff = 0.9
    minimum_grad = 1E-4

    updates = []
    params = self.params
    for n, (param, grad) in enumerate(zip(params, grads)):
        grad = T.switch(not_finite, 0.1 * param,
                        grad * (scaling_num / scaling_den))
        old_square = self.running_square_[n]
        new_square = combination_coeff * old_square + (
            1. - combination_coeff) * T.sqr(grad)
        old_avg = self.running_avg_[n]
        new_avg = combination_coeff * old_avg + (
            1. - combination_coeff) * grad
        rms_grad = T.sqrt(new_square - new_avg ** 2)
        rms_grad = T.maximum(rms_grad, minimum_grad)
        memory = self.memory_[n]
        update = momentum * memory - self.lr * grad / rms_grad
        update2 = momentum * momentum * memory - (
            1 + momentum) * self.lr * grad / rms_grad
        updates.append((old_square, new_square))
        updates.append((old_avg, new_avg))
        updates.append((memory, update))
        updates.append((param, param + update2))
    return updates
Example 15: adamgc
def adamgc(cost, params, lr=0.0002, b1=0.1, b2=0.001, e=1e-8, max_magnitude=5.0, infDecay=0.1):
    updates = []
    grads = T.grad(cost, params)

    norm = norm_gs(params, grads)
    sqrtnorm = T.sqrt(norm)
    not_finite = T.or_(T.isnan(sqrtnorm), T.isinf(sqrtnorm))
    adj_norm_gs = T.switch(T.ge(sqrtnorm, max_magnitude), max_magnitude / sqrtnorm, 1.)

    i = shared(floatX(0.))
    i_t = i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)

    for p, g in zip(params, grads):
        # Fall back to a parameter decay when the gradient norm is not finite
        g = T.switch(not_finite, infDecay * p, g * adj_norm_gs)
        m = shared(p.get_value() * 0.)
        v = shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))

    return updates, norm