This article collects typical usage examples of the Python function theano.tensor.inc_subtensor. If you are wondering what inc_subtensor does, how to call it, or what real code that uses it looks like, the curated examples below should help.
A total of 15 code examples of the inc_subtensor function are shown below, sorted by popularity by default.
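Before diving into the examples, here is a minimal standalone sketch (written for this overview, not taken from any example below) of what the function does: T.inc_subtensor adds a value to the selected entries of a tensor, T.set_subtensor overwrites them, and both return a new symbolic tensor rather than modifying the input in place.

import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')
incremented = T.inc_subtensor(x[1:3], np.float32(10.0))  # add 10 to entries 1 and 2
overwritten = T.set_subtensor(x[1:3], np.float32(10.0))  # overwrite entries 1 and 2 with 10
f = theano.function([x], [incremented, overwritten], allow_input_downcast=True)
print(f(np.ones(4)))  # [array([ 1., 11., 11.,  1.]), array([ 1., 10., 10.,  1.])]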
Example 1: adadelta
def adadelta(self, param, grad, updates, sample_idx=None, epsilon=1e-6):
    v1 = np.float32(self.adapt_params[0])
    v2 = np.float32(1.0 - self.adapt_params[0])
    acc = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
    upd = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
    if sample_idx is None:
        acc_new = v1 * acc + v2 * (grad ** 2)
        updates[acc] = acc_new
        grad_scaling = (upd + epsilon) / (acc_new + epsilon)
        upd_new = v1 * upd + v2 * grad_scaling * (grad ** 2)
        updates[upd] = upd_new
    else:
        acc_s = acc[sample_idx]
        # acc_new = v1 * acc_s + v2 * (grad**2)  # Faster, but inaccurate when an index occurs multiple times
        # updates[acc] = T.set_subtensor(acc_s, acc_new)  # Faster, but inaccurate when an index occurs multiple times
        updates[acc] = T.inc_subtensor(T.set_subtensor(acc_s, acc_s * v1)[sample_idx], v2 * (grad ** 2))  # Slower, but accurate when an index occurs multiple times
        acc_new = updates[acc][sample_idx]  # Slower, but accurate when an index occurs multiple times
        upd_s = upd[sample_idx]
        grad_scaling = (upd_s + epsilon) / (acc_new + epsilon)
        # updates[upd] = T.set_subtensor(upd_s, v1 * upd_s + v2 * grad_scaling * (grad**2))  # Faster, but inaccurate when an index occurs multiple times
        updates[upd] = T.inc_subtensor(T.set_subtensor(upd_s, upd_s * v1)[sample_idx], v2 * grad_scaling * (grad ** 2))  # Slower, but accurate when an index occurs multiple times
    gradient_scaling = T.cast(T.sqrt(grad_scaling), theano.config.floatX)
    if self.learning_rate != 1.0:
        print('Warn: learning_rate is not 1.0 while using adadelta. Setting learning_rate to 1.0')
        self.learning_rate = 1.0
    return grad * gradient_scaling  # Ok, checked
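The commented-out "faster" variant above loses updates whenever the same row index appears more than once in sample_idx, because set_subtensor writes duplicates on top of each other, while inc_subtensor accumulates them. Below is a small sketch of the decay-then-increment pattern used above; the shapes and coefficients are made up for illustration and are not part of the original example.

import numpy as np
import theano
import theano.tensor as T

v1, v2 = np.float32(0.9), np.float32(0.1)
acc = theano.shared(np.ones((4, 2), dtype=theano.config.floatX))
sample_idx = T.ivector()
grad = T.matrix()

acc_s = acc[sample_idx]
# Decay each selected row exactly once, then add every per-sample contribution.
acc_new = T.inc_subtensor(T.set_subtensor(acc_s, acc_s * v1)[sample_idx], v2 * grad ** 2)
f = theano.function([sample_idx, grad], acc_new,
                    updates=[(acc, acc_new)], allow_input_downcast=True)
# Row 0 appears twice: it is decayed once and then incremented twice,
# 0.9 + 0.1 + 0.1 = 1.1; with plain set_subtensor it would end up at 1.0.
print(f([0, 0, 1], np.ones((3, 2))))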
Example 2: log_likelihood
def log_likelihood(self):
    Users = self.U[:, :-1]
    Middle = self.S
    Items = self.V[:-1, :]
    UserBiases = self.U[:, -1].reshape((-1, 1))
    ItemBiases = self.V[-1, :].reshape((-1, 1))
    A = T.dot(T.dot(self.U[:, :-1], self.S[:-1, :-1]), self.V[:-1, :])
    A = T.inc_subtensor(A[:, :], UserBiases * T.sqrt(self.S[-1, -1]))
    A = T.inc_subtensor(A[:, :], ItemBiases.T * T.sqrt(self.S[-1, -1]))
    B = A * self.counts
    loglik = T.sum(B)
    A = T.exp(A)
    A += 1
    A = T.log(A)
    A = (self.counts + 1) * A
    loglik -= T.sum(A)
    # L2 regularization
    loglik -= 0.5 * self.reg_param * T.sum(T.square(T.diag(self.S)[:-1]))
    # Return the negated log-likelihood, because we minimize the cost
    return -loglik
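A side note on the two inc_subtensor calls above: incrementing the full slice A[:, :] is simply a broadcasted addition of the bias column (and, via the transpose, the bias row) onto every entry of A. A tiny sketch with made-up names and shapes, not taken from the original code:

import numpy as np
import theano
import theano.tensor as T

A = T.matrix('A')               # (n_users, n_items)
user_bias = T.col('user_bias')  # (n_users, 1), broadcast across items
biased = T.inc_subtensor(A[:, :], user_bias)
f = theano.function([A, user_bias], biased, allow_input_downcast=True)
print(f(np.zeros((2, 3)), np.array([[1.], [2.]])))  # [[1. 1. 1.], [2. 2. 2.]]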
Example 3: fac_vis
def fac_vis(self, x_phid, x_shid):
    # calculate probability of visible units
    # fac_vis[view][node, sample, statistic]
    facv_vis = [T.zeros((self.n_vis_nodes[view],
                         self.n_samples,
                         self.vis[view].n_statistics),
                        dtype=theano.config.floatX)
                for view in range(self.n_views)]
    fv_shid = self.shid.f(x_shid)
    for view in range(self.n_views):
        fv_phid = self.phid[view].f(x_phid[view])
        for statistic in range(self.vis[view].n_statistics):
            facv_vis[view] = T.set_subtensor(facv_vis[view][:, :, statistic],
                                             self.bias_vis[view][:, statistic].dimshuffle(0, 'x'))
            if self.vis[view].fixed_bias[statistic]:
                facv_vis[view] = T.set_subtensor(facv_vis[view][:, :, statistic],
                                                 self.vis[view].fixed_bias_value[statistic])
            for from_statistic in range(self.phid[view].n_statistics):
                facv_vis[view] = T.inc_subtensor(facv_vis[view][:, :, statistic],
                                                 T.dot(self.weights_priv[view][:, statistic, :, from_statistic].T,
                                                       fv_phid[:, :, from_statistic]))
            for from_statistic in range(self.shid.n_statistics):
                facv_vis[view] = T.inc_subtensor(facv_vis[view][:, :, statistic],
                                                 T.dot(self.weights_shrd[view][:, statistic, :, from_statistic].T,
                                                       fv_shid[:, :, from_statistic]))
    return facv_vis
Example 4: update_log_p
def update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    active_next = T.cast(T.minimum(
        T.maximum(
            active + 1,
            T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
        ),
        log_p_curr.shape[0]
    ), 'int32')
    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    _p_prev = zeros[:active_next]
    # copy over
    _p_prev = T.set_subtensor(_p_prev[:active], p_prev)
    # previous transitions
    _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
    # skip transitions
    _p_prev = T.inc_subtensor(
        _p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs])
    updated_log_p_prev = T.log(_p_prev) + common_factor
    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next
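The "previous transitions" line above is the usual shift-and-add step of CTC-style forward recursions: every position keeps its own probability mass and also receives the mass of the position directly before it (the increment reads from the original tensor, not the partially updated one). A toy sketch with assumed numbers, not from the original code:

import numpy as np
import theano
import theano.tensor as T

p = T.vector('p')
shifted = T.inc_subtensor(p[1:], p[:-1])  # result[i] = p[i] + p[i-1] for i >= 1
f = theano.function([p], shifted, allow_input_downcast=True)
print(f([0.1, 0.2, 0.3, 0.4]))  # approximately [0.1, 0.3, 0.5, 0.7]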
Example 5: power_pool_2d
def power_pool_2d(x, ds, p=3, b=0):
    n_batch, n_ch, s0, s1 = x.shape
    d0, d1 = ds
    c = tt.ones((s0, s1))
    # sum elements in regions
    y = tt.abs_(x[:, :, 0::d0, 0::d1]) ** p
    d = c[0::d0, 0::d1].copy()
    for i in range(0, d0):
        for j in range(0, d1):
            if i != 0 or j != 0:
                ni = (s0 - i - 1) // d0 + 1  # integer division so the result can be used as a slice bound
                nj = (s1 - j - 1) // d1 + 1
                xij = tt.abs_(x[:, :, i::d0, j::d1]) ** p
                y = tt.inc_subtensor(y[:, :, :ni, :nj], xij)
                d = tt.inc_subtensor(d[:ni, :nj], c[i::d0, j::d1])
    # divide by number of elements
    y /= d
    y += b ** p
    # take root
    y = y ** (1. / p)
    return y
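A possible usage sketch for the function above; the batch size, channel count and image size are assumptions and not part of the original example. Pooling an 8x8 feature map with 2x2 regions and p=3 yields a 4x4 output.

import numpy as np
import theano
import theano.tensor as tt

x = tt.tensor4('x')
pooled = power_pool_2d(x, (2, 2), p=3)
f = theano.function([x], pooled)
images = np.random.rand(2, 3, 8, 8).astype(theano.config.floatX)
print(f(images).shape)  # (2, 3, 4, 4)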
Example 6: recurrence
def recurrence(log_p_curr, log_p_prev, skip_mask=None):
    if skip_mask is None:
        skip_mask = T.ones_like(log_p_curr[:, 1:-2:2])
    # normalise and bring back to p space
    k = T.max(log_p_prev, axis=1, keepdims=True)
    norm_p_prev = T.switch(
        T.isinf(log_p_prev), 0, T.exp(log_p_prev - k))  # set -inf to 0
    # previous
    _result = norm_p_prev
    # add shift of previous
    _result = T.inc_subtensor(_result[:, 1:], norm_p_prev[:, :-1])
    # add skips of previous
    _result = T.inc_subtensor(_result[:, 3::2],
                              T.switch(skip_mask, norm_p_prev[:, 1:-2:2], 0))
    # current
    # log(p) should be 0 for first 2 terms
    result = T.switch(
        T.eq(_result, 0),
        -np.inf,
        log_p_curr + T.log(_result) + k
    )
    return result
Example 7: __init__
def __init__(self, vocab_size, dim, lr=0.5):
    W = np.asarray(np.random.rand(vocab_size, dim),
                   dtype=theano.config.floatX) / float(dim)
    W1 = np.asarray((np.random.rand(vocab_size, dim)),
                    dtype=theano.config.floatX) / float(dim)
    self.W = theano.shared(W, name='W', borrow=True)
    self.W1 = theano.shared(W1, name='W1', borrow=True)
    gW = np.asarray(np.ones((vocab_size, dim)), dtype=theano.config.floatX)
    gW1 = np.asarray(
        np.ones((vocab_size, dim)), dtype=theano.config.floatX)
    self.gW = theano.shared(gW, name='gW', borrow=True)
    self.gW1 = theano.shared(gW1, name='gW1', borrow=True)
    X = T.vector()
    fX = T.vector()
    ind_W = T.ivector()
    ind_W1 = T.ivector()
    w = self.W[ind_W, :]
    w1 = self.W1[ind_W1, :]
    cost = T.sum(fX * ((T.sum(w * w1, axis=1) - X) ** 2))
    grad = T.clip(T.grad(cost, [w, w1]), -5.0, 5.0)
    updates1 = [(self.gW, T.inc_subtensor(self.gW[ind_W, :],
                                          grad[0] ** 2))]
    updates2 = [(self.gW1, T.inc_subtensor(self.gW1[ind_W1, :],
                                           grad[1] ** 2))]
    updates3 = [(self.W, T.inc_subtensor(self.W[ind_W, :],
                                         - (lr / T.sqrt(self.gW[ind_W, :])) *
                                         grad[0]))]
    updates4 = [(self.W1, T.inc_subtensor(self.W1[ind_W1, :],
                                          - (lr / T.sqrt(self.gW1[ind_W1, :])) *
                                          grad[1]))]
    updates = updates1 + updates2 + updates3 + updates4
    self.cost_fn = theano.function(
        inputs=[ind_W, ind_W1, X, fX], outputs=cost, updates=updates)
Example 8: logp
def logp(self, x):
    n = self.n
    eta = self.eta
    diag_idxs = self.diag_idxs
    cumsum = tt.cumsum(x ** 2)
    variance = tt.zeros(n)
    variance = tt.inc_subtensor(variance[0], x[0] ** 2)
    variance = tt.inc_subtensor(
        variance[1:],
        cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]])
    sd_vals = tt.sqrt(variance)
    logp_sd = self.sd_dist.logp(sd_vals).sum()
    corr_diag = x[diag_idxs] / sd_vals
    logp_lkj = (2 * eta - 3 + n - tt.arange(n)) * tt.log(corr_diag)
    logp_lkj = tt.sum(logp_lkj)
    # Compute the log det jacobian of the second transformation
    # described in the docstring.
    idx = tt.arange(n)
    det_invjac = tt.log(corr_diag) - idx * tt.log(sd_vals)
    det_invjac = det_invjac.sum()
    norm = _lkj_normalizing_constant(eta, n)
    return norm + logp_lkj + logp_sd + det_invjac
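For readers unfamiliar with the packing used here: x stores the Cholesky factor row by row, and diag_idxs marks where each diagonal element sits, so the two inc_subtensor calls recover each row's sum of squares, i.e. the marginal variances. A plain NumPy check with assumed values (not from the original code):

import numpy as np

n = 3
diag_idxs = np.array([0, 2, 5])  # positions of L[0,0], L[1,1], L[2,2] in the packed vector
x = np.array([1.0, 0.5, 2.0, 0.3, 0.4, 1.5])  # rows [1.0], [0.5, 2.0], [0.3, 0.4, 1.5]
cumsum = np.cumsum(x ** 2)
variance = np.empty(n)
variance[0] = x[0] ** 2
variance[1:] = cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]]
print(variance)  # [1.   4.25 2.5 ] = row-wise sums of squares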
Example 9: adam
def adam(self, param, grad, updates, sample_idx=None, epsilon=1e-6):
    v1 = np.float32(self.adapt_params[0])
    v2 = np.float32(1.0 - self.adapt_params[0])
    v3 = np.float32(self.adapt_params[1])
    v4 = np.float32(1.0 - self.adapt_params[1])
    acc = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
    meang = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
    countt = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
    if sample_idx is None:
        acc_new = v3 * acc + v4 * (grad ** 2)
        meang_new = v1 * meang + v2 * grad
        countt_new = countt + 1
        updates[acc] = acc_new
        updates[meang] = meang_new
        updates[countt] = countt_new
    else:
        acc_s = acc[sample_idx]
        meang_s = meang[sample_idx]
        countt_s = countt[sample_idx]
        # acc_new = v3 * acc_s + v4 * (grad**2)  # Faster, but inaccurate when an index occurs multiple times
        # updates[acc] = T.set_subtensor(acc_s, acc_new)  # Faster, but inaccurate when an index occurs multiple times
        updates[acc] = T.inc_subtensor(T.set_subtensor(acc_s, acc_s * v3)[sample_idx], v4 * (grad ** 2))  # Slower, but accurate when an index occurs multiple times
        acc_new = updates[acc][sample_idx]  # Slower, but accurate when an index occurs multiple times
        # meang_new = v1 * meang_s + v2 * grad
        # updates[meang] = T.set_subtensor(meang_s, meang_new)  # Faster, but inaccurate when an index occurs multiple times
        updates[meang] = T.inc_subtensor(T.set_subtensor(meang_s, meang_s * v1)[sample_idx], v2 * grad)  # Slower, but accurate when an index occurs multiple times
        meang_new = updates[meang][sample_idx]  # Slower, but accurate when an index occurs multiple times
        countt_new = countt_s + 1.0
        updates[countt] = T.set_subtensor(countt_s, countt_new)
    return (meang_new / (1 - v1 ** countt_new)) / (T.sqrt(acc_new / (1 - v1 ** countt_new)) + epsilon)
Example 10: scan
def scan(self, x, z, non_sequences, i, outputs_info, W_re, W_in, b, go_backwards=False, truncate_gradient=-1):
    W_re_b = self.parent.add_param(
        self.parent.create_recurrent_weights(self.n_units, self.n_re, name="W_re_b_%s" % self.parent.name))
    z_f = z[:, :, :z.shape[2] // 2]
    z_b = z[::-1, :, z.shape[2] // 2:]
    z_f = T.inc_subtensor(z_f[0], T.dot(outputs_info[0], W_re))
    z_b = T.inc_subtensor(z_b[0], T.dot(outputs_info[0], W_re_b))
    result = BLSTMOpInstance(z_f, z_b, W_re, W_re_b, outputs_info[1], T.zeros_like(outputs_info[1]), i, i[::-1])
    return [T.concatenate([result[0], result[1][::-1]], axis=2),
            T.concatenate([result[4], result[5][::-1]], axis=1).dimshuffle('x', 0, 1)]
Example 11: gs_recurrence
def gs_recurrence(p_curr, p_prev):
    # add previous
    _result = p_prev
    # add shift of previous
    _result = T.inc_subtensor(_result[1:], p_prev[:-1])
    # add skips of previous
    _result = T.inc_subtensor(_result[3::2], p_prev[1:-2:2])
    # current
    _result = _result * p_curr
    return _result
Example 12: add_synap_post_inp
def add_synap_post_inp(i, po, p, s, q):
    # i:  sequence
    # po: post
    # p:  pre
    # s:  dA
    # q:  W
    index = T.nonzero(q[:self.Ne, i])
    npo = T.inc_subtensor(po[index, i], s)
    nw = T.inc_subtensor(q[:, i], p[:, i])
    nw = T.clip(nw, 0, self.wmax)
    return {po: npo, q: nw}
Example 13: past_weight_grad_step
def past_weight_grad_step(xs, es, kp_x, kd_x, kp_e, kd_e, shape, dws=None):
    """
    Do an efficient update of the weights given the two spike-update.
    (This still runs FING SLOWLY!)
    :param xs: An (n_in) vector
    :param es: An (n_out) vector
    :param kp_x:
    :param kd_x:
    :param kp_e:
    :param kd_e:
    :param shape: (n_in, n_out)
    :return:
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_in, n_out = shape
    rx = kd_x / (kp_x + kd_x)
    re = kd_e / (kp_e + kd_e)
    tx_last = create_shared_variable(np.zeros(n_in) + 1)
    te_last = create_shared_variable(np.zeros(n_out) + 1)
    x_last = create_shared_variable(np.zeros(n_in))
    e_last = create_shared_variable(np.zeros(n_out))
    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    x_spike_ixs, = tt.nonzero(x_spikes)
    e_spike_ixs, = tt.nonzero(e_spikes)
    if dws is None:
        dws = tt.zeros(shape)
    t_last = tt.minimum(tx_last[x_spike_ixs, None], te_last)  # (n_x_spikes, n_out)
    dws = tt.inc_subtensor(dws[x_spike_ixs, :], x_last[x_spike_ixs, None] * e_last
                           * rx ** (tx_last[x_spike_ixs, None] - t_last)
                           * re ** (te_last[None, :] - t_last)
                           * geoseries_sum(re * rx, t_end=t_last, t_start=1)
                           )
    new_x_last = tt.set_subtensor(x_last[x_spike_ixs], x_last[x_spike_ixs] * rx ** tx_last[x_spike_ixs] + xs[x_spike_ixs] / as_floatx(kd_x))
    new_tx_last = tt.switch(x_spikes, 0, tx_last)
    t_last = tt.minimum(new_tx_last[:, None], te_last[e_spike_ixs])  # (n_in, n_e_spikes)
    dws = tt.inc_subtensor(dws[:, e_spike_ixs], new_x_last[:, None] * e_last[e_spike_ixs]
                           * rx ** (new_tx_last[:, None] - t_last)
                           * re ** (te_last[None, e_spike_ixs] - t_last)
                           * geoseries_sum(re * rx, t_end=t_last, t_start=1)
                           )
    add_update(x_last, new_x_last)
    add_update(e_last, tt.set_subtensor(e_last[e_spike_ixs], e_last[e_spike_ixs] * re ** te_last[e_spike_ixs] + es[e_spike_ixs] / as_floatx(kd_e)))
    add_update(tx_last, new_tx_last + 1)
    add_update(te_last, tt.switch(e_spikes, 1, te_last + 1))
    return dws
Example 14: RMSprop
def RMSprop(self, cost, params, full_params, sampled_params, sidxs, epsilon=1e-6):
    grads = [T.grad(cost=cost, wrt=param) for param in params]
    sgrads = [T.grad(cost=cost, wrt=sparam) for sparam in sampled_params]
    updates = OrderedDict()
    if self.grad_cap > 0:
        norm = T.cast(T.sqrt(T.sum([T.sum([T.sum(g ** 2) for g in g_list]) for g_list in grads]) + T.sum([T.sum(g ** 2) for g in sgrads])), theano.config.floatX)
        grads = [[T.switch(T.ge(norm, self.grad_cap), g * self.grad_cap / norm, g) for g in g_list] for g_list in grads]
        sgrads = [T.switch(T.ge(norm, self.grad_cap), g * self.grad_cap / norm, g) for g in sgrads]
    for p_list, g_list in zip(params, grads):
        for p, g in zip(p_list, g_list):
            if self.adapt:
                if self.adapt == 'adagrad':
                    g = self.adagrad(p, g, updates)
                if self.adapt == 'rmsprop':
                    g = self.rmsprop(p, g, updates)
                if self.adapt == 'adadelta':
                    g = self.adadelta(p, g, updates)
                if self.adapt == 'adam':
                    g = self.adam(p, g, updates)
            if self.momentum > 0:
                velocity = theano.shared(p.get_value(borrow=False) * 0., borrow=True)
                velocity2 = self.momentum * velocity - np.float32(self.learning_rate) * (g + self.lmbd * p)
                updates[velocity] = velocity2
                updates[p] = p + velocity2
            else:
                updates[p] = p * np.float32(1.0 - self.learning_rate * self.lmbd) - np.float32(self.learning_rate) * g
    for i in range(len(sgrads)):
        g = sgrads[i]
        fullP = full_params[i]
        sample_idx = sidxs[i]
        sparam = sampled_params[i]
        if self.adapt:
            if self.adapt == 'adagrad':
                g = self.adagrad(fullP, g, updates, sample_idx)
            if self.adapt == 'rmsprop':
                g = self.rmsprop(fullP, g, updates, sample_idx)
            if self.adapt == 'adadelta':
                g = self.adadelta(fullP, g, updates, sample_idx)
            if self.adapt == 'adam':
                g = self.adam(fullP, g, updates, sample_idx)
        if self.lmbd > 0:
            delta = np.float32(self.learning_rate) * (g + self.lmbd * sparam)
        else:
            delta = np.float32(self.learning_rate) * g
        if self.momentum > 0:
            velocity = theano.shared(fullP.get_value(borrow=False) * 0., borrow=True)
            vs = velocity[sample_idx]
            velocity2 = self.momentum * vs - delta
            updates[velocity] = T.set_subtensor(vs, velocity2)
            updates[fullP] = T.inc_subtensor(sparam, velocity2)
        else:
            updates[fullP] = T.inc_subtensor(sparam, - delta)
    return updates
Example 15: _pyramid_step
def _pyramid_step(self, x_h, x_zr, x_m, t, h_tm1):
    '''
    x_h/z/r: input at time t, shape=[batch, hid] or [hid]
    x_m: mask of x_t, shape=[batch] or [1]
    h_tm1: previous state, shape=[batch, t+1 or n_steps, hid] or [t+1 or n_steps, hid]
    '''
    if self.with_begin_tag:
        if x_h.ndim == 1 and h_tm1.ndim == 2:
            h_tm1 = T.set_subtensor(h_tm1[t, :], self.struct_begin_tag)
        elif x_h.ndim == 2 and h_tm1.ndim == 3:
            h_tm1 = T.set_subtensor(h_tm1[:, t, :], self.struct_begin_tag[None, :])
        else:
            raise NotImplementedError
    zr_t = T.dot(h_tm1, self.W_hzr)
    can_h_t = T.dot(h_tm1, self.W_hh)
    if x_h.ndim == 1 and h_tm1.ndim == 2:
        xzr = x_zr[None, :]
        xm = x_m[:, None]
        zr_t = T.inc_subtensor(zr_t[:t + 1], xzr)
    elif x_h.ndim == 2 and h_tm1.ndim == 3:
        xzr = x_zr[:, None, :]
        xm = x_m[:, None, None]
        zr_t = T.inc_subtensor(zr_t[:, :t + 1], xzr)
    else:
        raise NotImplementedError
    zr_t = T.nnet.sigmoid(zr_t)
    z_t = _slice(zr_t, 0, self.n_hids)
    r_t = _slice(zr_t, 1, self.n_hids)
    can_h_t *= r_t
    if x_h.ndim == 1 and h_tm1.ndim == 2:
        xh = x_h[None, :]
        can_h_t = T.inc_subtensor(can_h_t[:t + 1], xh)
    elif x_h.ndim == 2 and h_tm1.ndim == 3:
        xh = x_h[:, None, :]
        can_h_t = T.inc_subtensor(can_h_t[:, :t + 1], xh)
    else:
        raise NotImplementedError
    can_h_t = T.tanh(can_h_t)
    h_t = z_t * h_tm1 + (1 - z_t) * can_h_t
    h_t = xm * h_t + (1. - xm) * h_tm1
    return h_t