本文整理汇总了Python中theano.tensor.sum函数的典型用法代码示例。如果您正苦于以下问题：Python sum函数的具体用法？Python sum怎么用？Python sum使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了sum函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _calc_regularization_cost
def _calc_regularization_cost(self):
    """Return the regularization cost, skipping the last column of W1 and W2.

    The parent class supplies the cost for the parameters registered in
    ``self.regularize``.  ``W1`` and ``W2`` are penalized here by hand instead,
    because their last column is sliced off before the penalty is computed
    (the original note says these matrices carry bias columns that must not
    be regularized).  Consequently W1 and W2 must NOT be added to
    ``self.regularize``.

    Returns
    -------
    theano variable
        Parent cost, plus ``l1_weight`` times the summed absolute values and
        ``l2_weight`` times the summed squares of the sliced weights.  A term
        is only added when its weight is non-zero.
    """
    total = super(SLmNce, self)._calc_regularization_cost()

    # Everything except the trailing column of each weight matrix.
    w1_body = self.W1[:, :-1]
    w2_body = self.W2[:, :-1]

    if self.l1_weight != 0:
        l1_penalty = T.sum(T.abs_(w1_body))
        l1_penalty += T.sum(T.abs_(w2_body))
        total += self.l1_weight * l1_penalty

    if self.l2_weight != 0:
        l2_penalty = T.sum(T.sqr(w1_body))
        l2_penalty += T.sum(T.sqr(w2_body))
        total += self.l2_weight * l2_penalty

    return total
示例2: _build_conditional
def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total):
    """Build the predictive mean and (co)variance at ``Xnew``.

    The quantities that depend on the training data (``X``, ``Xu``, ``y``)
    are built with ``cov_total`` / ``mean_total``; the quantities that involve
    ``Xnew`` are built with ``self.cov_func`` / ``self.mean_func``.

    Parameters
    ----------
    Xnew : points at which to predict.
    pred_noise : if truthy, ``sigma**2`` is added to the returned variance
        (or to the diagonal of the returned covariance).
    diag : if truthy, return only the variance vector instead of the full
        covariance matrix.
    X, Xu, y : training inputs, inducing inputs and observations
        (presumably -- names only, confirm against the caller).
    sigma : noise standard deviation; it is squared before use.
    cov_total, mean_total : callables used for the training-side terms.

    Returns
    -------
    (mu, var) when ``diag`` is truthy, otherwise (mu, stabilize(cov)).
    """
    noise_var = tt.square(sigma)
    K_uu = cov_total(Xu)
    K_uf = cov_total(Xu, X)
    chol_uu = cholesky(stabilize(K_uu))
    A = solve_lower(chol_uu, K_uf)
    # Column-wise sum of squares of A.
    q_ff_diag = tt.sum(A * A, 0)

    if self.approx == "FITC":
        k_ff_diag = cov_total(X, diag=True)
        # Clip at zero so the correction term can never go negative.
        lam_diag = tt.clip(k_ff_diag - q_ff_diag, 0.0, np.inf) + noise_var
    else:  # VFE or DTC
        lam_diag = tt.ones_like(q_ff_diag) * noise_var

    A_scaled = A / lam_diag
    chol_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_scaled, tt.transpose(A)))
    residual = y - mean_total(X)
    residual_scaled = residual / lam_diag
    c = solve_lower(chol_B, tt.dot(A, residual_scaled))

    K_us = self.cov_func(Xu, Xnew)
    A_new = solve_lower(chol_uu, K_us)
    mu = self.mean_func(Xnew) + tt.dot(tt.transpose(A_new), solve_upper(tt.transpose(chol_B), c))
    C = solve_lower(chol_B, A_new)

    if not diag:
        cov = (self.cov_func(Xnew) - tt.dot(tt.transpose(A_new), A_new) +
               tt.dot(tt.transpose(C), C))
        if pred_noise:
            cov += noise_var * tt.identity_like(cov)
        return mu, stabilize(cov)

    K_ss_diag = self.cov_func(Xnew, diag=True)
    var = K_ss_diag - tt.sum(tt.square(A_new), 0) + tt.sum(tt.square(C), 0)
    if pred_noise:
        var += noise_var
    return mu, var
示例3: _compute_local_cn_acts
def _compute_local_cn_acts(self, input, W):
    """Normalize ``input`` inside each local mask, then project with ``W``.

    Scan-free implementation (the original note says it is faster than the
    scan version but still very slow).

    ``input`` is broadcast to (d0, d1, 1) and ``self.localmask`` to
    (1, m0, m1); their product is normalized along axis 1 with a mean and a
    variance that are both divided by the mask sum along that axis.  The
    result is moved to (m1, d0, d1), multiplied by ``W`` and reduced over the
    leading axis through an identity mask.

    Returns the resulting activation tensor.
    """
    in_3d = input.dimshuffle(0, 1, 'x')
    masks_3d = self.localmask.dimshuffle('x', 0, 1)
    n_masks = self.localmask.shape[1]

    masked = in_3d * masks_3d
    mask_total = T.sum(masks_3d, axis=1, keepdims=True)

    mean = T.sum(masked, axis=1, keepdims=True) / mask_total
    centered = masked - mean
    # The deviation is re-masked for the variance only; `centered` itself is
    # used unmasked in the normalization below.
    variance = T.sum(T.sqr(centered * masks_3d), axis=1, keepdims=True) / mask_total
    # NOTE(review): a zero standard deviation divides by zero here -- confirm
    # whether inputs can be constant inside a mask.
    normalized = centered / T.sqrt(variance)

    projected = T.dot(normalized.dimshuffle(2, 0, 1), W)
    # The identity mask keeps, for slice i along the first axis, only entry i
    # of the last axis before the first axis is summed away.
    identity = T.eye(n_masks, n_masks).dimshuffle(0, 'x', 1)
    return T.sum(projected * identity, axis=0)
示例4: test_pickle_unpickle_without_reoptimization
def test_pickle_unpickle_without_reoptimization():
    """Check a compiled function survives pickling without reoptimization.

    A function with two shared-variable updates is compiled, pickled, and
    unpickled while ``theano.config.reoptimize_unpickled_function`` is False.
    The original and the unpickled function must return the same value for
    the same inputs.  The config flag is restored afterwards.
    """
    # Swap the debug mode for FAST_RUN; any other mode is used as configured.
    debug_modes = ["DEBUG_MODE", "DebugMode"]
    mode = "FAST_RUN" if theano.config.mode in debug_modes else theano.config.mode

    x1 = T.fmatrix('x1')
    x2 = T.fmatrix('x2')
    x3 = theano.shared(numpy.ones((10, 10), dtype=floatX))
    x4 = theano.shared(numpy.ones((10, 10), dtype=floatX))
    y = T.sum(T.sum(T.sum(x1**2 + x2) + x3) + x4)

    updates = OrderedDict([(x3, x3 + 1), (x4, x4 + 1)])
    f = theano.function([x1, x2], y, updates=updates, mode=mode)

    # Serialize the compiled function with the highest pickle protocol.
    pickled_fn = pickle.dumps(f, -1)

    # Concrete inputs used to compare both functions.
    in1 = numpy.ones((10, 10), dtype=floatX)
    in2 = numpy.ones((10, 10), dtype=floatX)

    # Unpickle with reoptimization switched off (the flag defaults to True).
    saved_flag = theano.config.reoptimize_unpickled_function
    try:
        theano.config.reoptimize_unpickled_function = False
        f_ = pickle.loads(pickled_fn)
        assert f(in1, in2) == f_(in1, in2)
    finally:
        theano.config.reoptimize_unpickled_function = saved_flag
示例5: orthogonal_penalty
def orthogonal_penalty(W, D, epsilon=1e-6, axis=1):
    """Penalize vectors in ``W`` that are not orthogonal to those in ``D``.

    Along ``axis`` the squared cosine between ``W`` and ``D`` is computed as
    ``(d^T w)^2 / (||w||_2^2 * ||d||_2^2)``.  ``epsilon**2`` is subtracted and
    only the positive part is kept (a hinge), then everything is summed.

    Returns the scalar sum of the hinged values.
    """
    dot_sq = T.sqr(T.sum(W * D, axis=axis))    # (d^T w)^2
    w_norm_sq = T.sum(T.sqr(W), axis=axis)     # ||w||_2^2
    d_norm_sq = T.sum(T.sqr(D), axis=axis)     # ||d||_2^2
    cos_sq = dot_sq / (w_norm_sq * d_norm_sq)
    excess = cos_sq - (epsilon**2)
    # Multiplying by the boolean mask zeroes every non-positive entry.
    return T.sum(excess * (excess > 0))
示例6: __init__
def __init__(self, vocab_size, dim, lr=0.5):
    """Create the two embedding tables and compile the training function.

    Parameters
    ----------
    vocab_size : number of rows of each embedding table.
    dim : number of columns of each embedding table.
    lr : step size used in the parameter updates.

    Side effects: sets ``self.W``, ``self.W1`` (embeddings), ``self.gW``,
    ``self.gW1`` (running sums of squared gradients, started at one) and
    ``self.cost_fn``, a compiled function taking
    ``(ind_W, ind_W1, X, fX)`` that returns the cost and applies the updates.
    """
    float_type = theano.config.floatX
    shape = (vocab_size, dim)

    # Embeddings drawn uniformly from [0, 1) and scaled by 1/dim.
    init_W = np.asarray(np.random.rand(*shape), dtype=float_type) / float(dim)
    init_W1 = np.asarray(np.random.rand(*shape), dtype=float_type) / float(dim)
    self.W = theano.shared(init_W, name='W', borrow=True)
    self.W1 = theano.shared(init_W1, name='W1', borrow=True)

    # Squared-gradient accumulators, one entry per embedding entry.
    self.gW = theano.shared(np.ones(shape, dtype=float_type), name='gW', borrow=True)
    self.gW1 = theano.shared(np.ones(shape, dtype=float_type), name='gW1', borrow=True)

    # Symbolic inputs: targets, per-example weights and the row indices.
    X = T.vector()
    fX = T.vector()
    ind_W = T.ivector()
    ind_W1 = T.ivector()

    rows_W = self.W[ind_W, :]
    rows_W1 = self.W1[ind_W1, :]

    # Weighted squared error between the row-wise dot product and the target.
    prediction = T.sum(rows_W * rows_W1, axis=1)
    cost = T.sum(fX * ((prediction - X) ** 2))
    # Gradients w.r.t. the selected rows only, clipped to [-5, 5].
    clipped = T.clip(T.grad(cost, [rows_W, rows_W1]), -5.0, 5.0)

    # AdaGrad-style step: accumulate squared gradients, then scale the
    # learning rate by the inverse square root of the accumulator.
    updates = [
        (self.gW, T.inc_subtensor(self.gW[ind_W, :], clipped[0] ** 2)),
        (self.gW1, T.inc_subtensor(self.gW1[ind_W1, :], clipped[1] ** 2)),
        (self.W, T.inc_subtensor(self.W[ind_W, :],
                                 - (lr / T.sqrt(self.gW[ind_W, :])) * clipped[0])),
        (self.W1, T.inc_subtensor(self.W1[ind_W1, :],
                                  - (lr / T.sqrt(self.gW1[ind_W1, :])) * clipped[1])),
    ]

    self.cost_fn = theano.function(
        inputs=[ind_W, ind_W1, X, fX], outputs=cost, updates=updates)
示例7: dev_loss
def dev_loss(self, dev_types, dev_lams, ss_ratio, y):
    """Build the weighted sum of per-layer deviation penalties.

    For each layer ``i`` the output of ``self.layers[i]`` is compared with the
    output of ``self.drop_nets[0][i]``; the last layer uses ``linear_output``
    instead of ``output``.  The comparison is picked by ``dev_types[i]``:

    * 1 -- masked squared difference of the row-wise L2-normalized outputs
    * 2 -- masked squared difference after ``tanh``
    * 3 -- masked squared difference after ``sigmoid``
    * 4 -- binary cross-entropy between the sigmoids, divided by the number
      of rows (this one does not use the mask)
    * anything else -- masked squared difference of the raw outputs

    Each term is scaled by ``dev_lams[i]``.

    Parameters
    ----------
    dev_types : per-layer selector, indexed by layer number.
    dev_lams : per-layer weight, indexed by layer number.
    ss_ratio : weight given to rows where ``y != 0``; rows where ``y == 0``
        get weight 1.
    y : label vector; only its comparison with 0 is used here.

    Returns
    -------
    The accumulated symbolic loss (``0.0`` if there are no layers).
    """
    n_rows = y.shape[0]
    # Column mask: ss_ratio where y != 0, one where y == 0.
    su_mask = ss_ratio * T.neq(y, 0).reshape((n_rows, 1))
    un_mask = T.eq(y, 0).reshape((n_rows, 1))
    ss_mask = su_mask + un_mask

    def var_fun(x1, x2):
        # Masked squared difference, normalized by the total mask weight.
        return T.sum(((x1 - x2) * ss_mask)**2.0) / T.sum(ss_mask)

    def tanh_fun(x1, x2):
        return var_fun(T.tanh(x1), T.tanh(x2))

    def norm_fun(x1, x2):
        # The 1e-6 keeps the row norm away from zero before dividing.
        return var_fun(
            (x1 / T.sqrt(T.sum(x1**2.0, axis=1, keepdims=True) + 1e-6)),
            (x2 / T.sqrt(T.sum(x2**2.0, axis=1, keepdims=True) + 1e-6)))

    def sigm_fun(x1, x2):
        return var_fun(T.nnet.sigmoid(x1), T.nnet.sigmoid(x2))

    def cent_fun(xt, xo):
        return T.sum(T.nnet.binary_crossentropy(
            T.nnet.sigmoid(xo), T.nnet.sigmoid(xt))) / xt.shape[0]

    L = 0.0
    # `range` instead of the Python-2-only `xrange`, so this runs on both.
    for i in range(self.layer_count):
        if (i < (self.layer_count - 1)):
            x1 = self.layers[i].output
            x2 = self.drop_nets[0][i].output
        else:
            x1 = self.layers[i].linear_output
            x2 = self.drop_nets[0][i].linear_output
        if (dev_types[i] == 1):
            L = L + (dev_lams[i] * norm_fun(x1, x2))
        elif (dev_types[i] == 2):
            L = L + (dev_lams[i] * tanh_fun(x1, x2))
        elif (dev_types[i] == 3):
            L = L + (dev_lams[i] * sigm_fun(x1, x2))
        elif (dev_types[i] == 4):
            L = L + (dev_lams[i] * cent_fun(x1, x2))
        else:
            L = L + (dev_lams[i] * var_fun(x1, x2))
    return L
示例8: build_objective
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Build the weighted sum of all per-property objectives.

    Walks ``order_objectives`` in order and, depending on
    ``property_type[obj_name]``, builds either a classification objective
    (``cce`` over a slice of output units, averaged over the enabled targets)
    or a continuous objective (mean of ``sqe`` over one output unit).
    ``unit_ptr`` tracks which output units belong to the current objective.

    Side effect: each per-objective scalar is stored in the module-level
    ``d_objectives_deterministic`` or ``d_objectives`` dict, depending on
    ``deterministic``.

    Parameters
    ----------
    model : object exposing ``l_out``, ``l_target`` and ``l_enable_target``.
    deterministic : forwarded to ``get_output`` for the prediction layer and
        used to pick the dict that receives the per-objective scalars.
    epsilon : forwarded to ``cce``.

    Returns
    -------
    The sum over objectives of ``norm_weights_loss[obj_name] * objective``.

    Raises
    ------
    ValueError
        If a property type is neither ``'classification'`` nor
        ``'continuous'``.
    """
    predictions = nn.layers.get_output(model.l_out, deterministic=deterministic)
    targets = nn.layers.get_output(model.l_target)
    enable_targets = nn.layers.get_output(model.l_enable_target)

    sum_of_objectives = 0
    unit_ptr = 0
    for obj_idx, obj_name in enumerate(order_objectives):
        ptype = property_type[obj_name]
        if ptype == 'classification':
            num_units = len(property_bin_borders[obj_name])
            v_obj = cce(obj_idx, (unit_ptr, unit_ptr+num_units), predictions, targets, epsilon)
            # Mean of the objective over the enabled targets only; the small
            # constant avoids a division by zero when none is enabled.
            obj_scalar = T.sum(enable_targets[:,obj_idx] * v_obj) / (0.00001 + T.sum(enable_targets[:,obj_idx]))
            unit_ptr = unit_ptr + num_units
        elif ptype == 'continuous':
            v_obj = sqe(obj_idx, unit_ptr, predictions, targets)
            obj_scalar = T.mean(v_obj)
            unit_ptr += 1
        else:
            # A bare `raise` here has no active exception to re-raise and
            # would only produce an unrelated error; report the real cause.
            raise ValueError(
                "unknown property type %r for objective %r" % (ptype, obj_name))

        if deterministic:
            d_objectives_deterministic[obj_name] = obj_scalar
        else:
            d_objectives[obj_name] = obj_scalar

        sum_of_objectives += norm_weights_loss[obj_name] * obj_scalar

    return sum_of_objectives
示例9: finetune_cost_updates
def finetune_cost_updates(self, center, mu, learning_rate):
    """Build the fine-tuning costs and the momentum parameter updates.

    The per-example loss is ``beta * ||center - output||^2 +
    lbd * ||x - reconstruction||^2``, with both squared norms summed over
    axis 1 so that a minibatch yields one entry per example.

    Parameters
    ----------
    center : target the network output is pulled towards.
    mu : momentum coefficient.
    learning_rate : step size.

    Returns
    -------
    ((cost1, cost2, cost3, grad_, param_), updates) where ``cost1`` is the
    mean total loss, ``cost2`` the weighted mean reconstruction error,
    ``cost3`` their difference, ``grad_`` / ``param_`` the stacked L2 norms
    of each gradient / parameter, and ``updates`` the list of update pairs.
    """
    center_err = T.sum(T.pow(center - self.get_output(), 2), axis=1)

    # Reconstruction error of the network, also one entry per example.
    reconstruction = self.get_network_reconst()
    reconst_err = T.sum(T.pow(self.x - reconstruction, 2), axis=1)

    per_example = self.beta*center_err + self.lbd*reconst_err
    cost1 = T.mean(per_example)
    cost2 = self.lbd*T.mean(reconst_err)
    cost3 = cost1 - cost2

    # Gradients of the total cost with respect to every parameter.
    gparams = T.grad(cost1, self.params)

    updates = []
    grad_norms = []
    param_norms = []
    for param, delta, gparam in zip(self.params, self.delta, gparams):
        # Velocity update followed by the look-ahead parameter step.
        updates.append((delta, mu*delta - learning_rate * gparam))
        updates.append((param, param + mu*mu*delta - (1+mu)*learning_rate*gparam))
        grad_norms.append(gparam.norm(L=2))
        param_norms.append(param.norm(L=2))

    grad_ = T.stack(*grad_norms)
    param_ = T.stack(*param_norms)
    return ((cost1, cost2, cost3, grad_, param_), updates)
示例10: applyConstraint
def applyConstraint(self, param):
    """Rescale ``param`` so each unit's weight vector has norm ``self.norm``.

    A 2-D parameter is normalized along axis 0.  A 4-D parameter (conv
    filters shaped (nChannelOut, nChannelIn, r, c)) is flattened to
    (nout, nin), normalized along axis 1, and reshaped back.  Any other
    rank triggers a warning and is then handled like the 2-D case.

    Parameters
    ----------
    param : tensor to constrain.

    Returns
    -------
    The rescaled tensor, with the same shape as the input.
    """
    if param.ndim != 4 and param.ndim != 2:
        # The whole message is parenthesised before formatting: `%` binds
        # tighter than `+`, so formatting only the last fragment (which has
        # no placeholder) raised TypeError instead of warning.
        warnings.warn(("Norm constraints are normally applied to matrices"
                       " or 4-dimensional tensors, but currently got "
                       "%d dimensions, please make sure this is the desired"
                       " parameter to apply norm constraints") % param.ndim)

    # Conv filters are (nout, nin) once flattened, i.e. transposed relative
    # to fully connected weights (nin, nout), so the norm axis is flipped.
    needFlip = param.ndim == 4
    if needFlip:
        prevShape = param.shape
        param = param.flatten(2)

    norm_axis = 1 if needFlip else 0
    col_norm = T.sqrt(T.sum(T.sqr(param), axis=norm_axis, keepdims=True))

    # The small constant guards against dividing by a zero norm.
    param /= (col_norm+1e-7)
    param *= self.norm

    if needFlip:
        param = param.reshape(prevShape)
    return param
示例11: sequence_log_likelihood
def sequence_log_likelihood(y, y_hat, y_mask, y_hat_mask, blank_symbol, log_scale=True):
    """Compute the CTC log likelihood of blank-augmented label sequences.

    Based on code from Shawn Tan; credits to Kyle Kastner as well.

    The sequence lengths are obtained by summing the masks over axis 0.  The
    total likelihood of a sequence adds the path probabilities found at its
    last time step for its last two label positions.

    Parameters
    ----------
    y : label sequences, already augmented with blank labels.
    y_hat : predicted probabilities (the log is taken here when
        ``log_scale`` is True).
    y_mask, y_hat_mask : masks for ``y`` and ``y_hat``.
    blank_symbol : forwarded to the path-probability helper.
    log_scale : if True, work in log space via ``_log_path_probabs`` and
        ``_log_add``; otherwise use ``_path_probabs`` and take the log at
        the end.

    Returns
    -------
    The log likelihood, one entry per sequence in the batch.
    """
    y_hat_mask_len = tensor.sum(y_hat_mask, axis=0, dtype="int32")
    y_mask_len = tensor.sum(y_mask, axis=0, dtype="int32")

    if log_scale:
        # `tensor.log` rather than `T.log`: the rest of this function only
        # relies on the `tensor` alias (assumed to be theano.tensor).
        log_probabs = _log_path_probabs(y, tensor.log(y_hat), y_mask, y_hat_mask, blank_symbol)
        batch_size = log_probabs.shape[1]
        # Add the probabilities of the final time steps to get the total
        # sequence likelihood.
        log_labels_probab = _log_add(
            log_probabs[y_hat_mask_len - 1, tensor.arange(batch_size), y_mask_len - 1],
            log_probabs[y_hat_mask_len - 1, tensor.arange(batch_size), y_mask_len - 2],
        )
    else:
        probabilities = _path_probabs(y, y_hat, y_mask, y_hat_mask, blank_symbol)
        batch_size = probabilities.shape[1]
        labels_probab = (
            probabilities[y_hat_mask_len - 1, tensor.arange(batch_size), y_mask_len - 1]
            + probabilities[y_hat_mask_len - 1, tensor.arange(batch_size), y_mask_len - 2]
        )
        log_labels_probab = tensor.log(labels_probab)
    return log_labels_probab
示例12: ThangAttentionUnit
def ThangAttentionUnit(attention_state_prev, current_stack_top, premise_stack_tops, projected_stack_tops, attention_dim,
                       vs, name="attention_unit", initializer=None):
    """Attend over the premise with dot-product scores and feed an LSTM.

    Args:
      attention_state_prev: The output of this unit at the previous time step.
      current_stack_top: The current stack top (h state only, if applicable).
      premise_stack_tops: The values to do attention over.
      projected_stack_tops: Projected vectors to use to produce an attentive
          weighting alpha_t.
      attention_dim: The dimension of the vectors over which to do attention.
      vs: A variable store for the learned parameters.
      name: An identifier for the learned parameters in this unit.
      initializer: Accepted for interface compatibility; not used in this
          function body.

    Dimension notation:
      B : Batch size
      k : Model dim
      L : num_transitions

    Returns:
      The output of the LSTM layer applied to the attended premise
      concatenated with the current stack top.
    """
    # Dot product along the last axis, transposed to B x L.
    scores = T.sum(projected_stack_tops * current_stack_top, axis=2).T
    weights = T.nnet.softmax(scores)

    # Weighted sum of the premise over the transition axis: B x k.
    weights_3d = weights.T[:, :, np.newaxis]
    attended = T.sum(premise_stack_tops * weights_3d, axis=0)

    lstm_in = T.concatenate([attended, current_stack_top], axis=1)
    lstm_width = 2 * attention_dim
    return LSTMLayer(attention_state_prev, lstm_in, lstm_width, lstm_width, vs, name="%s/lstm" % name)
示例13: getRpRnTpTnForTrain0OrVal1
def getRpRnTpTnForTrain0OrVal1(self, y, training0OrValidation1):
    """Build per-class counts of real/true positives and negatives.

    For every class (background included) four symbolic sums are appended,
    in this order: number of real positives, number of real negatives,
    number of true predicted positives, number of true predicted negatives.
    Classes appear in their natural order, so the list reads
    class-0 RP, RN, TP, TN, class-1 RP, RN, TP, TN, ...

    Parameters
    ----------
    y : ground-truth labels; the original note gives
        ``T.itensor4('y')`` with dimensions [batchSize, r, c, z].
    training0OrValidation1 : 0 compares against ``self.y_pred_train``; any
        other value compares against ``self.y_pred_val``.

    Returns
    -------
    A list of ``4 * self._numberOfOutputClasses`` symbolic scalars.
    """
    isTraining = training0OrValidation1 == 0
    yPredToUse = self.y_pred_train if isTraining else self.y_pred_val
    checkDimsOfYpredAndYEqual(y, yPredToUse, "training" if isTraining else "validation")

    returnedListWithNumberOfRpRnTpTnForEachClass = []
    # `range` instead of the Python-2-only `xrange`, so this runs on both.
    for class_i in range(self._numberOfOutputClasses):
        tensorOneAtRealPos = T.eq(y, class_i)
        tensorOneAtRealNeg = T.neq(y, class_i)
        tensorOneAtPredictedPos = T.eq(yPredToUse, class_i)
        tensorOneAtPredictedNeg = T.neq(yPredToUse, class_i)
        # "True" means the prediction agrees with the ground truth.
        tensorOneAtTruePos = T.and_(tensorOneAtRealPos, tensorOneAtPredictedPos)
        tensorOneAtTrueNeg = T.and_(tensorOneAtRealNeg, tensorOneAtPredictedNeg)

        returnedListWithNumberOfRpRnTpTnForEachClass.extend([
            T.sum(tensorOneAtRealPos),
            T.sum(tensorOneAtRealNeg),
            T.sum(tensorOneAtTruePos),
            T.sum(tensorOneAtTrueNeg),
        ])

    return returnedListWithNumberOfRpRnTpTnForEachClass
示例14: get_cost_updates
def get_cost_updates(self, contraction_level, learning_rate):
    """Build the cost and the gradient-descent updates for one training step.

    The cost is the mean cross-entropy reconstruction error plus
    ``contraction_level`` times the batch-averaged squared Jacobian of the
    hidden representation.

    Side effects: stores the two cost terms in ``self.L_rec`` and
    ``self.L_jacob``.

    Parameters
    ----------
    contraction_level : weight of the Jacobian penalty.
    learning_rate : step size of the plain gradient-descent updates.

    Returns
    -------
    (cost, updates) where ``updates`` is a list of
    ``(param, param - learning_rate * gradient)`` pairs.
    """
    y = self.get_hidden_values(self.x)
    z = self.get_reconstructed_input(y)
    J = self.get_jacobian(y, self.W)

    # Summing over axis 1 leaves one cross-entropy value per example, so
    # with minibatches L_rec is a vector.
    self.L_rec = - T.sum(self.x * T.log(z) +
                         (1 - self.x) * T.log(1 - z),
                         axis=1)

    # Average the squared Jacobian over the minibatch.  True division is
    # required: floor division (`//`) would round the penalty down to a
    # whole number instead of averaging it.
    self.L_jacob = T.sum(J ** 2) / self.n_batchsize

    # Mean over the minibatch of the reconstruction cost, plus the weighted
    # contraction penalty.
    cost = T.mean(self.L_rec) + contraction_level * T.mean(self.L_jacob)

    # Gradients of the cost with respect to every parameter.
    gparams = T.grad(cost, self.params)
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(self.params, gparams)]

    return (cost, updates)
示例15: KLD_X
def KLD_X(self,m,S):
    """Return ``0.5 * (sum(m*m) + sum(S - log(S)) - Q*N)``.

    ``N`` and ``Q`` are the first two dimensions of ``m``.
    NOTE(review): this has the form of a KL divergence between a diagonal
    Gaussian (mean ``m``, variance ``S``) and a standard normal, assuming
    ``m`` and ``S`` are both N x Q -- confirm against the caller.
    """
    n_rows, n_cols = m.shape[0], m.shape[1]
    mean_term = T.sum(m*m)
    var_term = T.sum(S-T.log(S))
    return 0.5*(mean_term+var_term - n_cols*n_rows)