This page collects typical usage examples of the Python function theano.tensor.basic.sum. If you are wondering what exactly sum does, how to call it, or where to find working examples, the hand-picked code samples below may help.
The following shows 8 code examples of the sum function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
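Before the examples, here is a minimal standalone sketch (my own, not taken from the examples below, and assuming Theano is installed) of what tensor.sum does with the axis and keepdims arguments that the examples rely on; the variable names are purely illustrative.

    import numpy as np
    import theano
    import theano.tensor as T

    x = T.matrix('x')                           # symbolic 2-D input
    total = T.sum(x)                            # sum of all elements -> scalar
    col_sums = T.sum(x, axis=0)                 # reduce over rows -> one value per column
    row_sums = T.sum(x, axis=1, keepdims=True)  # keep the reduced axis as size 1 for broadcasting

    f = theano.function([x], [total, col_sums, row_sums])
    data = np.arange(6, dtype=theano.config.floatX).reshape(2, 3)
    print(f(data))  # roughly: 15.0, [3. 5. 7.], [[ 3.] [12.]]

The keepdims=True form is what the batch-normalization gradients below use, so that the reduced result still broadcasts against the original tensor.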
Example 1: local_abstract_batch_norm_train_grad
def local_abstract_batch_norm_train_grad(node):
    if not isinstance(node.op, AbstractBatchNormTrainGrad):
        return None

    x, dy, scale, x_mean, x_invstd, epsilon = node.inputs
    axes = node.op.axes
    if min(axes) < 0 or max(axes) > x.ndim:
        return None
    if not isinstance(x.type, TensorType) or \
       not isinstance(dy.type, TensorType) or \
       not isinstance(scale.type, TensorType) or \
       not isinstance(x_mean.type, TensorType) or \
       not isinstance(x_invstd.type, TensorType) or \
       not isinstance(epsilon.type, TensorType):
        return None

    x_diff = x - x_mean
    mean_dy_x_diff = T.mean(dy * x_diff, axis=axes, keepdims=True)
    c = (dy * x_invstd) - x_diff * (mean_dy_x_diff * (x_invstd ** 3))

    g_wrt_inputs = scale * (c - T.mean(c, axis=axes, keepdims=True))
    g_wrt_scale = T.sum(dy * x_invstd * x_diff, axis=axes, keepdims=True)
    g_wrt_bias = T.sum(dy, axis=axes, keepdims=True)
    results = [g_wrt_inputs, g_wrt_scale, g_wrt_bias]

    results = [T.patternbroadcast(r, r_orig.broadcastable)
               for (r, r_orig) in zip(results, node.outputs)]

    for var in theano.gof.graph.variables(node.inputs, results):
        if var not in node.inputs:
            copy_stack_trace(node.outputs[0], var)
    return results
Example 2: grad
def grad(self, inp, grads):
    x, dy, scale, x_mean, x_invstd, epsilon = inp
    ddinputs, ddscale, ddbias = grads

    x_diff = x - x_mean
    mean_dy_x_diff = T.mean(dy * x_diff, axis=self.axes, keepdims=True)

    # compute gradients given each of the output gradients
    g_wrt_x = 0
    g_wrt_dy = 0
    g_wrt_scale = 0
    g_wrt_x_mean = 0
    g_wrt_x_invstd = 0

    if not isinstance(ddinputs.type, theano.gradient.DisconnectedType):
        ccc = scale * (ddinputs - T.mean(ddinputs, axis=self.axes, keepdims=True))
        ddd = (x_invstd ** 3) * (ccc * T.mean(dy * x_diff, axis=self.axes, keepdims=True) +
                                 dy * T.mean(ccc * x_diff, axis=self.axes, keepdims=True))

        g_wrt_x = g_wrt_x - ddd
        g_wrt_dy = g_wrt_dy + ((ccc * x_invstd) -
                               ((x_invstd ** 3) * x_diff *
                                T.mean(ccc * x_diff, axis=self.axes, keepdims=True)))

        eee = (dy * x_invstd) - ((x_invstd ** 3) * x_diff * mean_dy_x_diff)
        g_wrt_scale = g_wrt_scale + T.sum(ddinputs * (eee - T.mean(eee, axis=self.axes, keepdims=True)),
                                          axis=self.axes, keepdims=True)

        g_wrt_x_mean = g_wrt_x_mean + T.sum(ddd, axis=self.axes, keepdims=True)
        g_wrt_x_invstd = g_wrt_x_invstd + T.sum(ccc * (dy - 3 * (x_invstd ** 2) * x_diff * mean_dy_x_diff),
                                                axis=self.axes, keepdims=True)

    if not isinstance(ddscale.type, theano.gradient.DisconnectedType):
        g_wrt_x = g_wrt_x + (x_invstd * ddscale * dy)
        g_wrt_dy = g_wrt_dy + (x_invstd * ddscale * x_diff)
        g_wrt_x_mean = g_wrt_x_mean - (x_invstd * ddscale * T.sum(dy, axis=self.axes, keepdims=True))
        g_wrt_x_invstd = g_wrt_x_invstd + (ddscale * T.sum(dy * x_diff, axis=self.axes, keepdims=True))

    if not isinstance(ddbias.type, theano.gradient.DisconnectedType):
        g_wrt_dy = g_wrt_dy + T.fill(dy, ddbias)

    # depending on which output gradients are given,
    # some inputs should be disconnected
    results = [g_wrt_x, g_wrt_dy, g_wrt_scale, g_wrt_x_mean, g_wrt_x_invstd,
               theano.gradient.DisconnectedType()()]
    return [theano.gradient.DisconnectedType()() if r is 0 else r
            for r in results]
Example 3: norm
def norm(x, ord):
    x = as_tensor_variable(x)
    ndim = x.ndim
    if ndim == 0:
        raise ValueError("'axis' entry is out of bounds.")
    elif ndim == 1:
        if ord is None:
            return tensor.sum(x**2)**0.5
        elif ord == 'inf':
            return tensor.max(abs(x))
        elif ord == '-inf':
            return tensor.min(abs(x))
        elif ord == 0:
            return x[x.nonzero()].shape[0]
        else:
            try:
                z = tensor.sum(abs(x**ord))**(1. / ord)
            except TypeError:
                raise ValueError("Invalid norm order for vectors.")
            return z
    elif ndim == 2:
        if ord is None or ord == 'fro':
            return tensor.sum(abs(x**2))**(0.5)
        elif ord == 'inf':
            return tensor.max(tensor.sum(abs(x), 1))
        elif ord == '-inf':
            return tensor.min(tensor.sum(abs(x), 1))
        elif ord == 1:
            return tensor.max(tensor.sum(abs(x), 0))
        elif ord == -1:
            return tensor.min(tensor.sum(abs(x), 0))
        else:
            raise ValueError(0)
    elif ndim > 2:
        raise NotImplementedError("We don't support norm with ndim > 2")
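As a follow-up, here is a small sketch of my own (not part of the original example, and assuming Theano is available) showing the two tensor.sum-based reductions that norm builds on for the ord=None (vector 2-norm) and ord='fro' (Frobenius norm) branches:

    import numpy as np
    import theano
    from theano import tensor

    v = tensor.vector('v')
    m = tensor.matrix('m')
    l2 = tensor.sum(v ** 2) ** 0.5        # vector 2-norm, as in the ord=None branch
    fro = tensor.sum(abs(m ** 2)) ** 0.5  # Frobenius norm, as in the ord='fro' branch

    f = theano.function([v, m], [l2, fro])
    print(f(np.asarray([3.0, 4.0], dtype=theano.config.floatX),
            np.eye(2, dtype=theano.config.floatX)))  # roughly [5.0, 1.4142]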
Example 4: grad
def grad(self, inputs, output_gradients):
    V, W, b, d = inputs
    dCdH, = output_gradients
    # make all of these ops support broadcasting of scalar b to vector b
    # and replace the zeros_like in all their grads
    # print dCdH.broadcastable
    # print "dCdH.broadcastable"
    # quit(-1)
    # dCdH = printing.Print("dCdH = ",["shape"])

    # Make sure the broadcasting pattern of the gradient is the same
    # as the initial variable
    dCdV = theano.tensor.nnet.convTransp3D(
        W, T.zeros_like(V[0, 0, 0, 0, :]), d, dCdH, V.shape[1:4])
    dCdV = T.patternbroadcast(dCdV, V.broadcastable)
    WShape = W.shape
    dCdW = theano.tensor.nnet.convGrad3D(V, d, WShape, dCdH)
    dCdW = T.patternbroadcast(dCdW, W.broadcastable)
    dCdb = T.sum(dCdH, axis=(0, 1, 2, 3))
    dCdb = T.patternbroadcast(dCdb, b.broadcastable)
    dCdd = grad_undefined(
        self, 3, inputs[3],
        "The gradient of Conv3D with respect to the convolution"
        " stride is undefined because Conv3D is only defined for"
        " integer strides.")

    if 'name' in dir(dCdH) and dCdH.name is not None:
        dCdH_name = dCdH.name
    else:
        dCdH_name = 'anon_dCdH'

    if 'name' in dir(V) and V.name is not None:
        V_name = V.name
    else:
        V_name = 'anon_V'

    if 'name' in dir(W) and W.name is not None:
        W_name = W.name
    else:
        W_name = 'anon_W'

    if 'name' in dir(b) and b.name is not None:
        b_name = b.name
    else:
        b_name = 'anon_b'

    dCdV.name = 'Conv3D_dCdV(dCdH=' + dCdH_name + ',V=' + V_name + ')'
    dCdW.name = ('Conv3D_dCdW(dCdH=' + dCdH_name + ',V=' + V_name +
                 ',W=' + W_name + ')')
    dCdb.name = ('Conv3D_dCdb(dCdH=' + dCdH_name + ',V=' + V_name +
                 ',W=' + W_name + ',b=' + b_name + ')')

    return [dCdV, dCdW, dCdb, dCdd]
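Examples 4, 6, 7 and 8 all reduce a 5-D gradient over every axis except the last one to obtain the per-channel bias gradient. A minimal sketch of that multi-axis reduction (my own, assuming Theano is available; the shape is made up for illustration):

    import numpy as np
    import theano
    import theano.tensor as T

    # A 5-D tensor standing in for dCdH; summing over axes (0, 1, 2, 3)
    # leaves one value per trailing dimension, which is the shape the
    # bias gradient needs.
    dCdH = T.TensorType(theano.config.floatX, (False,) * 5)('dCdH')
    dCdb = T.sum(dCdH, axis=(0, 1, 2, 3))

    f = theano.function([dCdH], dCdb)
    print(f(np.ones((2, 3, 3, 4, 5), dtype=theano.config.floatX)).shape)  # (5,)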
Example 5: functions
def functions(self, sequence_length):
    key = (sequence_length)

    if key not in self.cache:
        logging.info("Need to construct graph for sequence_length=%d..." % (sequence_length))
        # creating network input variable nodes
        correct_inputs = t.ftensor3("correct input")
        noise_inputs = t.ftensor3("noise input")
        learning_rate = t.fscalar("learning rate")
        # creating op nodes for firing the network
        correct_score, correct_prehidden = self.score(correct_inputs)
        noise_score, noise_prehidden = self.score(noise_inputs)
        # creating op nodes for the pairwise ranking cost function
        loss = t.clip(1 - correct_score + noise_score, 0, 1e999)
        total_loss = t.sum(loss)
        # the necessary cost function gradients
        parameters_gradient = grad(total_loss, list(self.parameters))
        correct_inputs_gradient = grad(total_loss, correct_inputs)
        noise_inputs_gradient = grad(total_loss, noise_inputs)

        # setting network inputs
        predict_inputs = [correct_inputs]
        train_inputs = [correct_inputs, noise_inputs, learning_rate]
        verbose_predict_inputs = predict_inputs

        # setting network outputs
        predict_outputs = [correct_score]
        train_outputs = [correct_inputs_gradient, noise_inputs_gradient, loss, correct_score, noise_score]
        verbose_predict_outputs = [correct_score, correct_prehidden]

        nnodes = len(theano.gof.graph.ops(predict_inputs, predict_outputs))
        logging.info("About to compile prediction function over %d ops [nodes]..." % nnodes)
        predict = theano.function(predict_inputs, predict_outputs, mode=COMPILE_MODE)
        logging.info("...done constructing graph for sequence_length=%d" % (sequence_length))

        nnodes = len(theano.gof.graph.ops(verbose_predict_inputs, verbose_predict_outputs))
        logging.info("About to compile verbose prediction function over %d ops [nodes]..." % nnodes)
        verbose_predict = theano.function(verbose_predict_inputs, verbose_predict_outputs, mode=COMPILE_MODE)
        logging.info("...done constructing graph for sequence_length=%d" % (sequence_length))

        nnodes = len(theano.gof.graph.ops(train_inputs, train_outputs))
        logging.info("About to compile training function over %d ops [nodes]..." % nnodes)
        train = theano.function(train_inputs, train_outputs, mode=COMPILE_MODE,
                                updates=[(p, p - learning_rate * gp)
                                         for p, gp in zip(list(self.parameters), parameters_gradient)])
        logging.info("...done constructing graph for sequence_length=%d" % (sequence_length))

        self.cache[key] = (predict, train, verbose_predict)

    return self.cache[key]
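To isolate the t.sum call in Example 5, here is a small self-contained sketch of my own (with made-up scores and no network) that sums the clipped pairwise ranking loss into a single scalar cost; t.maximum(0, ...) plays the role of the t.clip(..., 0, 1e999) lower clamp used above:

    import numpy as np
    import theano
    import theano.tensor as t

    correct_score = t.fvector('correct score')
    noise_score = t.fvector('noise score')
    # hinge-style pairwise ranking loss, one term per example pair
    loss = t.maximum(0, 1 - correct_score + noise_score)
    total_loss = t.sum(loss)  # reduce to a single scalar training cost

    f = theano.function([correct_score, noise_score], total_loss)
    print(f(np.array([2.0, 0.1], dtype='float32'),
            np.array([0.5, 0.3], dtype='float32')))  # 0.0 + 1.2 = 1.2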
Example 6: grad
def grad(self, inputs, output_gradients):
    V, W, b, d = inputs
    dCdH, = output_gradients
    # make all of these ops support broadcasting of scalar b to vector b
    # and replace the zeros_like in all their grads
    # print dCdH.broadcastable
    # print "dCdH.broadcastable"
    # quit(-1)
    # dCdH = printing.Print("dCdH = ",["shape"])

    # Make sure the broadcasting pattern of the gradient is the same
    # as the initial variable
    dCdV = ConvTransp3D.convTransp3D(W, T.zeros_like(V[0, 0, 0, 0, :]), d, dCdH, V.shape[1:4])
    dCdV = T.patternbroadcast(dCdV, V.broadcastable)
    WShape = W.shape
    dCdW = ConvGrad3D.convGrad3D(V, d, WShape, dCdH)
    dCdW = T.patternbroadcast(dCdW, W.broadcastable)
    dCdb = T.sum(dCdH, axis=(0, 1, 2, 3))
    dCdb = T.patternbroadcast(dCdb, b.broadcastable)
    dCdd = None  # not differentiable, since d is not continuous

    if 'name' in dir(dCdH) and dCdH.name is not None:
        dCdH_name = dCdH.name
    else:
        dCdH_name = 'anon'

    if 'name' in dir(V) and V.name is not None:
        V_name = V.name
    else:
        V_name = 'anon'

    if 'name' in dir(W) and W.name is not None:
        W_name = W.name
    else:
        W_name = 'anon'

    if 'name' in dir(b) and b.name is not None:
        b_name = b.name
    else:
        b_name = 'anon'

    dCdV.name = 'Conv3D_dCdV.dCdH=' + dCdH_name + ',V=' + V_name
    dCdW.name = 'Conv3D_dCdW.dCdH=' + dCdH_name + ',V=' + V_name + ',W=' + W_name
    dCdb.name = 'Conv3D_dCdb.dCdH=' + dCdH_name + ',V=' + V_name + ',W=' + W_name + ',b=' + b_name

    return [dCdV, dCdW, dCdb, dCdd]
Example 7: grad
def grad(self, inputs, output_gradients):
    W, b, d, H, RShape = inputs
    dCdR, = output_gradients
    dCdH = theano.tensor.nnet.conv3D(dCdR, W, T.zeros_like(H[0, 0, 0, 0, :]), d)
    WShape = W.shape
    dCdW = theano.tensor.nnet.convGrad3D(dCdR, d, WShape, H)
    dCdb = T.sum(dCdR, axis=(0, 1, 2, 3))
    # not differentiable, since d affects the output elements
    dCdd = grad_undefined(self, 2, d)
    # disconnected, since RShape just determines the output shape
    dCdRShape = DisconnectedType()()

    if 'name' in dir(dCdR) and dCdR.name is not None:
        dCdR_name = dCdR.name
    else:
        dCdR_name = 'anon_dCdR'

    if 'name' in dir(H) and H.name is not None:
        H_name = H.name
    else:
        H_name = 'anon_H'

    if 'name' in dir(W) and W.name is not None:
        W_name = W.name
    else:
        W_name = 'anon_W'

    if 'name' in dir(b) and b.name is not None:
        b_name = b.name
    else:
        b_name = 'anon_b'

    dCdW.name = ('ConvTransp3D_dCdW.H=' + H_name + ',dCdR=' + dCdR_name +
                 ',W=' + W_name)
    dCdb.name = ('ConvTransp3D_dCdb.H=' + H_name + ',dCdR=' + dCdR_name +
                 ',W=' + W_name + ',b=' + b_name)
    dCdH.name = 'ConvTransp3D_dCdH.H=' + H_name + ',dCdR=' + dCdR_name

    return [dCdW, dCdb, dCdd, dCdH, dCdRShape]
Example 8: grad
def grad(self, inputs, output_gradients):
    W, b, d, H, RShape = inputs
    dCdR, = output_gradients
    dCdH = conv3D(dCdR, W, T.zeros_like(H[0, 0, 0, 0, :]), d)
    WShape = W.shape
    dCdW = convGrad3D(dCdR, d, WShape, H)
    dCdb = T.sum(dCdR, axis=(0, 1, 2, 3))
    dCdd = None  # not differentiable, since d is not continuous
    dCdRShape = None  # not differentiable, since RShape is not continuous

    if 'name' in dir(dCdR) and dCdR.name is not None:
        dCdR_name = dCdR.name
    else:
        dCdR_name = 'anon'

    if 'name' in dir(H) and H.name is not None:
        H_name = H.name
    else:
        H_name = 'anon'

    if 'name' in dir(W) and W.name is not None:
        W_name = W.name
    else:
        W_name = 'anon'

    if 'name' in dir(b) and b.name is not None:
        b_name = b.name
    else:
        b_name = 'anon'

    dCdW.name = 'ConvTransp3D_dCdW.H=' + H_name + ',dCdR=' + dCdR_name + ',W=' + W_name
    dCdb.name = 'ConvTransp3D_dCdb.H=' + H_name + ',dCdR=' + dCdR_name + ',W=' + W_name + ',b=' + b_name
    dCdH.name = 'ConvTransp3D_dCdH.H=' + H_name + ',dCdR=' + dCdR_name

    return [dCdW, dCdb, dCdd, dCdH, dCdRShape]