This page collects typical usage examples of the theano.tensor.grad function in Python. If you are wondering what grad does, how to call it, or what real-world usage looks like, the curated code examples below should help.
Fifteen code examples of the grad function are shown below, ordered by popularity by default.
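Before the individual examples, here is a minimal sketch of the pattern that almost all of them share: build a scalar symbolic cost, call T.grad(cost, wrt=param) to get a symbolic gradient, and plug that gradient into an update rule compiled with theano.function. The variable names below are illustrative and not taken from any of the examples.

import numpy as np
import theano
import theano.tensor as T

# symbolic input and a shared parameter (float64 to keep dtypes simple)
x = T.dvector('x')
w = theano.shared(np.ones(3), name='w')

# scalar cost
cost = T.sum((w * x) ** 2)

# symbolic gradient of the cost with respect to w
g_w = T.grad(cost, wrt=w)

# one SGD step: the update expression reuses the symbolic gradient
train_step = theano.function(
    inputs=[x],
    outputs=cost,
    updates=[(w, w - 0.1 * g_w)],
)

print(train_step(np.array([1.0, 2.0, 3.0])))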
Example 1: fit
def fit(self, X, learning_rate=0.5, mu=0.99, epochs=1, batch_sz=100, show_fig=False):
    N, D = X.shape
    n_batches = N / batch_sz

    W0 = init_weights((D, self.M))
    self.W = theano.shared(W0, 'W_%s' % self.id)
    self.bh = theano.shared(np.zeros(self.M), 'bh_%s' % self.id)
    self.bo = theano.shared(np.zeros(D), 'bo_%s' % self.id)
    self.params = [self.W, self.bh, self.bo]
    self.forward_params = [self.W, self.bh]

    # TODO: technically these should be reset before doing backprop
    self.dW = theano.shared(np.zeros(W0.shape), 'dW_%s' % self.id)
    self.dbh = theano.shared(np.zeros(self.M), 'dbh_%s' % self.id)
    self.dbo = theano.shared(np.zeros(D), 'dbo_%s' % self.id)
    self.dparams = [self.dW, self.dbh, self.dbo]
    self.forward_dparams = [self.dW, self.dbh]

    X_in = T.matrix('X_%s' % self.id)
    X_hat = self.forward_output(X_in)

    # attach it to the object so it can be used later
    # must be sigmoidal because the output is also a sigmoid
    H = T.nnet.sigmoid(X_in.dot(self.W) + self.bh)
    self.hidden_op = theano.function(
        inputs=[X_in],
        outputs=H,
    )

    # cost = ((X_in - X_hat) * (X_in - X_hat)).sum() / N
    cost = -(X_in * T.log(X_hat) + (1 - X_in) * T.log(1 - X_hat)).sum() / (batch_sz * D)
    cost_op = theano.function(
        inputs=[X_in],
        outputs=cost,
    )

    updates = [
        (p, p + mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
    ] + [
        (dp, mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
    ]
    train_op = theano.function(
        inputs=[X_in],
        updates=updates,
    )

    costs = []
    print "training autoencoder: %s" % self.id
    for i in xrange(epochs):
        print "epoch:", i
        X = shuffle(X)
        for j in xrange(n_batches):
            batch = X[j*batch_sz:(j*batch_sz + batch_sz)]
            train_op(batch)
            the_cost = cost_op(X)  # technically we could also get the cost for Xtest here
            print "j / n_batches:", j, "/", n_batches, "cost:", the_cost
            costs.append(the_cost)
    if show_fig:
        plt.plot(costs)
        plt.show()
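The update list in Example 1 implements classical momentum: each parameter p has a companion velocity dp, both stored as shared variables, and T.grad(cost, p) appears in both update expressions. Because Theano evaluates every update expression with the old values of the shared variables, the parameter step effectively uses the freshly computed velocity. A self-contained sketch of the same pattern, with made-up names and a toy cost:

import numpy as np
import theano
import theano.tensor as T

x = T.dvector('x')
p = theano.shared(np.zeros(3), name='p')    # parameter
dp = theano.shared(np.zeros(3), name='dp')  # its velocity

cost = T.sum((p - x) ** 2)
g = T.grad(cost, p)

mu, learning_rate = 0.99, 0.01
updates = [
    (dp, mu * dp - learning_rate * g),     # new velocity
    (p, p + mu * dp - learning_rate * g),  # parameter step (same expression as the new velocity)
]
train_op = theano.function([x], cost, updates=updates)
train_op(np.array([1.0, 2.0, 3.0]))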
Example 2: sgd_optimization
def sgd_optimization(learning_rate=0.13, n_epochs=1000, batch_size=100):
    dataset = generate_data()
    train_x, train_y = dataset[0]
    print train_x.type, train_y.type
    validate_x, validate_y = dataset[1]
    test_x, test_y = dataset[2]
    print 'train set size %d' % (train_x.get_value().shape[0])
    print 'validate set size %d' % (validate_x.get_value().shape[0])
    print 'test set size %d' % (test_x.get_value().shape[0])

    n_batches = train_x.get_value(borrow=True).shape[0] / batch_size
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    lr = LogisticRegression(x, train_x.get_value().shape[1])
    cost = lr.negative_log_likelihood(y)

    print 'compile function test_model...'
    test_model = theano.function(inputs=[index],
        outputs=lr.errors(y),
        givens={
            x: train_x[index*batch_size : (index+1)*batch_size],
            y: train_y[index*batch_size : (index+1)*batch_size]
        })

    g_w = T.grad(cost=cost, wrt=lr.w)
    g_b = T.grad(cost=cost, wrt=lr.b)
    updates = [(lr.w, lr.w - learning_rate*g_w),
               (lr.b, lr.b - learning_rate*g_b)]

    print 'compile function train_model...'
    train_model = theano.function(inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_x[index*batch_size : (index+1)*batch_size],
            y: train_y[index*batch_size : (index+1)*batch_size]
        })

    best_train_error = numpy.Inf
    start_time = time.clock()
    for epoch in xrange(n_epochs):
        for minibatch_index in xrange(n_batches):
            batch_cost = train_model(minibatch_index)
        train_errors = [test_model(i) for i in xrange(n_batches)]
        train_error = numpy.mean(train_errors)
        if best_train_error > train_error:
            best_train_error = train_error
        print 'epoch %d, best_train_error %lf, train_error %lf' \
            % (epoch, best_train_error, train_error)
        # print 'iterator %d %lf' % (epoch*n_batches + minibatch_index+1, batch_cost)
    end_time = time.clock()
    print 'cost %d' % (end_time-start_time)
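Examples 2 and 11 both follow the standard Theano minibatch recipe: the dataset lives in shared variables, the compiled training function takes only a minibatch index, and givens substitutes the corresponding slice for the symbolic inputs. A stripped-down version of that recipe, with illustrative names and random data:

import numpy as np
import theano
import theano.tensor as T

batch_size = 2
data_x = theano.shared(np.random.randn(6, 3))  # whole dataset kept in a shared variable
w = theano.shared(np.ones(3), name='w')

index = T.lscalar('index')
x = T.dmatrix('x')
cost = T.sum(T.dot(x, w) ** 2)
g_w = T.grad(cost, w)

train_model = theano.function(
    inputs=[index],
    outputs=cost,
    updates=[(w, w - 0.1 * g_w)],
    givens={x: data_x[index * batch_size: (index + 1) * batch_size]},
)

for i in range(6 // batch_size):
    train_model(i)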
Example 3: __init__
def __init__(self,
             input=tensor.dvector('input'),
             target=tensor.dvector('target'),
             n_input=1, n_hidden=1, n_output=1, lr=1e-3, **kw):
    super(NNet, self).__init__(**kw)

    self.input = input
    self.target = target
    self.lr = shared(lr, 'learning_rate')
    self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1')
    self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2')
    # print self.lr.type

    self.hidden = sigmoid(tensor.dot(self.w1, self.input))
    self.output = tensor.dot(self.w2, self.hidden)
    self.cost = tensor.sum((self.output - self.target)**2)

    self.sgd_updates = {
        self.w1: self.w1 - self.lr * tensor.grad(self.cost, self.w1),
        self.w2: self.w2 - self.lr * tensor.grad(self.cost, self.w2)}

    self.sgd_step = pfunc(
        params=[self.input, self.target],
        outputs=[self.output, self.cost],
        updates=self.sgd_updates)

    self.compute_output = pfunc([self.input], self.output)
    self.output_from_hidden = pfunc([self.hidden], self.output)
Example 4: get_gradients
def get_gradients(self, X, Y, weights=1.0):
    W_mean, W_ls, b_mean, b_ls = self.parameters

    mean, log_sigma = self.sample_expected(Y)
    sigma = tensor.exp(log_sigma)

    cost = -log_sigma - 0.5 * (X - mean) ** 2 / tensor.exp(2 * log_sigma)
    if weights != 1.0:
        cost = -weights.dimshuffle(0, "x") * cost

    cost_scaled = sigma ** 2 * cost
    cost_gscale = (sigma ** 2).sum(axis=1).dimshuffle([0, "x"])
    cost_gscale = cost_gscale * cost

    gradients = OrderedDict()

    params = Selector(self.mlp).get_parameters()
    for pname, param in params.iteritems():
        gradients[param] = tensor.grad(cost_gscale.sum(), param, consider_constant=[X, Y])

    gradients[W_mean] = tensor.grad(cost_scaled.sum(), W_mean, consider_constant=[X, Y])
    gradients[b_mean] = tensor.grad(cost_scaled.sum(), b_mean, consider_constant=[X, Y])

    gradients[W_ls] = tensor.grad(cost_scaled.sum(), W_ls, consider_constant=[X, Y])
    gradients[b_ls] = tensor.grad(cost_scaled.sum(), b_ls, consider_constant=[X, Y])

    return gradients
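Example 4 leans on the consider_constant argument of tensor.grad, which tells the differentiation to treat the listed variables as constants so no gradient flows through them. A tiny illustration of the effect, with made-up variables:

import numpy as np
import theano
import theano.tensor as T

w = theano.shared(np.array(2.0), name='w')
scale = w ** 2           # depends on w
cost = scale * w         # i.e. w ** 3

g_full = T.grad(cost, w)                              # 3 * w ** 2 = 12
g_const = T.grad(cost, w, consider_constant=[scale])  # scale treated as constant: gradient is scale = 4

print(theano.function([], [g_full, g_const])())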
Example 5: build
def build(self):
    self.debug = []

    lM = []
    lpullerror = []
    lpusherror = []
    lupdate = []
    for i in xrange(self.M):
        if not self.localM:
            lM.append(theano.shared(value=np.eye(self.dim, dtype='float32'), name='M', borrow=True))
            lpullerror.append(0.0)
            lpusherror.append(0.0)
            continue
        M = theano.shared(value=np.eye(self.dim, dtype='float32'), name='M', borrow=True)
        pullerror, pusherror = self._local_error(M, i)
        pullerror *= (1-self.mu)
        pusherror *= self.mu
        error = pullerror + pusherror
        update = (M, M - self._lr[i] * T.grad(error, M))
        lM.append(M)
        lpullerror.append((1-self.mu)*pullerror)
        lpusherror.append(self.mu*pusherror)
        lupdate.append(update)
    self.lM = lM
    self.lpusherror = lpusherror
    self.lpullerror = lpullerror
    self.lupdate = lupdate

    # gError = 0.0
    gM = []
    gpullerror = []
    gpusherror = []
    gupdate = []
    for i in xrange(self.M):
        if not self.globalM:
            gM.append(theano.shared(value=np.eye(self.dim, dtype='float32'), name='M', borrow=True))
            gpullerror.append(0.0)
            gpusherror.append(0.0)
            continue
        M = theano.shared(value=np.eye(self.dim, dtype='float32'), name='M', borrow=True)
        if i == 0:
            pullerror, pusherror = self._global_error(M, i, None)
        else:
            pullerror, pusherror = self._global_error(M, i, gM[-1])
        error = (1-self.mu) * pullerror + self.mu * pusherror
        # gError += error  # *(float(i+1)/self.M)
        update = (M, M - self._lr[i+self.M] * T.grad(error, M))
        gM.append(M)
        gpullerror.append((1-self.mu)*pullerror)
        gpusherror.append(self.mu*pusherror)
        gupdate.append(update)
    # if self.globalM:
    #     gupdate = [(gM[i], gM[i] - self._lr[i+self.M]*T.grad(gError, M)) for i in xrange(self.M)]
    self.gM = gM
    self.gpusherror = gpusherror
    self.gpullerror = gpullerror
    self.gupdate = gupdate
Example 6: train
def train(self, epochs=1000, learning_rate=0.1):
    regression = self.regression
    X = self.X
    Y = self.Y

    x = T.matrix('x')  # data, presented as rasterized images
    y = T.vector('y')  # labels, presented as 1D vector of [int] labels

    error = regression.error(x, y)

    g_W = T.grad(cost=error, wrt=regression.W)
    g_b = T.grad(cost=error, wrt=regression.b)

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(regression.W, regression.W - learning_rate * g_W),
               (regression.b, regression.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = tn.function(
        inputs=[],
        outputs=error,
        updates=updates,
        givens={
            x: X,
            y: Y
        }
    )

    print('training start:')
    start_time = timeit.default_timer()
    epoch = 0
    while(epoch < epochs):
        avg_error = train_model()
        print('epoch {0}, error {1}'.format(epoch, avg_error), end='\r')
        epoch += 1
    print('training finish (final error: {0}) took {1} seconds.'.format(regression.error(X, Y).eval(), timeit.default_timer() - start_time))

    # z = regression.compute(data_x).ravel()
    # e = regression.error(data_y, z)
    # l = regression.loss(e)
    # epoch = 0
    # while(epoch < epochs):
    #     g = regression.grad(data_y, z)
    #     d = regression.delta(g, data_x)
    #     regression.W -= learning_rate * d[0]
    #     regression.b -= learning_rate * d[1]
    #
    #     z = regression.compute(data_x).ravel()
    #     e = regression.error(data_y, z)
    #     l = regression.loss(e)
    #     # print(l.eval())
    #
    #     epoch += 1
    #     print('epoch:', epoch, end='\r')
    pass
Example 7: test_gradient_batch_normalization_op
def test_gradient_batch_normalization_op():
    epsilon = 1e-8
    op = gn.GradientBatchNormalizationOp(subtract_mean=True,
                                         keep_mean=False,
                                         epsilon=epsilon)
    X = np.random.randn(3, 4).astype(fX)
    W = np.random.randn(2, 3).astype(fX)

    x = T.matrix("x")
    w = T.matrix("w")
    orig_grad = T.grad(w.dot(x).sum(), x).eval({x: X, w: W})
    new_grad = T.grad(w.dot(op(x)).sum(), x).eval({x: X, w: W})

    mu = orig_grad.mean(axis=0, keepdims=True)
    sigma = orig_grad.std(axis=0, keepdims=True) + epsilon
    ans = (orig_grad - mu) / sigma
    np.testing.assert_allclose(ans,
                               new_grad,
                               rtol=1e-5)
    np.testing.assert_allclose(np.zeros(4),
                               new_grad.mean(axis=0),
                               atol=1e-5)
    np.testing.assert_allclose(np.ones(4),
                               new_grad.std(axis=0),
                               rtol=1e-5)
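Example 7 never compiles a function explicitly; it evaluates the symbolic gradients directly with .eval({...}), which is handy for quick numerical checks in tests. A minimal illustration of the same trick:

import numpy as np
import theano.tensor as T

x = T.matrix('x')
g = T.grad((x ** 2).sum(), x)
print(g.eval({x: np.ones((2, 3), dtype=x.dtype)}))  # every entry is about 2.0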
Example 8: __build_theano__
def __build_theano__(self):
    x = ivector(name="x")
    y = ivector(name="y")
    U, V, W = self.U, self.V, self.W

    def forword_prop_step(x_t, s_t_prev, U, V, W):
        s_t = T.tanh(U[:, x_t] + V.dot(s_t_prev))
        o_t = T.nnet.softmax(W.dot(s_t))
        return [o_t[0], s_t]

    [o, s], updates = theano.scan(forword_prop_step, sequences=x,
                                  outputs_info=[None, dict(initial=T.zeros(self.hidden_dim))],
                                  non_sequences=[U, V, W], truncate_gradient=4, strict=True)

    prediction = T.argmax(o, axis=1)
    o_error = T.sum(T.nnet.categorical_crossentropy(o, y))

    dU = T.grad(o_error, U)
    dV = T.grad(o_error, V)
    dW = T.grad(o_error, W)

    self.forward = theano.function([x], o)
    self.predict = theano.function([x], prediction)
    self.c_error = theano.function([x, y], o_error)
    self.bptt = theano.function([x, y], [dU, dV, dW])

    learning_rate = scalar(name="learning_rate")
    self.sgd_step = theano.function([x, y, learning_rate], [],
                                    updates=[(self.U, self.U-learning_rate*dU),
                                             (self.V, self.V-learning_rate*dV),
                                             (self.W, self.W-learning_rate*dW)])
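In Example 8 the cost is built through theano.scan, and T.grad backpropagates through the unrolled recurrence (truncate_gradient=4 limits how far back it goes). A much smaller scan-plus-grad sketch, with illustrative names:

import numpy as np
import theano
import theano.tensor as T

x = T.dvector('x')

# running sum of x computed with scan
s, _ = theano.scan(lambda x_t, acc: acc + x_t,
                   sequences=x,
                   outputs_info=T.zeros_like(x[0]))
cost = s[-1]              # equals x.sum()
g = T.grad(cost, x)       # gradient flows back through the scan

f = theano.function([x], g)
print(f(np.array([1.0, 2.0, 3.0])))  # roughly [1., 1., 1.]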
Example 9: calculate_Rl
def calculate_Rl(v_input):
    # Sample an h_sample according to one v_input
    _, hl_mean, hl_sample = self.sample_h_given_v(v_input)
    # Calculate the probability of the visible output according to h_sample
    _, vn_mean = self.propdown(hl_sample)

    # - Part1.
    #   Desc: Multiply each element in the gradient by T.log(vn_mean).sum()
    #   Hint: [array(...), array(...), array(...)] = T.grad(..., self.params)
    #         The gradient has one element per parameter being differentiated.
    # part1 = map(lambda x: x * T.log(vn_mean).sum(),
    #             T.grad(T.log(hl_mean).sum(),
    #                    self.params,
    #                    disconnected_inputs='warn'))
    part1 = [x * T.log(vn_mean).sum() for x in T.grad(
        T.log(hl_mean).sum(),
        self.params,
        disconnected_inputs='warn')]

    # - Part2.
    part2 = T.grad((T.log(self.propdown(hl_sample)[1]).sum()),
                   self.params,
                   consider_constant=[hl_sample],
                   disconnected_inputs='warn')

    # Rl is obtained by adding the corresponding elements of the two gradients.
    # Rl = log(p(v^n|h^l;\theta)) * grad(log(p(h^l|v^n;\theta))) + grad(log(p(v^n|h^l;\theta)))
    # Rl = map(lambda p1, p2: p1 + p2, part1, part2)
    Rl = [x + y for x, y in zip(part1, part2)]
    mi_cost_xi = T.log(vn_mean).sum()
    Rl.append(mi_cost_xi)
    return Rl
Example 10: test_downsample
def test_downsample():
    shps = [
        (1, 1, 1, 12),
        (1, 1, 2, 2),
        (1, 1, 1, 1),
        (1, 1, 4, 4),
        (1, 1, 10, 11),
        (1, 2, 2, 2),
        (3, 5, 4, 4),
        (25, 1, 7, 7),
        (1, 1, 12, 12),
        (1, 1, 2, 14),
        (1, 1, 12, 14),
        (1, 1, 14, 14),
        (1, 1, 16, 16),
        (1, 1, 18, 18),
        (1, 1, 24, 24),
        (1, 6, 24, 24),
        (10, 1, 24, 24),
        (10, 6, 24, 24),
        (30, 6, 12, 12),
        (30, 2, 24, 24),
        (30, 6, 24, 24),
        (10, 10, 10, 11),
        (1, 1, 10, 1025),
        (1, 1, 10, 1023),
        (1, 1, 1025, 10),
        (1, 1, 1023, 10),
    ]

    numpy.random.RandomState(unittest_tools.fetch_seed()).shuffle(shps)

    for shp in shps:
        for ds in (2, 2), (3, 2), (1, 1):
            if ds[0] > shp[2]:
                continue
            if ds[1] > shp[3]:
                continue
            # GpuDownsampleFactorMax doesn't like having more than 512 columns
            # in the output tensor.
            if float(shp[3]) / ds[1] > 512:
                continue
            for ignore_border in (True, False):
                print "test_downsample", shp, ds, ignore_border
                ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border)

                a = tcn.shared_constructor(my_rand(*shp), "a")
                f = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_with_gpu)
                f2 = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_without_gpu)
                assert any([isinstance(node.op, tcn.blas.GpuDownsampleFactorMax) for node in f.maker.env.toposort()])
                assert any([isinstance(node.op, DownsampleFactorMax) for node in f2.maker.env.toposort()])
                assert numpy.allclose(f(), f2())

                g = pfunc([], tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(), a), mode=mode_with_gpu)
                g2 = pfunc([], tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(), a), mode=mode_without_gpu)
                assert any(
                    [isinstance(node.op, tcn.blas.GpuDownsampleFactorMaxGrad) for node in g.maker.env.toposort()]
                )
                assert any([isinstance(node.op, DownsampleFactorMaxGrad) for node in g2.maker.env.toposort()])
                assert numpy.allclose(g(), g2())
Example 11: fit
def fit(self, data_x, data_y):
    print "Training"
    start = time.clock()

    n_batches = data_x.get_value(borrow=True).shape[0] / self.batch_size
    tensor_x = T.matrix('x')
    tensor_y = T.ivector('y')
    index = T.lscalar('index')

    self.single_layer = Layer(self.n_in, self.n_out, T.nnet.softmax)
    cost = self.single_layer.negative_log_likelihood(tensor_x, tensor_y)
    g_W = T.grad(cost, self.single_layer.W)
    g_b = T.grad(cost, self.single_layer.b)
    updates = [(self.single_layer.W, self.single_layer.W - g_W*self.learning_rate),
               (self.single_layer.b, self.single_layer.b - g_b*self.learning_rate)]

    train_batch = theano.function([index], [cost],
                                  updates=updates,
                                  givens={tensor_x: data_x[index*self.batch_size : (index + 1)*self.batch_size],
                                          tensor_y: data_y[index*self.batch_size : (index + 1)*self.batch_size]})

    train_batch_costs = [0 for i in xrange(n_batches)]
    for iter in xrange(self.iters):
        for minibatch_index in xrange(n_batches):
            train_batch_costs[minibatch_index] = train_batch(minibatch_index)
        if self.verbose == 1:
            print "Iter %d --> %f" % (iter, np.mean(train_batch_costs))
    end = time.clock()
    print "Finished Training Logistic Regression Model\n" \
          "Iterations %d\n" \
          "Time Taken : %d secs" % (self.iters, end - start)
Example 12: get_params_and_grads
def get_params_and_grads(graph, cost, verbose=False):
    params = []
    for k, p in graph.items():
        if k == DATASETS_ID:
            # skip datasets
            continue
        if k == RANDOM_ID:
            # skip random
            continue
        params.append(p)

    if verbose:
        grads = []
        for k, p in graph.items():
            if k == DATASETS_ID:
                # skip datasets
                continue
            if k == RANDOM_ID:
                # skip random
                continue
            print("Computing grad w.r.t %s" % k)
            grad = tensor.grad(cost, p)
            grads.append(grad)
    else:
        grads = tensor.grad(cost, params)
    return params, grads
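As Example 12 shows, tensor.grad also accepts a list for wrt and then returns one gradient per parameter, which is normally cheaper than calling it once per parameter. Roughly:

import numpy as np
import theano
import theano.tensor as T

a = theano.shared(np.array(1.0), name='a')
b = theano.shared(np.array(2.0), name='b')
cost = a ** 2 + 3 * b

grads = T.grad(cost, [a, b])   # a list with one symbolic gradient per parameter
f = theano.function([], grads)
print(f())                     # roughly [2.0, 3.0]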
Example 13: get_mean_square_norm_gradients_variance_method_00
def get_mean_square_norm_gradients_variance_method_00(D_by_layer, cost, accum=0):

    # This returns a theano variable that will be of shape (minibatch_size, ).
    # It will contain, for each training example, the associated mean of the
    # variance wrt the gradient of that minibatch.

    for (layer_name, D) in D_by_layer.items():

        input = D['input']
        input_square_norms = tensor.sqr(D['input']).sum(axis=1)
        backprop_output = tensor.grad(cost, D['output'])
        # I don't think that theano recomputes this.
        # It should be just redundant nodes in the computational graph
        # that end up being computed only once anyways.
        grad_weight = tensor.grad(cost, D['weight'])
        grad_bias = tensor.grad(cost, D['bias'])
        backprop_output_square_norms = tensor.sqr(backprop_output).sum(axis=1)

        if D.has_key('weight'):
            A = input_square_norms * backprop_output_square_norms
            C = tensor.sqr(grad_weight).sum()  # all the terms get this "middle" expression added to them
            B = (backprop_output.dot(grad_weight.T) * input).sum(axis=1)

            accum += (A - 2*B + C)

        if D.has_key('bias'):
            # this last `sum` could be a component-wise `max` if we wanted
            # to carry the maximum of the variances instead of the sum of squares
            accum = accum + tensor.sqr(backprop_output - grad_bias.reshape((1, -1))).sum(axis=1)

    return accum
Example 14: test_reduce_custom_dtype
def test_reduce_custom_dtype(self):
    """
    Test the ability to provide your own output dtype for a reduce.
    """
    # We try multiple axis combinations even though axis should not matter.
    idx = 0
    for method in self.methods:
        for input_dtype in self.dtypes:
            x = tensor.matrix(dtype=input_dtype)
            for output_dtype in self.dtypes:
                # If the output is a complex, the gradient of the reduce will
                # cast the complex to the input dtype. We can't call the normal
                # cast on a complex to a not complex as this is ambiguous.
                if (not input_dtype.startswith('complex') and
                        output_dtype.startswith('complex')):
                    continue

                axis = self.axes[idx % len(self.axes)]
                var = getattr(x, method)(dtype=output_dtype, axis=axis)
                assert var.dtype == output_dtype

                f = theano.function([x], var, mode=self.mode)
                topo = f.maker.fgraph.toposort()
                assert [n for n in topo if isinstance(n.op, self.op)], (topo,
                                                                        output_dtype)
                data = numpy.random.rand(3, 4) * 10
                data = data.astype(input_dtype)
                f(data)
                if "complex" in input_dtype:
                    continue
                # Check that we can take the gradient
                tensor.grad(var.sum(), x,
                            disconnected_inputs='ignore')
                idx += 1
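Example 14 passes disconnected_inputs='ignore' so that tensor.grad does not raise when a wrt variable has no influence on the cost (Example 9 uses disconnected_inputs='warn' for the same reason). A tiny illustration with made-up variables:

import theano.tensor as T

x = T.dscalar('x')
y = T.dscalar('y')   # y does not appear in the cost
cost = x ** 2

# with the default disconnected_inputs='raise' this would throw DisconnectedInputError
gx, gy = T.grad(cost, [x, y], disconnected_inputs='ignore')
print(gx.eval({x: 3.0}))  # about 6.0; gy comes back as a zero gradient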
Example 15: check_mat_rop_lop
def check_mat_rop_lop(self, y, out_shape):
    vx = numpy.asarray(self.rng.uniform(size=self.mat_in_shape), theano.config.floatX)
    vv = numpy.asarray(self.rng.uniform(size=self.mat_in_shape), theano.config.floatX)

    yv = tensor.Rop(y, self.mx, self.mv)
    rop_f = function([self.mx, self.mv], yv)
    sy, _ = theano.scan(lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
                        sequences=tensor.arange(y.shape[0]),
                        non_sequences=[y, self.mx, self.mv])
    scan_f = function([self.mx, self.mv], sy)

    v1 = rop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert numpy.allclose(v1, v2), ('ROP mismatch: %s %s' % (v1, v2))

    self.check_nondiff_rop(theano.clone(y,
                                        replace={self.mx: break_op(self.mx)}))

    vv = numpy.asarray(self.rng.uniform(size=out_shape), theano.config.floatX)
    yv = tensor.Lop(y, self.mx, self.v)
    lop_f = function([self.mx, self.v], yv)

    sy = tensor.grad((self.v * y).sum(), self.mx)
    scan_f = function([self.mx, self.v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert numpy.allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))
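Example 15 verifies tensor.Rop and tensor.Lop (Jacobian-times-vector and vector-times-Jacobian products) against reference expressions built from tensor.grad, using the identity that Lop(y, x, v) matches grad((v * y).sum(), x). A compact illustration of that identity with made-up shapes:

import numpy as np
import theano
import theano.tensor as T

x = T.dvector('x')
v = T.dvector('v')
y = x ** 2                         # elementwise, so the Jacobian is diag(2 * x)

lop = T.Lop(y, x, v)               # v^T J
ref = T.grad((v * y).sum(), x)     # same quantity via grad, as in the example above

f = theano.function([x, v], [lop, ref])
print(f(np.array([1.0, 2.0, 3.0]), np.ones(3)))  # both roughly [2., 4., 6.]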