This article collects typical usage examples of the theano.gradient.grad function in Python. If you are unsure what grad does, how to call it, or what it looks like in real code, the curated examples below should help.
It presents 13 code examples of the grad function, ordered by popularity by default.
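Before the collected examples, here is a minimal, self-contained sketch of the basic call (not taken from the examples below): theano.gradient.grad(cost, wrt) builds the symbolic gradient of a scalar cost with respect to the given variable(s); the same function is also exposed as theano.grad and theano.tensor.grad.

import numpy as np
import theano
import theano.tensor as T
from theano import gradient

x = T.vector('x')
cost = (x ** 2).sum()        # scalar cost
g = gradient.grad(cost, x)   # symbolic gradient d(cost)/dx = 2 * x
f = theano.function([x], g)
print(f(np.asarray([1.0, 2.0, 3.0], dtype=theano.config.floatX)))  # -> [2. 4. 6.]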
Example 1: test_disconnected_paths
def test_disconnected_paths(self):
    # Test that taking a gradient through a disconnected
    # path raises an exception
    T = theano.tensor
    a = np.asarray(self.rng.randn(5, 5),
                   dtype=config.floatX)
    x = T.matrix('x')
    # This MUST raise a DisconnectedInputError.
    # This also raises an additional warning from gradients.py.
    self.assertRaises(gradient.DisconnectedInputError, gradient.grad,
                      gradient.disconnected_grad(x).sum(), x)
    # This MUST NOT raise a DisconnectedInputError.
    y = gradient.grad((x + gradient.disconnected_grad(x)).sum(), x)
    a = T.matrix('a')
    b = T.matrix('b')
    y = a + gradient.disconnected_grad(b)
    # This MUST raise a DisconnectedInputError.
    # This also raises an additional warning from gradients.py.
    self.assertRaises(gradient.DisconnectedInputError,
                      gradient.grad, y.sum(), b)
    # This MUST NOT raise a DisconnectedInputError.
    gradient.grad(y.sum(), a)
Example 2: functions
def functions(self, sequence_length):
    key = (sequence_length)
    if key not in self.cache:
        logging.info("Need to construct graph for sequence_length=%d..." % (sequence_length))
        # creating network input variable nodes
        correct_inputs = t.ftensor3("correct input")
        noise_inputs = t.ftensor3("noise input")
        learning_rate = t.fscalar("learning rate")
        # creating op nodes for firing the network
        correct_score, correct_prehidden = self.score(correct_inputs)
        noise_score, noise_prehidden = self.score(noise_inputs)
        # creating op nodes for the pairwise ranking cost function
        loss = t.clip(1 - correct_score + noise_score, 0, 1e999)
        total_loss = t.sum(loss)
        # the necessary cost function gradients
        parameters_gradient = grad(total_loss, list(self.parameters))
        correct_inputs_gradient = grad(total_loss, correct_inputs)
        noise_inputs_gradient = grad(total_loss, noise_inputs)
        # setting network inputs
        predict_inputs = [correct_inputs]
        train_inputs = [correct_inputs, noise_inputs, learning_rate]
        verbose_predict_inputs = predict_inputs
        # setting network outputs
        predict_outputs = [correct_score]
        train_outputs = [correct_inputs_gradient, noise_inputs_gradient, loss, correct_score, noise_score]
        verbose_predict_outputs = [correct_score, correct_prehidden]
        nnodes = len(theano.gof.graph.ops(predict_inputs, predict_outputs))
        logging.info("About to compile prediction function over %d ops [nodes]..." % nnodes)
        predict = theano.function(predict_inputs, predict_outputs, mode=COMPILE_MODE)
        logging.info("...done constructing graph for sequence_length=%d" % (sequence_length))
        nnodes = len(theano.gof.graph.ops(verbose_predict_inputs, verbose_predict_outputs))
        logging.info("About to compile verbose prediction function over %d ops [nodes]..." % nnodes)
        verbose_predict = theano.function(verbose_predict_inputs, verbose_predict_outputs, mode=COMPILE_MODE)
        logging.info("...done constructing graph for sequence_length=%d" % (sequence_length))
        nnodes = len(theano.gof.graph.ops(train_inputs, train_outputs))
        logging.info("About to compile training function over %d ops [nodes]..." % nnodes)
        train = theano.function(train_inputs, train_outputs, mode=COMPILE_MODE, updates=[(p, p - learning_rate * gp) for p, gp in zip(list(self.parameters), parameters_gradient)])
        logging.info("...done constructing graph for sequence_length=%d" % (sequence_length))
        self.cache[key] = (predict, train, verbose_predict)
    return self.cache[key]
Example 3: compute_nll_upper_bound
def compute_nll_upper_bound(self, seq_length, validation=False):
    #############
    # Inference
    (enc_mu, enc_sig, prior_mu, prior_sig, dec_bin), updates = \
        self.inference(self.orch, self.piano, seq_length)
    #############
    # Cost
    dec_bin_non_zero = T.switch(dec_bin > 0, dec_bin, 1e-30)  # Avoid log zero
    recon = T.sum(T.nnet.binary_crossentropy(dec_bin_non_zero, self.orch), axis=1)
    # binary_crossentropy = nll for binary input. Sum along input dimension, mean along time (i.e. batch)
    # for real-valued units, use GaussianNLL
    kl = KLGaussianGaussian(enc_mu, enc_sig, prior_mu, prior_sig)
    # Mean over batches
    recon_term = T.mean(recon)
    kl_term = T.mean(kl)
    # Note that instead of maximizing the negative log-lik upper bound,
    # we here minimize the log-lik upper bound
    cost = recon_term + kl_term
    if not validation:
        #############
        # Gradient
        gparams = G.grad(cost, self.params_dico.values())
        #############
        # Updates
        updates_train = self.optimizer.get_updates(self.params_dico.values(), gparams, updates)
        #############
        # Cost
        return cost, updates_train
    else:
        return cost, recon_term, kl_term, dec_bin, updates
Example 4: grad
def grad(self, inputs, output_grads):
    # OpFromGraph doesn't implement a connection_pattern, so for
    # now we regard all inputs and outputs as connected. This will
    # compute the right numerical value for the gradients but
    # could fail to raise the disconnected inputs error in some
    # cases.
    if hasattr(self, "grad_ops"):
        grad_ops = self.grad_ops
    else:
        gs = G.grad(cost=None,
                    known_grads=dict(zip(self.new_outputs, output_grads)),
                    wrt=self.new_inputs,
                    disconnected_inputs='ignore')
        grad_ops = []
        for g in gs:
            if g is None:
                grad_ops.append(lambda *args: None)
            else:
                # It is normal if some inputs are not needed in order
                # to compute the gradient, so we ignore them.
                grad_ops.append(OpFromGraph(self.new_inputs + output_grads,
                                            [g],
                                            on_unused_input='ignore'))
        self.grad_ops = grad_ops
    return [go(*(inputs + output_grads)) for go in grad_ops]
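As a side note to Example 4 (a minimal sketch, not one of the collected examples): the cost=None / known_grads form used above lets you pass gradients that are already known for intermediate variables and have grad backpropagate them to the requested inputs.

import theano
import theano.tensor as T
from theano import gradient as G

x = T.vector('x')
y = T.tanh(x)              # intermediate variable
g_y = T.vector('g_y')      # gradient assumed to arrive at y from elsewhere
g_x, = G.grad(cost=None,
              known_grads={y: g_y},
              wrt=[x],
              disconnected_inputs='ignore')
# g_x equals g_y * (1 - tanh(x) ** 2): the known gradient pushed through tanh
f = theano.function([x, g_y], g_x)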
Example 5: test_disconnected_nan
def test_disconnected_nan(self):
    # test that connection_pattern can prevent getting NaN
    # Op1 has two outputs, f and g
    # x is connected to f but not to g
    class Op1(theano.gof.Op):
        def make_node(self, x):
            return theano.Apply(self, inputs=[x], outputs=[x.type(), theano.tensor.scalar()])

        def connection_pattern(self, node):
            return [[True, False]]

        def grad(self, inputs, output_grads):
            return [inputs[0].zeros_like()]

    # Op2 has two inputs, f and g
    # Its gradient with respect to g is not defined
    class Op2(theano.gof.Op):
        def make_node(self, f, g):
            return theano.Apply(self, inputs=[f, g], outputs=[theano.tensor.scalar()])

        def grad(self, inputs, output_grads):
            return [inputs[0].zeros_like(), NullType()()]

    x = theano.tensor.vector()
    f, g = Op1()(x)
    cost = Op2()(f, g)
    # cost is differentiable wrt x,
    # but we can't tell that without using Op1's connection pattern:
    # looking at the theano graph alone, g is an ancestor of cost
    # and has x as an ancestor, so we must compute its gradient
    g = gradient.grad(cost, x)
Example 6: __init__
def __init__(self, intpic_parameters=None,
             case_costs=None, pics=None, case_labels=None,
             batch_size=None, pic_size=None, label_count=None, **kwargs):
    super(IntpicGradientDescent, self).__init__(**kwargs)
    center_val = 0.5
    self.input_pics = pics
    self.case_costs = case_costs
    self.batch_size = batch_size
    self.label_count = label_count
    self.intpic_parameters = intpic_parameters
    self.jacobians = self._compute_jacobians()
    self.gradpics = OrderedDict(
        [(param, _create_intpic_histogram_for(param, pic_size, label_count))
         for param in self.intpic_parameters])
    self.intpics = OrderedDict(
        [(param, _create_intpic_histogram_for(param, pic_size, label_count))
         for param in self.intpic_parameters])
    # attributes pics: (cases, picy, picx) to (cases, labels, picy, picx)
    # attributed_pics = tensor.batched_tensordot(
    #     tensor.extra_ops.to_one_hot(case_labels.flatten(), label_count),
    #     pics[:, 0, :, :], axes=0)
    zeroed_pics = pics - 0.5
    attributed_pics = tensor.batched_tensordot(
        tensor.extra_ops.to_one_hot(
            case_labels.flatten(), label_count),
        zeroed_pics[:, 0, :, :],
        axes=0)
    self.gradpic_updates = OrderedDict(
        [_create_gradpic_updates(
            self.gradpics[param],
            self.jacobians[param],
            attributed_pics) for param in self.intpic_parameters])
    self.add_updates(self.gradpic_updates)
    intensity_pics = (zeroed_pics *
                      gradient.grad(case_costs.mean(), pics))
    attributed_i_pics = tensor.batched_tensordot(
        tensor.extra_ops.to_one_hot(
            case_labels.flatten(), label_count),
        intensity_pics[:, 0, :, :],
        axes=0)
    self.intpic_updates = OrderedDict(
        [_create_intensity_updates(
            self.intpics[param],
            self.jacobians[param],
            attributed_i_pics) for param in self.intpic_parameters])
    self.add_updates(self.intpic_updates)
Example 7: test_grad
def test_grad(self):
    T = theano.tensor
    a = np.asarray(self.rng.randn(5, 5), dtype=config.floatX)
    x = T.matrix("x")
    expressions_gradients = [
        (x * gradient.disconnected_grad(x), x),
        (x * gradient.disconnected_grad(T.exp(x)), T.exp(x)),
        (x ** 2 * gradient.disconnected_grad(x), 2 * x ** 2),
    ]
    for expr, expr_grad in expressions_gradients:
        g = gradient.grad(expr.sum(), x)
        # gradient according to theano
        f = theano.function([x], g, on_unused_input="ignore")
        # desired gradient
        f2 = theano.function([x], expr_grad, on_unused_input="ignore")
        assert np.allclose(f(a), f2(a))
Example 8: test_grad_disconnected
def test_grad_disconnected():
    # tests corner cases of gradient for shape and alloc
    x = theano.tensor.vector(name='x')
    total = x.sum()
    total.name = 'total'
    num_elements = x.shape[0]
    num_elements.name = 'num_elements'
    silly_vector = theano.tensor.alloc(total / num_elements, num_elements)
    silly_vector.name = 'silly_vector'
    cost = silly_vector.sum()
    cost.name = 'cost'
    # note that cost simplifies to be the same as "total"
    g = gradient.grad(cost, x, add_names=False)
    # we still need to pass in x because it determines the shape of the output
    f = theano.function([x], g)
    rng = np.random.RandomState([2012, 9, 5])
    x = np.cast[x.dtype](rng.randn(3))
    g = f(x)
    assert np.allclose(g, np.ones(x.shape, dtype=x.dtype))
Example 9: __init__
def __init__(self, inputs, outputs, grad_depth=1, **kwargs):
    if not isinstance(outputs, list):
        raise TypeError('outputs must be list', outputs)
    for i in inputs + outputs:
        if not isinstance(i, gof.Variable):
            raise TypeError(
                'inputs and outputs must be Variable instances', i)
    if 'updates' in kwargs:
        raise TypeError('updates are not allowed in kwargs')
    # TODO: the graph may have implicit inputs like
    #       SharedVariable instances.
    #       What impact do they have on the validity of this Op?
    self.fn = orig_function(inputs, outputs, **kwargs)
    self.inputs = inputs
    self.outputs = outputs
    self.input_types = [input.type for input in inputs]
    self.output_types = [output.type for output in outputs]
    if grad_depth > 0:
        output_grads = [t() for t in self.output_types]
        # OpFromGraph doesn't implement a connection_pattern, so for now we regard
        # all inputs and outputs as connected. This will compute the right numerical
        # value for the gradients but could fail to raise the disconnected inputs error
        # in some cases.
        gs = G.grad(cost=None, known_grads=dict(zip(self.outputs, output_grads)),
                    wrt=self.inputs, disconnected_inputs='ignore')
        self.grad_ops = []
        for g in gs:
            if g is None:
                self.grad_ops.append(lambda *args: None)
            else:
                # It is normal if some inputs are not needed in order
                # to compute the gradient, so we ignore them.
                self.grad_ops.append(OpFromGraph(inputs + output_grads,
                                                 [g],
                                                 grad_depth=grad_depth - 1,
                                                 on_unused_input='ignore'))
Example 10: make_grad_func
def make_grad_func(X):
    Z = theano.tensor.dot(X, W) + b
    H = theano.tensor.nnet.sigmoid(Z)
    cost = H.sum()
    g = gradient.grad(cost, X)
    return theano.function([X, W, b], g, on_unused_input='ignore')
Example 11: main
def main(save_to):
    batch_size = 500
    image_size = (28, 28)
    output_size = 10
    # The above are from LeCun's paper. The blocks example had:
    #   feature_maps = [20, 50]
    #   mlp_hiddens = [500]
    # Use ReLUs everywhere and softmax for the final prediction
    convnet = create_lenet_5()
    mnist_test = MNIST(("test",), sources=['features', 'targets'])
    basis_init = create_fair_basis(mnist_test, 10, 2)
    # b = shared_floatx(basis)
    # random_init = numpy.rand.random(100, 1000)
    # r = shared_floatx(random_init)
    # rn = r / r.norm(axis=1)
    # x = tensor.dot(rn, tensor.shape_padright(b))
    x = shared_floatx(basis_init)
    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    cg = ComputationGraph([probs])
    outs = VariableFilter(
        roles=[OUTPUT], bricks=[Convolutional, Linear])(cg.variables)
    # Create an interior activation model
    model = Model([probs] + outs)
    # Load it with trained parameters
    params = load_parameters(open(save_to, 'rb'))
    model.set_parameter_values(params)
    learning_rate = shared_floatx(0.01, 'learning_rate')
    unit = shared_floatx(0, 'unit', dtype='int64')
    negate = False
    suffix = '_negsynth.jpg' if negate else '_synth.jpg'
    for output in outs:
        layer = get_brick(output)
        # For now, skip masks - for some reason they are always NaN
        iterations = 10000
        layername = layer.parents[0].name + '-' + layer.name
        # if layername != 'noisylinear_2-linear':
        #     continue
        dims = layer.get_dims(['output'])[0]
        if negate:
            measure = -output
        else:
            measure = output
        measure = measure[(slice(0, basis_init.shape[0]), ) +
                          (slice(None),) * (measure.ndim - 1)]
        if isinstance(dims, numbers.Integral):
            dims = (dims, )
            costvec = -tensor.log(tensor.nnet.softmax(
                measure)[:, unit].flatten())
        else:
            flatout = measure.flatten(ndim=3)
            maxout = flatout.max(axis=2)
            costvec = -tensor.log(tensor.nnet.softmax(
                maxout)[:, unit].flatten())
        # Add a regularization to favor gray images.
        # cost = costvec.sum() + (x - 0.5).norm(2) * (
        #     10.0 / basis_init.shape[0])
        cost = costvec.sum()
        grad = gradient.grad(cost, x)
        stepx = x - learning_rate * grad
        normx = stepx / tensor.shape_padright(
            stepx.flatten(ndim=2).max(axis=1), n_ones=3)
        newx = tensor.clip(normx, 0, 1)
        newx = newx[(slice(0, basis_init.shape[0]), ) +
                    (slice(None),) * (newx.ndim - 1)]
        fn = theano.function([], [cost], updates=[(x, newx)])
        filmstrip = Filmstrip(
            basis_init.shape[-2:], (dims[0], basis_init.shape[0]),
            background='red')
        for u in range(dims[0]):
            unit.set_value(u)
            x.set_value(basis_init)
            print('layer', layername, 'unit', u)
            for index in range(iterations):
                c = fn()[0]
                if index % 1000 == 0:
                    print('cost', c)
                    result = x.get_value()
                    for i2 in range(basis_init.shape[0]):
                        filmstrip.set_image((u, i2), result[i2, :, :, :])
                    filmstrip.save(layername + suffix)
            result = x.get_value()
            for index in range(basis_init.shape[0]):
                filmstrip.set_image((u, index), result[index, :, :, :])
            filmstrip.save(layername + suffix)
Example 12: main
#......... part of the code omitted here .........
        border_mode='valid',
        weights_init=Uniform(width=.2),
        biases_init=Constant(0))
    # We push initialization config to set different initialization schemes
    # for convolutional layers.
    convnet.push_initialization_config()
    convnet.layers[0].weights_init = Uniform(width=.2)
    convnet.layers[1].weights_init = Uniform(width=.09)
    convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08)
    convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11)
    convnet.initialize()
    logging.info("Input dim: {} {} {}".format(
        *convnet.children[0].get_dim('input_')))
    for i, layer in enumerate(convnet.layers):
        if isinstance(layer, Activation):
            logging.info("Layer {} ({})".format(
                i, layer.__class__.__name__))
        else:
            logging.info("Layer {} ({}) dim: {} {} {}".format(
                i, layer.__class__.__name__, *layer.get_dim('output')))
    random_init = (numpy.random.rand(100, 1, 28, 28) * 128).astype('float32')
    layers = [l for l in convnet.layers if isinstance(l, Convolutional)]
    mnist_test = MNIST(("test",), sources=['features', 'targets'])
    basis_init = create_fair_basis(mnist_test, 10, 50)
    basis_set = make_shifted_basis(basis_init, convnet, layers)
    for layer, basis in zip(layers, basis_set):
        # basis is 5d:
        # (probed_units, base_cases, 1-c, 28-y, 28-x)
        b = shared_floatx(basis)
        # coefficients is 2d:
        # (probed_units, base_cases)
        coefficients = shared_floatx(
            numpy.ones(basis.shape[0:2],
                       dtype=theano.config.floatX))
        # prod is 5d: (probed_units, base_cases, 1-c, 28-y, 28-x)
        prod = tensor.shape_padright(coefficients, 3) * b
        # x is 4d: (probed_units, 1-c, 28-y, 28-x)
        ux = prod.sum(axis=1)
        x = tensor.clip(ux /
                        tensor.shape_padright(ux.flatten(ndim=2).max(axis=1), 3),
                        0, 1)
        # Normalize input and apply the convnet
        probs = convnet.apply(x)
        cg = ComputationGraph([probs])
        outs = VariableFilter(
            roles=[OUTPUT], bricks=[layer])(cg.variables)
        # Create an interior activation model
        model = Model([probs] + outs)
        # Load it with trained parameters
        params = load_parameters(open(save_to, 'rb'))
        model.set_parameter_values(params)
        learning_rate = shared_floatx(0.03, 'learning_rate')
        # We will try to do all units at once.
        # unit = shared_floatx(0, 'unit', dtype='int64')
        # But we are only doing one layer at once.
        output = outs[0]
        dims = layer.get_dims(['output'])[0]
        if isinstance(dims, numbers.Integral):
            # FC case: output is 2d: (probed_units, units)
            dims = (dims, )
            unitrange = tensor.arange(dims[0])
            costvec = -tensor.log(
                tensor.nnet.softmax(output)[unitrange, unitrange].
                flatten())
        else:
            # Conv case: output is 4d: (probed_units, units, y, x)
            unitrange = tensor.arange(dims[0])
            print('dims is', dims)
            costvec = -tensor.log(tensor.nnet.softmax(output[
                unitrange, unitrange, dims[1] // 2, dims[2] // 2]).
                flatten())
        cost = costvec.sum()
        # grad is dims (probed_units, basis_size)
        grad = gradient.grad(cost, coefficients)
        stepc = coefficients  # - learning_rate * grad
        newc = stepc / tensor.shape_padright(stepc.mean(axis=1))
        fn = theano.function([], [cost, x], updates=[(coefficients, newc)])
        filmstrip = Filmstrip(
            random_init.shape[-2:], (dims[0], 1),
            background='red')
        layer = get_brick(output)
        learning_rate.set_value(0.1)
        for index in range(20000):
            c, result = fn()
            if index % 1000 == 0:
                learning_rate.set_value(numpy.cast[theano.config.floatX](
                    learning_rate.get_value() * 0.8))
                print('cost', c)
                for u in range(dims[0]):
                    filmstrip.set_image((u, 0), result[u, :, :, :])
                filmstrip.save(layer.name + '_stroke.jpg')
        for u in range(dims[0]):
            filmstrip.set_image((u, 0), result[u, :, :, :])
        filmstrip.save(layer.name + '_stroke.jpg')
Example 13: evaluate_lenet5
#......... part of the code omitted here .........
    #
    # layer3 = HiddenLayer(
    #     rng,
    #     input=layer3_input,
    #     n_in=1000,
    #     n_out=500,
    #     activation=T.tanh
    # )
    layer4 = LogisticRegression(input=layer2.output, n_in=500, n_out=13)
    cost = layer4.negative_log_likelihood(y)
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    test_model_on_train = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    params = layer4.params + layer2.params + layer1.params + layer0.params
    grads = grad(cost, params)
    updates_0 = [
        (param_i, param_i - learning_rate[0] * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]
    updates_1 = [
        (param_i, param_i - learning_rate[1] * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]
    updates_2 = [
        (param_i, param_i - learning_rate[2] * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]
    updates_3 = [
        (param_i, param_i - learning_rate[3] * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]
    updates_4 = [
        (param_i, param_i - learning_rate[4] * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]
    updates_5 = [
        (param_i, param_i - learning_rate[5] * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]
    # updates_6 = [