This article collects typical usage examples of the Python function theano_lstm.create_optimization_updates. If you have been wondering how exactly to use create_optimization_updates, how to call it, or what real usage looks like, the curated code examples below may help.
The sections below show 10 code examples of create_optimization_updates, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
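Judging from the examples below, create_optimization_updates takes a scalar cost expression and a list of shared parameters, and returns a tuple of (updates, gradient accumulators, squared-gradient accumulators, learning rate, max norm); the first element is passed to theano.function as updates. A minimal, self-contained sketch of that call pattern follows; the toy parameter and quadratic cost are invented for illustration, only the call itself mirrors the examples:

import numpy as np
import theano
import theano.tensor as T
from theano_lstm import create_optimization_updates

# a made-up parameter and cost, just to show the call pattern
w = theano.shared(np.zeros(3, dtype=theano.config.floatX), name="w")
cost = T.sum((w - 1.0) ** 2)

# "adadelta", "adagrad" and "SGD"/"sgd" all appear in the examples below
updates, gsums, xsums, lr, max_norm = create_optimization_updates(
    cost, [w], method="adadelta")

train_step = theano.function([], cost, updates=updates,
                             allow_input_downcast=True)
for _ in range(10):
    train_step()  # each call moves w towards the cost minimum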
Example 1: create_training_function
def create_training_function(self):
    updates, _, _, _, _ = create_optimization_updates(self.lstm_cost, self.params, method="SGD", lr=self.lstm_lr)
    # updates, _, _, _, _ = create_optimization_updates(self.cost, self.params, method="adadelta", lr=self.lr)
    self.lstm_update_fun = theano.function(
        inputs=[self.input_mat, self.for_how_long],
        outputs=self.lstm_cost,
        updates=updates,
        allow_input_downcast=True)

    updates_turing = self.turing_updates(self.final_cost, lr=self.turing_lr)
    # updates, _, _, _, _ = create_optimization_updates(self.cost, self.params, method="adadelta", lr=self.lr)
    self.turing_update_fun = theano.function(
        inputs=[self.input_mat, self.for_how_long],
        outputs=self.final_cost,
        updates=updates_turing,
        mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True),
        allow_input_downcast=True)

    all_updates_lstm, _, _, _, _ = create_optimization_updates(self.final_cost, self.params, method="SGD", lr=self.all_lr, part=True)
    all_updates_turing_temp = self.turing_updates(self.final_cost, lr=self.all_lr)
    updates_all = all_updates_lstm
    for pair in all_updates_turing_temp:
        updates_all[pair[0]] = pair[1]

    self.all_update_fun = theano.function(
        inputs=[self.input_mat, self.for_how_long],
        outputs=self.final_cost,
        updates=updates_all,
        allow_input_downcast=True)
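Example 1 merges the update dictionary returned by create_optimization_updates with a second set of (shared variable, new value) pairs by writing the pairs into the same dictionary, so a single theano.function applies both the SGD updates and the Turing-machine updates. A standalone sketch of that merge, using placeholder names rather than the ones above:

from collections import OrderedDict

def merge_updates(base_updates, extra_pairs):
    # base_updates: dict-like of {shared_var: new_value}, e.g. from create_optimization_updates
    # extra_pairs: iterable of (shared_var, new_value) tuples
    merged = OrderedDict(base_updates)
    for var, value in extra_pairs:
        merged[var] = value  # a later entry overrides an earlier one for the same variable
    return merged

# updates_all = merge_updates(all_updates_lstm, all_updates_turing_temp)
# fn = theano.function(inputs=[...], outputs=..., updates=updates_all)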
Example 2: create_training_function
def create_training_function(self):
    updates, _, _, _, _ = create_optimization_updates(self.cost, self.params, method="adadelta")
    self.update_fun = theano.function(
        inputs=[self.input_mat, self.for_how_long],
        outputs=self.cost,
        updates=updates,
        allow_input_downcast=True)
Example 3: create_training_function
def create_training_function(self):
    updates, gsums, xsums, lr, max_norm = create_optimization_updates(self.cost, self.params, method="adadelta")  # this step builds the gradient-descent updates
    self.update_fun = theano.function(
        inputs=[self.gfs, self.pm25in, self.pm25target, self.steps],
        outputs=self.cost,
        updates=updates,
        allow_input_downcast=True)
Example 4: create_training_function
def create_training_function(self):
    updates, gsums, xsums, lr, max_norm = create_optimization_updates(self.cost, self.params, lr=0.01, method="adagrad")  # this step builds the gradient-descent updates
    self.update_fun = theano.function(
        inputs=[self.x, self.target0, self.target1, self.steps],
        outputs=self.cost,
        updates=updates,
        name='update_fun',
        profile=False,
        allow_input_downcast=True)
Example 5: lstmTrain
def lstmTrain(examples, labels, input_size, num_iterations, steps, saveto=""):
    print examples, labels
    # Make a dataset where the network should learn whether the number 1 has been seen yet in the first column of
    # the input sequence. This probably isn't really a good example use case for an LSTM, but it's simple.
    '''rng = np.random.RandomState(123456789)
    input_size = 2
    input_length = 3
    sample_size = 500
    num_iterations = 1
    examples = rng.choice([0,1], (1, input_length,2)).astype(theano.config.floatX)
    #labels = np.array([[1 if np.sum(np.abs(x[:y + 1])) > 5 else 0 for y in range(len(x))]
    #                   for x in examples],
    #                  dtype=theano.config.floatX)
    labels = np.array([[[1,0,1]]],
                      dtype=theano.config.floatX)'''
    hidden_layer_size = 10
    num_hidden_layers = 2
    nodes = len(labels)
    assert len(labels) == len(examples)
    model = StackedCells(input_size,
                         layers=[20, nodes],
                         activation=T.tanh,
                         celltype=LSTM)
    # Make the connections from the input to the first layer have linear activations.
    model.layers[0].in_gate2.activation = lambda x: x
    # Add an output layer to predict the labels for each time step.
    output_layer = Layer(nodes, nodes, lambda x: T.nnet.softmax(x)[0])
    model.layers.append(output_layer)
    #model.layers.append(Layer(3, 3, lambda x: T.nnet.softmax(x)[0]))
    #tensor.nnet.softmax(x)
    #pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U']) + tparams['b'])
    #softmax_layer = Layer(3, 3, T.nnet.sigmoid)
    #softmax_layer.activation = lambda x: T.nnet.softmax(x)
    #model.layers.append(softmax_layer)
    #pred = T.nnet.softmax(tensor.dot(proj, tparams['U']) + tparams['b'])

    def step(x, *prev_hiddens):
        activations = model.forward(x, prev_hiddens=prev_hiddens)
        return activations

    input_vec = T.matrix('input_vec')
    #input_mat = np.zeros((3,2))
    #input_mat = input_vec.dimshuffle((0,'x',1))
    #input_mat = input_vec.dimshuffle((0,'x')).eval({input_vec:examples[0]})
    #print input_mat
    result, _ = theano.scan(fn=step,
                            sequences=[input_vec],
                            #outputs_info=([dict(initial=input_vec, taps=[-1])] + [dict(initial=layer.initial_hidden_state, taps=[-1]) for layer in model.layers if hasattr(layer, 'initial_hidden_state')]),
                            outputs_info=([dict(initial=hidden_layer.initial_hidden_state)
                                           for hidden_layer in model.layers[:-1]] +
                                          [dict(initial=model.layers[-1].bias_matrix)]),
                            #[dict(initial=T.zeros_like(model.layers[-1].bias_matrix), taps=[-1])]),
                            n_steps=steps)
    #print result[0].eval({input_vec:examples[0]})
    #print model.layers[-1].eval({input_vec:examples[0]})
    #print result[-1].eval({input_vec:examples[0]})
    #print result[-1].T[0].eval({input_vec:examples[0]})
    #target = T.vector('target')
    target = T.vector('target', dtype='int64')
    prediction = result[-1]  #.T[1]#.eval({examples:rng.choice([0,1], (1, input_length,2)).astype(theano.config.floatX),input_mat:np.zeros((3,2))})
    #cost = T.nnet.binary_crossentropy(prediction, target).mean()
    #pred = T.nnet.softmax(prediction)
    #print 'predict'
    #print pred.eval({input_vec:examples[0]})
    cost = -T.log(prediction[target] + 1e-8).mean()
    updates, _, _, _, _ = create_optimization_updates(cost, model.params)
    update_func = theano.function([input_vec, target], cost, updates=updates, allow_input_downcast=True, on_unused_input='warn')
    predict_func = theano.function([input_vec], prediction, allow_input_downcast=True, on_unused_input='warn')
    for cur_iter in range(num_iterations):
        for i, (example, label) in enumerate(zip(examples, labels)):
            #print i, example, label
            c = update_func(example, label)
            print "cost", c
            #create_optimization_updates(cost, model.params)
            #if i % 100 == 0:
            #    print "."#, end
            #print()
    if saveto:
        np.savez(saveto, model.params)
    '''test_cases = [np.array([[-1,1], [1,2],[0,0], [1,3], [2,-2]], dtype=theano.config.floatX)]
Example 6: main
def main():
    # Make a dataset where the network should learn whether the number 1 has been seen yet in the first column of
    # the input sequence. This probably isn't really a good example use case for an LSTM, but it's simple.
    rng = np.random.RandomState(123456789)
    input_size = 1
    input_length = 2
    sample_size = 1
    num_iterations = 3
    examples = rng.choice([-2, -1, 0, 1, 2], (sample_size, input_length)).astype(theano.config.floatX)
    labels = np.array([[1 if np.sum(np.abs(x[:y + 1])) > 5 else 0 for y in range(len(x))]
                       for x in examples],
                      dtype=theano.config.floatX)
    hidden_layer_size = 10
    num_hidden_layers = 2
    model = StackedCells(input_size,
                         layers=[hidden_layer_size] * num_hidden_layers,
                         activation=T.tanh,
                         celltype=LSTM)
    # Make the connections from the input to the first layer have linear activations.
    model.layers[0].in_gate2.activation = lambda x: x
    # Add an output layer to predict the labels for each time step.
    output_layer = Layer(hidden_layer_size, 1, T.nnet.sigmoid)
    model.layers.append(output_layer)

    def step(x, *prev_hiddens):
        activations = model.forward(x, prev_hiddens=prev_hiddens)
        return activations

    input_vec = T.vector('input_vec')
    input_mat = input_vec.dimshuffle((0, 'x'))
    result, _ = theano.scan(fn=step,
                            sequences=[input_mat],
                            outputs_info=([dict(initial=hidden_layer.initial_hidden_state, taps=[-1])
                                           for hidden_layer in model.layers[:-1]] +
                                          [dict(initial=T.zeros_like(model.layers[-1].bias_matrix), taps=[-1])]))
    print result[-2].eval({input_vec: examples[0]})
    target = T.vector('target')
    prediction = result[-1].T[0]
    cost = T.nnet.binary_crossentropy(prediction, target).mean()
    updates, _, _, _, _ = create_optimization_updates(cost, model.params)
    update_func = theano.function([input_vec, target], cost, updates=updates, allow_input_downcast=True)
    predict_func = theano.function([input_vec], prediction, allow_input_downcast=True)
    for cur_iter in range(num_iterations):
        for i, (example, label) in enumerate(zip(examples, labels)):
            c = update_func(example, label)
            #if i % 100 == 0:
            #    print(".")
        print()
    test_cases = [np.array([-1, 1, 0, 1, -2, 0, 1, 0, 2, 0], dtype=theano.config.floatX),
                  np.array([2, 2, 2, 0, 0, 0], dtype=theano.config.floatX),
                  np.array([-2, -2, -2, 0, 0, 0], dtype=theano.config.floatX),
                  np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0], dtype=theano.config.floatX),
                  np.array([2, 0, 0, 0, 2, 0, 0, 0, 0, -2, 0, 0, 0, 0, 0], dtype=theano.config.floatX),
                  np.array([2, 2, 2, 0, 0, 0, 2, 2, 2, 0], dtype=theano.config.floatX)]
    for example in test_cases:
        print("input", "output")
        for x, pred in zip(example, predict_func(example)):
            print(x, "{:.3f}".format(pred))
        print()
Example 7: setup_train
# ... (part of the code is omitted here) ...
# time_inputs is a matrix (time, batch/note, input_per_note)
time_inputs = input_slice.transpose((1,0,2,3)).reshape((n_time, n_batch*n_note, n_ipn))
num_time_parallel = time_inputs.shape[1]

# apply dropout
if self.dropout > 0:
    time_masks = MultiDropout([(num_time_parallel, shape) for shape in self.t_layer_sizes], self.dropout)
else:
    time_masks = []

print "model-setup-train::Trace-3"
time_outputs_info = [initial_state_with_taps(layer, num_time_parallel) for layer in self.time_model.layers]
time_result, _ = theano.scan(fn=step_time, sequences=[time_inputs], non_sequences=time_masks, outputs_info=time_outputs_info)
print "model-setup-train::Trace-4"
self.time_thoughts = time_result

# Now time_result is a list of matrix [layer](time, batch/note, hidden_states) for each layer but we only care about
# the hidden state of the last layer.
# Transpose to be (note, batch/time, hidden_states)
last_layer = get_last_layer(time_result)
n_hidden = last_layer.shape[2]
time_final = get_last_layer(time_result).reshape((n_time, n_batch, n_note, n_hidden)).transpose((2,1,0,3)).reshape((n_note, n_batch*n_time, n_hidden))

# note_choices_inputs represents the last chosen note. Starts with [0,0], doesn't include last note.
# In (note, batch/time, 2) format
# Shape of start is thus (1, N, 2), concatenated with all but last element of output_mat transformed to (x, N, 2)
start_note_values = T.alloc(0, 1, time_final.shape[1], 2)
correct_choices = self.output_mat[:,1:,0:-1,:].transpose((2,0,1,3)).reshape((n_note-1, n_batch*n_time, 2))
note_choices_inputs = T.concatenate([start_note_values, correct_choices], axis=0)
print "model-setup-train::Trace-5"

# Together, this and the output from the last LSTM goes to the new LSTM, but rotated, so that the batches in
# one direction are the steps in the other, and vice versa.
note_inputs = T.concatenate([time_final, note_choices_inputs], axis=2)
num_timebatch = note_inputs.shape[1]

# apply dropout
if self.dropout > 0:
    pitch_masks = MultiDropout([(num_timebatch, shape) for shape in self.p_layer_sizes], self.dropout)
else:
    pitch_masks = []

print "model-setup-train::Trace-6"
note_outputs_info = [initial_state_with_taps(layer, num_timebatch) for layer in self.pitch_model.layers]
note_result, _ = theano.scan(fn=step_note, sequences=[note_inputs], non_sequences=pitch_masks, outputs_info=note_outputs_info)
self.note_thoughts = note_result

# Now note_result is a list of matrix [layer](note, batch/time, onOrArticProb) for each layer but we only care about
# the hidden state of the last layer.
# Transpose to be (batch, time, note, onOrArticProb)
note_final = get_last_layer(note_result).reshape((n_note, n_batch, n_time, 2)).transpose(1,2,0,3)
print "model-setup-train::Trace-7"

# The cost of the entire procedure is the negative log likelihood of the events all happening.
# For the purposes of training, if the output probability is P, then the likelihood of seeing a 1 is P, and
# the likelihood of seeing 0 is (1-P). So the likelihood is (1-P)(1-x) + Px = 2Px - P - x + 1
# Since they are all binary decisions, and are all probabilities given all previous decisions, we can just
# multiply the likelihoods, or, since we are logging them, add the logs.
# Note that we mask out the articulations for those notes that aren't played, because it doesn't matter
# whether or not those are articulated.
# The padright is there because self.output_mat[:,:,:,0] -> 3D matrix with (b,x,y), but we need 3d tensor with
# (b,x,y,1) instead
active_notes = T.shape_padright(self.output_mat[:,1:,:,0])
mask = T.concatenate([T.ones_like(active_notes), active_notes], axis=3)
loglikelihoods = mask * T.log(2*note_final*self.output_mat[:,1:] - note_final - self.output_mat[:,1:] + 1 + self.epsilon)
print "model-setup-train::Trace-8"
self.cost = T.neg(T.sum(loglikelihoods))
print "model-setup-train::Trace-9"

updates, _, _, _, _ = create_optimization_updates(self.cost, self.params, method="adadelta")
print "model-setup-train::Trace-10"
self.update_fun = theano.function(
    inputs=[self.input_mat, self.output_mat],
    outputs=self.cost,
    updates=updates,
    allow_input_downcast=True)
self.update_thought_fun = theano.function(
    inputs=[self.input_mat, self.output_mat],
    outputs=ensure_list(self.time_thoughts) + ensure_list(self.note_thoughts) + [self.cost],
    allow_input_downcast=True)
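The likelihood identity used in the comments of Example 7 is easy to verify. For a single binary observation x in {0, 1} with predicted probability P:

\[
(1-P)(1-x) + Px = 1 - x - P + Px + Px = 2Px - P - x + 1,
\]

which reduces to P when x = 1 and to 1 - P when x = 0. The code therefore sums log(2Px - P - x + 1 + epsilon) over the masked entries and negates the total to obtain the training cost passed to create_optimization_updates.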
Example 8: __init__
# ... (part of the code is omitted here) ...
    else:
        res = result[-1].dimshuffle(1, 0, 2) if x.ndim > 1 else result[-1]
    price_preds = self.price_model.forward(
        self.model.layers[-2].postprocess_activation(
            result[-2][lengths, T.arange(0, lengths.shape[0])]
        ), None, []
    )[-1][:,0] if x.ndim > 1 else \
        self.price_model.forward(
            self.model.layers[-2].postprocess_activation(
                result[-2][-1]
            ), None, [])[-1][0]
    # gate values can be obtained by asking for them from the stacked cells
    if return_memory:
        return result[0], res, price_preds
    else:
        return res, price_preds

# every sequence is a series of indices
# for words:
input_sentences = T.imatrix()
# some sequences are shorter than others, so we'll note where they
# end in a zero-indexed fashion
sequence_lengths = T.ivector()
sequence_starts = T.ivector()
# the labels are integers in the range of dictionary
self.input_sentences = input_sentences
self.sequence_lengths = sequence_lengths
self.sequence_starts = sequence_starts
self.prices = T.vector()
memory_usage, self.predictions, self.price_predictions = predict_sequence(input_sentences, self.sequence_starts, return_memory=True)
self.error = (
    theano_lstm.masked_loss(
        self.predictions,
        input_sentences[:,1:] - self.vocabulary_size,
        sequence_lengths,
        sequence_starts).mean() +
    (memory_usage.sum() * self.memory_sparsity) / input_sentences.shape[0] +
    ((self.price_predictions - self.prices)**2).mean()
)
self.memory_fun = theano.function([input_sentences], memory_usage,
                                  allow_input_downcast=True,
                                  mode=self.theano_mode)
self.price_predict_fun = theano.function([input_sentences, sequence_starts],
                                         self.price_predictions,
                                         allow_input_downcast=True,
                                         mode=self.theano_mode)
self.predict_fun = theano.function([input_sentences],
                                   self.predictions,
                                   allow_input_downcast=True,
                                   mode=self.theano_mode)
self.error_fun = theano.function([input_sentences, sequence_lengths, sequence_starts, self.prices],
                                 self.error,
                                 allow_input_downcast=True,
                                 mode=self.theano_mode)
self.input_sentence = T.ivector()
prep_result = predict_sequence(self.input_sentence, None, return_all=True)
pred_outputs_info = [dict(initial=self.input_sentence[-1], taps=[-1])] + [dict(initial=prep_hidden[-1], taps=[-1]) for prep_hidden in prep_result[1:-1]]
prediction_steps = T.iscalar()
pred_result, _ = theano.scan(pred_step,
                             n_steps=prediction_steps,
                             outputs_info=pred_outputs_info)
self.reconstruct_fun = theano.function([self.input_sentence, prediction_steps],
                                       pred_result[0],
                                       allow_input_downcast=True,
                                       mode=self.theano_mode)
self.input_labels = theano.function([input_sentences],
                                    input_sentences[:,1:] - self.vocabulary_size,
                                    mode=self.theano_mode)
if verbose:
    print("created prediction & error functions")
updates, gsums, xsums, lr, max_norm = theano_lstm.create_optimization_updates(self.error, model.params + model2.params, max_norm=None, rho=rho, method="adadelta")
self.lr = lr
if verbose:
    print("took the gradient")
self.gsums = gsums
self.xsums = xsums
self.update_fun = theano.function([input_sentences, sequence_lengths, sequence_starts, self.prices],
                                  outputs=None,
                                  updates=updates,
                                  mode=self.theano_mode)
if verbose:
    print("created the gradient descent function")
Example 9: __init__
def __init__(self,
             hidden_size,
             internal_features,
             intermediate_size,
             vocab_size,
             num_answers,
             tensor=True,
             method="sgd"):
    self.text_embedding = Embedding(vocab_size, hidden_size)
    self.question_embedding = Embedding(vocab_size, hidden_size)
    self.answer_embedding = Embedding(vocab_size, hidden_size)
    self.params = self.text_embedding.params + self.question_embedding.params + self.answer_embedding.params
    self.tensor = tensor
    if tensor:
        self.q_form_U = create_shared("question_answer_tensor",
                                      intermediate_size,
                                      internal_features,
                                      3 * hidden_size)
        self.q_form_V = create_shared("question_answer_tensor",
                                      intermediate_size,
                                      internal_features,
                                      3 * hidden_size)
        self.params.append(self.q_form_U)
        self.params.append(self.q_form_V)
    # here are the affine parameters
    self.bias = create_shared("bias", intermediate_size)
    self.projection_mat = create_shared("projection_mat",
                                        intermediate_size,
                                        3 * hidden_size)
    self.scoring_mat = create_shared("scoring_mat",
                                     1,
                                     intermediate_size)
    self.params += [
        self.bias,
        self.projection_mat
    ]
    # create a triplet scoring function:
    sentence = T.ivector()
    question = T.ivector()
    answer = T.ivector()
    self.score_triplet = theano.function([sentence, question, answer],
                                         self.get_score(sentence, question, answer),
                                         allow_input_downcast=True)
    # create an error function
    answers = [T.ivector() for i in range(num_answers)]
    targets = [T.fscalar() for i in range(num_answers)]
    answer_targets = []
    for a, t in zip(answers, targets):
        answer_targets.extend([a, t])
    error = self.get_error(
        sentence,
        question,
        *answer_targets)
    self.error_fun = theano.function([
        sentence,
        question] + answer_targets,
        error,
        allow_input_downcast=True)
    gparams = T.grad(error, self.params,
                     disconnected_inputs='ignore')
    updates = OrderedDict()
    self.gradient_caches = [theano.shared(param.get_value(True, True) * 0.0, borrow=True, name=param.name + "_grad")
                            for param in self.params]
    for gparam_cache, gparam in zip(self.gradient_caches, gparams):
        updates[gparam_cache] = gparam_cache + gparam
    self.update_gradient = theano.function([
        sentence,
        question] + answer_targets,
        error,
        updates=updates, allow_input_downcast=True)
    # create a training function:
    true_updates, self.gsums, self.xsums, lr, max_norm = create_optimization_updates(
        None,
        self.params,
        method=method,
        gradients=self.gradient_caches
    )
    self.lr = lr
    for gparam_cache in self.gradient_caches:
        true_updates[gparam_cache] = T.zeros_like(gparam_cache)
    self.apply_gradient = theano.function(
        inputs = [],
        outputs = [],
# ... (part of the code is omitted here) ...
Example 10: __init__
def __init__(self, hidden_size, vocab_size, num_answers):
    self.embedding = Embedding(vocab_size, hidden_size)
    self.q_form = create_shared("tensor",
                                1,
                                hidden_size,
                                hidden_size)
    self.params = self.embedding.params + [self.q_form]
    # create a triplet scoring function:
    sentence = T.ivector()
    question = T.ivector()
    answer = T.ivector()
    self.score_triplet = theano.function([sentence, question, answer],
                                         self.get_score(sentence, question, answer),
                                         allow_input_downcast=True)
    # create an error function
    answers = [T.ivector() for i in range(num_answers)]
    targets = [T.fscalar() for i in range(num_answers)]
    answer_targets = []
    for a, t in zip(answers, targets):
        answer_targets.extend([a, t])
    error = self.get_error(
        sentence,
        question,
        *answer_targets)
    self.error_fun = theano.function([
        sentence,
        question] + answer_targets,
        error, allow_input_downcast=True)
    gparams = T.grad(error, self.params)
    updates = OrderedDict()
    self.gradient_caches = [theano.shared(param.get_value(True, True) * 0.0, borrow=True, name=param.name + "_grad")
                            for param in self.params]
    for gparam_cache, gparam in zip(self.gradient_caches, gparams):
        updates[gparam_cache] = gparam_cache + gparam
    self.update_gradient = theano.function([
        sentence,
        question] + answer_targets,
        error,
        updates=updates, allow_input_downcast=True)
    # create a training function:
    true_updates, gsums, xsums, lr, max_norm = create_optimization_updates(
        None,
        self.params,
        method="sgd",
        gradients=self.gradient_caches
    )
    self.lr = lr
    for gparam_cache in self.gradient_caches:
        true_updates[gparam_cache] = T.zeros_like(gparam_cache)
    self.apply_gradient = theano.function(
        inputs = [],
        outputs = [],
        updates = true_updates)
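Examples 9 and 10 show a two-phase pattern: gradients are first accumulated into shared "cache" variables over several calls, and create_optimization_updates is then called with cost=None and gradients= pointing at those caches, so the optimizer consumes the accumulated gradients instead of differentiating a cost itself. A condensed sketch of the same idea; the toy model and names below are placeholders, only the cost=None / gradients= call mirrors the examples:

from collections import OrderedDict
import numpy as np
import theano
import theano.tensor as T
from theano_lstm import create_optimization_updates

w = theano.shared(np.zeros(3, dtype=theano.config.floatX), name="w")
x = T.vector()
error = T.sum((T.dot(w, x) - 1.0) ** 2)
params = [w]

# phase 1: accumulate gradients into shared caches, one call per example
caches = [theano.shared(p.get_value(True, True) * 0.0, name=p.name + "_grad") for p in params]
accum = OrderedDict((c, c + g) for c, g in zip(caches, T.grad(error, params)))
update_gradient = theano.function([x], error, updates=accum, allow_input_downcast=True)

# phase 2: apply the accumulated gradients, then reset the caches to zero
true_updates, gsums, xsums, lr, max_norm = create_optimization_updates(
    None, params, method="sgd", gradients=caches)
for c in caches:
    true_updates[c] = T.zeros_like(c)
apply_gradient = theano.function([], [], updates=true_updates)

# usage: accumulate over a few examples, then apply once
update_gradient(np.ones(3, dtype=theano.config.floatX))
apply_gradient()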