This article collects typical usage examples of the Python class blocks.bricks.cost.CategoricalCrossEntropy. If you are wondering what the CategoricalCrossEntropy class is for, how to use it, or what it looks like in real code, the curated examples below may help.
The following sections show 15 code examples of the CategoricalCrossEntropy class, sorted by popularity by default.
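Before the individual examples, here is a minimal, self-contained sketch of the pattern they all share: build a brick that produces class probabilities, then apply CategoricalCrossEntropy to the targets and those probabilities to obtain a scalar Theano cost variable. The MLP architecture, dimensions, and variable names below are illustrative assumptions, not taken from any single example.
from theano import tensor
from blocks.bricks import MLP, Tanh, Softmax
from blocks.bricks.cost import CategoricalCrossEntropy
from blocks.initialization import IsotropicGaussian, Constant

# Symbolic inputs: a batch of feature vectors and integer class labels.
x = tensor.matrix('features')
y = tensor.lmatrix('targets')

# A small MLP whose Softmax output layer yields class probabilities.
mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
          weights_init=IsotropicGaussian(0.01),
          biases_init=Constant(0))
mlp.initialize()
probs = mlp.apply(x)

# CategoricalCrossEntropy takes the targets first and the predicted
# probabilities second, and returns a scalar cost variable that can be
# handed to a ComputationGraph or GradientDescent, as in the examples below.
cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
cost.name = 'cross_entropy'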
Example 1: setup_model
def setup_model(configs):
tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5)
# shape: T x B x C x X x Y
input_ = tensor5("features")
tensor3 = theano.tensor.TensorType(config.floatX, (False,) * 3)
locs = tensor3("locs")
# shape: B x Classes
target = T.ivector("targets")
model = LSTMAttention(configs, weights_init=Glorot(), biases_init=Constant(0))
model.initialize()
(h, c, location, scale, alpha, patch, downn_sampled_input, conved_part_1, conved_part_2, pre_lstm) = model.apply(
input_, locs
)
model.location = location
model.scale = scale
model.alpha = alpha
model.patch = patch
classifier = MLP(
[Rectifier(), Softmax()], configs["classifier_dims"], weights_init=Glorot(), biases_init=Constant(0)
)
classifier.initialize()
probabilities = classifier.apply(h[-1])
cost = CategoricalCrossEntropy().apply(target, probabilities)
cost.name = "CE"
error_rate = MisclassificationRate().apply(target, probabilities)
error_rate.name = "ER"
model.cost = cost
model.error_rate = error_rate
model.probabilities = probabilities
if configs["load_pretrained"]:
blocks_model = Model(model.cost)
all_params = blocks_model.parameters
with open("VGG_CNN_params.npz") as f:
loaded = np.load(f)
all_conv_params = loaded.keys()
for param in all_params:
if param.name in loaded.keys():
assert param.get_value().shape == loaded[param.name].shape
param.set_value(loaded[param.name])
all_conv_params.pop(all_conv_params.index(param.name))
print "the following parameters did not match: " + str(all_conv_params)
if configs["test_model"]:
print "TESTING THE MODEL: CHECK THE INPUT SIZE!"
cg = ComputationGraph(model.cost)
f = theano.function(cg.inputs, [model.cost], on_unused_input="ignore", allow_input_downcast=True)
data = configs["get_streams"](configs["batch_size"])[0].get_epoch_iterator().next()
f(data[1], data[0], data[2])
print "Test passed! ;)"
model.monitorings = [cost, error_rate]
return model
Example 2: main
def main(save_to, num_epochs, bokeh=False):
mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
weights_init=IsotropicGaussian(0.01),
biases_init=Constant(0))
mlp.initialize()
x = tensor.matrix('features')
y = tensor.lmatrix('targets')
probs = mlp.apply(x)
cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
error_rate = MisclassificationRate().apply(y.flatten(), probs)
cg = ComputationGraph([cost])
W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
cost.name = 'final_cost'
mnist_train = MNIST("train")
mnist_test = MNIST("test")
algorithm = GradientDescent(
cost=cost, params=cg.parameters,
step_rule=Scale(learning_rate=0.1))
extensions = [Timing(),
FinishAfter(after_n_epochs=num_epochs),
DataStreamMonitoring(
[cost, error_rate],
DataStream(mnist_test,
iteration_scheme=SequentialScheme(
mnist_test.num_examples, 500)),
prefix="test"),
TrainingDataMonitoring(
[cost, error_rate,
aggregation.mean(algorithm.total_gradient_norm)],
prefix="train",
after_epoch=True),
Checkpoint(save_to),
Printing()]
if bokeh:
extensions.append(Plot(
'MNIST example',
channels=[
['test_final_cost',
'test_misclassificationrate_apply_error_rate'],
['train_total_gradient_norm']]))
main_loop = MainLoop(
algorithm,
DataStream(mnist_train,
iteration_scheme=SequentialScheme(
mnist_train.num_examples, 50)),
model=Model(cost),
extensions=extensions)
main_loop.run()
Example 3: apply
def apply(self, input_, target):
mlp = MLP(self.non_lins, self.dims,
weights_init=IsotropicGaussian(0.01),
biases_init=Constant(0),
name=self.name)
mlp.initialize()
probs = mlp.apply(T.flatten(input_, outdim=2))
probs.name = 'probs'
cost = CategoricalCrossEntropy().apply(target.flatten(), probs)
cost.name = "CE"
self.outputs = {}
self.outputs['probs'] = probs
self.outputs['cost'] = cost
Example 4: main
def main(save_to, num_epochs, batch_size):
mlp = MLP([Tanh(), Tanh(), Tanh(), Softmax()], [3072, 4096, 1024, 512, 10],
weights_init=IsotropicGaussian(0.01),
biases_init=Constant(0))
mlp.initialize()
x = tt.tensor4('features', dtype='float32')
y = tt.vector('label', dtype='int32')
probs = mlp.apply(x.reshape((-1,3072)))
cost = CategoricalCrossEntropy().apply(y, probs)
error_rate = MisclassificationRate().apply(y, probs)
cg = ComputationGraph([cost])
ws = VariableFilter(roles=[WEIGHT])(cg.variables)
cost = cost + .00005 * sum(([(w**2).sum() for w in ws]))
cost.name = 'final_cost'
train_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10', is_train=True)
valid_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10', is_train=False)
train_stream = train_dataset.get_stream(batch_size)
valid_stream = valid_dataset.get_stream(batch_size)
algorithm = GradientDescent(
cost=cost, parameters=cg.parameters,
step_rule=Adam(learning_rate=0.001))
extensions = [Timing(),
LogExtension('/home/belohlavek/ALI/mlp.log'),
FinishAfter(after_n_epochs=num_epochs),
DataStreamMonitoring([cost, error_rate], valid_stream, prefix="test"),
TrainingDataMonitoring(
[cost, error_rate, aggregation.mean(algorithm.total_gradient_norm)],
prefix="train",
after_epoch=True),
Checkpoint(save_to),
Printing()]
main_loop = MainLoop(algorithm,
train_stream,
model=Model(cost),
extensions=extensions)
main_loop.run()
Example 5: main
def main(job_id, params, config_file='params.ec'):
config = ConfigParser.ConfigParser()
config.readfp(open('./configs/{}'.format(config_file)))
pr = pprint.PrettyPrinter(indent=4)
pr.pprint(config)
net_name = config.get('hyperparams', 'net_name', 'adni')
struct_name = net_name.split('_')[0]
max_epoch = int(config.get('hyperparams', 'max_iter', 100))
base_lr = float(config.get('hyperparams', 'base_lr', 0.01))
train_batch = int(config.get('hyperparams', 'train_batch', 256))
valid_batch = int(config.get('hyperparams', 'valid_batch', 512))
test_batch = int(config.get('hyperparams', 'valid_batch', 512))
W_sd = float(config.get('hyperparams', 'W_sd', 0.01))
W_mu = float(config.get('hyperparams', 'W_mu', 0.0))
b_sd = float(config.get('hyperparams', 'b_sd', 0.01))
b_mu = float(config.get('hyperparams', 'b_mu', 0.0))
hidden_units = int(config.get('hyperparams', 'hidden_units', 32))
input_dropout_ratio = float(config.get('hyperparams', 'input_dropout_ratio', 0.2))
dropout_ratio = float(config.get('hyperparams', 'dropout_ratio', 0.2))
weight_decay = float(config.get('hyperparams', 'weight_decay', 0.001))
max_norm = float(config.get('hyperparams', 'max_norm', 100.0))
solver = config.get('hyperparams', 'solver_type', 'rmsprop')
data_file = config.get('hyperparams', 'data_file')
side = config.get('hyperparams', 'side', 'b')
input_dim = input_dims[struct_name]
# Spearmint optimization parameters:
if params:
base_lr = float(params['base_lr'][0])
dropout_ratio = float(params['dropout_ratio'][0])
hidden_units = params['hidden_units'][0]
weight_decay = params['weight_decay'][0]
if 'adagrad' in solver:
solver_type = CompositeRule([AdaGrad(learning_rate=base_lr), VariableClipping(threshold=max_norm)])
else:
solver_type = CompositeRule([RMSProp(learning_rate=base_lr), VariableClipping(threshold=max_norm)])
data_file = config.get('hyperparams', 'data_file')
if 'b' in side:
train = H5PYDataset(data_file, which_set='train')
valid = H5PYDataset(data_file, which_set='valid')
test = H5PYDataset(data_file, which_set='test')
x_l = tensor.matrix('l_features')
x_r = tensor.matrix('r_features')
x = tensor.concatenate([x_l, x_r], axis=1)
else:
train = H5PYDataset(data_file, which_set='train', sources=['{}_features'.format(side), 'targets'])
valid = H5PYDataset(data_file, which_set='valid', sources=['{}_features'.format(side), 'targets'])
test = H5PYDataset(data_file, which_set='test', sources=['{}_features'.format(side), 'targets'])
x = tensor.matrix('{}_features'.format(side))
y = tensor.lmatrix('targets')
# Define a feed-forward net with an input, two hidden layers, and a softmax output:
model = MLP(activations=[
Rectifier(name='h1'),
Rectifier(name='h2'),
Softmax(name='output'),
],
dims=[
input_dim[side],
hidden_units,
hidden_units,
2],
weights_init=IsotropicGaussian(std=W_sd, mean=W_mu),
biases_init=IsotropicGaussian(b_sd, b_mu))
# Don't forget to initialize params:
model.initialize()
# y_hat is the output of the neural net with x as its inputs
y_hat = model.apply(x)
# Define a cost function to optimize, and a classification error rate.
# Also apply the outputs from the net and corresponding targets:
cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
error = MisclassificationRate().apply(y.flatten(), y_hat)
error.name = 'error'
# This is the model: before applying dropout
model = Model(cost)
# Need to define the computation graph for the cost func:
cost_graph = ComputationGraph([cost])
# This returns a list of weight vectors for each layer
W = VariableFilter(roles=[WEIGHT])(cost_graph.variables)
# Add some regularization to this model:
#......... (part of the code is omitted here) .........
Example 6: Linear
update_prob=update_prob, name="rnn")
h1, sd = rnn.apply(pre_rnn[:, :, :h_dim],
pre_rnn[:, :, h_dim:],
drops, is_for_test)
h1_to_o = Linear(name='h1_to_o',
input_dim=h_dim,
output_dim=y_dim)
pre_softmax = h1_to_o.apply(h1)
softmax = Softmax()
shape = pre_softmax.shape
softmax_out = softmax.apply(pre_softmax.reshape((-1, y_dim)))
softmax_out = softmax_out.reshape(shape)
softmax_out.name = 'softmax_out'
# comparing only last time-step
cost = CategoricalCrossEntropy().apply(y, softmax_out[-1])
cost.name = 'CrossEntropy'
error_rate = MisclassificationRate().apply(y, softmax_out[-1])
error_rate.name = 'error_rate'
# Initialization
for brick in (x_to_h1, h1_to_o, rnn):
brick.weights_init = Glorot()
brick.biases_init = Constant(0)
brick.initialize()
train_stream = get_stream('train', batch_size, h_dim, False)
data = train_stream.get_epoch_iterator(as_dict=True).next()
cg = ComputationGraph(cost)
f = theano.function(cg.inputs, cost)
print f(data['y'], data['x'], data['is_for_test'], data['drops'])
Example 7: BinaryCrossEntropy
sampler = q_sampler,
classifier=classifier_mlp)
draw.initialize()
#------------------------------------------------------------------------
x = tensor.matrix(u'features')
y = tensor.lmatrix(u'targets')
#y = theano.tensor.extra_ops.to_one_hot(tensor.lmatrix(u'targets'),2)
#probs, h_enc, c_enc, i_dec, h_dec, c_dec, center_y, center_x, delta = draw.reconstruct(x)
probs, h_enc, c_enc, center_y, center_x, delta = draw.reconstruct(x)
trim_probs = probs[-1,:,:] #Only take information from the last iteration
labels = y.flatten()
#cost = BinaryCrossEntropy().apply(labels, trim_probs)
cost = CategoricalCrossEntropy().apply(y, trim_probs)
error_rate = MisclassificationRate().apply(labels, trim_probs)
cost.name = "CCE"
#------------------------------------------------------------
cg = ComputationGraph([cost])
params = VariableFilter(roles=[PARAMETER])(cg.variables)
algorithm = GradientDescent(
cost=cost,
parameters=params,
step_rule=CompositeRule([
StepClipping(10.),
Adam(learning_rate),
]),
on_unused_sources='ignore',
Example 8: create_model
#......... (part of the code is omitted here) .........
# bidirectional encoder that translates context
context_encoder = self.create_bidi_encoder("context_encoder", embedding_dims, hidden_states)
# bidirectional encoder for question
question_encoder = self.create_bidi_encoder("question_encoder", embedding_dims, hidden_states)
# Initialize the components (where not done upon creation)
lookup.initialize()
###################
# Wiring the components together
#
# Where present, the 3 letters at the end of the variable name identify its dimensions:
# b ... position of the example within the batch
# t ... position of the word within the document/question
# f ... features of the embedding vector
###################
### Read the context document
# Map token indices to word embeddings
context_embedding_tbf = lookup.apply(context_bt.T)
# Read the embedded context document using the bidirectional GRU and produce the contextual embedding of each word
memory_encoded_btf = context_encoder.apply(context_embedding_tbf, context_mask_bt.T).dimshuffle(1,0,2)
memory_encoded_btf.name = "memory_encoded_btf"
### Correspondingly, read the query
x_embedded_tbf = lookup.apply(question_bt.T)
x_encoded_btf = question_encoder.apply(x_embedded_tbf, question_mask_bt.T).dimshuffle(1,0,2)
# The query encoding is a concatenation of the final states of the forward and backward GRU encoder
x_forward_encoded_bf = x_encoded_btf[:,-1,0:hidden_states]
x_backward_encoded_bf = x_encoded_btf[:,0,hidden_states:hidden_states*2]
query_representation_bf = tt.concatenate([x_forward_encoded_bf,x_backward_encoded_bf],axis=1)
# Compute the attention on each word in the context as a dot product of its contextual embedding and the query
mem_attention_presoft_bt = tt.batched_dot(query_representation_bf, memory_encoded_btf.dimshuffle(0,2,1))
# TODO is this pre-masking necessary?
mem_attention_presoft_masked_bt = tt.mul(mem_attention_presoft_bt,context_mask_bt)
# Normalize the attention using softmax
mem_attention_bt = SoftmaxWithMask(name="memory_query_softmax").apply(mem_attention_presoft_masked_bt,context_mask_bt)
if self.args.weighted_att:
# compute weighted attention over original word vectors
att_weighted_responses_bf = theano.tensor.batched_dot(mem_attention_bt, context_embedding_tbf.dimshuffle(1,0,2))
# compare desired response to all candidate responses
# select relevant candidate answer words
candidates_embeddings_bfi = lookup.apply(candidates_bi).dimshuffle(0,2,1)
# convert it to output symbol probabilities
y_hat_presoft = tt.batched_dot(att_weighted_responses_bf, candidates_embeddings_bfi)
y_hat = SoftmaxWithMask(name="output_softmax").apply(y_hat_presoft,candidates_bi_mask)
else:
# Sum the attention of each candidate word across the whole context document,
# this is the key innovation of the model
# TODO: Get rid of sentence-by-sentence processing?
# TODO: Rewrite into matrix notation instead of scans?
def sum_prob_of_word(word_ix, sentence_ixs, sentence_attention_probs):
word_ixs_in_sentence = tt.eq(sentence_ixs,word_ix).nonzero()[0]
return sentence_attention_probs[word_ixs_in_sentence].sum()
def sum_probs_single_sentence(candidate_indices_i, sentence_ixs_t, sentence_attention_probs_t):
result, updates = theano.scan(
fn=sum_prob_of_word,
sequences=[candidate_indices_i],
non_sequences=[sentence_ixs_t, sentence_attention_probs_t])
return result
def sum_probs_batch(candidate_indices_bt,sentence_ixs_bt, sentence_attention_probs_bt):
result, updates = theano.scan(
fn=sum_probs_single_sentence,
sequences=[candidate_indices_bt, sentence_ixs_bt, sentence_attention_probs_bt],
non_sequences=None)
return result
# Sum the attention of each candidate word across the whole context document
y_hat = sum_probs_batch(candidates_bi, context_bt, mem_attention_bt)
y_hat.name = "y_hat"
# We use the convention that ground truth is always at index 0, so the following are the target answers
y = y.zeros_like()
# We use Cross Entropy as the training objective
cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
cost.name = "cost"
predicted_response_index = tt.argmax(y_hat,axis=1)
accuracy = tt.eq(y,predicted_response_index).mean()
accuracy.name = "accuracy"
return cost, accuracy, mem_attention_bt, y_hat, context_bt, candidates_bi, candidates_bi_mask, y, context_mask_bt, question_bt, question_mask_bt
Example 9: LookupTable
Xs = tensor.imatrix("context")
y = tensor.ivector('center')
w1 = LookupTable(name="w1", length=VOCAB_DIM, dim=EMBEDDING_DIM)
w2 = Linear(name='w2', input_dim=EMBEDDING_DIM, output_dim=VOCAB_DIM)
hidden = tensor.mean(w1.apply(Xs), axis=1)
y_hat = Softmax().apply(w2.apply(hidden))
w1.weights_init = w2.weights_init = IsotropicGaussian(0.01)
w1.biases_init = w2.biases_init = Constant(0)
w1.initialize()
w2.initialize()
cost = CategoricalCrossEntropy().apply(y, y_hat)
cg = ComputationGraph(cost)
W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
cost = cost + 0.005 * (W1 ** 2).sum() + 0.005 * (W2 ** 2).sum()
cost.name = "loss"
#
# the actual training of the model
#
main = MainLoop(data_stream = DataStream.default_stream(
dataset,
iteration_scheme=SequentialScheme(dataset.num_instances, batch_size=512)),
algorithm = GradientDescent(
Example 10: Linear
# In[5]:
from blocks.bricks import Linear, Logistic, Softmax
# In[10]:
hidden_layer_size = 100
input_to_hidden = Linear(name='input_to_hidden', input_dim=117, output_dim=hidden_layer_size)
h = Logistic().apply(input_to_hidden.apply(x))
hidden_to_output = Linear(name='hidden_to_output', input_dim=hidden_layer_size, output_dim=2)
y_hat = Softmax().apply(hidden_to_output.apply(h))
y = tensor.lmatrix('targets')
from blocks.bricks.cost import CategoricalCrossEntropy, MisclassificationRate
cost = CategoricalCrossEntropy().apply(y, y_hat)
error_rate = MisclassificationRate().apply(y.argmax(axis=1), y_hat)
error_rate.name = "error_rate"
# >>> from blocks.roles import WEIGHT
from blocks.graph import ComputationGraph
# >>> from blocks.filter import VariableFilter
cg = ComputationGraph(cost)
# >>> W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
# >>> cost = cost + 0.005 * (W1 ** 2).sum() + 0.005 * (W2 ** 2).sum()
# >>> cost.name = 'cost_with_regularization'
cost.name = 'cost_simple_xentropy'
from blocks.initialization import IsotropicGaussian, Constant
input_to_hidden.weights_init = hidden_to_output.weights_init = IsotropicGaussian(0.01)
input_to_hidden.biases_init = hidden_to_output.biases_init = Constant(0)
Example 11: Softmax
probs = Softmax().apply(n5)
statistics_list=[(M1,S1,a1), (M2,S2,a2), (M3,S3,a3), (M4,S4,a4), (M5,S5,a5)]
# initialize_variables
# for variable (M,S) in variables:
# compute M and S in the whole data.
if normalization == 'bn2':
for m,s,var in statistics_list:
var.tag.aggregation_scheme = MeanAndVariance(var, var.shape[0], axis = 0)
init_mn, init_var = DatasetEvaluator([var]).evaluate(stream_train)[var.name]
m.set_value(init_mn.astype(floatX))
s.set_value(sqrt(init_var).astype(floatX))
cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
cost.name = 'cost'
error_rate = MisclassificationRate().apply(y.flatten(), probs)
error_rate.name = 'error_rate'
cg = ComputationGraph([cost])
parameters = cg.parameters
# add gradient descent to M,S
if normalization == 'bn2':
for m,s,var in statistics_list:
parameters.extend([m,s])
algorithm = GradientDescent(
cost=cost, parameters=parameters, step_rule=Adam(0.01))
Example 12: main
def main():
parser = argparse.ArgumentParser()
parser.add_argument("every_n_batches", type=int, default=[1], nargs=1)
args = parser.parse_args()
print("We were asked to sync with legion at every_n_batches = %s" % str(args.every_n_batches[0]))
# The rest is a copy paste from the blocks tutorial, except for the inclusion of the sync extension
# at the creation of the MainLoop blocks object.
x = tensor.matrix('features')
input_to_hidden = Linear(name='input_to_hidden', input_dim=784, output_dim=100)
h = Rectifier().apply(input_to_hidden.apply(x))
hidden_to_output = Linear(name='hidden_to_output', input_dim=100, output_dim=10)
y_hat = Softmax().apply(hidden_to_output.apply(h))
y = tensor.lmatrix('targets')
cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
cg = ComputationGraph(cost)
W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
cost = cost + 0.005 * (W1 ** 2).sum() + 0.005 * (W2 ** 2).sum()
cost.name = 'cost_with_regularization'
input_to_hidden.weights_init = hidden_to_output.weights_init = IsotropicGaussian(0.01)
input_to_hidden.biases_init = hidden_to_output.biases_init = Constant(0)
input_to_hidden.initialize()
hidden_to_output.initialize()
mnist = MNIST(("train",))
data_stream = Flatten(
DataStream.default_stream(
mnist,
iteration_scheme=SequentialScheme(mnist.num_examples, batch_size=256)))
algorithm = GradientDescent(
cost=cost,
params=cg.parameters,
step_rule=Scale(learning_rate=0.1)
)
mnist_test = MNIST(("test",))
data_stream_test = Flatten(DataStream.default_stream(
mnist_test,
iteration_scheme=SequentialScheme(mnist_test.num_examples,
batch_size=1024)))
monitor = DataStreamMonitoring(variables=[cost],
data_stream=data_stream_test,
prefix="test")
# Except for this line
b1, b2 = VariableFilter(roles=[BIAS])(cg.variables)
main_loop = MainLoop(data_stream=data_stream,
algorithm=algorithm,
extensions=[monitor,
FinishAfter(after_n_epochs=500),
Printing(),
# And the inclusion of the legion sync module, SharedParamsRateLimited:
SharedParamsRateLimited(
params={"W1": W1,
"W2": W2,
"b1": b1,
"b2": b2
},
alpha=.5,
beta=.5,
every_n_batches=args.every_n_batches[0],
maximum_rate=0.1)])
main_loop.run()
Example 13: main
def main(save_to, num_epochs):
mlp = MLP([Tanh(), Tanh(), Softmax()], [784, 100, 100, 10],
weights_init=IsotropicGaussian(0.01),
biases_init=Constant(0))
mlp.initialize()
x = tensor.matrix('features')
y = tensor.lmatrix('targets')
probs = mlp.apply(tensor.flatten(x, outdim=2))
cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
error_rate = MisclassificationRate().apply(y.flatten(), probs)
cg = ComputationGraph([cost, error_rate])
cost.name = 'final_cost'
test_cost = cost
for_dropout = VariableFilter(roles=[INPUT],
bricks=mlp.linear_transformations[1:])(cg.variables)
dropout_graph = apply_dropout(cg, for_dropout, 0.5)
dropout_graph = apply_dropout(dropout_graph, [x], 0.1)
dropout_cost, dropout_error_rate = dropout_graph.outputs
mnist_train = MNIST(("train",))
mnist_test = MNIST(("test",))
algorithm = GradientDescent(
cost=dropout_cost, parameters=cg.parameters,
step_rule=Scale(learning_rate=0.1))
extensions = [Timing(),
FinishAfter(after_n_epochs=num_epochs),
DataStreamMonitoring(
[cost, error_rate],
Flatten(
DataStream.default_stream(
mnist_test,
iteration_scheme=SequentialScheme(
mnist_test.num_examples, 500)),
which_sources=('features',)),
prefix="test"),
TrainingDataMonitoring(
[dropout_cost, dropout_error_rate,
aggregation.mean(algorithm.total_gradient_norm)],
prefix="train",
after_epoch=True),
Checkpoint(save_to),
Printing()]
if BLOCKS_EXTRAS_AVAILABLE:
extensions.append(Plot(
'MNIST example',
channels=[
['test_final_cost',
'test_misclassificationrate_apply_error_rate'],
['train_total_gradient_norm']]))
main_loop = MainLoop(
algorithm,
Flatten(
DataStream.default_stream(
mnist_train,
iteration_scheme=SequentialScheme(
mnist_train.num_examples, 50)),
which_sources=('features',)),
model=Model(dropout_cost),
extensions=extensions)
main_loop.run()
Example 14: _create_main_loop
def _create_main_loop(self):
# hyper parameters
hp = self.params
batch_size = hp['batch_size']
biases_init = Constant(0)
batch_normalize = hp['batch_normalize']
### Build fprop
tensor5 = T.TensorType(config.floatX, (False,)*5)
X = tensor5("images")
#X = T.tensor4("images")
y = T.lvector('targets')
gnet_params = OrderedDict()
#X_shuffled = X[:, :, :, :, [2, 1, 0]]
#X_shuffled = gpu_contiguous(X.dimshuffle(0, 1, 4, 2, 3)) * 255
X = X[:, :, :, :, [2, 1, 0]]
X_shuffled = X.dimshuffle((0, 1, 4, 2, 3)) * 255
X_r = X_shuffled.reshape((X_shuffled.shape[0],
X_shuffled.shape[1]*X_shuffled.shape[2],
X_shuffled.shape[3], X_shuffled.shape[4]))
X_r = X_r - (np.array([104, 117, 123])[None, :, None, None]).astype('float32')
expressions, input_data, param = stream_layer_exp(inputs = ('data', X_r),
mode='rgb')
res = expressions['outloss']
y_hat = res.flatten(ndim=2)
import pdb; pdb.set_trace()
### Build Cost
cost = CategoricalCrossEntropy().apply(y, y_hat)
cost = T.cast(cost, theano.config.floatX)
cost.name = 'cross_entropy'
y_pred = T.argmax(y_hat, axis=1)
misclass = T.cast(T.mean(T.neq(y_pred, y)), theano.config.floatX)
misclass.name = 'misclass'
monitored_channels = []
monitored_quantities = [cost, misclass, y_hat, y_pred]
model = Model(cost)
training_cg = ComputationGraph(monitored_quantities)
inference_cg = ComputationGraph(monitored_quantities)
### Get evaluation function
#training_eval = training_cg.get_theano_function(additional_updates=bn_updates)
training_eval = training_cg.get_theano_function()
#inference_eval = inference_cg.get_theano_function()
# Dataset
test = JpegHDF5Dataset('test',
#name='jpeg_data_flows.hdf5',
load_in_memory=True)
#mean = np.load(os.path.join(os.environ['UCF101'], 'mean.npy'))
import pdb; pdb.set_trace()
### Eval
labels = np.zeros(test.num_video_examples)
y_hat = np.zeros((test.num_video_examples, 101))
labels_flip = np.zeros(test.num_video_examples)
y_hat_flip = np.zeros((test.num_video_examples, 101))
### Important to shuffle list for batch normalization statistic
#rng = np.random.RandomState()
#examples_list = range(test.num_video_examples)
#import pdb; pdb.set_trace()
#rng.shuffle(examples_list)
nb_frames=1
for i in xrange(24):
scheme = HDF5SeqScheme(test.video_indexes,
examples=test.num_video_examples,
batch_size=batch_size,
f_subsample=i,
nb_subsample=25,
frames_per_video=nb_frames)
#for crop in ['upleft', 'upright', 'downleft', 'downright', 'center']:
for crop in ['center']:
stream = JpegHDF5Transformer(
input_size=(240, 320), crop_size=(224, 224),
#input_size=(256, 342), crop_size=(224, 224),
crop_type=crop,
translate_labels = True,
flip='noflip', nb_frames = nb_frames,
data_stream=ForceFloatX(DataStream(
dataset=test, iteration_scheme=scheme)))
stream_flip = JpegHDF5Transformer(
input_size=(240, 320), crop_size=(224, 224),
#input_size=(256, 342), crop_size=(224, 224),
crop_type=crop,
translate_labels = True,
flip='flip', nb_frames = nb_frames,
data_stream=ForceFloatX(DataStream(
dataset=test, iteration_scheme=scheme)))
#......... (part of the code is omitted here) .........
Example 15: create_network
def create_network(inputs=None, batch=batch_size):
if inputs is None:
inputs = T.tensor4('features')
x = T.cast(inputs,'float32')
x = x / 255. if dataset != 'binarized_mnist' else x
# GatedPixelCNN
gated = GatedPixelCNN(
name='gated_layer_0',
filter_size=7,
image_size=(img_dim,img_dim),
num_filters=h*n_channel,
num_channels=n_channel,
batch_size=batch,
weights_init=IsotropicGaussian(std=0.02, mean=0),
biases_init=Constant(0.02),
res=False
)
gated.initialize()
x_v, x_h = gated.apply(x, x)
for i in range(n_layer):
gated = GatedPixelCNN(
name='gated_layer_{}'.format(i+1),
filter_size=3,
image_size=(img_dim,img_dim),
num_channels=h*n_channel,
batch_size=batch,
weights_init=IsotropicGaussian(std=0.02, mean=0),
biases_init=Constant(0.02),
res=True
)
gated.initialize()
x_v, x_h = gated.apply(x_v, x_h)
conv_list = []
conv_list.extend([Rectifier(), ConvolutionalNoFlip((1,1), h*n_channel, mask_type='B', name='1x1_conv_1')])
#conv_list.extend([Rectifier(), ConvolutionalNoFlip((1,1), h*n_channel, mask='B', name='1x1_conv_2')])
conv_list.extend([Rectifier(), ConvolutionalNoFlip(*third_layer, mask_type='B', name='output_layer')])
sequence = ConvolutionalSequence(
conv_list,
num_channels=h*n_channel,
batch_size=batch,
image_size=(img_dim,img_dim),
border_mode='half',
weights_init=IsotropicGaussian(std=0.02, mean=0),
biases_init=Constant(0.02),
tied_biases=False
)
sequence.initialize()
x = sequence.apply(x_h)
if MODE == '256ary':
x = x.reshape((-1, 256, n_channel, img_dim, img_dim)).dimshuffle(0,2,3,4,1)
x = x.reshape((-1, 256))
x_hat = Softmax().apply(x)
inp = T.cast(inputs, 'int64').flatten()
cost = CategoricalCrossEntropy().apply(inp, x_hat) * img_dim * img_dim
cost_bits_dim = categorical_crossentropy(log_softmax(x), inp)
else:
x_hat = Logistic().apply(x)
cost = BinaryCrossEntropy().apply(inputs, x_hat) * img_dim * img_dim
#cost = T.nnet.binary_crossentropy(x_hat, inputs)
#cost = cost.sum() / inputs.shape[0]
cost_bits_dim = -(inputs * T.log2(x_hat) + (1.0 - inputs) * T.log2(1.0 - x_hat)).mean()
cost_bits_dim.name = "nnl_bits_dim"
cost.name = 'loglikelihood_nat'
return cost, cost_bits_dim