本文整理匯總了Python中blocks.bricks.cost.MisclassificationRate類的典型用法代碼示例。如果您正苦於以下問題:Python MisclassificationRate類的具體用法?Python MisclassificationRate怎麼用?Python MisclassificationRate使用的例子?那麼,這裏精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了MisclassificationRate類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: setup_model
def setup_model(configs):
    """Build the attention-LSTM classifier graph and attach its outputs to the model.

    Parameters
    ----------
    configs : dict
        Passed whole to ``LSTMAttention``. This function itself reads
        ``"classifier_dims"``, ``"load_pretrained"`` and ``"test_model"``,
        plus ``"get_streams"`` and ``"batch_size"`` when ``"test_model"``
        is truthy.

    Returns
    -------
    LSTMAttention
        The initialized model with ``location``, ``scale``, ``alpha``,
        ``patch``, ``cost``, ``error_rate``, ``probabilities`` and
        ``monitorings`` set as attributes.

    Raises
    ------
    AssertionError
        If a pretrained array's shape differs from the matching parameter.
    """
    tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5)
    # shape: T x B x C x X x Y
    input_ = tensor5("features")
    tensor3 = theano.tensor.TensorType(config.floatX, (False,) * 3)
    locs = tensor3("locs")
    # 1-D int32 vector of target labels
    target = T.ivector("targets")

    model = LSTMAttention(configs, weights_init=Glorot(), biases_init=Constant(0))
    model.initialize()

    (h, c, location, scale, alpha, patch, downn_sampled_input,
     conved_part_1, conved_part_2, pre_lstm) = model.apply(input_, locs)

    model.location = location
    model.scale = scale
    # The original stored `location` here, so `model.alpha` silently aliased
    # `model.location` and the real `alpha` output was dropped.
    model.alpha = alpha
    model.patch = patch

    classifier = MLP(
        [Rectifier(), Softmax()],
        configs["classifier_dims"],
        weights_init=Glorot(),
        biases_init=Constant(0),
    )
    classifier.initialize()

    # Classify from the last element of `h` along its first axis.
    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    cost.name = "CE"
    error_rate = MisclassificationRate().apply(target, probabilities)
    error_rate.name = "ER"
    model.cost = cost
    model.error_rate = error_rate
    model.probabilities = probabilities

    if configs["load_pretrained"]:
        blocks_model = Model(model.cost)
        all_params = blocks_model.parameters
        # .npz archives are binary; open in "rb" so the load does not depend
        # on the platform's text-mode handling.
        with open("VGG_CNN_params.npz", "rb") as f:
            loaded = np.load(f)
            # Copy the key list so matched names can be removed from it.
            all_conv_params = list(loaded.keys())
            for param in all_params:
                if param.name in loaded.keys():
                    assert param.get_value().shape == loaded[param.name].shape
                    param.set_value(loaded[param.name])
                    all_conv_params.remove(param.name)
        print("the following parameters did not match: " + str(all_conv_params))

    if configs["test_model"]:
        print("TESTING THE MODEL: CHECK THE INPUT SIZE!")
        cg = ComputationGraph(model.cost)
        test_fn = theano.function(
            cg.inputs, [model.cost], on_unused_input="ignore", allow_input_downcast=True
        )
        # Pull a single batch from the first stream returned by the config hook.
        data = next(configs["get_streams"](configs["batch_size"])[0].get_epoch_iterator())
        # NOTE(review): the argument order must line up with `cg.inputs` — confirm.
        test_fn(data[1], data[0], data[2])
        print("Test passed! ;)")

    model.monitorings = [cost, error_rate]
    return model
示例2: apply
def apply(self, input_lb, input_un, target):
    """Run clean and corrupted encoder passes and set up costs and error rate.

    The labeled and unlabeled inputs are concatenated along axis 0 and fed
    through ``self.encoder`` twice (without noise and with
    ``self.noise_std``). The resulting cost container is stored on
    ``self.costs`` and the scaled misclassification rate on ``self.error``.
    Nothing is returned.
    """
    num_labeled = input_lb.shape[0]

    def labeled_rows(x):
        # Keep only the leading `num_labeled` rows; pass None through untouched.
        if x is None:
            return x
        return x[:num_labeled]

    joined = T.concatenate([input_lb, input_un], axis=0)

    self.layer_dims = {0: self.input_dim}
    self.lr = self.shared(self.default_lr, "learning_rate", role=None)

    last = len(self.layers) - 1

    clean = self.encoder(joined, noise_std=[0])
    corr = self.encoder(joined, noise_std=self.noise_std)
    ests, costs = self.decoder(clean, corr, num_labeled)

    # Supervised costs on the labeled rows of the top layer.
    labels = target.flatten()
    costs.class_clean = CategoricalCrossEntropy().apply(labels, labeled_rows(clean.h[last]))
    costs.class_clean.name = "CE_clean"
    costs.class_corr = CategoricalCrossEntropy().apply(labels, labeled_rows(corr.h[last]))
    costs.class_corr.name = "CE_corr"

    # Total cost: corrupted-path classification cost plus weighted
    # per-layer denoising costs.
    costs.total = costs.class_corr * 1.0
    for layer_idx in range(len(self.layers)):
        costs.total += costs.denois[layer_idx] * self.denoising_cost_x[layer_idx]
    costs.total.name = "Total_cost"
    self.costs = costs

    # Classification error on the clean path, multiplied by 100.
    error_brick = MisclassificationRate()
    self.error = error_brick.apply(labels, labeled_rows(clean.h[last])) * np.float32(100.0)
    self.error.name = "Error_rate"
示例3: maxout_vae_mnist_test
def maxout_vae_mnist_test(path_vae_mnist):
    """Train a Maxout brick on the sampled codes of a saved VAE and save it.

    Parameters
    ----------
    path_vae_mnist : str
        Passed to ``load`` to restore the VAE; its ``encoder_mlp`` and
        ``sampler`` produce the features the Maxout brick is trained on.

    The trained brick is written to ``'../data_mnist/maxout'``. Nothing is
    returned.
    """
    # load vae model on mnist
    vae_mnist = load(path_vae_mnist)
    maxout = Maxout()
    x = T.matrix('features')
    y = T.imatrix('targets')
    batch_size = 128

    z, _ = vae_mnist.sampler.sample(vae_mnist.encoder_mlp.apply(x))
    predict = maxout.apply(z)

    # The targets arrive as a matrix; both the cost and the error rate need
    # the flat label vector.
    labels = y.flatten()
    cost = Softmax().categorical_cross_entropy(labels, predict)
    y_hat = Softmax().apply(predict)
    cost.name = 'cost'
    cg = ComputationGraph(cost)

    # Give every parameter a distinct, recognisable name.
    for i, param in enumerate(cg.parameters):
        param.name = param.name + str(i) + "maxout"

    error_brick = MisclassificationRate()
    # The original passed the un-flattened matrix `y` here, unlike the cost
    # above; compare the label vector against the predictions instead.
    error_rate = error_brick.apply(labels, y_hat)

    # training
    step_rule = RMSProp(0.01, 0.9)
    train_set = MNIST('train')
    test_set = MNIST("test")

    data_stream_train = Flatten(DataStream.default_stream(
        train_set,
        iteration_scheme=SequentialScheme(train_set.num_examples, batch_size)))
    data_stream_test = Flatten(DataStream.default_stream(
        test_set,
        iteration_scheme=SequentialScheme(test_set.num_examples, batch_size)))

    # NOTE(review): `params=` is kept as written; some Blocks versions name
    # this keyword `parameters` — confirm against the installed version.
    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], data_stream=data_stream_train, prefix="train")
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate], data_stream=data_stream_test, prefix="test")

    extensions = [
        monitor_train,
        monitor_valid,
        FinishAfter(after_n_epochs=50),
        Printing(every_n_epochs=1),
    ]

    main_loop = MainLoop(data_stream=data_stream_train,
                         algorithm=algorithm, model=Model(cost),
                         extensions=extensions)
    main_loop.run()

    # save here
    from blocks.serialization import dump
    # Serialized output is binary, so write through a binary-mode handle.
    with open('../data_mnist/maxout', 'wb') as f:
        dump(maxout, f)
示例4: setup_model
def setup_model(configs):
    """Assemble the attention-LSTM model with a logistic output layer.

    Parameters
    ----------
    configs : dict
        Passed whole to ``LSTMAttention``; this function reads
        ``'classifier_dims'``, ``'load_pretrained'`` and ``'test_model'``.

    Returns
    -------
    LSTMAttention
        The initialized model with ``cost`` and ``monitorings`` attached.
    """
    # shape: T x B x C x X x Y
    five_d = theano.tensor.TensorType(config.floatX, (False,) * 5)
    input_ = five_d('features')
    # shape: B x Classes
    target = T.lmatrix('targets')

    init_kwargs = dict(weights_init=Glorot(), biases_init=Constant(0))
    model = LSTMAttention(configs, **init_kwargs)
    model.initialize()

    # Only `h` is used below; the remaining outputs are unpacked so that a
    # change in the number of outputs still fails loudly here.
    (h, _c, _location, _scale, _patch, _down_sampled,
     _conv_1, _conv_2, _pre_lstm) = model.apply(input_)

    classifier = MLP(
        [Rectifier(), Logistic()],
        configs['classifier_dims'],
        weights_init=Glorot(),
        biases_init=Constant(0))
    classifier.initialize()
    probabilities = classifier.apply(h[-1])

    cost = BinaryCrossEntropy().apply(target, probabilities)
    cost.name = 'CE'
    # NOTE(review): `target` is a matrix here; confirm MisclassificationRate
    # is meaningful for this target layout.
    error_rate = MisclassificationRate().apply(target, probabilities)
    error_rate.name = 'ER'
    model.cost = cost

    if configs['load_pretrained']:
        graph_params = Model(model.cost).parameters
        with open('VGG_CNN_params.npz') as f:
            loaded = np.load(f)
            unmatched = loaded.keys()
            for param in graph_params:
                if param.name not in loaded.keys():
                    continue
                stored = loaded[param.name]
                assert param.get_value().shape == stored.shape
                param.set_value(loaded[param.name])
                unmatched.remove(param.name)
        print("the following parameters did not match: " + str(unmatched))

    if configs['test_model']:
        # Compile the cost and push one random batch through it.
        graph = ComputationGraph(model.cost)
        run_cost = theano.function(graph.inputs, [model.cost],
                                   on_unused_input='ignore',
                                   allow_input_downcast=True)
        data = np.random.randn(10, 40, 3, 224, 224)
        targs = np.random.randn(40, 101)
        run_cost(data, targs)
        print("Test passed! ;)")

    model.monitorings = [cost, error_rate]
    return model
示例5: test_misclassification_rate
def test_misclassification_rate():
    """Check top-1, top-2 and top-3 misclassification rates on a fixed batch."""
    y = tensor.vector(dtype="int32")
    # Pass floatX as the dtype keyword: the first positional parameter of
    # `tensor.matrix` is the variable *name*, so the original call merely
    # named the variable after the dtype string.
    yhat = tensor.matrix(dtype=theano.config.floatX)

    top1_brick = MisclassificationRate()
    top2_brick = MisclassificationRate(top_k=2)
    top3_brick = MisclassificationRate(top_k=3)
    f = theano.function(
        [y, yhat],
        [top1_brick.apply(y, yhat),
         top2_brick.apply(y, yhat),
         top3_brick.apply(y, yhat)],
    )

    # Five examples, four classes; `y_` holds the true class indices.
    y_ = numpy.array([2, 1, 0, 1, 2], dtype="int32")
    yhat_ = numpy.array(
        [[3, 2, 1, 0],
         [1, 8, 2, 1],
         [3, 8, 1, 2],
         [1, 6, 4, 2],
         [9, 7, 5, 5]],
        dtype="float32",
    )

    top1_error = 0.6
    top2_error = 0.4
    top3_error = 0.2
    assert_allclose([top1_error, top2_error, top3_error], f(y_, yhat_))
def apply(self, input_labeled, target_labeled, input_unlabeled):
    """Build the clean and corrupted passes, their costs and the error rate.

    Besides resetting some bookkeeping attributes, this installs the
    ``join`` / ``labeled`` / ``unlabeled`` / ``split_lu`` helpers on
    ``self``, stores the cost container on ``self.costs`` and the scaled
    misclassification rate on ``self.error``. Nothing is returned.
    """
    self.layer_counter = 0
    self.layer_dims = {0: self.input_dim}
    self.lr = self.shared(self.default_lr, 'learning_rate', role=None)

    last = len(self.layers) - 1
    labeled_count = input_labeled.shape[0]

    # Helpers for stacking labeled and unlabeled rows and splitting them again;
    # the first `labeled_count` rows are the labeled ones.
    self.join = lambda l, u: T.concatenate([l, u], axis=0)
    self.labeled = lambda x: x[:labeled_count] if x is not None else x
    self.unlabeled = lambda x: x[labeled_count:] if x is not None else x
    self.split_lu = lambda x: (self.labeled(x), self.unlabeled(x))

    stacked = self.join(input_labeled, input_unlabeled)

    clean = self.encoder(
        stacked, 'clean',
        input_noise_std=0.0,
        noise_std=[])
    corr = self.encoder(
        stacked, 'corr',
        input_noise_std=self.super_noise_std,
        noise_std=self.f_local_noise_std)
    _, costs = self.decoder(clean, corr)

    # Supervised costs on the labeled part of the top layer.
    labels = target_labeled.flatten()
    costs.class_clean = CategoricalCrossEntropy().apply(
        labels, clean.labeled.h[last])
    costs.class_clean.name = 'CE_clean'
    costs.class_corr = CategoricalCrossEntropy().apply(
        labels, corr.labeled.h[last])
    costs.class_corr.name = 'CE_corr'

    # Total cost: corrupted-path classification cost plus the weighted
    # per-layer denoising costs.
    costs.total = costs.class_corr * 1.0
    for layer_idx in range(len(self.layers)):
        costs.total += costs.denois[layer_idx] * self.denoising_cost_x[layer_idx]
    costs.total.name = 'Total_cost'
    self.costs = costs

    # Classification error on the clean path, multiplied by 100.
    error_brick = MisclassificationRate()
    self.error = error_brick.apply(labels, clean.labeled.h[last]) * np.float32(100.)
    self.error.name = 'Error_rate'
示例7: Uniform
# Script fragment: `mlp`, `flat_x` and `flat_y` are defined earlier in the
# original script, outside this excerpt.

# Configure and initialize the hidden MLP.
mlp.weights_init = Uniform(0.0, 0.01)
mlp.biases_init = Constant(0.0)
mlp.initialize()
# Output layer: linear map from 200 units to 10 outputs, with a bias.
lin = Linear(200, 10, use_bias=True)
lin.weights_init = Uniform(0.0, 0.01)
lin.biases_init = Constant(0.0)
lin.initialize()
# NOTE(review): the train and test outputs are built from the same
# expression, so the two graphs are identical as written here.
train_out = lin.apply(mlp.apply(flat_x))
test_out = lin.apply(mlp.apply(flat_x))
sm = Softmax(name='softmax')
# Training loss: mean categorical cross-entropy of `train_out` against `flat_y`.
loss = sm.categorical_cross_entropy(flat_y, train_out).mean()
loss.name = 'nll'
misclass = MisclassificationRate().apply(flat_y, train_out)
misclass.name = 'misclass'
# The test-side variables reuse the names 'nll' and 'misclass'.
test_loss = sm.categorical_cross_entropy(flat_y, test_out).mean()
test_loss.name = 'nll'
test_misclass = MisclassificationRate().apply(flat_y, test_out)
test_misclass.name = 'misclass'
# The model is built from the training loss only.
model = Model(loss)
######################
# Data
######################
import numpy
#from mnist import MNIST
from fuel.datasets.mnist import MNIST
示例8: build_submodel
#.........這裏部分代碼省略.........
# reguarding the batch dropout : the dropout is applied on the filter
# which is equivalent to the output dimension
# you have to look at the dropout_rate of the next layer
# that is why we throw away the first value of L_exo_dropout_full_layers
L_exo_dropout_full_layers = L_exo_dropout_full_layers[1:]
pre_dim = output_dim
print "When constructing the model, the output_dim of the conv section is %d." % output_dim
if len(L_dim_full_layers):
for (dim, activation_str,
dropout, index) in zip(L_dim_full_layers,
L_activation_full,
L_exo_dropout_full_layers,
range(len(L_dim_conv_layers),
len(L_dim_conv_layers)+
len(L_dim_full_layers))
):
# TO DO : leaky relu
if activation_str.lower() == 'rectifier':
activation = Rectifier().apply
elif activation_str.lower() == 'tanh':
activation = Tanh().apply
elif activation_str.lower() in ['sigmoid', 'logistic']:
activation = Logistic().apply
elif activation_str.lower() in ['id', 'identity']:
activation = Identity().apply
else:
raise Exception("unknown activation function : %s", activation_str)
assert 0.0 <= dropout and dropout < 1.0
dim = dim - int(dim*dropout)
print "When constructing the fully-connected section, we apply dropout %f to add an MLP going from pre_dim %d to dim %d." % (dropout, pre_dim, dim)
layer_full = MLP(activations=[activation], dims=[pre_dim, dim],
weights_init=Uniform(width=0.1),
biases_init=Constant(0.0),
name="layer_%d" % index)
layer_full.initialize()
full_layers.append(layer_full)
pre_dim = dim
for layer in full_layers:
output_mlp = layer.apply(output_mlp)
output_dim = L_dim_full_layers[-1] - int(L_dim_full_layers[-1]*L_exo_dropout_full_layers[-1])
# COST FUNCTION
output_layer = Linear(output_dim, prediction,
weights_init=Uniform(width=0.1),
biases_init=Constant(0.0),
name="layer_"+str(len(L_dim_conv_layers)+
len(L_dim_full_layers))
)
output_layer.initialize()
full_layers.append(output_layer)
y_pred = output_layer.apply(output_mlp)
y_hat = Softmax().apply(y_pred)
# SOFTMAX and log likelihood
y_pred = Softmax().apply(y_pred)
# be careful. one version expects the output of a softmax; the other expects just the
# output of the network
cost = CategoricalCrossEntropy().apply(y.flatten(), y_pred)
#cost = Softmax().categorical_cross_entropy(y.flatten(), y_pred)
cost.name = "cost"
# Misclassification
error_rate_brick = MisclassificationRate()
error_rate = error_rate_brick.apply(y.flatten(), y_hat)
error_rate.name = "error_rate"
# put names
D_params, D_kind = build_params(x, T.matrix(), conv_layers, full_layers)
# test computation graph
cg = ComputationGraph(cost)
# DROPOUT
L_endo_dropout = L_endo_dropout_conv_layers + L_endo_dropout_full_layers
cg_dropout = cg
inputs = VariableFilter(roles=[INPUT])(cg.variables)
for (index, drop_rate) in enumerate(L_endo_dropout):
for input_ in inputs:
m = re.match(r"layer_(\d+)_apply.*", input_.name)
if m and index == int(m.group(1)):
if drop_rate < 0.0001:
print "Skipped applying dropout on %s because the dropout rate was under 0.0001." % input_.name
break
else:
cg_dropout = apply_dropout(cg, [input_], drop_rate)
print "Applied dropout %f on %s." % (drop_rate, input_.name)
break
cg = cg_dropout
return (cg, error_rate, cost, D_params, D_kind)
示例9: main
#.........這裏部分代碼省略.........
classification_mlp3 = MLP(activations=[Softmax()], dims=[fc_dim, num_classes], name='MPL_class3', **inits)
edram = EDRAM(channels=channels, out_height=w_height, out_width=w_width, n_iter=n_iter, num_classes=num_classes, rectifier=rectifier, conv1=conv1,
conv1_bn=conv1_bn, conv2=conv2, conv2_bn=conv2_bn, max_pooling=max_pooling, conv3=conv3, conv3_bn=conv3_bn, conv4=conv4, conv4_bn=conv4_bn,
conv5=conv5, conv5_bn=conv5_bn, conv6=conv6, conv6_bn=conv6_bn, conv_mlp=conv_mlp, conv_mlp_bn=conv_mlp_bn,
loc_mlp=loc_mlp, loc_mlp_bn=loc_mlp_bn, conv_init=conv_init, encoder_mlp=encoder_mlp, encoder_rnn=encoder_rnn, decoder_mlp=decoder_mlp,
decoder_rnn=decoder_rnn, classification_mlp1=classification_mlp1, classification_mlp1_bn=classification_mlp1_bn,
classification_mlp2=classification_mlp2, classification_mlp2_bn=classification_mlp2_bn, classification_mlp3=classification_mlp3,
emit_mlp=emit_mlp)
edram.initialize()
# ------------------------------------------------------------------------
x = T.ftensor4('features')
x_coarse = T.ftensor4('features_coarse')
y = T.ivector('labels')
wr = T.fmatrix('locations')
with batch_normalization(edram):
bn_p, bn_l, m_c1_bn, s_c1_bn, m_c2_bn, s_c2_bn, m_c3_bn, s_c3_bn, m_c4_bn, s_c4_bn, m_c5_bn, s_c5_bn, m_c6_bn, s_c6_bn, \
m_c_bn, s_c_bn, m_l_bn, s_l_bn, m_cl1_bn, s_cl1_bn, m_cl2_bn, s_cl2_bn = edram.calculate_train(x, x_coarse)
def compute_cost(p, wr, y, l):
cost_where = T.dot(T.sqr(wr - l), [1, 0.5, 1, 0.5, 1, 1])
cost_y = T.stack([T.nnet.categorical_crossentropy(T.maximum(p[i, :], 1e-7), y) for i in range(0, n_iter)])
return cost_where, cost_y
cost_where, cost_y = compute_cost(bn_p, wr, y, bn_l)
bn_cost = cost_y + cost_where
bn_cost = bn_cost.sum(axis=0)
bn_cost = bn_cost.mean()
bn_cost.name = 'cost'
bn_error_rate = MisclassificationRate().apply(y, bn_p[-1])
bn_error_rate.name = 'error_rate'
# ------------------------------------------------------------
bn_cg = ComputationGraph([bn_cost, bn_error_rate])
# Prepare algorithm
algorithm = GradientDescent(
cost=bn_cg.outputs[0],
on_unused_sources='ignore',
parameters=bn_cg.parameters,
step_rule=CompositeRule([
RemoveNotFinite(),
StepClipping(10.),
Adam(learning_rate)
])
)
pop_updates = get_batch_normalization_updates(bn_cg)
update_params = [conv1_bn.population_mean, conv1_bn.population_stdev, conv2_bn.population_mean, conv2_bn.population_stdev, conv3_bn.population_mean,
conv3_bn.population_stdev, conv4_bn.population_mean, conv4_bn.population_stdev, conv5_bn.population_mean, conv5_bn.population_stdev,
conv6_bn.population_mean, conv6_bn.population_stdev, conv_mlp_bn.population_mean, conv_mlp_bn.population_stdev,
loc_mlp_bn.population_mean, loc_mlp_bn.population_stdev, classification_mlp1_bn.population_mean, classification_mlp1_bn.population_stdev,
classification_mlp2_bn.population_mean, classification_mlp2_bn.population_stdev]
update_values = [m_c1_bn, s_c1_bn, m_c2_bn, s_c2_bn, m_c3_bn, s_c3_bn, m_c4_bn, s_c4_bn, m_c5_bn, s_c5_bn, m_c6_bn, s_c6_bn, m_c_bn, s_c_bn, m_l_bn, s_l_bn,
m_cl1_bn, s_cl1_bn, m_cl2_bn, s_cl2_bn]
pop_updates.extend([(p, m) for p, m in zip(update_params, update_values)])
decay_rate = 0.05
extra_updates = [(p, m * decay_rate + p * (1 - decay_rate)) for p, m in pop_updates]
algorithm.add_updates(extra_updates)
示例10: main
def main(feature_maps=None, mlp_hiddens=None, conv_sizes=None, pool_sizes=None, batch_size=None, num_batches=None):
if feature_maps is None:
feature_maps = [32, 48, 64, 80, 128, 128]
if mlp_hiddens is None:
mlp_hiddens = [1000]
if conv_sizes is None:
conv_sizes = [7, 5, 5, 5, 5, 4]
if pool_sizes is None:
pool_sizes = [3, 2, 2, 2, 2, 1]
if batch_size is None:
batch_size = 64
conv_steps = [2, 1, 1, 1, 1, 1] # same as stride
image_size = (256, 256)
output_size = 2
learningRate = 0.001
drop_prob = 0.5
weight_noise = 0.75
num_epochs = 250
num_batches = None
host_plot = "http://hades:5090"
save_to = "ModelSimpleConvNet128_NesterovAdam.pkl"
graph_name = "CNN_convnet_128_NesterovAdam"
mode = "CPU_test" # "CPU_test" or "GPU_run"
# Use ReLUs everywhere and softmax for the final prediction
conv_activations = [Rand_Leaky_Rectifier() for _ in feature_maps]
mlp_activations = [Rand_Leaky_Rectifier() for _ in mlp_hiddens] + [Softmax()]
convnet = LeNet(
conv_activations,
3,
image_size,
filter_sizes=zip(conv_sizes, conv_sizes),
feature_maps=feature_maps,
pooling_sizes=zip(pool_sizes, pool_sizes),
conv_steps=zip(conv_steps, conv_steps),
top_mlp_activations=mlp_activations,
top_mlp_dims=mlp_hiddens + [output_size],
border_mode="full",
weights_init=Glorot(),
biases_init=Constant(0),
)
# We push initialization config to set different initialization schemes
# for convolutional layers.
convnet.push_initialization_config()
convnet.layers[0].weights_init = Glorot()
convnet.layers[1].weights_init = Constant(0)
convnet.top_mlp.linear_transformations[0].weights_init = Glorot()
convnet.top_mlp.linear_transformations[1].weights_init = Constant(0)
convnet.initialize()
logging.info("Input dim: {} {} {}".format(*convnet.children[0].get_dim("input_")))
for i, layer in enumerate(convnet.layers):
if isinstance(layer, Activation):
logging.info("Layer {} ({})".format(i, layer.__class__.__name__))
else:
logging.info("Layer {} ({}) dim: {} {} {}".format(i, layer.__class__.__name__, *layer.get_dim("output")))
x = tensor.tensor4("image_features")
y = tensor.lmatrix("targets")
# Normalize input and apply the convnet
probs = convnet.apply(x)
cost = CategoricalCrossEntropy().apply(y.flatten(), probs).copy(name="cost")
error_rate = MisclassificationRate().apply(y.flatten(), probs).copy(name="error_rate")
error_rate2 = error_rate.copy(name="error_rate2")
cg = ComputationGraph([cost, error_rate])
weights = VariableFilter(roles=[FILTER, WEIGHT])(cg.variables)
############# Dropout #############
logger.info("Applying dropout")
cg = apply_dropout(cg, weights[-1:0], drop_prob) # Dropout only on fully-connected layer
dropped_out = VariableFilter(roles=[DROPOUT])(cg.variables)
"""
############# Guaussian Noise #############
logger.info('Applying Gaussian noise')
cg = apply_noise(cg, weights, weight_noise)
"""
########### Loading images #####################
from fuel.datasets.dogs_vs_cats import DogsVsCats
from fuel.streams import DataStream, ServerDataStream
from fuel.schemes import ShuffledScheme
from fuel.transformers.image import RandomFixedSizeCrop, MinimumImageDimensions, Random2DRotation
from fuel.transformers import Flatten, Cast, ScaleAndShift
def create_data(data):
stream = DataStream(data, iteration_scheme=ShuffledScheme(data.num_examples, batch_size))
# Data Augmentation
stream = MinimumImageDimensions(stream, image_size, which_sources=("image_features",))
stream = MaximumImageDimensions(stream, image_size, which_sources=("image_features",))
stream = RandomHorizontalSwap(stream, which_sources=("image_features",))
stream = Random2DRotation(stream, which_sources=("image_features",))
# stream = ScikitResize(stream, image_size, which_sources=('image_features',))
# Data Preprocessing
#.........這裏部分代碼省略.........
示例11: CategoricalCrossEntropy
biases_init=IsotropicGaussian(),
prototype=input_mlp,
)
# Script fragment: `parallel_nets`, `output_mlp`, `l_x`, `r_x` and `y` are
# defined earlier in the original script, outside this excerpt.
parallel_nets.initialize()
l_h, r_h = parallel_nets.apply(l_x=l_x, r_x=r_x)
# Concatenate the outputs of the two parallel subnets along axis 1 into a
# single variable that feeds the next layer.
merge = tensor.concatenate([l_h, r_h], axis=1)
y_hat = output_mlp.apply(merge)
# Define the cost to optimize and a classification error rate, both
# computed from the flattened targets and the network output.
cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
error = MisclassificationRate().apply(y.flatten(), y_hat)
error.name = 'error'
# Build the computation graph from the (not yet regularized) cost.
graph = ComputationGraph(cost)
# Collect every variable in the graph that carries the WEIGHT role.
W = VariableFilter(roles=[WEIGHT])(graph.variables)
# Add an L2 penalty on the weights, scaled by `lam`. `graph` above was
# built before this addition and is not rebuilt here.
lam = 0.001
cost += lam * l2_norm(W)
cost.name = 'entropy'
# This is the model without dropout, but with l2 reg.
model = Model(cost)
示例12: train_net
def train_net(net, train_stream, test_stream, L1 = None, L2=None, early_stopping=False,
finish=None, dropout=False, jobid=None, update=None,
duration= None,
**ignored):
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')
y_hat = net.apply(x)
#Cost
cost_before = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
cost_before.name = "cost_without_regularization"
#Error
#Taken from brodesf
error = MisclassificationRate().apply(y.flatten(), y_hat)
error.name = "Misclassification rate"
#Regularization
cg = ComputationGraph(cost_before)
WS = VariableFilter(roles=[WEIGHT])(cg.variables)
if dropout:
print("Dropout")
cg = apply_dropout(cg, WS, 0.5)
if L1:
print("L1 with lambda ",L1)
L1_reg = L1 * sum([abs(W).sum() for W in WS])
L1_reg.name = "L1 regularization"
cost_before += L1_reg
if L2:
print("L2 with lambda ",L2)
L2_reg = L2 * sum([(W ** 2).sum() for W in WS])
L2_reg.name = "L2 regularization"
cost_before += L2_reg
cost = cost_before
cost.name = 'cost_with_regularization'
#Initialization
print("Initilization")
net.initialize()
#Algorithm
step_rule = Scale(learning_rate=0.1)
if update is not None:
if update == "rmsprop":
print("Using RMSProp")
step_rule = RMSProp()
remove_not_finite = RemoveNotFinite(0.9)
step_rule = CompositeRule([step_rule, remove_not_finite])
algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=step_rule)
print("Extensions")
extensions = []
#Monitoring
monitor = DataStreamMonitoring(variables=[cost, error], data_stream=test_stream, prefix="test")
extensions.append(monitor)
def filename(suffix=""):
prefix = jobid if jobid else str(os.getpid())
ctime = str(time.time())
return "checkpoints/" + prefix + "_" + ctime + "_" + suffix + ".zip"
#Serialization
#serialization = Checkpoint(filename())
#extensions.append(serialization)
notification = "test_"+error.name
track = TrackTheBest(notification)
best_notification = track.notification_name
checkpointbest = SaveBest(best_notification, filename("best"))
extensions.extend([track, checkpointbest])
if early_stopping:
print("Early stopping")
stopper = FinishIfNoImprovementAfterPlus(best_notification)
extensions.append(stopper)
#Other extensions
if finish != None:
print("Force finish ", finish)
extensions.append(FinishAfter(after_n_epochs=finish))
if duration != None:
print("Stop after " , duration, " seconds")
extensions.append(FinishAfterTime(duration))
extensions.extend([
Timing(),
Printing()
])
#Main loop
main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm, extensions=extensions)
#.........這裏部分代碼省略.........
示例13: training_model_mnist
def training_model_mnist(learning_rate, momentum, iteration, batch_size, epoch_end, iter_batch):
    """Train the MNIST classifier with momentum SGD and monitor cost and error.

    Parameters
    ----------
    learning_rate, momentum
        Forwarded to the ``Momentum`` step rule.
    iteration
        When equal to ``"slice"`` the streams use ``SequentialScheme_slice``;
        any other value selects ``SequentialScheme``.
    batch_size
        Batch size handed to the iteration scheme.
    epoch_end
        Number of epochs after which training finishes.
    iter_batch
        Period, in epochs, of monitoring and printing.
    """
    x = T.tensor4("features")
    y = T.imatrix("targets")

    classifier = build_model_mnist()
    predict = classifier.apply(x)
    y_hat = Softmax().apply(predict)

    cost = Softmax().categorical_cross_entropy(y.flatten(), predict)
    cost.name = "cost"
    cg = ComputationGraph(cost)

    error_rate = MisclassificationRate().apply(y.flatten(), y_hat)
    error_rate.name = "error"

    train_set = MNIST(("train",))
    test_set = MNIST(("test",))

    # Both streams share the same scheme class; only the dataset differs.
    scheme_cls = SequentialScheme_slice if iteration == "slice" else SequentialScheme
    data_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=scheme_cls(train_set.num_examples, batch_size))
    data_stream_test = DataStream.default_stream(
        test_set,
        iteration_scheme=scheme_cls(test_set.num_examples, batch_size))

    step_rule = Momentum(learning_rate=learning_rate, momentum=momentum)

    # Reference start time and a shared variable handed to the Time_reference
    # extension; the shared variable is also monitored on the validation side.
    start = time.clock()
    time_spent = shared_floatx(np.float32(0.), name="time_spent")
    time_extension = Time_reference(start, time_spent, every_n_batches=1)

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], prefix="train", every_n_epochs=iter_batch)
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate, time_spent],
        data_stream=data_stream_test,
        prefix="valid",
        every_n_epochs=iter_batch)

    extensions = [
        monitor_train,
        monitor_valid,
        FinishAfter(after_n_epochs=epoch_end),
        Printing(every_n_epochs=iter_batch),
        time_extension,
    ]

    main_loop = MainLoop(data_stream=data_stream,
                         algorithm=algorithm,
                         model=Model(cost),
                         extensions=extensions)
    main_loop.run()
示例14: build_and_run
def build_and_run(label, config):
############## CREATE THE NETWORK ###############
#Define the parameters
num_epochs, num_batches, num_channels, image_shape, filter_size, num_filter, pooling_sizes, mlp_hiddens, output_size, batch_size, activation, mlp_activation = config['num_epochs'], config['num_batches'], config['num_channels'], config['image_shape'], config['filter_size'], config['num_filter'], config['pooling_sizes'], config['mlp_hiddens'], config['output_size'], config['batch_size'], config['activation'], config['mlp_activation']
# print(num_epochs, num_channels, image_shape, filter_size, num_filter, pooling_sizes, mlp_hiddens, output_size, batch_size, activation, mlp_activation)
lambda_l1 = 0.000025
lambda_l2 = 0.000025
print("Building model")
#Create the symbolics variable
x = T.tensor4('image_features')
y = T.lmatrix('targets')
#Get the parameters
conv_parameters = zip(filter_size, num_filter)
#Create the convolutions layers
conv_layers = list(interleave([(Convolutional(
filter_size=filter_size,
num_filters=num_filter,
name='conv_{}'.format(i))
for i, (filter_size, num_filter)
in enumerate(conv_parameters)),
(activation),
(MaxPooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))]))
# (AveragePooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))]))
#Create the sequence
conv_sequence = ConvolutionalSequence(conv_layers, num_channels, image_size=image_shape, weights_init=Uniform(width=0.2), biases_init=Constant(0.))
#Initialize the convnet
conv_sequence.initialize()
#Add the MLP
top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))] + mlp_hiddens + [output_size]
out = Flattener().apply(conv_sequence.apply(x))
mlp = MLP(mlp_activation, top_mlp_dims, weights_init=Uniform(0, 0.2),
biases_init=Constant(0.))
#Initialisze the MLP
mlp.initialize()
#Get the output
predict = mlp.apply(out)
cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost')
error = MisclassificationRate().apply(y.flatten(), predict)
#Little trick to plot the error rate in two different plots (We can't use two time the same data in the plot for a unknow reason)
error_rate = error.copy(name='error_rate')
error_rate2 = error.copy(name='error_rate2')
########### REGULARIZATION ##################
cg = ComputationGraph([cost])
weights = VariableFilter(roles=[WEIGHT])(cg.variables)
biases = VariableFilter(roles=[BIAS])(cg.variables)
# # l2_penalty_weights = T.sum([i*lambda_l2/len(weights) * (W ** 2).sum() for i,W in enumerate(weights)]) # Gradually increase penalty for layer
l2_penalty = T.sum([lambda_l2 * (W ** 2).sum() for i,W in enumerate(weights+biases)]) # Gradually increase penalty for layer
# # #l2_penalty_bias = T.sum([lambda_l2*(B **2).sum() for B in biases])
# # #l2_penalty = l2_penalty_weights + l2_penalty_bias
l2_penalty.name = 'l2_penalty'
l1_penalty = T.sum([lambda_l1*T.abs_(z).sum() for z in weights+biases])
# l1_penalty_weights = T.sum([i*lambda_l1/len(weights) * T.abs_(W).sum() for i,W in enumerate(weights)]) # Gradually increase penalty for layer
# l1_penalty_biases = T.sum([lambda_l1 * T.abs_(B).sum() for B in biases])
# l1_penalty = l1_penalty_biases + l1_penalty_weights
l1_penalty.name = 'l1_penalty'
costreg = cost + l2_penalty + l1_penalty
costreg.name = 'costreg'
########### DEFINE THE ALGORITHM #############
# algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Momentum())
algorithm = GradientDescent(cost=costreg, parameters=cg.parameters, step_rule=Adam())
########### GET THE DATA #####################
istest = 'test' in config.keys()
train_stream, valid_stream, test_stream = get_stream(batch_size,image_shape,test=istest)
########### INITIALIZING EXTENSIONS ##########
checkpoint = Checkpoint('models/best_'+label+'.tar')
checkpoint.add_condition(['after_epoch'],
predicate=OnLogRecord('valid_error_rate_best_so_far'))
#Adding a live plot with the bokeh server
plot = Plot(label,
channels=[['train_error_rate', 'valid_error_rate'],
['valid_cost', 'valid_error_rate2'],
# ['train_costreg','train_grad_norm']], #
['train_costreg','train_total_gradient_norm','train_l2_penalty','train_l1_penalty']],
server_url="http://hades.calculquebec.ca:5042")
grad_norm = aggregation.mean(algorithm.total_gradient_norm)
grad_norm.name = 'grad_norm'
extensions = [Timing(),
FinishAfter(after_n_epochs=num_epochs,
after_n_batches=num_batches),
DataStreamMonitoring([cost, error_rate, error_rate2], valid_stream, prefix="valid"),
TrainingDataMonitoring([costreg, error_rate, error_rate2,
grad_norm,l2_penalty,l1_penalty],
prefix="train", after_epoch=True),
plot,
ProgressBar(),
Printing(),
TrackTheBest('valid_error_rate',min), #Keep best
#.........這裏部分代碼省略.........
示例15: main
def main(num_epochs=50, batch_normalized=True, alpha=0.1):
    """Train a small MLP on the spiral dataset, optionally batch-normalized.

    Parameters
    ----------
    num_epochs : int, optional
        How many epochs to train for.
    batch_normalized : bool, optional
        Whether to train on a batch-normalized copy of the graph.
        Defaults to `True`.
    alpha : float, optional
        Weight of the newest minibatch statistic in the running average
        of the population statistics; the existing average keeps weight
        ``1 - alpha``.

    Returns
    -------
    MainLoop
        The main loop object after ``run()`` has returned.
    """
    # `conserve_memory` is an extra keyword that only BatchNormalizedMLP
    # takes; disabling it trades a bit of extra memory for speed.
    mlp_class, extra_kwargs = (
        (BatchNormalizedMLP, {'conserve_memory': False})
        if batch_normalized
        else (MLP, {})
    )
    activations = [Logistic(), Logistic(), Logistic(), Softmax()]
    mlp = mlp_class(activations,
                    [2, 5, 5, 5, 3],
                    weights_init=IsotropicGaussian(0.2),
                    biases_init=Constant(0.),
                    **extra_kwargs)
    mlp.initialize()

    # Spiral dataset with 3 arms: the first 8000 examples are used for
    # training, the last 2000 for testing.
    dataset = Spiral(num_examples=10000, classes=3,
                     sources=['features', 'label'],
                     noise=0.05)
    train_scheme = ShuffledScheme(examples=8000, batch_size=20)
    train_stream = DataStream(dataset, iteration_scheme=train_scheme)
    test_scheme = SequentialScheme(examples=list(range(8000, 10000)),
                                   batch_size=2000)
    test_stream = DataStream(dataset, iteration_scheme=test_scheme)

    # Cost graph; its BatchNormalization bricks run in inference mode by
    # default.
    features = tensor.matrix('features')
    label = tensor.lvector('label')
    prediction = mlp.apply(features)
    cost = CategoricalCrossEntropy().apply(label, prediction)
    misclass = MisclassificationRate().apply(label, prediction)
    misclass.name = 'misclass'  # the default name is inconveniently long
    original_cg = ComputationGraph([cost, misclass])

    # Without batch normalization, train on the original graph and apply no
    # extra updates.
    cg = original_cg
    extra_updates = []
    if batch_normalized:
        cg = apply_batch_normalization(original_cg)
        # Running-average updates for the population parameters.
        for p, m in get_batch_normalization_updates(cg):
            extra_updates.append((p, m * alpha + p * (1 - alpha)))

    algorithm = GradientDescent(step_rule=Adam(0.001),
                                cost=cg.outputs[0],
                                parameters=cg.parameters)
    algorithm.add_updates(extra_updates)

    # Monitor the original cost and misclass variables so that the reported
    # numbers come from the (original) inference-mode graph.
    monitored = [cost, misclass]
    extensions = [
        DataStreamMonitoring(monitored, train_stream, prefix='train'),
        DataStreamMonitoring(monitored, test_stream, prefix='test'),
        Printing(),
        FinishAfter(after_n_epochs=num_epochs),
    ]
    main_loop = MainLoop(algorithm=algorithm,
                         data_stream=train_stream,
                         extensions=extensions)
    main_loop.run()
    return main_loop