This page collects typical usage examples of the Python method blocks.bricks.cost.CategoricalCrossEntropy.name. If you are wondering what CategoricalCrossEntropy.name does, how to call it, or what it looks like in real code, the curated examples below should help. You can also look further into the usage examples of the containing class, blocks.bricks.cost.CategoricalCrossEntropy.
Below are 15 code examples of CategoricalCrossEntropy.name, ordered by popularity.
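Before the examples, here is a minimal, self-contained sketch of the recurring pattern, written against the public Blocks API (the MLP configuration and variable names are illustrative assumptions, not taken from any example below): CategoricalCrossEntropy().apply(...) returns an ordinary Theano variable, and assigning to its name attribute determines how that quantity is labelled in the computation graph and in monitoring channels.
import theano.tensor as tensor
from blocks.bricks import MLP, Tanh, Softmax
from blocks.bricks.cost import CategoricalCrossEntropy
from blocks.initialization import IsotropicGaussian, Constant

x = tensor.matrix('features')
y = tensor.lmatrix('targets')
mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
          weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
mlp.initialize()
probs = mlp.apply(x)
# apply() returns a plain Theano variable; name is just an attribute on it
cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
cost.name = 'cross_entropy'  # this label is what shows up in monitoring output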
Example 1: setup_model
# Required module import: from blocks.bricks.cost import CategoricalCrossEntropy [as alias]
# Or: from blocks.bricks.cost.CategoricalCrossEntropy import name [as alias]
def setup_model(configs):
tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5)
# shape: T x B x C x X x Y
input_ = tensor5("features")
tensor3 = theano.tensor.TensorType(config.floatX, (False,) * 3)
locs = tensor3("locs")
# shape: B x Classes
target = T.ivector("targets")
model = LSTMAttention(configs, weights_init=Glorot(), biases_init=Constant(0))
model.initialize()
(h, c, location, scale, alpha, patch, down_sampled_input, conved_part_1, conved_part_2, pre_lstm) = model.apply(
input_, locs
)
model.location = location
model.scale = scale
model.alpha = alpha
model.patch = patch
classifier = MLP(
[Rectifier(), Softmax()], configs["classifier_dims"], weights_init=Glorot(), biases_init=Constant(0)
)
classifier.initialize()
probabilities = classifier.apply(h[-1])
cost = CategoricalCrossEntropy().apply(target, probabilities)
cost.name = "CE"
error_rate = MisclassificationRate().apply(target, probabilities)
error_rate.name = "ER"
model.cost = cost
model.error_rate = error_rate
model.probabilities = probabilities
if configs["load_pretrained"]:
blocks_model = Model(model.cost)
all_params = blocks_model.parameters
with open("VGG_CNN_params.npz") as f:
loaded = np.load(f)
all_conv_params = loaded.keys()
for param in all_params:
if param.name in loaded.keys():
assert param.get_value().shape == loaded[param.name].shape
param.set_value(loaded[param.name])
all_conv_params.pop(all_conv_params.index(param.name))
print "the following parameters did not match: " + str(all_conv_params)
if configs["test_model"]:
print "TESTING THE MODEL: CHECK THE INPUT SIZE!"
cg = ComputationGraph(model.cost)
f = theano.function(cg.inputs, [model.cost], on_unused_input="ignore", allow_input_downcast=True)
data = configs["get_streams"](configs["batch_size"])[0].get_epoch_iterator().next()
f(data[1], data[0], data[2])
print "Test passed! ;)"
model.monitorings = [cost, error_rate]
return model
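The configs dictionary consumed by setup_model is not shown on this page. A hypothetical minimal version, inferred only from the keys the function itself reads (LSTMAttention presumably expects additional keys that are not visible here), might look like this:
# Hypothetical configs for setup_model; keys inferred from the code above,
# values are placeholders and get_streams is an assumed factory function.
configs = {
    "classifier_dims": [512, 256, 10],  # dims of the Rectifier/Softmax classifier MLP
    "load_pretrained": False,           # skip loading VGG_CNN_params.npz
    "test_model": False,                # skip the forward-pass smoke test
    "batch_size": 32,
    "get_streams": get_streams,         # callable returning (train_stream, valid_stream)
}
model = setup_model(configs)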
Example 2: main
# Required module import: from blocks.bricks.cost import CategoricalCrossEntropy [as alias]
# Or: from blocks.bricks.cost.CategoricalCrossEntropy import name [as alias]
def main(save_to, num_epochs, bokeh=False):
mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
weights_init=IsotropicGaussian(0.01),
biases_init=Constant(0))
mlp.initialize()
x = tensor.matrix('features')
y = tensor.lmatrix('targets')
probs = mlp.apply(x)
cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
error_rate = MisclassificationRate().apply(y.flatten(), probs)
cg = ComputationGraph([cost])
W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
cost.name = 'final_cost'
mnist_train = MNIST("train")
mnist_test = MNIST("test")
algorithm = GradientDescent(
cost=cost, params=cg.parameters,
step_rule=Scale(learning_rate=0.1))
extensions = [Timing(),
FinishAfter(after_n_epochs=num_epochs),
DataStreamMonitoring(
[cost, error_rate],
DataStream(mnist_test,
iteration_scheme=SequentialScheme(
mnist_test.num_examples, 500)),
prefix="test"),
TrainingDataMonitoring(
[cost, error_rate,
aggregation.mean(algorithm.total_gradient_norm)],
prefix="train",
after_epoch=True),
Checkpoint(save_to),
Printing()]
if bokeh:
extensions.append(Plot(
'MNIST example',
channels=[
['test_final_cost',
'test_misclassificationrate_apply_error_rate'],
['train_total_gradient_norm']]))
main_loop = MainLoop(
algorithm,
DataStream(mnist_train,
iteration_scheme=SequentialScheme(
mnist_train.num_examples, 50)),
model=Model(cost),
extensions=extensions)
main_loop.run()
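Note how the name attribute set above drives the channel names referenced in the Plot extension: DataStreamMonitoring prefixes each variable's name with "test", so the regularized cost appears as test_final_cost, while the error rate, which was never renamed, keeps its auto-generated name. One extra line would shorten that channel:
# Naming the error rate explicitly would expose it as 'test_error_rate'
# instead of 'test_misclassificationrate_apply_error_rate'.
error_rate.name = 'error_rate'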
Example 3: apply
# Required module import: from blocks.bricks.cost import CategoricalCrossEntropy [as alias]
# Or: from blocks.bricks.cost.CategoricalCrossEntropy import name [as alias]
def apply(self, input_, target):
mlp = MLP(self.non_lins, self.dims,
weights_init=IsotropicGaussian(0.01),
biases_init=Constant(0),
name=self.name)
mlp.initialize()
probs = mlp.apply(T.flatten(input_, outdim=2))
probs.name = 'probs'
cost = CategoricalCrossEntropy().apply(target.flatten(), probs)
cost.name = "CE"
self.outputs = {}
self.outputs['probs'] = probs
self.outputs['cost'] = cost
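Example 3 shows only the apply method; the class it belongs to is not part of the snippet. A minimal, hypothetical container that makes the method runnable end to end (the class name, dims, and the final print are assumptions) could look like this:
import theano.tensor as T
from blocks.bricks import MLP, Rectifier, Softmax
from blocks.bricks.cost import CategoricalCrossEntropy
from blocks.initialization import IsotropicGaussian, Constant

class MLPCostBrick(object):
    # Hypothetical holder for the attributes that apply() expects.
    def __init__(self, non_lins, dims, name):
        self.non_lins = non_lins
        self.dims = dims
        self.name = name

    def apply(self, input_, target):
        # Same body as Example 3 above.
        mlp = MLP(self.non_lins, self.dims,
                  weights_init=IsotropicGaussian(0.01),
                  biases_init=Constant(0), name=self.name)
        mlp.initialize()
        probs = mlp.apply(T.flatten(input_, outdim=2))
        probs.name = 'probs'
        cost = CategoricalCrossEntropy().apply(target.flatten(), probs)
        cost.name = "CE"
        self.outputs = {'probs': probs, 'cost': cost}

brick = MLPCostBrick([Rectifier(), Softmax()], [784, 200, 10], name='clf')
brick.apply(T.tensor4('features'), T.lmatrix('targets'))
print(brick.outputs['cost'].name)  # -> "CE"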
Example 4: main
# Required module import: from blocks.bricks.cost import CategoricalCrossEntropy [as alias]
# Or: from blocks.bricks.cost.CategoricalCrossEntropy import name [as alias]
def main(save_to, num_epochs, batch_size):
mlp = MLP([Tanh(), Tanh(), Tanh(), Softmax()], [3072, 4096, 1024, 512, 10],
weights_init=IsotropicGaussian(0.01),
biases_init=Constant(0))
mlp.initialize()
x = tt.tensor4('features', dtype='float32')
y = tt.vector('label', dtype='int32')
probs = mlp.apply(x.reshape((-1,3072)))
cost = CategoricalCrossEntropy().apply(y, probs)
error_rate = MisclassificationRate().apply(y, probs)
cg = ComputationGraph([cost])
ws = VariableFilter(roles=[WEIGHT])(cg.variables)
cost = cost + .00005 * sum(([(w**2).sum() for w in ws]))
cost.name = 'final_cost'
train_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10', is_train=True)
valid_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10', is_train=False)
train_stream = train_dataset.get_stream(batch_size)
valid_stream = valid_dataset.get_stream(batch_size)
algorithm = GradientDescent(
cost=cost, parameters=cg.parameters,
step_rule=Adam(learning_rate=0.001))
extensions = [Timing(),
LogExtension('/home/belohlavek/ALI/mlp.log'),
FinishAfter(after_n_epochs=num_epochs),
DataStreamMonitoring([cost, error_rate], valid_stream, prefix="test"),
TrainingDataMonitoring(
[cost, error_rate, aggregation.mean(algorithm.total_gradient_norm)],
prefix="train",
after_epoch=True),
Checkpoint(save_to),
Printing()]
main_loop = MainLoop(algorithm,
train_stream,
model=Model(cost),
extensions=extensions)
main_loop.run()
Example 5: main
# Required module import: from blocks.bricks.cost import CategoricalCrossEntropy [as alias]
# Or: from blocks.bricks.cost.CategoricalCrossEntropy import name [as alias]
def main(job_id, params, config_file='params.ec'):
config = ConfigParser.ConfigParser()
config.readfp(open('./configs/{}'.format(config_file)))
pr = pprint.PrettyPrinter(indent=4)
pr.pprint(config)
net_name = config.get('hyperparams', 'net_name', 'adni')
struct_name = net_name.split('_')[0]
max_epoch = int(config.get('hyperparams', 'max_iter', 100))
base_lr = float(config.get('hyperparams', 'base_lr', 0.01))
train_batch = int(config.get('hyperparams', 'train_batch', 256))
valid_batch = int(config.get('hyperparams', 'valid_batch', 512))
test_batch = int(config.get('hyperparams', 'valid_batch', 512))
W_sd = float(config.get('hyperparams', 'W_sd', 0.01))
W_mu = float(config.get('hyperparams', 'W_mu', 0.0))
b_sd = float(config.get('hyperparams', 'b_sd', 0.01))
b_mu = float(config.get('hyperparams', 'b_mu', 0.0))
hidden_units = int(config.get('hyperparams', 'hidden_units', 32))
input_dropout_ratio = float(config.get('hyperparams', 'input_dropout_ratio', 0.2))
dropout_ratio = float(config.get('hyperparams', 'dropout_ratio', 0.2))
weight_decay = float(config.get('hyperparams', 'weight_decay', 0.001))
max_norm = float(config.get('hyperparams', 'max_norm', 100.0))
solver = config.get('hyperparams', 'solver_type', 'rmsprop')
data_file = config.get('hyperparams', 'data_file')
side = config.get('hyperparams', 'side', 'b')
input_dim = input_dims[struct_name]
# Spearmint optimization parameters:
if params:
base_lr = float(params['base_lr'][0])
dropout_ratio = float(params['dropout_ratio'][0])
hidden_units = params['hidden_units'][0]
weight_decay = params['weight_decay'][0]
if 'adagrad' in solver:
solver_type = CompositeRule([AdaGrad(learning_rate=base_lr), VariableClipping(threshold=max_norm)])
else:
solver_type = CompositeRule([RMSProp(learning_rate=base_lr), VariableClipping(threshold=max_norm)])
data_file = config.get('hyperparams', 'data_file')
if 'b' in side:
train = H5PYDataset(data_file, which_set='train')
valid = H5PYDataset(data_file, which_set='valid')
test = H5PYDataset(data_file, which_set='test')
x_l = tensor.matrix('l_features')
x_r = tensor.matrix('r_features')
x = tensor.concatenate([x_l, x_r], axis=1)
else:
train = H5PYDataset(data_file, which_set='train', sources=['{}_features'.format(side), 'targets'])
valid = H5PYDataset(data_file, which_set='valid', sources=['{}_features'.format(side), 'targets'])
test = H5PYDataset(data_file, which_set='test', sources=['{}_features'.format(side), 'targets'])
x = tensor.matrix('{}_features'.format(side))
y = tensor.lmatrix('targets')
# Define a feed-forward net with an input, two hidden layers, and a softmax output:
model = MLP(activations=[
Rectifier(name='h1'),
Rectifier(name='h2'),
Softmax(name='output'),
],
dims=[
input_dim[side],
hidden_units,
hidden_units,
2],
weights_init=IsotropicGaussian(std=W_sd, mean=W_mu),
biases_init=IsotropicGaussian(b_sd, b_mu))
# Don't forget to initialize params:
model.initialize()
# y_hat is the output of the neural net with x as its inputs
y_hat = model.apply(x)
# Define a cost function to optimize, and a classification error rate.
# Also apply the outputs from the net and corresponding targets:
cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
error = MisclassificationRate().apply(y.flatten(), y_hat)
error.name = 'error'
# This is the model: before applying dropout
model = Model(cost)
# Need to define the computation graph for the cost func:
cost_graph = ComputationGraph([cost])
# This returns a list of weight vectors for each layer
W = VariableFilter(roles=[WEIGHT])(cost_graph.variables)
# Add some regularization to this model:
#......... part of the code is omitted here .........
Example 6: Linear
# Required module import: from blocks.bricks.cost import CategoricalCrossEntropy [as alias]
# Or: from blocks.bricks.cost.CategoricalCrossEntropy import name [as alias]
h1, sd = rnn.apply(pre_rnn[:, :, :h_dim],
pre_rnn[:, :, h_dim:],
drops, is_for_test)
h1_to_o = Linear(name='h1_to_o',
input_dim=h_dim,
output_dim=y_dim)
pre_softmax = h1_to_o.apply(h1)
softmax = Softmax()
shape = pre_softmax.shape
softmax_out = softmax.apply(pre_softmax.reshape((-1, y_dim)))
softmax_out = softmax_out.reshape(shape)
softmax_out.name = 'softmax_out'
# comparing only last time-step
cost = CategoricalCrossEntropy().apply(y, softmax_out[-1])
cost.name = 'CrossEntropy'
error_rate = MisclassificationRate().apply(y, softmax_out[-1])
error_rate.name = 'error_rate'
# Initialization
for brick in (x_to_h1, h1_to_o, rnn):
brick.weights_init = Glorot()
brick.biases_init = Constant(0)
brick.initialize()
train_stream = get_stream('train', batch_size, h_dim, False)
data = train_stream.get_epoch_iterator(as_dict=True).next()
cg = ComputationGraph(cost)
f = theano.function(cg.inputs, cost)
print f(data['y'], data['x'], data['is_for_test'], data['drops'])
Example 7: Rectifier
# Required module import: from blocks.bricks.cost import CategoricalCrossEntropy [as alias]
# Or: from blocks.bricks.cost.CategoricalCrossEntropy import name [as alias]
o = Rectifier().apply(o)
l = Linear(input_dim=l.get_dim("output"),
output_dim=10,
weights_init=IsotropicGaussian(std=0.01),
biases_init=IsotropicGaussian(std=0.01))
l.initialize()
o = l.apply(o)
o = Softmax().apply(o)
Y = T.imatrix(name="targets")
cost = CategoricalCrossEntropy().apply(Y.flatten(), o)
cost.name = "cost"
miss_class = 1.0 - MisclassificationRate().apply(Y.flatten(), o)
miss_class.name = "accuracy"
cg = ComputationGraph(cost)
print cg.shared_variables
bricks = [get_brick(var) for var in cg.variables if get_brick(var)]
for i, b in enumerate(bricks):
b.name += str(i)
step_rule = AdaM()
algorithm = GradientDescent(cost=cost, step_rule=step_rule)
Example 8: create_model
# Required module import: from blocks.bricks.cost import CategoricalCrossEntropy [as alias]
# Or: from blocks.bricks.cost.CategoricalCrossEntropy import name [as alias]
def create_model(self, symbols_num = 500):
# Hyperparameters
# The dimension of the hidden state of the GRUs in each direction.
hidden_states = self.args.encoder_hidden_dims
# Dimension of the word-embedding space
embedding_dims = self.args.source_embeddings_dim
###################
# Declaration of the Theano variables that come from the data stream
###################
# The context document.
context_bt = tt.lmatrix('context')
# Context document mask used to distinguish real symbols of the sequence from the padding symbols at its end
context_mask_bt = tt.matrix('context_mask')
# The question
question_bt = tt.lmatrix('question')
question_mask_bt = tt.matrix('question_mask')
# The correct answer
y = tt.lmatrix('answer')
y = y[:,0] # originally answers are in a 2d matrix, here we convert it to a vector
# The candidates among which the answer is selected
candidates_bi = tt.lmatrix("candidates")
candidates_bi_mask = tt.matrix("candidates_mask")
###################
# Network's components
###################
# Lookup table with randomly initialized word embeddings
lookup = LookupTable(symbols_num, embedding_dims, weights_init=Uniform(width=0.2))
# bidirectional encoder that translates context
context_encoder = self.create_bidi_encoder("context_encoder", embedding_dims, hidden_states)
# bidirectional encoder for question
question_encoder = self.create_bidi_encoder("question_encoder", embedding_dims, hidden_states)
# Initialize the components (where not done upon creation)
lookup.initialize()
###################
# Wiring the components together
#
# Where present, the 3 letters at the end of the variable name identify its dimensions:
# b ... position of the example within the batch
# t ... position of the word within the document/question
# f ... features of the embedding vector
###################
### Read the context document
# Map token indices to word embeddings
context_embedding_tbf = lookup.apply(context_bt.T)
# Read the embedded context document using the bidirectional GRU and produce the contextual embedding of each word
memory_encoded_btf = context_encoder.apply(context_embedding_tbf, context_mask_bt.T).dimshuffle(1,0,2)
memory_encoded_btf.name = "memory_encoded_btf"
### Correspondingly, read the query
x_embedded_tbf = lookup.apply(question_bt.T)
x_encoded_btf = question_encoder.apply(x_embedded_tbf, question_mask_bt.T).dimshuffle(1,0,2)
# The query encoding is a concatenation of the final states of the forward and backward GRU encoder
x_forward_encoded_bf = x_encoded_btf[:,-1,0:hidden_states]
x_backward_encoded_bf = x_encoded_btf[:,0,hidden_states:hidden_states*2]
query_representation_bf = tt.concatenate([x_forward_encoded_bf,x_backward_encoded_bf],axis=1)
# Compute the attention on each word in the context as a dot product of its contextual embedding and the query
mem_attention_presoft_bt = tt.batched_dot(query_representation_bf, memory_encoded_btf.dimshuffle(0,2,1))
# TODO is this pre-masking necessary?
mem_attention_presoft_masked_bt = tt.mul(mem_attention_presoft_bt,context_mask_bt)
# Normalize the attention using softmax
mem_attention_bt = SoftmaxWithMask(name="memory_query_softmax").apply(mem_attention_presoft_masked_bt,context_mask_bt)
if self.args.weighted_att:
# compute weighted attention over original word vectors
att_weighted_responses_bf = theano.tensor.batched_dot(mem_attention_bt, context_embedding_tbf.dimshuffle(1,0,2))
# compare desired response to all candidate responses
# select relevant candidate answer words
candidates_embeddings_bfi = lookup.apply(candidates_bi).dimshuffle(0,2,1)
# convert it to output symbol probabilities
y_hat_presoft = tt.batched_dot(att_weighted_responses_bf, candidates_embeddings_bfi)
y_hat = SoftmaxWithMask(name="output_softmax").apply(y_hat_presoft,candidates_bi_mask)
else:
# Sum the attention of each candidate word across the whole context document,
#......... part of the code is omitted here .........
Example 9: Linear
# Required module import: from blocks.bricks.cost import CategoricalCrossEntropy [as alias]
# Or: from blocks.bricks.cost.CategoricalCrossEntropy import name [as alias]
hidden_to_output = Linear(name="hidden_to_output", input_dim=50, output_dim=10)
y_hat = Softmax().apply(hidden_to_output.apply(h))
y = tensor.lmatrix("targets")
from blocks.bricks.cost import CategoricalCrossEntropy
cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
from blocks.roles import WEIGHT
from blocks.graph import ComputationGraph
from blocks.filter import VariableFilter
cg = ComputationGraph(cost)
W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
cost = cost + 0.005 * (W1 ** 2).sum() + 0.005 * (W2 ** 2).sum()
cost.name = "cost_with_regularization"
from blocks.initialization import IsotropicGaussian, Constant
input_to_hidden.weights_init = hidden_to_output.weights_init = IsotropicGaussian(0.01)
input_to_hidden.biases_init = hidden_to_output.biases_init = Constant(0)
input_to_hidden.initialize()
hidden_to_output.initialize()
from blocks.algorithms import GradientDescent, Scale
algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Scale(learning_rate=0.1))
from blocks.extensions.monitoring import DataStreamMonitoring
monitor = DataStreamMonitoring(variables=[cost], data_stream=dst, prefix="test")
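Example 9 is a fragment from the Blocks tutorial: dst, h, and input_to_hidden are defined earlier in that tutorial and do not appear in this excerpt. A plausible reconstruction of the missing test stream dst, assuming Fuel's MNIST dataset (as in Example 12), would be:
# Assumed construction of the 'dst' data stream used by DataStreamMonitoring;
# the real definition lives outside this excerpt.
from fuel.datasets import MNIST
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme
from fuel.transformers import Flatten

mnist_test = MNIST(("test",))
dst = Flatten(
    DataStream.default_stream(
        mnist_test,
        iteration_scheme=SequentialScheme(mnist_test.num_examples, 256)),
    which_sources=('features',))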
Example 10: main
# Required module import: from blocks.bricks.cost import CategoricalCrossEntropy [as alias]
# Or: from blocks.bricks.cost.CategoricalCrossEntropy import name [as alias]
def main(num_epochs=100):
x = tensor.matrix('features')
m = tensor.matrix('features_mask')
x_int = x.astype(dtype='int32').T
train_dataset = TextFile('inspirational.txt')
train_dataset.indexables[0] = numpy.array(sorted(
train_dataset.indexables[0], key=len
))
n_voc = len(train_dataset.dict.keys())
init_probs = numpy.array(
[sum(filter(lambda idx:idx == w,
[s[0] for s in train_dataset.indexables[
train_dataset.sources.index('features')]]
)) for w in xrange(n_voc)],
dtype=theano.config.floatX
)
init_probs = init_probs / init_probs.sum()
n_h = 100
linear_embedding = LookupTable(
length=n_voc,
dim=n_h,
weights_init=Uniform(std=0.01),
biases_init=Constant(0.)
)
linear_embedding.initialize()
lstm_biases = numpy.zeros(4 * n_h).astype(dtype=theano.config.floatX)
lstm_biases[n_h:(2 * n_h)] = 4.
rnn = SimpleRecurrent(
dim=n_h,
activation=Tanh(),
weights_init=Uniform(std=0.01),
biases_init=Constant(0.)
)
rnn.initialize()
score_layer = Linear(
input_dim=n_h,
output_dim=n_voc,
weights_init=Uniform(std=0.01),
biases_init=Constant(0.)
)
score_layer.initialize()
embedding = (linear_embedding.apply(x_int[:-1])
* tensor.shape_padright(m.T[1:]))
rnn_out = rnn.apply(inputs=embedding, mask=m.T[1:])
probs = softmax(
sequence_map(score_layer.apply, rnn_out, mask=m.T[1:])[0]
)
idx_mask = m.T[1:].nonzero()
cost = CategoricalCrossEntropy().apply(
x_int[1:][idx_mask[0], idx_mask[1]],
probs[idx_mask[0], idx_mask[1]]
)
cost.name = 'cost'
misclassification = MisclassificationRate().apply(
x_int[1:][idx_mask[0], idx_mask[1]],
probs[idx_mask[0], idx_mask[1]]
)
misclassification.name = 'misclassification'
cg = ComputationGraph([cost])
params = cg.parameters
algorithm = GradientDescent(
cost=cost,
params=params,
step_rule=Adam()
)
train_data_stream = Padding(
data_stream=DataStream(
dataset=train_dataset,
iteration_scheme=BatchwiseShuffledScheme(
examples=train_dataset.num_examples,
batch_size=10,
)
),
mask_sources=('features',)
)
model = Model(cost)
extensions = []
extensions.append(Timing())
extensions.append(FinishAfter(after_n_epochs=num_epochs))
extensions.append(TrainingDataMonitoring(
[cost, misclassification],
prefix='train',
after_epoch=True))
batch_size = 10
length = 30
trng = MRG_RandomStreams(18032015)
u = trng.uniform(size=(length, batch_size, n_voc))
gumbel_noise = -tensor.log(-tensor.log(u))
init_samples = (tensor.log(init_probs).dimshuffle(('x', 0))
#......... part of the code is omitted here .........
Example 11: variable
# Required module import: from blocks.bricks.cost import CategoricalCrossEntropy [as alias]
# Or: from blocks.bricks.cost.CategoricalCrossEntropy import name [as alias]
statistics_list=[(M1,S1,a1), (M2,S2,a2), (M3,S3,a3), (M4,S4,a4), (M5,S5,a5)]
# initialize_variables
# for variable (M,S) in variables:
# compute M and S in the whole data.
if normalization == 'bn2':
for m,s,var in statistics_list:
var.tag.aggregation_scheme = MeanAndVariance(var, var.shape[0], axis = 0)
init_mn, init_var = DatasetEvaluator([var]).evaluate(stream_train)[var.name]
m.set_value(init_mn.astype(floatX))
s.set_value(sqrt(init_var).astype(floatX))
cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
cost.name = 'cost'
error_rate = MisclassificationRate().apply(y.flatten(), probs)
error_rate.name = 'error_rate'
cg = ComputationGraph([cost])
parameters = cg.parameters
# add gradient descent to M,S
if normalization == 'bn2':
for m,s,var in statistics_list:
parameters.extend([m,s])
algorithm = GradientDescent(
cost=cost, parameters=parameters, step_rule=Adam(0.01))
#update the M and S with batch statistics
Example 12: main
# Required module import: from blocks.bricks.cost import CategoricalCrossEntropy [as alias]
# Or: from blocks.bricks.cost.CategoricalCrossEntropy import name [as alias]
def main(save_to, num_epochs):
mlp = MLP([Tanh(), Tanh(), Softmax()], [784, 100, 100, 10],
weights_init=IsotropicGaussian(0.01),
biases_init=Constant(0))
mlp.initialize()
x = tensor.matrix('features')
y = tensor.lmatrix('targets')
probs = mlp.apply(tensor.flatten(x, outdim=2))
cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
error_rate = MisclassificationRate().apply(y.flatten(), probs)
cg = ComputationGraph([cost, error_rate])
cost.name = 'final_cost'
test_cost = cost
for_dropout = VariableFilter(roles=[INPUT],
bricks=mlp.linear_transformations[1:])(cg.variables)
dropout_graph = apply_dropout(cg, for_dropout, 0.5)
dropout_graph = apply_dropout(dropout_graph, [x], 0.1)
dropout_cost, dropout_error_rate = dropout_graph.outputs
mnist_train = MNIST(("train",))
mnist_test = MNIST(("test",))
algorithm = GradientDescent(
cost=dropout_cost, parameters=cg.parameters,
step_rule=Scale(learning_rate=0.1))
extensions = [Timing(),
FinishAfter(after_n_epochs=num_epochs),
DataStreamMonitoring(
[cost, error_rate],
Flatten(
DataStream.default_stream(
mnist_test,
iteration_scheme=SequentialScheme(
mnist_test.num_examples, 500)),
which_sources=('features',)),
prefix="test"),
TrainingDataMonitoring(
[dropout_cost, dropout_error_rate,
aggregation.mean(algorithm.total_gradient_norm)],
prefix="train",
after_epoch=True),
Checkpoint(save_to),
Printing()]
if BLOCKS_EXTRAS_AVAILABLE:
extensions.append(Plot(
'MNIST example',
channels=[
['test_final_cost',
'test_misclassificationrate_apply_error_rate'],
['train_total_gradient_norm']]))
main_loop = MainLoop(
algorithm,
Flatten(
DataStream.default_stream(
mnist_train,
iteration_scheme=SequentialScheme(
mnist_train.num_examples, 50)),
which_sources=('features',)),
model=Model(dropout_cost),
extensions=extensions)
main_loop.run()
Example 13: _create_main_loop
# Required module import: from blocks.bricks.cost import CategoricalCrossEntropy [as alias]
# Or: from blocks.bricks.cost.CategoricalCrossEntropy import name [as alias]
def _create_main_loop(self):
# hyper parameters
hp = self.params
batch_size = hp['batch_size']
biases_init = Constant(0)
batch_normalize = hp['batch_normalize']
### Build fprop
tensor5 = T.TensorType(config.floatX, (False,)*5)
X = tensor5("images")
#X = T.tensor4("images")
y = T.lvector('targets')
gnet_params = OrderedDict()
#X_shuffled = X[:, :, :, :, [2, 1, 0]]
#X_shuffled = gpu_contiguous(X.dimshuffle(0, 1, 4, 2, 3)) * 255
X = X[:, :, :, :, [2, 1, 0]]
X_shuffled = X.dimshuffle((0, 1, 4, 2, 3)) * 255
X_r = X_shuffled.reshape((X_shuffled.shape[0],
X_shuffled.shape[1]*X_shuffled.shape[2],
X_shuffled.shape[3], X_shuffled.shape[4]))
X_r = X_r - (np.array([104, 117, 123])[None, :, None, None]).astype('float32')
expressions, input_data, param = stream_layer_exp(inputs = ('data', X_r),
mode='rgb')
res = expressions['outloss']
y_hat = res.flatten(ndim=2)
import pdb; pdb.set_trace()
### Build Cost
cost = CategoricalCrossEntropy().apply(y, y_hat)
cost = T.cast(cost, theano.config.floatX)
cost.name = 'cross_entropy'
y_pred = T.argmax(y_hat, axis=1)
misclass = T.cast(T.mean(T.neq(y_pred, y)), theano.config.floatX)
misclass.name = 'misclass'
monitored_channels = []
monitored_quantities = [cost, misclass, y_hat, y_pred]
model = Model(cost)
training_cg = ComputationGraph(monitored_quantities)
inference_cg = ComputationGraph(monitored_quantities)
### Get evaluation function
#training_eval = training_cg.get_theano_function(additional_updates=bn_updates)
training_eval = training_cg.get_theano_function()
#inference_eval = inference_cg.get_theano_function()
# Dataset
test = JpegHDF5Dataset('test',
#name='jpeg_data_flows.hdf5',
load_in_memory=True)
#mean = np.load(os.path.join(os.environ['UCF101'], 'mean.npy'))
import pdb; pdb.set_trace()
### Eval
labels = np.zeros(test.num_video_examples)
y_hat = np.zeros((test.num_video_examples, 101))
labels_flip = np.zeros(test.num_video_examples)
y_hat_flip = np.zeros((test.num_video_examples, 101))
### Important to shuffle list for batch normalization statistic
#rng = np.random.RandomState()
#examples_list = range(test.num_video_examples)
#import pdb; pdb.set_trace()
#rng.shuffle(examples_list)
nb_frames=1
for i in xrange(24):
scheme = HDF5SeqScheme(test.video_indexes,
examples=test.num_video_examples,
batch_size=batch_size,
f_subsample=i,
nb_subsample=25,
frames_per_video=nb_frames)
#for crop in ['upleft', 'upright', 'downleft', 'downright', 'center']:
for crop in ['center']:
stream = JpegHDF5Transformer(
input_size=(240, 320), crop_size=(224, 224),
#input_size=(256, 342), crop_size=(224, 224),
crop_type=crop,
translate_labels = True,
flip='noflip', nb_frames = nb_frames,
data_stream=ForceFloatX(DataStream(
dataset=test, iteration_scheme=scheme)))
stream_flip = JpegHDF5Transformer(
input_size=(240, 320), crop_size=(224, 224),
#input_size=(256, 342), crop_size=(224, 224),
crop_type=crop,
translate_labels = True,
flip='flip', nb_frames = nb_frames,
data_stream=ForceFloatX(DataStream(
dataset=test, iteration_scheme=scheme)))
#......... part of the code is omitted here .........
Example 14: create_network
# Required module import: from blocks.bricks.cost import CategoricalCrossEntropy [as alias]
# Or: from blocks.bricks.cost.CategoricalCrossEntropy import name [as alias]
def create_network(inputs=None, batch=batch_size):
if inputs is None:
inputs = T.tensor4('features')
x = T.cast(inputs,'float32')
x = x / 255. if dataset != 'binarized_mnist' else x
# GatedPixelCNN
gated = GatedPixelCNN(
name='gated_layer_0',
filter_size=7,
image_size=(img_dim,img_dim),
num_filters=h*n_channel,
num_channels=n_channel,
batch_size=batch,
weights_init=IsotropicGaussian(std=0.02, mean=0),
biases_init=Constant(0.02),
res=False
)
gated.initialize()
x_v, x_h = gated.apply(x, x)
for i in range(n_layer):
gated = GatedPixelCNN(
name='gated_layer_{}'.format(i+1),
filter_size=3,
image_size=(img_dim,img_dim),
num_channels=h*n_channel,
batch_size=batch,
weights_init=IsotropicGaussian(std=0.02, mean=0),
biases_init=Constant(0.02),
res=True
)
gated.initialize()
x_v, x_h = gated.apply(x_v, x_h)
conv_list = []
conv_list.extend([Rectifier(), ConvolutionalNoFlip((1,1), h*n_channel, mask_type='B', name='1x1_conv_1')])
#conv_list.extend([Rectifier(), ConvolutionalNoFlip((1,1), h*n_channel, mask='B', name='1x1_conv_2')])
conv_list.extend([Rectifier(), ConvolutionalNoFlip(*third_layer, mask_type='B', name='output_layer')])
sequence = ConvolutionalSequence(
conv_list,
num_channels=h*n_channel,
batch_size=batch,
image_size=(img_dim,img_dim),
border_mode='half',
weights_init=IsotropicGaussian(std=0.02, mean=0),
biases_init=Constant(0.02),
tied_biases=False
)
sequence.initialize()
x = sequence.apply(x_h)
if MODE == '256ary':
x = x.reshape((-1, 256, n_channel, img_dim, img_dim)).dimshuffle(0,2,3,4,1)
x = x.reshape((-1, 256))
x_hat = Softmax().apply(x)
inp = T.cast(inputs, 'int64').flatten()
cost = CategoricalCrossEntropy().apply(inp, x_hat) * img_dim * img_dim
cost_bits_dim = categorical_crossentropy(log_softmax(x), inp)
else:
x_hat = Logistic().apply(x)
cost = BinaryCrossEntropy().apply(inputs, x_hat) * img_dim * img_dim
#cost = T.nnet.binary_crossentropy(x_hat, inputs)
#cost = cost.sum() / inputs.shape[0]
cost_bits_dim = -(inputs * T.log2(x_hat) + (1.0 - inputs) * T.log2(1.0 - x_hat)).mean()
cost_bits_dim.name = "nnl_bits_dim"
cost.name = 'loglikelihood_nat'
return cost, cost_bits_dim
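create_network returns the named training cost together with a bits-per-dimension diagnostic; a hypothetical way to wire them into a Blocks training loop (train_stream and the learning rate are assumptions, not part of the example) could be:
# Hypothetical training hookup for the costs returned by create_network().
from blocks.algorithms import GradientDescent, Adam
from blocks.graph import ComputationGraph
from blocks.model import Model
from blocks.main_loop import MainLoop
from blocks.extensions import Printing
from blocks.extensions.monitoring import TrainingDataMonitoring

cost, cost_bits_dim = create_network()
cg = ComputationGraph(cost)
algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                            step_rule=Adam(learning_rate=1e-3))
main_loop = MainLoop(algorithm, train_stream,  # train_stream is assumed to exist
                     model=Model(cost),
                     extensions=[TrainingDataMonitoring([cost, cost_bits_dim],
                                                        prefix='train',
                                                        after_epoch=True),
                                 Printing()])
main_loop.run()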
Example 15: train_net
# Required module import: from blocks.bricks.cost import CategoricalCrossEntropy [as alias]
# Or: from blocks.bricks.cost.CategoricalCrossEntropy import name [as alias]
def train_net(net, train_stream, test_stream, L1 = None, L2=None, early_stopping=False,
finish=None, dropout=False, jobid=None, update=None,
duration= None,
**ignored):
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')
y_hat = net.apply(x)
#Cost
cost_before = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
cost_before.name = "cost_without_regularization"
#Error
#Taken from brodesf
error = MisclassificationRate().apply(y.flatten(), y_hat)
error.name = "Misclassification rate"
#Regularization
cg = ComputationGraph(cost_before)
WS = VariableFilter(roles=[WEIGHT])(cg.variables)
if dropout:
print("Dropout")
cg = apply_dropout(cg, WS, 0.5)
if L1:
print("L1 with lambda ",L1)
L1_reg = L1 * sum([abs(W).sum() for W in WS])
L1_reg.name = "L1 regularization"
cost_before += L1_reg
if L2:
print("L2 with lambda ",L2)
L2_reg = L2 * sum([(W ** 2).sum() for W in WS])
L2_reg.name = "L2 regularization"
cost_before += L2_reg
cost = cost_before
cost.name = 'cost_with_regularization'
#Initialization
print("Initilization")
net.initialize()
#Algorithm
step_rule = Scale(learning_rate=0.1)
if update is not None:
if update == "rmsprop":
print("Using RMSProp")
step_rule = RMSProp()
remove_not_finite = RemoveNotFinite(0.9)
step_rule = CompositeRule([step_rule, remove_not_finite])
algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=step_rule)
print("Extensions")
extensions = []
#Monitoring
monitor = DataStreamMonitoring(variables=[cost, error], data_stream=test_stream, prefix="test")
extensions.append(monitor)
def filename(suffix=""):
prefix = jobid if jobid else str(os.getpid())
ctime = str(time.time())
return "checkpoints/" + prefix + "_" + ctime + "_" + suffix + ".zip"
#Serialization
#serialization = Checkpoint(filename())
#extensions.append(serialization)
notification = "test_"+error.name
track = TrackTheBest(notification)
best_notification = track.notification_name
checkpointbest = SaveBest(best_notification, filename("best"))
extensions.extend([track, checkpointbest])
if early_stopping:
print("Early stopping")
stopper = FinishIfNoImprovementAfterPlus(best_notification)
extensions.append(stopper)
#Other extensions
if finish != None:
print("Force finish ", finish)
extensions.append(FinishAfter(after_n_epochs=finish))
if duration != None:
print("Stop after " , duration, " seconds")
extensions.append(FinishAfterTime(duration))
extensions.extend([
Timing(),
Printing()
])
#Main loop
main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm, extensions=extensions)
#......... part of the code is omitted here .........