This article collects typical usage examples of the Python method mlp.MLP.negative_log_likelihood. If you are wondering what MLP.negative_log_likelihood does, how to call it, or what real code that uses it looks like, the curated examples below may help. You can also explore further usage examples of the class the method belongs to, mlp.MLP.
The following shows 12 code examples of the MLP.negative_log_likelihood method, sorted by popularity by default.
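Most of the examples below follow the MLP class from the Theano deep-learning tutorials, whose softmax output layer provides negative_log_likelihood. As a reminder of what that method computes, here is a minimal sketch of such an output layer; it follows the standard tutorial definition and attribute names (p_y_given_x, y_pred), which the mlp modules in these examples are assumed to mirror:

import theano.tensor as T

class SoftmaxOutputSketch(object):
    """Minimal sketch of the output layer behind MLP.negative_log_likelihood."""
    def __init__(self, input, W, b):
        # class-membership probabilities, one row per example
        self.p_y_given_x = T.nnet.softmax(T.dot(input, W) + b)
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

    def negative_log_likelihood(self, y):
        # mean negative log-probability of the correct label over the minibatch
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        # zero-one loss: fraction of misclassified examples in the minibatch
        return T.mean(T.neq(self.y_pred, y))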
Example 1: test_mlp
# Required import: from mlp import MLP [as alias]
# Or: from mlp.MLP import negative_log_likelihood [as alias]
def test_mlp(dataset, hyper):
train_set_x, train_set_y = dataset.sharedTrain
valid_set_x, valid_set_y = dataset.sharedValid
test_set_x, test_set_y = dataset.sharedTest
n_train_batches = train_set_x.get_value(borrow=True).shape[0] / hyper.batchSize
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / hyper.batchSize
n_test_batches = test_set_x.get_value(borrow=True).shape[0] / hyper.batchSize
validationFrequency = min(n_train_batches, hyper.patience / 2)
print '... building the model'
# allocate symbolic variables for the data
index = T.lscalar() # index to a [mini]batch
x = T.matrix('x') # the data is presented as rasterized images
y = T.ivector('y') # the labels are presented as 1D vector of
# [int] labels
rng = numpy.random.RandomState(1234)
# construct the MLP class
classifier = MLP(rng=rng, input=x, n_in=dataset.n_in,
n_hidden=hyper.nHidden1, n_out=dataset.n_out)
# the cost we minimize during training is the negative log likelihood of
# the model plus the regularization terms (L1 and L2); cost is expressed
# here symbolically
cost = classifier.negative_log_likelihood(y) \
+ hyper.L1Reg * classifier.L1 \
+ hyper.L2Reg * classifier.L2_sqr
# compiling a Theano function that computes the mistakes that are made
# by the model on a minibatch
test_model = theano.function(inputs=[index],
outputs=classifier.errors(y),
givens={
x: test_set_x[index * hyper.batchSize:(index + 1) * hyper.batchSize],
y: test_set_y[index * hyper.batchSize:(index + 1) * hyper.batchSize]})
validate_model = theano.function(inputs=[index],
outputs=classifier.errors(y),
givens={
x: valid_set_x[index * hyper.batchSize:(index + 1) * hyper.batchSize],
y: valid_set_y[index * hyper.batchSize:(index + 1) * hyper.batchSize]})
# compute the gradient of cost with respect to theta (stored in params)
# the resulting gradients will be stored in a list gparams
gparams = []
for param in classifier.params:
gparam = T.grad(cost, param)
gparams.append(gparam)
# specify how to update the parameters of the model as a list of
# (variable, update expression) pairs
updates = []
# given two lists of the same length, A = [a1, a2, a3, a4] and B = [b1, b2, b3, b4],
# zip generates a list C of the same size, where each element
# is a pair formed from the two lists:
# C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
for param, gparam in zip(classifier.params, gparams):
updates.append((param, param - hyper.learningRate * gparam))
# compiling a Theano function `train_model` that returns the cost, but
# in the same time updates the parameter of the model based on the rules
# defined in `updates`
train_model = theano.function(inputs=[index], outputs=cost,
updates=updates,
givens={
x: train_set_x[index * hyper.batchSize:(index + 1) * hyper.batchSize],
y: train_set_y[index * hyper.batchSize:(index + 1) * hyper.batchSize]})
###############
# TRAIN MODEL #
###############
print '... training'
best_params = None
best_validation_loss = numpy.inf
best_iter = 0
test_score = 0.
start_time = time.time()
epoch = 0
done_looping = False
patience = hyper.patience
while (epoch < hyper.numberEpochs) and (not done_looping):
epoch = epoch + 1
print('epoch %i, time %0.2fm' % (epoch, (time.time() - start_time) / 60.0))
for minibatch_index in xrange(n_train_batches):
minibatch_avg_cost = train_model(minibatch_index)
# iteration number
iter = (epoch - 1) * n_train_batches + minibatch_index
if (iter + 1) % validationFrequency == 0:
# compute zero-one loss on validation set
validation_losses = [validate_model(i) for i
in xrange(n_valid_batches)]
#......... part of the code omitted here .........
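Example 1 reads its settings from a hyper object and its data from a dataset object (sharedTrain/sharedValid/sharedTest plus n_in and n_out) rather than from keyword arguments. A minimal stand-in for hyper, with the attribute names inferred from the snippet above and purely illustrative values:

from collections import namedtuple

# Hypothetical hyper-parameter bundle for test_mlp(dataset, hyper); field names
# match the attributes the function reads, the values are illustrative only.
HyperParams = namedtuple('HyperParams', ['batchSize', 'learningRate', 'L1Reg', 'L2Reg',
                                         'nHidden1', 'numberEpochs', 'patience'])
hyper = HyperParams(batchSize=20, learningRate=0.01, L1Reg=0.00, L2Reg=0.0001,
                    nHidden1=500, numberEpochs=1000, patience=10000)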
Example 2: xrange
# Required import: from mlp import MLP [as alias]
# Or: from mlp.MLP import negative_log_likelihood [as alias]
y: valid_set_y[index * batch_size:(index + 1) * batch_size]})
# calculate current loss
svm_train_losses = [svm_train_loss(i) for i in xrange(n_train_batches)]
svm_train_score = numpy.mean(svm_train_losses)
svm_validation_losses = [svm_validate_model(i) for i in xrange(n_valid_batches)]
svm_validation_score = numpy.mean(svm_validation_losses)
svm_test_losses = [svm_test_model(i) for i in xrange(n_test_batches)]
svm_test_score = numpy.mean(svm_test_losses)
print "SVM: Training: %.3f%%, Validation: %.3f%%, Test: %.3f%%" % \
(svm_train_score * 100., svm_validation_score * 100., svm_test_score * 100.)
print 'Training SVM logreg layer...'
# train logistic regression layer
cost = svm_classifier.negative_log_likelihood(y) \
+ L1_reg * classifier.L1 \
+ L2_reg * classifier.L2_sqr
# update only logreg parameters using gradient descent
svm_updates = {param: param - learning_rate * T.grad(cost, param)
for param in svm_classifier.logRegressionLayer.params}
svm_train_model = theano.function(inputs=[index], outputs=cost,
updates=svm_updates,
givens={
x: train_set_x[index * batch_size:(index + 1) * batch_size],
y: train_set_y[index * batch_size:(index + 1) * batch_size]})
# early-stopping parameters
patience = 10000 # look at this many examples regardless
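The snippet stops before the fine-tuning loop. Since svm_updates only lists svm_classifier.logRegressionLayer.params, running svm_train_model leaves the hidden layer untouched. A hedged sketch of how that loop might look (the epoch count and the loop itself are assumptions, not part of the original source):

# Hypothetical fine-tuning loop: only the logistic regression layer is updated,
# because svm_updates contains no hidden-layer parameters.
n_finetune_epochs = 50  # illustrative value
for finetune_epoch in xrange(n_finetune_epochs):
    for minibatch_index in xrange(n_train_batches):
        minibatch_cost = svm_train_model(minibatch_index)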
Example 3: test_mlp
# Required import: from mlp import MLP [as alias]
# Or: from mlp.MLP import negative_log_likelihood [as alias]
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=500,
batch_size=20, n_hidden=3):
numpy.random.seed(1)
rng = numpy.random.RandomState(1234)
# number of points per cluster (as the scatter plot shows, two clusters of the same color form one class)
N = 100
# explanatory variables (features)
x = numpy.matrix([[0] * N + [1] * N + [0] * N + [1] * N,
[0] * N + [1] * N + [1] * N + [0] * N], dtype=numpy.float32).T
x += numpy.random.rand(N * 4, 2) / 2
# target variable (labels)
y = numpy.array([0] * N * 2 + [1] * N * 2, dtype=numpy.int32)
# plot in 2D
fig = plt.figure()
ax = fig.add_subplot(111)
colors = ['red'] * N * 2 + ['blue'] * N * 2
ax.scatter(x[:, 0], x[:, 1], color=colors)
plt.show()
# declare as Theano shared variables
x_data = theano.shared(value=x, name='x', borrow=True)
y_data = theano.shared(value=y, name='y', borrow=True)
n_train_batches = x_data.get_value(borrow=True).shape[0] / batch_size
index = T.lscalar()
x = T.matrix('x')
y = T.ivector('y')
# create the MLP instance
classifier = MLP(rng=rng, input=x, n_in=2, n_hidden=n_hidden, n_out=2)
# loss function
cost = (
classifier.negative_log_likelihood(y)
+ L1_reg * classifier.L1
+ L2_reg * classifier.L2_sqr
)
# update rules for each weight matrix and bias
gparams = [T.grad(cost, param) for param in classifier.params]
updates = [
(param, param - learning_rate * gparam)
for param, gparam in zip(classifier.params, gparams)
]
train_model = theano.function(
inputs=[index],
outputs=cost,
updates=updates,
givens={
x: x_data[index * batch_size: (index + 1) * batch_size],
y: y_data[index * batch_size: (index + 1) * batch_size]
}
)
# get the hidden layer output
apply_hidden = theano.function(inputs=[x], outputs=classifier.hiddenLayer.output)
labels = y_data.eval()
# plot in 3D
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# set the display range / camera angle
ax.set_xlabel('x0')
ax.set_xlim(-1, 1.5)
ax.set_ylabel('x1')
ax.set_ylim(-0.5, 1.5)
ax.set_zlabel('z')
ax.set_zlim(-1, 1)
ax.view_init(azim=30, elev=30)
# compute the z coordinate of the separating plane for coordinates x0, x1
def calc_z(classifier, x0, x1):
w = classifier.logRegressionLayer.W.get_value()
b = classifier.logRegressionLayer.b.get_value()
z = ((w[0, 0] - w[0, 1]) * x0 + (w[1, 0] - w[1, 1]) * x1 + b[0] - b[1]) / (w[2, 1] - w[2, 0])
return z
objs = []
colors = ['red'] * N * 2 + ['blue'] * N * 2
for epoch in range(n_epochs):
for minibatch_index in xrange(n_train_batches):
train_model(minibatch_index)
# redraw every 10 epochs
if epoch % 10 == 0:
z_data = apply_hidden(x_data.get_value())
s = ax.scatter(z_data[:, 0], z_data[:, 1], z_data[:, 2], color=colors)
zx0_min = z_data[:, 0].min()
zx0_max = z_data[:, 0].max()
zx1_min = z_data[:, 1].min()
zx1_max = z_data[:, 1].max()
#......... part of the code omitted here .........
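The calc_z helper above is the decision boundary of the softmax output layer written out by hand: in the 3-dimensional hidden space the two class scores tie where (W[:, 0] - W[:, 1]) · z + (b[0] - b[1]) = 0, and solving that plane equation for the third coordinate gives the expression in the code. The same computation restated as a sketch (vectorised, but mathematically identical to calc_z):

def separating_plane_z(W, b, x0, x1):
    # Softmax decision boundary: both class scores are equal where
    #   (W[:, 0] - W[:, 1]) . z + (b[0] - b[1]) = 0.
    # Solve for the third hidden coordinate z given the first two (x0, x1).
    dw = W[:, 0] - W[:, 1]
    return (dw[0] * x0 + dw[1] * x1 + b[0] - b[1]) / -dw[2]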
Example 4: fun_mlp
# Required import: from mlp import MLP [as alias]
# Or: from mlp.MLP import negative_log_likelihood [as alias]
def fun_mlp(shared_args, private_args, this_queue, that_queue):
'''
shared_args
contains neural network parameters
private_args
contains parameters for process run on each gpu
this_queue and that_queue are used for synchronization between processes.
'''
learning_rate = shared_args['learning_rate']
n_epochs = shared_args['n_epochs']
dataset = shared_args['dataset']
batch_size = shared_args['batch_size']
L1_reg = shared_args['L1_reg']
L2_reg = shared_args['L2_reg']
n_hidden = shared_args['n_hidden']
####
# pycuda and zmq environment
drv.init()
dev = drv.Device(private_args['ind_gpu'])
ctx = dev.make_context()
sock = zmq.Context().socket(zmq.PAIR)
if private_args['flag_client']:
sock.connect('tcp://localhost:5000')
else:
sock.bind('tcp://*:5000')
####
####
# import theano related
import theano.sandbox.cuda
theano.sandbox.cuda.use(private_args['gpu'])
import theano
import theano.tensor as T
from logistic_sgd import load_data
from mlp import MLP
import theano.misc.pycuda_init
import theano.misc.pycuda_utils
####
datasets = load_data(dataset)
train_set_x, train_set_y = datasets[0]
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]
# compute number of minibatches for training, validation and testing
n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
######################
# BUILD ACTUAL MODEL #
######################
print '... building the model'
# allocate symbolic variables for the data
index = T.lscalar() # index to a [mini]batch
x = T.matrix('x') # the data is presented as rasterized images
y = T.ivector('y') # the labels are presented as 1D vector of
# [int] labels
rng = np.random.RandomState(1234)
classifier = MLP(rng=rng, input=x, n_in=28 * 28,
n_hidden=n_hidden, n_out=10)
cost = (classifier.negative_log_likelihood(y)
+ L1_reg * classifier.L1
+ L2_reg * classifier.L2_sqr)
validate_model = theano.function(
inputs=[index],
outputs=classifier.errors(y),
givens={x: valid_set_x[index * batch_size:(index + 1) * batch_size],
y: valid_set_y[index * batch_size:(index + 1) * batch_size]}
)
gparams = [T.grad(cost, param) for param in classifier.params]
updates = [(param, param - learning_rate * gparam)
for param, gparam in zip(classifier.params, gparams)]
train_model = theano.function(
inputs=[index],
outputs=cost,
updates=updates,
givens={
x: train_set_x[index * batch_size: (index + 1) * batch_size],
y: train_set_y[index * batch_size: (index + 1) * batch_size]})
####
# setting pycuda and
#......... part of the code omitted here .........
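The function above expects to run as one of two cooperating processes, each pinned to its own GPU and handed a pair of queues plus a shared/private argument split. A hedged sketch of a launcher for it; the dictionary keys follow what fun_mlp reads, but the concrete values, GPU names, and the choice of multiprocessing primitives are assumptions:

from multiprocessing import Process, Queue

# Hypothetical driver: start two fun_mlp workers, one per GPU, connected by queues.
shared_args = {'learning_rate': 0.01, 'n_epochs': 10, 'dataset': 'mnist.pkl.gz',
               'batch_size': 20, 'L1_reg': 0.00, 'L2_reg': 0.0001, 'n_hidden': 500}
queue_a, queue_b = Queue(), Queue()
worker0 = Process(target=fun_mlp,
                  args=(shared_args,
                        {'ind_gpu': 0, 'gpu': 'gpu0', 'flag_client': False},
                        queue_a, queue_b))
worker1 = Process(target=fun_mlp,
                  args=(shared_args,
                        {'ind_gpu': 1, 'gpu': 'gpu1', 'flag_client': True},
                        queue_b, queue_a))
worker0.start(); worker1.start()
worker0.join(); worker1.join()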
Example 5: MLP
# Required import: from mlp import MLP [as alias]
# Or: from mlp.MLP import negative_log_likelihood [as alias]
L2_reg=0.0001
n_epochs=1000
dataset='train.mat'
batch_size=1000
n_hidden=50
classifier = MLP(
rng=rng,
input=x,
n_in=3000,
n_hidden=n_hidden,
n_out=2
)
cost = (
classifier.negative_log_likelihood(y)
+ L1_reg * classifier.L1
+ L2_reg * classifier.L2_sqr
)
validate_model = theano.function(
inputs=[x,y],
outputs=classifier.errors(y)
)
test_model = theano.function(
inputs=[x],
outputs=classifier.y_pred
)
test_model_proba = theano.function(
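Unlike the index/givens pattern of the other examples, this fragment compiles validate_model and test_model with the data as direct inputs, so they are called with plain NumPy arrays. A small usage sketch; the array names are hypothetical and the shapes follow the n_in=3000, n_out=2 model above:

# Hypothetical usage: pass NumPy arrays straight to the compiled functions.
# valid_x: float array of shape (n_examples, 3000); valid_y: int32 label vector.
validation_error = validate_model(valid_x, valid_y)  # mean zero-one error
predicted_labels = test_model(test_x)                 # classifier.y_pred per row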
Example 6: mlp_mnist_train
# Required import: from mlp import MLP [as alias]
# Or: from mlp.MLP import negative_log_likelihood [as alias]
def mlp_mnist_train(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
datasets = load_data(dataset)
train_set_x, train_set_y = datasets[0]
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
print('... building the model')
index = T.lscalar() # index to a [mini]batch
x = T.matrix('x') # the data is presented as rasterized images
y = T.ivector('y') # the labels are presented as 1D vector of
# [int] labels
rng = np.random.RandomState(1234)
classifier = MLP(
rng=rng,
input=x,
n_in=28*28,
n_hidden=n_hidden,
n_out=10
)
cost = (
classifier.negative_log_likelihood(y) + L1_reg*classifier.L1 + L2_reg*classifier.L2_sqr
)
test_model = theano.function(
inputs=[index],
outputs=classifier.errors(y),
givens={
x: test_set_x[index * batch_size:(index + 1) * batch_size],
y: test_set_y[index * batch_size:(index + 1) * batch_size]
}
)
validate_model = theano.function(
inputs=[index],
outputs=classifier.errors(y),
givens={
x: valid_set_x[index * batch_size:(index + 1) * batch_size],
y: valid_set_y[index * batch_size:(index + 1) * batch_size]
}
)
gparams = [T.grad(cost, param) for param in classifier.params]
updates = [
(param, param - learning_rate * gparam) for param, gparam in zip(classifier.params, gparams)
]
train_model = theano.function(
inputs=[index],
outputs=cost,
updates=updates,
givens={
x: train_set_x[index * batch_size: (index + 1) * batch_size],
y: train_set_y[index * batch_size: (index + 1) * batch_size]
}
)
print('... training')
patience = 10000 # look at this many examples regardless
patience_increase = 2 # wait this much longer when a new best is found
improvement_threshold = 0.995 # a relative improvement of this much is considered significant
validation_frequency = min(n_train_batches, patience // 2)
best_validation_loss = np.inf
best_iter = 0
test_score = 0.
start_time = timeit.default_timer()
epoch = 0
done_looping = False
while (epoch < n_epochs) and (not done_looping):
epoch = epoch + 1
for minibatch_index in range(n_train_batches):
minibatch_avg_cost = train_model(minibatch_index)
iter = (epoch - 1) * n_train_batches + minibatch_index
if (iter + 1) % validation_frequency == 0:
validation_losses = [validate_model(i) for i in range(n_valid_batches)]
this_validation_loss = np.mean(validation_losses)
print(
'epoch %i, minibatch %i/%i, validation error %f %%' %
(
epoch,
minibatch_index + 1,
n_train_batches,
this_validation_loss * 100.
)
#......... part of the code omitted here .........
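The omitted tail of this example applies the classic patience-based early-stopping rule that the constants above parameterize. A sketch of that rule, reconstructed in the usual tutorial form rather than copied from the truncated source:

# Sketch of the early-stopping step that follows the validation print.
# (continues inside the `for minibatch_index in range(n_train_batches)` loop above)
if this_validation_loss < best_validation_loss:
    # a sufficiently large improvement extends the patience budget
    if this_validation_loss < best_validation_loss * improvement_threshold:
        patience = max(patience, iter * patience_increase)
    best_validation_loss = this_validation_loss
    best_iter = iter
    # evaluate on the test set at the new best validation point
    test_losses = [test_model(i) for i in range(n_test_batches)]
    test_score = np.mean(test_losses)
if patience <= iter:
    done_looping = True
    break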
Example 7: do_gd
# Required import: from mlp import MLP [as alias]
# Or: from mlp.MLP import negative_log_likelihood [as alias]
def do_gd(etaVal, epochs, layers, train_set,
valid_set=None, test_set=None, L2_reg=0, batch_size=100, scale=1, noise_scale=1):
'''
batch_size = 100
0 L2 regularization (by default)
function returns training error and validation error after each epoch
'''
SEED = 5318
np.random.seed(SEED)
X = T.matrix('X')
Y = T.ivector('Y')
index = T.lscalar('index')
noise = T.matrix('noise')
eta = T.fscalar('eta')
n_scale = T.fscalar('noise_scale')
n_in = layers[0]
n_out = layers[-1]
# Get the datasets
trainX, trainY = train_set
validX, validY = valid_set
testX, testY = test_set
# Get the dataset sizes
train_dims = trainX.get_value(borrow=True).shape
train_size = trainX.get_value(borrow=True).shape[0]
valid_size = validX.get_value(borrow=True).shape[0]
test_size = testX.get_value(borrow=True).shape[0]
classifier = MLP(
rng = np.random.RandomState(SEED),
inpt = X,
layers = layers,
scale = scale
)
cost = (
classifier.negative_log_likelihood(Y)
+ L2_reg * classifier.L2_sqr # using the L2 regularization
)
gparams = [T.grad(cost, param) for param in classifier.params]
# Random number generator for the gaussian noise
# theano_rng = RandomStreams(int(np.random.rand()*100))
train_model = theano.function(
inputs = [index, eta, noise],
outputs = cost,
updates = [(param, param - eta * gparam)
for param, gparam in zip(classifier.params, gparams)],
givens = {
# train_dims[1] is the number of columns (features) in the training data
# apparently trainX gets added to the random numbers first, before it is sliced
# Hence we use 784 (features) random numbers and not 100 (batch_size) random numbers
# X : trainX[index * batch_size : (index + 1) * batch_size] + theano_rng.normal(size=(train_dims[1],))* n_scale,
X : trainX[index * batch_size : (index + 1) * batch_size] + noise,
Y : trainY[index * batch_size : (index + 1) * batch_size]
}
)
validate_model = theano.function(
inputs = [index],
outputs = classifier.errors(Y),
givens = {
X : validX[index * batch_size : (index + 1) * batch_size],
Y : validY[index * batch_size : (index + 1) * batch_size]
}
)
test_model = theano.function(
inputs = [index],
outputs = classifier.errors(Y),
givens = {
X : testX[index * batch_size : (index + 1) * batch_size],
Y : testY[index * batch_size : (index + 1) * batch_size]
}
)
train_error = []
valid_error = []
test_error = []
# Calculate the number of batches.
n_train_batches = int(train_size / batch_size)
n_val_batches = int(valid_size / batch_size)
n_test_batches = int(test_size / batch_size)
ANNEAL = 10*train_size # rate at which the learning rate "eta" is annealed (reduced) as iterations increase
print("Anneal = {}".format(ANNEAL))
start_time = timeit.default_timer()
learn_rate = etaVal
# Initial Gaussian Noise
gaussian_noise = 0
for epoch in xrange(epochs):
# shuffle data, reset the seed so that trainX and trainY are randomized
#......... part of the code omitted here .........
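The body of the epoch loop is omitted, but train_model above declares three inputs: a minibatch index, the learning rate eta, and a noise matrix added to the minibatch slice of trainX. A hedged sketch of one plausible continuation; the annealing schedule and the noise shape are assumptions consistent with the comments in the code, not the original implementation:

# (inside the `for epoch in xrange(epochs)` loop above)
for batch_idx in xrange(n_train_batches):
    iteration = epoch * n_train_batches + batch_idx
    # anneal the learning rate as more examples are seen; ANNEAL = 10 * train_size
    learn_rate = np.float32(etaVal / (1.0 + iteration * batch_size / float(ANNEAL)))
    # fresh Gaussian input noise for this minibatch, scaled by noise_scale
    gaussian_noise = np.float32(noise_scale * np.random.randn(batch_size, train_dims[1]))
    minibatch_cost = train_model(batch_idx, learn_rate, gaussian_noise)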
Example 8: main
# Required import: from mlp import MLP [as alias]
# Or: from mlp.MLP import negative_log_likelihood [as alias]
def main(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
dataset='mnist.pkl.gz', batch_size=20, n_hidden=[500, 500]):
"""
Demonstrate stochastic gradient descent optimization for a multilayer
perceptron
This is demonstrated on MNIST.
:type learning_rate: float
:param learning_rate: learning rate used (factor for the stochastic
gradient)
:type L1_reg: float
:param L1_reg: L1-norm's weight when added to the cost (see
regularization)
:type L2_reg: float
:param L2_reg: L2-norm's weight when added to the cost (see
regularization)
:type n_epochs: int
:param n_epochs: maximal number of epochs to run the optimizer
:type dataset: string
:param dataset: the path of the MNIST dataset file from
http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
"""
if add_noise==True:
datasets = load_data(dataset, nb_classes=nb_classes, binarize=binarize,
noise_prop=noise_proportion)
else:
datasets = load_data(dataset, nb_classes=nb_classes, binarize=binarize)
train_set_x, train_set_y = datasets[0]
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]
print('Showing data samples')
if nb_classes == 2:
labels = ['odd', 'even']
else:
labels = [0,1,2,3,4,5,6,7,8,9]
imshow_samples(train_set_x.get_value(), train_set_y,
valid_set_x.get_value(), valid_set_y, num_samples=4, labels=labels)
plt.pause(0.0001)
diary.save_figure(plt, filename='samples', extension='svg')
# compute number of minibatches for training, validation and testing
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
######################
# BUILD ACTUAL MODEL #
######################
print('... building the model')
# allocate symbolic variables for the data
index = T.lscalar() # index to a [mini]batch
x = T.matrix('x') # the data is presented as rasterized images
y = T.ivector('y') # the labels are presented as 1D vector of
# [int] labels
rng = numpy.random.RandomState(1234)
# construct the MLP class
classifier = MLP(
rng=rng,
input=x,
n_in=28 * 28,
n_hidden=n_hidden,
n_out=nb_classes
)
# start-snippet-4
# the cost we minimize during training is the negative log likelihood of
# the model plus the regularization terms (L1 and L2); cost is expressed
# here symbolically
cost = (
classifier.negative_log_likelihood(y)
+ L1_reg * classifier.L1
+ L2_reg * classifier.L2_sqr
)
# end-snippet-4
# compiling a Theano function that computes the mistakes that are made
# by the model on a minibatch
test_model = theano.function(
inputs=[index],
outputs=classifier.errors(y),
givens={
x: test_set_x[index * batch_size:(index + 1) * batch_size],
y: test_set_y[index * batch_size:(index + 1) * batch_size]
}
)
validate_model = theano.function(
inputs=[index],
#......... part of the code omitted here .........
Example 9: test_mlp
# Required import: from mlp import MLP [as alias]
# Or: from mlp.MLP import negative_log_likelihood [as alias]
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
"""
Demonstrate stochastic gradient descent optimization for a multilayer
perceptron
This is demonstrated on MNIST.
:type learning_rate: float
:param learning_rate: learning rate used (factor for the stochastic
gradient)
:type L1_reg: float
:param L1_reg: L1-norm's weight when added to the cost (see
regularization)
:type L2_reg: float
:param L2_reg: L2-norm's weight when added to the cost (see
regularization)
:type n_epochs: int
:param n_epochs: maximal number of epochs to run the optimizer
:type dataset: string
:param dataset: the path of the MNIST dataset file from
http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
"""
datasets = load_data(dataset)
train_set_x, train_set_y = datasets[0]
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]
# compute number of minibatches for training, validation and testing
n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
######################
# BUILD ACTUAL MODEL #
######################
print '... building the model'
# allocate symbolic variables for the data
index = T.lscalar() # index to a [mini]batch
x = T.matrix('x') # the data is presented as rasterized images
y = T.ivector('y') # the labels are presented as 1D vector of
# [int] labels
rng = numpy.random.RandomState(1234)
# construct the MLP class
classifier = MLP(
rng=rng,
input=x,
n_in=28 * 28,
n_hidden=n_hidden,
n_out=10
)
# start-snippet-4
# the cost we minimize during training is the negative log likelihood of
# the model plus the regularization terms (L1 and L2); cost is expressed
# here symbolically
cost = (
classifier.negative_log_likelihood(y)
+ L1_reg * classifier.L1
+ L2_reg * classifier.L2_sqr
)
# end-snippet-4
# compiling a Theano function that computes the mistakes that are made
# by the model on a minibatch
test_model = theano.function(
inputs=[index],
outputs=classifier.errors(y),
givens={
x: test_set_x[index * batch_size:(index + 1) * batch_size],
y: test_set_y[index * batch_size:(index + 1) * batch_size]
}
)
validate_model = theano.function(
inputs=[index],
outputs=classifier.errors(y),
givens={
x: valid_set_x[index * batch_size:(index + 1) * batch_size],
y: valid_set_y[index * batch_size:(index + 1) * batch_size]
}
)
# start-snippet-5
# compute the gradient of cost with respect to theta (stored in params)
# the resulting gradients will be stored in a list gparams
gparams = [T.grad(cost, param) for param in classifier.params]
# specify how to update the parameters of the model as a list of
# (variable, update expression) pairs
#......... part of the code omitted here .........
Example 10: train_mlp
# Required import: from mlp import MLP [as alias]
# Or: from mlp.MLP import negative_log_likelihood [as alias]
def train_mlp(L1_reg = 0.0, L2_reg = 0.0000, num_batches_per_bunch = 512, batch_size = 1, num_bunches_queue = 5, offset = 0, path_name = '/afs/inf.ed.ac.uk/user/s12/s1264845/scratch/s1264845/data/'):
voc_list = Vocabulary(path_name + 'train')
voc_list.vocab_create()
vocab = voc_list.vocab
vocab_size = voc_list.vocab_size
voc_list_valid = Vocabulary(path_name + 'valid')
voc_list_valid.vocab_create()
count = voc_list_valid.count
voc_list_test = Vocabulary(path_name + 'test')
voc_list_test.vocab_create()
no_test_tokens = voc_list_test.count
print 'The number of sentences in the test set:', no_test_tokens
#print 'number of words in valid data:', count
dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size )
dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size )
dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size )
#learn_list = [0.1, 0.1, 0.1, 0.75, 0.5, 0.25, 0.125, 0.0625, 0]
exp_name = 'fine_tuning.hdf5'
posterior_path = 'log_likelihoods'
print '..building the model'
#symbolic variables for input, target vector and batch index
index = T.lscalar('index')
x = T.fmatrix('x')
y = T.ivector('y')
learning_rate = T.fscalar('learning_rate')
#theano shares variables for train, valid and test
train_set_x = theano.shared(numpy.empty((1,1), dtype='float32'), allow_downcast = True)
train_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
valid_set_x = theano.shared(numpy.empty((1,1), dtype='float32'), allow_downcast = True)
valid_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
test_set_x = theano.shared(numpy.empty((1,1), dtype='float32'), allow_downcast = True)
test_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
rng = numpy.random.RandomState(1234)
classifier = MLP(rng = rng, input = x, n_in = vocab_size, n_hidden1 = 30, n_hidden2 = 60 , n_out = vocab_size)
#classifier = MLP(rng = rng, input = x, n_in = vocab_size, n_hidden = 60, n_out = vocab_size)
cost = classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr
#constructor for learning rate class
learnrate_schedular = LearningRateNewBob(start_rate=0.001, scale_by=.5, max_epochs=9999,\
min_derror_ramp_start=.1, min_derror_stop=.1, init_error=100.)
#learnrate_schedular = LearningRateList(learn_list)
frame_error = classifier.errors(y)
likelihood = classifier.sum(y)
#test model
test_model = theano.function(inputs = [index], outputs = likelihood, \
givens = {x: test_set_x[index * batch_size:(index + 1) * batch_size],
y: test_set_y[index * batch_size:(index + 1) * batch_size]})
#validation_model
validate_model = theano.function(inputs = [index], outputs = [frame_error, likelihood], \
givens = {x: valid_set_x[index * batch_size:(index + 1) * batch_size],
y: valid_set_y[index * batch_size:(index + 1) * batch_size]})
gradient_param = []
#calculates the gradient of cost with respect to parameters
for param in classifier.params:
gradient_param.append(T.cast(T.grad(cost, param), 'float32'))
updates = []
for param, gradient in zip(classifier.params, gradient_param):
updates.append((param, param - learning_rate * gradient))
#training_model
train_model = theano.function(inputs = [index, theano.Param(learning_rate, default = 0.01)], outputs = cost, updates = updates, \
givens = {x: train_set_x[index * batch_size:(index + 1) * batch_size],
y: train_set_y[index * batch_size:(index + 1) * batch_size]})
#theano.printing.pydotprint(train_model, outfile = "pics/train.png", var_with_name_simple = True)
#path_save = '/afs/inf.ed.ac.uk/user/s12/s1264845/scratch/s1264845/mlp/saved_weights/'
print '.....training'
best_valid_loss = numpy.inf
epoch = 1
start_time = time.time()
while(learnrate_schedular.get_rate() != 0):
print 'learning_rate:', learnrate_schedular.get_rate()
print 'epoch_number:', learnrate_schedular.epoch
frames_showed, progress = 0, 0
start_epoch_time = time.time()
tqueue = TNetsCacheSimple.make_queue()
cache = TNetsCacheSimple(tqueue, shuffle_frames = True, offset=0, \
#......... part of the code omitted here .........
Example 11: __init__
# Required import: from mlp import MLP [as alias]
# Or: from mlp.MLP import negative_log_likelihood [as alias]
def __init__(self, ndata=1000, n_hidden=10, L1_reg=0.00, L2_reg=0.0001):
train_x, train_t, test_x, test_t = get_data()
train_x = train_x[:ndata,:]
train_t = train_t[:ndata]
train_t = np.asarray(train_t, dtype="int32")
self.L1_reg = L1_reg
self.L2_reg = L2_reg
print "range of target values: ", set(train_t)
# allocate symbolic variables for the data.
# Make it shared so it can be passed only once
x = theano.shared(value=train_x, name='x') # the data is presented as rasterized images
t = theano.shared(value=train_t, name='t') # the labels are presented as 1D vector of
# [int] labels
rng = numpy.random.RandomState(1234)
# construct the MLP class
classifier = MLP(
rng=rng,
input=x,
n_in=28 * 28,
n_hidden=n_hidden,
n_out=10
)
self.classifier = classifier
# the cost we minimize during training is the negative log likelihood of
# the model plus the regularization terms (L1 and L2); cost is expressed
# here symbolically
cost = (
classifier.negative_log_likelihood(t)
+ L1_reg * classifier.L1
+ L2_reg * classifier.L2_sqr
)
# compute the gradient of cost with respect to theta (stored in params)
# the resulting gradients will be stored in a list gparams
gparams = [T.grad(cost, param) for param in classifier.params]
outputs = [cost] + gparams
self.theano_cost_gradient = theano.function(
inputs=(),
outputs=outputs
)
# compute the errors applied to test set
self.theano_testset_errors = theano.function(
inputs=(),
outputs=self.classifier.errors(t),
givens={
x: test_x,
t: test_t
}
)
# res = get_gradient(train_x, train_t)
# print "result"
# print res
# print ""
self.nparams = sum([p.get_value().size for p in classifier.params])
self.param_sizes = [p.get_value().size for p in classifier.params]
self.param_shapes = [p.get_value().shape for p in classifier.params]
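The nparams/param_sizes/param_shapes bookkeeping suggests this wrapper is meant to exchange one flat parameter vector with an external optimizer. A sketch of the flatten/unflatten helpers such a use would need; these helpers are an assumption about the intended use, not part of the original class:

import numpy as np

def get_flat_params(classifier):
    # concatenate every parameter array into a single flat vector
    return np.concatenate([p.get_value().ravel() for p in classifier.params])

def set_flat_params(classifier, flat_vector, param_sizes, param_shapes):
    # slice the flat vector back into individually shaped parameter arrays
    offset = 0
    for p, size, shape in zip(classifier.params, param_sizes, param_shapes):
        p.set_value(flat_vector[offset:offset + size].reshape(shape).astype(p.dtype))
        offset += size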
Example 12: jobman_entrypoint
# Required import: from mlp import MLP [as alias]
# Or: from mlp.MLP import negative_log_likelihood [as alias]
def jobman_entrypoint(state, channel):
global TEST_RUN
minibatch_size = state.minibatch_size
print_every = 100000
COMPUTE_ERROR_EVERY = 10**7 / minibatch_size # compute error every 10 million examples
if TEST_RUN:
print_every = 100
COMPUTE_ERROR_EVERY = 1000 / minibatch_size
print "entrypoint, state is"
print state
######################
# select dataset and dataset subset, plus adjust epoch num to make number
# of examples seen independent of dataset
# e.g. for the DIGITS_ONLY case the number of epochs has to be changed,
# and for pure NIST (no transformations) it has to be multiplied by 100
# from the start, since we do not have the variations
# compute this in terms of the P07 dataset size (=80M)
MINIBATCHES_TO_SEE = state.n_epochs * 8 * (10**6) / minibatch_size
if state.train_on == 'NIST' and state.train_subset == 'ALL':
dataset_obj = datasets.nist_all()
elif state.train_on == 'NIST' and state.train_subset == 'DIGITS_ONLY':
dataset_obj = datasets.nist_digits()
elif state.train_on == 'NISTP' and state.train_subset == 'ALL':
dataset_obj = datasets.PNIST07()
elif state.train_on == 'NISTP' and state.train_subset == 'DIGITS_ONLY':
dataset_obj = PNIST07_digits
elif state.train_on == 'P07' and state.train_subset == 'ALL':
dataset_obj = datasets.nist_P07()
elif state.train_on == 'P07' and state.train_subset == 'DIGITS_ONLY':
dataset_obj = datasets.P07_digits
dataset = dataset_obj
if state.train_subset == 'ALL':
n_classes = 62
elif state.train_subset == 'DIGITS_ONLY':
n_classes = 10
else:
raise NotImplementedError()
###############################
# construct model
print "constructing model..."
x = T.matrix('x')
y = T.ivector('y')
rng = numpy.random.RandomState(state.rng_seed)
# construct the MLP class
model = MLP(rng = rng, input=x, n_in=N_INPUTS,
n_hidden_layers = state.n_hidden_layers,
n_hidden = state.n_hidden, n_out=n_classes)
# cost and training fn
cost = T.mean(model.negative_log_likelihood(y)) \
+ state.L1_reg * model.L1 \
+ state.L2_reg * model.L2_sqr
print "L1, L2: ", state.L1_reg, state.L2_reg
gradient_nll_wrt_params = []
for param in model.params:
gparam = T.grad(cost, param)
gradient_nll_wrt_params.append(gparam)
learning_rate = 10**float(state.learning_rate_log10)
print "Learning rate", learning_rate
train_updates = {}
for param, gparam in zip(model.params, gradient_nll_wrt_params):
train_updates[param] = param - learning_rate * gparam
train_fn = theano.function([x,y], cost, updates=train_updates)
#######################
# create series
basedir = os.getcwd()
h5f = tables.openFile(os.path.join(basedir, "series.h5"), "w")
series = {}
add_error_series(series, "training_error", h5f,
index_names=('minibatch_idx',), use_accumulator=True,
reduce_every=REDUCE_EVERY)
##########################
# training loop
start_time = time.clock()
print "begin training..."
print "will train for", MINIBATCHES_TO_SEE, "examples"
#......... part of the code omitted here .........
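The training loop that follows is omitted, but train_fn above was compiled with inputs=[x, y], so it is fed minibatch arrays directly. A hedged sketch of that loop; iterate_minibatches and its use of dataset_obj are hypothetical stand-ins, since the real dataset iteration API is not shown here:

# Hypothetical training loop; the real code walks dataset_obj with its own API
# and logs progress through the `series` objects created above.
n_minibatches_seen = 0
for x_batch, y_batch in iterate_minibatches(dataset_obj, minibatch_size):  # hypothetical helper
    minibatch_cost = train_fn(x_batch, y_batch)
    n_minibatches_seen += 1
    if n_minibatches_seen >= MINIBATCHES_TO_SEE:
        break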