This article collects typical usage examples of the Python class pylearn2.optimization.batch_gradient_descent.BatchGradientDescent. If you have been wondering what BatchGradientDescent is for and how to use it, the curated class examples below should help.
The following 15 code examples of the BatchGradientDescent class are shown, sorted by popularity by default.
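Before diving into the examples, a quick orientation: BatchGradientDescent is built from a symbolic Theano objective, a list of shared variables to optimize (params), and optionally a list of symbolic inputs; calling minimize() runs (optionally conjugate) gradient descent with line searches and returns the final objective value. The snippet below is a minimal sketch of that pattern, assuming a working pylearn2/Theano installation; the variable names are invented for illustration, and the real call signatures appear in the examples that follow.

import numpy as np
import theano.tensor as T
from pylearn2.utils import sharedX
from pylearn2.optimization.batch_gradient_descent import BatchGradientDescent

# Shared parameter vector to optimize, starting from a random point.
x = sharedX(np.random.randn(5), name='x')

# A simple convex objective: squared distance from the all-ones vector.
objective = T.sqr(x - 1.).sum()

# No symbolic inputs are needed here because the objective depends only on x.
optimizer = BatchGradientDescent(
    objective=objective,
    params=[x],
    inputs=[],
    max_iter=50,
    conjugate=True,
    line_search_mode='exhaustive')

# minimize() updates x in place and returns the final objective value.
final_obj = optimizer.minimize()
print(final_obj)      # should be close to 0
print(x.get_value())  # should be close to a vector of ones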
Example 1: test_zero_optimal
def test_zero_optimal(self):
""" minimizes the kl divergence between q and p
using batch gradient descent and checks that
the result is zero"""
rng = np.random.RandomState([1,2,3])
dim = self.dim
num_trials = 3
mu = rng.randn(dim).astype(floatX)
beta = rng.uniform(.1,10.,(dim,)).astype(floatX)
self.p.mu.set_value(mu)
mu = rng.randn(dim).astype(floatX)
self.q.mu.set_value(mu)
self.p.beta.set_value(beta)
beta = rng.uniform(.1,10.,(dim,)).astype(floatX)
self.q.beta.set_value(beta)
kl = kl_divergence(self.q,self.p)
p = self.p
q = self.q
optimizer = BatchGradientDescent(
max_iter = 100,
line_search_mode = 'exhaustive',
verbose = True,
objective = kl,
conjugate = True,
params = [ p.mu, p.beta, q.mu, q.beta ],
param_constrainers = [ p.censor_updates,
q.censor_updates ])
#optimizer.verbose = True
kl = optimizer.minimize()
if kl < 0.:
if config.floatX == 'float32':
neg_tol = 4.8e-7
else:
neg_tol = 0.
if kl < - neg_tol:
raise AssertionError("KL divergence should "
"be non-negative but is "+
str(kl))
warnings.warn("KL divergence is not very numerically stable, evidently")
tol = 6e-5
if kl > tol:
print 'kl:',kl
print 'tol:',tol
assert kl <= tol
assert not (kl > tol )
Example 2: test_zero_optimal
def test_zero_optimal(self):
""" minimizes the kl divergence between q and p
using batch gradient descent and checks that
the result is zero"""
rng = np.random.RandomState([1,2,3])
dim = self.dim
num_trials = 3
mu = rng.randn(dim).astype(floatX)
beta = rng.uniform(.1,10.,(dim,)).astype(floatX)
self.p.mu.set_value(mu)
mu = rng.randn(dim).astype(floatX)
self.q.mu.set_value(mu)
self.p.beta.set_value(beta)
beta = rng.uniform(.1,10.,(dim,)).astype(floatX)
self.q.beta.set_value(beta)
kl = kl_divergence(self.q,self.p)
p = self.p
q = self.q
optimizer = BatchGradientDescent(
objective = kl,
params = [ p.mu, p.beta, q.mu, q.beta ],
param_constrainers = [ p.censor_updates,
q.censor_updates ])
#optimizer.verbose = True
kl = optimizer.minimize()
if kl < 0.:
raise AssertionError("KL divergence should "
"be non-negative but is "+
str(kl))
tol = 5.4e-5
assert kl <= tol
assert not (kl > tol )
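The two variants above differ only in their optional arguments (Example 1 turns on conjugate directions and exhaustive line searches); both rely on param_constrainers to keep the precision parameters valid. Judging from these examples and from Example 12 below, a constrainer is simply a callable that receives the dictionary of symbolic parameter updates and edits it in place before the update function is compiled. A hedged sketch of a custom constrainer follows; the beta variable is hypothetical and stands in for whatever shared parameter your own objective uses.

import numpy as np
import theano.tensor as T
from pylearn2.utils import sharedX

# Hypothetical precision parameter that must stay strictly positive.
beta = sharedX(np.ones(4), name='beta')

def keep_beta_positive(updates):
    # The optimizer passes its dict of symbolic updates; constrainers
    # modify it in place, just as p.censor_updates does above.
    assert beta in updates
    updates[beta] = T.clip(updates[beta], 1e-6, 1e6)

# It would then be passed as one entry of param_constrainers, e.g.
# BatchGradientDescent(objective=obj, params=[beta],
#                      param_constrainers=[keep_beta_positive])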
Example 3: fit
def fit(self, params=None, l1=.0, l2=.0):
"""
Fit the model by minimizing the Leave One Out (LOO) loss using gradient-based optimization.
"""
loo_loss = self.loss_symbolic(self.L, self.y, self.mu, self.R, self.eta, self.eps)
if params is None:
params = [self.eta]
# Symbolic Theano variables that represent the L1 and L2 regularization terms
L1, L2 = .0, .0
for param in params:
L1 += T.sum(abs(param))
L2 += T.sum(param ** 2)
regularized_loo_loss = loo_loss + l1 * L1 + l2 * L2
minimizer = BatchGradientDescent(objective=regularized_loo_loss, params=params, inputs=[], verbose=1)
minimizer.minimize()
Example 4: test_batch_gradient_descent
def test_batch_gradient_descent():
""" Verify that batch gradient descent works by checking that
it minimizes a quadratic function f(x) = x^T A x + b^T x + c
correctly for several sampled values of A, b, and c.
The ground truth minimizer is x = np.linalg.solve(A,-b)"""
n = 3
A = T.matrix(name = 'A')
b = T.vector(name = 'b')
c = T.scalar(name = 'c')
x = sharedX( np.zeros((n,)) , name = 'x')
half = np.cast[config.floatX](0.5)
obj = half * T.dot(T.dot(x,A),x)+T.dot(b,x)+c
minimizer = BatchGradientDescent(
objective = obj,
params = [ x],
inputs = [ A, b, c])
num_samples = 3
rng = np.random.RandomState([1,2,3])
for i in xrange(num_samples):
A = np.cast[config.floatX](rng.randn(int(1.5 * n), n))  # int(): randn requires integer shapes
A = np.cast[config.floatX](np.dot(A.T,A))
A += np.cast[config.floatX](np.identity(n) * .02)
b = np.cast[config.floatX](rng.randn(n))
c = np.cast[config.floatX](rng.randn())
x.set_value(np.cast[config.floatX](rng.randn(n)))
analytical_x = np.linalg.solve(A,-b)
actual_obj = minimizer.minimize(A,b,c)
actual_x = x.get_value()
#Check that the value returned by the minimize method
#is the objective function value at the parameters
#chosen by the minimize method
cur_obj = minimizer.obj(A,b,c)
assert np.allclose(actual_obj, cur_obj)
x.set_value(analytical_x)
analytical_obj = minimizer.obj(A,b,c)
#make sure the objective function is accurate to first 4 digits
condition1 = not np.allclose(analytical_obj, actual_obj)
condition2 = np.abs(analytical_obj-actual_obj) >= 1e-4 * np.abs(analytical_obj)
if (config.floatX == 'float64' and condition1) \
or (config.floatX == 'float32' and condition2):
print 'objective function value came out wrong on sample ',i
print 'analytical obj', analytical_obj
print 'actual obj',actual_obj
"""
The following section of code was used to verify that numerical
error can make the objective function look non-convex
print 'Checking for numerically induced non-convex behavior'
def f(x):
return 0.5 * np.dot(x,np.dot(A,x)) + np.dot(b,x) + c
x.set_value(actual_x)
minimizer._compute_grad(A,b,c)
minimizer._normalize_grad()
d = minimizer.param_to_grad_shared[x].get_value()
x = actual_x.copy()
prev = f(x)
print prev
step_size = 1e-4
x += step_size * d
cur = f(x)
print cur
cur_sgn = np.sign(cur-prev)
flip_cnt = 0
for i in xrange(10000):
x += step_size * d
prev = cur
cur = f(x)
print cur
prev_sgn = cur_sgn
cur_sgn = np.sign(cur-prev)
if cur_sgn != prev_sgn:
print 'flip'
flip_cnt += 1
if flip_cnt > 1:
print "Non-convex!"
from matplotlib import pyplot as plt
y = []
x = actual_x.copy()
for j in xrange(10000):
y.append(f(x))
#......... (part of the code omitted here) .........
Example 5: DNCE
#Get the objective function
nce = DNCE(noise_distribution)
J = nce(model,X,Y)
accs = []
for Y_i in Y:
pos_prob = 1./(1.+T.exp(model.free_energy(X)-model.free_energy(Y_i)))
acc = (pos_prob > .5).mean()
accs.append(acc)
acc = sum(accs) / float(len(accs))
print '\tinit accuracy ',function([],acc)()
#Minimize the objective function with batch gradient descent
minimizer = BatchGradientDescent( objective = J,
params = model.get_params(),
param_constrainers = [ model.censor_updates ])
print '\tinit obj:',minimizer.obj()
#minimizer.verbose = True
minimizer.minimize()
print '\tfinal obj:',minimizer.obj()
recovered_beta = model.beta.get_value()
recovered_mu = model.mu.get_value()
print '\trecovered beta:',recovered_beta
print '\trecovered mu:',recovered_mu
kl = kl_divergence(true, model)
kl = function([],kl)()
Example 6: setup
#......... (part of the code omitted here) .........
obj = self.cost(model, X, Y, ** fixed_var_descr.fixed_vars)
grads, grad_updates = self.cost.get_gradients(model, X, Y, ** fixed_var_descr.fixed_vars)
ipt = (X,Y)
else:
obj = self.cost(model, X, ** fixed_var_descr.fixed_vars)
grads, grad_updates = self.cost.get_gradients(model, X, ** fixed_var_descr.fixed_vars)
ipt = X
Y = None
assert isinstance(grads, OrderedDict)
assert isinstance(grad_updates, OrderedDict)
if obj is None:
raise ValueError("BGD is incompatible with "+str(self.cost)+" because "
" it is intractable, but BGD uses the cost function value to do "
" line searches.")
if self.monitoring_dataset is not None:
if not any([dataset.has_targets() for dataset in self.monitoring_dataset.values()]):
Y = None
channels = model.get_monitoring_channels(X,Y)
if not isinstance(channels, dict):
raise TypeError("model.get_monitoring_channels must return a "
"dictionary, but it returned " + str(channels))
channels.update(self.cost.get_monitoring_channels(model, X, Y, ** fixed_var_descr.fixed_vars))
for dataset_name in self.monitoring_dataset:
if dataset_name == '':
prefix = ''
else:
prefix = dataset_name + '_'
monitoring_dataset = self.monitoring_dataset[dataset_name]
self.monitor.add_dataset(dataset=monitoring_dataset,
mode="sequential",
batch_size=self.batch_size,
num_batches=self.monitoring_batches)
# The monitor compiles all channels for the same dataset into one function, and
# runs all prereqs before calling the function. So we only need to register the
# on_load_batch prereq once per monitoring dataset.
self.monitor.add_channel(prefix + 'objective',ipt=ipt,val=obj,
dataset = monitoring_dataset, prereqs = fixed_var_descr.on_load_batch)
for name in channels:
J = channels[name]
if isinstance(J, tuple):
assert len(J) == 2
J, prereqs = J
else:
prereqs = None
if Y is not None:
ipt = (X,Y)
else:
ipt = X
self.monitor.add_channel(name= prefix + name,
ipt=ipt,
val=J,
dataset = monitoring_dataset,
prereqs=prereqs)
if self.cost.supervised:
ipts = [X, Y]
else:
ipts = [X]
params = model.get_params()
self.optimizer = BatchGradientDescent(
objective = obj,
gradients = grads,
gradient_updates = grad_updates,
params = params,
param_constrainers = [ model.censor_updates ],
lr_scalers = model.get_lr_scalers(),
inputs = ipts,
verbose = self.verbose_optimization,
max_iter = self.updates_per_batch,
reset_alpha = self.reset_alpha,
conjugate = self.conjugate,
reset_conjugate = self.reset_conjugate,
min_init_alpha = self.min_init_alpha,
line_search_mode = self.line_search_mode,
theano_function_mode=self.theano_function_mode,
init_alpha=self.init_alpha)
if self.monitoring_dataset is not None:
self.monitor.add_channel(name='ave_step_size',
ipt=ipt, val = self.optimizer.ave_step_size, dataset=self.monitoring_dataset.values()[0])
self.monitor.add_channel(name='ave_grad_size',
ipt=ipt, val = self.optimizer.ave_grad_size, dataset=self.monitoring_dataset.values()[0])
self.monitor.add_channel(name='ave_grad_mult',
ipt=ipt, val = self.optimizer.ave_grad_mult, dataset=self.monitoring_dataset.values()[0])
self.first = True
self.bSetup = True
Example 7: BGD
class BGD(TrainingAlgorithm):
"""Batch Gradient Descent training algorithm class"""
def __init__(self, cost, batch_size=None, batches_per_iter=None,
updates_per_batch = 10,
monitoring_batches=None, monitoring_dataset=None,
termination_criterion = None, set_batch_size = False,
reset_alpha = True, conjugate = False,
min_init_alpha = .001,
reset_conjugate = True, line_search_mode = None,
verbose_optimization=False, scale_step=1., theano_function_mode=None,
init_alpha=None, seed=None):
"""
cost: a pylearn2 Cost
batch_size: Like the SGD TrainingAlgorithm, this TrainingAlgorithm
still iterates over minibatches of data. The difference
is that this class uses partial line searches to choose
the step size along each gradient direction, and can do
repeated updates on the same batch. The assumption is
that you use big enough minibatches with this algorithm that
a large step size will generalize reasonably well to other
minibatches.
To implement true Batch Gradient Descent, set the batch_size
to the total number of examples available.
If batch_size is None, it will revert to the model's force_batch_size
attribute.
set_batch_size: If True, BGD will attempt to override the model's force_batch_size
attribute by calling set_batch_size on it.
updates_per_batch: Passed through to the optimization.BatchGradientDescent's
max_iter parameter
reset_alpha, conjugate, reset_conjugate: passed through to the
optimization.BatchGradientDescent parameters of the same names
monitoring_dataset: A Dataset or a dictionary mapping string dataset names to Datasets
"""
self.__dict__.update(locals())
del self.self
if monitoring_dataset is None:
assert monitoring_batches == None
self._set_monitoring_dataset(monitoring_dataset)
self.bSetup = False
self.termination_criterion = termination_criterion
if seed is None:
seed = [2012, 10, 16]
self.rng = np.random.RandomState(seed)
def setup(self, model, dataset):
"""
Allows the training algorithm to do some preliminary configuration
*before* we actually start training the model. The dataset is provided
in case other derived training algorithms need to modify model based on
the dataset.
Parameters
----------
model: a Python object representing the model to train loosely
implementing the interface of models.model.Model.
dataset: a pylearn2.datasets.dataset.Dataset object used to draw
training data
"""
self.model = model
if self.batch_size is None:
self.batch_size = model.force_batch_size
else:
batch_size = self.batch_size
if self.set_batch_size:
model.set_batch_size(batch_size)
elif hasattr(model, 'force_batch_size'):
if not (model.force_batch_size <= 0 or batch_size ==
model.force_batch_size):
raise ValueError("batch_size is %d but model.force_batch_size is %d" %
(batch_size, model.force_batch_size))
self.monitor = Monitor.get_monitor(model)
self.monitor.set_theano_function_mode(self.theano_function_mode)
X = self.model.get_input_space().make_theano_batch()
X.name = 'BGD_X'
self.topo = X.ndim != 2
Y = T.matrix()
Y.name = 'BGD_Y'
fixed_var_descr = self.cost.get_fixed_var_descr(model, X, Y)
self.on_load_batch = fixed_var_descr.on_load_batch
if not self.cost.supervised:
Y = None
if self.cost.supervised:
obj = self.cost(model, X, Y, ** fixed_var_descr.fixed_vars)
grads, grad_updates = self.cost.get_gradients(model, X, Y, ** fixed_var_descr.fixed_vars)
ipt = (X,Y)
else:
obj = self.cost(model, X, ** fixed_var_descr.fixed_vars)
grads, grad_updates = self.cost.get_gradients(model, X, ** fixed_var_descr.fixed_vars)
ipt = X
#......... (part of the code omitted here) .........
Example 8: BatchGradientDescent
p, h = state
p_shape = layer.get_output_space().shape
i = p_shape[0] / 2
j = p_shape[1] / 2
act = p[0,filter_idx,i,j]
obj = - act + norm_penalty * T.square(X).sum()
assert obj.ndim == 0
optimizer = BatchGradientDescent(objective = obj,
params = [X],
inputs = None,
param_constrainers = None,
max_iter = 1000,
verbose = True,
tol = None,
init_alpha = (.001, .005, .01, .05, .1))
optimizer.minimize()
img = X.get_value()[0,:,:,:]
print 'max mag: ',np.abs(img).max()
print 'norm: ',np.square(img).sum()
print 'min: ',img.min()
print 'max: ',img.max()
img /= np.abs(img).max()
Example 9: setup_impl
#......... (part of the code omitted here) .........
for i in xrange(self.num_points):
print "Evaluating cost at point ", i
point = points[i, :]
full_point = root + np.dot(basis, point)
model.set_param_vector(full_point)
cost_values[i] = cost_fn(*data)
print cost_values[i]
from pylearn2.utils import sharedX
import theano.tensor as T
print "!!!!!!!! FITTING THE QUADRATIC FUNCTION !!!!!!!!!!!!!!!!!!!"
if not hasattr(self, 'fit_quad'):
points = sharedX(points)
#from theano import config
#config.compute_test_value = 'raise'
cost_values = sharedX(cost_values)
A = sharedX(np.zeros((self.num_basis_vectors, self.num_basis_vectors)))
if self.psd:
mat = T.dot(A.T, A)
else:
mat = A
b = sharedX(np.zeros(self.num_basis_vectors))
c = sharedX(0.)
half_quad = T.dot(points, mat)
quad = (points * half_quad).sum(axis=1)
lin = T.dot(points, b)
pred = quad + lin + c
from pylearn2.optimization.batch_gradient_descent import BatchGradientDescent
mse = T.square(pred - cost_values).mean()
mae = abs(pred - cost_values).mean()
obj = locals()[self.fitting_cost]
fit_quad = BatchGradientDescent(obj, params = [A, b, c],
max_iter = self.num_basis_vectors ** 2,
verbose = 3, tol = None,
init_alpha = None, min_init_alpha = 1e-7,
reset_alpha = False, conjugate = True,
reset_conjugate = False,
line_search_mode = 'exhaustive')
self.fit_quad = fit_quad
self.A = A
self.b = b
self.c = c
self.points = points
self.cost_values = cost_values
else:
self.A.set_value(.001 * np.identity(self.A.get_value().shape[0], dtype=self.A.dtype))
self.b.set_value(self.b.get_value() * 0.)
self.c.set_value(self.c.get_value() * 0.)
self.points.set_value(points)
self.cost_values.set_value(cost_values.astype(self.cost_values.dtype))
self.fit_quad.minimize()
print "!!!!!!!!!!!!! FINDING ITS MINIMUM !!!!!!!!!!!!!!!!!!!!!!!!!!!"
if self.use_solver:
if self.psd:
Example 10: DNCE_Algorithm
class DNCE_Algorithm(object):
def __init__(self, noise, batch_size=1000, batches_per_iter=10,
noise_per_clean = 30,
monitoring_batches=-1, monitoring_dataset=None):
"""
if batch_size is None, reverts to the force_batch_size field of the
model
"""
self.batch_size, self.batches_per_iter = batch_size, batches_per_iter
if monitoring_dataset is None:
assert monitoring_batches == -1
self.monitoring_dataset = monitoring_dataset
self.monitoring_batches = monitoring_batches
self.bSetup = False
self.noise = noise
self.noise_per_clean = noise_per_clean
def setup(self, model, dataset):
"""
Allows the training algorithm to do some preliminary configuration
*before* we actually start training the model. The dataset is provided
in case other derived training algorithms need to modify model based on
the dataset.
Parameters
----------
model: a Python object representing the model to train loosely
implementing the interface of models.model.Model.
dataset: a pylearn2.datasets.dataset.Dataset object used to draw
training data
"""
self.model = model
self.monitor = Monitor.get_monitor(model)
X = T.matrix()
Y = T.matrix()
dnce = DNCE( self.noise)
if self.monitoring_dataset is not None:
if not self.monitoring_dataset.has_targets():
Y = None
self.monitor.set_dataset(dataset=self.monitoring_dataset,
mode="sequential",
batch_size=self.batch_size,
num_batches=self.monitoring_batches)
X.tag.test_value = self.monitoring_dataset.get_batch_design(2)
channels = model.get_monitoring_channels(X,Y)
if not isinstance(channels, dict):
raise TypeError("model.get_monitoring_channels must return a "
"dictionary, but it returned " + str(channels))
dnce.noise_per_clean = self.noise_per_clean
obj = dnce(model,X)
dnce.noise_per_clean = None
self.monitor.add_channel('DNCE',ipt=X,val=obj)
for name in channels:
J = channels[name]
if isinstance(J, tuple):
assert len(J) == 2
J, prereqs = J
else:
prereqs = None
if Y is not None:
ipt = (X,Y)
else:
ipt = X
self.monitor.add_channel(name=name,
ipt=ipt,
val=J,
prereqs=prereqs)
X = sharedX( dataset.get_batch_design(1), 'X')
Y = []
updates = {}
for i in xrange(self.noise_per_clean):
Y_i = sharedX( X.get_value().copy() )
updates[Y_i] = self.noise.random_design_matrix(X)
Y.append(Y_i)
self.update_noise = function([], updates = updates)
obj = dnce(model,X,Y)
self.optimizer = BatchGradientDescent(
objective = obj,
params = model.get_params(),
param_constrainers = [ model.censor_updates ],
max_iter = 5)
self.X = X
self.Y = Y
self.first = True
self.bSetup = True
def train(self, dataset):
assert self.bSetup
#......... (part of the code omitted here) .........
Example 11: setup
def setup(self, model, dataset):
"""
Allows the training algorithm to do some preliminary configuration
*before* we actually start training the model. The dataset is provided
in case other derived training algorithms need to modify model based on
the dataset.
Parameters
----------
model: a Python object representing the model to train loosely
implementing the interface of models.model.Model.
dataset: a pylearn2.datasets.dataset.Dataset object used to draw
training data
"""
self.model = model
if self.batch_size is None:
self.batch_size = model.force_batch_size
else:
batch_size = self.batch_size
if hasattr(model, 'force_batch_size'):
if not (model.force_batch_size <= 0 or batch_size ==
model.force_batch_size):
raise ValueError("batch_size is %d but model.force_batch_size is %d" %
(batch_size, model.force_batch_size))
self.monitor = Monitor.get_monitor(model)
X = self.model.get_input_space().make_theano_batch()
self.topo = X.ndim != 2
Y = T.matrix()
if self.monitoring_dataset is not None:
if not self.monitoring_dataset.has_targets():
Y = None
self.monitor.add_dataset(dataset=self.monitoring_dataset,
mode="sequential",
batch_size=self.batch_size,
num_batches=self.monitoring_batches)
channels = model.get_monitoring_channels(X,Y)
if not isinstance(channels, dict):
raise TypeError("model.get_monitoring_channels must return a "
"dictionary, but it returned " + str(channels))
#TODO: currently only supports unsupervised costs, support supervised too
obj = self.cost(model,X)
self.monitor.add_channel('batch_gd_objective',ipt=X,val=obj)
for name in channels:
J = channels[name]
if isinstance(J, tuple):
assert len(J) == 2
J, prereqs = J
else:
prereqs = None
if Y is not None:
ipt = (X,Y)
else:
ipt = X
self.monitor.add_channel(name=name,
ipt=ipt,
val=J,
prereqs=prereqs)
obj = self.cost(model,X)
self.optimizer = BatchGradientDescent(
objective = obj,
params = model.get_params(),
param_constrainers = [ model.censor_updates ],
lr_scalers = model.get_lr_scalers(),
inputs = [ X ],
verbose = True,
max_iter = self.updates_per_batch)
self.first = True
self.bSetup = True
Example 12: sharedX
_, model_path = sys.argv
from pylearn2.utils import serial
model = serial.load(model_path)
d = model.discriminator
import gc
del model
gc.collect()
from pylearn2.utils import sharedX
X = sharedX(d.get_input_space().get_origin_batch(1))
obj = -d.fprop(X).sum()
from pylearn2.optimization.batch_gradient_descent import BatchGradientDescent as BGD
import theano.tensor as T
def norm_constraint(updates):
assert X in updates
updates[X] = updates[X] / (1e-7 + T.sqrt(T.sqr(X).sum()))
opt = BGD(objective=obj, params=[X], param_constrainers=[norm_constraint], conjugate=True, reset_conjugate=False,
reset_alpha=False, line_search_mode='exhaustive', verbose=3, max_iter=20)
results = []
import numpy as np
rng = np.random.RandomState([1, 2, 3])
for i in xrange(10):
X.set_value(rng.randn(*X.get_value().shape).astype(X.dtype) / 10.)
opt.minimize()
Xv = X.dimshuffle(3, 1, 2, 0).eval()
results.append(Xv)
X = np.concatenate(results, axis=0)
from pylearn2.gui.patch_viewer import make_viewer
v = make_viewer(X)
v.show()
Example 13: setup
#......... (part of the code omitted here) .........
new_f = lambda *args: f(mapping.flatten(args, return_tuple=True))
return new_f
obj_prereqs = [capture(f) for f in fixed_var_descr.on_load_batch]
if self.monitoring_dataset is not None:
self.monitor.setup(
dataset=self.monitoring_dataset,
cost=self.cost,
batch_size=self.batch_size,
num_batches=self.monitoring_batches,
obj_prereqs=obj_prereqs,
cost_monitoring_args=fixed_var_descr.fixed_vars)
# TODO : Why is this commented?
'''
channels = model.get_monitoring_channels(theano_args)
if not isinstance(channels, dict):
raise TypeError("model.get_monitoring_channels must return a "
"dictionary, but it returned " + str(channels))
channels.update(self.cost.get_monitoring_channels(model, theano_args, ** fixed_var_descr.fixed_vars))
for dataset_name in self.monitoring_dataset:
if dataset_name == '':
prefix = ''
else:
prefix = dataset_name + '_'
monitoring_dataset = self.monitoring_dataset[dataset_name]
self.monitor.add_dataset(dataset=monitoring_dataset,
mode="sequential",
batch_size=self.batch_size,
num_batches=self.monitoring_batches)
# The monitor compiles all channels for the same dataset into one function, and
# runs all prereqs before calling the function. So we only need to register the
# on_load_batch prereq once per monitoring dataset.
self.monitor.add_channel(prefix + 'objective',ipt=ipt,val=cost_value,
dataset = monitoring_dataset, prereqs = fixed_var_descr.on_load_batch)
for name in channels:
J = channels[name]
if isinstance(J, tuple):
assert len(J) == 2
J, prereqs = J
else:
prereqs = None
self.monitor.add_channel(name= prefix + name,
ipt=ipt,
val=J,
data_specs=data_specs,
dataset = monitoring_dataset,
prereqs=prereqs)
'''
params = model.get_params()
self.optimizer = BatchGradientDescent(
objective = cost_value,
gradients = grads,
gradient_updates = grad_updates,
params = params,
param_constrainers = [ model.censor_updates ],
lr_scalers = model.get_lr_scalers(),
inputs = theano_args,
verbose = self.verbose_optimization,
max_iter = self.updates_per_batch,
reset_alpha = self.reset_alpha,
conjugate = self.conjugate,
reset_conjugate = self.reset_conjugate,
min_init_alpha = self.min_init_alpha,
line_search_mode = self.line_search_mode,
theano_function_mode=self.theano_function_mode,
init_alpha=self.init_alpha)
# These monitoring channels keep track of shared variables,
# which do not need inputs nor data.
if self.monitoring_dataset is not None:
self.monitor.add_channel(
name='ave_step_size',
ipt=None,
val=self.optimizer.ave_step_size,
data_specs=(NullSpace(), ''),
dataset=self.monitoring_dataset.values()[0])
self.monitor.add_channel(
name='ave_grad_size',
ipt=None,
val=self.optimizer.ave_grad_size,
data_specs=(NullSpace(), ''),
dataset=self.monitoring_dataset.values()[0])
self.monitor.add_channel(
name='ave_grad_mult',
ipt=None,
val=self.optimizer.ave_grad_mult,
data_specs=(NullSpace(), ''),
dataset=self.monitoring_dataset.values()[0])
self.first = True
self.bSetup = True
Example 14: setup
#......... (part of the code omitted here) .........
data_specs = self.cost.get_data_specs(model)
mapping = DataSpecsMapping(data_specs)
space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
source_tuple = mapping.flatten(data_specs[1], return_tuple=True)
# Build a flat tuple of Theano Variables, one for each space,
# named according to the sources.
theano_args = []
for space, source in safe_zip(space_tuple, source_tuple):
name = 'BGD_[%s]' % source
arg = space.make_theano_batch(name=name)
theano_args.append(arg)
theano_args = tuple(theano_args)
# Methods of `self.cost` need args to be passed in a format compatible
# with their data_specs
nested_args = mapping.nest(theano_args)
fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
self.on_load_batch = fixed_var_descr.on_load_batch
cost_value = self.cost.expr(model, nested_args,
** fixed_var_descr.fixed_vars)
grads, grad_updates = self.cost.get_gradients(
model, nested_args, ** fixed_var_descr.fixed_vars)
assert isinstance(grads, OrderedDict)
assert isinstance(grad_updates, OrderedDict)
if cost_value is None:
raise ValueError("BGD is incompatible with " + str(self.cost) +
" because it is intractable, but BGD uses the " +
"cost function value to do line searches.")
# obj_prereqs has to be a list of function f called with f(*data),
# where data is a data tuple coming from the iterator.
# this function enables capturing "mapping" and "f", while
# enabling the "*data" syntax
def capture(f, mapping=mapping):
new_f = lambda *args: f(mapping.flatten(args, return_tuple=True))
return new_f
obj_prereqs = [capture(f) for f in fixed_var_descr.on_load_batch]
if self.monitoring_dataset is not None:
if (self.monitoring_batch_size is None and
self.monitoring_batches is None):
self.monitoring_batch_size = self.batch_size
self.monitoring_batches = self.batches_per_iter
self.monitor.setup(
dataset=self.monitoring_dataset,
cost=self.cost,
batch_size=self.monitoring_batch_size,
num_batches=self.monitoring_batches,
obj_prereqs=obj_prereqs,
cost_monitoring_args=fixed_var_descr.fixed_vars)
params = model.get_params()
self.optimizer = BatchGradientDescent(
objective=cost_value,
gradients=grads,
gradient_updates=grad_updates,
params=params,
param_constrainers=[model.modify_updates],
lr_scalers=model.get_lr_scalers(),
inputs=theano_args,
verbose=self.verbose_optimization,
max_iter=self.updates_per_batch,
reset_alpha=self.reset_alpha,
conjugate=self.conjugate,
reset_conjugate=self.reset_conjugate,
min_init_alpha=self.min_init_alpha,
line_search_mode=self.line_search_mode,
theano_function_mode=self.theano_function_mode,
init_alpha=self.init_alpha)
# These monitoring channels keep track of shared variables,
# which do not need inputs nor data.
if self.monitoring_dataset is not None:
self.monitor.add_channel(
name='ave_step_size',
ipt=None,
val=self.optimizer.ave_step_size,
data_specs=(NullSpace(), ''),
dataset=first_value(self.monitoring_dataset))
self.monitor.add_channel(
name='ave_grad_size',
ipt=None,
val=self.optimizer.ave_grad_size,
data_specs=(NullSpace(), ''),
dataset=first_value(self.monitoring_dataset))
self.monitor.add_channel(
name='ave_grad_mult',
ipt=None,
val=self.optimizer.ave_grad_mult,
data_specs=(NullSpace(), ''),
dataset=first_value(self.monitoring_dataset))
self.first = True
self.bSetup = True
Example 15: BGD
class BGD(TrainingAlgorithm):
"""Batch Gradient Descent training algorithm class"""
def __init__(self, cost=None, batch_size=None, batches_per_iter=None,
updates_per_batch=10, monitoring_batches=None,
monitoring_dataset=None, termination_criterion = None,
set_batch_size=False, reset_alpha=True, conjugate=False,
min_init_alpha=.001, reset_conjugate=True,
line_search_mode=None, verbose_optimization=False,
scale_step=1., theano_function_mode=None, init_alpha=None,
seed=None):
"""
Parameters
----------
cost : pylearn2.costs.Cost
A pylearn2 Cost, or None, in which case model.get_default_cost() \
will be used
batch_size : int
Like the SGD TrainingAlgorithm, this TrainingAlgorithm still \
iterates over minibatches of data. The difference is that this \
class uses partial line searches to choose the step size along \
each gradient direction, and can do repeated updates on the same \
batch. The assumption is that you use big enough minibatches with \
this algorithm that a large step size will generalize reasonably \
well to other minibatches. To implement true Batch Gradient \
Descent, set the batch_size to the total number of examples \
available. If batch_size is None, it will revert to the model's \
force_batch_size attribute.
batches_per_iter : int
WRITEME
updates_per_batch : int
Passed through to the optimization.BatchGradientDescent's \
`max_iter` parameter
monitoring_batches : WRITEME
monitoring_dataset: Dataset or dict
A Dataset or a dictionary mapping string dataset names to Datasets
termination_criterion : WRITEME
set_batch_size : bool
If True, BGD will attempt to override the model's \
`force_batch_size` attribute by calling set_batch_size on it.
reset_alpha : bool
Passed through to the optimization.BatchGradientDescent \
parameter of the same name
conjugate : bool
Passed through to the optimization.BatchGradientDescent \
parameter of the same name
min_init_alpha : float
WRITEME
reset_conjugate : bool
Passed through to the optimization.BatchGradientDescent \
parameter of the same name
line_search_mode : WRITEME
verbose_optimization : bool
WRITEME
scale_step : float
WRITEME
theano_function_mode : WRITEME
init_alpha : WRITEME
seed : WRITEME
"""
self.__dict__.update(locals())
del self.self
if monitoring_dataset is None:
assert monitoring_batches == None
self._set_monitoring_dataset(monitoring_dataset)
self.bSetup = False
self.termination_criterion = termination_criterion
if seed is None:
seed = [2012, 10, 16]
self.rng = np.random.RandomState(seed)
def setup(self, model, dataset):
"""
Allows the training algorithm to do some preliminary configuration
*before* we actually start training the model. The dataset is provided
in case other derived training algorithms need to modify model based on
the dataset.
Parameters
----------
model : object
A Python object representing the model to train loosely \
implementing the interface of models.model.Model.
dataset : pylearn2.datasets.dataset.Dataset
Dataset object used to draw training data
"""
self.model = model
if self.cost is None:
self.cost = model.get_default_cost()
if self.batch_size is None:
self.batch_size = model.force_batch_size
else:
batch_size = self.batch_size
if self.set_batch_size:
#......... (rest of the code omitted here) .........