This page collects typical usage examples of the Python method pylearn2.training_algorithms.sgd.SGD.train. If you have been asking yourself what exactly SGD.train does, how to call it, or where to find working examples of it, the hand-picked code samples below should help. You can also read up on the containing class, pylearn2.training_algorithms.sgd.SGD, for further context.
The following 13 code examples of SGD.train are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
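All of the examples below share the same driving pattern: construct an SGD instance, bind it to a model and dataset with setup(), then call train() once per epoch until a termination criterion (queried through continue_learning) says to stop. As a minimal sketch of that loop, with model and dataset as placeholders rather than objects from any particular example:

from pylearn2.termination_criteria import EpochCounter
from pylearn2.training_algorithms.sgd import SGD

# `model` and `dataset` stand in for any pylearn2 Model/Dataset pair.
algorithm = SGD(learning_rate=.01,
                batch_size=10,
                termination_criterion=EpochCounter(5))
algorithm.setup(model=model, dataset=dataset)  # compiles the update function
while True:
    algorithm.train(dataset=dataset)           # one epoch of minibatch updates
    model.monitor.report_epoch()
    model.monitor()
    if not algorithm.continue_learning(model):
        break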
Example 1: run_algorithm
# Required import: from pylearn2.training_algorithms.sgd import SGD [as alias]
# Or alternatively: from pylearn2.training_algorithms.sgd.SGD import train [as alias]
def run_algorithm():
    unsupported_modes = ['random_slice', 'random_uniform']
    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    train_iteration_mode=mode,
                    monitoring_dataset=None,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)
    algorithm.setup(dataset=dataset, model=model)
    raised = False
    try:
        algorithm.train(dataset)
    except ValueError:
        print mode
        assert mode in unsupported_modes
        raised = True
    if mode in unsupported_modes:
        assert raised
        return True
    return False
Example 2: test_adadelta
# Required import: from pylearn2.training_algorithms.sgd import SGD [as alias]
# Or alternatively: from pylearn2.training_algorithms.sgd.SGD import train [as alias]
def test_adadelta():
    """
    Make sure that learning_rule.AdaDelta obtains the same parameter values as
    with a hand-crafted AdaDelta implementation, given a dummy model and
    learning rate scaler for each parameter.

    Reference:
    "AdaDelta: An Adaptive Learning Rate Method", Matthew D. Zeiler.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    decay = 0.95

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=AdaDelta(decay),
              batch_size=1)
    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['g2'] = np.zeros(param_shape)
        state[param]['dx2'] = np.zeros(param_shape)

    def adadelta_manual(model, state):
        rval = []
        for scale, param in izip(scales, model.get_params()):
            pstate = state[param]
            param_val = param.get_value()
            # begin adadelta
            pstate['g2'] = decay * pstate['g2'] + (1 - decay) * param_val ** 2
            rms_g_t = np.sqrt(pstate['g2'] + scale * learning_rate)
            rms_dx_tm1 = np.sqrt(pstate['dx2'] + scale * learning_rate)
            dx_t = -rms_dx_tm1 / rms_g_t * param_val
            pstate['dx2'] = decay * pstate['dx2'] + (1 - decay) * dx_t ** 2
            rval += [param_val + dx_t]
        return rval

    manual = adadelta_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in izip(manual, model.get_params()))

    manual = adadelta_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in izip(manual, model.get_params()))
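For reference, the hand-crafted loop above is the AdaDelta rule from Zeiler's paper. In this test the gradient g_t equals param_val, because the gradient of SumOfOneHalfParamsSquared with respect to each parameter p is p itself, and the conditioning constant inside both RMS terms is scale * learning_rate, exactly as the manual code computes:

\[ E[g^2]_t = \rho\, E[g^2]_{t-1} + (1-\rho)\, g_t^2, \qquad \mathrm{RMS}[v]_t = \sqrt{E[v^2]_t + \epsilon} \]
\[ \Delta x_t = -\frac{\mathrm{RMS}[\Delta x]_{t-1}}{\mathrm{RMS}[g]_t}\, g_t, \qquad E[\Delta x^2]_t = \rho\, E[\Delta x^2]_{t-1} + (1-\rho)\, \Delta x_t^2 \]

with \(\rho = \mathtt{decay} = 0.95\) and \(\epsilon = \mathtt{scale} \cdot \mathtt{learning\_rate}\).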
Example 3: test_lr_scalers
# Required import: from pylearn2.training_algorithms.sgd import SGD [as alias]
# Or alternatively: from pylearn2.training_algorithms.sgd.SGD import train [as alias]
def test_lr_scalers():
    """
    Tests that SGD respects Model.get_lr_scalers
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfParams(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1,), (9,), (8, 7), (6, 5, 4), (3, 2, 2, 2)]
    learning_rate = .001

    class ModelWithScalers(Model):
        def __init__(self):
            super(ModelWithScalers, self).__init__()
            self._params = [sharedX(np.zeros(shape)) for shape in shapes]
            self.input_space = VectorSpace(1)

        def __call__(self, X):
            # Implemented only so that DummyCost would work
            return X

        def get_lr_scalers(self):
            return dict(zip(self._params, scales))

    model = ModelWithScalers()
    dataset = ArangeDataset(1)

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=Momentum(.0),
              batch_size=1)
    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    manual = [param - learning_rate * scale for param, scale in
              zip(manual, scales)]

    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in zip(manual, model.get_params()))

    manual = [param - learning_rate * scale
              for param, scale
              in zip(manual, scales)]

    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in zip(manual, model.get_params()))
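The expected values follow from a one-line computation: the gradient of SumOfParams with respect to every parameter entry is 1 (DummyCost carries a weight of 0), so each sgd.train call moves every entry by

\[ p_{t+1} = p_t - \eta\, s \cdot 1 = p_t - \eta\, s \]

where \(\eta\) is the learning rate and \(s\) the per-parameter scale, which is exactly what the two manual updates compute before each train/assert round.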
Example 4: test_adagrad
# Required import: from pylearn2.training_algorithms.sgd import SGD [as alias]
# Or alternatively: from pylearn2.training_algorithms.sgd.SGD import train [as alias]
def test_adagrad():
    """
    Make sure that learning_rule.AdaGrad obtains the same parameter values as
    with a hand-crafted AdaGrad implementation, given a dummy model and
    learning rate scaler for each parameter.

    Reference:
    "Adaptive subgradient methods for online learning and
    stochastic optimization", Duchi J, Hazan E, Singer Y.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=AdaGrad(),
              batch_size=1)
    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['sg2'] = np.zeros(param_shape)

    def adagrad_manual(model, state):
        rval = []
        for scale, param in izip(scales, model.get_params()):
            pstate = state[param]
            param_val = param.get_value()
            # begin adagrad
            pstate['sg2'] += param_val ** 2
            dx_t = - (scale * learning_rate
                      / np.sqrt(pstate['sg2'])
                      * param_val)
            rval += [param_val + dx_t]
        return rval

    manual = adagrad_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in izip(manual, model.get_params()))

    manual = adagrad_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in izip(manual, model.get_params()))
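The manual loop realizes the AdaGrad rule from Duchi et al., again with g_t = param_val because the squared-parameter cost makes the gradient equal to the parameter. Unlike the AdaDelta test above, no epsilon appears inside the square root here:

\[ G_t = \sum_{\tau \le t} g_\tau^2, \qquad \Delta x_t = -\frac{\eta\, s}{\sqrt{G_t}}\, g_t \]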
Example 5: test_rmsprop
# Required import: from pylearn2.training_algorithms.sgd import SGD [as alias]
# Or alternatively: from pylearn2.training_algorithms.sgd.SGD import train [as alias]
def test_rmsprop():
    """
    Make sure that learning_rule.RMSProp obtains the same parameter values as
    with a hand-crafted RMSProp implementation, given a dummy model and
    learning rate scaler for each parameter.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1,), (9,), (8, 7), (6, 5, 4), (3, 2, 2, 2)]
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)

    learning_rate = .001
    decay = 0.90
    max_scaling = 1e5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=RMSProp(decay),
              batch_size=1)
    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['g2'] = np.zeros(param_shape)

    def rmsprop_manual(model, state):
        rval = []
        epsilon = 1. / max_scaling
        for scale, param in izip(scales, model.get_params()):
            pstate = state[param]
            param_val = param.get_value()
            # begin rmsprop
            pstate['g2'] = decay * pstate['g2'] + (1 - decay) * param_val ** 2
            rms_g_t = np.maximum(np.sqrt(pstate['g2']), epsilon)
            dx_t = - scale * learning_rate / rms_g_t * param_val
            rval += [param_val + dx_t]
        return rval

    manual = rmsprop_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in izip(manual, model.get_params()))
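In equation form, the manual implementation is the standard RMSProp update, with the denominator clipped from below by \(\epsilon = 1/\mathtt{max\_scaling}\) and g_t = param_val as in the other learning-rule tests:

\[ E[g^2]_t = \rho\, E[g^2]_{t-1} + (1-\rho)\, g_t^2, \qquad \Delta x_t = -\frac{\eta\, s}{\max\!\left(\sqrt{E[g^2]_t},\, \epsilon\right)}\, g_t \]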
Example 6: test_sgd_sequential
# Required import: from pylearn2.training_algorithms.sgd import SGD [as alias]
# Or alternatively: from pylearn2.training_algorithms.sgd.SGD import train [as alias]
def test_sgd_sequential():
    # tests that requesting train_iteration_mode = 'sequential'
    # works

    dim = 1
    batch_size = 3
    m = 5 * batch_size

    dataset = ArangeDataset(m)
    model = SoftmaxModel(dim)
    learning_rate = 1e-3
    batch_size = 5

    visited = [False] * m

    def visit(X):
        assert X.shape[1] == 1
        assert np.all(X[1:] == X[0:-1] + 1)
        start = int(X[0, 0])
        if start > 0:
            assert visited[start - 1]
        for i in xrange(batch_size):
            assert not visited[start + i]
            visited[start + i] = 1

    data_specs = (model.get_input_space(), model.get_input_source())
    cost = CallbackCost(visit, data_specs)

    # We need to include this so the test actually stops running at some point
    termination_criterion = EpochCounter(5)

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    train_iteration_mode='sequential',
                    monitoring_dataset=None,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)
    algorithm.setup(dataset=dataset, model=model)
    algorithm.train(dataset)

    assert all(visited)
Example 7: test_lr_scalers_momentum
# Required import: from pylearn2.training_algorithms.sgd import SGD [as alias]
# Or alternatively: from pylearn2.training_algorithms.sgd.SGD import train [as alias]
def test_lr_scalers_momentum():
    """
    Tests that SGD respects Model.get_lr_scalers when using
    momentum.
    """
    cost = SumOfParams()

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1,), (9,), (8, 7), (6, 5, 4), (3, 2, 2, 2)]
    learning_rate = .001

    class ModelWithScalers(Model):
        def __init__(self):
            super(ModelWithScalers, self).__init__()
            self._params = [sharedX(np.zeros(shape)) for shape in shapes]
            self.input_space = VectorSpace(1)

        def get_lr_scalers(self):
            return dict(zip(self._params, scales))

    model = ModelWithScalers()
    dataset = ArangeDataset(1)
    momentum = 0.5

    sgd = SGD(cost=cost, learning_rate=learning_rate, init_momentum=momentum,
              batch_size=1)
    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    inc = [-learning_rate * scale for scale in scales]
    manual = [param + i for param, i in zip(manual, inc)]

    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in zip(manual, model.get_params()))

    manual = [param - learning_rate * scale + i * momentum
              for param, scale, i in zip(manual, scales, inc)]

    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in zip(manual, model.get_params()))
Example 8: test_nesterov_momentum
# Required import: from pylearn2.training_algorithms.sgd import SGD [as alias]
# Or alternatively: from pylearn2.training_algorithms.sgd.SGD import train [as alias]
def test_nesterov_momentum():
    """
    Make sure that learning_rule.Momentum obtains the same parameter values as
    with a hand-crafted sgd w/ momentum implementation, given a dummy model and
    learning rate scaler for each parameter.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfParams(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    momentum = 0.5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=Momentum(momentum, nesterov_momentum=True),
              batch_size=1)
    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    vel = [-learning_rate * scale for scale in scales]
    updates = [-learning_rate * scale + v * momentum
               for scale, v in izip(scales, vel)]
    manual = [param + update for param, update in izip(manual, updates)]

    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in izip(manual, model.get_params()))

    vel = [-learning_rate * scale + i * momentum
           for scale, i in izip(scales, vel)]
    updates = [-learning_rate * scale + v * momentum
               for scale, v in izip(scales, vel)]
    manual = [param + update for param, update in izip(manual, updates)]

    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in izip(manual, model.get_params()))
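The two manual steps mirror the reformulated Nesterov update that Momentum(..., nesterov_momentum=True) applies directly to the parameters. With SumOfParams the gradient is the constant 1, so -learning_rate * scale plays the role of \(-\eta\, s\, g_t\):

\[ v_t = \mu\, v_{t-1} - \eta\, s\, g_t, \qquad p_t = p_{t-1} + \mu\, v_t - \eta\, s\, g_t \]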
Example 9: test_lr_scalers_momentum
# Required import: from pylearn2.training_algorithms.sgd import SGD [as alias]
# Or alternatively: from pylearn2.training_algorithms.sgd.SGD import train [as alias]
def test_lr_scalers_momentum():
    """
    Tests that SGD respects Model.get_lr_scalers when using
    momentum.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfParams(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1,), (9,), (8, 7), (6, 5, 4), (3, 2, 2, 2)]
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)

    learning_rate = .001
    momentum = 0.5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              init_momentum=momentum,
              batch_size=1)
    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    inc = [-learning_rate * scale for scale in scales]
    manual = [param + i for param, i in zip(manual, inc)]

    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in zip(manual, model.get_params()))

    manual = [param - learning_rate * scale + i * momentum
              for param, scale, i in
              zip(manual, scales, inc)]

    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in zip(manual, model.get_params()))
Example 10: test_momentum
# Required import: from pylearn2.training_algorithms.sgd import SGD [as alias]
# Or alternatively: from pylearn2.training_algorithms.sgd.SGD import train [as alias]
def test_momentum():
    """
    Make sure that learning_rule.Momentum obtains the same parameter values as
    with a hand-crafted sgd w/ momentum implementation, given a dummy model and
    learning rate scaler for each parameter.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfParams(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1,), (9,), (8, 7), (6, 5, 4), (3, 2, 2, 2)]
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)

    learning_rate = .001
    momentum = 0.5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=Momentum(momentum),
              batch_size=1)
    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    inc = [-learning_rate * scale for scale in scales]
    manual = [param + i for param, i in izip(manual, inc)]

    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in izip(manual, model.get_params()))

    manual = [param - learning_rate * scale + i * momentum
              for param, scale, i in izip(manual, scales, inc)]

    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param
               in izip(manual, model.get_params()))
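Written out, the hand-coded reference is classical (heavy-ball) momentum: a velocity is decayed, pushed by the gradient, and then added to the parameters:

\[ \Delta_t = \mu\, \Delta_{t-1} - \eta\, s\, g_t, \qquad p_t = p_{t-1} + \Delta_t \]

On the first train call \(\Delta_0 = 0\), so the step reduces to a plain scaled SGD step; the momentum term only appears from the second call onward, exactly as in the two manual updates above.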
Example 11: SequenceTaggerNetwork
# Required import: from pylearn2.training_algorithms.sgd import SGD [as alias]
# Or alternatively: from pylearn2.training_algorithms.sgd.SGD import train [as alias]
class SequenceTaggerNetwork(Model):
    def __init__(self, dataset, w2i, t2i, featurizer,
                 edim=None, hdims=None, fedim=None,
                 max_epochs=100, use_momentum=False, lr=.01, lr_lin_decay=None,
                 lr_scale=False, lr_monitor_decay=False,
                 valid_stop=False, reg_factors=None, dropout=False,
                 dropout_params=None, embedding_init=None,
                 embedded_model=None, monitor_train=True, plot_monitor=None,
                 num=False):
        super(SequenceTaggerNetwork, self).__init__()
        self.vocab_size = dataset.vocab_size
        self.window_size = dataset.window_size
        self.total_feats = dataset.total_feats
        self.feat_num = dataset.feat_num
        self.n_classes = dataset.n_classes
        self.max_epochs = max_epochs
        if edim is None:
            edim = 50
        if hdims is None:
            hdims = [100]
        if fedim is None:
            fedim = 5
        self.edim = edim
        self.fedim = fedim
        self.hdims = hdims
        self.w2i = w2i
        self.t2i = t2i
        self.featurizer = featurizer
        self._create_tagger()

        A_value = numpy.random.uniform(low=-.1, high=.1,
                                       size=(self.n_classes + 2,
                                             self.n_classes))
        self.A = sharedX(A_value, name='A')

        self.use_momentum = use_momentum
        self.lr = lr
        self.lr_lin_decay = lr_lin_decay
        self.lr_monitor_decay = lr_monitor_decay
        self.lr_scale = lr_scale
        self.valid_stop = valid_stop
        self.reg_factors = reg_factors
        self.close_cache = {}
        self.dropout_params = dropout_params
        self.dropout = dropout or self.dropout_params is not None
        self.monitor_train = monitor_train
        self.num = num
        self.plot_monitor = plot_monitor
        if embedding_init is not None:
            self.set_embedding_weights(embedding_init)

    def _create_tagger(self):
        self.tagger = WordTaggerNetwork(
            self.vocab_size, self.window_size, self.total_feats,
            self.feat_num, self.hdims, self.edim, self.fedim, self.n_classes)

    def _create_data_specs(self, dataset):
        self.input_space = CompositeSpace([
            dataset.data_specs[0].components[i]
            for i in xrange(len(dataset.data_specs[0].components) - 1)])
        self.output_space = dataset.data_specs[0].components[-1]
        self.input_source = dataset.data_specs[1][:-1]
        self.target_source = dataset.data_specs[1][-1]

    def __getstate__(self):
        d = {}
        d['vocab_size'] = self.vocab_size
        d['window_size'] = self.window_size
        d['feat_num'] = self.feat_num
        d['total_feats'] = self.total_feats
        d['n_classes'] = self.n_classes
        d['input_space'] = self.input_space
        d['output_space'] = self.output_space
        d['input_source'] = self.input_source
        d['target_source'] = self.target_source
        d['A'] = self.A
        d['tagger'] = self.tagger
        d['w2i'] = self.w2i
        d['t2i'] = self.t2i
        d['featurizer'] = self.featurizer
        d['max_epochs'] = self.max_epochs
        d['use_momentum'] = self.use_momentum
        d['lr'] = self.lr
        d['lr_lin_decay'] = self.lr_lin_decay
        d['lr_monitor_decay'] = self.lr_monitor_decay
        d['lr_scale'] = self.lr_scale
        d['valid_stop'] = self.valid_stop
        d['reg_factors'] = self.reg_factors
        d['dropout'] = self.dropout
        d['dropout_params'] = self.dropout_params
        d['monitor_train'] = self.monitor_train
        d['num'] = self.num
        d['plot_monitor'] = self.plot_monitor
        return d

    def fprop(self, data):
        tagger_out = self.tagger.fprop(data)
        #......... the rest of this example is omitted .........
Example 12: DenseDesignMatrix
# Required import: from pylearn2.training_algorithms.sgd import SGD [as alias]
# Or alternatively: from pylearn2.training_algorithms.sgd.SGD import train [as alias]
import theano
import numpy as np

from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.models.mlp import MLP, Sigmoid, Linear
from pylearn2.termination_criteria import EpochCounter
from pylearn2.training_algorithms.sgd import SGD

# Toy regression problem: y = x1 * x2 plus Gaussian noise.
n = 200
p = 2
X = np.random.normal(0, 1, (n, p))
y = X[:, 0] * X[:, 1] + np.random.normal(0, .1, n)
y.shape = (n, 1)
ds = DenseDesignMatrix(X=X, y=y)

hidden_layer = Sigmoid(layer_name='hidden', dim=10, irange=.1, init_bias=1.)
output_layer = Linear(dim=1, layer_name='y', irange=.1)
trainer = SGD(learning_rate=.05, batch_size=10,
              termination_criterion=EpochCounter(200))
layers = [hidden_layer, output_layer]
ann = MLP(layers, nvis=2)
trainer.setup(ann, ds)

while True:
    trainer.train(dataset=ds)
    ann.monitor.report_epoch()
    ann.monitor()
    if not trainer.continue_learning(ann):
        break

inputs = X
y_est = ann.fprop(theano.shared(inputs, name='inputs')).eval()
print(y_est.shape)
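A quick sanity check on the fit (not part of the original snippet; the comparison against np.var(y) is just an illustration):

mse = np.mean((y_est - y) ** 2)
print(mse, np.var(y))  # mse should end up far below var(y) if training worked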
Example 13: MLP
# Required import: from pylearn2.training_algorithms.sgd import SGD [as alias]
# Or alternatively: from pylearn2.training_algorithms.sgd.SGD import train [as alias]
# (This excerpt begins partway through the layer definitions; layer0..layer3
# and ishape are defined in code the source page does not show.)
                istdev=.05
                )
layers = [layer0, layer1, layer3]
#layers = [layer0, layer2, layer3]
ann = MLP(layers, input_space=ishape)

t_algo = SGD(learning_rate=1e-1,
             batch_size=100,
             batches_per_iter=1,
             termination_criterion=EpochCounter(2))
ds = DataPylearn2([train_set_x, train_set_y], [48, 48, 1], 7)
t_algo.setup(ann, ds)

while True:
    t_algo.train(dataset=ds)
    ann.monitor.report_epoch()
    ann.monitor()
    if not t_algo.continue_learning(ann):
        break

# test: https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/scripts/icml_2013_wrepl/emotions/make_submission.py
ds2 = DataPylearn2([test_set_x, test_set_y], [48, 48, 1], -1)
m = ds2.X.shape[0]
batch_size = 100
extra = (batch_size - m % batch_size) % batch_size
assert (m + extra) % batch_size == 0
if extra > 0:
    ds2.X = np.concatenate((ds2.X, np.zeros((extra, ds2.X.shape[1]),
                                            dtype=ds2.X.dtype)), axis=0)
assert ds2.X.shape[0] % batch_size == 0
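To turn this padded test matrix into predictions, the make_submission.py script linked above compiles the MLP's fprop into a Theano function and feeds it one batch at a time. A hedged sketch along those lines, assuming theano is imported and that DataPylearn2 exposes its data like a DenseDesignMatrix (a design matrix ds2.X plus get_topological_view for convolutional input spaces):

Xb = ann.get_input_space().make_batch_theano()
f = theano.function([Xb], ann.fprop(Xb))

y_hat = []
for i in xrange(ds2.X.shape[0] // batch_size):
    x_arg = ds2.X[i * batch_size:(i + 1) * batch_size, :]
    if Xb.ndim > 2:
        # convolutional input space: reshape flat rows into image batches
        x_arg = ds2.get_topological_view(x_arg)
    y_hat.append(f(x_arg.astype(Xb.dtype)))
y_hat = np.concatenate(y_hat, axis=0)[:m]  # drop the zero-padded rows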