本文整理汇总了Python中neon.models.Model.load_params方法的典型用法代码示例。如果您正苦于以下问题：Python Model.load_params方法的具体用法？Python Model.load_params怎么用？Python Model.load_params使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类neon.models.Model的用法示例。
在下文中一共展示了Model.load_params方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: create_model
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_params [as 别名]
def create_model(model_type, model_tree, freeze, dataset_dir, model_file, img_loader):
    """Build the model, cost and optimizer for the requested architecture.

    NOTE(review): the block nesting below was reconstructed from a copy of
    this function whose indentation had been lost; in particular, confirm
    that the ``FreezeSequential`` wrapping belongs under ``if freeze > 0``.

    Args:
        model_type: ``"alexnet"`` or ``"vgg"``; selects the optimizer and the
            layer-building function.
        model_tree: If truthy, build a ``TaxonomicBranchModel`` from branched
            layers; otherwise build a plain ``Model``.
        freeze: If greater than zero, weights are copied from the model stored
            in ``model_file``; the first ``len(saved) - freeze * 2``
            optimizable layers receive the saved weights and are marked
            ``optimize = False``, the remaining ones ``optimize = True``.
        dataset_dir: Passed to ``ClassTaxonomy`` when ``model_tree`` is set.
        model_file: Parameter file loaded into the reference model when
            ``freeze > 0``.
        img_loader: Data loader; supplies ``nclass`` and is used to initialize
            the model when ``freeze > 0``.

    Returns:
        Tuple ``(model, cost, opt)``.

    Raises:
        NotImplementedError: If ``model_type`` is not a supported name.
    """
    # Resolve the architecture first so an unsupported name fails before any
    # other object is constructed.
    if model_type == "alexnet":
        opt = create_alexnet_opt()
        layer_func = create_alexnet_layers
    elif model_type == "vgg":
        opt = create_vgg_opt()
        layer_func = create_vgg_layers
    else:
        raise NotImplementedError(model_type + " has not been implemented")

    cost = GeneralizedCost(costfunc=CrossEntropyMulti())

    if model_tree:
        ctree = ClassTaxonomy("Aves", "taxonomy_dict.p", dataset_dir)
        layers = created_branched(layer_func, ctree, img_loader)
        model = TaxonomicBranchModel(layers=layers)
    else:
        layers = layer_func(img_loader.nclass)
        model = Model(layers=layers)

    if freeze > 0:
        # Reference network built with 1000 outputs, used only as the source
        # of the saved weights.
        saved_model = Model(layers=layer_func(1000))
        saved_model.load_params(model_file)
        model.initialize(img_loader)
        model.initialized = False
        saved_lto = saved_model.layers.layers_to_optimize
        model_lto = model.layers.layers_to_optimize
        # presumably two optimizable entries per frozen layer — TODO confirm
        keep_length = len(saved_lto) - freeze * 2

        # Copy the saved weights into the leading layers and freeze them.
        for i in range(len(saved_lto))[:keep_length]:
            model_lto[i].W[:] = saved_lto[i].W
            model_lto[i].optimize = False
        # Everything after the frozen prefix stays trainable.
        for i in range(len(model_lto))[keep_length:]:
            model_lto[i].optimize = True

        model.layers = FreezeSequential(layers)
        model.layers_to_optimize = model.layers.layers_to_optimize

    return model, cost, opt
示例2: RecurrentSum
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_params [as 别名]
hidden_size,
init_glorot,
activation=Tanh(),
gate_activation=Logistic(),
reset_cells=True),
RecurrentSum(),
Dropout(keep=0.5),
Affine(nclass, init_glorot, bias=init_glorot, activation=Softmax())
]
# load the weights
print("Initialized the models - ")
model_new = Model(layers=layers)
print("Loading the weights from {0}".format(args.model_weights))
model_new.load_params(args.model_weights)
model_new.initialize(dataset=(sentence_length, batch_size))
# setup buffers before accepting reviews
xdev = be.zeros((sentence_length, 1), dtype=np.int32)  # bsz is 1, feature size
xbuf = np.zeros((1, sentence_length), dtype=np.int32)
oov = 2
start = 1
index_from = 3
pad_char = 0
# Open the vocabulary file in a context manager so the handle is closed as
# soon as it has been read.
# NOTE: pickle.load can execute arbitrary code; only use trusted vocab files.
with open(args.vocab_file, 'rb') as _vocab_fh:
    vocab, rev_vocab = pickle.load(_vocab_fh)
while True:
line = input('Enter a Review from testData.tsv file \n')
# clean the input
示例3: TopKMisclassification
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_params [as 别名]
valmetric = TopKMisclassification(k=5)
# dummy optimizer for benchmarking
# training implementation coming soon
opt_gdm = GradientDescentMomentum(0.0, 0.0)
opt_biases = GradientDescentMomentum(0.0, 0.0)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})
# setup cost function as CrossEntropy
cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti()),
                        GeneralizedCost(costfunc=CrossEntropyMulti()),
                        GeneralizedCost(costfunc=CrossEntropyMulti())],
                 weights=[1, 0., 0.])  # We only want to consider the CE of the main path
assert os.path.exists(args.model_file), 'script requires the trained weights file'
model.load_params(args.model_file)
model.initialize(test, cost)
# print(...) with a single parenthesized argument produces the same output
# under Python 2 and Python 3.
print('running speed benchmark...')
model.benchmark(test, cost, opt)
print('\nCalculating performance on validation set...')
test.reset()
mets = model.eval(test, metric=valmetric)
print('Validation set metrics:')
print('LogLoss: %.2f, Accuracy: %.1f %% (Top-1), %.1f %% (Top-5)' % (mets[0],
                                                                     (1.0-mets[1])*100,
                                                                     (1.0-mets[2])*100))
示例4: MergeMultistream
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_params [as 别名]
MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
Dropout(keep=0.5),
LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=True),
Affine(train_set.vocab_size, init, bias=init2, activation=Softmax())
]
cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))
# configure callbacks
checkpoint_model_path = "~/image_caption2.pickle"
if args.callback_args['save_path'] is None:
    args.callback_args['save_path'] = checkpoint_model_path
if args.callback_args['serialize'] is None:
    args.callback_args['serialize'] = 1
model = Model(layers=layers)
callbacks = Callbacks(model, train_set, **args.callback_args)
opt = RMSProp(decay_rate=0.997, learning_rate=0.0005, epsilon=1e-8, gradient_clip_value=1)
# train model
model.fit(train_set, optimizer=opt, num_epochs=num_epochs, cost=cost, callbacks=callbacks)
# load the saved model and evaluate bleu score on test set
# Reload from the path the callbacks were configured with: it equals
# checkpoint_model_path only when no save_path was supplied, so loading the
# default unconditionally would ignore a user-provided save_path.
model.load_params(args.callback_args['save_path'])
test_set = ImageCaptionTest(path=data_path)
sents, targets = test_set.predict(model)
test_set.bleu_score(sents, targets)
示例5: LSTM
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_params [as 别名]
seq_len = 1
# The two variants differ only by the RecurrentLast layer that is inserted
# between the LSTM and the Affine layer when sequences are not returned.
layers = [
    LSTM(hidden, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=False),
]
if return_sequences is not True:
    layers.append(RecurrentLast())
layers.append(Affine(train_set.nfeatures, init, bias=init, activation=Identity()))

# Rebuild the network and restore the saved parameters.
model_new = Model(layers=layers)
model_new.load_params(args.save_path)
model_new.initialize(dataset=(train_set.nfeatures, seq_len))

output = np.zeros((train_set.nfeatures, num_predict))
seed = time_series.train[:seed_seq_len]
x = model_new.be.empty((train_set.nfeatures, seq_len))
# Push the seed values through the network one at a time.
# NOTE(review): loop nesting reconstructed from unindented text — confirm.
for s_in in seed:
    x.set(s_in.reshape(train_set.nfeatures, seq_len))
    y = model_new.fprop(x, inference=False)
for i in range(num_predict):
# Take last prediction and feed into next fprop
pred = y.get()[:, -1]
output[:, i] = pred
x[:] = pred.reshape(train_set.nfeatures, seq_len)
示例6: MergeMultistream
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_params [as 别名]
MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
Dropout(keep=0.5),
LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=True),
Affine(train_set.vocab_size, init, bias=init2, activation=Softmax())
]
cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))
# configure callbacks
checkpoint_model_path = "~/image_caption2.pkl"
if args.callback_args['save_path'] is None:
    args.callback_args['save_path'] = checkpoint_model_path
if args.callback_args['serialize'] is None:
    args.callback_args['serialize'] = 1
model = Model(layers=layers)
callbacks = Callbacks(model, **args.callback_args)
opt = RMSProp(decay_rate=0.997, learning_rate=0.0005, epsilon=1e-8, gradient_clip_value=1)
# train model
model.fit(train_set, optimizer=opt, num_epochs=num_epochs, cost=cost, callbacks=callbacks)
# load model (if it exists) and evaluate bleu score on test set
# os.path.exists() does not expand "~", so the default checkpoint path would
# never be found; expand the user directory before testing for the file.
saved_model_path = os.path.expanduser(args.callback_args['save_path'])
if os.path.exists(saved_model_path):
    model.load_params(saved_model_path)
sents, targets = test_set.predict(model)
test_set.bleu_score(sents, targets)
示例7: len
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_params [as 别名]
# File holding the trained network parameters.
param_file_name = home_dir + "/ubuntu/model/trained_bot_model_32x32.prm"
# from ROBOT-C bot.c
class_names = ["forward", "left", "right", "backward"]
nclasses = len(class_names)
size = (H, W)
# NN backend
be = gen_backend(backend='cpu', batch_size=1)
# Unnecessary NN weight initialization
init_uni = Uniform(low=-0.1, high=0.1)
# enable NN batch normalization
bn = True
layers = [
    Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=bn),
    Pooling((2, 2)),
    Conv((3, 3, 32), init=init_uni, activation=Rectlin(), batch_norm=bn),
    Pooling((2, 2)),
    Affine(nout=50, init=init_uni, activation=Rectlin(), batch_norm=bn),
    Affine(nout=nclasses, init=init_uni, activation=Softmax()),
]
# Build the network and restore its parameters without the saved states.
model = Model(layers=layers)
model.load_params(param_file_name, load_states=False)
def usage():
    """Print the command-line help text, including current option defaults.

    Reads the module-level settings ``file_name_prefix``, ``w``, ``h``,
    ``fps``, ``quality``, ``bitrate`` and ``iso``. Each ``print`` call takes
    a single parenthesized argument so the output is the same under
    Python 2 and Python 3.
    """
    print("python connect_to_vex_cortex.py")
    print(" Raspberry Pi records video, commands from VEX Cortex 2.0")
    print(" -p " + file_name_prefix + ": file name prefix")
    print(" -d: display received commands for debug")
    print(" -w " + str(w) + ": video width")
    print(" -h " + str(h) + ": video height")
    print(" -f " + str(fps) + ": video FPS, 0 for camera default")
    print(" -q " + str(quality) + ": quality to record video, 1..40")
    print(" -b " + str(bitrate) + ": bitrate e.g. 15000000, 0 for unlimited")
    print(" -i " + str(iso) + ": ISO 0 | 100 ... 800, see picamera doc, 0 for camera default")
    print(" -m: horizontal mirror")
    print(" -v: vertical mirror")
    print(" -s: shut down system on exit (must run as super user)")
示例8: __init__
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_params [as 别名]
#.........这里部分代码省略.........
def _setInput(self, states):
    """Copy a batch of states into ``self.input`` and divide it by 255.

    Args:
        states: Array with four axes; they are reordered with
            ``axes=(1, 2, 3, 0)`` before being copied into the input buffer
            (presumably moving the batch axis last — TODO confirm).
    """
    # change order of axes to match what Neon expects
    states = np.transpose(states, axes=(1, 2, 3, 0))
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.input.set(states.copy())
    # normalize network input between 0 and 1
    self.be.divide(self.input, 255, self.input)
def train(self, minibatch, epoch):
    """Run one Q-learning update on a minibatch of transitions.

    NOTE(review): block nesting was reconstructed from a copy of this method
    whose indentation had been lost — confirm against the upstream file.

    Args:
        minibatch: Tuple ``(prestates, actions, rewards, poststates,
            terminals)``. The two state arrays must be 4-dimensional with
            equal shapes, the other three 1-dimensional, and all five must
            share the same first dimension.
        epoch: Passed through to the optimizer.
    """
    # expand components of minibatch
    prestates, actions, rewards, poststates, terminals = minibatch

    assert len(prestates.shape) == 4
    assert len(poststates.shape) == 4
    assert len(actions.shape) == 1
    assert len(rewards.shape) == 1
    assert len(terminals.shape) == 1
    assert prestates.shape == poststates.shape
    assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]

    # Periodically refresh the target network from the training network.
    if self.target_steps and self.train_iterations % self.target_steps == 0:
        # have to serialize also states for batch normalization to work
        pdict = self.model.get_description(get_weights=True, keep_states=True)
        self.target_model.deserialize(pdict, load_states=True)

    # feed-forward pass for poststates to get Q-values
    self._setInput(poststates)
    postq = self.target_model.fprop(self.input, inference=True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate
    maxpostq = self.be.max(postq, axis=0).asnumpyarray()
    assert maxpostq.shape == (1, self.batch_size)

    # feed-forward pass for prestates
    self._setInput(prestates)
    preq = self.model.fprop(self.input, inference=False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    # It seems necessary for cpu backend.
    targets = preq.asnumpyarray().copy()

    # clip rewards between -1 and 1
    rewards = np.clip(rewards, self.min_reward, self.max_reward)

    # update Q-value targets for actions taken
    for i, action in enumerate(actions):
        if terminals[i]:
            targets[action, i] = float(rewards[i])
        else:
            targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0, i]

    # copy targets to GPU memory
    self.targets.set(targets)

    # calculate errors
    deltas = self.cost.get_errors(preq, self.targets)
    assert deltas.shape == (self.num_actions, self.batch_size)
    #assert np.count_nonzero(deltas.asnumpyarray()) == 32

    # calculate cost, just in case
    cost = self.cost.get_cost(preq, self.targets)
    assert cost.shape == (1, 1)

    # clip errors
    if self.clip_error:
        self.be.clip(deltas, -self.clip_error, self.clip_error, out=deltas)

    # perform back-propagation of gradients
    self.model.bprop(deltas)

    # perform optimization
    self.optimizer.optimize(self.model.layers_to_optimize, epoch)

    # increase number of weight updates (needed for target clone interval)
    self.train_iterations += 1

    # calculate statistics
    if self.callback:
        self.callback.on_train(cost[0, 0])
def predict(self, states):
    """Compute the Q-values of the training network for a full minibatch.

    Args:
        states: Array of shape
            ``(batch_size, history_length) + screen_dim``.

    Returns:
        The Q-values as a numpy array, transposed so that the batch is the
        first dimension.
    """
    # minibatch is full size, because Neon doesn't let change the minibatch size
    assert states.shape == ((self.batch_size, self.history_length,) + self.screen_dim)

    # calculate Q-values for the states
    self._setInput(states)
    qvalues = self.model.fprop(self.input, inference=True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    # Guarded so the array conversion only happens when debug logging is on.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:, 0]))

    # transpose the result, so that batch size is first dimension
    return qvalues.T.asnumpyarray()
def load_weights(self, load_path):
    """Load the parameters of ``self.model`` from ``load_path``."""
    self.model.load_params(load_path)
def save_weights(self, save_path):
    """Save the parameters of ``self.model`` to ``save_path``."""
    self.model.save_params(save_path)
示例9: test_model_serialize
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_params [as 别名]
def test_model_serialize(backend_default, data):
    """Check that saving and reloading a model preserves outputs and state.

    A small two-stream network is trained for a few batches, saved with its
    states, reloaded into a fresh ``Model`` and compared against the
    pre-save outputs, layer states and layer parameters.

    NOTE(review): block nesting was reconstructed from a copy of this test
    whose indentation had been lost — confirm against the upstream file.
    """
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=data)

    train_set = ArrayIterator(
        [X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([Conv((5, 5, 16), init=init_norm, bias=Constant(0), activation=Rectlin()),
                        Pooling(2),
                        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())])
    path2 = Sequential([Affine(nout=100, init=init_norm, bias=Constant(0), activation=Rectlin()),
                        Dropout(keep=0.5),
                        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())])
    layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
              Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)
    n_test = 3
    num_epochs = 3

    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [layer.get_params_serialize() for layer in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    mlp.save_params(tmp_save, keep_states=True)

    try:
        # Load model
        mlp = Model(layers=layers)
        mlp.load_params(tmp_save)

        outputs = []
        pdicts = [layer.get_params_serialize() for layer in mlp.layers_to_optimize]
        for i, (x, t) in enumerate(train_set):
            outputs.append(mlp.fprop(x, inference=True))
            if i > n_test:
                break

        # Check outputs, states, and params are the same
        for output, output_exp in zip(outputs, outputs_exp):
            assert np.allclose(output.get(), output_exp.get())

        for pd, pd_exp in zip(pdicts, pdicts_exp):
            for s, s_e in zip(pd['states'], pd_exp['states']):
                if isinstance(s, list):  # this is the batch norm case
                    for _s, _s_e in zip(s, s_e):
                        assert np.allclose(_s, _s_e)
                else:
                    assert np.allclose(s, s_e)
            for p, p_e in zip(pd['params'], pd_exp['params']):
                # Exact type match is intended here, not isinstance().
                assert type(p) is type(p_e)
                if isinstance(p, list):  # this is the batch norm case
                    for _p, _p_e in zip(p, p_e):
                        assert np.allclose(_p, _p_e)
                elif isinstance(p, np.ndarray):
                    assert np.allclose(p, p_e)
                else:
                    assert p == p_e
    finally:
        # Remove the temporary file even when one of the checks above fails.
        os.remove(tmp_save)
示例10: DQNNeon
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_params [as 别名]
#.........这里部分代码省略.........
assert len(poststates.shape) == 4
assert len(actions.shape) == 1
assert len(rewards.shape) == 1
assert len(terminals.shape) == 1
assert prestates.shape == poststates.shape
assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
# feed-forward pass for poststates to get Q-values
self._prepare_network_input(poststates)
postq = self.target_model.fprop(self.input, inference = True)
assert postq.shape == (self.output_shape, self.batch_size)
# calculate max Q-value for each poststate
maxpostq = self.be.max(postq, axis=0).asnumpyarray()
assert maxpostq.shape == (1, self.batch_size)
# average maxpostq for stats
maxpostq_avg = maxpostq.mean()
# feed-forward pass for prestates
self._prepare_network_input(prestates)
preq = self.model.fprop(self.input, inference = False)
assert preq.shape == (self.output_shape, self.batch_size)
# make copy of prestate Q-values as targets
targets = preq.asnumpyarray()
# clip rewards between -1 and 1
rewards = np.clip(rewards, self.min_reward, self.max_reward)
# update Q-value targets for each state only at actions taken
for i, action in enumerate(actions):
if terminals[i]:
targets[action, i] = float(rewards[i])
else:
targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]
# copy targets to GPU memory
self.targets.set(targets)
# calculate errors
errors = self.cost_func.get_errors(preq, self.targets)
assert errors.shape == (self.output_shape, self.batch_size)
# average error where there is a error (should be 1 in every row)
#TODO: errors_avg = np.sum(errors)/np.size(errors[errors>0.])
# clip errors
if self.clip_error:
self.be.clip(errors, -self.clip_error, self.clip_error, out = errors)
# calculate cost, just in case
cost = self.cost_func.get_cost(preq, self.targets)
assert cost.shape == (1,1)
# perform back-propagation of gradients
self.model.bprop(errors)
# perform optimization
self.optimizer.optimize(self.model.layers_to_optimize, epoch)
# increase number of weight updates (needed for target clone interval)
self.update_iterations += 1
if self.target_update_frequency and self.update_iterations % self.target_update_frequency == 0:
self._copy_theta()
_logger.info("Network update #%d: Cost = %s, Avg Max Q-value = %s" % (self.update_iterations, str(cost.asnumpyarray()[0][0]), str(maxpostq_avg)))
# update statistics
if self.callback:
self.callback.from_learner(cost.asnumpyarray()[0,0], maxpostq_avg)
def get_Q(self, state):
    """ Calculates the Q-values for one mini-batch.

    Args:
        state(numpy.ndarray): Single state, shape=(sequence_length,frame_width,frame_height).

    Returns:
        q_values (numpy.ndarray): Results for first element of mini-batch from one forward pass through the network, shape=(self.output_shape,)
    """
    _logger.debug("State shape = %s" % str(state.shape))
    # minibatch is full size, because Neon doesn't let change the minibatch size
    # so we need to run 32 forward steps to get the one we actually want
    self.dummy_batch[0] = state
    states = self.dummy_batch
    assert states.shape == ((self.batch_size, self.sequence_length,) + self.frame_dims)
    # calculate Q-values for the states
    self._prepare_network_input(states)
    qvalues = self.model.fprop(self.input, inference=True)
    assert qvalues.shape == (self.output_shape, self.batch_size)
    # Convert to numpy once and reuse the result for logging and the return
    # value instead of converting the tensor twice.
    first_qvalues = qvalues.asnumpyarray()[:, 0]
    _logger.debug("Qvalues: %s" % (str(first_qvalues)))
    return first_qvalues
def _copy_theta(self):
    """ Copies the weights of the current network to the target network. """
    _logger.debug("Copying weights")
    # The description is taken with weights and states and deserialized into
    # the target model with load_states=True.
    pdict = self.model.get_description(get_weights=True, keep_states=True)
    self.target_model.deserialize(pdict, load_states=True)
def save_weights(self, target_dir, epoch):
    """ Saves the current network parameters to disk.

    The file is named ``<game>_<net_type>_<optimizer>_<epoch + 1>.prm`` with
    the three names lower-cased.

    Args:
        target_dir (str): Directory where the network parameters are stored for each episode.
        epoch (int): Current epoch.
    """
    # "%s" already converts its argument to a string, so no str() wrappers
    # are needed around the lower-cased names.
    filename = "%s_%s_%s_%d.prm" % (self.args.game.lower(),
                                    self.args.net_type.lower(),
                                    self.args.optimizer.lower(),
                                    epoch + 1)
    self.model.save_params(os.path.join(target_dir, filename))
def load_weights(self, source_file):
    """ Loads the network parameters from a given file.

    Args:
        source_file (str): Complete path to a file with network parameters.
    """
    self.model.load_params(source_file)
示例11: Uniform
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_params [as 别名]
from neon.layers import Conv, Affine, Pooling
from neon.initializers import Uniform
from neon.transforms.activation import Rectlin, Softmax
# Weight initializer shared by every layer below.
init_uni = Uniform(low=-0.1, high=0.1)
layers = [
    Conv(fshape=(5,5,16), init=init_uni, activation=Rectlin()),
    Pooling(fshape=2, strides=2),
    Conv(fshape=(5,5,32), init=init_uni, activation=Rectlin()),
    Pooling(fshape=2, strides=2),
    Affine(nout=500, init=init_uni, activation=Rectlin()),
    Affine(nout=10, init=init_uni, activation=Softmax()),
]
print("Before running this script, run my_cifar_train.py to train a CIFAR10 model")
print("Loading pre-trained CIFAR10 model")
from neon.models import Model
# Build the network and restore its parameters without the saved states.
model = Model(layers)
model.load_params("cifar10_model.prm", load_states=False)
# Class labels, one per network output.
classes = [
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
]
nclass = len(classes)
# Sanity check 1
# an image of a frog from wikipedia
# image_source = "https://upload.wikimedia.org/wikipedia/commons/thumb/5/55/Atelopus_zeteki1.jpg/440px-Atelopus_zeteki1.jpg"
# import urllib
# urllib.urlretrieve(image_source, filename="image.jpg")
# crop and resize to 32x32
from PIL import Image
import numpy as np
示例12: zip
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_params [as 别名]
# Now construct the network
from neon.layers import ColorNoise
#layers = [ColorNoise()]
# layers.append(Affine(nout=100, init=Kaiming(local=False), batch_norm=True, activation=Softmax()))
# Import once, before the loop, instead of on every iteration.
from neon.util.persist import save_obj

scales = [112, 128, 160, 240]
# For each input scale: rebuild the network, load the trained parameters,
# run it over the validation set and save the outputs to a per-scale file.
# NOTE(review): loop nesting reconstructed from unindented text — confirm.
for scale in scales:
    # Single parenthesized argument: same output under Python 2 and 3.
    print(scale)
    layers = [Conv(**conv_params(7, 32, 2))]
    for nfm, stride in zip(nfms, strides):
        layers.append(module_factory(nfm, stride))
    layers.append(Pooling(7, op='avg'))
    layers.append(Conv(fshape=(1,1,100), init=Kaiming(local=True), batch_norm=True))
    layers.append(Pooling(fshape='all', op='avg'))
    layers.append(Activation(Softmax()))
    model = Model(layers=layers)
    test = ImageLoader(set_name='validation', shuffle=False, do_transforms=False, inner_size=scale,
                       scale_range=scale, repo_dir=args.data_dir)
    model.load_params("/home/users/hunter/bigfeat_dropout.pkl")
    softmaxes = model.get_outputs(test)
    save_obj(softmaxes, "bigfeat_dropout_SM_{}.pkl".format(scale))
示例13: ModelRunnerNeon
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_params [as 别名]
#.........这里部分代码省略.........
initializer = self.get_initializer(input_size = 7 * 7 * 64)
layers.append(Affine(nout=512, init=initializer, bias=initializer, activation=Rectlin()))
initializer = self.get_initializer(input_size = 512)
layers.append(Affine(nout=max_action_no, init=initializer, bias=initializer))
return layers
def clip_reward(self, reward):
    """Limit ``reward`` to the configured clipping range.

    Args:
        reward: Reward value to clip.

    Returns:
        ``self.args.clip_reward_high`` if ``reward`` is above it,
        ``self.args.clip_reward_low`` if ``reward`` is below it, otherwise
        ``reward`` unchanged.
    """
    if reward > self.args.clip_reward_high:
        return self.args.clip_reward_high
    if reward < self.args.clip_reward_low:
        return self.args.clip_reward_low
    return reward
def set_input(self, data):
    """Copy ``data`` into ``self.input`` with reordered axes, divided by 255.

    Args:
        data: Batch of inputs; its axes are reordered with ``(1, 2, 3, 0)``.
            When ``self.use_gpu_replay_mem`` is set the transpose is done by
            the backend through ``self.input_uint8``; otherwise ``data`` is
            transposed with its own ``transpose`` method and copied in.
    """
    if self.use_gpu_replay_mem:
        self.be.copy_transpose(data, self.input_uint8, axes=(1, 2, 3, 0))
        self.input[:] = self.input_uint8 / 255
    else:
        self.input.set(data.transpose(1, 2, 3, 0).copy())
        self.be.divide(self.input, 255, self.input)
def predict(self, history_buffer):
    """Run the training network on ``history_buffer`` in inference mode.

    Args:
        history_buffer: Input passed to ``set_input``.

    Returns:
        The first row of the transposed network output as a numpy array.
    """
    self.set_input(history_buffer)
    output = self.train_net.fprop(self.input, inference=True)
    return output.T.asnumpyarray()[0]
def print_weights(self):
    """Do nothing; placeholder with no implementation."""
    pass
def train(self, minibatch, replay_memory, learning_rate, debug):
    """Run one Q-learning update of the training network on a minibatch.

    NOTE(review): block nesting was reconstructed from a copy of this method
    whose indentation had been lost — confirm against the upstream file.

    Args:
        minibatch: Tuple ``(prestates, actions, rewards, poststates,
            terminals)``; with prioritized replay enabled it additionally
            carries ``replay_indexes, heap_indexes, weights``.
        replay_memory: Receives ``update_td`` calls when prioritized replay
            is enabled.
        learning_rate: Not used by this method.
        debug: When truthy, print per-sample weights and deltas during the
            prioritized-replay update.
    """
    if self.args.prioritized_replay == True:
        prestates, actions, rewards, poststates, terminals, replay_indexes, heap_indexes, weights = minibatch
    else:
        prestates, actions, rewards, poststates, terminals = minibatch

    # Get Q*(s, a) with targetNet
    self.set_input(poststates)
    post_qvalue = self.target_net.fprop(self.input, inference=True).T.asnumpyarray()

    if self.args.double_dqn == True:
        # Get Q*(s, a) with trainNet
        post_qvalue2 = self.train_net.fprop(self.input, inference=True).T.asnumpyarray()

    # Get Q(s, a) with trainNet
    self.set_input(prestates)
    pre_qvalue = self.train_net.fprop(self.input, inference=False)

    # Start from the current predictions and overwrite only the entries of
    # the actions that were taken.
    label = pre_qvalue.asnumpyarray().copy()
    for i in range(0, self.train_batch_size):
        if self.args.clip_reward:
            reward = self.clip_reward(rewards[i])
        else:
            reward = rewards[i]
        if terminals[i]:
            label[actions[i], i] = reward
        else:
            if self.args.double_dqn == True:
                # Action picked by the training network, value taken from
                # the target network.
                max_index = np.argmax(post_qvalue2[i])
                label[actions[i], i] = reward + self.discount_factor * post_qvalue[i][max_index]
            else:
                label[actions[i], i] = reward + self.discount_factor * np.max(post_qvalue[i])

    # copy targets to GPU memory
    self.targets.set(label)

    delta = self.cost.get_errors(pre_qvalue, self.targets)

    if self.args.prioritized_replay == True:
        delta_value = delta.asnumpyarray()
        for i in range(self.train_batch_size):
            if debug:
                # Single parenthesized argument: same output on Python 2 and 3.
                print('weight[%s]: %.5f, delta: %.5f, newDelta: %.5f' % (i, weights[i], delta_value[actions[i], i], weights[i] * delta_value[actions[i], i]))
            replay_memory.update_td(heap_indexes[i], abs(delta_value[actions[i], i]))
            delta_value[actions[i], i] = weights[i] * delta_value[actions[i], i]
        delta.set(delta_value.copy())

    if self.args.clip_loss:
        self.be.clip(delta, -1.0, 1.0, out=delta)

    self.train_net.bprop(delta)
    self.optimizer.optimize(self.train_net.layers_to_optimize, epoch=0)
def update_model(self):
    """Copy the training network's weights and states into the target network."""
    # have to serialize also states for batch normalization to work
    pdict = self.train_net.get_description(get_weights=True, keep_states=True)
    self.target_net.deserialize(pdict, load_states=True)
    #print ('Updated target model')
def finish_train(self):
    """Set ``self.running`` to ``False``."""
    self.running = False
def load(self, file_name):
    """Load the training network's parameters, then call ``update_model()``.

    Args:
        file_name: Path of the parameter file passed to ``load_params``.
    """
    self.train_net.load_params(file_name)
    self.update_model()
def save(self, file_name):
    """Save the training network's parameters to ``file_name``."""
    self.train_net.save_params(file_name)