本文整理汇总了Python中neon.models.Model.get_description方法的典型用法代码示例。如果您正苦于以下问题:Python Model.get_description方法的具体用法?Python Model.get_description怎么用?Python Model.get_description使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类neon.models.Model的用法示例。
在下文中一共展示了Model.get_description方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: DQNNeon
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import get_description [as 别名]
#.........这里部分代码省略.........
assert len(poststates.shape) == 4
assert len(actions.shape) == 1
assert len(rewards.shape) == 1
assert len(terminals.shape) == 1
assert prestates.shape == poststates.shape
assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
# feed-forward pass for poststates to get Q-values
self._prepare_network_input(poststates)
postq = self.target_model.fprop(self.input, inference = True)
assert postq.shape == (self.output_shape, self.batch_size)
# calculate max Q-value for each poststate
maxpostq = self.be.max(postq, axis=0).asnumpyarray()
assert maxpostq.shape == (1, self.batch_size)
# average maxpostq for stats
maxpostq_avg = maxpostq.mean()
# feed-forward pass for prestates
self._prepare_network_input(prestates)
preq = self.model.fprop(self.input, inference = False)
assert preq.shape == (self.output_shape, self.batch_size)
# make copy of prestate Q-values as targets
targets = preq.asnumpyarray()
# clip rewards between -1 and 1
rewards = np.clip(rewards, self.min_reward, self.max_reward)
# update Q-value targets for each state only at actions taken
for i, action in enumerate(actions):
if terminals[i]:
targets[action, i] = float(rewards[i])
else:
targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]
# copy targets to GPU memory
self.targets.set(targets)
# calculate errors
errors = self.cost_func.get_errors(preq, self.targets)
assert errors.shape == (self.output_shape, self.batch_size)
# average error where there is a error (should be 1 in every row)
#TODO: errors_avg = np.sum(errors)/np.size(errors[errors>0.])
# clip errors
if self.clip_error:
self.be.clip(errors, -self.clip_error, self.clip_error, out = errors)
# calculate cost, just in case
cost = self.cost_func.get_cost(preq, self.targets)
assert cost.shape == (1,1)
# perform back-propagation of gradients
self.model.bprop(errors)
# perform optimization
self.optimizer.optimize(self.model.layers_to_optimize, epoch)
# increase number of weight updates (needed for target clone interval)
self.update_iterations += 1
if self.target_update_frequency and self.update_iterations % self.target_update_frequency == 0:
self._copy_theta()
_logger.info("Network update #%d: Cost = %s, Avg Max Q-value = %s" % (self.update_iterations, str(cost.asnumpyarray()[0][0]), str(maxpostq_avg)))
# update statistics
if self.callback:
self.callback.from_learner(cost.asnumpyarray()[0,0], maxpostq_avg)
def get_Q(self, state):
    """Calculate the Q-values for a single state.

    The state is written into slot 0 of the reusable ``self.dummy_batch`` and
    the whole batch is pushed through ``self.model``; only the first column of
    the network output is returned.

    Args:
        state (numpy.ndarray): Single state, shape=(sequence_length,frame_width,frame_height).

    Returns:
        numpy.ndarray: Q-values of the first element of the mini-batch,
        shape=(self.output_shape,).
    """
    _logger.debug("State shape = %s" % str(state.shape))

    # Neon does not let us change the minibatch size, so a full-size batch is
    # run even though only its first entry is of interest.
    self.dummy_batch[0] = state
    states = self.dummy_batch
    assert states.shape == ((self.batch_size, self.sequence_length,) + self.frame_dims)

    # calculate Q-values for the states
    self._prepare_network_input(states)
    qvalues = self.model.fprop(self.input, inference=True)
    assert qvalues.shape == (self.output_shape, self.batch_size)

    # Convert the backend tensor to a NumPy array a single time; the same
    # slice serves both the log line and the return value.
    first_qvalues = qvalues.asnumpyarray()[:, 0]
    _logger.debug("Qvalues: %s" % (str(first_qvalues)))
    return first_qvalues
def _copy_theta(self):
    """Copy the current network's weights and states into the target network."""
    _logger.debug("Copying weights")
    # The description is requested with weights and layer states included so
    # that deserialize(load_states=True) can restore both on the target.
    snapshot = self.model.get_description(get_weights=True, keep_states=True)
    self.target_model.deserialize(snapshot, load_states=True)
def save_weights(self, target_dir, epoch):
    """Save the current network parameters to disk.

    The file is named ``<game>_<net_type>_<optimizer>_<epoch + 1>.prm`` (the
    three name parts are lower-cased) and is placed inside ``target_dir``.

    Args:
        target_dir (str): Directory that receives the parameter file.
        epoch (int): Current epoch; the file name uses ``epoch + 1``.
    """
    name_parts = (
        str(self.args.game.lower()),
        str(self.args.net_type.lower()),
        str(self.args.optimizer.lower()),
        epoch + 1,
    )
    filename = "%s_%s_%s_%d.prm" % name_parts
    destination = os.path.join(target_dir, filename)
    self.model.save_params(destination)
def load_weights(self, source_file):
    """Load network parameters into ``self.model`` from a file.

    Args:
        source_file (str): Complete path to a file with network parameters.
    """
    params_path = source_file
    self.model.load_params(params_path)
示例2: __init__
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import get_description [as 别名]
#.........这里部分代码省略.........
# The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
layers.append(Conv((8, 8, 32), strides=4, init=init_norm, activation=Rectlin(), batch_norm=self.batch_norm))
# The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
layers.append(Conv((4, 4, 64), strides=2, init=init_norm, activation=Rectlin(), batch_norm=self.batch_norm))
# This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
layers.append(Conv((3, 3, 64), strides=1, init=init_norm, activation=Rectlin(), batch_norm=self.batch_norm))
# The final hidden layer is fully-connected and consists of 512 rectifier units.
layers.append(Affine(nout=512, init=init_norm, activation=Rectlin(), batch_norm=self.batch_norm))
# The output layer is a fully-connected linear layer with a single output for each valid action.
layers.append(Affine(nout=num_actions, init = init_norm))
return layers
def _setInput(self, states):
    """Load a batch of states into ``self.input`` and scale it by 1/255.

    Args:
        states: Array whose axes are reordered with ``(1, 2, 3, 0)`` before
            upload, i.e. the first axis becomes the last one.
    """
    # Neon expects a different axis order; the explicit copy() should not be
    # necessary, but Neon does not work without it.
    reordered = np.transpose(states, axes=(1, 2, 3, 0)).copy()
    self.input.set(reordered)
    # normalize network input between 0 and 1
    self.be.divide(self.input, 255, self.input)
def train(self, minibatch, epoch):
    """Build the Q-learning targets for one minibatch and store them in ``self.targets``.

    Args:
        minibatch: 5-tuple ``(prestates, actions, rewards, poststates,
            terminals)``. The asserts require both state arrays to be 4-D and
            equally shaped, the other three to be 1-D, and all five to share
            the same first dimension.
        epoch: Not referenced in the visible part of this method.

    NOTE(review): the visible excerpt ends once the targets are uploaded; any
    later steps of the update are not shown here -- confirm against the full
    source.
    """
    # expand components of minibatch
    pre, acts, rews, post, terms = minibatch

    # rank checks, in the same order as the unpacked components are validated
    for array, rank in ((pre, 4), (post, 4), (acts, 1), (rews, 1), (terms, 1)):
        assert len(array.shape) == rank
    assert pre.shape == post.shape
    batch = pre.shape[0]
    assert all(other.shape[0] == batch for other in (acts, rews, post, terms))

    sync_due = self.target_steps and self.train_iterations % self.target_steps == 0
    if sync_due:
        # have to serialize also states for batch normalization to work
        snapshot = self.model.get_description(get_weights=True, keep_states=True)
        self.target_model.deserialize(snapshot, load_states=True)

    # feed-forward pass for poststates to get Q-values
    self._setInput(post)
    postq = self.target_model.fprop(self.input, inference=True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate
    maxpostq = self.be.max(postq, axis=0).asnumpyarray()
    assert maxpostq.shape == (1, self.batch_size)

    # feed-forward pass for prestates
    self._setInput(pre)
    preq = self.model.fprop(self.input, inference=False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    # (the explicit copy seems necessary for the cpu backend)
    targets = preq.asnumpyarray().copy()

    # clip rewards between self.min_reward and self.max_reward
    clipped = np.clip(rews, self.min_reward, self.max_reward)

    # update Q-value targets only at the actions that were taken
    for col, action in enumerate(acts):
        if not terms[col]:
            targets[action, col] = float(clipped[col]) + self.discount_rate * maxpostq[0, col]
        else:
            targets[action, col] = float(clipped[col])

    # copy targets into the backend tensor
    self.targets.set(targets)
示例3: ModelRunnerNeon
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import get_description [as 别名]
#.........这里部分代码省略.........
initializer = self.get_initializer(input_size = 7 * 7 * 64)
layers.append(Affine(nout=512, init=initializer, bias=initializer, activation=Rectlin()))
initializer = self.get_initializer(input_size = 512)
layers.append(Affine(nout=max_action_no, init=initializer, bias=initializer))
return layers
def clip_reward(self, reward):
    """Clamp ``reward`` to the configured reward bounds.

    Args:
        reward: Value to clamp.

    Returns:
        ``self.args.clip_reward_high`` if ``reward`` exceeds it,
        ``self.args.clip_reward_low`` if ``reward`` is below it, otherwise
        ``reward`` unchanged.
    """
    upper = self.args.clip_reward_high
    if reward > upper:
        return upper
    lower = self.args.clip_reward_low
    if reward < lower:
        return lower
    return reward
def set_input(self, data):
    """Load ``data`` into ``self.input`` with axes ``(1, 2, 3, 0)`` and divide by 255.

    Args:
        data: Batch to load. When ``self.use_gpu_replay_mem`` is set it is
            transposed by the backend into ``self.input_uint8``; otherwise it
            is transposed via its own ``transpose`` method and copied first.
    """
    if not self.use_gpu_replay_mem:
        reordered = data.transpose(1, 2, 3, 0).copy()
        self.input.set(reordered)
        self.be.divide(self.input, 255, self.input)
        return

    # Backend path: transpose into the uint8 staging tensor, then scale.
    self.be.copy_transpose(data, self.input_uint8, axes=(1, 2, 3, 0))
    self.input[:] = self.input_uint8 / 255
def predict(self, history_buffer):
    """Run an inference pass of the training network on ``history_buffer``.

    Args:
        history_buffer: Input handed to ``set_input``.

    Returns:
        Row 0 of the transposed network output as a NumPy array.
    """
    self.set_input(history_buffer)
    network_out = self.train_net.fprop(self.input, inference=True)
    rows = network_out.T.asnumpyarray()
    return rows[0]
def print_weights(self):
    """Do nothing; this implementation is an intentional no-op."""
    return None
def train(self, minibatch, replay_memory, learning_rate, debug):
    """Run one Q-learning update of ``self.train_net`` on a minibatch.

    Args:
        minibatch: ``(prestates, actions, rewards, poststates, terminals)``;
            when prioritized replay is enabled it carries three extra items
            ``(replay_indexes, heap_indexes, weights)``.
        replay_memory: Only used with prioritized replay; its ``update_td`` is
            called with each sample's heap index and absolute error.
        learning_rate: Not referenced by this method.
        debug: When truthy (and prioritized replay is on), print the
            per-sample weight and error values.
    """
    # The flags are compared with ``== True`` on purpose so that exactly the
    # same configuration values enable a feature as before; they do not change
    # during the call, so they are evaluated once.
    prioritized = self.args.prioritized_replay == True  # noqa: E712
    double_dqn = self.args.double_dqn == True  # noqa: E712

    if prioritized:
        (prestates, actions, rewards, poststates, terminals,
         replay_indexes, heap_indexes, weights) = minibatch
    else:
        prestates, actions, rewards, poststates, terminals = minibatch

    # Q-values of the post-states from the target network, one row per sample
    self.set_input(poststates)
    post_qvalue = self.target_net.fprop(self.input, inference=True).T.asnumpyarray()
    if double_dqn:
        # Double DQN: the training network picks the action, the target
        # network supplies its value.
        post_qvalue2 = self.train_net.fprop(self.input, inference=True).T.asnumpyarray()

    # Q-values of the pre-states from the training network
    self.set_input(prestates)
    pre_qvalue = self.train_net.fprop(self.input, inference=False)

    # Start from the current predictions so only the taken actions differ.
    label = pre_qvalue.asnumpyarray().copy()
    for i in range(self.train_batch_size):
        if self.args.clip_reward:
            reward = self.clip_reward(rewards[i])
        else:
            reward = rewards[i]

        if terminals[i]:
            label[actions[i], i] = reward
        elif double_dqn:
            max_index = np.argmax(post_qvalue2[i])
            label[actions[i], i] = reward + self.discount_factor * post_qvalue[i][max_index]
        else:
            label[actions[i], i] = reward + self.discount_factor * np.max(post_qvalue[i])

    # copy targets into the backend tensor
    self.targets.set(label)
    delta = self.cost.get_errors(pre_qvalue, self.targets)

    if prioritized:
        delta_value = delta.asnumpyarray()
        for i in range(self.train_batch_size):
            td_error = delta_value[actions[i], i]
            weighted_error = weights[i] * td_error
            if debug:
                # Single parenthesised argument: valid as both a Python 2
                # print statement and a Python 3 print() call.
                print('weight[%s]: %.5f, delta: %.5f, newDelta: %.5f' % (i, weights[i], td_error, weighted_error))
            # The priority uses the unweighted error; the weighted one is
            # what gets back-propagated.
            replay_memory.update_td(heap_indexes[i], abs(td_error))
            delta_value[actions[i], i] = weighted_error
        delta.set(delta_value.copy())

    if self.args.clip_loss:
        self.be.clip(delta, -1.0, 1.0, out=delta)

    self.train_net.bprop(delta)
    self.optimizer.optimize(self.train_net.layers_to_optimize, epoch=0)
def update_model(self):
    """Copy the training network's weights and states into the target network."""
    # have to serialize also states for batch normalization to work
    snapshot = self.train_net.get_description(get_weights=True, keep_states=True)
    self.target_net.deserialize(snapshot, load_states=True)
def finish_train(self):
    """Clear the ``running`` flag on this instance."""
    still_running = False
    self.running = still_running
def load(self, file_name):
    """Load training-network parameters from ``file_name`` and sync the target network.

    Args:
        file_name: Path handed to ``self.train_net.load_params``.
    """
    params_path = file_name
    self.train_net.load_params(params_path)
    # Propagate the freshly loaded parameters to the target network.
    self.update_model()
def save(self, file_name):
    """Save the training network's parameters to ``file_name``.

    Args:
        file_name: Path handed to ``self.train_net.save_params``.
    """
    params_path = file_name
    self.train_net.save_params(params_path)