This article collects typical usage examples of the Python method chainer.Variable.reshape. If you are unsure what Variable.reshape does, how to call it, or are simply looking for working examples, the curated code samples below may help. You can also read more about the containing class, chainer.Variable.
Two code examples of Variable.reshape are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Python code samples.
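As a quick orientation before the examples, here is a minimal, self-contained sketch of reshaping a chainer.Variable. It is an illustration under the assumption of a recent Chainer release, where Variable.reshape is a thin wrapper around chainer.functions.reshape; it is not taken from the examples below.

import numpy as np
import chainer.functions as F
from chainer import Variable

# Wrap a (2, 3, 4) float32 array in a Variable.
x = Variable(np.arange(24, dtype=np.float32).reshape((2, 3, 4)))

# Reshape the Variable to (2, 12). In recent Chainer releases the method
# call and the functional form below build the same differentiable node.
y = x.reshape((2, 12))
z = F.reshape(x, (2, 12))

print(y.shape)  # (2, 12)
print(z.shape)  # (2, 12)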
Example 1: forward_one_step
# Required import: from chainer import Variable [as alias]
# Alternative: from chainer.Variable import reshape [as alias]
def forward_one_step(self, state, action, reward, next_state, test=False):
    xp = cuda.cupy if config.use_gpu else np
    n_batch = state.shape[0]
    # Flatten each history of frames into a single feature vector per sample.
    state = Variable(state.reshape((n_batch, config.rl_history_length * 34)))
    next_state = Variable(next_state.reshape((n_batch, config.rl_history_length * 34)))
    if config.use_gpu:
        state.to_gpu()
        next_state.to_gpu()
    q = self.compute_q_variable(state, test=test)
    q_ = self.compute_q_variable(next_state, test=test)
    max_action_indices = xp.argmax(q_.data, axis=1)
    if config.use_gpu:
        max_action_indices = cuda.to_cpu(max_action_indices)
    target_q = self.compute_target_q_variable(next_state, test=test)
    target = q.data.copy()
    for i in xrange(n_batch):
        # Double DQN: the greedy action comes from the online network,
        # its value from the target network.
        max_action_index = max_action_indices[i]
        target_value = reward[i] + config.rl_discount_factor * target_q.data[i][max_action_index]
        action_index = self.get_index_for_action(action[i])
        old_value = target[i, action_index]
        # Clip the TD error to [-1, 1] before building the regression target.
        diff = target_value - old_value
        if diff > 1.0:
            target_value = 1.0 + old_value
        elif diff < -1.0:
            target_value = -1.0 + old_value
        target[i, action_index] = target_value
    target = Variable(target)
    loss = F.mean_squared_error(target, q)
    return loss, q
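The per-sample loop above clips the TD error (the difference between the Double-DQN target and the current Q value) to [-1, 1] before the mean squared error is taken. The vectorized sketch below restates that logic with plain NumPy; the function name and its arguments (q_data, q_next_data, target_q_data, rewards, action_indices, discount) are placeholders for illustration and are not part of the original class.

import numpy as np

def build_clipped_target(q_data, q_next_data, target_q_data, rewards, action_indices, discount):
    # q_data:        (n_batch, n_actions) online-network Q values for `state`
    # q_next_data:   (n_batch, n_actions) online-network Q values for `next_state`
    # target_q_data: (n_batch, n_actions) target-network Q values for `next_state`
    n_batch = q_data.shape[0]
    rows = np.arange(n_batch)
    # Double DQN: choose the greedy action with the online network ...
    greedy = np.argmax(q_next_data, axis=1)
    # ... but evaluate it with the target network.
    target_values = rewards + discount * target_q_data[rows, greedy]
    target = q_data.copy()
    old_values = target[rows, action_indices]
    # Clip the TD error to [-1, 1], mirroring the diff > 1.0 / diff < -1.0 branches.
    diff = np.clip(target_values - old_values, -1.0, 1.0)
    target[rows, action_indices] = old_values + diff
    return target

Combined with a squared error, clipping the error to [-1, 1] gives the same gradients as a Huber loss with delta 1, which is the usual motivation for this trick in DQN training.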
Example 2: Variable
# Required import: from chainer import Variable [as alias]
# Alternative: from chainer.Variable import reshape [as alias]
action = None
action_q = q.copy()
action_q.reset_state()
while True:
    if action is not None:
        game.play(action)
    # Grab the game window and convert the BGRA buffer to an RGB PIL image.
    pixmap = QPixmap.grabWindow(window_id, left, top, w, h)
    image = pixmap.toImage()
    bits = image.bits()
    bits.setsize(image.byteCount())
    screen = Image.fromarray(np.array(bits).reshape((h, w, 4))[:, :, 2::-1])
    reward, terminal = game.process(screen)
    if reward is not None:
        # Resize to the training resolution, move channels first, add a
        # batch axis with reshape, and scale pixel values to [-1, 1].
        train_image = xp.asarray(screen.resize((train_width, train_height))).astype(np.float32).transpose((2, 0, 1))
        train_image = Variable(train_image.reshape((1,) + train_image.shape) / 127.5 - 1, volatile=True)
        score = action_q(train_image, train=False)
        best = int(np.argmax(score.data))
        action = game.randomize_action(best, random_probability)
        print action, float(score.data[0][action]), best, float(score.data[0][best]), reward
        # Store the transition in the ring-buffer replay pools.
        index = frame % POOL_SIZE
        state_pool[index] = cuda.to_cpu(train_image.data)
        action_pool[index] = action
        reward_pool[index - 1] = reward
        average_reward = average_reward * 0.9999 + reward * 0.0001
        print "average reward: ", average_reward
        if terminal:
            terminal_pool[index - 1] = 1
            if only_result:
                i = index - 2
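The snippet above is truncated, but its core use of reshape is the preprocessing step: resize the captured frame, move channels first, and add a batch axis before wrapping the array in a Variable. The sketch below isolates that step with a dummy frame; train_width and train_height are placeholder values, and the volatile=True flag of the original (a Chainer v1 feature) is omitted, so treat it as an assumption-laden illustration rather than the original script.

import numpy as np
from PIL import Image
from chainer import Variable

train_width, train_height = 128, 96  # placeholder capture resolution

def to_network_input(screen):
    # (H, W, 3) uint8 PIL image -> (3, H, W) float32 array.
    arr = np.asarray(screen.resize((train_width, train_height)), dtype=np.float32)
    arr = arr.transpose((2, 0, 1))
    # Add a batch axis with reshape and scale pixel values to [-1, 1].
    arr = arr.reshape((1,) + arr.shape) / 127.5 - 1.0
    return Variable(arr)

# A dummy RGB frame stands in for the grabbed game window.
dummy = Image.fromarray(np.zeros((240, 320, 3), dtype=np.uint8))
x = to_network_input(dummy)
print(x.shape)  # (1, 3, 96, 128)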