This article collects typical usage examples of the Python method baselines.common.discount: what the method does, how to call it, and what example code looks like. You can also explore the containing module, baselines.common, for related usage examples.
One code example of the common.discount method is shown below.
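For orientation before the example: common.discount computes a discounted cumulative sum over a sequence, i.e. out[t] = sum_k gamma**k * x[t + k]. The following is a minimal pure-NumPy sketch of that behavior for illustration only; it is not the actual baselines implementation (which, as far as I recall, performs the same computation with scipy.signal.lfilter).

import numpy as np

def discount(x, gamma):
    # Discounted cumulative sum: out[t] = sum_k gamma**k * x[t + k].
    out = np.zeros(len(x))
    running = 0.0
    for t in reversed(range(len(x))):
        running = x[t] + gamma * running
        out[t] = running
    return out

print(discount([1.0, 1.0, 1.0], 0.9))  # [2.71, 1.9, 1.0]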
Example 1: run
# Required import: from baselines import common [as alias]
# Or: from baselines.common import discount [as alias]
import numpy as np
from baselines import common

def run(self, update_counters=True):
    # Collect a single rollout in the environment.
    ob = self.env.reset()
    prev_ob = np.float32(np.zeros(ob.shape))
    if self.obfilter:
        ob = self.obfilter(ob)
    terminated = False
    obs = []
    acs = []
    ac_dists = []
    logps = []
    rewards = []
    for _ in range(self.max_pathlength):
        if self.animate:
            self.env.render()
        # The policy input is the current observation concatenated with the previous one.
        state = np.concatenate([ob, prev_ob], -1)
        obs.append(state)
        ac, ac_dist, logp = self.policy.act(state)
        acs.append(ac)
        ac_dists.append(ac_dist)
        logps.append(logp)
        prev_ob = np.copy(ob)
        # Rescale the action from [-1, 1] to the environment's action bounds, then clip.
        scaled_ac = self.env.action_space.low + (ac + 1.) * 0.5 * (self.env.action_space.high - self.env.action_space.low)
        scaled_ac = np.clip(scaled_ac, self.env.action_space.low, self.env.action_space.high)
        ob, rew, done, _ = self.env.step(scaled_ac)
        if self.obfilter:
            ob = self.obfilter(ob)
        rewards.append(rew)
        if done:
            terminated = True
            break
    # Keep a moving window of the last 100 episode returns.
    self.rewards.append(sum(rewards))
    self.rewards = self.rewards[-100:]
    if update_counters:
        self._num_rollouts += 1
        self._num_steps += len(rewards)
    path = {"observation": np.array(obs), "terminated": terminated,
            "reward": np.array(rewards), "action": np.array(acs),
            "action_dist": np.array(ac_dists), "logp": np.array(logps)}
    rew_t = path["reward"]
    value = self.policy.predict(path["observation"], path)
    # Discounted return targets; bootstrap with the last value estimate unless the episode terminated.
    vtarg = common.discount(np.append(rew_t, 0.0 if path["terminated"] else value[-1]), self.gamma)[:-1]
    vpred_t = np.append(value, 0.0 if path["terminated"] else value[-1])
    # TD residuals, then Generalized Advantage Estimation as a discounted sum with factor gamma * lam.
    delta_t = rew_t + self.gamma * vpred_t[1:] - vpred_t[:-1]
    adv_GAE = common.discount(delta_t, self.gamma * self.lam)
    if np.mean(self.rewards) >= self.score and not self.finished:
        self.episodes_till_done = self._num_rollouts
        self.frames_till_done = self._num_steps
        self.finished = True
    return path, vtarg, value, adv_GAE
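The two common.discount calls at the end implement standard advantage estimation: vtarg is the bootstrapped discounted-return target for the value function, and adv_GAE is the Generalized Advantage Estimate A_t = sum_k (gamma*lam)**k * delta_{t+k}, obtained by discounting the TD residuals delta_t with factor gamma*lam. The following self-contained snippet checks that identity on made-up numbers (illustrative only; the arrays are hypothetical, not taken from any real rollout):

import numpy as np

gamma, lam = 0.99, 0.95
rew = np.array([1.0, 0.5, 2.0])          # hypothetical rewards for a 3-step rollout
vpred = np.array([0.8, 0.7, 1.5, 0.0])   # hypothetical value predictions, bootstrap appended

delta = rew + gamma * vpred[1:] - vpred[:-1]   # TD residuals, as in run()

# GAE via the backward recursion A_t = delta_t + gamma*lam * A_{t+1},
# which is exactly what common.discount(delta, gamma * lam) computes.
adv = np.zeros_like(delta)
running = 0.0
for t in reversed(range(len(delta))):
    running = delta[t] + gamma * lam * running
    adv[t] = running

# Cross-check against the definition A_t = sum_k (gamma*lam)**k * delta[t+k].
adv_ref = np.array([sum((gamma * lam) ** k * delta[t + k]
                        for k in range(len(delta) - t))
                    for t in range(len(delta))])
assert np.allclose(adv, adv_ref)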