

Python Simulator.take_action Method Code Examples

This article collects typical usage examples of the Python method simulator.Simulator.take_action. If you are wondering what Simulator.take_action does, how to call it, or how it is used in practice, the curated code examples below may help. You can also browse further usage examples of the containing class, simulator.Simulator.


The following presents 5 code examples of the Simulator.take_action method, ordered by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code examples.
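
Before the examples, here is a minimal interaction loop sketching how take_action is typically called. The four-element return value (new state, reward, episode-ended flag, step count) and the caller-supplied policy function are assumptions inferred from the examples below, not documented API:

from simulator import Simulator

def rollout(policy):
    ''' Minimal sketch: run one episode and accumulate reward.
        Assumes take_action returns (new_state, reward, end_episode, steps). '''
    sim = Simulator()
    state = sim.get_state()
    total_reward = 0.0
    end_episode = False
    while not end_episode:
        action = policy(state)  # policy is a caller-supplied function (assumption)
        state, reward, end_episode, _steps = sim.take_action(action)
        total_reward += reward
    return total_reward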

Example 1: update

# Required import: from simulator import Simulator [as alias]
# Or: from simulator.Simulator import take_action [as alias]
# Also uses: import numpy as np
 def update(self):
     ''' Learn for a single episode. '''
     simulator = Simulator()
     state = simulator.get_state()
     act = self.action_policy(state)
     feat = self.action_features[act](state)
     end_episode = False
     # One eligibility trace vector per discrete action.
     traces = [
         np.zeros((BASIS_COUNT,)),
         np.zeros((BASIS_COUNT,)),
         np.zeros((BASIS_COUNT,))]
     while not end_episode:
         action = self.policy(state, act)
         state, reward, end_episode, _ = simulator.take_action(action)
         new_act = self.action_policy(state)
         new_feat = self.action_features[new_act](state)
         # SARSA TD error for the transition just taken.
         delta = reward + self.gamma * self.action_weights[new_act].dot(new_feat) - self.action_weights[act].dot(feat)
         # Decay all traces, then accumulate features for the action just taken.
         for i in range(3):
             traces[i] *= self.lmb * self.gamma
         traces[act] += feat
         for i in range(3):
             self.action_weights[i] += self.alpha * delta * traces[i] / COEFF_SCALE
         act = new_act
         feat = new_feat
     return [reward]
Developer: WarwickMasson, Project: aaai-goal, Lines of code: 27, Source file: learn.py
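
The heart of Example 1 is a SARSA(λ)-style update with accumulating eligibility traces over linear features. The following self-contained sketch isolates that update rule; the feature dimension and hyperparameter values are placeholders for illustration, not values from the project:

import numpy as np

BASIS_COUNT = 8                      # placeholder feature dimension (assumption)
GAMMA, LMB, ALPHA = 0.9, 0.8, 0.01   # placeholder hyperparameters (assumption)

def td_lambda_step(weights, trace, feat, reward, next_feat, terminal):
    ''' One accumulating-trace TD(lambda) update for a linear value function. '''
    target = reward if terminal else reward + GAMMA * weights.dot(next_feat)
    delta = target - weights.dot(feat)   # TD error
    trace = trace * GAMMA * LMB + feat   # decay the trace, then accumulate current features
    weights = weights + ALPHA * delta * trace
    return weights, trace

weights = np.zeros(BASIS_COUNT)
trace = np.zeros(BASIS_COUNT)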

Example 2: run_episode

# Required import: from simulator import Simulator [as alias]
# Or: from simulator.Simulator import take_action [as alias]
 def run_episode(self, simulator=None):
     ''' Run a single episode for a maximum number of steps. '''
     if simulator is None:
         simulator = Simulator()
     state = simulator.get_state()
     states = [state]
     rewards = []
     actions = []
     end_ep = False
     act = self.action_policy(state)
     acts = [act]
     while not end_ep:
         action = self.policy(state, act)
         new_state, reward, end_ep, steps = simulator.take_action(action)
         new_act = self.action_policy(new_state)
         # Multi-step TD error, discounted by the number of simulator steps taken; used here only as a diagnostic.
         delta = reward - self.state_quality(state, act)
         if not end_ep:
             delta += (self.gamma**steps) * self.state_quality(new_state, new_act)
         self.tdiff += abs(delta)
         self.steps += 1.0
         state = new_state
         states.append(state)
         actions.append(action)
         rewards.append(reward)
         act = new_act
         acts.append(act)
     self.tdiffs.append(self.tdiff / self.steps)
     self.episodes += 1
     self.total += sum(rewards)
     self.returns.append(sum(rewards))
     return states, actions, rewards, acts
Developer: WarwickMasson, Project: aaai-platformer, Lines of code: 33, Source file: learn.py
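
Because run_episode returns the full trajectory, a caller can aggregate returns across episodes. A hedged usage sketch (the evaluate helper and its arguments are hypothetical, not part of the project):

def evaluate(agent, episodes=100):
    ''' Hypothetical driver: average undiscounted return over several episodes,
        assuming agent.run_episode behaves as in Example 2. '''
    returns = []
    for _ in range(episodes):
        _states, _actions, rewards, _acts = agent.run_episode()
        returns.append(sum(rewards))
    return sum(returns) / len(returns)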

Example 3: follow_action

# Required import: from simulator import Simulator [as alias]
# Or: from simulator.Simulator import take_action [as alias]
 def follow_action(self, act):
     ''' Computes the expected return after taking action a. '''
     sim = Simulator()
     action = self.policy(sim.get_state(), act)
     # take_action returns (state, reward, end, steps); keep only reward and end.
     reward, end = sim.take_action(action)[1:3]
     if end:
         return reward
     else:
         rewards = self.run_episode(sim)[2]
         return reward + self.gamma * self.discount(rewards)
Developer: WarwickMasson, Project: aaai-platformer, Lines of code: 12, Source file: learn.py
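
The discount helper called above is not shown in this snippet. A plausible implementation, inferred from how follow_action adds it to the first reward (hypothetical, not the project's actual code), is a gamma-discounted sum of the remaining rewards:

 def discount(self, rewards):
     ''' Hypothetical helper: gamma-discounted sum of a reward sequence
         (sum over i of gamma**i * rewards[i]). '''
     total = 0.0
     for reward in reversed(rewards):
         total = reward + self.gamma * total
     return total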

Example 4: run_episode

# Required import: from simulator import Simulator [as alias]
# Or: from simulator.Simulator import take_action [as alias]
 def run_episode(self, simulator=None):
     ''' Run a single episode for a maximum number of steps. '''
     if simulator is None:
         simulator = Simulator()
     state = simulator.get_state()
     states = [state]
     rewards = []
     actions = []
     acts = []
     end_ep = False
     while not end_ep:
         act = self.action_policy(state)
         action = self.policy(state, act)
         state, reward, end_ep, _ = simulator.take_action(action)
         states.append(state)
         actions.append(action)
         rewards.append(reward)
         acts.append(act)
     return states, actions, rewards, acts
Developer: WarwickMasson, Project: aaai-goal, Lines of code: 21, Source file: learn.py

Example 5: update

# Required import: from simulator import Simulator [as alias]
# Or: from simulator.Simulator import take_action [as alias]
# Also uses: import numpy as np
 def update(self):
     ''' Learn for a single episode. '''
     simulator = Simulator()
     state = simulator.get_state()
     act = self.action_policy(state)
     feat = self.action_features[act](state)
     end_episode = False
     rewards = []
     # One eligibility trace vector per action.
     traces = [np.zeros((BASIS_COUNT,)) for _ in range(self.action_count)]
     while not end_episode:
         action = self.policy(state, act)
         state, reward, end_episode, steps = simulator.take_action(action)
         new_act = self.action_policy(state)
         new_feat = self.action_features[new_act](state)
         rewards.append(reward)
         # Multi-step TD error: the next value is discounted by the number of simulator steps the action took.
         delta = reward - self.feat_quality(feat, act)
         if not end_episode:
             delta += self.gamma ** steps * self.feat_quality(new_feat, new_act)
         self.tdiff += abs(delta)
         self.steps += 1.0
         for i in range(self.action_count):
             traces[i] *= self.lmb * self.gamma
         traces[act] += feat
         # Adaptive step-size bound: keep alpha small enough that an update along the traces cannot overshoot.
         alpha_bound = self.gamma * traces[new_act].dot(new_feat) - traces[act].dot(feat)
         self.alpha = min(self.alpha, 1.0 / abs(alpha_bound))
         for i in range(self.action_count):
             self.action_weights[i] += self.alpha * delta * traces[i] / COEFF_SCALE
         act = new_act
         feat = new_feat
     self.episodes += 1
     total_ret = sum(rewards)
     self.total += total_ret
     self.temperature *= self.cooling
     self.returns.append(total_ret)
     self.tdiffs.append(self.tdiff / self.steps)
     av_ret = self.total / self.episodes
     av_diff = self.tdiff / self.steps
     print('Step:', formatd(self.episodes), 'r:', formatf(total_ret), 'R:', formatf(av_ret), 'Delta:', formatf(av_diff))
     return rewards
Developer: WarwickMasson, Project: aaai-platformer, Lines of code: 43, Source file: learn.py
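
Example 5's alpha_bound line implements an adaptive step-size cap: the learning rate is shrunk whenever a single update along the eligibility traces could overshoot. A standalone sketch of that rule (the function name and the zero guard are illustrative additions, not the project's code):

def bounded_alpha(alpha, gamma, trace, feat, next_trace, next_feat):
    ''' Cap the step size so one TD update along the traces cannot overshoot. '''
    change = gamma * next_trace.dot(next_feat) - trace.dot(feat)
    if change == 0.0:
        return alpha              # this transition imposes no constraint
    return min(alpha, 1.0 / abs(change))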


Note: The simulator.Simulator.take_action method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their respective developers; copyright of the source code belongs to the original authors, and distribution or use should follow the corresponding project's license. Please do not republish without permission.