

Python Simulator.get_state Method Code Examples

This article collects typical usage examples of the Python method simulator.Simulator.get_state. If you are wondering how Simulator.get_state works, how to call it, or what it looks like in real code, the hand-picked examples below may help. You can also explore further usage examples of its containing class, simulator.Simulator.


Six code examples of the Simulator.get_state method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
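All six examples share the same interaction pattern: construct a Simulator, read the initial state with get_state(), then call take_action() in a loop until the episode ends. The minimal sketch below isolates that pattern on its own. The rollout helper and its policy argument are hypothetical stand-ins (the real examples use methods of a learning-agent class), and the (state, reward, end_episode, steps) return signature of take_action() is inferred from the examples that follow rather than taken from the simulator package's documentation.

from simulator import Simulator

def rollout(policy, max_steps=200):
    ''' Roll out one episode with the given policy and return the collected rewards.
    policy is any callable mapping a state to an action accepted by take_action(). '''
    simulator = Simulator()
    state = simulator.get_state()  # initial state, before any action is taken
    rewards = []
    for _ in range(max_steps):
        action = policy(state)
        state, reward, end_episode, _ = simulator.take_action(action)
        rewards.append(reward)
        if end_episode:
            break
    return rewards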

Example 1: run_episode

# Required import: from simulator import Simulator [as alias]
# Or: from simulator.Simulator import get_state [as alias]
 def run_episode(self, simulator=None):
     ''' Run a single episode for a maximum number of steps. '''
     if simulator is None:
         simulator = Simulator()
     state = simulator.get_state()
     states = [state]
     rewards = []
     actions = []
     end_ep = False
     act = self.action_policy(state)
     acts = [act]
     while not end_ep:
         action = self.policy(state, act)
         new_state, reward, end_ep, steps = simulator.take_action(action)
         new_act = self.action_policy(new_state)
         delta = reward - self.state_quality(state, act)
         if not end_ep:
             delta += (self.gamma**steps) * self.state_quality(new_state, new_act)
         self.tdiff += abs(delta)
         self.steps += 1.0
         state = new_state
         states.append(state)
         actions.append(action)
         rewards.append(reward)
         act = new_act
         acts.append(act)
     self.tdiffs.append(self.tdiff / self.steps)
     self.episodes += 1
     self.total += sum(rewards)
     self.returns.append(sum(rewards))
     return states, actions, rewards, acts
Developer ID: WarwickMasson, Project: aaai-platformer, Lines of code: 33, Source: learn.py

Example 2: update

# Required import: from simulator import Simulator [as alias]
# Or: from simulator.Simulator import get_state [as alias]
# This example also requires: import numpy as np
 def update(self):
     ''' Learn for a single episode. '''
     simulator = Simulator()
     state = simulator.get_state()
     act = self.action_policy(state)
     feat = self.action_features[act](state)
     end_episode = False
     # One eligibility trace vector per action (three discrete actions here).
     traces = [
         np.zeros((BASIS_COUNT,)),
         np.zeros((BASIS_COUNT,)),
         np.zeros((BASIS_COUNT,))]
     while not end_episode:
         action = self.policy(state, act)
         state, reward, end_episode, _ = simulator.take_action(action)
         new_act = self.action_policy(state)
         new_feat = self.action_features[new_act](state)
         # SARSA TD error: r + gamma * Q(s', a') - Q(s, a)
         delta = reward + self.gamma * self.action_weights[new_act].dot(new_feat) - self.action_weights[act].dot(feat)
         for i in range(3):
             traces[i] *= self.lmb * self.gamma
         traces[act] += feat
         for i in range(3):
             self.action_weights[i] += self.alpha * delta * traces[i] / COEFF_SCALE
         act = new_act
         feat = new_feat
     return [reward]
Developer ID: WarwickMasson, Project: aaai-goal, Lines of code: 27, Source: learn.py

Example 3: follow_action

# Required import: from simulator import Simulator [as alias]
# Or: from simulator.Simulator import get_state [as alias]
 def follow_action(self, act):
     ''' Computes the expected return after taking action act. '''
     sim = Simulator()
     action = self.policy(sim.get_state(), act)
     reward, end = sim.take_action(action)[1:3]
     if end:
         return reward
     else:
         rewards = self.run_episode(sim)[2]
         return reward + self.gamma * self.discount(rewards)
Developer ID: WarwickMasson, Project: aaai-platformer, Lines of code: 12, Source: learn.py

Example 4: compare_value_function

# Required import: from simulator import Simulator [as alias]
# Or: from simulator.Simulator import get_state [as alias]
 def compare_value_function(self, runs):
     ''' Compares the value function to the expected rewards. '''
     ret = 0.0
     rets = [0]*self.action_count
     quality = [0]*self.action_count
     sim = Simulator()
     state = sim.get_state()
     vf0 = self.value_function(state)
     for j in range(self.action_count):
         quality[j] = self.state_quality(state, j)
     for i in range(1, runs + 1):
         ret += self.discount(self.run_episode()[2]) / runs
         for j in range(self.action_count):
             rets[j] += self.follow_action(j) / runs
         # formatd / formatf are number-formatting helpers defined elsewhere in the project.
         print('Step:', formatd(i), 'V(s0):', formatf(vf0), 'R:', formatf(ret * runs / i))
     print("V:", formatf(vf0))
     print("R:", formatf(ret))
     print("Q:", [formatf(qual) for qual in quality])
     print("RQ:", [formatf(retn) for retn in rets])
Developer ID: WarwickMasson, Project: aaai-platformer, Lines of code: 21, Source: learn.py

Example 5: run_episode

# Required import: from simulator import Simulator [as alias]
# Or: from simulator.Simulator import get_state [as alias]
 def run_episode(self, simulator=None):
     ''' Run a single episode for a maximum number of steps. '''
     if simulator is None:
         simulator = Simulator()
     state = simulator.get_state()
     states = [state]
     rewards = []
     actions = []
     acts = []
     end_ep = False
     while not end_ep:
         act = self.action_policy(state)
         action = self.policy(state, act)
         state, reward, end_ep, _ = simulator.take_action(action)
         states.append(state)
         actions.append(action)
         rewards.append(reward)
         acts.append(act)
     return states, actions, rewards, acts
Developer ID: WarwickMasson, Project: aaai-goal, Lines of code: 21, Source: learn.py

Example 6: update

# Required import: from simulator import Simulator [as alias]
# Or: from simulator.Simulator import get_state [as alias]
# This example also requires: import numpy as np
 def update(self):
     ''' Learn for a single episode. '''
     simulator = Simulator()
     state = simulator.get_state()
     act = self.action_policy(state)
     feat = self.action_features[act](state)
     end_episode = False
     rewards = []
     traces = []
     for _ in range(self.action_count):
         traces.append(np.zeros((BASIS_COUNT,)))
     while not end_episode:
         action = self.policy(state, act)
         state, reward, end_episode, steps = simulator.take_action(action)
         new_act = self.action_policy(state)
         new_feat = self.action_features[new_act](state)
         rewards.append(reward)
         delta = reward - self.feat_quality(feat, act)
         if not end_episode:
             delta += (self.gamma)**steps * self.feat_quality(new_feat, new_act)
         self.tdiff += abs(delta)
         self.steps += 1.0
         for i in range(self.action_count):
             traces[i] *= self.lmb * self.gamma
         traces[act] += feat
         # Adaptive step-size bound: shrink alpha so the trace-weighted update stays stable.
         alpha_bound = self.gamma * traces[new_act].dot(new_feat) - traces[act].dot(feat)
         self.alpha = min(self.alpha, 1.0 / abs(alpha_bound))
         for i in range(self.action_count):
             self.action_weights[i] += self.alpha * delta * traces[i] / COEFF_SCALE
         act = new_act
         feat = new_feat
     self.episodes += 1
     total_ret = sum(rewards)
     self.total += total_ret
     self.temperature *= self.cooling
     self.returns.append(total_ret)
     self.tdiffs.append(self.tdiff / self.steps)
     av_ret = self.total / self.episodes
     av_diff = self.tdiff / self.steps
     print('Step:', formatd(self.episodes), 'r:', formatf(total_ret), 'R:', formatf(av_ret), 'Delta:', formatf(av_diff))
     return rewards
Developer ID: WarwickMasson, Project: aaai-platformer, Lines of code: 43, Source: learn.py


Note: The simulator.Simulator.get_state examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective developers; copyright in the source code remains with the original authors, and distribution and use are subject to the corresponding project's License. Do not reproduce without permission.