This article collects typical usage examples of the Simulator.get_state method from the Python module simulator. If you are looking for concrete examples of how Simulator.get_state is called in practice, the hand-picked snippets below may help. You can also look up further usage examples of the containing class, simulator.Simulator.
The following shows 6 code examples of the Simulator.get_state method, sorted by popularity by default.
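Before the examples, here is a minimal sketch of the call pattern they all share; it assumes only what the snippets below demonstrate, namely a no-argument Simulator() constructor, get_state() returning the current state, and take_action(action) returning a (new_state, reward, done, steps) tuple.

from simulator import Simulator

sim = Simulator()
state = sim.get_state()   # read the current state before selecting an action
# a policy maps `state` to an `action`; take_action then advances the simulator:
# new_state, reward, done, steps = sim.take_action(action)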
Example 1: run_episode
# Required import: from simulator import Simulator
# or: from simulator.Simulator import get_state
def run_episode(self, simulator=None):
    ''' Run a single episode for a maximum number of steps. '''
    if simulator is None:
        simulator = Simulator()
    state = simulator.get_state()
    states = [state]
    rewards = []
    actions = []
    end_ep = False
    act = self.action_policy(state)
    acts = [act]
    while not end_ep:
        action = self.policy(state, act)
        new_state, reward, end_ep, steps = simulator.take_action(action)
        new_act = self.action_policy(new_state)
        # TD error of the current value estimate, accumulated for monitoring only
        delta = reward - self.state_quality(state, act)
        if not end_ep:
            delta += (self.gamma ** steps) * self.state_quality(new_state, new_act)
        self.tdiff += abs(delta)
        self.steps += 1.0
        state = new_state
        states.append(state)
        actions.append(action)
        rewards.append(reward)
        act = new_act
        acts.append(act)
    self.tdiffs.append(self.tdiff / self.steps)
    self.episodes += 1
    self.total += sum(rewards)
    self.returns.append(sum(rewards))
    return states, actions, rewards, acts
Example 2: update
# Required import: from simulator import Simulator
# or: from simulator.Simulator import get_state
# also required: import numpy as np
def update(self):
    ''' Learn for a single episode. '''
    simulator = Simulator()
    state = simulator.get_state()
    act = self.action_policy(state)
    feat = self.action_features[act](state)
    end_episode = False
    traces = [
        np.zeros((BASIS_COUNT,)),
        np.zeros((BASIS_COUNT,)),
        np.zeros((BASIS_COUNT,))]
    while not end_episode:
        action = self.policy(state, act)
        state, reward, end_episode, _ = simulator.take_action(action)
        new_act = self.action_policy(state)
        new_feat = self.action_features[new_act](state)
        # TD error for the linear Q-function: r + gamma * Q(s', a') - Q(s, a)
        delta = reward + self.gamma * self.action_weights[new_act].dot(new_feat) - self.action_weights[act].dot(feat)
        for i in range(3):
            traces[i] *= self.lmb * self.gamma
        traces[act] += feat
        for i in range(3):
            self.action_weights[i] += self.alpha * delta * traces[i] / COEFF_SCALE
        act = new_act
        feat = new_feat
    return [reward]
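Example 2 is SARSA(lambda) with linear function approximation: delta is the temporal-difference error r + gamma*Q(s', a') - Q(s, a), every action's eligibility trace decays by lambda*gamma each step, the trace of the action actually taken accumulates its feature vector, and all weight vectors move along their traces. Below is a minimal standalone sketch of that inner update; the feature dimension, the three-action layout, and the constant step size are assumptions copied from the example, not part of the original class.

import numpy as np

BASIS_COUNT = 4                      # assumed feature dimension
GAMMA, LMB, ALPHA = 0.9, 0.8, 0.01   # assumed hyper-parameters

weights = [np.zeros(BASIS_COUNT) for _ in range(3)]
traces = [np.zeros(BASIS_COUNT) for _ in range(3)]

def sarsa_lambda_step(feat, act, reward, new_feat, new_act):
    ''' One SARSA(lambda) update for linear Q-functions, one weight vector per action. '''
    # TD error: r + gamma * Q(s', a') - Q(s, a)
    delta = reward + GAMMA * weights[new_act].dot(new_feat) - weights[act].dot(feat)
    for i in range(3):
        traces[i] *= LMB * GAMMA     # decay every eligibility trace
    traces[act] += feat              # accumulate the trace of the action just taken
    for i in range(3):
        weights[i] += ALPHA * delta * traces[i]

# one illustrative transition with random feature vectors
rng = np.random.default_rng(0)
sarsa_lambda_step(rng.random(BASIS_COUNT), 0, 1.0, rng.random(BASIS_COUNT), 1)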
Example 3: follow_action
# Required import: from simulator import Simulator
# or: from simulator.Simulator import get_state
def follow_action(self, act):
    ''' Computes the expected return after taking action a. '''
    sim = Simulator()
    action = self.policy(sim.get_state(), act)
    reward, end = sim.take_action(action)[1:3]
    if end:
        return reward
    else:
        rewards = self.run_episode(sim)[2]
        return reward + self.gamma * self.discount(rewards)
Example 4: compare_value_function
# Required import: from simulator import Simulator
# or: from simulator.Simulator import get_state
def compare_value_function(self, runs):
    ''' Compares the value function to the expected rewards. '''
    ret = 0.0
    rets = [0] * self.action_count
    quality = [0] * self.action_count
    sim = Simulator()
    state = sim.get_state()
    vf0 = self.value_function(state)
    for j in range(self.action_count):
        quality[j] = self.state_quality(state, j)
    for i in range(1, runs + 1):
        ret += self.discount(self.run_episode()[2]) / runs
        for j in range(self.action_count):
            rets[j] += self.follow_action(j) / runs
        print('Step:', formatd(i), 'V(s0):', formatf(vf0), 'R:', formatf(ret * runs / i))
    print("V:", formatf(vf0))
    print("R:", formatf(ret))
    print("Q:", [formatf(qual) for qual in quality])
    print("RQ:", [formatf(retn) for retn in rets])
Example 5: run_episode
# Required import: from simulator import Simulator
# or: from simulator.Simulator import get_state
def run_episode(self, simulator=None):
    ''' Run a single episode for a maximum number of steps. '''
    if simulator is None:
        simulator = Simulator()
    state = simulator.get_state()
    states = [state]
    rewards = []
    actions = []
    acts = []
    end_ep = False
    while not end_ep:
        act = self.action_policy(state)
        action = self.policy(state, act)
        state, reward, end_ep, _ = simulator.take_action(action)
        states.append(state)
        actions.append(action)
        rewards.append(reward)
        acts.append(act)
    return states, actions, rewards, acts
Example 6: update
# Required import: from simulator import Simulator
# or: from simulator.Simulator import get_state
# also required: import numpy as np
def update(self):
    ''' Learn for a single episode. '''
    simulator = Simulator()
    state = simulator.get_state()
    act = self.action_policy(state)
    feat = self.action_features[act](state)
    end_episode = False
    rewards = []
    traces = []
    for _ in range(self.action_count):
        traces.append(np.zeros((BASIS_COUNT,)))
    while not end_episode:
        action = self.policy(state, act)
        state, reward, end_episode, steps = simulator.take_action(action)
        new_act = self.action_policy(state)
        new_feat = self.action_features[new_act](state)
        rewards.append(reward)
        # TD error; only bootstrap from the next state if the episode continues
        delta = reward - self.feat_quality(feat, act)
        if not end_episode:
            delta += (self.gamma ** steps) * self.feat_quality(new_feat, new_act)
        self.tdiff += abs(delta)
        self.steps += 1.0
        for i in range(self.action_count):
            traces[i] *= self.lmb * self.gamma
        traces[act] += feat
        # bound alpha by the reciprocal of |gamma * e'.phi' - e.phi|, so it can only shrink
        alpha_bound = self.gamma * traces[new_act].dot(new_feat) - traces[act].dot(feat)
        self.alpha = min(self.alpha, 1.0 / abs(alpha_bound))
        for i in range(self.action_count):
            self.action_weights[i] += self.alpha * delta * traces[i] / COEFF_SCALE
        act = new_act
        feat = new_feat
    self.episodes += 1
    total_ret = sum(rewards)
    self.total += total_ret
    self.temperature *= self.cooling
    self.returns.append(total_ret)
    self.tdiffs.append(self.tdiff / self.steps)
    av_ret = self.total / self.episodes
    av_diff = self.tdiff / self.steps
    print('Step:', formatd(self.episodes), 'r:', formatf(total_ret), 'R:', formatf(av_ret), 'Delta:', formatf(av_diff))
    return rewards
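Compared with Example 2, this version bootstraps only while the episode is still running, discounts by gamma**steps to cover multi-step transitions, and adapts its learning rate: alpha is clipped to 1.0 / abs(alpha_bound), where alpha_bound is the change the update would induce along the traces, so alpha can only shrink as training proceeds. This appears to be an adaptive step-size bound that keeps any single update from overshooting the TD target. As a worked example, with gamma = 0.9, traces[new_act].dot(new_feat) = 2.0 and traces[act].dot(feat) = 6.0, alpha_bound = 0.9 * 2.0 - 6.0 = -4.2, so alpha is capped at 1 / 4.2 ≈ 0.24.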