This article collects typical usage examples of the Python method World.has_restarted. If you are unsure how World.has_restarted is used in practice, the curated code examples below may help. You can also explore further usage examples for the containing class, World.
Four code examples of the World.has_restarted method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
Example 1: run
# Required module: import World [as alias]
# Or: from World import has_restarted [as alias]
# (This excerpt also uses `time` and helpers such as max_Q, do_action, and inc_Q defined elsewhere in the source file.)
def run():
    global discount
    time.sleep(1)
    alpha = 1
    t = 1
    while True:
        # Pick the right action
        s = World.player
        max_act, max_val = max_Q(s)
        (s, a, r, s2) = do_action(max_act)
        # Update Q
        max_act, max_val = max_Q(s2)
        inc_Q(s, a, alpha, r + discount * max_val)
        # Check if the game has restarted
        t += 1.0
        if World.has_restarted():
            World.restart_game()
            time.sleep(0.01)
            t = 1.0
        # Update the learning rate
        alpha = pow(t, -0.1)
        # MODIFY THIS SLEEP IF THE GAME IS GOING TOO FAST.
        time.sleep(0.1)
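Example 1 is plain tabular Q-learning with a slowly decaying learning rate (alpha = t^-0.1). The helpers max_Q and inc_Q live elsewhere in the source file and are not part of this excerpt; the sketch below is one plausible implementation, assuming a Q[state][action] dictionary layout with lazy initialization (both assumptions, not taken from the original project):

import random

actions = ["up", "down", "left", "right"]  # assumed action set
Q = {}  # assumed tabular store: Q[state][action] -> float

def max_Q(s):
    # Lazily create unseen states, then return the greedy action and its value.
    values = Q.setdefault(s, {a: 0.0 for a in actions})
    best = max(values, key=values.get)
    return best, values[best]

def inc_Q(s, a, alpha, target):
    # Move Q[s][a] a fraction alpha of the way toward the TD target.
    values = Q.setdefault(s, {act: 0.0 for act in actions})
    values[a] = (1 - alpha) * values[a] + alpha * target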
Example 2: run
# Required module: import World [as alias]
# Or: from World import has_restarted [as alias]
# (This excerpt also uses `time` and helpers such as max_Q, policy, do_action, inc_Q, and backPropagate defined elsewhere in the source file.)
def run():
    global discount
    time.sleep(1)
    alpha = 1
    t = 1
    episode_hist = []
    while True:
        # Pick the right action
        s = World.player
        max_act, max_val = max_Q(s)
        chosen_act = policy(max_act)
        (s, a, r, s2) = do_action(chosen_act)
        episode_hist.append((s, a, r, s2))
        # Update Q
        max_act, max_val = max_Q(s2)
        inc_Q(s, a, alpha, r + discount * max_val)
        # Check if the game has restarted
        t += 1.0
        if World.has_restarted():
            backPropagate(episode_hist, alpha)
            World.restart_game()
            time.sleep(0.01)
            t = 1.0
            episode_hist = []
        # Update the learning rate
        alpha = pow(t, -0.1)
        # MODIFY THIS SLEEP IF THE GAME IS GOING TOO FAST.
        time.sleep(0.05)
Developer: PhilippeMorere, Project: BasicReinforcementLearning, Lines of code: 34, Source file: NewLearner_Q_with_eligibility_trace.py
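Example 2 additionally records every transition of the episode and, once the game restarts, calls backPropagate on that history. The function is not part of this excerpt; a minimal sketch consistent with the call site, assuming a reverse replay of the episode so the terminal reward propagates quickly, could be:

def backPropagate(episode_hist, alpha):
    # Replay the episode backward: updating the last transitions first
    # lets the terminal reward flow through every visited (s, a) in one pass.
    for (s, a, r, s2) in reversed(episode_hist):
        _, max_val = max_Q(s2)
        inc_Q(s, a, alpha, r + discount * max_val)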
Example 3: run
# Required module: import World [as alias]
# Or: from World import has_restarted [as alias]
# (This excerpt also uses `time` and the globals Q, gamma, lambda_, alpha, and eligibilityTrace defined elsewhere in the source file.)
def run():
    global gamma
    global lambda_
    global eligibilityTrace
    global alpha
    time.sleep(1)
    t = 1
    while True:
        # Pick the right action
        s = World.player
        max_act, max_val = max_Q(s)
        chosen_act = policy(max_act)
        (s, a, r, s2) = do_action(chosen_act)
        # Update Q
        max_act, max_val = max_Q(s2)
        policy_act = policy(max_act)
        delta = r + gamma * Q[s2][policy_act] - Q[s][a]
        eligibilityTrace[(s, a)] = 1.0
        for (ss, aa), val in eligibilityTrace.items():
            inc_Q(ss, aa, alpha, delta * val)
            if ss == s and aa != a:
                eligibilityTrace[(ss, aa)] = 0.0
            else:
                eligibilityTrace[(ss, aa)] *= gamma * lambda_
        # Check if the game has restarted
        t += 1.0
        if World.has_restarted():
            eligibilityTrace = {}
            World.restart_game()
            time.sleep(0.01)
            t = 1.0
        # Update the learning rate
        # alpha = pow(t, -0.1)
        # MODIFY THIS SLEEP IF THE GAME IS GOING TOO FAST.
        time.sleep(0.01)
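Examples 2-4 pass the greedy action through policy, which normally injects exploration. The function is not shown in these excerpts; a minimal epsilon-greedy sketch (the epsilon value and the actions list are assumptions) would be:

import random

epsilon = 0.1  # assumed exploration rate

def policy(greedy_act):
    # Epsilon-greedy: exploit most of the time, explore occasionally.
    if random.random() < epsilon:
        return random.choice(actions)
    return greedy_act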
Example 4: run
# Required module: import World [as alias]
# Or: from World import has_restarted [as alias]
# (This excerpt also uses `time`, `random`, and the globals states, actions, Q, e, and lambda_ defined elsewhere in the source file.)
def run():
    global discount
    time.sleep(1)
    alpha = 1.0
    t = 1
    act = actions[random.randint(0, len(actions) - 1)]
    while True:
        # Pick the right action
        s = World.player
        (s, a, r, s2) = do_action(act)
        max_act, max_val = max_Q(s2)
        next_act = policy(max_act)
        # Update Q
        delta = r + discount * Q[s2][next_act] - Q[s][a]
        e[s][a] += 1.0
        for state in states:
            for action in actions:
                inc_Q(state, action, alpha * delta * e[state][action])
                e[state][action] *= discount * lambda_
        # Check if the game has restarted
        t += 1.0
        if World.has_restarted():
            World.restart_game()
            time.sleep(0.01)
            t = 1.0
            max_act, max_val = max_Q(World.player)
            act = policy(max_act)
            for state in states:
                for action in actions:
                    e[state][action] = 0.0
        # Update the learning rate
        alpha = pow(t, -0.1)
        act = next_act
        # MODIFY THIS SLEEP IF THE GAME IS GOING TOO FAST.
        time.sleep(0.02)
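Example 4 is a SARSA(λ)-style learner that sweeps dense states × actions tables. Unlike the earlier examples, it passes the full increment alpha * delta * e[state][action] to inc_Q as a single argument, suggesting that this file defines inc_Q as a plain additive update. One plausible way the tables could be set up, assuming World exposes its grid size (the width and height attribute names are hypothetical, as is this inc_Q):

# Hypothetical setup for Example 4; the grid-size attributes on World are assumed.
states = [(x, y) for x in range(World.width) for y in range(World.height)]
Q = {s: {a: 0.0 for a in actions} for s in states}
e = {s: {a: 0.0 for a in actions} for s in states}  # eligibility traces

def inc_Q(s, a, inc):
    # Additive form implied by the three-argument call site above (assumption).
    Q[s][a] += inc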