当前位置: 首页>>代码示例>>Python>>正文


Python World.has_restarted方法代码示例

本文整理汇总了Python中World.has_restarted方法的典型用法代码示例。如果您正苦于以下问题:Python World.has_restarted方法的具体用法?Python World.has_restarted怎么用?Python World.has_restarted使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在World的用法示例。


在下文中一共展示了World.has_restarted方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: run

# 需要导入模块: import World [as 别名]
# 或者: from World import has_restarted [as 别名]
def run():
    """Run the Q-learning control loop forever.

    Each iteration asks ``max_Q`` for the best action at the player's
    current position, executes it with ``do_action``, and then moves the
    Q-value of the visited (state, action) pair towards
    ``reward + discount * max_Q(next_state)``. When ``World`` reports that
    the game has restarted, the game is reset and the step counter used for
    the learning-rate decay starts again from 1.

    The function never returns.
    """
    global discount
    time.sleep(1)
    learning_rate = 1
    step = 1
    while True:
        # Act with whatever max_Q reports as best for the current position.
        best_action, _ = max_Q(World.player)
        state, action, reward, next_state = do_action(best_action)

        # One-step target: immediate reward plus discounted best next value.
        _, best_next_value = max_Q(next_state)
        target = reward + discount * best_next_value
        inc_Q(state, action, learning_rate, target)

        # A restart resets the world and the decay schedule; otherwise the
        # step counter simply advances.
        if World.has_restarted():
            World.restart_game()
            time.sleep(0.01)
            step = 1.0
        else:
            step += 1.0

        # Learning rate decays as step ** -0.1 within an episode.
        learning_rate = pow(step, -0.1)

        # MODIFY THIS SLEEP IF THE GAME IS GOING TOO FAST.
        time.sleep(0.1)
开发者ID:cold-blue,项目名称:q_learning_demo,代码行数:29,代码来源:Learner.py

示例2: run

# 需要导入模块: import World [as 别名]
# 或者: from World import has_restarted [as 别名]
def run():
    """Run the Q-learning loop with per-episode history replay, forever.

    Each iteration takes the best action reported by ``max_Q`` for the
    player's position, passes it through ``policy`` and executes the result
    with ``do_action``. The resulting transition is recorded, and the
    Q-value of the visited pair is moved towards
    ``reward + discount * max_Q(next_state)``.

    When ``World`` reports a restart, the recorded transitions are handed to
    ``backPropagate`` together with the current learning rate, the game is
    reset, and both the history and the decay step counter start over.

    The function never returns.
    """
    global discount
    time.sleep(1)
    learning_rate = 1
    step = 1
    transitions = []
    while True:
        # Choose an action: best-known action filtered through the policy.
        best_action, _ = max_Q(World.player)
        state, action, reward, next_state = do_action(policy(best_action))
        transitions.append((state, action, reward, next_state))

        # One-step target: immediate reward plus discounted best next value.
        _, best_next_value = max_Q(next_state)
        target = reward + discount * best_next_value
        inc_Q(state, action, learning_rate, target)

        if World.has_restarted():
            # Episode over: replay its transitions before resetting.
            backPropagate(transitions, learning_rate)
            World.restart_game()
            time.sleep(0.01)
            step = 1.0
            # Rebind (not clear) so a list kept by backPropagate is untouched.
            transitions = []
        else:
            step += 1.0

        # Learning rate decays as step ** -0.1 within an episode.
        learning_rate = pow(step, -0.1)

        # MODIFY THIS SLEEP IF THE GAME IS GOING TOO FAST.
        time.sleep(0.05)
开发者ID:PhilippeMorere,项目名称:BasicReinforcementLearning,代码行数:34,代码来源:NewLearner_Q_with_eligibility_trace.py

示例3: run

# 需要导入模块: import World [as 别名]
# 或者: from World import has_restarted [as 别名]
def run():
    """Run the SARSA-style learner with eligibility traces, forever.

    Each iteration:

    1. Chooses an action for the player's current position
       (``policy(max_Q(state))``) and executes it with ``do_action``.
    2. Computes the temporal-difference error
       ``delta = r + gamma * Q[s2][a2] - Q[s][a]``, where ``a2`` is obtained
       from ``policy`` for the successor state.
    3. Sets the trace of the visited pair to 1.0 and applies
       ``delta * trace`` to every traced pair through ``inc_Q``.
    4. Zeroes the traces of the other actions of the visited state and
       multiplies all remaining traces by ``gamma * lambda_``.

    When ``World`` reports a restart, the trace table is emptied and the
    game is reset. The function never returns.

    Module globals used: ``gamma``, ``lambda_``, ``alpha`` (read) and
    ``eligibilityTrace`` (read and rebound).
    """
    global gamma, lambda_, eligibilityTrace, alpha
    time.sleep(1)
    t = 1
    while True:
        # Pick the right action
        s = World.player
        max_act, max_val = max_Q(s)
        chosen_act = policy(max_act)
        (s, a, r, s2) = do_action(chosen_act)

        # Update Q
        # NOTE(review): policy() is called again at the top of the next
        # iteration, so if it is stochastic the action used in delta may
        # differ from the one actually executed -- confirm this is intended.
        max_act, max_val = max_Q(s2)
        policy_act = policy(max_act)
        delta = r + gamma * Q[s2][policy_act] - Q[s][a]
        eligibilityTrace[(s, a)] = 1.0

        # items() rather than the Python-2-only iteritems(), so the loop runs
        # on both Python 2 and 3. Only values of existing keys are reassigned
        # inside the loop, so the dict's size does not change mid-iteration.
        for (ss, aa), val in eligibilityTrace.items():
            inc_Q(ss, aa, alpha, delta * val)
            if ss == s and aa != a:
                # Another action of the state just visited: drop its trace.
                eligibilityTrace[(ss, aa)] = 0.0
            else:
                eligibilityTrace[(ss, aa)] *= gamma * lambda_

        # Check if the game has restarted
        t += 1.0
        if World.has_restarted():
            # Traces do not carry over between episodes.
            eligibilityTrace = {}
            World.restart_game()
            time.sleep(0.01)
            t = 1.0

        # Learning-rate decay is disabled in this learner; t is still
        # tracked so the schedule below can be re-enabled.
        # alpha = pow(t, -0.1)

        # MODIFY THIS SLEEP IF THE GAME IS GOING TOO FAST.
        time.sleep(0.01)
开发者ID:PhilippeMorere,项目名称:BasicReinforcementLearning,代码行数:42,代码来源:SarsaLearner.py

示例4: run

# 需要导入模块: import World [as 别名]
# 或者: from World import has_restarted [as 别名]
def run():
    """Run the SARSA(lambda) learner forever.

    The first action is drawn uniformly at random from ``actions``. Each
    iteration then:

    1. Executes the pending action with ``do_action``.
    2. Chooses the next action for the successor state
       (``policy(max_Q(s2))``).
    3. Computes ``delta = r + discount * Q[s2][next_act] - Q[s][a]``,
       increments the trace ``e[s][a]`` and, for every (state, action)
       pair, applies ``alpha * delta * e[state][action]`` through ``inc_Q``
       and multiplies the trace by ``discount * lambda_``.
    4. Carries ``next_act`` over as the action for the next iteration.

    When ``World`` reports a restart, the game is reset, a fresh action is
    chosen for the new starting position, and all traces are zeroed. The
    function never returns.
    """
    global discount
    time.sleep(1)
    alpha = 1.0
    t = 1
    act = random.choice(actions)
    while True:
        # Execute the action selected on the previous iteration
        (s, a, r, s2) = do_action(act)
        max_act, max_val = max_Q(s2)
        next_act = policy(max_act)

        # Update Q
        delta = r + discount * Q[s2][next_act] - Q[s][a]
        e[s][a] += 1.0
        for state in states:
            for action in actions:
                inc_Q(state, action, alpha * delta * e[state][action])
                e[state][action] *= discount * lambda_

        # Hand the chosen action over *before* the restart check, so that
        # a restart can replace it with one chosen for the new start state.
        # (Previously this assignment ran after the check and overwrote the
        # freshly chosen post-restart action.)
        act = next_act

        # Check if the game has restarted
        t += 1.0
        if World.has_restarted():
            World.restart_game()
            time.sleep(0.01)
            t = 1.0
            # next_act was chosen for the old episode's successor state;
            # pick an action for the position the player has been reset to.
            max_act, max_val = max_Q(World.player)
            act = policy(max_act)
            # Traces do not carry over between episodes.
            for state in states:
                for action in actions:
                    e[state][action] = 0.0

        # Update the learning rate
        alpha = pow(t, -0.1)

        # MODIFY THIS SLEEP IF THE GAME IS GOING TOO FAST.
        time.sleep(0.02)
开发者ID:PhilippeMorere,项目名称:BasicReinforcementLearning,代码行数:42,代码来源:SarsaLambdaLearner.py


注:本文中的World.has_restarted方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。