

Python GameState.update Method Code Examples

This article collects typical usage examples of the game_state.GameState.update method in Python. If you are wondering how to call GameState.update, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples of the enclosing class, game_state.GameState.


Ten code examples of the GameState.update method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
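
Most of the reinforcement-learning examples below assume roughly the same contract: a GameState object exposes the current observation s_t, advances the environment with process(action), reports reward and terminal, and then commits the freshly produced state with update(). The skeleton below is only an illustrative sketch of that shared interface, not any project's actual implementation; all method bodies are placeholders. (Example 2 is the exception: there GameState.update is a class-level method of a story engine that takes an unlocks argument.)

class GameState:
    """Hypothetical sketch of the interface most examples below rely on."""

    def __init__(self):
        self.s_t = None        # current (possibly frame-stacked) observation
        self.reward = 0.0      # reward produced by the most recent process() call
        self.terminal = False  # whether the most recent step ended the episode

    def process(self, action):
        """Advance the environment one step with `action`; set reward and terminal."""
        raise NotImplementedError

    def update(self):
        """Commit the newly produced state (s_t1 -> s_t) as the current observation."""
        raise NotImplementedError

    def reset(self):
        """Start a new episode."""
        raise NotImplementedError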

Example 1: Game

# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]
class Game(object):

    def __init__(self):
        self.clock = pygame.time.Clock()
        self.display = Display()
        self.game_state = GameState()
        self.control_state = ControlState()

    def run(self):
        pygame.init()
        while True:
            self.control_state.update()
            self.game_state.update(self.control_state)
            self.display.update(self.game_state)
            self.clock.tick(60)
Developer ID: nimbusgo, Project: squares, Lines of code: 17, Source file: game.py

Example 2: read_dialog_script

# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]
def read_dialog_script(dialog):
    """
    Reads in JSON array of dialog boxes to be spoken
    """
    if not dialog:
        # Return if no dialog given (i.e. no intro script)
        return

    logging.debug(pprint.pformat(dialog))
    for line in dialog:
        char = Character.load_character(line['character'])
        speech_box(line['text'], speaker=char.name)
        if 'unlocks' in line:
            from game_state import GameState
            GameState.update(line['unlocks'])
Developer ID: GunnarHolwerda, Project: PythonStoryGameEngine, Lines of code: 17, Source file: ui.py

Example 3: test_process

# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]
    def test_process(self):
        game_state = GameState(0)

        before_s_t = np.array( game_state.s_t )

        for i in range(1000):
            bef1 = game_state.s_t[:,:, 1]
            bef2 = game_state.s_t[:,:, 2]
            bef3 = game_state.s_t[:,:, 3]

            game_state.process(1)
            game_state.update()

            aft0 = game_state.s_t[:,:, 0]
            aft1 = game_state.s_t[:,:, 1]
            aft2 = game_state.s_t[:,:, 2]

            # values should be shifted
            self.assertTrue( (bef1.flatten() == aft0.flatten()).all() )
            self.assertTrue( (bef2.flatten() == aft1.flatten()).all() )
            self.assertTrue( (bef3.flatten() == aft2.flatten()).all() )

            # all elements should lie within [0.0, 1.0]
            self.assertTrue( np.less_equal(bef1, 1.0).all() )
            self.assertTrue( np.less_equal(bef2, 1.0).all() )
            self.assertTrue( np.less_equal(bef3, 1.0).all() )
            self.assertTrue( np.greater_equal(bef1, 0.0).all() )
            self.assertTrue( np.greater_equal(bef2, 0.0).all() )
            self.assertTrue( np.greater_equal(bef3, 0.0).all() )

            self.assertTrue( np.less_equal(aft0, 1.0).all() )
            self.assertTrue( np.less_equal(aft1, 1.0).all() )
            self.assertTrue( np.less_equal(aft2, 1.0).all() )
            self.assertTrue( np.greater_equal(aft0, 0.0).all() )
            self.assertTrue( np.greater_equal(aft1, 0.0).all() )
            self.assertTrue( np.greater_equal(aft2, 0.0).all() )
Developer ID: salsasepp, Project: ufcnn-keras, Lines of code: 38, Source file: game_state_test.py
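
Example 3 only asserts what update() must guarantee: after process() followed by update(), every frame in the 4-frame stack s_t shifts one slot toward index 0, and all values stay within [0.0, 1.0]. As an illustration only (FrameStackState and _new_frame are invented names, and this is not the ufcnn-keras implementation), a state object satisfying exactly those assertions could look like this:

import numpy as np

class FrameStackState:
    """Hypothetical frame-stacked state whose update() performs the shift the test checks."""

    def __init__(self, shape=(84, 84), history=4):
        self.s_t = np.zeros(shape + (history,), dtype=np.float32)
        self._s_t1 = self.s_t

    def _new_frame(self):
        # stand-in for a real, preprocessed screen capture already scaled to [0.0, 1.0]
        return np.random.rand(*self.s_t.shape[:2]).astype(np.float32)

    def process(self, action):
        # drop the oldest frame and append the newest one as the candidate next state
        frame = self._new_frame()[:, :, np.newaxis]
        self._s_t1 = np.append(self.s_t[:, :, 1:], frame, axis=2)

    def update(self):
        # s_t1 -> s_t: the candidate state becomes the current state
        self.s_t = self._s_t1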

Example 4: choose_action

# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]
    daily_pnl = 0

    # new: reset the environment at the start of each day after the first
    if i > 0:
        game_state.environment.reset() 

    while not terminal:
        pi_values = global_network.run_policy(sess, game_state.s_t)

        action = choose_action(pi_values, use_argmax=True)
        game_state.process(action)

        reward = game_state.reward
        terminal = game_state.terminal

        game_state.update()

    game_state.environment.create_plot(game_state.environment.iday)
    daily_pnl = sum(t.pnl() for t in game_state.environment.trades)
    total_pnl += daily_pnl
    game_state.environment.daily_pnl = 0

    print("Day", i, ",Realized PnL:", daily_pnl)

print("Total Realized PnL:", total_pnl)


for i in range(testing_days):
    print("Potting day", i)
    
Developer ID: lukovkin, Project: ufcnn-keras, Lines of code: 31, Source file: Tradingresults.py

Example 5: main

# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]
def main():
    state = GameState()
    while True:
        state.update()
        time.sleep(DEBOUNCE)
Developer ID: lirien, Project: A_bears_phone, Lines of code: 7, Source file: kumaden.py

Example 6: A3CTrainingThread

# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]

#.........part of the code is omitted here.........
    states = []
    actions = []
    rewards = []
    values = []

    terminal_end = False

    # reset accumulated gradients
    sess.run( self.reset_gradients )

    # copy weights from shared to local
    sess.run( self.sync )

    start_local_t = self.local_t
    
    # t_max times loop
    for i in range(LOCAL_T_MAX):
      pi_, value_ = self.local_network.run_policy_and_value(sess, self.game_state.s_t)      
      action = self.choose_action(pi_)

      states.append(self.game_state.s_t)
      actions.append(action)
      values.append(value_)

      if (self.thread_index == 0) and (self.local_t % 100) == 0:
        print "pi=", pi_
        print " V=", value_

      # process game
      self.game_state.process(action)

      # receive game result
      reward = self.game_state.reward
      terminal = self.game_state.terminal

      self.episode_reward += reward

      # clip reward
      rewards.append( np.clip(reward, -1, 1) )

      self.local_t += 1

      # s_t1 -> s_t
      self.game_state.update()
      
      if terminal:
        terminal_end = True
        print "score=", self.episode_reward

        self._record_score(sess, summary_writer, summary_op, score_input,
                           self.episode_reward, global_t)
          
        self.episode_reward = 0
        self.game_state.reset()
        break

    R = 0.0
    if not terminal_end:
      R = self.local_network.run_value(sess, self.game_state.s_t)

    actions.reverse()
    states.reverse()
    rewards.reverse()
    values.reverse()

    batch_si = []
    batch_a = []
    batch_td = []
    batch_R = []

    # compute and accumulate gradients
    for(ai, ri, si, Vi) in zip(actions, rewards, states, values):
      R = ri + GAMMA * R
      td = R - Vi
      a = np.zeros([ACTION_SIZE])
      a[ai] = 1

      batch_si.append(si)
      batch_a.append(a)
      batch_td.append(td)
      batch_R.append(R)

    sess.run( self.accum_gradients,
              feed_dict = {
                self.local_network.s: batch_si,
                self.local_network.a: batch_a,
                self.local_network.td: batch_td,
                self.local_network.r: batch_R } )
      
    cur_learning_rate = self._anneal_learning_rate(global_t)

    sess.run( self.apply_gradients,
              feed_dict = { self.learning_rate_input: cur_learning_rate } )

    if (self.thread_index == 0) and (self.local_t % 100) == 0:
      print "TIMESTEP", self.local_t

    # return advanced local step size
    diff_local_t = self.local_t - start_local_t
    return diff_local_t
Developer ID: csdlrl, Project: async_deep_reinforce, Lines of code: 104, Source file: a3c_training_thread.py

Example 7: A3CTrainingThread

# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]

#.........part of the code is omitted here.........
    })
    summary_writer.add_summary(summary_str, global_t)
    
  def process(self, sess, global_t, summary_writer, summary_op):
    states = []
    actions = []
    rewards = []
    values = []

    terminal_end = False

    # reset the accumulated gradients
    sess.run( self.policy_reset_gradients )
    sess.run( self.value_reset_gradients )

    # copy weights from shared to local
    sess.run( self.sync )

    start_local_t = self.local_t
    
    # t_max times loop
    for i in range(LOCAL_T_MAX):
      pi_ = self.local_network.run_policy(sess, self.game_state.s_t)
      action = self.choose_action(pi_)

      states.append(self.game_state.s_t)
      actions.append(action)
      value_ = self.local_network.run_value(sess, self.game_state.s_t)
      values.append(value_)

      if (self.thread_index == 0) and (self.local_t % 100) == 0:
        print "pi=", pi_
        print " V=", value_

      # run one game step
      self.game_state.process(action)

      # read the result of the step
      reward = self.game_state.reward
      terminal = self.game_state.terminal

      self.episode_reward += reward

      rewards.append(reward)

      self.local_t += 1

      self.game_state.update()
      
      if terminal:
        terminal_end = True
        print "score=", self.episode_reward

        if self.thread_index == 0:        
          self._record_score(sess, summary_writer, summary_op, self.episode_reward, global_t)
          
        self.episode_reward = 0
        break

    R = 0.0
    if not terminal_end:
      R = self.local_network.run_value(sess, self.game_state.s_t)

    actions.reverse()
    states.reverse()
    rewards.reverse()
    values.reverse()

    # compute and accumulate the gradients
    for(ai, ri, si, Vi) in zip(actions, rewards, states, values):
      R = ri + GAMMA * R
      td = R - Vi
      a = np.zeros([ACTION_SIZE])
      a[ai] = 1

      sess.run( self.policy_accum_gradients,
                feed_dict = {
                    self.local_network.s: [si],
                    self.local_network.a: [a],
                    self.local_network.td: [td] } )
      
      sess.run( self.value_accum_gradients,
                feed_dict = {
                    self.local_network.s: [si],
                    self.local_network.r: [R] } )

    cur_learning_rate = self._anneal_learning_rate(global_t)

    sess.run( self.policy_apply_gradients,
              feed_dict = { self.learning_rate_input: cur_learning_rate } )
    # Learning rate for Critic is half of Actor's
    sess.run( self.value_apply_gradients,
              feed_dict = { self.learning_rate_input: cur_learning_rate * 0.5 } )

    if (self.thread_index == 0) and (self.local_t % 100) == 0:
      print "TIMESTEP", self.local_t

    # return the number of local steps advanced
    diff_local_t = self.local_t - start_local_t
    return diff_local_t
Developer ID: expressoman, Project: async_deep_reinforce, Lines of code: 104, Source file: a3c_training_thread.py

Example 8: A3CTrainingThread

# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]

#.........part of the code is omitted here.........
        if self.options.clear_history_on_death and (liveses[-2] > liveses[-1]):
          self.episode_states = []
          self.episode_actions = []
          self.episode_rewards = []
          self.episode_values = []
          self.episode_liveses = self.episode_liveses[-2:]
 
      self.local_t += 1

      if self.options.record_new_record_dir is not None \
         or self.options.record_new_room_dir is not None:
        screen = self.game_state.uncropped_screen
        if self.options.compress_frame:
          screen = lzma.compress(screen.tobytes(), preset=0)
        self.episode_screens.append(screen)

      # terminate if the play time is too long
      self.steps += 1
      if self.steps > self.options.max_play_steps:
        terminal = True

      # requirement for OpenAI Gym: --terminate-on-lives-lost=False
      # terminate if lives lost
      if self.terminate_on_lives_lost and (liveses[-2] > liveses[-1]):
        terminal = True

      # count no reward steps
      if self.game_state.reward == 0.0:
        self.no_reward_steps += 1
      else:
        self.no_reward_steps = 0

      # s_t1 -> s_t
      self.game_state.update()
      
      if self.local_t % self.options.score_log_interval == 0:
        elapsed_time = time.time() - self.start_time
        print("t={:6.0f},s={:9d},th={}:{}r={:3.0f}RM{:02d}| l={:.0f},v={:.5f},pr={:.5f}".format(
              elapsed_time, global_t, self.thread_index, self.indent,
              self.episode_reward, self.game_state.room_no,
              self.game_state.lives, value_, self.game_state.psc_reward))

      # if self.game_state.room_no != self.game_state.prev_room_no:
      #   elapsed_time = time.time() - self.start_time
      #   print("t={:6.0f},s={:9d},th={}:{}RM{:02d}>RM{:02d}| l={:.0f},v={:.5f},pr={:.5f}".format(
      #         elapsed_time, global_t, self.thread_index, self.indent, 
      #         self.game_state.prev_room_no, self.game_state.room_no,
      #         self.game_state.lives, value_, self.game_state.psc_reward))

      if self.tes > 0:
        if self.game_state.lives < self.episode_liveses[-2]:
          elapsed_time = time.time() - self.start_time
          print("t={:6.0f},s={:9d},th={}:{}l={:.0f}>{:.0f}RM{:02d}|".format(
                elapsed_time, global_t, self.thread_index, self.indent, 
                self.episode_liveses[-2], self.game_state.lives, self.game_state.room_no))

      # separate steps after getting a reward
      if self.game_state.reward > 0:
        if not terminal:
          break

      if terminal:
        terminal_end = True
        elapsed_time = time.time() - self.start_time
        end_mark = "end" if self.terminate_on_lives_lost else "END"
        print("t={:6.0f},s={:9d},th={}:{}r={:3.0f}@{}|".format(
Developer ID: Itsukara, Project: async_deep_reinforce, Lines of code: 70, Source file: a3c_training_thread.py

Example 9: A3CTrainingThread

# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]

#.........part of the code is omitted here.........

        # t_max times loop
        start_local_t = self.local_t
        terminal_end = False
        for i in range(LOCAL_T_MAX):
            pi_, value_ = self.local_network.run_policy_and_value(sess, self.game_state.s_t)
            action = choose_action(pi_)

            states.append(self.game_state.s_t)
            actions.append(action)
            values.append(value_)

            # Debug output for progress
            if (self.thread_index == 0) and (self.local_t % 100) == 0:
                print(('local_t = {:10}  pi = ' + '{:7.5f} ' * len(pi_) + ' V = {:8.4f} (thread {})').format(self.local_t,
                                                                                                             *pi_, value_, self.thread_index))

            # process game
            self.game_state.process(action)

            # receive game result
            reward = self.game_state.reward
            terminal = self.game_state.terminal

            self.episode_reward += reward

            # clip reward
            # TODO: Does this make sense?
            rewards.append(np.clip(reward, -1, 1))

            self.local_t += 1

            # s_t1 -> s_t
            self.game_state.update()

            if terminal:
                terminal_end = True
                print ("score=", self.episode_reward)

                self._record_score(
                    sess, summary_writer, summary_op, score_input,
                                   self.episode_reward, global_t)

                self.episode_reward = 0
                self.game_state.reset()
                if USE_LSTM:
                    self.local_network.reset_state()
                break

        # Compute and accumulate gradients

        R = 0.0 if terminal_end else self.local_network.run_value(sess, self.game_state.s_t)

        actions.reverse()
        states.reverse()
        rewards.reverse()
        values.reverse()

        # What is the meaning of these values?
        batch_si = []
        batch_a = []
        batch_td = []
        batch_R = []

        for(ai, ri, si, Vi) in zip(actions, rewards, states, values):
            R = ri + GAMMA * R
Developer ID: salsasepp, Project: ufcnn-keras, Lines of code: 70, Source file: a3c_training_thread.py

Example 10: A3CTrainingThread

# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]

#.........part of the code is omitted here.........
    start_local_t = self.local_t

    if USE_LSTM:
      start_lstm_state = self.local_network.lstm_state_out
    
    # t_max times loop
    for i in range(LOCAL_T_MAX):
      pi_, value_ = self.local_network.run_policy_and_value(sess, self.game_state.s_t)
      action = self.choose_action(pi_)

      states.append(self.game_state.s_t)
      actions.append(action)
      values.append(value_)

      if (self.thread_index == 0) and (self.local_t % LOG_INTERVAL == 0):
        print("pi={}".format(pi_))
        print(" V={}".format(value_))

      # process game
      self.game_state.process(action)

      # receive game result
      reward = self.game_state.reward
      terminal = self.game_state.terminal

      self.episode_reward += reward

      # clip reward
      rewards.append( np.clip(reward, -1, 1) )

      self.local_t += 1

      # s_t1 -> s_t
      self.game_state.update()
      
      if terminal:
        terminal_end = True
        print("score={}".format(self.episode_reward))

        self._record_score(sess, summary_writer, summary_op, score_input,
                           self.episode_reward, global_t)
          
        self.episode_reward = 0
        self.game_state.reset()
        if USE_LSTM:
          self.local_network.reset_state()
        break

    R = 0.0
    if not terminal_end:
      R = self.local_network.run_value(sess, self.game_state.s_t)

    actions.reverse()
    states.reverse()
    rewards.reverse()
    values.reverse()

    batch_si = []
    batch_a = []
    batch_td = []
    batch_R = []

    # compute and accumulate gradients
    for(ai, ri, si, Vi) in zip(actions, rewards, states, values):
      R = ri + GAMMA * R
      td = R - Vi
Developer ID: JorritvandenBerg, Project: async_deep_reinforce, Lines of code: 70, Source file: a3c_training_thread.py


Note: The game_state.GameState.update examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets are taken from open-source projects contributed by their respective authors, and copyright of the source code remains with the original authors; please consult each project's license before distributing or reusing the code. Do not reproduce this page without permission.