

Python GameState.process Method Code Examples

This article collects typical usage examples of the Python method game_state.GameState.process. If you are wondering what GameState.process does, how to call it, or how it is used in practice, the curated examples below should help. You can also explore the other usage examples of the enclosing class, game_state.GameState.


Seven code examples of GameState.process are shown below, sorted by popularity by default. Upvote the examples you find useful; your feedback helps the site recommend better Python code samples.
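All of the examples below share the same pattern: construct a GameState, call process(action) to advance the environment, read reward and terminal, and then call update() to roll the state stack forward. As a quick orientation, here is a minimal usage sketch based only on the API visible in these examples; the action count and the random policy are placeholders rather than part of any of the original projects.

import random

from game_state import GameState

ACTION_SIZE = 3  # placeholder: the real number of actions depends on the environment

game_state = GameState(0)  # the examples below seed the constructor with an integer

episode_reward = 0.0
terminal = False
while not terminal:
    action = random.randrange(ACTION_SIZE)  # stand-in for a learned policy
    game_state.process(action)              # advance the environment by one action
    episode_reward += game_state.reward     # reward returned for that action
    terminal = game_state.terminal          # True once the episode ends
    game_state.update()                     # shift s_t so the newest frame is current

print("episode reward:", episode_reward)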

Example 1: test_process

# Required import: from game_state import GameState [as alias]
# Or alternatively: from game_state.GameState import process [as alias]
    def test_process(self):
        game_state = GameState(0)

        before_s_t = np.array( game_state.s_t )

        for i in range(1000):
            bef1 = game_state.s_t[:,:, 1]
            bef2 = game_state.s_t[:,:, 2]
            bef3 = game_state.s_t[:,:, 3]

            game_state.process(1)
            game_state.update()

            aft0 = game_state.s_t[:,:, 0]
            aft1 = game_state.s_t[:,:, 1]
            aft2 = game_state.s_t[:,:, 2]

            # values should be shifted
            self.assertTrue( (bef1.flatten() == aft0.flatten()).all() )
            self.assertTrue( (bef2.flatten() == aft1.flatten()).all() )
            self.assertTrue( (bef3.flatten() == aft2.flatten()).all() )

            # all elements should be within [0.0, 1.0]
            self.assertTrue( np.less_equal(bef1, 1.0).all() )
            self.assertTrue( np.less_equal(bef2, 1.0).all() )
            self.assertTrue( np.less_equal(bef3, 1.0).all() )
            self.assertTrue( np.greater_equal(bef1, 0.0).all() )
            self.assertTrue( np.greater_equal(bef2, 0.0).all() )
            self.assertTrue( np.greater_equal(bef3, 0.0).all() )

            self.assertTrue( np.less_equal(aft0, 1.0).all() )
            self.assertTrue( np.less_equal(aft1, 1.0).all() )
            self.assertTrue( np.less_equal(aft2, 1.0).all() )
            self.assertTrue( np.greater_equal(aft0, 0.0).all() )
            self.assertTrue( np.greater_equal(aft1, 0.0).all() )
            self.assertTrue( np.greater_equal(aft2, 0.0).all() )
Developer: salsasepp, Project: ufcnn-keras, Lines: 38, Source: game_state_test.py
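The assertions above imply that process() followed by update() drops the oldest frame and appends the newest one along the last axis of s_t. A small NumPy sketch of that shift, assuming an 84x84x4 state stack (a common choice in these A3C setups, though the excerpt does not confirm the exact shape):

import numpy as np

def shift_frame_stack(s_t, new_frame):
    # Drop the oldest channel and append the newest frame along the last axis.
    return np.append(s_t[:, :, 1:], new_frame[:, :, np.newaxis], axis=2)

old = np.random.rand(84, 84, 4)
new = shift_frame_stack(old, np.random.rand(84, 84))
# Channel k of the new stack equals channel k+1 of the old one,
# which is exactly what test_process checks after every update().
assert (new[:, :, 0] == old[:, :, 1]).all()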

Example 2: range

# Required import: from game_state import GameState [as alias]
# Or alternatively: from game_state.GameState import process [as alias]
total_pnl = 0

for i in range(testing_days):
    print("Working on day ",i)
    terminal = False
    daily_pnl = 0

    # reset the environment at the start of every day after the first
    if i > 0:
        game_state.environment.reset()

    while not terminal:
        pi_values = global_network.run_policy(sess, game_state.s_t)

        action = choose_action(pi_values, use_argmax=True)
        game_state.process(action)

        reward = game_state.reward
        terminal = game_state.terminal

        game_state.update()

    game_state.environment.create_plot(game_state.environment.iday)
    daily_pnl = sum(t.pnl() for t in game_state.environment.trades)
    total_pnl += daily_pnl
    game_state.environment.daily_pnl = 0

    print("Day", i, ",Realized PnL:", daily_pnl)

print("Total Realized PnL:", total_pnl)
Developer: lukovkin, Project: ufcnn-keras, Lines: 32, Source: Tradingresults.py
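choose_action and global_network.run_policy are not defined in this excerpt. The following is a plausible sketch of the helper as it is called here, greedy when use_argmax=True and sampled otherwise; this is an assumption, and the project's actual implementation may differ.

import numpy as np

def choose_action(pi_values, use_argmax=False):
    # Greedy choice for evaluation, categorical sampling otherwise.
    pi = np.asarray(pi_values, dtype=np.float64)
    if use_argmax:
        return int(np.argmax(pi))
    pi = pi / pi.sum()  # guard against normalization drift
    return int(np.random.choice(len(pi), p=pi))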

Example 3: A3CTrainingThread

# Required import: from game_state import GameState [as alias]
# Or alternatively: from game_state.GameState import process [as alias]
class A3CTrainingThread(object):
  def __init__(self,
               thread_index,
               global_network,
               initial_learning_rate,
               learning_rate_input,
               grad_applier,
               max_global_time_step,
               device):

    self.thread_index = thread_index
    self.learning_rate_input = learning_rate_input
    self.max_global_time_step = max_global_time_step

    self.local_network = GameACNetwork(ACTION_SIZE, device)
    self.local_network.prepare_loss(ENTROPY_BETA)

    # TODO: don't need accum trainer anymore with batch
    self.trainer = AccumTrainer(device)
    self.trainer.prepare_minimize( self.local_network.total_loss,
                                   self.local_network.get_vars() )
    
    self.accum_gradients = self.trainer.accumulate_gradients()
    self.reset_gradients = self.trainer.reset_gradients()
  
    self.apply_gradients = grad_applier.apply_gradients(
      global_network.get_vars(),
      self.trainer.get_accum_grad_list() )

    self.sync = self.local_network.sync_from(global_network)
    
    self.game_state = GameState(113 * thread_index)
    
    self.local_t = 0

    self.initial_learning_rate = initial_learning_rate

    self.episode_reward = 0


  def _anneal_learning_rate(self, global_time_step):
    learning_rate = self.initial_learning_rate * (self.max_global_time_step - global_time_step) / self.max_global_time_step
    if learning_rate < 0.0:
      learning_rate = 0.0
    return learning_rate

  def choose_action(self, pi_values):
    values = []
    sum = 0.0
    for rate in pi_values:
      sum = sum + rate
      value = sum
      values.append(value)
    
    r = random.random() * sum
    for i in range(len(values)):
      if values[i] >= r:
        return i;
    #fail safe
    return len(values)-1

  def _record_score(self, sess, summary_writer, summary_op, score_input, score, global_t):
    summary_str = sess.run(summary_op, feed_dict={
      score_input: score
    })
    summary_writer.add_summary(summary_str, global_t)
    
  def process(self, sess, global_t, summary_writer, summary_op, score_input):
    states = []
    actions = []
    rewards = []
    values = []

    terminal_end = False

    # reset accumulated gradients
    sess.run( self.reset_gradients )

    # copy weights from shared to local
    sess.run( self.sync )

    start_local_t = self.local_t
    
    # t_max times loop
    for i in range(LOCAL_T_MAX):
      pi_, value_ = self.local_network.run_policy_and_value(sess, self.game_state.s_t)      
      action = self.choose_action(pi_)

      states.append(self.game_state.s_t)
      actions.append(action)
      values.append(value_)

      if (self.thread_index == 0) and (self.local_t % 100) == 0:
        print "pi=", pi_
        print " V=", value_

      # process game
      self.game_state.process(action)

      # receive game result
#......... part of the code is omitted here .........
Developer: csdlrl, Project: async_deep_reinforce, Lines: 103, Source: a3c_training_thread.py
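The choose_action method above samples an action by scanning a running cumulative sum of the policy outputs, falling back to the last index if rounding pushes the draw past the total. For comparison only, a compact NumPy equivalent:

import numpy as np

def choose_action_np(pi_values):
    # Sample an action index with probability proportional to pi_values.
    pi = np.asarray(pi_values, dtype=np.float64)
    return int(np.random.choice(len(pi), p=pi / pi.sum()))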

Example 4: A3CTrainingThread

# Required import: from game_state import GameState [as alias]
# Or alternatively: from game_state.GameState import process [as alias]
class A3CTrainingThread(object):
  def __init__(self, thread_index, global_network, initial_learning_rate,
               learning_rate_input,
               policy_applier, value_applier,
               max_global_time_step):

    self.thread_index = thread_index
    self.learning_rate_input = learning_rate_input
    self.max_global_time_step = max_global_time_step

    self.local_network = GameACNetwork(ACTION_SIZE)
    self.local_network.prepare_loss(ENTROPY_BETA)

    # policy
    self.policy_trainer = AccumTrainer()
    self.policy_trainer.prepare_minimize( self.local_network.policy_loss,
                                          self.local_network.get_policy_vars(),
                                          GRAD_NORM_CLIP )
    
    self.policy_accum_gradients = self.policy_trainer.accumulate_gradients()
    self.policy_reset_gradients = self.policy_trainer.reset_gradients()
  
    self.policy_apply_gradients = policy_applier.apply_gradients(
        global_network.get_policy_vars(),
        self.policy_trainer.get_accum_grad_list() )

    # value
    self.value_trainer = AccumTrainer()
    self.value_trainer.prepare_minimize( self.local_network.value_loss,
                                         self.local_network.get_value_vars(),
                                         GRAD_NORM_CLIP )
    self.value_accum_gradients = self.value_trainer.accumulate_gradients()
    self.value_reset_gradients = self.value_trainer.reset_gradients()
  

    self.value_apply_gradients = value_applier.apply_gradients(
        global_network.get_value_vars(),
        self.value_trainer.get_accum_grad_list() )
    
    self.sync = self.local_network.sync_from(global_network)
    
    self.game_state = GameState(113 * thread_index)
    
    self.local_t = 0

    self.initial_learning_rate = initial_learning_rate

    self.episode_reward = 0

    # thread0 will record score for TensorBoard
    if self.thread_index == 0:
      self.score_input = tf.placeholder(tf.int32)
      tf.scalar_summary("score", self.score_input)

  def _anneal_learning_rate(self, global_time_step):
    learning_rate = self.initial_learning_rate * (self.max_global_time_step - global_time_step) / self.max_global_time_step
    if learning_rate < 0.0:
      learning_rate = 0.0
    return learning_rate

  def choose_action(self, pi_values):
    values = []
    sum = 0.0
    for rate in pi_values:
      sum = sum + rate
      value = sum
      values.append(value)
    
    r = random.random() * sum
    for i in range(len(values)):
      if values[i] >= r:
        return i;
    #fail safe
    return len(values)-1

  def _record_score(self, sess, summary_writer, summary_op, score, global_t):
    summary_str = sess.run(summary_op, feed_dict={
      self.score_input: score
    })
    summary_writer.add_summary(summary_str, global_t)
    
  def process(self, sess, global_t, summary_writer, summary_op):
    states = []
    actions = []
    rewards = []
    values = []

    terminal_end = False

    # reset the accumulated gradients
    sess.run( self.policy_reset_gradients )
    sess.run( self.value_reset_gradients )

    # copy weights from shared to local
    sess.run( self.sync )

    start_local_t = self.local_t
    
    # t_max times loop
    for i in range(LOCAL_T_MAX):
#......... part of the code is omitted here .........
Developer: expressoman, Project: async_deep_reinforce, Lines: 103, Source: a3c_training_thread.py
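_anneal_learning_rate in both classes above decays the learning rate linearly from its initial value down to zero at max_global_time_step, i.e. lr(t) = lr_0 * (T_max - t) / T_max, clamped at zero. A tiny standalone illustration of the schedule (the numbers are made up for the example):

def anneal_learning_rate(initial_lr, global_t, max_global_t):
    # Linear decay to zero, matching _anneal_learning_rate above.
    lr = initial_lr * (max_global_t - global_t) / max_global_t
    return max(lr, 0.0)

print(anneal_learning_rate(7e-4, 5e7, 1e8))  # 0.00035: halfway through, half the rate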

Example 5: A3CTrainingThread

# Required import: from game_state import GameState [as alias]
# Or alternatively: from game_state.GameState import process [as alias]

#......... part of the code is omitted here .........
    r = random.random()
    if r < self.greediness:
      action =  int(r * len(pi_values))
    elif r < self.repeat_action_ratio:
      action = self.prev_action
    else:
      # Increase randomness of the choice if the no-reward period has gone on too long
      if self.no_reward_steps > self.options.no_reward_steps:
        randomness = (self.no_reward_steps - self.options.no_reward_steps) * self.options.randomness
        pi_values += randomness
        pi_values /= sum(pi_values)
        if self.local_t % self.options.randomness_log_interval == 0:
          elapsed_time = time.time() - self.start_time
          print("t={:6.0f},s={:9d},th={}:{}randomness={:.8f}".format(
                elapsed_time, global_t, self.thread_index, self.indent, randomness))

      pi_values -= np.finfo(np.float32).epsneg
      action_samples = np.random.multinomial(self.options.num_experiments, pi_values)
      action = action_samples.argmax(0)

    self.prev_action = action
    return action

  def _record_score(self, sess, summary_writer, summary_op, score_input, score, global_t):
    summary_str = sess.run(summary_op, feed_dict={
      score_input: score
    })
    summary_writer.add_summary(summary_str, global_t)
    
  def set_start_time(self, start_time):
    self.start_time = start_time

  #@profile
  def process(self, sess, global_t, summary_writer, summary_op, score_input):
    states = []
    actions = []
    rewards = []
    values = []
    liveses = [self.game_state.lives]
    if self.tes > 0:
      if self.episode_liveses == []:
        self.episode_liveses.append(self.game_state.lives)

    terminal_end = False

    # reset accumulated gradients
    sess.run( self.reset_gradients )

    # copy weights from shared to local
    sess.run( self.sync )

    start_local_t = self.local_t

    if self.options.use_lstm:
      start_lstm_state = self.local_network.lstm_state_out
    
    # t_max times loop
    for i in range(self.options.local_t_max):
      pi_, value_ = self.local_network.run_policy_and_value(sess, self.game_state.s_t)
      action = self.choose_action(pi_, global_t)

      states.append(self.game_state.s_t)
      actions.append(action)
      values.append(value_)
      liveses.append(self.game_state.lives)
Developer: Itsukara, Project: async_deep_reinforce, Lines: 69, Source: a3c_training_thread.py
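One detail of the choose_action excerpt above deserves a note: when options.num_experiments is 1, the multinomial draw reduces to ordinary categorical sampling from pi_values, while larger values concentrate the argmax of the counts on the most probable action. A short illustration (the distribution here is made up):

import numpy as np

pi = np.array([0.1, 0.6, 0.3])

# One experiment: equivalent to sampling a single action with probabilities pi.
print(np.random.multinomial(1, pi).argmax())

# Many experiments: the argmax of the counts almost always picks the mode of pi.
print(np.random.multinomial(100, pi).argmax())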

Example 6: A3CTrainingThread

# Required import: from game_state import GameState [as alias]
# Or alternatively: from game_state.GameState import process [as alias]
class A3CTrainingThread(object):

    def __init__(self,
                 thread_index,
                 global_network,
                 initial_learning_rate,
                 learning_rate_input,
                 grad_applier,
                 max_global_time_step,
                 device):

        self.thread_index = thread_index
        self.learning_rate_input = learning_rate_input
        self.max_global_time_step = max_global_time_step

        if USE_LSTM:
            self.local_network = GameACLSTMNetwork(ACTION_SIZE, thread_index, device)
        else:
            self.local_network = GameACFFNetwork(ACTION_SIZE, device)

        self.local_network.prepare_loss(ENTROPY_BETA)

        # TODO: don't need accum trainer anymore with batch
        self.trainer = AccumTrainer(device)
        self.trainer.prepare_minimize(self.local_network.total_loss,
                                      self.local_network.get_vars())

        self.accum_gradients = self.trainer.accumulate_gradients()
        self.reset_gradients = self.trainer.reset_gradients()

        self.apply_gradients = grad_applier.apply_gradients(
            global_network.get_vars(),
          self.trainer.get_accum_grad_list())

        self.sync = self.local_network.sync_from(global_network)

        self.game_state = GameState(113 * thread_index)

        self.local_t = 0

        self.initial_learning_rate = initial_learning_rate

        self.episode_reward = 0

    def _anneal_learning_rate(self, global_time_step):
        learning_rate = self.initial_learning_rate * \
            (self.max_global_time_step - global_time_step) / \
             self.max_global_time_step
        assert learning_rate > 0, 'Learning rate {} is not >0'.format(
            learning_rate)
        return learning_rate

    def _record_score(self, sess, summary_writer, summary_op, score_input, score, global_t):
        summary_str = sess.run(summary_op, feed_dict={
                               score_input: score
                               })
        summary_writer.add_summary(summary_str, global_t)

    def process(self, sess, global_t, summary_writer, summary_op, score_input):
        states = []
        actions = []
        rewards = []
        values = []

        # reset accumulated gradients
        sess.run(self.reset_gradients)

        # copy weights from shared to local
        sess.run(self.sync)

        if USE_LSTM:
            start_lstm_state = self.local_network.lstm_state_out

        # t_max times loop
        start_local_t = self.local_t
        terminal_end = False
        for i in range(LOCAL_T_MAX):
            pi_, value_ = self.local_network.run_policy_and_value(sess, self.game_state.s_t)
            action = choose_action(pi_)

            states.append(self.game_state.s_t)
            actions.append(action)
            values.append(value_)

            # Debug output for progress
            if (self.thread_index == 0) and (self.local_t % 100) == 0:
                print(('local_t = {:10}  pi = ' + '{:7.5f} ' * len(pi_) + ' V = {:8.4f} (thread {})').format(self.local_t,
                                                                                                             *pi_, value_, self.thread_index))

            # process game
            self.game_state.process(action)

            # receive game result
            reward = self.game_state.reward
            terminal = self.game_state.terminal

            self.episode_reward += reward

            # clip reward
            # TODO: Does this make sense?
#......... part of the code is omitted here .........
Developer: salsasepp, Project: ufcnn-keras, Lines: 103, Source: a3c_training_thread.py

Example 7: A3CTrainingThread

# Required import: from game_state import GameState [as alias]
# Or alternatively: from game_state.GameState import process [as alias]
class A3CTrainingThread(object):
  def __init__(self,
               thread_index,
               global_network,
               initial_learning_rate,
               learning_rate_input,
               grad_applier,
               max_global_time_step,
               device):

    self.thread_index = thread_index
    self.learning_rate_input = learning_rate_input
    self.max_global_time_step = max_global_time_step

    if USE_LSTM:
      self.local_network = GameACLSTMNetwork(ACTION_SIZE, thread_index, device)
    else:
      self.local_network = GameACFFNetwork(ACTION_SIZE, device)

    self.local_network.prepare_loss(ENTROPY_BETA)

    # TODO: don't need accum trainer anymore with batch
    self.trainer = AccumTrainer(device)
    self.trainer.prepare_minimize( self.local_network.total_loss,
                                   self.local_network.get_vars() )
    
    self.accum_gradients = self.trainer.accumulate_gradients()
    self.reset_gradients = self.trainer.reset_gradients()
  
    self.apply_gradients = grad_applier.apply_gradients(
      global_network.get_vars(),
      self.trainer.get_accum_grad_list() )

    self.sync = self.local_network.sync_from(global_network)
    
    self.game_state = GameState(113 * thread_index)
    
    self.local_t = 0

    self.initial_learning_rate = initial_learning_rate

    self.episode_reward = 0

    # variable controlling log output
    self.prev_local_t = 0

  def _anneal_learning_rate(self, global_time_step):
    learning_rate = self.initial_learning_rate * (self.max_global_time_step - global_time_step) / self.max_global_time_step
    if learning_rate < 0.0:
      learning_rate = 0.0
    return learning_rate

  def choose_action(self, pi_values):
    values = []
    sum = 0.0
    for rate in pi_values:
      sum = sum + rate
      value = sum
      values.append(value)
    
    r = random.random() * sum
    for i in range(len(values)):
      if values[i] >= r:
        return i;
    #fail safe
    return len(values)-1

  def _record_score(self, sess, summary_writer, summary_op, score_input, score, global_t):
    summary_str = sess.run(summary_op, feed_dict={
      score_input: score
    })
    summary_writer.add_summary(summary_str, global_t)
    
  def set_start_time(self, start_time):
    self.start_time = start_time

  def process(self, sess, global_t, summary_writer, summary_op, score_input):
    states = []
    actions = []
    rewards = []
    values = []

    terminal_end = False

    # reset accumulated gradients
    sess.run( self.reset_gradients )

    # copy weights from shared to local
    sess.run( self.sync )

    start_local_t = self.local_t

    if USE_LSTM:
      start_lstm_state = self.local_network.lstm_state_out
    
    # t_max times loop
    for i in range(LOCAL_T_MAX):
      pi_, value_ = self.local_network.run_policy_and_value(sess, self.game_state.s_t)
      action = self.choose_action(pi_)

#......... part of the code is omitted here .........
Developer: JorritvandenBerg, Project: async_deep_reinforce, Lines: 103, Source: a3c_training_thread.py


Note: The game_state.GameState.process examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are selected from open-source projects contributed by various developers, and copyright of the source code remains with the original authors. Consult each project's license before distributing or reusing the code, and do not republish without permission.