This article collects typical usage examples of the GameState.update method from the Python module game_state. If you are wondering how to use Python's GameState.update, what it does, or what code that calls it looks like, the curated method examples here may help. You can also explore further usage examples of the enclosing class, game_state.GameState.
Ten code examples of GameState.update are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
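The common pattern across the examples below is a step loop: GameState.process(action) advances the environment and fills in reward, terminal, and the next observation, and GameState.update() then shifts that observation into s_t (s_t1 -> s_t) before the next step. Here is a minimal sketch of that loop, assuming the GameState(0) constructor and the attributes seen in the examples; choose_action is a hypothetical stand-in for a policy, not part of GameState:

from game_state import GameState

game_state = GameState(0)                     # constructor signature assumed from Example 3
terminal = False
while not terminal:
    action = choose_action(game_state.s_t)    # hypothetical policy helper
    game_state.process(action)                # apply the action; sets reward, terminal, s_t1
    reward = game_state.reward
    terminal = game_state.terminal
    game_state.update()                       # s_t1 -> s_t for the next iteration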
Example 1: Game
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]
class Game(object):
    def __init__(self):
        self.clock = pygame.time.Clock()
        self.display = Display()
        self.game_state = GameState()
        self.control_state = ControlState()

    def run(self):
        pygame.init()
        while True:
            # poll input, advance the game state from it, then redraw
            self.control_state.update()
            self.game_state.update(self.control_state)
            self.display.update(self.game_state)
            self.clock.tick(60)  # cap the loop at 60 frames per second
Example 2: read_dialog_script
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]
def read_dialog_script(dialog):
    """
    Reads in JSON array of dialog boxes to be spoken
    """
    if not dialog:
        # Return if no dialog given (i.e. no intro script)
        return
    logging.debug(pprint.pformat(dialog))
    for line in dialog:
        char = Character.load_character(line['character'])
        speech_box(line['text'], speaker=char.name)
        if 'unlocks' in line:
            from game_state import GameState
            GameState.update(line['unlocks'])
Example 3: test_process
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]
def test_process(self):
    game_state = GameState(0)
    before_s_t = np.array(game_state.s_t)

    for i in range(1000):
        bef1 = game_state.s_t[:, :, 1]
        bef2 = game_state.s_t[:, :, 2]
        bef3 = game_state.s_t[:, :, 3]

        game_state.process(1)
        game_state.update()

        aft0 = game_state.s_t[:, :, 0]
        aft1 = game_state.s_t[:, :, 1]
        aft2 = game_state.s_t[:, :, 2]

        # values should be shifted by one frame
        self.assertTrue((bef1.flatten() == aft0.flatten()).all())
        self.assertTrue((bef2.flatten() == aft1.flatten()).all())
        self.assertTrue((bef3.flatten() == aft2.flatten()).all())

        # all elements should stay within [0.0, 1.0]
        self.assertTrue(np.less_equal(bef1, 1.0).all())
        self.assertTrue(np.less_equal(bef2, 1.0).all())
        self.assertTrue(np.less_equal(bef3, 1.0).all())
        self.assertTrue(np.greater_equal(bef1, 0.0).all())
        self.assertTrue(np.greater_equal(bef2, 0.0).all())
        self.assertTrue(np.greater_equal(bef3, 0.0).all())
        self.assertTrue(np.less_equal(aft0, 1.0).all())
        self.assertTrue(np.less_equal(aft1, 1.0).all())
        self.assertTrue(np.less_equal(aft2, 1.0).all())
        self.assertTrue(np.greater_equal(aft0, 0.0).all())
        self.assertTrue(np.greater_equal(aft1, 0.0).all())
        self.assertTrue(np.greater_equal(aft2, 0.0).all())
Example 4: choose_action
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]
daily_pnl = 0
# new
if i > 0:
    game_state.environment.reset()
while not terminal:
    pi_values = global_network.run_policy(sess, game_state.s_t)
    action = choose_action(pi_values, use_argmax=True)
    game_state.process(action)
    reward = game_state.reward
    terminal = game_state.terminal
    game_state.update()
game_state.environment.create_plot(game_state.environment.iday)
daily_pnl = sum(t.pnl() for t in game_state.environment.trades)
total_pnl += daily_pnl
game_state.environment.daily_pnl = 0
print("Day", i, ",Realized PnL:", daily_pnl)
print("Total Realized PnL:", total_pnl)

for i in range(testing_days):
    print("Plotting day", i)
Example 5: main
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]
def main():
    state = GameState()
    while True:
        state.update()
        time.sleep(DEBOUNCE)  # poll at the debounce interval
Example 6: A3CTrainingThread
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]
# ......... part of the code is omitted here .........
states = []
actions = []
rewards = []
values = []

terminal_end = False

# reset accumulated gradients
sess.run(self.reset_gradients)

# copy weights from shared to local
sess.run(self.sync)

start_local_t = self.local_t

# t_max times loop
for i in range(LOCAL_T_MAX):
    pi_, value_ = self.local_network.run_policy_and_value(sess, self.game_state.s_t)
    action = self.choose_action(pi_)

    states.append(self.game_state.s_t)
    actions.append(action)
    values.append(value_)

    if (self.thread_index == 0) and (self.local_t % 100) == 0:
        print("pi=", pi_)
        print(" V=", value_)

    # process game
    self.game_state.process(action)

    # receive game result
    reward = self.game_state.reward
    terminal = self.game_state.terminal

    self.episode_reward += reward

    # clip reward
    rewards.append(np.clip(reward, -1, 1))

    self.local_t += 1

    # s_t1 -> s_t
    self.game_state.update()

    if terminal:
        terminal_end = True
        print("score=", self.episode_reward)

        self._record_score(sess, summary_writer, summary_op, score_input,
                           self.episode_reward, global_t)

        self.episode_reward = 0
        self.game_state.reset()
        break

R = 0.0
if not terminal_end:
    R = self.local_network.run_value(sess, self.game_state.s_t)

actions.reverse()
states.reverse()
rewards.reverse()
values.reverse()

batch_si = []
batch_a = []
batch_td = []
batch_R = []

# compute and accumulate gradients (n-step return R, TD error as advantage)
for (ai, ri, si, Vi) in zip(actions, rewards, states, values):
    R = ri + GAMMA * R
    td = R - Vi
    a = np.zeros([ACTION_SIZE])
    a[ai] = 1

    batch_si.append(si)
    batch_a.append(a)
    batch_td.append(td)
    batch_R.append(R)

sess.run(self.accum_gradients,
         feed_dict={
             self.local_network.s: batch_si,
             self.local_network.a: batch_a,
             self.local_network.td: batch_td,
             self.local_network.r: batch_R})

cur_learning_rate = self._anneal_learning_rate(global_t)

sess.run(self.apply_gradients,
         feed_dict={self.learning_rate_input: cur_learning_rate})

if (self.thread_index == 0) and (self.local_t % 100) == 0:
    print("TIMESTEP", self.local_t)

# return advanced local step size
diff_local_t = self.local_t - start_local_t
return diff_local_t
Example 7: A3CTrainingThread
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]
# ......... part of the code is omitted here .........
    })  # tail of the preceding, mostly omitted method
    summary_writer.add_summary(summary_str, global_t)

def process(self, sess, global_t, summary_writer, summary_op):
    states = []
    actions = []
    rewards = []
    values = []

    terminal_end = False

    # reset accumulated gradients
    sess.run(self.policy_reset_gradients)
    sess.run(self.value_reset_gradients)

    # copy weights from shared to local
    sess.run(self.sync)

    start_local_t = self.local_t

    # t_max times loop
    for i in range(LOCAL_T_MAX):
        pi_ = self.local_network.run_policy(sess, self.game_state.s_t)
        action = self.choose_action(pi_)

        states.append(self.game_state.s_t)
        actions.append(action)

        value_ = self.local_network.run_value(sess, self.game_state.s_t)
        values.append(value_)

        if (self.thread_index == 0) and (self.local_t % 100) == 0:
            print("pi=", pi_)
            print(" V=", value_)

        # run the game
        self.game_state.process(action)

        # results of the step
        reward = self.game_state.reward
        terminal = self.game_state.terminal

        self.episode_reward += reward
        rewards.append(reward)

        self.local_t += 1

        self.game_state.update()

        if terminal:
            terminal_end = True
            print("score=", self.episode_reward)
            if self.thread_index == 0:
                self._record_score(sess, summary_writer, summary_op, self.episode_reward, global_t)
            self.episode_reward = 0
            break

    R = 0.0
    if not terminal_end:
        R = self.local_network.run_value(sess, self.game_state.s_t)

    actions.reverse()
    states.reverse()
    rewards.reverse()
    values.reverse()

    # compute gradients and accumulate them
    for (ai, ri, si, Vi) in zip(actions, rewards, states, values):
        R = ri + GAMMA * R
        td = R - Vi
        a = np.zeros([ACTION_SIZE])
        a[ai] = 1

        sess.run(self.policy_accum_gradients,
                 feed_dict={
                     self.local_network.s: [si],
                     self.local_network.a: [a],
                     self.local_network.td: [td]})

        sess.run(self.value_accum_gradients,
                 feed_dict={
                     self.local_network.s: [si],
                     self.local_network.r: [R]})

    cur_learning_rate = self._anneal_learning_rate(global_t)

    sess.run(self.policy_apply_gradients,
             feed_dict={self.learning_rate_input: cur_learning_rate})
    # Learning rate for Critic is half of Actor's
    sess.run(self.value_apply_gradients,
             feed_dict={self.learning_rate_input: cur_learning_rate * 0.5})

    if (self.thread_index == 0) and (self.local_t % 100) == 0:
        print("TIMESTEP", self.local_t)

    # return the number of local steps advanced
    diff_local_t = self.local_t - start_local_t
    return diff_local_t
Example 8: A3CTrainingThread
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]
# ......... part of the code is omitted here .........
if self.options.clear_history_on_death and (liveses[-2] > liveses[-1]):
    self.episode_states = []
    self.episode_actions = []
    self.episode_rewards = []
    self.episode_values = []
    self.episode_liveses = self.episode_liveses[-2:]

self.local_t += 1

if self.options.record_new_record_dir is not None \
        or self.options.record_new_room_dir is not None:
    screen = self.game_state.uncropped_screen
    if self.options.compress_frame:
        screen = lzma.compress(screen.tobytes(), preset=0)
    self.episode_screens.append(screen)

# terminate if the play time is too long
self.steps += 1
if self.steps > self.options.max_play_steps:
    terminal = True

# requirement for OpenAI Gym: --terminate-on-lives-lost=False
# terminate if lives lost
if self.terminate_on_lives_lost and (liveses[-2] > liveses[-1]):
    terminal = True

# count consecutive steps with no reward
if self.game_state.reward == 0.0:
    self.no_reward_steps += 1
else:
    self.no_reward_steps = 0

# s_t1 -> s_t
self.game_state.update()

if self.local_t % self.options.score_log_interval == 0:
    elapsed_time = time.time() - self.start_time
    print("t={:6.0f},s={:9d},th={}:{}r={:3.0f}RM{:02d}| l={:.0f},v={:.5f},pr={:.5f}".format(
        elapsed_time, global_t, self.thread_index, self.indent,
        self.episode_reward, self.game_state.room_no,
        self.game_state.lives, value_, self.game_state.psc_reward))

# if self.game_state.room_no != self.game_state.prev_room_no:
#     elapsed_time = time.time() - self.start_time
#     print("t={:6.0f},s={:9d},th={}:{}RM{:02d}>RM{:02d}| l={:.0f},v={:.5f},pr={:.5f}".format(
#         elapsed_time, global_t, self.thread_index, self.indent,
#         self.game_state.prev_room_no, self.game_state.room_no,
#         self.game_state.lives, value_, self.game_state.psc_reward))

if self.tes > 0:
    if self.game_state.lives < self.episode_liveses[-2]:
        elapsed_time = time.time() - self.start_time
        print("t={:6.0f},s={:9d},th={}:{}l={:.0f}>{:.0f}RM{:02d}|".format(
            elapsed_time, global_t, self.thread_index, self.indent,
            self.episode_liveses[-2], self.game_state.lives, self.game_state.room_no))

# separate steps after getting a reward
if self.game_state.reward > 0:
    if not terminal:
        break

if terminal:
    terminal_end = True
    elapsed_time = time.time() - self.start_time
    end_mark = "end" if self.terminate_on_lives_lost else "END"
    print("t={:6.0f},s={:9d},th={}:{}r={:3.0f}@{}|".format(
Example 9: A3CTrainingThread
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]
# ......... part of the code is omitted here .........
# t_max times loop
start_local_t = self.local_t
terminal_end = False
for i in range(LOCAL_T_MAX):
    pi_, value_ = self.local_network.run_policy_and_value(sess, self.game_state.s_t)
    action = choose_action(pi_)

    states.append(self.game_state.s_t)
    actions.append(action)
    values.append(value_)

    # Debug output for progress
    if (self.thread_index == 0) and (self.local_t % 100) == 0:
        print(('local_t = {:10} pi = ' + '{:7.5f} ' * len(pi_) + ' V = {:8.4f} (thread {})').format(
            self.local_t, *pi_, value_, self.thread_index))

    # process game
    self.game_state.process(action)

    # receive game result
    reward = self.game_state.reward
    terminal = self.game_state.terminal

    self.episode_reward += reward

    # clip reward
    # TODO: Does this make sense?
    rewards.append(np.clip(reward, -1, 1))

    self.local_t += 1

    # s_t1 -> s_t
    self.game_state.update()

    if terminal:
        terminal_end = True
        print("score=", self.episode_reward)
        self._record_score(
            sess, summary_writer, summary_op, score_input,
            self.episode_reward, global_t)
        self.episode_reward = 0
        self.game_state.reset()
        if USE_LSTM:
            self.local_network.reset_state()
        break

# Compute and accumulate gradients
R = 0.0 if terminal_end else self.local_network.run_value(sess, self.game_state.s_t)

actions.reverse()
states.reverse()
rewards.reverse()
values.reverse()

# What is the meaning of these values?
batch_si = []
batch_a = []
batch_td = []
batch_R = []

for (ai, ri, si, Vi) in zip(actions, rewards, states, values):
    R = ri + GAMMA * R
Example 10: A3CTrainingThread
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import update [as alias]
# ......... part of the code is omitted here .........
start_local_t = self.local_t

if USE_LSTM:
    start_lstm_state = self.local_network.lstm_state_out

# t_max times loop
for i in range(LOCAL_T_MAX):
    pi_, value_ = self.local_network.run_policy_and_value(sess, self.game_state.s_t)
    action = self.choose_action(pi_)

    states.append(self.game_state.s_t)
    actions.append(action)
    values.append(value_)

    if (self.thread_index == 0) and (self.local_t % LOG_INTERVAL == 0):
        print("pi={}".format(pi_))
        print(" V={}".format(value_))

    # process game
    self.game_state.process(action)

    # receive game result
    reward = self.game_state.reward
    terminal = self.game_state.terminal

    self.episode_reward += reward

    # clip reward
    rewards.append(np.clip(reward, -1, 1))

    self.local_t += 1

    # s_t1 -> s_t
    self.game_state.update()

    if terminal:
        terminal_end = True
        print("score={}".format(self.episode_reward))
        self._record_score(sess, summary_writer, summary_op, score_input,
                           self.episode_reward, global_t)
        self.episode_reward = 0
        self.game_state.reset()
        if USE_LSTM:
            self.local_network.reset_state()
        break

R = 0.0
if not terminal_end:
    R = self.local_network.run_value(sess, self.game_state.s_t)

actions.reverse()
states.reverse()
rewards.reverse()
values.reverse()

batch_si = []
batch_a = []
batch_td = []
batch_R = []

# compute and accumulate gradients
for (ai, ri, si, Vi) in zip(actions, rewards, states, values):
    R = ri + GAMMA * R
    td = R - Vi