This article collects typical usage examples of the Python method game_state.GameState.process. If you are unsure how to call GameState.process, what its arguments look like in practice, or simply want to see it used in real code, the curated examples below may help. You can also explore further usage examples of the containing class, game_state.GameState.
The following shows 7 code examples of the GameState.process method, sorted by popularity by default.
Example 1: test_process
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import process [as alias]
def test_process(self):
    game_state = GameState(0)
    before_s_t = np.array(game_state.s_t)  # snapshot of the initial state stack

    for i in range(1000):
        bef1 = game_state.s_t[:, :, 1]
        bef2 = game_state.s_t[:, :, 2]
        bef3 = game_state.s_t[:, :, 3]

        game_state.process(1)
        game_state.update()

        aft0 = game_state.s_t[:, :, 0]
        aft1 = game_state.s_t[:, :, 1]
        aft2 = game_state.s_t[:, :, 2]

        # frames should be shifted by one position in the stack
        self.assertTrue((bef1.flatten() == aft0.flatten()).all())
        self.assertTrue((bef2.flatten() == aft1.flatten()).all())
        self.assertTrue((bef3.flatten() == aft2.flatten()).all())

        # all elements should stay within [0.0, 1.0]
        self.assertTrue(np.less_equal(bef1, 1.0).all())
        self.assertTrue(np.less_equal(bef2, 1.0).all())
        self.assertTrue(np.less_equal(bef3, 1.0).all())
        self.assertTrue(np.greater_equal(bef1, 0.0).all())
        self.assertTrue(np.greater_equal(bef2, 0.0).all())
        self.assertTrue(np.greater_equal(bef3, 0.0).all())
        self.assertTrue(np.less_equal(aft0, 1.0).all())
        self.assertTrue(np.less_equal(aft1, 1.0).all())
        self.assertTrue(np.less_equal(aft2, 1.0).all())
        self.assertTrue(np.greater_equal(aft0, 0.0).all())
        self.assertTrue(np.greater_equal(aft1, 0.0).all())
        self.assertTrue(np.greater_equal(aft2, 0.0).all())
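The assertions above pin down a simple contract: GameState keeps a sliding stack of the last four preprocessed screens in s_t, with values scaled to [0.0, 1.0], and after process()/update() each frame moves down one channel. A minimal sketch of that behaviour, purely for illustration (this is not the project's real GameState; the 84x84 shape and the random stand-in screen are assumptions):

import numpy as np

class FrameStackSketch(object):
    # Illustrative stand-in for GameState's four-frame screen stack.
    def __init__(self):
        self.s_t = np.zeros((84, 84, 4), dtype=np.float32)
        self.s_t1 = np.zeros_like(self.s_t)

    def process(self, action):
        # A real implementation would step the emulator with `action`;
        # here a random screen in [0, 1] stands in for the new frame.
        new_frame = np.random.uniform(0.0, 1.0, size=(84, 84, 1)).astype(np.float32)
        self.s_t1 = np.append(self.s_t[:, :, 1:], new_frame, axis=2)

    def update(self):
        # Promote the freshly built stack, shifting every old frame by one channel.
        self.s_t = self.s_t1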
Example 2: evaluation loop
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import process [as alias]
total_pnl = 0
for i in range(testing_days):
    print("Working on day ", i)
    terminal = False
    daily_pnl = 0

    # reset the environment at the start of every day after the first
    if i > 0:
        game_state.environment.reset()

    while not terminal:
        pi_values = global_network.run_policy(sess, game_state.s_t)
        action = choose_action(pi_values, use_argmax=True)

        game_state.process(action)

        reward = game_state.reward
        terminal = game_state.terminal

        game_state.update()

    game_state.environment.create_plot(game_state.environment.iday)
    daily_pnl = sum(t.pnl() for t in game_state.environment.trades)
    total_pnl += daily_pnl
    game_state.environment.daily_pnl = 0
    print("Day", i, ", Realized PnL:", daily_pnl)

print("Total Realized PnL:", total_pnl)
Example 3: A3CTrainingThread
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import process [as alias]
class A3CTrainingThread(object):
    def __init__(self,
                 thread_index,
                 global_network,
                 initial_learning_rate,
                 learning_rate_input,
                 grad_applier,
                 max_global_time_step,
                 device):
        self.thread_index = thread_index
        self.learning_rate_input = learning_rate_input
        self.max_global_time_step = max_global_time_step

        self.local_network = GameACNetwork(ACTION_SIZE, device)
        self.local_network.prepare_loss(ENTROPY_BETA)

        # TODO: don't need accum trainer anymore with batch
        self.trainer = AccumTrainer(device)
        self.trainer.prepare_minimize(self.local_network.total_loss,
                                      self.local_network.get_vars())

        self.accum_gradients = self.trainer.accumulate_gradients()
        self.reset_gradients = self.trainer.reset_gradients()

        self.apply_gradients = grad_applier.apply_gradients(
            global_network.get_vars(),
            self.trainer.get_accum_grad_list())

        self.sync = self.local_network.sync_from(global_network)

        self.game_state = GameState(113 * thread_index)

        self.local_t = 0

        self.initial_learning_rate = initial_learning_rate

        self.episode_reward = 0

    def _anneal_learning_rate(self, global_time_step):
        learning_rate = self.initial_learning_rate * (self.max_global_time_step - global_time_step) / self.max_global_time_step
        if learning_rate < 0.0:
            learning_rate = 0.0
        return learning_rate

    def choose_action(self, pi_values):
        values = []
        sum = 0.0
        for rate in pi_values:
            sum = sum + rate
            value = sum
            values.append(value)

        r = random.random() * sum
        for i in range(len(values)):
            if values[i] >= r:
                return i
        # fail safe
        return len(values) - 1

    def _record_score(self, sess, summary_writer, summary_op, score_input, score, global_t):
        summary_str = sess.run(summary_op, feed_dict={
            score_input: score
        })
        summary_writer.add_summary(summary_str, global_t)

    def process(self, sess, global_t, summary_writer, summary_op, score_input):
        states = []
        actions = []
        rewards = []
        values = []

        terminal_end = False

        # reset accumulated gradients
        sess.run(self.reset_gradients)

        # copy weights from shared to local
        sess.run(self.sync)

        start_local_t = self.local_t

        # t_max times loop
        for i in range(LOCAL_T_MAX):
            pi_, value_ = self.local_network.run_policy_and_value(sess, self.game_state.s_t)
            action = self.choose_action(pi_)

            states.append(self.game_state.s_t)
            actions.append(action)
            values.append(value_)

            if (self.thread_index == 0) and (self.local_t % 100) == 0:
                print "pi=", pi_
                print " V=", value_

            # process game
            self.game_state.process(action)

            # receive game result
#......... the rest of this code is omitted .........
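The omitted remainder of process() is not reproduced by the source. In A3C-style training loops like this one, the collected rollout is typically converted into n-step discounted returns by bootstrapping from the value estimate of the final state and folding the rewards in backwards. A generic sketch of that pattern, offered only as an illustration (GAMMA and the bootstrap_value name are assumptions, not the omitted code):

GAMMA = 0.99  # assumed discount factor

# Bootstrap from the critic's estimate of the last state unless the episode ended.
R = 0.0 if terminal_end else bootstrap_value
batch_R = []
for reward in reversed(rewards):
    R = reward + GAMMA * R  # n-step discounted return
    batch_R.append(R)
batch_R.reverse()           # restore chronological order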
Example 4: A3CTrainingThread
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import process [as alias]
class A3CTrainingThread(object):
    def __init__(self, thread_index, global_network, initial_learning_rate,
                 learning_rate_input,
                 policy_applier, value_applier,
                 max_global_time_step):
        self.thread_index = thread_index
        self.learning_rate_input = learning_rate_input
        self.max_global_time_step = max_global_time_step

        self.local_network = GameACNetwork(ACTION_SIZE)
        self.local_network.prepare_loss(ENTROPY_BETA)

        # policy
        self.policy_trainer = AccumTrainer()
        self.policy_trainer.prepare_minimize(self.local_network.policy_loss,
                                             self.local_network.get_policy_vars(),
                                             GRAD_NORM_CLIP)
        self.policy_accum_gradients = self.policy_trainer.accumulate_gradients()
        self.policy_reset_gradients = self.policy_trainer.reset_gradients()

        self.policy_apply_gradients = policy_applier.apply_gradients(
            global_network.get_policy_vars(),
            self.policy_trainer.get_accum_grad_list())

        # value
        self.value_trainer = AccumTrainer()
        self.value_trainer.prepare_minimize(self.local_network.value_loss,
                                            self.local_network.get_value_vars(),
                                            GRAD_NORM_CLIP)
        self.value_accum_gradients = self.value_trainer.accumulate_gradients()
        self.value_reset_gradients = self.value_trainer.reset_gradients()

        self.value_apply_gradients = value_applier.apply_gradients(
            global_network.get_value_vars(),
            self.value_trainer.get_accum_grad_list())

        self.sync = self.local_network.sync_from(global_network)

        self.game_state = GameState(113 * thread_index)

        self.local_t = 0

        self.initial_learning_rate = initial_learning_rate

        self.episode_reward = 0

        # thread0 will record score for TensorBoard
        if self.thread_index == 0:
            self.score_input = tf.placeholder(tf.int32)
            tf.scalar_summary("score", self.score_input)

    def _anneal_learning_rate(self, global_time_step):
        learning_rate = self.initial_learning_rate * (self.max_global_time_step - global_time_step) / self.max_global_time_step
        if learning_rate < 0.0:
            learning_rate = 0.0
        return learning_rate

    def choose_action(self, pi_values):
        values = []
        sum = 0.0
        for rate in pi_values:
            sum = sum + rate
            value = sum
            values.append(value)

        r = random.random() * sum
        for i in range(len(values)):
            if values[i] >= r:
                return i
        # fail safe
        return len(values) - 1

    def _record_score(self, sess, summary_writer, summary_op, score, global_t):
        summary_str = sess.run(summary_op, feed_dict={
            self.score_input: score
        })
        summary_writer.add_summary(summary_str, global_t)

    def process(self, sess, global_t, summary_writer, summary_op):
        states = []
        actions = []
        rewards = []
        values = []

        terminal_end = False

        # reset the accumulated gradients
        sess.run(self.policy_reset_gradients)
        sess.run(self.value_reset_gradients)

        # copy weights from shared to local
        sess.run(self.sync)

        start_local_t = self.local_t

        # t_max times loop
        for i in range(LOCAL_T_MAX):
#......... the rest of this code is omitted .........
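The _anneal_learning_rate helper in this class (and in Example 3) decays the learning rate linearly from its initial value to zero over max_global_time_step steps, clamping at zero. A small standalone check of that formula, with made-up numbers chosen only for illustration:

initial_learning_rate = 7e-4      # assumed value, for the sake of the example
max_global_time_step = 10 ** 7    # assumed total number of training steps

def anneal(global_time_step):
    lr = initial_learning_rate * (max_global_time_step - global_time_step) / max_global_time_step
    return max(lr, 0.0)  # never let the learning rate go negative

print(anneal(0))            # 0.0007  at the start of training
print(anneal(5 * 10 ** 6))  # 0.00035 halfway through
print(anneal(10 ** 7))      # 0.0     at the end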
Example 5: A3CTrainingThread
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import process [as alias]
#......... the beginning of this code is omitted .........
        r = random.random()
        if r < self.greediness:
            action = int(r * len(pi_values))
        elif r < self.repeat_action_ratio:
            action = self.prev_action
        else:
            # Increase the randomness of the choice if no reward has been received for too long
            if self.no_reward_steps > self.options.no_reward_steps:
                randomness = (self.no_reward_steps - self.options.no_reward_steps) * self.options.randomness
                pi_values += randomness
                pi_values /= sum(pi_values)
                if self.local_t % self.options.randomness_log_interval == 0:
                    elapsed_time = time.time() - self.start_time
                    print("t={:6.0f},s={:9d},th={}:{}randomness={:.8f}".format(
                        elapsed_time, global_t, self.thread_index, self.indent, randomness))

            pi_values -= np.finfo(np.float32).epsneg
            action_samples = np.random.multinomial(self.options.num_experiments, pi_values)
            action = action_samples.argmax(0)

        self.prev_action = action
        return action

    def _record_score(self, sess, summary_writer, summary_op, score_input, score, global_t):
        summary_str = sess.run(summary_op, feed_dict={
            score_input: score
        })
        summary_writer.add_summary(summary_str, global_t)

    def set_start_time(self, start_time):
        self.start_time = start_time

    #@profile
    def process(self, sess, global_t, summary_writer, summary_op, score_input):
        states = []
        actions = []
        rewards = []
        values = []
        liveses = [self.game_state.lives]
        if self.tes > 0:
            if self.episode_liveses == []:
                self.episode_liveses.append(self.game_state.lives)

        terminal_end = False

        # reset accumulated gradients
        sess.run(self.reset_gradients)

        # copy weights from shared to local
        sess.run(self.sync)

        start_local_t = self.local_t

        if self.options.use_lstm:
            start_lstm_state = self.local_network.lstm_state_out

        # t_max times loop
        for i in range(self.options.local_t_max):
            pi_, value_ = self.local_network.run_policy_and_value(sess, self.game_state.s_t)
            action = self.choose_action(pi_, global_t)

            states.append(self.game_state.s_t)
            actions.append(action)
            values.append(value_)
            liveses.append(self.game_state.lives)
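This variant of choose_action mixes a small greedy component, optional repetition of the previous action, and sampling from a (possibly smoothed) policy via np.random.multinomial followed by argmax. The sampling trick in isolation, as a self-contained sketch (the example probabilities are made up; with a single draw the argmax simply returns the sampled action's index, whereas the original code draws options.num_experiments samples and keeps the most frequent one):

import numpy as np

pi_values = np.array([0.1, 0.6, 0.3], dtype=np.float32)  # example policy output
pi_values -= np.finfo(np.float32).epsneg                 # guard against the probabilities summing to just over 1
action_samples = np.random.multinomial(1, pi_values)     # one-hot draw, e.g. array([0, 1, 0])
action = int(action_samples.argmax())                    # index of the sampled action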
Example 6: A3CTrainingThread
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import process [as alias]
class A3CTrainingThread(object):
    def __init__(self,
                 thread_index,
                 global_network,
                 initial_learning_rate,
                 learning_rate_input,
                 grad_applier,
                 max_global_time_step,
                 device):
        self.thread_index = thread_index
        self.learning_rate_input = learning_rate_input
        self.max_global_time_step = max_global_time_step

        if USE_LSTM:
            self.local_network = GameACLSTMNetwork(ACTION_SIZE, thread_index, device)
        else:
            self.local_network = GameACFFNetwork(ACTION_SIZE, device)

        self.local_network.prepare_loss(ENTROPY_BETA)

        # TODO: don't need accum trainer anymore with batch
        self.trainer = AccumTrainer(device)
        self.trainer.prepare_minimize(self.local_network.total_loss,
                                      self.local_network.get_vars())

        self.accum_gradients = self.trainer.accumulate_gradients()
        self.reset_gradients = self.trainer.reset_gradients()

        self.apply_gradients = grad_applier.apply_gradients(
            global_network.get_vars(),
            self.trainer.get_accum_grad_list())

        self.sync = self.local_network.sync_from(global_network)

        self.game_state = GameState(113 * thread_index)

        self.local_t = 0

        self.initial_learning_rate = initial_learning_rate

        self.episode_reward = 0

    def _anneal_learning_rate(self, global_time_step):
        learning_rate = self.initial_learning_rate * \
            (self.max_global_time_step - global_time_step) / \
            self.max_global_time_step
        assert learning_rate > 0, 'Learning rate {} is not >0'.format(
            learning_rate)
        return learning_rate

    def _record_score(self, sess, summary_writer, summary_op, score_input, score, global_t):
        summary_str = sess.run(summary_op, feed_dict={
            score_input: score
        })
        summary_writer.add_summary(summary_str, global_t)

    def process(self, sess, global_t, summary_writer, summary_op, score_input):
        states = []
        actions = []
        rewards = []
        values = []

        # reset accumulated gradients
        sess.run(self.reset_gradients)

        # copy weights from shared to local
        sess.run(self.sync)

        if USE_LSTM:
            start_lstm_state = self.local_network.lstm_state_out

        # t_max times loop
        start_local_t = self.local_t
        terminal_end = False
        for i in range(LOCAL_T_MAX):
            pi_, value_ = self.local_network.run_policy_and_value(sess, self.game_state.s_t)
            action = choose_action(pi_)

            states.append(self.game_state.s_t)
            actions.append(action)
            values.append(value_)

            # Debug output for progress
            if (self.thread_index == 0) and (self.local_t % 100) == 0:
                print(('local_t = {:10} pi = ' + '{:7.5f} ' * len(pi_) + ' V = {:8.4f} (thread {})').format(
                    self.local_t, *pi_, value_, self.thread_index))

            # process game
            self.game_state.process(action)

            # receive game result
            reward = self.game_state.reward
            terminal = self.game_state.terminal

            self.episode_reward += reward

            # clip reward
            # TODO: Does this make sense?
#......... the rest of this code is omitted .........
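The omitted portion follows the "# clip reward" comment. A common convention at this point in Atari/A3C code, shown here only as an illustration rather than as this repository's actual code, is to clip each reward to [-1, 1] before storing it in the rollout:

# Clip the raw game reward to [-1, 1] before appending it to the rollout (illustrative only).
clipped_reward = max(-1.0, min(1.0, reward))
rewards.append(clipped_reward)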
Example 7: A3CTrainingThread
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import process [as alias]
class A3CTrainingThread(object):
    def __init__(self,
                 thread_index,
                 global_network,
                 initial_learning_rate,
                 learning_rate_input,
                 grad_applier,
                 max_global_time_step,
                 device):
        self.thread_index = thread_index
        self.learning_rate_input = learning_rate_input
        self.max_global_time_step = max_global_time_step

        if USE_LSTM:
            self.local_network = GameACLSTMNetwork(ACTION_SIZE, thread_index, device)
        else:
            self.local_network = GameACFFNetwork(ACTION_SIZE, device)

        self.local_network.prepare_loss(ENTROPY_BETA)

        # TODO: don't need accum trainer anymore with batch
        self.trainer = AccumTrainer(device)
        self.trainer.prepare_minimize(self.local_network.total_loss,
                                      self.local_network.get_vars())

        self.accum_gradients = self.trainer.accumulate_gradients()
        self.reset_gradients = self.trainer.reset_gradients()

        self.apply_gradients = grad_applier.apply_gradients(
            global_network.get_vars(),
            self.trainer.get_accum_grad_list())

        self.sync = self.local_network.sync_from(global_network)

        self.game_state = GameState(113 * thread_index)

        self.local_t = 0

        self.initial_learning_rate = initial_learning_rate

        self.episode_reward = 0

        # variable controlling log output
        self.prev_local_t = 0

    def _anneal_learning_rate(self, global_time_step):
        learning_rate = self.initial_learning_rate * (self.max_global_time_step - global_time_step) / self.max_global_time_step
        if learning_rate < 0.0:
            learning_rate = 0.0
        return learning_rate

    def choose_action(self, pi_values):
        values = []
        sum = 0.0
        for rate in pi_values:
            sum = sum + rate
            value = sum
            values.append(value)

        r = random.random() * sum
        for i in range(len(values)):
            if values[i] >= r:
                return i
        # fail safe
        return len(values) - 1

    def _record_score(self, sess, summary_writer, summary_op, score_input, score, global_t):
        summary_str = sess.run(summary_op, feed_dict={
            score_input: score
        })
        summary_writer.add_summary(summary_str, global_t)

    def set_start_time(self, start_time):
        self.start_time = start_time

    def process(self, sess, global_t, summary_writer, summary_op, score_input):
        states = []
        actions = []
        rewards = []
        values = []

        terminal_end = False

        # reset accumulated gradients
        sess.run(self.reset_gradients)

        # copy weights from shared to local
        sess.run(self.sync)

        start_local_t = self.local_t

        if USE_LSTM:
            start_lstm_state = self.local_network.lstm_state_out

        # t_max times loop
        for i in range(LOCAL_T_MAX):
            pi_, value_ = self.local_network.run_policy_and_value(sess, self.game_state.s_t)
            action = self.choose_action(pi_)
#......... the rest of this code is omitted .........
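Example 7's choose_action (like those in Examples 3 and 4) implements roulette-wheel sampling by hand with a running cumulative sum. An equivalent, more compact formulation using numpy, shown as a sketch that assumes pi_values is a 1-D array of non-negative probabilities:

import numpy as np

def choose_action(pi_values):
    pi = np.asarray(pi_values, dtype=np.float64)
    cumulative = np.cumsum(pi)                  # running totals, same as the manual loop
    r = np.random.random() * cumulative[-1]     # a point on the [0, sum) "wheel"
    return int(np.searchsorted(cumulative, r))  # first index whose cumulative value is >= r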