This article collects typical usage examples of the Python method game_state.GameState.reset. If you are unsure how GameState.reset works, how to call it, or what real uses look like, the curated code samples below may help. You can also explore further usage examples of its containing class, game_state.GameState.
The following lists 6 code examples of GameState.reset, sorted by popularity by default.
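Before the individual examples, here is a minimal sketch of the pattern most of them share: step the environment with process() and call reset() once a terminal state is reached. It assumes the single-argument constructor (a random seed) seen in Examples 2 and 6, and uses a random stand-in policy instead of a trained network.

# Minimal sketch of the GameState.reset pattern used throughout this page.
# Assumptions: seed-only constructor; ACTION_SIZE and the random policy are
# stand-ins, not part of the original code.
import random

from game_state import GameState

ACTION_SIZE = 4  # assumption: number of discrete actions in the game

game_state = GameState(0)
for _ in range(1000):
    action = random.randrange(ACTION_SIZE)  # stand-in for a learned policy
    game_state.process(action)              # step the emulator
    if game_state.terminal:
        game_state.reset()                  # GameState.reset starts a new episode
    else:
        game_state.update()                 # advance s_t to the next observation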
Example 1: __init__
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import reset [as alias]
class Game:
def __init__(self):
self.pastStateData = []
self.currentState = State()
def reset(self):
self.currentState.reset()
self.pastStateData = []
def makeMove(self, move):
self.pastStateData.append((self.currentState.toString(), -self.currentState.getPlayer()))
self.currentState.makeMove(move)
def getHeuristic(self):
if (self.currentState.getPlayer() == 1):
return self.starterHeuristic
else:
return self.followerHeuristic
def setHeuristic(self, heuristic, player):
heuristic.setState(self.currentState)
if (player == 1):
self.starterHeuristic = heuristic
else:
self.followerHeuristic = heuristic
def takeTurn(self):
move = self.getHeuristic().getForcedMove()
if move is None:
move = self.getHeuristic().getMove()
self.makeMove(move)
def play(self):
result = board.UNFINISHED
while result is board.UNFINISHED:
self.takeTurn()
result = self.getHeuristic().getGameResult()
return (self.pastStateData[1:], result)
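A hedged usage sketch for Example 1. MyHeuristic below is hypothetical; the class only requires each heuristic to expose the setState, getForcedMove, getMove and getGameResult methods used above, and the player codes 1/-1 are an assumption based on getPlayer().

# Hypothetical driver for the Game wrapper above (MyHeuristic is not part
# of the original code; it stands for any object with the interface noted).
game = Game()
game.setHeuristic(MyHeuristic(), 1)    # starter
game.setHeuristic(MyHeuristic(), -1)   # follower (assumed player code)
history, result = game.play()          # list of (state string, player), final result
game.reset()                           # clears pastStateData and resets the state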
Example 2: A3CTrainingThread
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import reset [as alias]
class A3CTrainingThread(object):
def __init__(self,
thread_index,
global_network,
initial_learning_rate,
learning_rate_input,
grad_applier,
max_global_time_step,
device):
self.thread_index = thread_index
self.learning_rate_input = learning_rate_input
self.max_global_time_step = max_global_time_step
self.local_network = GameACNetwork(ACTION_SIZE, device)
self.local_network.prepare_loss(ENTROPY_BETA)
# TODO: don't need accum trainer anymore with batch
self.trainer = AccumTrainer(device)
self.trainer.prepare_minimize( self.local_network.total_loss,
self.local_network.get_vars() )
self.accum_gradients = self.trainer.accumulate_gradients()
self.reset_gradients = self.trainer.reset_gradients()
self.apply_gradients = grad_applier.apply_gradients(
global_network.get_vars(),
self.trainer.get_accum_grad_list() )
self.sync = self.local_network.sync_from(global_network)
self.game_state = GameState(113 * thread_index)
self.local_t = 0
self.initial_learning_rate = initial_learning_rate
self.episode_reward = 0
def _anneal_learning_rate(self, global_time_step):
learning_rate = self.initial_learning_rate * (self.max_global_time_step - global_time_step) / self.max_global_time_step
if learning_rate < 0.0:
learning_rate = 0.0
return learning_rate
def choose_action(self, pi_values):
values = []
sum = 0.0
for rate in pi_values:
sum = sum + rate
value = sum
values.append(value)
r = random.random() * sum
for i in range(len(values)):
if values[i] >= r:
return i
# fail safe
return len(values)-1
def _record_score(self, sess, summary_writer, summary_op, score_input, score, global_t):
summary_str = sess.run(summary_op, feed_dict={
score_input: score
})
summary_writer.add_summary(summary_str, global_t)
def process(self, sess, global_t, summary_writer, summary_op, score_input):
states = []
actions = []
rewards = []
values = []
terminal_end = False
# reset accumulated gradients
sess.run( self.reset_gradients )
# copy weights from shared to local
sess.run( self.sync )
start_local_t = self.local_t
# t_max times loop
for i in range(LOCAL_T_MAX):
pi_, value_ = self.local_network.run_policy_and_value(sess, self.game_state.s_t)
action = self.choose_action(pi_)
states.append(self.game_state.s_t)
actions.append(action)
values.append(value_)
if (self.thread_index == 0) and (self.local_t % 100) == 0:
print "pi=", pi_
print " V=", value_
# process game
self.game_state.process(action)
# receive game result
#......... the rest of this code is omitted .........
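The choose_action method above draws an action index in proportion to the policy output pi_values by walking their cumulative sum. A self-contained NumPy equivalent, added here only as an illustration and not part of the original code:

import numpy as np

def choose_action_np(pi_values):
    # Sample index i with probability pi_values[i] / sum(pi_values),
    # matching the cumulative-sum loop in the example above.
    pi = np.asarray(pi_values, dtype=np.float64)
    return int(np.random.choice(len(pi), p=pi / pi.sum()))

print(choose_action_np([0.1, 0.2, 0.7]))  # prints 2 most of the time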
Example 3: print
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import reset [as alias]
decay = RMSP_ALPHA,
momentum = 0.0,
epsilon = RMSP_EPSILON,
clip_norm = GRAD_NORM_CLIP,
device = device)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
saver = tf.train.Saver()
checkpoint = tf.train.get_checkpoint_state(CHECKPOINT_DIR)
if checkpoint and checkpoint.model_checkpoint_path:
saver.restore(sess, checkpoint.model_checkpoint_path)
print("checkpoint loaded:", checkpoint.model_checkpoint_path)
else:
print("Could not find old checkpoint")
game_state = GameState(0, display=True, no_op_max=0)
while True:
pi_values = global_network.run_policy(sess, game_state.s_t)
action = choose_action(pi_values)
game_state.process(action)
if game_state.terminal:
game_state.reset()
else:
game_state.update()
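Example 3 only renders the game. If a per-episode score is also wanted, the reward field that the training examples read after process() can be accumulated and cleared on reset. A sketch under that assumption, reusing sess, global_network, game_state and the module-level choose_action from Example 3:

episode_reward = 0
while True:
    pi_values = global_network.run_policy(sess, game_state.s_t)
    action = choose_action(pi_values)
    game_state.process(action)
    episode_reward += game_state.reward   # assumption: populated by process() as in Example 5
    if game_state.terminal:
        print("episode reward:", episode_reward)
        episode_reward = 0
        game_state.reset()
    else:
        game_state.update()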
Example 4: A3CTrainingThread
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import reset [as alias]
class A3CTrainingThread(object):
def __init__(self,
thread_index,
global_network,
initial_learning_rate,
learning_rate_input,
grad_applier,
max_global_time_step,
device,
options):
self.thread_index = thread_index
self.learning_rate_input = learning_rate_input
self.max_global_time_step = max_global_time_step
self.options = options
if options.use_lstm:
self.local_network = GameACLSTMNetwork(options.action_size, thread_index, device)
else:
self.local_network = GameACFFNetwork(options.action_size, device)
self.local_network.prepare_loss(options.entropy_beta)
# TODO: don't need accum trainer anymore with batch
self.trainer = AccumTrainer(device)
self.trainer.prepare_minimize( self.local_network.total_loss,
self.local_network.get_vars() )
self.accum_gradients = self.trainer.accumulate_gradients()
self.reset_gradients = self.trainer.reset_gradients()
self.apply_gradients = grad_applier.apply_gradients(
global_network.get_vars(),
self.trainer.get_accum_grad_list() )
self.sync = self.local_network.sync_from(global_network)
self.game_state = GameState(random.randint(0, 2**16), options, thread_index = thread_index)
self.local_t = 0
self.initial_learning_rate = initial_learning_rate
self.episode_reward = 0
self.indent = " |" * self.thread_index
self.steps = 0
self.no_reward_steps = 0
self.terminate_on_lives_lost = options.terminate_on_lives_lost and (self.thread_index != 0)
if self.options.train_episode_steps > 0:
self.max_reward = 0.0
self.max_episode_reward = 0.0
self.episode_states = []
self.episode_actions = []
self.episode_rewards = []
self.episode_values = []
self.episode_liveses = []
self.episode_scores = Episode_scores(options)
self.tes = self.options.train_episode_steps
if self.options.tes_list is not None:
self.tes = self.options.tes_list[thread_index]
print("[DIVERSITY]th={}:tes={}".format(thread_index, self.tes))
self.initial_lives = self.game_state.initial_lives
self.max_history = int(self.tes * self.options.tes_extend_ratio * 2.1)
if self.options.record_new_record_dir is not None:
if self.thread_index == 0:
if not os.path.exists(self.options.record_new_record_dir):
os.makedirs(self.options.record_new_record_dir)
self.episode_screens = []
if self.options.record_new_room_dir is not None:
if self.thread_index == 0:
if not os.path.exists(self.options.record_new_room_dir):
os.makedirs(self.options.record_new_room_dir)
self.episode_screens = []
self.greediness = options.greediness
self.repeat_action_ratio = options.repeat_action_ratio
self.prev_action = 0
def _anneal_learning_rate(self, global_time_step):
learning_rate = self.initial_learning_rate * (self.max_global_time_step - global_time_step) / self.max_global_time_step
if learning_rate < 0.0:
learning_rate = 0.0
return learning_rate
def choose_action(self, pi_values, global_t):
# Add greediness for broader exploration
r = random.random()
if r < self.greediness:
action = int(r * len(pi_values))
elif r < self.repeat_action_ratio:
action = self.prev_action
else:
# Increase randomness of choice if no reward term is too long
#......... the rest of this code is omitted .........
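Example 4's choose_action is cut off just after the greediness and repeat_action_ratio branches, and the omitted part is not reproduced here. Purely as an illustration of the idea, a sampler combining a random-exploration floor, action repetition, and the cumulative-sum draw from the other examples might look like this:

import random

def choose_action_sketch(pi_values, prev_action, greediness=0.01, repeat_action_ratio=0.0):
    # Illustrative only: the exploration floor, the action repeat, and the
    # proportional draw mirror the visible branches above, not the omitted code.
    r = random.random()
    if r < greediness:
        return random.randrange(len(pi_values))
    if r < repeat_action_ratio:
        return prev_action
    threshold = random.random() * sum(pi_values)
    cumulative = 0.0
    for i, p in enumerate(pi_values):
        cumulative += p
        if cumulative >= threshold:
            return i
    return len(pi_values) - 1  # fail safe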
Example 5: A3CTrainingThread
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import reset [as alias]
class A3CTrainingThread(object):
def __init__(self,
thread_index,
global_network,
initial_learning_rate,
learning_rate_input,
grad_applier,
max_global_time_step,
device):
self.thread_index = thread_index
self.learning_rate_input = learning_rate_input
self.max_global_time_step = max_global_time_step
if USE_LSTM:
self.local_network = GameACLSTMNetwork(ACTION_SIZE, thread_index, device)
else:
self.local_network = GameACFFNetwork(ACTION_SIZE, device)
self.local_network.prepare_loss(ENTROPY_BETA)
# TODO: don't need accum trainer anymore with batch
self.trainer = AccumTrainer(device)
self.trainer.prepare_minimize(self.local_network.total_loss,
self.local_network.get_vars())
self.accum_gradients = self.trainer.accumulate_gradients()
self.reset_gradients = self.trainer.reset_gradients()
self.apply_gradients = grad_applier.apply_gradients(
global_network.get_vars(),
self.trainer.get_accum_grad_list())
self.sync = self.local_network.sync_from(global_network)
self.game_state = GameState(113 * thread_index)
self.local_t = 0
self.initial_learning_rate = initial_learning_rate
self.episode_reward = 0
def _anneal_learning_rate(self, global_time_step):
learning_rate = self.initial_learning_rate * \
(self.max_global_time_step - global_time_step) / \
self.max_global_time_step
assert learning_rate > 0, 'Learning rate {} is not >0'.format(
learning_rate)
return learning_rate
def _record_score(self, sess, summary_writer, summary_op, score_input, score, global_t):
summary_str = sess.run(summary_op, feed_dict={
score_input: score
})
summary_writer.add_summary(summary_str, global_t)
def process(self, sess, global_t, summary_writer, summary_op, score_input):
states = []
actions = []
rewards = []
values = []
# reset accumulated gradients
sess.run(self.reset_gradients)
# copy weights from shared to local
sess.run(self.sync)
if USE_LSTM:
start_lstm_state = self.local_network.lstm_state_out
# t_max times loop
start_local_t = self.local_t
terminal_end = False
for i in range(LOCAL_T_MAX):
pi_, value_ = self.local_network.run_policy_and_value(sess, self.game_state.s_t)
action = choose_action(pi_)
states.append(self.game_state.s_t)
actions.append(action)
values.append(value_)
# Debug output for progress
if (self.thread_index == 0) and (self.local_t % 100) == 0:
print(('local_t = {:10} pi = ' + '{:7.5f} ' * len(pi_) + ' V = {:8.4f} (thread {})').format(self.local_t,
*pi_, value_, self.thread_index))
# process game
self.game_state.process(action)
# receive game result
reward = self.game_state.reward
terminal = self.game_state.terminal
self.episode_reward += reward
# clip reward
# TODO: Does this make sense?
#......... the rest of this code is omitted .........
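Example 5 breaks off right at the "# clip reward" comment. Shown only as an assumption about what commonly follows in this family of A3C implementations (not the omitted original), each step reward can be clipped to [-1, 1] before being stored:

# Assumption: clip the raw step reward to [-1, 1] before storing it.
rewards.append(max(-1.0, min(1.0, reward)))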
Example 6: A3CTrainingThread
# Required import: from game_state import GameState [as alias]
# Or: from game_state.GameState import reset [as alias]
class A3CTrainingThread(object):
def __init__(self,
thread_index,
global_network,
initial_learning_rate,
learning_rate_input,
grad_applier,
max_global_time_step,
device):
self.thread_index = thread_index
self.learning_rate_input = learning_rate_input
self.max_global_time_step = max_global_time_step
if USE_LSTM:
self.local_network = GameACLSTMNetwork(ACTION_SIZE, thread_index, device)
else:
self.local_network = GameACFFNetwork(ACTION_SIZE, device)
self.local_network.prepare_loss(ENTROPY_BETA)
# TODO: don't need accum trainer anymore with batch
self.trainer = AccumTrainer(device)
self.trainer.prepare_minimize( self.local_network.total_loss,
self.local_network.get_vars() )
self.accum_gradients = self.trainer.accumulate_gradients()
self.reset_gradients = self.trainer.reset_gradients()
self.apply_gradients = grad_applier.apply_gradients(
global_network.get_vars(),
self.trainer.get_accum_grad_list() )
self.sync = self.local_network.sync_from(global_network)
self.game_state = GameState(113 * thread_index)
self.local_t = 0
self.initial_learning_rate = initial_learning_rate
self.episode_reward = 0
# variable controlling log output
self.prev_local_t = 0
def _anneal_learning_rate(self, global_time_step):
learning_rate = self.initial_learning_rate * (self.max_global_time_step - global_time_step) / self.max_global_time_step
if learning_rate < 0.0:
learning_rate = 0.0
return learning_rate
def choose_action(self, pi_values):
values = []
sum = 0.0
for rate in pi_values:
sum = sum + rate
value = sum
values.append(value)
r = random.random() * sum
for i in range(len(values)):
if values[i] >= r:
return i
# fail safe
return len(values)-1
def _record_score(self, sess, summary_writer, summary_op, score_input, score, global_t):
summary_str = sess.run(summary_op, feed_dict={
score_input: score
})
summary_writer.add_summary(summary_str, global_t)
def set_start_time(self, start_time):
self.start_time = start_time
def process(self, sess, global_t, summary_writer, summary_op, score_input):
states = []
actions = []
rewards = []
values = []
terminal_end = False
# reset accumulated gradients
sess.run( self.reset_gradients )
# copy weights from shared to local
sess.run( self.sync )
start_local_t = self.local_t
if USE_LSTM:
start_lstm_state = self.local_network.lstm_state_out
# t_max times loop
for i in range(LOCAL_T_MAX):
pi_, value_ = self.local_network.run_policy_and_value(sess, self.game_state.s_t)
action = self.choose_action(pi_)
#......... the rest of this code is omitted .........
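Example 6 is also truncated inside its rollout loop, and the omitted remainder is not reproduced here. As a hedged sketch assembled from what Examples 2, 3 and 5 do show, the per-step bookkeeping after choose_action typically records the state and value, steps the emulator, accumulates the (clipped) reward, and calls GameState.reset when a terminal state ends the episode:

# Sketch of the per-step pattern, pieced together from the visible parts of
# the other examples; this is not the omitted original code.
states.append(self.game_state.s_t)
actions.append(action)
values.append(value_)

self.game_state.process(action)              # step the emulator
reward = self.game_state.reward
terminal = self.game_state.terminal
self.episode_reward += reward
rewards.append(max(-1.0, min(1.0, reward)))  # clipped reward (assumption)
self.local_t += 1

if terminal:
    terminal_end = True
    self.episode_reward = 0
    self.game_state.reset()                  # start the next episode
    break
self.game_state.update()                     # otherwise advance s_t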