This article collects typical usage examples of the Emulator.terminal method from the Python module emulator. If you are wondering how exactly Emulator.terminal is used, what it does, or what real-world calls to it look like, the curated code examples below should help. You can also explore further usage examples of the containing class emulator.Emulator.
Shown below are 3 code examples of the Emulator.terminal method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
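All three examples share one pattern: terminal() is polled once per emulated frame and reports whether the current episode has ended. As a quick orientation, here is a minimal sketch of that pattern; the constructor arguments and the trivial fixed policy are assumptions for illustration, not part of a documented API.

# Minimal sketch, assuming an Emulator that exposes reset(), act(action),
# actions and terminal() as in the examples below.
from emulator import Emulator

emulator = Emulator(rom='SPCINVAD.BIN')  # constructor arguments assumed from Example 1
emulator.reset()
total_reward = 0
while not emulator.terminal():        # terminal() reports end of episode
    action = emulator.actions[0]      # stand-in for a real policy
    total_reward += emulator.act(action)
print("Episode finished, total reward = {}".format(total_reward))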
Example 1: Emulator
# Required import: from emulator import Emulator [as alias]
# Or alternatively: from emulator.Emulator import terminal [as alias]
import cv2
import numpy as np

from emulator import Emulator

if __name__ == '__main__':
    emulator = Emulator(rom='SPCINVAD.BIN')
    cv2.startWindowThread()
    cv2.namedWindow("preview")
    emulator.reset()
    reward_episode = 0
    # Stack of the most recent frames; a depth of 4 is assumed here
    # (compare settings['phi_length'] in Example 2).
    images = np.dstack((np.reshape(emulator.image(), (80, 80, 1)),) * 4)
    print("Num frames per episode: {}".format(emulator.max_num_frames_per_episode))
    for frame in range(emulator.max_num_frames_per_episode):
        action_idx = int(input())  # read the next action index from stdin
        reward = emulator.act(emulator.actions[action_idx])
        print("Instead: {}, i.e. {}, reward = {}".format(action_idx, emulator.actions[action_idx], reward))
        if emulator.terminal():  # leave the loop as soon as the episode ends
            break
        reward_episode += reward
        actions = np.zeros([len(emulator.actions)])  # one-hot action vector (unused below)
        actions[action_idx] = 1
        # Shift the newest frame into the stack, dropping the oldest channel.
        new_images = np.dstack((np.reshape(emulator.image(), (80, 80, 1)), images[:, :, 1:]))
        images = new_images
        cv2.imshow('preview', emulator.image())
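In Example 1 the emulator is driven manually, one action index per frame read from stdin, and terminal() is polled after every act() call so the loop exits as soon as the episode ends.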
Example 2: Engine
# Required import: from emulator import Emulator [as alias]
# Or alternatively: from emulator.Emulator import terminal [as alias]
# Note: tf (tensorflow 0.x), np (numpy), random, settings, Model and ReplayDB
# are defined in the surrounding module, which is omitted here.
class Engine(object):

    def __init__(self):
        self.session = tf.InteractiveSession()
        self.emulator = Emulator(settings)
        settings['num_actions'] = len(self.emulator.actions)
        self.replay = ReplayDB(settings)
        with tf.variable_scope('model'):
            self.model = Model(settings)
        self.summary = tf.merge_all_summaries()
        self.writer = tf.train.SummaryWriter('summary-log', self.session.graph_def)
        self.session.run(tf.initialize_all_variables())
        self.saver = tf.train.Saver(max_to_keep=1000000)
        checkpoint = tf.train.get_checkpoint_state("networks")
        if checkpoint and checkpoint.model_checkpoint_path:
            self.saver.restore(self.session, checkpoint.model_checkpoint_path)
            print("Loaded checkpoint: {}".format(checkpoint.model_checkpoint_path))
        else:
            print("Unable to load checkpoint")
        self.summary_cnt = 0
        self.episode_cnt = 0
        self.timer = self.session.run(self.model.global_step)
        self.no_op = tf.no_op()

    def epsilon(self, test=False):
        # Linearly anneal epsilon from e0 down to e1 over the first `lim` steps.
        e0 = settings['initial_epsilon']
        e1 = settings['final_epsilon']
        lim = settings['epsilon_anneal_length']
        if test:
            return e1
        return e1 + max(0, (e0 - e1) * (lim - self.timer) / lim)

    def choose_action(self, test=False):
        # Epsilon-greedy: explore with probability epsilon, otherwise
        # take the action with the highest predicted Q-value.
        if np.random.rand() < self.epsilon(test):
            return random.randrange(len(self.emulator.actions))
        else:
            predictions = self.model.act_network.readout.eval({
                self.model.images: [self.images]
            })[0]
            return np.argmax(predictions)

    def episode(self, test=False, push_to=None):
        self.emulator.reset()
        self.images = np.dstack((self.emulator.image(),) * settings['phi_length'])
        total_reward = 0
        updates = 0
        while True:
            action = self.choose_action(test)
            reward = self.emulator.act(action)
            image = self.emulator.image()
            terminal = self.emulator.terminal()
            if not test:
                self.replay.push(
                    image=image,
                    reward=reward,
                    action=action,
                    terminal=terminal
                )
            if push_to is not None:
                push_to.append(action)
            if terminal:
                break
            if not test and len(self.replay) >= settings['replay_start']:
                if updates % settings['update_frequency'] == 0:
                    self.train()
                updates += 1
            # Shift the newest frame into the stack of recent frames.
            self.images = np.dstack((image, self.images[:, :, 1:]))
            total_reward += reward
        if not test:
            self.episode_cnt += 1
            if len(self.replay) >= settings['replay_start']:
                self.writer.flush()
            if self.episode_cnt % settings['save_every_episodes'] == 0:
                self.saver.save(self.session, 'networks/checkpoint', global_step=self.timer)
        return total_reward

    def train(self):
        minibatch = self.replay.sample()
        # One-hot mask selecting the Q-value of the action actually taken.
        action_mask = np.zeros((len(minibatch), settings['num_actions']))
        for i, sample in enumerate(minibatch):
            action_mask[i][sample.action] = 1
        # ......... remainder of the code omitted .........
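In Example 2, terminal() serves two purposes: it ends the episode loop, and the flag is stored with every transition pushed to the replay buffer, presumably so that the (omitted) training step can treat end-of-episode transitions specially.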
Example 3: Visualize
# Required import: from emulator import Emulator [as alias]
# Or alternatively: from emulator.Emulator import terminal [as alias]
# Note: tf (tensorflow 0.x), np (numpy), random, time, cv2, settings and Model
# are defined in the surrounding module, which is omitted here.
class Visualize(object):

    def __init__(self):
        self.session = tf.InteractiveSession()
        self.emulator = Emulator(settings)
        settings['num_actions'] = len(self.emulator.actions)
        with tf.variable_scope('model'):
            self.model = Model(settings)
        self.session.run(tf.initialize_all_variables())
        self.saver = tf.train.Saver(max_to_keep=1000000)
        checkpoint = tf.train.get_checkpoint_state("networks")
        if checkpoint and checkpoint.model_checkpoint_path:
            self.saver.restore(self.session, checkpoint.model_checkpoint_path)
            print("Loaded checkpoint: {}".format(checkpoint.model_checkpoint_path))
        else:
            raise RuntimeError("Unable to load checkpoint")
        cv2.startWindowThread()
        cv2.namedWindow("preview")
        cv2.namedWindow("full")

    def epsilon(self, test=False):
        # Always use the final (smallest) epsilon during visualization.
        return settings['final_epsilon']

    def choose_action(self, test=False):
        if np.random.rand() < self.epsilon(test):
            return random.randrange(len(self.emulator.actions))
        else:
            predictions = self.model.act_network.readout.eval({
                self.model.images: [self.images]
            })[0]
            print(predictions, np.argmax(predictions))
            return np.argmax(predictions)

    def episode(self, test=False, push_to=None):
        self.emulator.reset()
        self.images = np.dstack((self.emulator.image(),) * settings['phi_length'])
        total_reward = 0
        while True:
            action = self.choose_action(test)
            reward = self.emulator.act(action)
            image = self.emulator.image()
            cv2.imshow('preview', image)
            cv2.imshow('full', self.emulator.full_image())
            terminal = self.emulator.terminal()
            if reward > 0:
                print("reward:", reward)
            if terminal:
                break
            # Shift the newest frame into the stack of recent frames.
            self.images = np.dstack((image, self.images[:, :, 1:]))
            total_reward += reward
            time.sleep(0.1)  # slow down playback so the windows are watchable
        return total_reward
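Example 3 is the inference-only counterpart of Example 2: it reuses the same epsilon-greedy action selection and frame stack, but keeps no replay buffer and never trains; instead each frame is rendered with cv2 and playback is slowed down, so you can watch the policy run until terminal() ends the episode.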