当前位置: 首页>>代码示例>>Python>>正文


Python Emulator.act方法代码示例

本文整理汇总了Python中emulator.Emulator.act方法的典型用法代码示例。如果您正苦于以下问题:Python Emulator.act方法的具体用法?Python Emulator.act怎么用?Python Emulator.act使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在emulator.Emulator的用法示例。


在下文中一共展示了Emulator.act方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: Emulator

# 需要导入模块: from emulator import Emulator [as 别名]
# 或者: from emulator.Emulator import act [as 别名]
if __name__ == '__main__':
    # Interactive "human player" harness: read action indices from stdin,
    # forward them to the emulator, and display each frame with OpenCV.
    emulator = Emulator(rom='SPCINVAD.BIN')

    cv2.startWindowThread()
    cv2.namedWindow("preview")

    emulator.reset()

    reward_episode = 0

    # Bug fix: `images` was previously first referenced inside the loop
    # (images[:, :, 1:]) without ever being assigned, raising NameError on
    # the first iteration. Seed the frame history with the initial frame.
    # NOTE(review): assumes a history of 4 stacked 80x80 frames, matching
    # the usual DQN phi_length — confirm against the training settings.
    images = np.dstack((np.reshape(emulator.image(), (80, 80, 1)),) * 4)

    print("Num frames per episode: {}".format(emulator.max_num_frames_per_episode))

    for frame in range(emulator.max_num_frames_per_episode):
        # The human types the index of the action to perform.
        action_idx = int(input())
        reward = emulator.act(emulator.actions[action_idx])
        print("Instead: {}, i.e. {}, reward = {}".format(action_idx, emulator.actions[action_idx], reward))

        if emulator.terminal():
            break

        reward_episode += reward

        # Slide the frame-history window: newest frame in front, oldest dropped.
        images = np.dstack((np.reshape(emulator.image(), (80, 80, 1)), images[:, :, 1:]))

        cv2.imshow('preview', emulator.image())
开发者ID:amharc,项目名称:jnp3,代码行数:32,代码来源:human.py

示例2: Engine

# 需要导入模块: from emulator import Emulator [as 别名]
# 或者: from emulator.Emulator import act [as 别名]
class Engine(object):
    def __init__(self):
        """Build the full training setup: TF session, emulator, replay
        buffer, Q-network model, summary writer, and checkpoint restore."""
        self.session = tf.InteractiveSession()

        self.emulator = Emulator(settings)
        # The emulator determines the action set, so record its size in the
        # shared settings dict before the replay buffer and model use it.
        settings['num_actions'] = len(self.emulator.actions)
        self.replay = ReplayDB(settings)

        with tf.variable_scope('model'):
            self.model = Model(settings)

        # NOTE(review): pre-1.0 TensorFlow APIs (merge_all_summaries,
        # SummaryWriter, initialize_all_variables, graph_def) — this file
        # targets TF 0.x.
        self.summary = tf.merge_all_summaries()
        self.writer = tf.train.SummaryWriter('summary-log', self.session.graph_def)

        self.session.run(tf.initialize_all_variables())

        # Resume from the latest checkpoint when one exists; during training a
        # missing checkpoint is non-fatal, so only a warning is printed.
        self.saver = tf.train.Saver(max_to_keep=1000000)
        checkpoint = tf.train.get_checkpoint_state("networks")
        if checkpoint and checkpoint.model_checkpoint_path:
            self.saver.restore(self.session, checkpoint.model_checkpoint_path)
            print("Loaded checkpoint: {}".format(checkpoint.model_checkpoint_path))
        else:
            print("Unable to load checkpoint")

        self.summary_cnt = 0
        self.episode_cnt = 0
        # Resume the step counter from the restored model's global_step so the
        # epsilon annealing schedule continues where it left off.
        self.timer = self.session.run(self.model.global_step)
        self.no_op = tf.no_op()

    def epsilon(self, test=False):
        """Return the current epsilon for epsilon-greedy exploration.

        Anneals linearly from ``initial_epsilon`` down to ``final_epsilon``
        over ``epsilon_anneal_length`` steps of ``self.timer``; in test mode
        the final value is used unconditionally.
        """
        final = settings['final_epsilon']
        if test:
            return final

        initial = settings['initial_epsilon']
        anneal_steps = settings['epsilon_anneal_length']

        # Linear decay, clamped at zero once the schedule has finished.
        decay = max(0, (initial - final) * (anneal_steps - self.timer) / anneal_steps)
        return final + decay

    def choose_action(self, test=False):
        """Epsilon-greedy action selection.

        With probability ``self.epsilon(test)`` a uniformly random action
        index is returned; otherwise the argmax over the action network's
        Q-value readout for the current frame stack.
        """
        if np.random.rand() < self.epsilon(test):
            # Explore: uniform random action.
            return random.randrange(len(self.emulator.actions))

        # Exploit: greedy action from the network's Q-values.
        q_values = self.model.act_network.readout.eval({
            self.model.images: [self.images]
        })[0]
        return np.argmax(q_values)

    def episode(self, test=False, push_to=None):
        """Run one full episode in the emulator and return its total reward.

        In training mode (``test=False``) every transition is pushed into the
        replay buffer and the network is trained every
        ``update_frequency``-th step once ``replay_start`` samples exist.
        If ``push_to`` is a list, every chosen action is appended to it.
        """
        self.emulator.reset()
        # Initial state: the first frame repeated phi_length times.
        self.images = np.dstack((self.emulator.image(),) * settings['phi_length'])

        total_reward = 0
        updates = 0

        while True:
            action = self.choose_action(test)
            reward = self.emulator.act(action)
            image = self.emulator.image()
            terminal = self.emulator.terminal()

            # The terminal transition is still recorded before breaking out.
            if not test:
                self.replay.push(
                        image=image,
                        reward=reward,
                        action=action,
                        terminal=terminal
                    )

            if push_to is not None:
                push_to.append(action)

            if terminal:
                break

            # Train only after the replay buffer has warmed up, and only on
            # every update_frequency-th step.
            if not test and len(self.replay) >= settings['replay_start']:
                if updates % settings['update_frequency'] == 0:
                    self.train()
                updates += 1

            # Slide the frame-history window: newest frame in front.
            # NOTE: this and the reward accumulation are skipped for the
            # terminal frame because of the break above.
            self.images = np.dstack((image, self.images[:,:,1:]))
            total_reward += reward

        if not test:
            self.episode_cnt += 1
            if len(self.replay) >= settings['replay_start']:
                self.writer.flush()

            # Periodically persist the network weights.
            if self.episode_cnt % settings['save_every_episodes'] == 0:
                self.saver.save(self.session, 'networks/checkpoint', global_step=self.timer)

        return total_reward

    def train(self):
        minibatch = self.replay.sample()
        action_mask = np.zeros((len(minibatch), settings['num_actions']))

        for i, sample in enumerate(minibatch):
            action_mask[i][sample.action] = 1

#.........这里部分代码省略.........
开发者ID:amharc,项目名称:jnp3,代码行数:103,代码来源:engine.py

示例3: Visualize

# 需要导入模块: from emulator import Emulator [as 别名]
# 或者: from emulator.Emulator import act [as 别名]
class Visualize(object):
    """Load a trained checkpoint and play episodes while displaying the
    emulator frames in OpenCV windows ("preview" and "full")."""

    def __init__(self):
        self.session = tf.InteractiveSession()

        self.emulator = Emulator(settings)
        # The emulator determines the action set, so record its size before
        # the model is constructed.
        settings['num_actions'] = len(self.emulator.actions)

        with tf.variable_scope('model'):
            self.model = Model(settings)

        self.session.run(tf.initialize_all_variables())

        # Visualization requires trained weights, so a missing checkpoint is
        # fatal here (unlike during training, where it only warns).
        self.saver = tf.train.Saver(max_to_keep=1000000)
        checkpoint = tf.train.get_checkpoint_state("networks")
        if checkpoint and checkpoint.model_checkpoint_path:
            self.saver.restore(self.session, checkpoint.model_checkpoint_path)
            print("Loaded checkpoint: {}".format(checkpoint.model_checkpoint_path))
        else:
            raise RuntimeError("Unable to load checkpoint")

        cv2.startWindowThread()
        cv2.namedWindow("preview")
        cv2.namedWindow("full")

    def epsilon(self, test=False):
        """Always use the final (smallest) epsilon — no annealing schedule
        when visualizing a trained agent."""
        return settings['final_epsilon']

    def choose_action(self, test=False):
        """Epsilon-greedy: mostly the network's argmax action, occasionally
        a uniformly random one."""
        if np.random.rand() < self.epsilon(test):
            return random.randrange(len(self.emulator.actions))
        else:
            predictions = self.model.act_network.readout.eval({
                self.model.images: [self.images]
            })[0]
            # Bug fix: was a Python 2 `print` statement, a SyntaxError under
            # Python 3 and inconsistent with the print() calls elsewhere in
            # this file; output format is unchanged.
            print("{} {}".format(predictions, np.argmax(predictions)))
            return np.argmax(predictions)

    def episode(self, test=False, push_to=None):
        """Play one episode, showing each frame, and return the total reward."""
        self.emulator.reset()
        # Initial state: the first frame repeated phi_length times.
        self.images = np.dstack((self.emulator.image(),) * settings['phi_length'])

        total_reward = 0
        updates = 0

        while True:
            action = self.choose_action(test)
            reward = self.emulator.act(action)
            image = self.emulator.image()
            cv2.imshow('preview', image)
            cv2.imshow('full', self.emulator.full_image())
            terminal = self.emulator.terminal()

            if reward > 0:
                # Bug fix: Python 2 print statement -> 2/3-compatible call.
                print("reward: {}".format(reward))

            if terminal:
                break

            # Slide the frame-history window: newest frame in front.
            self.images = np.dstack((image, self.images[:,:,1:]))
            total_reward += reward

            # Slow playback down so a human can follow the game.
            time.sleep(0.1)

        return total_reward
开发者ID:amharc,项目名称:jnp3,代码行数:66,代码来源:visualize.py


注:本文中的emulator.Emulator.act方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。