当前位置: 首页>>代码示例>>Python>>正文


Python Agent.observe方法代码示例

本文整理汇总了Python中agent.Agent.observe方法的典型用法代码示例。如果您正苦于以下问题:Python Agent.observe方法的具体用法?Python Agent.observe怎么用?Python Agent.observe使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在agent.Agent的用法示例。


在下文中一共展示了Agent.observe方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

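# Required import: from agent import Agent [as alias]
# Note: observe is an instance method of Agent and cannot be imported on its own; call it on an Agent instance, e.g. agent.observe(state, action, state_next, reward)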
class Environment:
    """Run an ``Agent`` against a DeepMind-wrapped Atari environment.

    The environment and the agent are built from ``config``; ``run`` then
    drives warm-up and main episodes, handing every transition to
    ``Agent.observe`` unless rendering is enabled.
    """

    def __init__(self, config):
        """Create the environment and the agent described by ``config``.

        Args:
            config: Settings object. This class reads ``device``, ``env``,
                ``num_atoms``, ``data_path``, ``num_steps``, ``is_render``,
                ``replay_interval``, ``steps_learning_start``,
                ``num_episodes`` and ``is_saved`` from it, and overwrites
                ``model_type`` with ``Config.MODEL_TYPE_CONV2D``.
        """
        config.model_type = Config.MODEL_TYPE_CONV2D

        print(config.device)
        self.config = config
        self.env = wrap_deepmind(make_atari(config.env), frame_stack=True)
        # Last axis of the observation shape is used as the agent's input size.
        self.num_states = self.env.observation_space.shape[-1]
        self.num_actions = self.env.action_space.n
        self.agent = Agent(config, self.num_states, self.num_actions, self.config.num_atoms)
        # Sliding window holding the lengths of the 100 most recent finished episodes.
        self.total_step = np.zeros(100)

        self.data_path = config.data_path
        # A non-default data path triggers loading of a previously stored model.
        if self.data_path != Config.DATA_PATH_DEFAULT:
            self.agent.load_model()

    def prepro(self, observation):
        """Convert a channel-last observation into a channel-first array.

        Args:
            observation: Array-like of shape ``(H, W, C)``.

        Returns:
            A new C-contiguous ``float64`` ndarray of shape ``(C, H, W)``
            where ``result[c] == observation[:, :, c]``.
        """
        channel_first = np.moveaxis(np.asarray(observation), -1, 0)
        # astype always copies, so the caller never aliases the env's buffer.
        return channel_first.astype(np.float64, order="C")

    def _to_state(self, observation):
        """Return ``observation`` as a batched uint8 tensor on the configured device."""
        frames = self.prepro(observation)
        return torch.from_numpy(frames).to(self.config.device, dtype=torch.uint8).unsqueeze(0)

    def run_episode(self, episode, steps_accumulated=0):
        """Play one episode of at most ``config.num_steps`` steps.

        Args:
            episode: Episode index forwarded to ``Agent.learn`` (``run``
                passes ``-1`` for warm-up episodes).
            steps_accumulated: Offset added to the in-episode step index
                before it is passed to ``Agent.get_action``.

        Returns:
            Number of environment steps taken in this episode.
        """
        start_time = time.time()
        total_reward = 0
        state = self._to_state(self.env.reset())

        for step in range(self.config.num_steps):
            if self.config.is_render:
                time.sleep(0.064)
                self.env.render()

            action = self.agent.get_action(state, step + steps_accumulated)

            observation_next, reward, done, _ = self.env.step(action.item())

            if done:
                # Terminal transitions carry no successor state.
                state_next = None
                self.total_step = np.hstack((self.total_step[1:], step + 1))
            else:
                state_next = self._to_state(observation_next)

            total_reward += reward
            # float32 keeps negative and fractional rewards intact; an
            # unsigned 8-bit tensor cannot represent them.
            reward = torch.tensor([reward], dtype=torch.float32, device=self.config.device)

            # Rendering mode only plays the policy; nothing is stored or learned.
            if not self.config.is_render:
                self.agent.observe(state, action, state_next, reward)
                if step % self.config.replay_interval == 0:
                    self.agent.learn(episode)

            state = state_next

            if done:
                elapsed_time = round(time.time() - start_time, 3)
                print('episode: {0}, steps: {1}, mean steps {2}, time: {3}, reward: {4}'.format(episode, step, self.total_step.mean(), elapsed_time, total_reward))
                return step + 1

        return self.config.num_steps

    def run(self):
        """Run warm-up episodes, then ``config.num_episodes`` main episodes.

        The environment is closed even if an episode raises. The model is
        saved only after a normal finish and only when ``config.is_saved``
        is set.
        """
        try:
            if not self.config.is_render:
                # Warm-up: at least one episode, repeated until
                # ``steps_learning_start`` steps have been played.
                steps = 0
                while True:
                    steps += self.run_episode(-1)
                    if self.config.steps_learning_start <= steps:
                        break

            steps = 0
            for episode in range(self.config.num_episodes):
                steps += self.run_episode(episode, steps)
        finally:
            self.env.close()

        if self.config.is_saved:
            self.agent.save_model()
开发者ID:y-kamiya,项目名称:machine-learning-samples,代码行数:81,代码来源:atari_rainbow.py

示例2: __init__

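# Required import: from agent import Agent [as alias]
# Note: observe is an instance method of Agent and cannot be imported on its own; call it on an Agent instance, e.g. agent.observe(state, action, state_next, reward)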
class Environment:
    """Run an ``Agent`` against the gym environment named by ``ENV``.

    Rewards from the environment are ignored; instead the agent receives
    0 on every non-terminal step and +1 / -1 on the terminal step depending
    on ``is_success_episode``.
    """

    def __init__(self, config):
        """Create the gym environment and the agent.

        Args:
            config: Settings object. This class reads ``device``,
                ``num_atoms``, ``replay_interval``, ``steps_learning_start``
                and ``num_episodes`` from it.
        """
        print(config.device)
        self.config = config
        self.env = gym.make(ENV)
        # self.env = wrappers.Monitor(self.env, '/tmp/gym/cartpole_dqn', force=True)
        self.num_states = self.env.observation_space.shape[0]
        self.num_actions = self.env.action_space.n
        self.agent = Agent(config, self.num_states, self.num_actions, config.num_atoms)
        # Sliding window holding the lengths of the 100 most recent finished episodes.
        self.total_step = np.zeros(100)

    def is_success_episode(self, step):
        """Return True when ``step`` is at least ``NUM_STEPS_TO_SUCCEED``."""
        return NUM_STEPS_TO_SUCCEED <= step

    def _to_state(self, observation):
        """Return ``observation`` as a batched float32 tensor on the configured device."""
        return torch.from_numpy(observation).to(self.config.device, dtype=torch.float32).unsqueeze(0)

    def run_episode(self, episode, steps_accumulated=0):
        """Play one episode of at most ``MAX_STEPS`` steps.

        Args:
            episode: Episode index forwarded to ``Agent.learn`` (``run``
                passes ``-1`` for warm-up episodes).
            steps_accumulated: Offset added to the in-episode step index
                before it is passed to ``Agent.get_action``.

        Returns:
            Number of environment steps taken in this episode.
        """
        start_time = time.time()
        state = self._to_state(self.env.reset())

        for step in range(MAX_STEPS):
            action = self.agent.get_action(state, step + steps_accumulated)

            # The environment's own reward is discarded on purpose.
            observation_next, _, done, _ = self.env.step(action.item())

            if done:
                # Terminal transitions carry no successor state.
                state_next = None
                self.total_step = np.hstack((self.total_step[1:], step + 1))
                reward_value = 1.0 if self.is_success_episode(step) else -1.0
            else:
                reward_value = 0.0
                state_next = self._to_state(observation_next)

            reward = torch.tensor([reward_value], dtype=torch.float32, device=self.config.device)

            self.agent.observe(state, action, state_next, reward)
            if step % self.config.replay_interval == 0:
                self.agent.learn(episode)

            state = state_next

            if done:
                elapsed_time = round(time.time() - start_time, 3)
                print('episode: {0}, steps: {1}, mean steps {2}, time: {3}'.format(episode, step, self.total_step.mean(), elapsed_time))
                return step + 1

        return MAX_STEPS

    def run(self):
        """Run warm-up episodes, then up to ``config.num_episodes`` main episodes.

        The main loop stops early once the mean of the recorded episode
        lengths reaches ``MEAN_STEPS_TO_SUCCEED``. The environment is closed
        even if an episode raises.
        """
        try:
            # Warm-up: at least one episode, repeated until more than
            # ``steps_learning_start`` steps have been played.
            steps = 0
            while True:
                steps += self.run_episode(-1)
                if self.config.steps_learning_start < steps:
                    break

            steps = 0
            for episode in range(self.config.num_episodes):
                if MEAN_STEPS_TO_SUCCEED <= self.total_step.mean():
                    print('over {0} steps of average last 100 episodes, last episode: {1}, steps: {2}'.format(MEAN_STEPS_TO_SUCCEED, episode, steps))
                    break

                steps += self.run_episode(episode, steps)
        finally:
            self.env.close()
开发者ID:y-kamiya,项目名称:machine-learning-samples,代码行数:67,代码来源:cartpole_rainbow.py


注:本文中的agent.Agent.observe方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。