

Python base.Step Method Code Examples

This article collects typical usage examples of the Python rllab.envs.base.Step method. If you are wondering what base.Step does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples from the rllab.envs.base module.


Fifteen code examples of the base.Step method are shown below, sorted by popularity by default.
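
Before diving into the examples, it helps to know what Step returns. In the standard rllab implementation, Step is a thin factory around a namedtuple with the fields (observation, reward, done, info), and any extra keyword arguments are collected into the info dict. The snippet below is a minimal sketch of that behavior, assuming rllab is importable; the field values are made up for illustration.

from rllab.envs.base import Step

# Extra keyword arguments (here `com`) are packed into the trailing info dict.
step = Step(observation=[0.0, 1.0], reward=1.5, done=False, com=[0.1, 0.2, 0.3])
obs, reward, done, info = step   # unpacks like the familiar gym-style 4-tuple
print(info['com'])               # -> [0.1, 0.2, 0.3]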

Example 1: step

# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
        if isinstance(self._wrapped_env.action_space, Box):
            # rescale the action
            lb, ub = self._wrapped_env.action_space.bounds
            scaled_action = lb + (action + 1.) * 0.5 * (ub - lb)
            scaled_action = np.clip(scaled_action, lb, ub)
        else:
            scaled_action = action
        wrapped_step = self._wrapped_env.step(scaled_action)
        next_obs, reward, done, info = wrapped_step
        info['orig_obs'] = next_obs
        if self._normalize_obs:
            next_obs = self._apply_normalize_obs(next_obs)
        if self._normalize_reward:
            reward = self._apply_normalize_reward(reward)
        return Step(next_obs, reward * self._scale_reward, done, **info) 
Developer: nosyndicate, Project: pytorchrl, Lines: 18, Source: normalized_env.py
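
The rescaling line in Example 1 maps a policy action in [-1, 1] onto the wrapped environment's action box [lb, ub]. A quick numeric check of the formula, with hypothetical bounds chosen only for illustration:

import numpy as np

lb, ub = np.array([-2.0]), np.array([4.0])    # hypothetical bounds, not from the project
for action in (-1.0, 0.0, 1.0):
    scaled = lb + (action + 1.) * 0.5 * (ub - lb)
    print(action, '->', np.clip(scaled, lb, ub))   # -1 -> [-2.], 0 -> [1.], 1 -> [4.]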

Example 2: step

# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)
        next_obs = self.get_current_obs()
        lb, ub = self.action_bounds
        scaling = (ub - lb) * 0.5
        vel = next_obs[5]
        height = next_obs[0]
        ang = next_obs[1]
        reward = vel - \
            0.5 * self.ctrl_cost_coeff * np.sum(np.square(action / scaling)) -\
            np.sum(np.maximum(np.abs(next_obs[2:]) - 100, 0)) -\
            10*np.maximum(0.45 - height, 0) - \
            10*np.maximum(abs(ang) - .2, 0)
        # notdone = np.isfinite(state).all() and \
        #     (np.abs(state[3:]) < 100).all() and (state[0] > .7) and \
        #     (abs(state[2]) < .2)
        # done = not notdone
        done = False
        return Step(next_obs, reward, done) 
Developer: thanard, Project: me-trpo, Lines: 21, Source: com_hopper_env.py

Example 3: reset

# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def reset(self):
        self.patient.reset()
        self.sensor.reset()
        self.pump.reset()
        self.scenario.reset()
        self._reset()
        CGM = self.sensor.measure(self.patient)
        obs = Observation(CGM=CGM)
        return Step(
            observation=obs,
            reward=0,
            done=False,
            sample_time=self.sample_time,
            patient_name=self.patient.name,
            meal=0,
            patient_state=self.patient.state) 
Developer: jxx123, Project: simglucose, Lines: 18, Source: env.py

Example 4: step

# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)
        next_obs = self.get_current_obs()
        lb, ub = self.action_bounds
        scaling = (ub - lb) * 0.5
        ctrl_cost = 0.5 * self.ctrl_cost_coeff * np.sum(
            np.square(action / scaling))
        forward_reward = np.linalg.norm(self.get_body_comvel("torso"))  # swimmer has no problem of jumping reward
        reward = forward_reward - ctrl_cost
        done = False
        if self.sparse_rew:
            if abs(self.get_body_com("torso")[0]) > 100.0:
                reward = 1.0
                done = True
            else:
                reward = 0.
        com = np.concatenate([self.get_body_com("torso").flat]).reshape(-1)
        ori = self.get_ori()
        return Step(next_obs, reward, done, com=com, ori=ori) 
Developer: florensacc, Project: snn4hrl, Lines: 21, Source: swimmer_env.py

Example 5: step

# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)
        comvel = self.get_body_comvel("torso")
        forward_reward = comvel[0]
        lb, ub = self.action_bounds
        scaling = (ub - lb) * 0.5
        ctrl_cost = 0.5 * 1e-2 * np.sum(np.square(action / scaling))
        contact_cost = 0.5 * 1e-3 * np.sum(
            np.square(np.clip(self.model.data.cfrc_ext, -1, 1)))
        survive_reward = 0.05
        reward = forward_reward - ctrl_cost - contact_cost + survive_reward
        state = self._state
        notdone = np.isfinite(state).all() \
            and state[2] >= 0.2 and state[2] <= 1.0
        done = not notdone
        ob = self.get_current_obs()
        return Step(ob, float(reward), done) 
Developer: florensacc, Project: snn4hrl, Lines: 19, Source: regular_ant_env.py

Example 6: step

# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
        next_obs, reward, done, info = self.env.step(action)
        return Step(next_obs, reward, done, **info) 
Developer: nosyndicate, Project: pytorchrl, Lines: 5, Source: gym_env.py

Example 7: step

# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)
        next_obs = self.get_current_obs()
        lb, ub = self.action_bounds
        scaling = (ub - lb) * 0.5
        ctrl_cost = 0.5 * self.ctrl_cost_coeff * np.sum(
            np.square(action / scaling))
        forward_reward = self.get_body_comvel("torso")[0]
        reward = forward_reward - ctrl_cost
        done = False
        return Step(next_obs, reward, done) 
Developer: ahq1993, Project: inverse_rl, Lines: 13, Source: swimmer_env.py

Example 8: step

# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
        sess = tf.get_default_session()
        action = np.clip(action, *self.action_space.bounds)
        index = np.random.randint(self.n_models)
        next_observation = sess.run(self.dynamics_outs[index],
                                    feed_dict={self.dynamics_in: np.concatenate([self._state, action])[None]})
        reward = - self.cost_np(self._state[None], action[None], next_observation)
        done = self.is_done(self._state[None], next_observation)[0]
        self._state = np.reshape(next_observation, -1)
        return Step(observation=self._state, reward=reward, done=done) 
Developer: thanard, Project: me-trpo, Lines: 12, Source: env_helpers.py

Example 9: step

# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)
        next_obs = self.get_current_obs()
        lb, ub = self.action_bounds
        scaling = (ub - lb) * 0.5
        ctrl_cost = 0.5 * self.ctrl_cost_coeff * np.sum(
            np.square(action / scaling))
        # ctrl_cost = - 0.5 * self.ctrl_cost_coeff * np.sum(
        # np.log(1+1e-8 -(action/scaling)**2))
        forward_reward = self.get_body_comvel("torso")[0]
        reward = forward_reward - ctrl_cost
        done = False
        # Modified reward here
        return Step(next_obs, reward, done) 
Developer: thanard, Project: me-trpo, Lines: 16, Source: com_swimmer_env.py

Example 10: step

# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)

        next_obs = self.get_current_obs()

        alive_bonus = self.alive_bonus
        data = self.model.data

        # comvel = self.get_body_comvel("torso")
        idx = self.model.geom_names.index("head")
        head_h = self.model.data.geom_xpos[idx][-1]

        # lin_vel_reward = comvel[0]
        lb, ub = self.action_bounds
        scaling = (ub - lb) * 0.5
        ctrl_cost = 1e-2 * self.ctrl_cost_coeff * np.sum(
            np.square(action / scaling))
        # impact_cost = .5 * self.impact_cost_coeff * np.sum(
        #     np.square(np.clip(data.cfrc_ext, -1, 1)))
        # vel_deviation_cost = 0.5 * self.vel_deviation_cost_coeff * np.sum(
        #     np.square(comvel[1:]))
        # reward = lin_vel_reward + alive_bonus - ctrl_cost - \
        #     impact_cost - vel_deviation_cost
        # done = data.qpos[2] < 0.8 or data.qpos[2] > 2.0
        reward = -(head_h - 1.5) ** 2 - ctrl_cost
        done = False

        return Step(next_obs, reward, done) 
Developer: thanard, Project: me-trpo, Lines: 30, Source: com_simple_humanoid_env.py

Example 11: step

# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
        assert np.all(self.qpos) and np.all(self.qvel) and np.all(self.goal), \
            "call env.reset before step."
        # Clipping action
        action = np.clip(action, *self.action_space.bounds)
        action = np.reshape(action, -1)
        qpos = self.qpos
        qvel = self.qvel
        for i in range(self.frame_skip):
            qpos = np.clip(qpos + qvel*self.dt, *self.boundary)
            # qvel = np.clip(qvel + (action/self.mass)*self.dt, *self.vel_bounds)
            qvel = np.clip(self.A@qvel + self.B@action + self.c, *self.vel_bounds)
        self.qpos = qpos
        self.qvel = qvel
        return Step(observation=self.get_obs(), reward=self.get_reward(action), done=False) 
Developer: thanard, Project: me-trpo, Lines: 17, Source: point_mass_env.py
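
The velocity update in Example 11, qvel = A@qvel + B@action + c, is a generic affine model of the dynamics; with A set to the identity, B to (dt/mass) times the identity, and c to zero it reduces to the plain Euler update shown in the commented-out line above it. A small sketch with made-up parameters:

import numpy as np

dt, mass = 0.05, 1.0            # hypothetical values, for illustration only
A = np.eye(2)                   # previous velocity carries over unchanged
B = (dt / mass) * np.eye(2)     # force integrated over one timestep
c = np.zeros(2)                 # no constant drift

qvel = np.zeros(2)
action = np.array([1.0, -0.5])  # applied force
qvel = A @ qvel + B @ action + c
print(qvel)                     # [ 0.05  -0.025]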

Example 12: step

# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)
        next_obs = self.get_current_obs()
        action = np.clip(action, *self.action_bounds)
        ctrl_cost = self.ctrl_cost_coeff * 0.5 * np.sum(np.square(action))
        run_cost = -1 * self.get_body_comvel("torso")[0]
        cost = ctrl_cost + run_cost
        reward = -cost
        reward = np.clip(reward, -10, 10)
        done = False
        return Step(next_obs, reward, done) 
Developer: thanard, Project: me-trpo, Lines: 13, Source: com_half_cheetah_env.py

Example 13: step

# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
        assert self.state is not None, "call env.reset before step."
        # Clipping action
        action = np.clip(action, *self.action_space.bounds)
        action = np.reshape(action, -1)
        next_state = self.transition['A']@self.state + \
                     self.transition['B']@action + \
                     self.transition['c']
        next_state = np.clip(next_state, *self.observation_space.bounds)
        self.state = next_state
        return Step(observation=self.get_obs(), reward=self.get_reward(action), done=False) 
Developer: thanard, Project: me-trpo, Lines: 13, Source: point2D_env.py

Example 14: step

# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)
        
        ball_position = self.get_body_com("object")
        
        mitt_position = self.get_body_com("jaco_link_finger_1") + self.get_body_com("jaco_link_finger_2") + self.get_body_com("jaco_link_finger_3")
        mitt_position = mitt_position/3.
        difference = np.linalg.norm((ball_position - mitt_position))
        
        reward = 1 if (difference < .15 and ball_position[2] > .05) else 0
        
        done = False
        return Step(self.get_current_obs(), float(reward), done, distance=difference, in_hand=(reward == 1)) 
Developer: KamyarGh, Project: rl_swiss, Lines: 15, Source: catch.py

Example 15: step

# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)
        next_obs = self.get_current_obs()
        action = np.clip(action, *self.action_bounds)
        ctrl_cost = 1e-1 * 0.5 * np.sum(np.square(action))
        run_cost = 1.*np.abs(self.get_body_comvel("torso")[0] - self._goal_vel)
        cost = ctrl_cost + run_cost
        reward = -cost
        done = False
        return Step(next_obs, reward, done) 
Developer: KamyarGh, Project: rl_swiss, Lines: 12, Source: half_cheetah_env_rand.py


Note: The rllab.envs.base.Step examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their developers; the source code copyright belongs to the original authors. Please follow the corresponding project's license when distributing or using the code, and do not republish without permission.