

Python base.Step method code examples

This article collects typical usage examples of the rllab.envs.base.Step method in Python. If you are wondering what the base.Step method does, how to call it, or what real uses of it look like, the selected code examples below may help. You can also explore further usage examples from the module it belongs to, rllab.envs.base.


The following shows 15 code examples of the base.Step method, sorted by popularity by default.
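
Before the examples, here is a minimal sketch of how Step is typically returned from a custom rllab environment. The environment class MyPointEnv and its reward are hypothetical, invented only for illustration; the Step call and the Env/Box imports follow the rllab API, in which any extra keyword arguments passed to Step are collected into the info dictionary.

import numpy as np

from rllab.envs.base import Env, Step
from rllab.spaces import Box


class MyPointEnv(Env):
    """Hypothetical 2-D point environment used only to illustrate Step."""

    @property
    def observation_space(self):
        return Box(low=-np.inf, high=np.inf, shape=(2,))

    @property
    def action_space(self):
        return Box(low=-0.1, high=0.1, shape=(2,))

    def reset(self):
        self._state = np.random.uniform(-1.0, 1.0, size=(2,))
        return np.copy(self._state)

    def step(self, action):
        self._state = self._state + action
        distance = np.linalg.norm(self._state)
        reward = -distance
        done = distance < 0.01
        # Positional fields are (observation, reward, done); extra keyword
        # arguments such as distance=... end up in the info dict.
        return Step(np.copy(self._state), reward, done, distance=distance)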

Example 1: step

# Required module import: from rllab.envs import base [as alias]
# Or: from rllab.envs.base import Step [as alias]
def step(self, action):
        if isinstance(self._wrapped_env.action_space, Box):
            # rescale the action
            lb, ub = self._wrapped_env.action_space.bounds
            scaled_action = lb + (action + 1.) * 0.5 * (ub - lb)
            scaled_action = np.clip(scaled_action, lb, ub)
        else:
            scaled_action = action
        wrapped_step = self._wrapped_env.step(scaled_action)
        next_obs, reward, done, info = wrapped_step
        info['orig_obs'] = next_obs
        if self._normalize_obs:
            next_obs = self._apply_normalize_obs(next_obs)
        if self._normalize_reward:
            reward = self._apply_normalize_reward(reward)
        return Step(next_obs, reward * self._scale_reward, done, **info) 
Developer: nosyndicate, Project: pytorchrl, Lines: 18, Source: normalized_env.py

Example 2: step

# Required module import: from rllab.envs import base [as alias]
# Or: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)
        next_obs = self.get_current_obs()
        lb, ub = self.action_bounds
        scaling = (ub - lb) * 0.5
        vel = next_obs[5]
        height = next_obs[0]
        ang = next_obs[1]
        reward = vel - \
            0.5 * self.ctrl_cost_coeff * np.sum(np.square(action / scaling)) -\
            np.sum(np.maximum(np.abs(next_obs[2:]) - 100, 0)) -\
            10*np.maximum(0.45 - height, 0) - \
            10*np.maximum(abs(ang) - .2, 0)
        # notdone = np.isfinite(state).all() and \
        #     (np.abs(state[3:]) < 100).all() and (state[0] > .7) and \
        #     (abs(state[2]) < .2)
        # done = not notdone
        done = False
        return Step(next_obs, reward, done) 
Developer: thanard, Project: me-trpo, Lines: 21, Source: com_hopper_env.py

Example 3: reset

# Required module import: from rllab.envs import base [as alias]
# Or: from rllab.envs.base import Step [as alias]
def reset(self):
        self.patient.reset()
        self.sensor.reset()
        self.pump.reset()
        self.scenario.reset()
        self._reset()
        CGM = self.sensor.measure(self.patient)
        obs = Observation(CGM=CGM)
        return Step(
            observation=obs,
            reward=0,
            done=False,
            sample_time=self.sample_time,
            patient_name=self.patient.name,
            meal=0,
            patient_state=self.patient.state) 
Developer: jxx123, Project: simglucose, Lines: 18, Source: env.py

Example 4: step

# Required module import: from rllab.envs import base [as alias]
# Or: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)
        next_obs = self.get_current_obs()
        lb, ub = self.action_bounds
        scaling = (ub - lb) * 0.5
        ctrl_cost = 0.5 * self.ctrl_cost_coeff * np.sum(
            np.square(action / scaling))
        forward_reward = np.linalg.norm(self.get_body_comvel("torso"))  # norm of the COM velocity, so the swimmer cannot exploit the reward by jumping
        reward = forward_reward - ctrl_cost
        done = False
        if self.sparse_rew:
            if abs(self.get_body_com("torso")[0]) > 100.0:
                reward = 1.0
                done = True
            else:
                reward = 0.
        com = np.concatenate([self.get_body_com("torso").flat]).reshape(-1)
        ori = self.get_ori()
        return Step(next_obs, reward, done, com=com, ori=ori) 
Developer: florensacc, Project: snn4hrl, Lines: 21, Source: swimmer_env.py

Example 5: step

# Required module import: from rllab.envs import base [as alias]
# Or: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)
        comvel = self.get_body_comvel("torso")
        forward_reward = comvel[0]
        lb, ub = self.action_bounds
        scaling = (ub - lb) * 0.5
        ctrl_cost = 0.5 * 1e-2 * np.sum(np.square(action / scaling))
        contact_cost = 0.5 * 1e-3 * np.sum(
            np.square(np.clip(self.model.data.cfrc_ext, -1, 1)))
        survive_reward = 0.05
        reward = forward_reward - ctrl_cost - contact_cost + survive_reward
        state = self._state
        notdone = np.isfinite(state).all() \
            and state[2] >= 0.2 and state[2] <= 1.0
        done = not notdone
        ob = self.get_current_obs()
        return Step(ob, float(reward), done) 
Developer: florensacc, Project: snn4hrl, Lines: 19, Source: regular_ant_env.py

Example 6: step

# Required module import: from rllab.envs import base [as alias]
# Or: from rllab.envs.base import Step [as alias]
def step(self, action):
        next_obs, reward, done, info = self.env.step(action)
        return Step(next_obs, reward, done, **info) 
Developer: nosyndicate, Project: pytorchrl, Lines: 5, Source: gym_env.py

Example 7: step

# Required module import: from rllab.envs import base [as alias]
# Or: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)
        next_obs = self.get_current_obs()
        lb, ub = self.action_bounds
        scaling = (ub - lb) * 0.5
        ctrl_cost = 0.5 * self.ctrl_cost_coeff * np.sum(
            np.square(action / scaling))
        forward_reward = self.get_body_comvel("torso")[0]
        reward = forward_reward - ctrl_cost
        done = False
        return Step(next_obs, reward, done) 
Developer: ahq1993, Project: inverse_rl, Lines: 13, Source: swimmer_env.py

Example 8: step

# Required module import: from rllab.envs import base [as alias]
# Or: from rllab.envs.base import Step [as alias]
def step(self, action):
        sess = tf.get_default_session()
        action = np.clip(action, *self.action_space.bounds)
        index = np.random.randint(self.n_models)
        next_observation = sess.run(self.dynamics_outs[index],
                                    feed_dict={self.dynamics_in: np.concatenate([self._state, action])[None]})
        reward = - self.cost_np(self._state[None], action[None], next_observation)
        done = self.is_done(self._state[None], next_observation)[0]
        self._state = np.reshape(next_observation, -1)
        return Step(observation=self._state, reward=reward, done=done) 
Developer: thanard, Project: me-trpo, Lines: 12, Source: env_helpers.py

Example 9: step

# Required module import: from rllab.envs import base [as alias]
# Or: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)
        next_obs = self.get_current_obs()
        lb, ub = self.action_bounds
        scaling = (ub - lb) * 0.5
        ctrl_cost = 0.5 * self.ctrl_cost_coeff * np.sum(
            np.square(action / scaling))
        # ctrl_cost = - 0.5 * self.ctrl_cost_coeff * np.sum(
        # np.log(1+1e-8 -(action/scaling)**2))
        forward_reward = self.get_body_comvel("torso")[0]
        reward = forward_reward - ctrl_cost
        done = False
        # Modified reward here
        return Step(next_obs, reward, done) 
Developer: thanard, Project: me-trpo, Lines: 16, Source: com_swimmer_env.py

Example 10: step

# Required module import: from rllab.envs import base [as alias]
# Or: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)

        next_obs = self.get_current_obs()

        alive_bonus = self.alive_bonus
        data = self.model.data

        # comvel = self.get_body_comvel("torso")
        idx = self.model.geom_names.index("head")
        head_h = self.model.data.geom_xpos[idx][-1]

        # lin_vel_reward = comvel[0]
        lb, ub = self.action_bounds
        scaling = (ub - lb) * 0.5
        ctrl_cost = 1e-2 * self.ctrl_cost_coeff * np.sum(
            np.square(action / scaling))
        # impact_cost = .5 * self.impact_cost_coeff * np.sum(
        #     np.square(np.clip(data.cfrc_ext, -1, 1)))
        # vel_deviation_cost = 0.5 * self.vel_deviation_cost_coeff * np.sum(
        #     np.square(comvel[1:]))
        # reward = lin_vel_reward + alive_bonus - ctrl_cost - \
        #     impact_cost - vel_deviation_cost
        # done = data.qpos[2] < 0.8 or data.qpos[2] > 2.0
        reward = -(head_h - 1.5) ** 2 - ctrl_cost
        done = False

        return Step(next_obs, reward, done) 
Developer: thanard, Project: me-trpo, Lines: 30, Source: com_simple_humanoid_env.py

Example 11: step

# Required module import: from rllab.envs import base [as alias]
# Or: from rllab.envs.base import Step [as alias]
def step(self, action):
        assert np.all(self.qpos) and np.all(self.qvel) and np.all(self.goal), \
            "call env.reset before step."
        # Clipping action
        action = np.clip(action, *self.action_space.bounds)
        action = np.reshape(action, -1)
        qpos = self.qpos
        qvel = self.qvel
        for i in range(self.frame_skip):
            qpos = np.clip(qpos + qvel*self.dt, *self.boundary)
            # qvel = np.clip(qvel + (action/self.mass)*self.dt, *self.vel_bounds)
            qvel = np.clip(self.A@qvel + self.B@action + self.c, *self.vel_bounds)
        self.qpos = qpos
        self.qvel = qvel
        return Step(observation=self.get_obs(), reward=self.get_reward(action), done=False) 
Developer: thanard, Project: me-trpo, Lines: 17, Source: point_mass_env.py

Example 12: step

# Required module import: from rllab.envs import base [as alias]
# Or: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)
        next_obs = self.get_current_obs()
        action = np.clip(action, *self.action_bounds)
        ctrl_cost = self.ctrl_cost_coeff * 0.5 * np.sum(np.square(action))
        run_cost = -1 * self.get_body_comvel("torso")[0]
        cost = ctrl_cost + run_cost
        reward = -cost
        reward = np.clip(reward, -10, 10)
        done = False
        return Step(next_obs, reward, done) 
Developer: thanard, Project: me-trpo, Lines: 13, Source: com_half_cheetah_env.py

Example 13: step

# Required module import: from rllab.envs import base [as alias]
# Or: from rllab.envs.base import Step [as alias]
def step(self, action):
        assert self.state is not None, "call env.reset before step."
        # Clipping action
        action = np.clip(action, *self.action_space.bounds)
        action = np.reshape(action, -1)
        next_state = self.transition['A']@self.state + \
                     self.transition['B']@action + \
                     self.transition['c']
        next_state = np.clip(next_state, *self.observation_space.bounds)
        self.state = next_state
        return Step(observation=self.get_obs(), reward=self.get_reward(action), done=False) 
Developer: thanard, Project: me-trpo, Lines: 13, Source: point2D_env.py

Example 14: step

# Required module import: from rllab.envs import base [as alias]
# Or: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)
        
        ball_position = self.get_body_com("object")
        
        mitt_position = self.get_body_com("jaco_link_finger_1") + self.get_body_com("jaco_link_finger_2") + self.get_body_com("jaco_link_finger_3")
        mitt_position = mitt_position/3.
        difference = np.linalg.norm((ball_position - mitt_position))
        
        reward = 1 if (difference < .15 and ball_position[2] > .05) else 0
        
        done = False
        return Step(self.get_current_obs(), float(reward), done, distance=difference, in_hand=(reward == 1)) 
Developer: KamyarGh, Project: rl_swiss, Lines: 15, Source: catch.py

Example 15: step

# Required module import: from rllab.envs import base [as alias]
# Or: from rllab.envs.base import Step [as alias]
def step(self, action):
        self.forward_dynamics(action)
        next_obs = self.get_current_obs()
        action = np.clip(action, *self.action_bounds)
        ctrl_cost = 1e-1 * 0.5 * np.sum(np.square(action))
        run_cost = 1.*np.abs(self.get_body_comvel("torso")[0] - self._goal_vel)
        cost = ctrl_cost + run_cost
        reward = -cost
        done = False
        return Step(next_obs, reward, done) 
Developer: KamyarGh, Project: rl_swiss, Lines: 12, Source: half_cheetah_env_rand.py
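
Finally, as a reminder of how these return values are consumed, the following hypothetical rollout loop unpacks the Step namedtuple into its (observation, reward, done, info) fields. Here env and policy are placeholders for any rllab-style environment and policy, and env.reset() is assumed to return a plain observation (unlike example 3 above, which returns a Step from reset as well).

# Hypothetical rollout; `env` and `policy` are placeholders.
obs = env.reset()
path_return = 0.0
for _ in range(1000):
    action, agent_info = policy.get_action(obs)
    # A Step behaves like the familiar 4-tuple (observation, reward, done, info).
    next_obs, reward, done, env_info = env.step(action)
    path_return += reward
    obs = next_obs
    if done:
        break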


Note: The rllab.envs.base.Step method examples in this article were compiled by 純淨天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by various developers; the source code copyright belongs to the original authors, and distribution and use should follow the corresponding project's license. Do not reproduce without permission.