This article collects typical usage examples of the Python method rllab.envs.base.Step. If you are unsure what base.Step does or how to use it, the curated code examples below may help. You can also read further about the module rllab.envs.base in which the method is defined.
A total of 15 code examples of base.Step are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
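As background for the examples below: in rllab, Step is a small convenience constructor that packages one transition as an (observation, reward, done, info) namedtuple, with any extra keyword arguments collected into the info dict. The following is a minimal sketch of that idea, paraphrased rather than copied verbatim from rllab's envs/base.py:

import collections

# One transition: extra keyword arguments passed to Step() end up in `info`.
_Step = collections.namedtuple("Step", ["observation", "reward", "done", "info"])

def Step(observation, reward, done, **kwargs):
    return _Step(observation, reward, done, kwargs)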
Example 1: step
# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
    if isinstance(self._wrapped_env.action_space, Box):
        # rescale the action
        lb, ub = self._wrapped_env.action_space.bounds
        scaled_action = lb + (action + 1.) * 0.5 * (ub - lb)
        scaled_action = np.clip(scaled_action, lb, ub)
    else:
        scaled_action = action
    wrapped_step = self._wrapped_env.step(scaled_action)
    next_obs, reward, done, info = wrapped_step
    info['orig_obs'] = next_obs
    if self._normalize_obs:
        next_obs = self._apply_normalize_obs(next_obs)
    if self._normalize_reward:
        reward = self._apply_normalize_reward(reward)
    return Step(next_obs, reward * self._scale_reward, done, **info)
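A wrapper like the one in Example 1 is normally reached through rllab's normalize() helper. The usage sketch below is an assumption based on stock rllab (the CartpoleEnv import, the normalize() keyword names, and the 'orig_obs' key are not guaranteed to match this example's repository):

from rllab.envs.box2d.cartpole_env import CartpoleEnv
from rllab.envs.normalized_env import normalize

env = normalize(CartpoleEnv(), normalize_obs=True)
obs = env.reset()
step = env.step(env.action_space.sample())   # actions are expected in [-1, 1]
next_obs, reward, done = step.observation, step.reward, step.done
orig = step.info.get('orig_obs')             # un-normalized observation stored by this wrapper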
Example 2: step
# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
    self.forward_dynamics(action)
    next_obs = self.get_current_obs()
    lb, ub = self.action_bounds
    scaling = (ub - lb) * 0.5
    vel = next_obs[5]
    height = next_obs[0]
    ang = next_obs[1]
    reward = vel - \
        0.5 * self.ctrl_cost_coeff * np.sum(np.square(action / scaling)) - \
        np.sum(np.maximum(np.abs(next_obs[2:]) - 100, 0)) - \
        10 * np.maximum(0.45 - height, 0) - \
        10 * np.maximum(abs(ang) - .2, 0)
    # notdone = np.isfinite(state).all() and \
    #     (np.abs(state[3:]) < 100).all() and (state[0] > .7) and \
    #     (abs(state[2]) < .2)
    # done = not notdone
    done = False
    return Step(next_obs, reward, done)
Example 3: reset
# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def reset(self):
    self.patient.reset()
    self.sensor.reset()
    self.pump.reset()
    self.scenario.reset()
    self._reset()
    CGM = self.sensor.measure(self.patient)
    obs = Observation(CGM=CGM)
    return Step(
        observation=obs,
        reward=0,
        done=False,
        sample_time=self.sample_time,
        patient_name=self.patient.name,
        meal=0,
        patient_state=self.patient.state)
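Note that sample_time, patient_name, meal, and patient_state are not separate fields of Step; like all extra keyword arguments, they are folded into its info dict. An illustrative (not verbatim) access pattern, assuming the simglucose-style env above and that Observation exposes a CGM attribute:

step = env.reset()
cgm = step.observation.CGM    # the Observation namedtuple built above
meal = step.info['meal']      # extra kwargs passed to Step land in info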
Example 4: step
# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
    self.forward_dynamics(action)
    next_obs = self.get_current_obs()
    lb, ub = self.action_bounds
    scaling = (ub - lb) * 0.5
    ctrl_cost = 0.5 * self.ctrl_cost_coeff * np.sum(
        np.square(action / scaling))
    # the swimmer cannot exploit a jumping reward, so use the full COM speed
    forward_reward = np.linalg.norm(self.get_body_comvel("torso"))
    reward = forward_reward - ctrl_cost
    done = False
    if self.sparse_rew:
        if abs(self.get_body_com("torso")[0]) > 100.0:
            reward = 1.0
            done = True
        else:
            reward = 0.
    com = np.concatenate([self.get_body_com("torso").flat]).reshape(-1)
    ori = self.get_ori()
    return Step(next_obs, reward, done, com=com, ori=ori)
Example 5: step
# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
    self.forward_dynamics(action)
    comvel = self.get_body_comvel("torso")
    forward_reward = comvel[0]
    lb, ub = self.action_bounds
    scaling = (ub - lb) * 0.5
    ctrl_cost = 0.5 * 1e-2 * np.sum(np.square(action / scaling))
    # note: a stray trailing comma in the original made contact_cost a 1-tuple; removed here
    contact_cost = 0.5 * 1e-3 * np.sum(
        np.square(np.clip(self.model.data.cfrc_ext, -1, 1)))
    survive_reward = 0.05
    reward = forward_reward - ctrl_cost - contact_cost + survive_reward
    state = self._state
    notdone = np.isfinite(state).all() \
        and state[2] >= 0.2 and state[2] <= 1.0
    done = not notdone
    ob = self.get_current_obs()
    return Step(ob, float(reward), done)
Example 6: step
# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
    next_obs, reward, done, info = self.env.step(action)
    return Step(next_obs, reward, done, **info)
Example 7: step
# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
    self.forward_dynamics(action)
    next_obs = self.get_current_obs()
    lb, ub = self.action_bounds
    scaling = (ub - lb) * 0.5
    ctrl_cost = 0.5 * self.ctrl_cost_coeff * np.sum(
        np.square(action / scaling))
    forward_reward = self.get_body_comvel("torso")[0]
    reward = forward_reward - ctrl_cost
    done = False
    return Step(next_obs, reward, done)
Example 8: step
# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
    sess = tf.get_default_session()
    action = np.clip(action, *self.action_space.bounds)
    # sample one of the learned dynamics models (a simple ensemble rollout)
    index = np.random.randint(self.n_models)
    next_observation = sess.run(
        self.dynamics_outs[index],
        feed_dict={self.dynamics_in: np.concatenate([self._state, action])[None]})
    # reward and termination come from known cost/termination functions, not a simulator
    reward = -self.cost_np(self._state[None], action[None], next_observation)
    done = self.is_done(self._state[None], next_observation)[0]
    self._state = np.reshape(next_observation, -1)
    return Step(observation=self._state, reward=reward, done=done)
Example 9: step
# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
    self.forward_dynamics(action)
    next_obs = self.get_current_obs()
    lb, ub = self.action_bounds
    scaling = (ub - lb) * 0.5
    ctrl_cost = 0.5 * self.ctrl_cost_coeff * np.sum(
        np.square(action / scaling))
    # ctrl_cost = -0.5 * self.ctrl_cost_coeff * np.sum(
    #     np.log(1 + 1e-8 - (action / scaling) ** 2))
    forward_reward = self.get_body_comvel("torso")[0]
    reward = forward_reward - ctrl_cost
    done = False
    # Modified reward here
    return Step(next_obs, reward, done)
Example 10: step
# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
    self.forward_dynamics(action)
    next_obs = self.get_current_obs()
    alive_bonus = self.alive_bonus
    data = self.model.data
    # comvel = self.get_body_comvel("torso")
    idx = self.model.geom_names.index("head")
    head_h = self.model.data.geom_xpos[idx][-1]
    # lin_vel_reward = comvel[0]
    lb, ub = self.action_bounds
    scaling = (ub - lb) * 0.5
    ctrl_cost = 1e-2 * self.ctrl_cost_coeff * np.sum(
        np.square(action / scaling))
    # impact_cost = .5 * self.impact_cost_coeff * np.sum(
    #     np.square(np.clip(data.cfrc_ext, -1, 1)))
    # vel_deviation_cost = 0.5 * self.vel_deviation_cost_coeff * np.sum(
    #     np.square(comvel[1:]))
    # reward = lin_vel_reward + alive_bonus - ctrl_cost - \
    #     impact_cost - vel_deviation_cost
    # done = data.qpos[2] < 0.8 or data.qpos[2] > 2.0
    reward = -(head_h - 1.5) ** 2 - ctrl_cost
    done = False
    return Step(next_obs, reward, done)
Example 11: step
# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
    assert np.all(self.qpos) and np.all(self.qvel) and np.all(self.goal), \
        "call env.reset before step."
    # Clipping action
    action = np.clip(action, *self.action_space.bounds)
    action = np.reshape(action, -1)
    qpos = self.qpos
    qvel = self.qvel
    for i in range(self.frame_skip):
        qpos = np.clip(qpos + qvel * self.dt, *self.boundary)
        # qvel = np.clip(qvel + (action / self.mass) * self.dt, *self.vel_bounds)
        qvel = np.clip(self.A @ qvel + self.B @ action + self.c, *self.vel_bounds)
    self.qpos = qpos
    self.qvel = qvel
    return Step(observation=self.get_obs(), reward=self.get_reward(action), done=False)
Example 12: step
# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
    self.forward_dynamics(action)
    next_obs = self.get_current_obs()
    action = np.clip(action, *self.action_bounds)
    ctrl_cost = self.ctrl_cost_coeff * 0.5 * np.sum(np.square(action))
    run_cost = -1 * self.get_body_comvel("torso")[0]
    cost = ctrl_cost + run_cost
    reward = -cost
    reward = np.clip(reward, -10, 10)
    done = False
    return Step(next_obs, reward, done)
Example 13: step
# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
    assert self.state is not None, "call env.reset before step."
    # Clipping action
    action = np.clip(action, *self.action_space.bounds)
    action = np.reshape(action, -1)
    next_state = self.transition['A'] @ self.state + \
        self.transition['B'] @ action + \
        self.transition['c']
    next_state = np.clip(next_state, *self.observation_space.bounds)
    self.state = next_state
    return Step(observation=self.get_obs(), reward=self.get_reward(action), done=False)
Example 14: step
# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
    self.forward_dynamics(action)
    ball_position = self.get_body_com("object")
    mitt_position = (self.get_body_com("jaco_link_finger_1") +
                     self.get_body_com("jaco_link_finger_2") +
                     self.get_body_com("jaco_link_finger_3"))
    mitt_position = mitt_position / 3.
    difference = np.linalg.norm(ball_position - mitt_position)
    reward = 1 if (difference < .15 and ball_position[2] > .05) else 0
    done = False
    return Step(self.get_current_obs(), float(reward), done,
                distance=difference, in_hand=(reward == 1))
Example 15: step
# Required import: from rllab.envs import base [as alias]
# Alternatively: from rllab.envs.base import Step [as alias]
def step(self, action):
    self.forward_dynamics(action)
    next_obs = self.get_current_obs()
    action = np.clip(action, *self.action_bounds)
    ctrl_cost = 1e-1 * 0.5 * np.sum(np.square(action))
    run_cost = 1. * np.abs(self.get_body_comvel("torso")[0] - self._goal_vel)
    cost = ctrl_cost + run_cost
    reward = -cost
    done = False
    return Step(next_obs, reward, done)