This article collects and summarizes typical usage examples of the Python gym.wrappers.TimeLimit method. If you have been wondering what exactly wrappers.TimeLimit does, or how to use it, the curated code examples below should help. You can also explore the rest of the gym.wrappers module.
Below are 11 code examples of wrappers.TimeLimit, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python examples.
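Before diving into the collected examples, here is a minimal, self-contained sketch of what the wrapper does: TimeLimit wraps an environment and forces done=True once a step budget is exhausted. The environment id and the limit of 100 steps below are arbitrary illustrations, not taken from any of the examples.

import gym
from gym.wrappers import TimeLimit

# Wrap a bare environment so that episodes last at most 100 steps.
env = TimeLimit(gym.make('CartPole-v1').unwrapped, max_episode_steps=100)

obs = env.reset()
done, steps = False, 0
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
    steps += 1
assert steps <= 100  # TimeLimit guarantees the episode is truncated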
Example 1: make_env
# Required module import: from gym import wrappers [as alias]
# Or: from gym.wrappers import TimeLimit [as alias]
def make_env(env_id: str):
    """Make a basic gym environment, without any special wrappers.

    Parameters
    ----------
    env_id: str
        The environment's id, e.g. 'FrozenLake-v0'.

    Returns
    -------
    gym.Env
        A gym environment.
    """
    assert env_id in ENV_IDS
    if env_id not in ENV_IDS_NON_GYM:
        env = gym.make(env_id)
    else:
        if env_id == 'MazeWorld0-v0':
            env = TimeLimit(MazeWorld(map_id=0), max_episode_steps=200)
        elif env_id == 'MazeWorld1-v0':
            env = TimeLimit(MazeWorld(map_id=1), max_episode_steps=200)
        else:
            raise NotImplementedError()
    return env
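A note on the branch above: for environments registered with gym, gym.make already applies a TimeLimit wrapper whenever the registration specifies max_episode_steps, which is why only the custom MazeWorld environments need to be wrapped explicitly. A quick way to verify this (classic gym API, using a standard environment id purely for illustration):

import gym
from gym.wrappers import TimeLimit

env = gym.make('FrozenLake-v0')
assert isinstance(env, TimeLimit)  # applied automatically from the registry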
Example 2: _attach_env_methods
# Required module import: from gym import wrappers [as alias]
# Or: from gym.wrappers import TimeLimit [as alias]
def _attach_env_methods(self):
    """Attach self._env_step to the TimeLimit wrapper of the environment or,
    if none is present, to the unwrapped environment. Attaching at the
    TimeLimit level lets us track the true env reset signals."""
    # Get the TimeLimit wrapper or the unwrapped env
    currentenv = self.env
    while True:
        if isinstance(currentenv, (TimeLimit, MaxEpisodeLen)):
            break
        elif isinstance(currentenv, Wrapper):
            currentenv = currentenv.env
        else:
            break
    # Attach the **env** step function
    self.base_env = currentenv
    self.base_env_step = self.base_env.step
    self.base_env_reset = self.base_env.reset
    self.base_env_render = self.base_env.render
    self.base_env.step = self._env_step
    self.base_env.reset = self._env_reset
    self.base_env.render = self._env_render
    # pylint: disable=method-hidden
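The method above relies on monkey-patching: it replaces step, reset and render on the wrapper object itself, so every call is routed through the monitor's hooks. A toy, standalone illustration of the same interception trick (the names here are mine, not from the example):

import gym

env = gym.make('CartPole-v1')
original_step = env.step

def counting_step(action):
    # Do extra work around the real step, then delegate to it.
    counting_step.calls += 1
    return original_step(action)

counting_step.calls = 0
env.step = counting_step

env.reset()
env.step(env.action_space.sample())
print(counting_step.calls)  # 1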
Example 3: __init__
# Required module import: from gym import wrappers [as alias]
# Or: from gym.wrappers import TimeLimit [as alias]
def __init__(self, env, max_steps=1000, test_mode=False):
    assert isinstance(env.observation_space, gym.spaces.Box)
    # Add a time feature to the observation
    low, high = env.observation_space.low, env.observation_space.high
    low, high = np.concatenate((low, [0.])), np.concatenate((high, [1.]))
    env.observation_space = gym.spaces.Box(low=low, high=high, dtype=np.float32)
    super(TimeFeatureWrapper, self).__init__(env)

    if isinstance(env, TimeLimit):
        self._max_steps = env._max_episode_steps
    else:
        self._max_steps = max_steps
    self._current_step = 0
    self._test_mode = test_mode
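This example shows only the constructor; the part that actually appends the time feature to each observation is not included. A plausible completion, following the common TimeFeatureWrapper pattern (the method name _get_obs and its exact scaling are my assumptions, not the project's code):

def _get_obs(self, obs):
    # Remaining time, scaled to [0, 1]: 1.0 at episode start, 0.0 at the limit.
    time_feature = 1.0 - (self._current_step / self._max_steps)
    if self._test_mode:
        time_feature = 1.0  # hide the time pressure at test time
    return np.concatenate((obs, [time_feature]))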
Example 4: get_create_env_fun
# Required module import: from gym import wrappers [as alias]
# Or: from gym.wrappers import TimeLimit [as alias]
def get_create_env_fun(batch_env_fn, time_limit):
    """TODO(konradczechowski): Add doc-string."""
    def create_env_fun(game_name, sticky_actions=True):
        del game_name, sticky_actions
        batch_env = batch_env_fn(in_graph=False)
        env = FlatBatchEnv(batch_env)
        env = TimeLimit(env, max_episode_steps=time_limit)
        env = ResizeObservation(env)  # pylint: disable=redefined-variable-type
        env = GameOverOnDone(env)
        return env
    return create_env_fun
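One design point worth noting here: wrapper order matters, since each wrapper sees the transitions of everything beneath it, while env.unwrapped always tunnels through the whole chain. A small generic illustration with standard gym only (unrelated to the project-specific wrappers above):

import gym
from gym.wrappers import TimeLimit

inner = gym.make('CartPole-v1').unwrapped
env = TimeLimit(inner, max_episode_steps=50)
assert env.unwrapped is inner  # .unwrapped reaches through every wrapper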
Example 5: __init__
# Required module import: from gym import wrappers [as alias]
# Or: from gym.wrappers import TimeLimit [as alias]
def __init__(self, env, max_steps=None, key='reset'):
    super().__init__(env)
    self.reset_key = key

    from gym.wrappers import TimeLimit
    self.enforce = bool(max_steps)
    if max_steps is None:
        tl = get_wrapper_by_class(env, TimeLimit)
        max_steps = 1 << 31 if tl is None else tl._max_episode_steps
        # print("TimeLimitResetWrapper.max_steps =", max_steps)
    self.max_steps = max_steps
    self.t = 0
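The helper get_wrapper_by_class is not shown in this example. A minimal sketch of what such a helper presumably does, walking the wrapper chain from the outside in until it finds an instance of the requested class:

from gym import Wrapper

def get_wrapper_by_class(env, cls):
    # Walk outward-in through the wrapper chain.
    current = env
    while isinstance(current, Wrapper):
        if isinstance(current, cls):
            return current
        current = current.env
    return None  # no wrapper of that class in the chain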
Example 6: mujoco_wrapper
# Required module import: from gym import wrappers [as alias]
# Or: from gym.wrappers import TimeLimit [as alias]
def mujoco_wrapper(entry_point, **kwargs):
    normalization_scale = kwargs.pop('normalization_scale', 1.)
    max_episode_steps = kwargs.pop('max_episode_steps', 200)

    # Load the environment from its entry point
    env_cls = load(entry_point)
    env = env_cls(**kwargs)

    # Normalization wrapper
    env = NormalizedActionWrapper(env, scale=normalization_scale)

    # Time limit
    env = TimeLimit(env, max_episode_steps=max_episode_steps)

    return env
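NormalizedActionWrapper is project-specific and not shown here. As a sketch of what action-normalization wrappers of this kind typically do, the class below rescales agent actions from [-scale, scale] onto the environment's true action bounds; the implementation is my assumption, not the project's code:

import gym
import numpy as np

class NormalizedActionWrapper(gym.ActionWrapper):
    # Assumed implementation: expose a [-scale, scale] action space and
    # linearly map incoming actions to the wrapped env's real bounds.
    def __init__(self, env, scale=1.0):
        super().__init__(env)
        self.scale = scale
        self.orig_low = env.action_space.low
        self.orig_high = env.action_space.high
        self.action_space = gym.spaces.Box(low=-scale, high=scale,
                                           shape=env.action_space.shape,
                                           dtype=np.float32)

    def action(self, action):
        # Linearly map [-scale, scale] to [orig_low, orig_high].
        fraction = (action + self.scale) / (2.0 * self.scale)
        return self.orig_low + fraction * (self.orig_high - self.orig_low)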
Example 7: _env_step
# Required module import: from gym import wrappers [as alias]
# Or: from gym.wrappers import TimeLimit [as alias]
def _env_step(self, action):
    """Corresponds to the step() function of the TimeLimit wrapper or of the
    unwrapped environment."""
    self._before_env_step(action)
    # Call the actual env.step function
    obs, reward, done, info = self.base_env_step(action)
    self._after_env_step(obs, reward, done, info)
    return obs, reward, done, info
Example 8: __init__
# Required module import: from gym import wrappers [as alias]
# Or: from gym.wrappers import TimeLimit [as alias]
def __init__(self,
             domain,
             task,
             *args,
             env=None,
             normalize=True,
             observation_keys=None,
             unwrap_time_limit=True,
             **kwargs):
    assert not args, (
        "Gym environments don't support args. Use kwargs instead.")

    self.normalize = normalize
    self.observation_keys = observation_keys
    self.unwrap_time_limit = unwrap_time_limit

    self._Serializable__initialize(locals())
    super(GymAdapter, self).__init__(domain, task, *args, **kwargs)

    if env is None:
        assert (domain is not None and task is not None), (domain, task)
        env_id = f"{domain}-{task}"
        env = gym.envs.make(env_id, **kwargs)
    else:
        assert domain is None and task is None, (domain, task)

    if isinstance(env, wrappers.TimeLimit) and unwrap_time_limit:
        # Remove the TimeLimit wrapper that sets 'done = True' when the time
        # limit specified for each environment has been passed; with it, the
        # environment is not Markovian (the terminal condition depends on
        # time rather than state).
        env = env.env

    if isinstance(env.observation_space, spaces.Dict):
        observation_keys = (
            observation_keys or list(env.observation_space.spaces.keys()))
    if normalize:
        env = NormalizeActionWrapper(env)

    self._env = env
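The comment in this example touches the key subtlety: a TimeLimit 'done' is a truncation, not a true terminal state, and treating it as terminal biases value bootstrapping. Newer classic-gym versions also expose this distinction via info['TimeLimit.truncated']; since the behavior is version-dependent, treat the snippet below as a sketch:

import gym
from gym.wrappers import TimeLimit

# Pendulum never terminates on its own, so any 'done' here is time-based.
env = TimeLimit(gym.make('Pendulum-v0').unwrapped, max_episode_steps=5)
env.reset()
done, info = False, {}
while not done:
    _, _, done, info = env.step(env.action_space.sample())
print(info.get('TimeLimit.truncated'))  # True when the limit caused 'done'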
Example 9: test_ddpg
# Required module import: from gym import wrappers [as alias]
# Or: from gym.wrappers import TimeLimit [as alias]
def test_ddpg():
    import gym_mix
    env = gym.make('ContinuousCopyRand-v0')
    env = wrappers.TimeLimit(env, max_episode_steps=0)

    @model(optimizer=tf.train.AdamOptimizer(0.0001),
           tracker=tf.train.ExponentialMovingAverage(1 - 0.001))
    def actor(x):
        x = layers.fully_connected(x, 50, biases_initializer=layers.xavier_initializer())
        a = layers.fully_connected(x, env.action_space.shape[0], None,
                                   weights_initializer=tf.random_normal_initializer(0, 1e-4))
        return a

    @model(optimizer=tf.train.AdamOptimizer(.001),
           tracker=tf.train.ExponentialMovingAverage(1 - 0.001))
    def critic(x, a):
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        x = tf.concat([x, a], axis=1)
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        q = layers.fully_connected(x, 1, None, weights_initializer=tf.random_normal_initializer(0, 1e-4))
        return tf.squeeze(q, 1)

    agent = DdpgAgent(env, actor, critic)

    for ep in range(10000):
        R, _ = agent.play_episode()
        if ep % 100 == 0:
            print(f'Return after episode {ep} is {R}')
Example 10: test
# Required module import: from gym import wrappers [as alias]
# Or: from gym.wrappers import TimeLimit [as alias]
def test(self):
    steps = 15

    env = gym.make('CartPole-v1')
    # Unwrap the default TimeLimit and wrap with a new one to simulate
    # done=True at step 5
    self.assertIsInstance(env, TimeLimit)
    env = env.env  # unwrap
    env = TimeLimit(env, max_episode_steps=5)  # wrap

    tmpdir = tempfile.mkdtemp()
    try:
        env = chainerrl.wrappers.Monitor(
            env, directory=tmpdir, video_callable=lambda episode_id: True)
        episode_idx = 0
        episode_len = 0
        t = 0
        _ = env.reset()
        while True:
            _, _, done, info = env.step(env.action_space.sample())
            episode_len += 1
            t += 1
            if episode_idx == 1 and episode_len >= 3:
                info['needs_reset'] = True  # simulate ContinuingTimeLimit
            if done or info.get('needs_reset', False) or t == steps:
                if episode_idx + 1 == self.n_episodes or t == steps:
                    break
                env.reset()
                episode_idx += 1
                episode_len = 0

        # `env.close()` is called when `env` is garbage-collected
        # (or explicitly deleted/closed).
        del env

        # Check that the videos & meta files were generated
        files = os.listdir(tmpdir)
        mp4s = [f for f in files if f.endswith('.mp4')]
        metas = [f for f in files if f.endswith('.meta.json')]
        stats = [f for f in files if f.endswith('.stats.json')]
        manifests = [f for f in files if f.endswith('.manifest.json')]
        self.assertEqual(len(mp4s), self.n_episodes)
        self.assertEqual(len(metas), self.n_episodes)
        self.assertEqual(len(stats), 1)
        self.assertEqual(len(manifests), 1)
    finally:
        shutil.rmtree(tmpdir)
Example 11: __init__
# Required module import: from gym import wrappers [as alias]
# Or: from gym.wrappers import TimeLimit [as alias]
def __init__(self, env, log_dir, mode, log_period=None, video_spec=None, eval_period=None):
    """
    Args:
        log_dir: str. The directory where to save the monitor videos and stats
        log_period: int. The period for logging statistics to stdout and to TensorBoard
        mode: str. Either 't' (train) or 'e' (eval) for the mode in which to start the monitor
        video_spec: lambda, int, False or None. Specifies how often to record episodes.
            - If a lambda, it must take the episode number and return True/False
              indicating whether a video should be recorded.
            - An int specifies a period in episodes.
            - False disables video recording.
            - If None, every 1000th episode is recorded.
        eval_period: int. Required only in evaluation mode. Needed to compute the correct logging step.
    """
    assert mode in ['t', 'e']
    super().__init__(env)

    log_dir = os.path.join(log_dir, "monitor")
    video_dir = os.path.join(log_dir, "videos")

    # Member data
    self.video_dir = video_dir
    self.log_dir = log_dir
    self.enable_video = self._get_video_callable(video_spec)

    # Create the monitor directory
    self._make_log_dir()

    # Composition objects
    self.stats_recorder = StatsRecorder(log_dir, mode, log_period, eval_period)
    self.video_plotter = VideoPlotter(self.env, mode=mode)
    self.video_recorder = None

    # Attach StatsRecorder agent methods
    self._before_agent_step = self.stats_recorder.before_agent_step
    self._after_agent_step = self.stats_recorder.after_agent_step
    self._before_agent_reset = self.stats_recorder.before_agent_reset
    self._after_agent_reset = self.stats_recorder.after_agent_reset

    # Find the TimeLimit wrapper and attach step(), reset() and render()
    self.base_env = None
    self.base_env_step = None
    self.base_env_reset = None
    self.base_env_render = None
    self._attach_env_methods()

    # Attach member methods
    self.enable_video_plots = self.video_plotter.activate
    self.set_stdout_logs = self.stats_recorder.set_stdout_logs
    self.set_summary_getter = self.stats_recorder.set_summary_getter
    self.save = self.stats_recorder.save
    self.log_stats = self.stats_recorder.log_stats