本文整理汇总了Python中gym.wrappers.time_limit.TimeLimit类的典型用法代码示例。如果您正苦于以下问题:Python time_limit.TimeLimit类的具体用法?Python time_limit.TimeLimit怎么用?Python time_limit.TimeLimit使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块gym.wrappers.time_limit的用法示例。
在下文中一共展示了time_limit.TimeLimit类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_is_unwrappable_to
# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def test_is_unwrappable_to():
    """Assert that ``is_unwrappable_to`` accepts each expected (env, class) pair.

    The first group builds a fresh environment for every check; the second
    group reuses one reward-wrapped environment and checks it against several
    classes in its wrapper chain.
    """
    # Each factory is invoked right before its check so every assertion
    # runs against a newly created environment.
    fresh_env_cases = [
        (lambda: make_env('FrozenLake-v0'), TimeLimit),
        (lambda: make_env('FrozenLake-v0'), DiscreteEnv),
        (lambda: feature_wrapper.make('FrozenLake-v0'),
         FrozenLakeFeatureWrapper),
        (lambda: feature_wrapper.make('FrozenLake8x8-v0'),
         FrozenLakeFeatureWrapper),
        (lambda: feature_wrapper.make('FrozenLake-v0'),
         feature_wrapper.FeatureWrapper),
    ]
    for build_env, target_cls in fresh_env_cases:
        assert is_unwrappable_to(build_env(), target_cls)

    # Stack a reward wrapper on top of a feature-wrapped environment.
    wrapped = feature_wrapper.make('FrozenLake-v0')
    wrapped = RewardWrapper(
        wrapped, FeatureBasedRewardFunction(wrapped, 'random'))
    for target_cls in (RewardWrapper, feature_wrapper.FeatureWrapper,
                       DiscreteEnv, gym.Env):
        assert is_unwrappable_to(wrapped, target_cls)
示例2: make
# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def make(self, id):
    """Instantiate the environment registered under ``id``.

    Looks up the spec via ``self.spec``, builds the environment, applies the
    legacy ``_reset``/``_step`` compatibility patch when applicable, and wraps
    the result in ``TimeLimit`` unless the spec is tagged ``'vnc'`` or has no
    timestep limit.

    Args:
        id: Identifier passed to ``self.spec``.

    Returns:
        The created environment, possibly wrapped in ``TimeLimit``.
    """
    logger.info('Making new env: %s', id)
    spec = self.spec(id)
    env = spec.make()

    # Environments that still override _reset/_step get patched unless they
    # opt out by setting _gym_disable_underscore_compat = True.
    overrides_legacy_hooks = hasattr(env, "_reset") and hasattr(env, "_step")
    if overrides_legacy_hooks and not getattr(env, "_gym_disable_underscore_compat", False):
        patch_deprecated_methods(env)

    # NOTE(review): the guard reads `timestep_limit` while the wrapper is
    # configured from max_episode_steps/max_episode_seconds; presumably
    # `timestep_limit` aliases `max_episode_steps` on the spec - confirm.
    if env.spec.timestep_limit is None or spec.tags.get('vnc'):
        return env

    from gym.wrappers.time_limit import TimeLimit
    return TimeLimit(
        env,
        max_episode_steps=env.spec.max_episode_steps,
        max_episode_seconds=env.spec.max_episode_seconds,
    )
示例3: make
# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def make(self, id, kwargs):
    """Instantiate the environment registered under ``id``.

    The spec is resolved through ``registration.spec``. The environment is
    patched for legacy ``_reset``/``_step`` overrides when needed and wrapped
    in ``TimeLimit`` unless the spec is tagged ``'vnc'`` or has no timestep
    limit.

    Args:
        id: Identifier passed to ``registration.spec``.
        kwargs: Not used anywhere in this body.
            NOTE(review): looks like it was meant to be forwarded to
            ``spec.make`` - confirm against callers.

    Returns:
        The created environment, possibly wrapped in ``TimeLimit``.
    """
    logger.info('Making new env: %s', id)
    spec = registration.spec(id)
    env = spec.make()

    # Legacy environments override _reset/_step; they can opt out of the
    # compatibility patch with _gym_disable_underscore_compat = True.
    has_legacy_hooks = hasattr(env, "_reset") and hasattr(env, "_step")
    if has_legacy_hooks and not getattr(env, "_gym_disable_underscore_compat", False):
        patch_deprecated_methods(env)

    needs_time_limit = (env.spec.timestep_limit is not None) and not spec.tags.get('vnc')
    if needs_time_limit:
        from gym.wrappers.time_limit import TimeLimit
        limits = {
            'max_episode_steps': env.spec.max_episode_steps,
            'max_episode_seconds': env.spec.max_episode_seconds,
        }
        env = TimeLimit(env, **limits)
    return env
示例4: __init__
# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def __init__(self, id, entry_point=None, reward_threshold=None, kwargs=None, nondeterministic=False, tags=None, max_episode_steps=None):
    """Store the registration data of one environment spec.

    Args:
        id: Environment ID; must match ``env_id_re``.
        entry_point: Stored as ``self.entry_point``.
        reward_threshold: Evaluation parameter, stored as-is.
        kwargs: Stored as ``self._kwargs`` (empty dict when ``None``).
        nondeterministic: Environment property, stored as-is.
        tags: Optional dict of tags. The dict that is passed in is updated
            in place with the ``max_episode_steps`` entry.
        max_episode_steps: Step limit, stored on the spec and mirrored into
            the tags.

    Raises:
        error.Error: If ``id`` does not match ``env_id_re``.
    """
    self.id = id
    # Evaluation parameters
    self.reward_threshold = reward_threshold
    # Environment properties
    self.nondeterministic = nondeterministic
    self.entry_point = entry_point

    tag_map = {} if tags is None else tags
    self.tags = tag_map
    # Mirror the step limit into the tags.
    tag_map['wrapper_config.TimeLimit.max_episode_steps'] = max_episode_steps
    self.max_episode_steps = max_episode_steps

    # The attributes below are private for now.
    id_match = env_id_re.search(id)
    if not id_match:
        message = 'Attempted to register malformed environment ID: {}. (Currently all IDs must be of the form {}.)'
        raise error.Error(message.format(id, env_id_re.pattern))
    self._env_name = id_match.group(1)
    self._kwargs = kwargs if kwargs is not None else {}
示例5: make
# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def make(self, path, **kwargs):
    """Instantiate the environment registered under ``path``.

    Args:
        path: Identifier passed to ``self.spec``.
        **kwargs: Forwarded to ``spec.make``.

    Returns:
        The created environment, wrapped in ``TimeLimit`` when its spec
        declares ``max_episode_steps`` and the spec is not tagged ``'vnc'``.
    """
    # Only mention the keyword arguments in the log line when there are any.
    if kwargs:
        log_args = ('Making new env: %s (%s)', path, kwargs)
    else:
        log_args = ('Making new env: %s', path)
    logger.info(*log_args)

    spec = self.spec(path)
    env = spec.make(**kwargs)

    # Environments that still override _reset/_step are patched unless they
    # set _gym_disable_underscore_compat = True.
    legacy_hooks_present = hasattr(env, "_reset") and hasattr(env, "_step")
    if legacy_hooks_present and not getattr(env, "_gym_disable_underscore_compat", False):
        patch_deprecated_methods(env)

    if env.spec.max_episode_steps is None or spec.tags.get('vnc'):
        return env

    from gym.wrappers.time_limit import TimeLimit
    return TimeLimit(env, max_episode_steps=env.spec.max_episode_steps)
示例6: create_multi_agent_curried_policy_wrapper
# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def create_multi_agent_curried_policy_wrapper(
    mon_dir, env_name, num_envs, embed_index, max_steps, state_shape=None, add_zoo=False, num_zoo=5
):
    """Yield a curried vec env together with its multi-policy wrapper.

    Builds ``num_envs`` time-limited environments, loads a "zoo" policy for
    agent ``1 - embed_index``, creates ten constant policies (plus
    ``num_zoo`` references to the zoo policy when ``add_zoo`` is set),
    combines them in a ``MultiPolicyWrapper`` and curries that wrapper into
    the vec env at ``embed_index``.

    Args:
        mon_dir: Passed to ``make_env`` as ``out_dir``.
        env_name: Environment name used for ``make_env`` and ``load_policy``.
        num_envs: Number of environments in the vec env.
        embed_index: Index of the agent replaced by the policy wrapper.
        max_steps: ``max_episode_steps`` for the ``TimeLimit`` pre-wrapper.
        state_shape: Forwarded to ``_get_constant_policy``.
        add_zoo: Whether to append the zoo policy to the policy list.
        num_zoo: How many times the zoo policy is appended.

    Yields:
        tuple: ``(vec_env, policy_wrapper, zoo)``.
    """
    def episode_limit(env):
        return time_limit.TimeLimit(env, max_episode_steps=max_steps)

    def env_fn(i):
        return make_env(env_name, seed=42, i=i, out_dir=mon_dir, pre_wrappers=[episode_limit])

    # Bind ``i`` as a default argument: a plain ``lambda: env_fn(i)`` looks
    # ``i`` up when it is called, by which time the comprehension has
    # finished and every factory would receive the last index.
    vec_env = make_dummy_vec_multi_env([lambda i=i: env_fn(i) for i in range(num_envs)])

    zoo = load_policy(
        policy_path="1",
        policy_type="zoo",
        env=vec_env,
        env_name=env_name,
        index=1 - embed_index,
        transparent_params=None,
    )
    half_env = FakeSingleSpacesVec(vec_env, agent_id=embed_index)
    policies = [
        _get_constant_policy(
            half_env, constant_value=half_env.action_space.sample(), state_shape=state_shape
        )
        for _ in range(10)
    ]
    if add_zoo:
        policies += [zoo] * num_zoo
    policy_wrapper = MultiPolicyWrapper(policies=policies, num_envs=num_envs)
    vec_env = CurryVecEnv(
        venv=vec_env, policy=policy_wrapper, agent_idx=embed_index, deterministic=False
    )
    vec_env = FlattenSingletonVecEnv(vec_env)
    try:
        yield vec_env, policy_wrapper, zoo
    finally:
        # Close the wrapper even if the consumer raises or abandons the
        # generator.
        policy_wrapper.close()
示例7: test_lstm_train
# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def test_lstm_train():
    """Test that LSTM models are able to achieve >=150 (out of 500) reward on CartPoleNoVelEnv.

    This environment requires memory to perform well in.
    """
    def make_env(i):
        env = CartPoleNoVelEnv()
        env = TimeLimit(env, max_episode_steps=500)
        env = bench.Monitor(env, None, allow_early_resets=True)
        env.seed(i)
        return env

    # Bind ``i`` as a default argument: with a plain ``lambda: make_env(i)``
    # the index is looked up only when the factory is called, after the
    # comprehension has finished, so every worker would get the same seed.
    env = SubprocVecEnv([lambda i=i: make_env(i) for i in range(NUM_ENVS)])
    env = VecNormalize(env)
    model = PPO2(MlpLstmPolicy, env, n_steps=128, nminibatches=NUM_ENVS, lam=0.95, gamma=0.99,
                 noptepochs=10, ent_coef=0.0, learning_rate=3e-4, cliprange=0.2, verbose=1)

    eprewmeans = []

    def reward_callback(local, _):
        # Record the mean episode reward currently held in the model's
        # episode-info buffer. The list is only appended to, never rebound,
        # so no ``nonlocal`` declaration is needed.
        eprewmeans.append(safe_mean([ep_info['r'] for ep_info in local['ep_info_buf']]))

    model.learn(total_timesteps=100000, callback=reward_callback)

    # Maximum episode reward is 500.
    # In CartPole-v1, a non-recurrent policy can easily get >= 450.
    # In CartPoleNoVelEnv, a non-recurrent policy doesn't get more than ~50.
    # LSTM policies can reach above 400, but it varies a lot between runs; consistently get >=150.
    # See PR #244 for more detailed benchmarks.
    average_reward = sum(eprewmeans[-NUM_EPISODES_FOR_SCORE:]) / NUM_EPISODES_FOR_SCORE
    assert average_reward >= 150, "Mean reward below 150; per-episode rewards {}".format(average_reward)
示例8: __new__
# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def __new__(cls, *args, **kwargs):
    """Create a ``BulletEnv`` for bullet environments, else a plain instance.

    Args:
        args: Positional arguments; ``args[0]`` may be the env and
            ``args[1]`` the env name.
        kwargs: Keyword arguments; ``env`` and ``env_name`` are inspected.

    Returns:
        A ``BulletEnv`` when the supplied env or env name matches an entry
        of ``_get_bullet_env_list()``; otherwise a new instance created by
        the parent ``__new__``.
    """
    # First try to recognise a bullet environment from the env object.
    candidate = None
    if 'env' in kwargs:
        candidate = kwargs['env']
    elif args and isinstance(args[0], TimeLimit):
        # A positional env is only considered when it is a TimeLimit, the
        # type produced by gym.make().
        candidate = args[0]

    if candidate:
        if any(candidate.env.spec.id == name
               for name in _get_bullet_env_list()):
            return BulletEnv(candidate)

    # Otherwise fall back to the environment name.
    requested_name = ''
    if 'env_name' in kwargs:
        requested_name = kwargs['env_name']
    elif len(args) >= 2:
        requested_name = args[1]

    if requested_name != '':
        if any(requested_name == name
               for name in _get_bullet_env_list()):
            return BulletEnv(gym.make(requested_name))

    return super(GarageEnv, cls).__new__(cls)
示例9: __init__
# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def __init__(self, env=None, env_name='', is_image=False):
    """Set up the wrapper around a gym environment.

    Args:
        env: Environment to wrap; used only when ``env_name`` is empty.
        env_name (str): When non-empty, the wrapped environment is created
            with ``gym.make(env_name)`` instead of using ``env``.
        is_image (bool): Forwarded to ``akro.from_gym`` when converting the
            observation space.
    """
    # Both values are kept because they are needed for deserialization.
    self._env_name = env_name
    self._env = env

    wrapped = gym.make(env_name) if env_name else env
    super().__init__(wrapped)

    # Convert the gym spaces to their akro equivalents.
    self.action_space = akro.from_gym(self.env.action_space)
    self.observation_space = akro.from_gym(
        self.env.observation_space, is_image=is_image)
    self._spec = EnvSpec(
        action_space=self.action_space,
        observation_space=self.observation_space,
    )
示例10: step
# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def step(self, action):
    """Step the wrapped env and separate time-limit endings from real ones.

    Args:
        action: Action forwarded to ``self.env.step``.

    Returns:
        tuple: ``(observation, reward, done, info)``. When ``info`` carries
        a ``'TimeLimit.truncated'`` entry, the ``done`` reported by the
        wrapped env is stored under ``'GarageEnv.TimeLimitTerminated'`` and
        the returned ``done`` is the negation of the truncation flag.
        Otherwise the wrapped env's result is returned unchanged.
    """
    observation, reward, done, info = self.env.step(action)

    if 'TimeLimit.truncated' not in info:
        return observation, reward, done, info

    # Keep the raw done signal in the info dict and report "done" only when
    # the episode was not truncated by the time limit.
    info['GarageEnv.TimeLimitTerminated'] = done
    env_terminated = not info['TimeLimit.truncated']
    return observation, reward, env_terminated, info
示例11: __init__
# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def __init__(self, id, entry_point=None, trials=100, reward_threshold=None, local_only=False, kwargs=None, nondeterministic=False, tags=None, max_episode_steps=None, max_episode_seconds=None, timestep_limit=None):
    """Store the registration data of one environment spec.

    Two backwards-compatibility paths can override ``max_episode_steps``:
    a truthy ``wrapper_config.TimeLimit.max_episode_steps`` tag, and then a
    non-``None`` ``timestep_limit`` argument.

    Args:
        id: Environment ID; must match ``env_id_re``.
        entry_point: Stored as ``self._entry_point``.
        trials: Evaluation parameter, stored as-is.
        reward_threshold: Evaluation parameter, stored as-is.
        local_only: Stored as ``self._local_only``.
        kwargs: Stored as ``self._kwargs`` (empty dict when ``None``).
        nondeterministic: Environment property, stored as-is.
        tags: Optional dict of tags. The dict that is passed in is updated
            in place with the step-limit entry.
        max_episode_steps: Step limit for an episode.
        max_episode_seconds: Stored as-is.
        timestep_limit: Legacy name for ``max_episode_steps``.

    Raises:
        error.Error: If ``id`` does not match ``env_id_re``.
    """
    step_limit_tag = 'wrapper_config.TimeLimit.max_episode_steps'

    self.id = id
    # Evaluation parameters
    self.trials = trials
    self.reward_threshold = reward_threshold
    # Environment properties
    self.nondeterministic = nondeterministic

    tag_map = tags if tags is not None else {}
    self.tags = tag_map

    # BACKWARDS COMPAT 2017/1/18: a truthy limit found in the tags replaces
    # the argument.
    # TODO: emit a deprecation warning for the wrapper_config.TimeLimit tag
    # (planned for after 2017/02/18).
    tagged_limit = tag_map.get(step_limit_tag)
    if tagged_limit:
        max_episode_steps = tagged_limit
    tag_map[step_limit_tag] = max_episode_steps

    # BACKWARDS COMPAT 2017/1/31: the legacy keyword wins when given. The
    # tag written above is not updated by this override.
    # TODO: emit a deprecation warning for register(timestep_limit=...)
    # (planned for after 2017/03/01).
    if timestep_limit is not None:
        max_episode_steps = timestep_limit

    self.max_episode_steps = max_episode_steps
    self.max_episode_seconds = max_episode_seconds

    # The attributes below are private for now.
    id_match = env_id_re.search(id)
    if not id_match:
        message = 'Attempted to register malformed environment ID: {}. (Currently all IDs must be of the form {}.)'
        raise error.Error(message.format(id, env_id_re.pattern))
    self._env_name = id_match.group(1)
    self._entry_point = entry_point
    self._local_only = local_only
    self._kwargs = kwargs if kwargs is not None else {}
示例12: make
# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def make(self, id):
    """Instantiate the environment registered under ``id``.

    Args:
        id: Identifier passed to ``self.spec``.

    Returns:
        The created environment, wrapped in ``TimeLimit`` unless the spec is
        tagged ``'vnc'`` or has no timestep limit.
    """
    logger.info('Making new env: %s', id)
    spec = self.spec(id)
    env = spec.make()

    # Environments that define both legacy hooks are always patched here.
    if all(hasattr(env, hook) for hook in ("_reset", "_step")):
        patch_deprecated_methods(env)

    if env.spec.timestep_limit is None or spec.tags.get('vnc'):
        return env

    from gym.wrappers.time_limit import TimeLimit
    return TimeLimit(
        env,
        max_episode_steps=env.spec.max_episode_steps,
        max_episode_seconds=env.spec.max_episode_seconds,
    )
示例13: meta_reset
# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def meta_reset(self, seed):
    """Build a freshly randomised, time-limited hopper env and expose it.

    Seeds NumPy's global RNG and the new environment with ``seed``, attaches
    a spec, wraps the env in ``TimeLimit`` and rebinds ``self.step``,
    ``self.render`` and ``self.reset`` to the wrapped env's methods.

    Args:
        seed: Seed for ``np.random.seed`` and ``env.seed``.
    """
    np.random.seed(seed)

    randomization = dict(
        rand_mass=self.rand_mass,
        rand_gravity=self.rand_gravity,
        rand_friction=self.rand_friction,
        rand_thickness=self.rand_thickness,
    )
    hopper = RandomWeightHopperEnv(**randomization)

    # Spec values are based on Hopper-v2.
    spec = EnvSpec(
        'RandomWeightHopperEnv-v0',
        entry_point='generic_rl.envs.mujoco:RandomWeightHopperEnv',
        max_episode_steps=1000,
        reward_threshold=3800.0,
    )
    hopper._spec = spec
    hopper.seed(seed)

    # Apply the episode limits declared on the spec.
    limited = TimeLimit(
        hopper,
        max_episode_steps=spec.max_episode_steps,
        max_episode_seconds=spec.max_episode_seconds,
    )
    self.env = limited

    # Fix for done flags: reset once before exposing the env's methods.
    self.env.reset()
    for method_name in ('step', 'render', 'reset'):
        setattr(self, method_name, getattr(limited, method_name))
示例14: _make
# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def _make(id_, env_kwargs=None):
    """
    Recreating the gym make function from gym/envs/registration.py
    so that it can support extra arguments for the environment

    :param id_: (str) The environment ID
    :param env_kwargs: (dict) The extra arguments for the environment
    :return: The created environment, wrapped in ``TimeLimit`` unless the
        spec is tagged 'vnc' or has no timestep limit
    """
    if env_kwargs is None:
        env_kwargs = {}

    # getting the spec from the ID we want
    spec = registry.spec(id_)

    # Keeping the checks and safe guards of the old code.
    # The message is formatted with ``id_``: the previous ``spec.id_`` is not
    # an attribute of the spec, so a failing check raised AttributeError
    # instead of showing this hint.
    assert spec._entry_point is not None, 'Attempting to make deprecated env {}. ' \
                                          '(HINT: is there a newer registered version of this env?)'.format(id_)

    if callable(spec._entry_point):
        env = spec._entry_point(**env_kwargs)
    else:
        cls = load(spec._entry_point)
        # create the env, with the original kwargs, and the new ones overriding them if needed
        env = cls(**{**spec._kwargs, **env_kwargs})

    # Make the environment aware of which spec it came from.
    env.unwrapped.spec = spec

    # Keeping the old patching system for _reset, _step and timestep limit
    if hasattr(env, "_reset") and hasattr(env, "_step") and not getattr(env, "_gym_disable_underscore_compat", False):
        patch_deprecated_methods(env)
    if (env.spec.timestep_limit is not None) and not spec.tags.get('vnc'):
        from gym.wrappers.time_limit import TimeLimit
        env = TimeLimit(env,
                        max_episode_steps=env.spec.max_episode_steps,
                        max_episode_seconds=env.spec.max_episode_seconds)
    return env
示例15: get_timesteps_per_episode
# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def get_timesteps_per_episode(env):
    """Return the episode step limit configured for ``env``, if any.

    Lookup order:

    1. ``env._max_episode_steps``, when the attribute exists.
    2. The ``wrapper_config.TimeLimit.max_episode_steps`` tag of
       ``env.spec``, when the spec has tags and the tag is not ``None``.
    3. The same lookup on ``env.env``, when the attribute exists.

    A missing or ``None`` spec (or a spec without the tag) no longer stops
    the search: the wrapped environment is consulted instead of raising
    ``AttributeError`` or returning ``None`` early.

    Args:
        env: Environment or wrapper object to inspect.

    Returns:
        The step limit found, or ``None`` when none is configured.
    """
    if hasattr(env, "_max_episode_steps"):
        return env._max_episode_steps

    # ``spec`` may be absent or None, and a spec may lack ``tags``.
    tags = getattr(getattr(env, "spec", None), "tags", None)
    if tags is not None:
        limit = tags.get("wrapper_config.TimeLimit.max_episode_steps")
        if limit is not None:
            return limit

    if hasattr(env, "env"):
        return get_timesteps_per_episode(env.env)
    return None