This article collects typical usage examples of Python's gym.Env. If you are wondering how gym.Env is used in practice, or are looking for concrete examples, the curated code samples below may help. You can also explore further usage examples from the gym module, in which this class is defined.
The following 15 code examples of gym.Env are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
Example 1: get_wrapper_by_name
# Required import: import gym [as alias]
# Or alternatively: from gym import Env [as alias]
def get_wrapper_by_name(env, classname):
    """Given a gym environment, possibly wrapped multiple times, return the wrapper
    of the class named classname, or raise ValueError if no such wrapper was applied.

    Parameters
    ----------
    env: gym.Env or gym.Wrapper
        gym environment
    classname: str
        name of the wrapper

    Returns
    -------
    wrapper: gym.Wrapper
        wrapper named classname
    """
    currentenv = env
    while True:
        if classname == currentenv.class_name():
            return currentenv
        elif isinstance(currentenv, gym.Wrapper):
            currentenv = currentenv.env
        else:
            raise ValueError("Couldn't find wrapper named %s" % classname)
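For orientation, a minimal usage sketch (my own, not part of the source above) might look like this; it assumes an older Gym release whose wrappers expose the class_name() classmethod:

import gym
from gym.wrappers import TimeLimit

env = TimeLimit(gym.make('CartPole-v1'), max_episode_steps=100)
# Walk down the wrapper chain until a wrapper whose class_name() is 'TimeLimit' is found.
time_limit = get_wrapper_by_name(env, 'TimeLimit')
assert isinstance(time_limit, TimeLimit)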
Example 2: make_mujoco_env
# Required import: import gym [as alias]
# Or alternatively: from gym import Env [as alias]
# (besides gym, this snippet also uses os, mpi4py's MPI, and baselines' set_global_seeds, Monitor and logger)
def make_mujoco_env(env_id, seed, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    myseed = seed + 1000 * rank if seed is not None else None
    set_global_seeds(myseed)
    env = gym.make(env_id)
    logger_path = None if logger.get_dir() is None else os.path.join(logger.get_dir(), str(rank))
    env = Monitor(env, logger_path, allow_early_resets=True)
    env.seed(seed)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
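Calling the helper is a one-liner; the sketch below is illustrative and assumes a working MuJoCo installation plus the baselines package:

env = make_mujoco_env('HalfCheetah-v2', seed=0, reward_scale=0.1)
obs = env.reset()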
Example 3: run
# Required import: import gym [as alias]
# Or alternatively: from gym import Env [as alias]
def run(self, *agent: Agent, **agents: Agent) -> Union[float, Dict[str, float]]:
    """Run one agent or multiple named agents.

    Parameters
    ----------
    *agent: Agent (optional)
        the agent to play a single-agent environment
    **agents: Agent
        the named agents to play a multi-agent environment

    Returns
    -------
    float:
        the mean reward (per agent in the multi-agent case)
    """
    san = "single_agent_name"
    sum_rewards: Dict[str, float] = {name: 0.0 for name in agents} if agents else {san: 0.0}
    for _ in range(self.num_repetitions):
        rewards = self._run_once(*agent, **agents)
        for name, value in rewards.items():
            sum_rewards[name] += value
    mean_rewards = {name: float(value) / self.num_repetitions for name, value in sum_rewards.items()}
    if isinstance(self.env, gym.Env):
        return mean_rewards[san]
    return mean_rewards
Example 4: __init__
# Required import: import gym [as alias]
# Or alternatively: from gym import Env [as alias]
def __init__(self,
             env: gym.Env,
             parameters: Union[None, str, np.ndarray] = None,
             action_in_domain: bool = False,
             next_state_in_domain: bool = False):
    """The abstract base class for reward functions.

    Parameters
    ----------
    env: gym.Env
        A gym environment for which the reward function is defined.
    parameters: Union[None, str, np.ndarray]
        A numpy ndarray containing the parameters. If the value is 'random',
        initializes with random parameters (mean 0, standard deviation 1).
    action_in_domain: bool
        Indicates whether actions are in the domain, i.e. R(s, a) or R(s, a, s').
    next_state_in_domain: bool
        Indicates whether next states are in the domain, i.e. R(s, a, s').
    """
    self.env = env
    self.action_in_domain = action_in_domain
    if next_state_in_domain:
        assert action_in_domain
    self.next_state_in_domain = next_state_in_domain
    self.parameters = parameters
Example 5: unwrap_env
# Required import: import gym [as alias]
# Or alternatively: from gym import Env [as alias]
def unwrap_env(env: gym.Env,
               until_class: Union[None, gym.Env] = None) -> gym.Env:
    """Unwrap a wrapped env until we get an instance of until_class.

    If until_class is None, env will be unwrapped down to the lowest layer.
    """
    if until_class is None:
        while hasattr(env, 'env'):
            env = env.env
        return env

    while hasattr(env, 'env') and not isinstance(env, until_class):
        env = env.env
    if not isinstance(env, until_class):
        raise ValueError(
            "Unwrapping env did not yield an instance of class {}".format(
                until_class))
    return env
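A quick illustration (my own sketch; it assumes an older Gym version where 'FrozenLake-v0' is a TimeLimit wrapper around a DiscreteEnv):

import gym
from gym.envs.toy_text.discrete import DiscreteEnv

env = gym.make('FrozenLake-v0')                 # typically TimeLimit(FrozenLakeEnv)
raw_env = unwrap_env(env, until_class=DiscreteEnv)
assert isinstance(raw_env, DiscreteEnv)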
Example 6: test_is_unwrappable_to
# Required import: import gym [as alias]
# Or alternatively: from gym import Env [as alias]
def test_is_unwrappable_to():
    assert is_unwrappable_to(make_env('FrozenLake-v0'), TimeLimit)
    assert is_unwrappable_to(make_env('FrozenLake-v0'), DiscreteEnv)
    assert is_unwrappable_to(
        feature_wrapper.make('FrozenLake-v0'), FrozenLakeFeatureWrapper)
    assert is_unwrappable_to(
        feature_wrapper.make('FrozenLake8x8-v0'), FrozenLakeFeatureWrapper)
    assert is_unwrappable_to(
        feature_wrapper.make('FrozenLake-v0'), feature_wrapper.FeatureWrapper)
    env = feature_wrapper.make('FrozenLake-v0')
    reward_function = FeatureBasedRewardFunction(env, 'random')
    env = RewardWrapper(env, reward_function)
    assert is_unwrappable_to(env, RewardWrapper)
    assert is_unwrappable_to(env, feature_wrapper.FeatureWrapper)
    assert is_unwrappable_to(env, DiscreteEnv)
    assert is_unwrappable_to(env, gym.Env)
Example 7: make_env
# Required import: import gym [as alias]
# Or alternatively: from gym import Env [as alias]
def make_env(env_id: str):
    """Make a basic gym environment, without any special wrappers.

    Parameters
    ----------
    env_id: str
        The environment's id, e.g. 'FrozenLake-v0'.

    Returns
    -------
    gym.Env
        A gym environment.
    """
    assert env_id in ENV_IDS
    if env_id not in ENV_IDS_NON_GYM:
        env = gym.make(env_id)
    else:
        if env_id == 'MazeWorld0-v0':
            env = TimeLimit(MazeWorld(map_id=0), max_episode_steps=200)
        elif env_id == 'MazeWorld1-v0':
            env = TimeLimit(MazeWorld(map_id=1), max_episode_steps=200)
        else:
            raise NotImplementedError()
    return env
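A hypothetical call, under the assumption that 'FrozenLake-v0' is contained in the project's ENV_IDS list:

env = make_env('FrozenLake-v0')
obs = env.reset()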
Example 8: init_rl
# Required import: import gym [as alias]
# Or alternatively: from gym import Env [as alias]
def init_rl(
    env: Union[gym.Env, VecEnv],
    model_class: Type[BaseRLModel] = stable_baselines.PPO2,
    policy_class: Type[BasePolicy] = MlpPolicy,
    **model_kwargs,
):
    """Instantiate a policy for the provided environment.

    Args:
        env: The (vector) environment.
        model_class: A Stable Baselines RL algorithm.
        policy_class: A Stable Baselines compatible policy network class.
        model_kwargs (dict): kwargs passed through to the algorithm.
            Note: anything specified in `policy_kwargs` is passed through by the
            algorithm to the policy network.

    Returns:
        An RL algorithm.
    """
    return model_class(
        policy_class, env, **model_kwargs
    )  # pytype: disable=not-instantiable
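A usage sketch, assuming Stable Baselines 2 is installed; since the default PPO2 algorithm works on vectorized environments, the Gym env is wrapped in a DummyVecEnv first:

import gym
from stable_baselines.common.vec_env import DummyVecEnv

venv = DummyVecEnv([lambda: gym.make('CartPole-v1')])
model = init_rl(venv, n_steps=128)   # extra kwargs are forwarded to PPO2
model.learn(total_timesteps=10000)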
Example 9: test_model_based
# Required import: import gym [as alias]
# Or alternatively: from gym import Env [as alias]
def test_model_based(env: gym.Env) -> None:
    """Smoke test for each of the ModelBasedEnv methods, with type checks.

    Raises:
        AssertionError if the test fails.
    """
    state = env.initial_state()
    assert env.state_space.contains(state)
    action = env.action_space.sample()
    new_state = env.transition(state, action)
    assert env.state_space.contains(new_state)
    reward = env.reward(state, action, new_state)
    assert isinstance(reward, float)
    done = env.terminal(state, 0)
    assert isinstance(done, bool)
    obs = env.obs_from_state(state)
    assert env.observation_space.contains(obs)
    next_obs = env.obs_from_state(new_state)
    assert env.observation_space.contains(next_obs)
Example 10: __init__
# Required import: import gym [as alias]
# Or alternatively: from gym import Env [as alias]
def __init__(self, verbose: int = 0):
    super(BaseCallback, self).__init__()
    # The RL model
    self.model = None  # type: Optional[BaseRLModel]
    # An alias for self.model.get_env(), the environment used for training
    self.training_env = None  # type: Union[gym.Env, VecEnv, None]
    # Number of times the callback was called
    self.n_calls = 0  # type: int
    # n_envs * n times env.step() was called
    self.num_timesteps = 0  # type: int
    self.verbose = verbose
    self.locals = None  # type: Optional[Dict[str, Any]]
    self.globals = None  # type: Optional[Dict[str, Any]]
    self.logger = None  # type: Optional[logger.Logger]
    # Sometimes, for event callback, it is useful
    # to have access to the parent object
    self.parent = None  # type: Optional[BaseCallback]
    # Type hint as string to avoid circular import
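To make the attributes above concrete, here is a minimal custom callback sketch (my own example, assuming the Stable Baselines callback API where _on_step() is called after every environment step and returning False aborts training):

class ProgressCallback(BaseCallback):
    def __init__(self, verbose=0):
        super(ProgressCallback, self).__init__(verbose)

    def _on_step(self) -> bool:
        # self.n_calls counts callback invocations, self.num_timesteps counts env steps
        if self.verbose and self.n_calls % 1000 == 0:
            print('timesteps so far:', self.num_timesteps)
        return True  # keep training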
Example 11: sync_envs_normalization
# Required import: import gym [as alias]
# Or alternatively: from gym import Env [as alias]
def sync_envs_normalization(env: Union[gym.Env, VecEnv], eval_env: Union[gym.Env, VecEnv]) -> None:
    """
    Sync eval and train environments when using VecNormalize.

    :param env: (Union[gym.Env, VecEnv]) the training environment
    :param eval_env: (Union[gym.Env, VecEnv]) the evaluation environment
    """
    env_tmp, eval_env_tmp = env, eval_env
    # Special case for the _UnvecWrapper
    # Avoid circular import
    from stable_baselines.common.base_class import _UnvecWrapper
    if isinstance(env_tmp, _UnvecWrapper):
        return
    while isinstance(env_tmp, VecEnvWrapper):
        if isinstance(env_tmp, VecNormalize):
            # sync reward and observation scaling
            eval_env_tmp.obs_rms = deepcopy(env_tmp.obs_rms)
            eval_env_tmp.ret_rms = deepcopy(env_tmp.ret_rms)
        env_tmp = env_tmp.venv
        # Make pytype happy, in theory env and eval_env have the same type
        assert isinstance(eval_env_tmp, VecEnvWrapper), "the second env differs from the first env"
        eval_env_tmp = eval_env_tmp.venv
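An illustrative usage sketch (assuming Stable Baselines 2 with VecNormalize): the normalization statistics collected on the training env are copied to the evaluation env before evaluating:

import gym
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

train_env = VecNormalize(DummyVecEnv([lambda: gym.make('CartPole-v1')]))
eval_env = VecNormalize(DummyVecEnv([lambda: gym.make('CartPole-v1')]), training=False)
# ... train on train_env, then sync statistics before evaluation ...
sync_envs_normalization(train_env, eval_env)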
Example 12: make_robotics_env
# Required import: import gym [as alias]
# Or alternatively: from gym import Env [as alias]
def make_robotics_env(env_id, seed, rank=0, allow_early_resets=True):
    """
    Create a wrapped, monitored gym.Env for the Gym robotics environments.

    :param env_id: (str) the environment ID
    :param seed: (int) the initial seed for RNG
    :param rank: (int) the rank of the environment (for logging)
    :param allow_early_resets: (bool) allows early reset of the environment
    :return: (Gym Environment) the robotics environment
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    keys = ['observation', 'desired_goal']
    # TODO: remove try-except once most users are running modern Gym
    try:  # for modern Gym (>=0.15.4)
        from gym.wrappers import FilterObservation, FlattenObservation
        env = FlattenObservation(FilterObservation(env, keys))
    except ImportError:  # for older Gym (<=0.15.3)
        from gym.wrappers import FlattenDictWrapper  # pytype:disable=import-error
        env = FlattenDictWrapper(env, keys)
    env = Monitor(
        env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
        info_keywords=('is_success',), allow_early_resets=allow_early_resets)
    env.seed(seed)
    return env
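An illustrative call, assuming the Gym robotics environments (and MuJoCo) are installed:

env = make_robotics_env('FetchReach-v1', seed=0, rank=0)
obs = env.reset()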
Example 13: __init__
# Required import: import gym [as alias]
# Or alternatively: from gym import Env [as alias]
def __init__(self, *, env: Union[gym.Env, VecEnv], model: 'BaseRLModel', n_steps: int):
    """
    Collect experience by running `n_steps` in the environment.
    Note: if this is a `VecEnv`, the total number of steps will
    be `n_steps * n_envs`.

    :param env: (Union[gym.Env, VecEnv]) The environment to learn from
    :param model: (BaseRLModel) The model to learn
    :param n_steps: (int) The number of steps to run for each environment
    """
    self.env = env
    self.model = model
    n_envs = env.num_envs
    self.batch_ob_shape = (n_envs * n_steps,) + env.observation_space.shape
    self.obs = np.zeros((n_envs,) + env.observation_space.shape, dtype=env.observation_space.dtype.name)
    self.obs[:] = env.reset()
    self.n_steps = n_steps
    self.states = model.initial_state
    self.dones = [False for _ in range(n_envs)]
    self.callback = None  # type: Optional[BaseCallback]
    self.continue_training = True
    self.n_envs = n_envs
Example 14: step
# Required import: import gym [as alias]
# Or alternatively: from gym import Env [as alias]
def step(self, action):
    """gym.Env step function.

    Args:
        action (int): action taken.

    Returns:
        np.ndarray: augmented observation.
        float: reward.
        bool: terminal signal.
        dict: environment info.
    """
    next_obs, reward, done, info = self.env.step(action)
    # Note: the concatenation assumes `action` is array-like (e.g. a flattened or
    # one-hot action); a scalar action would need to be wrapped, e.g. [action].
    next_obs = np.concatenate([next_obs, action, [reward], [done]])
    return next_obs, reward, done, info
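To see the idea in isolation, here is a small self-contained wrapper sketch (hypothetical, not the class the method above belongs to) that augments observations the same way; np.atleast_1d is used so that scalar discrete actions can be concatenated as well:

import gym
import numpy as np

class AugmentObservation(gym.Wrapper):
    """Append the action, reward and done flag to each observation."""

    def step(self, action):
        next_obs, reward, done, info = self.env.step(action)
        next_obs = np.concatenate([next_obs, np.atleast_1d(action), [reward], [done]])
        return next_obs, reward, done, info

A complete wrapper would also have to widen observation_space accordingly; the sketch only shows the step() augmentation.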
Example 15: make_mujoco_env
# Required import: import gym [as alias]
# Or alternatively: from gym import Env [as alias]
def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = Monitor(env, logger.get_dir())
    env.seed(seed)
    return env