

Python gym.Env Code Examples

This article collects typical usage examples of the Python class gym.Env. If you are wondering what gym.Env is for, how to use it, or want concrete examples, the curated code samples below may help. You can also explore further usage examples from the gym package.


The following presents 15 code examples of gym.Env, ordered by popularity by default.

Example 1: get_wrapper_by_name

# Required import: import gym [as alias]
# Or: from gym import Env [as alias]
def get_wrapper_by_name(env, classname):
    """Given a gym environment, possibly wrapped multiple times, return the
    wrapper whose class is named classname, or raise ValueError if no such
    wrapper was applied.

    Parameters
    ----------
    env: gym.Env or gym.Wrapper
        gym environment
    classname: str
        name of the wrapper

    Returns
    -------
    wrapper: gym.Wrapper
        wrapper named classname
    """
    currentenv = env
    while True:
        # Only gym.Wrapper subclasses define class_name(); checking the type
        # first avoids an AttributeError once the bare env is reached.
        if isinstance(currentenv, gym.Wrapper):
            if classname == currentenv.class_name():
                return currentenv
            currentenv = currentenv.env
        else:
            raise ValueError("Couldn't find wrapper named %s" % classname)
Author: Hwhitetooth, Project: lirpg, Lines: 26, Source: misc_util.py
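
A minimal usage sketch (an assumption for illustration, not code from the lirpg repo), retrieving a TimeLimit wrapper by name:

import gym
from gym.wrappers import TimeLimit

# gym.make usually applies TimeLimit already; wrapping again is just illustrative.
env = TimeLimit(gym.make('CartPole-v1'), max_episode_steps=100)
wrapper = get_wrapper_by_name(env, 'TimeLimit')
assert isinstance(wrapper, TimeLimit)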

Example 2: make_mujoco_env

# Required import: import gym [as alias]
# Or: from gym import Env [as alias]
def make_mujoco_env(env_id, seed, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    myseed = seed + 1000 * rank if seed is not None else None
    set_global_seeds(myseed)
    env = gym.make(env_id)
    logger_path = None if logger.get_dir() is None else os.path.join(logger.get_dir(), str(rank))
    env = Monitor(env, logger_path, allow_early_resets=True)
    env.seed(seed)

    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)

    return env 
Author: MaxSobolMark, Project: HardRLWithYoutube, Lines: 19, Source: cmd_util.py
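
A hedged usage sketch (assumes baselines, mpi4py, and a working MuJoCo install; the env id is illustrative). Each MPI worker ends up seeded with seed + 1000 * rank, so parallel workers collect decorrelated trajectories:

# Launch under MPI so each worker gets its own seed, e.g.:
#   mpirun -np 4 python train.py
env = make_mujoco_env('HalfCheetah-v2', seed=0, reward_scale=0.1)  # worker i is seeded with 0 + 1000 * i
obs = env.reset()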

Example 3: run

# Required import: import gym [as alias]
# Or: from gym import Env [as alias]
def run(self, *agent: Agent, **agents: Agent) -> Union[float, Dict[str, float]]:
    """Run one agent or multiple named agents

    Parameters
    ----------
    *agent: Agent (optional)
        the agent to play a single-agent environment
    **agents: Agent
        the named agents to play a multi-agent environment

    Returns
    -------
    Union[float, Dict[str, float]]:
        the mean reward for a single-agent environment, or a dict of
        mean rewards keyed by agent name for a multi-agent environment
    """
    san = "single_agent_name"
    sum_rewards: Dict[str, float] = {name: 0.0 for name in agents} if agents else {san: 0.0}
    for _ in range(self.num_repetitions):
        rewards = self._run_once(*agent, **agents)
        for name, value in rewards.items():
            sum_rewards[name] += value
    mean_rewards = {name: float(value) / self.num_repetitions for name, value in sum_rewards.items()}
    if isinstance(self.env, gym.Env):
        return mean_rewards[san]
    return mean_rewards
Author: facebookresearch, Project: nevergrad, Lines: 27, Source: base.py

Example 4: __init__

# Required import: import gym [as alias]
# Or: from gym import Env [as alias]
def __init__(self,
             env: gym.Env,
             parameters: Union[None, str, np.ndarray] = None,
             action_in_domain: bool = False,
             next_state_in_domain: bool = False):
    """The abstract base class for reward functions.

    Parameters
    ----------
    env: gym.Env
        A gym environment for which the reward function is defined.
    parameters: Union[None, str, np.ndarray]
        A numpy ndarray containing the parameters. If the value is 'random',
        initializes with random parameters (mean 0, standard deviation 1).
    action_in_domain: bool
        Indicates whether actions are in the domain, i.e. R(s, a) or R(s, a, s')
    next_state_in_domain: bool
        Indicates whether next states are in the domain, i.e. R(s, a, s')
    """
    self.env = env
    self.action_in_domain = action_in_domain
    if next_state_in_domain:
        assert action_in_domain
    self.next_state_in_domain = next_state_in_domain
    self.parameters = parameters
Author: JohannesHeidecke, Project: irl-benchmark, Lines: 27, Source: reward_function.py
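
To make the constructor contract concrete, here is a hypothetical minimal sketch mirroring it (the name LinearStateReward, the feature choice, and the reward method are all assumptions, not code from irl-benchmark):

import gym
import numpy as np

class LinearStateReward:
    """Sketch: linear state-only reward R(s) = w . s, i.e. action_in_domain=False."""

    def __init__(self, env: gym.Env, parameters='random'):
        self.env = env
        n = env.observation_space.shape[0]
        if isinstance(parameters, str) and parameters == 'random':
            # mirrors the documented init: mean 0, standard deviation 1
            parameters = np.random.standard_normal(n)
        self.parameters = parameters

    def reward(self, state: np.ndarray) -> float:
        return float(np.dot(self.parameters, state))

env = gym.make('MountainCar-v0')
rf = LinearStateReward(env)
print(rf.reward(env.reset()))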

Example 5: unwrap_env

# Required import: import gym [as alias]
# Or: from gym import Env [as alias]
def unwrap_env(env: gym.Env,
               until_class: Union[None, gym.Env] = None) -> gym.Env:
    """Unwrap wrapped env until we get an instance that is a until_class.

    If until_class is None, env will be unwrapped until the lowest layer.
    """
    if until_class is None:
        while hasattr(env, 'env'):
            env = env.env
        return env

    while hasattr(env, 'env') and not isinstance(env, until_class):
        env = env.env

    if not isinstance(env, until_class):
        raise ValueError(
            "Unwrapping env did not yield an instance of class {}".format(
                until_class))
    return env 
Author: JohannesHeidecke, Project: irl-benchmark, Lines: 21, Source: wrapper.py
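
A usage sketch (assumes an older gym where 'FrozenLake-v0' is registered and gym.make returns a TimeLimit-wrapped env):

import gym
from gym.envs.toy_text.frozen_lake import FrozenLakeEnv

env = gym.make('FrozenLake-v0')                  # typically TimeLimit(FrozenLakeEnv)
base = unwrap_env(env)                           # fully unwrapped
assert not hasattr(base, 'env')
fl = unwrap_env(env, until_class=FrozenLakeEnv)  # stop at a specific class
assert isinstance(fl, FrozenLakeEnv)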

Example 6: test_is_unwrappable_to

# Required import: import gym [as alias]
# Or: from gym import Env [as alias]
def test_is_unwrappable_to():
    assert is_unwrappable_to(make_env('FrozenLake-v0'), TimeLimit)
    assert is_unwrappable_to(make_env('FrozenLake-v0'), DiscreteEnv)
    assert is_unwrappable_to(
        feature_wrapper.make('FrozenLake-v0'), FrozenLakeFeatureWrapper)
    assert is_unwrappable_to(
        feature_wrapper.make('FrozenLake8x8-v0'), FrozenLakeFeatureWrapper)
    assert is_unwrappable_to(
        feature_wrapper.make('FrozenLake-v0'), feature_wrapper.FeatureWrapper)
    env = feature_wrapper.make('FrozenLake-v0')
    reward_function = FeatureBasedRewardFunction(env, 'random')
    env = RewardWrapper(env, reward_function)
    assert is_unwrappable_to(env, RewardWrapper)
    assert is_unwrappable_to(env, feature_wrapper.FeatureWrapper)
    assert is_unwrappable_to(env, DiscreteEnv)
    assert is_unwrappable_to(env, gym.Env) 
Author: JohannesHeidecke, Project: irl-benchmark, Lines: 18, Source: utils_wrapper_test.py
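
The helper is_unwrappable_to is not shown above; this is a plausible sketch of what the test exercises (the actual irl-benchmark implementation may differ):

import gym

def is_unwrappable_to(env: gym.Env, to_class: type) -> bool:
    """Walk the .env chain and report whether to_class appears in it."""
    while True:
        if isinstance(env, to_class):
            return True
        if not hasattr(env, 'env'):
            return False
        env = env.env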

Example 7: make_env

# Required import: import gym [as alias]
# Or: from gym import Env [as alias]
def make_env(env_id: str):
    """Make a basic gym environment, without any special wrappers.

    Parameters
    ----------
    env_id: str
        The environment's id, e.g. 'FrozenLake-v0'.

    Returns
    -------
    gym.Env
        A gym environment.
    """
    assert env_id in ENV_IDS
    if env_id not in ENV_IDS_NON_GYM:
        env = gym.make(env_id)
    else:
        if env_id == 'MazeWorld0-v0':
            env = TimeLimit(MazeWorld(map_id=0), max_episode_steps=200)
        elif env_id == 'MazeWorld1-v0':
            env = TimeLimit(MazeWorld(map_id=1), max_episode_steps=200)
        else:
            raise NotImplementedError()
    return env 
Author: JohannesHeidecke, Project: irl-benchmark, Lines: 25, Source: __init__.py

Example 8: init_rl

# Required import: import gym [as alias]
# Or: from gym import Env [as alias]
def init_rl(
    env: Union[gym.Env, VecEnv],
    model_class: Type[BaseRLModel] = stable_baselines.PPO2,
    policy_class: Type[BasePolicy] = MlpPolicy,
    **model_kwargs,
):
    """Instantiates a policy for the provided environment.

    Args:
        env: The (vector) environment.
        model_class: A Stable Baselines RL algorithm.
        policy_class: A Stable Baselines compatible policy network class.
        model_kwargs (dict): kwargs passed through to the algorithm.
          Note: anything specified in `policy_kwargs` is passed through by the
          algorithm to the policy network.

    Returns:
      An RL algorithm.
    """
    return model_class(
        policy_class, env, **model_kwargs
    )  # pytype: disable=not-instantiable 
Author: HumanCompatibleAI, Project: imitation, Lines: 24, Source: util.py
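
A usage sketch (assumes stable-baselines and TensorFlow 1.x are installed; the env and timestep count are illustrative):

import gym
from stable_baselines.common.vec_env import DummyVecEnv

venv = DummyVecEnv([lambda: gym.make('CartPole-v1')])
model = init_rl(venv, verbose=0)   # defaults: PPO2 with an MlpPolicy
model.learn(total_timesteps=1000)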

Example 9: test_model_based

# Required import: import gym [as alias]
# Or: from gym import Env [as alias]
def test_model_based(env: gym.Env) -> None:
    """Smoke test for each of the ModelBasedEnv methods with type checks.

    Raises:
        AssertionError if test fails.
    """
    state = env.initial_state()
    assert env.state_space.contains(state)

    action = env.action_space.sample()
    new_state = env.transition(state, action)
    assert env.state_space.contains(new_state)

    reward = env.reward(state, action, new_state)
    assert isinstance(reward, float)

    done = env.terminal(state, 0)
    assert isinstance(done, bool)

    obs = env.obs_from_state(state)
    assert env.observation_space.contains(obs)
    next_obs = env.obs_from_state(new_state)
    assert env.observation_space.contains(next_obs) 
Author: HumanCompatibleAI, Project: imitation, Lines: 25, Source: envs.py
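
A toy environment sketch that would satisfy this smoke test (the class and its dynamics are hypothetical, not from the imitation repo):

import gym
import numpy as np
from gym import spaces

class CountingEnv(gym.Env):
    """Deterministic counter: action 1 increments the state up to a horizon."""

    def __init__(self, horizon: int = 10):
        self.horizon = horizon
        self.state_space = spaces.Discrete(horizon + 1)
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(0.0, float(horizon), shape=(1,))

    def initial_state(self):
        return 0

    def transition(self, state, action):
        return int(min(state + action, self.horizon))

    def reward(self, state, action, new_state):
        return float(new_state - state)

    def terminal(self, state, step):
        return bool(state >= self.horizon)

    def obs_from_state(self, state):
        return np.array([float(state)], dtype=np.float32)

test_model_based(CountingEnv())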

Example 10: __init__

# Required import: import gym [as alias]
# Or: from gym import Env [as alias]
def __init__(self, verbose: int = 0):
    super(BaseCallback, self).__init__()
    # The RL model
    self.model = None  # type: Optional[BaseRLModel]
    # An alias for self.model.get_env(), the environment used for training
    self.training_env = None  # type: Union[gym.Env, VecEnv, None]
    # Number of times the callback was called
    self.n_calls = 0  # type: int
    # n_envs * n times env.step() was called
    self.num_timesteps = 0  # type: int
    self.verbose = verbose
    self.locals = None  # type: Optional[Dict[str, Any]]
    self.globals = None  # type: Optional[Dict[str, Any]]
    self.logger = None  # type: Optional[logger.Logger]
    # Sometimes, for an event callback, it is useful
    # to have access to the parent object
    self.parent = None  # type: Optional[BaseCallback]
Author: Stable-Baselines-Team, Project: stable-baselines, Lines: 21, Source: callbacks.py

Example 11: sync_envs_normalization

# Required import: import gym [as alias]
# Or: from gym import Env [as alias]
def sync_envs_normalization(env: Union[gym.Env, VecEnv], eval_env: Union[gym.Env, VecEnv]) -> None:
    """
    Sync eval and train environments when using VecNormalize

    :param env: (Union[gym.Env, VecEnv]) the training environment
    :param eval_env: (Union[gym.Env, VecEnv]) the evaluation environment
    """
    env_tmp, eval_env_tmp = env, eval_env
    # Special case for the _UnvecWrapper
    # Avoid circular import
    from stable_baselines.common.base_class import _UnvecWrapper
    if isinstance(env_tmp, _UnvecWrapper):
        return
    while isinstance(env_tmp, VecEnvWrapper):
        if isinstance(env_tmp, VecNormalize):
            # sync reward and observation scaling
            eval_env_tmp.obs_rms = deepcopy(env_tmp.obs_rms)
            eval_env_tmp.ret_rms = deepcopy(env_tmp.ret_rms)
        env_tmp = env_tmp.venv
        # Make pytype happy, in theory env and eval_env have the same type
        assert isinstance(eval_env_tmp, VecEnvWrapper), "the second env differs from the first env"
        eval_env_tmp = eval_env_tmp.venv 
Author: Stable-Baselines-Team, Project: stable-baselines, Lines: 24, Source: __init__.py
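
A usage sketch (assumes stable-baselines; both envs wrapped in VecNormalize):

import gym
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

env = VecNormalize(DummyVecEnv([lambda: gym.make('CartPole-v1')]))
eval_env = VecNormalize(DummyVecEnv([lambda: gym.make('CartPole-v1')]), training=False)
# Copy the running obs/return statistics from the training env to the eval env.
sync_envs_normalization(env, eval_env)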

Example 12: make_robotics_env

# Required import: import gym [as alias]
# Or: from gym import Env [as alias]
def make_robotics_env(env_id, seed, rank=0, allow_early_resets=True):
    """
    Create a wrapped, monitored gym.Env for Gym robotics tasks.

    :param env_id: (str) the environment ID
    :param seed: (int) the initial seed for RNG
    :param rank: (int) the rank of the environment (for logging)
    :param allow_early_resets: (bool) allows early reset of the environment
    :return: (Gym Environment) The robotic environment
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    keys = ['observation', 'desired_goal']
    # TODO: remove try-except once most users are running modern Gym
    try:  # for modern Gym (>=0.15.4)
        from gym.wrappers import FilterObservation, FlattenObservation
        env = FlattenObservation(FilterObservation(env, keys))
    except ImportError:  # for older gym (<=0.15.3)
        from gym.wrappers import FlattenDictWrapper  # pytype:disable=import-error
        env = FlattenDictWrapper(env, keys)
    env = Monitor(
        env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
        info_keywords=('is_success',), allow_early_resets=allow_early_resets)
    env.seed(seed)
    return env 
Author: Stable-Baselines-Team, Project: stable-baselines, Lines: 27, Source: cmd_util.py

Example 13: __init__

# Required import: import gym [as alias]
# Or: from gym import Env [as alias]
def __init__(self, *, env: Union[gym.Env, VecEnv], model: 'BaseRLModel', n_steps: int):
    """
    Collect experience by running `n_steps` in the environment.
    Note: if this is a `VecEnv`, the total number of steps will
    be `n_steps * n_envs`.

    :param env: (Union[gym.Env, VecEnv]) The environment to learn from
    :param model: (BaseRLModel) The model to learn
    :param n_steps: (int) The number of steps to run for each environment
    """
    self.env = env
    self.model = model
    n_envs = env.num_envs
    self.batch_ob_shape = (n_envs * n_steps,) + env.observation_space.shape
    self.obs = np.zeros((n_envs,) + env.observation_space.shape, dtype=env.observation_space.dtype.name)
    self.obs[:] = env.reset()
    self.n_steps = n_steps
    self.states = model.initial_state
    self.dones = [False for _ in range(n_envs)]
    self.callback = None  # type: Optional[BaseCallback]
    self.continue_training = True
    self.n_envs = n_envs
Author: Stable-Baselines-Team, Project: stable-baselines, Lines: 24, Source: runners.py

Example 14: step

# Required import: import gym [as alias]
# Or: from gym import Env [as alias]
def step(self, action):
    """gym.Env step function.

    Args:
        action (np.ndarray): action taken (must be array-like, since it is
            concatenated into the augmented observation below).

    Returns:
        np.ndarray: augmented observation.
        float: reward.
        bool: terminal signal.
        dict: environment info.

    """
    next_obs, reward, done, info = self.env.step(action)
    next_obs = np.concatenate([next_obs, action, [reward], [done]])
    return next_obs, reward, done, info
Author: rlworkgroup, Project: garage, Lines: 18, Source: rl2.py
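
A sketch of the resulting layout: for obs_dim-dimensional observations and act_dim-dimensional (e.g. one-hot) actions, the augmented observation has obs_dim + act_dim + 2 entries (the dimensions below are illustrative):

import numpy as np

obs = np.zeros(4)              # obs_dim = 4
action = np.array([0.0, 1.0])  # one-hot action, act_dim = 2
reward, done = 1.0, False
aug = np.concatenate([obs, action, [reward], [done]])
assert aug.shape == (4 + 2 + 2,)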

Example 15: make_mujoco_env

# Required import: import gym [as alias]
# Or: from gym import Env [as alias]
def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = Monitor(env, logger.get_dir())
    env.seed(seed)
    return env 
Author: Hwhitetooth, Project: lirpg, Lines: 11, Source: cmd_util.py


Note: the gym.Env examples on this page were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and distribution and use are subject to each project's license. Do not reproduce without permission.