Python time_limit.TimeLimit方法代码示例

本文整理汇总了Python中gym.wrappers.time_limit.TimeLimit类的典型用法代码示例。如果您正苦于以下问题：Python time_limit.TimeLimit的具体用法？Python time_limit.TimeLimit怎么用？Python time_limit.TimeLimit使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块gym.wrappers.time_limit的用法示例。

在下文中一共展示了time_limit.TimeLimit方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_is_unwrappable_to

# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def test_is_unwrappable_to():
    """Check that ``is_unwrappable_to`` finds each expected class in a wrapper stack."""
    # Each entry pairs a factory for a fresh environment with a class that
    # must be reachable by unwrapping it. Factories run one per assertion.
    fresh_env_cases = (
        (lambda: make_env('FrozenLake-v0'), TimeLimit),
        (lambda: make_env('FrozenLake-v0'), DiscreteEnv),
        (lambda: feature_wrapper.make('FrozenLake-v0'),
         FrozenLakeFeatureWrapper),
        (lambda: feature_wrapper.make('FrozenLake8x8-v0'),
         FrozenLakeFeatureWrapper),
        (lambda: feature_wrapper.make('FrozenLake-v0'),
         feature_wrapper.FeatureWrapper),
    )
    for build_env, expected_cls in fresh_env_cases:
        assert is_unwrappable_to(build_env(), expected_cls)

    # A reward-wrapped feature environment must expose every layer of its
    # stack, down to the generic gym.Env base.
    feature_env = feature_wrapper.make('FrozenLake-v0')
    rewarded_env = RewardWrapper(
        feature_env, FeatureBasedRewardFunction(feature_env, 'random'))
    for expected_cls in (RewardWrapper, feature_wrapper.FeatureWrapper,
                         DiscreteEnv, gym.Env):
        assert is_unwrappable_to(rewarded_env, expected_cls)

开发者ID:JohannesHeidecke，项目名称:irl-benchmark，代码行数:18，代码来源:utils_wrapper_test.py

示例2: make

# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def make(self, id):
        """Create the environment registered under ``id``.

        The spec is looked up through ``self.spec`` and asked to build the
        environment. Legacy ``_reset``/``_step`` environments are patched, and
        the result is wrapped in ``TimeLimit`` when the environment's spec has
        a timestep limit and the spec is not tagged ``vnc``.

        Args:
            id: Environment ID handed to ``self.spec``.

        Returns:
            The new environment, wrapped in ``TimeLimit`` when applicable.
        """
        logger.info('Making new env: %s', id)
        spec = self.spec(id)
        env = spec.make()

        # Environments that still override _reset/_step rather than
        # reset/step get compatibility patching, unless they opt out by
        # setting _gym_disable_underscore_compat = True.
        overrides_legacy_api = hasattr(env, "_reset") and hasattr(env, "_step")
        if overrides_legacy_api and not getattr(env, "_gym_disable_underscore_compat", False):
            patch_deprecated_methods(env)

        # No limit configured, or a 'vnc'-tagged spec: hand back as is.
        if env.spec.timestep_limit is None or spec.tags.get('vnc'):
            return env

        from gym.wrappers.time_limit import TimeLimit
        return TimeLimit(env,
                         max_episode_steps=env.spec.max_episode_steps,
                         max_episode_seconds=env.spec.max_episode_seconds)

开发者ID:joanby，项目名称:ia-course，代码行数:18，代码来源:registration.py

示例3: make

# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def make(self, id, kwargs):
        """Create the environment registered under ``id``.

        The spec comes from ``registration.spec``. Legacy ``_reset``/``_step``
        environments are patched, and a ``TimeLimit`` wrapper is added when
        the environment's spec has a timestep limit and the spec is not tagged
        ``vnc``.

        Args:
            id: Environment ID handed to ``registration.spec``.
            kwargs: Accepted by the signature but never read in this body.

        Returns:
            The new environment, wrapped in ``TimeLimit`` when applicable.
        """
        logger.info('Making new env: %s', id)
        env_spec = registration.spec(id)
        env = env_spec.make()

        # Older environments override _reset/_step instead of reset/step and
        # need patching; _gym_disable_underscore_compat = True opts out.
        if hasattr(env, "_reset") and hasattr(env, "_step"):
            if not getattr(env, "_gym_disable_underscore_compat", False):
                patch_deprecated_methods(env)

        needs_time_limit = (env.spec.timestep_limit is not None
                            and not env_spec.tags.get('vnc'))
        if needs_time_limit:
            from gym.wrappers.time_limit import TimeLimit
            env = TimeLimit(env,
                            max_episode_steps=env.spec.max_episode_steps,
                            max_episode_seconds=env.spec.max_episode_seconds)
        return env

开发者ID:alexsax，项目名称:midlevel-reps，代码行数:18，代码来源:make_env.py

示例4: __init__

# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def __init__(self, id, entry_point=None, reward_threshold=None, kwargs=None, nondeterministic=False, tags=None, max_episode_steps=None):
        """Store the registration data for one environment ID.

        Args:
            id: Environment ID; must match ``env_id_re``.
            entry_point: Stored unchanged on ``self.entry_point``.
            reward_threshold: Evaluation parameter, stored unchanged.
            kwargs: Stored on ``self._kwargs``; ``None`` becomes ``{}``.
            nondeterministic: Environment property, stored unchanged.
            tags: Tag dictionary; ``None`` becomes ``{}``. A dictionary
                passed in is updated in place with the step-limit entry.
            max_episode_steps: Step limit, stored on the instance and
                mirrored into ``tags``.

        Raises:
            error.Error: If ``id`` does not match ``env_id_re``.
        """
        self.id = id
        self.entry_point = entry_point
        # Evaluation parameters
        self.reward_threshold = reward_threshold
        # Environment properties
        self.nondeterministic = nondeterministic

        # The step limit is also recorded in the tags under a fixed key.
        tag_map = {} if tags is None else tags
        self.tags = tag_map
        tag_map['wrapper_config.TimeLimit.max_episode_steps'] = max_episode_steps
        self.max_episode_steps = max_episode_steps

        # The remaining attributes are private for now; some may become
        # public later if they turn out to be useful.
        parsed_id = env_id_re.search(id)
        if not parsed_id:
            raise error.Error('Attempted to register malformed environment ID: {}. (Currently all IDs must be of the form {}.)'.format(id, env_id_re.pattern))
        self._env_name = parsed_id.group(1)
        self._kwargs = kwargs if kwargs is not None else {}

开发者ID:hust512，项目名称:DQN-DDPG_Stock_Trading，代码行数:25，代码来源:registration.py

示例5: make

# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def make(self, path, **kwargs):
        """Create the environment registered under ``path``.

        Args:
            path: Environment identifier handed to ``self.spec``.
            **kwargs: Forwarded to the spec's ``make`` call.

        Returns:
            The new environment, wrapped in ``TimeLimit`` when the
            environment's spec sets ``max_episode_steps`` and the spec is not
            tagged ``vnc``.
        """
        # Only mention the keyword arguments in the log when some were given.
        if kwargs:
            logger.info('Making new env: %s (%s)', path, kwargs)
        else:
            logger.info('Making new env: %s', path)

        spec = self.spec(path)
        env = spec.make(**kwargs)

        # Environments overriding _reset/_step instead of reset/step are
        # patched for compatibility unless they set
        # _gym_disable_underscore_compat = True.
        has_underscore_api = hasattr(env, "_reset") and hasattr(env, "_step")
        if has_underscore_api and not getattr(env, "_gym_disable_underscore_compat", False):
            patch_deprecated_methods(env)

        if env.spec.max_episode_steps is None or spec.tags.get('vnc'):
            return env

        from gym.wrappers.time_limit import TimeLimit
        return TimeLimit(env, max_episode_steps=env.spec.max_episode_steps)

开发者ID:hust512，项目名称:DQN-DDPG_Stock_Trading，代码行数:19，代码来源:registration.py

示例6: create_multi_agent_curried_policy_wrapper

# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def create_multi_agent_curried_policy_wrapper(
    mon_dir, env_name, num_envs, embed_index, max_steps, state_shape=None, add_zoo=False, num_zoo=5
):
    """Yield a curried multi-agent vec env with its policy wrapper and zoo policy.

    Builds ``num_envs`` time-limited environments, loads a zoo policy for the
    agent at index ``1 - embed_index``, and curries a ``MultiPolicyWrapper``
    of ten constant policies (plus ``num_zoo`` references to the zoo policy
    when ``add_zoo`` is set) into the vec env at ``embed_index``.

    Args:
        mon_dir: Passed to ``make_env`` as ``out_dir``.
        env_name: Environment name used for ``make_env`` and ``load_policy``.
        num_envs: Number of environments in the vec env.
        embed_index: Index of the agent replaced by the curried policy.
        max_steps: ``max_episode_steps`` for each ``TimeLimit`` wrapper.
        state_shape: Forwarded to ``_get_constant_policy``.
        add_zoo: Whether to append the zoo policy to the policy list.
        num_zoo: How many times the zoo policy is appended.

    Yields:
        A ``(vec_env, policy_wrapper, zoo)`` tuple. When the generator is
        resumed after the yield, the policy wrapper is closed.
    """
    def episode_limit(env):
        return time_limit.TimeLimit(env, max_episode_steps=max_steps)

    def env_fn(i):
        return make_env(env_name, seed=42, i=i, out_dir=mon_dir, pre_wrappers=[episode_limit])

    # Bind ``i`` as a default argument. A plain ``lambda: env_fn(i)`` closes
    # over the loop variable itself, so every factory would use the final
    # index once the comprehension has finished.
    vec_env = make_dummy_vec_multi_env([lambda i=i: env_fn(i) for i in range(num_envs)])

    zoo = load_policy(
        policy_path="1",
        policy_type="zoo",
        env=vec_env,
        env_name=env_name,
        index=1 - embed_index,
        transparent_params=None,
    )
    half_env = FakeSingleSpacesVec(vec_env, agent_id=embed_index)
    policies = [
        _get_constant_policy(
            half_env, constant_value=half_env.action_space.sample(), state_shape=state_shape
        )
        for _ in range(10)
    ]
    if add_zoo:
        policies += [zoo] * num_zoo

    policy_wrapper = MultiPolicyWrapper(policies=policies, num_envs=num_envs)

    vec_env = CurryVecEnv(
        venv=vec_env, policy=policy_wrapper, agent_idx=embed_index, deterministic=False
    )
    vec_env = FlattenSingletonVecEnv(vec_env)

    yield vec_env, policy_wrapper, zoo
    policy_wrapper.close()

开发者ID:HumanCompatibleAI，项目名称:adversarial-policies，代码行数:40，代码来源:test_wrappers.py

示例7: test_lstm_train

# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def test_lstm_train():
    """Test that LSTM models are able to achieve >=150 (out of 500) reward on CartPoleNoVelEnv.

    This environment requires memory to perform well in."""
    def make_env(i):
        env = CartPoleNoVelEnv()
        env = TimeLimit(env, max_episode_steps=500)
        env = bench.Monitor(env, None, allow_early_resets=True)
        env.seed(i)
        return env

    # Bind ``i`` as a default argument so each factory keeps its own index.
    # A plain ``lambda: make_env(i)`` shares the loop variable, so every
    # environment would be seeded with the last index.
    env = SubprocVecEnv([lambda i=i: make_env(i) for i in range(NUM_ENVS)])
    env = VecNormalize(env)
    model = PPO2(MlpLstmPolicy, env, n_steps=128, nminibatches=NUM_ENVS, lam=0.95, gamma=0.99,
                 noptepochs=10, ent_coef=0.0, learning_rate=3e-4, cliprange=0.2, verbose=1)

    eprewmeans = []

    def reward_callback(local, _):
        # The list is only mutated, never rebound, so no ``nonlocal`` is needed.
        eprewmeans.append(safe_mean([ep_info['r'] for ep_info in local['ep_info_buf']]))

    model.learn(total_timesteps=100000, callback=reward_callback)

    # Maximum episode reward is 500.
    # In CartPole-v1, a non-recurrent policy can easily get >= 450.
    # In CartPoleNoVelEnv, a non-recurrent policy doesn't get more than ~50.
    # LSTM policies can reach above 400, but it varies a lot between runs; consistently get >=150.
    # See PR #244 for more detailed benchmarks.

    average_reward = sum(eprewmeans[-NUM_EPISODES_FOR_SCORE:]) / NUM_EPISODES_FOR_SCORE
    assert average_reward >= 150, "Mean reward below 150; per-episode rewards {}".format(average_reward)

开发者ID:Stable-Baselines-Team，项目名称:stable-baselines，代码行数:33，代码来源:test_lstm_policy.py

示例8: __new__

# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def __new__(cls, *args, **kwargs):
        """Pick the wrapper class that matches the requested environment.

        Args:
            args: positional arguments
            kwargs: keyword arguments

        Returns:
             garage.envs.bullet.BulletEnv: if the environment is a bullet-based
                environment. Else returns a garage.envs.GarageEnv
        """
        # Find the environment object, if one was supplied. A positional
        # environment is only considered when it is a TimeLimit instance,
        # which is the type gym.make() returns.
        if 'env' in kwargs:
            candidate = kwargs['env']
        elif args and isinstance(args[0], TimeLimit):
            candidate = args[0]
        else:
            candidate = None

        if candidate and any(candidate.env.spec.id == name
                             for name in _get_bullet_env_list()):
            return BulletEnv(candidate)

        # Otherwise fall back to the environment name, taken from the keyword
        # argument or from the second positional argument.
        if 'env_name' in kwargs:
            requested_name = kwargs['env_name']
        elif len(args) >= 2:
            requested_name = args[1]
        else:
            requested_name = ''

        if requested_name != '' and any(requested_name == name
                                        for name in _get_bullet_env_list()):
            return BulletEnv(gym.make(requested_name))

        return super(GarageEnv, cls).__new__(cls)

开发者ID:rlworkgroup，项目名称:garage，代码行数:36，代码来源:garage_env.py

示例9: __init__

# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def __init__(self, env=None, env_name='', is_image=False):
        """Set up a GarageEnv around a gym environment.

        Args:
            env (gym.wrappers.time_limit): A gym.wrappers.time_limit.TimeLimit
                object wrapping a gym.Env created via gym.make().
            env_name (str): When non-empty, the environment is created with
                ``gym.make(env_name)`` and ``env`` is not used for wrapping.
            is_image (bool): Forwarded to ``akro.from_gym`` when converting
                the observation space.
        """
        # Both values are kept because deserialization needs them.
        self._env_name = env_name
        self._env = env

        # A given name takes precedence over a directly supplied environment.
        super().__init__(gym.make(env_name) if env_name else env)

        # Convert the wrapped environment's gym spaces to akro spaces.
        self.action_space = akro.from_gym(self.env.action_space)
        self.observation_space = akro.from_gym(
            self.env.observation_space, is_image=is_image)
        self._spec = EnvSpec(
            action_space=self.action_space,
            observation_space=self.observation_space,
        )

开发者ID:rlworkgroup，项目名称:garage，代码行数:29，代码来源:garage_env.py

示例10: step

# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def step(self, action):
        """Step the wrapped environment and separate time-limit endings.

        This override exists to suppress a deprecation warning raised by
        gym.Wrapper.

        Args:
            action (np.ndarray): An action provided by the agent.

        Returns:
            np.ndarray: Agent's observation of the current environment
            float: Amount of reward returned after previous action
            bool: Whether the episode has ended, in which case further step()
                calls will return undefined results
            dict: Contains auxiliary diagnostic information (helpful for
                debugging, and sometimes learning)

        """
        obs, rew, episode_over, env_info = self.env.step(action)

        truncation_key = 'TimeLimit.truncated'
        if truncation_key not in env_info:
            # No time-limit information was reported; pass through untouched.
            return obs, rew, episode_over, env_info

        # The incoming done flag is kept under
        # 'GarageEnv.TimeLimitTerminated'. The returned done flag is the
        # negation of the truncation flag, so an episode cut off by the time
        # limit is not reported as terminated.
        env_info['GarageEnv.TimeLimitTerminated'] = episode_over
        return obs, rew, not env_info[truncation_key], env_info

开发者ID:rlworkgroup，项目名称:garage，代码行数:33，代码来源:garage_env.py

示例11: __init__

# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def __init__(self, id, entry_point=None, trials=100, reward_threshold=None, local_only=False, kwargs=None, nondeterministic=False, tags=None, max_episode_steps=None, max_episode_seconds=None, timestep_limit=None):
        """Store the registration data for one environment ID.

        Two legacy ways of giving the step limit are still honoured: a truthy
        ``wrapper_config.TimeLimit.max_episode_steps`` tag replaces
        ``max_episode_steps``, and a non-``None`` ``timestep_limit`` replaces
        whatever limit was chosen before it.

        Args:
            id: Environment ID; must match ``env_id_re``.
            entry_point: Stored on ``self._entry_point``.
            trials: Evaluation parameter, stored unchanged.
            reward_threshold: Evaluation parameter, stored unchanged.
            local_only: Stored on ``self._local_only``.
            kwargs: Stored on ``self._kwargs``; ``None`` becomes ``{}``.
            nondeterministic: Environment property, stored unchanged.
            tags: Tag dictionary; ``None`` becomes ``{}``. A dictionary
                passed in is updated in place with the step-limit entry.
            max_episode_steps: Step limit for the environment.
            max_episode_seconds: Stored unchanged.
            timestep_limit: Legacy name for ``max_episode_steps``.

        Raises:
            error.Error: If ``id`` does not match ``env_id_re``.
        """
        limit_tag = 'wrapper_config.TimeLimit.max_episode_steps'

        self.id = id
        # Evaluation parameters
        self.trials = trials
        self.reward_threshold = reward_threshold
        # Environment properties
        self.nondeterministic = nondeterministic

        tag_map = {} if tags is None else tags
        self.tags = tag_map

        # BACKWARDS COMPAT 2017/1/18: a truthy limit found in the tags wins
        # over the max_episode_steps argument.
        # TODO: emit a deprecation warning for wrapper_config.TimeLimit tags
        # (planned for after 2017/02/18).
        tagged_limit = tag_map.get(limit_tag)
        if tagged_limit:
            max_episode_steps = tagged_limit
        # The tag is written before the timestep_limit override below, so it
        # can differ from self.max_episode_steps.
        tag_map[limit_tag] = max_episode_steps

        # BACKWARDS COMPAT 2017/1/31: timestep_limit is the old spelling of
        # max_episode_steps and takes precedence when given.
        # TODO: emit a deprecation warning for timestep_limit (planned for
        # after 2017/03/01).
        self.max_episode_steps = max_episode_steps if timestep_limit is None else timestep_limit
        self.max_episode_seconds = max_episode_seconds

        # The remaining attributes are private for now; some may become
        # public later if they turn out to be useful.
        parsed_id = env_id_re.search(id)
        if not parsed_id:
            raise error.Error('Attempted to register malformed environment ID: {}. (Currently all IDs must be of the form {}.)'.format(id, env_id_re.pattern))
        self._env_name = parsed_id.group(1)
        self._entry_point = entry_point
        self._local_only = local_only
        self._kwargs = kwargs if kwargs is not None else {}

开发者ID:ArztSamuel，项目名称:DRL_DeliveryDuel，代码行数:42，代码来源:registration.py

示例12: make

# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def make(self, id):
        """Create the environment registered under ``id``.

        Args:
            id: Environment ID handed to ``self.spec``.

        Returns:
            The new environment, wrapped in ``TimeLimit`` when the
            environment's spec has a timestep limit and the spec is not
            tagged ``vnc``.
        """
        logger.info('Making new env: %s', id)
        spec = self.spec(id)
        env = spec.make()

        # Environments exposing both _reset and _step are patched.
        if all(hasattr(env, legacy_name) for legacy_name in ("_reset", "_step")):
            patch_deprecated_methods(env)

        if env.spec.timestep_limit is None or spec.tags.get('vnc'):
            return env

        from gym.wrappers.time_limit import TimeLimit
        return TimeLimit(env,
                         max_episode_steps=env.spec.max_episode_steps,
                         max_episode_seconds=env.spec.max_episode_seconds)

开发者ID:ArztSamuel，项目名称:DRL_DeliveryDuel，代码行数:14，代码来源:registration.py

示例13: meta_reset

# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def meta_reset(self, seed):
        """Build a new time-limited hopper environment and bind its methods.

        Seeds NumPy's global RNG, creates a ``RandomWeightHopperEnv`` from the
        instance's ``rand_*`` settings, attaches a spec, seeds the
        environment, wraps it in ``TimeLimit`` and resets it once. The
        wrapped environment's ``step``, ``render`` and ``reset`` are then
        bound onto ``self``.

        Args:
            seed: Seed used for ``np.random.seed`` and for the environment.
        """
        np.random.seed(seed)

        randomization = dict(
            rand_mass=self.rand_mass,
            rand_gravity=self.rand_gravity,
            rand_friction=self.rand_friction,
            rand_thickness=self.rand_thickness,
        )
        hopper = RandomWeightHopperEnv(**randomization)

        # Spec values are based on Hopper-v2.
        hopper_spec = EnvSpec(
            'RandomWeightHopperEnv-v0',
            entry_point='generic_rl.envs.mujoco:RandomWeightHopperEnv',
            max_episode_steps=1000,
            reward_threshold=3800.0
        )
        hopper._spec = hopper_spec
        hopper.seed(seed)

        # Limit episodes according to the spec.
        limited = TimeLimit(
            hopper,
            max_episode_steps=hopper_spec.max_episode_steps,
            max_episode_seconds=hopper_spec.max_episode_seconds
        )
        self.env = limited

        # Fix for done flags.
        self.env.reset()

        # Expose the wrapped environment's methods directly on this object.
        self.step = limited.step
        self.render = limited.render
        self.reset = limited.reset

开发者ID:openai，项目名称:EPG，代码行数:34，代码来源:random_robots.py

示例14: _make

# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def _make(id_, env_kwargs=None):
    """
    Recreate the gym make function from gym/envs/registration.py
    so that it supports extra arguments for the environment.

    :param id_: (str) The environment ID
    :param env_kwargs: (dict) The extra arguments for the environment
    :return: The created environment, wrapped in TimeLimit when the
        environment's spec has a timestep limit and the spec is not tagged 'vnc'
    """
    if env_kwargs is None:
        env_kwargs = {}

    # getting the spec from the ID we want
    spec = registry.spec(id_)

    # Keeping the checks and safe guards of the old code.
    # The message formats ``id_`` (the ID that was looked up). It used to
    # read ``spec.id_``, which turned a failed check into an AttributeError
    # instead of an AssertionError carrying this hint.
    assert spec._entry_point is not None, 'Attempting to make deprecated env {}. ' \
                                          '(HINT: is there a newer registered version of this env?)'.format(id_)

    if callable(spec._entry_point):
        env = spec._entry_point(**env_kwargs)
    else:
        cls = load(spec._entry_point)
        # create the env, with the original kwargs, and the new ones overriding them if needed
        env = cls(**{**spec._kwargs, **env_kwargs})

    # Make the environment aware of which spec it came from.
    env.unwrapped.spec = spec

    # Keeping the old patching system for _reset, _step and timestep limit
    if hasattr(env, "_reset") and hasattr(env, "_step") and not getattr(env, "_gym_disable_underscore_compat", False):
        patch_deprecated_methods(env)
    if (env.spec.timestep_limit is not None) and not spec.tags.get('vnc'):
        from gym.wrappers.time_limit import TimeLimit
        env = TimeLimit(env,
                        max_episode_steps=env.spec.max_episode_steps,
                        max_episode_seconds=env.spec.max_episode_seconds)
    return env

开发者ID:araffin，项目名称:robotics-rl-srl，代码行数:38，代码来源:utils.py

示例15: get_timesteps_per_episode

# 需要导入模块: from gym.wrappers import time_limit [as 别名]
# 或者: from gym.wrappers.time_limit import TimeLimit [as 别名]
def get_timesteps_per_episode(env):
    """Return the episode step limit configured for ``env``, if any.

    Lookup order:
      1. ``env._max_episode_steps`` when that attribute exists.
      2. The ``wrapper_config.TimeLimit.max_episode_steps`` entry of
         ``env.spec.tags`` when ``env.spec`` is set and has ``tags``.
      3. The same lookup on ``env.env``, recursively.

    Args:
        env: Environment or wrapper object to inspect.

    Returns:
        The step limit, or ``None`` when none can be found.
    """
    if hasattr(env, "_max_episode_steps"):
        return env._max_episode_steps
    # ``spec`` may exist but be None, or lack ``tags``. Treat both as "no
    # information here" and keep unwrapping instead of raising.
    tags = getattr(getattr(env, "spec", None), "tags", None)
    if tags is not None:
        return tags.get("wrapper_config.TimeLimit.max_episode_steps")
    if hasattr(env, "env"):
        return get_timesteps_per_episode(env.env)
    return None

开发者ID:nottombrown，项目名称:rl-teacher，代码行数:10，代码来源:envs.py

注：本文中的gym.wrappers.time_limit.TimeLimit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。