

Python normalized_env.normalize Method Code Examples

This article collects typical usage examples of the Python method rllab.envs.normalized_env.normalize. If you have been wondering what normalized_env.normalize does, how to call it, or what it looks like in real code, the curated examples below should help. You can also browse further usage examples from its containing module, rllab.envs.normalized_env.


Six code examples of the normalized_env.normalize method are shown below, ordered by popularity by default.
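All six examples share the same basic pattern: construct a raw environment, wrap it with normalize, and hand the wrapped environment to an algorithm. The minimal sketch below illustrates that pattern; it is written for this article rather than taken from any of the projects credited below, and it assumes gym and rllab are installed. In rllab, normalize is an alias for NormalizedEnv, which rescales actions from a normalized [-1, 1] box to the wrapped environment's action bounds and, in the rllab versions these projects target, also accepts keyword arguments for reward scaling and running observation/reward normalization.

# Minimal usage sketch (illustrative only; not one of the collected examples below).
from rllab.envs.gym_env import GymEnv
from rllab.envs.normalized_env import normalize

env = normalize(GymEnv("Pendulum-v0", record_video=False))
obs = env.reset()
action = env.action_space.sample()               # sample from the normalized action space
next_obs, reward, done, info = env.step(action)  # rllab's Step tuple unpacks into these four fields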

Example 1: get_env

# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def get_env(env_name):
    if env_name == 'snake':
        return TfEnv(normalize(SnakeEnv()))
    elif env_name == 'swimmer':
        return TfEnv(normalize(SwimmerEnv()))
    elif env_name == 'half_cheetah':
        return TfEnv(normalize(HalfCheetahEnv()))
    elif env_name == 'hopper':
        return TfEnv(normalize(HopperEnv()))
    elif env_name == 'ant':
        return TfEnv(normalize(AntEnv()))
    # elif env_name == 'humanoidstandup':
    #     return TfEnv(GymEnv('HumanoidStandup-v1',
    #                         record_video=False,
    #                         record_log=False))
    elif env_name == 'humanoid':
        return TfEnv(normalize(HumanoidEnv()))
    # elif env_name == 'simple_humanoid':
    #     return TfEnv(normalize(SimpleHumanoidEnv()))
    else:
        assert False, "Define the env from env_name." 
Author: thanard, Project: me-trpo, Lines: 23, Source file: env_helpers.py

Example 2: run_task

# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def run_task(*_):
    env = normalize(GymEnv("Pendulum-v0", record_video=False, force_reset=True))

    observation_dim = np.prod(env.observation_space.shape)
    action_dim = np.prod(env.action_space.shape)

    qf = NormalizedAdvantageFunction(
        observation_dim=observation_dim,
        action_dim=action_dim,
        vf_hidden_sizes=(200, 200),
        mean_hidden_sizes=(200, 200),
        pds_hidden_sizes=(200, 200))

    es = OUStrategy(action_space=env.action_space)

    algo = NAF(
        env=env,
        es=es,
        qf=qf,
        batch_size=64,
        max_path_length=100,
        epoch_length=1000,
        min_pool_size=1000,
        n_epochs=80,
        discount=0.99,
        # scale_reward=0.01,
        qf_learning_rate=1e-3,
        # plot=True enables live plotting of the training run
        plot=True,
    )
    algo.train()

# if __name__ == '__main__':
#     run_task() 
Author: nosyndicate, Project: pytorchrl, Lines: 36, Source file: naf_pendulum.py

Example 3: run_task

# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def run_task(*_):
    # Note that environments with different action spaces may require different
    # policies. For example, with a Discrete action space a CategoricalMLPPolicy
    # works, but for a Box action space you may need a GaussianMLPPolicy
    # (see the trpo_gym_pendulum.py example).
    env = normalize(GymEnv("CartPole-v0", record_video=False, force_reset=True))

    policy = CategoricalMLPPolicy(
        env_spec=env.spec,
        # The neural network policy should have two hidden layers, each with 32 hidden units.
        hidden_sizes=(32, 32)
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=env.horizon,
        n_itr=50,
        discount=0.99,
        step_size=0.01,
        optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5, symmetric=False)),
        # Uncomment the line below to enable live plotting
        # plot=True,
    )
    algo.train() 
Author: nosyndicate, Project: pytorchrl, Lines: 31, Source file: trpo_rllab_cartpole.py

Example 4: run_task

# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def run_task(*_):
    # Note that environments with different action spaces may require different
    # policies. For example, with a Box action space a GaussianMLPPolicy works,
    # but for a Discrete action space you may need a CategoricalMLPPolicy
    # (see the trpo_gym_cartpole.py example).
    env = normalize(GymEnv("Pendulum-v0", record_video=False, force_reset=True))

    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        # The neural network policy should have two hidden layers, each with 32 hidden units.
        hidden_sizes=(32, 32)
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=env.horizon,
        n_itr=50,
        discount=0.99,
        step_size=0.01,
        optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5, symmetric=False)),
        # Uncomment the line below to enable live plotting
        # plot=True,
    )
    algo.train() 
Author: nosyndicate, Project: pytorchrl, Lines: 30, Source file: trpo_rllab_pendulum.py

Example 5: test_rllab

# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def test_rllab(self):
        try:
            from rllab.algos.ddpg import DDPG
            from rllab.envs.normalized_env import normalize
            from rllab.exploration_strategies.ou_strategy import OUStrategy
            from rllab.policies.deterministic_mlp_policy import DeterministicMLPPolicy
            from rllab.q_functions.continuous_mlp_q_function import ContinuousMLPQFunction
            from rllab.envs.gym_env import GymEnv
        except ImportError:
            print('rllab is not installed!')
            return None

        env = GymEnv('simglucose-adolescent1-v0')
        env = normalize(env)

        policy = DeterministicMLPPolicy(
            env_spec=env.spec,
            # The neural network policy should have two hidden layers, each
            # with 32 hidden units.
            hidden_sizes=(32, 32))

        es = OUStrategy(env_spec=env.spec)

        qf = ContinuousMLPQFunction(env_spec=env.spec)

        algo = DDPG(
            env=env,
            policy=policy,
            es=es,
            qf=qf,
            batch_size=32,
            max_path_length=100,
            epoch_length=1000,
            min_pool_size=10000,
            n_epochs=5,
            discount=0.99,
            scale_reward=0.01,
            qf_learning_rate=1e-3,
            policy_learning_rate=1e-4)
        algo.train() 
Author: jxx123, Project: simglucose, Lines: 42, Source file: test_rllab.py

Example 6: run_task

# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def run_task(*_):
    env = normalize(MultiGoalEnv())

    observation_dim = np.prod(env.observation_space.shape)
    action_dim = np.prod(env.action_space.shape)


    qf = SVGDMLPQFunction(
        observation_dim=observation_dim,
        action_dim=action_dim,
        hidden_sizes=(100, 100),
    )

    policy = SVGDPolicy(
        observation_dim=observation_dim,
        action_dim=action_dim,
        hidden_sizes=(100, 100),
        output_nonlinearity=None,
    )

    algo = SoftQ(
        env=env,
        policy=policy,
        qf=qf,
        batch_size=64,
        n_epochs=100,
        epoch_length=100,
        min_pool_size=100,
        replay_pool_size=1000000,
        discount=0.99,
        alpha=0.1,
        max_path_length=30,
        qf_target_n_particles=16,
        qf_learning_rate=1e-3,
        policy_learning_rate=1e-3,
        kernel_n_particles=32,
        kernel_update_ratio=0.5,
        n_eval_episodes=10,
        soft_target_tau=1000,
        scale_reward=0.1,
        include_horizon_terminal_transitions=False,
        # plot=True,
    )

    algo.train()

# if __name__ == '__main__':
#     run_task() 
Author: nosyndicate, Project: pytorchrl, Lines: 50, Source file: softq_multigoal.py


Note: The rllab.envs.normalized_env.normalize examples in this article were compiled by 纯净天空 from open-source code hosted on platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers, and copyright of the source code belongs to the original authors; please consult each project's license before distributing or using it, and do not reproduce this page without permission.