

Python normalized_env.normalize Method Code Examples

This article collects typical usage examples of the Python method rllab.envs.normalized_env.normalize. If you are wondering how to use normalized_env.normalize in practice, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples from the module it belongs to, rllab.envs.normalized_env.


The following presents 6 code examples of the normalized_env.normalize method, sorted by popularity.
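Before the examples, a minimal sketch of what normalize does, assuming rllab (with its Box2D extras) is installed; CartpoleEnv is used here purely for illustration and does not appear in the examples below. The wrapper rescales agent actions to the environment's own action bounds and can optionally keep running statistics to normalize observations and rewards.

# Minimal sketch (illustration only; assumes rllab with Box2D support)
from rllab.envs.normalized_env import normalize
from rllab.envs.box2d.cartpole_env import CartpoleEnv

env = normalize(CartpoleEnv())   # wrapped action space is rescaled to [-1, 1]
obs = env.reset()
next_obs, reward, done, info = env.step(env.action_space.sample())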

Example 1: get_env

# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def get_env(env_name):
    if env_name == 'snake':
        return TfEnv(normalize(SnakeEnv()))
    elif env_name == 'swimmer':
        return TfEnv(normalize(SwimmerEnv()))
    elif env_name == 'half_cheetah':
        return TfEnv(normalize(HalfCheetahEnv()))
    elif env_name == 'hopper':
        return TfEnv(normalize(HopperEnv()))
    elif env_name == 'ant':
        return TfEnv(normalize(AntEnv()))
    # elif env_name == 'humanoidstandup':
    #     return TfEnv(GymEnv('HumanoidStandup-v1',
    #                         record_video=False,
    #                         record_log=False))
    elif env_name == 'humanoid':
        return TfEnv(normalize(HumanoidEnv()))
    # elif env_name == 'simple_humanoid':
    #     return TfEnv(normalize(SimpleHumanoidEnv()))
    else:
        assert False, "Define the env from env_name." 
Developer: thanard, Project: me-trpo, Lines of code: 23, Source file: env_helpers.py
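As a hypothetical illustration (the call below is not part of env_helpers.py), the helper can then be used to build a normalized, TF-wrapped environment by name:

# Hypothetical usage of the helper above
env = get_env('half_cheetah')   # equivalent to TfEnv(normalize(HalfCheetahEnv()))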

Example 2: run_task

# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def run_task(*_):
    env = normalize(GymEnv("Pendulum-v0", record_video=False, force_reset=True))

    observation_dim = np.prod(env.observation_space.shape)
    action_dim = np.prod(env.action_space.shape)

    qf = NormalizedAdvantageFunction(
        observation_dim=observation_dim,
        action_dim=action_dim,
        vf_hidden_sizes=(200, 200),
        mean_hidden_sizes=(200, 200),
        pds_hidden_sizes=(200, 200))

    es = OUStrategy(action_space=env.action_space)

    algo = NAF(
        env=env,
        es=es,
        qf=qf,
        batch_size=64,
        max_path_length=100,
        epoch_length=1000,
        min_pool_size=1000,
        n_epochs=80,
        discount=0.99,
        # scale_reward=0.01,
        qf_learning_rate=1e-3,
        # plot=True enables live plotting during training; comment it out to disable
        plot=True,
    )
    algo.train()

# if __name__ == '__main__':
#     run_task() 
Developer: nosyndicate, Project: pytorchrl, Lines of code: 36, Source file: naf_pendulum.py
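In rllab-style scripts, a run_task function like the one above is commonly handed to rllab's experiment launcher rather than called directly; a possible sketch, assuming rllab's run_experiment_lite is available (the original naf_pendulum.py may launch it differently, as the commented-out __main__ guard above suggests):

# Possible launch pattern (assumption, not shown in the original file)
from rllab.misc.instrument import run_experiment_lite

run_experiment_lite(
    run_task,
    n_parallel=1,   # number of parallel sampling workers
    seed=1,
)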

Example 3: run_task

# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def run_task(*_):
    # Please note that different environments with different action spaces may
    # require different policies. For example, with a Discrete action space a
    # CategoricalMLPPolicy works, but for a Box action space you may need to use
    # a GaussianMLPPolicy (see the trpo_gym_pendulum.py example)
    env = normalize(GymEnv("CartPole-v0", record_video=False, force_reset=True))

    policy = CategoricalMLPPolicy(
        env_spec=env.spec,
        # The neural network policy should have two hidden layers, each with 32 hidden units.
        hidden_sizes=(32, 32)
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=env.horizon,
        n_itr=50,
        discount=0.99,
        step_size=0.01,
        optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5, symmetric=False))
        # Uncomment both lines (this and the plot parameter below) to enable plotting
        # plot=True,
    )
    algo.train() 
Developer: nosyndicate, Project: pytorchrl, Lines of code: 31, Source file: trpo_rllab_cartpole.py

Example 4: run_task

# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def run_task(*_):
    # Please note that different environments with different action spaces may require different
    # policies. For example, with a Box action space a GaussianMLPPolicy works, but for a Discrete
    # action space you may need to use a CategoricalMLPPolicy (see the trpo_gym_cartpole.py example)
    env = normalize(GymEnv("Pendulum-v0", record_video=False, force_reset=True))

    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        # The neural network policy should have two hidden layers, each with 32 hidden units.
        hidden_sizes=(32, 32)
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=env.horizon,
        n_itr=50,
        discount=0.99,
        step_size=0.01,
        optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5, symmetric=False))
        # Uncomment both lines (this and the plot parameter below) to enable plotting
        # plot=True,
    )
    algo.train() 
Developer: nosyndicate, Project: pytorchrl, Lines of code: 30, Source file: trpo_rllab_pendulum.py

Example 5: test_rllab

# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def test_rllab(self):
        try:
            from rllab.algos.ddpg import DDPG
            from rllab.envs.normalized_env import normalize
            from rllab.exploration_strategies.ou_strategy import OUStrategy
            from rllab.policies.deterministic_mlp_policy import DeterministicMLPPolicy
            from rllab.q_functions.continuous_mlp_q_function import ContinuousMLPQFunction
            from rllab.envs.gym_env import GymEnv
        except ImportError:
            print('rllab is not installed!')
            return None

        env = GymEnv('simglucose-adolescent1-v0')
        env = normalize(env)

        policy = DeterministicMLPPolicy(
            env_spec=env.spec,
            # The neural network policy should have two hidden layers, each
            # with 32 hidden units.
            hidden_sizes=(32, 32))

        es = OUStrategy(env_spec=env.spec)

        qf = ContinuousMLPQFunction(env_spec=env.spec)

        algo = DDPG(
            env=env,
            policy=policy,
            es=es,
            qf=qf,
            batch_size=32,
            max_path_length=100,
            epoch_length=1000,
            min_pool_size=10000,
            n_epochs=5,
            discount=0.99,
            scale_reward=0.01,
            qf_learning_rate=1e-3,
            policy_learning_rate=1e-4)
        algo.train() 
Developer: jxx123, Project: simglucose, Lines of code: 42, Source file: test_rllab.py
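GymEnv('simglucose-adolescent1-v0') assumes that this custom environment ID has already been registered with Gym. Below is a sketch following simglucose's documented registration pattern; the entry point and patient name are taken from that project's documentation, not from this snippet:

# Registration sketch (assumption, based on simglucose's documented usage)
from gym.envs.registration import register

register(
    id='simglucose-adolescent1-v0',
    entry_point='simglucose.envs:T1DSimEnv',
    kwargs={'patient_name': 'adolescent#001'}
)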

Example 6: run_task

# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def run_task(*_):
    env = normalize(MultiGoalEnv())

    observation_dim = np.prod(env.observation_space.shape)
    action_dim = np.prod(env.action_space.shape)


    qf = SVGDMLPQFunction(
        observation_dim=observation_dim,
        action_dim=action_dim,
        hidden_sizes=(100, 100),
    )

    policy = SVGDPolicy(
        observation_dim=observation_dim,
        action_dim=action_dim,
        hidden_sizes=(100, 100),
        output_nonlinearity=None,
    )

    algo = SoftQ(
        env=env,
        policy=policy,
        qf=qf,
        batch_size=64,
        n_epochs=100,
        epoch_length=100,
        min_pool_size=100,
        replay_pool_size=1000000,
        discount=0.99,
        alpha=0.1,
        max_path_length=30,
        qf_target_n_particles=16,
        qf_learning_rate=1e-3,
        policy_learning_rate=1e-3,
        kernel_n_particles=32,
        kernel_update_ratio=0.5,
        n_eval_episodes=10,
        soft_target_tau=1000,
        scale_reward=0.1,
        include_horizon_terminal_transitions=False,
        # plot=True,
    )

    algo.train()

# if __name__ == '__main__':
#     run_task() 
Developer: nosyndicate, Project: pytorchrl, Lines of code: 50, Source file: softq_multigoal.py


Note: the rllab.envs.normalized_env.normalize examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by their original developers; copyright of the source code remains with the original authors, and distribution and use should follow the corresponding project's License. Do not reproduce without permission.