This article collects typical usage examples of the Python method rllab.envs.normalized_env.normalize. If you are unsure what normalized_env.normalize does, how to call it, or how it is used in practice, the curated code examples below should help. You can also look further into the usage of the module rllab.envs.normalized_env, where this method is defined.
Six code examples of normalized_env.normalize are shown below, sorted by popularity by default.
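Before the collected examples, here is a minimal sketch of the usual pattern: normalize wraps an rllab environment so that actions passed to step are rescaled from a canonical range to the environment's action bounds, and it can optionally normalize observations and rewards. The commented-out keyword arguments (scale_reward, normalize_obs) are assumptions about the NormalizedEnv wrapper's options and may differ between rllab versions.

# Minimal sketch (not one of the numbered examples below): wrap a Gym
# environment with rllab's normalize before handing it to an algorithm.
from rllab.envs.gym_env import GymEnv
from rllab.envs.normalized_env import normalize

env = normalize(
    GymEnv("Pendulum-v0", record_video=False, force_reset=True),
    # scale_reward=1.0,     # assumed option: constant factor applied to rewards
    # normalize_obs=False,  # assumed option: running mean/std normalization of observations
)

obs = env.reset()
action = env.action_space.sample()                # raw action in the normalized range
next_obs, reward, done, info = env.step(action)   # the wrapper rescales it to the env's bounds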
Example 1: get_env
# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def get_env(env_name):
    if env_name == 'snake':
        return TfEnv(normalize(SnakeEnv()))
    elif env_name == 'swimmer':
        return TfEnv(normalize(SwimmerEnv()))
    elif env_name == 'half_cheetah':
        return TfEnv(normalize(HalfCheetahEnv()))
    elif env_name == 'hopper':
        return TfEnv(normalize(HopperEnv()))
    elif env_name == 'ant':
        return TfEnv(normalize(AntEnv()))
    # elif env_name == 'humanoidstandup':
    #     return TfEnv(GymEnv('HumanoidStandup-v1',
    #                         record_video=False,
    #                         record_log=False))
    elif env_name == 'humanoid':
        return TfEnv(normalize(HumanoidEnv()))
    # elif env_name == 'simple_humanoid':
    #     return TfEnv(normalize(SimpleHumanoidEnv()))
    else:
        assert False, "Define the env from env_name."
Example 2: run_task
# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def run_task(*_):
    env = normalize(GymEnv("Pendulum-v0", record_video=False, force_reset=True))
    observation_dim = np.prod(env.observation_space.shape)
    action_dim = np.prod(env.action_space.shape)
    qf = NormalizedAdvantageFunction(
        observation_dim=observation_dim,
        action_dim=action_dim,
        vf_hidden_sizes=(200, 200),
        mean_hidden_sizes=(200, 200),
        pds_hidden_sizes=(200, 200))
    es = OUStrategy(action_space=env.action_space)
    algo = NAF(
        env=env,
        es=es,
        qf=qf,
        batch_size=64,
        max_path_length=100,
        epoch_length=1000,
        min_pool_size=1000,
        n_epochs=80,
        discount=0.99,
        # scale_reward=0.01,
        qf_learning_rate=1e-3,
        # plot=True enables plotting during training
        plot=True,
    )
    algo.train()

# if __name__ == '__main__':
#     run_task()
Example 3: run_task
# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def run_task(*_):
    # Please note that different environments with different action spaces may
    # require different policies. For example, with a Discrete action space a
    # CategoricalMLPPolicy works, but a Box action space may need a
    # GaussianMLPPolicy (see the trpo_gym_pendulum.py example).
    env = normalize(GymEnv("CartPole-v0", record_video=False, force_reset=True))
    policy = CategoricalMLPPolicy(
        env_spec=env.spec,
        # The neural network policy should have two hidden layers, each with 32 hidden units.
        hidden_sizes=(32, 32)
    )
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=env.horizon,
        n_itr=50,
        discount=0.99,
        step_size=0.01,
        optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5, symmetric=False))
        # Uncomment both lines (this and the plot parameter below) to enable plotting
        # plot=True,
    )
    algo.train()
Example 4: run_task
# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def run_task(*_):
    # Please note that different environments with different action spaces may
    # require different policies. For example, with a Box action space a
    # GaussianMLPPolicy works, but a Discrete action space may need a
    # CategoricalMLPPolicy (see the trpo_gym_cartpole.py example).
    env = normalize(GymEnv("Pendulum-v0", record_video=False, force_reset=True))
    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        # The neural network policy should have two hidden layers, each with 32 hidden units.
        hidden_sizes=(32, 32)
    )
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=env.horizon,
        n_itr=50,
        discount=0.99,
        step_size=0.01,
        optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5, symmetric=False))
        # Uncomment both lines (this and the plot parameter below) to enable plotting
        # plot=True,
    )
    algo.train()
Example 5: test_rllab
# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def test_rllab(self):
    try:
        from rllab.algos.ddpg import DDPG
        from rllab.envs.normalized_env import normalize
        from rllab.exploration_strategies.ou_strategy import OUStrategy
        from rllab.policies.deterministic_mlp_policy import DeterministicMLPPolicy
        from rllab.q_functions.continuous_mlp_q_function import ContinuousMLPQFunction
        from rllab.envs.gym_env import GymEnv
    except ImportError:
        print('rllab is not installed!')
        return None
    env = GymEnv('simglucose-adolescent1-v0')
    env = normalize(env)
    policy = DeterministicMLPPolicy(
        env_spec=env.spec,
        # The neural network policy should have two hidden layers, each
        # with 32 hidden units.
        hidden_sizes=(32, 32))
    es = OUStrategy(env_spec=env.spec)
    qf = ContinuousMLPQFunction(env_spec=env.spec)
    algo = DDPG(
        env=env,
        policy=policy,
        es=es,
        qf=qf,
        batch_size=32,
        max_path_length=100,
        epoch_length=1000,
        min_pool_size=10000,
        n_epochs=5,
        discount=0.99,
        scale_reward=0.01,
        qf_learning_rate=1e-3,
        policy_learning_rate=1e-4)
    algo.train()
Example 6: run_task
# Required import: from rllab.envs import normalized_env [as alias]
# Or: from rllab.envs.normalized_env import normalize [as alias]
def run_task(*_):
    env = normalize(MultiGoalEnv())
    observation_dim = np.prod(env.observation_space.shape)
    action_dim = np.prod(env.action_space.shape)
    qf = SVGDMLPQFunction(
        observation_dim=observation_dim,
        action_dim=action_dim,
        hidden_sizes=(100, 100),
    )
    policy = SVGDPolicy(
        observation_dim=observation_dim,
        action_dim=action_dim,
        hidden_sizes=(100, 100),
        output_nonlinearity=None,
    )
    algo = SoftQ(
        env=env,
        policy=policy,
        qf=qf,
        batch_size=64,
        n_epochs=100,
        epoch_length=100,
        min_pool_size=100,
        replay_pool_size=1000000,
        discount=0.99,
        alpha=0.1,
        max_path_length=30,
        qf_target_n_particles=16,
        qf_learning_rate=1e-3,
        policy_learning_rate=1e-3,
        kernel_n_particles=32,
        kernel_update_ratio=0.5,
        n_eval_episodes=10,
        soft_target_tau=1000,
        scale_reward=0.1,
        include_horizon_terminal_transitions=False,
        # plot=True,
    )
    algo.train()

# if __name__ == '__main__':
#     run_task()