This article collects typical usage examples of the Python method baselines.common.cmd_util.make_mujoco_env. If you are wondering what cmd_util.make_mujoco_env does, how to call it, or what it looks like in real code, the curated examples here should help. You can also explore further usage examples from the module baselines.common.cmd_util.
The following shows 9 code examples of the cmd_util.make_mujoco_env method, sorted by popularity by default.
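Before the individual examples, here is a minimal sketch of the call pattern they all share: make_mujoco_env(env_id, seed) builds a single, seeded MuJoCo gym environment (typically wrapped with a Monitor for episode logging), which is then driven through the usual reset/step loop. The environment name 'HalfCheetah-v2', the random actions, and the step count below are illustrative choices of this sketch, not taken from any of the examples that follow.

from baselines.common.cmd_util import make_mujoco_env

# Build one seeded MuJoCo environment (requires gym and mujoco-py to be installed).
env = make_mujoco_env('HalfCheetah-v2', seed=0)

ob = env.reset()
for _ in range(100):
    # Sample a random action just to exercise the environment.
    action = env.action_space.sample()
    ob, reward, done, info = env.step(action)
    if done:
        ob = env.reset()
env.close()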
Example 1: train
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_mujoco_env [as alias]
def train(env_id, num_timesteps, seed):
    env = make_mujoco_env(env_id, seed)

    with tf.Session(config=tf.ConfigProto()):
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim, ac_dim)
        with tf.variable_scope("pi"):
            policy = GaussianMlpPolicy(ob_dim, ac_dim)

        learn(env, policy=policy, vf=vf,
              gamma=0.99, lam=0.97, timesteps_per_batch=2500,
              desired_kl=0.002,
              num_timesteps=num_timesteps, animate=False)

        env.close()
Example 2: train
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_mujoco_env [as alias]
def train(env_id, num_timesteps, seed):
    import baselines.common.tf_util as U
    sess = U.single_threaded_session()
    sess.__enter__()

    rank = MPI.COMM_WORLD.Get_rank()
    if rank == 0:
        logger.configure()
    else:
        logger.configure(format_strs=[])
        logger.set_level(logger.DISABLED)
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()

    def policy_fn(name, ob_space, ac_space):
        return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                         hid_size=32, num_hid_layers=2)

    env = make_mujoco_env(env_id, workerseed)
    trpo_mpi.learn(env, policy_fn, timesteps_per_batch=1024, max_kl=0.01, cg_iters=10, cg_damping=0.1,
                   max_timesteps=num_timesteps, gamma=0.99, lam=0.98, vf_iters=5, vf_stepsize=1e-3)
    env.close()
Example 3: main
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_mujoco_env [as alias]
def main():
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path', default=os.path.join(logger.get_dir(), 'humanoid_policy'))
    parser.set_defaults(num_timesteps=int(2e7))
    args = parser.parse_args()

    if not args.play:
        # train the model
        train(num_timesteps=args.num_timesteps, seed=args.seed, model_path=args.model_path)
    else:
        # construct the model object, load pre-trained model and render
        pi = train(num_timesteps=1, seed=args.seed)
        U.load_state(args.model_path)
        env = make_mujoco_env('Humanoid-v2', seed=0)

        ob = env.reset()
        while True:
            action = pi.act(stochastic=False, ob=ob)[0]
            ob, _, done, _ = env.step(action)
            env.render()
            if done:
                ob = env.reset()
Example 4: main
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_mujoco_env [as alias]
def main():
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path', default=os.path.join(logger.get_dir(), 'humanoid_policy'))
    parser.set_defaults(num_timesteps=int(2e7))
    args = parser.parse_args()

    if not args.play:
        # train the model
        train(num_timesteps=args.num_timesteps, seed=args.seed, model_path=args.model_path)
    else:
        # construct the model object, load pre-trained model and render
        pi = train(num_timesteps=1, seed=args.seed)
        U.load_state(args.model_path)
        env = make_mujoco_env('Humanoid-v2', seed=0)

        ob = env.reset()
        while True:
            action = pi.act(stochastic=False, ob=ob)[0]
            ob, _, done, _ = env.step(action)
            env.render()
            if done:
                ob = env.reset()
Example 5: main
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_mujoco_env [as alias]
def main():
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path', default=os.path.join(logger.get_dir(), 'humanoid_policy'))
    parser.set_defaults(num_timesteps=int(5e7))
    args = parser.parse_args()

    if not args.play:
        # train the model
        train(num_timesteps=args.num_timesteps, seed=args.seed, model_path=args.model_path)
    else:
        # construct the model object, load pre-trained model and render
        pi = train(num_timesteps=1, seed=args.seed)
        U.load_state(args.model_path)
        env = make_mujoco_env('Humanoid-v2', seed=0)

        ob = env.reset()
        while True:
            action = pi.act(stochastic=False, ob=ob)[0]
            ob, _, done, _ = env.step(action)
            env.render()
            if done:
                ob = env.reset()
Example 6: train
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_mujoco_env [as alias]
def train(env_id, num_timesteps, seed):
    from baselines.ppo1 import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()

    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                                    hid_size=64, num_hid_layers=2)

    env = make_mujoco_env(env_id, seed)
    pposgd_simple.learn(env, policy_fn,
                        max_timesteps=num_timesteps,
                        timesteps_per_actorbatch=2048,
                        clip_param=0.2, entcoeff=0.0,
                        optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
                        gamma=0.99, lam=0.95, schedule='linear',
                        )
    env.close()
Example 7: train
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_mujoco_env [as alias]
def train(num_timesteps, seed, model_path=None):
    env_id = 'Humanoid-v2'
    from baselines.ppo1 import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()

    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                                    hid_size=64, num_hid_layers=2)

    env = make_mujoco_env(env_id, seed)

    # parameters below were the best found in a simple random search
    # these are good enough to make humanoid walk, but whether those are
    # an absolute best or not is not certain
    env = RewScale(env, 0.1)
    pi = pposgd_simple.learn(env, policy_fn,
                             max_timesteps=num_timesteps,
                             timesteps_per_actorbatch=2048,
                             clip_param=0.2, entcoeff=0.0,
                             optim_epochs=10,
                             optim_stepsize=3e-4,
                             optim_batchsize=64,
                             gamma=0.99,
                             lam=0.95,
                             schedule='linear',
                             )
    env.close()
    if model_path:
        U.save_state(model_path)

    return pi
Example 8: test_lstm_example
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_mujoco_env [as alias]
def test_lstm_example():
    import tensorflow as tf
    from baselines.common import policies, models, cmd_util
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

    # create vectorized environment
    venv = DummyVecEnv([lambda: cmd_util.make_mujoco_env('Reacher-v2', seed=0)])

    with tf.Session() as sess:
        # build policy based on lstm network with 128 units
        policy = policies.build_policy(venv, models.lstm(128))(nbatch=1, nsteps=1)

        # initialize tensorflow variables
        sess.run(tf.global_variables_initializer())

        # prepare environment variables
        ob = venv.reset()
        state = policy.initial_state
        done = [False]
        step_counter = 0

        # run a single episode until the end (i.e. until done)
        while True:
            action, _, state, _ = policy.step(ob, S=state, M=done)
            ob, reward, done, _ = venv.step(action)
            step_counter += 1
            if done:
                break

        assert step_counter > 5
Author: quantumiracle | Project: Reinforcement_Learning_for_Traffic_Light_Control | Lines of code: 33 | Source file: test_doc_examples.py
Example 9: train
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_mujoco_env [as alias]
def train(num_timesteps, seed, model_path=None):
    env_id = 'Humanoid-v2'
    from baselines.ppo1 import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()

    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                                    hid_size=64, num_hid_layers=2)

    env = make_mujoco_env(env_id, seed)

    # parameters below were the best found in a simple random search
    # these are good enough to make humanoid walk, but whether those are
    # an absolute best or not is not certain
    env = RewScale(env, 0.1)
    pi = pposgd_simple.learn(env, policy_fn,
                             max_timesteps=num_timesteps,
                             timesteps_per_actorbatch=2048,
                             clip_param=0.2, entcoeff=0.0,
                             optim_epochs=10,
                             optim_stepsize=3e-4,
                             optim_batchsize=64,
                             gamma=0.99,
                             lam=0.95,
                             schedule='linear',
                             )
    env.close()
    if model_path:
        U.save_state(model_path)

    return pi