當前位置: 首頁>>代碼示例>>Python>>正文


Python cmd_util.make_mujoco_env方法代碼示例

本文整理匯總了Python中baselines.common.cmd_util.make_mujoco_env方法的典型用法代碼示例。如果您正苦於以下問題:Python cmd_util.make_mujoco_env方法的具體用法?Python cmd_util.make_mujoco_env怎麽用?Python cmd_util.make_mujoco_env使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在baselines.common.cmd_util的用法示例。


在下文中一共展示了cmd_util.make_mujoco_env方法的9個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: train

# 需要導入模塊: from baselines.common import cmd_util [as 別名]
# 或者: from baselines.common.cmd_util import make_mujoco_env [as 別名]
def train(env_id, num_timesteps, seed):
    env = make_mujoco_env(env_id, seed)

    with tf.Session(config=tf.ConfigProto()):
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim, ac_dim)
        with tf.variable_scope("pi"):
            policy = GaussianMlpPolicy(ob_dim, ac_dim)

        learn(env, policy=policy, vf=vf,
            gamma=0.99, lam=0.97, timesteps_per_batch=2500,
            desired_kl=0.002,
            num_timesteps=num_timesteps, animate=False)

        env.close() 
開發者ID:Hwhitetooth,項目名稱:lirpg,代碼行數:19,代碼來源:run_mujoco.py

示例2: train

# 需要導入模塊: from baselines.common import cmd_util [as 別名]
# 或者: from baselines.common.cmd_util import make_mujoco_env [as 別名]
def train(env_id, num_timesteps, seed):
    import baselines.common.tf_util as U
    sess = U.single_threaded_session()
    sess.__enter__()

    rank = MPI.COMM_WORLD.Get_rank()
    if rank == 0:
        logger.configure()
    else:
        logger.configure(format_strs=[])
        logger.set_level(logger.DISABLED)
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()
    def policy_fn(name, ob_space, ac_space):
        return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
            hid_size=32, num_hid_layers=2)
    env = make_mujoco_env(env_id, workerseed)
    trpo_mpi.learn(env, policy_fn, timesteps_per_batch=1024, max_kl=0.01, cg_iters=10, cg_damping=0.1,
        max_timesteps=num_timesteps, gamma=0.99, lam=0.98, vf_iters=5, vf_stepsize=1e-3)
    env.close() 
開發者ID:Hwhitetooth,項目名稱:lirpg,代碼行數:21,代碼來源:run_mujoco.py

示例3: main

# 需要導入模塊: from baselines.common import cmd_util [as 別名]
# 或者: from baselines.common.cmd_util import make_mujoco_env [as 別名]
def main():
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path', default=os.path.join(logger.get_dir(), 'humanoid_policy'))
    parser.set_defaults(num_timesteps=int(2e7))
   
    args = parser.parse_args()
    
    if not args.play:
        # train the model
        train(num_timesteps=args.num_timesteps, seed=args.seed, model_path=args.model_path)
    else:       
        # construct the model object, load pre-trained model and render
        pi = train(num_timesteps=1, seed=args.seed)
        U.load_state(args.model_path)
        env = make_mujoco_env('Humanoid-v2', seed=0)

        ob = env.reset()        
        while True:
            action = pi.act(stochastic=False, ob=ob)[0]
            ob, _, done, _ =  env.step(action)
            env.render()
            if done:
                ob = env.reset() 
開發者ID:MaxSobolMark,項目名稱:HardRLWithYoutube,代碼行數:26,代碼來源:run_humanoid.py

示例4: main

# 需要導入模塊: from baselines.common import cmd_util [as 別名]
# 或者: from baselines.common.cmd_util import make_mujoco_env [as 別名]
def main():
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path', default=os.path.join(logger.get_dir(), 'humanoid_policy'))
    parser.set_defaults(num_timesteps=int(2e7))

    args = parser.parse_args()

    if not args.play:
        # train the model
        train(num_timesteps=args.num_timesteps, seed=args.seed, model_path=args.model_path)
    else:
        # construct the model object, load pre-trained model and render
        pi = train(num_timesteps=1, seed=args.seed)
        U.load_state(args.model_path)
        env = make_mujoco_env('Humanoid-v2', seed=0)

        ob = env.reset()
        while True:
            action = pi.act(stochastic=False, ob=ob)[0]
            ob, _, done, _ =  env.step(action)
            env.render()
            if done:
                ob = env.reset() 
開發者ID:hiwonjoon,項目名稱:ICML2019-TREX,代碼行數:26,代碼來源:run_humanoid.py

示例5: main

# 需要導入模塊: from baselines.common import cmd_util [as 別名]
# 或者: from baselines.common.cmd_util import make_mujoco_env [as 別名]
def main():
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path', default=os.path.join(logger.get_dir(), 'humanoid_policy'))
    parser.set_defaults(num_timesteps=int(5e7))

    args = parser.parse_args()

    if not args.play:
        # train the model
        train(num_timesteps=args.num_timesteps, seed=args.seed, model_path=args.model_path)
    else:
        # construct the model object, load pre-trained model and render
        pi = train(num_timesteps=1, seed=args.seed)
        U.load_state(args.model_path)
        env = make_mujoco_env('Humanoid-v2', seed=0)

        ob = env.reset()
        while True:
            action = pi.act(stochastic=False, ob=ob)[0]
            ob, _, done, _ =  env.step(action)
            env.render()
            if done:
                ob = env.reset() 
開發者ID:openai,項目名稱:baselines,代碼行數:26,代碼來源:run_humanoid.py

示例6: train

# 需要導入模塊: from baselines.common import cmd_util [as 別名]
# 或者: from baselines.common.cmd_util import make_mujoco_env [as 別名]
def train(env_id, num_timesteps, seed):
    from baselines.ppo1 import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()
    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
            hid_size=64, num_hid_layers=2)
    env = make_mujoco_env(env_id, seed)
    pposgd_simple.learn(env, policy_fn,
            max_timesteps=num_timesteps,
            timesteps_per_actorbatch=2048,
            clip_param=0.2, entcoeff=0.0,
            optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
            gamma=0.99, lam=0.95, schedule='linear',
        )
    env.close() 
開發者ID:Hwhitetooth,項目名稱:lirpg,代碼行數:17,代碼來源:run_mujoco.py

示例7: train

# 需要導入模塊: from baselines.common import cmd_util [as 別名]
# 或者: from baselines.common.cmd_util import make_mujoco_env [as 別名]
def train(num_timesteps, seed, model_path=None):
    env_id = 'Humanoid-v2'
    from baselines.ppo1 import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()
    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
            hid_size=64, num_hid_layers=2)
    env = make_mujoco_env(env_id, seed)

    # parameters below were the best found in a simple random search
    # these are good enough to make humanoid walk, but whether those are
    # an absolute best or not is not certain
    env = RewScale(env, 0.1)
    pi = pposgd_simple.learn(env, policy_fn,
            max_timesteps=num_timesteps,
            timesteps_per_actorbatch=2048,
            clip_param=0.2, entcoeff=0.0,
            optim_epochs=10, 
            optim_stepsize=3e-4, 
            optim_batchsize=64, 
            gamma=0.99, 
            lam=0.95,
            schedule='linear',
        )
    env.close()
    if model_path:
        U.save_state(model_path)
        
    return pi 
開發者ID:MaxSobolMark,項目名稱:HardRLWithYoutube,代碼行數:31,代碼來源:run_humanoid.py

示例8: test_lstm_example

# 需要導入模塊: from baselines.common import cmd_util [as 別名]
# 或者: from baselines.common.cmd_util import make_mujoco_env [as 別名]
def test_lstm_example():
    import tensorflow as tf
    from baselines.common import policies, models, cmd_util
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

    # create vectorized environment
    venv = DummyVecEnv([lambda: cmd_util.make_mujoco_env('Reacher-v2', seed=0)])

    with tf.Session() as sess:
        # build policy based on lstm network with 128 units
        policy = policies.build_policy(venv, models.lstm(128))(nbatch=1, nsteps=1)

        # initialize tensorflow variables
        sess.run(tf.global_variables_initializer())

        # prepare environment variables
        ob = venv.reset()
        state = policy.initial_state
        done = [False]
        step_counter = 0

        # run a single episode until the end (i.e. until done)
        while True:
            action, _, state, _ = policy.step(ob, S=state, M=done)
            ob, reward, done, _ = venv.step(action)
            step_counter += 1
            if done:
                break


        assert step_counter > 5 
開發者ID:quantumiracle,項目名稱:Reinforcement_Learning_for_Traffic_Light_Control,代碼行數:33,代碼來源:test_doc_examples.py

示例9: train

# 需要導入模塊: from baselines.common import cmd_util [as 別名]
# 或者: from baselines.common.cmd_util import make_mujoco_env [as 別名]
def train(num_timesteps, seed, model_path=None):
    env_id = 'Humanoid-v2'
    from baselines.ppo1 import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()
    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
            hid_size=64, num_hid_layers=2)
    env = make_mujoco_env(env_id, seed)

    # parameters below were the best found in a simple random search
    # these are good enough to make humanoid walk, but whether those are
    # an absolute best or not is not certain
    env = RewScale(env, 0.1)
    pi = pposgd_simple.learn(env, policy_fn,
            max_timesteps=num_timesteps,
            timesteps_per_actorbatch=2048,
            clip_param=0.2, entcoeff=0.0,
            optim_epochs=10,
            optim_stepsize=3e-4,
            optim_batchsize=64,
            gamma=0.99,
            lam=0.95,
            schedule='linear',
        )
    env.close()
    if model_path:
        U.save_state(model_path)

    return pi 
開發者ID:hiwonjoon,項目名稱:ICML2019-TREX,代碼行數:31,代碼來源:run_humanoid.py


注:本文中的baselines.common.cmd_util.make_mujoco_env方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。