當前位置: 首頁>>代碼示例>>Python>>正文


Python ppo2.learn方法代碼示例

本文整理匯總了Python中baselines.ppo2.ppo2.learn方法的典型用法代碼示例。如果您正苦於以下問題:Python ppo2.learn方法的具體用法?Python ppo2.learn怎麽用?Python ppo2.learn使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在baselines.ppo2.ppo2的用法示例。


在下文中一共展示了ppo2.learn方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: train

# 需要導入模塊: from baselines.ppo2 import ppo2 [as 別名]
# 或者: from baselines.ppo2.ppo2 import learn [as 別名]
def train(env_id, num_timesteps, seed, policy):

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True #pylint: disable=E1101
    tf.Session(config=config).__enter__()

    env = VecFrameStack(make_atari_env(env_id, 8, seed), 4)
    policy = {'cnn' : CnnPolicy, 'lstm' : LstmPolicy, 'lnlstm' : LnLstmPolicy}[policy]
    ppo2.learn(policy=policy, env=env, nsteps=128, nminibatches=4,
        lam=0.95, gamma=0.99, noptepochs=4, log_interval=1,
        ent_coef=.01,
        lr=lambda f : f * 2.5e-4,
        cliprange=lambda f : f * 0.1,
        total_timesteps=int(num_timesteps * 1.1)) 
開發者ID:Hwhitetooth,項目名稱:lirpg,代碼行數:20,代碼來源:run_atari.py

示例2: test_microbatches

# 需要導入模塊: from baselines.ppo2 import ppo2 [as 別名]
# 或者: from baselines.ppo2.ppo2 import learn [as 別名]
def test_microbatches():
    def env_fn():
        env = gym.make('CartPole-v0')
        env.seed(0)
        return env

    learn_fn = partial(learn, network='mlp', nsteps=32, total_timesteps=32, seed=0)

    env_ref = DummyVecEnv([env_fn])
    sess_ref = make_session(make_default=True, graph=tf.Graph())
    learn_fn(env=env_ref)
    vars_ref = {v.name: sess_ref.run(v) for v in tf.trainable_variables()}

    env_test = DummyVecEnv([env_fn])
    sess_test = make_session(make_default=True, graph=tf.Graph())
    learn_fn(env=env_test, model_fn=partial(MicrobatchedModel, microbatch_size=2))
    vars_test = {v.name: sess_test.run(v) for v in tf.trainable_variables()}

    for v in vars_ref:
        np.testing.assert_allclose(vars_ref[v], vars_test[v], atol=1e-3) 
開發者ID:hiwonjoon,項目名稱:ICML2019-TREX,代碼行數:22,代碼來源:test_microbatches.py

示例3: train

# 需要導入模塊: from baselines.ppo2 import ppo2 [as 別名]
# 或者: from baselines.ppo2.ppo2 import learn [as 別名]
def train(env_id, num_timesteps, seed, policy, hparams):

    ncpu = multiprocessing.cpu_count()
    #if sys.platform == 'darwin': ncpu //= 2
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=hparams['gpu_fraction'])
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu,
                            gpu_options=gpu_options)
    config.gpu_options.allow_growth = False #pylint: disable=E1101
    tf.Session(config=config).__enter__()

    video_log_dir = os.path.join(hparams['base_dir'], 'videos', hparams['experiment_name'])
    env = VecFrameStack(make_atari_env(env_id, 8, seed, video_log_dir=video_log_dir, write_attention_video='attention' in policy, nsteps=128), 4)
    policy = {'cnn' : CnnPolicy, 'lstm' : LstmPolicy, 'lnlstm' : LnLstmPolicy, 'cnn_attention': CnnAttentionPolicy}[policy]
    ppo2.learn(policy=policy, env=env, nsteps=128, nminibatches=4,
        lam=0.95, gamma=0.99, noptepochs=4, log_interval=1,
        ent_coef=.01,
        lr=lambda f : f * 2.5e-4,
        cliprange=lambda f : f * 0.1,
        total_timesteps=int(num_timesteps * 1.1),
        hparams=hparams) 
開發者ID:vik-goel,項目名稱:MOREL,代碼行數:24,代碼來源:run_atari.py

示例4: main

# 需要導入模塊: from baselines.ppo2 import ppo2 [as 別名]
# 或者: from baselines.ppo2.ppo2 import learn [as 別名]
def main():
    """Run PPO until the environment throws an exception."""
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True # pylint: disable=E1101
    with tf.Session(config=config):
        # Take more timesteps than we need to be sure that
        # we stop due to an exception.
        ppo2.learn(policy=policies.CnnPolicy,
                   env=DummyVecEnv([make_env]),
                   nsteps=4096,
                   nminibatches=8,
                   lam=0.95,
                   gamma=0.99,
                   noptepochs=3,
                   log_interval=1,
                   ent_coef=0.01,
                   lr=lambda _: 2e-4,
                   cliprange=lambda _: 0.1,
                   total_timesteps=int(1e7),
                   load_path='./pretrain_model') # Set to None if no pretrained model 
開發者ID:flyyufelix,項目名稱:sonic_contest,代碼行數:22,代碼來源:ppo2_agent.py

示例5: main

# 需要導入模塊: from baselines.ppo2 import ppo2 [as 別名]
# 或者: from baselines.ppo2.ppo2 import learn [as 別名]
def main():
    """Run PPO until the environment throws an exception."""
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True # pylint: disable=E1101
    env_fns, env_names = create_envs()
    with tf.Session(config=config):
        # Take more timesteps than we need to be sure that
        # we stop due to an exception.
        ppo2.learn(policy=policies.CnnPolicy,
                   env=SubprocVecEnv(env_fns),
                   nsteps=4096, 
                   nminibatches=8, 
                   lam=0.95,
                   gamma=0.99,
                   noptepochs=3, 
                   log_interval=1, 
                   ent_coef=0.01,
                   lr=lambda _: 2e-4,
                   cliprange=lambda _: 0.1, 
                   total_timesteps=int(1e9),
                   save_interval=10,
                   save_path='./checkpoints_joint_ppo2',
                   load_path=None) 
開發者ID:flyyufelix,項目名稱:sonic_contest,代碼行數:25,代碼來源:ppo2_joint.py

示例6: train

# 需要導入模塊: from baselines.ppo2 import ppo2 [as 別名]
# 或者: from baselines.ppo2.ppo2 import learn [as 別名]
def train(env_id, num_timesteps, seed, policy):

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True #pylint: disable=E1101
    tf.Session(config=config).__enter__()

    env = VecFrameStack(make_atari_env(env_id, 8, seed), 4)
    policy = {'cnn' : CnnPolicy, 'lstm' : LstmPolicy, 'lnlstm' : LnLstmPolicy, 'mlp': MlpPolicy}[policy]
    ppo2.learn(policy=policy, env=env, nsteps=128, nminibatches=4,
        lam=0.95, gamma=0.99, noptepochs=4, log_interval=1,
        ent_coef=.01,
        lr=lambda f : f * 2.5e-4,
        cliprange=lambda f : f * 0.1,
        total_timesteps=int(num_timesteps * 1.1)) 
開發者ID:flyyufelix,項目名稱:sonic_contest,代碼行數:20,代碼來源:run_atari.py

示例7: run

# 需要導入模塊: from baselines.ppo2 import ppo2 [as 別名]
# 或者: from baselines.ppo2.ppo2 import learn [as 別名]
def run(bsuite_id: str) -> str:
  """Runs a PPO agent on a given bsuite environment, logging to CSV."""

  def _load_env():
    raw_env = bsuite.load_and_record(
      bsuite_id=bsuite_id,
      save_path=FLAGS.save_path,
      logging_mode=FLAGS.logging_mode,
      overwrite=FLAGS.overwrite,
  )
    if FLAGS.verbose:
      raw_env = terminal_logging.wrap_environment(raw_env, log_every=True)  # pytype: disable=wrong-arg-types
    return gym_wrapper.GymFromDMEnv(raw_env)
  env = dummy_vec_env.DummyVecEnv([_load_env])

  ppo2.learn(
      env=env,
      network=FLAGS.network,
      lr=FLAGS.learning_rate,
      total_timesteps=FLAGS.total_timesteps,  # make sure to run enough steps
      nsteps=FLAGS.nsteps,
      gamma=FLAGS.agent_discount,
  )

  return bsuite_id 
開發者ID:deepmind,項目名稱:bsuite,代碼行數:27,代碼來源:run.py

示例8: test_identity

# 需要導入模塊: from baselines.ppo2 import ppo2 [as 別名]
# 或者: from baselines.ppo2.ppo2 import learn [as 別名]
def test_identity(learn_func):
    '''
    Test if the algorithm (with a given policy) 
    can learn an identity transformation (i.e. return observation as an action)
    '''
    np.random.seed(0)
    np_random.seed(0)
    random.seed(0)

    env = DummyVecEnv([lambda: IdentityEnv(10)])

    with tf.Graph().as_default(), tf.Session().as_default():
        tf.set_random_seed(0)
        model = learn_func(env)

        N_TRIALS = 1000
        sum_rew = 0
        obs = env.reset()
        for i in range(N_TRIALS):
            obs, rew, done, _ = env.step(model.step(obs)[0])
            sum_rew += rew

        assert sum_rew > 0.9 * N_TRIALS 
開發者ID:junhyukoh,項目名稱:self-imitation-learning,代碼行數:25,代碼來源:test_identity.py

示例9: test_microbatches

# 需要導入模塊: from baselines.ppo2 import ppo2 [as 別名]
# 或者: from baselines.ppo2.ppo2 import learn [as 別名]
def test_microbatches():
    def env_fn():
        env = gym.make('CartPole-v0')
        env.seed(0)
        return env

    learn_fn = partial(learn, network='mlp', nsteps=32, total_timesteps=32, seed=0)

    env_ref = DummyVecEnv([env_fn])
    sess_ref = make_session(make_default=True, graph=tf.Graph())
    learn_fn(env=env_ref)
    vars_ref = {v.name: sess_ref.run(v) for v in tf.trainable_variables()}

    env_test = DummyVecEnv([env_fn])
    sess_test = make_session(make_default=True, graph=tf.Graph())
    learn_fn(env=env_test, model_fn=partial(MicrobatchedModel, microbatch_size=2))
    # learn_fn(env=env_test)
    vars_test = {v.name: sess_test.run(v) for v in tf.trainable_variables()}

    for v in vars_ref:
        np.testing.assert_allclose(vars_ref[v], vars_test[v], atol=3e-3) 
開發者ID:openai,項目名稱:baselines,代碼行數:23,代碼來源:test_microbatches.py

示例10: main

# 需要導入模塊: from baselines.ppo2 import ppo2 [as 別名]
# 或者: from baselines.ppo2.ppo2 import learn [as 別名]
def main():
    """Run PPO until the environment throws an exception."""
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True # pylint: disable=E1101
    with tf.Session(config=config):
        # Take more timesteps than we need to be sure that
        # we stop due to an exception.
        ppo2.learn(policy=policies.CnnPolicy,
                   env=DummyVecEnv([make_env]),
                   nsteps=4096,
                   nminibatches=8,
                   lam=0.95,
                   gamma=0.99,
                   noptepochs=3,
                   log_interval=1,
                   ent_coef=0.01,
                   lr=lambda _: 2e-4,
                   cliprange=lambda _: 0.1,
                   total_timesteps=int(1e7)) 
開發者ID:openai,項目名稱:retro-baselines,代碼行數:21,代碼來源:ppo2_agent.py

示例11: train

# 需要導入模塊: from baselines.ppo2 import ppo2 [as 別名]
# 或者: from baselines.ppo2.ppo2 import learn [as 別名]
def train(env_id, num_timesteps, seed, policy, r_ex_coef, r_in_coef, lr_alpha, lr_beta, reward_freq):
    from baselines.common import set_global_seeds
    from baselines.common.vec_env.vec_normalize import VecNormalize
    from baselines.ppo2 import ppo2
    from baselines.ppo2.policies import MlpPolicy, MlpPolicyIntrinsicReward
    import gym
    import tensorflow as tf
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
    ncpu = 1
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True
    tf.Session(config=config).__enter__()
    def make_env():
        env = gym.make(env_id)
        env = bench.Monitor(env, logger.get_dir())
        return env
    env = DummyVecEnv([make_env])
    env = VecNormalize(env)

    set_global_seeds(seed)
    if policy == 'mlp':
        policy = MlpPolicy
    elif policy == 'mlp_int':
        policy = MlpPolicyIntrinsicReward
    else:
        raise NotImplementedError
    ppo2.learn(policy=policy, env=env, nsteps=2048, nminibatches=32,
        lam=0.95, gamma=0.99, noptepochs=10, log_interval=1,
        ent_coef=0.0,
        lr_alpha=lr_alpha,
        cliprange=0.2,
        total_timesteps=num_timesteps,
        r_ex_coef=r_ex_coef,
        r_in_coef=r_in_coef,
        lr_beta=lr_beta,
        reward_freq=reward_freq) 
開發者ID:Hwhitetooth,項目名稱:lirpg,代碼行數:40,代碼來源:run_mujoco.py

示例12: train

# 需要導入模塊: from baselines.ppo2 import ppo2 [as 別名]
# 或者: from baselines.ppo2.ppo2 import learn [as 別名]
def train(env_id, num_timesteps, seed):
    from baselines.common import set_global_seeds
    from baselines.common.vec_env.vec_normalize import VecNormalize
    from baselines.ppo2 import ppo2
    from baselines.ppo2.policies import MlpPolicy
    import gym
    import tensorflow as tf
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
    ncpu = 1
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    tf.Session(config=config).__enter__()
    def make_env():
        env = gym.make(env_id)
        env = bench.Monitor(env, logger.get_dir())
        return env
    env = DummyVecEnv([make_env])
    env = VecNormalize(env)

    set_global_seeds(seed)
    policy = MlpPolicy
    ppo2.learn(policy=policy, env=env, nsteps=2048, nminibatches=32,
        lam=0.95, gamma=0.99, noptepochs=10, log_interval=1,
        ent_coef=0.0,
        lr=3e-4,
        cliprange=0.2,
        total_timesteps=num_timesteps) 
開發者ID:bowenliu16,項目名稱:rl_graph_generation,代碼行數:30,代碼來源:run_mujoco.py

示例13: train

# 需要導入模塊: from baselines.ppo2 import ppo2 [as 別名]
# 或者: from baselines.ppo2.ppo2 import learn [as 別名]
def train(env_id, num_timesteps, seed):
    from baselines.common import set_global_seeds
    from baselines.common.vec_env.vec_normalize import VecNormalize
    from baselines.ppo2 import ppo2
    from baselines.ppo2.policies import MlpPolicy
    import gym
    import tensorflow as tf
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
    ncpu = 1
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    tf.Session(config=config).__enter__()

    def make_env():
        env = gym.make(env_id)
        env = bench.Monitor(env, logger.get_dir(), allow_early_resets=True)
        return env

    env = DummyVecEnv([make_env])
    env = VecNormalize(env)

    set_global_seeds(seed)
    policy = MlpPolicy
    model = ppo2.learn(policy=policy, env=env, nsteps=2048, nminibatches=32,
                       lam=0.95, gamma=0.99, noptepochs=10, log_interval=1,
                       ent_coef=0.0,
                       lr=3e-4,
                       cliprange=0.2,
                       total_timesteps=num_timesteps)

    return model, env 
開發者ID:junhyukoh,項目名稱:self-imitation-learning,代碼行數:34,代碼來源:run_mujoco.py

示例14: train

# 需要導入模塊: from baselines.ppo2 import ppo2 [as 別名]
# 或者: from baselines.ppo2.ppo2 import learn [as 別名]
def train(_):
  """Trains a PPO2 policy."""
  vec_env = SubprocVecEnv([
      (lambda _i=i: create_single_football_env(_i))
      for i in range(FLAGS.num_envs)
  ], context=None)

  # Import tensorflow after we create environments. TF is not fork sake, and
  # we could be using TF as part of environment if one of the players is
  # controled by an already trained model.
  import tensorflow.compat.v1 as tf
  ncpu = multiprocessing.cpu_count()
  config = tf.ConfigProto(allow_soft_placement=True,
                          intra_op_parallelism_threads=ncpu,
                          inter_op_parallelism_threads=ncpu)
  config.gpu_options.allow_growth = True
  tf.Session(config=config).__enter__()

  ppo2.learn(network=FLAGS.policy,
             total_timesteps=FLAGS.num_timesteps,
             env=vec_env,
             seed=FLAGS.seed,
             nsteps=FLAGS.nsteps,
             nminibatches=FLAGS.nminibatches,
             noptepochs=FLAGS.noptepochs,
             max_grad_norm=FLAGS.max_grad_norm,
             gamma=FLAGS.gamma,
             ent_coef=FLAGS.ent_coef,
             lr=FLAGS.lr,
             log_interval=1,
             save_interval=FLAGS.save_interval,
             cliprange=FLAGS.cliprange,
             load_path=FLAGS.load_path) 
開發者ID:google-research,項目名稱:football,代碼行數:35,代碼來源:run_ppo2.py

示例15: train

# 需要導入模塊: from baselines.ppo2 import ppo2 [as 別名]
# 或者: from baselines.ppo2.ppo2 import learn [as 別名]
def train(env_id, num_timesteps, seed, policy):
    from baselines.common import set_global_seeds
    from baselines.common.atari_wrappers import make_atari, wrap_deepmind
    from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
    from baselines.common.vec_env.vec_frame_stack import VecFrameStack
    from baselines.ppo2 import ppo2
    from baselines.ppo2.policies import CnnPolicy, LstmPolicy, LnLstmPolicy
    import gym
    import logging
    import multiprocessing
    import os.path as osp
    import tensorflow as tf
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True #pylint: disable=E1101
    gym.logger.setLevel(logging.WARN)
    tf.Session(config=config).__enter__()

    def make_env(rank):
        def env_fn():
            env = make_atari(env_id)
            env.seed(seed + rank)
            env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
            return wrap_deepmind(env)
        return env_fn
    nenvs = 8
    env = SubprocVecEnv([make_env(i) for i in range(nenvs)])
    set_global_seeds(seed)
    env = VecFrameStack(env, 4)
    policy = {'cnn' : CnnPolicy, 'lstm' : LstmPolicy, 'lnlstm' : LnLstmPolicy}[policy]
    ppo2.learn(policy=policy, env=env, nsteps=128, nminibatches=4,
        lam=0.95, gamma=0.99, noptepochs=4, log_interval=1,
        ent_coef=.01,
        lr=lambda f : f * 2.5e-4,
        cliprange=lambda f : f * 0.1,
        total_timesteps=int(num_timesteps * 1.1)) 
開發者ID:cxxgtxy,項目名稱:deeprl-baselines,代碼行數:41,代碼來源:run_atari.py


注:本文中的baselines.ppo2.ppo2.learn方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。