

Python cmd_util.make_atari_env Method Code Examples

This article collects typical usage examples of the Python method baselines.common.cmd_util.make_atari_env. If you are wondering what cmd_util.make_atari_env does or how to use it in practice, the curated code examples below should help. You can also explore further usage examples from its containing module, baselines.common.cmd_util.


The following presents 10 code examples of cmd_util.make_atari_env, ordered by popularity by default.
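
A minimal sketch of the pattern these examples share, assuming the standard OpenAI baselines layout of that era (the env id 'BreakoutNoFrameskip-v4' is an illustrative choice, not taken from the examples below):

from baselines.common.cmd_util import make_atari_env
from baselines.common.vec_env.vec_frame_stack import VecFrameStack

# Build 8 parallel, seeded Atari environments, then stack the last 4
# frames so the policy can observe motion across consecutive frames.
env = VecFrameStack(make_atari_env('BreakoutNoFrameskip-v4', num_env=8, seed=0), 4)
obs = env.reset()  # typically shaped (8, 84, 84, 4) after the DeepMind wrappers
env.close()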

Example 1: train

# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env,
          v_ex_coef, r_ex_coef, r_in_coef, lr_alpha, lr_beta):
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    elif policy == 'cnn_int':
        policy_fn = CnnPolicyIntrinsicReward
    else:
        raise NotImplementedError
    env = VecFrameStack(make_atari_env(env_id, num_env, seed), 4)
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.01), lrschedule=lrschedule,
          v_ex_coef=v_ex_coef, r_ex_coef=r_ex_coef, r_in_coef=r_in_coef,
          lr_alpha=lr_alpha, lr_beta=lr_beta)
    env.close() 
Author: Hwhitetooth, Project: lirpg, Lines: 19, Source: run_atari.py

Example 2: train

# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, policy):

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True #pylint: disable=E1101
    tf.Session(config=config).__enter__()

    env = VecFrameStack(make_atari_env(env_id, 8, seed), 4)
    policy = {'cnn' : CnnPolicy, 'lstm' : LstmPolicy, 'lnlstm' : LnLstmPolicy}[policy]
    ppo2.learn(policy=policy, env=env, nsteps=128, nminibatches=4,
        lam=0.95, gamma=0.99, noptepochs=4, log_interval=1,
        ent_coef=.01,
        lr=lambda f : f * 2.5e-4,
        cliprange=lambda f : f * 0.1,
        total_timesteps=int(num_timesteps * 1.1)) 
Author: Hwhitetooth, Project: lirpg, Lines: 20, Source: run_atari.py
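
A side note on the schedule arguments above: baselines' ppo2.learn calls lr and cliprange with the fraction of training remaining, so lambda f: f * 2.5e-4 anneals the learning rate linearly toward zero (and likewise for the clip range). An illustrative check:

# Illustrative only: ppo2 evaluates these schedules each update with the
# remaining-progress fraction f (1.0 at the first update, approaching 0.0).
lr = lambda f: f * 2.5e-4
cliprange = lambda f: f * 0.1
print(lr(1.0), cliprange(1.0))  # 0.00025 0.1   (start of training)
print(lr(0.5), cliprange(0.5))  # 0.000125 0.05 (halfway through)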

Example 3: train

# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, policy, hparams):

    ncpu = multiprocessing.cpu_count()
    #if sys.platform == 'darwin': ncpu //= 2
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=hparams['gpu_fraction'])
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu,
                            gpu_options=gpu_options)
    config.gpu_options.allow_growth = False #pylint: disable=E1101
    tf.Session(config=config).__enter__()

    video_log_dir = os.path.join(hparams['base_dir'], 'videos', hparams['experiment_name'])
    env = VecFrameStack(make_atari_env(env_id, 8, seed, video_log_dir=video_log_dir, write_attention_video='attention' in policy, nsteps=128), 4)
    policy = {'cnn' : CnnPolicy, 'lstm' : LstmPolicy, 'lnlstm' : LnLstmPolicy, 'cnn_attention': CnnAttentionPolicy}[policy]
    ppo2.learn(policy=policy, env=env, nsteps=128, nminibatches=4,
        lam=0.95, gamma=0.99, noptepochs=4, log_interval=1,
        ent_coef=.01,
        lr=lambda f : f * 2.5e-4,
        cliprange=lambda f : f * 0.1,
        total_timesteps=int(num_timesteps * 1.1),
        hparams=hparams) 
Author: vik-goel, Project: MOREL, Lines: 24, Source: run_atari.py
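
Note: video_log_dir, write_attention_video, and nsteps are not parameters of the stock baselines make_atari_env; the MOREL project ships a modified cmd_util that accepts them (as with the hparams keyword in Example 8). With unmodified baselines, passing these keywords would raise a TypeError.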

Example 4: train

# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, policy):

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True #pylint: disable=E1101
    tf.Session(config=config).__enter__()

    env = VecFrameStack(make_atari_env(env_id, 8, seed), 4)
    policy = {'cnn' : CnnPolicy, 'lstm' : LstmPolicy, 'lnlstm' : LnLstmPolicy, 'mlp': MlpPolicy}[policy]
    ppo2.learn(policy=policy, env=env, nsteps=128, nminibatches=4,
        lam=0.95, gamma=0.99, noptepochs=4, log_interval=1,
        ent_coef=.01,
        lr=lambda f : f * 2.5e-4,
        cliprange=lambda f : f * 0.1,
        total_timesteps=int(num_timesteps * 1.1)) 
Author: flyyufelix, Project: sonic_contest, Lines: 20, Source: run_atari.py

Example 5: train

# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, num_cpu):
    env = VecFrameStack(make_atari_env(env_id, num_cpu, seed), 4)
    policy_fn = CnnPolicy
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), nprocs=num_cpu)
    env.close() 
Author: Hwhitetooth, Project: lirpg, Lines: 7, Source: run_atari.py

Example 6: train

# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_cpu):
    env = make_atari_env(env_id, num_cpu, seed)
    if policy == 'cnn':
        policy_fn = AcerCnnPolicy
    elif policy == 'lstm':
        policy_fn = AcerLstmPolicy
    else:
        print("Policy {} not implemented".format(policy))
        return
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), lrschedule=lrschedule)
    env.close() 
Author: Hwhitetooth, Project: lirpg, Lines: 13, Source: run_atari.py
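
Note: unlike the other examples, this one passes the environments to learn without a VecFrameStack wrapper. In the baselines version these examples target, ACER's runner performs its own frame stacking internally, so stacking in the env would be redundant.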

Example 7: train

# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env):
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    else:
        raise NotImplementedError
    env = VecFrameStack(make_atari_env(env_id, num_env, seed), 4)
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), lrschedule=lrschedule)
    env.close() 
Author: bowenliu16, Project: rl_graph_generation, Lines: 12, Source: run_atari.py

Example 8: train

# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env, ckpt_path, hparams):
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    elif policy == 'cnn_attention':
        policy_fn = CnnAttentionPolicy
    else:
        raise NotImplementedError

    video_log_dir = os.path.join(hparams['base_dir'], 'videos', hparams['experiment_name'])
    env = VecFrameStack(make_atari_env(env_id, num_env, seed, video_log_dir=video_log_dir, write_attention_video='attention' in policy, hparams=hparams), 4)

    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), lrschedule=lrschedule, ckpt_path=ckpt_path, hparams=hparams)
    env.close() 
Author: vik-goel, Project: MOREL, Lines: 17, Source: run_atari.py

Example 9: train

# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, num_cpu):
    env = VecFrameStack(make_atari_env(env_id, num_cpu, seed), 4)
    policy_fn = partial(CnnPolicy, one_dim_bias=True)
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), nprocs=num_cpu)
    env.close() 
Author: flyyufelix, Project: sonic_contest, Lines: 7, Source: run_atari.py

Example 10: build_env

# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = args.seed    

    env_type, env_id = get_env_type(args.env)
    if env_type == 'mujoco':
        get_session(tf.ConfigProto(allow_soft_placement=True,
                                   intra_op_parallelism_threads=1, 
                                   inter_op_parallelism_threads=1))

        if args.num_env:
            env = SubprocVecEnv([lambda: make_mujoco_env(env_id, seed + i if seed is not None else None, args.reward_scale) for i in range(args.num_env)])    
        else:
            env = DummyVecEnv([lambda: make_mujoco_env(env_id, seed, args.reward_scale)])

        env = VecNormalize(env)

    elif env_type == 'atari':
        if alg == 'acer':
            env = make_atari_env(env_id, nenv, seed)#, wrapper_kwargs={'clip_rewards': False})
        elif alg == 'deepq':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir())
            env = atari_wrappers.wrap_deepmind(env, frame_stack=True, scale=True)
        elif alg == 'trpo_mpi':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
            env = atari_wrappers.wrap_deepmind(env)
            # TODO check if the second seeding is necessary, and eventually remove
            env.seed(seed)
        else:
            frame_stack_size = 4
            env = VecFrameStack(make_atari_env(env_id, nenv, seed), frame_stack_size)

    elif env_type == 'retro':
        import retro
        gamestate = args.gamestate or 'Level1-1'
        env = retro_wrappers.make_retro(game=args.env, state=gamestate, max_episode_steps=10000, use_restricted_actions=retro.Actions.DISCRETE)
        env.seed(args.seed)
        env = bench.Monitor(env, logger.get_dir())
        env = retro_wrappers.wrap_deepmind_retro(env)
        
    elif env_type == 'classic_control':
        def make_env():
            e = gym.make(env_id)
            e = bench.Monitor(e, logger.get_dir(), allow_early_resets=True)
            e.seed(seed)
            return e
            
        env = DummyVecEnv([make_env])

    else:
        raise ValueError('Unknown env_type {}'.format(env_type))

    return env 
Author: MaxSobolMark, Project: HardRLWithYoutube, Lines: 63, Source: run.py
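
A hypothetical invocation of build_env, assuming a baselines run.py-style entry point; the Namespace fields below mirror the attributes the function reads, and the values are illustrative, not taken from the source:

from argparse import Namespace

# Stand-in for the parsed command-line arguments build_env expects.
args = Namespace(env='PongNoFrameskip-v4', alg='ppo2', num_env=4,
                 seed=0, reward_scale=1.0, gamestate=None)

env = build_env(args)  # non-acer Atari alg -> VecFrameStack over 4 envs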


Note: The baselines.common.cmd_util.make_atari_env examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and redistribution or use should follow each project's license. Do not reproduce without permission.