当前位置: 首页>>代码示例>>Python>>正文


Python cmd_util.make_atari_env方法代码示例

本文整理汇总了Python中baselines.common.cmd_util.make_atari_env方法的典型用法代码示例。如果您正苦于以下问题:Python cmd_util.make_atari_env方法的具体用法?Python cmd_util.make_atari_env怎么用?Python cmd_util.make_atari_env使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在baselines.common.cmd_util的用法示例。


在下文中一共展示了cmd_util.make_atari_env方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: train

# 需要导入模块: from baselines.common import cmd_util [as 别名]
# 或者: from baselines.common.cmd_util import make_atari_env [as 别名]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env,
          v_ex_coef, r_ex_coef, r_in_coef, lr_alpha, lr_beta):
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    elif policy == 'cnn_int':
        policy_fn = CnnPolicyIntrinsicReward
    else:
        raise NotImplementedError
    env = VecFrameStack(make_atari_env(env_id, num_env, seed), 4)
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.01), lrschedule=lrschedule,
          v_ex_coef=v_ex_coef, r_ex_coef=r_ex_coef, r_in_coef=r_in_coef,
          lr_alpha=lr_alpha, lr_beta=lr_beta)
    env.close() 
开发者ID:Hwhitetooth,项目名称:lirpg,代码行数:19,代码来源:run_atari.py

示例2: train

# 需要导入模块: from baselines.common import cmd_util [as 别名]
# 或者: from baselines.common.cmd_util import make_atari_env [as 别名]
def train(env_id, num_timesteps, seed, policy):

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True #pylint: disable=E1101
    tf.Session(config=config).__enter__()

    env = VecFrameStack(make_atari_env(env_id, 8, seed), 4)
    policy = {'cnn' : CnnPolicy, 'lstm' : LstmPolicy, 'lnlstm' : LnLstmPolicy}[policy]
    ppo2.learn(policy=policy, env=env, nsteps=128, nminibatches=4,
        lam=0.95, gamma=0.99, noptepochs=4, log_interval=1,
        ent_coef=.01,
        lr=lambda f : f * 2.5e-4,
        cliprange=lambda f : f * 0.1,
        total_timesteps=int(num_timesteps * 1.1)) 
开发者ID:Hwhitetooth,项目名称:lirpg,代码行数:20,代码来源:run_atari.py

示例3: train

# 需要导入模块: from baselines.common import cmd_util [as 别名]
# 或者: from baselines.common.cmd_util import make_atari_env [as 别名]
def train(env_id, num_timesteps, seed, policy, hparams):

    ncpu = multiprocessing.cpu_count()
    #if sys.platform == 'darwin': ncpu //= 2
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=hparams['gpu_fraction'])
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu,
                            gpu_options=gpu_options)
    config.gpu_options.allow_growth = False #pylint: disable=E1101
    tf.Session(config=config).__enter__()

    video_log_dir = os.path.join(hparams['base_dir'], 'videos', hparams['experiment_name'])
    env = VecFrameStack(make_atari_env(env_id, 8, seed, video_log_dir=video_log_dir, write_attention_video='attention' in policy, nsteps=128), 4)
    policy = {'cnn' : CnnPolicy, 'lstm' : LstmPolicy, 'lnlstm' : LnLstmPolicy, 'cnn_attention': CnnAttentionPolicy}[policy]
    ppo2.learn(policy=policy, env=env, nsteps=128, nminibatches=4,
        lam=0.95, gamma=0.99, noptepochs=4, log_interval=1,
        ent_coef=.01,
        lr=lambda f : f * 2.5e-4,
        cliprange=lambda f : f * 0.1,
        total_timesteps=int(num_timesteps * 1.1),
        hparams=hparams) 
开发者ID:vik-goel,项目名称:MOREL,代码行数:24,代码来源:run_atari.py

示例4: train

# 需要导入模块: from baselines.common import cmd_util [as 别名]
# 或者: from baselines.common.cmd_util import make_atari_env [as 别名]
def train(env_id, num_timesteps, seed, policy):

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True #pylint: disable=E1101
    tf.Session(config=config).__enter__()

    env = VecFrameStack(make_atari_env(env_id, 8, seed), 4)
    policy = {'cnn' : CnnPolicy, 'lstm' : LstmPolicy, 'lnlstm' : LnLstmPolicy, 'mlp': MlpPolicy}[policy]
    ppo2.learn(policy=policy, env=env, nsteps=128, nminibatches=4,
        lam=0.95, gamma=0.99, noptepochs=4, log_interval=1,
        ent_coef=.01,
        lr=lambda f : f * 2.5e-4,
        cliprange=lambda f : f * 0.1,
        total_timesteps=int(num_timesteps * 1.1)) 
开发者ID:flyyufelix,项目名称:sonic_contest,代码行数:20,代码来源:run_atari.py

示例5: train

# 需要导入模块: from baselines.common import cmd_util [as 别名]
# 或者: from baselines.common.cmd_util import make_atari_env [as 别名]
def train(env_id, num_timesteps, seed, num_cpu):
    env = VecFrameStack(make_atari_env(env_id, num_cpu, seed), 4)
    policy_fn = CnnPolicy
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), nprocs=num_cpu)
    env.close() 
开发者ID:Hwhitetooth,项目名称:lirpg,代码行数:7,代码来源:run_atari.py

示例6: train

# 需要导入模块: from baselines.common import cmd_util [as 别名]
# 或者: from baselines.common.cmd_util import make_atari_env [as 别名]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_cpu):
    env = make_atari_env(env_id, num_cpu, seed)
    if policy == 'cnn':
        policy_fn = AcerCnnPolicy
    elif policy == 'lstm':
        policy_fn = AcerLstmPolicy
    else:
        print("Policy {} not implemented".format(policy))
        return
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), lrschedule=lrschedule)
    env.close() 
开发者ID:Hwhitetooth,项目名称:lirpg,代码行数:13,代码来源:run_atari.py

示例7: train

# 需要导入模块: from baselines.common import cmd_util [as 别名]
# 或者: from baselines.common.cmd_util import make_atari_env [as 别名]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env):
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    env = VecFrameStack(make_atari_env(env_id, num_env, seed), 4)
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), lrschedule=lrschedule)
    env.close() 
开发者ID:bowenliu16,项目名称:rl_graph_generation,代码行数:12,代码来源:run_atari.py

示例8: train

# 需要导入模块: from baselines.common import cmd_util [as 别名]
# 或者: from baselines.common.cmd_util import make_atari_env [as 别名]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env, ckpt_path, hparams):
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    elif policy == 'cnn_attention':
        policy_fn = CnnAttentionPolicy

    video_log_dir = os.path.join(hparams['base_dir'], 'videos', hparams['experiment_name'])
    env = VecFrameStack(make_atari_env(env_id, num_env, seed, video_log_dir=video_log_dir, write_attention_video='attention' in policy, hparams=hparams), 4)

    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), lrschedule=lrschedule, ckpt_path=ckpt_path, hparams=hparams)
    env.close() 
开发者ID:vik-goel,项目名称:MOREL,代码行数:17,代码来源:run_atari.py

示例9: train

# 需要导入模块: from baselines.common import cmd_util [as 别名]
# 或者: from baselines.common.cmd_util import make_atari_env [as 别名]
def train(env_id, num_timesteps, seed, num_cpu):
    env = VecFrameStack(make_atari_env(env_id, num_cpu, seed), 4)
    policy_fn = partial(CnnPolicy, one_dim_bias=True)
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), nprocs=num_cpu)
    env.close() 
开发者ID:flyyufelix,项目名称:sonic_contest,代码行数:7,代码来源:run_atari.py

示例10: build_env

# 需要导入模块: from baselines.common import cmd_util [as 别名]
# 或者: from baselines.common.cmd_util import make_atari_env [as 别名]
def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = args.seed    

    env_type, env_id = get_env_type(args.env)
    if env_type == 'mujoco':
        get_session(tf.ConfigProto(allow_soft_placement=True,
                                   intra_op_parallelism_threads=1, 
                                   inter_op_parallelism_threads=1))

        if args.num_env:
            env = SubprocVecEnv([lambda: make_mujoco_env(env_id, seed + i if seed is not None else None, args.reward_scale) for i in range(args.num_env)])    
        else:
            env = DummyVecEnv([lambda: make_mujoco_env(env_id, seed, args.reward_scale)])

        env = VecNormalize(env)

    elif env_type == 'atari':
        if alg == 'acer':
            env = make_atari_env(env_id, nenv, seed)#, wrapper_kwargs={'clip_rewards': False})
        elif alg == 'deepq':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir())
            env = atari_wrappers.wrap_deepmind(env, frame_stack=True, scale=True)
        elif alg == 'trpo_mpi':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
            env = atari_wrappers.wrap_deepmind(env)
            # TODO check if the second seeding is necessary, and eventually remove
            env.seed(seed)
        else:
            frame_stack_size = 4
            env = VecFrameStack(make_atari_env(env_id, nenv, seed), frame_stack_size)

    elif env_type == 'retro':
        import retro
        gamestate = args.gamestate or 'Level1-1'
        env = retro_wrappers.make_retro(game=args.env, state=gamestate, max_episode_steps=10000, use_restricted_actions=retro.Actions.DISCRETE)
        env.seed(args.seed)
        env = bench.Monitor(env, logger.get_dir())
        env = retro_wrappers.wrap_deepmind_retro(env)
        
    elif env_type == 'classic_control':
        def make_env():
            e = gym.make(env_id)
            e = bench.Monitor(e, logger.get_dir(), allow_early_resets=True)
            e.seed(seed)
            return e
            
        env = DummyVecEnv([make_env])

    else:
        raise ValueError('Unknown env_type {}'.format(env_type))

    return env 
开发者ID:MaxSobolMark,项目名称:HardRLWithYoutube,代码行数:63,代码来源:run.py


注:本文中的baselines.common.cmd_util.make_atari_env方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。