This article collects typical usage examples of the Python method baselines.common.cmd_util.make_atari_env. If you have been wondering what cmd_util.make_atari_env does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also browse the module baselines.common.cmd_util, where the method is defined, for further usage examples.
The following shows 10 code examples of cmd_util.make_atari_env, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps surface better Python code examples.
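Before turning to the collected examples, here is a minimal usage sketch of the pattern they all share. It is a sketch under assumptions: it presumes an older baselines release in which make_atari_env takes (env_id, num_env, seed) and returns a vectorized environment, and 'BreakoutNoFrameskip-v4' plus the worker and seed values are purely illustrative, not taken from the examples below.
from baselines.common.cmd_util import make_atari_env
from baselines.common.vec_env.vec_frame_stack import VecFrameStack

# Create 4 parallel Atari workers with a fixed seed (illustrative values),
# then stack the last 4 frames so a feed-forward policy can infer motion
# from a single observation.
env = make_atari_env('BreakoutNoFrameskip-v4', 4, seed=0)
env = VecFrameStack(env, 4)

obs = env.reset()  # batched observations, one row per worker
actions = [env.action_space.sample() for _ in range(env.num_envs)]
obs, rewards, dones, infos = env.step(actions)
env.close()
Most of the examples below follow this same create-wrap-train-close sequence, differing mainly in which learner (a2c, acer, ppo2, and so on) consumes the wrapped environment.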
Example 1: train
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env,
          v_ex_coef, r_ex_coef, r_in_coef, lr_alpha, lr_beta):
    # Pick the policy network class matching the requested architecture.
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    elif policy == 'cnn_int':
        policy_fn = CnnPolicyIntrinsicReward
    else:
        raise NotImplementedError
    # num_env parallel Atari workers, with the last 4 frames stacked per observation.
    env = VecFrameStack(make_atari_env(env_id, num_env, seed), 4)
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.01), lrschedule=lrschedule,
          v_ex_coef=v_ex_coef, r_ex_coef=r_ex_coef, r_in_coef=r_in_coef,
          lr_alpha=lr_alpha, lr_beta=lr_beta)
    env.close()
Example 2: train
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, policy):
    # One TF op thread per CPU core (halved on macOS, following the original script).
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True  # pylint: disable=E1101
    # Make this session the default so ppo2.learn builds its graph inside it.
    tf.Session(config=config).__enter__()

    # 8 parallel Atari workers, with the last 4 frames stacked per observation.
    env = VecFrameStack(make_atari_env(env_id, 8, seed), 4)
    policy = {'cnn': CnnPolicy, 'lstm': LstmPolicy, 'lnlstm': LnLstmPolicy}[policy]
    ppo2.learn(policy=policy, env=env, nsteps=128, nminibatches=4,
               lam=0.95, gamma=0.99, noptepochs=4, log_interval=1,
               ent_coef=.01,
               lr=lambda f: f * 2.5e-4,
               cliprange=lambda f: f * 0.1,
               total_timesteps=int(num_timesteps * 1.1))
Example 3: train
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, policy, hparams):
    ncpu = multiprocessing.cpu_count()
    #if sys.platform == 'darwin': ncpu //= 2
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=hparams['gpu_fraction'])
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu,
                            gpu_options=gpu_options)
    config.gpu_options.allow_growth = False  # pylint: disable=E1101
    tf.Session(config=config).__enter__()
    video_log_dir = os.path.join(hparams['base_dir'], 'videos', hparams['experiment_name'])
    env = VecFrameStack(make_atari_env(env_id, 8, seed, video_log_dir=video_log_dir,
                                       write_attention_video='attention' in policy, nsteps=128), 4)
    policy = {'cnn': CnnPolicy, 'lstm': LstmPolicy, 'lnlstm': LnLstmPolicy, 'cnn_attention': CnnAttentionPolicy}[policy]
    ppo2.learn(policy=policy, env=env, nsteps=128, nminibatches=4,
               lam=0.95, gamma=0.99, noptepochs=4, log_interval=1,
               ent_coef=.01,
               lr=lambda f: f * 2.5e-4,
               cliprange=lambda f: f * 0.1,
               total_timesteps=int(num_timesteps * 1.1),
               hparams=hparams)
Example 4: train
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, policy):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True  # pylint: disable=E1101
    tf.Session(config=config).__enter__()
    env = VecFrameStack(make_atari_env(env_id, 8, seed), 4)
    policy = {'cnn': CnnPolicy, 'lstm': LstmPolicy, 'lnlstm': LnLstmPolicy, 'mlp': MlpPolicy}[policy]
    ppo2.learn(policy=policy, env=env, nsteps=128, nminibatches=4,
               lam=0.95, gamma=0.99, noptepochs=4, log_interval=1,
               ent_coef=.01,
               lr=lambda f: f * 2.5e-4,
               cliprange=lambda f: f * 0.1,
               total_timesteps=int(num_timesteps * 1.1))
Example 5: train
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, num_cpu):
    env = VecFrameStack(make_atari_env(env_id, num_cpu, seed), 4)
    policy_fn = CnnPolicy
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), nprocs=num_cpu)
    env.close()
Example 6: train
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_cpu):
    env = make_atari_env(env_id, num_cpu, seed)
    if policy == 'cnn':
        policy_fn = AcerCnnPolicy
    elif policy == 'lstm':
        policy_fn = AcerLstmPolicy
    else:
        print("Policy {} not implemented".format(policy))
        return
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), lrschedule=lrschedule)
    env.close()
Example 7: train
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env):
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    env = VecFrameStack(make_atari_env(env_id, num_env, seed), 4)
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), lrschedule=lrschedule)
    env.close()
Example 8: train
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env, ckpt_path, hparams):
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    elif policy == 'cnn_attention':
        policy_fn = CnnAttentionPolicy
    video_log_dir = os.path.join(hparams['base_dir'], 'videos', hparams['experiment_name'])
    env = VecFrameStack(make_atari_env(env_id, num_env, seed, video_log_dir=video_log_dir,
                                       write_attention_video='attention' in policy, hparams=hparams), 4)
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), lrschedule=lrschedule,
          ckpt_path=ckpt_path, hparams=hparams)
    env.close()
Example 9: train
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def train(env_id, num_timesteps, seed, num_cpu):
    env = VecFrameStack(make_atari_env(env_id, num_cpu, seed), 4)
    # Pre-bind one_dim_bias=True so learn() receives a ready-to-call policy constructor.
    policy_fn = partial(CnnPolicy, one_dim_bias=True)
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), nprocs=num_cpu)
    env.close()
Example 10: build_env
# Required import: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_atari_env [as alias]
def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type == 'mujoco':
        get_session(tf.ConfigProto(allow_soft_placement=True,
                                   intra_op_parallelism_threads=1,
                                   inter_op_parallelism_threads=1))

        if args.num_env:
            # Bind i as a default argument so each worker gets its own seed
            # (a bare `lambda:` would capture i late and give every worker the same seed).
            env = SubprocVecEnv([lambda i=i: make_mujoco_env(env_id, seed + i if seed is not None else None, args.reward_scale)
                                 for i in range(args.num_env)])
        else:
            env = DummyVecEnv([lambda: make_mujoco_env(env_id, seed, args.reward_scale)])

        env = VecNormalize(env)

    elif env_type == 'atari':
        if alg == 'acer':
            env = make_atari_env(env_id, nenv, seed)  # , wrapper_kwargs={'clip_rewards': False})
        elif alg == 'deepq':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir())
            env = atari_wrappers.wrap_deepmind(env, frame_stack=True, scale=True)
        elif alg == 'trpo_mpi':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
            env = atari_wrappers.wrap_deepmind(env)
            # TODO check if the second seeding is necessary, and eventually remove
            env.seed(seed)
        else:
            frame_stack_size = 4
            env = VecFrameStack(make_atari_env(env_id, nenv, seed), frame_stack_size)

    elif env_type == 'retro':
        import retro
        gamestate = args.gamestate or 'Level1-1'
        env = retro_wrappers.make_retro(game=args.env, state=gamestate, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE)
        env.seed(args.seed)
        env = bench.Monitor(env, logger.get_dir())
        env = retro_wrappers.wrap_deepmind_retro(env)

    elif env_type == 'classic_control':
        def make_env():
            e = gym.make(env_id)
            e = bench.Monitor(e, logger.get_dir(), allow_early_resets=True)
            e.seed(seed)
            return e

        env = DummyVecEnv([make_env])

    else:
        raise ValueError('Unknown env_type {}'.format(env_type))

    return env