本文整理汇总了Python中baselines.common.vec_env.vec_frame_stack.VecFrameStack类的典型用法代码示例。如果您正苦于以下问题：Python vec_frame_stack.VecFrameStack的具体用法？Python vec_frame_stack.VecFrameStack怎么用？Python vec_frame_stack.VecFrameStack使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块baselines.common.vec_env.vec_frame_stack的用法示例。
在下文中一共展示了vec_frame_stack.VecFrameStack类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: train
# 需要导入模块: from baselines.common.vec_env import vec_frame_stack [as 别名]
# 或者: from baselines.common.vec_env.vec_frame_stack import VecFrameStack [as 别名]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env,
          v_ex_coef, r_ex_coef, r_in_coef, lr_alpha, lr_beta):
    """Pick a policy class by name and run ``learn`` on a frame-stacked env.

    Args:
        env_id: Environment id forwarded to ``make_atari_env``.
        num_timesteps: Nominal step budget; ``learn`` receives
            ``int(num_timesteps * 1.01)`` as ``total_timesteps``.
        seed: Seed forwarded to both ``make_atari_env`` and ``learn``.
        policy: One of ``'cnn'``, ``'lstm'``, ``'lnlstm'`` or ``'cnn_int'``.
        lrschedule: Forwarded unchanged to ``learn``.
        num_env: Number of environments requested from ``make_atari_env``.
        v_ex_coef, r_ex_coef, r_in_coef, lr_alpha, lr_beta: Forwarded
            unchanged to ``learn`` as keyword arguments.

    Raises:
        NotImplementedError: If ``policy`` is not one of the names above.
    """
    # Resolve the policy before building the env so a bad name fails early.
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    elif policy == 'cnn_int':
        policy_fn = CnnPolicyIntrinsicReward
    else:
        raise NotImplementedError('Unsupported policy: {!r}'.format(policy))

    env = VecFrameStack(make_atari_env(env_id, num_env, seed), 4)
    try:
        learn(policy_fn, env, seed,
              total_timesteps=int(num_timesteps * 1.01),
              lrschedule=lrschedule,
              v_ex_coef=v_ex_coef, r_ex_coef=r_ex_coef, r_in_coef=r_in_coef,
              lr_alpha=lr_alpha, lr_beta=lr_beta)
    finally:
        # Close the env even when learn() raises, so it is never left open.
        env.close()
示例2: train
# 需要导入模块: from baselines.common.vec_env import vec_frame_stack [as 别名]
# 或者: from baselines.common.vec_env.vec_frame_stack import VecFrameStack [as 别名]
def train(env_id, num_timesteps, seed, policy):
    """Run ``ppo2.learn`` on an 8-env, 4-frame-stacked env built from ``env_id``.

    A TensorFlow session is entered with thread counts taken from the CPU
    count (halved on darwin). ``policy`` is a key into a fixed name-to-class
    table (``'cnn'``, ``'lstm'``, ``'lnlstm'``); any other key raises
    ``KeyError``. ``ppo2.learn`` is given ``int(num_timesteps * 1.1)`` as
    ``total_timesteps``.
    """
    thread_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        thread_count //= 2

    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        intra_op_parallelism_threads=thread_count,
        inter_op_parallelism_threads=thread_count,
    )
    session_config.gpu_options.allow_growth = True  # pylint: disable=E1101
    # The session is entered here and never exited inside this function.
    tf.Session(config=session_config).__enter__()

    stacked_env = VecFrameStack(make_atari_env(env_id, 8, seed), 4)

    policy_table = {
        'cnn': CnnPolicy,
        'lstm': LstmPolicy,
        'lnlstm': LnLstmPolicy,
    }
    policy_cls = policy_table[policy]

    ppo2.learn(
        policy=policy_cls,
        env=stacked_env,
        nsteps=128,
        nminibatches=4,
        lam=0.95,
        gamma=0.99,
        noptepochs=4,
        log_interval=1,
        ent_coef=.01,
        lr=lambda f: f * 2.5e-4,
        cliprange=lambda f: f * 0.1,
        total_timesteps=int(num_timesteps * 1.1),
    )
示例3: train
# 需要导入模块: from baselines.common.vec_env import vec_frame_stack [as 别名]
# 或者: from baselines.common.vec_env.vec_frame_stack import VecFrameStack [as 别名]
def train(env_id, num_timesteps, seed, policy, hparams):
    """Run ``ppo2.learn`` with video logging and a capped GPU memory fraction.

    Reads ``hparams['gpu_fraction']``, ``hparams['base_dir']`` and
    ``hparams['experiment_name']``. ``policy`` must be one of ``'cnn'``,
    ``'lstm'``, ``'lnlstm'`` or ``'cnn_attention'`` (otherwise ``KeyError``).
    Attention-video writing is requested from ``make_atari_env`` whenever the
    policy name contains ``'attention'``. ``hparams`` is also forwarded to
    ``ppo2.learn``.
    """
    # NOTE: the darwin-specific halving of the thread count is disabled here
    # (it was commented out in this function).
    thread_count = multiprocessing.cpu_count()

    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        intra_op_parallelism_threads=thread_count,
        inter_op_parallelism_threads=thread_count,
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=hparams['gpu_fraction']),
    )
    session_config.gpu_options.allow_growth = False  # pylint: disable=E1101
    # The session is entered here and never exited inside this function.
    tf.Session(config=session_config).__enter__()

    video_log_dir = os.path.join(
        hparams['base_dir'], 'videos', hparams['experiment_name'])
    base_env = make_atari_env(
        env_id, 8, seed,
        video_log_dir=video_log_dir,
        write_attention_video='attention' in policy,
        nsteps=128,
    )
    stacked_env = VecFrameStack(base_env, 4)

    policy_table = {
        'cnn': CnnPolicy,
        'lstm': LstmPolicy,
        'lnlstm': LnLstmPolicy,
        'cnn_attention': CnnAttentionPolicy,
    }
    policy_cls = policy_table[policy]

    ppo2.learn(
        policy=policy_cls,
        env=stacked_env,
        nsteps=128,
        nminibatches=4,
        lam=0.95,
        gamma=0.99,
        noptepochs=4,
        log_interval=1,
        ent_coef=.01,
        lr=lambda f: f * 2.5e-4,
        cliprange=lambda f: f * 0.1,
        total_timesteps=int(num_timesteps * 1.1),
        hparams=hparams,
    )
示例4: train
# 需要导入模块: from baselines.common.vec_env import vec_frame_stack [as 别名]
# 或者: from baselines.common.vec_env.vec_frame_stack import VecFrameStack [as 别名]
def train(env_id, num_timesteps, seed, policy):
    """Run ``ppo2.learn`` on an 8-env, 4-frame-stacked env built from ``env_id``.

    ``policy`` selects a class from a fixed table with keys ``'cnn'``,
    ``'lstm'``, ``'lnlstm'`` and ``'mlp'``; an unknown key raises
    ``KeyError``. The TensorFlow session uses the CPU count (halved on
    darwin) for both intra- and inter-op thread pools, and ``ppo2.learn`` is
    given ``int(num_timesteps * 1.1)`` as ``total_timesteps``.
    """
    worker_threads = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        worker_threads //= 2

    tf_config = tf.ConfigProto(
        allow_soft_placement=True,
        intra_op_parallelism_threads=worker_threads,
        inter_op_parallelism_threads=worker_threads,
    )
    tf_config.gpu_options.allow_growth = True  # pylint: disable=E1101
    # The session is entered here and never exited inside this function.
    tf.Session(config=tf_config).__enter__()

    vec_env = VecFrameStack(make_atari_env(env_id, 8, seed), 4)

    available_policies = {
        'cnn': CnnPolicy,
        'lstm': LstmPolicy,
        'lnlstm': LnLstmPolicy,
        'mlp': MlpPolicy,
    }
    chosen_policy = available_policies[policy]

    ppo2.learn(
        policy=chosen_policy,
        env=vec_env,
        nsteps=128,
        nminibatches=4,
        lam=0.95,
        gamma=0.99,
        noptepochs=4,
        log_interval=1,
        ent_coef=.01,
        lr=lambda f: f * 2.5e-4,
        cliprange=lambda f: f * 0.1,
        total_timesteps=int(num_timesteps * 1.1),
    )
示例5: __init__
# 需要导入模块: from baselines.common.vec_env import vec_frame_stack [as 别名]
# 或者: from baselines.common.vec_env.vec_frame_stack import VecFrameStack [as 别名]
def __init__(
    self,
    model,
    env_id,
    num_env: int = 4,
    seed: int = 1,
    wrapper_kwargs=None,
    start_index=0,
    stack_frames: int = 4,
):
    """Build a frame-stacked env for ``env_id`` and reset bookkeeping state.

    Args:
        model: Object whose ``initial_state`` becomes ``self.states``.
        env_id: Environment id forwarded to ``_make_atari_env``.
        num_env: Number of environments; also the length of ``end_ix``.
        seed: Seed forwarded to ``_make_atari_env``.
        wrapper_kwargs: Optional mapping of wrapper options. It is copied,
            and ``"episode_life"`` is forced to ``False`` in the copy.
        start_index: Forwarded to ``_make_atari_env``.
        stack_frames: Stack size passed to ``VecFrameStack``.
    """
    # Copy before overriding so the caller's dict is never modified.
    wrapper_kwargs = {} if wrapper_kwargs is None else dict(wrapper_kwargs)
    wrapper_kwargs["episode_life"] = False

    self.env = VecFrameStack(
        _make_atari_env(env_id, num_env, seed, wrapper_kwargs, start_index),
        stack_frames,
    )
    self.model = model
    # One boolean flag per environment, all initially False.
    self.end_ix = np.zeros(num_env, dtype=bool)
    self.states = model.initial_state
    # Left unset here; nothing in this constructor populates them.
    self.obs = None
    self.dones = None
示例6: train
# 需要导入模块: from baselines.common.vec_env import vec_frame_stack [as 别名]
# 或者: from baselines.common.vec_env.vec_frame_stack import VecFrameStack [as 别名]
def train(env_id, num_timesteps, seed, num_cpu):
    """Run ``learn`` with ``CnnPolicy`` on a 4-frame-stacked env.

    Args:
        env_id: Environment id forwarded to ``make_atari_env``.
        num_timesteps: Nominal step budget; ``learn`` receives
            ``int(num_timesteps * 1.1)`` as ``total_timesteps``.
        seed: Seed forwarded to both ``make_atari_env`` and ``learn``.
        num_cpu: Number of environments requested from ``make_atari_env``;
            also passed to ``learn`` as ``nprocs``.
    """
    env = VecFrameStack(make_atari_env(env_id, num_cpu, seed), 4)
    try:
        policy_fn = CnnPolicy
        learn(policy_fn, env, seed,
              total_timesteps=int(num_timesteps * 1.1), nprocs=num_cpu)
    finally:
        # Close the env even when learn() raises, so it is never left open.
        env.close()
示例7: train
# 需要导入模块: from baselines.common.vec_env import vec_frame_stack [as 别名]
# 或者: from baselines.common.vec_env.vec_frame_stack import VecFrameStack [as 别名]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env):
    """Pick a policy class by name and run ``learn`` on a frame-stacked env.

    Args:
        env_id: Environment id forwarded to ``make_atari_env``.
        num_timesteps: Nominal step budget; ``learn`` receives
            ``int(num_timesteps * 1.1)`` as ``total_timesteps``.
        seed: Seed forwarded to both ``make_atari_env`` and ``learn``.
        policy: One of ``'cnn'``, ``'lstm'`` or ``'lnlstm'``.
        lrschedule: Forwarded unchanged to ``learn``.
        num_env: Number of environments requested from ``make_atari_env``.

    Raises:
        NotImplementedError: If ``policy`` is not one of the names above.
    """
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    else:
        # Without this branch policy_fn stayed unbound and the failure only
        # surfaced as UnboundLocalError after the env had been created.
        raise NotImplementedError('Unsupported policy: {!r}'.format(policy))

    env = VecFrameStack(make_atari_env(env_id, num_env, seed), 4)
    try:
        learn(policy_fn, env, seed,
              total_timesteps=int(num_timesteps * 1.1),
              lrschedule=lrschedule)
    finally:
        # Close the env even when learn() raises, so it is never left open.
        env.close()
示例8: learn
# 需要导入模块: from baselines.common.vec_env import vec_frame_stack [as 别名]
# 或者: from baselines.common.vec_env.vec_frame_stack import VecFrameStack [as 别名]
def learn(env_path, seed, max_steps, reward_range, base_port, unity_arguments, summary_writer):
    """Train with ``learn_a2c`` on an 8-env, 4-frame-stacked env and return the model.

    Args:
        env_path: Forwarded to ``_make_a2c``.
        seed: Forwarded to both ``_make_a2c`` and ``learn_a2c``.
        max_steps: Passed to ``learn_a2c`` as ``total_timesteps``.
        reward_range: Forwarded to ``_make_a2c``.
        base_port: Forwarded to ``_make_a2c``.
        unity_arguments: Forwarded to ``_make_a2c``.
        summary_writer: Passed to ``_create_summary_callback`` to build the
            training callback.

    Returns:
        Whatever ``learn_a2c`` returns.
    """
    env = VecFrameStack(
        _make_a2c(env_path, num_env=8, seed=seed, reward_range=reward_range,
                  base_port=base_port, unity_arguments=unity_arguments),
        nstack=4,
    )
    try:
        model = learn_a2c(
            policy=CnnPolicy, env=env, seed=seed, ent_coef=0.01, nsteps=5,
            total_timesteps=max_steps,
            callback=_create_summary_callback(summary_writer=summary_writer),
        )
    finally:
        # Closing stays best-effort (a failed close is only reported), but it
        # now also runs when training itself raises.
        try:
            env.close()
        except Exception as e:
            print("Failed to close environment: " + str(e))
    return model
示例9: __init__
# 需要导入模块: from baselines.common.vec_env import vec_frame_stack [as 别名]
# 或者: from baselines.common.vec_env.vec_frame_stack import VecFrameStack [as 别名]
def __init__(self, env, model, nsteps):
    """Initialise runner state for a discrete-action, frame-stacked env.

    Delegates to the parent constructor, checks that ``env`` is a
    ``VecFrameStack`` with a ``spaces.Discrete`` action space, resets the
    env once and caches shape and dtype information derived from it.
    """
    super().__init__(env=env, model=model, nsteps=nsteps)

    assert isinstance(env.action_space, spaces.Discrete), 'This ACER implementation works only with discrete action spaces!'
    assert isinstance(env, VecFrameStack)

    self.nact = env.action_space.n

    # self.nenv is read, not assigned, here; presumably the parent
    # constructor sets it. TODO confirm.
    env_count = self.nenv
    self.nbatch = env_count * nsteps
    # Leading dimension holds nsteps + 1 entries per env.
    leading_dim = env_count * (nsteps + 1)
    self.batch_ob_shape = (leading_dim,) + env.observation_space.shape

    self.obs = env.reset()

    self.obs_dtype = env.observation_space.dtype
    self.ac_dtype = env.action_space.dtype

    self.nstack = self.env.nstack
    # Last observation dimension divided by the stack size; presumably the
    # channel count of a single frame. TODO confirm.
    self.nc = self.batch_ob_shape[-1] // self.nstack
示例10: train
# 需要导入模块: from baselines.common.vec_env import vec_frame_stack [as 别名]
# 或者: from baselines.common.vec_env.vec_frame_stack import VecFrameStack [as 别名]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env, ckpt_path, hparams):
    """Pick a policy class by name and run ``learn`` with video logging.

    Args:
        env_id: Environment id forwarded to ``make_atari_env``.
        num_timesteps: Nominal step budget; ``learn`` receives
            ``int(num_timesteps * 1.1)`` as ``total_timesteps``.
        seed: Seed forwarded to both ``make_atari_env`` and ``learn``.
        policy: One of ``'cnn'``, ``'lstm'``, ``'lnlstm'`` or
            ``'cnn_attention'``. Attention-video writing is requested when
            the name contains ``'attention'``.
        lrschedule: Forwarded unchanged to ``learn``.
        num_env: Number of environments requested from ``make_atari_env``.
        ckpt_path: Forwarded unchanged to ``learn``.
        hparams: Mapping providing ``'base_dir'`` and ``'experiment_name'``
            for the video directory; also forwarded to ``make_atari_env``
            and ``learn``.

    Raises:
        NotImplementedError: If ``policy`` is not one of the names above.
    """
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    elif policy == 'cnn_attention':
        policy_fn = CnnAttentionPolicy
    else:
        # Without this branch policy_fn stayed unbound and the failure only
        # surfaced as UnboundLocalError after the env had been created.
        raise NotImplementedError('Unsupported policy: {!r}'.format(policy))

    video_log_dir = os.path.join(hparams['base_dir'], 'videos', hparams['experiment_name'])
    env = VecFrameStack(
        make_atari_env(env_id, num_env, seed,
                       video_log_dir=video_log_dir,
                       write_attention_video='attention' in policy,
                       hparams=hparams),
        4,
    )
    try:
        learn(policy_fn, env, seed,
              total_timesteps=int(num_timesteps * 1.1),
              lrschedule=lrschedule, ckpt_path=ckpt_path, hparams=hparams)
    finally:
        # Close the env even when learn() raises, so it is never left open.
        env.close()
示例11: train
# 需要导入模块: from baselines.common.vec_env import vec_frame_stack [as 别名]
# 或者: from baselines.common.vec_env.vec_frame_stack import VecFrameStack [as 别名]
def train(env_id, num_timesteps, seed, num_cpu):
    """Run ``learn`` with ``CnnPolicy(one_dim_bias=True)`` on a 4-frame-stacked env.

    Args:
        env_id: Environment id forwarded to ``make_atari_env``.
        num_timesteps: Nominal step budget; ``learn`` receives
            ``int(num_timesteps * 1.1)`` as ``total_timesteps``.
        seed: Seed forwarded to both ``make_atari_env`` and ``learn``.
        num_cpu: Number of environments requested from ``make_atari_env``;
            also passed to ``learn`` as ``nprocs``.
    """
    env = VecFrameStack(make_atari_env(env_id, num_cpu, seed), 4)
    try:
        # Pre-bind the keyword so learn() can call the policy like the class.
        policy_fn = partial(CnnPolicy, one_dim_bias=True)
        learn(policy_fn, env, seed,
              total_timesteps=int(num_timesteps * 1.1), nprocs=num_cpu)
    finally:
        # Close the env even when learn() raises, so it is never left open.
        env.close()
示例12: build_env
# 需要导入模块: from baselines.common.vec_env import vec_frame_stack [as 别名]
# 或者: from baselines.common.vec_env.vec_frame_stack import VecFrameStack [as 别名]
def build_env(args):
    """Construct the environment described by ``args``.

    Reads ``args.env``, ``args.alg``, ``args.seed``, ``args.num_env`` and
    ``args.reward_scale`` (plus ``args.gamestate`` on the atari/retro vector
    path). For atari/retro env types, ``deepq`` and ``trpo_mpi`` get a
    single env from ``make_env``; every other algorithm gets a vectorised
    env wrapped in a 4-frame ``VecFrameStack``. All other env types get a
    TF session configured via ``get_session`` and a vectorised env, which is
    additionally wrapped in ``VecNormalize`` for ``'mujoco'``.
    """
    cpu_total = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_total //= 2
    env_count = args.num_env or cpu_total
    algorithm = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type not in {'atari', 'retro'}:
        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            intra_op_parallelism_threads=1,
            inter_op_parallelism_threads=1,
        )
        session_config.gpu_options.allow_growth = True
        get_session(config=session_config)

        # 'her' is the only algorithm for which flattening is switched off.
        flatten_dict_observations = algorithm not in {'her'}
        vec_env = make_vec_env(
            env_id, env_type, args.num_env or 1, seed,
            reward_scale=args.reward_scale,
            flatten_dict_observations=flatten_dict_observations,
        )
        if env_type == 'mujoco':
            vec_env = VecNormalize(vec_env)
        return vec_env

    if algorithm == 'deepq':
        return make_env(env_id, env_type, seed=seed,
                        wrapper_kwargs={'frame_stack': True})

    if algorithm == 'trpo_mpi':
        return make_env(env_id, env_type, seed=seed)

    frame_stack_size = 4
    vec_env = make_vec_env(
        env_id, env_type, env_count, seed,
        gamestate=args.gamestate,
        reward_scale=args.reward_scale,
    )
    return VecFrameStack(vec_env, frame_stack_size)
示例13: train
# 需要导入模块: from baselines.common.vec_env import vec_frame_stack [as 别名]
# 或者: from baselines.common.vec_env.vec_frame_stack import VecFrameStack [as 别名]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env, sil_update, sil_beta):
    """Pick a policy class by name and run ``learn`` on a frame-stacked env.

    The env is built with wrapper options ``episode_life=False`` and
    ``clip_rewards=False``.

    Args:
        env_id: Environment id forwarded to ``make_atari_env``.
        num_timesteps: Nominal step budget; ``learn`` receives
            ``int(num_timesteps * 1.1)`` as ``total_timesteps``.
        seed: Seed forwarded to both ``make_atari_env`` and ``learn``.
        policy: One of ``'cnn'``, ``'lstm'`` or ``'lnlstm'``.
        lrschedule: Forwarded unchanged to ``learn``.
        num_env: Number of environments requested from ``make_atari_env``.
        sil_update, sil_beta: Forwarded unchanged to ``learn``.

    Raises:
        NotImplementedError: If ``policy`` is not one of the names above.
    """
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    else:
        # Without this branch policy_fn stayed unbound and the failure only
        # surfaced as UnboundLocalError after the env had been created.
        raise NotImplementedError('Unsupported policy: {!r}'.format(policy))

    env_args = {'episode_life': False, 'clip_rewards': False}
    env = VecFrameStack(
        make_atari_env(env_id, num_env, seed, wrapper_kwargs=env_args), 4)
    try:
        learn(policy_fn, env, seed,
              total_timesteps=int(num_timesteps * 1.1),
              lrschedule=lrschedule,
              sil_update=sil_update, sil_beta=sil_beta)
    finally:
        # Close the env even when learn() raises, so it is never left open.
        env.close()
示例14: __init__
# 需要导入模块: from baselines.common.vec_env import vec_frame_stack [as 别名]
# 或者: from baselines.common.vec_env.vec_frame_stack import VecFrameStack [as 别名]
def __init__(self, model, env_id, num_env: int=4, seed: int=1,
             wrapper_kwargs=None, start_index=0, stack_frames: int=4):
    """Build a frame-stacked env for ``env_id`` and reset bookkeeping state.

    Args:
        model: Object whose ``initial_state`` becomes ``self.states``.
        env_id: Environment id forwarded to ``_make_atari_env``.
        num_env: Number of environments; also the length of ``end_ix``.
        seed: Seed forwarded to ``_make_atari_env``.
        wrapper_kwargs: Optional mapping of wrapper options. It is copied,
            and ``"episode_life"`` is forced to ``False`` in the copy.
        start_index: Forwarded to ``_make_atari_env``.
        stack_frames: Stack size passed to ``VecFrameStack``.
    """
    # Copy before overriding so the caller's dict is never modified.
    wrapper_kwargs = {} if wrapper_kwargs is None else dict(wrapper_kwargs)
    wrapper_kwargs["episode_life"] = False

    self.env = VecFrameStack(_make_atari_env(env_id, num_env, seed,
                                             wrapper_kwargs, start_index), stack_frames)
    self.model = model
    # One boolean flag per environment, all initially False.
    self.end_ix = np.zeros(num_env, dtype=bool)
    self.states = model.initial_state
    # Left unset here; nothing in this constructor populates them.
    self.obs = None
    self.dones = None
示例15: train
# 需要导入模块: from baselines.common.vec_env import vec_frame_stack [as 别名]
# 或者: from baselines.common.vec_env.vec_frame_stack import VecFrameStack [as 别名]
def train(env_id, num_timesteps, seed, policy):
    """Run ``ppo2.learn`` on 8 subprocess envs with 4-frame stacking.

    Dependencies are imported inside the function. Each sub-environment is
    created by ``make_atari``, seeded with ``seed + rank``, wrapped in
    ``bench.Monitor`` and then ``wrap_deepmind``. ``policy`` must be
    ``'cnn'``, ``'lstm'`` or ``'lnlstm'`` (otherwise ``KeyError``).
    ``ppo2.learn`` is given ``int(num_timesteps * 1.1)`` as
    ``total_timesteps``.
    """
    from baselines.common import set_global_seeds
    from baselines.common.atari_wrappers import make_atari, wrap_deepmind
    from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
    from baselines.common.vec_env.vec_frame_stack import VecFrameStack
    from baselines.ppo2 import ppo2
    from baselines.ppo2.policies import CnnPolicy, LstmPolicy, LnLstmPolicy
    import gym
    import logging
    import multiprocessing
    import os.path as osp
    import tensorflow as tf

    # NOTE(review): sys, bench and logger are not imported in this function;
    # they are assumed to be available at module level.
    thread_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        thread_count //= 2

    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        intra_op_parallelism_threads=thread_count,
        inter_op_parallelism_threads=thread_count,
    )
    session_config.gpu_options.allow_growth = True  # pylint: disable=E1101
    gym.logger.setLevel(logging.WARN)
    # The session is entered here and never exited inside this function.
    tf.Session(config=session_config).__enter__()

    def build_factory(rank):
        """Return a zero-argument callable that builds the env for ``rank``."""
        def create():
            atari_env = make_atari(env_id)
            atari_env.seed(seed + rank)
            # Monitor target is the logger dir joined with the rank, or the
            # falsy logger dir itself when none is configured.
            monitor_target = logger.get_dir() and osp.join(logger.get_dir(), str(rank))
            monitored = bench.Monitor(atari_env, monitor_target)
            return wrap_deepmind(monitored)
        return create

    nenvs = 8
    factories = [build_factory(rank) for rank in range(nenvs)]
    vec_env = SubprocVecEnv(factories)
    set_global_seeds(seed)
    vec_env = VecFrameStack(vec_env, 4)

    policy_table = {
        'cnn': CnnPolicy,
        'lstm': LstmPolicy,
        'lnlstm': LnLstmPolicy,
    }
    policy_cls = policy_table[policy]

    ppo2.learn(
        policy=policy_cls,
        env=vec_env,
        nsteps=128,
        nminibatches=4,
        lam=0.95,
        gamma=0.99,
        noptepochs=4,
        log_interval=1,
        ent_coef=.01,
        lr=lambda f: f * 2.5e-4,
        cliprange=lambda f: f * 0.1,
        total_timesteps=int(num_timesteps * 1.1),
    )