本文整理匯總了Python中baselines.common.vec_env.subproc_vec_env.SubprocVecEnv方法的典型用法代碼示例。如果您正苦於以下問題:Python subproc_vec_env.SubprocVecEnv方法的具體用法?Python subproc_vec_env.SubprocVecEnv怎麼用?Python subproc_vec_env.SubprocVecEnv使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類baselines.common.vec_env.subproc_vec_env的用法示例。
在下文中一共展示了subproc_vec_env.SubprocVecEnv方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: test_env_after_learn
# 需要導入模塊: from baselines.common.vec_env import subproc_vec_env [as 別名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 別名]
def test_env_after_learn(algo):
    """Run `learn` for zero timesteps on a SubprocVecEnv, then reset and close it.

    `algo` is the algorithm name passed to `get_learn_function`; it also
    selects which gym environment the worker builds.
    """
    def make_env():
        # acktr requires too much RAM, fails on travis
        if algo == 'acktr':
            return gym.make('CartPole-v1')
        return gym.make('PongNoFrameskip-v4')

    make_session(make_default=True, graph=tf.Graph())
    vec_env = SubprocVecEnv([make_env])

    learn_fn = get_learn_function(algo)

    # Commenting out the following line resolves the issue, though crash happens at env.reset().
    learn_fn(network='mlp', env=vec_env, total_timesteps=0, load_path=None, seed=None)

    vec_env.reset()
    vec_env.close()
開發者ID:quantumiracle,項目名稱:Reinforcement_Learning_for_Traffic_Light_Control,代碼行數:18,代碼來源:test_env_after_learn.py
示例2: make_vec_env
# 需要導入模塊: from baselines.common.vec_env import subproc_vec_env [as 別名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 別名]
def make_vec_env(env_id, env_type, num_env, seed, wrapper_kwargs=None, start_index=0, reward_scale=1.0, gamestate=None):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari and MuJoCo.

    Returns a SubprocVecEnv with `num_env` workers when `num_env > 1`,
    otherwise a DummyVecEnv holding the single env of rank `start_index`.
    Each worker env is built by `make_env` with `subrank` set to its rank.
    """
    wrapper_kwargs = {} if wrapper_kwargs is None else wrapper_kwargs

    # `MPI` is treated as optional: when it is falsy, rank 0 is assumed.
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    if seed is not None:
        # Offset the seed per MPI rank; a None seed is passed through unchanged.
        seed = seed + 10000 * mpi_rank

    def make_thunk(rank):
        # Factory function so every thunk captures its own `rank`.
        def _thunk():
            return make_env(
                env_id=env_id,
                env_type=env_type,
                subrank=rank,
                seed=seed,
                reward_scale=reward_scale,
                gamestate=gamestate,
                wrapper_kwargs=wrapper_kwargs,
            )
        return _thunk

    set_global_seeds(seed)
    if num_env > 1:
        return SubprocVecEnv([make_thunk(offset + start_index) for offset in range(num_env)])
    return DummyVecEnv([make_thunk(start_index)])
示例3: make_atari_env
# 需要導入模塊: from baselines.common.vec_env import subproc_vec_env [as 別名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 別名]
def make_atari_env(env_id, num_env, seed, hparams=None, wrapper_kwargs=None, start_index=0, nsteps=5, **kwargs):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari.

    The created vec env is also stored in the module-level global
    `my_subproc_vec_env`, which must still be None when this is called,
    so the function can only succeed once while that global stays set.

    Recognized entries of `kwargs`:
      * 'video_log_dir': when present, the env whose rank equals
        `start_index` is additionally wrapped in `VideoLogMonitor`.
      * 'write_attention_video': read (and therefore required) whenever
        'video_log_dir' is present.

    Raises AssertionError if `my_subproc_vec_env` is already set.
    """
    global my_subproc_vec_env

    if wrapper_kwargs is None:
        wrapper_kwargs = {}

    def make_env(rank):  # pylint: disable=C0111
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            # When the logger has no directory, Monitor receives that falsy value instead of a path.
            env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
            # Only the first env (rank == start_index) gets the video monitor.
            if rank == start_index and 'video_log_dir' in kwargs:
                env = VideoLogMonitor(
                    env,
                    kwargs['video_log_dir'] + '_rgb',
                    write_attention_video=kwargs['write_attention_video'],
                    hparams=hparams,
                    nsteps=nsteps,
                )
            return wrap_deepmind(env, **wrapper_kwargs)
        return _thunk

    set_global_seeds(seed)
    env_fns = [make_env(i + start_index) for i in range(num_env)]
    # Identity check: `== None` could be redirected by a custom __eq__.
    assert my_subproc_vec_env is None, "make_atari_env was already called: my_subproc_vec_env is set"
    my_subproc_vec_env = SubprocVecEnv(env_fns)
    return my_subproc_vec_env
示例4: main
# 需要導入模塊: from baselines.common.vec_env import subproc_vec_env [as 別名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 別名]
def main():
    """Run PPO until the environment throws an exception."""
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True  # pylint: disable=E1101
    # Only the env constructors are used here; the env names are discarded.
    env_fns, _ = create_envs()
    with tf.Session(config=session_config):
        # Take more timesteps than we need to be sure that
        # we stop due to an exception.
        learn_kwargs = dict(
            policy=policies.CnnPolicy,
            env=SubprocVecEnv(env_fns),
            nsteps=4096,
            nminibatches=8,
            lam=0.95,
            gamma=0.99,
            noptepochs=3,
            log_interval=1,
            ent_coef=0.001,
            lr=lambda _: 2e-4,
            cliprange=lambda _: 0.1,
            total_timesteps=int(1e9),
            save_interval=10,
            # `expert_name` is not defined in this function; it is read from an outer scope.
            save_path='checkpoints_expert_'+expert_name,
            load_path='./checkpoints_joint_ppo2/00300',  # Pretrained model
        )
        ppo2.learn(**learn_kwargs)
示例5: main
# 需要導入模塊: from baselines.common.vec_env import subproc_vec_env [as 別名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 別名]
def main():
    """Run PPO until the environment throws an exception."""
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True  # pylint: disable=E1101
    # The second element (env names) is not needed for training.
    env_fns, _ = create_envs()
    with tf.Session(config=tf_config):
        # Take more timesteps than we need to be sure that
        # we stop due to an exception.
        ppo2.learn(
            policy=policies.CnnPolicy,
            env=SubprocVecEnv(env_fns),
            nsteps=4096,
            nminibatches=8,
            lam=0.95,
            gamma=0.99,
            noptepochs=3,
            log_interval=1,
            ent_coef=0.01,
            lr=lambda _: 2e-4,
            cliprange=lambda _: 0.1,
            total_timesteps=int(1e9),
            save_interval=10,
            save_path='./checkpoints_joint_ppo2',
            load_path=None,
        )
示例6: _make_atari_env
# 需要導入模塊: from baselines.common.vec_env import subproc_vec_env [as 別名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 別名]
def _make_atari_env(env_id, num_env, seed, wrapper_kwargs=None, start_index=0):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari. This is the same as the one used
    in OpenAI Baselines.

    Worker ranks run from `start_index` to `start_index + num_env - 1`;
    each worker env is seeded with `seed + rank`.
    """
    from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

    if wrapper_kwargs is None:
        wrapper_kwargs = {}

    def make_env(rank):  # pylint: disable=C0111
        def _thunk():
            atari_env = make_atari(env_id)
            atari_env.seed(seed + rank)
            # Falsy logger dir is forwarded to Monitor as-is instead of a path.
            monitor_path = logger.get_dir() and os.path.join(logger.get_dir(), str(rank))
            monitored = Monitor(atari_env, monitor_path, allow_early_resets=True)
            return wrap_deepmind(monitored, **wrapper_kwargs)
        return _thunk

    set_global_seeds(seed)
    thunks = [make_env(offset + start_index) for offset in range(num_env)]
    return SubprocVecEnv(thunks)
示例7: _make_atari_env
# 需要導入模塊: from baselines.common.vec_env import subproc_vec_env [as 別名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 別名]
def _make_atari_env(env_id, num_env, seed, wrapper_kwargs=None, start_index=0):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari. This is the same as the one used
    in OpenAI Baselines.

    One worker is created per rank in `start_index .. start_index + num_env - 1`,
    and each worker's env is seeded with `seed + rank`.
    """
    from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

    wrapper_kwargs = wrapper_kwargs if wrapper_kwargs is not None else {}

    def make_env(rank):  # pylint: disable=C0111
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            # `a and b`: with no logger directory, Monitor gets that falsy value rather than a path.
            log_target = logger.get_dir() and os.path.join(logger.get_dir(), str(rank))
            env = Monitor(env, log_target, allow_early_resets=True)
            env = wrap_deepmind(env, **wrapper_kwargs)
            return env
        return _thunk

    set_global_seeds(seed)
    env_fns = []
    for offset in range(num_env):
        env_fns.append(make_env(offset + start_index))
    return SubprocVecEnv(env_fns)
示例8: train
# 需要導入模塊: from baselines.common.vec_env import subproc_vec_env [as 別名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 別名]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_cpu):
    """Train on `num_cpu` parallel Atari envs with the selected policy.

    `policy` must be one of 'cnn', 'lstm' or 'lnlstm'. `learn` is run for
    `int(num_timesteps * 1.1)` total timesteps.

    Raises:
        ValueError: if `policy` is not a known name. The check happens
            before any worker process is spawned, so nothing is leaked.
    """
    # Resolve the policy first: previously an unknown name only failed
    # (with an unbound `policy_fn`) after the subprocesses had been started.
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    else:
        raise ValueError("Policy {} not implemented".format(policy))

    def make_env(rank):
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            # With no logger directory, Monitor receives that falsy value instead of a path.
            env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
            gym.logger.setLevel(logging.WARN)
            return wrap_deepmind(env)
        return _thunk

    set_global_seeds(seed)
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])
    try:
        learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), lrschedule=lrschedule)
    finally:
        # Always shut the worker processes down, even when learning fails.
        env.close()
示例9: train
# 需要導入模塊: from baselines.common.vec_env import subproc_vec_env [as 別名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 別名]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_cpu):
    """Train on `num_cpu` parallel Atari envs with an ACER policy.

    `policy` must be 'cnn' or 'lstm'. For any other value a message is
    printed and the function returns None without seeding or creating
    any environment. `learn` is run for `int(num_timesteps * 1.1)` total
    timesteps.
    """
    # Resolve the policy before spawning workers: the early return used to
    # happen after the SubprocVecEnv was created, leaving it unclosed.
    if policy == 'cnn':
        policy_fn = AcerCnnPolicy
    elif policy == 'lstm':
        policy_fn = AcerLstmPolicy
    else:
        print("Policy {} not implemented".format(policy))
        return

    def make_env(rank):
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            # With no logger directory, Monitor receives that falsy value instead of a path.
            env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
            gym.logger.setLevel(logging.WARN)
            return wrap_deepmind(env)
        return _thunk

    set_global_seeds(seed)
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])
    try:
        learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), lrschedule=lrschedule)
    finally:
        # Always shut the worker processes down, even when learning fails.
        env.close()
示例10: train
# 需要導入模塊: from baselines.common.vec_env import subproc_vec_env [as 別名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 別名]
def train(env_id, num_frames, seed, policy, lrschedule, num_cpu):
    """Train on `num_cpu` parallel gym envs with the selected policy.

    `policy` must be one of 'cnn', 'linear', 'lstm' or 'lnlstm'.
    `num_frames` is converted to `int(num_frames / 4 * 1.1)` timesteps
    before being passed to `learn`.

    Raises:
        ValueError: if `policy` is not a known name. The check happens
            before any worker process is spawned, so nothing is leaked.
    """
    # Resolve the policy first: previously an unknown name only failed
    # (with an unbound `policy_fn`) after the subprocesses had been started.
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'linear':
        policy_fn = LinearPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    else:
        raise ValueError("Policy {} not implemented".format(policy))

    # divide by 4 due to frameskip, then do a little extras so episodes end
    num_timesteps = int(num_frames / 4 * 1.1)

    def make_env(rank):
        def _thunk():
            env = gym.make(env_id)
            env.seed(seed + rank)
            # With no logger directory, Monitor receives that falsy value instead of a path.
            env = bench.Monitor(env, logger.get_dir() and
                                os.path.join(logger.get_dir(), "{}.monitor.json".format(rank)))
            gym.logger.setLevel(logging.WARN)
            return wrap_deepmind(env)
        return _thunk

    set_global_seeds(seed)
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])
    try:
        learn(policy_fn, env, seed, total_timesteps=num_timesteps, lrschedule=lrschedule)
    finally:
        # Always shut the worker processes down, even when learning fails.
        env.close()
示例11: make_atari_env
# 需要導入模塊: from baselines.common.vec_env import subproc_vec_env [as 別名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 別名]
def make_atari_env(env_id, num_env, seed, wrapper_kwargs=None, start_index=0):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari.

    One worker is created per rank in `start_index .. start_index + num_env - 1`;
    each worker's env is seeded with `seed + rank`.
    """
    wrapper_kwargs = {} if wrapper_kwargs is None else wrapper_kwargs

    def make_env(rank):  # pylint: disable=C0111
        def _thunk():
            base_env = make_atari(env_id)
            base_env.seed(seed + rank)
            # Falsy logger dir is forwarded to Monitor as-is instead of a path.
            monitor_target = logger.get_dir() and os.path.join(logger.get_dir(), str(rank))
            return wrap_deepmind(Monitor(base_env, monitor_target), **wrapper_kwargs)
        return _thunk

    set_global_seeds(seed)
    thunks = [make_env(offset + start_index) for offset in range(num_env)]
    return SubprocVecEnv(thunks)
示例12: make_atari_env
# 需要導入模塊: from baselines.common.vec_env import subproc_vec_env [as 別名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 別名]
def make_atari_env(env_id, num_env, seed, wrapper_kwargs=None, start_index=0):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari.

    Each worker env is seeded with `seed + 10000*mpi_rank + rank`, or with
    None when `seed` is None, and monitored under the file name
    '<mpi_rank>.<rank>' inside the logger directory (when one is set).
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}

    # `MPI` is treated as optional: when it is falsy, rank 0 is assumed.
    if MPI:
        mpi_rank = MPI.COMM_WORLD.Get_rank()
    else:
        mpi_rank = 0

    def make_env(rank):  # pylint: disable=C0111
        def _thunk():
            env = make_atari(env_id)
            if seed is not None:
                env_seed = seed + 10000*mpi_rank + rank
            else:
                env_seed = None
            env.seed(env_seed)
            # Falsy logger dir is forwarded to Monitor as-is instead of a path.
            monitor_file = logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank))
            env = Monitor(env, monitor_file)
            return wrap_deepmind(env, **wrapper_kwargs)
        return _thunk

    set_global_seeds(seed)
    env_fns = [make_env(offset + start_index) for offset in range(num_env)]
    return SubprocVecEnv(env_fns)
示例13: _make_a2c
# 需要導入模塊: from baselines.common.vec_env import subproc_vec_env [as 別名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 別名]
def _make_a2c(env_path, num_env, seed, reward_range, base_port, unity_arguments):
    """
    Create wrapped SubprocVecEnv for using A2C on a Unity-Environment

    One worker is created per rank in `range(num_env)`; every worker builds
    its env through `_wrap_unity_env`, receiving its own rank.
    """
    def make_env(rank):
        # Factory function so every thunk captures its own `rank`.
        return lambda: _wrap_unity_env(env_path, seed, base_port, unity_arguments, rank, reward_range)

    thunks = [make_env(rank) for rank in range(num_env)]
    return SubprocVecEnv(thunks)
示例14: make_vec_envs
# 需要導入模塊: from baselines.common.vec_env import subproc_vec_env [as 別名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 別名]
def make_vec_envs(env_name, seed, num_processes, gamma, log_dir, add_timestep,
                  device, allow_early_resets, num_frame_stack=None):
    """Build a vectorized env wrapped in VecPyTorch.

    A SubprocVecEnv is used when more than one env is requested, otherwise
    a DummyVecEnv. Envs with a 1-D observation space are wrapped in
    VecNormalize (with `ret=False` when `gamma` is None, else with `gamma`).
    `num_frame_stack` is currently unused: the frame-stacking code below is
    commented out.
    """
    # presumably `make_env` returns an env-constructing callable; verify against its definition
    env_fns = [
        make_env(env_name, seed, rank, log_dir, add_timestep, allow_early_resets)
        for rank in range(num_processes)
    ]

    vec_env_cls = SubprocVecEnv if len(env_fns) > 1 else DummyVecEnv
    envs = vec_env_cls(env_fns)

    if len(envs.observation_space.shape) == 1:
        normalize_kwargs = {'ret': False} if gamma is None else {'gamma': gamma}
        envs = VecNormalize(envs, **normalize_kwargs)

    envs = VecPyTorch(envs, device)

    # if num_frame_stack is not None:
    #     envs = VecPyTorchFrameStack(envs, num_frame_stack, device)
    # elif len(envs.observation_space.shape) == 3:
    #     envs = VecPyTorchFrameStack(envs, 4, device)

    return envs
# Can be used to test recurrent policies for Reacher-v2
示例15: make_vec_env
# 需要導入模塊: from baselines.common.vec_env import subproc_vec_env [as 別名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 別名]
def make_vec_env(env_id, env_type, num_env, seed,
                 wrapper_kwargs=None,
                 env_kwargs=None,
                 start_index=0,
                 reward_scale=1.0,
                 flatten_dict_observations=True,
                 gamestate=None,
                 initializer=None,
                 force_dummy=False):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari and MuJoCo.

    Returns a SubprocVecEnv when `num_env > 1` and `force_dummy` is false;
    otherwise a DummyVecEnv with the same `num_env` environments. The
    `initializer` is only forwarded to `make_env` in the SubprocVecEnv case;
    the DummyVecEnv thunks always receive `initializer=None`.
    """
    if not wrapper_kwargs:
        wrapper_kwargs = {}
    if not env_kwargs:
        env_kwargs = {}

    # `MPI` is treated as optional: when it is falsy, rank 0 is assumed.
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    if seed is not None:
        # Offset the seed per MPI rank; a None seed is passed through unchanged.
        seed = seed + 10000 * mpi_rank
    logger_dir = logger.get_dir()

    def make_thunk(rank, initializer=None):
        # Factory function so every thunk captures its own `rank` and `initializer`.
        def _thunk():
            return make_env(
                env_id=env_id,
                env_type=env_type,
                mpi_rank=mpi_rank,
                subrank=rank,
                seed=seed,
                reward_scale=reward_scale,
                gamestate=gamestate,
                flatten_dict_observations=flatten_dict_observations,
                wrapper_kwargs=wrapper_kwargs,
                env_kwargs=env_kwargs,
                logger_dir=logger_dir,
                initializer=initializer,
            )
        return _thunk

    set_global_seeds(seed)
    use_subprocesses = not force_dummy and num_env > 1
    if use_subprocesses:
        return SubprocVecEnv([make_thunk(offset + start_index, initializer=initializer) for offset in range(num_env)])
    return DummyVecEnv([make_thunk(offset + start_index, initializer=None) for offset in range(num_env)])