This article collects typical usage examples of the Python method stable_baselines.common.vec_env.SubprocVecEnv. If you have been wondering how vec_env.SubprocVecEnv is used, what it does, or where to find examples of it, the curated code samples below may help. You can also explore further usage examples from its containing module, stable_baselines.common.vec_env.
Below are 14 code examples of the vec_env.SubprocVecEnv method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend even better Python code examples.
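Before the collected examples, here is a minimal, self-contained sketch of how SubprocVecEnv is typically used. This is an illustration written for this page, not one of the examples below; the CartPole-v1 environment, the PPO2 hyperparameters and the worker count of 4 are assumptions chosen only for demonstration. Each environment is passed as a zero-argument constructor so it can be created inside its own worker process:

import gym

from stable_baselines import PPO2
from stable_baselines.common.vec_env import SubprocVecEnv


def make_env(rank, seed=0):
    """Return a constructor so the environment is built inside the worker process."""
    def _init():
        env = gym.make('CartPole-v1')
        env.seed(seed + rank)
        return env
    return _init


if __name__ == '__main__':
    # SubprocVecEnv starts one worker process per environment, so the
    # __main__ guard is required when the 'spawn' or 'forkserver'
    # start methods are used.
    env = SubprocVecEnv([make_env(i) for i in range(4)])
    model = PPO2('MlpPolicy', env, verbose=1)
    model.learn(total_timesteps=10000)
    env.close()  # terminate the worker processes

For environments that are cheap to step, DummyVecEnv is often faster because it avoids inter-process communication; several of the examples below fall back to it when only one environment is requested.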
Example 1: test_subproc_start_method
# Required import: from stable_baselines.common import vec_env [as alias]
# Or: from stable_baselines.common.vec_env import SubprocVecEnv [as alias]
def test_subproc_start_method():
    start_methods = [None]
    # Only test thread-safe methods. Others may deadlock tests! (gh/428)
    safe_methods = {'forkserver', 'spawn'}
    available_methods = multiprocessing.get_all_start_methods()
    start_methods += list(safe_methods.intersection(available_methods))
    space = gym.spaces.Discrete(2)

    def obs_assert(obs):
        return check_vecenv_obs(obs, space)

    for start_method in start_methods:
        vec_env_class = functools.partial(SubprocVecEnv, start_method=start_method)
        check_vecenv_spaces(vec_env_class, space, obs_assert)

    with pytest.raises(ValueError, match="cannot find context for 'illegal_method'"):
        vec_env_class = functools.partial(SubprocVecEnv, start_method='illegal_method')
        check_vecenv_spaces(vec_env_class, space, obs_assert)
Example 2: test_make_vec_env
# Required import: from stable_baselines.common import vec_env [as alias]
# Or: from stable_baselines.common.vec_env import SubprocVecEnv [as alias]
def test_make_vec_env(env_id, n_envs, vec_env_cls, wrapper_class):
    env = make_vec_env(env_id, n_envs, vec_env_cls=vec_env_cls,
                       wrapper_class=wrapper_class, monitor_dir=None, seed=0)

    assert env.num_envs == n_envs

    if vec_env_cls is None:
        assert isinstance(env, DummyVecEnv)
        if wrapper_class is not None:
            assert isinstance(env.envs[0], wrapper_class)
        else:
            assert isinstance(env.envs[0], Monitor)
    else:
        assert isinstance(env, SubprocVecEnv)

    # Kill subprocesses
    env.close()
Example 3: load_stable_baselines_env
# Required import: from stable_baselines.common import vec_env [as alias]
# Or: from stable_baselines.common.vec_env import SubprocVecEnv [as alias]
def load_stable_baselines_env(cfg_path, vector_length, mp, n_stack, number_maps, action_frame_repeat,
                              scaled_resolution):
    env_fn = lambda: MazeExplorer.load_vizdoom_env(cfg_path, number_maps, action_frame_repeat,
                                                   scaled_resolution)

    if mp:
        env = SubprocVecEnv([env_fn for _ in range(vector_length)])
    else:
        env = DummyVecEnv([env_fn for _ in range(vector_length)])

    if n_stack > 0:
        env = VecFrameStack(env, n_stack=n_stack)

    return env
Example 4: make_atari_env
# Required import: from stable_baselines.common import vec_env [as alias]
# Or: from stable_baselines.common.vec_env import SubprocVecEnv [as alias]
def make_atari_env(env_id, num_env, seed, wrapper_kwargs=None,
                   start_index=0, allow_early_resets=True,
                   start_method=None, use_subprocess=False):
    """
    Create a wrapped, monitored VecEnv for Atari.

    :param env_id: (str) the environment ID
    :param num_env: (int) the number of environments you wish to have in subprocesses
    :param seed: (int) the initial seed for the RNG
    :param wrapper_kwargs: (dict) the parameters for the wrap_deepmind function
    :param start_index: (int) start rank index
    :param allow_early_resets: (bool) allows early reset of the environment
    :param start_method: (str) method used to start the subprocesses.
        See the SubprocVecEnv doc for more information
    :param use_subprocess: (bool) whether to use `SubprocVecEnv` or `DummyVecEnv` when
        `num_env` > 1; `DummyVecEnv` is usually faster. Default: False
    :return: (VecEnv) the Atari environment
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}

    def make_env(rank):
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
                          allow_early_resets=allow_early_resets)
            return wrap_deepmind(env, **wrapper_kwargs)
        return _thunk

    set_global_seeds(seed)

    # When using one environment, no need to start subprocesses
    if num_env == 1 or not use_subprocess:
        return DummyVecEnv([make_env(i + start_index) for i in range(num_env)])

    return SubprocVecEnv([make_env(i + start_index) for i in range(num_env)],
                         start_method=start_method)
Example 5: test_lstm_train
# Required import: from stable_baselines.common import vec_env [as alias]
# Or: from stable_baselines.common.vec_env import SubprocVecEnv [as alias]
def test_lstm_train():
    """Test that LSTM models are able to achieve >=150 (out of 500) reward on CartPoleNoVelEnv.

    This environment requires memory to perform well in."""
    def make_env(i):
        env = CartPoleNoVelEnv()
        env = TimeLimit(env, max_episode_steps=500)
        env = bench.Monitor(env, None, allow_early_resets=True)
        env.seed(i)
        return env

    env = SubprocVecEnv([lambda: make_env(i) for i in range(NUM_ENVS)])
    env = VecNormalize(env)
    model = PPO2(MlpLstmPolicy, env, n_steps=128, nminibatches=NUM_ENVS, lam=0.95, gamma=0.99,
                 noptepochs=10, ent_coef=0.0, learning_rate=3e-4, cliprange=0.2, verbose=1)

    eprewmeans = []

    def reward_callback(local, _):
        nonlocal eprewmeans
        eprewmeans.append(safe_mean([ep_info['r'] for ep_info in local['ep_info_buf']]))

    model.learn(total_timesteps=100000, callback=reward_callback)

    # Maximum episode reward is 500.
    # In CartPole-v1, a non-recurrent policy can easily get >= 450.
    # In CartPoleNoVelEnv, a non-recurrent policy doesn't get more than ~50.
    # LSTM policies can reach above 400, but it varies a lot between runs; consistently get >= 150.
    # See PR #244 for more detailed benchmarks.
    average_reward = sum(eprewmeans[-NUM_EPISODES_FOR_SCORE:]) / NUM_EPISODES_FOR_SCORE
    assert average_reward >= 150, "Mean reward below 150; per-episode rewards {}".format(average_reward)
Example 6: load_train_env
# Required import: from stable_baselines.common import vec_env [as alias]
# Or: from stable_baselines.common.vec_env import SubprocVecEnv [as alias]
def load_train_env(num_envs, robot_radius, rew_fnc, num_stacks, stack_offset, debug, task_mode,
                   policy, disc_action_space, normalize):
    # Choosing environment wrapper according to the policy
    if policy == "CnnPolicy" or policy == "CnnLnLstmPolicy" or policy == "CnnLstmPolicy":
        if disc_action_space:
            env_temp = RosEnvDiscImg
        else:
            env_temp = RosEnvContImg
    elif policy == "CNN1DPolicy":
        if disc_action_space:
            env_temp = RosEnvDiscRawScanPrepWp
        else:
            env_temp = RosEnvContRawScanPrepWp
    elif policy == "CNN1DPolicy_multi_input":
        if disc_action_space:
            env_temp = RosEnvDiscRaw
        else:
            env_temp = RosEnvContRaw
    elif policy == "CnnPolicy_multi_input_vel" or policy == "CnnPolicy_multi_input_vel2":
        if disc_action_space:
            env_temp = RosEnvDiscImgVel
        else:
            env_temp = RosEnvContImgVel

    env = SubprocVecEnv([lambda k=k: Monitor(env_temp("sim%d" % (k + 1), StateCollector("sim%s" % (k + 1), "train"),
                                                      stack_offset, num_stacks, robot_radius, rew_fnc, debug,
                                                      "train", task_mode),
                                             '%s/%s/sim_%d' % (path_to_models, agent_name, k + 1),
                                             allow_early_resets=True)
                         for k in range(num_envs)])

    # Normalizing?
    if normalize:
        env = VecNormalize(env, training=True, norm_obs=True, norm_reward=False, clip_obs=100.0,
                           clip_reward=10.0, gamma=0.99, epsilon=1e-08)

    # Stack of data?
    if num_stacks > 1:
        env = VecFrameStack(env, n_stack=num_stacks, n_offset=stack_offset)

    return env
Example 7: main
# Required import: from stable_baselines.common import vec_env [as alias]
# Or: from stable_baselines.common.vec_env import SubprocVecEnv [as alias]
def main(args):
    start = time.time()

    env_id = 'fwmav_maneuver-v0'
    env = DummyVecEnv([make_env(env_id, 0)])
    # env = SubprocVecEnv([make_env(env_id, i) for i in range(args.n_cpu)])

    n_actions = env.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) * np.ones(n_actions))

    model = DDPG(
        policy=MyDDPGPolicy,
        env=env,
        gamma=1.0,
        nb_train_steps=5000,
        nb_rollout_steps=10000,
        nb_eval_steps=10000,
        param_noise=param_noise,
        action_noise=action_noise,
        tau=0.003,
        batch_size=256,
        observation_range=(-np.inf, np.inf),
        actor_lr=0.0001,
        critic_lr=0.001,
        reward_scale=0.05,
        memory_limit=10000000,
        verbose=1,
    )

    model.learn(total_timesteps=args.time_step)
    model.save(args.model_path)

    end = time.time()
    print("Time used: ", end - start)
Example 8: main
# Required import: from stable_baselines.common import vec_env [as alias]
# Or: from stable_baselines.common.vec_env import SubprocVecEnv [as alias]
def main(args):
    start = time.time()

    env_id = 'fwmav_hover-v0'
    env = DummyVecEnv([make_env(env_id, 0)])
    # env = SubprocVecEnv([make_env(env_id, i) for i in range(args.n_cpu)])

    n_actions = env.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) * np.ones(n_actions))

    model = DDPG(
        policy=MyDDPGPolicy,
        env=env,
        gamma=1.0,
        nb_train_steps=5000,
        nb_rollout_steps=10000,
        nb_eval_steps=10000,
        param_noise=param_noise,
        action_noise=action_noise,
        tau=0.003,
        batch_size=256,
        observation_range=(-np.inf, np.inf),
        actor_lr=0.0001,
        critic_lr=0.001,
        reward_scale=0.05,
        memory_limit=10000000,
        verbose=1,
    )

    model.learn(total_timesteps=args.time_step)
    model.save(args.model_path)

    end = time.time()
    print("Time used: ", end - start)
Example 9: main
# Required import: from stable_baselines.common import vec_env [as alias]
# Or: from stable_baselines.common.vec_env import SubprocVecEnv [as alias]
def main(args):
    try:
        model_cls = getattr(importlib.import_module('stable_baselines'), args.model_type)
    except AttributeError:
        print(args.model_type, "Error: wrong model type")
        return
    try:
        policy_cls = getattr(importlib.import_module('stable_baselines.common.policies'), args.policy_type)
    except AttributeError:
        print(args.policy_type, "Error: wrong policy type")
        return

    start = time.time()

    env_id = 'fwmav_hover-v0'
    # env = DummyVecEnv([make_env(env_id, 1)])
    env = SubprocVecEnv([make_env(env_id, i) for i in range(args.n_cpu)])

    model = model_cls(policy_cls, env, verbose=0)
    model.learn(total_timesteps=args.time_step)
    model.save(args.model_path)

    end = time.time()
    print("Time used: ", end - start)
Example 10: createEnvs
# Required import: from stable_baselines.common import vec_env [as alias]
# Or: from stable_baselines.common.vec_env import SubprocVecEnv [as alias]
def createEnvs(args, allow_early_resets=False, env_kwargs=None, load_path_normalise=None):
    """
    :param args: (argparse.Namespace Object)
    :param allow_early_resets: (bool) Allow reset before the environment is done, usually used in ES to halt the envs
    :param env_kwargs: (dict) The extra arguments for the environment
    :param load_path_normalise: (str) the path for loading the rolling average, None if not available or wanted.
    :return: (Gym VecEnv)
    """
    # imported here to prevent cyclic imports
    from environments.registry import registered_env
    from state_representation.registry import registered_srl, SRLType

    assert not (registered_env[args.env][3] is ThreadingType.NONE and args.num_cpu != 1), \
        "Error: cannot have more than 1 CPU for the environment {}".format(args.env)

    if env_kwargs is not None and registered_srl[args.srl_model][0] == SRLType.SRL:
        srl_model = MultiprocessSRLModel(args.num_cpu, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    envs = [makeEnv(args.env, args.seed, i, args.log_dir, allow_early_resets=allow_early_resets,
                    env_kwargs=env_kwargs)
            for i in range(args.num_cpu)]

    if len(envs) == 1:
        # No need for subprocesses when having only one env
        envs = DummyVecEnv(envs)
    else:
        envs = SubprocVecEnv(envs)

    envs = VecFrameStack(envs, args.num_stack)

    if args.srl_model != "raw_pixels":
        printYellow("Using MLP policy because working on state representation")
        envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
        envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

    return envs
Example 11: run_model_stablebaseline
# Required import: from stable_baselines.common import vec_env [as alias]
# Or: from stable_baselines.common.vec_env import SubprocVecEnv [as alias]
def run_model_stablebaseline(flow_params,
                             num_cpus=1,
                             rollout_size=50,
                             num_steps=50):
    """Run the model for num_steps if provided.

    Parameters
    ----------
    flow_params : dict
        flow-specific parameters
    num_cpus : int
        number of CPUs used during training
    rollout_size : int
        length of a single rollout
    num_steps : int
        total number of training steps
        The total rollout length is rollout_size.

    Returns
    -------
    stable_baselines.*
        the trained model
    """
    from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv
    from stable_baselines import PPO2

    if num_cpus == 1:
        constructor = env_constructor(params=flow_params, version=0)()
        # The algorithms require a vectorized environment to run
        env = DummyVecEnv([lambda: constructor])
    else:
        env = SubprocVecEnv([env_constructor(params=flow_params, version=i)
                             for i in range(num_cpus)])

    train_model = PPO2('MlpPolicy', env, verbose=1, n_steps=rollout_size)
    train_model.learn(total_timesteps=num_steps)
    return train_model
Example 12: train
# Required import: from stable_baselines.common import vec_env [as alias]
# Or: from stable_baselines.common.vec_env import SubprocVecEnv [as alias]
def train(self,
          n_epochs: int = 10,
          save_every: int = 1,
          test_trained_model: bool = True,
          render_test_env: bool = False,
          render_report: bool = True,
          save_report: bool = False):
    train_provider, test_provider = self.data_provider.split_data_train_test(self.train_split_percentage)
    del test_provider

    train_env = SubprocVecEnv([make_env(train_provider, i) for i in range(self.n_envs)])

    model_params = self.get_model_params()

    model = self.Model(self.Policy,
                       train_env,
                       verbose=self.model_verbose,
                       nminibatches=self.n_minibatches,
                       tensorboard_log=self.tensorboard_path,
                       **model_params)

    self.logger.info(f'Training for {n_epochs} epochs')

    steps_per_epoch = len(train_provider.data_frame)

    for model_epoch in range(0, n_epochs):
        self.logger.info(f'[{model_epoch}] Training for: {steps_per_epoch} time steps')

        model.learn(total_timesteps=steps_per_epoch)

        if model_epoch % save_every == 0:
            model_path = path.join('data', 'agents', f'{self.study_name}__{model_epoch}.pkl')
            model.save(model_path)

        if test_trained_model:
            self.test(model_epoch,
                      render_env=render_test_env,
                      render_report=render_report,
                      save_report=save_report)

    self.logger.info(f'Trained {n_epochs} models')
Example 13: make_vec_env
# Required import: from stable_baselines.common import vec_env [as alias]
# Or: from stable_baselines.common.vec_env import SubprocVecEnv [as alias]
def make_vec_env(env_id, n_envs=1, seed=None, start_index=0,
                 monitor_dir=None, wrapper_class=None,
                 env_kwargs=None, vec_env_cls=None, vec_env_kwargs=None):
    """
    Create a wrapped, monitored `VecEnv`.
    By default it uses a `DummyVecEnv` which is usually faster
    than a `SubprocVecEnv`.

    :param env_id: (str or Type[gym.Env]) the environment ID or the environment class
    :param n_envs: (int) the number of environments you wish to have in parallel
    :param seed: (int) the initial seed for the random number generator
    :param start_index: (int) start rank index
    :param monitor_dir: (str) Path to a folder where the monitor files will be saved.
        If None, no file will be written, however, the env will still be wrapped
        in a Monitor wrapper to provide additional information about training.
    :param wrapper_class: (gym.Wrapper or callable) Additional wrapper to use on the environment.
        This can also be a function with single argument that wraps the environment in many things.
    :param env_kwargs: (dict) Optional keyword argument to pass to the env constructor
    :param vec_env_cls: (Type[VecEnv]) A custom `VecEnv` class constructor. Default: None.
    :param vec_env_kwargs: (dict) Keyword arguments to pass to the `VecEnv` class constructor.
    :return: (VecEnv) The wrapped environment
    """
    env_kwargs = {} if env_kwargs is None else env_kwargs
    vec_env_kwargs = {} if vec_env_kwargs is None else vec_env_kwargs

    def make_env(rank):
        def _init():
            if isinstance(env_id, str):
                env = gym.make(env_id)
                if len(env_kwargs) > 0:
                    warnings.warn("No environment class was passed (only an env ID) so `env_kwargs` will be ignored")
            else:
                env = env_id(**env_kwargs)
            if seed is not None:
                env.seed(seed + rank)
                env.action_space.seed(seed + rank)
            # Wrap the env in a Monitor wrapper
            # to have additional training information
            monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None
            # Create the monitor folder if needed
            if monitor_path is not None:
                os.makedirs(monitor_dir, exist_ok=True)
            env = Monitor(env, filename=monitor_path)
            # Optionally, wrap the environment with the provided wrapper
            if wrapper_class is not None:
                env = wrapper_class(env)
            return env
        return _init

    # No custom VecEnv is passed
    if vec_env_cls is None:
        # Default: use a DummyVecEnv
        vec_env_cls = DummyVecEnv

    return vec_env_cls([make_env(i + start_index) for i in range(n_envs)], **vec_env_kwargs)
Example 14: create_env
# Required import: from stable_baselines.common import vec_env [as alias]
# Or: from stable_baselines.common.vec_env import SubprocVecEnv [as alias]
def create_env(n_envs, eval_env=False):
    """
    Create the environment and wrap it if necessary.

    :param n_envs: (int)
    :param eval_env: (bool) Whether it is an environment used for evaluation or not
    :return: (Union[gym.Env, VecEnv]) the wrapped environment
    """
    global hyperparams
    global env_kwargs

    # Do not log eval env (issue with writing the same file)
    log_dir = None if eval_env else save_path

    if is_atari:
        if args.verbose > 0:
            print("Using Atari wrapper")
        env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
        # Frame-stacking with 4 frames
        env = VecFrameStack(env, n_stack=4)
    elif algo_ in ['dqn', 'ddpg']:
        if hyperparams.get('normalize', False):
            print("WARNING: normalization not supported yet for DDPG/DQN")
        env = gym.make(env_id, **env_kwargs)
        env.seed(args.seed)
        if env_wrapper is not None:
            env = env_wrapper(env)
    else:
        if n_envs == 1:
            env = DummyVecEnv([make_env(env_id, 0, args.seed, wrapper_class=env_wrapper,
                                        log_dir=log_dir, env_kwargs=env_kwargs)])
        else:
            # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
            # On most envs, SubprocVecEnv does not help and is quite memory hungry
            env = DummyVecEnv([make_env(env_id, i, args.seed, log_dir=log_dir,
                                        wrapper_class=env_wrapper, env_kwargs=env_kwargs)
                               for i in range(n_envs)])
        if normalize:
            if args.verbose > 0:
                if len(normalize_kwargs) > 0:
                    print("Normalization activated: {}".format(normalize_kwargs))
                else:
                    print("Normalizing input and reward")
            env = VecNormalize(env, **normalize_kwargs)

    # Optional frame-stacking
    if hyperparams.get('frame_stack', False):
        n_stack = hyperparams['frame_stack']
        env = VecFrameStack(env, n_stack)
        print("Stacking {} frames".format(n_stack))
        del hyperparams['frame_stack']

    if args.algo == 'her':
        # Wrap the env if we need to flatten the dict obs
        if isinstance(env, VecEnv):
            env = _UnvecWrapper(env)
        env = HERGoalEnvWrapper(env)

    return env