This article collects typical usage examples of the Python method baselines.common.cmd_util.make_vec_env. If you are wondering how cmd_util.make_vec_env works, what its arguments mean, or what calling it looks like in practice, the curated code examples below may help. You can also browse further usage examples for the module it belongs to, baselines.common.cmd_util.
Eight code examples of cmd_util.make_vec_env are shown below, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code examples.
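Before the examples, here is a minimal sketch of a direct make_vec_env call. The environment id, environment type, number of environments, and seed are illustrative placeholders, and the keyword arguments mirror the ones used in the examples below (flatten_dict_observations may be absent in older baselines versions).

# A minimal sketch, assuming a recent OpenAI Baselines checkout.
from baselines.common.cmd_util import make_vec_env

# Build a vectorized environment: 4 parallel copies, fixed seed.
# 'CartPole-v1' / 'classic_control' are placeholders chosen for illustration.
env = make_vec_env('CartPole-v1', 'classic_control', 4, 0,
                   reward_scale=1.0, flatten_dict_observations=True)

obs = env.reset()                      # obs has shape (num_env, *observation_shape)
actions = [env.action_space.sample() for _ in range(env.num_envs)]
obs, rewards, dones, infos = env.step(actions)
env.close()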
Example 1: build_env
# Required module: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_vec_env [as alias]
def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    get_session(config=config)

    flatten_dict_observations = alg not in {'her'}
    env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=flatten_dict_observations)

    if env_type == 'mujoco':
        env = VecNormalize(env)

    return env
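For context, every build_env variant on this page takes an argparse-style args object rather than explicit parameters. The snippet below is a hypothetical driver for Example 1: the Namespace fields are the ones the function body reads, the values are placeholders, and env_type is included as None because some baselines versions of get_env_type(args) look it up. It assumes the surrounding imports from baselines' run.py (tf, get_session, get_env_type, VecNormalize, ...) are already in scope.

# Hypothetical invocation of Example 1's build_env; not part of the original example.
from argparse import Namespace

args = Namespace(env='HalfCheetah-v2', env_type=None, alg='ppo2',
                 seed=0, num_env=4, reward_scale=1.0)

env = build_env(args)   # if get_env_type reports 'mujoco', the result is VecNormalize-wrapped
print(env.num_envs, env.observation_space, env.action_space)
env.close()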
Example 2: build_env
# Required module: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_vec_env [as alias]
def build_env(args):
    alg = args.alg
    seed = args.seed

    # tf config
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    get_session(config=config)

    flatten_dict_observations = alg not in {'her'}
    env = make_vec_env('MujocoQuadForce-v1',
                       'mujoco',
                       args.num_env or 1,
                       seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=flatten_dict_observations)
    # env = ActionClipWrapper(env)
    # if env_type == 'mujoco':
    #     env = VecNormalize(env)
    return env
Example 3: build_env
# Required module: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_vec_env [as alias]
def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate, reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env)

    return env
Example 4: build_env
# Required module: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_vec_env [as alias]
def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate, reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
Example 5: build_testenv
# Required module: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_vec_env [as alias]
def build_testenv(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    seed = args.seed

    env_type, env_id = get_env_type('RLTestStock-v0')
    get_session(tf.ConfigProto(allow_soft_placement=True,
                               intra_op_parallelism_threads=1,
                               inter_op_parallelism_threads=1))
    env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                       reward_scale=args.reward_scale)
    return env
Example 6: build_env
# Required module: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_vec_env [as alias]
def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate, reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale)

    if args.custom_reward != '':
        from baselines.common.vec_env import VecEnv, VecEnvWrapper
        import baselines.common.custom_reward_wrapper as W
        assert isinstance(env, VecEnv) or isinstance(env, VecEnvWrapper)

        custom_reward_kwargs = eval(args.custom_reward_kwargs)

        if args.custom_reward == 'live_long':
            env = W.VecLiveLongReward(env, **custom_reward_kwargs)
        elif args.custom_reward == 'random_tf':
            env = W.VecTFRandomReward(env, **custom_reward_kwargs)
        elif args.custom_reward == 'preference':
            env = W.VecTFPreferenceReward(env, **custom_reward_kwargs)
        elif args.custom_reward == 'preference_normalized':
            env = W.VecTFPreferenceRewardNormalized(env, **custom_reward_kwargs)
        else:
            assert False, 'no such wrapper exist'

    if env_type == 'mujoco':
        env = VecNormalize(env)

    return env
Example 7: build_env
# Required module: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_vec_env [as alias]
def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)
    print(env_id)

    # extract the agc_env_name
    noskip_idx = env_id.find("NoFrameskip")
    env_name = env_id[:noskip_idx].lower()
    print("Env Name for Masking:", env_name)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate, reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale)

    if args.custom_reward != '':
        from baselines.common.vec_env import VecEnv, VecEnvWrapper
        import baselines.common.custom_reward_wrapper as W
        assert isinstance(env, VecEnv) or isinstance(env, VecEnvWrapper)

        custom_reward_kwargs = eval(args.custom_reward_kwargs)

        if args.custom_reward == 'pytorch':
            if args.custom_reward_path == '':
                assert False, 'no path for reward model'
            else:
                env = W.VecPyTorchAtariReward(env, args.custom_reward_path, env_name)
        else:
            assert False, 'no such wrapper exist'

    if env_type == 'mujoco':
        env = VecNormalize(env)
    # if env_type == 'atari':
    #     input("Normalizing for ATari game: okay? [Enter]")
    #     # normalize rewards but not observations for atari
    #     env = VecNormalizeRewards(env)

    return env
Example 8: build_env
# Required module: from baselines.common import cmd_util [as alias]
# Or: from baselines.common.cmd_util import make_vec_env [as alias]
def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type == 'atari':
        if alg == 'acer':
            env = make_vec_env(env_id, env_type, nenv, seed)
        elif alg == 'deepq':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir())
            env = atari_wrappers.wrap_deepmind(env, frame_stack=True)
        elif alg == 'trpo_mpi':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
            env = atari_wrappers.wrap_deepmind(env)
            # TODO check if the second seeding is necessary, and eventually remove
            env.seed(seed)
        else:
            frame_stack_size = 4
            env = VecFrameStack(make_vec_env(env_id, env_type, nenv, seed), frame_stack_size)

    elif env_type == 'retro':
        import retro
        gamestate = args.gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=args.env, state=gamestate, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE)
        env.seed(args.seed)
        env = bench.Monitor(env, logger.get_dir())
        env = retro_wrappers.wrap_deepmind_retro(env)

    else:
        get_session(tf.ConfigProto(allow_soft_placement=True,
                                   intra_op_parallelism_threads=1,
                                   inter_op_parallelism_threads=1))
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale)

        if env_type == 'mujoco':
            env = VecNormalize(env)

    return env