This article collects typical usage examples of baselines.common.vec_env.dummy_vec_env.DummyVecEnv in Python. If you have been wondering what DummyVecEnv does, how to call it, or what real code that uses it looks like, the curated examples below should help. You can also explore further usage examples from its enclosing module, baselines.common.vec_env.dummy_vec_env.
The following 15 code examples of dummy_vec_env.DummyVecEnv are ordered by popularity by default.
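Before diving into the examples, here is a minimal usage sketch (not taken from any of the projects below; it assumes only gym and baselines are installed). DummyVecEnv takes a list of zero-argument environment constructors, instantiates them sequentially in the current process, and exposes the batched VecEnv interface: reset() and step() return arrays with one entry per wrapped environment, and environments that reach a terminal state are reset automatically.

import gym
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

# A single constructor gives a vectorized environment with num_envs == 1.
venv = DummyVecEnv([lambda: gym.make('CartPole-v0')])
obs = venv.reset()                                   # batched observations, shape (1, 4) for CartPole
for _ in range(10):
    actions = [venv.action_space.sample()]           # one action per wrapped environment
    obs, rewards, dones, infos = venv.step(actions)  # finished environments are reset automatically
venv.close()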
Example 1: test_microbatches
# Required import: from baselines.common.vec_env import dummy_vec_env [as alias]
# Or: from baselines.common.vec_env.dummy_vec_env import DummyVecEnv [as alias]
def test_microbatches():
    def env_fn():
        env = gym.make('CartPole-v0')
        env.seed(0)
        return env

    learn_fn = partial(learn, network='mlp', nsteps=32, total_timesteps=32, seed=0)

    env_ref = DummyVecEnv([env_fn])
    sess_ref = make_session(make_default=True, graph=tf.Graph())
    learn_fn(env=env_ref)
    vars_ref = {v.name: sess_ref.run(v) for v in tf.trainable_variables()}

    env_test = DummyVecEnv([env_fn])
    sess_test = make_session(make_default=True, graph=tf.Graph())
    learn_fn(env=env_test, model_fn=partial(MicrobatchedModel, microbatch_size=2))
    vars_test = {v.name: sess_test.run(v) for v in tf.trainable_variables()}

    for v in vars_ref:
        np.testing.assert_allclose(vars_ref[v], vars_test[v], atol=1e-3)
Example 2: make_vec_env
# Required import: from baselines.common.vec_env import dummy_vec_env [as alias]
# Or: from baselines.common.vec_env.dummy_vec_env import DummyVecEnv [as alias]
def make_vec_env(env_id, env_type, num_env, seed, wrapper_kwargs=None, start_index=0, reward_scale=1.0, gamestate=None):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari and MuJoCo.
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = seed + 10000 * mpi_rank if seed is not None else None

    def make_thunk(rank):
        return lambda: make_env(
            env_id=env_id,
            env_type=env_type,
            subrank=rank,
            seed=seed,
            reward_scale=reward_scale,
            gamestate=gamestate,
            wrapper_kwargs=wrapper_kwargs
        )

    set_global_seeds(seed)
    if num_env > 1:
        return SubprocVecEnv([make_thunk(i + start_index) for i in range(num_env)])
    else:
        return DummyVecEnv([make_thunk(start_index)])
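For illustration only, here are a couple of hypothetical calls to the helper above (the environment ids and counts are placeholders, not taken from the source project): with num_env greater than 1 the function returns a SubprocVecEnv, while num_env equal to 1 falls through to the DummyVecEnv branch.

# Hypothetical usage; environment ids and counts are illustrative.
venv_parallel = make_vec_env('PongNoFrameskip-v4', 'atari', num_env=4, seed=0)  # SubprocVecEnv with 4 workers
venv_single = make_vec_env('HalfCheetah-v2', 'mujoco', num_env=1, seed=0)       # DummyVecEnv in-process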
Example 3: main
# Required import: from baselines.common.vec_env import dummy_vec_env [as alias]
# Or: from baselines.common.vec_env.dummy_vec_env import DummyVecEnv [as alias]
def main():
    """Run PPO until the environment throws an exception."""
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # pylint: disable=E1101
    with tf.Session(config=config):
        # Take more timesteps than we need to be sure that
        # we stop due to an exception.
        ppo2.learn(policy=policies.CnnPolicy,
                   env=DummyVecEnv([make_env]),
                   nsteps=4096,
                   nminibatches=8,
                   lam=0.95,
                   gamma=0.99,
                   noptepochs=3,
                   log_interval=1,
                   ent_coef=0.01,
                   lr=lambda _: 2e-4,
                   cliprange=lambda _: 0.1,
                   total_timesteps=int(1e7),
                   load_path='./pretrain_model')  # Set to None if no pretrained model
Example 4: main
# Required import: from baselines.common.vec_env import dummy_vec_env [as alias]
# Or: from baselines.common.vec_env.dummy_vec_env import DummyVecEnv [as alias]
def main():
    """Run PPO until the environment throws an exception."""
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # pylint: disable=E1101
    with tf.Session(config=config):
        # Take more timesteps than we need to be sure that
        # we stop due to an exception.
        ppo2.learn(policy=policies.CnnPolicy,
                   env=DummyVecEnv([make_env]),
                   nsteps=4096,
                   nminibatches=8,
                   lam=0.95,
                   gamma=0.99,
                   noptepochs=3,
                   log_interval=1,
                   ent_coef=0.001,
                   lr=lambda _: 2e-4,
                   cliprange=lambda _: 0.1,
                   total_timesteps=int(1e7),
                   load_path='./pretrain_model')  # Set to None if no pretrained model
Example 5: run
# Required import: from baselines.common.vec_env import dummy_vec_env [as alias]
# Or: from baselines.common.vec_env.dummy_vec_env import DummyVecEnv [as alias]
def run(bsuite_id: str) -> str:
    """Runs a PPO agent on a given bsuite environment, logging to CSV."""

    def _load_env():
        raw_env = bsuite.load_and_record(
            bsuite_id=bsuite_id,
            save_path=FLAGS.save_path,
            logging_mode=FLAGS.logging_mode,
            overwrite=FLAGS.overwrite,
        )
        if FLAGS.verbose:
            raw_env = terminal_logging.wrap_environment(raw_env, log_every=True)  # pytype: disable=wrong-arg-types
        return gym_wrapper.GymFromDMEnv(raw_env)

    env = dummy_vec_env.DummyVecEnv([_load_env])

    ppo2.learn(
        env=env,
        network=FLAGS.network,
        lr=FLAGS.learning_rate,
        total_timesteps=FLAGS.total_timesteps,  # make sure to run enough steps
        nsteps=FLAGS.nsteps,
        gamma=FLAGS.agent_discount,
    )
    return bsuite_id
Example 6: test_identity
# Required import: from baselines.common.vec_env import dummy_vec_env [as alias]
# Or: from baselines.common.vec_env.dummy_vec_env import DummyVecEnv [as alias]
def test_identity(learn_func):
    '''
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)
    '''
    np.random.seed(0)
    np_random.seed(0)
    random.seed(0)

    env = DummyVecEnv([lambda: IdentityEnv(10)])

    with tf.Graph().as_default(), tf.Session().as_default():
        tf.set_random_seed(0)
        model = learn_func(env)

        N_TRIALS = 1000
        sum_rew = 0
        obs = env.reset()
        for i in range(N_TRIALS):
            obs, rew, done, _ = env.step(model.step(obs)[0])
            sum_rew += rew

        assert sum_rew > 0.9 * N_TRIALS
Example 7: test_microbatches
# Required import: from baselines.common.vec_env import dummy_vec_env [as alias]
# Or: from baselines.common.vec_env.dummy_vec_env import DummyVecEnv [as alias]
def test_microbatches():
    def env_fn():
        env = gym.make('CartPole-v0')
        env.seed(0)
        return env

    learn_fn = partial(learn, network='mlp', nsteps=32, total_timesteps=32, seed=0)

    env_ref = DummyVecEnv([env_fn])
    sess_ref = make_session(make_default=True, graph=tf.Graph())
    learn_fn(env=env_ref)
    vars_ref = {v.name: sess_ref.run(v) for v in tf.trainable_variables()}

    env_test = DummyVecEnv([env_fn])
    sess_test = make_session(make_default=True, graph=tf.Graph())
    learn_fn(env=env_test, model_fn=partial(MicrobatchedModel, microbatch_size=2))
    # learn_fn(env=env_test)
    vars_test = {v.name: sess_test.run(v) for v in tf.trainable_variables()}

    for v in vars_ref:
        np.testing.assert_allclose(vars_ref[v], vars_test[v], atol=3e-3)
Example 8: main
# Required import: from baselines.common.vec_env import dummy_vec_env [as alias]
# Or: from baselines.common.vec_env.dummy_vec_env import DummyVecEnv [as alias]
def main():
    """Run PPO until the environment throws an exception."""
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # pylint: disable=E1101
    with tf.Session(config=config):
        # Take more timesteps than we need to be sure that
        # we stop due to an exception.
        ppo2.learn(policy=policies.CnnPolicy,
                   env=DummyVecEnv([make_env]),
                   nsteps=4096,
                   nminibatches=8,
                   lam=0.95,
                   gamma=0.99,
                   noptepochs=3,
                   log_interval=1,
                   ent_coef=0.01,
                   lr=lambda _: 2e-4,
                   cliprange=lambda _: 0.1,
                   total_timesteps=int(1e7))
Example 9: train
# Required import: from baselines.common.vec_env import dummy_vec_env [as alias]
# Or: from baselines.common.vec_env.dummy_vec_env import DummyVecEnv [as alias]
def train(env_id, num_timesteps, seed, policy, r_ex_coef, r_in_coef, lr_alpha, lr_beta, reward_freq):
    from baselines.common import set_global_seeds
    from baselines.common.vec_env.vec_normalize import VecNormalize
    from baselines.ppo2 import ppo2
    from baselines.ppo2.policies import MlpPolicy, MlpPolicyIntrinsicReward
    import gym
    import tensorflow as tf
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

    ncpu = 1
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True
    tf.Session(config=config).__enter__()

    def make_env():
        env = gym.make(env_id)
        env = bench.Monitor(env, logger.get_dir())
        return env

    env = DummyVecEnv([make_env])
    env = VecNormalize(env)

    set_global_seeds(seed)
    if policy == 'mlp':
        policy = MlpPolicy
    elif policy == 'mlp_int':
        policy = MlpPolicyIntrinsicReward
    else:
        raise NotImplementedError

    ppo2.learn(policy=policy, env=env, nsteps=2048, nminibatches=32,
               lam=0.95, gamma=0.99, noptepochs=10, log_interval=1,
               ent_coef=0.0,
               lr_alpha=lr_alpha,
               cliprange=0.2,
               total_timesteps=num_timesteps,
               r_ex_coef=r_ex_coef,
               r_in_coef=r_in_coef,
               lr_beta=lr_beta,
               reward_freq=reward_freq)
Example 10: simple_test
# Required import: from baselines.common.vec_env import dummy_vec_env [as alias]
# Or: from baselines.common.vec_env.dummy_vec_env import DummyVecEnv [as alias]
def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
    np.random.seed(0)
    np_random.seed(0)

    env = DummyVecEnv([env_fn])

    with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
        tf.set_random_seed(0)
        model = learn_fn(env)

        sum_rew = 0
        done = True
        for i in range(n_trials):
            if done:
                obs = env.reset()
                state = model.initial_state

            if state is not None:
                a, v, state, _ = model.step(obs, S=state, M=[False])
            else:
                a, v, _, _ = model.step(obs)

            obs, rew, done, _ = env.step(a)
            sum_rew += float(rew)

        print("Reward in {} trials is {}".format(n_trials, sum_rew))
        assert sum_rew > min_reward_fraction * n_trials, \
            'sum of rewards {} is less than {} of the total number of trials {}'.format(sum_rew, min_reward_fraction, n_trials)
Example 11: reward_per_episode_test
# Required import: from baselines.common.vec_env import dummy_vec_env [as alias]
# Or: from baselines.common.vec_env.dummy_vec_env import DummyVecEnv [as alias]
def reward_per_episode_test(env_fn, learn_fn, min_avg_reward, n_trials=N_EPISODES):
    env = DummyVecEnv([env_fn])

    with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
        model = learn_fn(env)

        N_TRIALS = 100

        observations, actions, rewards = rollout(env, model, N_TRIALS)
        rewards = [sum(r) for r in rewards]

        avg_rew = sum(rewards) / N_TRIALS
        print("Average reward in {} episodes is {}".format(n_trials, avg_rew))
        assert avg_rew > min_avg_reward, \
            'average reward in {} episodes ({}) is less than {}'.format(n_trials, avg_rew, min_avg_reward)
Example 12: test_lstm_example
# Required import: from baselines.common.vec_env import dummy_vec_env [as alias]
# Or: from baselines.common.vec_env.dummy_vec_env import DummyVecEnv [as alias]
def test_lstm_example():
    import tensorflow as tf
    from baselines.common import policies, models, cmd_util
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

    # create vectorized environment
    venv = DummyVecEnv([lambda: cmd_util.make_mujoco_env('Reacher-v2', seed=0)])

    with tf.Session() as sess:
        # build policy based on lstm network with 128 units
        policy = policies.build_policy(venv, models.lstm(128))(nbatch=1, nsteps=1)

        # initialize tensorflow variables
        sess.run(tf.global_variables_initializer())

        # prepare environment variables
        ob = venv.reset()
        state = policy.initial_state
        done = [False]
        step_counter = 0

        # run a single episode until the end (i.e. until done)
        while True:
            action, _, state, _ = policy.step(ob, S=state, M=done)
            ob, reward, done, _ = venv.step(action)
            step_counter += 1
            if done:
                break

        assert step_counter > 5
Example 13: simple_test
# Required import: from baselines.common.vec_env import dummy_vec_env [as alias]
# Or: from baselines.common.vec_env.dummy_vec_env import DummyVecEnv [as alias]
def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
    np.random.seed(0)
    np_random.seed(0)

    env = DummyVecEnv([env_fn])

    with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
        tf.set_random_seed(0)
        model = learn_fn(env)

        sum_rew = 0
        done = True
        for i in range(n_trials):
            if done:
                obs = env.reset()
                state = model.initial_state

            if state is not None:
                a, v, state, _ = model.step(obs, S=state, M=[False])
            else:
                a, v, _, _ = model.step(obs)

            obs, rew, done, _ = env.step(a)
            sum_rew += float(rew)

        print("Reward in {} trials is {}".format(n_trials, sum_rew))
        assert sum_rew > min_reward_fraction * n_trials, \
            'sum of rewards {} is less than {} of the total number of trials {}'.format(sum_rew, min_reward_fraction, n_trials)
Example 14: reward_per_episode_test
# Required import: from baselines.common.vec_env import dummy_vec_env [as alias]
# Or: from baselines.common.vec_env.dummy_vec_env import DummyVecEnv [as alias]
def reward_per_episode_test(env_fn, learn_fn, min_avg_reward, n_trials=N_EPISODES):
    env = DummyVecEnv([env_fn])

    with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
        model = learn_fn(env)

        N_TRIALS = 100

        observations, actions, rewards = rollout(env, model, N_TRIALS)
        rewards = [sum(r) for r in rewards]

        avg_rew = sum(rewards) / N_TRIALS
        print("Average reward in {} episodes is {}".format(n_trials, avg_rew))
        assert avg_rew > min_avg_reward, \
            'average reward in {} episodes ({}) is less than {}'.format(n_trials, avg_rew, min_avg_reward)
Example 15: test_coexistence
# Required import: from baselines.common.vec_env import dummy_vec_env [as alias]
# Or: from baselines.common.vec_env.dummy_vec_env import DummyVecEnv [as alias]
def test_coexistence(learn_fn, network_fn):
    '''
    Test if more than one model can exist at a time
    '''
    if learn_fn == 'deepq':
        # TODO enable multiple DQN models to be useable at the same time
        # github issue https://github.com/openai/baselines/issues/656
        return
    if network_fn.endswith('lstm') and learn_fn in ['acktr', 'trpo_mpi', 'deepq']:
        # TODO make acktr work with recurrent policies
        # and test
        # github issue: https://github.com/openai/baselines/issues/660
        return

    env = DummyVecEnv([lambda: gym.make('CartPole-v0')])
    learn = get_learn_function(learn_fn)

    kwargs = {}
    kwargs.update(network_kwargs[network_fn])
    kwargs.update(learn_kwargs[learn_fn])

    learn = partial(learn, env=env, network=network_fn, total_timesteps=0, **kwargs)
    make_session(make_default=True, graph=tf.Graph())
    model1 = learn(seed=1)
    make_session(make_default=True, graph=tf.Graph())
    model2 = learn(seed=2)

    model1.step(env.observation_space.sample())
    model2.step(env.observation_space.sample())
Developer: quantumiracle, Project: Reinforcement_Learning_for_Traffic_Light_Control, Lines of code: 33, Source file: test_serialization.py