本文整理汇总了Python中baselines.common.tf_util.make_session方法的典型用法代码示例。如果您正苦于以下问题:Python tf_util.make_session方法的具体用法?Python tf_util.make_session怎么用?Python tf_util.make_session使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类baselines.common.tf_util
的用法示例。
在下文中一共展示了tf_util.make_session方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: train
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import make_session [as 别名]
def train(env_id, num_timesteps, seed):
from baselines.pposgd import mlp_policy, pposgd_simple
U.make_session(num_cpu=1).__enter__()
logger.session().__enter__()
set_global_seeds(seed)
env = gym.make(env_id)
def policy_fn(name, ob_space, ac_space):
return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
hid_size=64, num_hid_layers=2)
env = bench.Monitor(env, osp.join(logger.get_dir(), "monitor.json"))
env.seed(seed)
gym.logger.setLevel(logging.WARN)
pposgd_simple.learn(env, policy_fn,
max_timesteps=num_timesteps,
timesteps_per_batch=2048,
clip_param=0.2, entcoeff=0.0,
optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
gamma=0.99, lam=0.95,
)
env.close()
示例2: test_microbatches
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import make_session [as 别名]
def test_microbatches():
def env_fn():
env = gym.make('CartPole-v0')
env.seed(0)
return env
learn_fn = partial(learn, network='mlp', nsteps=32, total_timesteps=32, seed=0)
env_ref = DummyVecEnv([env_fn])
sess_ref = make_session(make_default=True, graph=tf.Graph())
learn_fn(env=env_ref)
vars_ref = {v.name: sess_ref.run(v) for v in tf.trainable_variables()}
env_test = DummyVecEnv([env_fn])
sess_test = make_session(make_default=True, graph=tf.Graph())
learn_fn(env=env_test, model_fn=partial(MicrobatchedModel, microbatch_size=2))
vars_test = {v.name: sess_test.run(v) for v in tf.trainable_variables()}
for v in vars_ref:
np.testing.assert_allclose(vars_ref[v], vars_test[v], atol=1e-3)
示例3: test_microbatches
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import make_session [as 别名]
def test_microbatches():
def env_fn():
env = gym.make('CartPole-v0')
env.seed(0)
return env
learn_fn = partial(learn, network='mlp', nsteps=32, total_timesteps=32, seed=0)
env_ref = DummyVecEnv([env_fn])
sess_ref = make_session(make_default=True, graph=tf.Graph())
learn_fn(env=env_ref)
vars_ref = {v.name: sess_ref.run(v) for v in tf.trainable_variables()}
env_test = DummyVecEnv([env_fn])
sess_test = make_session(make_default=True, graph=tf.Graph())
learn_fn(env=env_test, model_fn=partial(MicrobatchedModel, microbatch_size=2))
# learn_fn(env=env_test)
vars_test = {v.name: sess_test.run(v) for v in tf.trainable_variables()}
for v in vars_ref:
np.testing.assert_allclose(vars_ref[v], vars_test[v], atol=3e-3)
示例4: train
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import make_session [as 别名]
def train(env_id, num_timesteps, seed):
from baselines.ppo1 import mlp_policy, pposgd_simple
U.make_session(num_cpu=1).__enter__()
set_global_seeds(seed)
env = gym.make(env_id)
def policy_fn(name, ob_space, ac_space):
return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
hid_size=64, num_hid_layers=2)
env = bench.Monitor(env, logger.get_dir())
env.seed(seed)
gym.logger.setLevel(logging.WARN)
pposgd_simple.learn(env, policy_fn,
max_timesteps=num_timesteps,
timesteps_per_actorbatch=2048,
clip_param=0.2, entcoeff=0.0,
optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
gamma=0.99, lam=0.95, schedule='linear',
)
env.close()
示例5: main
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import make_session [as 别名]
def main():
set_global_seeds(1)
args = parse_args()
with U.make_session(4) as sess: # noqa
_, env = make_env(args.env)
model_parent_path = distdeepq.parent_path(args.model_dir)
old_args = json.load(open(model_parent_path + '/args.json'))
act = distdeepq.build_act(
make_obs_ph=lambda name: U.Uint8Input(env.observation_space.shape, name=name),
p_dist_func=distdeepq.models.atari_model(),
num_actions=env.action_space.n,
dist_params={'Vmin': old_args['vmin'],
'Vmax': old_args['vmax'],
'nb_atoms': old_args['nb_atoms']})
U.load_state(os.path.join(args.model_dir, "saved"))
wang2015_eval(args.env, act, stochastic=args.stochastic)
示例6: main
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import make_session [as 别名]
def main(args):
U.make_session(num_cpu=1).__enter__()
set_global_seeds(args.seed)
print('Evaluating {}'.format(args.env))
bc_log = evaluate_env(args.env, args.seed, args.policy_hidden_size,
args.stochastic_policy, False, 'BC')
print('Evaluation for {}'.format(args.env))
print(bc_log)
gail_log = evaluate_env(args.env, args.seed, args.policy_hidden_size,
args.stochastic_policy, True, 'gail')
print('Evaluation for {}'.format(args.env))
print(gail_log)
plot(args.env, bc_log, gail_log, args.stochastic_policy)
示例7: main
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import make_session [as 别名]
def main(args):
U.make_session(num_cpu=1).__enter__()
set_global_seeds(args.seed)
env = gym.make(args.env_id)
def policy_fn(name, ob_space, ac_space, reuse=False):
return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
reuse=reuse, hid_size=args.policy_hidden_size, num_hid_layers=2)
env = bench.Monitor(env, logger.get_dir() and
osp.join(logger.get_dir(), "monitor.json"))
env.seed(args.seed)
gym.logger.setLevel(logging.WARN)
task_name = get_task_name(args)
args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
args.log_dir = osp.join(args.log_dir, task_name)
dataset = Mujoco_Dset(expert_path=args.expert_path, traj_limitation=args.traj_limitation)
savedir_fname = learn(env,
policy_fn,
dataset,
max_iters=args.BC_max_iter,
ckpt_dir=args.checkpoint_dir,
log_dir=args.log_dir,
task_name=task_name,
verbose=True)
avg_len, avg_ret = runner(env,
policy_fn,
savedir_fname,
timesteps_per_batch=1024,
number_trajs=10,
stochastic_policy=args.stochastic_policy,
save=args.save_sample,
reuse=True)
示例8: train
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import make_session [as 别名]
def train(env_id, num_timesteps, seed):
from baselines.ppo1 import mlp_policy, pposgd_simple
U.make_session(num_cpu=1).__enter__()
def policy_fn(name, ob_space, ac_space):
return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
hid_size=64, num_hid_layers=2)
env = make_mujoco_env(env_id, seed)
pposgd_simple.learn(env, policy_fn,
max_timesteps=num_timesteps,
timesteps_per_actorbatch=2048,
clip_param=0.2, entcoeff=0.0,
optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
gamma=0.99, lam=0.95, schedule='linear',
)
env.close()
示例9: train
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import make_session [as 别名]
def train(num_timesteps, seed, model_path=None):
env_id = 'Humanoid-v2'
from baselines.ppo1 import mlp_policy, pposgd_simple
U.make_session(num_cpu=1).__enter__()
def policy_fn(name, ob_space, ac_space):
return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
hid_size=64, num_hid_layers=2)
env = make_mujoco_env(env_id, seed)
# parameters below were the best found in a simple random search
# these are good enough to make humanoid walk, but whether those are
# an absolute best or not is not certain
env = RewScale(env, 0.1)
pi = pposgd_simple.learn(env, policy_fn,
max_timesteps=num_timesteps,
timesteps_per_actorbatch=2048,
clip_param=0.2, entcoeff=0.0,
optim_epochs=10,
optim_stepsize=3e-4,
optim_batchsize=64,
gamma=0.99,
lam=0.95,
schedule='linear',
)
env.close()
if model_path:
U.save_state(model_path)
return pi
示例10: test_coexistence
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import make_session [as 别名]
def test_coexistence(learn_fn, network_fn):
'''
Test if more than one model can exist at a time
'''
if learn_fn == 'deepq':
# TODO enable multiple DQN models to be useable at the same time
# github issue https://github.com/openai/baselines/issues/656
return
if network_fn.endswith('lstm') and learn_fn in ['acktr', 'trpo_mpi', 'deepq']:
# TODO make acktr work with recurrent policies
# and test
# github issue: https://github.com/openai/baselines/issues/660
return
env = DummyVecEnv([lambda: gym.make('CartPole-v0')])
learn = get_learn_function(learn_fn)
kwargs = {}
kwargs.update(network_kwargs[network_fn])
kwargs.update(learn_kwargs[learn_fn])
learn = partial(learn, env=env, network=network_fn, total_timesteps=0, **kwargs)
make_session(make_default=True, graph=tf.Graph());
model1 = learn(seed=1)
make_session(make_default=True, graph=tf.Graph());
model2 = learn(seed=2)
model1.step(env.observation_space.sample())
model2.step(env.observation_space.sample())
开发者ID:quantumiracle,项目名称:Reinforcement_Learning_for_Traffic_Light_Control,代码行数:33,代码来源:test_serialization.py
示例11: main
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import make_session [as 别名]
def main():
set_global_seeds(1)
args = parse_args()
with U.make_session(4) as sess: # noqa
_, env = make_env(args.env)
act = deepq.build_act(
make_obs_ph=lambda name: U.Uint8Input(env.observation_space.shape, name=name),
q_func=dueling_model if args.dueling else model,
num_actions=env.action_space.n)
U.load_state(os.path.join(args.model_dir, "saved"))
wang2015_eval(args.env, act, stochastic=args.stochastic)
示例12: load
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import make_session [as 别名]
def load(path, num_cpu=16):
with open(path, "rb") as f:
model_data, act_params = dill.load(f)
act = deepq.build_act(**act_params)
sess = U.make_session(num_cpu=num_cpu)
sess.__enter__()
with tempfile.TemporaryDirectory() as td:
arc_path = os.path.join(td, "packed.zip")
with open(arc_path, "wb") as f:
f.write(model_data)
zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
U.load_state(os.path.join(td, "model"))
return ActWrapper(act, act_params)
示例13: train
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import make_session [as 别名]
def train(num_timesteps, seed, model_path=None):
env_id = 'Humanoid-v2'
from baselines.ppo1 import mlp_policy, pposgd_simple
U.make_session(num_cpu=1).__enter__()
def policy_fn(name, ob_space, ac_space):
return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
hid_size=64, num_hid_layers=2)
env = make_mujoco_env(env_id, seed)
# parameters below were the best found in a simple random search
# these are good enough to make humanoid walk, but whether those are
# an absolute best or not is not certain
env = RewScale(env, 0.1)
pi = pposgd_simple.learn(env, policy_fn,
max_timesteps=num_timesteps,
timesteps_per_actorbatch=2048,
clip_param=0.2, entcoeff=0.0,
optim_epochs=10,
optim_stepsize=3e-4,
optim_batchsize=64,
gamma=0.99,
lam=0.95,
schedule='linear',
)
env.close()
if model_path:
U.save_state(model_path)
return pi
示例14: test_coexistence
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import make_session [as 别名]
def test_coexistence(learn_fn, network_fn):
'''
Test if more than one model can exist at a time
'''
if learn_fn == 'deepq':
# TODO enable multiple DQN models to be useable at the same time
# github issue https://github.com/openai/baselines/issues/656
return
if network_fn.endswith('lstm') and learn_fn in ['acktr', 'trpo_mpi', 'deepq']:
# TODO make acktr work with recurrent policies
# and test
# github issue: https://github.com/openai/baselines/issues/660
return
env = DummyVecEnv([lambda: gym.make('CartPole-v0')])
learn = get_learn_function(learn_fn)
kwargs = {}
kwargs.update(network_kwargs[network_fn])
kwargs.update(learn_kwargs[learn_fn])
learn = partial(learn, env=env, network=network_fn, total_timesteps=0, **kwargs)
make_session(make_default=True, graph=tf.Graph())
model1 = learn(seed=1)
make_session(make_default=True, graph=tf.Graph())
model2 = learn(seed=2)
model1.step(env.observation_space.sample())
model2.step(env.observation_space.sample())