本文整理汇总了Python中baselines.common.tf_util.load_variables方法的典型用法代码示例。如果您正苦于以下问题:Python tf_util.load_variables方法的具体用法?Python tf_util.load_variables怎么用?Python tf_util.load_variables使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类baselines.common.tf_util
的用法示例。
在下文中一共展示了tf_util.load_variables方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: initialize
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import load_variables [as 别名]
def initialize(self, sess):
self.sess = sess
self.sess.run(tf.global_variables_initializer())
self.save = functools.partial(save_variables, sess=self.sess)
self.load = functools.partial(load_variables, sess=self.load)
self.actor_optimizer.sync()
self.critic_optimizer.sync()
self.sess.run(self.target_init_updates)
示例2: runner
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import load_variables [as 别名]
def runner(env, policy_func, load_model_path, timesteps_per_batch, number_trajs,
stochastic_policy, save=False, reuse=False):
# Setup network
# ----------------------------------------
ob_space = env.observation_space
ac_space = env.action_space
pi = policy_func("pi", ob_space, ac_space, reuse=reuse)
U.initialize()
# Prepare for rollouts
# ----------------------------------------
U.load_variables(load_model_path)
obs_list = []
acs_list = []
len_list = []
ret_list = []
for _ in tqdm(range(number_trajs)):
traj = traj_1_generator(pi, env, timesteps_per_batch, stochastic=stochastic_policy)
obs, acs, ep_len, ep_ret = traj['ob'], traj['ac'], traj['ep_len'], traj['ep_ret']
obs_list.append(obs)
acs_list.append(acs)
len_list.append(ep_len)
ret_list.append(ep_ret)
if stochastic_policy:
print('stochastic policy:')
else:
print('deterministic policy:')
if save:
filename = load_model_path.split('/')[-1] + '.' + env.spec.id
np.savez(filename, obs=np.array(obs_list), acs=np.array(acs_list),
lens=np.array(len_list), rets=np.array(ret_list))
avg_len = sum(len_list)/len(len_list)
avg_ret = sum(ret_list)/len(ret_list)
print("Average length:", avg_len)
print("Average return:", avg_ret)
return avg_len, avg_ret
# Sample one trajectory (until trajectory end)
示例3: __init__
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import load_variables [as 别名]
def __init__(self, policy, env, nsteps,
ent_coef=0.01, vf_coef=0.5, max_grad_norm=0.5, lr=7e-4,
alpha=0.99, epsilon=1e-5, total_timesteps=int(80e6), lrschedule='linear'):
sess = tf_util.get_session()
nenvs = env.num_envs
nbatch = nenvs*nsteps
with tf.variable_scope('a2c_model', reuse=tf.AUTO_REUSE):
step_model = policy(nenvs, 1, sess)
train_model = policy(nbatch, nsteps, sess)
A = tf.placeholder(train_model.action.dtype, train_model.action.shape)
ADV = tf.placeholder(tf.float32, [nbatch])
R = tf.placeholder(tf.float32, [nbatch])
LR = tf.placeholder(tf.float32, [])
neglogpac = train_model.pd.neglogp(A)
entropy = tf.reduce_mean(train_model.pd.entropy())
pg_loss = tf.reduce_mean(ADV * neglogpac)
vf_loss = losses.mean_squared_error(tf.squeeze(train_model.vf), R)
loss = pg_loss - entropy*ent_coef + vf_loss * vf_coef
params = find_trainable_variables("a2c_model")
grads = tf.gradients(loss, params)
if max_grad_norm is not None:
grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
grads = list(zip(grads, params))
trainer = tf.train.RMSPropOptimizer(learning_rate=LR, decay=alpha, epsilon=epsilon)
_train = trainer.apply_gradients(grads)
lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule)
def train(obs, states, rewards, masks, actions, values):
advs = rewards - values
for step in range(len(obs)):
cur_lr = lr.value()
td_map = {train_model.X:obs, A:actions, ADV:advs, R:rewards, LR:cur_lr}
if states is not None:
td_map[train_model.S] = states
td_map[train_model.M] = masks
policy_loss, value_loss, policy_entropy, _ = sess.run(
[pg_loss, vf_loss, entropy, _train],
td_map
)
return policy_loss, value_loss, policy_entropy
self.train = train
self.train_model = train_model
self.step_model = step_model
self.step = step_model.step
self.value = step_model.value
self.initial_state = step_model.initial_state
self.save = functools.partial(tf_util.save_variables, sess=sess)
self.load = functools.partial(tf_util.load_variables, sess=sess)
tf.global_variables_initializer().run(session=sess)