本文整理汇总了Python中baselines.common.tf_util.flatgrad方法的典型用法代码示例。如果您正苦于以下问题:Python tf_util.flatgrad方法的具体用法?Python tf_util.flatgrad怎么用?Python tf_util.flatgrad使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类baselines.common.tf_util
的用法示例。
在下文中一共展示了tf_util.flatgrad方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: setup_critic_optimizer
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import flatgrad [as 别名]
def setup_critic_optimizer(self):
logger.info('setting up critic optimizer')
normalized_critic_target_tf = tf.clip_by_value(normalize(self.critic_target, self.ret_rms), self.return_range[0], self.return_range[1])
self.critic_loss = tf.reduce_mean(tf.square(self.normalized_critic_tf - normalized_critic_target_tf))
if self.critic_l2_reg > 0.:
critic_reg_vars = [var for var in self.critic.trainable_vars if 'kernel' in var.name and 'output' not in var.name]
for var in critic_reg_vars:
logger.info(' regularizing: {}'.format(var.name))
logger.info(' applying l2 regularization with {}'.format(self.critic_l2_reg))
critic_reg = tc.layers.apply_regularization(
tc.layers.l2_regularizer(self.critic_l2_reg),
weights_list=critic_reg_vars
)
self.critic_loss += critic_reg
critic_shapes = [var.get_shape().as_list() for var in self.critic.trainable_vars]
critic_nb_params = sum([reduce(lambda x, y: x * y, shape) for shape in critic_shapes])
logger.info(' critic shapes: {}'.format(critic_shapes))
logger.info(' critic params: {}'.format(critic_nb_params))
self.critic_grads = U.flatgrad(self.critic_loss, self.critic.trainable_vars, clip_norm=self.clip_norm)
self.critic_optimizer = MpiAdam(var_list=self.critic.trainable_vars,
beta1=0.9, beta2=0.999, epsilon=1e-08)
示例2: setup_critic_optimizer
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import flatgrad [as 别名]
def setup_critic_optimizer(self):
logger.info('setting up critic optimizer')
normalized_critic_target_tf = tf.clip_by_value(normalize(self.critic_target, self.ret_rms), self.return_range[0], self.return_range[1])
self.critic_loss = tf.reduce_mean(tf.square(self.normalized_critic_tf - normalized_critic_target_tf))
if self.critic_l2_reg > 0.:
critic_reg_vars = [var for var in self.critic.trainable_vars if var.name.endswith('/w:0') and 'output' not in var.name]
for var in critic_reg_vars:
logger.info(' regularizing: {}'.format(var.name))
logger.info(' applying l2 regularization with {}'.format(self.critic_l2_reg))
critic_reg = tc.layers.apply_regularization(
tc.layers.l2_regularizer(self.critic_l2_reg),
weights_list=critic_reg_vars
)
self.critic_loss += critic_reg
critic_shapes = [var.get_shape().as_list() for var in self.critic.trainable_vars]
critic_nb_params = sum([reduce(lambda x, y: x * y, shape) for shape in critic_shapes])
logger.info(' critic shapes: {}'.format(critic_shapes))
logger.info(' critic params: {}'.format(critic_nb_params))
self.critic_grads = U.flatgrad(self.critic_loss, self.critic.trainable_vars, clip_norm=self.clip_norm)
self.critic_optimizer = MpiAdam(var_list=self.critic.trainable_vars,
beta1=0.9, beta2=0.999, epsilon=1e-08)
示例3: __init__
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import flatgrad [as 别名]
def __init__(self, env, hidden_size, entcoeff=0.001, lr_rate=1e-3, scope="adversary"):
self.scope = scope
self.observation_shape = env.observation_space.shape
self.actions_shape = env.action_space.shape
self.input_shape = tuple([o+a for o, a in zip(self.observation_shape, self.actions_shape)])
self.num_actions = env.action_space.shape[0]
self.hidden_size = hidden_size
self.build_ph()
# Build grpah
generator_logits = self.build_graph(self.generator_obs_ph, self.generator_acs_ph, reuse=False)
expert_logits = self.build_graph(self.expert_obs_ph, self.expert_acs_ph, reuse=True)
# Build accuracy
generator_acc = tf.reduce_mean(tf.to_float(tf.nn.sigmoid(generator_logits) < 0.5))
expert_acc = tf.reduce_mean(tf.to_float(tf.nn.sigmoid(expert_logits) > 0.5))
# Build regression loss
# let x = logits, z = targets.
# z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
generator_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=generator_logits, labels=tf.zeros_like(generator_logits))
generator_loss = tf.reduce_mean(generator_loss)
expert_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=expert_logits, labels=tf.ones_like(expert_logits))
expert_loss = tf.reduce_mean(expert_loss)
# Build entropy loss
logits = tf.concat([generator_logits, expert_logits], 0)
entropy = tf.reduce_mean(logit_bernoulli_entropy(logits))
entropy_loss = -entcoeff*entropy
# Loss + Accuracy terms
self.losses = [generator_loss, expert_loss, entropy, entropy_loss, generator_acc, expert_acc]
self.loss_name = ["generator_loss", "expert_loss", "entropy", "entropy_loss", "generator_acc", "expert_acc"]
self.total_loss = generator_loss + expert_loss + entropy_loss
# Build Reward for policy
self.reward_op = -tf.log(1-tf.nn.sigmoid(generator_logits)+1e-8)
var_list = self.get_trainable_variables()
self.lossandgrad = U.function([self.generator_obs_ph, self.generator_acs_ph, self.expert_obs_ph, self.expert_acs_ph],
self.losses + [U.flatgrad(self.total_loss, var_list)])
示例4: learn
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import flatgrad [as 别名]
def learn(env, policy_func, dataset, optim_batch_size=128, max_iters=1e4,
adam_epsilon=1e-5, optim_stepsize=3e-4,
ckpt_dir=None, log_dir=None, task_name=None,
verbose=False):
val_per_iter = int(max_iters/10)
ob_space = env.observation_space
ac_space = env.action_space
pi = policy_func("pi", ob_space, ac_space) # Construct network for new policy
# placeholder
ob = U.get_placeholder_cached(name="ob")
ac = pi.pdtype.sample_placeholder([None])
stochastic = U.get_placeholder_cached(name="stochastic")
loss = tf.reduce_mean(tf.square(ac-pi.ac))
var_list = pi.get_trainable_variables()
adam = MpiAdam(var_list, epsilon=adam_epsilon)
lossandgrad = U.function([ob, ac, stochastic], [loss]+[U.flatgrad(loss, var_list)])
U.initialize()
adam.sync()
logger.log("Pretraining with Behavior Cloning...")
for iter_so_far in tqdm(range(int(max_iters))):
ob_expert, ac_expert = dataset.get_next_batch(optim_batch_size, 'train')
train_loss, g = lossandgrad(ob_expert, ac_expert, True)
adam.update(g, optim_stepsize)
if verbose and iter_so_far % val_per_iter == 0:
ob_expert, ac_expert = dataset.get_next_batch(-1, 'val')
val_loss, _ = lossandgrad(ob_expert, ac_expert, True)
logger.log("Training loss: {}, Validation loss: {}".format(train_loss, val_loss))
if ckpt_dir is None:
savedir_fname = tempfile.TemporaryDirectory().name
else:
savedir_fname = osp.join(ckpt_dir, task_name)
U.save_state(savedir_fname, var_list=pi.get_variables())
return savedir_fname
示例5: test_MpiAdam
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import flatgrad [as 别名]
def test_MpiAdam():
np.random.seed(0)
tf.set_random_seed(0)
a = tf.Variable(np.random.randn(3).astype('float32'))
b = tf.Variable(np.random.randn(2,5).astype('float32'))
loss = tf.reduce_sum(tf.square(a)) + tf.reduce_sum(tf.sin(b))
stepsize = 1e-2
update_op = tf.train.AdamOptimizer(stepsize).minimize(loss)
do_update = U.function([], loss, updates=[update_op])
tf.get_default_session().run(tf.global_variables_initializer())
for i in range(10):
print(i,do_update())
tf.set_random_seed(0)
tf.get_default_session().run(tf.global_variables_initializer())
var_list = [a,b]
lossandgrad = U.function([], [loss, U.flatgrad(loss, var_list)], updates=[update_op])
adam = MpiAdam(var_list)
for i in range(10):
l,g = lossandgrad()
adam.update(g, stepsize)
print(i,l)
示例6: setup_actor_optimizer
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import flatgrad [as 别名]
def setup_actor_optimizer(self):
logger.info('setting up actor optimizer')
self.actor_loss = -tf.reduce_mean(self.critic_with_actor_tf)
actor_shapes = [var.get_shape().as_list() for var in self.actor.trainable_vars]
actor_nb_params = sum([reduce(lambda x, y: x * y, shape) for shape in actor_shapes])
logger.info(' actor shapes: {}'.format(actor_shapes))
logger.info(' actor params: {}'.format(actor_nb_params))
self.actor_grads = U.flatgrad(self.actor_loss, self.actor.trainable_vars, clip_norm=self.clip_norm)
self.actor_optimizer = MpiAdam(var_list=self.actor.trainable_vars,
beta1=0.9, beta2=0.999, epsilon=1e-08)
示例7: test_MpiAdam
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import flatgrad [as 别名]
def test_MpiAdam():
np.random.seed(0)
tf.set_random_seed(0)
a = tf.Variable(np.random.randn(3).astype('float32'))
b = tf.Variable(np.random.randn(2,5).astype('float32'))
loss = tf.reduce_sum(tf.square(a)) + tf.reduce_sum(tf.sin(b))
stepsize = 1e-2
update_op = tf.train.AdamOptimizer(stepsize).minimize(loss)
do_update = U.function([], loss, updates=[update_op])
tf.get_default_session().run(tf.global_variables_initializer())
for i in range(10):
print(i,do_update())
tf.set_random_seed(0)
tf.get_default_session().run(tf.global_variables_initializer())
var_list = [a,b]
lossandgrad = U.function([], [loss, U.flatgrad(loss, var_list)], updates=[update_op])
adam = MpiAdam(var_list)
for i in range(10):
l,g = lossandgrad()
adam.update(g, stepsize)
print(i,l)