This article collects typical usage examples of the Python function tensorflow.stop_gradient, drawn from open-source projects. If you have been wondering what stop_gradient does, how to call it, or what it looks like in real code, the curated examples below should help.
In total, 15 code examples of the stop_gradient function are shown, ordered roughly by popularity.
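Before the examples, a quick standalone illustration may be useful. The sketch below is not taken from any of the projects that follow; it assumes TensorFlow 2.x eager execution and only shows the core behaviour: tf.stop_gradient acts as the identity in the forward pass but blocks all gradient flow through its argument in the backward pass.

import tensorflow as tf

x = tf.Variable([1.0, 2.0, 3.0])
with tf.GradientTape() as tape:
    # forward value is x^2 + x^2, but only the first term contributes a gradient
    y = tf.reduce_sum(tf.square(x) + tf.stop_gradient(tf.square(x)))

print(y)                    # 28.0 = 2 * (1 + 4 + 9)
print(tape.gradient(y, x))  # [2. 4. 6.], i.e. d/dx of x^2 only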
Example 1: get_next_input
def get_next_input(output):
# the next location is computed by the location network
baseline = tf.sigmoid(tf.matmul(output,Wb_h_b) + Bb_h_b)
baselines.append(baseline)
# compute the next location, then impose noise
if eyeCentered:
# add the last sampled glimpse location
# TODO max(-1, min(1, u + N(output, sigma) + prevLoc))
mean_loc = tf.maximum(-1.0, tf.minimum(1.0, tf.matmul(output, Wl_h_l) + sampled_locs[-1] ))
else:
mean_loc = tf.matmul(output, Wl_h_l)
# mean_loc = tf.stop_gradient(mean_loc)
mean_locs.append(mean_loc)
mean_locs_stopGrad.append(tf.stop_gradient(mean_loc))
# add noise
# sample_loc = tf.tanh(mean_loc + tf.random_normal(mean_loc.get_shape(), 0, loc_sd))
sample_loc = tf.maximum(-1.0, tf.minimum(1.0, mean_loc + tf.random_normal(mean_loc.get_shape(), 0, loc_sd)))
# don't propagate gradients through the locations
# sample_loc = tf.stop_gradient(sample_loc)
sampled_locs.append(sample_loc)
sampled_locs_stopGrad.append(tf.stop_gradient(sample_loc))
return get_glimpse(sample_loc)
Example 2: context_infer
def context_infer(pooled_features):
with tf.variable_scope("fc", reuse=True):
weights = tf.stop_gradient(tf.get_variable("weights"))
# b = tf.stop_gradient(tf.get_variable("biases"))
z = tf.stop_gradient(pooled_features) #Nx64
z = tf.expand_dims(z, -1) # Nx64x1
w = weights # 64x10
w = tf.expand_dims(w, 0) # 1x64x10
mean, variance = tf.nn.moments(w, [1], keep_dims=True) #1x1x10
response = tf.reduce_sum(tf.mul(z, w), 1, keep_dims=True) # Nx1x10
response_vec = tf.mul(response, w) # Nx64x10
response_vec = tf.div(response_vec, variance) # Nx64x10
h = tf.sub(z, response_vec) # Nx64x10
weights_initializer = tf.truncated_normal_initializer(
stddev=FC_WEIGHT_STDDEV)
with tf.variable_scope("context", reuse=True):
context_weights = tf.stop_gradient(tf.get_variable("weights"))
biases = tf.stop_gradient(tf.get_variable("biases"))
context_weights = tf.expand_dims(context_weights, 0)
biases = tf.expand_dims(biases, 0)
scores = tf.reduce_sum(tf.mul(h, context_weights), 1) + biases
# TODO how to deal with b?
return scores
Example 3: get_dynamic_rebar_gradient
def get_dynamic_rebar_gradient(self):
"""Get the dynamic rebar gradient (t, eta optimized)."""
tiled_pre_temperature = tf.tile([self.pre_temperature_variable],
[self.batch_size])
temperature = tf.exp(tiled_pre_temperature)
hardELBO, nvil_gradient, logQHard = self._create_hard_elbo()
if self.hparams.quadratic:
gumbel_cv, extra = self._create_gumbel_control_variate_quadratic(logQHard, temperature=temperature)
else:
gumbel_cv, extra = self._create_gumbel_control_variate(logQHard, temperature=temperature)
f_grads = self.optimizer_class.compute_gradients(tf.reduce_mean(-nvil_gradient))
eta = {}
h_grads, eta_statistics = self.multiply_by_eta_per_layer(
self.optimizer_class.compute_gradients(tf.reduce_mean(gumbel_cv)),
eta)
model_grads = U.add_grads_and_vars(f_grads, h_grads)
total_grads = model_grads
# Construct the variance objective
g = U.vectorize(model_grads, set_none_to_zero=True)
self.maintain_ema_ops.append(self.ema.apply([g]))
gbar = 0 #tf.stop_gradient(self.ema.average(g))
variance_objective = tf.reduce_mean(tf.square(g - gbar))
reinf_g_t = 0
if self.hparams.quadratic:
for layer in xrange(self.hparams.n_layer):
gumbel_learning_signal, _ = extra[layer]
df_dt = tf.gradients(gumbel_learning_signal, tiled_pre_temperature)[0]
reinf_g_t_i, _ = self.multiply_by_eta_per_layer(
self.optimizer_class.compute_gradients(tf.reduce_mean(tf.stop_gradient(df_dt) * logQHard[layer])),
eta)
reinf_g_t += U.vectorize(reinf_g_t_i, set_none_to_zero=True)
reparam = tf.add_n([reparam_i for _, reparam_i in extra])
else:
gumbel_learning_signal, reparam = extra
df_dt = tf.gradients(gumbel_learning_signal, tiled_pre_temperature)[0]
reinf_g_t, _ = self.multiply_by_eta_per_layer(
self.optimizer_class.compute_gradients(tf.reduce_mean(tf.stop_gradient(df_dt) * tf.add_n(logQHard))),
eta)
reinf_g_t = U.vectorize(reinf_g_t, set_none_to_zero=True)
reparam_g, _ = self.multiply_by_eta_per_layer(
self.optimizer_class.compute_gradients(tf.reduce_mean(reparam)),
eta)
reparam_g = U.vectorize(reparam_g, set_none_to_zero=True)
reparam_g_t = tf.gradients(tf.reduce_mean(2*tf.stop_gradient(g - gbar)*reparam_g), self.pre_temperature_variable)[0]
variance_objective_grad = tf.reduce_mean(2*(g - gbar)*reinf_g_t) + reparam_g_t
debug = { 'ELBO': hardELBO,
'etas': eta_statistics,
'variance_objective': variance_objective,
}
return total_grads, debug, variance_objective, variance_objective_grad
Example 4: build_loss
def build_loss(self):
"""
Loss function to minimize, whose gradient is a stochastic
gradient inspired by adaptive importance sampling.
loss = E_{p(z | x)} [ log p(z | x) - log q(z; lambda) ]
is equivalent to minimizing
E_{p(z | x)} [ log p(x, z) - log q(z; lambda) ]
\approx 1/B sum_{b=1}^B
w_norm(z^b; lambda) (log p(x, z^b) - log q(z^b; lambda))
with gradient
\approx - 1/B sum_{b=1}^B
w_norm(z^b; lambda) grad_{lambda} log q(z^b; lambda)
where + z^b ~ q(z^b; lambda)
+ w_norm(z^b; lambda) = w(z^b; lambda) / sum_{b=1}^B w(z^b; lambda)
+ w(z^b; lambda) = p(x, z^b) / q(z^b; lambda)
"""
x = self.data.sample(self.n_data)
z, self.samples = self.variational.sample(self.n_minibatch)
q_log_prob = tf.zeros([self.n_minibatch], dtype=tf.float32)
for i in range(self.variational.num_factors):
q_log_prob += self.variational.log_prob_i(i, tf.stop_gradient(z))
# normalized importance weights
log_w = self.model.log_prob(x, z) - q_log_prob
log_w_norm = log_w - log_sum_exp(log_w)
w_norm = tf.exp(log_w_norm)
self.loss = tf.reduce_mean(w_norm * log_w)
return -tf.reduce_mean(q_log_prob * tf.stop_gradient(w_norm))
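The returned expression is a score-function surrogate: because w_norm is wrapped in tf.stop_gradient, differentiating -mean(q_log_prob * w_norm) treats the normalized importance weights as constants, which yields exactly the gradient written in the docstring. A toy sketch of just that pattern (TensorFlow 2.x eager mode; the tensor names and values are illustrative, not from the original model):

import tensorflow as tf

log_q = tf.Variable([-1.2, -0.7, -2.0])   # log q(z^b; lambda) for B = 3 samples
log_p = tf.constant([-1.0, -0.9, -1.5])   # log p(x, z^b), independent of lambda
with tf.GradientTape() as tape:
    log_w = log_p - log_q                 # log importance weights
    w_norm = tf.nn.softmax(log_w)         # self-normalized weights w_norm(z^b)
    surrogate = -tf.reduce_mean(log_q * tf.stop_gradient(w_norm))

# the weights act as constants, so the gradient is simply -w_norm / B
print(tape.gradient(surrogate, log_q))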
Example 5: energy
def energy(self, visible_state, hidden_state, scope='energy'):
with tf.variable_scope(scope):
visible_state = tf.stop_gradient(visible_state, name="visible_state")
hidden_state = tf.stop_gradient(hidden_state, name="hidden_state")
energy = -tf.reduce_mean(tf.reduce_sum(tf.multiply(tf.matmul(visible_state, self.W, name='visible_weights'),
hidden_state, name='weights_hidden')
, axis=1, name='energy_sum'), name="batch_energy_mean")
if self.visible.use_bias:
if self.visible.binary:
energy = tf.add(energy, -tf.reduce_mean(
tf.reduce_sum(tf.multiply(self.visible.bias, visible_state, name='visible_bias_energy'), axis=1)))
else:
v = visible_state - self.visible.bias
energy = tf.add(energy, tf.reduce_mean(tf.reduce_sum(tf.multiply(v, v) / 2, axis=1)))
if self.hidden.use_bias:
if self.hidden.binary:
energy = tf.add(energy, -tf.reduce_mean(
tf.reduce_sum(tf.multiply(self.hidden.bias, hidden_state, name='hidden_bias_energy'), axis=1)))
else:
h = hidden_state - self.hidden.bias
energy = tf.add(energy, tf.reduce_mean(tf.reduce_sum(tf.multiply(h, h) / 2, axis=1)))
return energy
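In energy(), the sampled visible and hidden states are detached up front, so differentiating the energy produces gradients only for the RBM parameters; the states are treated as fixed data, as in a contrastive-divergence-style update. A compact sketch of that pattern (TensorFlow 2.x; toy shapes and names, not the class above):

import tensorflow as tf

W = tf.Variable(tf.random.normal([6, 4]))   # RBM weights
v = tf.Variable(tf.random.normal([8, 6]))   # batch of sampled visible states
h = tf.Variable(tf.random.normal([8, 4]))   # batch of sampled hidden states

with tf.GradientTape() as tape:
    v_c, h_c = tf.stop_gradient(v), tf.stop_gradient(h)
    energy = -tf.reduce_mean(tf.reduce_sum(tf.matmul(v_c, W) * h_c, axis=1))

grad_W, grad_v = tape.gradient(energy, [W, v])
print(grad_W.shape)  # (6, 4): the parameters receive a gradient
print(grad_v)        # None: the sampled states are treated as data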
Example 6: target_critic_net
def target_critic_net(self, states, actions, for_critic_loss=False):
"""Returns the output of the target critic network.
The target network is used to compute stable targets for training.
Args:
states: A [batch_size, num_state_dims] tensor representing a batch
of states.
actions: A [batch_size, num_action_dims] tensor representing a batch
of actions.
Returns:
q values: A [batch_size] tensor of q values.
Raises:
ValueError: If `states` or `actions` do not have the expected dimensions.
"""
self._validate_states(states)
self._validate_actions(actions)
values1 = tf.stop_gradient(
self._target_critic_net(states, actions,
for_critic_loss=for_critic_loss))
values2 = tf.stop_gradient(
self._target_critic_net2(states, actions,
for_critic_loss=for_critic_loss))
if for_critic_loss:
return values1, values2
return values1
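target_critic_net wraps the target network's outputs in tf.stop_gradient so that the training target for the online critic stays constant during optimization. A condensed sketch of the same idea (TensorFlow 2.x with Keras layers; q_net, target_q_net and the shapes are placeholders, not this agent's API):

import tensorflow as tf

q_net = tf.keras.layers.Dense(1)          # online critic
target_q_net = tf.keras.layers.Dense(1)   # target critic, updated separately

states = tf.random.normal([4, 8])
next_states = tf.random.normal([4, 8])
rewards = tf.random.normal([4, 1])
gamma = 0.99

with tf.GradientTape() as tape:
    # the TD target is a constant as far as the optimizer is concerned
    td_target = rewards + gamma * tf.stop_gradient(target_q_net(next_states))
    loss = tf.reduce_mean(tf.square(q_net(states) - td_target))

# only the online critic receives gradients; the target network's entries are None
grads = tape.gradient(loss, q_net.trainable_variables + target_q_net.trainable_variables)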
Example 7: virtual_adversarial_loss_bidir
def virtual_adversarial_loss_bidir(logits, embedded, inputs,
logits_from_embedding_fn):
"""Virtual adversarial loss for bidirectional models."""
logits = tf.stop_gradient(logits)
f_inputs, _ = inputs
weights = f_inputs.eos_weights
if FLAGS.single_label:
indices = tf.stack([tf.range(FLAGS.batch_size), f_inputs.length - 1], 1)
weights = tf.expand_dims(tf.gather_nd(f_inputs.eos_weights, indices), 1)
assert weights is not None
perturbs = [
_mask_by_length(tf.random_normal(shape=tf.shape(emb)), f_inputs.length)
for emb in embedded
]
for _ in xrange(FLAGS.num_power_iteration):
perturbs = [
_scale_l2(d, FLAGS.small_constant_for_finite_diff) for d in perturbs
]
d_logits = logits_from_embedding_fn(
[emb + d for (emb, d) in zip(embedded, perturbs)])
kl = _kl_divergence_with_logits(logits, d_logits, weights)
perturbs = tf.gradients(
kl,
perturbs,
aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N)
perturbs = [tf.stop_gradient(d) for d in perturbs]
perturbs = [_scale_l2(d, FLAGS.perturb_norm_length) for d in perturbs]
vadv_logits = logits_from_embedding_fn(
[emb + d for (emb, d) in zip(embedded, perturbs)])
return _kl_divergence_with_logits(logits, vadv_logits, weights)
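The two stop_gradient calls above play different roles: the first freezes the clean logits so the KL target does not move, and the second detaches each power-iteration direction so that only the final perturbed forward pass is differentiated with respect to the model parameters. A compressed sketch of that flow (TensorFlow 2.x; the Dense stand-in model, the norms 1e-6 and 2.0, and the single iteration replace the FLAGS-driven values and are purely illustrative):

import tensorflow as tf

model = tf.keras.layers.Dense(3)                 # stand-in classifier head
emb = tf.random.normal([4, 16])                  # "embedded" inputs

clean_logits = tf.stop_gradient(model(emb))      # fixed target distribution
d = tf.random.normal(tf.shape(emb))              # initial random perturbation

for _ in range(1):                               # one power iteration
    d = 1e-6 * tf.math.l2_normalize(d, axis=-1)  # small finite-difference step
    with tf.GradientTape() as tape:
        tape.watch(d)
        # cross-entropy against the fixed distribution; it differs from the KL
        # only by a constant, so the gradients with respect to d are the same
        kl = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.nn.softmax(clean_logits), logits=model(emb + d)))
    d = tf.stop_gradient(tape.gradient(kl, d))   # detached adversarial direction

d = 2.0 * tf.math.l2_normalize(d, axis=-1)       # scale to the perturbation norm
with tf.GradientTape() as tape:
    vadv_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.nn.softmax(clean_logits), logits=model(emb + d)))
grads = tape.gradient(vadv_loss, model.trainable_variables)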
Example 8: latent_prediction_model
def latent_prediction_model(inputs,
ed_attention_bias,
latents_discrete,
latents_dense,
hparams,
name="latent_prediction"):
"""Transformer-based latent prediction model.
It is an autoregressive decoder over latents_discrete given inputs.
Args:
inputs: Tensor of shape [batch, length_kv, hparams.hidden_size]. Inputs to
attend to for the decoder on latents.
ed_attention_bias: Tensor which broadcasts with shape [batch,
hparams.num_heads, length_q, length_kv]. Encoder-decoder attention bias.
latents_discrete: Tensor of shape [batch, length_q, vocab_size].
One-hot latents to compute log-probability of given inputs.
latents_dense: Tensor of shape [batch, length_q, hparams.hidden_size].
hparams: tf.contrib.training.HParams.
name: string, variable scope.
Returns:
latents_pred: Tensor of shape [batch, length_q, hparams.hidden_size].
latents_pred_loss: Tensor of shape [batch, length_q].
"""
with tf.variable_scope(name):
if hparams.mode != tf.estimator.ModeKeys.PREDICT:
latents_pred = transformer_latent_decoder(
tf.stop_gradient(latents_dense), inputs, ed_attention_bias,
hparams, name)
_, latent_pred_loss = ae_latent_softmax(
latents_pred, tf.stop_gradient(latents_discrete), hparams)
return latents_pred, latent_pred_loss
Example 9: build_graph
def build_graph(self, state, action, futurereward, action_prob):
logits, value = self._get_NN_prediction(state)
value = tf.squeeze(value, [1], name='pred_value') # (B,)
policy = tf.nn.softmax(logits, name='policy')
is_training = get_current_tower_context().is_training
if not is_training:
return
log_probs = tf.log(policy + 1e-6)
log_pi_a_given_s = tf.reduce_sum(
log_probs * tf.one_hot(action, NUM_ACTIONS), 1)
advantage = tf.subtract(tf.stop_gradient(value), futurereward, name='advantage')
pi_a_given_s = tf.reduce_sum(policy * tf.one_hot(action, NUM_ACTIONS), 1) # (B,)
importance = tf.stop_gradient(tf.clip_by_value(pi_a_given_s / (action_prob + 1e-8), 0, 10))
policy_loss = tf.reduce_sum(log_pi_a_given_s * advantage * importance, name='policy_loss')
xentropy_loss = tf.reduce_sum(policy * log_probs, name='xentropy_loss')
value_loss = tf.nn.l2_loss(value - futurereward, name='value_loss')
pred_reward = tf.reduce_mean(value, name='predict_reward')
advantage = tf.sqrt(tf.reduce_mean(tf.square(advantage)), name='rms_advantage')
entropy_beta = tf.get_variable('entropy_beta', shape=[],
initializer=tf.constant_initializer(0.01), trainable=False)
cost = tf.add_n([policy_loss, xentropy_loss * entropy_beta, value_loss])
cost = tf.truediv(cost, tf.cast(tf.shape(futurereward)[0], tf.float32), name='cost')
summary.add_moving_summary(policy_loss, xentropy_loss,
value_loss, pred_reward, advantage,
cost, tf.reduce_mean(importance, name='importance'))
return cost
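In build_graph, the value estimate enters the advantage only through tf.stop_gradient, so the policy-gradient term cannot push gradients into the value branch; that branch is trained solely by value_loss. A small self-contained sketch of the separation (TensorFlow 2.x; two toy heads and random data, not the model above):

import tensorflow as tf

value_head = tf.keras.layers.Dense(1)
policy_head = tf.keras.layers.Dense(4)             # 4 discrete actions

features = tf.random.normal([8, 16])
returns = tf.random.normal([8, 1])                 # discounted future rewards
actions = tf.random.uniform([8], maxval=4, dtype=tf.int32)

with tf.GradientTape() as tape:
    value = value_head(features)
    logits = policy_head(features)
    log_pi_a = tf.gather(tf.nn.log_softmax(logits), actions, batch_dims=1)
    advantage = tf.stop_gradient(returns - value)  # constant for the policy term
    policy_loss = -tf.reduce_mean(log_pi_a * tf.squeeze(advantage, 1))
    value_loss = tf.reduce_mean(tf.square(value - returns))
    loss = policy_loss + value_loss

# the policy term cannot reach value_head; only value_loss trains it
grads = tape.gradient(loss, value_head.trainable_variables +
                      policy_head.trainable_variables)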
Example 10: _create_gumbel_control_variate_quadratic
def _create_gumbel_control_variate_quadratic(self, logQHard, temperature=None):
'''Calculate gumbel control variate.
'''
if temperature is None:
temperature = self.hparams.temperature
h = 0
extra = []
for layer in xrange(self.hparams.n_layer):
logQ, softSamples = self._recognition_network(sampler=functools.partial(
self._random_sample_switch, switch_layer=layer, temperature=temperature))
softELBO, _ = self._generator_network(softSamples, logQ)
# Generate the softELBO_v (should be the same value but different grads)
logQ_v, softSamples_v = self._recognition_network(sampler=functools.partial(
self._random_sample_switch_v, switch_layer=layer, temperature=temperature))
softELBO_v, _ = self._generator_network(softSamples_v, logQ_v)
# Compute losses
learning_signal = tf.stop_gradient(softELBO_v)
# Control variate
h += (tf.stop_gradient(learning_signal) * logQHard[layer]
- softELBO + softELBO_v)
extra.append((softELBO_v, -softELBO + softELBO_v))
return h, extra
Example 11: _step
def _step(self, J, voltage, refractory, dt):
delta_t = tf.clip_by_value(dt - refractory, self.zero, dt)
dV = (voltage - J) * tf.expm1(-delta_t / self.tau_rc)
voltage += dV
spiked = voltage > self.one
spikes = tf.cast(spiked, J.dtype) * self.alpha
partial_ref = -self.tau_rc * tf.log1p((self.one - voltage) /
(J - self.one))
# FastLIF version (linearly approximate spike time when calculating
# remaining refractory period)
# partial_ref = signals.dt * (voltage - self.one) / dV
refractory = tf.where(spiked, self.tau_ref - partial_ref,
refractory - dt)
voltage = tf.where(spiked, self.zeros,
tf.maximum(voltage, self.min_voltage))
# we use stop_gradient to avoid propagating any nans (those get
# propagated through the cond even if the spiking version isn't
# being used at all)
return (tf.stop_gradient(spikes), tf.stop_gradient(voltage),
tf.stop_gradient(refractory))
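The closing comment in _step refers to a well-known TensorFlow behaviour: tf.where (and tf.cond) can leak NaN gradients from the branch that was not selected, so the spiking outputs are detached to keep those NaNs out of the rest of the graph. A minimal reproduction of the pitfall (TensorFlow 2.x; toy values, unrelated to the neuron model):

import tensorflow as tf

x = tf.Variable([4.0, -1.0])
with tf.GradientTape(persistent=True) as tape:
    branch = tf.sqrt(x)                            # NaN in the forward pass for x < 0
    safe = tf.where(x > 0, branch, tf.zeros_like(x))
    blocked = tf.stop_gradient(safe)

print(tape.gradient(safe, x))     # [0.25, nan]: the unselected branch poisons the gradient
print(tape.gradient(blocked, x))  # None: stop_gradient removes the path entirely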
Example 12: virtual_adversarial_loss_bidir
def virtual_adversarial_loss_bidir(logits, embedded, inputs,
logits_from_embedding_fn):
"""Virtual adversarial loss for bidirectional models."""
logits = tf.stop_gradient(logits)
f_inputs, _ = inputs
weights = _end_of_seq_mask(f_inputs.labels)
perturbs = [
_mask_by_length(tf.random_normal(shape=tf.shape(emb)), f_inputs.length)
for emb in embedded
]
for _ in xrange(FLAGS.num_power_iteration):
perturbs = [
_scale_l2(d, FLAGS.small_constant_for_finite_diff) for d in perturbs
]
d_logits = logits_from_embedding_fn(
[emb + d for (emb, d) in zip(embedded, perturbs)])
kl = _kl_divergence_with_logits(logits, d_logits, weights)
perturbs = tf.gradients(
kl,
perturbs,
aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N)
perturbs = [tf.stop_gradient(d) for d in perturbs]
perturbs = [
_scale_l2(_mask_by_length(d, f_inputs.length), FLAGS.perturb_norm_length)
for d in perturbs
]
vadv_logits = logits_from_embedding_fn(
[emb + d for (emb, d) in zip(embedded, perturbs)])
return _kl_divergence_with_logits(logits, vadv_logits, weights)
Example 13: self_kl
def self_kl(self, logits,
sampling_dim, act_dim, act_type):
"""Calculate KL of distribution with itself.
Used later only for the gradients.
"""
if self.env_spec.is_discrete(act_type):
probs = tf.nn.softmax(logits)
log_probs = tf.nn.log_softmax(logits)
self_kl = tf.reduce_sum(
tf.stop_gradient(probs) *
(tf.stop_gradient(log_probs) - log_probs), -1)
elif self.env_spec.is_box(act_type):
means = logits[:, :sampling_dim / 2]
std = logits[:, sampling_dim / 2:]
my_means = tf.stop_gradient(means)
my_std = tf.stop_gradient(std)
self_kl = tf.reduce_sum(
tf.log(std / my_std) +
(tf.square(my_std) + tf.square(my_means - means)) /
(2.0 * tf.square(std)) - 0.5,
-1)
else:
assert False
return self_kl
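self_kl returns a quantity whose forward value is exactly zero (the KL of a distribution with itself), but because one copy of the distribution is detached, its second derivatives match those of the true KL, which is what "used later only for the gradients" means. A sketch of extracting that curvature with nested tapes (TensorFlow 2.x; toy logits, discrete branch only):

import tensorflow as tf

logits = tf.Variable([[1.0, -0.5, 0.2]])
v = tf.constant([[0.3, -0.1, 0.4]])        # direction for a Hessian-vector product

with tf.GradientTape() as outer:
    with tf.GradientTape() as inner:
        probs = tf.nn.softmax(logits)
        log_probs = tf.nn.log_softmax(logits)
        self_kl = tf.reduce_sum(
            tf.stop_gradient(probs) *
            (tf.stop_gradient(log_probs) - log_probs), -1)
    g = inner.gradient(self_kl, logits)    # numerically zero at this point
    gv = tf.reduce_sum(g * v)
hvp = outer.gradient(gv, logits)           # Fisher-vector product

print(self_kl)  # 0: KL(p || p) vanishes in the forward pass
print(hvp)      # (diag(p) - p p^T) v, the Fisher information of the softmax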
Example 14: _logits_cumulative
def _logits_cumulative(self, inputs, stop_gradient):
"""Evaluate logits of the cumulative densities.
Arguments:
inputs: The values at which to evaluate the cumulative densities, expected
to be a `Tensor` of shape `(channels, 1, batch)`.
stop_gradient: Boolean. Whether to add `tf.stop_gradient` calls so
that the gradient of the output with respect to the density model
parameters is disconnected (the gradient with respect to `inputs` is
left untouched).
Returns:
A `Tensor` of the same shape as `inputs`, containing the logits of the
cumulative densities evaluated at the given inputs.
"""
logits = inputs
for i in range(len(self.filters) + 1):
matrix = self._matrices[i]
if stop_gradient:
matrix = tf.stop_gradient(matrix)
logits = tf.linalg.matmul(matrix, logits)
bias = self._biases[i]
if stop_gradient:
bias = tf.stop_gradient(bias)
logits += bias
if i < len(self._factors):
factor = self._factors[i]
if stop_gradient:
factor = tf.stop_gradient(factor)
logits += factor * tf.math.tanh(logits)
return logits
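With stop_gradient=True, every parameter is detached just before it is used, so the output stays differentiable with respect to the inputs while the density-model parameters receive no gradient, exactly as the docstring describes. A two-line illustration of that asymmetry (TensorFlow 2.x; w and x are toy stand-ins for a matrix parameter and the inputs):

import tensorflow as tf

w = tf.Variable([[2.0]])   # a density-model parameter
x = tf.Variable([[3.0]])   # the inputs being evaluated

with tf.GradientTape(persistent=True) as tape:
    y = tf.linalg.matmul(tf.stop_gradient(w), x)

print(tape.gradient(y, x))  # [[2.]]: the gradient with respect to the inputs still flows
print(tape.gradient(y, w))  # None: the parameter is disconnected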
Example 15: _create_gumbel_control_variate
def _create_gumbel_control_variate(self, logQHard, temperature=None):
'''Calculate gumbel control variate.
'''
if temperature is None:
temperature = self.hparams.temperature
logQ, softSamples = self._recognition_network(sampler=functools.partial(
self._random_sample_soft, temperature=temperature))
softELBO, _ = self._generator_network(softSamples, logQ)
logQ = tf.add_n(logQ)
# Generate the softELBO_v (should be the same value but different grads)
logQ_v, softSamples_v = self._recognition_network(sampler=functools.partial(
self._random_sample_soft_v, temperature=temperature))
softELBO_v, _ = self._generator_network(softSamples_v, logQ_v)
logQ_v = tf.add_n(logQ_v)
# Compute losses
learning_signal = tf.stop_gradient(softELBO_v)
# Control variate
h = (tf.stop_gradient(learning_signal) * tf.add_n(logQHard)
- softELBO + softELBO_v)
extra = (softELBO_v, -softELBO + softELBO_v)
return h, extra