本文整理汇总了 Python 中 tensorflow.python.ops.nn_ops.log_softmax 函数的典型用法代码示例。如果您正苦于以下问题:Python log_softmax 函数的具体用法?Python log_softmax 怎么用?Python log_softmax 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了log_softmax函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_step
def test_step(self):
  """Runs one `_beam_search_step` at time 2 and checks every returned field.

  The starting state has no finished beams and every length equal to 2. After
  the step the test verifies the predicted ids, parent ids, lengths, finished
  flags and the accumulated log probabilities.

  The hard-coded indices below require batch_size >= 2, beam_width >= 3 and
  vocab_size >= 5.
  """
  beam_shape = [self.batch_size, self.beam_width]
  dummy_cell_state = array_ops.zeros(beam_shape)
  beam_state = beam_search_decoder.BeamSearchDecoderState(
      cell_state=dummy_cell_state,
      log_probs=nn_ops.log_softmax(array_ops.ones(beam_shape)),
      lengths=constant_op.constant(2, shape=beam_shape, dtype=dtypes.int64),
      finished=array_ops.zeros(beam_shape, dtype=dtypes.bool),
      accumulated_attention_probs=())

  # Near-uniform logits with a handful of (batch, beam, token) entries
  # boosted so that the winning candidates are unambiguous.
  logits_ = np.full(beam_shape + [self.vocab_size], 0.0001)
  boosted_entries = (
      ((0, 0, 2), 1.9),
      ((0, 0, 3), 2.1),
      ((0, 1, 3), 3.1),
      ((0, 1, 4), 0.9),
      ((1, 0, 1), 0.5),
      ((1, 1, 2), 2.7),
      ((1, 2, 2), 10.0),
      ((1, 2, 3), 0.2),
  )
  for position, value in boosted_entries:
    logits_[position] = value

  logits = ops.convert_to_tensor(logits_, dtype=dtypes.float32)
  log_probs = nn_ops.log_softmax(logits)

  outputs, next_beam_state = beam_search_decoder._beam_search_step(
      time=2,
      logits=logits,
      next_cell_state=dummy_cell_state,
      beam_state=beam_state,
      batch_size=ops.convert_to_tensor(self.batch_size),
      beam_width=self.beam_width,
      end_token=self.end_token,
      length_penalty_weight=self.length_penalty_weight,
      coverage_penalty_weight=self.coverage_penalty_weight)

  with self.cached_session() as sess:
    fetches = [outputs, next_beam_state, beam_state, log_probs]
    outputs_, next_state_, state_, log_probs_ = sess.run(fetches)

  self.assertAllEqual(outputs_.predicted_ids, [[3, 3, 2], [2, 2, 1]])
  self.assertAllEqual(outputs_.parent_ids, [[1, 0, 0], [2, 1, 0]])
  self.assertAllEqual(next_state_.lengths, [[3, 3, 3], [3, 3, 3]])
  self.assertAllEqual(next_state_.finished,
                      [[False, False, False], [False, False, False]])

  # Each output slot's score is its parent beam's previous score plus the
  # log probability of the token chosen from that parent. The index lists
  # mirror the expected parent_ids / predicted_ids asserted above.
  expected_log_probs = [
      state_.log_probs[0][[1, 0, 0]] + log_probs_[0, [1, 0, 0], [3, 3, 2]],
      state_.log_probs[1][[2, 1, 0]] + log_probs_[1, [2, 1, 0], [2, 2, 1]],
  ]
  self.assertAllEqual(next_state_.log_probs, expected_log_probs)
示例2: testLogSoftmaxAxes
def testLogSoftmaxAxes(self):
  """Checks that equivalent `axis` spellings of `log_softmax` agree.

  For the rank-2 input used here, `axis=-2` and `axis=0` name the same
  dimension, so all three results must match within tolerance.
  """
  arr = np.linspace(0., 1, 12).reshape(3, 4)
  x_neg_axis = nn_ops.log_softmax(arr, axis=-2)
  y_pos_axis = nn_ops.log_softmax(arr, axis=0)
  # This call used `axis=4`, which is out of range for a rank-2 input. It only
  # produced a result where the implementation wrapped the axis modulo the
  # rank (4 % 2 == 0); TensorFlow versions that validate the axis reject it.
  # `axis=0` is the value the wrapped form resolved to, so the comparison
  # below is unchanged where wrapping applied and valid everywhere else.
  z_gt_axis = nn_ops.log_softmax(arr, axis=0)
  x_neg_axis_tf = self.evaluate(x_neg_axis)
  y_pos_axis_tf = self.evaluate(y_pos_axis)
  z_gt_axis_tf = self.evaluate(z_gt_axis)
  eps = 1e-3
  self.assertAllClose(x_neg_axis_tf, y_pos_axis_tf, eps)
  self.assertAllClose(y_pos_axis_tf, z_gt_axis_tf, eps)
示例3: test_step_with_eos
def test_step_with_eos(self):
  """Runs one `_beam_search_step` when some beams are already finished.

  The starting state marks beam 1 of batch 0 and beam 2 of batch 1 as
  finished. The test then verifies parent ids, predicted ids, lengths,
  finished flags and accumulated log probabilities after the step.

  The hard-coded indices below require batch_size >= 2, beam_width >= 3 and
  vocab_size >= 5.
  """
  beam_shape = [self.batch_size, self.beam_width]
  dummy_cell_state = array_ops.zeros(beam_shape)
  beam_state = beam_search_decoder.BeamSearchDecoderState(
      cell_state=dummy_cell_state,
      log_probs=nn_ops.log_softmax(array_ops.ones(beam_shape)),
      lengths=ops.convert_to_tensor(
          [[2, 1, 2], [2, 2, 1]], dtype=dtypes.int32),
      finished=ops.convert_to_tensor(
          [[False, True, False], [False, False, True]], dtype=dtypes.bool))

  # Near-uniform logits with a few (batch, beam, token) entries boosted.
  logits_ = np.full(beam_shape + [self.vocab_size], 0.0001)
  boosted_entries = (
      ((0, 0, 2), 1.9),
      ((0, 0, 3), 2.1),
      ((0, 1, 3), 3.1),
      ((0, 1, 4), 0.9),
      ((1, 0, 1), 0.5),
      # NOTE: the original author observed that the expectations below do
      # not hold when this value is 2.7; the reason was left undocumented.
      ((1, 1, 2), 5.7),
      ((1, 2, 2), 1.0),
      ((1, 2, 3), 0.2),
  )
  for position, value in boosted_entries:
    logits_[position] = value

  logits = ops.convert_to_tensor(logits_, dtype=dtypes.float32)
  log_probs = nn_ops.log_softmax(logits)

  outputs, next_beam_state = beam_search_decoder._beam_search_step(
      time=2,
      logits=logits,
      beam_state=beam_state,
      batch_size=ops.convert_to_tensor(self.batch_size),
      beam_width=self.beam_width,
      end_token=self.end_token,
      length_penalty_weight=self.length_penalty_weight)

  with self.test_session() as sess:
    fetches = [outputs, next_beam_state, beam_state, log_probs]
    outputs_, next_state_, state_, log_probs_ = sess.run(fetches)

  np.testing.assert_array_equal(
      outputs_.parent_ids, [[1, 0, 0], [1, 2, 0]])
  np.testing.assert_array_equal(
      outputs_.predicted_ids, [[0, 3, 2], [2, 0, 1]])
  np.testing.assert_array_equal(
      next_state_.lengths, [[1, 3, 3], [3, 1, 3]])
  np.testing.assert_array_equal(
      next_state_.finished, [[True, False, False], [False, True, False]])

  # Start from the parents' previous scores (fancy indexing copies them).
  batch0_scores = state_.log_probs[0][[1, 0, 0]]
  batch1_scores = state_.log_probs[1][[1, 2, 0]]
  # Only the slots reported as unfinished gain the chosen token's log
  # probability; slot 0 of batch 0 and slot 1 of batch 1 get no increment.
  batch0_scores[1] += log_probs_[0, 0, 3]
  batch0_scores[2] += log_probs_[0, 0, 2]
  batch1_scores[0] += log_probs_[1, 1, 2]
  batch1_scores[2] += log_probs_[1, 0, 1]
  expected_log_probs = [batch0_scores, batch1_scores]
  np.testing.assert_array_equal(next_state_.log_probs, expected_log_probs)
示例4: _sample_n
def _sample_n(self, n, seed=None):
  """Draws `n` samples by perturbing the logits with Gumbel noise.

  The logits are tiled to `[n] + shape(logits)`, flattened to rows of
  `event_size`, perturbed with Gumbel noise, divided by the temperature and
  passed through `log_softmax`.

  Args:
    n: number of samples to draw; becomes the leading dimension of the result.
    seed: optional seed forwarded to `random_uniform`.

  Returns:
    Tensor of shape `[n] + shape(self.logits)` holding the log-softmax of the
    tempered, noise-perturbed logits.
  """
  sample_shape = array_ops.concat(([n], array_ops.shape(self.logits)), 0)
  tiled_logits = self.logits * array_ops.ones(sample_shape)
  flat_logits = array_ops.reshape(tiled_logits, [-1, self.event_size])

  # The uniform draws go through log() twice to become Gumbel variates, so
  # they must never be 0. The lower bound is therefore `tiny`, the smallest
  # positive *normal* float of this dtype. Normal positive numbers x, y
  # satisfy x + y >= max(x, y). The original note warns that using
  # np.nextafter(np.float32(0), 1) as minval can still let
  # tf.random_uniform(dtype=tf.float32) return 0.
  smallest_normal = np.finfo(self.dtype.as_numpy_dtype).tiny
  uniform_draws = random_ops.random_uniform(
      shape=array_ops.shape(flat_logits),
      minval=smallest_normal,
      maxval=1,
      dtype=self.dtype,
      seed=seed)
  gumbel_noise = -math_ops.log(-math_ops.log(uniform_draws))
  tempered_logits = math_ops.div(
      gumbel_noise + flat_logits, self._temperature_2d)
  return array_ops.reshape(nn_ops.log_softmax(tempered_logits), sample_shape)
示例5: testEntropyGradient
def testEntropyGradient(self):
  """Compares `Categorical.entropy()` with a directly computed entropy.

  The reference entropy is -sum(softmax(logits) * log_softmax(logits)) over
  the last axis. Both the values and their gradients with respect to the
  logits must agree with those of a `Categorical` built from
  `probs=softmax(logits)`.
  """
  with self.cached_session() as sess:
    logits = constant_op.constant([[1., 2., 3.], [2., 5., 1.]])
    probs = nn_ops.softmax(logits)
    log_probs = nn_ops.log_softmax(logits)

    # Reference entropy written out explicitly.
    reference_entropy = -math_ops.reduce_sum(probs * log_probs, axis=-1)

    # Entropy as reported by the distribution object.
    dist_entropy = categorical.Categorical(probs=probs).entropy()

    reference_grad = gradients_impl.gradients(reference_entropy, [logits])
    dist_grad = gradients_impl.gradients(dist_entropy, [logits])

    fetches = {
        "true_entropy": reference_entropy,
        "categorical_entropy": dist_entropy,
        "true_entropy_g": reference_grad,
        "categorical_entropy_g": dist_grad,
    }
    res = sess.run(fetches)

  self.assertAllClose(res["true_entropy"], res["categorical_entropy"])
  self.assertAllClose(res["true_entropy_g"], res["categorical_entropy_g"])
示例6: _kl_divergence
def _kl_divergence(p, p_logits, q):
  """Computes the Kullback-Leibler divergence between p and q.

  This function uses p's logits in some places to improve numerical stability.

  Specifically:

  KL(p || q) = sum[ p * log(p / q) ]
    = sum[ p * ( log(p) - log(q) ) ]
    = sum[ p * ( log_softmax(p_logits) - log(q) ) ]

  Args:
    p: A 2-D floating-point Tensor p_ij, where `i` corresponds to the minibatch
      example and `j` corresponds to the probability of being in class `j`.
    p_logits: A 2-D floating-point Tensor corresponding to logits for `p`.
    q: A 1-D floating-point Tensor, where q_j corresponds to the probability
      of class `j`.

  Returns:
    KL divergence between two distributions. Output dimension is 1D, one entry
    per distribution in `p`.

  Raises:
    ValueError: If any of the inputs aren't floating-point.
    ValueError: If p or p_logits aren't 2D.
    ValueError: If q isn't 1D.
  """
  for tensor in [p, p_logits, q]:
    if not tensor.dtype.is_floating:
      # Format the message eagerly. Passing the name as a second argument to
      # ValueError (logging style) leaves the "%s" placeholder unfilled and
      # makes str(exc) render as a tuple.
      raise ValueError('Input %s must be floating type.' % tensor.name)
  p.shape.assert_has_rank(2)
  p_logits.shape.assert_has_rank(2)
  q.shape.assert_has_rank(1)
  return math_ops.reduce_sum(
      p * (nn_ops.log_softmax(p_logits) - math_ops.log(q)), axis=1)
示例7: _log_cdf
def _log_cdf(self, x):
  """Returns the log CDF of the mixture evaluated at `x`.

  Adds the log mixture weights to the per-component log CDFs and reduces
  over the component axis with log-sum-exp.
  """
  padded_x = self._pad_sample_dims(x)
  component_log_cdf = self.components_distribution.log_cdf(
      padded_x)  # [S, B, k]
  log_mixture_weights = nn_ops.log_softmax(
      self.mixture_distribution.logits, axis=-1)  # [B, k]
  weighted_log_cdf = component_log_cdf + log_mixture_weights
  return math_ops.reduce_logsumexp(weighted_log_cdf, axis=-1)  # [S, B]
示例8: _SoftmaxCrossEntropyWithLogitsGrad
def _SoftmaxCrossEntropyWithLogitsGrad(op, grad_loss, grad_grad):
  """Gradient function for SoftmaxCrossEntropyWithLogits.

  Args:
    op: the forward op. `op.inputs[0]` is read as the logits and
      `op.outputs[1]` as the already-computed softmax gradient.
    grad_loss: backprop for the cost output; it scales both returned values.
    grad_grad: backprop for the softmax-gradient output, or None.

  Returns:
    A pair `(grad, second)`: `grad` is `grad_loss` broadcast-multiplied with
    `op.outputs[1]`, plus a second-order term when `grad_grad` is present and
    not known to be zero; `second` is `grad_loss` broadcast-multiplied with
    `-log_softmax(logits)`.
  """

  def _is_known_zero(g):
    """Best-effort static check for whether `g` is an all-zero gradient."""
    if context.executing_eagerly():
      # TODO(apassos) add an efficient way to detect eager zeros here.
      return False
    if g.op.type in ("ZerosLike", "Zeros"):
      return True
    static_value = tensor_util.constant_value(g)
    if static_value is None:
      return False
    return (static_value == 0).all()

  logits = op.inputs[0]
  # First-order part: the incoming cost gradient times the softmax gradient
  # that the forward op already produced.
  grad = _BroadcastMul(grad_loss, op.outputs[1])

  # The second derivative is just the softmax derivative w.r.t. the logits.
  # It is skipped when grad_grad is absent or statically known to be zero.
  if grad_grad is not None and not _is_known_zero(grad_grad):
    softmax = nn_ops.softmax(logits)
    # Batched product of grad_grad (expanded at axis 1) with softmax
    # (expanded at axis 2), squeezed back along axis 1.
    per_row_dot = array_ops.squeeze(
        math_ops.matmul(
            array_ops.expand_dims(grad_grad, 1),
            array_ops.expand_dims(softmax, 2)),
        axis=1)
    grad += (grad_grad - per_row_dot) * softmax

  negative_log_softmax = -nn_ops.log_softmax(logits)
  return grad, _BroadcastMul(grad_loss, negative_log_softmax)
示例9: _log_prob
def _log_prob(self, x):
  """Returns the log density of `x` under this distribution.

  The result is the sum of a normalization constant that depends on
  `event_size` and the temperature, and an unnormalized term obtained by
  summing `log_softmax(logits - x * temperature)` over the event axis.
  """
  x = self._assert_valid_sample(x)
  logits = self.logits

  # Broadcast logits and x against each other unless both static shapes are
  # fully known and already identical.
  x_static_shape = x.get_shape()
  logits_static_shape = logits.get_shape()
  if (not x_static_shape.is_fully_defined() or
      not logits_static_shape.is_fully_defined() or
      x_static_shape != logits_static_shape):
    logits = array_ops.ones_like(x, dtype=logits.dtype) * logits
    x = array_ops.ones_like(logits, dtype=x.dtype) * x

  # Shape of the result: the logits shape with the last axis reduced away.
  batch_shape = array_ops.shape(math_ops.reduce_sum(logits, axis=[-1]))
  flat_logits = array_ops.reshape(logits, [-1, self.event_size])
  flat_x = array_ops.reshape(x, [-1, self.event_size])

  # Normalization constant: lgamma(k) + (k - 1) * log(temperature).
  k = math_ops.cast(self.event_size, x.dtype)
  log_gamma_k = math_ops.lgamma(k)
  temperature_term = (k - 1.) * math_ops.log(self.temperature)
  log_norm_const = log_gamma_k + temperature_term

  # Unnormalized density.
  per_class_terms = nn_ops.log_softmax(
      flat_logits - flat_x * self._temperature_2d)
  log_unnorm_prob = math_ops.reduce_sum(
      per_class_terms, [-1], keepdims=False)

  # Combine both parts and restore the shape matching the supplied logits.
  return array_ops.reshape(log_norm_const + log_unnorm_prob, batch_shape)
示例10: _log_prob
def _log_prob(self, x):
  """Returns the log probability of `x` under the mixture.

  Adds the log mixture weights to the per-component log probabilities and
  reduces over the component axis with log-sum-exp. All ops are created
  under the distribution's runtime assertions.
  """
  with ops.control_dependencies(self._runtime_assertions):
    padded_x = self._pad_sample_dims(x)
    component_log_prob = self.components_distribution.log_prob(
        padded_x)  # [S, B, k]
    log_mixture_weights = nn_ops.log_softmax(
        self.mixture_distribution.logits, axis=-1)  # [B, k]
    weighted_log_prob = component_log_prob + log_mixture_weights
    return math_ops.reduce_logsumexp(weighted_log_prob, axis=-1)  # [S, B]
示例11: testLogSoftmax
def testLogSoftmax(self):
  """Checks `nn_ops.log_softmax` against the test's own `_log_softmax`.

  Uses a random float32 input of shape [5, 10] and compares the two results
  with a tolerance of 1e-3.
  """
  x_np = np.random.randn(5, 10).astype(np.float32)
  expected = self._log_softmax(x_np)
  actual = self.evaluate(nn_ops.log_softmax(constant_op.constant(x_np)))
  tolerance = 1e-3
  self.assertAllClose(actual, expected, tolerance)
示例12: _kl_categorical_categorical
def _kl_categorical_categorical(a, b, name=None):
  """Calculate the batched KL divergence KL(a || b) with a, b OneHotCategorical.

  Args:
    a: instance of a OneHotCategorical distribution object.
    b: instance of a OneHotCategorical distribution object.
    name: (optional) Name to use for created operations.
      default is "kl_categorical_categorical".

  Returns:
    Batchwise KL(a || b)
  """
  with ops.name_scope(
      name, "kl_categorical_categorical", [a.logits, b.logits]):
    # sum(p * ln(p / q)), with ln(p / q) computed as a difference of
    # log-softmaxes of the two logits.
    # `axis` replaces the deprecated `reduction_indices` keyword, which the
    # newer `reduce_sum` signature no longer accepts.
    return math_ops.reduce_sum(
        nn_ops.softmax(a.logits) * (nn_ops.log_softmax(a.logits)
                                    - nn_ops.log_softmax(b.logits)),
        axis=-1)
示例13: testGradient
def testGradient(self, x_shape):
  """Checks the gradient of `log_softmax` with the gradient checker.

  Args:
    x_shape: shape of the random float64 input to test with.
  """
  x_np = np.random.randn(*x_shape).astype(np.float64)
  with self.cached_session():
    x_tf = constant_op.constant(x_np)
    y_tf = nn_ops.log_softmax(x_tf)
    # The output has the same shape as the input, so x_shape is passed for
    # both the input and the output shape.
    gradient_error = gradient_checker.compute_gradient_error(
        x_tf, x_shape, y_tf, x_shape)
  max_allowed_error = 1e-7
  self.assertLess(gradient_error, max_allowed_error)
示例14: _kl_categorical_categorical
def _kl_categorical_categorical(a, b, name=None):
  """Calculate the batched KL divergence KL(a || b) with a and b Categorical.

  Args:
    a: instance of a Categorical distribution object.
    b: instance of a Categorical distribution object.
    name: (optional) Name to use for created operations.
      default is "kl_categorical_categorical".

  Returns:
    Batchwise KL(a || b)
  """
  with ops.name_scope(name, "kl_categorical_categorical",
                      values=[a.logits, b.logits]):
    # sum(p_a * (log p_a - log p_b)) over the last axis, with both log
    # probabilities taken as log_softmax of the respective logits.
    log_p_a = nn_ops.log_softmax(a.logits)
    log_p_b = nn_ops.log_softmax(b.logits)
    log_ratio = log_p_a - log_p_b
    weighted_log_ratio = nn_ops.softmax(a.logits) * log_ratio
    return math_ops.reduce_sum(weighted_log_ratio, axis=-1)
示例15: ctc_loss_and_grad
def ctc_loss_and_grad(logits, labels, label_length, logit_length, unique=None):
  """Computes the CTC loss and gradients.

  Most users will want fwd_bwd.ctc_loss

  This function returns the computed gradient, it does not have a gradient
  of its own defined.

  Args:
    logits: tensor of shape [frames, batch_size, num_labels]
    labels: tensor of shape [batch_size, max_label_seq_length]
    label_length: tensor of shape [batch_size]
      Length of reference label sequence in labels.
    logit_length: tensor of shape [batch_size]
      Length of input sequence in logits.
    unique: (optional) unique label indices as computed by unique(labels)
      If supplied, enables an implementation that is faster and more memory
      efficient on TPU.

  Returns:
    loss: tensor of shape [batch_size]
    gradient: tensor of shape [frames, batch_size, num_labels]
  """
  num_labels = _get_dim(logits, 2)
  max_label_seq_length = _get_dim(labels, 1)

  # Per-label log probabilities, then the same values arranged per state.
  ilabel_log_probs = nn_ops.log_softmax(logits)
  state_log_probs = _ilabel_to_state(labels, num_labels, ilabel_log_probs)

  state_trans_probs = _ctc_state_trans(labels)
  state_boundaries = ctc_state_log_probs(label_length, max_label_seq_length)
  initial_state_log_probs, final_state_log_probs = state_boundaries
  state_trans_log_probs = math_ops.log(state_trans_probs)

  fwd_bwd_log_probs, log_likelihood = _forward_backward_log(
      state_trans_log_probs=state_trans_log_probs,
      initial_state_log_probs=initial_state_log_probs,
      final_state_log_probs=final_state_log_probs,
      observed_log_probs=state_log_probs,
      sequence_length=logit_length)

  # Map the per-state results back onto labels; the `unique` variant is used
  # whenever `unique` is supplied (truthy).
  if not unique:
    olabel_log_probs = _state_to_olabel(
        labels, num_labels, fwd_bwd_log_probs)
  else:
    olabel_log_probs = _state_to_olabel_unique(
        labels, num_labels, fwd_bwd_log_probs, unique)

  # Gradient: exp of the input label log probs minus exp of the output label
  # log probs. Loss: negated log likelihood.
  grad = math_ops.exp(ilabel_log_probs) - math_ops.exp(olabel_log_probs)
  loss = -log_likelihood
  return loss, grad