本文整理汇总了Python中tensorflow.python.ops.math_ops.truediv函数的典型用法代码示例。如果您正苦于以下问题:Python truediv函数的具体用法?Python truediv怎么用?Python truediv使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了truediv函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _estimate_data_distribution
def _estimate_data_distribution(labels, num_classes, smoothing_constant=10):
"""Estimate data distribution as labels are seen."""
# Variable to track running count of classes. Smooth by a nonzero value to
# avoid division-by-zero. Higher values provide more stability at the cost of
# slower convergence.
if smoothing_constant <= 0:
raise ValueError('smoothing_constant must be nonzero.')
num_examples_per_class_seen = variables.Variable(
initial_value=[smoothing_constant] * num_classes, trainable=False,
name='class_count', dtype=dtypes.int64)
# Update the class-count based on what labels are seen in batch.
num_examples_per_class_seen = num_examples_per_class_seen.assign_add(
math_ops.reduce_sum(array_ops.one_hot(labels, num_classes,
dtype=dtypes.int64), 0))
# Normalize count into a probability.
# NOTE: Without the `+= 0` line below, the test
# `testMultiThreadedEstimateDataDistribution` fails. The reason is that
# before this line, `num_examples_per_class_seen` is a Tensor that shares a
# buffer with an underlying `ref` object. When the `ref` is changed by another
# thread, `num_examples_per_class_seen` changes as well. Since this can happen
# in the middle of the normalization computation, we get probabilities that
# are very far from summing to one. Adding `+= 0` copies the contents of the
# tensor to a new buffer, which will be consistent from the start to the end
# of the normalization computation.
num_examples_per_class_seen += 0
init_prob_estimate = math_ops.truediv(
num_examples_per_class_seen,
math_ops.reduce_sum(num_examples_per_class_seen))
# Must return float32 (not float64) to agree with downstream `_verify_input`
# checks.
return math_ops.cast(init_prob_estimate, dtypes.float32)
示例2: weighted_resample
def weighted_resample(inputs, weights, overall_rate, scope=None,
mean_decay=0.999, warmup=10, seed=None):
"""Performs an approximate weighted resampling of `inputs`.
This method chooses elements from `inputs` where each item's rate of
selection is proportional to its value in `weights`, and the average
rate of selection across all inputs (and many invocations!) is
`overall_rate`.
Args:
inputs: A list of tensors whose first dimension is `batch_size`.
weights: A `[batch_size]`-shaped tensor with each batch member's weight.
overall_rate: Desired overall rate of resampling.
scope: Scope to use for the op.
mean_decay: How quickly to decay the running estimate of the mean weight.
warmup: Until the resulting tensor has been evaluated `warmup`
times, the resampling menthod uses the true mean over all calls
as its weight estimate, rather than a decayed mean.
seed: Random seed.
Returns:
A list of tensors exactly like `inputs`, but with an unknown (and
possibly zero) first dimension.
A tensor containing the effective resampling rate used for each output.
"""
# Algorithm: Just compute rates as weights/mean_weight *
# overall_rate. This way the average weight corresponds to the
# overall rate, and a weight twice the average has twice the rate,
# etc.
with ops.name_scope(scope, 'weighted_resample', inputs) as opscope:
# First: Maintain a running estimated mean weight, with decay
# adjusted (by also maintaining an invocation count) during the
# warmup period so that at the beginning, there aren't too many
# zeros mixed in, throwing the average off.
with variable_scope.variable_scope(scope, 'estimate_mean', inputs):
count_so_far = variable_scope.get_local_variable(
'resample_count', initializer=0)
estimated_mean = variable_scope.get_local_variable(
'estimated_mean', initializer=0.0)
count = count_so_far.assign_add(1)
real_decay = math_ops.minimum(
math_ops.truediv((count - 1), math_ops.minimum(count, warmup)),
mean_decay)
batch_mean = math_ops.reduce_mean(weights)
mean = moving_averages.assign_moving_average(
estimated_mean, batch_mean, real_decay, zero_debias=False)
# Then, normalize the weights into rates using the mean weight and
# overall target rate:
rates = weights * overall_rate / mean
results = resample_at_rate([rates] + inputs, rates,
scope=opscope, seed=seed, back_prop=False)
return (results[1:], results[0])
示例3: weighted_moving_average
def weighted_moving_average(value,
decay,
weight,
truediv=True,
collections=None,
name=None):
"""Compute the weighted moving average of `value`.
Conceptually, the weighted moving average is:
`moving_average(value * weight) / moving_average(weight)`,
where a moving average updates by the rule
`new_value = decay * old_value + (1 - decay) * update`
Internally, this Op keeps moving average variables of both `value * weight`
and `weight`.
Args:
value: A numeric `Tensor`.
decay: A float `Tensor` or float value. The moving average decay.
weight: `Tensor` that keeps the current value of a weight.
Shape should be able to multiply `value`.
truediv: Boolean, if `True`, dividing by `moving_average(weight)` is
floating point division. If `False`, use division implied by dtypes.
collections: List of graph collections keys to add the internal variables
`value * weight` and `weight` to.
Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
name: Optional name of the returned operation.
Defaults to "WeightedMovingAvg".
Returns:
An Operation that updates and returns the weighted moving average.
"""
# Unlike assign_moving_average, the weighted moving average doesn't modify
# user-visible variables. It is the ratio of two internal variables, which are
# moving averages of the updates. Thus, the signature of this function is
# quite different than assign_moving_average.
if collections is None:
collections = [ops.GraphKeys.GLOBAL_VARIABLES]
with variable_scope.variable_scope(name, "WeightedMovingAvg",
[value, weight, decay]) as scope:
value_x_weight_var = variable_scope.get_variable(
"value_x_weight",
initializer=init_ops.zeros_initializer(value.get_shape(),
dtype=value.dtype),
trainable=False,
collections=collections)
weight_var = variable_scope.get_variable(
"weight",
initializer=init_ops.zeros_initializer(weight.get_shape(),
dtype=weight.dtype),
trainable=False,
collections=collections)
numerator = assign_moving_average(
value_x_weight_var, value * weight, decay, zero_debias=False)
denominator = assign_moving_average(
weight_var, weight, decay, zero_debias=False)
if truediv:
return math_ops.truediv(numerator, denominator, name=scope.name)
else:
return math_ops.div(numerator, denominator, name=scope.name)
示例4: mean
def mean(self):
"""Class means for every batch member."""
with ops.name_scope('mean'):
alpha_sum = math_ops.reduce_sum(self._alpha,
reduction_indices=self._dist_indices,
keep_dims=True)
mean = math_ops.truediv(self._alpha, alpha_sum)
mean.set_shape(self._alpha.get_shape())
return mean
示例5: compute_cdf
def compute_cdf(values, value_range, **kwargs):
"""Returns the normalized cumulative distribution of the given values tensor.
Uses tf.while_loop to directly compute the cdf of the values. Number of bins
for histogram is fixed at _NBINS=255
Args:
values: Numeric `Tensor`.
value_range: Shape [2] `Tensor` of same `dtype` as `values`
**kwargs: keyword arguments: name
Returns:
A 1-D `Tensor` holding normalized cdf of values.
"""
nbins = _NBINS
name = kwargs.get('name', None)
with ops.name_scope(name, 'cdf', [values, value_range, nbins]):
values = ops.convert_to_tensor(values, name='values')
value_range = ops.convert_to_tensor(value_range, name='value_range')
nbins_float = np.float32(nbins)
# Map tensor values that fall within value_range to [0, 1].
scaled_values = math_ops.truediv(
values - value_range[0],
value_range[1] - value_range[0],
name='scaled_values')
# map tensor values within the open interval value_range to {0,.., nbins-1},
# values outside the open interval will be zero or less, or nbins or more.
indices = math_ops.floor(nbins_float * scaled_values, name='indices')
# Clip edge cases (e.g. value = value_range[1]) or "outliers."
indices = math_ops.cast(
clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32)
cdf = array_ops.zeros(nbins)
i = constant_op.constant(0)
def loop_cond(loop_count, _):
return math_ops.less(loop_count, nbins)
def loop_body(loop_count, cdf):
temp = math_ops.reduce_sum(
math_ops.cast(
math_ops.less_equal(indices, loop_count), dtypes.float32))
cdf = math_ops.add(
cdf,
array_ops.one_hot(
loop_count, depth=_NBINS, on_value=temp, off_value=0.0))
return [loop_count + 1, cdf]
_, cdf = control_flow_ops.while_loop(
loop_cond, loop_body, [i, cdf], maximum_iterations=nbins)
return math_ops.div(cdf, math_ops.reduce_max(cdf))
示例6: _auc_convert_hist_to_auc
def _auc_convert_hist_to_auc(hist_true_acc, hist_false_acc, nbins):
"""Convert histograms to auc.
Args:
hist_true_acc: `Tensor` holding accumulated histogram of scores for records
that were `True`.
hist_false_acc: `Tensor` holding accumulated histogram of scores for
records that were `False`.
nbins: Integer number of bins in the histograms.
Returns:
Scalar `Tensor` estimating AUC.
"""
# Note that this follows the "Approximating AUC" section in:
# Efficient AUC learning curve calculation, R. R. Bouckaert,
# AI'06 Proceedings of the 19th Australian joint conference on Artificial
# Intelligence: advances in Artificial Intelligence
# Pages 181-191.
# Note that the above paper has an error, and we need to re-order our bins to
# go from high to low score.
# Normalize histogram so we get fraction in each bin.
normed_hist_true = math_ops.truediv(hist_true_acc,
math_ops.reduce_sum(hist_true_acc))
normed_hist_false = math_ops.truediv(hist_false_acc,
math_ops.reduce_sum(hist_false_acc))
# These become delta x, delta y from the paper.
delta_y_t = array_ops.reverse(normed_hist_true, [True], name='delta_y_t')
delta_x_t = array_ops.reverse(normed_hist_false, [True], name='delta_x_t')
# strict_1d_cumsum requires float32 args.
delta_y_t = math_ops.cast(delta_y_t, dtypes.float32)
delta_x_t = math_ops.cast(delta_x_t, dtypes.float32)
# Trapezoidal integration, \int_0^1 0.5 * (y_t + y_{t-1}) dx_t
y_t = _strict_1d_cumsum(delta_y_t, nbins)
first_trap = delta_x_t[0] * y_t[0] / 2.0
other_traps = delta_x_t[1:] * (y_t[1:] + y_t[:nbins - 1]) / 2.0
return math_ops.add(first_trap, math_ops.reduce_sum(other_traps), name='auc')
示例7: safe_div
def safe_div(numerator, denominator):
"""Divides two tensors element-wise, returning 0 if the denominator is <= 0.
Args:
numerator: A `Tensor`.
denominator: A `Tensor`, with dtype matching `numerator`.
Returns:
0 if `denominator` <= 0, else `numerator` / `denominator`
"""
t = math_ops.truediv(numerator, denominator)
zero = array_ops.zeros_like(t, dtype=denominator.dtype)
condition = math_ops.greater(denominator, zero)
zero = math_ops.cast(zero, t.dtype)
return array_ops.where(condition, t, zero)
示例8: weighted_moving_average
def weighted_moving_average(
value, decay, weight, truediv=True, name="WeightedMovingAvg"):
"""Compute the weighted moving average of `value`.
Conceptually, the weighted moving average is:
moving_average(value * weight) / moving_average(weight),
where a moving average updates by the rule
new_value = decay * old_value + (1 - decay) * update
Args:
value: A tensor.
decay: A float Tensor or float value. The moving average decay.
weight: A tensor that keeps the current value of a weight.
Shape should be able to multiply `value`.
truediv: Boolean, if True, dividing by moving_average(weight) is floating
point division. If False, use division implied by dtypes.
name: Optional name of the returned operation.
Returns:
An Operation that updates the weighted moving average.
"""
# Unlike assign_moving_average, the weighted moving average doesn't modify
# user-visible variables. It is the ratio of two internal variables, which are
# moving averages of the updates. Thus, the signature of this function is
# quite different than assign_moving_average.
with variable_scope.variable_op_scope(
[value, weight, decay], name, name) as scope:
value_variable = variable_scope.get_variable(
"value",
initializer=array_ops.zeros_initializer(
value.get_shape(), dtype=value.dtype),
trainable=False
)
weight_variable = variable_scope.get_variable(
"weight",
initializer=array_ops.zeros_initializer(
weight.get_shape(), dtype=weight.dtype),
trainable=False
)
numerator = assign_moving_average(value_variable, value * weight, decay)
denominator = assign_moving_average(weight_variable, weight, decay)
if truediv:
return math_ops.truediv(numerator, denominator, name=scope.name)
else:
return math_ops.div(numerator, denominator, name=scope.name)
示例9: _MeanGrad
def _MeanGrad(op, grad):
"""Gradient for Mean."""
sum_grad = _SumGrad(op, grad)[0]
input_shape = op.inputs[0]._shape_tuple() # pylint: disable=protected-access
output_shape = op.outputs[0]._shape_tuple() # pylint: disable=protected-access
if (input_shape is not None and output_shape is not None and
None not in input_shape and None not in output_shape):
input_size = np.prod(input_shape)
output_size = np.prod(output_shape)
factor = input_size // max(output_size, 1)
factor = constant_op.constant(factor, dtype=sum_grad.dtype)
else:
input_shape = array_ops.shape(op.inputs[0])
output_shape = array_ops.shape(op.outputs[0])
factor = _safe_shape_div(
math_ops.reduce_prod(input_shape), math_ops.reduce_prod(output_shape))
return math_ops.truediv(sum_grad, math_ops.cast(factor, sum_grad.dtype)), None
示例10: _histogram
def _histogram(values, value_range, nbins=100, dtype=np.int32, name=None):
"""Return histogram of values.
Given the tensor `values`, this operation returns a rank 1 histogram counting
the number of entries in `values` that fell into every bin. The bins are
equal width and determined by the arguments `value_range` and `nbins`.
Args:
values: Numeric `Tensor`.
value_range: Shape [2] `Tensor` of same `dtype` as `values`.
values <= value_range[0] will be mapped to hist[0],
values >= value_range[1] will be mapped to hist[-1].
nbins: Scalar `int32 Tensor`. Number of histogram bins.
dtype: dtype for returned histogram.
name: A name for this operation (defaults to 'histogram').
Returns:
A 1-D `Tensor` holding histogram of values.
"""
with ops.name_scope(name, 'histogram', [values, value_range, nbins]) as scope:
values = ops.convert_to_tensor(values, name='values')
values = gen_array_ops.reshape(values, [-1])
value_range = ops.convert_to_tensor(value_range, name='value_range')
nbins = ops.convert_to_tensor(nbins, dtype=np.int32, name='nbins')
nbins_float = math_ops.cast(nbins, values.dtype)
# Map tensor values that fall within value_range to [0, 1].
scaled_values = math_ops.truediv(
values - value_range[0],
value_range[1] - value_range[0],
name='scaled_values')
# map tensor values within the open interval value_range to {0,.., nbins-1},
# values outside the open interval will be zero or less, or nbins or more.
indices = math_ops.floor(nbins_float * scaled_values, name='indices')
# Clip edge cases (e.g. value = value_range[1]) or "outliers."
indices = math_ops.cast(
clip_ops.clip_by_value(indices, 0, nbins_float - 1), np.int32)
return math_ops.unsorted_segment_sum(
array_ops.ones_like(indices, dtype=dtype), indices, nbins, name=scope)
示例11: _estimate_data_distribution
def _estimate_data_distribution(c, num_examples_per_class_seen):
"""Estimate data distribution as labels are seen.
Args:
c: The class labels. Type `int32`, shape `[batch_size]`.
num_examples_per_class_seen: A `ResourceVariable` containing counts.
Type `int64`, shape `[num_classes]`.
Returns:
dist: The updated distribution. Type `float32`, shape `[num_classes]`.
"""
num_classes = num_examples_per_class_seen.get_shape()[0].value
# Update the class-count based on what labels are seen in
# batch. But do this asynchronously to avoid performing a
# cross-device round-trip. Just use the cached value.
num_examples_per_class_seen = num_examples_per_class_seen.assign_add(
math_ops.reduce_sum(
array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0))
init_prob_estimate = math_ops.truediv(
num_examples_per_class_seen,
math_ops.reduce_sum(num_examples_per_class_seen))
return math_ops.cast(init_prob_estimate, dtypes.float32)
示例12: _estimate_data_distribution
def _estimate_data_distribution(c, num_examples_per_class_seen):
"""Estimate data distribution as labels are seen.
Args:
c: The class labels. Type `int32`, shape `[batch_size]`.
num_examples_per_class_seen: Type `int64`, shape `[num_classes]`,
containing counts.
Returns:
num_examples_per_lass_seen: Updated counts. Type `int64`, shape
`[num_classes]`.
dist: The updated distribution. Type `float32`, shape `[num_classes]`.
"""
num_classes = num_examples_per_class_seen.get_shape()[0].value
# Update the class-count based on what labels are seen in batch.
num_examples_per_class_seen = math_ops.add(
num_examples_per_class_seen, math_ops.reduce_sum(
array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0))
init_prob_estimate = math_ops.truediv(
num_examples_per_class_seen,
math_ops.reduce_sum(num_examples_per_class_seen))
dist = math_ops.cast(init_prob_estimate, dtypes.float32)
return num_examples_per_class_seen, dist
示例13: triplet_semihard_loss
def triplet_semihard_loss(labels, embeddings, margin=1.0):
"""Computes the triplet loss with semi-hard negative mining.
The loss encourages the positive distances (between a pair of embeddings with
the same labels) to be smaller than the minimum negative distance among
which are at least greater than the positive distance plus the margin constant
(called semi-hard negative) in the mini-batch. If no such negative exists,
uses the largest negative distance instead.
See: https://arxiv.org/abs/1503.03832.
Args:
labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
multiclass integer labels.
embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should
be l2 normalized.
margin: Float, margin term in the loss definition.
Returns:
triplet_loss: tf.float32 scalar.
"""
# Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
lshape = array_ops.shape(labels)
assert lshape.shape == 1
labels = array_ops.reshape(labels, [lshape[0], 1])
# Build pairwise squared distance matrix.
pdist_matrix = pairwise_distance(embeddings, squared=True)
# Build pairwise binary adjacency matrix.
adjacency = math_ops.equal(labels, array_ops.transpose(labels))
# Invert so we can select negatives only.
adjacency_not = math_ops.logical_not(adjacency)
batch_size = array_ops.size(labels)
# Compute the mask.
pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
mask = math_ops.logical_and(
array_ops.tile(adjacency_not, [batch_size, 1]),
math_ops.greater(
pdist_matrix_tile, array_ops.reshape(
array_ops.transpose(pdist_matrix), [-1, 1])))
mask_final = array_ops.reshape(
math_ops.greater(
math_ops.reduce_sum(
math_ops.cast(mask, dtype=dtypes.float32), 1, keepdims=True),
0.0), [batch_size, batch_size])
mask_final = array_ops.transpose(mask_final)
adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
mask = math_ops.cast(mask, dtype=dtypes.float32)
# negatives_outside: smallest D_an where D_an > D_ap.
negatives_outside = array_ops.reshape(
masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
negatives_outside = array_ops.transpose(negatives_outside)
# negatives_inside: largest D_an.
negatives_inside = array_ops.tile(
masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
semi_hard_negatives = array_ops.where(
mask_final, negatives_outside, negatives_inside)
loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives)
mask_positives = math_ops.cast(
adjacency, dtype=dtypes.float32) - array_ops.diag(
array_ops.ones([batch_size]))
# In lifted-struct, the authors multiply 0.5 for upper triangular
# in semihard, they take all positive pairs except the diagonal.
num_positives = math_ops.reduce_sum(mask_positives)
triplet_loss = math_ops.truediv(
math_ops.reduce_sum(
math_ops.maximum(
math_ops.multiply(loss_mat, mask_positives), 0.0)),
num_positives,
name='triplet_semihard_loss')
return triplet_loss
示例14: histogram_fixed_width
def histogram_fixed_width(values,
value_range,
nbins=100,
dtype=dtypes.int32,
name=None):
"""Return histogram of values.
Given the tensor `values`, this operation returns a rank 1 histogram counting
the number of entries in `values` that fell into every bin. The bins are
equal width and determined by the arguments `value_range` and `nbins`.
Args:
values: Numeric `Tensor`.
value_range: Shape [2] `Tensor`. new_values <= value_range[0] will be
mapped to hist[0], values >= value_range[1] will be mapped to hist[-1].
Must be same dtype as new_values.
nbins: Scalar `int32 Tensor`. Number of histogram bins.
dtype: dtype for returned histogram.
name: A name for this operation (defaults to 'histogram_fixed_width').
Returns:
A 1-D `Tensor` holding histogram of values.
Examples:
```python
# Bins will be: (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
nbins = 5
value_range = [0.0, 5.0]
new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
with tf.default_session() as sess:
hist = tf.histogram_fixed_width(new_values, value_range, nbins=5)
variables.initialize_all_variables().run()
sess.run(hist) => [2, 1, 1, 0, 2]
```
"""
with ops.op_scope([values, value_range, nbins], name,
'histogram_fixed_width') as scope:
values = ops.convert_to_tensor(values, name='values')
values = array_ops.reshape(values, [-1])
value_range = ops.convert_to_tensor(value_range, name='value_range')
nbins = ops.convert_to_tensor(nbins, dtype=dtypes.int32, name='nbins')
nbins_float = math_ops.to_float(nbins)
# Map tensor values that fall within value_range to [0, 1].
scaled_values = math_ops.truediv(values - value_range[0],
value_range[1] - value_range[0],
name='scaled_values')
# map tensor values within the open interval value_range to {0,.., nbins-1},
# values outside the open interval will be zero or less, or nbins or more.
indices = math_ops.floor(nbins_float * scaled_values, name='indices')
# Clip edge cases (e.g. value = value_range[1]) or "outliers."
indices = math_ops.cast(
clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32)
# TODO(langmore) This creates an array of ones to add up and place in the
# bins. This is inefficient, so replace when a better Op is available.
return math_ops.unsorted_segment_sum(
array_ops.ones_like(indices, dtype=dtype),
indices,
nbins,
name=scope)
示例15: lifted_struct_loss
def lifted_struct_loss(labels, embeddings, margin=1.0):
"""Computes the lifted structured loss.
The loss encourages the positive distances (between a pair of embeddings
with the same labels) to be smaller than any negative distances (between a
pair of embeddings with different labels) in the mini-batch in a way
that is differentiable with respect to the embedding vectors.
See: https://arxiv.org/abs/1511.06452.
Args:
labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
multiclass integer labels.
embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should not
be l2 normalized.
margin: Float, margin term in the loss definition.
Returns:
lifted_loss: tf.float32 scalar.
"""
# Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
lshape = array_ops.shape(labels)
assert lshape.shape == 1
labels = array_ops.reshape(labels, [lshape[0], 1])
# Build pairwise squared distance matrix.
pairwise_distances = pairwise_distance(embeddings)
# Build pairwise binary adjacency matrix.
adjacency = math_ops.equal(labels, array_ops.transpose(labels))
# Invert so we can select negatives only.
adjacency_not = math_ops.logical_not(adjacency)
batch_size = array_ops.size(labels)
diff = margin - pairwise_distances
mask = math_ops.cast(adjacency_not, dtype=dtypes.float32)
# Safe maximum: Temporarily shift negative distances
# above zero before taking max.
# this is to take the max only among negatives.
row_minimums = math_ops.reduce_min(diff, 1, keepdims=True)
row_negative_maximums = math_ops.reduce_max(
math_ops.multiply(diff - row_minimums, mask), 1,
keepdims=True) + row_minimums
# Compute the loss.
# Keep track of matrix of maximums where M_ij = max(m_i, m_j)
# where m_i is the max of alpha - negative D_i's.
# This matches the Caffe loss layer implementation at:
# https://github.com/rksltnl/Caffe-Deep-Metric-Learning-CVPR16/blob/0efd7544a9846f58df923c8b992198ba5c355454/src/caffe/layers/lifted_struct_similarity_softmax_layer.cpp # pylint: disable=line-too-long
max_elements = math_ops.maximum(
row_negative_maximums, array_ops.transpose(row_negative_maximums))
diff_tiled = array_ops.tile(diff, [batch_size, 1])
mask_tiled = array_ops.tile(mask, [batch_size, 1])
max_elements_vect = array_ops.reshape(
array_ops.transpose(max_elements), [-1, 1])
loss_exp_left = array_ops.reshape(
math_ops.reduce_sum(
math_ops.multiply(
math_ops.exp(diff_tiled - max_elements_vect), mask_tiled),
1,
keepdims=True), [batch_size, batch_size])
loss_mat = max_elements + math_ops.log(
loss_exp_left + array_ops.transpose(loss_exp_left))
# Add the positive distance.
loss_mat += pairwise_distances
mask_positives = math_ops.cast(
adjacency, dtype=dtypes.float32) - array_ops.diag(
array_ops.ones([batch_size]))
# *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2.
num_positives = math_ops.reduce_sum(mask_positives) / 2.0
lifted_loss = math_ops.truediv(
0.25 * math_ops.reduce_sum(
math_ops.square(
math_ops.maximum(
math_ops.multiply(loss_mat, mask_positives), 0.0))),
num_positives,
name='liftedstruct_loss')
return lifted_loss