本文整理汇总了 Python 中 tensorflow.python.ops.math_ops.minimum 函数的典型用法代码示例。如果您正苦于以下问题：Python minimum 函数的具体用法？Python minimum 怎么用？Python minimum 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 minimum 函数的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: weighted_resample
def weighted_resample(inputs, weights, overall_rate, scope=None,
                      mean_decay=0.999, warmup=10, seed=None):
    """Approximately resamples `inputs` in proportion to `weights`.

    Each element is selected at a rate proportional to its entry in
    `weights`; averaged over all inputs (and over many invocations) the
    selection rate is `overall_rate`.

    Args:
      inputs: A list of tensors whose first dimension is `batch_size`.
      weights: A `[batch_size]`-shaped tensor holding each batch member's
        weight.
      overall_rate: Desired overall rate of resampling.
      scope: Scope to use for the op.
      mean_decay: Decay applied to the running estimate of the mean weight.
      warmup: Until the resulting tensor has been evaluated `warmup` times,
        the resampling method uses the true mean over all calls as its
        weight estimate, rather than a decayed mean.
      seed: Random seed.

    Returns:
      A pair `(resampled, rates)`: `resampled` is a list of tensors like
      `inputs` but with an unknown (possibly zero) first dimension, and
      `rates` is a tensor with the effective resampling rate used for each
      output.
    """
    # rate = weight / mean_weight * overall_rate, so an average weight maps
    # to the overall rate and a weight twice the average gets twice the rate.
    with ops.name_scope(scope, 'weighted_resample', inputs) as opscope:
        # Track a running mean of the weights. An invocation counter lets
        # the decay ramp up during warmup so the zero initial value of the
        # estimate does not drag the average down.
        with variable_scope.variable_scope(scope, 'estimate_mean', inputs):
            invocations = variable_scope.get_local_variable(
                'resample_count', initializer=0)
            running_mean = variable_scope.get_local_variable(
                'estimated_mean', initializer=0.0)

            step = invocations.assign_add(1)
            warmup_decay = math_ops.truediv(
                (step - 1), math_ops.minimum(step, warmup))
            effective_decay = math_ops.minimum(warmup_decay, mean_decay)

            this_batch_mean = math_ops.reduce_mean(weights)
            mean_weight = moving_averages.assign_moving_average(
                running_mean, this_batch_mean, effective_decay,
                zero_debias=False)

        # Turn weights into per-element rates via the mean weight and the
        # overall target rate.
        rates = weights * overall_rate / mean_weight

        # The rates ride along as the first resampled tensor so the rate
        # of every surviving element can be returned.
        resampled = resample_at_rate([rates] + inputs, rates,
                                     scope=opscope, seed=seed,
                                     back_prop=False)

        return (resampled[1:], resampled[0])
示例2: _renorm_correction_and_moments
def _renorm_correction_and_moments(self, mean, variance, training):
    """Computes batch-renorm corrections and the updated moving moments.

    Args:
      mean: Batch mean.
      variance: Batch variance.
      training: Selector passed to `_smart_select`; when it selects the
        non-training branch the corrections are r=1, d=0 and decay is 1.

    Returns:
      A tuple `(r, d, new_mean, new_variance)`.
    """
    stddev = math_ops.sqrt(variance + self.epsilon)

    # Blend the stored renorm averages with this batch's moments, as if the
    # averages had been initialised from this batch.
    blended_mean = (self.renorm_mean +
                    (1. - self.renorm_mean_weight) * mean)
    blended_stddev = (self.renorm_stddev +
                      (1. - self.renorm_stddev_weight) * stddev)

    # Raw renorm corrections.
    raw_r = stddev / blended_stddev
    raw_d = (mean - blended_mean) / blended_stddev

    # Force the corrections to be computed from the pre-update averages.
    with ops.control_dependencies([raw_r, raw_d]):
        mean = array_ops.identity(mean)
        stddev = array_ops.identity(stddev)

    # Optional clipping bounds; a missing key disables that bound.
    rmin = self.renorm_clipping.get('rmin')
    rmax = self.renorm_clipping.get('rmax')
    dmax = self.renorm_clipping.get('dmax')
    clipped_r = raw_r
    clipped_d = raw_d
    if rmin is not None:
        clipped_r = math_ops.maximum(clipped_r, rmin)
    if rmax is not None:
        clipped_r = math_ops.minimum(clipped_r, rmax)
    if dmax is not None:
        clipped_d = math_ops.maximum(clipped_d, -dmax)
        clipped_d = math_ops.minimum(clipped_d, dmax)

    # Outside training use r=1, d=0 and decay=1, i.e. no updates.
    r = _smart_select(training, lambda: clipped_r,
                      lambda: array_ops.ones_like(clipped_r))
    d = _smart_select(training, lambda: clipped_d,
                      lambda: array_ops.zeros_like(clipped_d))
    decay = _smart_select(training, lambda: self.renorm_momentum,
                          lambda: 1.)

    def _update_renorm_variable(var, weight, value):
        """Updates a moving average and its weight; returns their ratio."""
        # No zero-debiasing here: dividing the moving average by the weight
        # does the debiasing. After one update the average is
        # (1 - decay) * value and the weight is (1 - decay), so the ratio
        # is exactly `value`.
        # The unit weight is created under a control dependency on `value`,
        # which itself depends on the r and d computation above.
        value = array_ops.identity(value)
        with ops.control_dependencies([value]):
            unit_weight = array_ops.constant(1., dtype=weight.dtype)
        updated_var = moving_averages.assign_moving_average(
            var, value, decay, zero_debias=False)
        updated_weight = moving_averages.assign_moving_average(
            weight, unit_weight, decay, zero_debias=False)
        return updated_var / updated_weight

    with ops.colocate_with(self.moving_mean):
        new_mean = _update_renorm_variable(
            self.renorm_mean, self.renorm_mean_weight, mean)
    with ops.colocate_with(self.moving_variance):
        new_stddev = _update_renorm_variable(
            self.renorm_stddev, self.renorm_stddev_weight, stddev)
        # Chosen so that sqrt(moving_variance + epsilon) == new_stddev.
        new_variance = math_ops.square(new_stddev) - self.epsilon

    return (r, d, new_mean, new_variance)
示例3: huber_loss
def huber_loss(labels, predictions, weights=1.0, delta=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.WEIGHTED_SUM_BY_NONZERO_WEIGHTS):
    """Adds a Huber Loss term to the training procedure.

    For each value x in `error=labels-predictions`, the following is
    calculated:

    ```
      0.5 * x^2                  if |x| <= d
      0.5 * d^2 + d * (|x| - d)  if |x| > d
    ```

    where d is `delta`.

    See: https://en.wikipedia.org/wiki/Huber_loss

    `weights` acts as a coefficient for the loss. A scalar simply scales the
    loss. A tensor of size `[batch_size]` rescales the total loss of each
    sample by the matching element. If the shape of `weights` matches the
    shape of `predictions`, each measurable element of `predictions` is
    scaled by the corresponding value of `weights`.

    Args:
      labels: The ground truth output tensor, same dimensions as
        'predictions'.
      predictions: The predicted outputs.
      weights: Optional `Tensor` whose rank is either 0, or the same rank as
        `labels`, and must be broadcastable to `labels` (i.e., all
        dimensions must be either `1`, or the same as the corresponding
        `losses` dimension).
      delta: `float`, the point where the huber loss function changes from
        a quadratic to linear.
      scope: The scope for the operations performed in computing the loss.
      loss_collection: collection to which the loss will be added.
      reduction: Type of reduction to apply to loss.

    Returns:
      A scalar `Tensor` that returns the weighted loss.

    Raises:
      ValueError: If the shape of `predictions` doesn't match that of
        `labels` or if the shape of `weights` is invalid.
    """
    with ops.name_scope(scope, "huber_loss",
                        (predictions, labels, weights)) as scope:
        predictions = math_ops.to_float(predictions)
        labels = math_ops.to_float(labels)
        predictions.get_shape().assert_is_compatible_with(labels.get_shape())

        residual = math_ops.subtract(predictions, labels)
        magnitude = math_ops.abs(residual)
        # Part of |error| that falls inside the quadratic region.
        quadratic_part = math_ops.minimum(magnitude, delta)
        # Same value as tf.maximum(magnitude - delta, 0), but its gradient
        # at magnitude == delta is 0 (tf.maximum would give 1). That avoids
        # doubling the gradient there, since the quadratic term already
        # contributes a nonzero gradient.
        linear_part = (magnitude - quadratic_part)
        per_element = 0.5 * quadratic_part**2 + delta * linear_part
        return compute_weighted_loss(
            per_element, weights, scope, loss_collection,
            reduction=reduction)
示例4: saturate_cast
def saturate_cast(image, dtype):
    """Casts `image` to `dtype`, clamping first when values could overflow.

    No scaling is applied. When the source dtype can hold values outside
    the target dtype's range, the data is clamped to the target range
    before the cast.

    Args:
      image: An image to cast to a different data type.
      dtype: A `DType` to cast `image` to.

    Returns:
      `image`, safely cast to `dtype`.
    """
    # Which ends of the target range are narrower than the source range.
    # The lower-bound check also covers casts to unsigned types.
    may_underflow = image.dtype.min < dtype.min
    may_overflow = image.dtype.max > dtype.max

    if may_underflow and may_overflow:
        bounded = clip_ops.clip_by_value(
            image,
            math_ops.cast(dtype.min, image.dtype),
            math_ops.cast(dtype.max, image.dtype))
    elif may_underflow:
        bounded = math_ops.maximum(
            image, math_ops.cast(dtype.min, image.dtype))
    elif may_overflow:
        bounded = math_ops.minimum(
            image, math_ops.cast(dtype.max, image.dtype))
    else:
        bounded = image

    return math_ops.cast(bounded, dtype)
示例5: __call__
def __call__(self, shape, dtype=None, partition_info=None):
    """Builds an initial value of the given `shape` from a QR factorization.

    Args:
      shape: Shape of the tensor to initialize; needs at least two
        dimensions.
      dtype: Element type; falls back to `self.dtype` when None.
      partition_info: Unused by this method.

    Returns:
      `self.gain` times the Q factor (zero-padded if needed) reshaped to
      `shape`.

    Raises:
      ValueError: If `shape` has fewer than two dimensions.
    """
    if dtype is None:
        dtype = self.dtype
    if len(shape) < 2:
        raise ValueError("The tensor to initialize must be "
                         "at least two-dimensional")

    # Collapse every leading dimension into one and keep the last as-is,
    # so the same code works for conv2d kernels.
    rows = 1
    for extent in shape[:-1]:
        rows *= extent
    cols = shape[-1]

    # Random Gaussian matrix and its reduced QR factorization.
    gaussian = random_ops.random_normal((rows, cols), dtype=dtype,
                                        seed=self.seed)
    q, r = linalg_ops.qr(gaussian, full_matrices=False)

    # Make Q uniform: scale by d / |d|, where d is the diagonal of the
    # leading square part of R.
    side = math_ops.minimum(rows, cols)
    diagonal = array_ops.diag_part(r[:side, :side])
    signs = diagonal / math_ops.abs(diagonal)
    q *= signs

    # With fewer rows than columns, pad Q with zero columns.
    if rows < cols:
        zeros = array_ops.zeros([rows, cols - rows], dtype=dtype)
        q = array_ops.concat([q, zeros], 1)

    return self.gain * array_ops.reshape(q, shape)
示例6: _adaptive_max_norm
def _adaptive_max_norm(norm, std_factor, decay, global_step, epsilon, name):
    """Derives a max norm from moving statistics of the log of `norm`.

    Args:
      norm: Current norm value.
      std_factor: Number of standard deviations added to the mean log-norm.
      decay: Decay of the moving averages.
      global_step: Optional step counter; when given, the decay is capped
        at n / (n + 1) so early steps adapt faster.
      epsilon: Added to `norm` before the log and used as the floor of the
        variance.
      name: Variable scope name.

    Returns:
      A pair `(max_norms, mean)` where `mean` is the moving average of the
      log-norm and `max_norms` is `exp(mean + std_factor * std)`.
    """
    with vs.variable_scope(name, "AdaptiveMaxNorm", [norm]):
        log_norm = math_ops.log(norm + epsilon)

        def _tracked_average(var_name, value, rate):
            """Creates a zero-initialised variable and updates its average."""
            holder = vs.get_variable(
                var_name,
                shape=value.get_shape(),
                dtype=value.dtype,
                initializer=init_ops.zeros_initializer(),
                trainable=False)
            return moving_averages.assign_moving_average(
                holder, value, rate, zero_debias=False)

        # Quicker adaptation at the beginning.
        if global_step is not None:
            n = math_ops.to_float(global_step)
            decay = math_ops.minimum(decay, n / (n + 1.))

        # First and second moments of the log-norm.
        mean = _tracked_average("mean", log_norm, decay)
        sq_mean = _tracked_average(
            "sq_mean", math_ops.square(log_norm), decay)

        # Var = E[x^2] - E[x]^2, floored at epsilon before the sqrt.
        variance = sq_mean - math_ops.square(mean)
        std = math_ops.sqrt(math_ops.maximum(epsilon, variance))
        max_norms = math_ops.exp(mean + std_factor * std)
        return max_norms, mean
示例7: clip_by_norm
def clip_by_norm(t, clip_norm, name=None):
    """Clips tensor values to a maximum L2-norm.

    Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
    normalizes `t` so that its L2-norm is less than or equal to `clip_norm`.
    Specifically, if the L2-norm is already less than or equal to
    `clip_norm`, then `t` is not modified. If the L2-norm is greater than
    `clip_norm`, then this operation returns a tensor of the same type and
    shape as `t` with its values set to:

    `t * clip_norm / l2norm(t)`

    In this case, the L2-norm of the output tensor is `clip_norm`.

    This operation is typically used to clip gradients before applying them
    with an optimizer.

    Args:
      t: A `Tensor`.
      clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
      name: A name for the operation (optional).

    Returns:
      A clipped `Tensor`.
    """
    with ops.op_scope([t, clip_norm], name, "clip_by_norm") as name:
        t = ops.convert_to_tensor(t, name="t")

        # 1 / ||t||_2, reducing over every axis of `t`.
        l2norm_inv = math_ops.rsqrt(
            math_ops.reduce_sum(t * t, math_ops.range(array_ops.rank(t))))
        # Scale by clip_norm * min(1 / ||t||, 1 / clip_norm): the factor is
        # 1 when the norm is within bounds and clip_norm / ||t|| otherwise.
        # The reciprocal is formed as constant(1) / clip_norm rather than
        # constant(1 / clip_norm) so `clip_norm` may itself be a Tensor and
        # the constant takes the dtype of `t`.
        tclip = array_ops.identity(
            t * clip_norm * math_ops.minimum(
                l2norm_inv,
                constant_op.constant(1.0, dtype=t.dtype) / clip_norm),
            name=name)

    return tclip
示例8: _update_clip_coeff
def _update_clip_coeff(self, grads_and_vars, precon_grads_and_vars):
    """Computes the update scale factor that satisfies the norm constraint.

    The factor is min(1, sqrt(c / r^T F r)), where c is the norm
    constraint, F is the approximate Fisher matrix, and r is the update
    vector, i.e. -alpha * v, with alpha the learning rate and v the
    preconditioned gradient.

    This follows Section 5 of Ba et al., Distributed Second-Order
    Optimization using Kronecker-Factored Approximations. They absorb the
    learning rate alpha (which they denote eta_max) into the formula for
    the coefficient, whereas here the rescaling is done before multiplying
    by alpha, so this formula differs from theirs by a factor of alpha.

    Args:
      grads_and_vars: List of (gradient, variable) pairs.
      precon_grads_and_vars: List of (preconditioned gradient, variable)
        pairs. Must be the result of calling
        `self._fisher_est.multiply_inverse` on `grads_and_vars`.

    Returns:
      Scalar representing the coefficient which should be applied to the
      preconditioned gradients to satisfy the norm constraint.
    """
    grad_sq_norm = self._squared_fisher_norm(grads_and_vars,
                                             precon_grads_and_vars)
    # Squared norm of the actual update: scale by the learning rate squared.
    update_sq_norm = grad_sq_norm * self._learning_rate**2
    unclamped = math_ops.sqrt(self._norm_constraint / update_sq_norm)
    return math_ops.minimum(1., unclamped)
示例9: gradient_clipping
def gradient_clipping(grads_and_vars):
    """Internal function for adaptive clipping.

    Relies on `std_factor`, `decay`, `global_step`, `epsilon`, `name`,
    `report_summary` and `static_max_norm` from the enclosing scope.

    Args:
      grads_and_vars: Iterable of (gradient, variable) pairs.

    Returns:
      A list of (clipped gradient, variable) pairs; `None` gradients are
      passed through unchanged.
    """
    grads, variables = zip(*grads_and_vars)
    norm = clip_ops.global_norm(grads)

    max_norm, log_mean = _adaptive_max_norm(norm, std_factor, decay,
                                            global_step, epsilon, name)

    # Reports the max gradient norm for debugging.
    if report_summary:
        summary.scalar("global_norm/adaptive_max_gradient_norm", max_norm)

    # The factor is 1. while the norm stays below max_norm; otherwise the
    # gradients are rescaled by exp(log_mean) / norm.
    factor = array_ops.where(norm < max_norm,
                             array_ops.ones_like(norm),
                             math_ops.exp(log_mean) / norm)

    if static_max_norm is not None:
        factor = math_ops.minimum(static_max_norm / norm, factor)

    def _rescale(grad):
        """Applies `factor` to one gradient, keeping None and sparsity."""
        if grad is None:
            return None
        if isinstance(grad, ops.IndexedSlices):
            return ops.IndexedSlices(grad.values * factor, grad.indices,
                                     grad.dense_shape)
        return grad * factor

    clipped = [_rescale(grad) for grad in grads]
    return list(zip(clipped, variables))
示例10: decayed_lr
def decayed_lr(learning_rate, global_step, decay_steps, initial_variance,
               variance_decay, num_periods, alpha, beta, name):
    """Helper to recompute learning rate; most helpful in eager-mode.

    Computes
    `learning_rate * ((alpha + noisy_linear) * cosine + beta)`, where
    `noisy_linear` is a linear decay plus Gaussian noise whose variance
    shrinks with the step, and `cosine` is a cosine decay over
    `num_periods` periods. The step is capped at `decay_steps`.
    """
    with ops.name_scope(name, "NoisyLinearCosineDecay",
                        [learning_rate, global_step]) as name:
        learning_rate = ops.convert_to_tensor(learning_rate,
                                              name="learning_rate")
        # Every hyperparameter is brought to the learning rate's dtype.
        dtype = learning_rate.dtype
        decay_steps = math_ops.cast(decay_steps, dtype)
        initial_variance = math_ops.cast(initial_variance, dtype)
        variance_decay = math_ops.cast(variance_decay, dtype)
        num_periods = math_ops.cast(num_periods, dtype)
        alpha = math_ops.cast(alpha, dtype)
        beta = math_ops.cast(beta, dtype)

        # Current step, capped at decay_steps.
        step = math_ops.cast(global_step, dtype)
        step = math_ops.minimum(step, decay_steps)

        linear = (decay_steps - step) / decay_steps

        # Noise variance decays as initial_variance / (1 + step)^decay.
        noise_variance = initial_variance / (
            math_ops.pow(1.0 + step, variance_decay))
        noise_std = math_ops.sqrt(noise_variance)
        noisy_linear = (
            linear + random_ops.random_normal(linear.shape,
                                              stddev=noise_std))

        progress = step / decay_steps
        phase = 2.0 * num_periods * progress
        cosine = 0.5 * (
            1.0 + math_ops.cos(constant_op.constant(math.pi) * phase))

        combined = ((alpha + noisy_linear) * cosine + beta)

        return math_ops.multiply(learning_rate, combined, name=name)
示例11: huber_loss
def huber_loss(y_true, y_pred, delta=1.0):
    """Computes Huber loss value.

    For each value x in `error=y_true-y_pred`, the following is calculated:

    ```
    0.5 * x^2                  if |x| <= d
    0.5 * d^2 + d * (|x| - d)  if |x| > d
    ```

    where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

    Args:
      y_true: tensor of true targets.
      y_pred: tensor of predicted targets.
      delta: A float, the point where the Huber loss function changes from
        a quadratic to linear.

    Returns:
      Tensor of element-wise Huber loss values (no reduction is applied
      here).
    """
    y_pred = math_ops.cast(y_pred, dtype=K.floatx())
    y_true = math_ops.cast(y_true, dtype=K.floatx())

    residual = math_ops.subtract(y_pred, y_true)
    magnitude = math_ops.abs(residual)
    # Split |error| into the part inside the quadratic region and the rest.
    quadratic = math_ops.minimum(magnitude, delta)
    linear = math_ops.subtract(magnitude, quadratic)

    half = ops.convert_to_tensor(0.5, dtype=quadratic.dtype)
    squared = math_ops.multiply(quadratic, quadratic)
    quadratic_term = math_ops.multiply(half, squared)
    linear_term = math_ops.multiply(delta, linear)
    return math_ops.add(quadratic_term, linear_term)
示例12: get_best
def get_best(self, n):
    """Return the indices and values of the n highest scores in the TopN."""

    def refresh_shortlist():
        """Update the shortlist with the highest scores in id_to_score."""
        fresh_scores, fresh_ids = nn_ops.top_k(self.id_to_score,
                                               self.shortlist_size)
        lowest_fresh_score = math_ops.reduce_min(fresh_scores)
        # Count the entries whose score is above the float32 minimum.
        is_real = math_ops.greater(fresh_scores, dtypes.float32.min)
        fresh_length = math_ops.reduce_sum(math_ops.to_int32(is_real))
        # Slot 0 of each shortlist tensor is a header: the length for the
        # ids and the smallest score for the scores.
        ids_update = self.sl_ids.assign(
            math_ops.to_int64(
                array_ops.concat([[fresh_length], fresh_ids], 0)))
        scores_update = self.sl_scores.assign(
            array_ops.concat([[lowest_fresh_score], fresh_scores], 0))
        self.last_ops = [ids_update, scores_update]
        return control_flow_ops.group(ids_update, scores_update)

    # We only need to refresh the shortlist if n is greater than the
    # current shortlist size (which is stored in sl_ids[0]).
    with ops.control_dependencies(self.last_ops):
        maybe_refresh = control_flow_ops.cond(
            n > self.sl_ids[0], refresh_shortlist, control_flow_ops.no_op)
        with ops.control_dependencies([maybe_refresh]):
            how_many = math_ops.minimum(
                n, math_ops.to_int32(self.sl_ids[0]))
            topk_values, topk_indices = nn_ops.top_k(self.sl_scores,
                                                     how_many)
            # topk_indices are the indices into the shortlist, we want to
            # return the indices into id_to_score
            gathered_indices = array_ops.gather(self.sl_ids, topk_indices)
            return gathered_indices, topk_values
示例13: __call__
def __call__(self, step):
    """Returns the polynomially decayed learning rate for `step`.

    Computes
    `(initial - end) * (1 - step / decay_steps) ** power + end`.
    Without cycling, `step` is capped at `decay_steps`. With cycling,
    `decay_steps` is stretched to the first multiple of itself that is not
    smaller than `step` (multiplier 1 at step 0).

    Args:
      step: Current step; cast to the dtype of the initial learning rate.

    Returns:
      The decayed learning rate, in the dtype of the initial learning rate.
    """
    with ops.name_scope(
        self.name, "PolynomialDecay",
        [self.initial_learning_rate, step, self.decay_steps,
         self.end_learning_rate, self.power]
    ) as name:
        initial_learning_rate = ops.convert_to_tensor(
            self.initial_learning_rate, name="initial_learning_rate")
        dtype = initial_learning_rate.dtype
        end_learning_rate = math_ops.cast(self.end_learning_rate, dtype)
        power = math_ops.cast(self.power, dtype)

        global_step_recomp = math_ops.cast(step, dtype)
        # All arithmetic below uses the cast value rather than the raw
        # `self.decay_steps`, so an integer (or differently typed)
        # decay_steps cannot cause a dtype mismatch against the step.
        decay_steps_cast = math_ops.cast(self.decay_steps, dtype)
        decay_steps_recomp = decay_steps_cast
        if self.cycle:
            # Find the first multiple of decay_steps that is bigger than
            # global_step. If global_step is zero set the multiplier to 1.
            # Both branches return `dtype` so `cond` sees matching types.
            multiplier = control_flow_ops.cond(
                math_ops.equal(global_step_recomp, 0),
                lambda: math_ops.cast(1.0, dtype),
                lambda: math_ops.ceil(global_step_recomp / decay_steps_cast))
            decay_steps_recomp = math_ops.multiply(decay_steps_cast,
                                                   multiplier)
        else:
            # Make sure that the global_step used is not bigger than
            # decay_steps.
            global_step_recomp = math_ops.minimum(global_step_recomp,
                                                  decay_steps_cast)

        # Fraction of the decay completed.
        p = math_ops.div(global_step_recomp, decay_steps_recomp)
        return math_ops.add(
            math_ops.multiply(initial_learning_rate - end_learning_rate,
                              math_ops.pow(1 - p, power)),
            end_learning_rate,
            name=name)
示例14: clip_by_value
def clip_by_value(t, clip_value_min, clip_value_max,
                  name=None):
    """Clips tensor values to a specified min and max.

    Returns a tensor of the same type and shape as `t` in which every value
    below `clip_value_min` is replaced by `clip_value_min` and every value
    above `clip_value_max` is replaced by `clip_value_max`.

    Args:
      t: A `Tensor`.
      clip_value_min: A 0-D (scalar) `Tensor`. The minimum value to clip by.
      clip_value_max: A 0-D (scalar) `Tensor`. The maximum value to clip by.
      name: A name for the operation (optional).

    Returns:
      A clipped `Tensor`.
    """
    with ops.name_scope(name, "clip_by_value",
                        [t, clip_value_min, clip_value_max]) as name:
        t = ops.convert_to_tensor(t, name="t")

        # Cap from above first, then raise to the lower bound; the second
        # op carries the requested name.
        capped = math_ops.minimum(t, clip_value_max)
        clipped = math_ops.maximum(capped, clip_value_min, name=name)

    return clipped
示例15: _get_scores
def _get_scores(log_probs, sequence_lengths, length_penalty_weight,
                coverage_penalty_weight, finished,
                accumulated_attention_probs):
    """Calculates scores for beam search hypotheses.

    Args:
      log_probs: The log probabilities with shape
        `[batch_size, beam_width, vocab_size]`.
      sequence_lengths: The array of sequence lengths.
      length_penalty_weight: Float weight to penalize length. Disabled with
        0.0.
      coverage_penalty_weight: Float weight to penalize the coverage of
        source sentence. Disabled with 0.0.
      finished: A boolean tensor of shape `[batch_size, beam_width]` that
        specifies which elements in the beam are finished already.
      accumulated_attention_probs: Accumulated attention probabilities up
        to the current time step, with shape
        `[batch_size, beam_width, max_time]` if coverage_penalty_weight is
        not 0.0.

    Returns:
      The scores normalized by the length_penalty and coverage_penalty.

    Raises:
      ValueError: accumulated_attention_probs is None when coverage penalty
        is enabled, or coverage_penalty_weight is not a scalar.
    """
    length_norm = _length_penalty(
        sequence_lengths=sequence_lengths,
        penalty_factor=length_penalty_weight)
    length_norm = math_ops.cast(length_norm, dtype=log_probs.dtype)
    scores = log_probs / length_norm

    coverage_penalty_weight = ops.convert_to_tensor(
        coverage_penalty_weight, name="coverage_penalty_weight")
    if coverage_penalty_weight.shape.ndims != 0:
        raise ValueError("coverage_penalty_weight should be a scalar, "
                         "but saw shape: %s" % coverage_penalty_weight.shape)

    # A statically known zero weight disables the coverage penalty.
    if tensor_util.constant_value(coverage_penalty_weight) == 0.0:
        return scores

    if accumulated_attention_probs is None:
        raise ValueError(
            "accumulated_attention_probs can be None only if coverage penalty is "
            "disabled.")

    # Add source sequence length mask before computing coverage penalty:
    # zero entries become 1 so they contribute log(1) = 0.
    is_zero = math_ops.equal(accumulated_attention_probs, 0.0)
    masked_probs = array_ops.where(
        is_zero,
        array_ops.ones_like(accumulated_attention_probs),
        accumulated_attention_probs)

    # coverage penalty =
    #     sum over `max_time` {log(min(accumulated_attention_probs, 1.0))}
    log_capped = math_ops.log(math_ops.minimum(masked_probs, 1.0))
    coverage_penalty = math_ops.reduce_sum(log_capped, 2)
    # Apply coverage penalty to finished predictions.
    coverage_penalty *= math_ops.to_float(finished)
    weighted_penalty = coverage_penalty * coverage_penalty_weight
    # Reshape from [batch_size, beam_width] to [batch_size, beam_width, 1]
    weighted_penalty = array_ops.expand_dims(weighted_penalty, 2)
    return scores + weighted_penalty