This article collects typical usage examples of the sqrt function from tensorflow.python.ops.math_ops in Python. If you are wondering what sqrt does or how to call it, the curated examples below may help.
The following shows 15 code examples of the sqrt function, sorted by popularity by default.
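The snippets below are excerpts from optimizer, distribution, and utility code built on TensorFlow's internal Python modules; they are methods and helpers, not standalone scripts. As a rough sketch (exact module paths vary across TF versions), they assume imports along these lines:

    import math

    import numpy as np

    from tensorflow.python.framework import ops
    from tensorflow.python.ops import array_ops
    from tensorflow.python.ops import control_flow_ops
    from tensorflow.python.ops import linalg_ops
    from tensorflow.python.ops import math_ops
    from tensorflow.python.ops import resource_variable_ops
    from tensorflow.python.ops import state_ops

Individual examples also reference helpers from their own source files (for instance linalg, distribution_util, ragged_tensor, and segment_sum).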
Example 1: _apply_dense
def _apply_dense(self, grad, var):
  beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype)
  beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
  # m_t = beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_scaled_g_values = grad * (1 - beta1_t)
  m_t = state_ops.assign(m, beta1_t * m + m_scaled_g_values,
                         use_locking=self._use_locking)
  # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_scaled_g_values = (grad * grad) * (1 - beta2_t)
  v_t = state_ops.assign(v, beta2_t * v + v_scaled_g_values,
                         use_locking=self._use_locking)
  # AMSGrad: keep the running maximum of v_t.
  vhat = self.get_slot(var, "vhat")
  vhat_t = state_ops.assign(vhat, math_ops.maximum(v_t, vhat))
  v_sqrt = math_ops.sqrt(vhat_t)
  var_update = state_ops.assign_sub(var, lr * m_t / (v_sqrt + epsilon_t),
                                    use_locking=self._use_locking)
  return control_flow_ops.group(*[var_update, m_t, v_t, vhat_t])
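The lr line above is Adam's bias correction: the raw step size lr_t is rescaled by sqrt(1 - beta2^t) / (1 - beta1^t) to compensate for the zero-initialized moment estimates. A plain-Python sketch of that factor, using hypothetical hyperparameter values not taken from the snippet:

    import math

    beta1, beta2, lr_t = 0.9, 0.999, 0.001  # assumed typical Adam defaults
    for t in (1, 10, 100, 10000):
        correction = math.sqrt(1 - beta2 ** t) / (1 - beta1 ** t)
        print(t, lr_t * correction)  # the factor tends to 1 as t grows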
Example 2: _apply_sparse
def _apply_sparse(self, grad, var):
  beta1_power, beta2_power = self._get_beta_accumulators()
  beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
  beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
  # m := beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_t = state_ops.scatter_update(m, grad.indices,
                                 beta1_t * array_ops.gather(m, grad.indices) +
                                 (1 - beta1_t) * grad.values,
                                 use_locking=self._use_locking)
  # v := beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_t = state_ops.scatter_update(v, grad.indices,
                                 beta2_t * array_ops.gather(v, grad.indices) +
                                 (1 - beta2_t) * math_ops.square(grad.values),
                                 use_locking=self._use_locking)
  # variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))
  m_t_slice = array_ops.gather(m_t, grad.indices)
  v_t_slice = array_ops.gather(v_t, grad.indices)
  denominator_slice = math_ops.sqrt(v_t_slice) + epsilon_t
  var_update = state_ops.scatter_sub(var, grad.indices,
                                     lr * m_t_slice / denominator_slice,
                                     use_locking=self._use_locking)
  return control_flow_ops.group(var_update, m_t, v_t)
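Unlike the dense path in Example 1, only the rows named by grad.indices are read and written, which is what makes this path cheap for embedding-sized variables. A NumPy analogue of the scatter_update on m, with hypothetical values and unique indices (TF's scatter semantics with duplicate indices differ):

    import numpy as np

    m = np.zeros(5)                 # the full slot variable
    indices = np.array([0, 3])      # rows with nonzero gradient
    values = np.array([0.1, -0.2])  # grad.values
    beta1 = 0.9
    m[indices] = beta1 * m[indices] + (1 - beta1) * values
    print(m)  # only rows 0 and 3 changed; the rest stay untouched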
Example 3: _resource_apply_sparse
def _resource_apply_sparse(self, grad, var, indices):
  var_dtype = var.dtype.base_dtype
  lr_t = self._decayed_lr(var_dtype)
  beta_1_t = self._get_hyper('beta_1', var_dtype)
  beta_2_t = self._get_hyper('beta_2', var_dtype)
  local_step = math_ops.cast(self.iterations + 1, var_dtype)
  beta_1_power = math_ops.pow(beta_1_t, local_step)
  beta_2_power = math_ops.pow(beta_2_t, local_step)
  epsilon_t = self._get_hyper('epsilon', var_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta_2_power) / (1 - beta_1_power))
  # m_t = beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, 'm')
  m_scaled_g_values = grad * (1 - beta_1_t)
  m_t = state_ops.assign(m, m * beta_1_t, use_locking=self._use_locking)
  with ops.control_dependencies([m_t]):
    m_t = self._resource_scatter_add(m, indices, m_scaled_g_values)
  # m_bar = (1 - beta1) * g_t + beta1 * m_t
  m_bar = m_scaled_g_values + beta_1_t * array_ops.gather(m_t, indices)
  # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, 'v')
  v_scaled_g_values = (grad * grad) * (1 - beta_2_t)
  v_t = state_ops.assign(v, v * beta_2_t, use_locking=self._use_locking)
  with ops.control_dependencies([v_t]):
    v_t = self._resource_scatter_add(v, indices, v_scaled_g_values)
  v_t_slice = array_ops.gather(v_t, indices)
  v_sqrt = math_ops.sqrt(v_t_slice)
  var_update = self._resource_scatter_add(var, indices,
                                          -lr * m_bar / (v_sqrt + epsilon_t))
  return control_flow_ops.group(*[var_update, m_bar, v_t])
Example 4: _resource_apply_sparse
def _resource_apply_sparse(self, grad, var, indices):
  beta1_power, beta2_power = self._get_beta_accumulators()
  beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
  beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
  # m := beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_t_slice = beta1_t * array_ops.gather(m, indices) + (1 - beta1_t) * grad
  m_update_op = resource_variable_ops.resource_scatter_update(m.handle,
                                                              indices,
                                                              m_t_slice)
  # v := beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_t_slice = (beta2_t * array_ops.gather(v, indices) +
               (1 - beta2_t) * math_ops.square(grad))
  v_update_op = resource_variable_ops.resource_scatter_update(v.handle,
                                                              indices,
                                                              v_t_slice)
  # variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))
  var_slice = lr * m_t_slice / (math_ops.sqrt(v_t_slice) + epsilon_t)
  var_update_op = resource_variable_ops.resource_scatter_sub(var.handle,
                                                             indices,
                                                             var_slice)
  return control_flow_ops.group(var_update_op, m_update_op, v_update_op)
Example 5: _apply_sparse_shared
def _apply_sparse_shared(self, grad, var, indices, scatter_add):
  beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype)
  beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
  # m_t = beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_scaled_g_values = grad * (1 - beta1_t)
  m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
  with ops.control_dependencies([m_t]):
    m_t = scatter_add(m, indices, m_scaled_g_values)
  # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_scaled_g_values = (grad * grad) * (1 - beta2_t)
  v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
  with ops.control_dependencies([v_t]):
    v_t = scatter_add(v, indices, v_scaled_g_values)
  v_sqrt = math_ops.sqrt(v_t)
  var_update = state_ops.assign_sub(var,
                                    lr * m_t / (v_sqrt + epsilon_t),
                                    use_locking=self._use_locking)
  return control_flow_ops.group(*[var_update, m_t, v_t])
Example 6: _apply_rms_spectral
def _apply_rms_spectral(self, grad, var):
  # See if variable updates need something special;
  # might have to resize the variables (they are supposedly flat).
  rms = self.get_slot(var, "rms")
  mom = self.get_slot(var, "momentum")
  momentum = math_ops.cast(self._momentum_tensor, var.dtype.base_dtype)
  lr = math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype)
  decay = math_ops.cast(self._decay_tensor, var.dtype.base_dtype)
  epsilon = math_ops.cast(self._epsilon_tensor, var.dtype.base_dtype)
  rms_update = rms.assign(decay * rms + (1 - decay) * math_ops.square(grad))
  aux = math_ops.sqrt(math_ops.sqrt(rms_update) + epsilon)
  # sharpGrad = (self._sharpOp(grad / aux) if min(grad.get_shape()) < self._svd_approx_size
  #              else self._approxSharp(grad / aux, self._svd_approx_size))
  sharpGrad = self._sharpOp(grad / aux)
  update = lr * (sharpGrad / aux)
  mom_update = mom.assign(mom * momentum + update)
  var_update = var.assign_sub(mom_update)
  return control_flow_ops.group(*[var_update, rms_update, mom_update])
Example 7: _apply_sparse_shared
def _apply_sparse_shared(self, grad, var, indices, scatter_update,
                         scatter_sub):
  beta1_power, beta2_power = self._get_beta_accumulators()
  beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
  beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
  # m := beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_t = scatter_update(m, indices,
                       beta1_t * array_ops.gather(m, indices) +
                       (1 - beta1_t) * grad)
  # v := beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_t = scatter_update(v, indices,
                       beta2_t * array_ops.gather(v, indices) +
                       (1 - beta2_t) * math_ops.square(grad))
  # variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))
  m_t_slice = array_ops.gather(m_t, indices)
  v_t_slice = array_ops.gather(v_t, indices)
  denominator_slice = math_ops.sqrt(v_t_slice) + epsilon_t
  var_update = scatter_sub(var, indices,
                           lr * m_t_slice / denominator_slice)
  return control_flow_ops.group(var_update, m_t, v_t)
Example 8: _stddev
def _stddev(self):
  if distribution_util.is_diagonal_scale(self.scale):
    return np.sqrt(2) * math_ops.abs(self.scale.diag_part())
  elif (isinstance(self.scale, linalg.LinearOperatorUDVHUpdate) and
        self.scale.is_self_adjoint):
    return np.sqrt(2) * math_ops.sqrt(array_ops.matrix_diag_part(
        self.scale.matmul(self.scale.to_dense())))
  else:
    return np.sqrt(2) * math_ops.sqrt(array_ops.matrix_diag_part(
        self.scale.matmul(self.scale.to_dense(), adjoint_arg=True)))
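The sqrt(2) factor reflects the Laplace relationship Var = 2 * scale**2, so the standard deviation is sqrt(2) * |scale| along the diagonal; the three branches differ only in how the diagonal of scale @ scale^T is obtained. A NumPy check, with a hypothetical diagonal scale, that the cheap diagonal branch matches the general branch:

    import numpy as np

    scale = np.diag([0.5, 2.0, -1.0])                      # assumed diagonal scale
    fast = np.sqrt(2) * np.abs(np.diag(scale))             # diagonal branch
    slow = np.sqrt(2) * np.sqrt(np.diag(scale @ scale.T))  # general branch
    print(np.allclose(fast, slow))  # True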
Example 9: _iter_body
def _iter_body(i, mat_y, unused_old_mat_y, mat_z, unused_old_mat_z, err,
               unused_old_err):
  # Note: `identity`, `norm` and `mat_a` are closed over from the enclosing
  # matrix_square_root function (shown in full in Example 12).
  current_iterate = 0.5 * (3.0 * identity - math_ops.matmul(mat_z, mat_y))
  current_mat_y = math_ops.matmul(mat_y, current_iterate)
  current_mat_z = math_ops.matmul(current_iterate, mat_z)
  # Compute the error in the approximation.
  mat_sqrt_a = current_mat_y * math_ops.sqrt(norm)
  mat_a_approx = math_ops.matmul(mat_sqrt_a, mat_sqrt_a)
  residual = mat_a - mat_a_approx
  current_err = math_ops.sqrt(math_ops.reduce_sum(residual * residual)) / norm
  return i + 1, current_mat_y, mat_y, current_mat_z, mat_z, current_err, err
Example 10: segment_sqrt_n
def segment_sqrt_n(data, segment_ids, num_segments, name=None):
  """For docs, see: _RAGGED_SEGMENT_DOCSTRING."""
  with ops.name_scope(name, 'RaggedSegmentSqrtN',
                      [data, segment_ids, num_segments]):
    total = segment_sum(data, segment_ids, num_segments)
    ones = ragged_tensor.RaggedTensor.from_nested_row_splits(
        array_ops.ones_like(data.flat_values), data.nested_row_splits)
    count = segment_sum(ones, segment_ids, num_segments)
    if ragged_tensor.is_ragged(total):
      return total.with_flat_values(
          total.flat_values / math_ops.sqrt(count.flat_values))
    else:
      return total / math_ops.sqrt(count)
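The combiner divides each segment's sum by the square root of its element count, a normalization halfway between SUM and MEAN. A flat NumPy analogue with hypothetical inputs (the TF version additionally handles ragged dimensions):

    import numpy as np

    data = np.array([1.0, 2.0, 3.0, 4.0])
    segment_ids = np.array([0, 0, 1, 1])
    num_segments = 2
    total = np.zeros(num_segments)
    np.add.at(total, segment_ids, data)  # segment_sum(data, ...)
    count = np.bincount(segment_ids, minlength=num_segments)
    print(total / np.sqrt(count))        # [3/sqrt(2), 7/sqrt(2)]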
Example 11: _stddev
def _stddev(self):
  if (isinstance(self.scale, linalg.LinearOperatorIdentity) or
      isinstance(self.scale, linalg.LinearOperatorScaledIdentity) or
      isinstance(self.scale, linalg.LinearOperatorDiag)):
    return math_ops.abs(self.scale.diag_part())
  elif (isinstance(self.scale, linalg.LinearOperatorUDVHUpdate) and
        self.scale.is_self_adjoint):
    return math_ops.sqrt(array_ops.matrix_diag_part(
        self.scale.apply(self.scale.to_dense())))
  else:
    # TODO(b/35040238): Remove transpose once LinOp supports `transpose`.
    return math_ops.sqrt(array_ops.matrix_diag_part(
        self.scale.apply(array_ops.matrix_transpose(self.scale.to_dense()))))
Example 12: matrix_square_root
def matrix_square_root(mat_a, mat_a_size, iter_count=100, ridge_epsilon=1e-4):
  """Iterative method to compute the matrix square root.

  Stable iterations for the matrix square root, Nicholas J. Higham,
  page 231, Eq. 2.6b:
  http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.6.8799&rep=rep1&type=pdf

  Args:
    mat_a: the symmetric PSD matrix whose square root is to be computed.
    mat_a_size: size of mat_a.
    iter_count: maximum number of iterations.
    ridge_epsilon: ridge epsilon added to make the matrix positive definite.

  Returns:
    mat_a^0.5
  """

  def _iter_condition(i, unused_mat_y, unused_old_mat_y, unused_mat_z,
                      unused_old_mat_z, err, old_err):
    # This method requires that we check for divergence at every step.
    return math_ops.logical_and(i < iter_count, err < old_err)

  def _iter_body(i, mat_y, unused_old_mat_y, mat_z, unused_old_mat_z, err,
                 unused_old_err):
    current_iterate = 0.5 * (3.0 * identity - math_ops.matmul(mat_z, mat_y))
    current_mat_y = math_ops.matmul(mat_y, current_iterate)
    current_mat_z = math_ops.matmul(current_iterate, mat_z)
    # Compute the error in the approximation.
    mat_sqrt_a = current_mat_y * math_ops.sqrt(norm)
    mat_a_approx = math_ops.matmul(mat_sqrt_a, mat_sqrt_a)
    residual = mat_a - mat_a_approx
    current_err = math_ops.sqrt(math_ops.reduce_sum(residual * residual)) / norm
    return i + 1, current_mat_y, mat_y, current_mat_z, mat_z, current_err, err

  identity = linalg_ops.eye(math_ops.to_int32(mat_a_size))
  mat_a = mat_a + ridge_epsilon * identity
  norm = math_ops.sqrt(math_ops.reduce_sum(mat_a * mat_a))
  mat_init_y = mat_a / norm
  mat_init_z = identity
  init_err = norm
  _, _, prev_mat_y, _, _, _, _ = control_flow_ops.while_loop(
      _iter_condition, _iter_body, [
          0, mat_init_y, mat_init_y, mat_init_z, mat_init_z, init_err,
          init_err + 1.0
      ])
  return prev_mat_y * math_ops.sqrt(norm)
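Because the loop needs only matmuls, it maps well to accelerators; normalizing by the Frobenius norm keeps this coupled Newton-Schulz iteration inside its convergence region. A NumPy sketch of the same scheme for verification (fixed iteration count; the divergence check from _iter_condition is omitted):

    import numpy as np

    def matrix_square_root_np(mat_a, iter_count=50, ridge_epsilon=1e-4):
        n = mat_a.shape[0]
        identity = np.eye(n)
        mat_a = mat_a + ridge_epsilon * identity
        norm = np.sqrt(np.sum(mat_a * mat_a))
        mat_y, mat_z = mat_a / norm, identity
        for _ in range(iter_count):
            t = 0.5 * (3.0 * identity - mat_z @ mat_y)
            mat_y, mat_z = mat_y @ t, t @ mat_z
        return mat_y * np.sqrt(norm)

    rng = np.random.default_rng(0)
    b = rng.standard_normal((4, 4))
    a = b @ b.T                        # symmetric PSD test matrix
    root = matrix_square_root_np(a)
    # Compare against the (ridge-regularized) input; the residual is tiny.
    print(np.max(np.abs(root @ root - (a + 1e-4 * np.eye(4)))))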
Example 13: compute_pi_tracenorm
def compute_pi_tracenorm(left_cov, right_cov):
  """Computes the scalar constant pi for Tikhonov regularization/damping.

  pi = sqrt( (trace(A) / dim(A)) / (trace(B) / dim(B)) )

  See Section 6.3 of https://arxiv.org/pdf/1503.05671.pdf for details.

  Args:
    left_cov: The left Kronecker factor "covariance".
    right_cov: The right Kronecker factor "covariance".

  Returns:
    The computed scalar constant pi for these Kronecker factors (as a Tensor).
  """

  def _trace(cov):
    if len(cov.shape) == 1:
      # Diagonal matrix.
      return math_ops.reduce_sum(cov)
    elif len(cov.shape) == 2:
      # Full matrix.
      return math_ops.trace(cov)
    else:
      raise ValueError(
          "What's the trace of a Tensor of rank %d?" % len(cov.shape))

  # Instead of dividing each trace by its own dimension, we multiply it by the
  # other factor's dimension; the ratio works out the same.
  left_norm = _trace(left_cov) * right_cov.shape.as_list()[0]
  right_norm = _trace(right_cov) * left_cov.shape.as_list()[0]
  return math_ops.sqrt(left_norm / right_norm)
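The docstring's formula is easy to check with small matrices, and the cross-multiplication trick in the code gives the same ratio as dividing each trace by its own dimension. A NumPy sketch with hypothetical Kronecker factors:

    import numpy as np

    left_cov = np.diag([1.0, 2.0, 3.0])  # trace 6, dim 3 -> average eigenvalue 2
    right_cov = 4.0 * np.eye(2)          # trace 8, dim 2 -> average eigenvalue 4
    pi = np.sqrt((np.trace(left_cov) / 3) / (np.trace(right_cov) / 2))
    pi_as_coded = np.sqrt((np.trace(left_cov) * 2) / (np.trace(right_cov) * 3))
    print(pi, pi_as_coded)  # both ~0.7071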
Example 14: entropy_matched_cauchy_scale
def entropy_matched_cauchy_scale(covariance):
  """Approximates a similar Cauchy distribution given a covariance matrix.

  Since Cauchy distributions do not have moments, entropy matching provides
  one way to set a Cauchy's scale parameter so that the result is a similar
  distribution. The effect is dividing the standard deviation of an
  independent Gaussian by a constant very near 3.

  To set the scale of the Cauchy distribution, we first select the diagonals
  of `covariance`. Since this ignores cross terms, it overestimates the
  entropy of the Gaussian. For each of these variances, we solve for the
  Cauchy scale parameter that gives the same entropy as the Gaussian with
  that variance. This means setting the (univariate) Gaussian entropy

    0.5 * ln(2 * variance * pi * e)

  equal to the Cauchy entropy

    ln(4 * pi * scale)

  Solving, we get scale = sqrt(variance * (e / (8 * pi))).

  Args:
    covariance: A [batch size x N x N] batch of covariance matrices for which
      to produce Cauchy scales.

  Returns:
    A [batch size x N] set of Cauchy scale parameters, one for each part of
    the batch and each dimension of the input Gaussians.
  """
  return math_ops.sqrt(math.e / (8. * math.pi) *
                       array_ops.matrix_diag_part(covariance))
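The closed form follows from equating the two entropies in the docstring. A quick numeric check that ln(4 * pi * scale) reproduces the Gaussian entropy for an arbitrary variance, and that the implied divisor is indeed "very near 3":

    import math

    variance = 2.5  # arbitrary test variance
    scale = math.sqrt(variance * math.e / (8 * math.pi))
    gaussian_entropy = 0.5 * math.log(2 * math.pi * math.e * variance)
    cauchy_entropy = math.log(4 * math.pi * scale)
    print(abs(gaussian_entropy - cauchy_entropy) < 1e-12)  # True
    print(math.sqrt(variance) / scale)  # sqrt(8*pi/e) ~= 3.04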
Example 15: _variance
def _variance(self):
  x = math_ops.sqrt(self.df) * self.scale_operator_pd.to_dense()
  d = array_ops.expand_dims(array_ops.matrix_diag_part(x), -1)
  v = math_ops.square(x) + math_ops.matmul(d, d, adjoint_b=True)
  if self.cholesky_input_output_matrices:
    return linalg_ops.cholesky(v)
  return v