本文整理汇总了Python中tensorflow.python.ops.linalg_ops.norm方法的典型用法代码示例。如果您正苦于以下问题：Python linalg_ops.norm方法的具体用法？Python linalg_ops.norm怎么用？Python linalg_ops.norm使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块tensorflow.python.ops.linalg_ops的用法示例。
在下文中一共展示了linalg_ops.norm方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from tensorflow.python.ops import linalg_ops [as 别名]
# 或者: from tensorflow.python.ops.linalg_ops import norm [as 别名]
def __init__(
    self,
    loc,
    scale,
    validate_args=False,
    allow_nan_stats=True,
    name="von-Mises-Fisher",
):
    """Build von-Mises-Fisher distribution(s) from a mean and a concentration.

    Args:
        loc: Floating point tensor; the mean of the distribution(s). When
            `validate_args` is `True`, its norm along the last axis is
            asserted to be near 1 (`atol=1e-7`).
        scale: Floating point tensor; the concentration of the
            distribution(s). When `validate_args` is `True`, it is asserted
            to be strictly positive.
        validate_args: Python `bool`, default `False`. When `True`, the two
            assertions above become control dependencies of the stored
            parameters; when `False`, no assertion ops are created.
        allow_nan_stats: Python `bool`, default `True`. Forwarded unchanged
            to the base-class constructor.
        name: Python `str` used as the name scope for the ops created here
            and forwarded to the base-class constructor.

    Notes:
        `check_ops.assert_same_float_dtype` is applied to the stored `loc`
        and `scale`. NOTE(review): the exception type it raises on a
        mismatch is not visible in this listing -- confirm before
        documenting a `Raises` section.
    """
    # Snapshot the constructor arguments before anything else is bound.
    parameters = locals()

    # NOTE(review): block nesting was reconstructed from a listing without
    # indentation; the base-class call is assumed to sit outside the name
    # scope -- confirm against upstream.
    with ops.name_scope(name, values=[loc, scale]):
        with ops.control_dependencies(
            [
                check_ops.assert_positive(scale),
                check_ops.assert_near(linalg_ops.norm(loc, axis=-1), 1, atol=1e-7),
            ]
            if validate_args
            else []
        ):
            self._loc = array_ops.identity(loc, name="loc")
            self._scale = array_ops.identity(scale, name="scale")
            check_ops.assert_same_float_dtype([self._loc, self._scale])

    super(VonMisesFisher, self).__init__(
        dtype=self._scale.dtype,
        reparameterization_type=distribution.FULLY_REPARAMETERIZED,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        parameters=parameters,
        graph_parents=[self._loc, self._scale],
        name=name,
    )

    # Cached helpers derived from the size of loc's last axis: the size as
    # int32, the same value cast to the distribution dtype, and a one-hot
    # row of that length with its 1 at index 0.
    self.__m = math_ops.cast(self._loc.shape[-1], dtypes.int32)
    self.__mf = math_ops.cast(self.__m, dtype=self.dtype)
    self.__e1 = array_ops.one_hot([0], self.__m, dtype=self.dtype)
示例2: _log_unnormalized_prob
# 需要导入模块: from tensorflow.python.ops import linalg_ops [as 别名]
# 或者: from tensorflow.python.ops.linalg_ops import norm [as 别名]
def _log_unnormalized_prob(self, x):
    """Return `scale * sum(loc * x, axis=-1)` with the last axis dropped.

    Args:
        x: Tensor multiplied element-wise with the stored `loc`. When
            `self.validate_args` is true, its norm along the last axis is
            asserted to be near 1 (`atol=1e-3`).

    Returns:
        The scaled sum, reshaped to the shape of the kept-dims result minus
        its final axis.
    """
    if self.validate_args:
        unit_norm_checks = [
            check_ops.assert_near(linalg_ops.norm(x, axis=-1), 1, atol=1e-3)
        ]
    else:
        unit_norm_checks = []

    with ops.control_dependencies(unit_norm_checks):
        concentration = self.scale
        # keepdims=True keeps a trailing axis on the sum; it is removed by
        # the reshape below.
        inner_product = math_ops.reduce_sum(self._loc * x, axis=-1, keepdims=True)
        output = concentration * inner_product

    leading_dims = ops.convert_to_tensor(array_ops.shape(output)[:-1])
    return array_ops.reshape(output, leading_dims)
示例3: _compute_weights
# 需要导入模块: from tensorflow.python.ops import linalg_ops [as 别名]
# 或者: from tensorflow.python.ops.linalg_ops import norm [as 别名]
def _compute_weights(self):
    """Rebuild the wrapped layer's kernel from its direction and magnitude.

    Sets `self.layer.kernel` to `self.layer.v`, L2-normalized over
    `self.norm_axes`, multiplied by `self.layer.g`. Ops are created inside
    the "compute_weights" variable scope.
    """
    with variable_scope.variable_scope("compute_weights"):
        unit_direction = nn_impl.l2_normalize(self.layer.v, axis=self.norm_axes)
        self.layer.kernel = unit_direction * self.layer.g
示例4: _init_norm
# 需要导入模块: from tensorflow.python.ops import linalg_ops [as 别名]
# 或者: from tensorflow.python.ops.linalg_ops import norm [as 别名]
def _init_norm(self, weights):
    """Compute one norm per output column of `weights`.

    `weights` is reshaped to `[-1, self.layer_depth]`, the norm is taken
    along axis 0 (using the default order of `linalg_ops.norm`), and the
    result is returned with shape `(self.layer_depth,)`. Ops are created
    inside the "init_norm" variable scope.

    Args:
        weights: Tensor whose element count is a multiple of
            `self.layer_depth` (required by the reshape).

    Returns:
        Tensor of shape `(self.layer_depth,)` holding the column norms.
    """
    from tensorflow.python.ops.linalg_ops import norm

    with variable_scope.variable_scope("init_norm"):
        # pylint: disable=no-member
        columns = array_ops.reshape(weights, [-1, self.layer_depth])
        column_norms = norm(columns, axis=0)
        return array_ops.reshape(column_norms, (self.layer_depth,))
示例5: _apply_dense
# 需要导入模块: from tensorflow.python.ops import linalg_ops [as 别名]
# 或者: from tensorflow.python.ops.linalg_ops import norm [as 别名]
def _apply_dense(self, grad, var):
    """Apply one dense update to `var`, adapting a learning-rate scale and gain.

    Reads the slots "lr_scale", "momentum", "gbar", "gain" and "counter" of
    `var`, updates each of them, and finally subtracts
    `lr_scale * momentum` (both freshly updated) from `var`.

    Args:
        grad: Gradient tensor for `var`.
        var: Variable being optimized.

    Returns:
        The `assign_sub` op on `var`, created under a control dependency on
        the `gbar` update.
    """
    lr_scale_slot = self.get_slot(var, "lr_scale")
    momentum_slot = self.get_slot(var, "momentum")
    gbar_slot = self.get_slot(var, "gbar")
    gain_slot = self.get_slot(var, "gain")
    step_slot = self.get_slot(var, "counter")
    step_now = state_ops.assign(step_slot, step_slot + 1)

    # Norm-scaled copies (1e-10 guards the division) so that the lr_scale
    # update can be made independent of the dimension.
    unit_grad = grad / (linalg_ops.norm(grad) + 1e-10)
    unit_momentum = momentum_slot / (linalg_ops.norm(momentum_slot) + 1e-10)

    # EG update of lr_scale:
    #   grad_lr_scale = -inner_product(current_grad, old_momentum)
    #   lr_scale <- lr_scale * exp(-scale_learning_rate * grad_lr_scale)
    # Both the raw and the norm-scaled variants are built; the flag
    # `self._use_directions` selects between them.
    raw_alignment = math_ops.reduce_sum(grad * momentum_slot)
    lr_scale_from_raw = clip_ops.clip_by_value(
        lr_scale_slot * math_ops.exp(self._scale_learning_rate * raw_alignment),
        self._min_scale,
        self._max_scale,
    )
    unit_alignment = math_ops.reduce_sum(unit_grad * unit_momentum)
    lr_scale_from_unit = clip_ops.clip_by_value(
        lr_scale_slot * math_ops.exp(self._scale_learning_rate * unit_alignment),
        self._min_scale,
        self._max_scale,
    )
    lr_scale_updated = state_ops.assign(
        lr_scale_slot,
        array_ops.where(self._use_directions, lr_scale_from_unit, lr_scale_from_raw),
    )

    # Debias gbar. The exponent is max(counter - 1, 1) using the incremented
    # counter. (The original comment attributes the bias to zero
    # initialization of gbar; slot creation is not shown here.)
    debias_exponent = math_ops.maximum(step_now - 1, 1)
    debiased_gbar = gbar_slot / (1.0 - self._beta ** debias_exponent)

    # EG update of gain:
    #   grad_gain = -current_grad * old_gbar
    #   gain <- gain * exp(-gain_learning_rate * grad_gain)
    raw_gain_step = self._gain_learning_rate * grad * debiased_gbar
    gain_from_raw = clip_ops.clip_by_value(
        gain_slot * math_ops.exp(raw_gain_step), self._min_gain, self._max_gain
    )
    # The sign-based variant uses sign(grad) * sign(gbar) as a proxy for
    # grad_gain; `self._use_signs` selects it.
    sign_gain_step = (
        self._gain_learning_rate * math_ops.sign(grad) * math_ops.sign(gbar_slot)
    )
    gain_from_signs = clip_ops.clip_by_value(
        gain_slot * math_ops.exp(sign_gain_step), self._min_gain, self._max_gain
    )
    gain_updated = state_ops.assign(
        gain_slot,
        array_ops.where(self._use_signs, gain_from_signs, gain_from_raw),
    )

    scaled_grad = self._learning_rate_tensor * gain_updated * grad

    # NOTE(review): the nesting of the two control-dependency scopes below
    # was reconstructed from a listing without indentation -- confirm
    # against upstream.
    # momentum and gbar are overwritten only after lr_scale and the scaled
    # gradient (which read their old values) have been computed.
    with ops.control_dependencies([lr_scale_updated, scaled_grad]):
        next_momentum = self._momentum_tensor * momentum_slot + scaled_grad
        momentum_updated = state_ops.assign(momentum_slot, next_momentum)
        next_gbar = self._beta * gbar_slot + (1.0 - self._beta) * grad
        gbar_updated = state_ops.assign(gbar_slot, next_gbar)

    with ops.control_dependencies([gbar_updated]):
        return state_ops.assign_sub(var, lr_scale_updated * momentum_updated)
示例6: apply_gradients
# 需要导入模块: from tensorflow.python.ops import linalg_ops [as 别名]
# 或者: from tensorflow.python.ops.linalg_ops import norm [as 别名]
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Build the grouped update op for every (gradient, variable) pair.

    For each pair, Adam-style first and second moment variables are created
    (or fetched) with `tf.get_variable`, an update is computed from them,
    optional weight decay and an optional per-variable norm ratio are
    applied, and assignments for the variable and both moments are
    collected.

    Args:
        grads_and_vars: Iterable of `(grad, param)` pairs. Pairs in which
            either element is `None` are skipped.
        global_step: Accepted for interface compatibility; it is not
            referenced anywhere in this method.
        name: Name passed to the returned `tf.group` op.

    Returns:
        A `tf.group` op containing all collected assignments.
    """
    assignments = []
    for grad, param in grads_and_vars:
        if grad is None or param is None:
            continue

        param_name = self._get_variable_name(param.name)

        # Moment accumulators, zero-initialized and excluded from training.
        m = tf.get_variable(
            name=six.ensure_str(param_name) + "/adam_m",
            shape=param.shape.as_list(),
            dtype=tf.float32,
            trainable=False,
            initializer=tf.zeros_initializer(),
        )
        v = tf.get_variable(
            name=six.ensure_str(param_name) + "/adam_v",
            shape=param.shape.as_list(),
            dtype=tf.float32,
            trainable=False,
            initializer=tf.zeros_initializer(),
        )

        # Standard Adam moment updates (no bias correction is applied here).
        next_m = tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad)
        next_v = tf.multiply(self.beta_2, v) + tf.multiply(
            1.0 - self.beta_2, tf.square(grad)
        )
        update = next_m / (tf.sqrt(next_v) + self.epsilon)

        # Weight decay is added to the update itself rather than to the
        # loss: adding the squared weights to the loss would interact with
        # the m/v statistics. Decaying here is equivalent to adding the
        # squared weights to the loss under plain (non-momentum) SGD.
        if self._do_use_weight_decay(param_name):
            update = update + self.weight_decay_rate * param

        # Per-variable ratio ||param|| / ||update||, falling back to 1.0
        # whenever either norm is not greater than zero.
        ratio = 1.0
        if self._do_layer_adaptation(param_name):
            w_norm = linalg_ops.norm(param, ord=2)
            g_norm = linalg_ops.norm(update, ord=2)
            w_is_positive = math_ops.greater(w_norm, 0)
            g_is_positive = math_ops.greater(g_norm, 0)
            guarded_ratio = array_ops.where(g_is_positive, (w_norm / g_norm), 1.0)
            ratio = array_ops.where(w_is_positive, guarded_ratio, 1.0)

        update_with_lr = ratio * self.learning_rate * update
        next_param = param - update_with_lr

        assignments += [
            param.assign(next_param),
            m.assign(next_m),
            v.assign(next_v),
        ]

    return tf.group(*assignments, name=name)
示例7: _resource_apply_dense
# 需要导入模块: from tensorflow.python.ops import linalg_ops [as 别名]
# 或者: from tensorflow.python.ops.linalg_ops import norm [as 别名]
def _resource_apply_dense(self, grad, var):
    """Apply one dense update to `var` using bias-corrected moments.

    Updates the "m" and "v" slots of `var`, forms the update from their
    bias-corrected values, optionally adds weight decay and scales by a
    per-variable norm ratio, then assigns the new value to `var`.

    Args:
        grad: Dense gradient tensor for `var`.
        var: Variable being optimized.

    Returns:
        The `.op` of the assignment that writes the new value of `var`.
    """
    beta1_power, beta2_power = self._get_beta_accumulators()

    # Bring every hyper-parameter to the variable's base dtype.
    base_dtype = var.dtype.base_dtype
    beta1_power = math_ops.cast(beta1_power, base_dtype)
    beta2_power = math_ops.cast(beta2_power, base_dtype)
    lr_t = math_ops.cast(self._lr_t, base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, base_dtype)
    weight_decay_rate_t = math_ops.cast(self._weight_decay_rate_t, base_dtype)

    # First moment: m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    grad_part_m = grad * (1 - beta1_t)
    m_next = m * beta1_t + grad_part_m
    m_t = state_ops.assign(m, m_next, use_locking=self._use_locking)

    # Second moment: v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    grad_part_v = (grad * grad) * (1 - beta2_t)
    v_next = v * beta2_t + grad_part_v
    v_t = state_ops.assign(v, v_next, use_locking=self._use_locking)

    # Bias-corrected moments. An earlier variant, referred to in the
    # original comments as the original LAMBOptimizer implementation,
    # divided the uncorrected m_t by sqrt(v_t_hat) + epsilon instead.
    m_t_hat = m_t / (1. - beta1_power)
    v_t_hat = v_t / (1. - beta2_power)
    v_sqrt = math_ops.sqrt(v_t_hat)
    update = m_t_hat / (v_sqrt + epsilon_t)

    var_name = self._get_variable_name(var.name)
    if self._do_use_weight_decay(var_name):
        update = update + weight_decay_rate_t * var

    # Per-variable ratio ||var|| / ||update||, falling back to 1.0 whenever
    # either norm is not greater than zero.
    ratio = 1.0
    if self._do_layer_adaptation(var_name):
        w_norm = linalg_ops.norm(var, ord=2)
        g_norm = linalg_ops.norm(update, ord=2)
        w_is_positive = math_ops.greater(w_norm, 0)
        g_is_positive = math_ops.greater(g_norm, 0)
        guarded_ratio = array_ops.where(g_is_positive, (w_norm / g_norm), 1.0)
        ratio = array_ops.where(w_is_positive, guarded_ratio, 1.0)

    var_update = var - ratio * lr_t * update
    return state_ops.assign(var, var_update, use_locking=self._use_locking).op
示例8: _apply_sparse_shared
# 需要导入模块: from tensorflow.python.ops import linalg_ops [as 别名]
# 或者: from tensorflow.python.ops.linalg_ops import norm [as 别名]
def _apply_sparse_shared(self, grad, var, indices, scatter_add):
    """Apply one sparse update to `var` using bias-corrected moments.

    The "m" and "v" slots are first decayed as a whole and then incremented
    at `indices` through the supplied `scatter_add` callable. The update is
    formed from the bias-corrected moments, optionally with weight decay and
    a per-variable norm ratio, and subtracted from `var`.

    Args:
        grad: Gradient values passed to `scatter_add` for the rows selected
            by `indices`.
        var: Variable being optimized.
        indices: Indices passed through to `scatter_add`.
        scatter_add: Callable invoked as `scatter_add(slot, indices, values)`.

    Returns:
        A grouped op covering the variable update and both slot updates.
    """
    beta1_power, beta2_power = self._get_beta_accumulators()

    # Bring every hyper-parameter to the variable's base dtype.
    base_dtype = var.dtype.base_dtype
    beta1_power = math_ops.cast(beta1_power, base_dtype)
    beta2_power = math_ops.cast(beta2_power, base_dtype)
    lr_t = math_ops.cast(self._lr_t, base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, base_dtype)
    weight_decay_rate_t = math_ops.cast(self._weight_decay_rate_t, base_dtype)

    # First moment: m_t = beta1 * m + (1 - beta1) * g_t
    # The decay is applied to the whole slot; the gradient term is
    # scattered in only after the decay has run.
    m = self.get_slot(var, "m")
    grad_part_m = grad * (1 - beta1_t)
    m_decayed = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
    with ops.control_dependencies([m_decayed]):
        m_t = scatter_add(m, indices, grad_part_m)

    # Second moment: v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    grad_part_v = (grad * grad) * (1 - beta2_t)
    v_decayed = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
    with ops.control_dependencies([v_decayed]):
        v_t = scatter_add(v, indices, grad_part_v)

    # Bias-corrected moments. An earlier variant, referred to in the
    # original comments as the original LAMBOptimizer implementation,
    # divided the uncorrected m_t by sqrt(v_t_hat) + epsilon instead.
    m_t_hat = m_t / (1. - beta1_power)
    v_t_hat = v_t / (1. - beta2_power)
    v_sqrt = math_ops.sqrt(v_t_hat)
    update = m_t_hat / (v_sqrt + epsilon_t)

    var_name = self._get_variable_name(var.name)
    if self._do_use_weight_decay(var_name):
        update = update + weight_decay_rate_t * var

    # Per-variable ratio ||var|| / ||update||, falling back to 1.0 whenever
    # either norm is not greater than zero.
    ratio = 1.0
    if self._do_layer_adaptation(var_name):
        w_norm = linalg_ops.norm(var, ord=2)
        g_norm = linalg_ops.norm(update, ord=2)
        w_is_positive = math_ops.greater(w_norm, 0)
        g_is_positive = math_ops.greater(g_norm, 0)
        guarded_ratio = array_ops.where(g_is_positive, (w_norm / g_norm), 1.0)
        ratio = array_ops.where(w_is_positive, guarded_ratio, 1.0)

    var_update = state_ops.assign_sub(
        var, ratio * lr_t * update, use_locking=self._use_locking
    )
    return control_flow_ops.group(var_update, m_t, v_t)