This article collects typical usage examples of the Python function tensorflow.python.ops.nn.batch_normalization. If you are unsure what batch_normalization does, how to call it, or want to see it used in context, the curated examples below should help.
The following 14 code examples of batch_normalization are shown, ordered by popularity by default.
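Before the examples, here is a minimal, self-contained sketch of calling the public wrapper directly (an illustration, not taken from any example below; the shapes and epsilon are assumptions). The op computes gamma * (x - mean) / sqrt(variance + epsilon) + beta.

# Minimal sketch (assumed TF 1.x graph mode): normalize activations with
# statistics computed from the batch itself.
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 64])   # [batch, features], illustrative
gamma = tf.Variable(tf.ones([64]), name='gamma')   # scale
beta = tf.Variable(tf.zeros([64]), name='beta')    # offset

# Per-feature mean/variance over the batch dimension.
mean, variance = tf.nn.moments(x, axes=[0])

# y = gamma * (x - mean) / sqrt(variance + eps) + beta
y = tf.nn.batch_normalization(x, mean, variance,
                              offset=beta, scale=gamma,
                              variance_epsilon=1e-3)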
Example 1: normalize_in_training
def normalize_in_training():
  if needs_broadcasting:
    return nn.batch_normalization(inputs,
                                  broadcast_mean,
                                  broadcast_variance,
                                  broadcast_beta,
                                  broadcast_gamma,
                                  self.epsilon)
  else:
    return nn.batch_normalization(inputs,
                                  mean,
                                  variance,
                                  self.beta if self.center else None,
                                  self.gamma if self.scale else None,
                                  self.epsilon)
Example 2: call
def call(self, inputs):
  # Compute the axes along which to reduce the mean / variance
  input_shape = inputs.shape
  ndims = len(input_shape)

  # Calculate the moments on the last axis (layer activations).
  mean, variance = nn.moments(inputs, self.axis, keep_dims=True)

  # Broadcasting only necessary for norm where the axis is not just
  # the last dimension
  broadcast_shape = [1] * ndims
  for dim in self.axis:
    broadcast_shape[dim] = input_shape.dims[dim].value

  def _broadcast(v):
    if (v is not None and len(v.shape) != ndims and
        self.axis != [ndims - 1]):
      return array_ops.reshape(v, broadcast_shape)
    return v

  scale, offset = _broadcast(self.gamma), _broadcast(self.beta)

  # Compute layer normalization using the batch_normalization function.
  outputs = nn.batch_normalization(
      inputs,
      mean,
      variance,
      offset=offset,
      scale=scale,
      variance_epsilon=self.epsilon)

  # If some components of the shape got lost due to adjustments, fix that.
  outputs.set_shape(input_shape)
  return outputs
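As this example shows, layer normalization reuses the same op by taking the moments over the feature axis instead of the batch axis. A minimal standalone sketch under that assumption (shapes and epsilon are illustrative, not part of the example above):

# Minimal layer-norm sketch (assumed): moments over the feature axis, per example.
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 128])
gamma = tf.Variable(tf.ones([128]))
beta = tf.Variable(tf.zeros([128]))

# keep_dims=True so the per-example mean/variance broadcast back against `x`.
mean, variance = tf.nn.moments(x, axes=[1], keep_dims=True)
y = tf.nn.batch_normalization(x, mean, variance,
                              offset=beta, scale=gamma,
                              variance_epsilon=1e-6)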
Example 3: __call__
def __call__(self, inputs):
  """Run virtual batch normalization on inputs.

  Args:
    inputs: Tensor input.

  Returns:
    A virtual batch normalized version of `inputs`.

  Raises:
    ValueError: If `inputs` shape isn't compatible with the reference batch.
  """
  _validate_call_input([inputs, self._reference_batch], self._batch_axis)

  with ops.name_scope(self._vs.name, values=[inputs, self._reference_batch]):
    # Calculate the statistics on the current input on a per-example basis.
    vb_mean, vb_mean_sq = self._virtual_statistics(
        inputs, self._example_reduction_axes)
    vb_variance = vb_mean_sq - math_ops.square(vb_mean)

    # The exact broadcast shape of the input statistic Tensors depends on the
    # current batch, not the reference batch. The parameter broadcast shape
    # is independent of the shape of the input statistic Tensor dimensions.
    b_shape = self._broadcast_shape[:]  # deep copy
    b_shape[self._batch_axis] = _static_or_dynamic_batch_size(
        inputs, self._batch_axis)
    return nn.batch_normalization(
        inputs,
        self._broadcast(vb_mean, b_shape),
        self._broadcast(vb_variance, b_shape),
        self._broadcast(self._beta, self._broadcast_shape),
        self._broadcast(self._gamma, self._broadcast_shape),
        self._epsilon)
Example 4: reference_batch_normalization
def reference_batch_normalization(self):
  """Return the reference batch, but batch normalized."""
  with ops.name_scope(self._vs.name):
    return nn.batch_normalization(self._reference_batch,
                                  self._broadcast(self._ref_mean),
                                  self._broadcast(self._ref_variance),
                                  self._broadcast(self._beta),
                                  self._broadcast(self._gamma), self._epsilon)
Example 5: normalize_in_test
def normalize_in_test():
  if needs_broadcasting:
    broadcast_moving_mean = array_ops.reshape(self.moving_mean,
                                              broadcast_shape)
    broadcast_moving_variance = array_ops.reshape(self.moving_variance,
                                                  broadcast_shape)
    return nn.batch_normalization(inputs,
                                  broadcast_moving_mean,
                                  broadcast_moving_variance,
                                  broadcast_beta,
                                  broadcast_gamma,
                                  self.epsilon)
  else:
    return nn.batch_normalization(inputs,
                                  self.moving_mean,
                                  self.moving_variance,
                                  self.beta if self.center else None,
                                  self.gamma if self.scale else None,
                                  self.epsilon)
Example 6: batch_norm
def batch_norm(x, deterministic, alpha=0.9, shift=True, scope='bn'):
  with vs.variable_scope(scope):
    dtype = x.dtype
    input_shape = x.get_shape().as_list()
    feat_dim = input_shape[-1]
    axes = list(range(len(input_shape) - 1))

    if shift:
      beta = vs.get_variable(
          scope + "_beta", shape=[feat_dim],
          initializer=init_ops.zeros_initializer, dtype=dtype)
    else:
      beta = vs.get_variable(
          scope + "_beta", shape=[feat_dim],
          initializer=init_ops.zeros_initializer,
          dtype=dtype, trainable=False)

    gamma = vs.get_variable(
        scope + "_gamma", shape=[feat_dim],
        initializer=init_ops.constant_initializer(0.1), dtype=dtype)

    mean = vs.get_variable(scope + "_mean", shape=[feat_dim],
                           initializer=init_ops.zeros_initializer,
                           dtype=dtype, trainable=False)

    var = vs.get_variable(scope + "_var", shape=[feat_dim],
                          initializer=init_ops.ones_initializer,
                          dtype=dtype, trainable=False)

    counter = vs.get_variable(scope + "_counter", shape=[],
                              initializer=init_ops.constant_initializer(0),
                              dtype=tf.int64, trainable=False)

    zero_cnt = vs.get_variable(scope + "_zero_cnt", shape=[],
                               initializer=init_ops.constant_initializer(0),
                               dtype=tf.int64, trainable=False)

    batch_mean, batch_var = moments(x, axes, name=scope + '_moments')

    # On the very first call (counter == 0) fall back to the batch statistics.
    mean, var = cond(math_ops.equal(counter, zero_cnt),
                     lambda: (batch_mean, batch_var),
                     lambda: (mean, var))

    # In training mode, blend the batch statistics into the running estimates.
    mean, var, counter = cond(deterministic,
                              lambda: (mean, var, counter),
                              lambda: ((1 - alpha) * batch_mean + alpha * mean,
                                       (1 - alpha) * batch_var + alpha * var,
                                       counter + 1))

    normed = batch_normalization(x, mean, var, beta, gamma, 1e-8)
  return normed
Example 7: batch_norm
def batch_norm(inputs,
               decay=0.999,
               center=True,
               scale=False,
               epsilon=0.001,
               activation_fn=None,
               updates_collections=ops.GraphKeys.UPDATE_OPS,
               is_training=True,
               reuse=None,
               variables_collections=None,
               outputs_collections=None,
               trainable=True,
               scope=None):
  """Adds a Batch Normalization layer from http://arxiv.org/abs/1502.03167.

  "Batch Normalization: Accelerating Deep Network Training by Reducing
  Internal Covariate Shift", Sergey Ioffe, Christian Szegedy.

  Can be used as a normalizer function for conv2d and fully_connected.

  Args:
    inputs: a tensor of size `[batch_size, height, width, channels]`
      or `[batch_size, channels]`.
    decay: decay for the moving average.
    center: If True, subtract `beta`. If False, `beta` is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is not used. When
      the next layer is linear (also e.g. `nn.relu`), this can be disabled
      since the scaling can be done by the next layer.
    epsilon: small float added to variance to avoid dividing by zero.
    activation_fn: Optional activation function.
    updates_collections: collections to collect the update ops for computation.
      If None, a control dependency would be added to make sure the updates
      are computed.
    is_training: whether or not the layer is in training mode. In training mode
      it would accumulate the statistics of the moments into `moving_mean` and
      `moving_variance` using an exponential moving average with the given
      `decay`. When it is not in training mode then it would use the values of
      the `moving_mean` and the `moving_variance`.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: optional collections for the variables.
    outputs_collections: collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_op_scope`.

  Returns:
    a tensor representing the output of the operation.
  """
  with variable_scope.variable_op_scope([inputs], scope, 'BatchNorm',
                                        reuse=reuse) as sc:
    inputs_shape = inputs.get_shape()
    dtype = inputs.dtype.base_dtype
    axis = list(range(len(inputs_shape) - 1))
    params_shape = inputs_shape[-1:]
    # Allocate parameters for the beta and gamma of the normalization.
    beta, gamma = None, None
    if center:
      beta_collections = utils.get_variable_collections(variables_collections,
                                                        'beta')
      beta = variables.model_variable('beta',
                                      shape=params_shape,
                                      dtype=dtype,
                                      initializer=init_ops.zeros_initializer,
                                      collections=beta_collections,
                                      trainable=trainable)
    if scale:
      gamma_collections = utils.get_variable_collections(variables_collections,
                                                         'gamma')
      gamma = variables.model_variable('gamma',
                                       shape=params_shape,
                                       dtype=dtype,
                                       initializer=init_ops.ones_initializer,
                                       collections=gamma_collections,
                                       trainable=trainable)
    # Create moving_mean and moving_variance variables and add them to the
    # appropriate collections.
    moving_mean_collections = utils.get_variable_collections(
        variables_collections, 'moving_mean')
    moving_mean = variables.model_variable(
        'moving_mean',
        shape=params_shape,
        dtype=dtype,
        initializer=init_ops.zeros_initializer,
        trainable=False,
        collections=moving_mean_collections)
    moving_variance_collections = utils.get_variable_collections(
        variables_collections, 'moving_variance')
    moving_variance = variables.model_variable(
        'moving_variance',
        shape=params_shape,
        dtype=dtype,
        initializer=init_ops.ones_initializer,
        trainable=False,
        collections=moving_variance_collections)
    if is_training:
      # Calculate the moments based on the individual batch.
      mean, variance = nn.moments(inputs, axis, shift=moving_mean)
      # Update the moving_mean and moving_variance moments.
      update_moving_mean = moving_averages.assign_moving_average(
          moving_mean, mean, decay)
      update_moving_variance = moving_averages.assign_moving_average(
          moving_variance, variance, decay)
      if updates_collections is None:
        # Make sure the updates are computed here.
        with ops.control_dependencies([update_moving_mean,
                                       update_moving_variance]):
          outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma,
                                           epsilon)
      else:
        # Collect the updates to be computed later.
        ops.add_to_collections(updates_collections, update_moving_mean)
        ops.add_to_collections(updates_collections, update_moving_variance)
        outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma,
                                         epsilon)
    else:
      outputs = nn.batch_normalization(
          inputs, moving_mean, moving_variance, beta, gamma, epsilon)
    outputs.set_shape(inputs.get_shape())
    if activation_fn:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
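When updates_collections is left at its default of ops.GraphKeys.UPDATE_OPS, as above, the moving-average assignments are only collected, not executed. A common training-loop pattern (an assumption about typical caller code, not part of the example) is to run them as control dependencies of the train op:

# Assumed usage pattern (TF 1.x): run the collected moving-average updates
# together with the training step.
import tensorflow as tf

# Toy loss; in real code this would come from a network built with the
# batch_norm above, so UPDATE_OPS would contain its moving-average updates.
w = tf.Variable(1.0)
loss = tf.square(w - 3.0)

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # may be empty in this toy
with tf.control_dependencies(update_ops):
    train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)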
Example 8: call
def call(self, inputs, training=False):
  if self.fused:
    return self._fused_batch_norm(inputs, training=training)

  # First, compute the axes along which to reduce the mean / variance,
  # as well as the broadcast shape to be used for all parameters.
  input_shape = inputs.get_shape()
  ndim = len(input_shape)
  reduction_axes = list(range(len(input_shape)))
  del reduction_axes[self.axis]
  broadcast_shape = [1] * len(input_shape)
  broadcast_shape[self.axis] = input_shape[self.axis].value

  # Determines whether broadcasting is needed.
  needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1])

  scale, offset = self.gamma, self.beta

  # Determine a boolean value for `training`: could be True, False, or None.
  training_value = utils.constant_value(training)
  if training_value is not False:
    # Some of the computations here are not necessary when training==False
    # but not a constant. However, this makes the code simpler.
    mean, variance = nn.moments(inputs, reduction_axes)
    mean = _smart_select(training,
                         lambda: mean,
                         lambda: self.moving_mean)
    variance = _smart_select(training,
                             lambda: variance,
                             lambda: self.moving_variance)

    if self.renorm:
      r, d, new_mean, new_variance = self._renorm_correction_and_moments(
          mean, variance, training)
      # When training, the normalized values (say, x) will be transformed as
      # x * gamma + beta without renorm, and (x * r + d) * gamma + beta
      # = x * (r * gamma) + (d * gamma + beta) with renorm.
      scale = array_ops.stop_gradient(r, name='renorm_r')
      offset = array_ops.stop_gradient(d, name='renorm_d')
      if self.gamma is not None:
        scale *= self.gamma
        offset *= self.gamma
      if self.beta is not None:
        offset += self.beta
    else:
      new_mean, new_variance = mean, variance

    # Update moving averages when training, and prevent updates otherwise.
    decay = _smart_select(training, lambda: self.momentum, lambda: 1.)
    mean_update = moving_averages.assign_moving_average(
        self.moving_mean, new_mean, decay, zero_debias=False)
    variance_update = moving_averages.assign_moving_average(
        self.moving_variance, new_variance, decay, zero_debias=False)
    self.add_update(mean_update, inputs=inputs)
    self.add_update(variance_update, inputs=inputs)
  else:
    mean, variance = self.moving_mean, self.moving_variance

  def _broadcast(v):
    if needs_broadcasting and v is not None:
      # In this case we must explicitly broadcast all parameters.
      return array_ops.reshape(v, broadcast_shape)
    return v

  return nn.batch_normalization(inputs,
                                _broadcast(mean),
                                _broadcast(variance),
                                _broadcast(offset),
                                _broadcast(scale),
                                self.epsilon)
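This call method appears to be a version of the layer behind the public tf.layers API. A typical way to invoke it from user code (assumed usage with illustrative shapes, not taken from the example) is through tf.layers.batch_normalization with a training flag:

# Assumed public-API usage (TF 1.x): the training tensor selects batch
# statistics when True and the moving averages when False.
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 64])
training = tf.placeholder(tf.bool, shape=[], name='is_training')

y = tf.layers.batch_normalization(x, momentum=0.99, epsilon=1e-3,
                                  training=training)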
Example 9: instance_norm
# ... (the beginning of this function is omitted here) ...
      `batch_size`. The normalization is over all but the last dimension if
      `data_format` is `NHWC` and the second dimension if `data_format` is
      `NCHW`.
    center: If True, add offset of `beta` to normalized tensor. If False, `beta`
      is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is
      not used. When the next layer is linear (also e.g. `nn.relu`), this can be
      disabled since the scaling can be done by the next layer.
    epsilon: Small float added to variance to avoid dividing by zero.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    param_initializers: Optional initializers for beta, gamma, moving mean and
      moving variance.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    data_format: A string. `NHWC` (default) and `NCHW` are supported.
    scope: Optional scope for `variable_scope`.

  Returns:
    A `Tensor` representing the output of the operation.

  Raises:
    ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
    ValueError: If the rank of `inputs` is undefined.
    ValueError: If rank or channels dimension of `inputs` is undefined.
  """
  inputs = ops.convert_to_tensor(inputs)
  inputs_shape = inputs.shape
  inputs_rank = inputs.shape.ndims

  if inputs_rank is None:
    raise ValueError('Inputs %s has undefined rank.' % inputs.name)
  if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
    raise ValueError('data_format has to be either NCHW or NHWC.')

  with variable_scope.variable_scope(
      scope, 'InstanceNorm', [inputs], reuse=reuse) as sc:
    if data_format == DATA_FORMAT_NCHW:
      reduction_axis = 1
      # For NCHW format, rather than relying on implicit broadcasting, we
      # explicitly reshape the params to params_shape_broadcast when computing
      # the moments and the batch normalization.
      params_shape_broadcast = list(
          [1, inputs_shape[1].value] + [1 for _ in range(2, inputs_rank)])
    else:
      reduction_axis = inputs_rank - 1
      params_shape_broadcast = None
    moments_axes = list(range(inputs_rank))
    del moments_axes[reduction_axis]
    del moments_axes[0]
    params_shape = inputs_shape[reduction_axis:reduction_axis + 1]
    if not params_shape.is_fully_defined():
      raise ValueError('Inputs %s has undefined channels dimension %s.' % (
          inputs.name, params_shape))

    # Allocate parameters for the beta and gamma of the normalization.
    beta, gamma = None, None
    dtype = inputs.dtype.base_dtype
    if param_initializers is None:
      param_initializers = {}
    if center:
      beta_collections = utils.get_variable_collections(
          variables_collections, 'beta')
      beta_initializer = param_initializers.get(
          'beta', init_ops.zeros_initializer())
      beta = variables.model_variable('beta',
                                      shape=params_shape,
                                      dtype=dtype,
                                      initializer=beta_initializer,
                                      collections=beta_collections,
                                      trainable=trainable)
      if params_shape_broadcast:
        beta = array_ops.reshape(beta, params_shape_broadcast)
    if scale:
      gamma_collections = utils.get_variable_collections(
          variables_collections, 'gamma')
      gamma_initializer = param_initializers.get(
          'gamma', init_ops.ones_initializer())
      gamma = variables.model_variable('gamma',
                                       shape=params_shape,
                                       dtype=dtype,
                                       initializer=gamma_initializer,
                                       collections=gamma_collections,
                                       trainable=trainable)
      if params_shape_broadcast:
        gamma = array_ops.reshape(gamma, params_shape_broadcast)

    # Calculate the moments (instance activations).
    mean, variance = nn.moments(inputs, moments_axes, keep_dims=True)

    # Compute instance normalization.
    outputs = nn.batch_normalization(
        inputs, mean, variance, beta, gamma, epsilon, name='instancenorm')
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
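Instance normalization, as implemented above, takes moments per example over the spatial axes only. A minimal NHWC sketch using the public ops (assumed shapes and variable names, not taken from the contrib implementation above):

# Minimal instance-norm sketch (assumed NHWC layout): per-example, per-channel
# statistics over the spatial dimensions only.
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])  # NHWC, illustrative
gamma = tf.Variable(tf.ones([3]))
beta = tf.Variable(tf.zeros([3]))

# Reduce over height and width; keep_dims so the stats broadcast against `x`.
mean, variance = tf.nn.moments(x, axes=[1, 2], keep_dims=True)
y = tf.nn.batch_normalization(x, mean, variance,
                              offset=beta, scale=gamma,
                              variance_epsilon=1e-6)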
Example 10: call
# ... (the beginning of this method is omitted here) ...
    if (v is not None and
        len(v.get_shape()) != ndims and
        reduction_axes != list(range(ndims - 1))):
      return array_ops.reshape(v, broadcast_shape)
    return v

  scale, offset = _broadcast(self.gamma), _broadcast(self.beta)

  def _compose_transforms(scale, offset, then_scale, then_offset):
    if then_scale is not None:
      scale *= then_scale
      offset *= then_scale
    if then_offset is not None:
      offset += then_offset
    return (scale, offset)

  # Determine a boolean value for `training`: could be True, False, or None.
  training_value = utils.constant_value(training)
  if training_value is not False:
    if self.adjustment:
      adj_scale, adj_bias = self.adjustment(array_ops.shape(inputs))
      # Adjust only during training.
      adj_scale = utils.smart_cond(training,
                                   lambda: adj_scale,
                                   lambda: array_ops.ones_like(adj_scale))
      adj_bias = utils.smart_cond(training,
                                  lambda: adj_bias,
                                  lambda: array_ops.zeros_like(adj_bias))
      scale, offset = _compose_transforms(adj_scale, adj_bias, scale, offset)

    # Some of the computations here are not necessary when training==False
    # but not a constant. However, this makes the code simpler.
    keep_dims = self.virtual_batch_size is not None or len(self.axis) > 1
    mean, variance = nn.moments(inputs, reduction_axes, keep_dims=keep_dims)

    moving_mean = self.moving_mean
    moving_variance = self.moving_variance

    mean = utils.smart_cond(training,
                            lambda: mean,
                            lambda: moving_mean)
    variance = utils.smart_cond(training,
                                lambda: variance,
                                lambda: moving_variance)

    if self.renorm:
      r, d, new_mean, new_variance = self._renorm_correction_and_moments(
          mean, variance, training)
      # When training, the normalized values (say, x) will be transformed as
      # x * gamma + beta without renorm, and (x * r + d) * gamma + beta
      # = x * (r * gamma) + (d * gamma + beta) with renorm.
      r = _broadcast(array_ops.stop_gradient(r, name='renorm_r'))
      d = _broadcast(array_ops.stop_gradient(d, name='renorm_d'))
      scale, offset = _compose_transforms(r, d, scale, offset)
    else:
      new_mean, new_variance = mean, variance

    if self.virtual_batch_size is not None:
      # This isn't strictly correct since in ghost batch norm, you are
      # supposed to sequentially update the moving_mean and moving_variance
      # with each sub-batch. However, since the moving statistics are only
      # used during evaluation, it is more efficient to just update in one
      # step and should not make a significant difference in the result.
      new_mean = math_ops.reduce_mean(new_mean,
                                      axis=1, keep_dims=True)
      new_variance = math_ops.reduce_mean(new_variance,
                                          axis=1, keep_dims=True)

    def _do_update(var, value):
      return moving_averages.assign_moving_average(
          var, value, self.momentum, zero_debias=False)

    mean_update = utils.smart_cond(
        training,
        lambda: _do_update(self.moving_mean, new_mean),
        lambda: self.moving_mean)
    variance_update = utils.smart_cond(
        training,
        lambda: _do_update(self.moving_variance, new_variance),
        lambda: self.moving_variance)
    if context.in_graph_mode():
      self.add_update(mean_update, inputs=inputs)
      self.add_update(variance_update, inputs=inputs)
  else:
    mean, variance = self.moving_mean, self.moving_variance

  outputs = nn.batch_normalization(inputs,
                                   _broadcast(mean),
                                   _broadcast(variance),
                                   offset,
                                   scale,
                                   self.epsilon)
  # If some components of the shape got lost due to adjustments, fix that.
  outputs.set_shape(input_shape)

  if self.virtual_batch_size is not None:
    return undo_virtual_batching(outputs)

  return outputs
Example 11: batch_norm_mine_old
def batch_norm_mine_old(inputs,
                        decay=0.999,
                        center=True,
                        scale=False,
                        epsilon=0.001,
                        activation_fn=None,
                        param_initializers=None,
                        param_regularizers=None,
                        updates_collections=ops.GraphKeys.UPDATE_OPS,
                        is_training=True,
                        reuse=None,
                        variables_collections=None,
                        outputs_collections=None,
                        trainable=True,
                        batch_weights=None,
                        fused=False,
                        data_format=DATA_FORMAT_NHWC,
                        zero_debias_moving_mean=False,
                        scope=None,
                        renorm=False,
                        renorm_clipping=None,
                        renorm_decay=0.99):
  """
  This earlier version of my modification to batch norm uses current_mean and
  current_variance if is_training is True, and moving_mean and moving_variance
  otherwise. This was leading to a large divergence between the results
  depending on whether is_training was set to True or not. I think ideally it
  should always use moving_mean and moving_variance; batch_norm_mine does this.

  Adds a Batch Normalization layer from http://arxiv.org/abs/1502.03167.
  Copy of tensorflow.contrib.layers.

  Args:
    inputs: A tensor with 2 or more dimensions, where the first dimension has
      `batch_size`. The normalization is over all but the last dimension if
      `data_format` is `NHWC` and the second dimension if `data_format` is
      `NCHW`.
    decay: Decay for the moving average. Reasonable values for `decay` are close
      to 1.0, typically in the multiple-nines range: 0.999, 0.99, 0.9, etc.
      Lower `decay` value (recommend trying `decay`=0.9) if model experiences
      reasonably good training performance but poor validation and/or test
      performance. Try zero_debias_moving_mean=True for improved stability.
    center: If True, add offset of `beta` to normalized tensor. If False, `beta`
      is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is
      not used. When the next layer is linear (also e.g. `nn.relu`), this can be
      disabled since the scaling can be done by the next layer.
    epsilon: Small float added to variance to avoid dividing by zero.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    param_initializers: Optional initializers for beta, gamma, moving mean and
      moving variance.
    param_regularizers: Optional regularizer for beta and gamma.
    updates_collections: Collections to collect the update ops for computation.
      The updates_ops need to be executed with the train_op.
      If None, a control dependency would be added to make sure the updates are
      computed in place.
    is_training: Whether or not the layer is in training mode. In training mode
      it would accumulate the statistics of the moments into `moving_mean` and
      `moving_variance` using an exponential moving average with the given
      `decay`. When it is not in training mode then it would use the values of
      the `moving_mean` and the `moving_variance`.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    batch_weights: An optional tensor of shape `[batch_size]`,
      containing a frequency weight for each batch item. If present,
      then the batch normalization uses weighted mean and
      variance. (This can be used to correct for bias in training
      example selection.)
    fused: Use nn.fused_batch_norm if True, nn.batch_normalization otherwise.
    data_format: A string. `NHWC` (default) and `NCHW` are supported.
    zero_debias_moving_mean: Use zero_debias for moving_mean. It creates a new
      pair of variables 'moving_mean/biased' and 'moving_mean/local_step'.
    scope: Optional scope for `variable_scope`.
    renorm: Whether to use Batch Renormalization
      (https://arxiv.org/abs/1702.03275). This adds extra variables during
      training. The inference is the same for either value of this parameter.
    renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to
      scalar `Tensors` used to clip the renorm correction. The correction
      `(r, d)` is used as `corrected_value = normalized_value * r + d`, with
      `r` clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin,
      dmax are set to inf, 0, inf, respectively.
    renorm_decay: Momentum used to update the moving means and standard
      deviations with renorm. Unlike `momentum`, this affects training
      and should be neither too small (which would add noise) nor too large
      (which would give stale estimates). Note that `decay` is still applied
      to get the means and variances for inference.

  Returns:
    A `Tensor` representing the output of the operation.

  Raises:
    ValueError: If `batch_weights` is not None and `fused` is True.
    ValueError: If `param_regularizers` is not None and `fused` is True.
    ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
  """
  # ... (the rest of the code is omitted here) ...
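For reference, the renorm_clipping argument described in the docstring above is a plain dict keyed by 'rmax', 'rmin', and 'dmax'. The values below are illustrative assumptions, not recommendations from the source:

# Illustrative renorm clipping bounds (assumed values): r is clipped to
# [rmin, rmax] and d to [-dmax, dmax], matching the docstring above.
renorm_clipping = {'rmax': 3.0, 'rmin': 1.0 / 3.0, 'dmax': 5.0}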
Example 12: call
# ... (the beginning of this method is omitted here) ...
    inverse_permutation = [x[1] for x in
                           sorted(zip(permutation, range(ndims)))]
    inputs = array_ops.transpose(inputs, perm=permutation)

    # Combine the axis and num_virtual_batch dimension in order to take
    # advantage of fused batch normalization
    combined_dim = expanded_shape[1] * expanded_shape[axis]
    perm_shape = [-1] + inputs.shape.as_list()[1:]
    combined_shape = (perm_shape[:axis - 1] +
                      [combined_dim] +
                      perm_shape[axis + 1:])
    inputs = array_ops.reshape(inputs, combined_shape)
    # After the above reshape, the batch norm axis is the original self.axis

    # Undoes the reshaping and transposing tricks done above
    def undo_virtual_batching(outputs):
      outputs = array_ops.reshape(outputs, perm_shape)
      outputs = array_ops.transpose(outputs, perm=inverse_permutation)
      outputs = array_ops.reshape(outputs, original_shape)
      return outputs

  if self.fused:
    outputs = self._fused_batch_norm(inputs, training=training)
    if self.num_virtual_batches > 1:
      return undo_virtual_batching(outputs)
    return outputs

  # First, compute the axes along which to reduce the mean / variance,
  # as well as the broadcast shape to be used for all parameters.
  input_shape = inputs.get_shape()
  ndim = len(input_shape)
  reduction_axes = list(range(len(input_shape)))
  del reduction_axes[self.axis]
  broadcast_shape = [1] * len(input_shape)
  broadcast_shape[self.axis] = input_shape[self.axis].value

  # Determines whether broadcasting is needed.
  needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1])

  scale, offset = self.gamma, self.beta

  # Determine a boolean value for `training`: could be True, False, or None.
  training_value = utils.constant_value(training)
  if training_value is not False:
    # Some of the computations here are not necessary when training==False
    # but not a constant. However, this makes the code simpler.
    mean, variance = nn.moments(inputs, reduction_axes)
    mean = _smart_select(training,
                         lambda: mean,
                         lambda: self.moving_mean)
    variance = _smart_select(training,
                             lambda: variance,
                             lambda: self.moving_variance)

    if self.renorm:
      r, d, new_mean, new_variance = self._renorm_correction_and_moments(
          mean, variance, training)
      # When training, the normalized values (say, x) will be transformed as
      # x * gamma + beta without renorm, and (x * r + d) * gamma + beta
      # = x * (r * gamma) + (d * gamma + beta) with renorm.
      scale = array_ops.stop_gradient(r, name='renorm_r')
      offset = array_ops.stop_gradient(d, name='renorm_d')
      if self.gamma is not None:
        scale *= self.gamma
        offset *= self.gamma
      if self.beta is not None:
        offset += self.beta
    else:
      new_mean, new_variance = mean, variance

    # Update moving averages when training, and prevent updates otherwise.
    decay = _smart_select(training, lambda: self.momentum, lambda: 1.)
    mean_update = moving_averages.assign_moving_average(
        self.moving_mean, new_mean, decay, zero_debias=False)
    variance_update = moving_averages.assign_moving_average(
        self.moving_variance, new_variance, decay, zero_debias=False)
    if context.in_graph_mode():
      self.add_update(mean_update, inputs=inputs)
      self.add_update(variance_update, inputs=inputs)
  else:
    mean, variance = self.moving_mean, self.moving_variance

  def _broadcast(v):
    if needs_broadcasting and v is not None:
      # In this case we must explicitly broadcast all parameters.
      return array_ops.reshape(v, broadcast_shape)
    return v

  outputs = nn.batch_normalization(inputs,
                                   _broadcast(mean),
                                   _broadcast(variance),
                                   _broadcast(offset),
                                   _broadcast(scale),
                                   self.epsilon)

  if self.num_virtual_batches > 1:
    return undo_virtual_batching(outputs)

  return outputs
Example 13: batch_norm
def batch_norm(inputs,
               decay=0.999,
               center=True,
               scale=False,
               epsilon=0.001,
               updates_collections=ops.GraphKeys.UPDATE_OPS,
               is_training=True,
               reuse=None,
               variables_collections=None,
               outputs_collections=None,
               trainable=True,
               scope=None):
  """Code modification of tensorflow/contrib/layers/python/layers/layers.py
  """
  with variable_scope.variable_op_scope([inputs],
                                        scope, 'BatchNorm', reuse=reuse) as sc:
    inputs = ops.convert_to_tensor(inputs)
    inputs_shape = inputs.get_shape()
    inputs_rank = inputs_shape.ndims
    if inputs_rank is None:
      raise ValueError('Inputs %s has undefined rank.' % inputs.name)
    dtype = inputs.dtype.base_dtype
    axis = list(range(inputs_rank - 1))
    params_shape = inputs_shape[-1:]
    if not params_shape.is_fully_defined():
      raise ValueError('Inputs %s has undefined last dimension %s.' % (
          inputs.name, params_shape))
    # Allocate parameters for the beta and gamma of the normalization.
    beta, gamma = None, None
    if center:
      beta_collections = utils.get_variable_collections(variables_collections,
                                                        'beta')
      beta = variables.model_variable('beta',
                                      shape=params_shape,
                                      dtype=dtype,
                                      initializer=init_ops.zeros_initializer,
                                      collections=beta_collections,
                                      trainable=trainable)
    if scale:
      gamma_collections = utils.get_variable_collections(variables_collections,
                                                         'gamma')
      gamma = variables.model_variable('gamma',
                                       shape=params_shape,
                                       dtype=dtype,
                                       initializer=init_ops.ones_initializer,
                                       collections=gamma_collections,
                                       trainable=trainable)
    # Create moving_mean and moving_variance variables and add them to the
    # appropriate collections.
    moving_mean_collections = utils.get_variable_collections(
        variables_collections, 'moving_mean')
    moving_mean = variables.model_variable(
        'moving_mean',
        shape=params_shape,
        dtype=dtype,
        initializer=init_ops.zeros_initializer,
        trainable=False,
        collections=moving_mean_collections)
    moving_variance_collections = utils.get_variable_collections(
        variables_collections, 'moving_variance')
    moving_variance = variables.model_variable(
        'moving_variance',
        shape=params_shape,
        dtype=dtype,
        initializer=init_ops.ones_initializer,
        trainable=False,
        collections=moving_variance_collections)
    # Calculate the moments based on the individual batch.
    mean, variance = nn.moments(inputs, axis, shift=moving_mean)
    # Update the moving_mean and moving_variance moments.
    update_moving_mean = moving_averages.assign_moving_average(
        moving_mean, mean, decay)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, decay)
    if updates_collections is None:
      # Make sure the updates are computed here.
      with ops.control_dependencies([update_moving_mean,
                                     update_moving_variance]):
        outputs = nn.batch_normalization(
            inputs, mean, variance, beta, gamma, epsilon)
    else:
      # Collect the updates to be computed later.
      ops.add_to_collections(updates_collections, update_moving_mean)
      ops.add_to_collections(updates_collections, update_moving_variance)
      outputs = nn.batch_normalization(
          inputs, mean, variance, beta, gamma, epsilon)
    # At inference time, normalize with the moving statistics instead.
    test_outputs = nn.batch_normalization(
        inputs, moving_mean, moving_variance, beta, gamma, epsilon)
    outputs = tf.cond(is_training, lambda: outputs, lambda: test_outputs)
    outputs.set_shape(inputs_shape)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
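Because is_training is passed to tf.cond in this example, it must be a boolean tensor rather than a Python bool. One way to provide it (an assumed usage pattern, not shown in the snippet) is a placeholder with a default value:

# Assumed usage: a scalar boolean tensor that defaults to inference and can be
# flipped per step via feed_dict.
import tensorflow as tf

is_training = tf.placeholder_with_default(False, shape=[], name='is_training')

# The batch_norm(...) above would then receive this tensor, and a session call
# can override it, e.g. sess.run(train_op, feed_dict={is_training: True}).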
Example 14: call
def call(self, inputs, training=False):
  # First, compute the axes along which to reduce the mean / variance,
  # as well as the broadcast shape to be used for all parameters.
  input_shape = inputs.get_shape()
  ndim = len(input_shape)
  reduction_axes = list(range(len(input_shape)))
  del reduction_axes[self.axis]
  broadcast_shape = [1] * len(input_shape)
  broadcast_shape[self.axis] = input_shape[self.axis].value

  # Determines whether broadcasting is needed.
  needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1])

  # Determine the boolean training value. May be False, True, or None.
  # If None, it is assumed that `training` is a variable to be used in `cond`.
  if isinstance(training, bool):
    training_bool = training
  else:
    try:
      training_bool = tensor_util.constant_value(training)
    except TypeError:
      training_bool = None

  # Obtain the current batch mean and variance, if necessary.
  if training_bool is not False:
    # Use a copy of moving_mean as a shift to compute more reliable moments.
    shift = math_ops.add(self.moving_mean, 0)
    if needs_broadcasting:
      shift = array_ops.reshape(shift, broadcast_shape)
      broadcast_mean, broadcast_variance = nn.moments(
          inputs, reduction_axes, shift=shift, keep_dims=True)
      mean = array_ops.reshape(broadcast_mean, [-1])
      variance = array_ops.reshape(broadcast_variance, [-1])
    else:
      mean, variance = nn.moments(inputs, reduction_axes, shift=shift)

  # Prepare updates if necessary.
  if training_bool is not False and not self.updates:
    mean_update = moving_averages.assign_moving_average(
        self.moving_mean, mean, self.momentum, zero_debias=False)
    variance_update = moving_averages.assign_moving_average(
        self.moving_variance, variance, self.momentum, zero_debias=False)
    # In the future this should be refactored into a self.add_update
    # method in order to allow for instance-based BN layer sharing
    # across unrelated input streams (e.g. like in Keras).
    self.updates.append(mean_update)
    self.updates.append(variance_update)

  # Normalize batch.
  if needs_broadcasting:
    # In this case we must explicitly broadcast all parameters.
    broadcast_moving_mean = array_ops.reshape(self.moving_mean,
                                              broadcast_shape)
    broadcast_moving_variance = array_ops.reshape(self.moving_variance,
                                                  broadcast_shape)
    if self.center:
      broadcast_beta = array_ops.reshape(self.beta, broadcast_shape)
    else:
      broadcast_beta = None
    if self.scale:
      broadcast_gamma = array_ops.reshape(self.gamma, broadcast_shape)
    else:
      broadcast_gamma = None
    if training_bool is not False:
      normed_inputs_training = nn.batch_normalization(inputs,
                                                      broadcast_mean,
                                                      broadcast_variance,
                                                      broadcast_beta,
                                                      broadcast_gamma,
                                                      self.epsilon)
    normed_inputs = nn.batch_normalization(inputs,
                                           broadcast_moving_mean,
                                           broadcast_moving_variance,
                                           broadcast_beta,
                                           broadcast_gamma,
                                           self.epsilon)
  else:
    # No need for broadcasting.
    if training_bool is not False:
      normed_inputs_training = nn.batch_normalization(
          inputs,
          mean,
          variance,
          self.beta if self.center else None,
          self.gamma if self.scale else None,
          self.epsilon)
    normed_inputs = nn.batch_normalization(inputs,
                                           self.moving_mean,
                                           self.moving_variance,
                                           self.beta if self.center else None,
                                           self.gamma if self.scale else None,
                                           self.epsilon)

  # Return the proper output depending on the boolean training phase.
  if training_bool is True:
    return normed_inputs_training
  if training_bool is False:
    return normed_inputs
  return control_flow_ops.cond(training,
# ... (the rest of the code is omitted here) ...