This article collects and summarizes typical usage examples of the Python function tensorflow.python.ops.control_flow_ops.tuple. If you are wondering what exactly the Python tuple function does, how to use it, or what it looks like in real code, the curated examples below may help.
The following presents 15 code examples of the tuple function, sorted by popularity by default.
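Before the examples, here is a minimal sketch of what control_flow_ops.tuple does (the tensors here are made up for illustration, using the TF1 graph API): it returns new tensors with the same values as its inputs, but each output carries control dependencies on all inputs, so none of them can be produced until every input has been computed. This is commonly used to synchronize side effects such as variable initializers or assignments.

import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

a = tf.constant(1.0) + 2.0
b = tf.constant(10.0) * 3.0
# a_sync and b_sync have the same values as a and b, but neither can be
# returned until both a and b have been computed.
a_sync, b_sync = control_flow_ops.tuple([a, b])

with tf.Session() as sess:
    print(sess.run([a_sync, b_sync]))  # [3.0, 30.0]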
Example 1: testIndexedSlices
def testIndexedSlices(self):
  for v1_first in [True, False]:
    with self.test_session():
      v1 = tf.Variable(
          np.array([[0.0, 1.0], [10.0, 11.0], [20.0, 21.0]]).astype(np.float32))
      v1_at_1 = tf.IndexedSlices(
          control_flow_ops.with_dependencies([v1.initializer], v1.ref()),
          tf.constant([1]))
      v2 = tf.Variable(
          np.array([[0.1, 1.1], [10.1, 11.1], [20.1, 21.1]]).astype(np.float32))
      v2_at_1 = tf.IndexedSlices(
          control_flow_ops.with_dependencies([v2.initializer], v2.ref()),
          tf.constant([1]))

      st1, st2 = control_flow_ops.tuple([v1_at_1, v2_at_1])
      g1 = tf.gather(st1.values, st1.indices)
      g2 = tf.gather(st2.values, st2.indices)

      # v1 is not initialized.
      with self.assertRaisesOpError("Attempting to use uninitialized value"):
        v1.eval()

      # v2 is not initialized.
      with self.assertRaisesOpError("Attempting to use uninitialized value"):
        v2.eval()

      if v1_first:
        # Getting g1 initializes v2.
        self.assertAllClose([[10.0, 11.0]], g1.eval())
        self.assertAllClose([[0.1, 1.1], [10.1, 11.1], [20.1, 21.1]], v2.eval())
      else:
        # Getting g2 initializes v1.
        self.assertAllClose([[10.1, 11.1]], g2.eval())
        self.assertAllClose([[0.0, 1.0], [10.0, 11.0], [20.0, 21.0]], v1.eval())
Example 2: compute_gradients
def compute_gradients(self, loss, var_list=None,
                      gate_gradients=GATE_OP,
                      aggregation_method=None,
                      colocate_gradients_with_ops=False,
                      grad_loss=None):
  """Compute gradients of `loss` for the variables in `var_list`.

  This is the first part of `minimize()`. It returns a list
  of (gradient, variable) pairs where "gradient" is the gradient
  for "variable". Note that "gradient" can be a `Tensor`, an
  `IndexedSlices`, or `None` if there is no gradient for the
  given variable.

  Args:
    loss: A Tensor containing the value to minimize.
    var_list: Optional list of `tf.Variable` to update to minimize
      `loss`. Defaults to the list of variables collected in the graph
      under the key `GraphKey.TRAINABLE_VARIABLES`.
    gate_gradients: How to gate the computation of gradients. Can be
      `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
    aggregation_method: Specifies the method used to combine gradient terms.
      Valid values are defined in the class `AggregationMethod`.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.
    grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.

  Returns:
    A list of (gradient, variable) pairs. Variable is always present, but
    gradient can be `None`.

  Raises:
    TypeError: If `var_list` contains anything else than `Variable` objects.
    ValueError: If some arguments are invalid.
  """
  if gate_gradients not in [Optimizer.GATE_NONE, Optimizer.GATE_OP,
                            Optimizer.GATE_GRAPH]:
    raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, "
                     "Optimizer.GATE_OP, Optimizer.GATE_GRAPH. Not %s" %
                     gate_gradients)
  self._assert_valid_dtypes([loss])
  if grad_loss is not None:
    self._assert_valid_dtypes([grad_loss])
  if var_list is None:
    var_list = (
        variables.trainable_variables() +
        ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
  processors = [_get_processor(v) for v in var_list]
  if not var_list:
    raise ValueError("No variables to optimize.")
  var_refs = [p.target() for p in processors]
  grads = gradients.gradients(
      loss, var_refs, grad_ys=grad_loss,
      gate_gradients=(gate_gradients == Optimizer.GATE_OP),
      aggregation_method=aggregation_method,
      colocate_gradients_with_ops=colocate_gradients_with_ops)
  if gate_gradients == Optimizer.GATE_GRAPH:
    grads = control_flow_ops.tuple(grads)
  grads_and_vars = list(zip(grads, var_list))
  self._assert_valid_dtypes([v for g, v in grads_and_vars if g is not None])
  return grads_and_vars
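The `gate_gradients == GATE_GRAPH` branch above is where control_flow_ops.tuple comes in: all gradients are gated together so that no gradient can be consumed before every gradient has been computed. A minimal sketch of how this method might be invoked, assuming the TF1 tf.train.Optimizer API (the loss and variables are made up for illustration):

import tensorflow as tf

w = tf.Variable([1.0, 2.0])
b = tf.Variable([0.5])
loss = tf.reduce_sum(tf.square(w)) + tf.reduce_sum(b)

opt = tf.train.GradientDescentOptimizer(learning_rate=0.1)
# GATE_GRAPH routes the gradients through control_flow_ops.tuple(), so each
# (gradient, variable) pair is available only after all gradients are ready.
grads_and_vars = opt.compute_gradients(
    loss, gate_gradients=tf.train.Optimizer.GATE_GRAPH)
train_op = opt.apply_gradients(grads_and_vars)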
Example 3: compute_gradients
def compute_gradients(self, loss, var_list=None, gate_gradients=GATE_OP,
                      aggregation_method=None, colocate_gradients_with_ops=False):
  """"""
  # Error checking
  if gate_gradients not in [Optimizer.GATE_NONE, Optimizer.GATE_OP,
                            Optimizer.GATE_GRAPH]:
    raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, " +
                     "Optimizer.GATE_OP, Optimizer.GATE_GRAPH. Not %s" % gate_gradients)
  self._assert_valid_dtypes([loss])
  if var_list is None:
    var_list = variables.trainable_variables()
  for x_tm1 in var_list:
    if not isinstance(x_tm1, variables.Variable):
      raise TypeError("Argument is not a tf.Variable: %s" % x_tm1)
  if not var_list:
    raise ValueError("No variables to optimize")

  # The actual stuff
  var_refs = [x_tm1.ref() for x_tm1 in var_list]
  grads = gradients.gradients(loss, var_refs,
                              gate_gradients=(gate_gradients == Optimizer.GATE_OP),
                              aggregation_method=aggregation_method,
                              colocate_gradients_with_ops=colocate_gradients_with_ops)
  if gate_gradients == Optimizer.GATE_GRAPH:
    grads = control_flow_ops.tuple(grads)
  grads_and_vars = list(zip(grads, var_list))
  self._assert_valid_dtypes([x_tm1 for g_t, x_tm1 in grads_and_vars if g_t is not None])
  return grads_and_vars
Example 4: testTensors
def testTensors(self):
  for v1_first in [True, False]:
    with self.test_session():
      v1 = tf.Variable([1.0])
      add1 = tf.add(
          control_flow_ops.with_dependencies([v1.initializer], v1.ref()),
          2.0)
      v2 = tf.Variable([10.0])
      add2 = tf.add(
          control_flow_ops.with_dependencies([v2.initializer], v2.ref()),
          20.0)
      t1, _, t2 = control_flow_ops.tuple([add1, None, add2])

      # v1 is not initialized.
      with self.assertRaisesOpError("Attempting to use uninitialized value"):
        v1.eval()

      # v2 is not initialized.
      with self.assertRaisesOpError("Attempting to use uninitialized value"):
        v2.eval()

      if v1_first:
        # Getting t1 initializes v2.
        self.assertAllClose([3.0], t1.eval())
        self.assertAllClose([10.0], v2.eval())
      else:
        # Getting t2 initializes v1.
        self.assertAllClose([30.0], t2.eval())
        self.assertAllClose([1.0], v1.eval())
Example 5: grad_fn
def grad_fn(inputs, variables, outputs, output_grads):
  """Recompute outputs for gradient computation."""
  del outputs
  # Recompute outputs
  with framework_ops.control_dependencies(output_grads):
    if use_data_dep_:
      inputs = _force_data_dependency(output_grads, inputs)
    with contrib_framework_ops.arg_scope(cached_arg_scope[0]):
      with variable_scope.variable_scope(cached_vs[0], reuse=True):
        outputs = fn(*inputs)

  if not (isinstance(outputs, list) or isinstance(outputs, tuple)):
    outputs = [outputs]
  outputs = list(outputs)
  grads = gradients_impl.gradients(outputs, inputs + variables, output_grads)

  if tupleize_grads:
    if use_data_dep_:
      grads = _tuple_with_data_dep(grads)
    else:
      grads = control_flow_ops.tuple(grads)

  grad_inputs = grads[:len(inputs)]
  grad_vars = grads[len(inputs):]
  return grad_inputs, grad_vars
Example 6: _rev_layer_forward
def _rev_layer_forward(xs, f, g, f_side_input, g_side_input,
                       gate_outputs=False):
  """Forward for 1 reversible layer."""
  x1, x2 = xs
  y1 = x1 + (f(x2, f_side_input) if f_side_input else f(x2))
  y2 = x2 + (g(y1, g_side_input) if g_side_input else g(y1))
  if gate_outputs:
    return control_flow_ops.tuple([y1, y2])
  else:
    return (y1, y2)
Example 7: _recomputing_grad_fn
def _recomputing_grad_fn(compute_fn,
                         original_args,
                         original_vars,
                         output_grads,
                         grad_fn_variables,
                         use_data_dep,
                         tupleize_grads,
                         arg_scope,
                         var_scope,
                         has_is_recompute_kwarg):
  """Grad fn for recompute_grad."""
  variables = grad_fn_variables or []

  # Identity ops around the inputs ensures correct gradient graph-walking.
  inputs = [array_ops.identity(x) for x in list(original_args)]

  # Recompute outputs
  # Use a control dependency to ensure that the recompute is not eliminated by
  # CSE and that it happens on the backwards pass.
  ctrl_dep_grads = [g for g in output_grads if g is not None]
  with framework_ops.control_dependencies(ctrl_dep_grads):
    if use_data_dep:
      inputs = _force_data_dependency(output_grads, inputs)
    # Re-enter scopes
    with contrib_framework_ops.arg_scope(arg_scope):
      with variable_scope.variable_scope(var_scope, reuse=True):
        # Re-call the function and ensure that the touched variables are the
        # same as in the first call.
        with backprop.GradientTape() as tape:
          fn_kwargs = {}
          if has_is_recompute_kwarg:
            fn_kwargs["is_recomputing"] = True
          outputs = compute_fn(*inputs, **fn_kwargs)
        recompute_vars = set(tape.watched_variables())
        if original_vars != recompute_vars:
          raise ValueError(_WRONG_VARS_ERR)

        if not isinstance(outputs, (list, tuple)):
          outputs = [outputs]
        outputs = list(outputs)

        # Compute gradients
        grads = gradients_impl.gradients(outputs, inputs + variables,
                                         output_grads)

        if tupleize_grads:
          if use_data_dep:
            grads = _tuple_with_data_dep(grads)
          else:
            grads = control_flow_ops.tuple(grads)

        grad_inputs = grads[:len(inputs)]
        grad_vars = grads[len(inputs):]
        return grad_inputs, grad_vars
Example 8: compute_gradients
def compute_gradients(self, loss, var_list=None, gate_gradients=GATE_OP,
                      aggregation_method=None):
  """Compute gradients of `loss` for the variables in `var_list`.

  This is the first part of `minimize()`. It returns a list
  of (gradient, variable) pairs where "gradient" is the gradient
  for "variable". Note that "gradient" can be a `Tensor`, an
  `IndexedSlices`, or `None` if there is no gradient for the
  given variable.

  Args:
    loss: A Tensor containing the value to minimize.
    var_list: Optional list of tf.Variable to update to minimize
      `loss`. Defaults to the list of variables collected in the graph
      under the key `GraphKey.TRAINABLE_VARIABLES`.
    gate_gradients: How to gate the computation of gradients. Can be
      `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
    aggregation_method: Specifies the method used to combine gradient terms.
      Valid values are defined in the class `AggregationMethod`.

  Returns:
    A list of (gradient, variable) pairs.

  Raises:
    TypeError: If `var_list` contains anything else than `Variable` objects.
    ValueError: If some arguments are invalid.
  """
  if gate_gradients not in [Optimizer.GATE_NONE, Optimizer.GATE_OP,
                            Optimizer.GATE_GRAPH]:
    raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, "
                     "Optimizer.GATE_OP, Optimizer.GATE_GRAPH. Not %s" %
                     gate_gradients)
  self._assert_valid_dtypes([loss])
  if var_list is None:
    var_list = variables.trainable_variables()
  for var in var_list:
    if not isinstance(var, variables.Variable):
      raise TypeError("Argument is not a tf.Variable: %s" % var)
  if not var_list:
    raise ValueError("No variables to optimize")
  grads = gradients.gradients(
      loss, var_list, gate_gradients=(gate_gradients == Optimizer.GATE_OP),
      aggregation_method=aggregation_method)
  if gate_gradients == Optimizer.GATE_GRAPH:
    grads = control_flow_ops.tuple(grads)
  grads_and_vars = list(zip(grads, var_list))
  self._assert_valid_dtypes([v for g, v in grads_and_vars if g is not None])
  return grads_and_vars
Example 9: _rev_layer_backward
def _rev_layer_backward(ys, grad_ys, f, g, f_vars, f_side_input, g_vars,
                        g_side_input):
  """Backprop for 1 layer."""
  y1, y2 = ys
  grad_y1, grad_y2 = grad_ys

  # Reconstruct intermediates and inputs (x1, x2)
  # stop_gradients required on fn inputs to prevent infinite recursion into this
  # grad function on the calls to gradients.
  y1_stop = array_ops.stop_gradient(y1)
  g_side_input = [array_ops.stop_gradient(t) for t in g_side_input]
  gy1 = g(y1_stop, g_side_input) if g_side_input else g(y1_stop)

  x2 = y2 - gy1
  x2_stop = array_ops.stop_gradient(x2)
  f_side_input = [array_ops.stop_gradient(t) for t in f_side_input]
  fx2 = f(x2_stop, f_side_input) if f_side_input else f(x2_stop)

  x1 = y1 - fx2

  # Compute gradients wrt to inputs
  # dL/dy2 * dG(y1)/y1
  grad_gy1_y2 = gradients_impl.gradients(gy1, y1_stop, grad_y2)[0]
  grad_x1 = grad_y1 + grad_gy1_y2
  grad_x2 = (
      gradients_impl.gradients(fx2, x2_stop, grad_y1)[0] + grad_y2 +
      gradients_impl.gradients(fx2, x2_stop, grad_gy1_y2)[0])

  # Compute gradients wrt to vars and side inputs in f and g
  grads1 = gradients_impl.gradients(gy1, g_vars + g_side_input, grad_y2)
  grad_g_vars, grad_g_side = grads1[:len(g_vars)], grads1[len(g_vars):]
  grads2 = gradients_impl.gradients(fx2, f_vars + f_side_input, grad_y1)
  grad_f_y1, grad_f_side1 = grads2[:len(f_vars)], grads2[len(f_vars):]
  grads3 = gradients_impl.gradients(fx2, f_vars + f_side_input, grad_gy1_y2)
  grad_f_y2, grad_f_side2 = grads3[:len(f_vars)], grads3[len(f_vars):]
  grad_f_vars = _acc_grads(grad_f_y1, grad_f_y2)
  grad_f_side = _acc_grads(grad_f_side1, grad_f_side2)

  # Put returns in a tuple to ensure a constant memory budget (i.e. don't want
  # the subsequent layer to start computing and consuming memory based on a
  # subset of these values).
  outputs = ((x1, x2), (grad_x1, grad_x2), (grad_f_vars, grad_f_side),
             (grad_g_vars, grad_g_side))
  tupled = control_flow_ops.tuple(nest.flatten(outputs))
  return nest.pack_sequence_as(outputs, tupled)
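The last two lines show a reusable pattern: flatten an arbitrary nested structure of tensors, gate everything together with control_flow_ops.tuple, then restore the original nesting. A standalone sketch of that pattern, independent of the reversible-layer code above (the structure here is made up for illustration):

import tensorflow as tf
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.util import nest

# Any nested structure of tensors works.
outputs = ((tf.constant(1.0), tf.constant(2.0)),
           {"grad": tf.constant(3.0)})
# Gate every tensor on every other tensor, then re-pack the nesting.
tupled = control_flow_ops.tuple(nest.flatten(outputs))
outputs = nest.pack_sequence_as(outputs, tupled)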
Example 10: compute_gradients
def compute_gradients(self, loss, var_list=None, gate_gradients=GATE_OP):
  """Compute gradients of "loss" for the variables in "var_list".

  This is the first part of minimize(). It returns a list
  of (gradient, variable) pairs where "gradient" is the gradient
  for "variable". Note that "gradient" can be a Tensor, a
  IndexedSlices, or None if there is no gradient for the
  given variable.

  Args:
    loss: A Tensor containing the value to minimize.
    var_list: Optional list of variables.Variable to update to minimize
      "loss". Defaults to the list of variables collected in the graph
      under the key GraphKey.TRAINABLE_VARIABLES.
    gate_gradients: How to gate the computation of gradients. Can be
      GATE_NONE, GATE_OP, or GATE_GRAPH.

  Returns:
    A list of (gradient, variable) pairs.

  Raises:
    TypeError: If var_list contains anything else than variables.Variable.
    ValueError: If some arguments are invalid.
  """
  if gate_gradients not in [Optimizer.GATE_NONE, Optimizer.GATE_OP,
                            Optimizer.GATE_GRAPH]:
    raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, "
                     "Optimizer.GATE_OP, Optimizer.GATE_GRAPH. Not %s" %
                     gate_gradients)
  self._assert_valid_dtypes([loss])
  if var_list is None:
    var_list = variables.trainable_variables()
  for var in var_list:
    if not isinstance(var, variables.Variable):
      raise TypeError("Argument is not a variables.Variable: %s" % var)
  grads = gradients.gradients(
      loss, var_list, gate_gradients=(gate_gradients == Optimizer.GATE_OP))
  if gate_gradients == Optimizer.GATE_GRAPH:
    grads = control_flow_ops.tuple(grads)
  grads_and_vars = list(zip(grads, var_list))
  self._assert_valid_dtypes([v for g, v in grads_and_vars if g is not None])
  return grads_and_vars
Example 11: grad_fn
def grad_fn(*output_grads, **kwargs):
  """Recompute outputs for gradient computation."""
  variables = []
  if original_vars:
    variables = kwargs["variables"]
  if set(variables) != original_vars:
    raise ValueError(_WRONG_VARS_ERR)
  del kwargs
  inputs = list(args)
  # Recompute outputs
  with framework_ops.control_dependencies(output_grads):
    if use_data_dep_:
      inputs = _force_data_dependency(output_grads, inputs)
    with contrib_framework_ops.arg_scope(arg_scope):
      with variable_scope.variable_scope(vs, reuse=True):
        with backprop.GradientTape() as tape:
          fn_kwargs = {}
          if has_is_recompute_kwarg:
            fn_kwargs["is_recomputing"] = True
          outputs = fn(*inputs, **fn_kwargs)
        recompute_vars = set(tape.watched_variables())
        if original_vars != recompute_vars:
          raise ValueError(_WRONG_VARS_ERR)

  if not (isinstance(outputs, list) or isinstance(outputs, tuple)):
    outputs = [outputs]
  outputs = list(outputs)
  grads = gradients_impl.gradients(outputs, inputs + variables,
                                   output_grads)

  if tupleize_grads:
    if use_data_dep_:
      grads = _tuple_with_data_dep(grads)
    else:
      grads = control_flow_ops.tuple(grads)

  grad_inputs = grads[:len(inputs)]
  grad_vars = grads[len(inputs):]
  return grad_inputs, grad_vars
Example 12: _grad_fn
def _grad_fn(output_grads, variables=None):
  """Recompute outputs for gradient computation."""
  variables = variables or []
  if original_vars:
    assert variables, ("Fn created variables but the variables were not "
                       "passed to the gradient fn.")
    if set(variables) != original_vars:
      raise ValueError(_WRONG_VARS_ERR)
  inputs = [array_ops.identity(x) for x in list(args)]
  # Recompute outputs
  with framework_ops.control_dependencies(output_grads):
    if use_data_dep_:
      inputs = _force_data_dependency(output_grads, inputs)
    with contrib_framework_ops.arg_scope(arg_scope):
      with variable_scope.variable_scope(vs, reuse=True):
        with backprop.GradientTape() as tape:
          fn_kwargs = {}
          if has_is_recompute_kwarg:
            fn_kwargs["is_recomputing"] = True
          outputs = fn(*inputs, **fn_kwargs)
        recompute_vars = set(tape.watched_variables())
        if original_vars != recompute_vars:
          raise ValueError(_WRONG_VARS_ERR)

  if not isinstance(outputs, (list, tuple)):
    outputs = [outputs]
  outputs = list(outputs)
  grads = gradients_impl.gradients(outputs, inputs + variables,
                                   output_grads)

  if tupleize_grads:
    if use_data_dep_:
      grads = _tuple_with_data_dep(grads)
    else:
      grads = control_flow_ops.tuple(grads)

  grad_inputs = grads[:len(inputs)]
  grad_vars = grads[len(inputs):]
  return grad_inputs, grad_vars
Example 13: _update
def _update(self, var, fn, *args, **kwargs):
  # TODO(jhseu): Consider supporting grouped==False.
  assert isinstance(var, values.TPUMirroredVariable)
  if values._enclosing_tpu_context() is not None:  # pylint: disable=protected-access
    return fn(var, *args, **kwargs)

  # Otherwise, we revert to MirroredStrategy behavior and update each variable
  # directly.
  updates = {}
  for d, v in var._index.items():  # pylint: disable=protected-access
    name = "update_%d" % self._device_index.get(d)
    with ops.device(d), distribute_lib.UpdateContext(d), ops.name_scope(name):
      # If args and kwargs are not mirrored, the value is returned as is.
      updates[d] = fn(v,
                      *values.select_device_mirrored(d, args),
                      **values.select_device_mirrored(d, kwargs))

  # Make a single control dependency to keep the variables mirrored. If one
  # assignment is fetched, then run all assignments.
  sorted_keys = sorted(updates.keys())
  update_tuple = control_flow_ops.tuple([updates[d] for d in sorted_keys])
  for i, d in enumerate(sorted_keys):
    updates[d] = update_tuple[i]
  return values.regroup(updates, values.Mirrored)
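The point of the tuple() call above is that fetching any one per-device update forces all of the updates to run, which keeps the mirrored copies in sync. The same idea in isolation, as a minimal sketch with made-up variables (TF1 graph mode):

import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

v0 = tf.Variable(0.0)
v1 = tf.Variable(0.0)
update0 = v0.assign_add(1.0)
update1 = v1.assign_add(1.0)
# Fetching either gated update runs both assignments.
update0_gated, update1_gated = control_flow_ops.tuple([update0, update1])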
Example 14: compute_gradients
def compute_gradients(self, loss, var_list=None,
                      gate_gradients=GATE_OP,
                      aggregation_method=None,
                      colocate_gradients_with_ops=False,
                      grad_loss=None):
  """Compute gradients of `loss` for the variables in `var_list`.

  This is the first part of `minimize()`. It returns a list
  of (gradient, variable) pairs where "gradient" is the gradient
  for "variable". Note that "gradient" can be a `Tensor`, an
  `IndexedSlices`, or `None` if there is no gradient for the
  given variable.

  Args:
    loss: A Tensor containing the value to minimize or a callable taking
      no arguments which returns the value to minimize. When eager execution
      is enabled it must be a callable.
    var_list: Optional list or tuple of `tf.Variable` to update to minimize
      `loss`. Defaults to the list of variables collected in the graph
      under the key `GraphKeys.TRAINABLE_VARIABLES`.
    gate_gradients: How to gate the computation of gradients. Can be
      `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
    aggregation_method: Specifies the method used to combine gradient terms.
      Valid values are defined in the class `AggregationMethod`.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.
    grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.

  Returns:
    A list of (gradient, variable) pairs. Variable is always present, but
    gradient can be `None`.

  Raises:
    TypeError: If `var_list` contains anything else than `Variable` objects.
    ValueError: If some arguments are invalid.
    RuntimeError: If called with eager execution enabled and `loss` is
      not callable.

  @compatibility(eager)
  When eager execution is enabled, `gate_gradients`, `aggregation_method`,
  and `colocate_gradients_with_ops` are ignored.
  @end_compatibility
  """
  if callable(loss):
    with backprop.GradientTape() as tape:
      if var_list is not None:
        tape.watch(var_list)
      loss_value = loss()
    if var_list is None:
      var_list = tape.watched_variables()
    # TODO(jhseu): Figure out why GradientTape's gradients don't require loss
    # to be executed.
    with ops.control_dependencies([loss_value]):
      grads = tape.gradient(loss_value, var_list, grad_loss)
    return list(zip(grads, var_list))

  # Non-callable/Tensor loss case
  if context.executing_eagerly():
    raise RuntimeError(
        "`loss` passed to Optimizer.compute_gradients should "
        "be a function when eager execution is enabled.")

  if gate_gradients not in [Optimizer.GATE_NONE, Optimizer.GATE_OP,
                            Optimizer.GATE_GRAPH]:
    raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, "
                     "Optimizer.GATE_OP, Optimizer.GATE_GRAPH. Not %s" %
                     gate_gradients)
  self._assert_valid_dtypes([loss])
  if grad_loss is not None:
    self._assert_valid_dtypes([grad_loss])
  if var_list is None:
    var_list = (
        variables.trainable_variables() +
        ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
  else:
    var_list = nest.flatten(var_list)
  # pylint: disable=protected-access
  var_list += ops.get_collection(ops.GraphKeys._STREAMING_MODEL_PORTS)
  # pylint: enable=protected-access
  processors = [_get_processor(v) for v in var_list]
  if not var_list:
    raise ValueError("No variables to optimize.")
  var_refs = [p.target() for p in processors]
  grads = gradients.gradients(
      loss, var_refs, grad_ys=grad_loss,
      gate_gradients=(gate_gradients == Optimizer.GATE_OP),
      aggregation_method=aggregation_method,
      colocate_gradients_with_ops=colocate_gradients_with_ops)
  if gate_gradients == Optimizer.GATE_GRAPH:
    grads = control_flow_ops.tuple(grads)
  grads_and_vars = list(zip(grads, var_list))
  self._assert_valid_dtypes(
      [v for g, v in grads_and_vars
       if g is not None and v.dtype != dtypes.resource])
  return grads_and_vars
Example 15: gradients
def gradients(ys,
              xs,
              grad_ys=None,
              name="gradients",
              colocate_gradients_with_ops=False,
              gate_gradients=False,
              aggregation_method=None):
  """Constructs symbolic partial derivatives of sum of `ys` w.r.t. x in `xs`.

  `ys` and `xs` are each a `Tensor` or a list of tensors. `grad_ys`
  is a list of `Tensor`, holding the gradients received by the
  `ys`. The list must be the same length as `ys`.

  `gradients()` adds ops to the graph to output the partial
  derivatives of `ys` with respect to `xs`. It returns a list of
  `Tensor` of length `len(xs)` where each tensor is the `sum(dy/dx)`
  for y in `ys`.

  `grad_ys` is a list of tensors of the same length as `ys` that holds
  the initial gradients for each y in `ys`. When `grad_ys` is None,
  we fill in a tensor of '1's of the shape of y for each y in `ys`. A
  user can provide their own initial `grad_ys` to compute the
  derivatives using a different initial gradient for each y (e.g., if
  one wanted to weight the gradient differently for each value in
  each y).

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    grad_ys: Optional. A `Tensor` or list of tensors the same size as
      `ys` and holding the gradients computed for each y in `ys`.
    name: Optional name to use for grouping all the gradient ops together.
      defaults to 'gradients'.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.
    gate_gradients: If True, add a tuple around the gradients returned
      for an operations. This avoids some race conditions.
    aggregation_method: Specifies the method used to combine gradient terms.
      Accepted values are constants defined in the class `AggregationMethod`.

  Returns:
    A list of `sum(dy/dx)` for each x in `xs`.

  Raises:
    LookupError: if one of the operations between `x` and `y` does not
      have a registered gradient function.
    ValueError: if the arguments are invalid.
  """
  ys = _AsList(ys)
  xs = _AsList(xs)
  if grad_ys is None:
    grad_ys = [None] * len(ys)
  else:
    grad_ys = _AsList(grad_ys)

  with ops.name_scope(name, "gradients", ys + xs + grad_ys):
    ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
    xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name="x")
    grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)

    # The approach we take here is as follows: Create a list of all ops in the
    # subgraph between the ys and xs. Visit these ops in reverse order of ids
    # to ensure that when we visit an op the gradients w.r.t its outputs have
    # been collected. Then aggregate these gradients if needed, call the op's
    # gradient function, and add the generated gradients to the gradients for
    # its input.

    # Initialize the pending count for ops in the connected subgraph from ys
    # to the xs.
    to_ops = [t.op for t in ys]
    from_ops = [t.op for t in xs]
    pending_count, loop_state = _PendingCount(ops.get_default_graph(), to_ops,
                                              from_ops,
                                              colocate_gradients_with_ops)

    # Iterate over the collected ops.
    #
    # grads: op => list of gradients received on each output endpoint of the
    # op. The gradients for each endpoint are initially collected as a list.
    # When it is time to call the op's gradient function, for each endpoint we
    # aggregate the list of received gradients into a Add() Operation if there
    # is more than one.
    grads = {}

    # Add the initial gradients for the ys.
    for y, grad_y in zip(ys, grad_ys):
      _SetGrad(grads, y, grad_y)

    # Initialize queue with to_ops.
    queue = collections.deque()
    # Add the ops in 'to_ops' into the queue.
    to_ops_set = set()
    for op in to_ops:
      # 'ready' handles the case where one output gradient relies on
      # another output's gradient.
      # pylint: disable=protected-access
      ready = (pending_count[op._id] == 0)
      if ready and op._id not in to_ops_set:
        to_ops_set.add(op._id)
# ......... (remainder of the code omitted here) .........