This article collects typical usage examples of the Python function tensorflow.python.ops.array_ops.gather. If you are wondering what gather does, how to call it, and what it looks like in real code, the curated examples below should help.
Fifteen gather code examples are shown below, sorted by popularity by default. You can upvote the examples you find useful; your votes help the system recommend better Python code samples.
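Before the project-level examples, here is a minimal sketch of what array_ops.gather itself does. The toy tensors and variable names are illustrative only; array_ops.gather behaves like the public tf.gather, selecting slices of a tensor along an axis at the given indices.

import tensorflow as tf
from tensorflow.python.ops import array_ops

# A toy 3x2 parameter matrix and a vector of row indices (illustrative values).
params = tf.constant([[1., 2.],
                      [3., 4.],
                      [5., 6.]])
indices = tf.constant([2, 0])

# gather selects slices of `params` along `axis` (default 0) at the given
# indices: rows 2 and 0 here, producing [[5., 6.], [1., 2.]].
rows = array_ops.gather(params, indices)

# With axis=1 the same op selects columns instead: [[2.], [4.], [6.]].
cols = array_ops.gather(params, [1], axis=1)

The optimizer examples below use exactly this pattern: they gather only the rows of a slot variable touched by a sparse gradient, update those slices, and write them back with a scatter op.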
Example 1: _apply_sparse_shared
def _apply_sparse_shared(self,
                         grad,
                         var,
                         indices,
                         scatter_update,
                         scatter_sub):
  beta1_power, beta2_power = self._get_beta_accumulators()
  beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
  beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
  # m := beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_t = scatter_update(m, indices,
                       beta1_t * array_ops.gather(m, indices) +
                       (1 - beta1_t) * grad)
  # v := beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_t = scatter_update(v, indices,
                       beta2_t * array_ops.gather(v, indices) +
                       (1 - beta2_t) * math_ops.square(grad))
  # variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))
  m_t_slice = array_ops.gather(m_t, indices)
  v_t_slice = array_ops.gather(v_t, indices)
  denominator_slice = math_ops.sqrt(v_t_slice) + epsilon_t
  var_update = scatter_sub(var, indices,
                           lr * m_t_slice / denominator_slice)
  return control_flow_ops.group(var_update, m_t, v_t)
Example 2: quantiles_ready
def quantiles_ready():
  """The subgraph for when the quantiles are ready."""
  quantized_feature = quantile_ops.quantiles([sparse_column_values], [],
                                             [quantile_buckets], [])
  quantized_feature = math_ops.cast(quantized_feature[0], dtypes.int64)
  quantized_feature = array_ops.reshape(quantized_feature, [-1])
  example_indices, _ = array_ops.split(
      sparse_column_indices, num_or_size_splits=2, axis=1)
  example_indices = array_ops.squeeze(example_indices, [1])
  filtered_gradients = array_ops.gather(gradients, example_indices)
  filtered_hessians = array_ops.gather(hessians, example_indices)
  filtered_partition_ids = array_ops.gather(example_partition_ids,
                                            example_indices)
  unique_partitions, mapped_partitions = array_ops.unique(
      example_partition_ids)
  # Compute aggregate stats for each partition.
  per_partition_gradients = math_ops.unsorted_segment_sum(
      gradients, mapped_partitions, array_ops.size(unique_partitions))
  per_partition_hessians = math_ops.unsorted_segment_sum(
      hessians, mapped_partitions, array_ops.size(unique_partitions))
  # Prepend a bias feature per partition that accumulates the stats for all
  # examples in that partition.
  bias_feature_ids = array_ops.fill(
      array_ops.shape(unique_partitions), _BIAS_FEATURE_ID)
  bias_feature_ids = math_ops.cast(bias_feature_ids, dtypes.int64)
  partition_ids = array_ops.concat(
      [unique_partitions, filtered_partition_ids], 0)
  filtered_gradients = array_ops.concat(
      [per_partition_gradients, filtered_gradients], 0)
  filtered_hessians = array_ops.concat(
      [per_partition_hessians, filtered_hessians], 0)
  bucket_ids = array_ops.concat([bias_feature_ids, quantized_feature], 0)
  return partition_ids, bucket_ids, filtered_gradients, filtered_hessians
Example 3: _check_shapes_dynamic
def _check_shapes_dynamic(self, operator, v, diag):
  """Return (v, diag) with Assert dependencies, which check shape."""
  checks = []
  with ops.op_scope([operator, v, diag], 'check_shapes'):
    s_v = array_ops.shape(v)
    r_op = operator.rank()
    r_v = array_ops.rank(v)
    if diag is not None:
      s_d = array_ops.shape(diag)
      r_d = array_ops.rank(diag)
    # Check tensor rank.
    checks.append(check_ops.assert_rank(v, r_op))
    if diag is not None:
      checks.append(check_ops.assert_rank(diag, r_op - 1))
    # Check batch shape
    checks.append(check_ops.assert_equal(
        operator.batch_shape(), array_ops.slice(s_v, [0], [r_v - 2])))
    if diag is not None:
      checks.append(check_ops.assert_equal(
          operator.batch_shape(), array_ops.slice(s_d, [0], [r_d - 1])))
    # Check event shape
    checks.append(check_ops.assert_equal(
        operator.vector_space_dimension(), array_ops.gather(s_v, r_v - 2)))
    if diag is not None:
      checks.append(check_ops.assert_equal(
          array_ops.gather(s_v, r_v - 1), array_ops.gather(s_d, r_d - 1)))
    v = control_flow_ops.with_dependencies(checks, v)
    if diag is not None:
      diag = control_flow_ops.with_dependencies(checks, diag)
    return v, diag
Example 4: _apply_sparse
def _apply_sparse(self, grad, var):
  beta1_power, beta2_power = self._get_beta_accumulators()
  beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
  beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
  # m := beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_t = state_ops.scatter_update(m, grad.indices,
                                 beta1_t * array_ops.gather(m, grad.indices) +
                                 (1 - beta1_t) * grad.values,
                                 use_locking=self._use_locking)
  # v := beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_t = state_ops.scatter_update(v, grad.indices,
                                 beta2_t * array_ops.gather(v, grad.indices) +
                                 (1 - beta2_t) * math_ops.square(grad.values),
                                 use_locking=self._use_locking)
  # variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))
  m_t_slice = array_ops.gather(m_t, grad.indices)
  v_t_slice = array_ops.gather(v_t, grad.indices)
  denominator_slice = math_ops.sqrt(v_t_slice) + epsilon_t
  var_update = state_ops.scatter_sub(var, grad.indices,
                                     lr * m_t_slice / denominator_slice,
                                     use_locking=self._use_locking)
  return control_flow_ops.group(var_update, m_t, v_t)
Example 5: _apply_sparse_shared
def _apply_sparse_shared(self, grad, var, indices,
                         scatter_add, scatter_update):
  beta1_power = self._get_beta_accumulators()
  beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  # m_t = beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_slice = array_ops.gather(m, indices)
  m_t_slice = m_slice * beta1_t + grad * (1 - beta1_t)
  with ops.control_dependencies([m_t_slice]):
    m_t = scatter_update(m, indices, m_t_slice)
  # u_t = max(beta2 * u, abs(g_t))
  v = self.get_slot(var, "v")
  v_slice = array_ops.gather(v, indices)
  v_t_slice = math_ops.maximum(v_slice * beta2_t, math_ops.abs(grad))
  with ops.control_dependencies([v_t_slice]):
    v_t = scatter_update(v, indices, v_t_slice)
  # theta_t = theta - lr / (1 - beta1^t) * m_t / u_t
  var_slice = -lr_t / (1 - beta1_power) * (m_t_slice /
                                           (v_t_slice + epsilon_t))
  with ops.control_dependencies([var_slice]):
    var_update = scatter_add(var, indices, var_slice)
  return control_flow_ops.group(*[var_update, m_t, v_t])
Example 6: _apply_sparse
def _apply_sparse(self, g_t, x_tm1, prepare):
  """"""
  idxs, idxs_ = array_ops.unique(g_t.indices)
  g_t_ = math_ops.unsorted_segment_sum(g_t.values, idxs_, array_ops.size(idxs))
  updates = []
  if self._mu > 0:
    m_and_t = self._sparse_moving_average(x_tm1, idxs, g_t_, 'm', self._mu)
    m_t_ = array_ops.gather(m_and_t[0], idxs)
    gamma_t = ops.convert_to_tensor(self._gamma)
    m_bar_t_ = (1-gamma_t)*m_t_ + gamma_t*g_t_
    updates.extend(m_and_t)
  else:
    m_bar_t_ = g_t_
  if self._ups > 0:
    v_and_t = self._sparse_moving_average(x_tm1, idxs, g_t_**2, 'v', self._ups)
    v_t_ = array_ops.gather(v_and_t[0], idxs)
    eps_t = ops.convert_to_tensor(self._eps)
    v_bar_t_ = math_ops.sqrt(v_t_ + eps_t)
    updates.extend(v_and_t)
  else:
    v_bar_t_ = 1.
  lr_t = ops.convert_to_tensor(self._lr)
  s_t_ = lr_t * m_bar_t_ / v_bar_t_
  return [[s_t_, x_tm1, idxs, g_t]] + updates
Example 7: _resource_apply_sparse
def _resource_apply_sparse(self, grad, var, indices):
  var_dtype = var.dtype.base_dtype
  lr_t = self._decayed_lr(var_dtype)
  beta_1_t = self._get_hyper('beta_1', var_dtype)
  beta_2_t = self._get_hyper('beta_2', var_dtype)
  local_step = math_ops.cast(self.iterations + 1, var_dtype)
  beta_1_power = math_ops.pow(beta_1_t, local_step)
  epsilon_t = self._get_hyper('epsilon', var_dtype)
  # m_t = beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, 'm')
  m_slice = array_ops.gather(m, indices)
  m_t_slice = m_slice * beta_1_t + grad * (1 - beta_1_t)
  with ops.control_dependencies([m_t_slice]):
    m_t = self._resource_scatter_update(m, indices, m_t_slice)
  # u_t = max(beta2 * u, abs(g_t))
  v = self.get_slot(var, 'v')
  v_slice = array_ops.gather(v, indices)
  v_t_slice = math_ops.maximum(v_slice * beta_2_t, math_ops.abs(grad))
  with ops.control_dependencies([v_t_slice]):
    v_t = self._resource_scatter_update(v, indices, v_t_slice)
  # theta_t = theta - lr / (1 - beta1^t) * m_t / u_t
  var_slice = -lr_t / (1 - beta_1_power) * (
      m_t_slice / (v_t_slice + epsilon_t))
  with ops.control_dependencies([var_slice]):
    var_update = self._resource_scatter_add(var, indices, var_slice)
  return control_flow_ops.group(*[var_update, m_t, v_t])
Example 8: _sparse_moving_average
def _sparse_moving_average(self, x_tm1, idxs, b_t_, name, beta=.9):
  """
  Creates a moving average for a sparse variable.
  Inputs:
    x_tm1: the associated parameter (e.g. a weight matrix)
    idxs: the tensor representing the indices used
    b_t_: the value to accumulate (e.g. slices of the gradient)
    name: a string to use to retrieve it later (e.g. 'm')
    beta: the decay factor (defaults to .9)
  Outputs:
    a_t: the average after moving (same shape as x_tm1, not b_t_)
    t: the internal timestep (used to correct initialization bias)
  """
  a_tm1 = self._zeros_slot(x_tm1, '%s' % name, self._name)
  a_tm1_ = array_ops.gather(a_tm1, idxs)
  tm1 = self._zeros_idx_slot(x_tm1, '%s/tm1' % name, self._name)
  tm1_ = array_ops.gather(tm1, idxs)
  t = state_ops.scatter_add(tm1, idxs, tm1_*0+1, use_locking=self._use_locking)
  t_ = array_ops.gather(t, idxs)
  if beta < 1:
    beta_t = ops.convert_to_tensor(beta, name='%s/decay' % name)
    beta_t_ = beta_t * (1-beta_t**tm1_) / (1-beta_t**t_)
  else:
    beta_t_ = tm1_/t_
  a_t = state_ops.scatter_update(a_tm1, idxs, beta_t_*a_tm1_, use_locking=self._use_locking)
  a_t = state_ops.scatter_add(a_t, idxs, (1-beta_t)*b_t_, use_locking=self._use_locking)
  return a_t, t
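The gather/scatter pair above only ever reads and rewrites the rows of the accumulator addressed by idxs. As a rough dense restatement of the per-row recurrence (a hypothetical NumPy stand-in with made-up names, shown only to make the timestep-dependent decay visible, not part of the original optimizer):

import numpy as np

def moving_average_step(a_tm1, t_tm1, b_t, beta=0.9):
  """One step of the decayed average for a single row; returns (a_t, t)."""
  t = t_tm1 + 1
  # Same timestep-rescaled decay as beta_t_ in the sparse version above.
  beta_t = beta * (1 - beta**t_tm1) / (1 - beta**t)
  a_t = beta_t * a_tm1 + (1 - beta) * b_t
  return a_t, t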
Example 9: _prepare
def _prepare(self, grads_and_vars):
  """"""
  if self._lr is None:
    sTy = 0
    sTs = 0
    yTy = 0
    for g_t, x_tm1 in grads_and_vars:
      if g_t is None:
        continue
      with ops.name_scope('update_' + x_tm1.op.name), ops.device(x_tm1.device):
        if isinstance(g_t, ops.Tensor):
          g_tm1 = self.get_slot(x_tm1, 'g')
          s_tm1 = self.get_slot(x_tm1, 's')
          y_t = (g_t-g_tm1)
          sTy += math_ops.reduce_sum(s_tm1*y_t)
          sTs += math_ops.reduce_sum(s_tm1**2)
          yTy += math_ops.reduce_sum(y_t**2)
        else:
          idxs, idxs_ = array_ops.unique(g_t.indices)
          g_t_ = math_ops.unsorted_segment_sum(g_t.values, idxs_, array_ops.size(idxs))
          g_tm1 = self.get_slot(x_tm1, 'g')
          g_tm1_ = array_ops.gather(g_tm1, idxs)
          s_tm1 = self.get_slot(x_tm1, 's')
          s_tm1_ = array_ops.gather(s_tm1, idxs)
          y_t_ = (g_t_-g_tm1_)
          sTy += math_ops.reduce_sum(s_tm1_*y_t_)
          sTs += math_ops.reduce_sum(s_tm1_**2)
          yTy += math_ops.reduce_sum(y_t_**2)
    sTy = math_ops.abs(sTy)
    self._lr = sTs / (sTy + self._eps)
Example 10: _resource_apply_sparse
def _resource_apply_sparse(self, grad, var, indices):
  beta1_power, beta2_power = self._get_beta_accumulators()
  beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
  beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
  # m := beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_t_slice = beta1_t * array_ops.gather(m, indices) + (1 - beta1_t) * grad
  m_update_op = resource_variable_ops.resource_scatter_update(m.handle,
                                                              indices,
                                                              m_t_slice)
  # v := beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_t_slice = (beta2_t * array_ops.gather(v, indices) +
               (1 - beta2_t) * math_ops.square(grad))
  v_update_op = resource_variable_ops.resource_scatter_update(v.handle,
                                                              indices,
                                                              v_t_slice)
  # variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))
  var_slice = lr * m_t_slice / (math_ops.sqrt(v_t_slice) + epsilon_t)
  var_update_op = resource_variable_ops.resource_scatter_sub(var.handle,
                                                             indices,
                                                             var_slice)
  return control_flow_ops.group(var_update_op, m_update_op, v_update_op)
Example 11: _resource_apply_sparse
def _resource_apply_sparse(self, grad, var, indices):
  var_dtype = var.dtype.base_dtype
  lr_t = self._decayed_lr(var_dtype)
  beta_1_t = self._get_hyper('beta_1', var_dtype)
  beta_2_t = self._get_hyper('beta_2', var_dtype)
  local_step = math_ops.cast(self.iterations + 1, var_dtype)
  beta_1_power = math_ops.pow(beta_1_t, local_step)
  beta_2_power = math_ops.pow(beta_2_t, local_step)
  epsilon_t = self._get_hyper('epsilon', var_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta_2_power) / (1 - beta_1_power))
  # m_t = beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, 'm')
  m_scaled_g_values = grad * (1 - beta_1_t)
  m_t = state_ops.assign(m, m * beta_1_t, use_locking=self._use_locking)
  with ops.control_dependencies([m_t]):
    m_t = self._resource_scatter_add(m, indices, m_scaled_g_values)
  # m_bar = (1 - beta1) * g_t + beta1 * m_t
  m_bar = m_scaled_g_values + beta_1_t * array_ops.gather(m_t, indices)
  # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, 'v')
  v_scaled_g_values = (grad * grad) * (1 - beta_2_t)
  v_t = state_ops.assign(v, v * beta_2_t, use_locking=self._use_locking)
  with ops.control_dependencies([v_t]):
    v_t = self._resource_scatter_add(v, indices, v_scaled_g_values)
  v_t_slice = array_ops.gather(v_t, indices)
  v_sqrt = math_ops.sqrt(v_t_slice)
  var_update = self._resource_scatter_add(var, indices,
                                          -lr * m_bar / (v_sqrt + epsilon_t))
  return control_flow_ops.group(*[var_update, m_bar, v_t])
Example 12: testHigherRankMaxNorm
def testHigherRankMaxNorm(self):
  np.random.seed(8)
  with self.cached_session():
    for params_shape in (12,), (6, 3), (6, 2, 3):
      # Test embedding rank 0, 1, 2.
      # Note: the first dimension must be a common multiple of procs below.
      params = 2 * np.ones(params_shape)
      params_norm = params / np.sqrt(
          np.sum(
              params * params, tuple(range(params.ndim)[1:]), keepdims=True))
      for ids_shape in (), (3), (4, 3), (2, 3, 4):
        ids = np.random.randint(
            params.shape[0], size=np.prod(ids_shape,
                                          dtype=np.int64)).reshape(ids_shape)
        # Compare nonsharded to gather
        simple = embedding_ops.embedding_lookup(
            params, ids, max_norm=1.0).eval()
        # assertAllClose is used here as different implementations of sqrt may
        # be used to compute each of the values being compared. For example,
        # on AVX512 builds the embedding operation makes use of Eigen's fast
        # vectorized square root algorithm for doubles. These different
        # implementations of sqrt are not guaranteed to produce exactly the
        # same results. Therefore, an exact comparison cannot be made.
        self.assertAllClose(simple, array_ops.gather(params_norm, ids).eval())
        # Run a few different sharded versions.
        for procs in 1, 2, 3:
          stride = procs * math_ops.range(params.shape[0] // procs)
          split_params = [
              array_ops.gather(params, stride + p) for p in xrange(procs)
          ]
          sharded = embedding_ops.embedding_lookup(
              split_params, ids, max_norm=1.0).eval()
          self.assertAllEqual(simple, sharded)
Example 13: _apply_sparse
def _apply_sparse(self, grad, var):
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  # the following equations given in [1]
  # m_t = beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_t = state_ops.scatter_update(m, grad.indices,
                                 beta1_t * array_ops.gather(m, grad.indices) +
                                 (1. - beta1_t) * grad.values,
                                 use_locking=self._use_locking)
  m_t_slice = tf.gather(m_t, grad.indices)
  # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_t = state_ops.scatter_update(v, grad.indices,
                                 beta2_t * array_ops.gather(v, grad.indices) +
                                 (1. - beta2_t) * tf.square(grad.values),
                                 use_locking=self._use_locking)
  v_prime = self.get_slot(var, "v_prime")
  v_t_slice = tf.gather(v_t, grad.indices)
  v_prime_slice = tf.gather(v_prime, grad.indices)
  v_t_prime = state_ops.scatter_update(v_prime, grad.indices,
                                       tf.maximum(v_prime_slice, v_t_slice))
  v_t_prime_slice = array_ops.gather(v_t_prime, grad.indices)
  var_update = state_ops.scatter_sub(var, grad.indices,
                                     lr_t * m_t_slice / (math_ops.sqrt(v_t_prime_slice) + epsilon_t),
                                     use_locking=self._use_locking)
  return control_flow_ops.group(*[var_update, m_t, v_t, v_t_prime])
Example 14: _apply_sparse_shared
def _apply_sparse_shared(self, grad, var, indices, scatter_add):
  beta1_power, beta2_power = self._get_beta_accumulators()
  beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
  beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
  lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
  beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
  beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
  epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
  lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
  # m_t = beta1 * m + (1 - beta1) * g_t
  m = self.get_slot(var, "m")
  m_scaled_g_values = grad * (1 - beta1_t)
  m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
  with ops.control_dependencies([m_t]):
    m_t = scatter_add(m, indices, m_scaled_g_values)
  # m_bar = (1 - beta1) * g_t + beta1 * m_t
  m_bar = m_scaled_g_values + beta1_t * array_ops.gather(m_t, indices)
  # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
  v = self.get_slot(var, "v")
  v_scaled_g_values = (grad * grad) * (1 - beta2_t)
  v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
  with ops.control_dependencies([v_t]):
    v_t = scatter_add(v, indices, v_scaled_g_values)
  v_t_slice = array_ops.gather(v_t, indices)
  v_sqrt = math_ops.sqrt(v_t_slice)
  var_update = scatter_add(var, indices, -lr * m_bar / (v_sqrt + epsilon_t))
  return control_flow_ops.group(*[var_update, m_bar, v_t])
Example 15: testString
def testString(self):
  params = np.array([[b"asdf", b"zxcv"], [b"qwer", b"uiop"]])
  with self.cached_session():
    self.assertAllEqual([b"qwer", b"uiop"],
                        array_ops.gather(params, 1, axis=0).eval())
    self.assertAllEqual([b"asdf", b"qwer"],
                        array_ops.gather(params, 0, axis=1).eval())