This article collects typical usage examples of the Python tensorflow.einsum function. If you are unsure what einsum does, how to call it, or what real-world uses look like, the curated examples below should help.
The following shows 15 code examples of the einsum function, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
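Before the examples, here is a brief editorial sketch of the einsum subscript convention. It uses NumPy's np.einsum, which follows the same notation as tf.einsum; the shapes and names are arbitrary and not taken from any example below.

import numpy as np

# Plain matrix multiplication: contract the shared index k.
A = np.random.rand(3, 4)          # [i, k]
B = np.random.rand(4, 5)          # [k, l]
C = np.einsum('ik,kl->il', A, B)  # [i, l], same as A @ B
assert np.allclose(C, A @ B)

# Per-example mixing, a pattern that appears in several examples below:
# for every n, multiply the L-vector g[n] by the P x L matrix W.
W = np.random.rand(2, 6)           # [p, l]
g = np.random.rand(10, 6)          # [n, l]
f = np.einsum('pl,nl->np', W, g)   # [n, p], same as g @ W.T
assert np.allclose(f, g @ W.T)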
Example 1: _sample_conditional
def _sample_conditional(Xnew, feat, kern, f, *, full_cov=False, full_output_cov=False, q_sqrt=None, white=False, num_samples=None):
    """
    `sample_conditional` will return a sample from the conditional distribution.
    In most cases this means calculating the conditional mean m and variance v and then
    returning m + sqrt(v) * eps, with eps ~ N(0, 1).
    However, for some combinations of Mok and Mof more efficient sampling routines exist.
    The dispatcher will make sure that we use the most efficient one.
    :return: N x P (full_output_cov = False) or N x P x P (full_output_cov = True)
    """
    logger.debug("sample conditional: (MixedKernelSharedMof, MixedKernelSeparateMof), SeparateMixedMok")
    if full_cov:
        raise NotImplementedError("full_cov not yet implemented")
    if full_output_cov:
        raise NotImplementedError("full_output_cov not yet implemented")
    independent_cond = conditional.dispatch(object, SeparateIndependentMof, SeparateIndependentMok, object)
    g_mu, g_var = independent_cond(Xnew, feat, kern, f, white=white, q_sqrt=q_sqrt,
                                   full_output_cov=False, full_cov=False)  # N x L, N x L
    g_sample = _sample_mvn(g_mu, g_var, "diag", num_samples=num_samples)  # N x L
    with params_as_tensors_for(kern):
        f_sample = tf.einsum("pl,nl->np", kern.W, g_sample)
        f_mu = tf.einsum("pl,nl->np", kern.W, g_mu)
        # f_var = W g_var W^T with g_var diagonal per data point:
        # [P, L] @ diag([L]) @ [L, P]  ->  f_var[n, p] = \sum_l W_pl g_var_nl W_pl
        f_var = tf.einsum("pl,nl,pl->np", kern.W, g_var, kern.W)
    return f_sample, f_mu, f_var
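The f_var contraction above can be checked numerically: for each data point n it returns the diagonal of W diag(g_var[n]) W^T. An editorial NumPy sketch with arbitrary shapes:

import numpy as np

P, L, N = 3, 4, 5
W = np.random.rand(P, L)        # mixing matrix, [P, L]
v = np.random.rand(N, L)        # per-point diagonal variances, [N, L]

f_var = np.einsum('pl,nl,pl->np', W, v, W)   # [N, P]

# Reference: diagonal of W @ diag(v[n]) @ W.T for each n.
ref = np.stack([np.diag(W @ np.diag(v[n]) @ W.T) for n in range(N)])
assert np.allclose(f_var, ref)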
Example 2: _variance
def _variance(self):
    with tf.control_dependencies(self._runtime_assertions):
        probs = self._marginal_hidden_probs()
        # probs :: num_steps batch_shape num_states
        means = self._observation_distribution.mean()
        # means :: observation_batch_shape[:-1] num_states
        #          observation_event_shape
        means_shape = tf.concat(
            [self.batch_shape_tensor(),
             [self._num_states],
             self._observation_distribution.event_shape_tensor()],
            axis=0)
        means = tf.broadcast_to(means, means_shape)
        # means :: batch_shape num_states observation_event_shape

        observation_event_shape = (
            self._observation_distribution.event_shape_tensor())
        batch_size = tf.reduce_prod(self.batch_shape_tensor())
        flat_probs_shape = [self._num_steps, batch_size, self._num_states]
        flat_means_shape = [
            batch_size,
            1,
            self._num_states,
            tf.reduce_prod(observation_event_shape)]

        flat_probs = tf.reshape(probs, flat_probs_shape)
        # flat_probs :: num_steps batch_size num_states
        flat_means = tf.reshape(means, flat_means_shape)
        # flat_means :: batch_size 1 num_states observation_event_size
        flat_mean = tf.einsum("ijk,jmkl->jiml", flat_probs, flat_means)
        # flat_mean :: batch_size num_steps 1 observation_event_size

        variances = self._observation_distribution.variance()
        variances = tf.broadcast_to(variances, means_shape)
        # variances :: batch_shape num_states observation_event_shape
        flat_variances = tf.reshape(variances, flat_means_shape)
        # flat_variances :: batch_size 1 num_states observation_event_size

        # For a mixture of n distributions with mixture probabilities
        # p[i], and where the individual distributions have means and
        # variances given by mean[i] and var[i], the variance of
        # the mixture is given by:
        #
        #   var = sum i=1..n p[i] * ((mean[i] - mean)**2 + var[i])
        flat_variance = tf.einsum("ijk,jikl->jil",
                                  flat_probs,
                                  (flat_means - flat_mean)**2 + flat_variances)
        # flat_variance :: batch_size num_steps observation_event_size

        unflat_mean_shape = tf.concat(
            [self.batch_shape_tensor(),
             [self._num_steps],
             observation_event_shape],
            axis=0)

        # returns :: batch_shape num_steps observation_event_shape
        return tf.reshape(flat_variance, unflat_mean_shape)
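The closed-form mixture variance used in the comment above can be checked directly. This is an editorial NumPy sketch with arbitrary numbers, not part of the original example; it confirms that sum_i p[i]*((mu[i]-mu)^2 + var[i]) equals E[X^2] - E[X]^2.

import numpy as np

# Mixture of n components with probabilities p[i], means mu[i] and variances var[i].
p = np.array([0.2, 0.5, 0.3])
mu = np.array([-1.0, 0.0, 2.0])
var = np.array([0.5, 1.0, 0.25])

mixture_mean = np.einsum('i,i->', p, mu)
mixture_var = np.einsum('i,i->', p, (mu - mixture_mean) ** 2 + var)

# Same quantity via the second moment: E[X^2] - E[X]^2.
second_moment = np.einsum('i,i->', p, var + mu ** 2)
assert np.isclose(mixture_var, second_moment - mixture_mean ** 2)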
Example 3: _build_clp_multiplication
def _build_clp_multiplication(self, clp_kernel):
    from TFUtil import safe_log
    input_placeholder = self.input_data.get_placeholder_as_batch_major()
    tf.assert_equal(tf.shape(clp_kernel)[1], tf.shape(input_placeholder)[2] // 2)
    tf.assert_equal(tf.shape(clp_kernel)[2], self._nr_of_filters)
    input_real = tf.strided_slice(input_placeholder, [0, 0, 0], tf.shape(input_placeholder), [1, 1, 2])
    input_imag = tf.strided_slice(input_placeholder, [0, 0, 1], tf.shape(input_placeholder), [1, 1, 2])
    kernel_real = self._clp_kernel[0, :, :]
    kernel_imag = self._clp_kernel[1, :, :]
    output_real = tf.einsum('btf,fp->btp', input_real, kernel_real) - tf.einsum('btf,fp->btp', input_imag, kernel_imag)
    output_imag = tf.einsum('btf,fp->btp', input_imag, kernel_real) + tf.einsum('btf,fp->btp', input_real, kernel_imag)
    output_uncompressed = tf.sqrt(tf.pow(output_real, 2) + tf.pow(output_imag, 2))
    output_compressed = safe_log(output_uncompressed)
    return output_compressed
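The pair of einsums above is the usual real-arithmetic implementation of a complex linear layer: (x_r + i x_i)(k_r + i k_i) = (x_r k_r - x_i k_i) + i(x_i k_r + x_r k_i). An editorial NumPy sketch with arbitrary shapes confirms it matches a single complex contraction:

import numpy as np

B, T, F, P = 2, 3, 4, 5
x = np.random.rand(B, T, F) + 1j * np.random.rand(B, T, F)
k = np.random.rand(F, P) + 1j * np.random.rand(F, P)

out_real = np.einsum('btf,fp->btp', x.real, k.real) - np.einsum('btf,fp->btp', x.imag, k.imag)
out_imag = np.einsum('btf,fp->btp', x.imag, k.real) + np.einsum('btf,fp->btp', x.real, k.imag)

ref = np.einsum('btf,fp->btp', x, k)   # the same contraction on complex arrays
assert np.allclose(out_real, ref.real) and np.allclose(out_imag, ref.imag)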
Example 4: dense_word_embedding_from_chars
def dense_word_embedding_from_chars(chars, embed_dim, bias=True, scope='dense-word-embed', reuse=False):
    """
    Word embeddings via dense transformation + maxpooling of character sequences.

    Args:
        chars: Tensor of shape [batch_size, word sequence length, char sequence length, alphabet size].
        embed_dim: Dimension of word embeddings. Integer.

    Returns:
        Sequence of embedding vectors. Tensor of shape [batch_size, word sequence length, embed_dim].
    """
    with tf.variable_scope(scope, reuse=reuse):
        chars = tf.cast(chars, tf.float32)
        W = tf.get_variable(
            name='weights',
            initializer=tf.contrib.layers.variance_scaling_initializer(),
            shape=[shape(chars, -1), embed_dim]
        )
        z = tf.einsum('ijkl,lm->ijkm', chars, W)
        if bias:
            b = tf.get_variable(
                name='biases',
                initializer=tf.constant_initializer(),
                shape=[embed_dim]
            )
            z = z + b
        dense_word_embedding = tf.reduce_max(z, 2)
        return dense_word_embedding
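As a sanity check (editorial addition, not part of the original example), the 'ijkl,lm->ijkm' contraction is the same as flattening the leading axes and doing a single matmul against the [alphabet_size, embed_dim] weight matrix:

import numpy as np

B, W_len, C_len, A, E = 2, 3, 4, 5, 6   # arbitrary batch/word/char/alphabet/embed sizes
chars = np.random.rand(B, W_len, C_len, A)
W = np.random.rand(A, E)

z_einsum = np.einsum('ijkl,lm->ijkm', chars, W)
z_matmul = (chars.reshape(-1, A) @ W).reshape(B, W_len, C_len, E)
assert np.allclose(z_einsum, z_matmul)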
Example 5: _expectation
def _expectation(p, mean1, none1, mean2, none2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <m1(x_n)^T m2(x_n)>_p(x_n)
    - m1(.), m2(.) :: Linear mean functions

    :return: NxQ1xQ2
    """
    with params_as_tensors_for(mean1), params_as_tensors_for(mean2):
        e_xxt = p.cov + (p.mu[:, :, None] * p.mu[:, None, :])  # NxDxD
        e_A1t_xxt_A2 = tf.einsum("iq,nij,jz->nqz", mean1.A, e_xxt, mean2.A)  # NxQ1xQ2
        e_A1t_x_b2t = tf.einsum("iq,ni,z->nqz", mean1.A, p.mu, mean2.b)  # NxQ1xQ2
        e_b1_xt_A2 = tf.einsum("q,ni,iz->nqz", mean1.b, p.mu, mean2.A)  # NxQ1xQ2
        e_b1_b2t = mean1.b[:, None] * mean2.b[None, :]  # Q1xQ2

        return e_A1t_xxt_A2 + e_A1t_x_b2t + e_b1_xt_A2 + e_b1_b2t
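A quick editorial NumPy check of the second-moment term built above: the broadcasted outer product mu[:, :, None] * mu[:, None, :] forms mu_n mu_n^T for every n, so e_xxt[n] = cov[n] + mu[n] mu[n]^T, i.e. E[x_n x_n^T]. Shapes are arbitrary.

import numpy as np

N, D = 4, 3
mu = np.random.rand(N, D)
cov = np.stack([np.eye(D) * 0.1 for _ in range(N)])

e_xxt = cov + mu[:, :, None] * mu[:, None, :]
ref = cov + np.einsum('nd,ne->nde', mu, mu)   # explicit per-n outer product
assert np.allclose(e_xxt, ref)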
Example 6: maxpool_attentive_matching
def maxpool_attentive_matching(a, b, a_lengths, b_lengths, max_seq_len, attention_func=dot_attention,
                               attention_func_kwargs={}):
    """
    Matches each vector in a with a vector created by maxpooling over the weighted vectors in b.
    The weightings are determined by the attention matrix, which is computed using attention_func.

    Args:
        a: Input sequence a. Tensor of shape [batch_size, max_seq_len, input_size].
        b: Input sequence b. Tensor of shape [batch_size, max_seq_len, input_size].
        a_lengths: Lengths of sequences in a. Tensor of shape [batch_size].
        b_lengths: Lengths of sequences in b. Tensor of shape [batch_size].
        max_seq_len: Length of padded sequences a and b. Integer.
        attention_func: Function used to calculate the attention matrix. Can be one of the following:
            multiplicative_attention, additive_attention, concat_attention, dot_attention,
            or cosine_attention.
        attention_func_kwargs: Keyword arguments to pass to attention_func.

    Returns:
        Tensor of shape [batch_size, max_seq_len, input_size] consisting of the matching vectors for
        each timestep in a.
    """
    attn = attention_func(a, b, a_lengths, b_lengths, max_seq_len, **attention_func_kwargs)
    return tf.reduce_max(tf.einsum('ijk,ikl->ijkl', attn, b), axis=2)
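The 'ijk,ikl->ijkl' contraction scales every vector of b by the corresponding attention weight before the max-pool over axis 2. An editorial NumPy sketch of the broadcasting it performs, with arbitrary shapes:

import numpy as np

B, T, D = 2, 3, 4
attn = np.random.rand(B, T, T)   # attention weight for each (i, j) pair
b = np.random.rand(B, T, D)

weighted = np.einsum('ijk,ikl->ijkl', attn, b)    # [B, T, T, D]
ref = attn[:, :, :, None] * b[:, None, :, :]      # explicit broadcasting
assert np.allclose(weighted, ref)

matched = weighted.max(axis=2)                    # [B, T, D], as in the layer above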
Example 7: additive_attention
def additive_attention(a, b, a_lengths, b_lengths, max_seq_len, hidden_units=150,
                       scope='additive-attention', reuse=False):
    """
    For sequences a and b of lengths a_lengths and b_lengths, computes an attention matrix attn,
    where attn(i, j) = dot(v, tanh(W*a_i + W*b_j)). v is a learnable vector and W is a learnable
    matrix. The rows of attn are softmax normalized.

    Args:
        a: Input sequence a. Tensor of shape [batch_size, max_seq_len, input_size].
        b: Input sequence b. Tensor of shape [batch_size, max_seq_len, input_size].
        a_lengths: Lengths of sequences in a. Tensor of shape [batch_size].
        b_lengths: Lengths of sequences in b. Tensor of shape [batch_size].
        max_seq_len: Length of padded sequences a and b. Integer.
        hidden_units: Number of hidden units. Integer.

    Returns:
        Attention matrix. Tensor of shape [batch_size, max_seq_len, max_seq_len].
    """
    with tf.variable_scope(scope, reuse=reuse):
        aW = time_distributed_dense_layer(a, hidden_units, bias=False, scope='dense', reuse=False)
        bW = time_distributed_dense_layer(b, hidden_units, bias=False, scope='dense', reuse=True)
        aW = tf.expand_dims(aW, 2)
        bW = tf.expand_dims(bW, 1)
        v = tf.get_variable(
            name='dot_weights',
            initializer=tf.variance_scaling_initializer(),
            shape=[hidden_units]
        )
        logits = tf.einsum('ijkl,l->ijk', tf.nn.tanh(aW + bW), v)
        logits = logits - tf.expand_dims(tf.reduce_max(logits, axis=2), 2)
        attn = tf.exp(logits)
        attn = mask_attention_weights(attn, a_lengths, b_lengths, max_seq_len)
        return attn / tf.expand_dims(tf.reduce_sum(attn, axis=2) + 1e-10, 2)
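The 'ijkl,l->ijk' contraction above is just a dot product of v with each tanh(W*a_i + W*b_j) vector. An editorial NumPy check with arbitrary shapes:

import numpy as np

B, T, H = 2, 3, 5
scores = np.tanh(np.random.rand(B, T, T, H))   # stands in for tanh(aW + bW)
v = np.random.rand(H)

logits = np.einsum('ijkl,l->ijk', scores, v)
ref = (scores * v).sum(axis=-1)                # explicit dot with v along the last axis
assert np.allclose(logits, ref)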
Example 8: time_distributed_dense_layer
def time_distributed_dense_layer(inputs, output_units, bias=True, activation=None, dropout=None,
                                 scope='time-distributed-dense-layer', reuse=False):
    """
    Applies a shared dense layer to each timestep of a tensor of shape [batch_size, max_seq_len, input_units]
    to produce a tensor of shape [batch_size, max_seq_len, output_units].

    Args:
        inputs: Tensor of shape [batch size, max sequence length, ...].
        output_units: Number of output units.
        activation: Activation function.
        dropout: Dropout keep probability.

    Returns:
        Tensor of shape [batch size, max sequence length, output_units].
    """
    with tf.variable_scope(scope, reuse=reuse):
        W = tf.get_variable(
            name='weights',
            initializer=tf.contrib.layers.variance_scaling_initializer(),
            shape=[shape(inputs, -1), output_units]
        )
        z = tf.einsum('ijk,kl->ijl', inputs, W)
        if bias:
            b = tf.get_variable(
                name='biases',
                initializer=tf.constant_initializer(),
                shape=[output_units]
            )
            z = z + b
        z = activation(z) if activation else z
        z = tf.nn.dropout(z, dropout) if dropout else z
        return z
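As with the character embedding in Example 4, 'ijk,kl->ijl' applies one weight matrix to every timestep, which is exactly what "time-distributed dense" means. An editorial NumPy sketch comparing it against an explicit per-timestep loop (arbitrary shapes):

import numpy as np

B, T, K, L = 2, 3, 4, 5
inputs = np.random.rand(B, T, K)
W = np.random.rand(K, L)

z = np.einsum('ijk,kl->ijl', inputs, W)
ref = np.stack([inputs[:, t, :] @ W for t in range(T)], axis=1)   # same dense layer per timestep
assert np.allclose(z, ref)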
Example 9: lookahead
def lookahead(self, t, z_prev):
    """Compute the 'lookahead' distribution, p(x_{t:T} | z_{t-1}).

    Args:
        t: A scalar int Tensor, the current timestep. Must be at least 1.
        z_prev: The latent state at time t-1. A Tensor of shape [batch_size].

    Returns:
        p(x_{t:T} | z_{t-1}) as a multivariate normal distribution.
    """
    z_prev = tf.convert_to_tensor(z_prev)
    sigma_zx = self.sigma_zx[t-1, t:]
    z_var = self.sigma_z[t-1, t-1]
    mean = tf.einsum("i,j->ij", z_prev, sigma_zx) / z_var
    variance = (self.sigma_x[t:, t:] -
                tf.einsum("i,j->ij", sigma_zx, sigma_zx) / z_var)
    return tfd.MultivariateNormalFullCovariance(
        loc=mean, covariance_matrix=variance)
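With no index shared between the two operands, "i,j->ij" is simply an outer product, so `mean` above is outer(z_prev, sigma_zx) / z_var and the correction term in `variance` is outer(sigma_zx, sigma_zx) / z_var. A one-line editorial NumPy check:

import numpy as np

z_prev = np.random.rand(4)
sigma_zx = np.random.rand(6)

outer = np.einsum('i,j->ij', z_prev, sigma_zx)
assert np.allclose(outer, np.outer(z_prev, sigma_zx))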
Example 10: test_invalid
def test_invalid(self):
    for axes in self.invalid_cases:
        inputs = [
            tf.placeholder(tf.float32, shape=(3, 4)),
            tf.placeholder(tf.float32, shape=(3, 4)),
        ]
        with self.assertRaises(ValueError):
            _ = tf.einsum(axes, *inputs)
Example 11: test_dim_mismatch
def test_dim_mismatch(self):
    for axes, input_shapes in self.dim_mismatch_cases:
        inputs = [
            tf.placeholder(tf.float32, shape=shape)
            for shape in input_shapes
        ]
        with self.assertRaises(ValueError):
            _ = tf.einsum(axes, *inputs)
Example 12: not_fully_connected_layer
def not_fully_connected_layer(inputs, segment_count, segment_dim, num_kernels, nonlinearity=tf.nn.relu):
    weights = tf.Variable(
        tf.truncated_normal(
            [segment_dim, num_kernels], stddev=2. / (num_kernels + segment_dim) ** 0.5),
        name='weights')
    biases = tf.Variable(tf.zeros([num_kernels]), name='biases')
    # Note: the batch size is hard-coded to 50 in this snippet.
    inputs_1 = tf.reshape(inputs, [50, segment_count, segment_dim])
    # Apply the same [segment_dim, num_kernels] weights to every segment.
    output = tf.einsum('ijk,kl->ijl', inputs_1, weights) + biases
    temp = tf.reshape(output, [50, segment_count * num_kernels])
    outputs = nonlinearity(temp)
    return outputs, weights
Author: raulsoutelo, project: Music-genre-classification-with-the-Million-Song-Dataset, lines of code: 11, source file: baseline_regularization_L2_in_FC.py
Example 13: test_input_is_placeholder
def test_input_is_placeholder(self):
    with tf.Graph().as_default():
        m0 = tf.placeholder(tf.int32, shape=(1, None))
        m1 = tf.placeholder(tf.int32, shape=(None, 1))
        out = tf.einsum('ij,jk->ik', m0, m1)
        with tf.Session() as sess:
            feed_dict = {
                m0: [[1, 2, 3]],
                m1: [[2], [1], [1]],
            }
            np.testing.assert_almost_equal([[7]],
                                           sess.run(out, feed_dict=feed_dict))
Example 14: test_dim_mismatch
def test_dim_mismatch(self):
    for axes, input_shapes in self.dim_mismatch_cases:
        inputs = [
            tf.placeholder(tf.float32, shape=shape)
            for shape in input_shapes
        ]
        result = None
        try:
            result = tf.einsum(axes, *inputs)
        except AssertionError:
            pass
        assert result is None, "An exception should have been thrown."
Example 15: concat_attention
def concat_attention(a, b, a_lengths, b_lengths, max_seq_len, hidden_units=150,
                     scope='concat-attention', reuse=False):
    """
    For sequences a and b of lengths a_lengths and b_lengths, computes an attention matrix attn,
    where attn(i, j) = dot(v, tanh(W*[a_i; b_j])). v is a learnable vector and W is a learnable
    matrix. The rows of attn are softmax normalized.

    Args:
        a: Input sequence a. Tensor of shape [batch_size, max_seq_len, input_size].
        b: Input sequence b. Tensor of shape [batch_size, max_seq_len, input_size].
        a_lengths: Lengths of sequences in a. Tensor of shape [batch_size].
        b_lengths: Lengths of sequences in b. Tensor of shape [batch_size].
        max_seq_len: Length of padded sequences a and b. Integer.
        hidden_units: Number of hidden units. Integer.

    Returns:
        Attention matrix. Tensor of shape [batch_size, max_seq_len, max_seq_len].
    """
    with tf.variable_scope(scope, reuse=reuse):
        a = tf.expand_dims(a, 2)
        b = tf.expand_dims(b, 1)
        c = tf.concat([a, b], axis=3)
        W = tf.get_variable(
            name='matmul_weights',
            initializer=tf.contrib.layers.variance_scaling_initializer(),
            shape=[shape(c, -1), hidden_units]
        )
        cW = tf.einsum('ijkl,lm->ijkm', c, W)
        v = tf.get_variable(
            name='dot_weights',
            initializer=tf.ones_initializer(),
            shape=[hidden_units]
        )
        logits = tf.einsum('ijkl,l->ijk', tf.nn.tanh(cW), v)
        logits = logits - tf.expand_dims(tf.reduce_max(logits, axis=2), 2)
        attn = tf.exp(logits)
        attn = mask_attention_weights(attn, a_lengths, b_lengths, max_seq_len)
        return attn / tf.expand_dims(tf.reduce_sum(attn, axis=2) + 1e-10, 2)
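Both additive_attention (Example 7) and concat_attention finish with the same subtract-max / exp / mask / renormalise steps; setting aside the masking, this is a numerically stable softmax over axis 2. An editorial NumPy sketch of that equivalence, with arbitrary shapes and the same 1e-10 guard:

import numpy as np

def stable_row_softmax(logits, eps=1e-10):
    # Subtracting the row max keeps exp() from overflowing; the result is unchanged
    # because softmax is invariant to adding a constant per row.
    logits = logits - logits.max(axis=2, keepdims=True)
    attn = np.exp(logits)
    return attn / (attn.sum(axis=2, keepdims=True) + eps)

x = np.random.rand(2, 3, 3) * 50.0
ref = np.exp(x) / np.exp(x).sum(axis=2, keepdims=True)   # naive softmax, fine in float64
assert np.allclose(stable_row_softmax(x), ref)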