Python tensorflow.einsum函数代码示例

本文整理汇总了Python中tensorflow.einsum函数的典型用法代码示例。


示例1: _sample_conditional

def _sample_conditional(Xnew, feat, kern, f, *, full_cov=False, full_output_cov=False, q_sqrt=None, white=False, num_samples=None):
    """
    `sample_conditional` will return a sample from the conditional distribution.
    In most cases this means calculating the conditional mean m and variance v and then
    returning m + sqrt(v) * eps, with eps ~ N(0, 1).
    However, for some combinations of Mok and Mof more efficient sampling routines exist.
    The dispatcher will make sure that we use the most efficient one.

    This variant samples the independent latent processes g and mixes them with `kern.W`.

    :param full_cov: must be False; any truthy value raises NotImplementedError
    :param full_output_cov: must be False; any truthy value raises NotImplementedError
    :return: tuple (sample, mean, var); each is produced by an einsum with output
        indices "np", i.e. N x P
    :raises NotImplementedError: if `full_cov` or `full_output_cov` is requested
    """
    logger.debug("sample conditional: (MixedKernelSharedMof, MixedKernelSeparateMof), SeparateMixedMok")
    if full_cov:
        raise NotImplementedError("full_cov not yet implemented")
    if full_output_cov:
        raise NotImplementedError("full_output_cov not yet implemented")

    # Moments of the independent latent processes g.
    independent_cond = conditional.dispatch(object, SeparateIndependentMof, SeparateIndependentMok, object)
    g_mu, g_var = independent_cond(Xnew, feat, kern, f, white=white, q_sqrt=q_sqrt,
                                   full_output_cov=False, full_cov=False)  # N x L, N x L
    g_sample = _sample_mvn(g_mu, g_var, "diag", num_samples=num_samples)  # N x L

    with params_as_tensors_for(kern):
        # Mix the L latent processes into P outputs: f = W g.
        f_sample = tf.einsum("pl,nl->np", kern.W, g_sample)
        f_mu = tf.einsum("pl,nl->np", kern.W, g_mu)
        # Marginal output variance is the diagonal of W g_var W.T:
        #   [P, L] @ [L, L] @ [L, P]
        #   \sum_l,l' W_pl g_var_ll' W_p'l'
        # g_var is diagonal per data point, so only l == l' survives, and
        # taking p == p' gives \sum_l W_pl g_var_nl W_pl.
        f_var = tf.einsum("pl,nl,pl->np", kern.W, g_var, kern.W)
    return f_sample, f_mu, f_var

示例2: _variance

  def _variance(self):
    """Return the per-step variance of the observations.

    At each step the observation is treated as a mixture over the hidden
    states, weighted by the probabilities from `_marginal_hidden_probs()`,
    and the mixture-variance formula is applied.

    NOTE(review): several continuation lines were missing from this listing
    (the arguments of both `tf.concat` calls, the right-hand side of
    `observation_event_shape`, the entries of `flat_means_shape`, and the
    first einsum operand of `flat_variance`). They are rebuilt here from the
    shape comments attached to each tensor -- confirm against upstream.

    Returns:
      A tensor of shape `batch_shape + [num_steps] + observation_event_shape`.
    """
    with tf.control_dependencies(self._runtime_assertions):
      probs = self._marginal_hidden_probs()
      # probs :: num_steps batch_shape num_states
      means = self._observation_distribution.mean()
      # means :: observation_batch_shape[:-1] num_states
      #          observation_event_shape
      observation_event_shape = (
          self._observation_distribution.event_shape_tensor())
      means_shape = tf.concat(
          [self.batch_shape_tensor(),
           [self._num_states],
           observation_event_shape],
          axis=0)
      means = tf.broadcast_to(means, means_shape)
      # means :: batch_shape num_states observation_event_shape

      batch_size = tf.reduce_prod(self.batch_shape_tensor())
      flat_probs_shape = [self._num_steps, batch_size, self._num_states]
      flat_means_shape = [
          batch_size,
          1,
          self._num_states,
          tf.reduce_prod(observation_event_shape)]

      flat_probs = tf.reshape(probs, flat_probs_shape)
      # flat_probs :: num_steps batch_size num_states
      flat_means = tf.reshape(means, flat_means_shape)
      # flat_means :: batch_size 1 num_states observation_event_size
      flat_mean = tf.einsum("ijk,jmkl->jiml", flat_probs, flat_means)
      # flat_mean :: batch_size num_steps 1 observation_event_size

      variances = self._observation_distribution.variance()
      variances = tf.broadcast_to(variances, means_shape)
      # variances :: batch_shape num_states observation_event_shape
      flat_variances = tf.reshape(variances, flat_means_shape)
      # flat_variances :: batch_size 1 num_states observation_event_size

      # For a mixture of n distributions with mixture probabilities
      # p[i], and where the individual distributions have means and
      # variances given by mean[i] and var[i], the variance of
      # the mixture is given by:
      # var = sum i=1..n p[i] * ((mean[i] - mean)**2 + var[i])

      # (flat_means - flat_mean) broadcasts to
      # batch_size num_steps num_states observation_event_size, matching
      # the "jikl" operand; the state axis k is summed out against the
      # probabilities.
      flat_variance = tf.einsum("ijk,jikl->jil",
                                flat_probs,
                                (flat_means - flat_mean)**2 + flat_variances)
      # flat_variance :: batch_size num_steps observation_event_size

      unflat_mean_shape = tf.concat(
          [self.batch_shape_tensor(),
           [self._num_steps],
           observation_event_shape],
          axis=0)

      # returns :: batch_shape num_steps observation_event_shape
      return tf.reshape(flat_variance, unflat_mean_shape)

示例3: _build_clp_multiplication

 def _build_clp_multiplication(self, clp_kernel):
   """Build the log-magnitude of a complex linear projection of the input.

   The last input axis is read as interleaved values: even indices are the
   real parts and odd indices the imaginary parts. The kernel's leading axis
   selects real (index 0) and imaginary (index 1) weights. The complex
   product is formed with four einsum contractions, then its magnitude is
   passed through `safe_log`.

   :param clp_kernel: kernel tensor whose shape is validated against the
     input and `self._nr_of_filters`.
     NOTE(review): the weights themselves are read from `self._clp_kernel`,
     not from this argument -- confirm both refer to the same tensor.
   :return: tensor produced by `safe_log` on the projection magnitude, with
     einsum output indices "btp"
   """
   from TFUtil import safe_log
   input_placeholder = self.input_data.get_placeholder_as_batch_major()
   input_shape = tf.shape(input_placeholder)
   kernel_shape = tf.shape(clp_kernel)
   # In graph mode an assert op only runs if something depends on it, so
   # the checks are collected and attached as control dependencies.
   shape_checks = [
     tf.assert_equal(kernel_shape[1], input_shape[2] // 2),
     tf.assert_equal(kernel_shape[2], self._nr_of_filters),
   ]
   with tf.control_dependencies(shape_checks):
     # Stride 2 over the last axis de-interleaves real / imaginary parts.
     input_real = tf.strided_slice(input_placeholder, [0, 0, 0], input_shape, [1, 1, 2])
     input_imag = tf.strided_slice(input_placeholder, [0, 0, 1], input_shape, [1, 1, 2])
   kernel_real = self._clp_kernel[0, :, :]
   kernel_imag = self._clp_kernel[1, :, :]
   # (a + ib)(c + id) = (ac - bd) + i(bc + ad)
   output_real = tf.einsum('btf,fp->btp', input_real, kernel_real) - tf.einsum('btf,fp->btp', input_imag, kernel_imag)
   output_imag = tf.einsum('btf,fp->btp', input_imag, kernel_real) + tf.einsum('btf,fp->btp', input_real, kernel_imag)
   output_uncompressed = tf.sqrt(tf.pow(output_real, 2) + tf.pow(output_imag, 2))
   output_compressed = safe_log(output_uncompressed)
   return output_compressed

示例4: dense_word_embedding_from_chars

def dense_word_embedding_from_chars(chars, embed_dim, bias=True, scope='dense-word-embed', reuse=False):
    """
    Word embeddings via dense transformation + maxpooling of character sequences.

    Args:
        chars: Tensor of shape [batch_size, word sequence length, char sequence length, alphabet size].
        embed_dim: Dimension of word embeddings.  Integer.
        bias: If truthy, a variable b is added to the projected characters.
        scope: Name passed to tf.variable_scope.
        reuse: Reuse flag passed to tf.variable_scope.

    Returns:
        Sequence of embedding vectors.  Tensor of shape [batch_size, word sequence length, embed_dim].
    """
    with tf.variable_scope(scope, reuse=reuse):
        chars = tf.cast(chars, tf.float32)
        # NOTE(review): this call is truncated in the listing -- the arguments before `shape=`
        # and the closing parenthesis are missing; restore them from the upstream source.
        W = tf.get_variable(
            shape=[shape(chars, -1), embed_dim]
        # Project the last (alphabet) axis of every character onto embed_dim.
        z = tf.einsum('ijkl,lm->ijkm', chars, W)
        if bias:
            # NOTE(review): this call is truncated in the listing -- all arguments are missing.
            b = tf.get_variable(
            z = z + b
        # Max-pool over axis 2 (the character axis), leaving one vector per word.
        dense_word_embedding = tf.reduce_max(z, 2)
        return dense_word_embedding

示例5: _expectation

def _expectation(p, mean1, none1, mean2, none2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <m1(x_n)^T m2(x_n)>_p(x_n)
        - m1(.), m2(.) :: Linear mean functions

    With m(x) = A^T x + b the product expands into four terms; each is
    computed separately below and the results are summed. The moments of p
    are read from `p.mu` and `p.cov`.

    `none1`, `none2` and `nghp` are not used by this implementation.

    :return: NxQ1xQ2
    """
    with params_as_tensors_for(mean1), params_as_tensors_for(mean2):
        # Second moment E[x x^T] = cov + mu mu^T.
        e_xxt = p.cov + (p.mu[:, :, None] * p.mu[:, None, :])  # NxDxD
        e_A1t_xxt_A2 = tf.einsum("iq,nij,jz->nqz", mean1.A, e_xxt, mean2.A)  # NxQ1xQ2
        e_A1t_x_b2t = tf.einsum("iq,ni,z->nqz", mean1.A, p.mu, mean2.b)  # NxQ1xQ2
        e_b1_xt_A2 = tf.einsum("q,ni,iz->nqz", mean1.b, p.mu, mean2.A)  # NxQ1xQ2
        # Independent of x, so it has no leading N axis and is broadcast in the sum.
        e_b1_b2t = mean1.b[:, None] * mean2.b[None, :]  # Q1xQ2

        return e_A1t_xxt_A2 + e_A1t_x_b2t + e_b1_xt_A2 + e_b1_b2t

示例6: maxpool_attentive_matching

def maxpool_attentive_matching(a, b, a_lengths, b_lengths, max_seq_len, attention_func=dot_attention,
                               attention_func_kwargs=None):
    """
    Matches each vector in a with a vector created by maxpooling over the weighted vectors in b.
    The weightings are determined by the attention matrix.  The attention matrix is
    computed using attention_func.

    NOTE(review): the end of the signature was missing from this listing.  The
    `attention_func_kwargs` parameter is restored from its docstring entry and its use in
    the body; its default is an assumption (None, treated as "no extra arguments").

    Args:
        a: Input sequence a.  Tensor of shape [batch_size, max_seq_len, input_size].
        b: Input sequence b.  Tensor of shape [batch_size, max_seq_len, input_size].
        a_lengths: Lengths of sequences in a.  Tensor of shape [batch_size].
        b_lengths: Lengths of sequences in b.  Tensor of shape [batch_size].
        max_seq_len: Length of padded sequences a and b.  Integer.
        attention_func: Function used to calculate attention matrix.  Can be one of the following:
            multiplicative_attention, additive_attention, concat_attention, dot_attention,
            or cosine_attention.
        attention_func_kwargs: Keyword arguments to pass to attention_func.

    Returns:
        Tensor of shape [batch_size, max_seq_len, input_size] consisting of the matching vectors for
        each timestep in a.
    """
    # A None default avoids sharing one mutable dict between calls.
    if attention_func_kwargs is None:
        attention_func_kwargs = {}
    attn = attention_func(a, b, a_lengths, b_lengths, max_seq_len, **attention_func_kwargs)
    # 'ijk,ikl->ijkl' keeps k (the position in b) instead of summing over it, so every
    # b vector is scaled by its attention weight; the max is then taken over k.
    return tf.reduce_max(tf.einsum('ijk,ikl->ijkl', attn, b), axis=2)

示例7: additive_attention

def additive_attention(a, b, a_lengths, b_lengths, max_seq_len, hidden_units=150,
                       scope='additive-attention', reuse=False):
    """
    For sequences a and b of lengths a_lengths and b_lengths, computes an attention matrix attn,
    where attn(i, j) = dot(v, tanh(W*a_i + W*b_j)).  v is a learnable vector and W is a learnable
    matrix. The rows of attn are softmax normalized.

    Args:
        a: Input sequence a.  Tensor of shape [batch_size, max_seq_len, input_size].
        b: Input sequence b.  Tensor of shape [batch_size, max_seq_len, input_size].
        a_lengths: Lengths of sequences in a.  Tensor of shape [batch_size].
        b_lengths: Lengths of sequences in b.  Tensor of shape [batch_size].
        max_seq_len: Length of padded sequences a and b.  Integer.
        hidden_units: Number of hidden units.  Integer.
        scope: Name passed to tf.variable_scope.
        reuse: Reuse flag passed to tf.variable_scope.

    Returns:
        Attention matrix.  Rank-3 tensor (the 'ijk' einsum output); presumably
        [batch_size, max_seq_len, max_seq_len] given the documented input shapes.
    """
    with tf.variable_scope(scope, reuse=reuse):
        # Both projections use the inner scope 'dense'; the second call passes reuse=True so
        # that a and b share the same W.
        aW = time_distributed_dense_layer(a, hidden_units, bias=False, scope='dense', reuse=False)
        bW = time_distributed_dense_layer(b, hidden_units, bias=False, scope='dense', reuse=True)
        # Insert singleton axes so that aW + bW broadcasts over every (i, j) pair.
        aW = tf.expand_dims(aW, 2)
        bW = tf.expand_dims(bW, 1)
        # NOTE(review): this call is truncated in the listing -- all arguments are missing.
        v = tf.get_variable(
        logits = tf.einsum('ijkl,l->ijk', tf.nn.tanh(aW + bW), v)
        # Subtract the row maximum before exponentiating (numerical stability).
        logits = logits - tf.expand_dims(tf.reduce_max(logits, axis=2), 2)
        attn = tf.exp(logits)
        attn = mask_attention_weights(attn, a_lengths, b_lengths, max_seq_len)
        # The 1e-10 term keeps the division finite when a row sums to zero.
        return attn / tf.expand_dims(tf.reduce_sum(attn, axis=2) + 1e-10, 2)

示例8: time_distributed_dense_layer

def time_distributed_dense_layer(inputs, output_units, bias=True, activation=None, dropout=None,
                                 scope='time-distributed-dense-layer', reuse=False):
    """
    Applies a shared dense layer to each timestep of a tensor of shape [batch_size, max_seq_len, input_units]
    to produce a tensor of shape [batch_size, max_seq_len, output_units].

    Args:
        inputs: Tensor of shape [batch size, max sequence length, ...].
        output_units: Number of output units.
        bias: If truthy, a variable b is added after the projection.
        activation: activation function.
        dropout: dropout keep prob.
        scope: Name passed to tf.variable_scope.
        reuse: Reuse flag passed to tf.variable_scope.

    Returns:
        Tensor of shape [batch size, max sequence length, output_units].
    """
    with tf.variable_scope(scope, reuse=reuse):
        # NOTE(review): this call is truncated in the listing -- the arguments before `shape=`
        # and the closing parenthesis are missing; restore them from the upstream source.
        W = tf.get_variable(
            shape=[shape(inputs, -1), output_units]
        # Contract the last axis of inputs with W; the first two axes pass through unchanged.
        z = tf.einsum('ijk,kl->ijl', inputs, W)
        if bias:
            # NOTE(review): this call is truncated in the listing -- all arguments are missing.
            b = tf.get_variable(
            z = z + b
        # Activation and dropout are skipped when their argument is falsy.
        z = activation(z) if activation else z
        z = tf.nn.dropout(z, dropout) if dropout else z
        return z

示例9: lookahead

  def lookahead(self, t, z_prev):
    """Compute the 'lookahead' distribution, p(x_{t:T} | z_{t-1}).

    Args:
      t: A scalar Tensor int, the current timestep. Must be at least 1.
      z_prev: The latent state at time t-1. A Tensor of shape [batch_size].
    Returns:
      p(x_{t:T} | z_{t-1}) as a multivariate normal distribution.
    """
    z_prev = tf.convert_to_tensor(z_prev)
    sigma_zx = self.sigma_zx[t-1, t:]
    z_var = self.sigma_z[t-1, t-1]
    # "i,j->ij" is an outer product: one mean vector per batch element.
    mean = tf.einsum("i,j->ij", z_prev, sigma_zx) / z_var
    # The covariance does not depend on z_prev, so it carries no batch axis.
    variance = (self.sigma_x[t:, t:] -
                tf.einsum("i,j->ij", sigma_zx, sigma_zx) / z_var)
    return tfd.MultivariateNormalFullCovariance(
        loc=mean, covariance_matrix=variance)

示例10: test_invalid

 def test_invalid(self):
   """Every equation in `self.invalid_cases` must make `tf.einsum` raise ValueError."""
   for axes in self.invalid_cases:
     # Two identically shaped operands, so only the equation string can be at fault.
     inputs = [
       tf.placeholder(tf.float32, shape=(3,4)),
       tf.placeholder(tf.float32, shape=(3,4)),
     ]
     with self.assertRaises(ValueError):
       _ = tf.einsum(axes, *inputs)

示例11: test_dim_mismatch

 def test_dim_mismatch(self):
   """Every (equation, shapes) pair in `self.dim_mismatch_cases` must raise ValueError."""
   for axes, input_shapes in self.dim_mismatch_cases:
     # One placeholder per requested operand shape.
     inputs = [
       tf.placeholder(tf.float32, shape=shape)
       for shape in input_shapes
     ]
     with self.assertRaises(ValueError):
       _ = tf.einsum(axes, *inputs)

示例12: not_fully_connected_layer

def not_fully_connected_layer(inputs, segment_count, segment_dim, num_kernels, nonlinearity=tf.nn.relu):
    """Apply one shared dense kernel bank to every segment of the input.

    The input is split into `segment_count` segments of width `segment_dim`;
    each segment is multiplied by the same [segment_dim, num_kernels] weight
    matrix, the per-segment results are laid side by side again, and the
    nonlinearity is applied.

    Args:
        inputs: Tensor that can be reshaped to [batch, segment_count, segment_dim].
        segment_count: Number of segments per example.
        segment_dim: Width of each segment.
        num_kernels: Number of output units produced for each segment.
        nonlinearity: Callable applied to the flattened output.

    Returns:
        Tuple (outputs, weights): outputs has shape
        [batch, segment_count * num_kernels]; weights is the shared variable.
    """
    # NOTE(review): the initializer call was truncated in this listing; a
    # truncated normal is assumed from the surviving `stddev=` argument.
    weights = tf.Variable(
        tf.truncated_normal(
            [segment_dim, num_kernels], stddev=2. / (num_kernels + segment_dim) ** 0.5),
        name='weights')
    # The second positional parameter of tf.Variable is `trainable`, so the
    # name has to be passed by keyword to take effect.
    biases = tf.Variable(tf.zeros([num_kernels]), name='biases')
    # -1 lets the batch dimension be inferred instead of being fixed at 50.
    inputs_1 = tf.reshape(inputs, [-1, segment_count, segment_dim])
    output = tf.einsum('ijk,kl->ijl', inputs_1, weights) + biases
    temp = tf.reshape(output, [-1, segment_count * num_kernels])
    outputs = nonlinearity(temp)
    return outputs, weights

示例13: test_input_is_placeholder

 def test_input_is_placeholder(self):
   """`tf.einsum` must accept placeholders whose contracted dimension is unknown.

   NOTE(review): the closing brace of `feed_dict` and the head of the
   assertion call were missing from this listing. The comparison helper is
   an assumption; the expected value follows from the fed matrices.
   """
   import numpy as np  # local import keeps this snippet self-contained

   with tf.Graph().as_default():
     m0 = tf.placeholder(tf.int32, shape=(1, None))
     m1 = tf.placeholder(tf.int32, shape=(None, 1))
     out = tf.einsum('ij,jk->ik', m0, m1)
     with tf.Session() as sess:
       feed_dict = {
           m0: [[1, 2, 3]],
           m1: [[2], [1], [1]],
       }
       # [1, 2, 3] . [2, 1, 1] = 2 + 2 + 3 = 7
       np.testing.assert_almost_equal([[7]],
                                      sess.run(out, feed_dict=feed_dict))

示例14: test_dim_mismatch

 def test_dim_mismatch(self):
   """Every (equation, shapes) pair in `self.dim_mismatch_cases` must fail in `tf.einsum`.

   `result` staying None after the call is the evidence that einsum raised
   instead of returning a tensor.
   """
   for axes, input_shapes in self.dim_mismatch_cases:
     inputs = [
       tf.placeholder(tf.float32, shape=shape)
       for shape in input_shapes
     ]
     result = None
     try:
       result = tf.einsum(axes, *inputs)
     except AssertionError:
       # Expected outcome: `result` is left as None.
       pass
     assert result is None, "An exception should have been thrown."

示例15: concat_attention

def concat_attention(a, b, a_lengths, b_lengths, max_seq_len, hidden_units=150,
                     scope='concat-attention', reuse=False):
    """
    For sequences a and b of lengths a_lengths and b_lengths, computes an attention matrix attn,
    where attn(i, j) = dot(v, tanh(W*[a_i; b_j])).  v is a learnable vector and W is a learnable
    matrix.  The rows of attn are softmax normalized.

    Args:
        a: Input sequence a.  Tensor of shape [batch_size, max_seq_len, input_size].
        b: Input sequence b.  Tensor of shape [batch_size, max_seq_len, input_size].
        a_lengths: Lengths of sequences in a.  Tensor of shape [batch_size].
        b_lengths: Lengths of sequences in b.  Tensor of shape [batch_size].
        max_seq_len: Length of padded sequences a and b.  Integer.
        hidden_units: Number of hidden units.  Integer.
        scope: Name passed to tf.variable_scope.
        reuse: Reuse flag passed to tf.variable_scope.

    Returns:
        Attention matrix.  Rank-3 tensor (the 'ijk' einsum output); presumably
        [batch_size, max_seq_len, max_seq_len] given the documented input shapes.
    """
    with tf.variable_scope(scope, reuse=reuse):
        # Insert singleton axes at positions 2 and 1 before joining a and b along axis 3.
        # NOTE(review): tf.concat does not broadcast the singleton axes -- confirm this works
        # for max_seq_len > 1.
        a = tf.expand_dims(a, 2)
        b = tf.expand_dims(b, 1)
        c = tf.concat([a, b], axis=3)
        # NOTE(review): this call is truncated in the listing -- the arguments before `shape=`
        # and the closing parenthesis are missing; restore them from the upstream source.
        W = tf.get_variable(
            shape=[shape(c, -1), hidden_units]
        cW = tf.einsum('ijkl,lm->ijkm', c, W)
        # NOTE(review): this call is truncated in the listing -- all arguments are missing.
        v = tf.get_variable(
        logits = tf.einsum('ijkl,l->ijk', tf.nn.tanh(cW), v)
        # Subtract the row maximum before exponentiating (numerical stability).
        logits = logits - tf.expand_dims(tf.reduce_max(logits, axis=2), 2)
        attn = tf.exp(logits)
        attn = mask_attention_weights(attn, a_lengths, b_lengths, max_seq_len)
        # The 1e-10 term keeps the division finite when a row sums to zero.
        return attn / tf.expand_dims(tf.reduce_sum(attn, axis=2) + 1e-10, 2)
