

Python torch.matmul Function Code Examples

This article collects typical usage examples of Python's torch.matmul function. If you are wondering what exactly torch.matmul does, how to call it, or what real code that uses it looks like, the hand-picked examples below should help.


The following shows 15 code examples of the matmul function, drawn from open-source projects and ordered roughly by popularity.
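Before the project code below, here is a minimal, self-contained sketch of the three torch.matmul behaviours those examples rely on: the vector dot product, the plain matrix product, and the batched product with broadcasting over leading dimensions. It is not taken from any of the projects; all tensor names and shapes are illustrative.

import torch

# 1) vector x vector -> 0-dim tensor (dot product)
a = torch.randn(3)
b = torch.randn(3)
dot = torch.matmul(a, b)        # shape: ()

# 2) matrix x matrix -> ordinary matrix product
m = torch.randn(2, 3)
n = torch.randn(3, 4)
mm = torch.matmul(m, n)         # shape: (2, 4)

# 3) batched matmul: leading (batch) dimensions are broadcast,
#    the last two dimensions are matrix-multiplied
q = torch.randn(8, 5, 64)       # e.g. (batch, seq_len, dim)
k = torch.randn(8, 64, 5)
scores = torch.matmul(q, k)     # shape: (8, 5, 5)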

Example 1: forward

    def forward(self,x):
        max_sample = x.size()[1]
        x = x.view(-1,self.feature_size)
        assignment = th.matmul(x,self.clusters)

        if self.add_batch_norm:
            assignment = self.batch_norm(assignment)

        assignment = F.softmax(assignment, dim=1)
        assignment = assignment.view(-1, max_sample, self.cluster_size)

        assignment = assignment.transpose(1,2)

        x = x.view(-1, max_sample, self.feature_size)
        rvlad = th.matmul(assignment, x)
        rvlad = rvlad.transpose(-1,1)

        # L2 intra norm
        rvlad = F.normalize(rvlad)
        
        # flattening + L2 norm
        rvlad = rvlad.view(-1, self.cluster_size*self.feature_size)
        rvlad = F.normalize(rvlad)

        return rvlad
Developer: lvaleriu, Project: Mixture-of-Embedding-Experts, Lines: 25, Source: loupe.py

Example 2: forward

    def forward(self, hidden_states, attention_mask):
        mixed_query_layer = self.query(hidden_states)
        mixed_key_layer = self.key(hidden_states)
        mixed_value_layer = self.value(hidden_states)

        query_layer = self.transpose_for_scores(mixed_query_layer)
        key_layer = self.transpose_for_scores(mixed_key_layer)
        value_layer = self.transpose_for_scores(mixed_value_layer)

        # Take the dot product between "query" and "key" to get the raw attention scores.
        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
        # Apply the attention mask (precomputed for all layers in the BertModel forward() function)
        attention_scores = attention_scores + attention_mask

        # Normalize the attention scores to probabilities.
        attention_probs = nn.Softmax(dim=-1)(attention_scores)

        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.
        attention_probs = self.dropout(attention_probs)

        context_layer = torch.matmul(attention_probs, value_layer)
        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(*new_context_layer_shape)
        return context_layer
Developer: zhouleidcc, Project: bert-Chinese-classification-task, Lines: 27, Source: modeling.py

Example 3: forward

	def forward(self, sequence, graph):
		"""
		Apply self-attention to the sequence, ignores
		the graph
		"""
		sequence = sequence.squeeze(1)	
		
		#get the dimension
		n, d = sequence.size()
		
		#project the sequence into key, value, and query sequences
		keySeq = f.relu(self.keyProj(sequence))
		valueSeq = f.relu(self.valueProj(sequence))
		querySeq = f.relu(self.queryProj(sequence))
		
		#combine query with each key
		#a_ijh = softmax( (q_ih^T k_jh) / sqrt(d) )
		#the result is, row i is the importance of the sequence for key i
		importance = f.softmax(t.matmul(querySeq, keySeq.permute(1,0)) / math.sqrt(d), 0).permute(1,0)

		#apply the importance weights to the value sequence
		attention = t.matmul(valueSeq.permute(1,0), importance).permute(1,0)
	
		#sum the sequence for a complete representation
		final = t.sum(attention, 0)
		
		return attention.unsqueeze(1), final
Developer: jworr, Project: ml_tools, Lines: 27, Source: attention.py

Example 4: train

def train(ep):
    model.train()
    total_loss = 0
    count = 0
    train_idx_list = np.arange(len(X_train), dtype="int32")
    np.random.shuffle(train_idx_list)
    for idx in train_idx_list:
        data_line = X_train[idx]
        x, y = Variable(data_line[:-1]), Variable(data_line[1:])
        if args.cuda:
            x, y = x.cuda(), y.cuda()

        optimizer.zero_grad()
        output = model(x.unsqueeze(0)).squeeze(0)
        loss = -torch.trace(torch.matmul(y, torch.log(output).float().t()) +
                            torch.matmul((1 - y), torch.log(1 - output).float().t()))
        total_loss += loss.data[0]
        count += output.size(0)

        loss.backward()
        # clip gradients only after backward() has populated them
        if args.clip > 0:
            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()
        if idx > 0 and idx % args.log_interval == 0:
            cur_loss = total_loss / count
            print("Epoch {:2d} | lr {:.5f} | loss {:.5f}".format(ep, lr, cur_loss))
            total_loss = 0.0
            count = 0
Developer: wasaCheney, Project: TCN, Lines: 28, Source: music_test.py

Example 5: forward

 def forward(self, context, state, input_):
     output = (torch.matmul(context, self._v_c.unsqueeze(1))
               + torch.matmul(state, self._v_s.unsqueeze(1))
               + torch.matmul(input_, self._v_i.unsqueeze(1)))
     if self._b is not None:
         output = output + self._b.unsqueeze(0)
     return output
Developer: ShawnXiha, Project: fast_abs_rl, Lines: 7, Source: copy_summ.py

Example 6: score

 def score(self, hidden, encoder_output):
     
     if self.method == 'dot':            
         # hidden is 1 by 256
         # encoder_output is 22 by 256
         encoder_output = torch.transpose(encoder_output, 0, 1)
         # encoder_output is 256 by 22
         energy = torch.matmul(hidden, encoder_output)
         return energy
     
     elif self.method == 'general':
         # hidden is 1 by 256
         # encoder_output is 256 by 22
         # encoder_output = torch.transpose(encoder_output, 0, 1)
         hidden = hidden.view(1, -1)
         a = self.attn(encoder_output)
         a = torch.transpose(a, 0, 1)
         energy = torch.matmul(hidden, a)
         return energy
     
     elif self.method == 'concat':
         len_encoder_output = encoder_output.size()[1]
         # hidden is 1 by 256
         # encoder_output is 256 by 22
         hidden = torch.transpose(hidden, 0, 1)
         # hidden is 256 by 1
         hidden = hidden.repeat(1, len_encoder_output)
         # hidden is 256 by 22
         concat = torch.cat((hidden, encoder_output), dim=0)
         # concat is 512 by 22
         # self.attn(concat) --> 256 by 22
         energy = torch.matmul(self.v, F.tanh(self.attn(concat)))
         return energy
Developer: vwrj, Project: neural_machine_translation, Lines: 33, Source: V2-Attention-Vish.py

Example 7: write

    def write(self, z, time, debug=False):
        # update usage indicator
        self.u = self.u + T.matmul(Variable(T.from_numpy(np.ones((1, Kr), dtype=np.float32))), self.W_predictor)

        # update writing weights
        prev_v_wr = self.v_wr
        v_wr = np.zeros((N_mem, 1), dtype=np.float32)
        if time < N_mem:
            v_wr[time][0] = 1
        else:
            waste_index = int(T.argmin(self.u).data)
            v_wr[waste_index][0] = 1
        self.v_wr = Variable(T.from_numpy(v_wr))

        # writing
        # z: (1, Z_DIM)
        if debug:
            print(self.M)
        if USE_RETROACTIVE:
            # update retroactive weights
            self.v_ret = GAMMA*self.v_ret + (1-GAMMA)*prev_v_wr
            z_wr = T.cat([z, Variable(T.from_numpy(np.zeros((1, Z_DIM), dtype=np.float32)))], 1)
            z_ret = T.cat([Variable(T.from_numpy(np.zeros((1, Z_DIM), dtype=np.float32))), z], 1)
            self.M = self.M + T.matmul(self.v_wr, z_wr) + T.matmul(self.v_ret, z_ret)
        else:
            self.M = self.M + T.matmul(self.v_wr, z)
        if debug:
            return self.M
Developer: andreofner, Project: MERLIN, Lines: 28, Source: memory.py

Example 8: grad2

def grad2():
    W = Variable(torch.rand(2, 2), requires_grad=True)
    W2 = Variable(torch.rand(2, 1), requires_grad=True)
    x1 = Variable(torch.rand(1, 2), requires_grad=True)
    x2 = Variable(torch.rand(1, 2), requires_grad=True)

    print("w: ")
    print(W)
    print("x1: ")
    print(x1)
    print("x2: ")
    print(x2)
    print("--------------------")

    y1 = torch.matmul(torch.matmul(x1, W), W2)
    print(torch.matmul(W, W2))
    # y = Variable(y, requires_grad=True)
    # print("y1:")
    # print(y1)

    y1.backward()
    # print(W.grad)
    print(x1.grad)

    # W.grad.data.zero_()
    # x1.grad.data.zero_()
    y2 = torch.matmul(torch.matmul(x2, W), W2)
    y2.backward()
    # print("y2: ")
    # print(y2)
    # print(W.grad)
    print(x2.grad)
Developer: gonglixue, Project: PRML_Python, Lines: 32, Source: gradient.py

Example 9: _attn

 def _attn(self, q, k, v):
     w = torch.matmul(q, k)
     if self.scale:
         w = w / math.sqrt(v.size(-1))
     w = w * self.b + -1e9 * (1 - self.b)  # TF implem method: mask_attn_weights
     w = nn.Softmax(dim=-1)(w)
     w = self.attn_dropout(w)
     return torch.matmul(w, v)
Developer: chenghuige, Project: pytorch-openai-transformer-lm, Lines: 8, Source: model_pytorch.py

Example 10: test

def test():
    x = torch.ones(1, 2)
    Sigma = torch.FloatTensor([[1, 0.8], [0.8, 1]])

    z = torch.ones(x.size())
    y = torch.matmul(x, Sigma)
    y = torch.matmul(y, x.t())
    print(y)
Developer: gonglixue, Project: PRML_Python, Lines: 8, Source: test.py

Example 11: attention_score

 def attention_score(attention, query, v, w):
     """ unnormalized attention score"""
     sum_ = attention.unsqueeze(1) + torch.matmul(
         query, w.unsqueeze(0)
     ).unsqueeze(2)  # [B, Nq, Ns, D]
     score = torch.matmul(
         F.tanh(sum_), v.unsqueeze(0).unsqueeze(1).unsqueeze(3)
     ).squeeze(3)  # [B, Nq, Ns]
     return score
Developer: ShawnXiha, Project: fast_abs_rl, Lines: 9, Source: extract.py

Example 12: attention

 def attention(cls, query, key, value, mask=None, dropout=None):
     d_k = query.size(-1)
     scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
     if mask is not None:
         scores = scores.masked_fill(mask == 0, -1e9)
     p_attn = F.softmax(scores, dim=-1)
     if dropout is not None:
         p_attn = dropout(p_attn)
     return torch.matmul(p_attn, value), p_attn
Developer: daixiangau, Project: naacl2019-select-pretraining-data-for-ner, Lines: 9, Source: attention.py

Example 13: _prepare

 def _prepare(self, attn_mem):
     attn_feat = torch.matmul(attn_mem, self._attn_wm.unsqueeze(0))
     hop_feat = torch.matmul(attn_mem, self._hop_wm.unsqueeze(0))
     bs = attn_mem.size(0)
     n_l, d = self._init_h.size()
     size = (n_l, bs, d)
     lstm_states = (self._init_h.unsqueeze(1).expand(*size).contiguous(),
                    self._init_c.unsqueeze(1).expand(*size).contiguous())
     d = self._init_i.size(0)
     init_i = self._init_i.unsqueeze(0).unsqueeze(1).expand(bs, 1, d)
     return attn_feat, hop_feat, lstm_states, init_i
Developer: ShawnXiha, Project: fast_abs_rl, Lines: 11, Source: extract.py

Example 14: forward

    def forward(self, matrix_1: torch.Tensor, matrix_2: torch.Tensor) -> torch.Tensor:

        if self._use_input_biases:
            bias1 = matrix_1.new_ones(matrix_1.size()[:-1] + (1,))
            bias2 = matrix_2.new_ones(matrix_2.size()[:-1] + (1,))

            matrix_1 = torch.cat([matrix_1, bias1], -1)
            matrix_2 = torch.cat([matrix_2, bias2], -1)
        intermediate = torch.matmul(matrix_1.unsqueeze(1), self._weight_matrix.unsqueeze(0))
        final = torch.matmul(intermediate, matrix_2.unsqueeze(1).transpose(2, 3))
        return self._activation(final.squeeze(1) + self._bias)
Developer: pyknife, Project: allennlp, Lines: 11, Source: bilinear_matrix_attention.py

Example 15: high_dimension_gaussain_energy

def high_dimension_gaussain_energy(x):
    u = High_mu
    Sigma = High_Sigma
    Sigma = torch.inverse(Sigma)

    if isinstance(x, Variable):
        u = Variable(u, requires_grad=True)
        Sigma = Variable(Sigma, requires_grad=True)

    diff = x - u
    temp = 0.5 * torch.matmul(torch.matmul(diff, Sigma), diff.t())

    return temp
Developer: gonglixue, Project: PRML_Python, Lines: 13, Source: HamiltonianDynamics.py
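
Several of the examples above (for instance Examples 11, 13 and 14) multiply a single shared weight matrix against a batched input by unsqueezing the weight and letting torch.matmul broadcast it. The short sketch below illustrates that pattern; the shapes and variable names are assumptions for illustration, not taken from those projects.

import torch

batch, seq_len, d_in, d_out = 4, 7, 32, 16
attn_mem = torch.randn(batch, seq_len, d_in)    # batched sequence features
weight = torch.randn(d_in, d_out)               # one weight matrix shared across the batch

# weight.unsqueeze(0) has shape (1, d_in, d_out); matmul broadcasts it over the batch
feat = torch.matmul(attn_mem, weight.unsqueeze(0))   # shape: (batch, seq_len, d_out)

# matmul also broadcasts a plain 2-D operand, so the unsqueeze is optional here
assert torch.allclose(feat, torch.matmul(attn_mem, weight), atol=1e-6)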


Note: The torch.matmul examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and any use or redistribution should follow the corresponding project's license. Please do not reproduce this compilation without permission.