This article collects typical usage examples of the torch.Tensor.new_ones method in Python. If you are wondering what Tensor.new_ones is for, or how to use it in practice, the curated code examples below may help. You can also read more about the containing class, torch.Tensor.
The following presents 3 code examples of the Tensor.new_ones method, sorted by popularity by default.
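Before the examples, a minimal sketch of what new_ones itself does: it allocates a tensor of ones whose dtype and device are inherited from the source tensor, so none of the examples below need to pass those explicitly. The sizes here are arbitrary illustrations.

import torch

# new_ones builds a tensor of ones that copies dtype and device from the
# source tensor; the shape is whatever sizes you pass in.
source = torch.zeros(2, 3, dtype=torch.float64)
ones = source.new_ones(2, 5)
print(ones.dtype)   # torch.float64 (inherited from `source`, not the default float32)
print(ones.shape)   # torch.Size([2, 5])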
Example 1: forward
# Required import: from torch import Tensor [as alias]
# Or: from torch.Tensor import new_ones [as alias]
def forward(self, matrix_1: torch.Tensor, matrix_2: torch.Tensor) -> torch.Tensor:
    if self._use_input_biases:
        # Append a column of ones to each input so the bilinear weight can
        # also learn per-input bias terms; new_ones matches dtype and device.
        bias1 = matrix_1.new_ones(matrix_1.size()[:-1] + (1,))
        bias2 = matrix_2.new_ones(matrix_2.size()[:-1] + (1,))

        matrix_1 = torch.cat([matrix_1, bias1], -1)
        matrix_2 = torch.cat([matrix_2, bias2], -1)

    intermediate = torch.matmul(matrix_1.unsqueeze(1), self._weight_matrix.unsqueeze(0))
    final = torch.matmul(intermediate, matrix_2.unsqueeze(1).transpose(2, 3))
    return self._activation(final.squeeze(1) + self._bias)
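To make the shapes concrete, here is a minimal standalone sketch of the same bilinear scoring with hypothetical sizes; the (dim + 1, dim + 1) weight stands in for self._weight_matrix when input biases are enabled, and its single leading dimension plays the role of the label dimension.

import torch

batch_size, rows_1, rows_2, dim = 2, 5, 7, 4
matrix_1 = torch.randn(batch_size, rows_1, dim)
matrix_2 = torch.randn(batch_size, rows_2, dim)

# Append a bias column of ones to each matrix, as the forward does when
# self._use_input_biases is True. new_ones keeps dtype and device.
bias1 = matrix_1.new_ones(matrix_1.size()[:-1] + (1,))
bias2 = matrix_2.new_ones(matrix_2.size()[:-1] + (1,))
matrix_1 = torch.cat([matrix_1, bias1], -1)
matrix_2 = torch.cat([matrix_2, bias2], -1)

# One (dim + 1, dim + 1) weight "label"; the real module may hold several.
weight = torch.randn(1, dim + 1, dim + 1)

intermediate = torch.matmul(matrix_1.unsqueeze(1), weight.unsqueeze(0))
final = torch.matmul(intermediate, matrix_2.unsqueeze(1).transpose(2, 3))
scores = final.squeeze(1)
print(scores.shape)   # torch.Size([2, 5, 7]) -> (batch_size, rows_1, rows_2)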
Example 2: get_mask_from_sequence_lengths
# Required import: from torch import Tensor [as alias]
# Or: from torch.Tensor import new_ones [as alias]
def get_mask_from_sequence_lengths(sequence_lengths: torch.Tensor, max_length: int) -> torch.Tensor:
    """
    Given a variable of shape ``(batch_size,)`` that represents the sequence lengths of each batch
    element, this function returns a ``(batch_size, max_length)`` mask variable. For example, if
    our input was ``[2, 2, 3]``, with a ``max_length`` of 4, we'd return
    ``[[1, 1, 0, 0], [1, 1, 0, 0], [1, 1, 1, 0]]``.

    We require ``max_length`` here instead of just computing it from the input ``sequence_lengths``
    because it lets us avoid finding the max, then copying that value from the GPU to the CPU so
    that we can use it to construct a new tensor.
    """
    # (batch_size, max_length)
    ones = sequence_lengths.new_ones(sequence_lengths.size(0), max_length)
    range_tensor = ones.cumsum(dim=1)
    return (sequence_lengths.unsqueeze(1) >= range_tensor).long()
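A quick sanity check of the docstring's example, assuming the function defined above is in scope:

import torch

lengths = torch.tensor([2, 2, 3])
mask = get_mask_from_sequence_lengths(lengths, max_length=4)
print(mask)
# tensor([[1, 1, 0, 0],
#         [1, 1, 0, 0],
#         [1, 1, 1, 0]])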
Example 3: forward
# Required import: from torch import Tensor [as alias]
# Or: from torch.Tensor import new_ones [as alias]
def forward(self,  # pylint: disable=arguments-differ
            inputs: torch.Tensor,
            mask: torch.LongTensor = None) -> torch.FloatTensor:
    """
    Parameters
    ----------
    inputs : ``torch.FloatTensor``, required.
        A tensor of shape (batch_size, timesteps, input_dim)
    mask : ``torch.FloatTensor``, optional (default = None).
        A tensor of shape (batch_size, timesteps).

    Returns
    -------
    A tensor of shape (batch_size, timesteps, output_projection_dim),
    where output_projection_dim = input_dim by default.
    """
    num_heads = self._num_heads

    batch_size, timesteps, _ = inputs.size()
    if mask is None:
        mask = inputs.new_ones(batch_size, timesteps)

    # Shape (batch_size, timesteps, 2 * attention_dim + values_dim)
    combined_projection = self._combined_projection(inputs)
    # split by attention dim - if values_dim > attention_dim, we will get more
    # than 3 elements returned. All of the rest are the values vector, so we
    # just concatenate them back together again below.
    queries, keys, *values = combined_projection.split(self._attention_dim, -1)
    queries = queries.contiguous()
    keys = keys.contiguous()
    values = torch.cat(values, -1).contiguous()

    # Shape (num_heads * batch_size, timesteps, values_dim / num_heads)
    values_per_head = values.view(batch_size, timesteps, num_heads, int(self._values_dim / num_heads))
    values_per_head = values_per_head.transpose(1, 2).contiguous()
    values_per_head = values_per_head.view(batch_size * num_heads, timesteps, int(self._values_dim / num_heads))

    # Shape (num_heads * batch_size, timesteps, attention_dim / num_heads)
    queries_per_head = queries.view(batch_size, timesteps, num_heads, int(self._attention_dim / num_heads))
    queries_per_head = queries_per_head.transpose(1, 2).contiguous()
    queries_per_head = queries_per_head.view(batch_size * num_heads, timesteps, int(self._attention_dim / num_heads))

    # Shape (num_heads * batch_size, timesteps, attention_dim / num_heads)
    keys_per_head = keys.view(batch_size, timesteps, num_heads, int(self._attention_dim / num_heads))
    keys_per_head = keys_per_head.transpose(1, 2).contiguous()
    keys_per_head = keys_per_head.view(batch_size * num_heads, timesteps, int(self._attention_dim / num_heads))

    # shape (num_heads * batch_size, timesteps, timesteps)
    scaled_similarities = torch.bmm(queries_per_head, keys_per_head.transpose(1, 2)) / self._scale

    # shape (num_heads * batch_size, timesteps, timesteps)
    # Normalise the distributions, using the same mask for all heads.
    attention = last_dim_softmax(scaled_similarities,
                                 mask.repeat(1, num_heads).view(batch_size * num_heads, timesteps))
    attention = self._attention_dropout(attention)

    # Take a weighted sum of the values with respect to the attention
    # distributions for each element in the num_heads * batch_size dimension.
    # shape (num_heads * batch_size, timesteps, values_dim/num_heads)
    outputs = weighted_sum(values_per_head, attention)

    # Reshape back to original shape (batch_size, timesteps, values_dim)
    # shape (batch_size, num_heads, timesteps, values_dim/num_heads)
    outputs = outputs.view(batch_size, num_heads, timesteps, int(self._values_dim / num_heads))
    # shape (batch_size, timesteps, num_heads, values_dim/num_heads)
    outputs = outputs.transpose(1, 2).contiguous()
    # shape (batch_size, timesteps, values_dim)
    outputs = outputs.view(batch_size, timesteps, self._values_dim)

    # Project back to original input size.
    # shape (batch_size, timesteps, input_size)
    outputs = self._output_projection(outputs)
    return outputs
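The repeated view/transpose/view pattern above is just a head split followed by the matching merge; a minimal sketch with hypothetical sizes shows that the round trip is lossless:

import torch

# Illustrative sizes only; nothing here depends on the module above.
batch_size, timesteps, values_dim, num_heads = 2, 6, 8, 4
values = torch.randn(batch_size, timesteps, values_dim)

head_dim = values_dim // num_heads
# Split: (batch, timesteps, values_dim) -> (num_heads * batch, timesteps, head_dim)
per_head = values.view(batch_size, timesteps, num_heads, head_dim)
per_head = per_head.transpose(1, 2).contiguous()
per_head = per_head.view(batch_size * num_heads, timesteps, head_dim)
print(per_head.shape)  # torch.Size([8, 6, 2])

# Merge: reversing the reshape recovers the original layout, as done for `outputs`.
merged = per_head.view(batch_size, num_heads, timesteps, head_dim)
merged = merged.transpose(1, 2).contiguous().view(batch_size, timesteps, values_dim)
assert torch.equal(merged, values)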