This article collects typical usage examples of the torch.LongTensor.repeat method in Python. If you have been wondering what LongTensor.repeat does, how to call it, or what real code using it looks like, the hand-picked example below may help. You can also read more about the containing class, torch.LongTensor.
One code example of LongTensor.repeat is listed below. Examples are sorted by popularity by default; you can upvote the ones you like or find useful, and your feedback helps the system recommend better Python code examples.
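Before the full example, here is a minimal standalone sketch (not taken from the example below) of what repeat does on a LongTensor: it tiles the tensor along each dimension the given number of times.

import torch

mask = torch.LongTensor([[1, 1, 0]])    # shape (1, 3)
tiled = mask.repeat(1, 2)               # tile twice along dim 1 -> shape (1, 6)
print(tiled)                            # tensor([[1, 1, 0, 1, 1, 0]])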
Example 1: forward
# Required import: from torch import LongTensor [as alias]
# Or: from torch.LongTensor import repeat [as alias]
def forward(self, # pylint: disable=arguments-differ
inputs: torch.Tensor,
mask: torch.LongTensor = None) -> torch.FloatTensor:
"""
Parameters
----------
inputs : ``torch.FloatTensor``, required.
A tensor of shape (batch_size, timesteps, input_dim)
mask : ``torch.LongTensor``, optional (default = None).
A tensor of shape (batch_size, timesteps).
Returns
-------
A tensor of shape (batch_size, timesteps, output_projection_dim),
where output_projection_dim = input_dim by default.
"""
num_heads = self._num_heads
batch_size, timesteps, _ = inputs.size()
if mask is None:
mask = inputs.new_ones(batch_size, timesteps)
# Shape (batch_size, timesteps, 2 * attention_dim + values_dim)
combined_projection = self._combined_projection(inputs)
# split by attention dim - if values_dim > attention_dim, we will get more
# than 3 elements returned. All of the rest are the values vector, so we
# just concatenate them back together again below.
queries, keys, *values = combined_projection.split(self._attention_dim, -1)
queries = queries.contiguous()
keys = keys.contiguous()
values = torch.cat(values, -1).contiguous()
# Shape (num_heads * batch_size, timesteps, values_dim / num_heads)
values_per_head = values.view(batch_size, timesteps, num_heads, int(self._values_dim/num_heads))
values_per_head = values_per_head.transpose(1, 2).contiguous()
values_per_head = values_per_head.view(batch_size * num_heads, timesteps, int(self._values_dim/num_heads))
# Shape (num_heads * batch_size, timesteps, attention_dim / num_heads)
queries_per_head = queries.view(batch_size, timesteps, num_heads, int(self._attention_dim/num_heads))
queries_per_head = queries_per_head.transpose(1, 2).contiguous()
queries_per_head = queries_per_head.view(batch_size * num_heads, timesteps, int(self._attention_dim/num_heads))
# Shape (num_heads * batch_size, timesteps, attention_dim / num_heads)
keys_per_head = keys.view(batch_size, timesteps, num_heads, int(self._attention_dim/num_heads))
keys_per_head = keys_per_head.transpose(1, 2).contiguous()
keys_per_head = keys_per_head.view(batch_size * num_heads, timesteps, int(self._attention_dim/num_heads))
# shape (num_heads * batch_size, timesteps, timesteps)
scaled_similarities = torch.bmm(queries_per_head, keys_per_head.transpose(1, 2)) / self._scale
# shape (num_heads * batch_size, timesteps, timesteps)
# Normalise the distributions, using the same mask for all heads.
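# mask has shape (batch_size, timesteps); repeat(1, num_heads) tiles it to
# (batch_size, num_heads * timesteps), and the view below folds the copies into
# (num_heads * batch_size, timesteps) so every head shares its batch element's mask.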
attention = last_dim_softmax(scaled_similarities, mask.repeat(1, num_heads).view(batch_size * num_heads, timesteps))
attention = self._attention_dropout(attention)
# Take a weighted sum of the values with respect to the attention
# distributions for each element in the num_heads * batch_size dimension.
# shape (num_heads * batch_size, timesteps, values_dim/num_heads)
outputs = weighted_sum(values_per_head, attention)
# Reshape back to original shape (batch_size, timesteps, values_dim)
# shape (batch_size, num_heads, timesteps, values_dim/num_heads)
outputs = outputs.view(batch_size, num_heads, timesteps, int(self._values_dim / num_heads))
# shape (batch_size, timesteps, num_heads, values_dim/num_heads)
outputs = outputs.transpose(1, 2).contiguous()
# shape (batch_size, timesteps, values_dim)
outputs = outputs.view(batch_size, timesteps, self._values_dim)
# Project back to original input size.
# shape (batch_size, timesteps, input_size)
outputs = self._output_projection(outputs)
return outputs
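The only use of LongTensor.repeat in this example is the mask expansion above. A minimal sketch of just that step, with hypothetical sizes and independent of the surrounding class, could look like this:

import torch

batch_size, timesteps, num_heads = 2, 4, 3
mask = torch.ones(batch_size, timesteps, dtype=torch.long)   # shape (2, 4)
# Tile the mask once per head, then fold the head copies into the batch dimension.
per_head_mask = mask.repeat(1, num_heads).view(batch_size * num_heads, timesteps)
print(per_head_mask.shape)   # torch.Size([6, 4])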