This article collects typical usage examples of the torch.tril method in Python. If you are wondering what torch.tril does or how to use it in practice, the hand-picked examples below may help; they also illustrate how the method fits into the torch module it belongs to.
The following shows 15 code examples of torch.tril, drawn from open-source projects and ordered by popularity.
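Before the examples, here is a minimal sketch of what torch.tril itself does: it keeps only the elements on and below a chosen diagonal and zeroes the rest, which is why it appears so often when building causal (lower-triangular) attention masks.

import torch

mask = torch.tril(torch.ones(4, 4))                   # main diagonal and below kept
# tensor([[1., 0., 0., 0.],
#         [1., 1., 0., 0.],
#         [1., 1., 1., 0.],
#         [1., 1., 1., 1.]])
strict = torch.tril(torch.ones(4, 4), diagonal=-1)    # strictly below the diagonal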
Example 1: __init__
# Required import: import torch
# Or: from torch import tril
def __init__(self, nx, n_ctx, cfg, scale=False):
    super(Attention, self).__init__()
    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    assert n_state % cfg.nH == 0
    self.register_buffer('b', torch.tril(torch.ones(
        n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
    self.n_head = cfg.nH
    self.split_size = n_state
    self.scale = scale
    self.c_attn = Conv1D(n_state * 3, 1, nx)
    self.c_proj = Conv1D(n_state, 1, nx)
    self.attn_dropout = nn.Dropout(cfg.adpt)
    self.resid_dropout = nn.Dropout(cfg.rdpt)
    # dimensions of w: (batch_size x num_heads x seq_length x seq_length)
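The forward pass is not part of this snippet; the sketch below shows one common way such a buffered `b` mask is applied to the attention scores in GPT-style models. The helper name and the -1e9 constant are assumptions, not taken from the example.

import torch

def apply_causal_mask(w, b):
    # w: (batch, heads, query_len, key_len); b: (1, 1, n_ctx, n_ctx) tril buffer
    nd, ns = w.size(-2), w.size(-1)
    mask = b[:, :, ns - nd:ns, :ns]        # crop the buffer to the current lengths
    return w * mask - 1e9 * (1 - mask)     # allowed positions kept, future ones pushed to -1e9

n_ctx = 5
b = torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx)
w = torch.randn(2, 4, n_ctx, n_ctx)
attn = torch.softmax(apply_causal_mask(w, b), dim=-1)  # each query attends only to itself and earlier keys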
Example 2: select_merge_data
# Required import: import torch
# Or: from torch import tril
def select_merge_data(self, u_feas, label, label_to_images, ratio_n, dists):
    dists.add_(torch.tril(100000 * torch.ones(len(u_feas), len(u_feas))))  # block the diagonal and lower triangle so each pair is considered once
    cnt = torch.FloatTensor([len(label_to_images[label[idx]]) for idx in range(len(u_feas))])
    dists += ratio_n * (cnt.view(1, len(cnt)) + cnt.view(len(cnt), 1))  # dist += |A| + |B|
    for idx in range(len(u_feas)):
        for j in range(idx + 1, len(u_feas)):
            if label[idx] == label[j]:
                dists[idx, j] = 100000  # block pairs that already share a cluster
    dists = dists.numpy()
    # argsort over the flattened matrix; unravel_index maps the flat order back to (row, col) indices
    ind = np.unravel_index(np.argsort(dists, axis=None), dists.shape)
    idx1 = ind[0]  # row indices, ordered by increasing distance
    idx2 = ind[1]  # column indices, ordered by increasing distance
    return idx1, idx2
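A toy illustration (not part of the method above) of the tril trick used here: adding a large constant to the diagonal and lower triangle means every unordered pair (i, j) with i < j is ranked exactly once, so the first entries of the argsort are the closest candidate pairs to merge.

import numpy as np
import torch

dists = torch.tensor([[0.0, 0.3, 0.9],
                      [0.3, 0.0, 0.2],
                      [0.9, 0.2, 0.0]])
dists = dists + torch.tril(100000 * torch.ones(3, 3))     # only i < j entries stay small
ind = np.unravel_index(np.argsort(dists.numpy(), axis=None), dists.shape)
print(ind[0][0], ind[1][0])                               # -> 1 2, the closest remaining pair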
Example 3: select_merge_data_v2
# Required import: import torch
# Or: from torch import tril
def select_merge_data_v2(self, u_feas, labels, linkages):
    linkages += np.tril(100000 * np.ones((len(u_feas), len(u_feas))))  # block the diagonal and lower triangle
    print('Linkage adding')
    for idx in range(len(u_feas)):
        for j in range(idx + 1, len(u_feas)):
            if labels[idx] == labels[j]:
                linkages[idx, j] = 100000  # block pairs that already share a cluster
    # argsort over the flattened matrix; unravel_index maps the flat order back to (row, col) indices
    ind = np.unravel_index(np.argsort(linkages, axis=None), linkages.shape)
    idx1 = ind[0]  # the first cluster index
    idx2 = ind[1]  # the second cluster index
    print('Linkage add finished')
    return idx1, idx2
Example 4: select_merge_data
# Required import: import torch
# Or: from torch import tril
def select_merge_data(self, u_feas, label, label_to_images, ratio_n, dists):
    dists.add_(torch.tril(100000 * torch.ones(len(u_feas), len(u_feas))))
    cnt = torch.FloatTensor([len(label_to_images[label[idx]]) for idx in range(len(u_feas))])
    dists += ratio_n * (cnt.view(1, len(cnt)) + cnt.view(len(cnt), 1))
    for idx in range(len(u_feas)):
        for j in range(idx + 1, len(u_feas)):
            if label[idx] == label[j]:
                dists[idx, j] = 100000
    dists = dists.numpy()
    ind = np.unravel_index(np.argsort(dists, axis=None), dists.shape)
    idx1 = ind[0]
    idx2 = ind[1]
    return idx1, idx2
Example 5: __init__
# Required import: import torch
# Or: from torch import tril
def __init__(self,
             nx: int,
             n_ctx: int,
             config: TransformerConfig,
             scale: bool = False) -> None:
    super().__init__()
    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.num_heads == 0
    self.register_buffer('b', torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
    self.n_head = config.num_heads
    self.split_size = n_state
    self.scale = scale
    self.c_attn = Conv1D(n_state * 3, 1, nx)
    self.c_proj = Conv1D(n_state, 1, nx)
    self.attn_dropout = torch.nn.Dropout(config.attention_dropout_probability)
    self.resid_dropout = torch.nn.Dropout(config.residual_dropout_probability)
Example 6: __init__
# Required import: import torch
# Or: from torch import tril
def __init__(self, num_inputs):
    super(LUInvertibleMM, self).__init__()
    self.W = torch.Tensor(num_inputs, num_inputs)
    nn.init.orthogonal_(self.W)
    self.L_mask = torch.tril(torch.ones(self.W.size()), -1)
    self.U_mask = self.L_mask.t().clone()
    P, L, U = sp.linalg.lu(self.W.numpy())
    self.P = torch.from_numpy(P)
    self.L = nn.Parameter(torch.from_numpy(L))
    self.U = nn.Parameter(torch.from_numpy(U))
    S = np.diag(U)
    sign_S = np.sign(S)
    log_S = np.log(abs(S))
    self.sign_S = torch.from_numpy(sign_S)
    self.log_S = nn.Parameter(torch.from_numpy(log_S))
    self.I = torch.eye(self.L.size(0))
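The forward pass of this layer is not shown; the sketch below is one plausible way the masks are used to rebuild the weight matrix, following the standard LU parameterisation (W = P·L·U with a unit-diagonal L and the diagonal of U stored as sign_S and log_S). The function name is an assumption.

import torch

def rebuild_weight(P, L, U, L_mask, U_mask, sign_S, log_S, I):
    L_full = L * L_mask + I                                       # unit lower-triangular factor
    U_full = U * U_mask + torch.diag(sign_S * torch.exp(log_S))   # upper-triangular factor with its diagonal restored
    return P @ L_full @ U_full                                    # W = P L U

# The log-determinant of W is then simply log_S.sum(), which is why the
# diagonal of U is stored separately as sign_S and log_S.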
Example 7: cumulative_average_mask
# Required import: import torch
# Or: from torch import tril
def cumulative_average_mask(self, batch_size, inputs_len):
    """
    Builds the mask to compute the cumulative average as described in
    https://arxiv.org/abs/1805.00631 -- Figure 3
    Args:
        batch_size (int): batch size
        inputs_len (int): length of the inputs
    Returns:
        (`FloatTensor`):
        * A Tensor of shape `[batch_size x input_len x input_len]`
    """
    triangle = torch.tril(torch.ones(inputs_len, inputs_len))
    weights = torch.ones(1, inputs_len) / torch.arange(
        1, inputs_len + 1, dtype=torch.float)
    mask = triangle * weights.transpose(0, 1)
    return mask.unsqueeze(0).expand(batch_size, inputs_len, inputs_len)
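A quick check (not part of the original class) that a batched matrix multiply with this mask really produces a running mean over the sequence dimension:

import torch

batch_size, inputs_len, dim = 2, 4, 3
triangle = torch.tril(torch.ones(inputs_len, inputs_len))
weights = torch.ones(1, inputs_len) / torch.arange(1, inputs_len + 1, dtype=torch.float)
mask = (triangle * weights.transpose(0, 1)).unsqueeze(0).expand(batch_size, inputs_len, inputs_len)

x = torch.randn(batch_size, inputs_len, dim)
running_mean = torch.bmm(mask, x)          # row t is the mean of x[:, :t+1, :]
assert torch.allclose(running_mean[:, 2], x[:, :3].mean(dim=1), atol=1e-6)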
Example 8: apply_masks
# Required import: import torch
# Or: from torch import tril
def apply_masks(scores, batch_size, unseen_mask, src_lengths):
    seq_len = scores.shape[-1]
    # [1, seq_len, seq_len]
    sequence_mask = torch.ones(seq_len, seq_len).unsqueeze(0).int()
    if unseen_mask:
        # [1, seq_len, seq_len]
        sequence_mask = (
            torch.tril(torch.ones(seq_len, seq_len), diagonal=0).unsqueeze(0).int()
        )
    if src_lengths is not None:
        # [batch_size, 1, seq_len]
        src_lengths_mask = create_src_lengths_mask(
            batch_size=batch_size, src_lengths=src_lengths
        ).unsqueeze(-2)
        # [batch_size, seq_len, seq_len]
        sequence_mask = sequence_mask & src_lengths_mask
    # [batch_size, 1, seq_len, seq_len]
    sequence_mask = sequence_mask.unsqueeze(1)
    scores = scores.masked_fill(sequence_mask == 0, -np.inf)
    return scores
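A toy call of the function above (a sketch; the scores shape is an assumption). With unseen_mask=True and src_lengths=None only the tril-based causal part of the masking is exercised, so the create_src_lengths_mask helper is not needed:

import numpy as np
import torch

scores = torch.randn(2, 4, 5, 5)          # (batch, heads, seq, seq)
masked = apply_masks(scores, batch_size=2, unseen_mask=True, src_lengths=None)
print(masked[0, 0])                        # everything above the diagonal is -inf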
Example 9: buffered_future_mask
# Required import: import torch
# Or: from torch import tril
def buffered_future_mask(self, tensor):
    """Attend to all surrounding words except itself:
    [[0, -inf, 0]
     [0, 0, -inf]
     [0, 0, 0]]
    The attention map is not a true diagonal since we predict y_{t+1} at time-step t.
    """
    dim = tensor.size(0)
    if (
        not hasattr(self, "_future_mask")
        or self._future_mask is None
        or self._future_mask.device != tensor.device
    ):
        self._future_mask = torch.triu(
            utils.fill_with_neg_inf(tensor.new(dim, dim)), 1
        )
        self._future_mask = torch.tril(self._future_mask, 1)
    if self._future_mask.size(0) < dim:
        self._future_mask = torch.triu(
            utils.fill_with_neg_inf(self._future_mask.resize_(dim, dim)), 1
        )
        self._future_mask = torch.tril(self._future_mask, 1)
    return self._future_mask[:dim, :dim]
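Reproducing the mask pattern from the docstring for dim = 3 (a standalone sketch; fairseq's utils.fill_with_neg_inf is replaced here by an explicit fill):

import torch

dim = 3
m = torch.full((dim, dim), float("-inf"))
m = torch.triu(m, 1)      # -inf strictly above the main diagonal
m = torch.tril(m, 1)      # ...then keep it only on the first super-diagonal
print(m)
# tensor([[0., -inf, 0.],
#         [0., 0., -inf],
#         [0., 0., 0.]])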
Example 10: __init__
# Required import: import torch
# Or: from torch import tril
def __init__(self, nx, n_ctx, config, scale=False):
    super(Attention, self).__init__()
    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.n_head == 0
    self.register_buffer("bias", torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
    self.n_head = config.n_head
    self.split_size = n_state
    self.scale = scale
    self.c_attn = Conv1D(n_state * 3, nx)
    self.c_proj = Conv1D(n_state, nx)
    self.attn_dropout = nn.Dropout(config.attn_pdrop)
    self.resid_dropout = nn.Dropout(config.resid_pdrop)
    self.pruned_heads = set()
Example 11: __init__
# Required import: import torch
# Or: from torch import tril
def __init__(self, nx, n_ctx, config, scale=False):
    super(Attention, self).__init__()
    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.n_head == 0
    self.register_buffer("bias", torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
    self.n_head = config.n_head
    self.split_size = n_state
    self.scale = scale
    self.output_attentions = config.output_attentions
    self.c_attn = Conv1D(n_state * 3, nx)
    self.c_proj = Conv1D(n_state, nx)
    self.attn_dropout = nn.Dropout(config.attn_pdrop)
    self.resid_dropout = nn.Dropout(config.resid_pdrop)
Example 12: __init__
# Required import: import torch
# Or: from torch import tril
def __init__(self, nx, n_ctx, config, scale=False):
    super(Attention, self).__init__()
    self.output_attentions = config.output_attentions
    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.n_head == 0
    self.register_buffer("bias", torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
    self.n_head = config.n_head
    self.split_size = n_state
    self.scale = scale
    self.c_attn = Conv1D(n_state * 3, nx)
    self.c_proj = Conv1D(n_state, nx)
    self.attn_dropout = nn.Dropout(config.attn_pdrop)
    self.resid_dropout = nn.Dropout(config.resid_pdrop)
Example 13: __init__
# Required import: import torch
# Or: from torch import tril
def __init__(self, height, width, nb_channel, nb_head, scale=False):
    super(RelationalMHDPA, self).__init__()
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert nb_channel % nb_head == 0
    seq_len = height * width
    self.register_buffer(
        "b",
        torch.tril(torch.ones(seq_len, seq_len)).view(
            1, 1, seq_len, seq_len
        ),
    )
    self.nb_head = nb_head
    self.split_size = nb_channel
    self.scale = scale
    self.projection = nn.Linear(nb_channel, nb_channel * 3)
    self.mlp = nn.Linear(nb_channel, nb_channel)
Example 14: __init__
# Required import: import torch
# Or: from torch import tril
def __init__(self, nx, n_ctx, config, scale=False):
    super().__init__()
    self.output_attentions = config.output_attentions
    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.n_head == 0
    self.register_buffer(
        "bias", torch.tril(torch.ones((n_ctx, n_ctx), dtype=torch.uint8)).view(1, 1, n_ctx, n_ctx)
    )
    self.register_buffer("masked_bias", torch.tensor(-1e4))
    self.n_head = config.n_head
    self.split_size = n_state
    self.scale = scale
    self.c_attn = Conv1D(n_state * 3, nx)
    self.c_proj = Conv1D(n_state, nx)
    self.attn_dropout = nn.Dropout(config.attn_pdrop)
    self.resid_dropout = nn.Dropout(config.resid_pdrop)
    self.pruned_heads = set()
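The forward pass is not shown here; the sketch below follows the pattern used in the GPT-2 implementation this snippet resembles, where the uint8 tril buffer is turned into a boolean mask and torch.where swaps masked positions for the scalar masked_bias. The tensor shapes are assumptions.

import torch

n_ctx = 6
bias = torch.tril(torch.ones((n_ctx, n_ctx), dtype=torch.uint8)).view(1, 1, n_ctx, n_ctx)
masked_bias = torch.tensor(-1e4)

w = torch.randn(2, 8, n_ctx, n_ctx)                   # (batch, heads, query_len, key_len)
nd, ns = w.size(-2), w.size(-1)
mask = bias[:, :, ns - nd:ns, :ns].bool()
w = torch.where(mask, w, masked_bias.to(w.dtype))     # future positions get -1e4
attn = torch.softmax(w, dim=-1)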
Example 15: __init__
# Required import: import torch
# Or: from torch import tril
def __init__(self,
             nx: int,
             n_ctx: int,
             config: TransformerConfig,
             scale: bool = False) -> None:
    super().__init__()
    self.nx = nx
    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.num_heads == 0
    self.register_buffer('b', torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
    self.n_head = config.num_heads
    self.split_size = n_state
    self.scale = scale
    self.c_attn = Conv1D(n_state * 3, 1, nx)
    self.c_proj = Conv1D(n_state, 1, nx)
    self.attn_dropout = torch.nn.Dropout(config.attention_dropout_probability)
    self.resid_dropout = torch.nn.Dropout(config.residual_dropout_probability)