本文整理汇总了 Python 中 allennlp.nn.util.add_sentence_boundary_token_ids 方法的典型用法代码示例。如果您正苦于以下问题：Python util.add_sentence_boundary_token_ids 方法的具体用法？Python util.add_sentence_boundary_token_ids 怎么用？Python util.add_sentence_boundary_token_ids 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 allennlp.nn.util 的用法示例。
在下文中一共展示了util.add_sentence_boundary_token_ids方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_add_sentence_boundary_token_ids_handles_3D_input
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import add_sentence_boundary_token_ids [as 别名]
def test_add_sentence_boundary_token_ids_handles_3D_input(self):
    """Check boundary insertion on a rank-3 id tensor with a boolean mask.

    The input holds 2 sequences of 3 positions with 4 ids per position; the
    last position of the second sequence is all zeros and is therefore
    masked out. The begin/end markers are 4-element vectors.
    """
    raw_ids = numpy.array(
        [
            [[1, 2, 3, 4], [5, 5, 5, 5], [6, 8, 1, 2]],
            [[4, 3, 2, 1], [8, 7, 6, 5], [0, 0, 0, 0]],
        ]
    )
    token_ids = torch.from_numpy(raw_ids)

    # A position is kept by the mask when at least one of its ids is positive.
    positive_per_position = (token_ids > 0).sum(dim=-1)
    position_mask = positive_per_position > 0

    begin_ids = torch.from_numpy(numpy.array([9, 9, 9, 9]))
    end_ids = torch.from_numpy(numpy.array([10, 10, 10, 10]))

    bounded_ids, bounded_mask = util.add_sentence_boundary_token_ids(token_ids, position_mask, begin_ids, end_ids)

    # The second sequence has only two unmasked positions, so its end marker
    # lands right after them and the final row stays zero.
    wanted_ids = numpy.array(
        [
            [[9, 9, 9, 9], [1, 2, 3, 4], [5, 5, 5, 5], [6, 8, 1, 2], [10, 10, 10, 10]],
            [[9, 9, 9, 9], [4, 3, 2, 1], [8, 7, 6, 5], [10, 10, 10, 10], [0, 0, 0, 0]],
        ]
    )
    ids_match = bounded_ids.data.numpy() == wanted_ids
    assert ids_match.all()

    wanted_mask = (wanted_ids > 0).sum(axis=-1) > 0
    mask_match = bounded_mask.data.numpy() == wanted_mask
    assert mask_match.all()
示例2: test_add_sentence_boundary_token_ids_handles_3D_input
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import add_sentence_boundary_token_ids [as 别名]
def test_add_sentence_boundary_token_ids_handles_3D_input(self):
    """Check boundary insertion on a rank-3 id tensor with a LongTensor mask.

    Same data as a 2 x 3 x 4 batch whose final position in the second
    sequence is all zeros; here the mask is cast to ``torch.LongTensor``
    before being handed to the utility.
    """
    first_sequence = [[1, 2, 3, 4], [5, 5, 5, 5], [6, 8, 1, 2]]
    second_sequence = [[4, 3, 2, 1], [8, 7, 6, 5], [0, 0, 0, 0]]
    token_ids = torch.from_numpy(numpy.array([first_sequence, second_sequence]))

    # Mark positions that contain at least one positive id, as integers.
    has_positive_id = (token_ids > 0).sum(dim=-1) > 0
    position_mask = has_positive_id.type(torch.LongTensor)

    begin_ids = torch.from_numpy(numpy.array([9, 9, 9, 9]))
    end_ids = torch.from_numpy(numpy.array([10, 10, 10, 10]))

    bounded_ids, bounded_mask = util.add_sentence_boundary_token_ids(token_ids, position_mask, begin_ids, end_ids)

    begin_row = [9, 9, 9, 9]
    end_row = [10, 10, 10, 10]
    # The shorter second sequence gets its end marker directly after its two
    # unmasked positions; the trailing row remains zero.
    wanted_ids = numpy.array(
        [
            [begin_row, [1, 2, 3, 4], [5, 5, 5, 5], [6, 8, 1, 2], end_row],
            [begin_row, [4, 3, 2, 1], [8, 7, 6, 5], end_row, [0, 0, 0, 0]],
        ]
    )
    wanted_mask = (wanted_ids > 0).sum(axis=-1) > 0

    assert (bounded_ids.data.numpy() == wanted_ids).all()
    assert (bounded_mask.data.numpy() == wanted_mask).all()
示例3: test_add_sentence_boundary_token_ids_handles_2D_input
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import add_sentence_boundary_token_ids [as 别名]
def test_add_sentence_boundary_token_ids_handles_2D_input(self):
    """Check boundary insertion on a rank-2 id tensor with a boolean mask.

    Two sequences of three ids are used; the second ends in a zero that the
    mask excludes. Begin/end markers are the plain integers 9 and 10.
    """
    begin_id, end_id = 9, 10
    token_ids = torch.from_numpy(numpy.array([[1, 2, 3], [4, 5, 0]]))
    # Zero ids are treated as masked-out positions.
    id_mask = token_ids > 0

    bounded_ids, bounded_mask = util.add_sentence_boundary_token_ids(token_ids, id_mask, begin_id, end_id)

    # In the second row the end marker follows the two unmasked ids and the
    # last slot stays zero.
    wanted_ids = numpy.array([[9, 1, 2, 3, 10], [9, 4, 5, 10, 0]])
    wanted_mask = wanted_ids > 0

    assert (bounded_ids.data.numpy() == wanted_ids).all()
    assert (bounded_mask.data.numpy() == wanted_mask).all()
示例4: test_add_sentence_boundary_token_ids_handles_2D_input
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import add_sentence_boundary_token_ids [as 别名]
def test_add_sentence_boundary_token_ids_handles_2D_input(self):
    """Check boundary insertion on a rank-2 id tensor with an integer mask.

    Two sequences of three ids are used; the second ends in a zero. The mask
    is converted with ``.long()`` before the call, and the begin/end markers
    are the plain integers 9 and 10.
    """
    rows = [[1, 2, 3], [4, 5, 0]]
    token_ids = torch.from_numpy(numpy.array(rows))

    # Positive ids are kept; the comparison result is stored as integers.
    is_positive = token_ids > 0
    id_mask = is_positive.long()

    begin_id = 9
    end_id = 10
    bounded_ids, bounded_mask = util.add_sentence_boundary_token_ids(token_ids, id_mask, begin_id, end_id)

    # Row two is one id shorter, so its end marker sits one slot earlier and
    # the final slot stays zero.
    wanted_ids = numpy.array(
        [
            [9, 1, 2, 3, 10],
            [9, 4, 5, 10, 0],
        ]
    )
    ids_match = bounded_ids.data.numpy() == wanted_ids
    assert ids_match.all()
    mask_match = bounded_mask.data.numpy() == (wanted_ids > 0)
    assert mask_match.all()
示例5: forward
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import add_sentence_boundary_token_ids [as 别名]
def forward(
    self,
    source_tokens: torch.LongTensor,
    target_tokens: Optional[torch.LongTensor] = None,
    decoding_strategy: str = "sampling",
) -> Dict[str, torch.Tensor]:
    r"""
    Replace AllenNLP's ``forward`` with custom decoder logic, decoding by either
    categorical sampling or greedy selection, as requested.

    Parameters
    ----------
    source_tokens: torch.LongTensor
        Tokenized source sequences padded to maximum length, without the @start@ and
        @end@ sentence boundaries. Shape: (batch_size, max_source_length)
    target_tokens: torch.LongTensor, optional (default = None)
        Tokenized target sequences padded to maximum length, without the @start@ and
        @end@ sentence boundaries. Shape: (batch_size, max_target_length)
    decoding_strategy: str, optional (default = "sampling")
        Decoding mode, one of "sampling" or "greedy". It is passed through to
        ``_forward_loop`` unchanged.

    Returns
    -------
    Dict[str, torch.Tensor]
        Whatever ``_forward_loop`` returns; expected keys are "predictions" and "loss".
    """
    start_id = self._start_index
    end_id = self._end_index
    pad_id = self._pad_index

    # Wrap the source in "@start@" / "@end@", treating pad positions as masked out.
    source_mask = source_tokens != pad_id
    bounded_source, _ = add_sentence_boundary_token_ids(source_tokens, source_mask, start_id, end_id)
    # The encoder input keeps only the trailing "@end@": the leading "@start@"
    # column is sliced off again before encoding.
    encoder_inputs = {"tokens": bounded_source[:, 1:]}

    # Targets, when given, keep both boundary tokens.
    decoder_targets = None
    if target_tokens is not None:
        target_mask = target_tokens != pad_id
        bounded_target, _ = add_sentence_boundary_token_ids(target_tokens, target_mask, start_id, end_id)
        decoder_targets = {"tokens": bounded_target}

    # `_encode` and `_init_decoder_state` are inherited, unmodified super class methods.
    # After `_encode`, keys: {"encoder_outputs", "source_mask"}; after
    # `_init_decoder_state`, additionally {"decoder_hidden", "decoder_context"}.
    state = self._init_decoder_state(self._encode(encoder_inputs))

    # `_forward_loop` decodes the input sequence and computes the loss during
    # training and validation. keys: {"predictions", "loss"}
    return self._forward_loop(state, decoder_targets, decoding_strategy)