当前位置: 首页>>代码示例>>Python>>正文


Python util.add_sentence_boundary_token_ids方法代码示例

本文整理汇总了Python中allennlp.nn.util.add_sentence_boundary_token_ids方法的典型用法代码示例。如果您正苦于以下问题:Python util.add_sentence_boundary_token_ids方法的具体用法?Python util.add_sentence_boundary_token_ids怎么用?Python util.add_sentence_boundary_token_ids使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在allennlp.nn.util的用法示例。


在下文中一共展示了util.add_sentence_boundary_token_ids方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_add_sentence_boundary_token_ids_handles_3D_input

# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import add_sentence_boundary_token_ids [as 别名]
def test_add_sentence_boundary_token_ids_handles_3D_input(self):
        tensor = torch.from_numpy(
            numpy.array(
                [
                    [[1, 2, 3, 4], [5, 5, 5, 5], [6, 8, 1, 2]],
                    [[4, 3, 2, 1], [8, 7, 6, 5], [0, 0, 0, 0]],
                ]
            )
        )
        mask = (tensor > 0).sum(dim=-1) > 0
        bos = torch.from_numpy(numpy.array([9, 9, 9, 9]))
        eos = torch.from_numpy(numpy.array([10, 10, 10, 10]))
        new_tensor, new_mask = util.add_sentence_boundary_token_ids(tensor, mask, bos, eos)
        expected_new_tensor = numpy.array(
            [
                [[9, 9, 9, 9], [1, 2, 3, 4], [5, 5, 5, 5], [6, 8, 1, 2], [10, 10, 10, 10]],
                [[9, 9, 9, 9], [4, 3, 2, 1], [8, 7, 6, 5], [10, 10, 10, 10], [0, 0, 0, 0]],
            ]
        )
        assert (new_tensor.data.numpy() == expected_new_tensor).all()
        assert (new_mask.data.numpy() == ((expected_new_tensor > 0).sum(axis=-1) > 0)).all() 
开发者ID:allenai,项目名称:allennlp,代码行数:23,代码来源:util_test.py

示例2: test_add_sentence_boundary_token_ids_handles_3D_input

# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import add_sentence_boundary_token_ids [as 别名]
def test_add_sentence_boundary_token_ids_handles_3D_input(self):
        tensor = torch.from_numpy(
                numpy.array([[[1, 2, 3, 4],
                              [5, 5, 5, 5],
                              [6, 8, 1, 2]],
                             [[4, 3, 2, 1],
                              [8, 7, 6, 5],
                              [0, 0, 0, 0]]]))
        mask = ((tensor > 0).sum(dim=-1) > 0).type(torch.LongTensor)
        bos = torch.from_numpy(numpy.array([9, 9, 9, 9]))
        eos = torch.from_numpy(numpy.array([10, 10, 10, 10]))
        new_tensor, new_mask = util.add_sentence_boundary_token_ids(tensor, mask, bos, eos)
        expected_new_tensor = numpy.array([[[9, 9, 9, 9],
                                            [1, 2, 3, 4],
                                            [5, 5, 5, 5],
                                            [6, 8, 1, 2],
                                            [10, 10, 10, 10]],
                                           [[9, 9, 9, 9],
                                            [4, 3, 2, 1],
                                            [8, 7, 6, 5],
                                            [10, 10, 10, 10],
                                            [0, 0, 0, 0]]])
        assert (new_tensor.data.numpy() == expected_new_tensor).all()
        assert (new_mask.data.numpy() == ((expected_new_tensor > 0).sum(axis=-1) > 0)).all() 
开发者ID:plasticityai,项目名称:magnitude,代码行数:26,代码来源:util_test.py

示例3: test_add_sentence_boundary_token_ids_handles_2D_input

# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import add_sentence_boundary_token_ids [as 别名]
def test_add_sentence_boundary_token_ids_handles_2D_input(self):
        tensor = torch.from_numpy(numpy.array([[1, 2, 3], [4, 5, 0]]))
        mask = tensor > 0
        bos = 9
        eos = 10
        new_tensor, new_mask = util.add_sentence_boundary_token_ids(tensor, mask, bos, eos)
        expected_new_tensor = numpy.array([[9, 1, 2, 3, 10], [9, 4, 5, 10, 0]])
        assert (new_tensor.data.numpy() == expected_new_tensor).all()
        assert (new_mask.data.numpy() == (expected_new_tensor > 0)).all() 
开发者ID:allenai,项目名称:allennlp,代码行数:11,代码来源:util_test.py

示例4: test_add_sentence_boundary_token_ids_handles_2D_input

# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import add_sentence_boundary_token_ids [as 别名]
def test_add_sentence_boundary_token_ids_handles_2D_input(self):
        tensor = torch.from_numpy(numpy.array([[1, 2, 3], [4, 5, 0]]))
        mask = (tensor > 0).long()
        bos = 9
        eos = 10
        new_tensor, new_mask = util.add_sentence_boundary_token_ids(tensor, mask, bos, eos)
        expected_new_tensor = numpy.array([[9, 1, 2, 3, 10],
                                           [9, 4, 5, 10, 0]])
        assert (new_tensor.data.numpy() == expected_new_tensor).all()
        assert (new_mask.data.numpy() == (expected_new_tensor > 0)).all() 
开发者ID:plasticityai,项目名称:magnitude,代码行数:12,代码来源:util_test.py

示例5: forward

# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import add_sentence_boundary_token_ids [as 别名]
def forward(
        self,
        source_tokens: torch.LongTensor,
        target_tokens: Optional[torch.LongTensor] = None,
        decoding_strategy: str = "sampling",
    ) -> Dict[str, torch.Tensor]:

        r"""
        Override AllenNLP's forward, changing decoder logic. Perform either categorical sampling
        or greedy decoding as per specified.

        Parameters
        ----------
        source_tokens: torch.LongTensor
            Tokenized source sequences padded to maximum length. These are not padded with
            @start@ and @end@ sentence boundaries. Shape: (batch_size, max_source_length)
        target_tokens: torch.LongTensor, optional (default = None)
            Tokenized target sequences padded to maximum length. These are not padded with
            @start@ and @end@ sentence boundaries. Shape: (batch_size, max_target_length)
        decoding_strategy: str, optional (default = "sampling")
            How to perform decoding? One of "sampling" or "greedy".

        Returns
        -------
        Dict[str, torch.Tensor]
        """
        # Add "@start@" and "@end@" tokens to source and target sequences.
        source_tokens, _ = add_sentence_boundary_token_ids(
            source_tokens, (source_tokens != self._pad_index), self._start_index, self._end_index
        )
        if target_tokens is not None:
            target_tokens, _ = add_sentence_boundary_token_ids(
                target_tokens,
                (target_tokens != self._pad_index),
                self._start_index,
                self._end_index,
            )
        # Remove "@start@" from source sequences anyway (it's being encoded).
        source_tokens = {"tokens": source_tokens[:, 1:]}
        if target_tokens is not None:
            target_tokens = {"tokens": target_tokens}

        # _encode and _init_decoder_state are super class methods, left untouched.
        # keys: {"encoder_outputs", "source_mask"}
        state = self._encode(source_tokens)

        # keys: {"encoder_outputs", "source_mask", "decoder_hidden", "decoder_context"}
        state = self._init_decoder_state(state)

        # The `_forward_loop` decodes the input sequence and computes the loss during training
        # and validation.
        # keys: {"predictions", "loss"}
        output_dict = self._forward_loop(state, target_tokens, decoding_strategy)

        return output_dict 
开发者ID:kdexd,项目名称:probnmn-clevr,代码行数:57,代码来源:seq2seq_base.py


注:本文中的allennlp.nn.util.add_sentence_boundary_token_ids方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。