

Python util.pad_sequence_to_length Method Code Examples

This article collects typical usage examples of the Python method allennlp.common.util.pad_sequence_to_length. If you are wondering what util.pad_sequence_to_length does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore other usages of the containing module, allennlp.common.util.


The sections below show 15 code examples of util.pad_sequence_to_length, drawn from open-source projects and ordered by popularity by default.
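
Before diving into the examples, a minimal usage sketch of the function itself may help. This sketch is not taken from any of the projects below; it assumes the signature that the examples call, roughly pad_sequence_to_length(sequence, desired_length, default_value=lambda: 0, padding_on_right=True), so check your installed allennlp version for the exact keyword names.

from allennlp.common.util import pad_sequence_to_length

# Pad a list of ids on the right with the default padding value (0).
print(pad_sequence_to_length([3, 1, 4], 6))
# [3, 1, 4, 0, 0, 0]

# Sequences longer than the desired length are truncated.
print(pad_sequence_to_length([3, 1, 4, 1, 5, 9], 4))
# [3, 1, 4, 1]

# default_value is a zero-argument callable, so mutable padding values stay independent.
print(pad_sequence_to_length([[1, 2], [3]], 4, default_value=lambda: []))
# [[1, 2], [3], [], []]

# padding_on_right=False pads (and truncates) on the left instead.
print(pad_sequence_to_length([3, 1, 4], 5, padding_on_right=False))
# [0, 0, 3, 1, 4]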

Example 1: as_padded_tensor_dict

# Required import: from allennlp.common import util
# Or: from allennlp.common.util import pad_sequence_to_length
def as_padded_tensor_dict(
        self, tokens: IndexedTokenList, padding_lengths: Dict[str, int]
    ) -> Dict[str, torch.Tensor]:
        """
        This method pads a list of tokens given the input padding lengths (which could actually
        truncate things, depending on settings) and returns that padded list of input tokens as a
        `Dict[str, torch.Tensor]`.  This is a dictionary because there should be one key per
        argument that the `TokenEmbedder` corresponding to this class expects in its `forward()`
        method (where the argument name in the `TokenEmbedder` needs to match the key in this
        dictionary).

        The base class implements the case when all you want to do is create a padded `LongTensor`
        for every list in the `tokens` dictionary.  If your `TokenIndexer` needs more complex
        logic than that, you need to override this method.
        """
        tensor_dict = {}
        for key, val in tokens.items():
            if val and isinstance(val[0], bool):
                tensor = torch.BoolTensor(
                    pad_sequence_to_length(val, padding_lengths[key], default_value=lambda: False)
                )
            else:
                tensor = torch.LongTensor(pad_sequence_to_length(val, padding_lengths[key]))
            tensor_dict[key] = tensor
        return tensor_dict 
Developer: allenai, Project: allennlp, Lines: 27, Source: token_indexer.py
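
To make the base-class behavior above concrete, here is a small self-contained sketch (not from the repository; the keys and id values are invented) showing the boolean and integer branches:

import torch
from allennlp.common.util import pad_sequence_to_length

# Invented indexed output: integer ids under one key, boolean flags under another.
tokens = {"token_ids": [101, 2009, 102], "mask": [True, True, True]}
padding_lengths = {"token_ids": 5, "mask": 5}

tensor_dict = {}
for key, val in tokens.items():
    if val and isinstance(val[0], bool):
        tensor_dict[key] = torch.BoolTensor(
            pad_sequence_to_length(val, padding_lengths[key], default_value=lambda: False)
        )
    else:
        tensor_dict[key] = torch.LongTensor(pad_sequence_to_length(val, padding_lengths[key]))

print(tensor_dict["token_ids"])  # tensor([ 101, 2009,  102,    0,    0])
print(tensor_dict["mask"])       # tensor([ True,  True,  True, False, False])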

Example 2: as_tensor

# Required import: from allennlp.common import util
# Or: from allennlp.common.util import pad_sequence_to_length
def as_tensor(self,
              padding_lengths,
              cuda_device=-1):
        padded_field_list = pad_sequence_to_length(self.field_list,
                                                   padding_lengths[u'num_fields'],
                                                   self.field_list[0].empty_field)
        # Here we're removing the scoping on the padding length keys that we added in
        # `get_padding_lengths`; see the note there for more detail.
        child_padding_lengths = dict((key.replace(u'list_', u'', 1), value)
                                 for key, value in list(padding_lengths.items())
                                 if key.startswith(u'list_'))
        padded_fields = [field.as_tensor(child_padding_lengths, cuda_device)
                         for field in padded_field_list]
        return self.field_list[0].batch_tensors(padded_fields)

    #overrides 
Developer: plasticityai, Project: magnitude, Lines: 18, Source: list_field.py

Example 3: as_tensor

# Required import: from allennlp.common import util
# Or: from allennlp.common.util import pad_sequence_to_length
def as_tensor(self, padding_lengths: Dict[str, int]) -> Dict[str, torch.Tensor]:
        text_tensors = self._entity_text_field.as_tensor(padding_lengths)
        padded_linking_features = util.pad_sequence_to_length(
            self.linking_features, padding_lengths["num_entities"], default_value=lambda: []
        )
        padded_linking_arrays = []

        def default_feature_value():
            return [0.0] * len(self._feature_extractors)

        for linking_features in padded_linking_features:
            padded_features = util.pad_sequence_to_length(
                linking_features,
                padding_lengths["num_utterance_tokens"],
                default_value=default_feature_value,
            )
            padded_linking_arrays.append(padded_features)
        linking_features_tensor = torch.FloatTensor(padded_linking_arrays)
        return {"text": text_tensors, "linking": linking_features_tensor} 
Developer: allenai, Project: allennlp-semparse, Lines: 21, Source: knowledge_graph_field.py
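
As a side note, the two-level padding above can be sketched in isolation. The feature values and sizes below are made up (in the real field, the zero vector's length comes from self._feature_extractors): the entity dimension is padded with empty lists first, then each entity's per-token feature list is padded with zero vectors.

import torch
from allennlp.common.util import pad_sequence_to_length

# Made-up linking features: 2 entities, each with one feature vector per utterance token.
linking_features = [[[0.1, 0.0], [0.3, 1.0]],
                    [[0.0, 0.0], [0.9, 1.0]]]
num_entities, num_utterance_tokens, num_features = 3, 4, 2

padded_entities = pad_sequence_to_length(linking_features, num_entities, default_value=lambda: [])
padded = [
    pad_sequence_to_length(features, num_utterance_tokens, default_value=lambda: [0.0] * num_features)
    for features in padded_entities
]
print(torch.FloatTensor(padded).shape)  # torch.Size([3, 4, 2])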

Example 4: pad_token_sequence

# Required import: from allennlp.common import util
# Or: from allennlp.common.util import pad_sequence_to_length
def pad_token_sequence(self,
                           tokens: Dict[str, List[int]],
                           desired_num_tokens: Dict[str, int],
                           padding_lengths: Dict[str, int]) -> Dict[str, List[int]]:  # pylint: disable=unused-argument
        return {key: pad_sequence_to_length(val, desired_num_tokens[key])
                for key, val in tokens.items()} 
Developer: DFKI-NLP, Project: DISTRE, Lines: 8, Source: byte_pair_indexer.py

Example 5: as_padded_tensor_dict

# Required import: from allennlp.common import util
# Or: from allennlp.common.util import pad_sequence_to_length
def as_padded_tensor_dict(
        self, tokens: IndexedTokenList, padding_lengths: Dict[str, int]
    ) -> Dict[str, torch.Tensor]:
        # Pad the tokens.
        padded_tokens = pad_sequence_to_length(
            tokens["token_characters"],
            padding_lengths["token_characters"],
            default_value=lambda: [],
        )

        # Pad the characters within the tokens.
        desired_token_length = padding_lengths["num_token_characters"]
        longest_token: List[int] = max(tokens["token_characters"], key=len, default=[])  # type: ignore
        padding_value = 0
        if desired_token_length > len(longest_token):
            # Since we want to pad to greater than the longest token, we add a
            # "dummy token" so we can take advantage of the fast implementation of itertools.zip_longest.
            padded_tokens.append([padding_value] * desired_token_length)
        # pad the list of lists to the longest sublist, appending 0's
        padded_tokens = list(zip(*itertools.zip_longest(*padded_tokens, fillvalue=padding_value)))
        if desired_token_length > len(longest_token):
            # Removes the "dummy token".
            padded_tokens.pop()
        # Truncates all the tokens to the desired length, and return the result.
        return {
            "token_characters": torch.LongTensor(
                [list(token[:desired_token_length]) for token in padded_tokens]
            )
        } 
Developer: allenai, Project: allennlp, Lines: 31, Source: token_characters_indexer.py
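
The zip_longest trick used above is easier to follow in isolation. The following standalone sketch (character ids and lengths are made up) pads the token dimension with pad_sequence_to_length and then the character dimension by transposing twice:

import itertools
from allennlp.common.util import pad_sequence_to_length

token_characters = [[5, 8], [3], [9, 2, 7]]  # character ids per token (made up)
num_tokens, num_token_characters = 4, 5

# Pad the token dimension: each missing token becomes an empty list.
padded = pad_sequence_to_length(token_characters, num_tokens, default_value=lambda: [])

# Append a dummy token of the target length so zip_longest pads every row that far,
# transpose, fill with zeros, transpose back, then drop the dummy row.
padded.append([0] * num_token_characters)
padded = list(zip(*itertools.zip_longest(*padded, fillvalue=0)))
padded.pop()

print([list(row) for row in padded])
# [[5, 8, 0, 0, 0], [3, 0, 0, 0, 0], [9, 2, 7, 0, 0], [0, 0, 0, 0, 0]]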

Example 6: as_tensor

# Required import: from allennlp.common import util
# Or: from allennlp.common.util import pad_sequence_to_length
def as_tensor(self, padding_lengths: Dict[str, int]) -> torch.Tensor:
        desired_length = padding_lengths["num_tokens"]
        padded_tokens = pad_sequence_to_length(self._mapping_array, desired_length)
        tensor = torch.LongTensor(padded_tokens)
        return tensor 
Developer: allenai, Project: allennlp, Lines: 7, Source: namespace_swapping_field.py

Example 7: as_tensor

# Required import: from allennlp.common import util
# Or: from allennlp.common.util import pad_sequence_to_length
def as_tensor(self, padding_lengths: Dict[str, int]) -> DataArray:
        padded_field_list = pad_sequence_to_length(
            self.field_list, padding_lengths["num_fields"], self.field_list[0].empty_field
        )
        # Here we're removing the scoping on the padding length keys that we added in
        # `get_padding_lengths`; see the note there for more detail.
        child_padding_lengths = {
            key.replace("list_", "", 1): value
            for key, value in padding_lengths.items()
            if key.startswith("list_")
        }
        padded_fields = [field.as_tensor(child_padding_lengths) for field in padded_field_list]
        return self.field_list[0].batch_tensors(padded_fields) 
Developer: allenai, Project: allennlp, Lines: 15, Source: list_field.py

Example 8: as_tensor

# Required import: from allennlp.common import util
# Or: from allennlp.common.util import pad_sequence_to_length
def as_tensor(self, padding_lengths: Dict[str, int]) -> torch.Tensor:
        desired_num_tokens = padding_lengths["num_tokens"]
        padded_tags = pad_sequence_to_length(self._indexed_labels, desired_num_tokens)
        tensor = torch.LongTensor(padded_tags)
        return tensor 
Developer: allenai, Project: allennlp, Lines: 7, Source: sequence_label_field.py

Example 9: as_array

# Required import: from allennlp.common import util
# Or: from allennlp.common.util import pad_sequence_to_length
def as_array(self, padding_lengths: Dict[str, int]) -> numpy.array:
        padded_features = pad_sequence_to_length(self.features,
                                                 padding_lengths['num_features'],
                                                 (lambda: math.nan))
        return numpy.asarray(padded_features, dtype=numpy.float32) 
Developer: allenai, Project: scitail, Lines: 7, Source: features_field.py

Example 10: as_padded_tensor

# Required import: from allennlp.common import util
# Or: from allennlp.common.util import pad_sequence_to_length
def as_padded_tensor(
            self,
            tokens: Dict[str, List[int]],
            desired_num_tokens: Dict[str, int],
            padding_lengths: Dict[str, int],
    ) -> Dict[str, torch.Tensor]:
        return {
            key: torch.LongTensor(pad_sequence_to_length(val, desired_num_tokens[key]))
            for key, val in tokens.items()
        } 
Developer: Hyperparticle, Project: udify, Lines: 12, Source: bert_pretrained.py

Example 11: pad_token_sequence

# Required import: from allennlp.common import util
# Or: from allennlp.common.util import pad_sequence_to_length
def pad_token_sequence(self,
                       tokens,
                       desired_num_tokens,
                       padding_lengths):
        # pylint: disable=unused-argument
        return dict((key, pad_sequence_to_length(val, desired_num_tokens[key],
                                            default_value=self._default_value_for_padding))
                for key, val in list(tokens.items())) 
Developer: plasticityai, Project: magnitude, Lines: 10, Source: elmo_indexer.py

Example 12: pad_token_sequence

# Required import: from allennlp.common import util
# Or: from allennlp.common.util import pad_sequence_to_length
def pad_token_sequence(self,
                       tokens,
                       desired_num_tokens,
                       padding_lengths):  # pylint: disable=unused-argument
        return dict((key, pad_sequence_to_length(val, desired_num_tokens[key]))
                for key, val in list(tokens.items())) 
Developer: plasticityai, Project: magnitude, Lines: 8, Source: ner_tag_indexer.py

Example 13: pad_token_sequence

# Required import: from allennlp.common import util
# Or: from allennlp.common.util import pad_sequence_to_length
def pad_token_sequence(self,
                       tokens,
                       desired_num_tokens,
                       padding_lengths):  # pylint: disable=unused-argument
        return dict((key, pad_sequence_to_length(val, desired_num_tokens[key]))
                for key, val in tokens.items()) 
Developer: plasticityai, Project: magnitude, Lines: 8, Source: openai_transformer_byte_pair_indexer.py

Example 14: pad_token_sequence

# Required import: from allennlp.common import util
# Or: from allennlp.common.util import pad_sequence_to_length
def pad_token_sequence(self,
                       tokens,
                       desired_num_tokens,
                       padding_lengths):
        # Pad the tokens.
        # tokens has only one key...
        key = list(tokens.keys())[0]

        padded_tokens = pad_sequence_to_length(
                tokens[key], desired_num_tokens[key],
                default_value=self.get_padding_token
        )

        # Pad the characters within the tokens.
        desired_token_length = padding_lengths[u'num_token_characters']
        longest_token = max(tokens[key], key=len, default=[])
        padding_value = 0
        if desired_token_length > len(longest_token):
            # Since we want to pad to greater than the longest token, we add a
            # "dummy token" so we can take advantage of the fast implementation of itertools.zip_longest.
            padded_tokens.append([padding_value] * desired_token_length)
        # pad the list of lists to the longest sublist, appending 0's
        padded_tokens = list(izip(*itertools.zip_longest(*padded_tokens, fillvalue=padding_value)))
        if desired_token_length > len(longest_token):
            # Removes the "dummy token".
            padded_tokens.pop()
        # Truncates all the tokens to the desired length, and return the result.
        return {key: [list(token[:desired_token_length]) for token in padded_tokens]} 
Developer: plasticityai, Project: magnitude, Lines: 30, Source: token_characters_indexer.py

Example 15: as_tensor

# Required import: from allennlp.common import util
# Or: from allennlp.common.util import pad_sequence_to_length
def as_tensor(self,
              padding_lengths,
              cuda_device=-1):
        tensors = {}
        desired_num_entities = padding_lengths[u'num_entities']
        desired_num_entity_tokens = padding_lengths[u'num_entity_tokens']
        desired_num_utterance_tokens = padding_lengths[u'num_utterance_tokens']
        for indexer_name, indexer in list(self._token_indexers.items()):
            padded_entities = util.pad_sequence_to_length(self._indexed_entity_texts[indexer_name],
                                                          desired_num_entities,
                                                          default_value=lambda: [])
            padded_arrays = []
            for padded_entity in padded_entities:
                padded_array = indexer.pad_token_sequence({u'key': padded_entity},
                                                          {u'key': desired_num_entity_tokens},
                                                          padding_lengths)[u'key']
                padded_arrays.append(padded_array)
            tensor = torch.LongTensor(padded_arrays)
            tensors[indexer_name] = tensor if cuda_device == -1 else tensor.cuda(cuda_device)
        padded_linking_features = util.pad_sequence_to_length(self.linking_features,
                                                              desired_num_entities,
                                                              default_value=lambda: [])
        padded_linking_arrays = []
        default_feature_value = lambda: [0.0] * len(self._feature_extractors)
        for linking_features in padded_linking_features:
            padded_features = util.pad_sequence_to_length(linking_features,
                                                          desired_num_utterance_tokens,
                                                          default_value=default_feature_value)
            padded_linking_arrays.append(padded_features)
        linking_features_tensor = torch.FloatTensor(padded_linking_arrays)
        if cuda_device != -1:
            linking_features_tensor = linking_features_tensor.cuda(cuda_device)
        return {u'text': tensors, u'linking': linking_features_tensor} 
Developer: plasticityai, Project: magnitude, Lines: 35, Source: knowledge_graph_field.py


Note: The allennlp.common.util.pad_sequence_to_length examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their authors; copyright of the source code remains with the original authors, and distribution or use should follow each project's License. Do not reproduce without permission.