This page collects typical usage examples of the Python method allennlp.common.util.pad_sequence_to_length. If you are wondering what util.pad_sequence_to_length does, how to call it, or what real-world usage looks like, the curated code examples below may help. You can also explore further usage of allennlp.common.util, the module this method belongs to.
The following shows 15 code examples of util.pad_sequence_to_length, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
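Before diving into the examples, here is a minimal sketch of what the function itself does, assuming the usual AllenNLP signature pad_sequence_to_length(sequence, desired_length, default_value=lambda: 0, padding_on_right=True):

from allennlp.common.util import pad_sequence_to_length

# Pad on the right with the default fill value (0).
pad_sequence_to_length([3, 1, 4], 5)              # [3, 1, 4, 0, 0]

# Truncate when the desired length is shorter than the sequence.
pad_sequence_to_length([3, 1, 4, 1, 5], 3)        # [3, 1, 4]

# Pad on the left with a custom fill value supplied as a zero-argument callable.
pad_sequence_to_length(["a", "b"], 4,
                       default_value=lambda: "<pad>",
                       padding_on_right=False)    # ['<pad>', '<pad>', 'a', 'b']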
Example 1: as_padded_tensor_dict
# Required import: from allennlp.common import util [as alias]
# Or: from allennlp.common.util import pad_sequence_to_length [as alias]
def as_padded_tensor_dict(
self, tokens: IndexedTokenList, padding_lengths: Dict[str, int]
) -> Dict[str, torch.Tensor]:
"""
This method pads a list of tokens given the input padding lengths (which could actually
truncate things, depending on settings) and returns that padded list of input tokens as a
`Dict[str, torch.Tensor]`. This is a dictionary because there should be one key per
argument that the `TokenEmbedder` corresponding to this class expects in its `forward()`
method (where the argument name in the `TokenEmbedder` needs to match the key in this
dictionary).
The base class implements the case when all you want to do is create a padded `LongTensor`
for every list in the `tokens` dictionary. If your `TokenIndexer` needs more complex
logic than that, you need to override this method.
"""
tensor_dict = {}
for key, val in tokens.items():
if val and isinstance(val[0], bool):
tensor = torch.BoolTensor(
pad_sequence_to_length(val, padding_lengths[key], default_value=lambda: False)
)
else:
tensor = torch.LongTensor(pad_sequence_to_length(val, padding_lengths[key]))
tensor_dict[key] = tensor
return tensor_dict
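A hedged usage sketch of the branch above on a toy IndexedTokenList (the keys token_ids and mask are only illustrative):

import torch
from allennlp.common.util import pad_sequence_to_length

tokens = {"token_ids": [2, 7, 9], "mask": [True, True, True]}
padding_lengths = {"token_ids": 5, "mask": 5}

tensor_dict = {}
for key, val in tokens.items():
    if val and isinstance(val[0], bool):
        # Boolean lists (e.g. masks) are padded with False and become BoolTensors.
        tensor_dict[key] = torch.BoolTensor(
            pad_sequence_to_length(val, padding_lengths[key], default_value=lambda: False)
        )
    else:
        # Integer id lists are padded with 0 and become LongTensors.
        tensor_dict[key] = torch.LongTensor(pad_sequence_to_length(val, padding_lengths[key]))

# tensor_dict["token_ids"] -> tensor([2, 7, 9, 0, 0])
# tensor_dict["mask"]      -> tensor([True, True, True, False, False])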
Example 2: as_tensor
# Required import: from allennlp.common import util [as alias]
# Or: from allennlp.common.util import pad_sequence_to_length [as alias]
def as_tensor(self, padding_lengths, cuda_device=-1):
padded_field_list = pad_sequence_to_length(self.field_list,
padding_lengths[u'num_fields'],
self.field_list[0].empty_field)
# Here we're removing the scoping on the padding length keys that we added in
# `get_padding_lengths`; see the note there for more detail.
child_padding_lengths = dict((key.replace(u'list_', u'', 1), value)
for key, value in list(padding_lengths.items())
if key.startswith(u'list_'))
padded_fields = [field.as_tensor(child_padding_lengths, cuda_device)
for field in padded_field_list]
return self.field_list[0].batch_tensors(padded_fields)
Example 3: as_tensor
# Required import: from allennlp.common import util [as alias]
# Or: from allennlp.common.util import pad_sequence_to_length [as alias]
def as_tensor(self, padding_lengths: Dict[str, int]) -> Dict[str, torch.Tensor]:
text_tensors = self._entity_text_field.as_tensor(padding_lengths)
padded_linking_features = util.pad_sequence_to_length(
self.linking_features, padding_lengths["num_entities"], default_value=lambda: []
)
padded_linking_arrays = []
def default_feature_value():
return [0.0] * len(self._feature_extractors)
for linking_features in padded_linking_features:
padded_features = util.pad_sequence_to_length(
linking_features,
padding_lengths["num_utterance_tokens"],
default_value=default_feature_value,
)
padded_linking_arrays.append(padded_features)
linking_features_tensor = torch.FloatTensor(padded_linking_arrays)
return {"text": text_tensors, "linking": linking_features_tensor}
Example 4: pad_token_sequence
# Required import: from allennlp.common import util [as alias]
# Or: from allennlp.common.util import pad_sequence_to_length [as alias]
def pad_token_sequence(self,
tokens: Dict[str, List[int]],
desired_num_tokens: Dict[str, int],
padding_lengths: Dict[str, int]) -> Dict[str, List[int]]: # pylint: disable=unused-argument
return {key: pad_sequence_to_length(val, desired_num_tokens[key])
for key, val in tokens.items()}
Example 5: as_padded_tensor_dict
# Required import: from allennlp.common import util [as alias]
# Or: from allennlp.common.util import pad_sequence_to_length [as alias]
def as_padded_tensor_dict(
self, tokens: IndexedTokenList, padding_lengths: Dict[str, int]
) -> Dict[str, torch.Tensor]:
# Pad the tokens.
padded_tokens = pad_sequence_to_length(
tokens["token_characters"],
padding_lengths["token_characters"],
default_value=lambda: [],
)
# Pad the characters within the tokens.
desired_token_length = padding_lengths["num_token_characters"]
longest_token: List[int] = max(tokens["token_characters"], key=len, default=[]) # type: ignore
padding_value = 0
if desired_token_length > len(longest_token):
# Since we want to pad to greater than the longest token, we add a
# "dummy token" so we can take advantage of the fast implementation of itertools.zip_longest.
padded_tokens.append([padding_value] * desired_token_length)
# pad the list of lists to the longest sublist, appending 0's
padded_tokens = list(zip(*itertools.zip_longest(*padded_tokens, fillvalue=padding_value)))
if desired_token_length > len(longest_token):
# Removes the "dummy token".
padded_tokens.pop()
# Truncates all the tokens to the desired length and returns the result.
return {
"token_characters": torch.LongTensor(
[list(token[:desired_token_length]) for token in padded_tokens]
)
}
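The zip_longest trick above, shown in isolation: transposing with zip_longest fills the short columns, and zipping back restores row order, so a ragged list of character-id lists ends up padded to the longest sublist (the ids are made up):

import itertools

padded_tokens = [[5, 6], [7], [8, 9, 10]]
padding_value = 0

# Transpose, fill the short columns with 0, then transpose back.
padded = list(zip(*itertools.zip_longest(*padded_tokens, fillvalue=padding_value)))
print(padded)  # [(5, 6, 0), (7, 0, 0), (8, 9, 10)]

# Truncating each row afterwards yields a rectangular [num_tokens, desired_token_length] layout.
desired_token_length = 2
print([list(token[:desired_token_length]) for token in padded])  # [[5, 6], [7, 0], [8, 9]]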
Example 6: as_tensor
# Required import: from allennlp.common import util [as alias]
# Or: from allennlp.common.util import pad_sequence_to_length [as alias]
def as_tensor(self, padding_lengths: Dict[str, int]) -> torch.Tensor:
desired_length = padding_lengths["num_tokens"]
padded_tokens = pad_sequence_to_length(self._mapping_array, desired_length)
tensor = torch.LongTensor(padded_tokens)
return tensor
Example 7: as_tensor
# Required import: from allennlp.common import util [as alias]
# Or: from allennlp.common.util import pad_sequence_to_length [as alias]
def as_tensor(self, padding_lengths: Dict[str, int]) -> DataArray:
padded_field_list = pad_sequence_to_length(
self.field_list, padding_lengths["num_fields"], self.field_list[0].empty_field
)
# Here we're removing the scoping on the padding length keys that we added in
# `get_padding_lengths`; see the note there for more detail.
child_padding_lengths = {
key.replace("list_", "", 1): value
for key, value in padding_lengths.items()
if key.startswith("list_")
}
padded_fields = [field.as_tensor(child_padding_lengths) for field in padded_field_list]
return self.field_list[0].batch_tensors(padded_fields)
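A small sketch of the key "unscoping" step above: ListField.get_padding_lengths prefixes the child fields' padding keys with "list_", and as_tensor strips that prefix off again before delegating to the children (the key names here are illustrative):

padding_lengths = {"num_fields": 3, "list_num_tokens": 7, "list_num_token_characters": 12}
child_padding_lengths = {
    key.replace("list_", "", 1): value
    for key, value in padding_lengths.items()
    if key.startswith("list_")
}
print(child_padding_lengths)  # {'num_tokens': 7, 'num_token_characters': 12}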
Example 8: as_tensor
# Required import: from allennlp.common import util [as alias]
# Or: from allennlp.common.util import pad_sequence_to_length [as alias]
def as_tensor(self, padding_lengths: Dict[str, int]) -> torch.Tensor:
desired_num_tokens = padding_lengths["num_tokens"]
padded_tags = pad_sequence_to_length(self._indexed_labels, desired_num_tokens)
tensor = torch.LongTensor(padded_tags)
return tensor
Example 9: as_array
# Required import: from allennlp.common import util [as alias]
# Or: from allennlp.common.util import pad_sequence_to_length [as alias]
def as_array(self, padding_lengths: Dict[str, int]) -> numpy.array:
padded_features = pad_sequence_to_length(self.features,
padding_lengths['num_features'],
(lambda: math.nan))
return numpy.asarray(padded_features, dtype=numpy.float32)
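A sketch of the NaN padding above on made-up feature values (padding with NaN presumably keeps missing features distinguishable from real zeros):

import math
import numpy
from allennlp.common.util import pad_sequence_to_length

features = [0.5, 1.25]
padded = pad_sequence_to_length(features, 4, default_value=lambda: math.nan)
print(numpy.asarray(padded, dtype=numpy.float32))  # [ 0.5  1.25   nan   nan]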
Example 10: as_padded_tensor
# Required import: from allennlp.common import util [as alias]
# Or: from allennlp.common.util import pad_sequence_to_length [as alias]
def as_padded_tensor(
self,
tokens: Dict[str, List[int]],
desired_num_tokens: Dict[str, int],
padding_lengths: Dict[str, int],
) -> Dict[str, torch.Tensor]:
return {
key: torch.LongTensor(pad_sequence_to_length(val, desired_num_tokens[key]))
for key, val in tokens.items()
}
Example 11: pad_token_sequence
# Required import: from allennlp.common import util [as alias]
# Or: from allennlp.common.util import pad_sequence_to_length [as alias]
def pad_token_sequence(self, tokens, desired_num_tokens, padding_lengths):
# pylint: disable=unused-argument
return dict((key, pad_sequence_to_length(val, desired_num_tokens[key],
default_value=self._default_value_for_padding))
for key, val in list(tokens.items()))
Example 12: pad_token_sequence
# Required import: from allennlp.common import util [as alias]
# Or: from allennlp.common.util import pad_sequence_to_length [as alias]
def pad_token_sequence(self, tokens, desired_num_tokens, padding_lengths):  # pylint: disable=unused-argument
return dict((key, pad_sequence_to_length(val, desired_num_tokens[key]))
for key, val in list(tokens.items()))
Example 13: pad_token_sequence
# Required import: from allennlp.common import util [as alias]
# Or: from allennlp.common.util import pad_sequence_to_length [as alias]
def pad_token_sequence(self, tokens, desired_num_tokens, padding_lengths):  # pylint: disable=unused-argument
return dict((key, pad_sequence_to_length(val, desired_num_tokens[key]))
for key, val in tokens.items())
Example 14: pad_token_sequence
# Required import: from allennlp.common import util [as alias]
# Or: from allennlp.common.util import pad_sequence_to_length [as alias]
def pad_token_sequence(self, tokens, desired_num_tokens, padding_lengths):
# Pad the tokens.
# tokens has only one key...
key = list(tokens.keys())[0]
padded_tokens = pad_sequence_to_length(
tokens[key], desired_num_tokens[key],
default_value=self.get_padding_token
)
# Pad the characters within the tokens.
desired_token_length = padding_lengths[u'num_token_characters']
longest_token = max(tokens[key], key=len, default=[])
padding_value = 0
if desired_token_length > len(longest_token):
# Since we want to pad to greater than the longest token, we add a
# "dummy token" so we can take advantage of the fast implementation of itertools.zip_longest.
padded_tokens.append([padding_value] * desired_token_length)
# pad the list of lists to the longest sublist, appending 0's
padded_tokens = list(izip(*itertools.zip_longest(*padded_tokens, fillvalue=padding_value)))
if desired_token_length > len(longest_token):
# Removes the "dummy token".
padded_tokens.pop()
# Truncates all the tokens to the desired length and returns the result.
return {key: [list(token[:desired_token_length]) for token in padded_tokens]}
Example 15: as_tensor
# Required import: from allennlp.common import util [as alias]
# Or: from allennlp.common.util import pad_sequence_to_length [as alias]
def as_tensor(self, padding_lengths, cuda_device=-1):
tensors = {}
desired_num_entities = padding_lengths[u'num_entities']
desired_num_entity_tokens = padding_lengths[u'num_entity_tokens']
desired_num_utterance_tokens = padding_lengths[u'num_utterance_tokens']
for indexer_name, indexer in list(self._token_indexers.items()):
padded_entities = util.pad_sequence_to_length(self._indexed_entity_texts[indexer_name],
desired_num_entities,
default_value=lambda: [])
padded_arrays = []
for padded_entity in padded_entities:
padded_array = indexer.pad_token_sequence({u'key': padded_entity},
{u'key': desired_num_entity_tokens},
padding_lengths)[u'key']
padded_arrays.append(padded_array)
tensor = torch.LongTensor(padded_arrays)
tensors[indexer_name] = tensor if cuda_device == -1 else tensor.cuda(cuda_device)
padded_linking_features = util.pad_sequence_to_length(self.linking_features,
desired_num_entities,
default_value=lambda: [])
padded_linking_arrays = []
default_feature_value = lambda: [0.0] * len(self._feature_extractors)
for linking_features in padded_linking_features:
padded_features = util.pad_sequence_to_length(linking_features,
desired_num_utterance_tokens,
default_value=default_feature_value)
padded_linking_arrays.append(padded_features)
linking_features_tensor = torch.FloatTensor(padded_linking_arrays)
if cuda_device != -1:
linking_features_tensor = linking_features_tensor.cuda(cuda_device)
return {u'text': tensors, u'linking': linking_features_tensor}
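To summarize what this older, Python 2-style example returns (a sketch with illustrative sizes; "tokens" stands in for whatever indexer names are configured): each entry of the "text" dictionary is a [num_entities, num_entity_tokens] LongTensor of padded entity-text ids, and "linking" is a [num_entities, num_utterance_tokens, num_features] FloatTensor, both moved to the GPU when cuda_device is not -1.

import torch

# Illustrative shapes, assuming num_entities=3, num_entity_tokens=4,
# num_utterance_tokens=5 and num_features=2.
output = {
    "text": {"tokens": torch.zeros(3, 4, dtype=torch.long)},
    "linking": torch.zeros(3, 5, 2),
}
print(output["text"]["tokens"].shape)  # torch.Size([3, 4])
print(output["linking"].shape)         # torch.Size([3, 5, 2])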