This article collects typical usage examples of the Python method allennlp.nn.util.get_text_field_mask. If you are wondering what util.get_text_field_mask does, how to call it, or what real code using it looks like, the curated examples below should help. You can also explore further usage examples for the containing module, allennlp.nn.util.
The listing below shows 15 code examples of util.get_text_field_mask, ordered by popularity by default.
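Before diving into the examples, here is a minimal standalone sketch (not taken from the listing below) of what get_text_field_mask does: given the tensor dictionary produced by a TextField, it returns a mask that is nonzero for real tokens and zero for padded positions. The nested {"indexer_name": {"tokens": ...}} layout used here matches recent AllenNLP releases; older releases use a flat dictionary, as in Example 15.

import torch
from allennlp.nn.util import get_text_field_mask

# Two sentences padded to length 5; 0 is the padding id.
text_field_tensors = {
    "indexer_name": {
        "tokens": torch.LongTensor([[3, 4, 5, 0, 0], [1, 2, 0, 0, 0]]),
    }
}
mask = get_text_field_mask(text_field_tensors)
print(mask.long())  # tensor([[1, 1, 1, 0, 0], [1, 1, 0, 0, 0]])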
Example 1: embedd_encode_and_aggregate_text_field

# Required import: from allennlp.nn import util [as alias]
# Or: from allennlp.nn.util import get_text_field_mask [as alias]
def embedd_encode_and_aggregate_text_field(question: Dict[str, torch.LongTensor],
                                           text_field_embedder,
                                           embeddings_dropout,
                                           encoder,
                                           aggregation_type,
                                           get_last_states=False):
    embedded_question = text_field_embedder(question)
    question_mask = get_text_field_mask(question).float()
    embedded_question = embeddings_dropout(embedded_question)
    encoded_question = encoder(embedded_question, question_mask)

    # aggregate sequences to a single item
    encoded_question_aggregated = seq2vec_seq_aggregate(encoded_question, question_mask,
                                                        aggregation_type,
                                                        encoder.is_bidirectional(), 1)  # bs X d

    last_hidden_states = None
    if get_last_states:
        last_hidden_states = get_final_encoder_states(encoded_question, question_mask,
                                                      encoder.is_bidirectional())

    return encoded_question_aggregated, last_hidden_states
Example 2: embed_encode_and_aggregate_text_field_with_feats

# Required import: from allennlp.nn import util [as alias]
# Or: from allennlp.nn.util import get_text_field_mask [as alias]
def embed_encode_and_aggregate_text_field_with_feats(question: Dict[str, torch.LongTensor],
                                                     text_field_embedder,
                                                     embeddings_dropout,
                                                     encoder,
                                                     aggregation_type,
                                                     token_features=None,
                                                     get_last_states=False):
    embedded_question = text_field_embedder(question)
    question_mask = get_text_field_mask(question).float()
    embedded_question = embeddings_dropout(embedded_question)
    if token_features is not None:
        embedded_question = torch.cat([embedded_question, token_features], dim=-1)
    encoded_question = encoder(embedded_question, question_mask)

    # aggregate sequences to a single item
    encoded_question_aggregated = seq2vec_seq_aggregate(encoded_question, question_mask,
                                                        aggregation_type,
                                                        encoder.is_bidirectional(), 1)  # bs X d

    last_hidden_states = None
    if get_last_states:
        last_hidden_states = get_final_encoder_states(encoded_question, question_mask,
                                                      encoder.is_bidirectional())

    return encoded_question_aggregated, last_hidden_states
Example 3: embed_encode_and_aggregate_text_field_with_feats_only

# Required import: from allennlp.nn import util [as alias]
# Or: from allennlp.nn.util import get_text_field_mask [as alias]
def embed_encode_and_aggregate_text_field_with_feats_only(question: Dict[str, torch.LongTensor],
                                                          text_field_embedder,
                                                          embeddings_dropout,
                                                          encoder,
                                                          aggregation_type,
                                                          token_features=None,
                                                          get_last_states=False):
    embedded_question = text_field_embedder(question)
    question_mask = get_text_field_mask(question).float()
    embedded_question = embeddings_dropout(embedded_question)
    if token_features is not None:
        embedded_question = torch.cat([token_features], dim=-1)
    encoded_question = encoder(embedded_question, question_mask)

    # aggregate sequences to a single item
    encoded_question_aggregated = seq2vec_seq_aggregate(encoded_question, question_mask,
                                                        aggregation_type,
                                                        encoder.is_bidirectional(), 1)  # bs X d

    last_hidden_states = None
    if get_last_states:
        last_hidden_states = get_final_encoder_states(encoded_question, question_mask,
                                                      encoder.is_bidirectional())

    return encoded_question_aggregated, last_hidden_states
Example 4: test_get_text_field_mask_returns_a_correct_mask

# Required import: from allennlp.nn import util [as alias]
# Or: from allennlp.nn.util import get_text_field_mask [as alias]
def test_get_text_field_mask_returns_a_correct_mask(self):
    text_field_tensors = {
        "indexer_name": {
            "tokens": torch.LongTensor([[3, 4, 5, 0, 0], [1, 2, 0, 0, 0]]),
            "token_characters": torch.LongTensor(
                [
                    [[1, 2], [3, 0], [2, 0], [0, 0], [0, 0]],
                    [[5, 0], [4, 6], [0, 0], [0, 0], [0, 0]],
                ]
            ),
        }
    }
    assert_almost_equal(
        util.get_text_field_mask(text_field_tensors).long().numpy(),
        [[1, 1, 1, 0, 0], [1, 1, 0, 0, 0]],
    )
Example 5: test_get_text_field_mask_returns_a_correct_mask_custom_padding_id

# Required import: from allennlp.nn import util [as alias]
# Or: from allennlp.nn.util import get_text_field_mask [as alias]
def test_get_text_field_mask_returns_a_correct_mask_custom_padding_id(self):
    text_field_tensors = {
        "indexer_name": {
            "tokens": torch.LongTensor([[3, 4, 5, 9, 9], [1, 2, 9, 9, 9]]),
            "token_characters": torch.LongTensor(
                [
                    [[1, 2], [3, 9], [2, 9], [9, 9], [9, 9]],
                    [[5, 9], [4, 6], [9, 9], [9, 9], [9, 9]],
                ]
            ),
        }
    }
    assert_almost_equal(
        util.get_text_field_mask(text_field_tensors, padding_id=9).long().numpy(),
        [[1, 1, 1, 0, 0], [1, 1, 0, 0, 0]],
    )
Example 6: test_get_text_field_mask_returns_a_correct_mask_list_field

# Required import: from allennlp.nn import util [as alias]
# Or: from allennlp.nn.util import get_text_field_mask [as alias]
def test_get_text_field_mask_returns_a_correct_mask_list_field(self):
    text_field_tensors = {
        "indexer_name": {
            "list_tokens": torch.LongTensor(
                [
                    [[1, 2], [3, 0], [2, 0], [0, 0], [0, 0]],
                    [[5, 0], [4, 6], [0, 0], [0, 0], [0, 0]],
                ]
            )
        }
    }
    actual_mask = (
        util.get_text_field_mask(text_field_tensors, num_wrapping_dims=1).long().numpy()
    )
    expected_mask = (text_field_tensors["indexer_name"]["list_tokens"].numpy() > 0).astype(
        "int32"
    )
    assert_almost_equal(actual_mask, expected_mask)
Example 7: forward

# Required import: from allennlp.nn import util [as alias]
# Or: from allennlp.nn.util import get_text_field_mask [as alias]
def forward(self,
            tokens: Dict[str, torch.Tensor],
            label: Optional[torch.Tensor] = None) -> Dict[str, torch.Tensor]:
    mask = get_text_field_mask(tokens)
    embedded = self._embedder(tokens)
    encoded = self._encoder(embedded, mask)
    classified = self._classifier(encoded)

    output: Dict[str, torch.Tensor] = {}
    output['logits'] = classified

    if label is not None:
        self._f1(classified, label, mask)
        output['loss'] = sequence_cross_entropy_with_logits(classified, label, mask)

    return output
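As a follow-up to Example 7, the snippet below is a small self-contained sketch with made-up shapes and tag counts (it is not part of the original model) showing how the mask produced by get_text_field_mask is passed to sequence_cross_entropy_with_logits so that padded positions are excluded from the loss.

import torch
from allennlp.nn.util import get_text_field_mask, sequence_cross_entropy_with_logits

# Nested dict layout as used by recent AllenNLP releases (assumption for this sketch).
token_ids = torch.LongTensor([[3, 4, 5, 0, 0], [1, 2, 0, 0, 0]])
mask = get_text_field_mask({"tokens": {"tokens": token_ids}})

logits = torch.randn(2, 5, 4)           # (batch_size, sequence_length, num_tags)
labels = torch.randint(0, 4, (2, 5))    # gold tag ids, including padded positions
loss = sequence_cross_entropy_with_logits(logits, labels, mask)  # padded steps get zero weight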
Example 8: forward

# Required import: from allennlp.nn import util [as alias]
# Or: from allennlp.nn.util import get_text_field_mask [as alias]
def forward(self,
            tokens: Dict[str, torch.Tensor],
            label: torch.Tensor) -> Dict[str, torch.Tensor]:
    mask = get_text_field_mask(tokens)
    embedded = self._embedder(tokens)
    encoded = self._encoder(embedded, mask)
    classified = self._classifier(encoded)

    viterbi_tags = self._crf.viterbi_tags(classified, mask)
    viterbi_tags = [path for path, score in viterbi_tags]
    broadcasted = self._broadcast_tags(viterbi_tags, classified)
    log_likelihood = self._crf(classified, label, mask)
    self._f1(broadcasted, label, mask)

    output: Dict[str, torch.Tensor] = {}
    output["loss"] = -log_likelihood
    return output
Example 9: embed_encode_and_aggregate_text_field

# Required import: from allennlp.nn import util [as alias]
# Or: from allennlp.nn.util import get_text_field_mask [as alias]
def embed_encode_and_aggregate_text_field(question: Dict[str, torch.LongTensor],
                                          text_field_embedder,
                                          embeddings_dropout,
                                          encoder,
                                          aggregation_type):
    """
    Given batched token ids (2D), runs embedding lookup with dropout, context encoding, and aggregation.

    :param question: Token-id dictionary produced by a TextField
    :param text_field_embedder: The embedder to be used for embedding lookup
    :param embeddings_dropout: Dropout applied to the embeddings
    :param encoder: Context encoder
    :param aggregation_type: The type of aggregation - max, sum, avg, last
    :return: The aggregated sequence representation
    """
    embedded_question = text_field_embedder(question)
    question_mask = get_text_field_mask(question).float()
    embedded_question = embeddings_dropout(embedded_question)
    encoded_question = encoder(embedded_question, question_mask)

    # aggregate sequences to a single item
    encoded_question_aggregated = seq2vec_seq_aggregate(encoded_question, question_mask,
                                                        aggregation_type,
                                                        None, 1)  # bs X d

    return encoded_question_aggregated
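seq2vec_seq_aggregate in the example above is a helper from that example's own repository and is not reproduced in this listing. Purely as an illustration of what the "max" aggregation type mentioned in the docstring typically means, here is a rough sketch of a masked max over the time dimension; it is an assumption for explanatory purposes, not the repository's actual implementation.

import torch

def masked_max_aggregate(encoded: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
    # encoded: (batch, seq_len, dim); mask: (batch, seq_len), 1 for real tokens, 0 for padding.
    neg_inf = torch.finfo(encoded.dtype).min
    # Replace padded positions with -inf so they never win the max.
    masked = encoded.masked_fill(~mask.bool().unsqueeze(-1), neg_inf)
    return masked.max(dim=1).values  # (batch, dim), i.e. "bs X d"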
Example 10: collate_fn

# Required import: from allennlp.nn import util [as alias]
# Or: from allennlp.nn.util import get_text_field_mask [as alias]
def collate_fn(data, to_gpu=False):
    """Creates mini-batch tensors."""
    images, instances = zip(*data)
    images = torch.stack(images, 0)

    batch = Batch(instances)
    td = batch.as_tensor_dict()
    if 'question' in td:
        td['question_mask'] = get_text_field_mask(td['question'], num_wrapping_dims=1)
        td['question_tags'][td['question_mask'] == 0] = -2  # Padding

    td['answer_mask'] = get_text_field_mask(td['answers'], num_wrapping_dims=1)
    td['answer_tags'][td['answer_mask'] == 0] = -2

    td['box_mask'] = torch.all(td['boxes'] >= 0, -1).long()
    td['images'] = images

    # Deprecated
    # if to_gpu:
    #     for k in td:
    #         if k != 'metadata':
    #             td[k] = {k2: v.cuda(non_blocking=True) for k2, v in td[k].items()} \
    #                 if isinstance(td[k], dict) else td[k].cuda(non_blocking=True)
    #     # No nested dicts
    #     for k in sorted(td.keys()):
    #         if isinstance(td[k], dict):
    #             for k2 in sorted(td[k].keys()):
    #                 td['{}_{}'.format(k, k2)] = td[k].pop(k2)
    #             td.pop(k)
    return td
Example 11: forward

# Required import: from allennlp.nn import util [as alias]
# Or: from allennlp.nn.util import get_text_field_mask [as alias]
def forward(self, inputs: torch.Tensor) -> torch.Tensor:
    """
    # Parameters

    inputs : `torch.Tensor`
        Shape `(batch_size, timesteps, sequence_length)` of word ids
        representing the current batch.

    # Returns

    `torch.Tensor`
        The bag-of-words representations for the input sequence, shape
        `(batch_size, vocab_size)`
    """
    bag_of_words_vectors = []

    mask = get_text_field_mask({"tokens": {"tokens": inputs}})
    if self._ignore_oov:
        # also mask out positions corresponding to oov
        mask &= inputs != self._oov_idx
    for document, doc_mask in zip(inputs, mask):
        document = torch.masked_select(document, doc_mask)
        vec = torch.bincount(document, minlength=self.vocab_size).float()
        vec = vec.view(1, -1)
        bag_of_words_vectors.append(vec)
    bag_of_words_output = torch.cat(bag_of_words_vectors, 0)

    if self._projection:
        projection = self._projection
        bag_of_words_output = projection(bag_of_words_output)
    return bag_of_words_output
Example 12: test_get_text_field_mask_returns_a_correct_mask_character_only_input

# Required import: from allennlp.nn import util [as alias]
# Or: from allennlp.nn.util import get_text_field_mask [as alias]
def test_get_text_field_mask_returns_a_correct_mask_character_only_input(self):
    text_field_tensors = {
        "indexer_name": {
            "token_characters": torch.LongTensor(
                [
                    [[1, 2, 3], [3, 0, 1], [2, 1, 0], [0, 0, 0]],
                    [[5, 5, 5], [4, 6, 0], [0, 0, 0], [0, 0, 0]],
                ]
            )
        }
    }
    assert_almost_equal(
        util.get_text_field_mask(text_field_tensors).long().numpy(),
        [[1, 1, 1, 0], [1, 1, 0, 0]],
    )
Example 13: test_get_text_field_mask_returns_mask_key

# Required import: from allennlp.nn import util [as alias]
# Or: from allennlp.nn.util import get_text_field_mask [as alias]
def test_get_text_field_mask_returns_mask_key(self):
    text_field_tensors = {
        "indexer_name": {
            "tokens": torch.LongTensor([[3, 4, 5, 0, 0], [1, 2, 0, 0, 0]]),
            "mask": torch.tensor([[False, False, True]]),
        }
    }
    assert_almost_equal(
        util.get_text_field_mask(text_field_tensors).long().numpy(), [[0, 0, 1]]
    )
Example 14: _get_initial_rnn_state

# Required import: from allennlp.nn import util [as alias]
# Or: from allennlp.nn.util import get_text_field_mask [as alias]
def _get_initial_rnn_state(self, sentence):
    embedded_input = self._sentence_embedder(sentence)
    # (batch_size, sentence_length)
    sentence_mask = util.get_text_field_mask(sentence).float()

    batch_size = embedded_input.size(0)

    # (batch_size, sentence_length, encoder_output_dim)
    encoder_outputs = self._dropout(self._encoder(embedded_input, sentence_mask))

    final_encoder_output = util.get_final_encoder_states(encoder_outputs,
                                                         sentence_mask,
                                                         self._encoder.is_bidirectional())
    memory_cell = encoder_outputs.new_zeros(batch_size, self._encoder.get_output_dim())
    attended_sentence = self._decoder_step.attend_on_sentence(final_encoder_output,
                                                              encoder_outputs, sentence_mask)
    encoder_outputs_list = [encoder_outputs[i] for i in range(batch_size)]
    sentence_mask_list = [sentence_mask[i] for i in range(batch_size)]
    initial_rnn_state = []
    for i in range(batch_size):
        initial_rnn_state.append(RnnState(final_encoder_output[i],
                                          memory_cell[i],
                                          self._first_action_embedding,
                                          attended_sentence[i],
                                          encoder_outputs_list,
                                          sentence_mask_list))
    return initial_rnn_state
Example 15: test_get_text_field_mask_returns_a_correct_mask

# Required import: from allennlp.nn import util [as alias]
# Or: from allennlp.nn.util import get_text_field_mask [as alias]
def test_get_text_field_mask_returns_a_correct_mask(self):
    text_field_tensors = {
        u"tokens": torch.LongTensor([[3, 4, 5, 0, 0], [1, 2, 0, 0, 0]]),
        u"token_characters": torch.LongTensor([[[1, 2], [3, 0], [2, 0], [0, 0], [0, 0]],
                                               [[5, 0], [4, 6], [0, 0], [0, 0], [0, 0]]])
    }
    assert_almost_equal(util.get_text_field_mask(text_field_tensors).numpy(),
                        [[1, 1, 1, 0, 0], [1, 1, 0, 0, 0]])