This page collects typical usage examples of the Python method allennlp.data.vocabulary.Vocabulary.get_vocab_size. If you have been wondering what Vocabulary.get_vocab_size does and how to use it in practice, the curated examples below may help. You can also read further about the containing class, allennlp.data.vocabulary.Vocabulary.
The following shows 7 code examples of Vocabulary.get_vocab_size, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
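As a quick orientation before the collected examples, here is a minimal sketch (not taken from any of the snippets below) of what get_vocab_size reports. The reserved-token counts in the comments reflect typical AllenNLP defaults and may differ across versions:

from allennlp.data.vocabulary import Vocabulary

vocab = Vocabulary()
# A freshly created, padded default namespace already holds the padding and
# OOV tokens, so its size typically starts at 2.
print(vocab.get_vocab_size())            # usually 2
vocab.add_token_to_namespace("hello")    # added to the default "tokens" namespace
print(vocab.get_vocab_size())            # usually 3
print(vocab.get_vocab_size("tokens"))    # same namespace, same count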
Example 1: __init__
# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Or: from allennlp.data.vocabulary.Vocabulary import get_vocab_size [as alias]
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             contextualizer: Seq2SeqEncoder,
             dropout: float = None,
             num_samples: int = None,
             sparse_embeddings: bool = False,
             bidirectional: bool = False,
             initializer: InitializerApplicator = None) -> None:
    super().__init__(vocab)
    self._text_field_embedder = text_field_embedder

    if contextualizer.is_bidirectional() is not bidirectional:
        raise ConfigurationError(
            "Bidirectionality of contextualizer must match bidirectionality of "
            "language model. "
            f"Contextualizer bidirectional: {contextualizer.is_bidirectional()}, "
            f"language model bidirectional: {bidirectional}")
    self._contextualizer = contextualizer
    self._bidirectional = bidirectional

    # The dimension for making predictions just in the forward
    # (or backward) direction.
    if self._bidirectional:
        self._forward_dim = contextualizer.get_output_dim() // 2
    else:
        self._forward_dim = contextualizer.get_output_dim()

    # TODO(joelgrus): more sampled softmax configuration options, as needed.
    if num_samples is not None:
        self._softmax_loss = SampledSoftmaxLoss(num_words=vocab.get_vocab_size(),
                                                embedding_dim=self._forward_dim,
                                                num_samples=num_samples,
                                                sparse=sparse_embeddings)
    else:
        self._softmax_loss = _SoftmaxLoss(num_words=vocab.get_vocab_size(),
                                          embedding_dim=self._forward_dim)

    # TODO(brendanr): Output perplexity here. e^loss
    self.register_buffer('_last_average_loss', torch.zeros(1))

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = lambda x: x

    if initializer is not None:
        initializer(self)
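A note on the calls above: with no argument, get_vocab_size counts the default "tokens" namespace, which is exactly the number of output scores the softmax loss needs. A minimal sketch of that sizing pattern (the empty vocabulary and forward_dim below are hypothetical placeholders, not part of the original model):

import torch
from allennlp.data.vocabulary import Vocabulary

vocab = Vocabulary()                 # in a real model this is built from the data
forward_dim = 512                    # hypothetical hidden size
num_words = vocab.get_vocab_size()   # equivalent to vocab.get_vocab_size("tokens")
projection = torch.nn.Linear(forward_dim, num_words)  # one logit per vocabulary entry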
Example 2: test_add_word_to_index_gives_consistent_results
# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Or: from allennlp.data.vocabulary.Vocabulary import get_vocab_size [as alias]
def test_add_word_to_index_gives_consistent_results(self):
    vocab = Vocabulary()
    initial_vocab_size = vocab.get_vocab_size()
    word_index = vocab.add_token_to_namespace("word")
    assert "word" in vocab.get_index_to_token_vocabulary().values()
    assert vocab.get_token_index("word") == word_index
    assert vocab.get_token_from_index(word_index) == "word"
    assert vocab.get_vocab_size() == initial_vocab_size + 1

    # Now add it again, and make sure nothing changes.
    vocab.add_token_to_namespace("word")
    assert "word" in vocab.get_index_to_token_vocabulary().values()
    assert vocab.get_token_index("word") == word_index
    assert vocab.get_token_from_index(word_index) == "word"
    assert vocab.get_vocab_size() == initial_vocab_size + 1
Example 3: __init__
# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Or: from allennlp.data.vocabulary.Vocabulary import get_vocab_size [as alias]
def __init__(self,
             vocab: Vocabulary,
             sentence_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             dropout: float = 0.0,
             rule_namespace: str = 'rule_labels') -> None:
    super(NlvrSemanticParser, self).__init__(vocab=vocab)

    self._sentence_embedder = sentence_embedder
    self._denotation_accuracy = Average()
    self._consistency = Average()
    self._encoder = encoder
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._rule_namespace = rule_namespace

    self._action_embedder = Embedding(num_embeddings=vocab.get_vocab_size(self._rule_namespace),
                                      embedding_dim=action_embedding_dim)

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    torch.nn.init.normal_(self._first_action_embedding)
Example 4: __init__
# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Or: from allennlp.data.vocabulary.Vocabulary import get_vocab_size [as alias]
def __init__(self,
             word_embeddings: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             vocab: Vocabulary) -> None:
    super().__init__(vocab)
    self.word_embeddings = word_embeddings
    self.encoder = encoder
    self.hidden2tag = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                      out_features=vocab.get_vocab_size('labels'))
    self.accuracy = CategoricalAccuracy()
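Because hidden2tag produces one logit per entry in the "labels" namespace, predictions can be mapped back to label strings through the same vocabulary. A small illustrative sketch of that round trip (the tag set and tensor shapes are hypothetical, not taken from the model above):

import torch
from allennlp.data.vocabulary import Vocabulary

vocab = Vocabulary()
for tag in ["O", "B-PER", "I-PER"]:            # hypothetical tag set
    vocab.add_token_to_namespace(tag, namespace="labels")

tag_logits = torch.randn(1, 5, vocab.get_vocab_size("labels"))  # (batch, tokens, num_labels)
predicted_ids = tag_logits.argmax(dim=-1)
predicted_tags = [vocab.get_token_from_index(int(i), "labels") for i in predicted_ids[0]]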
Example 5: test_namespaces
# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Or: from allennlp.data.vocabulary.Vocabulary import get_vocab_size [as alias]
def test_namespaces(self):
    vocab = Vocabulary()
    initial_vocab_size = vocab.get_vocab_size()
    word_index = vocab.add_token_to_namespace("word", namespace='1')
    assert "word" in vocab.get_index_to_token_vocabulary(namespace='1').values()
    assert vocab.get_token_index("word", namespace='1') == word_index
    assert vocab.get_token_from_index(word_index, namespace='1') == "word"
    assert vocab.get_vocab_size(namespace='1') == initial_vocab_size + 1

    # Now add it again, in a different namespace and a different word, and make sure it's like
    # new.
    word2_index = vocab.add_token_to_namespace("word2", namespace='2')
    word_index = vocab.add_token_to_namespace("word", namespace='2')
    assert "word" in vocab.get_index_to_token_vocabulary(namespace='2').values()
    assert "word2" in vocab.get_index_to_token_vocabulary(namespace='2').values()
    assert vocab.get_token_index("word", namespace='2') == word_index
    assert vocab.get_token_index("word2", namespace='2') == word2_index
    assert vocab.get_token_from_index(word_index, namespace='2') == "word"
    assert vocab.get_token_from_index(word2_index, namespace='2') == "word2"
    assert vocab.get_vocab_size(namespace='2') == initial_vocab_size + 2
Example 6: index
# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Or: from allennlp.data.vocabulary.Vocabulary import get_vocab_size [as alias]
def index(self, vocab: Vocabulary):
    if self._label_ids is None:
        self._label_ids = [vocab.get_token_index(label, self._label_namespace)  # type: ignore
                           for label in self.labels]
    if not self._num_labels:
        self._num_labels = vocab.get_vocab_size(self._label_namespace)
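The field above caches both the label ids and the namespace size so that it can later be turned into a fixed-width tensor. A sketch of that typical follow-up step (the concrete numbers below are hypothetical, not taken from the snippet):

import torch

num_labels = 5                     # would come from vocab.get_vocab_size(self._label_namespace)
label_ids = [0, 3]                 # would come from vocab.get_token_index(...)
multi_hot = torch.zeros(num_labels)
multi_hot[label_ids] = 1           # tensor([1., 0., 0., 1., 0.])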
Example 7: test_from_params_valid_vocab_extension_thoroughly
# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Or: from allennlp.data.vocabulary.Vocabulary import get_vocab_size [as alias]
def test_from_params_valid_vocab_extension_thoroughly(self):
    '''
    Tests valid vocab extension thoroughly: vocab extension is valid when
    overlapping namespaces have the same padding behaviour (padded/non-padded).

    Summary of namespace paddings in this test:
        original_vocab namespaces
            tokens0     padded
            tokens1     non-padded
            tokens2     padded
            tokens3     non-padded
        instances namespaces
            tokens0     padded
            tokens1     non-padded
            tokens4     padded
            tokens5     non-padded

    Typical extension example (for the tokens1 namespace):
        -> original_vocab index2token
            apple   #0->apple
            bat     #1->bat
            cat     #2->cat
        -> tokens to extend with: cat, an, apple, banana, atom, bat
        -> extended_vocab index2token
            apple   #0->apple
            bat     #1->bat
            cat     #2->cat
            an      #3->an
            atom    #4->atom
            banana  #5->banana
    '''
    vocab_dir = self.TEST_DIR / 'vocab_save'

    original_vocab = Vocabulary(non_padded_namespaces=["tokens1", "tokens3"])
    original_vocab.add_token_to_namespace("apple", namespace="tokens0")  # index:2
    original_vocab.add_token_to_namespace("bat", namespace="tokens0")    # index:3
    original_vocab.add_token_to_namespace("cat", namespace="tokens0")    # index:4

    original_vocab.add_token_to_namespace("apple", namespace="tokens1")  # index:0
    original_vocab.add_token_to_namespace("bat", namespace="tokens1")    # index:1
    original_vocab.add_token_to_namespace("cat", namespace="tokens1")    # index:2

    original_vocab.add_token_to_namespace("a", namespace="tokens2")      # index:0
    original_vocab.add_token_to_namespace("b", namespace="tokens2")      # index:1
    original_vocab.add_token_to_namespace("c", namespace="tokens2")      # index:2

    original_vocab.add_token_to_namespace("p", namespace="tokens3")      # index:0
    original_vocab.add_token_to_namespace("q", namespace="tokens3")      # index:1

    original_vocab.save_to_files(vocab_dir)

    text_field0 = TextField([Token(t) for t in ["cat", "an", "apple", "banana", "atom", "bat"]],
                            {"tokens0": SingleIdTokenIndexer("tokens0")})
    text_field1 = TextField([Token(t) for t in ["cat", "an", "apple", "banana", "atom", "bat"]],
                            {"tokens1": SingleIdTokenIndexer("tokens1")})
    text_field4 = TextField([Token(t) for t in ["l", "m", "n", "o"]],
                            {"tokens4": SingleIdTokenIndexer("tokens4")})
    text_field5 = TextField([Token(t) for t in ["x", "y", "z"]],
                            {"tokens5": SingleIdTokenIndexer("tokens5")})
    instances = Batch([Instance({"text0": text_field0, "text1": text_field1,
                                 "text4": text_field4, "text5": text_field5})])

    params = Params({"directory_path": vocab_dir,
                     "extend": True,
                     "non_padded_namespaces": ["tokens1", "tokens5"]})
    extended_vocab = Vocabulary.from_params(params, instances)

    # Namespaces tokens0 and tokens1 are common to both; tokens2 and tokens3
    # appear only in original_vocab, tokens4 and tokens5 only in instances.
    extended_namespaces = {*extended_vocab._token_to_index}
    assert extended_namespaces == {"tokens{}".format(i) for i in range(6)}

    # Check that the _non_padded_namespaces set is consistent after extension.
    assert extended_vocab._non_padded_namespaces == {"tokens1", "tokens3", "tokens5"}

    # original_vocab["tokens1"] has 3 tokens, the instances' "tokens1" namespace has 6 tokens,
    # 3 of which overlap.
    assert extended_vocab.get_vocab_size("tokens1") == 6
    assert extended_vocab.get_vocab_size("tokens0") == 8  # 2 more than tokens1 because tokens0 is padded (padding + OOV)

    # Namespaces tokens2 and tokens3 were only in original_vocab,
    # so their token counts should be unchanged in extended_vocab.
    assert extended_vocab.get_vocab_size("tokens2") == original_vocab.get_vocab_size("tokens2")
    assert extended_vocab.get_vocab_size("tokens3") == original_vocab.get_vocab_size("tokens3")

    # Namespaces tokens4 and tokens5 were only in instances.
    assert extended_vocab.get_vocab_size("tokens4") == 6  # l, m, n, o + OOV + padding
    assert extended_vocab.get_vocab_size("tokens5") == 3  # x, y, z

    # The token-to-index mapping of every word in every namespace of original_vocab
    # should be preserved in extended_vocab.
    for namespace, token2index in original_vocab._token_to_index.items():
        for token, _ in token2index.items():
            vocab_index = original_vocab.get_token_index(token, namespace)
            extended_vocab_index = extended_vocab.get_token_index(token, namespace)
            assert vocab_index == extended_vocab_index

    # And the same for the index-to-token mapping.
    for namespace, index2token in original_vocab._index_to_token.items():
        for index, _ in index2token.items():
            vocab_token = original_vocab.get_token_from_index(index, namespace)
            extended_vocab_token = extended_vocab.get_token_from_index(index, namespace)
            assert vocab_token == extended_vocab_token
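The "+2" that keeps appearing in the padded counts above comes from the reserved padding and OOV entries. A short illustrative sketch of that difference (the namespace names here are made up; the counts reflect typical AllenNLP defaults, where namespaces ending in "tags" or "labels" are non-padded):

from allennlp.data.vocabulary import Vocabulary

vocab = Vocabulary()
vocab.add_token_to_namespace("x", namespace="some_padded_ns")  # padded: indices 0 and 1 are reserved
vocab.add_token_to_namespace("x", namespace="some_tags")       # matches "*tags", so non-padded
print(vocab.get_vocab_size("some_padded_ns"))  # typically 3: padding + OOV + "x"
print(vocab.get_vocab_size("some_tags"))       # typically 1: just "x"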