本文整理汇总了Python中allennlp.data.vocabulary.Vocabulary.from_files方法的典型用法代码示例。如果您正苦于以下问题:Python Vocabulary.from_files方法的具体用法?Python Vocabulary.from_files怎么用?Python Vocabulary.from_files使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类allennlp.data.vocabulary.Vocabulary
的用法示例。
在下文中一共展示了Vocabulary.from_files方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_saving_and_loading_works_with_byte_encoding
# 需要导入模块: from allennlp.data.vocabulary import Vocabulary [as 别名]
# 或者: from allennlp.data.vocabulary.Vocabulary import from_files [as 别名]
def test_saving_and_loading_works_with_byte_encoding(self):
    """Round-trip a byte-encoded character vocabulary through save/load.

    Builds a vocabulary from a TextField via UTF-8 byte encoding, indexes the
    field, saves the vocab to disk, reloads it, re-indexes an identical field,
    and asserts both indexings agree.
    """
    # We're going to set a vocabulary from a TextField using byte encoding, index it, save the
    # vocab, load the vocab, then index the text field again, and make sure we get the same
    # result.
    tokenizer = CharacterTokenizer(byte_encoding='utf-8')
    token_indexer = TokenCharactersIndexer(character_tokenizer=tokenizer)
    # Non-ASCII tokens exercise the multi-byte UTF-8 encoding paths.
    tokens = [Token(t) for t in ["Øyvind", "für", "汉字"]]
    text_field = TextField(tokens, {"characters": token_indexer})
    dataset = Batch([Instance({"sentence": text_field})])
    vocab = Vocabulary.from_instances(dataset)
    text_field.index(vocab)
    # deepcopy so the comparison below cannot alias state mutated by re-indexing.
    indexed_tokens = deepcopy(text_field._indexed_tokens)  # pylint: disable=protected-access
    vocab_dir = self.TEST_DIR / 'vocab_save'
    vocab.save_to_files(vocab_dir)
    vocab2 = Vocabulary.from_files(vocab_dir)
    text_field2 = TextField(tokens, {"characters": token_indexer})
    text_field2.index(vocab2)
    indexed_tokens2 = deepcopy(text_field2._indexed_tokens)  # pylint: disable=protected-access
    assert indexed_tokens == indexed_tokens2
示例2: test_saving_and_loading
# 需要导入模块: from allennlp.data.vocabulary import Vocabulary [as 别名]
# 或者: from allennlp.data.vocabulary.Vocabulary import from_files [as 别名]
def test_saving_and_loading(self):
    """Round-trip a Vocabulary with padded and non-padded namespaces through save/load.

    Verifies that namespace padding configuration, token->index and
    index->token mappings all survive `save_to_files` / `from_files`.
    """
    # pylint: disable=protected-access
    vocab_dir = self.TEST_DIR / 'vocab_save'
    vocab = Vocabulary(non_padded_namespaces=["a", "c"])
    vocab.add_token_to_namespace("a0", namespace="a")  # non-padded, should start at 0
    vocab.add_token_to_namespace("a1", namespace="a")
    vocab.add_token_to_namespace("a2", namespace="a")
    vocab.add_token_to_namespace("b2", namespace="b")  # padded, should start at 2
    vocab.add_token_to_namespace("b3", namespace="b")
    vocab.save_to_files(vocab_dir)
    vocab2 = Vocabulary.from_files(vocab_dir)
    assert vocab2._non_padded_namespaces == {"a", "c"}
    # Check namespace a (non-padded: tokens occupy indices 0..2 directly).
    assert vocab2.get_vocab_size(namespace='a') == 3
    assert vocab2.get_token_from_index(0, namespace='a') == 'a0'
    assert vocab2.get_token_from_index(1, namespace='a') == 'a1'
    assert vocab2.get_token_from_index(2, namespace='a') == 'a2'
    assert vocab2.get_token_index('a0', namespace='a') == 0
    assert vocab2.get_token_index('a1', namespace='a') == 1
    assert vocab2.get_token_index('a2', namespace='a') == 2
    # Check namespace b (padded: padding and OOV occupy indices 0 and 1).
    assert vocab2.get_vocab_size(namespace='b') == 4  # (unk + padding + two tokens)
    assert vocab2.get_token_from_index(0, namespace='b') == vocab._padding_token
    assert vocab2.get_token_from_index(1, namespace='b') == vocab._oov_token
    assert vocab2.get_token_from_index(2, namespace='b') == 'b2'
    assert vocab2.get_token_from_index(3, namespace='b') == 'b3'
    assert vocab2.get_token_index(vocab._padding_token, namespace='b') == 0
    assert vocab2.get_token_index(vocab._oov_token, namespace='b') == 1
    assert vocab2.get_token_index('b2', namespace='b') == 2
    assert vocab2.get_token_index('b3', namespace='b') == 3
    # Check the dictionaries containing the reverse mapping are identical.
    assert vocab.get_index_to_token_vocabulary("a") == vocab2.get_index_to_token_vocabulary("a")
    assert vocab.get_index_to_token_vocabulary("b") == vocab2.get_index_to_token_vocabulary("b")