This article collects typical usage examples of the Python method allennlp.data.Vocabulary.save_to_files. If you are wondering what Vocabulary.save_to_files does or how to use it, the curated examples below may help; you can also read more about the containing class, allennlp.data.Vocabulary.
Below are 2 code examples of Vocabulary.save_to_files, sorted by popularity by default.
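Before the test examples, here is a minimal standalone sketch of what save_to_files does (a sketch assuming AllenNLP 0.x; the directory path is made up for illustration): it serializes every vocabulary namespace to a <namespace>.txt file, plus non_padded_namespaces.txt, inside the given directory, and Vocabulary.from_files rebuilds an equivalent vocabulary from those files.

import os
from allennlp.data import Vocabulary

vocab = Vocabulary()
vocab.add_token_to_namespace('cats', namespace='tokens')
vocab.add_token_to_namespace('N', namespace='labels')

# Mirror the tests below and create the directory first (hypothetical path).
os.makedirs('/tmp/my_vocabulary', exist_ok=True)
# Writes tokens.txt, labels.txt and non_padded_namespaces.txt into the directory.
vocab.save_to_files('/tmp/my_vocabulary')

# Round-trip: the restored vocabulary assigns the same indices.
restored = Vocabulary.from_files('/tmp/my_vocabulary')
assert (restored.get_token_index('cats', namespace='tokens')
        == vocab.get_token_index('cats', namespace='tokens'))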
Example 1: test_dry_run_without_extension
# Required import: from allennlp.data import Vocabulary [as alias]
# Or: from allennlp.data.Vocabulary import save_to_files [as alias]
# This snippet also relies on `import os` and on `dry_run_from_params`
# (in AllenNLP 0.x, from allennlp.commands.dry_run).
def test_dry_run_without_extension(self):
    existing_serialization_dir = self.TEST_DIR / 'existing'
    extended_serialization_dir = self.TEST_DIR / 'extended'
    existing_vocab_path = existing_serialization_dir / 'vocabulary'
    extended_vocab_path = extended_serialization_dir / 'vocabulary'

    vocab = Vocabulary()
    # If extend is False, it is the user's responsibility to make sure that dataset
    # instances will be indexable by the provided vocabulary. At least @@UNKNOWN@@
    # should be present in every namespace for which OOV entries could be seen in
    # the dataset during indexing. The `tokens` namespace will see new words, but
    # it has the @@UNKNOWN@@ token; the `labels` namespace has no @@UNKNOWN@@,
    # so 'N' and 'V' must be added upfront.
    vocab.add_token_to_namespace('some_weird_token_1', namespace='tokens')
    vocab.add_token_to_namespace('some_weird_token_2', namespace='tokens')
    vocab.add_token_to_namespace('N', namespace='labels')
    vocab.add_token_to_namespace('V', namespace='labels')
    os.makedirs(existing_serialization_dir, exist_ok=True)
    vocab.save_to_files(existing_vocab_path)

    self.params['vocabulary'] = {}
    self.params['vocabulary']['directory_path'] = existing_vocab_path
    self.params['vocabulary']['extend'] = False
    dry_run_from_params(self.params, extended_serialization_dir)

    with open(extended_vocab_path / 'tokens.txt') as f:
        tokens = [line.strip() for line in f]

    # With extend=False, the written vocabulary is exactly the one we saved.
    assert tokens[0] == '@@UNKNOWN@@'
    assert tokens[1] == 'some_weird_token_1'
    assert tokens[2] == 'some_weird_token_2'
    assert len(tokens) == 3
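The comment in Example 1 is the crux of extend=False: during indexing, a word missing from a padded namespace such as tokens falls back to @@UNKNOWN@@, while a namespace like labels (non-padded by default, because it matches the *labels pattern) has no such fallback and the lookup fails. A short sketch of that behavior, assuming AllenNLP 0.x:

from allennlp.data import Vocabulary

vocab = Vocabulary()
vocab.add_token_to_namespace('some_weird_token_1', namespace='tokens')
vocab.add_token_to_namespace('N', namespace='labels')

# Padded namespace: an unseen token maps to the index of @@UNKNOWN@@.
oov_index = vocab.get_token_index('never_seen_before', namespace='tokens')
assert vocab.get_token_from_index(oov_index, namespace='tokens') == '@@UNKNOWN@@'

# Non-padded namespace: no @@UNKNOWN@@ exists, so an unseen label raises.
try:
    vocab.get_token_index('ADJ', namespace='labels')
except KeyError:
    pass  # this is why Example 1 adds 'N' and 'V' upfront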
Example 2: test_dry_run_with_extension
# Required import: from allennlp.data import Vocabulary [as alias]
# Or: from allennlp.data.Vocabulary import save_to_files [as alias]
# This snippet also relies on `import os` and on `dry_run_from_params`
# (in AllenNLP 0.x, from allennlp.commands.dry_run).
def test_dry_run_with_extension(self):
    existing_serialization_dir = self.TEST_DIR / 'existing'
    extended_serialization_dir = self.TEST_DIR / 'extended'
    existing_vocab_path = existing_serialization_dir / 'vocabulary'
    extended_vocab_path = extended_serialization_dir / 'vocabulary'

    vocab = Vocabulary()
    vocab.add_token_to_namespace('some_weird_token_1', namespace='tokens')
    vocab.add_token_to_namespace('some_weird_token_2', namespace='tokens')
    os.makedirs(existing_serialization_dir, exist_ok=True)
    vocab.save_to_files(existing_vocab_path)

    self.params['vocabulary'] = {}
    self.params['vocabulary']['directory_path'] = existing_vocab_path
    self.params['vocabulary']['extend'] = True
    self.params['vocabulary']['min_count'] = {"tokens": 3}
    dry_run_from_params(self.params, extended_serialization_dir)

    vocab_files = os.listdir(extended_vocab_path)
    assert set(vocab_files) == {'labels.txt', 'non_padded_namespaces.txt', 'tokens.txt'}

    with open(extended_vocab_path / 'tokens.txt') as f:
        tokens = [line.strip() for line in f]

    # Tokens from the saved vocabulary keep their original indices...
    assert tokens[0] == '@@UNKNOWN@@'
    assert tokens[1] == 'some_weird_token_1'
    assert tokens[2] == 'some_weird_token_2'

    # ...and dataset tokens that pass min_count are appended after them.
    tokens.sort()
    assert tokens == ['.', '@@UNKNOWN@@', 'animals', 'are',
                      'some_weird_token_1', 'some_weird_token_2']

    with open(extended_vocab_path / 'labels.txt') as f:
        labels = [line.strip() for line in f]
    labels.sort()
    assert labels == ['N', 'V']
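As a follow-up, the extended vocabulary written by dry_run_from_params can be loaded back with Vocabulary.from_files and inspected; a brief sketch, where the directory below stands in for extended_vocab_path from the test and is hypothetical:

from allennlp.data import Vocabulary

extended = Vocabulary.from_files('extended/vocabulary')  # hypothetical path
# Indices of the pre-existing tokens are preserved by extension.
print(extended.get_token_index('some_weird_token_1', namespace='tokens'))
# Size reflects the original tokens plus dataset tokens meeting min_count.
print(extended.get_vocab_size('tokens'))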