本文整理汇总了Python中allennlp.data.vocabulary.Vocabulary.get_index_to_token_vocabulary方法的典型用法代码示例。如果您正苦于以下问题:Python Vocabulary.get_index_to_token_vocabulary方法的具体用法?Python Vocabulary.get_index_to_token_vocabulary怎么用?Python Vocabulary.get_index_to_token_vocabulary使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类allennlp.data.vocabulary.Vocabulary
的用法示例。
在下文中一共展示了Vocabulary.get_index_to_token_vocabulary方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_add_word_to_index_gives_consistent_results
# 需要导入模块: from allennlp.data.vocabulary import Vocabulary [as 别名]
# 或者: from allennlp.data.vocabulary.Vocabulary import get_index_to_token_vocabulary [as 别名]
def test_add_word_to_index_gives_consistent_results(self):
    """Adding the same token twice must be idempotent: same index, same size."""
    vocab = Vocabulary()
    size_before = vocab.get_vocab_size()

    def check_state(expected_index):
        # Invariants that must hold after every add of "word".
        assert "word" in vocab.get_index_to_token_vocabulary().values()
        assert vocab.get_token_index("word") == expected_index
        assert vocab.get_token_from_index(expected_index) == "word"
        assert vocab.get_vocab_size() == size_before + 1

    word_index = vocab.add_token_to_namespace("word")
    check_state(word_index)
    # Re-adding an existing token must not change anything.
    vocab.add_token_to_namespace("word")
    check_state(word_index)
示例2: test_from_params
# 需要导入模块: from allennlp.data.vocabulary import Vocabulary [as 别名]
# 或者: from allennlp.data.vocabulary.Vocabulary import get_index_to_token_vocabulary [as 别名]
def test_from_params(self):
    """Vocabulary.from_params should load from a saved directory, build from a
    dataset, and raise ConfigurationError on ambiguous/incomplete params."""
    # Save a vocab to check we can load it from_params.
    vocab_dir = self.TEST_DIR / 'vocab_save'
    vocab = Vocabulary(non_padded_namespaces=["a", "c"])
    vocab.add_token_to_namespace("a0", namespace="a")  # non-padded, should start at 0
    vocab.add_token_to_namespace("a1", namespace="a")
    vocab.add_token_to_namespace("a2", namespace="a")
    vocab.add_token_to_namespace("b2", namespace="b")  # padded, should start at 2
    vocab.add_token_to_namespace("b3", namespace="b")
    vocab.save_to_files(vocab_dir)
    params = Params({"directory_path": vocab_dir})
    vocab2 = Vocabulary.from_params(params)
    assert vocab.get_index_to_token_vocabulary("a") == vocab2.get_index_to_token_vocabulary("a")
    assert vocab.get_index_to_token_vocabulary("b") == vocab2.get_index_to_token_vocabulary("b")
    # Test case where we build a vocab from a dataset.
    vocab2 = Vocabulary.from_params(Params({}), self.dataset)
    # BUGFIX: the expected special tokens had been mangled into
    # '@@[email protected]@' by an HTML email-obfuscation pass; restored to
    # AllenNLP's default padding token (index 0) and OOV token (index 1).
    assert vocab2.get_index_to_token_vocabulary("tokens") == {0: '@@PADDING@@',
                                                              1: '@@UNKNOWN@@',
                                                              2: 'a', 3: 'c', 4: 'b'}
    # Test from_params raises when we have neither a dataset nor a vocab_directory.
    with pytest.raises(ConfigurationError):
        _ = Vocabulary.from_params(Params({}))
    # Test from_params raises when there are any other dict keys
    # present apart from 'directory_path' and we aren't calling from_dataset.
    with pytest.raises(ConfigurationError):
        _ = Vocabulary.from_params(Params({"directory_path": vocab_dir, "min_count": {'tokens': 2}}))
示例3: __init__
# 需要导入模块: from allennlp.data.vocabulary import Vocabulary [as 别名]
# 或者: from allennlp.data.vocabulary.Vocabulary import get_index_to_token_vocabulary [as 别名]
def __init__(self,
             vocabulary: Vocabulary,
             tag_namespace: str = "tags",
             ignore_classes: List[str] = None) -> None:
    """
    Parameters
    ----------
    vocabulary : ``Vocabulary``, required.
        A vocabulary containing the tag namespace.
    tag_namespace : str, required.
        This metric assumes that a BIO format is used in which the
        labels are of the format: ["B-LABEL", "I-LABEL"].
    ignore_classes : List[str], optional.
        Span labels to skip when computing span metrics. A "span label"
        is the part after the BIO prefix, e.g. "ARG1" in "B-ARG1".
        For example, with ``ignore_classes=["V"]`` the "V" span at
        indices (2, 3) of the sequence
        ["O", "O", "B-V", "I-V", "B-ARG1", "I-ARG1"]
        is excluded from precision, recall and F1. This is helpful,
        for instance, to avoid computing metrics for "V" spans in a
        BIO tagging scheme where they are typically not included.
    """
    # Map tag index -> tag string, used to decode predicted sequences.
    self._label_vocabulary = vocabulary.get_index_to_token_vocabulary(tag_namespace)
    self._ignore_classes: List[str] = [] if ignore_classes is None else ignore_classes
    # Per-label span counts, accumulated across calls.
    self._true_positives: Dict[str, int] = defaultdict(int)
    self._false_positives: Dict[str, int] = defaultdict(int)
    self._false_negatives: Dict[str, int] = defaultdict(int)
示例4: __init__
# 需要导入模块: from allennlp.data.vocabulary import Vocabulary [as 别名]
# 或者: from allennlp.data.vocabulary.Vocabulary import get_index_to_token_vocabulary [as 别名]
def __init__(self,
             vocabulary: Vocabulary,
             tag_namespace: str = "tags",
             ignore_classes: List[str] = None,
             label_encoding: Optional[str] = "BIO",
             tags_to_spans_function: Optional[TAGS_TO_SPANS_FUNCTION_TYPE] = None) -> None:
    """
    Parameters
    ----------
    vocabulary : ``Vocabulary``, required.
        A vocabulary containing the tag namespace.
    tag_namespace : str, required.
        This metric assumes that a BIO format is used in which the
        labels are of the format: ["B-LABEL", "I-LABEL"].
    ignore_classes : List[str], optional.
        Span labels to skip when computing span metrics. A "span label"
        is the part after the BIO prefix, e.g. "ARG1" in "B-ARG1".
        For example, with ``ignore_classes=["V"]`` the "V" span at
        indices (2, 3) of the sequence
        ["O", "O", "B-V", "I-V", "B-ARG1", "I-ARG1"]
        is excluded from precision, recall and F1. This is helpful,
        for instance, to avoid computing metrics for "V" spans in a
        BIO tagging scheme where they are typically not included.
    label_encoding : ``str``, optional (default = "BIO")
        The encoding used to specify label span endpoints in the sequence.
        Valid options are "BIO", "IOB1", "BIOUL" or "BMES".
    tags_to_spans_function: ``Callable``, optional (default = ``None``)
        If ``label_encoding`` is ``None``, ``tags_to_spans_function`` will be
        used to generate spans.
    """
    # Exactly one of (label_encoding, tags_to_spans_function) must be active.
    if label_encoding and tags_to_spans_function:
        raise ConfigurationError(
            'Both label_encoding and tags_to_spans_function are provided. '
            'Set "label_encoding=None" explicitly to enable tags_to_spans_function.'
        )
    if label_encoding:
        if label_encoding not in ("BIO", "IOB1", "BIOUL", "BMES"):
            raise ConfigurationError("Unknown label encoding - expected 'BIO', 'IOB1', 'BIOUL', 'BMES'.")
    elif tags_to_spans_function is None:
        raise ConfigurationError(
            'At least one of the (label_encoding, tags_to_spans_function) should be provided.'
        )
    self._label_encoding = label_encoding
    self._tags_to_spans_function = tags_to_spans_function
    # Map tag index -> tag string, used to decode predicted sequences.
    self._label_vocabulary = vocabulary.get_index_to_token_vocabulary(tag_namespace)
    self._ignore_classes: List[str] = [] if ignore_classes is None else ignore_classes
    # Per-label span counts, accumulated across calls.
    self._true_positives: Dict[str, int] = defaultdict(int)
    self._false_positives: Dict[str, int] = defaultdict(int)
    self._false_negatives: Dict[str, int] = defaultdict(int)
示例5: test_namespaces
# 需要导入模块: from allennlp.data.vocabulary import Vocabulary [as 别名]
# 或者: from allennlp.data.vocabulary.Vocabulary import get_index_to_token_vocabulary [as 别名]
def test_namespaces(self):
    """Tokens added under different namespaces must not interfere."""
    vocab = Vocabulary()
    base_size = vocab.get_vocab_size()
    index_word = vocab.add_token_to_namespace("word", namespace='1')
    assert "word" in vocab.get_index_to_token_vocabulary(namespace='1').values()
    assert vocab.get_token_index("word", namespace='1') == index_word
    assert vocab.get_token_from_index(index_word, namespace='1') == "word"
    assert vocab.get_vocab_size(namespace='1') == base_size + 1
    # A second namespace starts fresh, even for a token the first one
    # already contains.
    index_word2 = vocab.add_token_to_namespace("word2", namespace='2')
    index_word = vocab.add_token_to_namespace("word", namespace='2')
    tokens_in_two = vocab.get_index_to_token_vocabulary(namespace='2').values()
    assert "word" in tokens_in_two
    assert "word2" in tokens_in_two
    assert vocab.get_token_index("word", namespace='2') == index_word
    assert vocab.get_token_index("word2", namespace='2') == index_word2
    assert vocab.get_token_from_index(index_word, namespace='2') == "word"
    assert vocab.get_token_from_index(index_word2, namespace='2') == "word2"
    assert vocab.get_vocab_size(namespace='2') == base_size + 2
示例6: test_saving_and_loading
# 需要导入模块: from allennlp.data.vocabulary import Vocabulary [as 别名]
# 或者: from allennlp.data.vocabulary.Vocabulary import get_index_to_token_vocabulary [as 别名]
def test_saving_and_loading(self):
    # pylint: disable=protected-access
    """Round-trip a vocabulary through save_to_files / from_files."""
    vocab_dir = self.TEST_DIR / 'vocab_save'
    original = Vocabulary(non_padded_namespaces=["a", "c"])
    original.add_token_to_namespace("a0", namespace="a")  # non-padded, should start at 0
    original.add_token_to_namespace("a1", namespace="a")
    original.add_token_to_namespace("a2", namespace="a")
    original.add_token_to_namespace("b2", namespace="b")  # padded, should start at 2
    original.add_token_to_namespace("b3", namespace="b")
    original.save_to_files(vocab_dir)
    restored = Vocabulary.from_files(vocab_dir)
    assert restored._non_padded_namespaces == {"a", "c"}
    # Namespace "a" is non-padded: its tokens occupy indices 0..2.
    assert restored.get_vocab_size(namespace='a') == 3
    for index, token in enumerate(['a0', 'a1', 'a2']):
        assert restored.get_token_from_index(index, namespace='a') == token
        assert restored.get_token_index(token, namespace='a') == index
    # Namespace "b" is padded: padding and OOV occupy indices 0 and 1.
    assert restored.get_vocab_size(namespace='b') == 4  # (unk + padding + two tokens)
    for index, token in enumerate([original._padding_token, original._oov_token, 'b2', 'b3']):
        assert restored.get_token_from_index(index, namespace='b') == token
        assert restored.get_token_index(token, namespace='b') == index
    # Check the dictionaries containing the reverse mapping are identical.
    assert original.get_index_to_token_vocabulary("a") == restored.get_index_to_token_vocabulary("a")
    assert original.get_index_to_token_vocabulary("b") == restored.get_index_to_token_vocabulary("b")