

Python Vocabulary.get_token_index Method Code Examples

This article collects typical usage examples of the Python method allennlp.data.vocabulary.Vocabulary.get_token_index. If you are unsure what this method does or how to call it in practice, the curated examples below should help. You can also explore further usage examples of the containing class, allennlp.data.vocabulary.Vocabulary.


The sections below present 15 code examples of the Vocabulary.get_token_index method, sorted by popularity by default.
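Before the full examples, here is a minimal sketch of the method's core behavior, distilled from the tests below. It assumes a freshly constructed Vocabulary, whose default padded namespace reserves index 0 for padding and index 1 for the out-of-vocabulary (OOV) token:

from allennlp.data.vocabulary import Vocabulary

vocab = Vocabulary()
word_index = vocab.add_token_to_namespace("word")

# A registered token maps back to the index it was assigned.
assert vocab.get_token_index("word") == word_index

# An unseen token maps to the OOV index (1 in a freshly built padded namespace).
assert vocab.get_token_index("unseen word") == 1

# Namespaces are independent: the same method takes a namespace argument.
tag_index = vocab.add_token_to_namespace("B-ORG", namespace="tags")
assert vocab.get_token_index("B-ORG", namespace="tags") == tag_index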

Example 1: test_unknown_token

# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Alternatively: from allennlp.data.vocabulary.Vocabulary import get_token_index [as alias]
    def test_unknown_token(self):
        # pylint: disable=protected-access
        # We're putting this behavior in a test so that the behavior is documented.  There is
        # solver code that depends in a small way on how we treat the unknown token, so any
        # breaking change to this behavior should break a test, so you know you've done something
        # that needs more consideration.
        vocab = Vocabulary()
        oov_token = vocab._oov_token
        oov_index = vocab.get_token_index(oov_token)
        assert oov_index == 1
        assert vocab.get_token_index("unseen word") == oov_index
Author: apmoore1, Project: allennlp, Lines: 13, Source: vocabulary_test.py

Example 2: test_set_from_file_reads_padded_files

# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Alternatively: from allennlp.data.vocabulary.Vocabulary import get_token_index [as alias]
    def test_set_from_file_reads_padded_files(self):
        # pylint: disable=protected-access
        vocab_filename = self.TEST_DIR / 'vocab_file'
        with codecs.open(vocab_filename, 'w', 'utf-8') as vocab_file:
            vocab_file.write('<S>\n')
            vocab_file.write('</S>\n')
            vocab_file.write('<UNK>\n')
            vocab_file.write('a\n')
            vocab_file.write('tricky\x0bchar\n')
            vocab_file.write('word\n')
            vocab_file.write('another\n')

        vocab = Vocabulary()
        vocab.set_from_file(vocab_filename, is_padded=True, oov_token="<UNK>")

        assert vocab._oov_token == DEFAULT_OOV_TOKEN
        assert vocab.get_token_index("random string") == 3
        assert vocab.get_token_index("<S>") == 1
        assert vocab.get_token_index("</S>") == 2
        assert vocab.get_token_index(DEFAULT_OOV_TOKEN) == 3
        assert vocab.get_token_index("a") == 4
        assert vocab.get_token_index("tricky\x0bchar") == 5
        assert vocab.get_token_index("word") == 6
        assert vocab.get_token_index("another") == 7
        assert vocab.get_token_from_index(0) == vocab._padding_token
        assert vocab.get_token_from_index(1) == "<S>"
        assert vocab.get_token_from_index(2) == "</S>"
        assert vocab.get_token_from_index(3) == DEFAULT_OOV_TOKEN
        assert vocab.get_token_from_index(4) == "a"
        assert vocab.get_token_from_index(5) == "tricky\x0bchar"
        assert vocab.get_token_from_index(6) == "word"
        assert vocab.get_token_from_index(7) == "another"
Author: apmoore1, Project: allennlp, Lines: 34, Source: vocabulary_test.py

Example 3: test_add_word_to_index_gives_consistent_results

# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Alternatively: from allennlp.data.vocabulary.Vocabulary import get_token_index [as alias]
    def test_add_word_to_index_gives_consistent_results(self):
        vocab = Vocabulary()
        initial_vocab_size = vocab.get_vocab_size()
        word_index = vocab.add_token_to_namespace("word")
        assert "word" in vocab.get_index_to_token_vocabulary().values()
        assert vocab.get_token_index("word") == word_index
        assert vocab.get_token_from_index(word_index) == "word"
        assert vocab.get_vocab_size() == initial_vocab_size + 1

        # Now add it again, and make sure nothing changes.
        vocab.add_token_to_namespace("word")
        assert "word" in vocab.get_index_to_token_vocabulary().values()
        assert vocab.get_token_index("word") == word_index
        assert vocab.get_token_from_index(word_index) == "word"
        assert vocab.get_vocab_size() == initial_vocab_size + 1
Author: apmoore1, Project: allennlp, Lines: 17, Source: vocabulary_test.py

Example 4: tokens_to_indices

# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Alternatively: from allennlp.data.vocabulary.Vocabulary import get_token_index [as alias]
    def tokens_to_indices(self,
                          tokens: List[Token],
                          vocabulary: Vocabulary,
                          index_name: str) -> Dict[str, List[int]]:
        tags = ['NONE' if not token.ent_type_ else token.ent_type_ for token in tokens]

        return {index_name: [vocabulary.get_token_index(tag, self._namespace) for tag in tags]}
Author: apmoore1, Project: allennlp, Lines: 9, Source: ner_tag_indexer.py

Example 5: tokens_to_indices

# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Alternatively: from allennlp.data.vocabulary.Vocabulary import get_token_index [as alias]
    def tokens_to_indices(self,
                          tokens: List[Token],
                          vocabulary: Vocabulary,
                          index_name: str) -> Dict[str, List[int]]:
        dep_labels = [token.dep_ or 'NONE' for token in tokens]

        return {index_name: [vocabulary.get_token_index(dep_label, self.namespace) for dep_label in dep_labels]}
Author: apmoore1, Project: allennlp, Lines: 9, Source: dep_label_indexer.py

Example 6: token_to_indices

# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Alternatively: from allennlp.data.vocabulary.Vocabulary import get_token_index [as alias]
    def token_to_indices(self, token: Token, vocabulary: Vocabulary) -> int:
        if self._coarse_tags:
            tag = token.pos_
        else:
            tag = token.tag_
        if tag is None:
            tag = 'NONE'
        return vocabulary.get_token_index(tag, self._namespace)
Author: Jordan-Sauchuk, Project: allennlp, Lines: 10, Source: pos_tag_indexer.py

Example 7: test_namespaces

# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Alternatively: from allennlp.data.vocabulary.Vocabulary import get_token_index [as alias]
    def test_namespaces(self):
        vocab = Vocabulary()
        initial_vocab_size = vocab.get_vocab_size()
        word_index = vocab.add_token_to_namespace("word", namespace='1')
        assert "word" in vocab.get_index_to_token_vocabulary(namespace='1').values()
        assert vocab.get_token_index("word", namespace='1') == word_index
        assert vocab.get_token_from_index(word_index, namespace='1') == "word"
        assert vocab.get_vocab_size(namespace='1') == initial_vocab_size + 1

        # Now add it again, in a different namespace and a different word, and make sure it's like
        # new.
        word2_index = vocab.add_token_to_namespace("word2", namespace='2')
        word_index = vocab.add_token_to_namespace("word", namespace='2')
        assert "word" in vocab.get_index_to_token_vocabulary(namespace='2').values()
        assert "word2" in vocab.get_index_to_token_vocabulary(namespace='2').values()
        assert vocab.get_token_index("word", namespace='2') == word_index
        assert vocab.get_token_index("word2", namespace='2') == word2_index
        assert vocab.get_token_from_index(word_index, namespace='2') == "word"
        assert vocab.get_token_from_index(word2_index, namespace='2') == "word2"
        assert vocab.get_vocab_size(namespace='2') == initial_vocab_size + 2
Author: apmoore1, Project: allennlp, Lines: 22, Source: vocabulary_test.py

Example 8: token_to_indices

# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Alternatively: from allennlp.data.vocabulary.Vocabulary import get_token_index [as alias]
    def token_to_indices(self, token: Token, vocabulary: Vocabulary) -> int:
        if getattr(token, 'text_id', None) is not None:
            # `text_id` being set on the token means that we aren't using the vocab, we just use
            # this id instead.
            index = token.text_id
        else:
            text = token.text
            if self.lowercase_tokens:
                text = text.lower()
            index = vocabulary.get_token_index(text, self.namespace)
        return index
Author: Jordan-Sauchuk, Project: allennlp, Lines: 13, Source: single_id_token_indexer.py

Example 9: _get_vocab_index_mapping

# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Alternatively: from allennlp.data.vocabulary.Vocabulary import get_token_index [as alias]
    def _get_vocab_index_mapping(self, archived_vocab: Vocabulary) -> List[Tuple[int, int]]:
        vocab_index_mapping: List[Tuple[int, int]] = []
        for index in range(self.vocab.get_vocab_size(namespace='tokens')):
            token = self.vocab.get_token_from_index(index=index, namespace='tokens')
            archived_token_index = archived_vocab.get_token_index(token, namespace='tokens')
            # Checking if we got the UNK token index, because we don't want all new token
            # representations initialized to UNK token's representation. We do that by checking if
            # the two tokens are the same. They will not be if the token at the archived index is
            # UNK.
            if archived_vocab.get_token_from_index(archived_token_index, namespace="tokens") == token:
                vocab_index_mapping.append((index, archived_token_index))
        return vocab_index_mapping
Author: pyknife, Project: allennlp, Lines: 14, Source: nlvr_coverage_semantic_parser.py

Example 10: token_to_indices

# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Alternatively: from allennlp.data.vocabulary.Vocabulary import get_token_index [as alias]
    def token_to_indices(self, token: Token, vocabulary: Vocabulary) -> List[int]:
        indices = []
        if token.text is None:
            raise ConfigurationError('TokenCharactersIndexer needs a tokenizer that retains text')
        for character in self._character_tokenizer.tokenize(token.text):
            if getattr(character, 'text_id', None) is not None:
                # `text_id` being set on the token means that we aren't using the vocab, we just
                # use this id instead.
                index = character.text_id
            else:
                index = vocabulary.get_token_index(character.text, self._namespace)
            indices.append(index)
        return indices
Author: Jordan-Sauchuk, Project: allennlp, Lines: 15, Source: token_characters_indexer.py

Example 11: test_set_from_file_reads_non_padded_files

# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Alternatively: from allennlp.data.vocabulary.Vocabulary import get_token_index [as alias]
    def test_set_from_file_reads_non_padded_files(self):
        # pylint: disable=protected-access
        vocab_filename = self.TEST_DIR / 'vocab_file'
        with codecs.open(vocab_filename, 'w', 'utf-8') as vocab_file:
            vocab_file.write('B-PERS\n')
            vocab_file.write('I-PERS\n')
            vocab_file.write('O\n')
            vocab_file.write('B-ORG\n')
            vocab_file.write('I-ORG\n')

        vocab = Vocabulary()
        vocab.set_from_file(vocab_filename, is_padded=False, namespace='tags')
        assert vocab.get_token_index("B-PERS", namespace='tags') == 0
        assert vocab.get_token_index("I-PERS", namespace='tags') == 1
        assert vocab.get_token_index("O", namespace='tags') == 2
        assert vocab.get_token_index("B-ORG", namespace='tags') == 3
        assert vocab.get_token_index("I-ORG", namespace='tags') == 4
        assert vocab.get_token_from_index(0, namespace='tags') == "B-PERS"
        assert vocab.get_token_from_index(1, namespace='tags') == "I-PERS"
        assert vocab.get_token_from_index(2, namespace='tags') == "O"
        assert vocab.get_token_from_index(3, namespace='tags') == "B-ORG"
        assert vocab.get_token_from_index(4, namespace='tags') == "I-ORG"
Author: apmoore1, Project: allennlp, Lines: 24, Source: vocabulary_test.py

Example 12: tokens_to_indices

# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Alternatively: from allennlp.data.vocabulary.Vocabulary import get_token_index [as alias]
    def tokens_to_indices(self,
                          tokens: List[Token],
                          vocabulary: Vocabulary,
                          index_name: str) -> Dict[str, List[int]]:
        tags: List[str] = []

        for token in tokens:
            if self._coarse_tags:
                tag = token.pos_
            else:
                tag = token.tag_
            if tag is None:
                tag = 'NONE'

            tags.append(tag)

        return {index_name: [vocabulary.get_token_index(tag, self._namespace) for tag in tags]}
Author: pyknife, Project: allennlp, Lines: 19, Source: pos_tag_indexer.py

Example 13: tokens_to_indices

# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Alternatively: from allennlp.data.vocabulary.Vocabulary import get_token_index [as alias]
    def tokens_to_indices(self,
                          tokens: List[Token],
                          vocabulary: Vocabulary,
                          index_name: str) -> Dict[str, List[int]]:
        indices: List[int] = []

        for token in itertools.chain(self._start_tokens, tokens, self._end_tokens):
            if getattr(token, 'text_id', None) is not None:
                # `text_id` being set on the token means that we aren't using the vocab, we just use
                # this id instead.
                indices.append(token.text_id)
            else:
                text = token.text
                if self.lowercase_tokens:
                    text = text.lower()
                indices.append(vocabulary.get_token_index(text, self.namespace))

        return {index_name: indices}
Author: apmoore1, Project: allennlp, Lines: 20, Source: single_id_token_indexer.py

Example 14: tokens_to_indices

# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Alternatively: from allennlp.data.vocabulary.Vocabulary import get_token_index [as alias]
    def tokens_to_indices(self,
                          tokens: List[Token],
                          vocabulary: Vocabulary,
                          index_name: str) -> Dict[str, List[List[int]]]:
        indices: List[List[int]] = []
        for token in itertools.chain(self._start_tokens, tokens, self._end_tokens):
            token_indices: List[int] = []
            if token.text is None:
                raise ConfigurationError('TokenCharactersIndexer needs a tokenizer that retains text')
            for character in self._character_tokenizer.tokenize(token.text):
                if getattr(character, 'text_id', None) is not None:
                    # `text_id` being set on the token means that we aren't using the vocab, we just
                    # use this id instead.
                    index = character.text_id
                else:
                    index = vocabulary.get_token_index(character.text, self._namespace)
                token_indices.append(index)
            indices.append(token_indices)
        return {index_name: indices}
Author: apmoore1, Project: allennlp, Lines: 21, Source: token_characters_indexer.py

Example 15: token_to_indices

# Required import: from allennlp.data.vocabulary import Vocabulary [as alias]
# Alternatively: from allennlp.data.vocabulary.Vocabulary import get_token_index [as alias]
    def token_to_indices(self, token: Token, vocabulary: Vocabulary) -> int:
        dep_label = token.dep_ or 'NONE'
        return vocabulary.get_token_index(dep_label, self.namespace)
Author: Jordan-Sauchuk, Project: allennlp, Lines: 5, Source: dep_label_indexer.py


Note: The allennlp.data.vocabulary.Vocabulary.get_token_index method examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other open-source code and documentation platforms. The snippets are drawn from open-source projects contributed by their respective authors; copyright remains with the original authors, and distribution or use should follow the corresponding project's License. Do not reproduce without permission.