Python token_indexers.ELMoTokenCharactersIndexer方法代码示例

本文整理汇总了Python中allennlp.data.token_indexers.ELMoTokenCharactersIndexer方法的典型用法代码示例。如果您正苦于以下问题：Python token_indexers.ELMoTokenCharactersIndexer方法的具体用法？Python token_indexers.ELMoTokenCharactersIndexer怎么用？Python token_indexers.ELMoTokenCharactersIndexer使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块allennlp.data.token_indexers的用法示例。

在下文中一共展示了token_indexers.ELMoTokenCharactersIndexer方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: fever_build_vocab

# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as 别名]
def fever_build_vocab(d_list, unk_token_num=None) -> ExVocabulary:
    """Build an ``ExVocabulary`` from the instances read out of ``d_list``.

    :param d_list: passed straight to ``BasicReader.read``.
    :param unk_token_num: forwarded to ``ExVocabulary.from_instances``; when
        ``None`` it defaults to ``{'tokens': 2600}``.
    :return: the vocabulary built from the instances that were read.
    """
    unk_limits = {'tokens': 2600} if unk_token_num is None else unk_token_num

    # Raw tokens are indexed under 'tokens'; the ELMo character ids under
    # 'elmo_characters'.
    raw_token_indexer = SingleIdTokenIndexer(namespace='tokens')
    elmo_char_indexer = ELMoTokenCharactersIndexer(namespace='elmo_characters')
    reader = BasicReader(token_indexers={
        'tokens': raw_token_indexer,
        'elmo_chars': elmo_char_indexer,
    })

    vocabulary = ExVocabulary.from_instances(reader.read(d_list), unk_token_num=unk_limits)

    # Diagnostic output only (the original author noted a size of 122827 here).
    token_vocab_size = vocabulary.get_vocab_size('tokens')
    print(token_vocab_size)
    print(type(vocabulary.get_token_to_index_vocabulary('tokens')))

    return vocabulary

开发者ID:easonnie，项目名称:combine-FEVER-NSMN，代码行数:22，代码来源:fever_reader.py

示例2: __init__

# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as 别名]
def __init__(self,
             lazy: bool = False,
             tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             clean_citation: bool = True,
             with_elmo: bool = False
             ) -> None:
    """Configure the reader's tokenizer and token indexers.

    :param lazy: forwarded unchanged to the parent initializer.
    :param tokenizer: tokenizer to use; a ``WordTokenizer`` is created when
        this is falsy.
    :param token_indexers: accepted but never read by this initializer; the
        indexers are selected by ``with_elmo`` alone.
    :param clean_citation: stored as ``self._clean_citation``.
    :param with_elmo: when true, an ``"elmo"`` character indexer is
        registered ahead of the ``"tokens"`` single-id indexer.
    """
    super().__init__(lazy)
    self._clean_citation = clean_citation
    self._tokenizer = tokenizer or WordTokenizer()

    # "elmo" (when requested) is inserted first so the key order matches
    # what downstream code has always seen.
    indexers = {"elmo": ELMoTokenCharactersIndexer()} if with_elmo else {}
    indexers["tokens"] = SingleIdTokenIndexer()
    self._token_indexers = indexers

开发者ID:allenai，项目名称:scicite，代码行数:19，代码来源:citation_data_reader_aclarc_aux.py

示例3: __init__

# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as 别名]
def __init__(self,
             lazy: bool = False,
             tokenizer: Tokenizer = None,
             use_lexicon_features: bool=False,
             use_sparse_lexicon_features: bool = False,
             multilabel: bool = False,
             with_elmo: bool = False,
             reader_format: str = 'flat') -> None:
    """Configure tokenizer, token indexers and lexicon options.

    :param lazy: forwarded unchanged to the parent initializer.
    :param tokenizer: tokenizer to use; a ``WordTokenizer`` is created when
        this is falsy.
    :param use_lexicon_features: stored on the instance; enables loading of
        ``self.lexicons``.
    :param use_sparse_lexicon_features: stored on the instance; also enables
        loading of ``self.lexicons``.
    :param multilabel: stored as ``self.multilabel``.
    :param with_elmo: when true, an ``"elmo"`` character indexer is
        registered ahead of the ``"tokens"`` single-id indexer.
    :param reader_format: stored as ``self.reader_format``.
    """
    super().__init__(lazy)
    self._tokenizer = tokenizer or WordTokenizer()

    indexers = {"elmo": ELMoTokenCharactersIndexer()} if with_elmo else {}
    indexers["tokens"] = SingleIdTokenIndexer()
    self._token_indexers = indexers

    self.use_lexicon_features = use_lexicon_features
    self.use_sparse_lexicon_features = use_sparse_lexicon_features
    wants_lexicons = self.use_lexicon_features or self.use_sparse_lexicon_features
    if wants_lexicons:
        # ``self.lexicons`` exists only when some lexicon feature is on.
        self.lexicons = {**ALL_ACTION_LEXICONS, **ALL_CONCEPT_LEXICONS}

    self.multilabel = multilabel
    self.reader_format = reader_format

开发者ID:allenai，项目名称:scicite，代码行数:25，代码来源:citation_data_reader_scicite.py

示例4: __init__

# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as 别名]
def __init__(self,
             lazy: bool = False,
             tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             use_lexicon_features: bool = False,
             use_sparse_lexicon_features: bool = False,
             with_elmo: bool = False
             ) -> None:
    """Configure tokenizer, token indexers and lexicon options.

    :param lazy: forwarded unchanged to the parent initializer.
    :param tokenizer: tokenizer to use; a ``WordTokenizer`` is created when
        this is falsy.
    :param token_indexers: accepted but never read by this initializer; the
        indexers are selected by ``with_elmo`` alone.
    :param use_lexicon_features: stored on the instance; enables loading of
        ``self.lexicons``.
    :param use_sparse_lexicon_features: stored on the instance; also enables
        loading of ``self.lexicons``.
    :param with_elmo: when true, an ``"elmo"`` character indexer is
        registered ahead of the ``"tokens"`` single-id indexer.
    """
    super().__init__(lazy)
    self._tokenizer = tokenizer or WordTokenizer()

    indexers = {"elmo": ELMoTokenCharactersIndexer()} if with_elmo else {}
    indexers["tokens"] = SingleIdTokenIndexer()
    self._token_indexers = indexers

    self.use_lexicon_features = use_lexicon_features
    self.use_sparse_lexicon_features = use_sparse_lexicon_features
    wants_lexicons = self.use_lexicon_features or self.use_sparse_lexicon_features
    if wants_lexicons:
        # ``self.lexicons`` exists only when some lexicon feature is on.
        self.lexicons = {**ALL_ACTION_LEXICONS, **ALL_CONCEPT_LEXICONS}

开发者ID:allenai，项目名称:scicite，代码行数:21，代码来源:citation_data_reader_aclarc.py

示例5: __init__

# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as 别名]
def __init__(self,
             lazy: bool = False,
             tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             clean_citation: bool = True,
             with_elmo: bool = False
             ) -> None:
    """Configure the reader's tokenizer and token indexers.

    :param lazy: forwarded unchanged to the parent initializer.
    :param tokenizer: tokenizer to use; a ``WordTokenizer`` is created when
        this is falsy.
    :param token_indexers: indexers to use when ``with_elmo`` is false; a
        single ``"tokens"`` ``SingleIdTokenIndexer`` is used when this is
        falsy. Ignored when ``with_elmo`` is true.
    :param clean_citation: stored as ``self._clean_citation``.
    :param with_elmo: when true, the indexers are fixed to an ``"elmo"``
        character indexer plus a ``"tokens"`` single-id indexer.
    """
    super().__init__(lazy)
    self._clean_citation = clean_citation
    self._tokenizer = tokenizer or WordTokenizer()
    if with_elmo:
        # ELMo mode keeps its fixed indexer pair, exactly as before.
        self._token_indexers = {"elmo": ELMoTokenCharactersIndexer(),
                                "tokens": SingleIdTokenIndexer()}
    else:
        # Previously the caller-supplied indexers were assigned and then
        # unconditionally overwritten by the default; honor them here.
        self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}

开发者ID:allenai，项目名称:scicite，代码行数:18，代码来源:citation_data_reader_scicite_aux.py

示例6: test_elmo_empty_token_list

# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as 别名]
def test_elmo_empty_token_list(self):
    """Check the indexer's empty token list and how it pads inside a batch."""
    elmo_indexer = ELMoTokenCharactersIndexer()

    # Basic test
    assert elmo_indexer.get_empty_token_list() == {"elmo_tokens": []}

    # Real world test
    indexers = {"elmo": elmo_indexer}

    def text_field(*words):
        # One TextField over the given words, sharing the same indexers.
        return TextField([Token(word) for word in words], indexers)

    first_instance = Instance({
        "tokens": text_field("Apple"),
        "targets": ListField([text_field("Apple")]),
    })
    second_instance = Instance({
        "tokens": text_field("Screen", "device"),
        "targets": ListField([text_field("Screen"), text_field("Device")]),
    })

    batch = Batch([first_instance, second_instance])
    batch.index_instances(Vocabulary())
    target_ids = batch.as_tensor_dict()["targets"]["elmo"]["elmo_tokens"]

    # The first instance has one target and the second has two, so slot
    # [0][1] is the padded (empty) TextField; it should have been created
    # using `get_empty_token_list` and then padded with zeros.
    all_zero = np.zeros((1, 50))
    np.testing.assert_array_equal(all_zero, target_ids[0][1].numpy())

    # Every real target must differ from the all-zero row.
    real_targets = [target_ids[row][col] for row, col in ((0, 0), (1, 0), (1, 1))]
    for real_target in real_targets:
        with pytest.raises(AssertionError):
            np.testing.assert_array_equal(all_zero, real_target)

开发者ID:allenai，项目名称:allennlp，代码行数:32，代码来源:elmo_indexer_test.py

示例7: __init__

# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as 别名]
def __init__(self, model_path):
    """Build the CPU inference pipeline and restore weights from ``model_path``.

    Creates the token indexers and a ``WNSIMIReader``, loads the cached
    vocabulary and embedding weights, builds the ``Model`` on the CPU device
    and loads its state dict. The model, reader, iterator and device number
    are kept on the instance.

    :param model_path: checkpoint location handed to ``torch.load``; the
        loaded object is passed to ``model.load_state_dict``.
    """
    # Prepare Data
    lazy = False
    token_indexers = {
        'tokens': SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters')  # This is the elmo_characters
    }

    p_dict = wn_persistent_api.persistence_load()

    dev_fever_data_reader = WNSIMIReader(token_indexers=token_indexers, lazy=lazy, wn_p_dict=p_dict, max_l=420)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic")
    vocab.change_token_with_index_to_namespace('hidden', -2, namespace='labels')

    # Build Model
    # Inference is pinned to the CPU; device_num follows from the device type.
    device = torch.device("cpu")
    device_num = -1 if device.type == 'cpu' else 0

    biterator = BasicIterator(batch_size=16)
    biterator.index_with(vocab)

    model = Model(rnn_size_in=(1024 + 300 + dev_fever_data_reader.wn_feature_size,
                               1024 + 450 + dev_fever_data_reader.wn_feature_size),
                  rnn_size_out=(450, 450),
                  weight=weight_dict['glove.840B.300d'],
                  vocab_size=vocab.get_vocab_size('tokens'),
                  mlp_d=900,
                  embedding_dim=300, max_l=400)

    model.display()
    model.to(device)
    # map_location keeps a checkpoint that was saved from GPU loadable on a
    # CPU-only host; without it torch.load tries to restore the tensors onto
    # the device they were saved from.
    model.load_state_dict(torch.load(model_path, map_location=device))

    self.model = model
    self.dev_fever_data_reader = dev_fever_data_reader
    self.device_num = device_num
    self.biterator = biterator

开发者ID:easonnie，项目名称:combine-FEVER-NSMN，代码行数:43，代码来源:mesim_wn_simi_v1_2.py

示例8: test_bos_to_char_ids

# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as 别名]
def test_bos_to_char_ids(self):
    """Indexing the single token ``<S>`` yields the expected 50 character ids."""
    key = u"test-elmo"
    indices = ELMoTokenCharactersIndexer().tokens_to_indices([Token(u'<S>')], Vocabulary(), key)

    # Three leading ids followed by 47 copies of 261: 50 ids in total.
    expected_indices = [259, 257, 260] + [261] * 47
    assert indices == {key: [expected_indices]}

开发者ID:plasticityai，项目名称:magnitude，代码行数:12，代码来源:elmo_indexer_test.py

示例9: test_eos_to_char_ids

# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as 别名]
def test_eos_to_char_ids(self):
    """Indexing the single token ``</S>`` yields the expected 50 character ids."""
    key = u"test-eos"
    indices = ELMoTokenCharactersIndexer().tokens_to_indices([Token(u'</S>')], Vocabulary(), key)

    # Three leading ids followed by 47 copies of 261: 50 ids in total.
    expected_indices = [259, 258, 260] + [261] * 47
    assert indices == {key: [expected_indices]}

开发者ID:plasticityai，项目名称:magnitude，代码行数:12，代码来源:elmo_indexer_test.py

示例10: test_unicode_to_char_ids

# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as 别名]
def test_unicode_to_char_ids(self):
    """Indexing a token with a non-ASCII character yields the expected ids.

    The token is U+0100 followed by ``t``. It is written as a unicode
    literal rather than via ``unichr(256)``, because ``unichr`` exists only
    on Python 2 while the literal means the same text on Python 2 and 3.
    """
    indexer = ELMoTokenCharactersIndexer()
    indices = indexer.tokens_to_indices([Token(u'\u0100t')], Vocabulary(), u"test-unicode")

    # Five leading ids followed by 45 copies of 261: 50 ids in total.
    expected_indices = [259, 197, 129, 117, 260] + [261] * 45
    assert indices == {u"test-unicode": [expected_indices]}

开发者ID:plasticityai，项目名称:magnitude，代码行数:12，代码来源:elmo_indexer_test.py

示例11: test_elmo_as_array_produces_token_sequence

# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as 别名]
def test_elmo_as_array_produces_token_sequence(self): # pylint: disable=invalid-name
    """Padding two indexed tokens to three yields a trailing all-zero row."""
    key = u"test-elmo"
    row_width = 50

    indexer = ELMoTokenCharactersIndexer()
    tokens = [Token(u'Second'), Token(u'.')]
    indices = indexer.tokens_to_indices(tokens, Vocabulary(), key)[key]
    padded_tokens = indexer.pad_token_sequence({key: indices},
                                               desired_num_tokens={key: 3},
                                               padding_lengths={})

    def filled(prefix):
        # Extend the leading ids with 261 up to the fixed row width.
        return prefix + [261] * (row_width - len(prefix))

    expected_padded_tokens = [
        filled([259, 84, 102, 100, 112, 111, 101, 260]),
        filled([259, 47, 260]),
        [0] * row_width,  # the row added to reach three tokens
    ]

    assert padded_tokens[key] == expected_padded_tokens

开发者ID:plasticityai，项目名称:magnitude，代码行数:29，代码来源:elmo_indexer_test.py

示例12: __init__

# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as 别名]
def __init__(self, split, mode, only_use_relevant_dets=True, add_image_as_a_box=True, embs_to_load='bert_da',
             conditioned_answer_choice=0):
    """
    Load the annotations, COCO ontology and embedding file path for one split.

    :param split: train, val, or test
    :param mode: answer or rationale
    :param only_use_relevant_dets: True, if we will only use the detections mentioned in the question and answer.
                                   False, if we should use all detections.
    :param add_image_as_a_box:     True to add the image in as an additional 'detection'. It'll go first in the list
                                   of objects.
    :param embs_to_load: Which precomputed embeddings to load.
    :param conditioned_answer_choice: If you're in test mode, the answer labels aren't provided, which could be
                                      a problem for the QA->R task. Pass in 'conditioned_answer_choice=i'
                                      to always condition on the i-th answer.
    :raises ValueError: if ``split`` or ``mode`` is not one of the accepted values.
    """
    # Validate first: ``split`` is used to build the annotation file name
    # below, so a bad value should fail here with a clear message rather
    # than later while opening the file.
    if split not in ('test', 'train', 'val'):
        raise ValueError("split must be test, train, or val. Supplied {}".format(split))

    if mode not in ('answer', 'rationale'):
        raise ValueError("mode must be answer or rationale. Supplied {}".format(mode))

    self.split = split
    self.mode = mode
    self.only_use_relevant_dets = only_use_relevant_dets
    print("Only relevant dets" if only_use_relevant_dets else "Using all detections", flush=True)

    self.add_image_as_a_box = add_image_as_a_box
    self.conditioned_answer_choice = conditioned_answer_choice

    # One JSON document per line.
    with open(os.path.join(VCR_ANNOTS_DIR, '{}.jsonl'.format(split)), 'r') as f:
        self.items = [json.loads(s) for s in f]

    self.token_indexers = {'elmo': ELMoTokenCharactersIndexer()}
    self.vocab = Vocabulary()

    with open(os.path.join(DATALOADER_DIR, 'dataloaders', 'cocoontology.json'), 'r') as f:
        coco = json.load(f)
    # Entries are ordered by the integer value of their key; index 0 is
    # reserved for '__background__'.
    ordered_entries = sorted(coco.items(), key=lambda pair: int(pair[0]))
    self.coco_objects = ['__background__'] + [entry['name'] for _, entry in ordered_entries]
    self.coco_obj_to_ind = {o: i for i, o in enumerate(self.coco_objects)}

    self.embs_to_load = embs_to_load
    self.h5fn = os.path.join(BERT_DIR, f'{self.embs_to_load}_{self.mode}_{self.split}.h5')
    print("Loading embeddings from {}".format(self.h5fn), flush=True)

开发者ID:yuweijiang，项目名称:HGL-pytorch，代码行数:45，代码来源:vcr.py

示例13: test_bos_to_char_ids

# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as 别名]
def test_bos_to_char_ids(self):
    """Indexing the single token ``<S>`` yields the expected 50 character ids."""
    indices = ELMoTokenCharactersIndexer().tokens_to_indices([Token("<S>")], Vocabulary())

    # Three leading ids followed by 47 copies of 261: 50 ids in total.
    expected_indices = [259, 257, 260] + [261] * 47
    assert indices == {"elmo_tokens": [expected_indices]}

开发者ID:allenai，项目名称:allennlp，代码行数:58，代码来源:elmo_indexer_test.py

示例14: test_eos_to_char_ids

# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as 别名]
def test_eos_to_char_ids(self):
    """Indexing the single token ``</S>`` yields the expected 50 character ids."""
    indices = ELMoTokenCharactersIndexer().tokens_to_indices([Token("</S>")], Vocabulary())

    # Three leading ids followed by 47 copies of 261: 50 ids in total.
    expected_indices = [259, 258, 260] + [261] * 47
    assert indices == {"elmo_tokens": [expected_indices]}

开发者ID:allenai，项目名称:allennlp，代码行数:58，代码来源:elmo_indexer_test.py

示例15: test_unicode_to_char_ids

# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as 别名]
def test_unicode_to_char_ids(self):
    """Indexing ``chr(256) + "t"`` yields the expected 50 character ids."""
    token = Token(chr(256) + "t")
    indices = ELMoTokenCharactersIndexer().tokens_to_indices([token], Vocabulary())

    # Five leading ids followed by 45 copies of 261: 50 ids in total.
    expected_indices = [259, 197, 129, 117, 260] + [261] * 45
    assert indices == {"elmo_tokens": [expected_indices]}

开发者ID:allenai，项目名称:allennlp，代码行数:58，代码来源:elmo_indexer_test.py

注：本文中的allennlp.data.token_indexers.ELMoTokenCharactersIndexer方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。

示例1: fever_build_vocab

示例2: __init__

示例3: __init__

示例4: __init__

示例5: __init__

示例6: test_elmo_empty_token_list

示例7: __init__

示例8: test_bos_to_char_ids

示例9: test_eos_to_char_ids

示例10: test_unicode_to_char_ids

示例11: test_elmo_as_array_produces_token_sequence

示例12: __init__

示例13: test_bos_to_char_ids

示例14: test_eos_to_char_ids

示例15: test_unicode_to_char_ids

示例2: init

示例3: init

示例4: init

示例5: init

示例7: init

示例12: init