This page collects typical usage examples of the Python method allennlp.data.token_indexers.ELMoTokenCharactersIndexer. If you are unsure what ELMoTokenCharactersIndexer does or how to call it, the curated examples below should help; you can also explore the other members of the allennlp.data.token_indexers module for related usage.
The 15 code examples below are sorted by popularity by default.
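Unlike a word-level indexer, ELMoTokenCharactersIndexer maps every token to a fixed-length list of 50 character ids, so it needs no pretrained vocabulary. A minimal sketch of a direct call, assuming a recent AllenNLP release in which tokens_to_indices takes only the tokens and a vocabulary (older releases also took an index-name string, as Examples 8-11 show):

from allennlp.data import Token, Vocabulary
from allennlp.data.token_indexers import ELMoTokenCharactersIndexer

indexer = ELMoTokenCharactersIndexer()
# Each token becomes a list of 50 character ids: a begin-of-word marker,
# the token's character ids, an end-of-word marker, then padding.
indices = indexer.tokens_to_indices([Token("Second"), Token(".")], Vocabulary())
print(len(indices["elmo_tokens"]), len(indices["elmo_tokens"][0]))  # 2 50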
Example 1: fever_build_vocab
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as alias]
def fever_build_vocab(d_list, unk_token_num=None) -> ExVocabulary:
    if unk_token_num is None:
        unk_token_num = {'tokens': 2600}

    token_indexers = {
        'tokens': SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters')  # This is the elmo_characters
    }

    nli_dataset_reader = BasicReader(token_indexers=token_indexers)
    # for in_file in d_list:
    instances = nli_dataset_reader.read(d_list)

    whole_vocabulary = ExVocabulary.from_instances(instances, unk_token_num=unk_token_num)

    print(whole_vocabulary.get_vocab_size('tokens'))  # 122827
    print(type(whole_vocabulary.get_token_to_index_vocabulary('tokens')))

    return whole_vocabulary
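Several of the readers on this page (Examples 1, 2 and 7, among others) pair the character-level ELMo indexer with an ordinary SingleIdTokenIndexer so that one TextField yields both word ids and ELMo character ids. A minimal, self-contained sketch of that pattern; the sentence and field name below are made up for illustration:

from allennlp.data import Instance, Token, Vocabulary
from allennlp.data.fields import TextField
from allennlp.data.token_indexers import ELMoTokenCharactersIndexer, SingleIdTokenIndexer

token_indexers = {
    'tokens': SingleIdTokenIndexer(namespace='tokens'),                      # word ids
    'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters'),   # character ids
}
field = TextField([Token("A"), Token("sentence")], token_indexers)
instance = Instance({"premise": field})

# Only the SingleIdTokenIndexer contributes entries to the vocabulary; the ELMo
# indexer works directly on characters and needs no vocabulary counts.
vocab = Vocabulary.from_instances([instance])
instance.index_fields(vocab)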
Example 2: __init__
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as alias]
def __init__(self,
             lazy: bool = False,
             tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             clean_citation: bool = True,
             with_elmo: bool = False
             # use_lexicon_features: bool = False,
             # use_sparse_lexicon_features: bool = False
             ) -> None:
    super().__init__(lazy)
    self._clean_citation = clean_citation
    self._tokenizer = tokenizer or WordTokenizer()
    if with_elmo:
        self._token_indexers = {"elmo": ELMoTokenCharactersIndexer(),
                                "tokens": SingleIdTokenIndexer()}
    else:
        self._token_indexers = {"tokens": SingleIdTokenIndexer()}
Example 3: __init__
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as alias]
def __init__(self,
             lazy: bool = False,
             tokenizer: Tokenizer = None,
             use_lexicon_features: bool = False,
             use_sparse_lexicon_features: bool = False,
             multilabel: bool = False,
             with_elmo: bool = False,
             reader_format: str = 'flat') -> None:
    super().__init__(lazy)
    self._tokenizer = tokenizer or WordTokenizer()
    if with_elmo:
        # self._token_indexers = {"tokens": SingleIdTokenIndexer()}
        self._token_indexers = {"elmo": ELMoTokenCharactersIndexer(),
                                "tokens": SingleIdTokenIndexer()}
    else:
        self._token_indexers = {"tokens": SingleIdTokenIndexer()}
    self.use_lexicon_features = use_lexicon_features
    self.use_sparse_lexicon_features = use_sparse_lexicon_features
    if self.use_lexicon_features or self.use_sparse_lexicon_features:
        self.lexicons = {**ALL_ACTION_LEXICONS, **ALL_CONCEPT_LEXICONS}
    self.multilabel = multilabel
    self.reader_format = reader_format
Example 4: __init__
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as alias]
def __init__(self,
             lazy: bool = False,
             tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             use_lexicon_features: bool = False,
             use_sparse_lexicon_features: bool = False,
             with_elmo: bool = False
             ) -> None:
    super().__init__(lazy)
    self._tokenizer = tokenizer or WordTokenizer()
    if with_elmo:
        self._token_indexers = {"elmo": ELMoTokenCharactersIndexer(),
                                "tokens": SingleIdTokenIndexer()}
    else:
        self._token_indexers = {"tokens": SingleIdTokenIndexer()}
    self.use_lexicon_features = use_lexicon_features
    self.use_sparse_lexicon_features = use_sparse_lexicon_features
    if self.use_lexicon_features or self.use_sparse_lexicon_features:
        self.lexicons = {**ALL_ACTION_LEXICONS, **ALL_CONCEPT_LEXICONS}
Example 5: __init__
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as alias]
def __init__(self,
             lazy: bool = False,
             tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             clean_citation: bool = True,
             with_elmo: bool = False
             ) -> None:
    super().__init__(lazy)
    self._clean_citation = clean_citation
    self._tokenizer = tokenizer or WordTokenizer()
    self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
    if with_elmo:
        self._token_indexers = {"elmo": ELMoTokenCharactersIndexer(),
                                "tokens": SingleIdTokenIndexer()}
    else:
        self._token_indexers = {"tokens": SingleIdTokenIndexer()}
Example 6: test_elmo_empty_token_list
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as alias]
def test_elmo_empty_token_list(self):
    # Basic test
    indexer = ELMoTokenCharactersIndexer()
    assert {"elmo_tokens": []} == indexer.get_empty_token_list()

    # Real world test
    indexer = {"elmo": indexer}
    tokens_1 = TextField([Token("Apple")], indexer)
    targets_1 = ListField([TextField([Token("Apple")], indexer)])
    tokens_2 = TextField([Token("Screen"), Token("device")], indexer)
    targets_2 = ListField(
        [TextField([Token("Screen")], indexer), TextField([Token("Device")], indexer)]
    )
    instance_1 = Instance({"tokens": tokens_1, "targets": targets_1})
    instance_2 = Instance({"tokens": tokens_2, "targets": targets_2})
    a_batch = Batch([instance_1, instance_2])
    a_batch.index_instances(Vocabulary())
    batch_tensor = a_batch.as_tensor_dict()
    elmo_target_token_indices = batch_tensor["targets"]["elmo"]["elmo_tokens"]
    # The TextField that is empty should have been created using the
    # `get_empty_token_list` and then padded with zeros.
    empty_target = elmo_target_token_indices[0][1].numpy()
    np.testing.assert_array_equal(np.zeros((1, 50)), empty_target)
    non_empty_targets = [
        elmo_target_token_indices[0][0],
        elmo_target_token_indices[1][0],
        elmo_target_token_indices[1][1],
    ]
    for non_empty_target in non_empty_targets:
        with pytest.raises(AssertionError):
            np.testing.assert_array_equal(np.zeros((1, 50)), non_empty_target)
Example 7: __init__
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as alias]
def __init__(self, model_path):
    # Prepare Data
    lazy = False
    token_indexers = {
        'tokens': SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters')  # This is the elmo_characters
    }
    p_dict = wn_persistent_api.persistence_load()
    dev_fever_data_reader = WNSIMIReader(token_indexers=token_indexers, lazy=lazy, wn_p_dict=p_dict, max_l=420)
    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic")
    vocab.change_token_with_index_to_namespace('hidden', -2, namespace='labels')

    # Build Model
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0)
    # device_num = -1 if device.type == 'cpu' else 0
    device = torch.device("cpu")
    device_num = -1 if device.type == 'cpu' else 0

    biterator = BasicIterator(batch_size=16)
    biterator.index_with(vocab)

    model = Model(rnn_size_in=(1024 + 300 + dev_fever_data_reader.wn_feature_size,
                               1024 + 450 + dev_fever_data_reader.wn_feature_size),
                  rnn_size_out=(450, 450),
                  weight=weight_dict['glove.840B.300d'],
                  vocab_size=vocab.get_vocab_size('tokens'),
                  mlp_d=900,
                  embedding_dim=300, max_l=400)
    model.display()
    model.to(device)
    model.load_state_dict(torch.load(model_path))

    self.model = model
    self.dev_fever_data_reader = dev_fever_data_reader
    self.device_num = device_num
    self.biterator = biterator
Example 8: test_bos_to_char_ids
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as alias]
def test_bos_to_char_ids(self):
    indexer = ELMoTokenCharactersIndexer()
    indices = indexer.tokens_to_indices([Token(u'<S>')], Vocabulary(), u"test-elmo")
    expected_indices = [259, 257, 260, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261]
    assert indices == {u"test-elmo": [expected_indices]}
Example 9: test_eos_to_char_ids
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as alias]
def test_eos_to_char_ids(self):
    indexer = ELMoTokenCharactersIndexer()
    indices = indexer.tokens_to_indices([Token(u'</S>')], Vocabulary(), u"test-eos")
    expected_indices = [259, 258, 260, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261]
    assert indices == {u"test-eos": [expected_indices]}
Example 10: test_unicode_to_char_ids
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as alias]
def test_unicode_to_char_ids(self):
    indexer = ELMoTokenCharactersIndexer()
    indices = indexer.tokens_to_indices([Token(unichr(256) + u't')], Vocabulary(), u"test-unicode")
    expected_indices = [259, 197, 129, 117, 260, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261]
    assert indices == {u"test-unicode": [expected_indices]}
Example 11: test_elmo_as_array_produces_token_sequence
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as alias]
def test_elmo_as_array_produces_token_sequence(self):  # pylint: disable=invalid-name
    indexer = ELMoTokenCharactersIndexer()
    tokens = [Token(u'Second'), Token(u'.')]
    indices = indexer.tokens_to_indices(tokens, Vocabulary(), u"test-elmo")[u"test-elmo"]
    padded_tokens = indexer.pad_token_sequence({u'test-elmo': indices},
                                               desired_num_tokens={u'test-elmo': 3},
                                               padding_lengths={})
    expected_padded_tokens = [[259, 84, 102, 100, 112, 111, 101, 260, 261,
                               261, 261, 261, 261, 261, 261, 261, 261, 261,
                               261, 261, 261, 261, 261, 261, 261, 261, 261,
                               261, 261, 261, 261, 261, 261, 261, 261, 261,
                               261, 261, 261, 261, 261, 261, 261, 261, 261,
                               261, 261, 261, 261, 261],
                              [259, 47, 260, 261, 261, 261, 261, 261, 261,
                               261, 261, 261, 261, 261, 261, 261, 261, 261,
                               261, 261, 261, 261, 261, 261, 261, 261, 261,
                               261, 261, 261, 261, 261, 261, 261, 261, 261,
                               261, 261, 261, 261, 261, 261, 261, 261, 261,
                               261, 261, 261, 261, 261],
                              [0, 0, 0, 0, 0, 0, 0, 0, 0,
                               0, 0, 0, 0, 0, 0, 0, 0, 0,
                               0, 0, 0, 0, 0, 0, 0, 0, 0,
                               0, 0, 0, 0, 0, 0, 0, 0, 0,
                               0, 0, 0, 0, 0, 0, 0, 0, 0,
                               0, 0, 0, 0, 0]]
    assert padded_tokens[u'test-elmo'] == expected_padded_tokens
Example 12: __init__
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as alias]
def __init__(self, split, mode, only_use_relevant_dets=True, add_image_as_a_box=True, embs_to_load='bert_da',
             conditioned_answer_choice=0):
    """
    :param split: train, val, or test
    :param mode: answer or rationale
    :param only_use_relevant_dets: True, if we will only use the detections mentioned in the question and answer.
                                   False, if we should use all detections.
    :param add_image_as_a_box: True to add the image in as an additional 'detection'. It'll go first in the list
                               of objects.
    :param embs_to_load: Which precomputed embeddings to load.
    :param conditioned_answer_choice: If you're in test mode, the answer labels aren't provided, which could be
                                      a problem for the QA->R task. Pass in 'conditioned_answer_choice=i'
                                      to always condition on the i-th answer.
    """
    self.split = split
    self.mode = mode
    self.only_use_relevant_dets = only_use_relevant_dets
    print("Only relevant dets" if only_use_relevant_dets else "Using all detections", flush=True)
    self.add_image_as_a_box = add_image_as_a_box
    self.conditioned_answer_choice = conditioned_answer_choice

    with open(os.path.join(VCR_ANNOTS_DIR, '{}.jsonl'.format(split)), 'r') as f:
        self.items = [json.loads(s) for s in f]

    # Error messages fixed to match the values actually being checked.
    if split not in ('test', 'train', 'val'):
        raise ValueError("split must be in test, train, or val. Supplied {}".format(split))
    if mode not in ('answer', 'rationale'):
        raise ValueError("mode must be answer or rationale")

    self.token_indexers = {'elmo': ELMoTokenCharactersIndexer()}
    self.vocab = Vocabulary()

    with open(os.path.join(DATALOADER_DIR, 'dataloaders', 'cocoontology.json'), 'r') as f:
        coco = json.load(f)
    self.coco_objects = ['__background__'] + [x['name'] for k, x in sorted(coco.items(), key=lambda x: int(x[0]))]
    self.coco_obj_to_ind = {o: i for i, o in enumerate(self.coco_objects)}

    self.embs_to_load = embs_to_load
    self.h5fn = os.path.join(BERT_DIR, f'{self.embs_to_load}_{self.mode}_{self.split}.h5')
    print("Loading embeddings from {}".format(self.h5fn), flush=True)
Example 13: test_bos_to_char_ids
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as alias]
def test_bos_to_char_ids(self):
    indexer = ELMoTokenCharactersIndexer()
    indices = indexer.tokens_to_indices([Token("<S>")], Vocabulary())
    expected_indices = [259, 257, 260, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261]
    assert indices == {"elmo_tokens": [expected_indices]}
Example 14: test_eos_to_char_ids
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as alias]
def test_eos_to_char_ids(self):
    indexer = ELMoTokenCharactersIndexer()
    indices = indexer.tokens_to_indices([Token("</S>")], Vocabulary())
    expected_indices = [259, 258, 260, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261]
    assert indices == {"elmo_tokens": [expected_indices]}
Example 15: test_unicode_to_char_ids
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import ELMoTokenCharactersIndexer [as alias]
def test_unicode_to_char_ids(self):
    indexer = ELMoTokenCharactersIndexer()
    indices = indexer.tokens_to_indices([Token(chr(256) + "t")], Vocabulary())
    expected_indices = [259, 197, 129, 117, 260, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261, 261, 261, 261, 261,
                        261, 261, 261, 261, 261]
    assert indices == {"elmo_tokens": [expected_indices]}