This article collects typical usage examples of the Python method allennlp.modules.elmo._ElmoCharacterEncoder. If you are unsure what elmo._ElmoCharacterEncoder does, how to call it, or where it is used in practice, the curated examples below may help. You can also explore the containing module, allennlp.modules.elmo, for related usage.
The following shows 5 code examples of elmo._ElmoCharacterEncoder, ordered by popularity by default.
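Before the collected examples, a minimal sketch of the encoder's basic contract may help orient the reader. The file paths below are placeholders for locally downloaded ELMo files (see Example 1 for one way to obtain them); batch_to_ids is a helper exported by the same allennlp.modules.elmo module.

from allennlp.modules.elmo import _ElmoCharacterEncoder, batch_to_ids

# Placeholder paths: point these at your downloaded ELMo files.
options_file = 'elmo/options.json'
weight_file = 'elmo/weights.hdf5'
encoder = _ElmoCharacterEncoder(options_file, weight_file)

# batch_to_ids converts tokenized sentences into character ids of shape
# (batch_size, max_sentence_length, 50).
char_ids = batch_to_ids([['I', 'like', 'ELMo'], ['Hello', 'world']])

output = encoder(char_ids)
# The encoder prepends <S> and appends </S>, so 'token_embedding' has shape
# (batch_size, max_sentence_length + 2, embedding_dim); 'mask' marks valid positions.
token_embedding = output['token_embedding']
mask = output['mask']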
Example 1: __init__
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoCharacterEncoder [as alias]
# (This method also assumes `from os import path, makedirs` at module level.)
def __init__(self):
    from allennlp.modules.elmo import _ElmoCharacterEncoder
    # Download the ELMo weights and options files once and cache them locally.
    if not path.isdir(self.path('elmo')):
        makedirs(self.path('elmo'))
    self.fweights = self.ensure_file(path.join('elmo', 'weights.hdf5'), url=self.settings['weights'])
    self.foptions = self.ensure_file(path.join('elmo', 'options.json'), url=self.settings['options'])
    # Build the character CNN encoder from the downloaded files.
    self.embeddings = _ElmoCharacterEncoder(self.foptions, self.fweights)
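Here ensure_file, self.path, and self.settings belong to the surrounding downloader class (not shown), which fetches the weights and options files on first use and caches them under a local elmo/ directory; the character encoder is then built directly from those two files, so later instantiations run without re-downloading.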
Example 2: test_elmo_token_representation
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoCharacterEncoder [as alias]
def test_elmo_token_representation(self):
    # Load the test words and convert to char ids
    with open(os.path.join(self.elmo_fixtures_path, "vocab_test.txt"), "r") as fin:
        words = fin.read().strip().split("\n")
    vocab = Vocabulary()
    indexer = ELMoTokenCharactersIndexer()
    tokens = [Token(word) for word in words]
    indices = indexer.tokens_to_indices(tokens, vocab)
    # There are 457 tokens. Reshape into 10 batches of 50 tokens.
    sentences = []
    for k in range(10):
        char_indices = indices["elmo_tokens"][(k * 50) : ((k + 1) * 50)]
        sentences.append(
            indexer.as_padded_tensor_dict(
                {"elmo_tokens": char_indices}, padding_lengths={"elmo_tokens": 50}
            )["elmo_tokens"]
        )
    batch = torch.stack(sentences)

    elmo_token_embedder = _ElmoCharacterEncoder(self.options_file, self.weight_file)
    elmo_token_embedder_output = elmo_token_embedder(batch)

    # Reshape back to a list of words and compare with ground truth. Need to also
    # remove <S>, </S>
    actual_embeddings = remove_sentence_boundaries(
        elmo_token_embedder_output["token_embedding"], elmo_token_embedder_output["mask"]
    )[0].data.numpy()
    actual_embeddings = actual_embeddings.reshape(-1, actual_embeddings.shape[-1])

    embedding_file = os.path.join(self.elmo_fixtures_path, "elmo_token_embeddings.hdf5")
    with h5py.File(embedding_file, "r") as fin:
        expected_embeddings = fin["embedding"][...]
    assert numpy.allclose(actual_embeddings[: len(tokens)], expected_embeddings, atol=1e-6)
Example 3: test_elmo_token_representation_bos_eos
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoCharacterEncoder [as alias]
def test_elmo_token_representation_bos_eos(self):
    # The additional <S> and </S> embeddings added by the embedder should be as expected.
    indexer = ELMoTokenCharactersIndexer()
    elmo_token_embedder = _ElmoCharacterEncoder(self.options_file, self.weight_file)
    for correct_index, token in [[0, "<S>"], [2, "</S>"]]:
        indices = indexer.tokens_to_indices([Token(token)], Vocabulary())
        indices = torch.from_numpy(numpy.array(indices["elmo_tokens"])).view(1, 1, -1)
        embeddings = elmo_token_embedder(indices)["token_embedding"]
        # The encoded sequence is <S>, token, </S>; position 1 holds the input token,
        # which here is itself the literal <S> or </S>, so it should match the
        # boundary embedding at correct_index.
        assert numpy.allclose(
            embeddings[0, correct_index, :].data.numpy(), embeddings[0, 1, :].data.numpy()
        )
Example 4: test_elmo_token_representation (variant for an older AllenNLP API)
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoCharacterEncoder [as alias]
def test_elmo_token_representation(self):
    # Load the test words and convert to char ids
    with open(os.path.join(self.elmo_fixtures_path, u'vocab_test.txt'), u'r') as fin:
        words = fin.read().strip().split(u'\n')
    vocab = Vocabulary()
    indexer = ELMoTokenCharactersIndexer()
    tokens = [Token(word) for word in words]
    indices = indexer.tokens_to_indices(tokens, vocab, u"elmo")
    # There are 457 tokens. Reshape into 10 batches of 50 tokens.
    sentences = []
    for k in range(10):
        char_indices = indices[u"elmo"][(k * 50):((k + 1) * 50)]
        sentences.append(
            indexer.pad_token_sequence(
                {u'key': char_indices}, desired_num_tokens={u'key': 50}, padding_lengths={}
            )[u'key']
        )
    batch = torch.from_numpy(numpy.array(sentences))

    elmo_token_embedder = _ElmoCharacterEncoder(self.options_file, self.weight_file)
    elmo_token_embedder_output = elmo_token_embedder(batch)

    # Reshape back to a list of words and compare with ground truth. Need to also
    # remove <S>, </S>
    actual_embeddings = remove_sentence_boundaries(
        elmo_token_embedder_output[u'token_embedding'],
        elmo_token_embedder_output[u'mask']
    )[0].data.numpy()
    actual_embeddings = actual_embeddings.reshape(-1, actual_embeddings.shape[-1])

    embedding_file = os.path.join(self.elmo_fixtures_path, u'elmo_token_embeddings.hdf5')
    with h5py.File(embedding_file, u'r') as fin:
        expected_embeddings = fin[u'embedding'][...]
    assert numpy.allclose(actual_embeddings[:len(tokens)], expected_embeddings, atol=1e-6)
Example 5: test_elmo_token_representation_bos_eos (variant for an older AllenNLP API)
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoCharacterEncoder [as alias]
def test_elmo_token_representation_bos_eos(self):
    # The additional <S> and </S> embeddings added by the embedder should be as expected.
    indexer = ELMoTokenCharactersIndexer()
    elmo_token_embedder = _ElmoCharacterEncoder(self.options_file, self.weight_file)
    for correct_index, token in [[0, u'<S>'], [2, u'</S>']]:
        indices = indexer.tokens_to_indices([Token(token)], Vocabulary(), u"correct")
        indices = torch.from_numpy(numpy.array(indices[u"correct"])).view(1, 1, -1)
        embeddings = elmo_token_embedder(indices)[u'token_embedding']
        assert numpy.allclose(embeddings[0, correct_index, :].data.numpy(), embeddings[0, 1, :].data.numpy())