This article collects typical usage examples of the Python method allennlp.modules.elmo._ElmoBiLm. If you are wondering what elmo._ElmoBiLm does and how to use it, the curated examples below should help. You can also explore the containing module, allennlp.modules.elmo, for further usage.
The following presents 13 code examples of elmo._ElmoBiLm, ordered by popularity by default.
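Before the examples, here is a minimal sketch of driving _ElmoBiLm directly. It is not taken from the examples below; the file paths are placeholders for real ELMo options/weights files, while batch_to_ids is the standard AllenNLP helper for building the character-id tensor:

import torch
from allennlp.modules.elmo import _ElmoBiLm, batch_to_ids

options_file = "elmo_options.json"   # placeholder: path or URL to an ELMo options file
weight_file = "elmo_weights.hdf5"    # placeholder: path or URL to an ELMo weights file

elmo_bilm = _ElmoBiLm(options_file, weight_file)
elmo_bilm.eval()

# Convert tokenised sentences into the (batch, timesteps, 50) character-id tensor.
character_ids = batch_to_ids([["Hello", "world"], ["ELMo"]])
with torch.no_grad():
    output = elmo_bilm(character_ids)

# output["activations"] is a list with one (batch, timesteps + 2, dim) tensor per
# biLM layer; the +2 accounts for the <S>/</S> boundary tokens added internally.
# output["mask"] marks which of those positions are real tokens.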
Example 1: test_elmo_bilm_can_cache_char_cnn_embeddings
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def test_elmo_bilm_can_cache_char_cnn_embeddings(self):
    sentences = [["This", "is", "a", "sentence"], ["Here", "'s", "one"], ["Another", "one"]]
    vocab, tensor = self.get_vocab_and_both_elmo_indexed_ids(sentences)
    words_to_cache = list(vocab.get_token_to_index_vocabulary("tokens").keys())
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file)
    elmo_bilm.eval()
    no_cache = elmo_bilm(
        tensor["character_ids"]["elmo_tokens"], tensor["character_ids"]["elmo_tokens"]
    )
    # ELMo is stateful, so we need to actually re-initialise it for this comparison to work.
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file, vocab_to_cache=words_to_cache)
    elmo_bilm.eval()
    cached = elmo_bilm(tensor["character_ids"]["elmo_tokens"], tensor["tokens"]["tokens"])
    numpy.testing.assert_array_almost_equal(
        no_cache["mask"].data.cpu().numpy(), cached["mask"].data.cpu().numpy()
    )
    for activation_cached, activation in zip(cached["activations"], no_cache["activations"]):
        numpy.testing.assert_array_almost_equal(
            activation_cached.data.cpu().numpy(), activation.data.cpu().numpy(), decimal=6
        )
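Note the forward signature in use here: once vocab_to_cache is supplied, _ElmoBiLm is called with both the character ids and the corresponding word ids (tensor["tokens"]["tokens"] above), so cached words can bypass the character CNN.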
Example 2: test_elmo_with_module
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def test_elmo_with_module(self):
    # We will create the _ElmoBiLm module and pass it in as the ``module`` argument.
    sentences = [
        ["The", "sentence", "."],
        ["ELMo", "helps", "disambiguate", "ELMo", "from", "Elmo", "."],
    ]
    character_ids = self._sentences_to_ids(sentences)
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file)
    elmo = Elmo(None, None, 2, dropout=0.0, module=elmo_bilm)
    output = elmo(character_ids)
    elmo_representations = output["elmo_representations"]
    assert len(elmo_representations) == 2
    for k in range(2):
        assert list(elmo_representations[k].size()) == [2, 7, 32]
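Passing module= hands Elmo a prebuilt _ElmoBiLm instead of loading one from files, which is why options_file and weight_file are None here (Example 11 below shows the constructor enforcing this). The [2, 7, 32] shape reflects the small test-fixture model; the full pretrained ELMo produces 1024-dimensional representations.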
Example 3: __init__
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def __init__(self,
             options_file=DEFAULT_OPTIONS_FILE,
             weight_file=DEFAULT_WEIGHT_FILE,
             cuda_device=-1):
    u"""
    Parameters
    ----------
    options_file : ``str``, optional
        A path or URL to an ELMo options file.
    weight_file : ``str``, optional
        A path or URL to an ELMo weights file.
    cuda_device : ``int``, optional, (default=-1)
        The GPU device to run on.
    """
    self.indexer = ELMoTokenCharactersIndexer()
    logger.info(u"Initializing ELMo.")
    self.elmo_bilm = _ElmoBiLm(options_file, weight_file)
    if cuda_device >= 0:
        self.elmo_bilm = self.elmo_bilm.cuda(device=cuda_device)
    self.cuda_device = cuda_device
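A hedged sketch of how a wrapper with this __init__ might be called. MyElmoEmbedder is a hypothetical name for the surrounding class, while batch_to_ids is real AllenNLP API:

from allennlp.modules.elmo import batch_to_ids

embedder = MyElmoEmbedder(cuda_device=-1)            # hypothetical class name
character_ids = batch_to_ids([["A", "short", "sentence"]])
if embedder.cuda_device >= 0:
    character_ids = character_ids.cuda(device=embedder.cuda_device)
# Each entry of "activations" is a (batch, timesteps + 2, dim) layer output.
activations = embedder.elmo_bilm(character_ids)["activations"]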
Example 4: test_elmo_bilm_can_cache_char_cnn_embeddings (older AllenNLP / Python 2 variant of Example 1)
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def test_elmo_bilm_can_cache_char_cnn_embeddings(self):
    sentences = [[u"This", u"is", u"a", u"sentence"],
                 [u"Here", u"'s", u"one"],
                 [u"Another", u"one"]]
    vocab, tensor = self.get_vocab_and_both_elmo_indexed_ids(sentences)
    words_to_cache = list(vocab.get_token_to_index_vocabulary(u"tokens").keys())
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file)
    elmo_bilm.eval()
    no_cache = elmo_bilm(tensor[u"character_ids"], tensor[u"character_ids"])
    # ELMo is stateful, so we need to actually re-initialise it for this comparison to work.
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file, vocab_to_cache=words_to_cache)
    elmo_bilm.eval()
    cached = elmo_bilm(tensor[u"character_ids"], tensor[u"tokens"])
    numpy.testing.assert_array_almost_equal(no_cache[u"mask"].data.cpu().numpy(),
                                            cached[u"mask"].data.cpu().numpy())
    for activation_cached, activation in izip(cached[u"activations"], no_cache[u"activations"]):
        numpy.testing.assert_array_almost_equal(activation_cached.data.cpu().numpy(),
                                                activation.data.cpu().numpy(), decimal=6)
Example 5: __init__
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def __init__(self, config):
    super(ELMo, self).__init__()
    self.bsize = config['bsize']
    self.pool_type = config['pool_type']
    self.which_layer = config['which_layer']
    self.version = 1 if 'version' not in config else config['version']
    self.elmo_embedder = _ElmoBiLm(config['optfile'],
                                   config['wgtfile'],
                                   requires_grad=False)
    assert self.version in [1, 2]
    if self.version == 1:
        self.bos = '<s>'
        self.eos = '</s>'
        self.max_pad = True
        self.moses_tok = False
    elif self.version == 2:
        self.bos = '<p>'
        self.eos = '</p>'
        self.max_pad = False
        self.moses_tok = True
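The version flag only changes preprocessing: the two versions differ in boundary tokens (<s>/</s> vs <p>/</p>), padding behaviour (max_pad), and tokenisation (moses_tok). The _ElmoBiLm call itself is identical in both cases.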
Example 6: test_elmo_char_cnn_cache_does_not_raise_error_for_uncached_words
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def test_elmo_char_cnn_cache_does_not_raise_error_for_uncached_words(self):
    sentences = [["This", "is", "OOV"], ["so", "is", "this"]]
    in_vocab_sentences = [["here", "is"], ["a", "vocab"]]
    oov_tensor = self.get_vocab_and_both_elmo_indexed_ids(sentences)[1]
    vocab, in_vocab_tensor = self.get_vocab_and_both_elmo_indexed_ids(in_vocab_sentences)
    words_to_cache = list(vocab.get_token_to_index_vocabulary("tokens").keys())
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file, vocab_to_cache=words_to_cache)
    elmo_bilm(
        in_vocab_tensor["character_ids"]["elmo_tokens"], in_vocab_tensor["tokens"]["tokens"]
    )
    elmo_bilm(oov_tensor["character_ids"]["elmo_tokens"], oov_tensor["tokens"]["tokens"])
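The test passes because the cached word-id path is only an optimisation: words missing from the cached vocabulary fall back to the character-CNN computation instead of raising an error.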
Example 7: test_elmo_char_cnn_cache_does_not_raise_error_for_uncached_words (older AllenNLP / Python 2 variant of Example 6)
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def test_elmo_char_cnn_cache_does_not_raise_error_for_uncached_words(self):
    sentences = [[u"This", u"is", u"OOV"], [u"so", u"is", u"this"]]
    in_vocab_sentences = [[u"here", u"is"], [u"a", u"vocab"]]
    oov_tensor = self.get_vocab_and_both_elmo_indexed_ids(sentences)[1]
    vocab, in_vocab_tensor = self.get_vocab_and_both_elmo_indexed_ids(in_vocab_sentences)
    words_to_cache = list(vocab.get_token_to_index_vocabulary(u"tokens").keys())
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file, vocab_to_cache=words_to_cache)
    elmo_bilm(in_vocab_tensor[u"character_ids"], in_vocab_tensor[u"tokens"])
    elmo_bilm(oov_tensor[u"character_ids"], oov_tensor[u"tokens"])
Example 8: test_elmo_with_module (older AllenNLP / Python 2 variant of Example 2)
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def test_elmo_with_module(self):
    # We will create the _ElmoBiLm module and pass it in as the ``module`` argument.
    sentences = [[u'The', u'sentence', u'.'],
                 [u'ELMo', u'helps', u'disambiguate', u'ELMo', u'from', u'Elmo', u'.']]
    character_ids = self._sentences_to_ids(sentences)
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file)
    elmo = Elmo(None, None, 2, dropout=0.0, module=elmo_bilm)
    output = elmo(character_ids)
    elmo_representations = output[u'elmo_representations']
    assert len(elmo_representations) == 2
    for k in range(2):
        assert list(elmo_representations[k].size()) == [2, 7, 32]
Example 9: __init__
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def __init__(self, nhid, optfile, wgtfile, dropout):
    super(ELMo, self).__init__()
    self.elmo_embedder = _ElmoBiLm(optfile, wgtfile, requires_grad=False)
    # One learnable weight per biLM layer (three layers in total).
    self.weight_param = nn.Parameter(torch.FloatTensor([0.0, 0.0, 0.0]))
    self.relu_network = nn.Sequential(OrderedDict([
        ('linear', nn.Linear(1024, nhid)),
        ('dropout', nn.Dropout(dropout)),
        ('relu', nn.ReLU())  # originally keyed 'tanh', but the activation is ReLU
    ]))
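The three-element weight_param suggests a learned mix over the three biLM activations. A minimal sketch of how such weights are typically applied; this forward logic is an assumption, not shown in the source:

import torch.nn.functional as F

def mix_layers(activations, weight_param):
    # activations: the three (batch, timesteps, dim) tensors from _ElmoBiLm
    weights = F.softmax(weight_param, dim=0)   # normalise the three layer weights
    return sum(w * act for w, act in zip(weights, activations))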
Example 10: __init__
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def __init__(self, options_file, weight_file, device=None):
    self._elmo_lstm = _ElmoBiLm(options_file,
                                weight_file,
                                requires_grad=False,
                                vocab_to_cache=None)
    if device is not None:
        self._elmo_lstm = self._elmo_lstm.to(device)
    self.output_dim = self._elmo_lstm.get_output_dim()
Developer: sz128 · Project: slot_filling_and_intent_detection_of_SLU · Lines: 12 · Source: get_ELMo_word_embedding_for_a_dataset.py
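get_output_dim() reports the size of each activation's final dimension (1024 for the standard pretrained ELMo), which makes it convenient for sizing downstream layers without hard-coding the value.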
Example 11: __init__
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def __init__(self,
             options_file: str,
             weight_file: str,
             num_output_representations: int,
             requires_grad: bool = False,
             do_layer_norm: bool = False,
             dropout: float = 0.5,
             vocab_to_cache: List[str] = None,
             module: torch.nn.Module = None) -> None:
    super(Elmo, self).__init__()
    logging.info("Initializing ELMo")
    if module is not None:
        if options_file is not None or weight_file is not None:
            raise ConfigurationError(
                "Don't provide options_file or weight_file with module")
        self._elmo_lstm = module
    else:
        self._elmo_lstm = _ElmoBiLm(options_file,
                                    weight_file,
                                    requires_grad=requires_grad,
                                    vocab_to_cache=vocab_to_cache)
    self._has_cached_vocab = vocab_to_cache is not None
    self._dropout = Dropout(p=dropout)
    self.num_output_representations = num_output_representations
    if num_output_representations != -1:
        self._scalar_mixes: Any = []
        for k in range(num_output_representations):
            scalar_mix = ScalarMix(self._elmo_lstm.num_layers, do_layer_norm=do_layer_norm)
            self.add_module('scalar_mix_{}'.format(k), scalar_mix)
            self._scalar_mixes.append(scalar_mix)
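Each ScalarMix learns a normalised weighted average over the biLM's num_layers activations; this is what produces the entries of elmo_representations seen in Example 2. The num_output_representations == -1 branch appears to be a project-specific extension of the stock AllenNLP constructor that skips building the mixes.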
Example 12: test_elmo_bilm
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def test_elmo_bilm(self):
    # get the raw data
    sentences, expected_lm_embeddings = self._load_sentences_embeddings()
    # load the test model
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file)
    # Deal with the data.
    indexer = ELMoTokenCharactersIndexer()
    # For each sentence, first create a TextField, then create an instance
    instances = []
    for batch in zip(*sentences):
        for sentence in batch:
            tokens = [Token(token) for token in sentence.split()]
            field = TextField(tokens, {"character_ids": indexer})
            instance = Instance({"elmo": field})
            instances.append(instance)
    vocab = Vocabulary()
    dataset = AllennlpDataset(instances, vocab)
    # Now finally we can iterate through batches.
    loader = PyTorchDataLoader(dataset, 3)
    for i, batch in enumerate(loader):
        lm_embeddings = elmo_bilm(batch["elmo"]["character_ids"]["elmo_tokens"])
        top_layer_embeddings, mask = remove_sentence_boundaries(
            lm_embeddings["activations"][2], lm_embeddings["mask"]
        )
        # check the mask lengths
        lengths = mask.data.numpy().sum(axis=1)
        batch_sentences = [sentences[k][i] for k in range(3)]
        expected_lengths = [len(sentence.split()) for sentence in batch_sentences]
        assert lengths.tolist() == expected_lengths
        # get the expected embeddings and compare!
        expected_top_layer = [expected_lm_embeddings[k][i] for k in range(3)]
        for k in range(3):
            assert numpy.allclose(
                top_layer_embeddings[k, : lengths[k], :].data.numpy(),
                expected_top_layer[k],
                atol=1.0e-6,
            )
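remove_sentence_boundaries strips the <S>/</S> positions that _ElmoBiLm adds internally, which is why the recovered mask lengths line up with the raw sentence lengths asserted here.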
Example 13: test_elmo_bilm (older AllenNLP / Python 2 variant of Example 12)
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def test_elmo_bilm(self):
    # get the raw data
    sentences, expected_lm_embeddings = self._load_sentences_embeddings()
    # load the test model
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file)
    # Deal with the data.
    indexer = ELMoTokenCharactersIndexer()
    # For each sentence, first create a TextField, then create an instance
    instances = []
    for batch in izip(*sentences):
        for sentence in batch:
            tokens = [Token(token) for token in sentence.split()]
            field = TextField(tokens, {u'character_ids': indexer})
            instance = Instance({u"elmo": field})
            instances.append(instance)
    vocab = Vocabulary()
    # Now finally we can iterate through batches.
    iterator = BasicIterator(3)
    iterator.index_with(vocab)
    for i, batch in enumerate(iterator(instances, num_epochs=1, shuffle=False)):
        lm_embeddings = elmo_bilm(batch[u'elmo'][u'character_ids'])
        top_layer_embeddings, mask = remove_sentence_boundaries(
            lm_embeddings[u'activations'][2],
            lm_embeddings[u'mask']
        )
        # check the mask lengths
        lengths = mask.data.numpy().sum(axis=1)
        batch_sentences = [sentences[k][i] for k in range(3)]
        expected_lengths = [
            len(sentence.split()) for sentence in batch_sentences
        ]
        self.assertEqual(lengths.tolist(), expected_lengths)
        # get the expected embeddings and compare!
        expected_top_layer = [expected_lm_embeddings[k][i] for k in range(3)]
        for k in range(3):
            self.assertTrue(
                numpy.allclose(
                    top_layer_embeddings[k, :lengths[k], :].data.numpy(),
                    expected_top_layer[k],
                    atol=1.0e-6
                )
            )