本文整理汇总了Python中allennlp.commands.elmo.ElmoEmbedder类的典型用法代码示例。如果您正苦于以下问题：Python elmo.ElmoEmbedder类的具体用法？Python elmo.ElmoEmbedder怎么用？Python elmo.ElmoEmbedder使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块allennlp.commands.elmo的用法示例。
在下文中一共展示了elmo.ElmoEmbedder类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_embeddings_are_as_expected
# 需要导入模块: from allennlp.commands import elmo [as 别名]
# 或者: from allennlp.commands.elmo import ElmoEmbedder [as 别名]
def test_embeddings_are_as_expected(self):
    """Check that ElmoEmbedder reproduces the stored reference embeddings.

    The fixture sentences and embeddings are loaded, flattened into a single
    linear sequence, embedded in batches, and index 2 of every returned
    tensor is compared against the matching reference array.
    """
    loaded_sentences, loaded_embeddings = self._load_sentences_embeddings()
    assert len(loaded_sentences) == len(loaded_embeddings)
    batch_size = len(loaded_sentences)

    # The fixtures are organized in the idiosyncratic way TensorFlow handles
    # batching. Enumerate the (row, column) positions in the linear order in
    # which AllenNLP should see them, then gather both lists in that order.
    positions = [
        (row, column)
        for column in range(len(loaded_sentences[0]))
        for row in range(batch_size)
    ]
    sentences = [loaded_sentences[row][column].split() for row, column in positions]
    expected_embeddings = [loaded_embeddings[row][column] for row, column in positions]
    assert len(expected_embeddings) == len(sentences)

    embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
    embeddings = list(embedder.embed_sentences(sentences, batch_size))
    assert len(embeddings) == len(sentences)

    for actual, reference in izip(embeddings, expected_embeddings):
        numpy.testing.assert_array_almost_equal(actual[2], reference)
示例2: get_elmo_model
# 需要导入模块: from allennlp.commands import elmo [as 别名]
# 或者: from allennlp.commands.elmo import ElmoEmbedder [as 别名]
def get_elmo_model(model_dir, cpu, verbose):
    """Return an ElmoEmbedder for the SeqVec model stored in ``model_dir``.

    If either ``weights.hdf5`` or ``options.json`` is missing from
    ``model_dir``, both files are downloaded from the SeqVec repository first.

    Parameters
    ----------
    model_dir : pathlib.Path
        Directory that holds (or will hold) ``weights.hdf5`` and
        ``options.json``.
    cpu : bool
        When truthy, the embedder is placed on the CPU even if CUDA is
        available.
    verbose : bool
        When truthy, a message is printed before the download starts.

    Returns
    -------
    ElmoEmbedder
        Embedder built from the two files; it uses CUDA device 0 when CUDA is
        available and ``cpu`` is falsy, otherwise device -1.
    """
    weights_path = model_dir / 'weights.hdf5'
    options_path = model_dir / 'options.json'

    # if no pre-trained model is available, yet --> download it
    if not (weights_path.exists() and options_path.exists()):
        if verbose:
            print('No existing model found. Start downloading pre-trained SeqVec (~360MB)...')
        import urllib.request

        # The directory may already exist (e.g. only one of the two files is
        # missing, or a previous download was interrupted) and its parents may
        # not exist yet; neither case should abort the download.
        model_dir.mkdir(parents=True, exist_ok=True)

        repo_link = 'http://rostlab.org/~deepppi/embedding_repo/embedding_models/seqvec'
        options_link = repo_link + '/options.json'
        weights_link = repo_link + '/weights.hdf5'
        urllib.request.urlretrieve(options_link, options_path)
        urllib.request.urlretrieve(weights_link, weights_path)

    cuda_device = 0 if torch.cuda.is_available() and not cpu else -1
    return ElmoEmbedder(weight_file=weights_path, options_file=options_path, cuda_device=cuda_device)
示例3: get_sent_embeds
# 需要导入模块: from allennlp.commands import elmo [as 别名]
# 或者: from allennlp.commands.elmo import ElmoEmbedder [as 别名]
def get_sent_embeds(sent, elmo_options_file, elmo_weights_file, layer,
                    cuda_device):
    """Embed the words of one sentence and return a single ELMo layer.

    Args:
        sent: list of tokens.
        elmo_options_file: JSON options file for the model
            (n_characters should be 262).
        elmo_weights_file: saved model weights.
        layer: index of the ELMo layer to output.
        cuda_device: CUDA device passed to the embedder.

    Returns:
        A numpy array with the embeddings per token for the selected layer.
    """
    embedder = ElmoEmbedder(elmo_options_file, elmo_weights_file, cuda_device)
    all_layers = embedder.embed_sentence(sent)
    return all_layers[layer, :, :]
示例4: _get_model
# 需要导入模块: from allennlp.commands import elmo [as 别名]
# 或者: from allennlp.commands.elmo import ElmoEmbedder [as 别名]
def _get_model(self, weights, options):
    """Build the pre-trained ElmoEmbedder from the given weight and options files.

    The GPU (device 0) is used whenever CUDA is available; otherwise the
    embedder is created on the CPU (device -1).
    """
    if torch.cuda.is_available():
        cuda_device = 0
    else:
        # CPU usage is selected with cuda_device=-1
        cuda_device = -1
    return ElmoEmbedder(weight_file=weights, options_file=options, cuda_device=cuda_device)
示例5: loadELMo
# 需要导入模块: from allennlp.commands import elmo [as 别名]
# 或者: from allennlp.commands.elmo import ElmoEmbedder [as 别名]
def loadELMo(self):
    """Create the ElmoEmbedder from the instance's settings and store it in ``self.elmo``."""
    options_file = self.elmo_options_file
    weight_file = self.elmo_weight_file
    cuda_device = self.elmo_cuda_device
    self.elmo = ElmoEmbedder(options_file, weight_file, cuda_device)
示例6: load_elmo
# 需要导入模块: from allennlp.commands import elmo [as 别名]
# 或者: from allennlp.commands.elmo import ElmoEmbedder [as 别名]
def load_elmo():
    """Return an ElmoEmbedder created with ``cuda_device=0`` and otherwise default arguments."""
    embedder = ElmoEmbedder(cuda_device=0)
    return embedder
示例7: test_all_embedding_works
# 需要导入模块: from allennlp.commands import elmo [as 别名]
# 或者: from allennlp.commands.elmo import ElmoEmbedder [as 别名]
def test_all_embedding_works(self):
    """Run the ``elmo`` command with ``--all`` and check the HDF5 file it writes.

    The stored array must match what ``ElmoEmbedder.embed_sentence`` returns
    for the same sentence.
    """
    sentence = u"Michael went to the store to buy some eggs ."
    tokens = sentence.split()
    with open(self.sentences_path, u'w') as sentences_file:
        sentences_file.write(sentence)

    cli_args = [
        u"run.py",  # executable
        u"elmo",  # command
        self.sentences_path,
        self.output_path,
        u"--all",
        u"--options-file",
        self.options_file,
        u"--weight-file",
        self.weight_file,
    ]
    sys.argv = cli_args
    main()
    assert os.path.exists(self.output_path)

    embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
    expected_embedding = embedder.embed_sentence(tokens)

    with h5py.File(self.output_path, u'r') as h5py_file:
        assert set(h5py_file.keys()) == {u"0", u"sentence_to_index"}
        # The vectors in the test configuration are smaller (32 length)
        embedding = h5py_file.get(u"0")
        assert embedding.shape == (3, len(tokens), 32)
        numpy.testing.assert_allclose(embedding, expected_embedding, rtol=1e-4)
        sentence_index = json.loads(h5py_file.get(u"sentence_to_index")[0])
        assert sentence_index == {sentence: u"0"}
示例8: test_top_embedding_works
# 需要导入模块: from allennlp.commands import elmo [as 别名]
# 或者: from allennlp.commands.elmo import ElmoEmbedder [as 别名]
def test_top_embedding_works(self):
    """Run the ``elmo`` command with ``--top`` and check the HDF5 file it writes.

    The stored array must match index 2 of what
    ``ElmoEmbedder.embed_sentence`` returns for the same sentence.
    """
    sentence = u"Michael went to the store to buy some eggs ."
    tokens = sentence.split()
    with open(self.sentences_path, u'w') as sentences_file:
        sentences_file.write(sentence)

    cli_args = [
        u"run.py",  # executable
        u"elmo",  # command
        self.sentences_path,
        self.output_path,
        u"--top",
        u"--options-file",
        self.options_file,
        u"--weight-file",
        self.weight_file,
    ]
    sys.argv = cli_args
    main()
    assert os.path.exists(self.output_path)

    embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
    all_layers = embedder.embed_sentence(tokens)
    expected_embedding = all_layers[2]

    with h5py.File(self.output_path, u'r') as h5py_file:
        assert set(h5py_file.keys()) == {u"0", u"sentence_to_index"}
        # The vectors in the test configuration are smaller (32 length)
        embedding = h5py_file.get(u"0")
        assert embedding.shape == (len(tokens), 32)
        numpy.testing.assert_allclose(embedding, expected_embedding, rtol=1e-4)
        sentence_index = json.loads(h5py_file.get(u"sentence_to_index")[0])
        assert sentence_index == {sentence: u"0"}
示例9: test_average_embedding_works
# 需要导入模块: from allennlp.commands import elmo [as 别名]
# 或者: from allennlp.commands.elmo import ElmoEmbedder [as 别名]
def test_average_embedding_works(self):
    """Run the ``elmo`` command with ``--average`` and check the HDF5 file it writes.

    The stored array must match the mean of the three arrays (indices 0, 1
    and 2) that ``ElmoEmbedder.embed_sentence`` returns for the same sentence.
    """
    sentence = u"Michael went to the store to buy some eggs ."
    tokens = sentence.split()
    with open(self.sentences_path, u'w') as sentences_file:
        sentences_file.write(sentence)

    cli_args = [
        u"run.py",  # executable
        u"elmo",  # command
        self.sentences_path,
        self.output_path,
        u"--average",
        u"--options-file",
        self.options_file,
        u"--weight-file",
        self.weight_file,
    ]
    sys.argv = cli_args
    main()
    assert os.path.exists(self.output_path)

    embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
    all_layers = embedder.embed_sentence(tokens)
    # Same arithmetic as the reference computation: sum the three entries, divide by 3.
    expected_embedding = (all_layers[0] + all_layers[1] + all_layers[2]) / 3

    with h5py.File(self.output_path, u'r') as h5py_file:
        assert set(h5py_file.keys()) == {u"0", u"sentence_to_index"}
        # The vectors in the test configuration are smaller (32 length)
        embedding = h5py_file.get(u"0")
        assert embedding.shape == (len(tokens), 32)
        numpy.testing.assert_allclose(embedding, expected_embedding, rtol=1e-4)
        sentence_index = json.loads(h5py_file.get(u"sentence_to_index")[0])
        assert sentence_index == {sentence: u"0"}
示例10: test_embed_batch_is_empty_sentence
# 需要导入模块: from allennlp.commands import elmo [as 别名]
# 或者: from allennlp.commands.elmo import ElmoEmbedder [as 别名]
def test_embed_batch_is_empty_sentence(self):
    """Embedding an empty token list must yield an array of shape (3, 0, 1024)."""
    embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
    empty_sentence = []
    embeddings = embedder.embed_sentence(empty_sentence)
    expected_shape = (3, 0, 1024)
    assert embeddings.shape == expected_shape
示例11: __init__
# 需要导入模块: from allennlp.commands import elmo [as 别名]
# 或者: from allennlp.commands.elmo import ElmoEmbedder [as 别名]
def __init__(self):
    """Create an ElmoEmbedder with its default arguments and keep it in ``self.elmo``."""
    embedder = ElmoEmbedder()
    self.elmo = embedder
示例12: __init__
# 需要导入模块: from allennlp.commands import elmo [as 别名]
# 或者: from allennlp.commands.elmo import ElmoEmbedder [as 别名]
def __init__(self, args, word_vocab):
    """Initialise the parent class and attach an ElmoEmbedder as ``self.elmo``.

    The embedder is created on CUDA device 0 when ``args.gpu`` is not None,
    otherwise on the CPU (device -1).
    """
    super().__init__(args, word_vocab)

    # ElmoEmbedder is imported here (not at module level) so that the
    # CUDA_VISIBLE_DEVICES setting can take effect.
    from allennlp.commands.elmo import ElmoEmbedder

    if args.gpu is not None:
        cuda_device = 0
    else:
        cuda_device = -1
    self.elmo = ElmoEmbedder(cuda_device=cuda_device)
示例13: __init__
# 需要导入模块: from allennlp.commands import elmo [as 别名]
# 或者: from allennlp.commands.elmo import ElmoEmbedder [as 别名]
def __init__(self, vocab, config):
    """Build the embedding, encoder and scoring sub-modules of the model.

    Args:
        vocab: vocabulary object. Its ``word2idx`` mapping is stored on the
            instance and its size sets the number of rows of the word
            embedding table; the object is also handed to
            ``EmbedLoader.load_with_vocab``.
        config: configuration object. The attributes read here are ``cuda``,
            ``glove``, ``turian``, ``use_elmo``, ``char_emb_size``,
            ``span_width``, ``feature_size``, ``use_CNN``,
            ``atten_hidden_size``, ``mention_hidden_size``,
            ``sa_hidden_size`` and ``use_metadata``.
    """
    word2id = vocab.word2idx
    super(Model, self).__init__()
    vocab_num = len(word2id)
    self.word2id = word2id
    self.config = config
    self.char_dict = preprocess.get_char_dict('data/char_vocab.english.txt')
    # Map each genre code to its position in this fixed list.
    self.genres = {g: i for i, g in enumerate(["bc", "bn", "mz", "nw", "pt", "tc", "wb"])}
    # config.cuda is concatenated onto "cuda:", so it has to be a string here;
    # it is converted with int() further down for ElmoEmbedder.
    self.device = torch.device("cuda:" + config.cuda)

    # Word embeddings: a 350-wide table whose weights are replaced by the
    # pretrained vectors loaded below.
    self.emb = nn.Embedding(vocab_num, 350)
    emb1 = EmbedLoader().load_with_vocab(config.glove, vocab,normalize=False)
    emb2 = EmbedLoader().load_with_vocab(config.turian, vocab ,normalize=False)
    # Join the two pretrained tables along axis 1, then scale every row to
    # unit L2 norm (the 1e-12 term avoids division by zero).
    # NOTE(review): the joined width presumably has to be 350 to match
    # nn.Embedding(vocab_num, 350) - confirm.
    pre_emb = np.concatenate((emb1, emb2), axis=1)
    pre_emb /= (np.linalg.norm(pre_emb, axis=1, keepdims=True) + 1e-12)

    # NOTE(review): pre_emb was just produced by np.concatenate, so this
    # guard looks always true at this point.
    if pre_emb is not None:
        self.emb.weight = nn.Parameter(torch.from_numpy(pre_emb).float())
        # Exclude the word-embedding weights from gradient updates.
        for param in self.emb.parameters():
            param.requires_grad = False
    self.emb_dropout = nn.Dropout(inplace=True)

    if config.use_elmo:
        self.elmo = ElmoEmbedder(options_file='data/elmo/elmo_2x4096_512_2048cnn_2xhighway_options.json',
                                 weight_file='data/elmo/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5',
                                 cuda_device=int(config.cuda))
        print("elmo load over.")
    # NOTE(review): the original indentation was lost; this line is assumed to
    # sit outside the `if config.use_elmo` branch - confirm.
    self.elmo_args = torch.randn((3), requires_grad=True).to(self.device)

    # Character embeddings and three 1-D convolutions with kernel sizes
    # 3, 4 and 5, each producing 50 output channels.
    self.char_emb = nn.Embedding(len(self.char_dict), config.char_emb_size)
    self.conv1 = nn.Conv1d(config.char_emb_size, 50, 3)
    self.conv2 = nn.Conv1d(config.char_emb_size, 50, 4)
    self.conv3 = nn.Conv1d(config.char_emb_size, 50, 5)

    # Embedding tables of width config.feature_size, with their dropouts.
    self.feature_emb = nn.Embedding(config.span_width, config.feature_size)
    self.feature_emb_dropout = nn.Dropout(p=0.2, inplace=True)
    self.mention_distance_emb = nn.Embedding(10, config.feature_size)
    self.distance_drop = nn.Dropout(p=0.2, inplace=True)
    # 7 rows, matching the 7 entries of self.genres.
    self.genre_emb = nn.Embedding(7, config.feature_size)
    self.speaker_emb = nn.Embedding(2, config.feature_size)

    # Input width: 350, plus 150 when use_CNN is truthy and 1024 when
    # use_elmo is truthy (both flags are used as 0/1 multipliers).
    self.bilstm = VarLSTM(input_size=350+150*config.use_CNN+config.use_elmo*1024,hidden_size=200,bidirectional=True,batch_first=True,hidden_dropout=0.2)
    # self.bilstm = nn.LSTM(input_size=500, hidden_size=200, bidirectional=True, batch_first=True)
    # Orthogonally initialised tensors of shape (2, 1, 200), moved to self.device.
    self.h0 = nn.init.orthogonal_(torch.empty(2, 1, 200)).to(self.device)
    self.c0 = nn.init.orthogonal_(torch.empty(2, 1, 200)).to(self.device)
    self.bilstm_drop = nn.Dropout(p=0.2, inplace=True)

    # Feed-forward scorers, each with a single output value.
    self.atten = ffnn(input_size=400, hidden_size=config.atten_hidden_size, output_size=1)
    self.mention_score = ffnn(input_size=1320, hidden_size=config.mention_hidden_size, output_size=1)
    # Input width grows by 40 when use_metadata is truthy.
    self.sa = ffnn(input_size=3980+40*config.use_metadata, hidden_size=config.sa_hidden_size, output_size=1)

    # Start as None; nothing in this constructor fills them in.
    self.mention_start_np = None
    self.mention_end_np = None
示例14: __init__
# 需要导入模块: from allennlp.commands import elmo [as 别名]
# 或者: from allennlp.commands.elmo import ElmoEmbedder [as 别名]
def __init__(
    self,
    datasets_manager: DatasetsManager = None,
    layer_aggregation: str = "sum",
    device: Union[str, torch.device] = torch.device("cpu"),
    word_tokens_namespace="tokens",
):
    """ Bag of words Elmo Embedder which aggregates elmo embedding for every token

    Parameters
    ----------
    datasets_manager : DatasetsManager
        Stored on the instance as ``dataset_manager``
    layer_aggregation : str
        You can choose one of ``[sum, average, last, first]``
        which decides how to aggregate different layers of ELMO. ELMO produces three
        layers of representations

        sum
            Representations from different layers are summed
        average
            Representations from different layers are averaged
        last
            Representation from the last layer is considered
        first
            Representation from the first layer is considered

    device : Union[str, torch.device]
        device for running the model on
    word_tokens_namespace : str
        Namespace where all the word tokens are stored

    Raises
    ------
    AssertionError
        If ``layer_aggregation`` is not one of the allowed aggregation types
    """
    super(BowElmoEmbedder, self).__init__()
    self.dataset_manager = datasets_manager
    self.embedding_dimension = self.get_embedding_dimension()
    self.embedder_name = "elmo"
    self.word_tokens_namespace = word_tokens_namespace
    self.layer_aggregation_type = layer_aggregation
    self.allowed_layer_aggregation_types = ["sum", "average", "last", "first"]

    # torch.device() accepts both a device string and an existing
    # torch.device, so no type dispatch is needed.
    self.device = torch.device(device)

    # Translate the torch device into the integer id ElmoEmbedder expects
    # (-1 means CPU). The device *type* is checked rather than the
    # truthiness of the index: "cuda:0" has index 0, which is falsy and
    # used to be mistaken for "no GPU". A bare "cuda" carries no index
    # and is mapped to device 0.
    if self.device.type == "cuda":
        device_index = self.device.index
        self.cuda_device_id = device_index if device_index is not None else 0
    else:
        self.cuda_device_id = -1

    self.msg_printer = wasabi.Printer()

    # The failure message is only built (and printed by wasabi) when the
    # assertion does not hold.
    assert (
        self.layer_aggregation_type in self.allowed_layer_aggregation_types
    ), self.msg_printer.fail(
        f"For bag of words elmo encoder, the allowable aggregation "
        f"types are {self.allowed_layer_aggregation_types}. You passed {self.layer_aggregation_type}"
    )

    # load the elmo embedders
    with self.msg_printer.loading("Creating Elmo object"):
        self.elmo = ElmoEmbedder(cuda_device=self.cuda_device_id)
    self.msg_printer.good("Finished Loading Elmo object")
示例15: context_insensitive_elmo
# 需要导入模块: from allennlp.commands import elmo [as 别名]
# 或者: from allennlp.commands.elmo import ElmoEmbedder [as 别名]
def context_insensitive_elmo(weights_path, options_path, word2idx, cuda=False, cache_dir=None):
    """Compute (or load from cache) ELMo vectors for every word in ``word2idx``.

    The words are ordered by their index, split into chunks of 256, and each
    chunk is passed to ``ElmoEmbedder.embed_sentence`` as one token list.
    Entry 0 of each result is kept and the pieces are concatenated along
    axis 0.

    Args:
        weights_path: ELMo weight file.
        options_path: ELMo options file.
        word2idx: mapping from word to integer index.
        cuda: use CUDA device 0 when truthy, otherwise device -1.
        cache_dir: optional directory; when given, vectors are loaded from /
            saved to an ``elmo_<hash>.npy`` file inside it.

    Returns:
        The concatenated vectors, or whatever ``load_elmo_cache`` returns when
        a cache file already exists.
    """
    logger = get_logger()
    vocab = [word for word, _ in sorted(word2idx.items(), key=lambda item: item[1])]
    validate_word2idx(word2idx)

    use_cache = cache_dir is not None
    if use_cache:
        key = hash_vocab(vocab)
        cache_path = os.path.join(cache_dir, 'elmo_{}.npy'.format(key))
        if os.path.exists(cache_path):
            logger.info('Loading cached elmo vectors: {}'.format(cache_path))
            return load_elmo_cache(cache_path)

    device = 0 if cuda else -1
    batch_size = 256
    nbatches = len(vocab) // batch_size + 1

    logger.info('Begin caching vectors. nbatches={} device={}'.format(nbatches, device))
    logger.info('Initialize ELMo Model.')

    # TODO: Does not support padding.
    elmo = ElmoEmbedder(options_file=options_path, weight_file=weights_path, cuda_device=device)

    first_entries = []
    for batch_index in tqdm(range(nbatches), desc='elmo'):
        offset = batch_index * batch_size
        batch = vocab[offset:offset + batch_size]
        # The last chunk is empty when len(vocab) is a multiple of batch_size.
        if batch:
            first_entries.append(elmo.embed_sentence(batch)[0])

    vectors = np.concatenate(first_entries, axis=0)

    if use_cache:
        logger.info('Saving cached elmo vectors: {}'.format(cache_path))
        save_elmo_cache(cache_path, vectors)

    return vectors