

Python elmo.ElmoEmbedder method code examples

This article collects and organizes typical usage examples of the allennlp.commands.elmo.ElmoEmbedder method in Python. If you are wondering what elmo.ElmoEmbedder does, how to call it, or what working uses of it look like, the curated code examples below should help. You can also explore further usage examples from the module it belongs to, allennlp.commands.elmo.


The following presents 15 code examples of the elmo.ElmoEmbedder method, sorted by popularity by default.
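If you just need a quick starting point before the repository examples below, here is a minimal usage sketch. The file paths and the token list are illustrative placeholders, not files referenced anywhere in this article; if both file arguments are omitted, ElmoEmbedder falls back to AllenNLP's default pre-trained English ELMo model.

# Minimal usage sketch (hypothetical paths; substitute your own options.json / weights.hdf5,
# or call ElmoEmbedder() with no file arguments to use the default pre-trained model).
from allennlp.commands.elmo import ElmoEmbedder

elmo = ElmoEmbedder(
    options_file='path/to/options.json',  # placeholder
    weight_file='path/to/weights.hdf5',   # placeholder
    cuda_device=-1,                       # -1 = CPU, 0 = first GPU
)

# embed_sentence takes a list of tokens and returns a numpy array of shape
# (3, num_tokens, embedding_dim): one matrix of token vectors per ELMo layer.
vectors = elmo.embed_sentence(['The', 'cat', 'sat', 'on', 'the', 'mat', '.'])
print(vectors.shape)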

Example 1: test_embeddings_are_as_expected

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def test_embeddings_are_as_expected(self):
        loaded_sentences, loaded_embeddings = self._load_sentences_embeddings()

        assert len(loaded_sentences) == len(loaded_embeddings)
        batch_size = len(loaded_sentences)

        # The sentences and embeddings are organized in the idiosyncratic way that TensorFlow handles batching.
        # We are going to reorganize them linearly so they can be grouped into batches by AllenNLP.
        sentences = []
        expected_embeddings = []
        for batch_number in range(len(loaded_sentences[0])):
            for index in range(batch_size):
                sentences.append(loaded_sentences[index][batch_number].split())
                expected_embeddings.append(loaded_embeddings[index][batch_number])

        assert len(expected_embeddings) == len(sentences)

        embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
        embeddings = list(embedder.embed_sentences(sentences, batch_size))

        assert len(embeddings) == len(sentences)

        for tensor, expected in izip(embeddings, expected_embeddings):
            numpy.testing.assert_array_almost_equal(tensor[2], expected) 
Developer: plasticityai, Project: magnitude, Lines: 26, Source: elmo_test.py

Example 2: get_elmo_model

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def get_elmo_model( model_dir, cpu, verbose ):
    
    weights_path = model_dir / 'weights.hdf5'
    options_path = model_dir / 'options.json'

    # if no pre-trained model is available, yet --> download it
    if not (weights_path.exists() and options_path.exists()):
        if verbose: 
            print('No existing model found. Start downloading pre-trained SeqVec (~360MB)...')
        import urllib.request
        Path.mkdir(model_dir)
        repo_link    = 'http://rostlab.org/~deepppi/embedding_repo/embedding_models/seqvec'
        options_link = repo_link +'/options.json'
        weights_link = repo_link +'/weights.hdf5'
        urllib.request.urlretrieve( options_link, options_path )
        urllib.request.urlretrieve( weights_link, weights_path )

    cuda_device = 0 if torch.cuda.is_available() and not cpu else -1
    return ElmoEmbedder( weight_file=weights_path, options_file=options_path, cuda_device=cuda_device ) 
Developer: mheinzinger, Project: SeqVec, Lines: 21, Source: seqvec_embedder.py

Example 3: get_sent_embeds

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def get_sent_embeds(sent, elmo_options_file, elmo_weights_file, layer,
                    cuda_device):
    '''
    Get the embeddings of the sentence words.
    sent - list of tokens
    elmo_options_file - json for model. n_characters should be 262
    elmo_weights_file - saved model
    layer - what layer of ELMo to output
    cuda_device - cuda device

    returns a numpy array with the embeddings per token for the selected layer
    '''
    elmo = ElmoEmbedder(elmo_options_file, elmo_weights_file, cuda_device)
    s_embeds = elmo.embed_sentence(sent)
    layer_embeds = s_embeds[layer,:,:]
    return layer_embeds 
Developer: TalSchuster, Project: CrossLingualContextualEmb, Lines: 18, Source: demo.py

Example 4: _get_model

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def _get_model(self, weights, options):
        # Retrieve pre-trained embedding model
        # use GPU if available. If CPU-usage shall be enforced set cuda_device=-1
        cuda_device = 0 if torch.cuda.is_available() else -1
        return ElmoEmbedder(weight_file=weights, options_file=options, cuda_device=cuda_device) 
Developer: kipoi, Project: models, Lines: 7, Source: model.py

Example 5: loadELMo

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def loadELMo(self):
        self.elmo = ElmoEmbedder(self.elmo_options_file, self.elmo_weight_file, self.elmo_cuda_device) 
Developer: UKPLab, Project: elmo-bilstm-cnn-crf, Lines: 4, Source: ELMoWordEmbeddings.py

Example 6: load_elmo

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def load_elmo():
    return ElmoEmbedder(cuda_device=0) 
Developer: allanj, Project: ner_with_dependency, Lines: 4, Source: preelmo.py

Example 7: test_all_embedding_works

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def test_all_embedding_works(self):
        sentence = u"Michael went to the store to buy some eggs ."
        with open(self.sentences_path, u'w') as f:
            f.write(sentence)

        sys.argv = [u"run.py",  # executable
                    u"elmo",  # command
                    self.sentences_path,
                    self.output_path,
                    u"--all",
                    u"--options-file",
                    self.options_file,
                    u"--weight-file",
                    self.weight_file]

        main()

        assert os.path.exists(self.output_path)

        embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
        expected_embedding = embedder.embed_sentence(sentence.split())

        with h5py.File(self.output_path, u'r') as h5py_file:
            assert set(h5py_file.keys()) == set([u"0", u"sentence_to_index"])
            # The vectors in the test configuration are smaller (32 length)
            embedding = h5py_file.get(u"0")
            assert embedding.shape == (3, len(sentence.split()), 32)
            numpy.testing.assert_allclose(embedding, expected_embedding, rtol=1e-4)
            assert json.loads(h5py_file.get(u"sentence_to_index")[0]) == {sentence: u"0"} 
Developer: plasticityai, Project: magnitude, Lines: 31, Source: elmo_test.py

Example 8: test_top_embedding_works

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def test_top_embedding_works(self):
        sentence = u"Michael went to the store to buy some eggs ."
        with open(self.sentences_path, u'w') as f:
            f.write(sentence)

        sys.argv = [u"run.py",  # executable
                    u"elmo",  # command
                    self.sentences_path,
                    self.output_path,
                    u"--top",
                    u"--options-file",
                    self.options_file,
                    u"--weight-file",
                    self.weight_file]

        main()

        assert os.path.exists(self.output_path)

        embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
        expected_embedding = embedder.embed_sentence(sentence.split())[2]

        with h5py.File(self.output_path, u'r') as h5py_file:
            assert set(h5py_file.keys()) == set([u"0", u"sentence_to_index"])
            # The vectors in the test configuration are smaller (32 length)
            embedding = h5py_file.get(u"0")
            assert embedding.shape == (len(sentence.split()), 32)
            numpy.testing.assert_allclose(embedding, expected_embedding, rtol=1e-4)
            assert json.loads(h5py_file.get(u"sentence_to_index")[0]) == {sentence: u"0"} 
Developer: plasticityai, Project: magnitude, Lines: 31, Source: elmo_test.py

Example 9: test_average_embedding_works

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def test_average_embedding_works(self):
        sentence = u"Michael went to the store to buy some eggs ."
        with open(self.sentences_path, u'w') as f:
            f.write(sentence)

        sys.argv = [u"run.py",  # executable
                    u"elmo",  # command
                    self.sentences_path,
                    self.output_path,
                    u"--average",
                    u"--options-file",
                    self.options_file,
                    u"--weight-file",
                    self.weight_file]

        main()

        assert os.path.exists(self.output_path)

        embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
        expected_embedding = embedder.embed_sentence(sentence.split())
        expected_embedding = (expected_embedding[0] + expected_embedding[1] + expected_embedding[2]) / 3

        with h5py.File(self.output_path, u'r') as h5py_file:
            assert set(h5py_file.keys()) == set([u"0", u"sentence_to_index"])
            # The vectors in the test configuration are smaller (32 length)
            embedding = h5py_file.get(u"0")
            assert embedding.shape == (len(sentence.split()), 32)
            numpy.testing.assert_allclose(embedding, expected_embedding, rtol=1e-4)
            assert json.loads(h5py_file.get(u"sentence_to_index")[0]) == {sentence: u"0"} 
Developer: plasticityai, Project: magnitude, Lines: 32, Source: elmo_test.py

Example 10: test_embed_batch_is_empty_sentence

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def test_embed_batch_is_empty_sentence(self):
        embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
        embeddings = embedder.embed_sentence([])

        assert embeddings.shape == (3, 0, 1024) 
Developer: plasticityai, Project: magnitude, Lines: 7, Source: elmo_test.py

Example 11: __init__

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def __init__(self):
        
        self.elmo = ElmoEmbedder() 
Developer: uhh-lt, Project: bert-sense, Lines: 5, Source: ELMO_Model.py

Example 12: __init__

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def __init__(self, args, word_vocab):
        super().__init__(args, word_vocab)

        # import ElmoEmbedder here so that the cuda_visible_devices setting can take effect
        from allennlp.commands.elmo import ElmoEmbedder
        self.elmo = ElmoEmbedder(cuda_device=0 if args.gpu is not None else -1) 
Developer: PKU-TANGENT, Project: NeuralEDUSeg, Lines: 8, Source: elmo_crf_seg.py

Example 13: __init__

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def __init__(self, vocab, config):
        word2id = vocab.word2idx
        super(Model, self).__init__()
        vocab_num = len(word2id)
        self.word2id = word2id
        self.config = config
        self.char_dict = preprocess.get_char_dict('data/char_vocab.english.txt')
        self.genres = {g: i for i, g in enumerate(["bc", "bn", "mz", "nw", "pt", "tc", "wb"])}
        self.device = torch.device("cuda:" + config.cuda)

        self.emb = nn.Embedding(vocab_num, 350)

        emb1 = EmbedLoader().load_with_vocab(config.glove, vocab,normalize=False)
        emb2 = EmbedLoader().load_with_vocab(config.turian,  vocab ,normalize=False)
        pre_emb = np.concatenate((emb1, emb2), axis=1)
        pre_emb /= (np.linalg.norm(pre_emb, axis=1, keepdims=True) + 1e-12)

        if pre_emb is not None:
            self.emb.weight = nn.Parameter(torch.from_numpy(pre_emb).float())
            for param in self.emb.parameters():
                param.requires_grad = False
        self.emb_dropout = nn.Dropout(inplace=True)


        if config.use_elmo:
            self.elmo = ElmoEmbedder(options_file='data/elmo/elmo_2x4096_512_2048cnn_2xhighway_options.json',
                                     weight_file='data/elmo/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5',
                                     cuda_device=int(config.cuda))
            print("elmo load over.")
            self.elmo_args = torch.randn((3), requires_grad=True).to(self.device)

        self.char_emb = nn.Embedding(len(self.char_dict), config.char_emb_size)
        self.conv1 = nn.Conv1d(config.char_emb_size, 50, 3)
        self.conv2 = nn.Conv1d(config.char_emb_size, 50, 4)
        self.conv3 = nn.Conv1d(config.char_emb_size, 50, 5)

        self.feature_emb = nn.Embedding(config.span_width, config.feature_size)
        self.feature_emb_dropout = nn.Dropout(p=0.2, inplace=True)

        self.mention_distance_emb = nn.Embedding(10, config.feature_size)
        self.distance_drop = nn.Dropout(p=0.2, inplace=True)

        self.genre_emb = nn.Embedding(7, config.feature_size)
        self.speaker_emb = nn.Embedding(2, config.feature_size)

        self.bilstm = VarLSTM(input_size=350+150*config.use_CNN+config.use_elmo*1024,hidden_size=200,bidirectional=True,batch_first=True,hidden_dropout=0.2)
        # self.bilstm = nn.LSTM(input_size=500, hidden_size=200, bidirectional=True, batch_first=True)
        self.h0 = nn.init.orthogonal_(torch.empty(2, 1, 200)).to(self.device)
        self.c0 = nn.init.orthogonal_(torch.empty(2, 1, 200)).to(self.device)
        self.bilstm_drop = nn.Dropout(p=0.2, inplace=True)

        self.atten = ffnn(input_size=400, hidden_size=config.atten_hidden_size, output_size=1)
        self.mention_score = ffnn(input_size=1320, hidden_size=config.mention_hidden_size, output_size=1)
        self.sa = ffnn(input_size=3980+40*config.use_metadata, hidden_size=config.sa_hidden_size, output_size=1)
        self.mention_start_np = None
        self.mention_end_np = None 
Developer: fastnlp, Project: fastNLP, Lines: 58, Source: model_re.py

Example 14: __init__

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def __init__(
        self,
        datasets_manager: DatasetsManager = None,
        layer_aggregation: str = "sum",
        device: Union[str, torch.device] = torch.device("cpu"),
        word_tokens_namespace="tokens",
    ):
        """ Bag of words Elmo Embedder which aggregates elmo embedding for every token

        Parameters
        ----------
        layer_aggregation : str
            You can choose one of ``[sum, average, last, first]``
            which decides how to aggregate different layers of ELMO. ELMO produces three
            layers of representations

            sum
                Representations from different layers are summed
            average
                Representations from different layers are averaged
            last
                The representation from the last layer is used
            first
                The representation from the first layer is used

        device : Union[str, torch.device]
            device for running the model on

        word_tokens_namespace : str
            Namespace where all the word tokens are stored
        """
        super(BowElmoEmbedder, self).__init__()
        self.dataset_manager = datasets_manager
        self.embedding_dimension = self.get_embedding_dimension()
        self.embedder_name = "elmo"
        self.word_tokens_namespace = word_tokens_namespace
        self.layer_aggregation_type = layer_aggregation
        self.allowed_layer_aggregation_types = ["sum", "average", "last", "first"]
        self.device = (
            torch.device(device) if isinstance(device, str) else torch.device(device)
        )

        if self.device.index:
            self.cuda_device_id = self.device.index
        else:
            self.cuda_device_id = -1
        self.msg_printer = wasabi.Printer()

        assert (
            self.layer_aggregation_type in self.allowed_layer_aggregation_types
        ), self.msg_printer.fail(
            f"For bag of words elmo encoder, the allowable aggregation "
            f"types are {self.allowed_layer_aggregation_types}. You passed {self.layer_aggregation_type}"
        )

        # load the elmo embedders
        with self.msg_printer.loading("Creating Elmo object"):
            self.elmo = ElmoEmbedder(cuda_device=self.cuda_device_id)
        self.msg_printer.good("Finished Loading Elmo object") 
Developer: abhinavkashyap, Project: sciwing, Lines: 61, Source: bow_elmo_embedder.py
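The docstring in Example 14 describes four layer-aggregation modes, but the aggregation itself happens elsewhere in that class and is not shown above. As a rough illustration only (the helper name aggregate_layers is hypothetical and not part of sciwing), aggregation over the three layers returned by ElmoEmbedder could look like this:

import numpy as np
from allennlp.commands.elmo import ElmoEmbedder

def aggregate_layers(layer_embeddings: np.ndarray, mode: str = "sum") -> np.ndarray:
    # layer_embeddings has shape (3, num_tokens, dim): one row of token vectors per ELMo layer
    if mode == "sum":
        return layer_embeddings.sum(axis=0)
    if mode == "average":
        return layer_embeddings.mean(axis=0)
    if mode == "last":
        return layer_embeddings[-1]
    if mode == "first":
        return layer_embeddings[0]
    raise ValueError(f"Unknown aggregation mode: {mode}")

elmo = ElmoEmbedder(cuda_device=-1)  # default pre-trained model, CPU
token_vectors = aggregate_layers(elmo.embed_sentence(["A", "short", "sentence", "."]), mode="average")
print(token_vectors.shape)  # (4, 1024) for the default model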

Example 15: context_insensitive_elmo

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def context_insensitive_elmo(weights_path, options_path, word2idx, cuda=False, cache_dir=None):
    logger = get_logger()

    vocab = [w for w, i in sorted(word2idx.items(), key=lambda x: x[1])]

    validate_word2idx(word2idx)

    if cache_dir is not None:
        key = hash_vocab(vocab)
        cache_path = os.path.join(cache_dir, 'elmo_{}.npy'.format(key))

        if os.path.exists(cache_path):
            logger.info('Loading cached elmo vectors: {}'.format(cache_path))
            return load_elmo_cache(cache_path)

    if cuda:
        device = 0
    else:
        device = -1

    batch_size = 256
    nbatches = len(vocab) // batch_size + 1

    logger.info('Begin caching vectors. nbatches={} device={}'.format(nbatches, device))
    logger.info('Initialize ELMo Model.')

    # TODO: Does not support padding.
    elmo = ElmoEmbedder(options_file=options_path, weight_file=weights_path, cuda_device=device)
    vec_lst = []
    for i in tqdm(range(nbatches), desc='elmo'):
        start = i * batch_size
        batch = vocab[start:start+batch_size]
        if len(batch) == 0:
            continue
        vec = elmo.embed_sentence(batch)
        vec_lst.append(vec)

    vectors = np.concatenate([x[0] for x in vec_lst], axis=0)

    if cache_dir is not None:
        logger.info('Saving cached elmo vectors: {}'.format(cache_path))
        save_elmo_cache(cache_path, vectors)

    return vectors 
Developer: iesl, Project: diora, Lines: 46, Source: embeddings.py


Note: The allennlp.commands.elmo.ElmoEmbedder method examples in this article were compiled by 純淨天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by their respective developers; copyright of the source code remains with the original authors. Please follow each project's license when distributing or using the code, and do not republish without permission.