

Python elmo.ElmoEmbedder Method Code Examples

This article collects typical usage examples of the allennlp.commands.elmo.ElmoEmbedder method in Python. If you are wondering how to use elmo.ElmoEmbedder in practice, or what real calls to it look like, the hand-picked code examples below may help. You can also explore further usage examples from the module it lives in, allennlp.commands.elmo.


A total of 15 code examples of the elmo.ElmoEmbedder method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
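
Before diving into the individual examples, here is a minimal usage sketch of ElmoEmbedder for orientation. It is not taken from any of the projects below; the option/weight file paths are placeholders, and the output shapes assume the standard pre-trained English ELMo model (three layers, 1024 dimensions).

from allennlp.commands.elmo import ElmoEmbedder

# The file paths are placeholders; point them at your downloaded ELMo files,
# or construct ElmoEmbedder() with no arguments to use AllenNLP's default model.
elmo = ElmoEmbedder(options_file='elmo_options.json',
                    weight_file='elmo_weights.hdf5',
                    cuda_device=-1)  # 0 = first GPU, -1 = CPU

# Embed one tokenized sentence: returns a numpy array of shape (3, num_tokens, 1024),
# one slice per ELMo layer.
vectors = elmo.embed_sentence(['Michael', 'went', 'to', 'the', 'store', '.'])

# Embed many sentences lazily, in batches: yields one (3, num_tokens, 1024) array per sentence.
sentences = [['First', 'sentence', '.'], ['Another', 'one', '.']]
for vecs in elmo.embed_sentences(sentences, batch_size=2):
    print(vecs.shape)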

Example 1: test_embeddings_are_as_expected

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def test_embeddings_are_as_expected(self):
        loaded_sentences, loaded_embeddings = self._load_sentences_embeddings()

        assert len(loaded_sentences) == len(loaded_embeddings)
        batch_size = len(loaded_sentences)

        # The sentences and embeddings are organized in the idiosyncratic way that TensorFlow handles batching.
        # We reorganize them linearly so that AllenNLP can group them into batches.
        sentences = []
        expected_embeddings = []
        for batch_number in range(len(loaded_sentences[0])):
            for index in range(batch_size):
                sentences.append(loaded_sentences[index][batch_number].split())
                expected_embeddings.append(loaded_embeddings[index][batch_number])

        assert len(expected_embeddings) == len(sentences)

        embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
        embeddings = list(embedder.embed_sentences(sentences, batch_size))

        assert len(embeddings) == len(sentences)

        for tensor, expected in izip(embeddings, expected_embeddings):
            numpy.testing.assert_array_almost_equal(tensor[2], expected) 
Developer: plasticityai, Project: magnitude, Lines of code: 26, Source file: elmo_test.py

Example 2: get_elmo_model

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def get_elmo_model( model_dir, cpu, verbose ):
    
    weights_path = model_dir / 'weights.hdf5'
    options_path = model_dir / 'options.json'

    # if no pre-trained model is available yet, download it
    if not (weights_path.exists() and options_path.exists()):
        if verbose: 
            print('No existing model found. Start downloading pre-trained SeqVec (~360MB)...')
        import urllib.request
        Path.mkdir(model_dir)
        repo_link    = 'http://rostlab.org/~deepppi/embedding_repo/embedding_models/seqvec'
        options_link = repo_link +'/options.json'
        weights_link = repo_link +'/weights.hdf5'
        urllib.request.urlretrieve( options_link, options_path )
        urllib.request.urlretrieve( weights_link, weights_path )

    cuda_device = 0 if torch.cuda.is_available() and not cpu else -1
    return ElmoEmbedder( weight_file=weights_path, options_file=options_path, cuda_device=cuda_device ) 
Developer: mheinzinger, Project: SeqVec, Lines of code: 21, Source file: seqvec_embedder.py

Example 3: get_sent_embeds

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def get_sent_embeds(sent, elmo_options_file, elmo_weights_file, layer,
                    cuda_device):
    '''
    Get the embeddings of the sentence words.
    sent - list of tokens
    elmo_options_file - json for model. n_characters should be 262
    elmo_weights_file - saved model
    layer - what layer of ELMo to output
    cuda_device - cuda device

    returns a numpy array with the embeddings per token for the selected layer
    '''
    elmo = ElmoEmbedder(elmo_options_file, elmo_weights_file, cuda_device)
    s_embeds = elmo.embed_sentence(sent)
    layer_embeds = s_embeds[layer,:,:]
    return layer_embeds 
Developer: TalSchuster, Project: CrossLingualContextualEmb, Lines of code: 18, Source file: demo.py
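
A hypothetical call of the function above might look like the following. It is not part of the original demo.py; the file paths are placeholders, and it selects the top ELMo layer while running on CPU.

# Hypothetical usage of get_sent_embeds; file paths are placeholders.
tokens = ['The', 'cat', 'sat', 'on', 'the', 'mat', '.']
top_layer = get_sent_embeds(tokens,
                            elmo_options_file='options.json',
                            elmo_weights_file='weights.hdf5',
                            layer=2,          # top layer of ELMo's three layers
                            cuda_device=-1)   # CPU
# top_layer has shape (len(tokens), embedding_dim)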

Example 4: _get_model

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def _get_model(self, weights, options):
        # Retrieve the pre-trained embedding model.
        # Use GPU if available; to force CPU usage, set cuda_device=-1.
        cuda_device = 0 if torch.cuda.is_available() else -1
        return ElmoEmbedder(weight_file=weights, options_file=options, cuda_device=cuda_device) 
Developer: kipoi, Project: models, Lines of code: 7, Source file: model.py

Example 5: loadELMo

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def loadELMo(self):
        self.elmo = ElmoEmbedder(self.elmo_options_file, self.elmo_weight_file, self.elmo_cuda_device) 
Developer: UKPLab, Project: elmo-bilstm-cnn-crf, Lines of code: 4, Source file: ELMoWordEmbeddings.py

Example 6: load_elmo

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def load_elmo():
    return ElmoEmbedder(cuda_device=0) 
Developer: allanj, Project: ner_with_dependency, Lines of code: 4, Source file: preelmo.py

Example 7: test_all_embedding_works

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def test_all_embedding_works(self):
        sentence = u"Michael went to the store to buy some eggs ."
        with open(self.sentences_path, u'w') as f:
            f.write(sentence)

        sys.argv = [u"run.py",  # executable
                    u"elmo",  # command
                    self.sentences_path,
                    self.output_path,
                    u"--all",
                    u"--options-file",
                    self.options_file,
                    u"--weight-file",
                    self.weight_file]

        main()

        assert os.path.exists(self.output_path)

        embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
        expected_embedding = embedder.embed_sentence(sentence.split())

        with h5py.File(self.output_path, u'r') as h5py_file:
            assert set(h5py_file.keys()) == set([u"0", u"sentence_to_index"])
            # The vectors in the test configuration are smaller (32 length)
            embedding = h5py_file.get(u"0")
            assert embedding.shape == (3, len(sentence.split()), 32)
            numpy.testing.assert_allclose(embedding, expected_embedding, rtol=1e-4)
            assert json.loads(h5py_file.get(u"sentence_to_index")[0]) == {sentence: u"0"} 
Developer: plasticityai, Project: magnitude, Lines of code: 31, Source file: elmo_test.py

Example 8: test_top_embedding_works

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def test_top_embedding_works(self):
        sentence = u"Michael went to the store to buy some eggs ."
        with open(self.sentences_path, u'w') as f:
            f.write(sentence)

        sys.argv = [u"run.py",  # executable
                    u"elmo",  # command
                    self.sentences_path,
                    self.output_path,
                    u"--top",
                    u"--options-file",
                    self.options_file,
                    u"--weight-file",
                    self.weight_file]

        main()

        assert os.path.exists(self.output_path)

        embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
        expected_embedding = embedder.embed_sentence(sentence.split())[2]

        with h5py.File(self.output_path, u'r') as h5py_file:
            assert set(h5py_file.keys()) == set([u"0", u"sentence_to_index"])
            # The vectors in the test configuration are smaller (32 length)
            embedding = h5py_file.get(u"0")
            assert embedding.shape == (len(sentence.split()), 32)
            numpy.testing.assert_allclose(embedding, expected_embedding, rtol=1e-4)
            assert json.loads(h5py_file.get(u"sentence_to_index")[0]) == {sentence: u"0"} 
Developer: plasticityai, Project: magnitude, Lines of code: 31, Source file: elmo_test.py

Example 9: test_average_embedding_works

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def test_average_embedding_works(self):
        sentence = u"Michael went to the store to buy some eggs ."
        with open(self.sentences_path, u'w') as f:
            f.write(sentence)

        sys.argv = [u"run.py",  # executable
                    u"elmo",  # command
                    self.sentences_path,
                    self.output_path,
                    u"--average",
                    u"--options-file",
                    self.options_file,
                    u"--weight-file",
                    self.weight_file]

        main()

        assert os.path.exists(self.output_path)

        embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
        expected_embedding = embedder.embed_sentence(sentence.split())
        expected_embedding = (expected_embedding[0] + expected_embedding[1] + expected_embedding[2]) / 3

        with h5py.File(self.output_path, u'r') as h5py_file:
            assert set(h5py_file.keys()) == set([u"0", u"sentence_to_index"])
            # The vectors in the test configuration are smaller (32 length)
            embedding = h5py_file.get(u"0")
            assert embedding.shape == (len(sentence.split()), 32)
            numpy.testing.assert_allclose(embedding, expected_embedding, rtol=1e-4)
            assert json.loads(h5py_file.get(u"sentence_to_index")[0]) == {sentence: u"0"} 
Developer: plasticityai, Project: magnitude, Lines of code: 32, Source file: elmo_test.py

Example 10: test_embed_batch_is_empty_sentence

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def test_embed_batch_is_empty_sentence(self):
        embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
        embeddings = embedder.embed_sentence([])

        assert embeddings.shape == (3, 0, 1024) 
Developer: plasticityai, Project: magnitude, Lines of code: 7, Source file: elmo_test.py

Example 11: __init__

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def __init__(self):
        
        self.elmo = ElmoEmbedder() 
Developer: uhh-lt, Project: bert-sense, Lines of code: 5, Source file: ELMO_Model.py

Example 12: __init__

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def __init__(self, args, word_vocab):
        super().__init__(args, word_vocab)

        # import ElmoEmbedder here so that CUDA_VISIBLE_DEVICES can take effect
        from allennlp.commands.elmo import ElmoEmbedder
        self.elmo = ElmoEmbedder(cuda_device=0 if args.gpu is not None else -1) 
Developer: PKU-TANGENT, Project: NeuralEDUSeg, Lines of code: 8, Source file: elmo_crf_seg.py

Example 13: __init__

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def __init__(self, vocab, config):
        word2id = vocab.word2idx
        super(Model, self).__init__()
        vocab_num = len(word2id)
        self.word2id = word2id
        self.config = config
        self.char_dict = preprocess.get_char_dict('data/char_vocab.english.txt')
        self.genres = {g: i for i, g in enumerate(["bc", "bn", "mz", "nw", "pt", "tc", "wb"])}
        self.device = torch.device("cuda:" + config.cuda)

        self.emb = nn.Embedding(vocab_num, 350)

        emb1 = EmbedLoader().load_with_vocab(config.glove, vocab,normalize=False)
        emb2 = EmbedLoader().load_with_vocab(config.turian,  vocab ,normalize=False)
        pre_emb = np.concatenate((emb1, emb2), axis=1)
        pre_emb /= (np.linalg.norm(pre_emb, axis=1, keepdims=True) + 1e-12)

        if pre_emb is not None:
            self.emb.weight = nn.Parameter(torch.from_numpy(pre_emb).float())
            for param in self.emb.parameters():
                param.requires_grad = False
        self.emb_dropout = nn.Dropout(inplace=True)


        if config.use_elmo:
            self.elmo = ElmoEmbedder(options_file='data/elmo/elmo_2x4096_512_2048cnn_2xhighway_options.json',
                                     weight_file='data/elmo/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5',
                                     cuda_device=int(config.cuda))
            print("elmo load over.")
            self.elmo_args = torch.randn((3), requires_grad=True).to(self.device)

        self.char_emb = nn.Embedding(len(self.char_dict), config.char_emb_size)
        self.conv1 = nn.Conv1d(config.char_emb_size, 50, 3)
        self.conv2 = nn.Conv1d(config.char_emb_size, 50, 4)
        self.conv3 = nn.Conv1d(config.char_emb_size, 50, 5)

        self.feature_emb = nn.Embedding(config.span_width, config.feature_size)
        self.feature_emb_dropout = nn.Dropout(p=0.2, inplace=True)

        self.mention_distance_emb = nn.Embedding(10, config.feature_size)
        self.distance_drop = nn.Dropout(p=0.2, inplace=True)

        self.genre_emb = nn.Embedding(7, config.feature_size)
        self.speaker_emb = nn.Embedding(2, config.feature_size)

        self.bilstm = VarLSTM(input_size=350+150*config.use_CNN+config.use_elmo*1024,hidden_size=200,bidirectional=True,batch_first=True,hidden_dropout=0.2)
        # self.bilstm = nn.LSTM(input_size=500, hidden_size=200, bidirectional=True, batch_first=True)
        self.h0 = nn.init.orthogonal_(torch.empty(2, 1, 200)).to(self.device)
        self.c0 = nn.init.orthogonal_(torch.empty(2, 1, 200)).to(self.device)
        self.bilstm_drop = nn.Dropout(p=0.2, inplace=True)

        self.atten = ffnn(input_size=400, hidden_size=config.atten_hidden_size, output_size=1)
        self.mention_score = ffnn(input_size=1320, hidden_size=config.mention_hidden_size, output_size=1)
        self.sa = ffnn(input_size=3980+40*config.use_metadata, hidden_size=config.sa_hidden_size, output_size=1)
        self.mention_start_np = None
        self.mention_end_np = None 
Developer: fastnlp, Project: fastNLP, Lines of code: 58, Source file: model_re.py

Example 14: __init__

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def __init__(
        self,
        datasets_manager: DatasetsManager = None,
        layer_aggregation: str = "sum",
        device: Union[str, torch.device] = torch.device("cpu"),
        word_tokens_namespace="tokens",
    ):
        """ Bag of words Elmo Embedder which aggregates elmo embedding for every token

        Parameters
        ----------
        layer_aggregation : str
            You can choose one of ``[sum, average, last, first]``,
            which decides how to aggregate the different layers of ELMo. ELMo produces three
            layers of representations

            sum
                Representations from the different layers are summed
            average
                Representations from the different layers are averaged
            last
                Representations from the last layer are considered
            first
                Representations from the first layer are considered

        device : Union[str, torch.device]
            device for running the model on

        word_tokens_namespace : str
            Namespace where all the word tokens are stored
        """
        super(BowElmoEmbedder, self).__init__()
        self.dataset_manager = datasets_manager
        self.embedding_dimension = self.get_embedding_dimension()
        self.embedder_name = "elmo"
        self.word_tokens_namespace = word_tokens_namespace
        self.layer_aggregation_type = layer_aggregation
        self.allowed_layer_aggregation_types = ["sum", "average", "last", "first"]
        self.device = (
            torch.device(device) if isinstance(device, str) else torch.device(device)
        )

        if self.device.index:
            self.cuda_device_id = self.device.index
        else:
            self.cuda_device_id = -1
        self.msg_printer = wasabi.Printer()

        assert (
            self.layer_aggregation_type in self.allowed_layer_aggregation_types
        ), self.msg_printer.fail(
            f"For bag of words elmo encoder, the allowable aggregation "
            f"types are {self.allowed_layer_aggregation_types}. You passed {self.layer_aggregation_type}"
        )

        # load the elmo embedders
        with self.msg_printer.loading("Creating Elmo object"):
            self.elmo = ElmoEmbedder(cuda_device=self.cuda_device_id)
        self.msg_printer.good("Finished Loading Elmo object") 
Developer: abhinavkashyap, Project: sciwing, Lines of code: 61, Source file: bow_elmo_embedder.py
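
As a standalone illustration of what the four aggregation modes in the docstring above mean (a sketch, not the sciwing implementation; aggregate_layers is a hypothetical helper), each mode reduces the (3, num_tokens, dim) array returned by ElmoEmbedder.embed_sentence to a single (num_tokens, dim) array:

def aggregate_layers(layer_embeddings, mode='sum'):
    # layer_embeddings: numpy array of shape (3, num_tokens, dim) from embed_sentence.
    # Hypothetical helper illustrating the aggregation modes described above.
    if mode == 'sum':
        return layer_embeddings.sum(axis=0)
    if mode == 'average':
        return layer_embeddings.mean(axis=0)
    if mode == 'last':
        return layer_embeddings[-1]
    if mode == 'first':
        return layer_embeddings[0]
    raise ValueError('mode must be one of: sum, average, last, first')

# Example: token_vectors = aggregate_layers(elmo.embed_sentence(tokens), mode='average')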

Example 15: context_insensitive_elmo

# Required import: from allennlp.commands import elmo [as alias]
# Or: from allennlp.commands.elmo import ElmoEmbedder [as alias]
def context_insensitive_elmo(weights_path, options_path, word2idx, cuda=False, cache_dir=None):
    logger = get_logger()

    vocab = [w for w, i in sorted(word2idx.items(), key=lambda x: x[1])]

    validate_word2idx(word2idx)

    if cache_dir is not None:
        key = hash_vocab(vocab)
        cache_path = os.path.join(cache_dir, 'elmo_{}.npy'.format(key))

        if os.path.exists(cache_path):
            logger.info('Loading cached elmo vectors: {}'.format(cache_path))
            return load_elmo_cache(cache_path)

    if cuda:
        device = 0
    else:
        device = -1

    batch_size = 256
    nbatches = len(vocab) // batch_size + 1

    logger.info('Begin caching vectors. nbatches={} device={}'.format(nbatches, device))
    logger.info('Initialize ELMo Model.')

    # TODO: Does not support padding.
    elmo = ElmoEmbedder(options_file=options_path, weight_file=weights_path, cuda_device=device)
    vec_lst = []
    for i in tqdm(range(nbatches), desc='elmo'):
        start = i * batch_size
        batch = vocab[start:start+batch_size]
        if len(batch) == 0:
            continue
        vec = elmo.embed_sentence(batch)
        vec_lst.append(vec)

    vectors = np.concatenate([x[0] for x in vec_lst], axis=0)

    if cache_dir is not None:
        logger.info('Saving cached elmo vectors: {}'.format(cache_path))
        save_elmo_cache(cache_path, vectors)

    return vectors 
Developer: iesl, Project: diora, Lines of code: 46, Source file: embeddings.py


Note: The allennlp.commands.elmo.ElmoEmbedder method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub/MSDocs. The code snippets were selected from open-source projects contributed by their respective developers, and the source code copyright belongs to the original authors. Please refer to the corresponding project's license before distributing or using the code; do not reproduce without permission.