This article collects typical usage examples of the Python method allennlp.modules.elmo._ElmoBiLm. If you are wondering what elmo._ElmoBiLm does and how to use it, the curated examples below should help. You can also explore the containing module, allennlp.modules.elmo, for further usage.
The following presents 13 code examples of elmo._ElmoBiLm, ordered by popularity by default.
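Before the examples, here is a minimal sketch of driving _ElmoBiLm directly. It is not taken from the examples below; the file paths are placeholders for real ELMo options/weights files, while batch_to_ids is the standard AllenNLP helper for building the character-id tensor:

import torch
from allennlp.modules.elmo import _ElmoBiLm, batch_to_ids

options_file = "elmo_options.json"   # placeholder: path or URL to an ELMo options file
weight_file = "elmo_weights.hdf5"    # placeholder: path or URL to an ELMo weights file

elmo_bilm = _ElmoBiLm(options_file, weight_file)
elmo_bilm.eval()

# Convert tokenised sentences into the (batch, timesteps, 50) character-id tensor.
character_ids = batch_to_ids([["Hello", "world"], ["ELMo"]])
with torch.no_grad():
    output = elmo_bilm(character_ids)

# output["activations"] is a list with one (batch, timesteps + 2, dim) tensor per
# biLM layer; the +2 accounts for the <S>/</S> boundary tokens added internally.
# output["mask"] marks which of those positions are real tokens.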
Example 1: test_elmo_bilm_can_cache_char_cnn_embeddings
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def test_elmo_bilm_can_cache_char_cnn_embeddings(self):
    sentences = [["This", "is", "a", "sentence"], ["Here", "'s", "one"], ["Another", "one"]]
    vocab, tensor = self.get_vocab_and_both_elmo_indexed_ids(sentences)
    words_to_cache = list(vocab.get_token_to_index_vocabulary("tokens").keys())
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file)
    elmo_bilm.eval()
    no_cache = elmo_bilm(
        tensor["character_ids"]["elmo_tokens"], tensor["character_ids"]["elmo_tokens"]
    )
    # ELMo is stateful, so we need to actually re-initialise it for this comparison to work.
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file, vocab_to_cache=words_to_cache)
    elmo_bilm.eval()
    cached = elmo_bilm(tensor["character_ids"]["elmo_tokens"], tensor["tokens"]["tokens"])
    numpy.testing.assert_array_almost_equal(
        no_cache["mask"].data.cpu().numpy(), cached["mask"].data.cpu().numpy()
    )
    for activation_cached, activation in zip(cached["activations"], no_cache["activations"]):
        numpy.testing.assert_array_almost_equal(
            activation_cached.data.cpu().numpy(), activation.data.cpu().numpy(), decimal=6
        )
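Note the forward signature in use here: once vocab_to_cache is supplied, _ElmoBiLm is called with both the character ids and the corresponding word ids (tensor["tokens"]["tokens"] above), so cached words can bypass the character CNN.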
Example 2: test_elmo_with_module
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def test_elmo_with_module(self):
    # We will create the _ElmoBiLm module and pass it in as the ``module`` argument.
    sentences = [
        ["The", "sentence", "."],
        ["ELMo", "helps", "disambiguate", "ELMo", "from", "Elmo", "."],
    ]
    character_ids = self._sentences_to_ids(sentences)
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file)
    elmo = Elmo(None, None, 2, dropout=0.0, module=elmo_bilm)
    output = elmo(character_ids)
    elmo_representations = output["elmo_representations"]
    assert len(elmo_representations) == 2
    for k in range(2):
        assert list(elmo_representations[k].size()) == [2, 7, 32]
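Passing module= hands Elmo a prebuilt _ElmoBiLm instead of loading one from files, which is why options_file and weight_file are None here (Example 11 below shows the constructor enforcing this). The [2, 7, 32] shape reflects the small test-fixture model; the full pretrained ELMo produces 1024-dimensional representations.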
Example 3: __init__
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def __init__(self,
             options_file=DEFAULT_OPTIONS_FILE,
             weight_file=DEFAULT_WEIGHT_FILE,
             cuda_device=-1):
    u"""
    Parameters
    ----------
    options_file : ``str``, optional
        A path or URL to an ELMo options file.
    weight_file : ``str``, optional
        A path or URL to an ELMo weights file.
    cuda_device : ``int``, optional, (default=-1)
        The GPU device to run on.
    """
    self.indexer = ELMoTokenCharactersIndexer()
    logger.info(u"Initializing ELMo.")
    self.elmo_bilm = _ElmoBiLm(options_file, weight_file)
    if cuda_device >= 0:
        self.elmo_bilm = self.elmo_bilm.cuda(device=cuda_device)
    self.cuda_device = cuda_device
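A hedged sketch of how a wrapper with this __init__ might be called. MyElmoEmbedder is a hypothetical name for the surrounding class, while batch_to_ids is real AllenNLP API:

from allennlp.modules.elmo import batch_to_ids

embedder = MyElmoEmbedder(cuda_device=-1)            # hypothetical class name
character_ids = batch_to_ids([["A", "short", "sentence"]])
if embedder.cuda_device >= 0:
    character_ids = character_ids.cuda(device=embedder.cuda_device)
# Each entry of "activations" is a (batch, timesteps + 2, dim) layer output.
activations = embedder.elmo_bilm(character_ids)["activations"]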
Example 4: test_elmo_bilm_can_cache_char_cnn_embeddings (older AllenNLP / Python 2 variant of Example 1)
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def test_elmo_bilm_can_cache_char_cnn_embeddings(self):
    sentences = [[u"This", u"is", u"a", u"sentence"],
                 [u"Here", u"'s", u"one"],
                 [u"Another", u"one"]]
    vocab, tensor = self.get_vocab_and_both_elmo_indexed_ids(sentences)
    words_to_cache = list(vocab.get_token_to_index_vocabulary(u"tokens").keys())
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file)
    elmo_bilm.eval()
    no_cache = elmo_bilm(tensor[u"character_ids"], tensor[u"character_ids"])
    # ELMo is stateful, so we need to actually re-initialise it for this comparison to work.
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file, vocab_to_cache=words_to_cache)
    elmo_bilm.eval()
    cached = elmo_bilm(tensor[u"character_ids"], tensor[u"tokens"])
    numpy.testing.assert_array_almost_equal(no_cache[u"mask"].data.cpu().numpy(),
                                            cached[u"mask"].data.cpu().numpy())
    for activation_cached, activation in izip(cached[u"activations"], no_cache[u"activations"]):
        numpy.testing.assert_array_almost_equal(activation_cached.data.cpu().numpy(),
                                                activation.data.cpu().numpy(), decimal=6)
Example 5: __init__
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def __init__(self, config):
    super(ELMo, self).__init__()
    self.bsize = config['bsize']
    self.pool_type = config['pool_type']
    self.which_layer = config['which_layer']
    self.version = 1 if 'version' not in config else config['version']
    self.elmo_embedder = _ElmoBiLm(config['optfile'],
                                   config['wgtfile'],
                                   requires_grad=False)
    assert self.version in [1, 2]
    if self.version == 1:
        self.bos = '<s>'
        self.eos = '</s>'
        self.max_pad = True
        self.moses_tok = False
    elif self.version == 2:
        self.bos = '<p>'
        self.eos = '</p>'
        self.max_pad = False
        self.moses_tok = True
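The version flag only changes preprocessing: the two versions differ in boundary tokens (<s>/</s> vs <p>/</p>), padding behaviour (max_pad), and tokenisation (moses_tok). The _ElmoBiLm call itself is identical in both cases.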
Example 6: test_elmo_char_cnn_cache_does_not_raise_error_for_uncached_words
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def test_elmo_char_cnn_cache_does_not_raise_error_for_uncached_words(self):
    sentences = [["This", "is", "OOV"], ["so", "is", "this"]]
    in_vocab_sentences = [["here", "is"], ["a", "vocab"]]
    oov_tensor = self.get_vocab_and_both_elmo_indexed_ids(sentences)[1]
    vocab, in_vocab_tensor = self.get_vocab_and_both_elmo_indexed_ids(in_vocab_sentences)
    words_to_cache = list(vocab.get_token_to_index_vocabulary("tokens").keys())
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file, vocab_to_cache=words_to_cache)
    elmo_bilm(
        in_vocab_tensor["character_ids"]["elmo_tokens"], in_vocab_tensor["tokens"]["tokens"]
    )
    elmo_bilm(oov_tensor["character_ids"]["elmo_tokens"], oov_tensor["tokens"]["tokens"])
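The test passes because the cached word-id path is only an optimisation: words missing from the cached vocabulary fall back to the character-CNN computation instead of raising an error.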
Example 7: test_elmo_char_cnn_cache_does_not_raise_error_for_uncached_words (older AllenNLP / Python 2 variant of Example 6)
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def test_elmo_char_cnn_cache_does_not_raise_error_for_uncached_words(self):
    sentences = [[u"This", u"is", u"OOV"], [u"so", u"is", u"this"]]
    in_vocab_sentences = [[u"here", u"is"], [u"a", u"vocab"]]
    oov_tensor = self.get_vocab_and_both_elmo_indexed_ids(sentences)[1]
    vocab, in_vocab_tensor = self.get_vocab_and_both_elmo_indexed_ids(in_vocab_sentences)
    words_to_cache = list(vocab.get_token_to_index_vocabulary(u"tokens").keys())
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file, vocab_to_cache=words_to_cache)
    elmo_bilm(in_vocab_tensor[u"character_ids"], in_vocab_tensor[u"tokens"])
    elmo_bilm(oov_tensor[u"character_ids"], oov_tensor[u"tokens"])
Example 8: test_elmo_with_module (older AllenNLP / Python 2 variant of Example 2)
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def test_elmo_with_module(self):
    # We will create the _ElmoBiLm module and pass it in as the ``module`` argument.
    sentences = [[u'The', u'sentence', u'.'],
                 [u'ELMo', u'helps', u'disambiguate', u'ELMo', u'from', u'Elmo', u'.']]
    character_ids = self._sentences_to_ids(sentences)
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file)
    elmo = Elmo(None, None, 2, dropout=0.0, module=elmo_bilm)
    output = elmo(character_ids)
    elmo_representations = output[u'elmo_representations']
    assert len(elmo_representations) == 2
    for k in range(2):
        assert list(elmo_representations[k].size()) == [2, 7, 32]
Example 9: __init__
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def __init__(self, nhid, optfile, wgtfile, dropout):
    super(ELMo, self).__init__()
    self.elmo_embedder = _ElmoBiLm(optfile, wgtfile, requires_grad=False)
    # One learnable weight per biLM layer (three layers in total).
    self.weight_param = nn.Parameter(torch.FloatTensor([0.0, 0.0, 0.0]))
    self.relu_network = nn.Sequential(OrderedDict([
        ('linear', nn.Linear(1024, nhid)),
        ('dropout', nn.Dropout(dropout)),
        ('relu', nn.ReLU())  # originally keyed 'tanh', but the activation is ReLU
    ]))
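The three-element weight_param suggests a learned mix over the three biLM activations. A minimal sketch of how such weights are typically applied; this forward logic is an assumption, not shown in the source:

import torch.nn.functional as F

def mix_layers(activations, weight_param):
    # activations: the three (batch, timesteps, dim) tensors from _ElmoBiLm
    weights = F.softmax(weight_param, dim=0)   # normalise the three layer weights
    return sum(w * act for w, act in zip(weights, activations))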
Example 10: __init__
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def __init__(self, options_file, weight_file, device=None):
    self._elmo_lstm = _ElmoBiLm(options_file,
                                weight_file,
                                requires_grad=False,
                                vocab_to_cache=None)
    if device is not None:
        self._elmo_lstm = self._elmo_lstm.to(device)
    self.output_dim = self._elmo_lstm.get_output_dim()
Developer: sz128 · Project: slot_filling_and_intent_detection_of_SLU · Lines: 12 · Source: get_ELMo_word_embedding_for_a_dataset.py
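get_output_dim() reports the size of each activation's final dimension (1024 for the standard pretrained ELMo), which makes it convenient for sizing downstream layers without hard-coding the value.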
Example 11: __init__
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def __init__(self,
             options_file: str,
             weight_file: str,
             num_output_representations: int,
             requires_grad: bool = False,
             do_layer_norm: bool = False,
             dropout: float = 0.5,
             vocab_to_cache: List[str] = None,
             module: torch.nn.Module = None) -> None:
    super(Elmo, self).__init__()
    logging.info("Initializing ELMo")
    if module is not None:
        if options_file is not None or weight_file is not None:
            raise ConfigurationError(
                "Don't provide options_file or weight_file with module")
        self._elmo_lstm = module
    else:
        self._elmo_lstm = _ElmoBiLm(options_file,
                                    weight_file,
                                    requires_grad=requires_grad,
                                    vocab_to_cache=vocab_to_cache)
    self._has_cached_vocab = vocab_to_cache is not None
    self._dropout = Dropout(p=dropout)
    self.num_output_representations = num_output_representations
    if num_output_representations != -1:
        self._scalar_mixes: Any = []
        for k in range(num_output_representations):
            scalar_mix = ScalarMix(self._elmo_lstm.num_layers, do_layer_norm=do_layer_norm)
            self.add_module('scalar_mix_{}'.format(k), scalar_mix)
            self._scalar_mixes.append(scalar_mix)
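Each ScalarMix learns a normalised weighted average over the biLM's num_layers activations; this is what produces the entries of elmo_representations seen in Example 2. The num_output_representations == -1 branch appears to be a project-specific extension of the stock AllenNLP constructor that skips building the mixes.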
Example 12: test_elmo_bilm
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def test_elmo_bilm(self):
    # get the raw data
    sentences, expected_lm_embeddings = self._load_sentences_embeddings()
    # load the test model
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file)
    # Deal with the data.
    indexer = ELMoTokenCharactersIndexer()
    # For each sentence, first create a TextField, then create an instance
    instances = []
    for batch in zip(*sentences):
        for sentence in batch:
            tokens = [Token(token) for token in sentence.split()]
            field = TextField(tokens, {"character_ids": indexer})
            instance = Instance({"elmo": field})
            instances.append(instance)
    vocab = Vocabulary()
    dataset = AllennlpDataset(instances, vocab)
    # Now finally we can iterate through batches.
    loader = PyTorchDataLoader(dataset, 3)
    for i, batch in enumerate(loader):
        lm_embeddings = elmo_bilm(batch["elmo"]["character_ids"]["elmo_tokens"])
        top_layer_embeddings, mask = remove_sentence_boundaries(
            lm_embeddings["activations"][2], lm_embeddings["mask"]
        )
        # check the mask lengths
        lengths = mask.data.numpy().sum(axis=1)
        batch_sentences = [sentences[k][i] for k in range(3)]
        expected_lengths = [len(sentence.split()) for sentence in batch_sentences]
        assert lengths.tolist() == expected_lengths
        # get the expected embeddings and compare!
        expected_top_layer = [expected_lm_embeddings[k][i] for k in range(3)]
        for k in range(3):
            assert numpy.allclose(
                top_layer_embeddings[k, : lengths[k], :].data.numpy(),
                expected_top_layer[k],
                atol=1.0e-6,
            )
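remove_sentence_boundaries strips the <S>/</S> positions that _ElmoBiLm adds internally, which is why the recovered mask lengths line up with the raw sentence lengths asserted here.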
Example 13: test_elmo_bilm (older AllenNLP / Python 2 variant of Example 12)
# Required import: from allennlp.modules import elmo [as alias]
# Or: from allennlp.modules.elmo import _ElmoBiLm [as alias]
def test_elmo_bilm(self):
    # get the raw data
    sentences, expected_lm_embeddings = self._load_sentences_embeddings()
    # load the test model
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file)
    # Deal with the data.
    indexer = ELMoTokenCharactersIndexer()
    # For each sentence, first create a TextField, then create an instance
    instances = []
    for batch in izip(*sentences):
        for sentence in batch:
            tokens = [Token(token) for token in sentence.split()]
            field = TextField(tokens, {u'character_ids': indexer})
            instance = Instance({u"elmo": field})
            instances.append(instance)
    vocab = Vocabulary()
    # Now finally we can iterate through batches.
    iterator = BasicIterator(3)
    iterator.index_with(vocab)
    for i, batch in enumerate(iterator(instances, num_epochs=1, shuffle=False)):
        lm_embeddings = elmo_bilm(batch[u'elmo'][u'character_ids'])
        top_layer_embeddings, mask = remove_sentence_boundaries(
            lm_embeddings[u'activations'][2],
            lm_embeddings[u'mask']
        )
        # check the mask lengths
        lengths = mask.data.numpy().sum(axis=1)
        batch_sentences = [sentences[k][i] for k in range(3)]
        expected_lengths = [
            len(sentence.split()) for sentence in batch_sentences
        ]
        self.assertEqual(lengths.tolist(), expected_lengths)
        # get the expected embeddings and compare!
        expected_top_layer = [expected_lm_embeddings[k][i] for k in range(3)]
        for k in range(3):
            self.assertTrue(
                numpy.allclose(
                    top_layer_embeddings[k, :lengths[k], :].data.numpy(),
                    expected_top_layer[k],
                    atol=1.0e-6
                )
            )