当前位置: 首页>>代码示例>>Python>>正文


Python elmo.batch_to_ids方法代码示例

本文整理汇总了Python中allennlp.modules.elmo.batch_to_ids方法的典型用法代码示例。如果您正苦于以下问题：Python elmo.batch_to_ids方法的具体用法？Python elmo.batch_to_ids怎么用？Python elmo.batch_to_ids使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块allennlp.modules.elmo的用法示例。


在下文中一共展示了elmo.batch_to_ids方法的10个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: batchify

# 需要导入模块: from allennlp.modules import elmo [as 别名]
# 或者: from allennlp.modules.elmo import batch_to_ids [as 别名]
def batchify(x_data, y_data, batch_size=128, shuffle=False):
    """Split parallel sequences of examples and labels into ELMo-ready batches.

    Args:
        x_data: Sliceable sequence of tokenized examples; each slice is
            passed to ``batch_to_ids``.
        y_data: Sliceable sequence of labels aligned with ``x_data``.
        batch_size: Maximum number of examples per batch.
        shuffle: When true, the order of the finished batches is shuffled
            in place. Examples are not moved between batches.

    Returns:
        A list of ``(x_batch, y_batch, lengths)`` tuples. ``lengths`` is a
        float column of shape ``(n, 1)`` holding each example's token count,
        clamped to a minimum of 1. Only ``y_batch`` is moved to the GPU when
        the module-level ``CUDA`` flag is set.
    """
    batches = []
    for start in range(0, len(x_data), batch_size):
        stop = start + batch_size
        x_chunk = x_data[start:stop]

        x_batch = batch_to_ids(x_chunk)

        # Clamp to 1 so an empty example never reports a zero length.
        token_counts = np.array([max(len(tokens), 1) for tokens in x_chunk])
        lengths = Variable(torch.from_numpy(token_counts).float()).view(-1, 1)

        labels = torch.from_numpy(np.array(y_data[start:stop]))
        if CUDA:
            labels = labels.cuda()
        y_batch = Variable(labels)

        batches.append((x_batch, y_batch, lengths))

    if shuffle:
        random.shuffle(batches)

    return batches
开发者ID:Pinafore,项目名称:qb,代码行数:18,代码来源:elmo.py

示例2: forward

# 需要导入模块: from allennlp.modules import elmo [as 别名]
# 或者: from allennlp.modules.elmo import batch_to_ids [as 别名]
def forward(self, lines: List[Line]):
        """Embed the word tokens of every line with ELMo.

        For each line, the tokens stored under ``self.word_tokens_namespace``
        are reduced to their ``text``, converted to character ids, moved to
        ``self.device`` and passed through ``self.elmo``.

        Returns:
            The first entry of the ``"elmo_representations"`` list returned
            by ``self.elmo``.
        """
        texts = [
            [token.text for token in line.tokens[self.word_tokens_namespace]]
            for line in lines
        ]

        char_ids = batch_to_ids(texts).to(self.device)
        elmo_output = self.elmo(char_ids)

        # Per the original author's note: batch_size, max_seq_length * 1024
        # (NOTE(review): the exact shape is not established here — confirm).
        return elmo_output["elmo_representations"][0]
开发者ID:abhinavkashyap,项目名称:sciwing,代码行数:15,代码来源:elmo_embedder.py

示例3: emb

# 需要导入模块: from allennlp.modules import elmo [as 别名]
# 或者: from allennlp.modules.elmo import batch_to_ids [as 别名]
def emb(self, word, default=None):
        """Return the token embedding of a single word as a plain Python list.

        Args:
            word: The word to embed; it is wrapped as a one-word, one-sentence
                batch before being converted to character ids.
            default: Accepted for interface compatibility; this
                implementation never reads it.

        Returns:
            ``tolist()`` of the ``'token_embedding'`` entry at batch index 0,
            position 1.
        """
        from allennlp.modules.elmo import batch_to_ids

        char_ids = batch_to_ids([[word]])
        token_vectors = self.embeddings(char_ids)['token_embedding']
        # Position 1 is presumably the word itself, with a boundary token at
        # position 0 — TODO confirm against the embedder in use.
        return token_vectors[0, 1].tolist()
开发者ID:vzhong,项目名称:embeddings,代码行数:7,代码来源:elmo.py

示例4: test_batch_to_char_ids

# 需要导入模块: from allennlp.modules import elmo [as 别名]
# 或者: from allennlp.modules.elmo import batch_to_ids [as 别名]
def test_batch_to_char_ids():
    """Check that ``utils.batch_to_char_ids`` matches ``batch_to_ids``.

    Two inputs are compared element by element: a ragged batch of three
    sentences, and a batch holding one single-word sentence.
    """
    ragged_batch = [
            ["This", "is", "a", "sentence"],
            ["Here", "'s", "one"],
            ["Another", "one"],
    ]
    single_word_batch = [["one"]]

    for sentences in (ragged_batch, single_word_batch):
        actual = utils.batch_to_char_ids(sentences)
        expected = batch_to_ids(sentences)
        np.testing.assert_array_equal(actual.numpy(), expected.numpy())
开发者ID:cnt-dev,项目名称:pytorch-fast-elmo,代码行数:16,代码来源:test_utils.py

示例5: forward

# 需要导入模块: from allennlp.modules import elmo [as 别名]
# 或者: from allennlp.modules.elmo import batch_to_ids [as 别名]
def forward(self, batch):
        """Classify a batch of questions and return loss, predictions and probabilities.

        Args:
            batch: Mapping with a ``'question'`` entry (iterable of strings,
                split on whitespace here) and a ``'class'`` entry that
                supports ``.cuda()`` and is passed to ``self.loss`` as the
                target.

        Returns:
            A ``(loss, preds, softmax)`` tuple, where ``preds`` is the argmax
            and ``softmax`` the softmax of the encoder output along dim 1.

        Note:
            Both the character ids and the targets are moved with ``.cuda()``
            unconditionally, so this method requires a GPU.
        """
        tokenized = [question.split() for question in batch['question']]
        char_ids = batch_to_ids(tokenized).cuda()

        elmo_out = self.elmo(char_ids)
        class_scores = self.cnn_encoder(
            elmo_out['elmo_representations'][0],
            elmo_out['mask'],
        )

        loss = self.loss(class_scores, batch['class'].cuda())
        probabilities = torch.nn.functional.softmax(class_scores, dim=1)
        predictions = torch.argmax(class_scores, dim=1)
        return loss, predictions, probabilities
开发者ID:martiansideofthemoon,项目名称:squash-generation,代码行数:11,代码来源:model.py

示例6: batch_to_embeddings

# 需要导入模块: from allennlp.modules import elmo [as 别名]
# 或者: from allennlp.modules.elmo import batch_to_ids [as 别名]
def batch_to_embeddings(self, batch):
        u"""
        Run the ELMo biLM over a batch of tokenized sentences.

        Parameters
        ----------
        batch : ``List[List[str]]``, required
            A list of tokenized sentences.

        Returns
        -------
            A tuple ``(activations, mask)``. ``activations`` stacks the
        per-layer outputs along dimension 1, giving
        (batch_size, 3, num_timesteps, 1024); ``mask`` is
        (batch_size, num_timesteps). Sentence-boundary positions have been
        removed from both.
        """
        char_ids = batch_to_ids(batch)
        if self.cuda_device >= 0:
            char_ids = char_ids.cuda(device=self.cuda_device)

        bilm_out = self.elmo_bilm(char_ids)
        per_layer = bilm_out[u'activations']
        boundary_mask = bilm_out[u'mask']

        # One (activation, mask) pair per biLM layer, each with the
        # sentence-boundary positions stripped.
        stripped = [
            remove_sentence_boundaries(layer, boundary_mask)
            for layer in per_layer
        ]

        # Insert a layer axis at dim 1 and join the layers along it.
        activations = torch.stack([pair[0] for pair in stripped], dim=1)

        # Every layer shares the same mask, so the first one is enough.
        mask = stripped[0][1]

        return activations, mask
开发者ID:plasticityai,项目名称:magnitude,代码行数:33,代码来源:elmo.py

示例7: elmo_process

# 需要导入模块: from allennlp.modules import elmo [as 别名]
# 或者: from allennlp.modules.elmo import batch_to_ids [as 别名]
def elmo_process(batch, device):
            """Build both the ELMo and the torchtext tensors for one batch.

            The ELMo character ids are created and moved to ``device`` first;
            ``torchtext_process`` is then called with the same arguments.

            Returns:
                A ``(elmo_tensor, torchtext_tensor)`` tuple.
            """
            char_ids = batch_to_ids(batch).to(device=device)
            return char_ids, torchtext_process(batch, device)
开发者ID:yikangshen,项目名称:Ordered-Memory,代码行数:7,代码来源:sentiment.py

示例8: transform

# 需要导入模块: from allennlp.modules import elmo [as 别名]
# 或者: from allennlp.modules.elmo import batch_to_ids [as 别名]
def transform(self, X, y=None):
        """Convert documents (and optionally labels) into model inputs.

        Uses the word, character and label vocabularies already held on the
        instance, plus the ELMo module in ``self._elmo``.

        Args:
            X: Documents to transform. It is iterated three separate times
                (word ids, character ids, ELMo), so it must be re-iterable;
                a one-shot generator will not work.
            y: Optional label sequences, one per document.

        Returns:
            ``features`` when ``y`` is None, otherwise ``(features, y)``.
            ``features`` is the list
            ``[word_ids, char_ids, elmo_embeddings]``; the returned ``y`` is
            the one-hot encoded, post-padded label array with three
            dimensions.
        """
        word_ids = pad_sequences(
            [self._word_vocab.doc2id(doc) for doc in X],
            padding='post',
        )

        char_ids = pad_nested_sequences(
            [[self._char_vocab.doc2id(word) for word in doc] for doc in X]
        )

        elmo_output = self._elmo(batch_to_ids(X))
        # Index 1 selects the second ELMo representation, so ``self._elmo``
        # must be configured to produce at least two.
        elmo_embeddings = elmo_output['elmo_representations'][1].detach().numpy()

        features = [word_ids, char_ids, elmo_embeddings]
        if y is None:
            return features

        label_ids = pad_sequences(
            [self._label_vocab.doc2id(doc) for doc in y],
            padding='post',
        )
        one_hot = to_categorical(label_ids, self.label_size).astype(int)

        # Author's note (2018/06/01): to_categorical drops a dimension for
        # length-one sequences:
        # >>> to_categorical([[1,3]], num_classes=4).shape
        # (1, 2, 4)
        # >>> to_categorical([[1]], num_classes=4).shape
        # (1, 4)
        # so a leading axis is restored whenever the result is not rank 3.
        if len(one_hot.shape) != 3:
            one_hot = np.expand_dims(one_hot, axis=0)
        return features, one_hot
开发者ID:Hironsan,项目名称:anago,代码行数:42,代码来源:preprocessing.py

示例9: forward

# 需要导入模块: from allennlp.modules import elmo [as 别名]
# 或者: from allennlp.modules.elmo import batch_to_ids [as 别名]
def forward(self, batch_text):
        """Return the ``'elmo_representations'`` entry produced by ``self.elmo``.

        ``batch_text`` is handed to ``self.elmo`` unchanged; no
        ``batch_to_ids`` conversion happens here, so the caller presumably
        supplies character ids already (verify against the call sites).
        """
        elmo_output = self.elmo(batch_text)
        return elmo_output['elmo_representations']
开发者ID:xycforgithub,项目名称:MultiTask-MRC,代码行数:5,代码来源:elmo.py

示例10: create_cached_cnn_embeddings

# 需要导入模块: from allennlp.modules import elmo [as 别名]
# 或者: from allennlp.modules.elmo import batch_to_ids [as 别名]
def create_cached_cnn_embeddings(self, tokens: List[str]) -> None:
        """
        Precompute uncontextual word vectors for a fixed vocabulary.

        Only the token embedder (the character convolutions and highway
        layers of ELMo) is run over the given tokens, and the result is
        cached so that later forward passes can look word vectors up in an
        embedding instead of recomputing them via the CNN encoder.

        This function sets 3 attributes:
        _word_embedding : ``Embedding``
            An embedding holding one row per token passed to this method.
        _bos_embedding : ``torch.Tensor``
            The embedding for the BOS token.
        _eos_embedding : ``torch.Tensor``
            The embedding for the EOS token.

        Parameters
        ----------
        tokens : ``List[str]``, required.
            A list of tokens to precompute character convolutions for.
        """
        # BOS and EOS are embedded with the vocabulary; they occupy rows 0
        # and 1 of the result.
        tokens = [ELMoCharacterMapper.bos_token, ELMoCharacterMapper.eos_token] + tokens
        total_tokens = len(tokens)

        tokens_per_row = 32
        rows_per_batch = 32
        token_rows = lazy_groups_of(iter(tokens), tokens_per_row)

        device = get_device_of(next(self.parameters()))
        flattened_chunks = []
        for row_group in lazy_groups_of(token_rows, rows_per_batch):
            # Shape (batch_size, timesteps, 50)
            char_ids = batch_to_ids(row_group)
            # NOTE: the model may already live on a GPU when this is called.
            # If it runs from the constructor it will most likely be on the
            # CPU, which is acceptable since only a few convolutions run.
            if device >= 0:
                char_ids = char_ids.cuda(device)

            embedder_out = self._token_embedder(char_ids, add_bos=False, add_eos=False)
            kept, _ = remove_sentence_boundaries(
                embedder_out["token_embedding"],
                embedder_out["mask"],
                rmv_bos=False,
                rmv_eos=False,
            )
            # Collapse (batch, timesteps) into one axis of token rows.
            flattened_chunks.append(kept.view(-1, kept.size(-1)))

        # The last batch may carry rows that come from padding, so the
        # concatenation is clipped to the number of real tokens.
        full_embedding = torch.cat(flattened_chunks, 0)[:total_tokens, :]
        # Everything after the BOS and EOS rows belongs to the vocabulary.
        word_rows = full_embedding[2:total_tokens, :]
        vocab_size, embedding_dim = list(word_rows.size())

        from allennlp.modules.token_embedders import Embedding # type: ignore
        self._bos_embedding = full_embedding[0, :]
        self._eos_embedding = full_embedding[1, :]
        self._word_embedding = Embedding(vocab_size, # type: ignore
                                         embedding_dim,
                                         weight=word_rows.data,
                                         trainable=self._requires_grad,
                                         padding_index=0)
开发者ID:jzhou316,项目名称:Unsupervised-Sentence-Summarization,代码行数:58,代码来源:elmo_sequential_embedder.py


注:本文中的allennlp.modules.elmo.batch_to_ids方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。