

Python token_embedders.Embedding method code examples

This article collects typical usage examples of the Python method allennlp.modules.token_embedders.Embedding. If you are wondering what token_embedders.Embedding does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples from the allennlp.modules.token_embedders module.


The following presents 9 code examples of token_embedders.Embedding, ordered by popularity by default.
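Before the examples, a minimal sketch of constructing an Embedding and pushing a batch of token ids through it may be helpful. It follows the positional (num_embeddings, embedding_dim) call pattern used in the examples below; other AllenNLP releases expect keyword arguments in a different order, so treat the exact signature as an assumption rather than a reference.

import torch
from allennlp.modules.token_embedders import Embedding

# A small embedding table: 100 tokens, 16 dimensions. The positional call mirrors
# the examples below; newer AllenNLP versions may require keyword arguments.
embedding = Embedding(100, 16)

# A batch of 2 sequences with 5 token ids each.
token_ids = torch.randint(0, 100, (2, 5))
embedded = embedding(token_ids)
print(embedded.shape)  # torch.Size([2, 5, 16])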

Example 1: _construct_embedding_matrix

# Required import: from allennlp.modules import token_embedders [as alias]
# Or alternatively: from allennlp.modules.token_embedders import Embedding [as alias]
def _construct_embedding_matrix(self) -> Embedding:
        """
        For HotFlip, we need a word embedding matrix to search over. The below is necessary for
        models such as ELMo, character-level models, or for models that use a projection layer
        after their word embeddings.

        We run all of the tokens from the vocabulary through the TextFieldEmbedder, and save the
        final output embedding. We then group all of those output embeddings into an "embedding
        matrix".
        """
        embedding_layer = util.find_embedding_layer(self.predictor._model)
        self.embedding_layer = embedding_layer
        if isinstance(embedding_layer, (Embedding, torch.nn.modules.sparse.Embedding)):
            # If we're using something that already has only one embedding matrix, we can just use
            # that and bypass this method.
            return embedding_layer.weight

        # We take the top `self.max_tokens` as candidates for hotflip.  Because we have to
        # construct a new vector for each of these, we can't always afford to use the whole vocab,
        # for both runtime and memory considerations.
        all_tokens = list(self.vocab._token_to_index[self.namespace])[: self.max_tokens]
        max_index = self.vocab.get_token_index(all_tokens[-1], self.namespace)
        self.invalid_replacement_indices = [
            i for i in self.invalid_replacement_indices if i < max_index
        ]

        inputs = self._make_embedder_input(all_tokens)

        # pass all tokens through the fake matrix and create an embedding out of it.
        embedding_matrix = embedding_layer(inputs).squeeze()

        return embedding_matrix 
Developer: allenai, Project: allennlp, Lines of code: 34, Source: hotflip.py
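Once the matrix is built, a HotFlip-style attack searches it for replacement tokens whose embedding difference best aligns with the loss gradient at the attacked position. The snippet below is a hedged, plain-PyTorch sketch of that first-order search, not AllenNLP's implementation; all names and tensors are illustrative.

import torch

def first_order_flip_candidates(embedding_matrix, token_embedding, grad, k=5):
    # embedding_matrix: (vocab_size, dim); token_embedding and grad: (dim,).
    # First-order Taylor estimate of the loss change when swapping the current
    # token's embedding for each row of the matrix; top-k maximizes the estimated
    # increase (flip the sign to minimize instead).
    scores = (embedding_matrix - token_embedding.unsqueeze(0)) @ grad
    return torch.topk(scores, k).indices

# Illustrative call with random tensors standing in for real model state.
vocab_size, dim = 1000, 32
matrix = torch.randn(vocab_size, dim)
candidates = first_order_flip_candidates(matrix, torch.randn(dim), torch.randn(dim))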

Example 2: __init__

# Required import: from allennlp.modules import token_embedders [as alias]
# Or alternatively: from allennlp.modules.token_embedders import Embedding [as alias]
def __init__(self,
             vocab,
             source_embedder,
             encoder,
             max_decoding_steps,
             target_namespace=u"tokens",
             target_embedding_dim=None,
             attention_function=None,
             scheduled_sampling_ratio=0.0):
        super(SimpleSeq2Seq, self).__init__(vocab)
        self._source_embedder = source_embedder
        self._encoder = encoder
        self._max_decoding_steps = max_decoding_steps
        self._target_namespace = target_namespace
        self._attention_function = attention_function
        self._scheduled_sampling_ratio = scheduled_sampling_ratio
        # We need the start symbol to provide as the input at the first timestep of decoding, and
        # end symbol as a way to indicate the end of the decoded sequence.
        self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace)
        self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace)
        num_classes = self.vocab.get_vocab_size(self._target_namespace)
        # Decoder output dim needs to be the same as the encoder output dim since we initialize the
        # hidden state of the decoder with that of the final hidden states of the encoder. Also, if
        # we're using attention with ``DotProductSimilarity``, this is needed.
        self._decoder_output_dim = self._encoder.get_output_dim()
        target_embedding_dim = target_embedding_dim or self._source_embedder.get_output_dim()
        self._target_embedder = Embedding(num_classes, target_embedding_dim)
        if self._attention_function:
            self._decoder_attention = LegacyAttention(self._attention_function)
            # The output of attention, a weighted average over encoder outputs, will be
            # concatenated to the input vector of the decoder at each time step.
            self._decoder_input_dim = self._encoder.get_output_dim() + target_embedding_dim
        else:
            self._decoder_input_dim = target_embedding_dim
        # TODO (pradeep): Do not hardcode decoder cell type.
        self._decoder_cell = LSTMCell(self._decoder_input_dim, self._decoder_output_dim)
        self._output_projection_layer = Linear(self._decoder_output_dim, num_classes)

    #overrides 
Developer: plasticityai, Project: magnitude, Lines of code: 41, Source: simple_seq2seq.py
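The attention-plus-LSTMCell scheme described in the comments above boils down to one operation per timestep: attend over the encoder outputs, concatenate the context vector with the previous target embedding, and step the cell. Below is a hedged, plain-PyTorch sketch of a single decoder step under those assumptions; the dot-product attention and all names are illustrative, not AllenNLP's code.

import torch

def decoder_step(prev_embedding, hidden, cell, encoder_outputs, lstm_cell, projection):
    # prev_embedding: (batch, embed_dim); hidden and cell: (batch, decoder_dim);
    # encoder_outputs: (batch, src_len, decoder_dim).
    # Dot-product attention between the decoder hidden state and encoder outputs.
    scores = torch.bmm(encoder_outputs, hidden.unsqueeze(-1)).squeeze(-1)  # (batch, src_len)
    weights = torch.softmax(scores, dim=-1)
    context = torch.bmm(weights.unsqueeze(1), encoder_outputs).squeeze(1)  # (batch, decoder_dim)

    # Concatenate the attention context with the previous target embedding,
    # matching the _decoder_input_dim arithmetic above.
    decoder_input = torch.cat([context, prev_embedding], dim=-1)
    hidden, cell = lstm_cell(decoder_input, (hidden, cell))
    logits = projection(hidden)  # (batch, num_classes)
    return logits, hidden, cell

# Illustrative modules and one step with made-up sizes.
B, E, D, L, C = 2, 16, 32, 7, 50
cell = torch.nn.LSTMCell(D + E, D)
proj = torch.nn.Linear(D, C)
logits, h, c = decoder_step(torch.randn(B, E), torch.randn(B, D), torch.randn(B, D),
                            torch.randn(B, L, D), cell, proj)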

Example 3: _remap_embeddings

# Required import: from allennlp.modules import token_embedders [as alias]
# Or alternatively: from allennlp.modules.token_embedders import Embedding [as alias]
def _remap_embeddings(self, token_type_embeddings):
        embed_dim = token_type_embeddings.shape[1]
        if list(token_type_embeddings.shape) == [self.remap_segment_embeddings, embed_dim]:
            # already remapped!
            return None
        new_embeddings = torch.nn.Embedding(self.remap_segment_embeddings, embed_dim)
        new_embeddings.weight.data.copy_(token_type_embeddings.data[0, :])
        return new_embeddings 
Developer: allenai, Project: kb, Lines of code: 10, Source: knowbert.py
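The copy_ call above relies on broadcasting: copying an (embed_dim,) vector into a (num_segments, embed_dim) weight fills every row, so all new segment embeddings start out equal to pretrained segment 0. A small hedged check of that behaviour, with made-up sizes:

import torch

old = torch.nn.Embedding(2, 8)   # BERT's original two segment types
new = torch.nn.Embedding(4, 8)   # e.g. remap_segment_embeddings = 4
# Broadcasts segment 0's vector across all four rows of the new table.
new.weight.data.copy_(old.weight.data[0, :])
assert torch.allclose(new.weight[1], old.weight[0])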

Example 4: __init__

# Required import: from allennlp.modules import token_embedders [as alias]
# Or alternatively: from allennlp.modules.token_embedders import Embedding [as alias]
def __init__(
        self,
        vocabulary: Vocabulary,
        input_size: int = 256,
        hidden_size: int = 128,
        num_layers: int = 2,
        dropout: float = 0.0,
    ):
        super().__init__()
        self._start_index = vocabulary.get_token_index("@start@", namespace="programs")
        self._end_index = vocabulary.get_token_index("@end@", namespace="programs")
        self._pad_index = vocabulary.get_token_index("@@PADDING@@", namespace="programs")
        self._unk_index = vocabulary.get_token_index("@@UNKNOWN@@", namespace="programs")

        vocab_size = vocabulary.get_vocab_size(namespace="programs")
        embedder_inner = Embedding(vocab_size, input_size, padding_index=self._pad_index)
        self._embedder = BasicTextFieldEmbedder({"programs": embedder_inner})

        self._encoder = PytorchSeq2SeqWrapper(
            nn.LSTM(
                input_size, hidden_size, num_layers=num_layers, dropout=dropout, batch_first=True
            )
        )
        # Project and tie input and output embeddings
        self._projection_layer = nn.Linear(hidden_size, input_size, bias=False)
        self._output_layer = nn.Linear(input_size, vocab_size, bias=False)
        self._output_layer.weight = embedder_inner.weight

        # Record average log2 (perplexity) for calculating final perplexity.
        self._log2_perplexity = Average() 
Developer: kdexd, Project: probnmn-clevr, Lines of code: 32, Source: program_prior.py
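The "tie input and output embeddings" step above works because nn.Linear stores its weight as (out_features, in_features), which for an (input_size -> vocab_size) layer has exactly the shape of the embedding table. A hedged, self-contained sketch of the same pattern with assumed sizes:

import torch.nn as nn

vocab_size, input_size, hidden_size = 1000, 256, 128

embedder = nn.Embedding(vocab_size, input_size)
projection = nn.Linear(hidden_size, input_size, bias=False)   # hidden state -> embedding space
output_layer = nn.Linear(input_size, vocab_size, bias=False)  # embedding space -> vocab logits

# Share one tensor between the input embedding table and the output layer,
# so gradient updates to either affect both.
output_layer.weight = embedder.weight
assert output_layer.weight is embedder.weight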

Example 5: __init__

# Required import: from allennlp.modules import token_embedders [as alias]
# Or alternatively: from allennlp.modules.token_embedders import Embedding [as alias]
def __init__(self,
             encoder_output_dim,
             action_embedding_dim,
             input_attention,
             num_start_types,
             num_entity_types,
             mixture_feedforward=None,
             dropout=0.0,
             unlinked_terminal_indices=None):
        super(WikiTablesDecoderStep, self).__init__()
        self._mixture_feedforward = mixture_feedforward
        self._entity_type_embedding = Embedding(num_entity_types, action_embedding_dim)
        self._input_attention = input_attention

        self._num_start_types = num_start_types
        self._start_type_predictor = Linear(encoder_output_dim, num_start_types)

        # Decoder output dim needs to be the same as the encoder output dim since we initialize the
        # hidden state of the decoder with the final hidden state of the encoder.
        output_dim = encoder_output_dim
        input_dim = output_dim
        # Our decoder input will be the concatenation of the decoder hidden state and the previous
        # action embedding, and we'll project that down to the decoder's `input_dim`, which we
        # arbitrarily set to be the same as `output_dim`.
        self._input_projection_layer = Linear(output_dim + action_embedding_dim, input_dim)
        # Before making a prediction, we'll compute an attention over the input given our updated
        # hidden state. Then we concatenate those with the decoder state and project to
        # `action_embedding_dim` to make a prediction.
        self._output_projection_layer = Linear(output_dim + encoder_output_dim, action_embedding_dim)
        if unlinked_terminal_indices is not None:
            # This means we are using coverage to train the parser.
            # These factors are used to add the embeddings of yet to be produced actions to the
            # predicted embedding, and to boost the action logits of yet to be produced linked
            # actions, respectively.
            self._unlinked_checklist_multiplier = Parameter(torch.FloatTensor([1.0]))
            self._linked_checklist_multiplier = Parameter(torch.FloatTensor([1.0]))

        self._unlinked_terminal_indices = unlinked_terminal_indices
        # TODO(pradeep): Do not hardcode decoder cell type.
        self._decoder_cell = LSTMCell(input_dim, output_dim)

        if mixture_feedforward is not None:
            check_dimensions_match(output_dim, mixture_feedforward.get_input_dim(),
                                   u"hidden state embedding dim", u"mixture feedforward input dim")
            check_dimensions_match(mixture_feedforward.get_output_dim(), 1,
                                   u"mixture feedforward output dim", u"dimension for scalar value")

        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x

    #overrides 
Developer: plasticityai, Project: magnitude, Lines of code: 55, Source: wikitables_decoder_step.py
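The checklist multipliers introduced above scale how strongly the embeddings of not-yet-produced actions are mixed into the predicted action embedding. The snippet below is a loose, hedged sketch of that idea with invented names and shapes; the actual coverage bookkeeping in the parser is more involved.

import torch

action_embedding_dim, num_actions = 16, 10
unlinked_checklist_multiplier = torch.nn.Parameter(torch.FloatTensor([1.0]))

predicted_action_embedding = torch.randn(action_embedding_dim)
checklist_balance = torch.rand(num_actions)              # closer to 1 for actions still "owed"
action_embeddings = torch.randn(num_actions, action_embedding_dim)

# Add a checklist-weighted sum of action embeddings, scaled by the learned scalar.
predicted_action_embedding = predicted_action_embedding + \
    unlinked_checklist_multiplier * (checklist_balance @ action_embeddings)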

Example 6: __init__

# Required import: from allennlp.modules import token_embedders [as alias]
# Or alternatively: from allennlp.modules.token_embedders import Embedding [as alias]
def __init__(self,
             vocab,
             text_field_embedder,
             context_layer,
             mention_feedforward,
             antecedent_feedforward,
             feature_size,
             max_span_width,
             spans_per_word,
             max_antecedents,
             lexical_dropout=0.2,
             initializer=InitializerApplicator(),
             regularizer=None):
        super(CoreferenceResolver, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._context_layer = context_layer
        self._antecedent_feedforward = TimeDistributed(antecedent_feedforward)
        feedforward_scorer = torch.nn.Sequential(
                TimeDistributed(mention_feedforward),
                TimeDistributed(torch.nn.Linear(mention_feedforward.get_output_dim(), 1)))
        self._mention_pruner = SpanPruner(feedforward_scorer)
        self._antecedent_scorer = TimeDistributed(torch.nn.Linear(antecedent_feedforward.get_output_dim(), 1))

        self._endpoint_span_extractor = EndpointSpanExtractor(context_layer.get_output_dim(),
                                                              combination=u"x,y",
                                                              num_width_embeddings=max_span_width,
                                                              span_width_embedding_dim=feature_size,
                                                              bucket_widths=False)
        self._attentive_span_extractor = SelfAttentiveSpanExtractor(input_dim=text_field_embedder.get_output_dim())

        # 10 possible distance buckets.
        self._num_distance_buckets = 10
        self._distance_embedding = Embedding(self._num_distance_buckets, feature_size)

        self._max_span_width = max_span_width
        self._spans_per_word = spans_per_word
        self._max_antecedents = max_antecedents

        self._mention_recall = MentionRecall()
        self._conll_coref_scores = ConllCorefScores()
        if lexical_dropout > 0:
            self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)
        else:
            self._lexical_dropout = lambda x: x
        initializer(self)

    #overrides 
Developer: plasticityai, Project: magnitude, Lines of code: 50, Source: coref.py
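The EndpointSpanExtractor configured with combination=u"x,y" above represents each span by concatenating the contextualized vectors at its start and end positions (the span-width embedding is omitted here). A hedged, plain-PyTorch sketch of that endpoint gathering with illustrative shapes:

import torch

batch_size, seq_len, dim = 2, 7, 8
sequence = torch.randn(batch_size, seq_len, dim)
spans = torch.tensor([[[0, 2], [3, 5]], [[1, 1], [4, 6]]])   # (batch, num_spans, 2), inclusive indices

starts, ends = spans[..., 0], spans[..., 1]
start_embeddings = torch.gather(sequence, 1, starts.unsqueeze(-1).expand(-1, -1, dim))
end_embeddings = torch.gather(sequence, 1, ends.unsqueeze(-1).expand(-1, -1, dim))
span_embeddings = torch.cat([start_embeddings, end_embeddings], dim=-1)
print(span_embeddings.shape)  # torch.Size([2, 2, 16])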

Example 7: _compute_span_pair_embeddings

# Required import: from allennlp.modules import token_embedders [as alias]
# Or alternatively: from allennlp.modules.token_embedders import Embedding [as alias]
def _compute_span_pair_embeddings(self,
                                  top_span_embeddings,
                                  antecedent_embeddings,
                                  antecedent_offsets):
        u"""
        Computes an embedding representation of pairs of spans for the pairwise scoring function
        to consider. This includes both the original span representations, the element-wise
        similarity of the span representations, and an embedding representation of the distance
        between the two spans.

        Parameters
        ----------
        top_span_embeddings : ``torch.FloatTensor``, required.
            Embedding representations of the top spans. Has shape
            (batch_size, num_spans_to_keep, embedding_size).
        antecedent_embeddings : ``torch.FloatTensor``, required.
            Embedding representations of the antecedent spans we are considering
            for each top span. Has shape
            (batch_size, num_spans_to_keep, max_antecedents, embedding_size).
        antecedent_offsets : ``torch.IntTensor``, required.
            The offsets between each top span and its antecedent spans in terms
            of spans we are considering. Has shape (1, max_antecedents).

        Returns
        -------
        span_pair_embeddings : ``torch.FloatTensor``
            Embedding representation of the pair of spans to consider. Has shape
            (batch_size, num_spans_to_keep, max_antecedents, embedding_size)
        """
        # Shape: (batch_size, num_spans_to_keep, max_antecedents, embedding_size)
        target_embeddings = top_span_embeddings.unsqueeze(2).expand_as(antecedent_embeddings)

        # Shape: (1, max_antecedents, embedding_size)
        antecedent_distance_embeddings = self._distance_embedding(
                util.bucket_values(antecedent_offsets,
                                   num_total_buckets=self._num_distance_buckets))

        # Shape: (1, 1, max_antecedents, embedding_size)
        antecedent_distance_embeddings = antecedent_distance_embeddings.unsqueeze(0)

        expanded_distance_embeddings_shape = (antecedent_embeddings.size(0),
                                              antecedent_embeddings.size(1),
                                              antecedent_embeddings.size(2),
                                              antecedent_distance_embeddings.size(-1))
        # Shape: (batch_size, num_spans_to_keep, max_antecedents, embedding_size)
        antecedent_distance_embeddings = antecedent_distance_embeddings.expand(*expanded_distance_embeddings_shape)

        # Shape: (batch_size, num_spans_to_keep, max_antecedents, embedding_size)
        span_pair_embeddings = torch.cat([target_embeddings,
                                          antecedent_embeddings,
                                          antecedent_embeddings * target_embeddings,
                                          antecedent_distance_embeddings], -1)
        return span_pair_embeddings 
Developer: plasticityai, Project: magnitude, Lines of code: 55, Source: coref.py
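The util.bucket_values call in this method keeps small antecedent offsets in their own buckets and places larger ones in logarithmically sized buckets before they are embedded. The function below is a hedged reimplementation of that scheme for illustration; the exact boundaries in AllenNLP's version may differ.

import math
import torch

def bucket_distances(distances, num_identity_buckets=4, num_total_buckets=10):
    # Assumes distances >= 1. Small distances map to themselves; larger ones fall
    # into log2-spaced buckets, clamped to the last bucket
    # (roughly: 1, 2, 3, 4, 5-7, 8-15, 16-31, 32-63, 64+).
    log_index = (torch.log(distances.float()) / math.log(2)).floor().long() + 3
    use_identity = (distances <= num_identity_buckets).long()
    combined = use_identity * distances + (1 - use_identity) * log_index
    return combined.clamp(0, num_total_buckets - 1)

offsets = torch.tensor([1, 2, 4, 5, 8, 20, 64, 300])
print(bucket_distances(offsets))  # tensor([1, 2, 4, 5, 6, 7, 9, 9])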

Example 8: _compute_coreference_scores

# Required import: from allennlp.modules import token_embedders [as alias]
# Or alternatively: from allennlp.modules.token_embedders import Embedding [as alias]
def _compute_coreference_scores(self,
                                pairwise_embeddings,
                                top_span_mention_scores,
                                antecedent_mention_scores,
                                antecedent_log_mask):
        u"""
        Computes scores for every pair of spans. Additionally, a dummy label is included,
        representing the decision that the span is not coreferent with anything. For the dummy
        label, the score is always zero. For the true antecedent spans, the score consists of
        the pairwise antecedent score and the unary mention scores for the span and its
        antecedent. The factoring allows the model to blame many of the absent links on bad
        spans, enabling the pruning strategy used in the forward pass.

        Parameters
        ----------
        pairwise_embeddings: ``torch.FloatTensor``, required.
            Embedding representations of pairs of spans. Has shape
            (batch_size, num_spans_to_keep, max_antecedents, encoding_dim)
        top_span_mention_scores: ``torch.FloatTensor``, required.
            Mention scores for every span. Has shape
            (batch_size, num_spans_to_keep, max_antecedents).
        antecedent_mention_scores: ``torch.FloatTensor``, required.
            Mention scores for every antecedent. Has shape
            (batch_size, num_spans_to_keep, max_antecedents).
        antecedent_log_mask: ``torch.FloatTensor``, required.
            The log of the mask for valid antecedents.

        Returns
        -------
        coreference_scores: ``torch.FloatTensor``
            A tensor of shape (batch_size, num_spans_to_keep, max_antecedents + 1),
            representing the unnormalised score for each (span, antecedent) pair
            we considered.

        """
        # Shape: (batch_size, num_spans_to_keep, max_antecedents)
        antecedent_scores = self._antecedent_scorer(
                self._antecedent_feedforward(pairwise_embeddings)).squeeze(-1)
        antecedent_scores += top_span_mention_scores + antecedent_mention_scores
        antecedent_scores += antecedent_log_mask

        # Shape: (batch_size, num_spans_to_keep, 1)
        shape = [antecedent_scores.size(0), antecedent_scores.size(1), 1]
        dummy_scores = antecedent_scores.new_zeros(*shape)

        # Shape: (batch_size, num_spans_to_keep, max_antecedents + 1)
        coreference_scores = torch.cat([dummy_scores, antecedent_scores], -1)
        return coreference_scores 
Developer: plasticityai, Project: magnitude, Lines of code: 50, Source: coref.py
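Downstream, scores shaped like this return value are typically normalized over the last dimension, so that each kept span gets a distribution over its candidate antecedents plus the dummy "no antecedent" column at index 0. A hedged sketch with illustrative shapes:

import torch

batch_size, num_spans_to_keep, max_antecedents = 2, 3, 4
coreference_scores = torch.randn(batch_size, num_spans_to_keep, max_antecedents + 1)
coreference_scores[:, :, 0] = 0.0  # the dummy "not coreferent" score is always zero

probs = torch.softmax(coreference_scores, dim=-1)
predicted = probs.argmax(dim=-1)   # 0 means "no antecedent"; i > 0 points at antecedent i - 1
print(predicted.shape)             # torch.Size([2, 3])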

Example 9: __init__

# Required import: from allennlp.modules import token_embedders [as alias]
# Or alternatively: from allennlp.modules.token_embedders import Embedding [as alias]
def __init__(
        self,
        vocabulary: Vocabulary,
        source_namespace: str,
        target_namespace: str,
        input_size: int = 256,
        hidden_size: int = 256,
        num_layers: int = 2,
        dropout: float = 0.0,
        max_decoding_steps: int = 30,
    ):

        # @@PADDING@@, @@UNKNOWN@@, @start@, @end@ have same indices in all namespaces.
        self._pad_index = vocabulary.get_token_index("@@PADDING@@", namespace=source_namespace)
        self._unk_index = vocabulary.get_token_index("@@UNKNOWN@@", namespace=source_namespace)
        self._end_index = vocabulary.get_token_index("@end@", namespace=source_namespace)
        self._start_index = vocabulary.get_token_index("@start@", namespace=source_namespace)

        # Short-hand notations.
        __source_vocab_size = vocabulary.get_vocab_size(namespace=source_namespace)
        __target_vocab_size = vocabulary.get_vocab_size(namespace=target_namespace)

        # Source embedder converts tokenized source sequences to dense embeddings.
        __source_embedder = BasicTextFieldEmbedder(
            {"tokens": Embedding(__source_vocab_size, input_size, padding_index=self._pad_index)}
        )

        # Encodes the sequence of source embeddings into a sequence of hidden states.
        __encoder = PytorchSeq2SeqWrapper(
            nn.LSTM(input_size, hidden_size, num_layers, dropout=dropout, batch_first=True)
        )

        # Attention mechanism between decoder context and encoder hidden states at each time step.
        __attention = DotProductAttention()

        super().__init__(
            vocabulary,
            source_embedder=__source_embedder,
            encoder=__encoder,
            max_decoding_steps=max_decoding_steps,
            attention=__attention,
            target_namespace=target_namespace,
            use_bleu=True,
        )
        # Record four metrics - perplexity, sequence accuracy, word error rate and BLEU score.
        # super().__init__() already declared "self._bleu",
        # perplexity = 2 ** average_val_loss
        # word error rate = 1 - unigram recall
        self._log2_perplexity = Average()
        self._sequence_accuracy = SequenceAccuracy()
        self._unigram_recall = UnigramRecall() 
Developer: kdexd, Project: probnmn-clevr, Lines of code: 53, Source: seq2seq_base.py
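The perplexity bookkeeping mentioned in the comments above assumes the averaged quantity is a per-token cross entropy converted to base 2, so that perplexity = 2 ** average. A hedged sketch of that conversion with dummy tensors:

import math
import torch
import torch.nn.functional as F

logits = torch.randn(8, 1000)             # (num_tokens, vocab_size), illustrative
targets = torch.randint(0, 1000, (8,))

nll = F.cross_entropy(logits, targets)    # mean negative log likelihood in nats
log2_perplexity = nll / math.log(2)       # the value fed to the Average() metric
perplexity = 2 ** log2_perplexity.item()  # equivalently math.exp(nll.item())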


Note: The allennlp.modules.token_embedders.Embedding examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their original authors, and copyright of the source code remains with those authors. Please consult each project's license before distributing or reusing the code; do not reproduce this article without permission.