

Python text_field_embedders.BasicTextFieldEmbedder Code Examples

This article collects typical usage examples of the allennlp.modules.text_field_embedders.BasicTextFieldEmbedder class in Python. If you are wondering what BasicTextFieldEmbedder is for or how to use it, the curated code examples below should help; you can also explore the other usage examples in the allennlp.modules.text_field_embedders module.


Six code examples of text_field_embedders.BasicTextFieldEmbedder are shown below, ordered by popularity by default.
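
Before the project-specific examples, here is a minimal, self-contained sketch of what BasicTextFieldEmbedder does: it wraps a dictionary of TokenEmbedders, applies each one to the tensor produced by the matching token indexer, and concatenates the results. This is only an illustrative sketch assuming allennlp 0.9.x (the API the examples below target); the vocabulary size and dimensions are arbitrary.

import torch
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding

# One token embedder, keyed by the name of the indexer whose output it consumes.
embedder = BasicTextFieldEmbedder({"tokens": Embedding(num_embeddings=100, embedding_dim=16)})

# A TextField indexed with a single-id indexer yields a dict of tensors like this one.
token_tensors = {"tokens": torch.randint(0, 100, (2, 7))}  # (batch_size, sequence_length)
embedded = embedder(token_tensors)
print(embedded.shape)  # torch.Size([2, 7, 16])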

Example 1: get_token_embedders

# Required module import: from allennlp.modules import text_field_embedders [as alias]
# Or: from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder [as alias]
def get_token_embedders(model_name, tune_bert=False, special_tokens_fix=0):
    take_grads = tune_bert > 0
    bert_token_emb = PretrainedBertEmbedder(
        pretrained_model=model_name,
        top_layer_only=True, requires_grad=take_grads,
        special_tokens_fix=special_tokens_fix)

    token_embedders = {'bert': bert_token_emb}
    embedder_to_indexer_map = {"bert": ["bert", "bert-offsets"]}

    text_field_embedder = BasicTextFieldEmbedder(token_embedders=token_embedders,
                                                 embedder_to_indexer_map=embedder_to_indexer_map,
                                                 allow_unmatched_keys=True)
    return text_field_embedder
Developer: plkmo, Project: NLP_Toolkit, Lines: 16, Source: trainer.py
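
Two arguments in this snippet deserve a note (both exist in allennlp 0.9.x, which this code targets): embedder_to_indexer_map routes the named tensors produced by the token indexers into each embedder's forward() as positional arguments, so PretrainedBertEmbedder here receives both the "bert" ids and the "bert-offsets" tensor; allow_unmatched_keys=True disables the check that indexer keys and embedder keys match exactly. Below is a small sketch of that routing with a plain Embedding and a hypothetical "words" key, so it runs without downloading a BERT model:

import torch
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding

embedder = BasicTextFieldEmbedder(
    token_embedders={"words": Embedding(num_embeddings=50, embedding_dim=8)},
    # Embedder key -> list of indexer keys whose tensors are passed positionally.
    embedder_to_indexer_map={"words": ["words"]},
    # Tolerate indexer outputs (like "words-offsets") that no embedder consumes.
    allow_unmatched_keys=True)

tensors = {"words": torch.randint(0, 50, (2, 5)),
           "words-offsets": torch.zeros(2, 5, dtype=torch.long)}  # ignored
print(embedder(tensors).shape)  # torch.Size([2, 5, 8])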

Example 2: _get_embedder

# Required module import: from allennlp.modules import text_field_embedders [as alias]
# Or: from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder [as alias]
def _get_embedder(self, weights_name, special_tokens_fix):
    embedders = {'bert': PretrainedBertEmbedder(
        pretrained_model=weights_name,
        requires_grad=False,
        top_layer_only=True,
        special_tokens_fix=special_tokens_fix)
    }
    text_field_embedder = BasicTextFieldEmbedder(
        token_embedders=embedders,
        embedder_to_indexer_map={"bert": ["bert", "bert-offsets"]},
        allow_unmatched_keys=True)
    return text_field_embedder
Developer: plkmo, Project: NLP_Toolkit, Lines: 14, Source: gec_model.py

Example 3: __init__

# Required module import: from allennlp.modules import text_field_embedders [as alias]
# Or: from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder [as alias]
def __init__(self, vocab: Vocabulary) -> None:
    super().__init__(vocab)
    weight = torch.ones(vocab.get_vocab_size(), 10)
    token_embedding = Embedding(
        num_embeddings=vocab.get_vocab_size(), embedding_dim=10, weight=weight, trainable=False
    )
    self.embedder = BasicTextFieldEmbedder({"words": token_embedding})
Developer: allenai, Project: allennlp, Lines: 9, Source: list_field_test.py

Example 4: __init__

# Required module import: from allennlp.modules import text_field_embedders [as alias]
# Or: from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder [as alias]
def __init__(self,
             vocab: Vocabulary,
             source_text_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             decoder: SeqDecoder,
             tied_source_embedder_key: Optional[str] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:

    super(CustomComposedSeq2Seq, self).__init__(vocab, regularizer)

    self._source_text_embedder = source_text_embedder
    self._encoder = encoder
    self._decoder = decoder

    if self._encoder.get_output_dim() != self._decoder.get_output_dim():
        raise ConfigurationError(f"Encoder output dimension {self._encoder.get_output_dim()} should be"
                                 f" equal to decoder dimension {self._decoder.get_output_dim()}.")
    if tied_source_embedder_key:
        if not isinstance(self._source_text_embedder, BasicTextFieldEmbedder):
            raise ConfigurationError("Unable to tie embeddings: source text embedder is not"
                                     " an instance of `BasicTextFieldEmbedder`.")
        source_embedder = self._source_text_embedder._token_embedders[tied_source_embedder_key]
        if not isinstance(source_embedder, Embedding):
            raise ConfigurationError("Unable to tie embeddings: selected source embedder is not"
                                     " an instance of `Embedding`.")
        if source_embedder.get_output_dim() != self._decoder.target_embedder.get_output_dim():
            raise ConfigurationError("Output dimension mismatch between"
                                     " source embedder and target embedder.")
        self._source_text_embedder._token_embedders[tied_source_embedder_key] = self._decoder.target_embedder
    initializer(self)
Developer: IlyaGusev, Project: summarus, Lines: 33, Source: custom_composed_seq2seq.py
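
The tying step above relies on BasicTextFieldEmbedder keeping its token embedders in a plain dictionary (_token_embedders): overwriting one entry with the decoder's target embedder leaves encoder and decoder sharing a single Embedding module, and therefore a single weight matrix. A minimal sketch of that reference sharing, with hypothetical names and sizes not taken from the original project:

from allennlp.modules.token_embedders import Embedding

source_embedder = Embedding(num_embeddings=20, embedding_dim=6)
target_embedder = Embedding(num_embeddings=20, embedding_dim=6)

token_embedders = {"tokens": source_embedder}
token_embedders["tokens"] = target_embedder  # the same replacement the model performs

# Both names now refer to one module: one weight matrix, one set of gradients.
assert token_embedders["tokens"] is target_embedder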

Example 5: __init__

# Required module import: from allennlp.modules import text_field_embedders [as alias]
# Or: from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder [as alias]
def __init__(
    self,
    vocabulary: Vocabulary,
    input_size: int = 256,
    hidden_size: int = 128,
    num_layers: int = 2,
    dropout: float = 0.0,
):
    super().__init__()
    self._start_index = vocabulary.get_token_index("@start@", namespace="programs")
    self._end_index = vocabulary.get_token_index("@end@", namespace="programs")
    self._pad_index = vocabulary.get_token_index("@@PADDING@@", namespace="programs")
    self._unk_index = vocabulary.get_token_index("@@UNKNOWN@@", namespace="programs")

    vocab_size = vocabulary.get_vocab_size(namespace="programs")
    embedder_inner = Embedding(vocab_size, input_size, padding_index=self._pad_index)
    self._embedder = BasicTextFieldEmbedder({"programs": embedder_inner})

    self._encoder = PytorchSeq2SeqWrapper(
        nn.LSTM(
            input_size, hidden_size, num_layers=num_layers, dropout=dropout, batch_first=True
        )
    )
    # Project hidden states back to input size and tie input and output embeddings.
    self._projection_layer = nn.Linear(hidden_size, input_size, bias=False)
    self._output_layer = nn.Linear(input_size, vocab_size, bias=False)
    self._output_layer.weight = embedder_inner.weight

    # Record the average log2(perplexity); final perplexity is 2 ** this average.
    self._log2_perplexity = Average()
Developer: kdexd, Project: probnmn-clevr, Lines: 32, Source: program_prior.py
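
The perplexity bookkeeping at the end is worth a line of arithmetic: the model accumulates a per-batch log2(perplexity) value in an Average metric and recovers the final perplexity as 2 raised to that average (Example 6 states the same convention: perplexity = 2 ** average_val_loss). A toy sketch of the conversion, with made-up loss values and assuming the losses are natural-log cross-entropies:

import math

losses = [2.1, 1.9, 2.0]  # hypothetical per-batch cross-entropy losses, in nats
avg_log2_perplexity = sum(loss / math.log(2) for loss in losses) / len(losses)
print(2 ** avg_log2_perplexity)  # == e ** mean(losses), roughly 7.39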

Example 6: __init__

# Required module import: from allennlp.modules import text_field_embedders [as alias]
# Or: from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder [as alias]
def __init__(
    self,
    vocabulary: Vocabulary,
    source_namespace: str,
    target_namespace: str,
    input_size: int = 256,
    hidden_size: int = 256,
    num_layers: int = 2,
    dropout: float = 0.0,
    max_decoding_steps: int = 30,
):

    # @@PADDING@@, @@UNKNOWN@@, @start@ and @end@ have the same indices in all namespaces.
    self._pad_index = vocabulary.get_token_index("@@PADDING@@", namespace=source_namespace)
    self._unk_index = vocabulary.get_token_index("@@UNKNOWN@@", namespace=source_namespace)
    self._end_index = vocabulary.get_token_index("@end@", namespace=source_namespace)
    self._start_index = vocabulary.get_token_index("@start@", namespace=source_namespace)

    # Short-hand notations.
    __source_vocab_size = vocabulary.get_vocab_size(namespace=source_namespace)
    __target_vocab_size = vocabulary.get_vocab_size(namespace=target_namespace)

    # The source embedder converts tokenized source sequences to dense embeddings.
    __source_embedder = BasicTextFieldEmbedder(
        {"tokens": Embedding(__source_vocab_size, input_size, padding_index=self._pad_index)}
    )

    # Encodes the sequence of source embeddings into a sequence of hidden states.
    __encoder = PytorchSeq2SeqWrapper(
        nn.LSTM(input_size, hidden_size, num_layers, dropout=dropout, batch_first=True)
    )

    # Attention between the decoder context and encoder hidden states at each time step.
    __attention = DotProductAttention()

    super().__init__(
        vocabulary,
        source_embedder=__source_embedder,
        encoder=__encoder,
        max_decoding_steps=max_decoding_steps,
        attention=__attention,
        target_namespace=target_namespace,
        use_bleu=True,
    )
    # Record four metrics: perplexity, sequence accuracy, word error rate, and BLEU.
    # super().__init__() already declared "self._bleu";
    # perplexity = 2 ** average_val_loss;
    # word error rate = 1 - unigram recall.
    self._log2_perplexity = Average()
    self._sequence_accuracy = SequenceAccuracy()
    self._unigram_recall = UnigramRecall()
Developer: kdexd, Project: probnmn-clevr, Lines: 53, Source: seq2seq_base.py


Note: The allennlp.modules.text_field_embedders.BasicTextFieldEmbedder examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their respective developers, and copyright in the source code remains with the original authors; consult each project's license before using or redistributing the code. Do not reproduce this article without permission.