

Python Vocabulary.from_instances Method Code Examples

This article collects typical usage examples of the Python method allennlp.data.Vocabulary.from_instances. If you are wondering what Vocabulary.from_instances does or how to use it in practice, the curated code samples below should help. You can also explore further usage examples of its containing class, allennlp.data.Vocabulary.


Nine code examples of Vocabulary.from_instances are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
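Before the collected examples, here is a minimal, self-contained sketch of the basic call pattern. The dataset reader and file path are illustrative placeholders, not taken from the examples below:

# A minimal sketch of Vocabulary.from_instances (assumes AllenNLP is installed).
# SequenceTaggingDatasetReader and the .tsv path are placeholders for illustration.
from allennlp.data import Vocabulary
from allennlp.data.dataset_readers import SequenceTaggingDatasetReader

reader = SequenceTaggingDatasetReader()
instances = list(reader.read("data/sequence_tagging.tsv"))

# Count tokens and labels across all instances to build the vocabulary;
# the optional min_count dict prunes entries rarer than the threshold
# in a given namespace.
vocab = Vocabulary.from_instances(instances, min_count={"tokens": 1})
print(vocab.get_vocab_size("tokens"), vocab.get_vocab_size("labels"))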

Example 1: set_up_model

# Required module import: from allennlp.data import Vocabulary [as alias]
# Or: from allennlp.data.Vocabulary import from_instances [as alias]
# Imports this snippet relies on (reconstructed; not shown in the original excerpt):
from allennlp.common import Params
from allennlp.data import Batch, DatasetReader, Vocabulary
from allennlp.models import Model

def set_up_model(self, param_file, dataset_file):
    self.param_file = param_file
    params = Params.from_file(self.param_file)

    reader = DatasetReader.from_params(params["dataset_reader"])
    # The dataset reader might be lazy, but a lazy list here breaks some of our tests.
    instances = reader.read(str(dataset_file))
    # Use parameters for vocabulary if they are present in the config file, so that choices like
    # "non_padded_namespaces", "min_count" etc. can be set if needed.
    if "vocabulary" in params:
        vocab_params = params["vocabulary"]
        vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
    else:
        vocab = Vocabulary.from_instances(instances)
    self.vocab = vocab
    self.instances = instances
    self.instances.index_with(vocab)
    self.model = Model.from_params(vocab=self.vocab, params=params["model"])

    # TODO(joelgrus) get rid of these
    # (a lot of the model tests use them, so they'll have to be changed)
    self.dataset = Batch(list(self.instances))
    self.dataset.index_instances(self.vocab)
Developer: allenai, Project: allennlp, Lines: 26, Source: model_test_case.py

Example 2: setup_method

# Required module import: from allennlp.data import Vocabulary [as alias]
# Or: from allennlp.data.Vocabulary import from_instances [as alias]
# Imports this snippet relies on (reconstructed; not shown in the original excerpt):
from allennlp.common import Params
from allennlp.data import Vocabulary
from allennlp.data.dataset_readers import SequenceTaggingDatasetReader
from allennlp.models import SimpleTagger

def setup_method(self):
    super().setup_method()
    self.instances = SequenceTaggingDatasetReader().read(
        self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"
    )
    self.vocab = Vocabulary.from_instances(self.instances)
    self.model_params = Params(
        {
            "text_field_embedder": {
                "token_embedders": {
                    "tokens": {"type": "embedding", "embedding_dim": 5, "sparse": True}
                }
            },
            "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2},
        }
    )
    self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params)
Developer: allenai, Project: allennlp, Lines: 19, Source: optimizer_test.py

Example 3: get_vocab_and_both_elmo_indexed_ids

# Required module import: from allennlp.data import Vocabulary [as alias]
# Or: from allennlp.data.Vocabulary import from_instances [as alias]
# Imports this snippet relies on (allennlp 0.x-style; reconstructed, not shown in
# the original excerpt):
from allennlp.data import Instance, Token, Vocabulary
from allennlp.data.dataset import Batch
from allennlp.data.fields import TextField
from allennlp.data.token_indexers import ELMoTokenCharactersIndexer, SingleIdTokenIndexer

def get_vocab_and_both_elmo_indexed_ids(batch):  # batch: List[List[str]]
    instances = []
    indexer = ELMoTokenCharactersIndexer()
    indexer2 = SingleIdTokenIndexer()
    for sentence in batch:
        tokens = [Token(token) for token in sentence]
        field = TextField(tokens,
                          {u'character_ids': indexer,
                           u'tokens': indexer2})
        instance = Instance({u"elmo": field})
        instances.append(instance)

    dataset = Batch(instances)
    vocab = Vocabulary.from_instances(instances)
    dataset.index_instances(vocab)
    return vocab, dataset.as_tensor_dict()[u"elmo"]
Developer: plasticityai, Project: magnitude, Lines: 18, Source: elmo_test.py

Example 4: setUp

# Required module import: from allennlp.data import Vocabulary [as alias]
# Or: from allennlp.data.Vocabulary import from_instances [as alias]
# Imports this snippet relies on (allennlp 0.x-style; reconstructed, not shown in
# the original excerpt):
from allennlp.common import Params
from allennlp.data import Vocabulary
from allennlp.data.dataset_readers import SequenceTaggingDatasetReader
from allennlp.models.simple_tagger import SimpleTagger

def setUp(self):
    super(TestOptimizer, self).setUp()
    self.instances = SequenceTaggingDatasetReader().read(self.FIXTURES_ROOT / u'data' / u'sequence_tagging.tsv')
    vocab = Vocabulary.from_instances(self.instances)
    self.model_params = Params({
        u"text_field_embedder": {
            u"tokens": {
                u"type": u"embedding",
                u"embedding_dim": 5
            }
        },
        u"encoder": {
            u"type": u"lstm",
            u"input_size": 5,
            u"hidden_size": 7,
            u"num_layers": 2
        }
    })
    self.model = SimpleTagger.from_params(vocab=vocab, params=self.model_params)
Developer: plasticityai, Project: magnitude, Lines: 21, Source: optimizer_test.py

Example 5: setUp

# Required module import: from allennlp.data import Vocabulary [as alias]
# Or: from allennlp.data.Vocabulary import from_instances [as alias]
# Imports this snippet relies on (allennlp 0.x-style; reconstructed, not shown in
# the original excerpt):
import torch

from allennlp.common import Params
from allennlp.data import Vocabulary
from allennlp.data.dataset_readers import SequenceTaggingDatasetReader
from allennlp.data.iterators import BasicIterator
from allennlp.models.simple_tagger import SimpleTagger

def setUp(self):
    super(TestTrainer, self).setUp()
    self.instances = SequenceTaggingDatasetReader().read(self.FIXTURES_ROOT / u'data' / u'sequence_tagging.tsv')
    vocab = Vocabulary.from_instances(self.instances)
    self.vocab = vocab
    self.model_params = Params({
        u"text_field_embedder": {
            u"tokens": {
                u"type": u"embedding",
                u"embedding_dim": 5
            }
        },
        u"encoder": {
            u"type": u"lstm",
            u"input_size": 5,
            u"hidden_size": 7,
            u"num_layers": 2
        }
    })
    self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params)
    self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01)
    self.iterator = BasicIterator(batch_size=2)
    self.iterator.index_with(vocab)
Developer: plasticityai, Project: magnitude, Lines: 25, Source: trainer_test.py

Example 6: set_up_model

# Required module import: from allennlp.data import Vocabulary [as alias]
# Or: from allennlp.data.Vocabulary import from_instances [as alias]
# Imports this snippet relies on (allennlp 0.x-style; reconstructed, not shown in
# the original excerpt):
from allennlp.common import Params
from allennlp.data import DatasetReader, Vocabulary
from allennlp.data.dataset import Batch
from allennlp.models import Model

def set_up_model(self, param_file, dataset_file):
    # pylint: disable=attribute-defined-outside-init
    self.param_file = param_file
    params = Params.from_file(self.param_file)

    reader = DatasetReader.from_params(params['dataset_reader'])
    # The dataset reader might be lazy, but a lazy list here breaks some of our tests.
    instances = list(reader.read(str(dataset_file)))
    # Use parameters for vocabulary if they are present in the config file, so that choices like
    # "non_padded_namespaces", "min_count" etc. can be set if needed.
    if 'vocabulary' in params:
        vocab_params = params['vocabulary']
        vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
    else:
        vocab = Vocabulary.from_instances(instances)
    self.vocab = vocab
    self.instances = instances
    self.model = Model.from_params(vocab=self.vocab, params=params['model'])

    # TODO(joelgrus) get rid of these
    # (a lot of the model tests use them, so they'll have to be changed)
    self.dataset = Batch(self.instances)
    self.dataset.index_instances(self.vocab)
Developer: allenai, Project: vampire, Lines: 25, Source: test_case.py

Example 7: get_vocab_and_both_elmo_indexed_ids

# Required module import: from allennlp.data import Vocabulary [as alias]
# Or: from allennlp.data.Vocabulary import from_instances [as alias]
# Imports this snippet relies on (reconstructed; not shown in the original excerpt):
from typing import List

from allennlp.data import Batch, Instance, Token, Vocabulary
from allennlp.data.fields import TextField
from allennlp.data.token_indexers import ELMoTokenCharactersIndexer, SingleIdTokenIndexer

def get_vocab_and_both_elmo_indexed_ids(batch: List[List[str]]):
    instances = []
    indexer = ELMoTokenCharactersIndexer()
    indexer2 = SingleIdTokenIndexer()
    for sentence in batch:
        tokens = [Token(token) for token in sentence]
        field = TextField(tokens, {"character_ids": indexer, "tokens": indexer2})
        instance = Instance({"elmo": field})
        instances.append(instance)

    dataset = Batch(instances)
    vocab = Vocabulary.from_instances(instances)
    dataset.index_instances(vocab)
    return vocab, dataset.as_tensor_dict()["elmo"]
Developer: allenai, Project: allennlp, Lines: 16, Source: elmo_test.py

Example 8: setup_method

# Required module import: from allennlp.data import Vocabulary [as alias]
# Or: from allennlp.data.Vocabulary import from_instances [as alias]
# Imports this snippet relies on (reconstructed; not shown in the original excerpt):
import torch
from torch.utils.data import DataLoader

from allennlp.common import Params
from allennlp.data import Vocabulary, allennlp_collate
from allennlp.data.dataset_readers import SequenceTaggingDatasetReader
from allennlp.models import SimpleTagger

def setup_method(self):
    super().setup_method()
    self.instances = SequenceTaggingDatasetReader().read(
        self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"
    )
    self.instances_lazy = SequenceTaggingDatasetReader(lazy=True).read(
        self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"
    )
    vocab = Vocabulary.from_instances(self.instances)
    self.vocab = vocab
    self.model_params = Params(
        {
            "text_field_embedder": {
                "token_embedders": {"tokens": {"type": "embedding", "embedding_dim": 5}}
            },
            "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2},
        }
    )
    self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params)
    self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01, momentum=0.9)
    self.data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collate)
    self.data_loader_lazy = DataLoader(
        self.instances_lazy, batch_size=2, collate_fn=allennlp_collate
    )
    self.validation_data_loader = DataLoader(
        self.instances, batch_size=2, collate_fn=allennlp_collate
    )
    self.instances.index_with(vocab)
    self.instances_lazy.index_with(vocab)
Developer: allenai, Project: allennlp, Lines: 31, Source: trainer_test.py

Example 9: test_batch_predictions_are_consistent

# Required module import: from allennlp.data import Vocabulary [as alias]
# Or: from allennlp.data.Vocabulary import from_instances [as alias]
# Imports this snippet relies on (allennlp 0.x-style; reconstructed, not shown in
# the original excerpt):
from allennlp.common import Params
from allennlp.data import DatasetReader, Vocabulary
from allennlp.models import Model

def test_batch_predictions_are_consistent(self):
    # The CNN encoder has problems with this kind of test - it's not properly masked yet, so
    # changing the amount of padding in the batch will result in small differences in the
    # output of the encoder.  Because BiDAF is so deep, these differences get magnified through
    # the network and make this test impossible.  So, we'll remove the CNN encoder entirely
    # from the model for this test.  If/when we fix the CNN encoder to work correctly with
    # masking, we can change this back to how the other models run this test, with just a
    # single line.
    # pylint: disable=protected-access,attribute-defined-outside-init

    # Save some state.
    saved_model = self.model
    saved_instances = self.instances

    # Modify the state, run the test with modified state.
    params = Params.from_file(self.param_file)
    reader = DatasetReader.from_params(params[u'dataset_reader'])
    reader._token_indexers = {u'tokens': reader._token_indexers[u'tokens']}
    self.instances = reader.read(self.FIXTURES_ROOT / u'data' / u'squad.json')
    vocab = Vocabulary.from_instances(self.instances)
    for instance in self.instances:
        instance.index_fields(vocab)
    del params[u'model'][u'text_field_embedder'][u'token_embedders'][u'token_characters']
    params[u'model'][u'phrase_layer'][u'input_size'] = 2
    self.model = Model.from_params(vocab=vocab, params=params[u'model'])

    self.ensure_batch_predictions_are_consistent()

    # Restore the state.
    self.model = saved_model
    self.instances = saved_instances
Developer: plasticityai, Project: magnitude, Lines: 33, Source: bidaf_test.py


Note: The allennlp.data.Vocabulary.from_instances examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright for the source code remains with the original authors, and distribution and use are subject to each project's license. Please do not reproduce without permission.