当前位置: 首页>>代码示例>>Python>>正文


Python DatasetReader.from_params方法代码示例

本文整理汇总了 Python 中 allennlp.data.DatasetReader.from_params 方法的典型用法代码示例。如果您正苦于以下问题:Python DatasetReader.from_params 方法的具体用法是什么?Python DatasetReader.from_params 怎么用?有哪些 Python DatasetReader.from_params 的使用例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 allennlp.data.DatasetReader 的用法示例。


在下文中一共展示了 DatasetReader.from_params 方法的 15 个代码示例,这些例子默认根据受欢迎程度排序(其中部分示例调用的是 Model、TokenEmbedder 等其他类的 from_params 方法)。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: __init__

# 需要导入模块: from allennlp.data import DatasetReader [as 别名]
# 或者: from allennlp.data.DatasetReader import from_params [as 别名]
def __init__(self,
                archive_file=DEFAULT_ARCHIVE_FILE,
                cuda_device=DEFAULT_CUDA_DEVICE,
                model_file=None):
        """Load a trained model archive and keep its reader, tokenizer and model.

        Args:
            archive_file: Path to a local model archive. When it is not an
                existing file, ``model_file`` is resolved through
                ``cached_path`` and used instead.
            cuda_device: Device id passed to ``check_for_gpu`` and
                ``load_archive``.
            model_file: Fallback location used only when ``archive_file`` does
                not exist on disk.

        Raises:
            Exception: If ``archive_file`` is missing and ``model_file`` is
                falsy.
        """
        check_for_gpu(cuda_device)

        archive_exists = os.path.isfile(archive_file)
        if not archive_exists and not model_file:
            raise Exception("No model for JointNLU is specified!")
        if not archive_exists:
            archive_file = cached_path(model_file)

        archive = load_archive(archive_file, cuda_device=cuda_device)
        self.tokenizer = SpacyWordSplitter(language="en_core_web_sm")
        # The reader is rebuilt from the configuration stored in the archive.
        self.dataset_reader = DatasetReader.from_params(archive.config["dataset_reader"])
        self.model = archive.model
        # Put the model into evaluation mode.
        self.model.eval()
开发者ID:ConvLab,项目名称:ConvLab,代码行数:22,代码来源:nlu.py

示例2: __init__

# 需要导入模块: from allennlp.data import DatasetReader [as 别名]
# 或者: from allennlp.data.DatasetReader import from_params [as 别名]
def __init__(self,
                archive_file=DEFAULT_ARCHIVE_FILE,
                cuda_device=DEFAULT_CUDA_DEVICE,
                model_file=None):
        """Initialise the policy from a trained model archive.

        Runs ``SysPolicy.__init__``, loads the archive, rebuilds the dataset
        reader from the archived configuration and wires the reader's
        ``action_vocab`` and ``state_encoder`` into this object.

        Args:
            archive_file: Path to a local model archive. When it is not an
                existing file, ``model_file`` is resolved through
                ``cached_path`` and used instead.
            cuda_device: Device id passed to ``check_for_gpu`` and
                ``load_archive``.
            model_file: Fallback location used only when ``archive_file`` does
                not exist on disk.

        Raises:
            Exception: If ``archive_file`` is missing and ``model_file`` is
                falsy.
        """
        SysPolicy.__init__(self)

        check_for_gpu(cuda_device)

        archive_exists = os.path.isfile(archive_file)
        if not archive_exists and not model_file:
            raise Exception("No model for MILU is specified!")
        if not archive_exists:
            archive_file = cached_path(model_file)

        archive = load_archive(archive_file, cuda_device=cuda_device)
        reader = DatasetReader.from_params(archive.config["dataset_reader"])
        self.dataset_reader = reader

        # The decoder shares the action vocabulary owned by the reader.
        decoder = MultiWozVocabActionDecoder()
        self.action_decoder = decoder
        decoder.action_vocab = reader.action_vocab
        self.state_encoder = reader.state_encoder

        self.model = archive.model
        # Put the model into evaluation mode.
        self.model.eval()
开发者ID:ConvLab,项目名称:ConvLab,代码行数:25,代码来源:policy.py

示例3: set_up_model

# 需要导入模块: from allennlp.data import DatasetReader [as 别名]
# 或者: from allennlp.data.DatasetReader import from_params [as 别名]
def set_up_model(self, param_file, dataset_file):
        """Create the instances, vocabulary, model and batch used by a model test.

        Args:
            param_file: Experiment configuration file, loaded with
                ``Params.from_file``.
            dataset_file: Data file handed (as ``str``) to the dataset reader.

        Sets ``self.param_file``, ``self.vocab``, ``self.instances``,
        ``self.model`` and ``self.dataset``.
        """
        self.param_file = param_file
        params = Params.from_file(self.param_file)

        instances = DatasetReader.from_params(params["dataset_reader"]).read(str(dataset_file))

        # A "vocabulary" section in the config lets options such as
        # "non_padded_namespaces" or "min_count" take effect; without it the
        # vocabulary is derived from the instances alone.
        if "vocabulary" not in params:
            self.vocab = Vocabulary.from_instances(instances)
        else:
            self.vocab = Vocabulary.from_params(params=params["vocabulary"], instances=instances)

        self.instances = instances
        self.instances.index_with(self.vocab)
        self.model = Model.from_params(vocab=self.vocab, params=params["model"])

        # TODO(joelgrus) get rid of these
        # (a lot of the model tests use them, so they'll have to be changed)
        batch = Batch(list(self.instances))
        self.dataset = batch
        batch.index_instances(self.vocab)
开发者ID:allenai,项目名称:allennlp,代码行数:26,代码来源:model_test_case.py

示例4: test_train_can_fine_tune_model_from_archive

# 需要导入模块: from allennlp.data import DatasetReader [as 别名]
# 或者: from allennlp.data.DatasetReader import from_params [as 别名]
def test_train_can_fine_tune_model_from_archive(self):
        """Fine-tuning from an archive must extend the archived vocabulary."""
        classifier_fixtures = self.FIXTURES_ROOT / "basic_classifier"

        params = Params.from_file(classifier_fixtures / "experiment_from_archive.jsonnet")
        train_loop = TrainModel.from_params(
            params=params, serialization_dir=self.TEST_DIR, local_rank=0, batch_weight_key=""
        )
        train_loop.run()

        archived_model = Model.from_archive(
            classifier_fixtures / "serialization" / "model.tar.gz"
        )

        # The training data differs from the data that produced the archive, and
        # the parameters are set so the vocabulary should have been extended;
        # the fine-tuned vocabulary must therefore be strictly larger.
        fine_tuned_size = train_loop.model.vocab.get_vocab_size()
        archived_size = archived_model.vocab.get_vocab_size()
        assert fine_tuned_size > archived_size
开发者ID:allenai,项目名称:allennlp,代码行数:19,代码来源:train_test.py

示例5: test_transferring_of_modules_ensures_type_consistency

# 需要导入模块: from allennlp.data import DatasetReader [as 别名]
# 或者: from allennlp.data.DatasetReader import from_params [as 别名]
def test_transferring_of_modules_ensures_type_consistency(self):
        """Loading a pretrained module into a mismatching slot must be rejected.

        The ``text_field_embedder`` entry is overridden so that it points at the
        archived ``_seq2seq_encoder._module``; building the model from that
        configuration is expected to raise ``ConfigurationError``.
        """
        classifier_fixtures = self.FIXTURES_ROOT / "basic_classifier"

        model_archive = str(classifier_fixtures / "serialization" / "model.tar.gz")
        trained_model = load_archive(model_archive).model

        config_file = str(classifier_fixtures / "experiment_seq2seq.jsonnet")
        full_config = Params.from_file(config_file)
        model_params = full_config.pop("model").as_dict(quiet=True)

        # Override only text_field_embedder and make it load Seq2SeqEncoder
        pretrained_spec = {
            "archive_file": model_archive,
            "module_path": "_seq2seq_encoder._module",
        }
        model_params["text_field_embedder"] = {"_pretrained": pretrained_spec}

        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=trained_model.vocab, params=Params(model_params))
开发者ID:allenai,项目名称:allennlp,代码行数:21,代码来源:from_params_test.py

示例6: test_lazy_construction_can_happen_multiple_times

# 需要导入模块: from allennlp.data import DatasetReader [as 别名]
# 或者: from allennlp.data.DatasetReader import from_params [as 别名]
def test_lazy_construction_can_happen_multiple_times(self):
        """A ``Lazy`` argument can be constructed twice with identical results."""
        test_string = "this is a test"
        extra_string = "extra string"

        class ConstructedObject(FromParams):
            """Plain holder for the two constructor arguments."""

            def __init__(self, string: str, extra: str):
                self.string, self.extra = string, extra

        class Testing(FromParams):
            """Builds the lazy object twice inside its own constructor."""

            def __init__(self, lazy_object: Lazy[ConstructedObject]):
                # Construct both objects first, then check each one.
                built = [lazy_object.construct(extra=extra_string) for _ in range(2)]
                for obj in built:
                    assert obj.string == test_string
                    assert obj.extra == extra_string

        # Only "string" comes from the params; "extra" is supplied at construct time.
        Testing.from_params(Params({"lazy_object": {"string": test_string}}))
开发者ID:allenai,项目名称:allennlp,代码行数:21,代码来源:from_params_test.py

示例7: test_mismatching_dimensions_throws_configuration_error

# 需要导入模块: from allennlp.data import DatasetReader [as 别名]
# 或者: from allennlp.data.DatasetReader import from_params [as 别名]
def test_mismatching_dimensions_throws_configuration_error(self):
        """Each deliberately wrong ``input_size`` must raise ``ConfigurationError``.

        For every case the configuration is reloaded from ``self.param_file``,
        one layer's ``input_size`` is overwritten, and model construction is
        expected to fail.
        """
        # (layer key, wrong input_size). Expected sizes quoted from the original
        # test comments:
        #   1. phrase layer should be 10 (embedding + char cnn dimensions);
        #   2. commented as "modeling layer should be 40 (4 * phrase_layer
        #      output_dim)";
        #   3. commented as "should be 70 (4 * phrase_layer.output_dim +
        #      3 * modeling_layer.output_dim)".
        # NOTE(review): case 2 is described as a modeling-layer check but it
        # mutates phrase_layer again - confirm whether another key was intended.
        wrong_sizes = (
            (u"phrase_layer", 12),
            (u"phrase_layer", 30),
            (u"span_end_encoder", 50),
        )
        for layer_key, bad_input_size in wrong_sizes:
            params = Params.from_file(self.param_file)
            params[u"model"][layer_key][u"input_size"] = bad_input_size
            with pytest.raises(ConfigurationError):
                Model.from_params(vocab=self.vocab, params=params.pop(u"model"))
开发者ID:plasticityai,项目名称:magnitude,代码行数:23,代码来源:bidaf_test.py

示例8: set_up_model

# 需要导入模块: from allennlp.data import DatasetReader [as 别名]
# 或者: from allennlp.data.DatasetReader import from_params [as 别名]
def set_up_model(self, param_file, dataset_file):
        """Create the instances, vocabulary, model and batch used by a model test.

        Args:
            param_file: Experiment configuration file, loaded with
                ``Params.from_file``.
            dataset_file: Data file passed unchanged to the dataset reader.

        Sets ``self.param_file``, ``self.vocab``, ``self.instances``,
        ``self.model`` and ``self.dataset``.
        """
        # pylint: disable=attribute-defined-outside-init
        self.param_file = param_file
        params = Params.from_file(self.param_file)

        instances = DatasetReader.from_params(params[u'dataset_reader']).read(dataset_file)

        # A "vocabulary" section in the config lets options such as
        # "non_padded_namespaces" or "min_count" take effect; without it the
        # vocabulary is derived from the instances alone.
        if u'vocabulary' not in params:
            self.vocab = Vocabulary.from_instances(instances)
        else:
            self.vocab = Vocabulary.from_params(params=params[u'vocabulary'],
                                                instances=instances)

        self.instances = instances
        self.model = Model.from_params(vocab=self.vocab, params=params[u'model'])

        # TODO(joelgrus) get rid of these
        # (a lot of the model tests use them, so they'll have to be changed)
        batch = Batch(self.instances)
        self.dataset = batch
        batch.index_instances(self.vocab)
开发者ID:plasticityai,项目名称:magnitude,代码行数:24,代码来源:model_test_case.py

示例9: from_archive

# 需要导入模块: from allennlp.data import DatasetReader [as 别名]
# 或者: from allennlp.data.DatasetReader import from_params [as 别名]
def from_archive(cls, archive, predictor_name=None):
        u"""
        Instantiate a :class:`Predictor` from an :class:`~allennlp.models.archival.Archive`;
        that is, from the result of training a model. Optionally specify which `Predictor`
        subclass; otherwise, the default one for the model will be used.

        Parameters
        ----------
        archive :
            Loaded archive; its ``config`` and ``model`` attributes are read.
        predictor_name :
            Registered predictor name. When falsy, the name is looked up in
            ``DEFAULT_PREDICTORS`` using the model's ``type`` from the config.

        Returns
        -------
        The predictor registered under ``predictor_name``, constructed with the
        archived model (in eval mode) and a dataset reader built from the config.

        Raises
        ------
        ConfigurationError
            If no ``predictor_name`` is given and the model type has no entry in
            ``DEFAULT_PREDICTORS``.
        """
        # Duplicate the config so that the config inside the archive doesn't get consumed
        config = archive.config.duplicate()

        if not predictor_name:
            model_type = config.get(u"model").get(u"type")
            if model_type not in DEFAULT_PREDICTORS:
                # The placeholder must be interpolated explicitly; without the
                # .format() call the message showed a literal "{model_type}".
                raise ConfigurationError(
                    "No default predictor for model type {model_type}.\n"
                    "Please specify a predictor explicitly.".format(model_type=model_type))
            predictor_name = DEFAULT_PREDICTORS[model_type]

        dataset_reader_params = config[u"dataset_reader"]
        dataset_reader = DatasetReader.from_params(dataset_reader_params)

        model = archive.model
        model.eval()

        return Predictor.by_name(predictor_name)(model, dataset_reader)
开发者ID:plasticityai,项目名称:magnitude,代码行数:25,代码来源:predictor.py

示例10: get_bert_test_fixture

# 需要导入模块: from allennlp.data import DatasetReader [as 别名]
# 或者: from allennlp.data.DatasetReader import from_params [as 别名]
def get_bert_test_fixture():
    """Build the BERT test embedder and indexer together with their parameters.

    Returns:
        dict with keys ``'embedder'``, ``'embedder_params'``, ``'indexer'`` and
        ``'indexer_params'``. The ``*_params`` entries are shallow copies taken
        before the dictionaries are wrapped in ``Params``.
    """
    fixture = {}

    embedder_params = {
        "type": "bert-pretrained",
        "pretrained_model": "tests/fixtures/bert/bert_test_fixture.tar.gz",
        "requires_grad": True,
        "top_layer_only": True,
    }
    # Copy first - presumably from_params consumes the dict it is given
    # (TODO confirm against Params behaviour).
    pristine_embedder_params = dict(embedder_params)
    fixture['embedder'] = TokenEmbedder.from_params(Params(embedder_params))
    fixture['embedder_params'] = pristine_embedder_params

    indexer_params = {
        "type": "bert-pretrained",
        "pretrained_model": "tests/fixtures/bert/vocab.txt",
        "do_lowercase": True,
        "use_starting_offsets": True,
        "max_pieces": 512,
    }
    pristine_indexer_params = dict(indexer_params)
    fixture['indexer'] = TokenIndexer.from_params(Params(indexer_params))
    fixture['indexer_params'] = pristine_indexer_params

    return fixture
开发者ID:allenai,项目名称:kb,代码行数:25,代码来源:testing.py

示例11: get_reader

# 需要导入模块: from allennlp.data import DatasetReader [as 别名]
# 或者: from allennlp.data.DatasetReader import from_params [as 别名]
def get_reader():
    """Return a ``kg_probe`` dataset reader configured from the test fixtures."""
    wordnet_candidate_generator = {
        "type": "wordnet_mention_generator",
        "entity_file": "tests/fixtures/wordnet/entities_fixture.jsonl",
    }
    wordnet_entity_indexer = {
        "type": "characters_tokenizer",
        "tokenizer": {
            "type": "word",
            "word_splitter": {"type": "just_spaces"},
        },
        "namespace": "entity",
    }
    tokenizer_and_candidate_generator = {
        "type": "bert_tokenizer_and_candidate_generator",
        "entity_candidate_generators": {"wordnet": wordnet_candidate_generator},
        "entity_indexers": {"wordnet": wordnet_entity_indexer},
        "bert_model_type": "tests/fixtures/bert/vocab.txt",
        "do_lower_case": True,
    }
    reader_config = {
        "type": "kg_probe",
        "tokenizer_and_candidate_generator": tokenizer_and_candidate_generator,
    }

    return DatasetReader.from_params(Params(reader_config))
开发者ID:allenai,项目名称:kb,代码行数:27,代码来源:test_kg_probe_reader.py

示例12: get_reader

# 需要导入模块: from allennlp.data import DatasetReader [as 别名]
# 或者: from allennlp.data.DatasetReader import from_params [as 别名]
def get_reader():
    """Return a ``tacred`` dataset reader configured from the test fixtures."""
    wordnet_candidate_generator = {
        "type": "wordnet_mention_generator",
        "entity_file": "tests/fixtures/wordnet/entities_fixture.jsonl",
    }
    wordnet_entity_indexer = {
        "type": "characters_tokenizer",
        "tokenizer": {
            "type": "word",
            "word_splitter": {"type": "just_spaces"},
        },
        "namespace": "entity",
    }
    tokenizer_and_candidate_generator = {
        "type": "bert_tokenizer_and_candidate_generator",
        "entity_candidate_generators": {"wordnet": wordnet_candidate_generator},
        "entity_indexers": {"wordnet": wordnet_entity_indexer},
        "bert_model_type": "tests/fixtures/tacred/vocab.txt",
        "do_lower_case": True,
    }
    reader_config = {
        "type": "tacred",
        "tokenizer_and_candidate_generator": tokenizer_and_candidate_generator,
    }
    return DatasetReader.from_params(Params(reader_config))
开发者ID:allenai,项目名称:kb,代码行数:26,代码来源:test_tacred_reader.py

示例13: set_up_model

# 需要导入模块: from allennlp.data import DatasetReader [as 别名]
# 或者: from allennlp.data.DatasetReader import from_params [as 别名]
def set_up_model(self, param_file, dataset_file):
        """Create the instances, vocabulary, model and batch used by a model test.

        Args:
            param_file: Experiment configuration file, loaded with
                ``Params.from_file``.
            dataset_file: Data file handed (as ``str``) to the dataset reader.

        Sets ``self.param_file``, ``self.vocab``, ``self.instances``,
        ``self.model`` and ``self.dataset``.
        """
        # pylint: disable=attribute-defined-outside-init
        self.param_file = param_file
        params = Params.from_file(self.param_file)

        reader = DatasetReader.from_params(params['dataset_reader'])
        # The reader may be lazy; materialise the instances into a list because a
        # lazy list here breaks some of the tests.
        instances = list(reader.read(str(dataset_file)))

        # A "vocabulary" section in the config lets options such as
        # "non_padded_namespaces" or "min_count" take effect; without it the
        # vocabulary is derived from the instances alone.
        if 'vocabulary' not in params:
            self.vocab = Vocabulary.from_instances(instances)
        else:
            self.vocab = Vocabulary.from_params(params=params['vocabulary'],
                                                instances=instances)

        self.instances = instances
        self.model = Model.from_params(vocab=self.vocab, params=params['model'])

        # TODO(joelgrus) get rid of these
        # (a lot of the model tests use them, so they'll have to be changed)
        batch = Batch(self.instances)
        self.dataset = batch
        batch.index_instances(self.vocab)
开发者ID:allenai,项目名称:vampire,代码行数:25,代码来源:test_case.py

示例14: __init__

# 需要导入模块: from allennlp.data import DatasetReader [as 别名]
# 或者: from allennlp.data.DatasetReader import from_params [as 别名]
def __init__(self,
                archive_file=DEFAULT_ARCHIVE_FILE,
                cuda_device=DEFAULT_CUDA_DEVICE,
                model_file=None,
                context_size=3):
        """Load a trained model archive and keep its reader, tokenizer and model.

        Args:
            archive_file: Path to a local model archive. When it is not an
                existing file, ``model_file`` is resolved through
                ``cached_path`` and used instead.
            cuda_device: Device id passed to ``check_for_gpu`` and
                ``load_archive``.
            model_file: Fallback location used only when ``archive_file`` does
                not exist on disk.
            context_size: Stored unchanged on ``self.context_size``.

        Raises:
            Exception: If ``archive_file`` is missing and ``model_file`` is
                falsy.
        """
        self.context_size = context_size

        check_for_gpu(cuda_device)

        archive_exists = os.path.isfile(archive_file)
        if not archive_exists and not model_file:
            raise Exception("No model for MILU is specified!")
        if not archive_exists:
            archive_file = cached_path(model_file)

        archive = load_archive(archive_file, cuda_device=cuda_device)

        self.tokenizer = SpacyWordSplitter(language="en_core_web_sm")
        # Register "id" as a tokenizer special case with itself as ORTH and LEMMA.
        id_special_case = [{ORTH: u"id", LEMMA: u"id"}]
        self.tokenizer.spacy.tokenizer.add_special_case(u"id", id_special_case)

        # The reader is rebuilt from the configuration stored in the archive.
        self.dataset_reader = DatasetReader.from_params(archive.config["dataset_reader"])
        self.model = archive.model
        # Put the model into evaluation mode.
        self.model.eval()
开发者ID:ConvLab,项目名称:ConvLab,代码行数:29,代码来源:nlu.py

示例15: from_archive

# 需要导入模块: from allennlp.data import DatasetReader [as 别名]
# 或者: from allennlp.data.DatasetReader import from_params [as 别名]
def from_archive(
        cls,
        archive: Archive,
        predictor_name: str = None,
        dataset_reader_to_load: str = "validation",
        frozen: bool = True,
    ) -> "Predictor":
        """
        Build a `Predictor` from an `Archive`, i.e. from the output of training a model.

        # Parameters

        archive : `Archive`
            Its `config` (duplicated before use) and `model` are read.
        predictor_name : `str`, optional
            Registered predictor name. When falsy, the `default_predictor` of the
            model class resolved from the config's model type is used; if that is
            `None` as well, `cls` itself is instantiated.
        dataset_reader_to_load : `str`
            With the value `"validation"`, the `validation_dataset_reader` section
            is used when the config has one; in every other case the
            `dataset_reader` section is used.
        frozen : `bool`
            When true, `model.eval()` is called on the archived model.

        # Returns

        An instance of the selected predictor class, constructed with the model
        and the dataset reader.
        """
        # Work on a duplicate so the config held by the archive is not consumed.
        config = archive.config.duplicate()

        if not predictor_name:
            model_type = config.get("model").get("type")
            model_class, _ = Model.resolve_class_name(model_type)
            predictor_name = model_class.default_predictor

        predictor_class: Type[Predictor]
        if predictor_name is None:
            predictor_class = cls
        else:
            predictor_class = Predictor.by_name(predictor_name)  # type: ignore

        prefer_validation_reader = (
            dataset_reader_to_load == "validation" and "validation_dataset_reader" in config
        )
        reader_key = "validation_dataset_reader" if prefer_validation_reader else "dataset_reader"
        dataset_reader = DatasetReader.from_params(config[reader_key])

        model = archive.model
        if frozen:
            model.eval()

        return predictor_class(model, dataset_reader)
开发者ID:allenai,项目名称:allennlp,代码行数:40,代码来源:predictor.py


注:本文中的 allennlp.data.DatasetReader.from_params 方法示例由纯净天空整理自 Github/MSDocs 等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的 License;未经允许,请勿转载。