This article collects typical usage examples of the allennlp.data.DatasetReader.from_params method in Python. If you are wondering what DatasetReader.from_params does, how to call it, or where to find real-world examples of its use, the curated code samples below should help. You can also explore further usage examples of the class it belongs to, allennlp.data.DatasetReader.
The following 15 code examples of DatasetReader.from_params are sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
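Before the examples, a quick orientation: from_params builds a registered DatasetReader subclass from a Params object, dispatching on the "type" key of the parameters. A minimal sketch, assuming an AllenNLP installation where the built-in sequence_tagging reader is registered:

from allennlp.common import Params
from allennlp.data import DatasetReader

# "sequence_tagging" is one of AllenNLP's built-in reader types; any registered
# reader name plus its constructor arguments can appear in the params dict.
reader = DatasetReader.from_params(Params({"type": "sequence_tagging"}))
print(type(reader).__name__)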
Example 1: __init__
# Module to import: from allennlp.data import DatasetReader [as alias]
# Alternatively: from allennlp.data.DatasetReader import from_params [as alias]
def __init__(self,
             archive_file=DEFAULT_ARCHIVE_FILE,
             cuda_device=DEFAULT_CUDA_DEVICE,
             model_file=None):
    """ Constructor for NLU class. """
    check_for_gpu(cuda_device)
    if not os.path.isfile(archive_file):
        if not model_file:
            raise Exception("No model for JointNLU is specified!")
        archive_file = cached_path(model_file)
    archive = load_archive(archive_file,
                           cuda_device=cuda_device)
    self.tokenizer = SpacyWordSplitter(language="en_core_web_sm")
    dataset_reader_params = archive.config["dataset_reader"]
    self.dataset_reader = DatasetReader.from_params(dataset_reader_params)
    self.model = archive.model
    self.model.eval()
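Once the archive's dataset reader and model have been restored like this, they are typically used together at inference time. A rough sketch of such a helper with hypothetical names; the exact text_to_instance signature depends on the concrete reader that was archived:

def predict(self, utterance):
    # Hypothetical inference helper: tokenize, build an Instance with the
    # restored reader, and run the restored model on it.
    tokens = [token.text for token in self.tokenizer.split_words(utterance)]
    instance = self.dataset_reader.text_to_instance(tokens)
    return self.model.forward_on_instance(instance)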
Example 2: __init__
# Module to import: from allennlp.data import DatasetReader [as alias]
# Alternatively: from allennlp.data.DatasetReader import from_params [as alias]
def __init__(self,
             archive_file=DEFAULT_ARCHIVE_FILE,
             cuda_device=DEFAULT_CUDA_DEVICE,
             model_file=None):
    """ Constructor for NLU class. """
    SysPolicy.__init__(self)
    check_for_gpu(cuda_device)
    if not os.path.isfile(archive_file):
        if not model_file:
            raise Exception("No model for MILU is specified!")
        archive_file = cached_path(model_file)
    archive = load_archive(archive_file,
                           cuda_device=cuda_device)
    dataset_reader_params = archive.config["dataset_reader"]
    self.dataset_reader = DatasetReader.from_params(dataset_reader_params)
    self.action_decoder = MultiWozVocabActionDecoder()
    self.action_decoder.action_vocab = self.dataset_reader.action_vocab
    self.state_encoder = self.dataset_reader.state_encoder
    self.model = archive.model
    self.model.eval()
Example 3: set_up_model
# Module to import: from allennlp.data import DatasetReader [as alias]
# Alternatively: from allennlp.data.DatasetReader import from_params [as alias]
def set_up_model(self, param_file, dataset_file):
    self.param_file = param_file
    params = Params.from_file(self.param_file)

    reader = DatasetReader.from_params(params["dataset_reader"])
    # The dataset reader might be lazy, but a lazy list here breaks some of our tests.
    instances = reader.read(str(dataset_file))
    # Use parameters for vocabulary if they are present in the config file, so that choices like
    # "non_padded_namespaces", "min_count" etc. can be set if needed.
    if "vocabulary" in params:
        vocab_params = params["vocabulary"]
        vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
    else:
        vocab = Vocabulary.from_instances(instances)
    self.vocab = vocab
    self.instances = instances
    self.instances.index_with(vocab)
    self.model = Model.from_params(vocab=self.vocab, params=params["model"])

    # TODO(joelgrus) get rid of these
    # (a lot of the model tests use them, so they'll have to be changed)
    self.dataset = Batch(list(self.instances))
    self.dataset.index_instances(self.vocab)
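set_up_model above is the helper from AllenNLP's ModelTestCase base class. A minimal sketch of a test that builds on it; the fixture paths are hypothetical, and depending on the AllenNLP version the hook is setup_method or setUp:

from allennlp.common.testing import ModelTestCase

class MyClassifierTest(ModelTestCase):
    def setup_method(self):
        super().setup_method()
        # Hypothetical paths to your own experiment config and training data.
        self.set_up_model("tests/fixtures/my_experiment.jsonnet",
                          "tests/fixtures/my_train_data.jsonl")

    def test_model_can_train_save_and_load(self):
        # Trains briefly, saves, reloads, and checks that predictions match.
        self.ensure_model_can_train_save_and_load(self.param_file)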
Example 4: test_train_can_fine_tune_model_from_archive
# Module to import: from allennlp.data import DatasetReader [as alias]
# Alternatively: from allennlp.data.DatasetReader import from_params [as alias]
def test_train_can_fine_tune_model_from_archive(self):
    params = Params.from_file(
        self.FIXTURES_ROOT / "basic_classifier" / "experiment_from_archive.jsonnet"
    )
    train_loop = TrainModel.from_params(
        params=params, serialization_dir=self.TEST_DIR, local_rank=0, batch_weight_key=""
    )
    train_loop.run()

    model = Model.from_archive(
        self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
    )

    # This is checking that the vocabulary actually got extended. The data that we're using for
    # training is different from the data we used to produce the model archive, and we set
    # parameters such that the vocab should have been extended.
    assert train_loop.model.vocab.get_vocab_size() > model.vocab.get_vocab_size()
Example 5: test_transferring_of_modules_ensures_type_consistency
# Module to import: from allennlp.data import DatasetReader [as alias]
# Alternatively: from allennlp.data.DatasetReader import from_params [as alias]
def test_transferring_of_modules_ensures_type_consistency(self):
    model_archive = str(
        self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
    )
    trained_model = load_archive(model_archive).model

    config_file = str(self.FIXTURES_ROOT / "basic_classifier" / "experiment_seq2seq.jsonnet")
    model_params = Params.from_file(config_file).pop("model").as_dict(quiet=True)

    # Override only text_field_embedder and make it load Seq2SeqEncoder
    model_params["text_field_embedder"] = {
        "_pretrained": {
            "archive_file": model_archive,
            "module_path": "_seq2seq_encoder._module",
        }
    }
    with pytest.raises(ConfigurationError):
        Model.from_params(vocab=trained_model.vocab, params=Params(model_params))
Example 6: test_lazy_construction_can_happen_multiple_times
# Module to import: from allennlp.data import DatasetReader [as alias]
# Alternatively: from allennlp.data.DatasetReader import from_params [as alias]
def test_lazy_construction_can_happen_multiple_times(self):
    test_string = "this is a test"
    extra_string = "extra string"

    class ConstructedObject(FromParams):
        def __init__(self, string: str, extra: str):
            self.string = string
            self.extra = extra

    class Testing(FromParams):
        def __init__(self, lazy_object: Lazy[ConstructedObject]):
            first_time = lazy_object.construct(extra=extra_string)
            second_time = lazy_object.construct(extra=extra_string)
            assert first_time.string == test_string
            assert first_time.extra == extra_string
            assert second_time.string == test_string
            assert second_time.extra == extra_string

    Testing.from_params(Params({"lazy_object": {"string": test_string}}))
Example 7: test_mismatching_dimensions_throws_configuration_error
# Module to import: from allennlp.data import DatasetReader [as alias]
# Alternatively: from allennlp.data.DatasetReader import from_params [as alias]
def test_mismatching_dimensions_throws_configuration_error(self):
    params = Params.from_file(self.param_file)
    # Make the phrase layer wrong - it should be 10 to match
    # the embedding + char cnn dimensions.
    params[u"model"][u"phrase_layer"][u"input_size"] = 12
    with pytest.raises(ConfigurationError):
        Model.from_params(vocab=self.vocab, params=params.pop(u"model"))

    params = Params.from_file(self.param_file)
    # Make the modeling layer input_dimension wrong - it should be 40 to match
    # 4 * output_dim of the phrase_layer.
    params[u"model"][u"modeling_layer"][u"input_size"] = 30
    with pytest.raises(ConfigurationError):
        Model.from_params(vocab=self.vocab, params=params.pop(u"model"))

    params = Params.from_file(self.param_file)
    # Make the modeling layer input_dimension wrong - it should be 70 to match
    # 4 * phrase_layer.output_dim + 3 * modeling_layer.output_dim.
    params[u"model"][u"span_end_encoder"][u"input_size"] = 50
    with pytest.raises(ConfigurationError):
        Model.from_params(vocab=self.vocab, params=params.pop(u"model"))
Example 8: set_up_model
# Module to import: from allennlp.data import DatasetReader [as alias]
# Alternatively: from allennlp.data.DatasetReader import from_params [as alias]
def set_up_model(self, param_file, dataset_file):
    # pylint: disable=attribute-defined-outside-init
    self.param_file = param_file
    params = Params.from_file(self.param_file)

    reader = DatasetReader.from_params(params[u'dataset_reader'])
    instances = reader.read(dataset_file)
    # Use parameters for vocabulary if they are present in the config file, so that choices like
    # "non_padded_namespaces", "min_count" etc. can be set if needed.
    if u'vocabulary' in params:
        vocab_params = params[u'vocabulary']
        vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
    else:
        vocab = Vocabulary.from_instances(instances)
    self.vocab = vocab
    self.instances = instances
    self.model = Model.from_params(vocab=self.vocab, params=params[u'model'])

    # TODO(joelgrus) get rid of these
    # (a lot of the model tests use them, so they'll have to be changed)
    self.dataset = Batch(self.instances)
    self.dataset.index_instances(self.vocab)
Example 9: from_archive
# Module to import: from allennlp.data import DatasetReader [as alias]
# Alternatively: from allennlp.data.DatasetReader import from_params [as alias]
def from_archive(cls, archive, predictor_name=None):
    u"""
    Instantiate a :class:`Predictor` from an :class:`~allennlp.models.archival.Archive`;
    that is, from the result of training a model. Optionally specify which `Predictor`
    subclass; otherwise, the default one for the model will be used.
    """
    # Duplicate the config so that the config inside the archive doesn't get consumed
    config = archive.config.duplicate()

    if not predictor_name:
        model_type = config.get(u"model").get(u"type")
        if not model_type in DEFAULT_PREDICTORS:
            raise ConfigurationError("No default predictor for model type {model_type}.\n"
                                     "Please specify a predictor explicitly."
                                     .format(model_type=model_type))
        predictor_name = DEFAULT_PREDICTORS[model_type]

    dataset_reader_params = config[u"dataset_reader"]
    dataset_reader = DatasetReader.from_params(dataset_reader_params)

    model = archive.model
    model.eval()

    return Predictor.by_name(predictor_name)(model, dataset_reader)
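A rough usage sketch for this older API; the archive path is hypothetical, and the model type must have an entry in DEFAULT_PREDICTORS unless predictor_name is given explicitly (the sentence-tagger predictor shown is one of AllenNLP's built-ins; substitute whatever fits your model):

archive = load_archive("path/to/model.tar.gz")  # hypothetical path
predictor = Predictor.from_archive(archive, predictor_name="sentence-tagger")
result = predictor.predict_json({"sentence": "AllenNLP builds its readers with from_params."})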
Example 10: get_bert_test_fixture
# Module to import: from allennlp.data import DatasetReader [as alias]
# Alternatively: from allennlp.data.DatasetReader import from_params [as alias]
def get_bert_test_fixture():
    embedder_params = {
        "type": "bert-pretrained",
        "pretrained_model": "tests/fixtures/bert/bert_test_fixture.tar.gz",
        "requires_grad": True,
        "top_layer_only": True,
    }
    embedder_params_copy = dict(embedder_params)
    embedder = TokenEmbedder.from_params(Params(embedder_params))

    indexer_params = {
        "type": "bert-pretrained",
        "pretrained_model": "tests/fixtures/bert/vocab.txt",
        "do_lowercase": True,
        "use_starting_offsets": True,
        "max_pieces": 512,
    }
    indexer_params_copy = dict(indexer_params)
    indexer = TokenIndexer.from_params(Params(indexer_params))

    return {'embedder': embedder, 'embedder_params': embedder_params_copy,
            'indexer': indexer, 'indexer_params': indexer_params_copy}
Example 11: get_reader
# Module to import: from allennlp.data import DatasetReader [as alias]
# Alternatively: from allennlp.data.DatasetReader import from_params [as alias]
def get_reader():
    params = {
        "type": "kg_probe",
        "tokenizer_and_candidate_generator": {
            "type": "bert_tokenizer_and_candidate_generator",
            "entity_candidate_generators": {
                "wordnet": {"type": "wordnet_mention_generator",
                            "entity_file": "tests/fixtures/wordnet/entities_fixture.jsonl"}
            },
            "entity_indexers": {
                "wordnet": {
                    "type": "characters_tokenizer",
                    "tokenizer": {
                        "type": "word",
                        "word_splitter": {"type": "just_spaces"},
                    },
                    "namespace": "entity"
                }
            },
            "bert_model_type": "tests/fixtures/bert/vocab.txt",
            "do_lower_case": True,
        },
    }
    return DatasetReader.from_params(Params(params))
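A short usage sketch for the reader built above; the input path is hypothetical and would need to point at a file in the format the kg_probe reader expects:

reader = get_reader()
# Hypothetical fixture path; reader.read yields the dataset's Instance objects.
instances = reader.read("tests/fixtures/kg_probe/sentences.txt")
for instance in instances:
    print(sorted(instance.fields.keys()))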
Example 12: get_reader
# Module to import: from allennlp.data import DatasetReader [as alias]
# Alternatively: from allennlp.data.DatasetReader import from_params [as alias]
def get_reader():
    params = {
        "type": "tacred",
        "tokenizer_and_candidate_generator": {
            "type": "bert_tokenizer_and_candidate_generator",
            "entity_candidate_generators": {
                "wordnet": {"type": "wordnet_mention_generator",
                            "entity_file": "tests/fixtures/wordnet/entities_fixture.jsonl"}
            },
            "entity_indexers": {
                "wordnet": {
                    "type": "characters_tokenizer",
                    "tokenizer": {
                        "type": "word",
                        "word_splitter": {"type": "just_spaces"},
                    },
                    "namespace": "entity"
                }
            },
            "bert_model_type": "tests/fixtures/tacred/vocab.txt",
            "do_lower_case": True,
        }
    }
    return DatasetReader.from_params(Params(params))
Example 13: set_up_model
# Module to import: from allennlp.data import DatasetReader [as alias]
# Alternatively: from allennlp.data.DatasetReader import from_params [as alias]
def set_up_model(self, param_file, dataset_file):
    # pylint: disable=attribute-defined-outside-init
    self.param_file = param_file
    params = Params.from_file(self.param_file)

    reader = DatasetReader.from_params(params['dataset_reader'])
    # The dataset reader might be lazy, but a lazy list here breaks some of our tests.
    instances = list(reader.read(str(dataset_file)))
    # Use parameters for vocabulary if they are present in the config file, so that choices like
    # "non_padded_namespaces", "min_count" etc. can be set if needed.
    if 'vocabulary' in params:
        vocab_params = params['vocabulary']
        vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
    else:
        vocab = Vocabulary.from_instances(instances)
    self.vocab = vocab
    self.instances = instances
    self.model = Model.from_params(vocab=self.vocab, params=params['model'])

    # TODO(joelgrus) get rid of these
    # (a lot of the model tests use them, so they'll have to be changed)
    self.dataset = Batch(self.instances)
    self.dataset.index_instances(self.vocab)
Example 14: __init__
# Module to import: from allennlp.data import DatasetReader [as alias]
# Alternatively: from allennlp.data.DatasetReader import from_params [as alias]
def __init__(self,
             archive_file=DEFAULT_ARCHIVE_FILE,
             cuda_device=DEFAULT_CUDA_DEVICE,
             model_file=None,
             context_size=3):
    """ Constructor for NLU class. """
    self.context_size = context_size
    check_for_gpu(cuda_device)
    if not os.path.isfile(archive_file):
        if not model_file:
            raise Exception("No model for MILU is specified!")
        archive_file = cached_path(model_file)
    archive = load_archive(archive_file,
                           cuda_device=cuda_device)
    self.tokenizer = SpacyWordSplitter(language="en_core_web_sm")
    _special_case = [{ORTH: u"id", LEMMA: u"id"}]
    self.tokenizer.spacy.tokenizer.add_special_case(u"id", _special_case)
    dataset_reader_params = archive.config["dataset_reader"]
    self.dataset_reader = DatasetReader.from_params(dataset_reader_params)
    self.model = archive.model
    self.model.eval()
Example 15: from_archive
# Module to import: from allennlp.data import DatasetReader [as alias]
# Alternatively: from allennlp.data.DatasetReader import from_params [as alias]
def from_archive(
    cls,
    archive: Archive,
    predictor_name: str = None,
    dataset_reader_to_load: str = "validation",
    frozen: bool = True,
) -> "Predictor":
    """
    Instantiate a `Predictor` from an [`Archive`](../models/archival.md);
    that is, from the result of training a model. Optionally specify which `Predictor`
    subclass; otherwise, we try to find a corresponding predictor in `DEFAULT_PREDICTORS`, or if
    one is not found, the base class (i.e. `Predictor`) will be used. Optionally specify
    which [`DatasetReader`](../data/dataset_readers/dataset_reader.md) should be loaded;
    otherwise, the validation one will be used if it exists followed by the training dataset reader.
    Optionally specify if the loaded model should be frozen, meaning `model.eval()` will be called.
    """
    # Duplicate the config so that the config inside the archive doesn't get consumed
    config = archive.config.duplicate()

    if not predictor_name:
        model_type = config.get("model").get("type")
        model_class, _ = Model.resolve_class_name(model_type)
        predictor_name = model_class.default_predictor
    predictor_class: Type[Predictor] = Predictor.by_name(  # type: ignore
        predictor_name
    ) if predictor_name is not None else cls

    if dataset_reader_to_load == "validation" and "validation_dataset_reader" in config:
        dataset_reader_params = config["validation_dataset_reader"]
    else:
        dataset_reader_params = config["dataset_reader"]
    dataset_reader = DatasetReader.from_params(dataset_reader_params)

    model = archive.model
    if frozen:
        model.eval()

    return predictor_class(model, dataset_reader)
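For comparison with Example 9, a rough usage sketch of this newer signature; the archive path is hypothetical. Here the training dataset reader is loaded instead of the validation one, and the model is left unfrozen (model.eval() is not called), e.g. for further fine-tuning:

archive = load_archive("path/to/model.tar.gz", cuda_device=-1)  # hypothetical path
predictor = Predictor.from_archive(
    archive,
    dataset_reader_to_load="train",  # fall back to the training dataset reader
    frozen=False,                    # keep the model in train mode
)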