本文整理汇总了Python中allennlp.data.dataset_readers.dataset_reader.DatasetReader.from_params方法的典型用法代码示例。如果您正苦于以下问题:Python DatasetReader.from_params方法的具体用法?Python DatasetReader.from_params怎么用?Python DatasetReader.from_params使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类allennlp.data.dataset_readers.dataset_reader.DatasetReader
的用法示例。
在下文中一共展示了DatasetReader.from_params方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: evaluate_from_args
# 需要导入模块: from allennlp.data.dataset_readers.dataset_reader import DatasetReader [as 别名]
# 或者: from allennlp.data.dataset_readers.dataset_reader.DatasetReader import from_params [as 别名]
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    """Evaluate an archived model on a dataset file and return its metrics."""
    # Silence the chattier AllenNLP loggers for the duration of the run.
    for noisy_logger in ('allennlp.common.params', 'allennlp.nn.initializers'):
        logging.getLogger(noisy_logger).disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Restore the trained model and its configuration from the archive.
    archive = load_archive(args.archive_file, args.cuda_device, args.overrides)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Rebuild the dataset reader from the training config and load the data.
    dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.evaluation_data_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    dataset = dataset_reader.read(evaluation_data_path)

    iterator = DataIterator.from_params(config.pop("iterator"))
    iterator.index_with(model.vocab)
    metrics = evaluate(model, dataset, iterator, args.output_file)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for metric_name, metric_value in metrics.items():
        logger.info("%s: %s", metric_name, metric_value)
    return metrics
示例2: datasets_from_params
# 需要导入模块: from allennlp.data.dataset_readers.dataset_reader import DatasetReader [as 别名]
# 或者: from allennlp.data.dataset_readers.dataset_reader.DatasetReader import from_params [as 别名]
def datasets_from_params(params):
    u"""Load every dataset (train / validation / test) named in the config."""
    dataset_reader = DatasetReader.from_params(params.pop(u'dataset_reader'))

    # An optional dedicated reader for validation/test; defaults to the
    # training reader when the config does not provide one.
    validation_and_test_dataset_reader = dataset_reader
    validation_dataset_reader_params = params.pop(u"validation_dataset_reader", None)
    if validation_dataset_reader_params is not None:
        logger.info(u"Using a separate dataset reader to load validation and test data.")
        validation_and_test_dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)

    # Training data is mandatory.
    train_data_path = params.pop(u'train_data_path')
    logger.info(u"Reading training data from %s", train_data_path)
    datasets = {u"train": dataset_reader.read(train_data_path)}

    # Validation and test sets are optional.
    validation_data_path = params.pop(u'validation_data_path', None)
    if validation_data_path is not None:
        logger.info(u"Reading validation data from %s", validation_data_path)
        datasets[u"validation"] = validation_and_test_dataset_reader.read(validation_data_path)

    test_data_path = params.pop(u"test_data_path", None)
    if test_data_path is not None:
        logger.info(u"Reading test data from %s", test_data_path)
        datasets[u"test"] = validation_and_test_dataset_reader.read(test_data_path)

    return datasets
示例3: target_to_lines
# 需要导入模块: from allennlp.data.dataset_readers.dataset_reader import DatasetReader [as 别名]
# 或者: from allennlp.data.dataset_readers.dataset_reader.DatasetReader import from_params [as 别名]
def target_to_lines(archive_file, input_file, output_file, lowercase=True):
    """Write each target from ``input_file`` to ``output_file``, one per line.

    The dataset reader is reconstructed from the archive's config; newlines
    inside a target are flattened to spaces so each target stays on one line.
    """
    archive = load_archive(archive_file)
    reader = DatasetReader.from_params(archive.config.pop("dataset_reader"))
    with open(output_file, "w") as out:
        for record in reader.parse_set(input_file):
            text = record[1].strip()
            if lowercase:
                text = text.lower()
            out.write(text.replace("\n", " ") + "\n")
示例4: evaluate_from_args
# 需要导入模块: from allennlp.data.dataset_readers.dataset_reader import DatasetReader [as 别名]
# 或者: from allennlp.data.dataset_readers.dataset_reader.DatasetReader import from_params [as 别名]
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    """Evaluate an archived model on ``args.input_file`` and return metrics."""
    # Quiet down the noisier AllenNLP loggers.
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Rebuild the trained model from the archive.
    archive = load_archive(args.archive_file, args.cuda_device, args.overrides, args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Prefer a dedicated validation reader when the config defines one;
    # otherwise reuse the reader that was used for training.
    reader_params = config.pop('validation_dataset_reader', None)
    if reader_params is None:
        reader_params = config.pop('dataset_reader')
    dataset_reader = DatasetReader.from_params(reader_params)

    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    embedding_sources: Dict[str, str] = (
        json.loads(args.embedding_sources_mapping) if args.embedding_sources_mapping else {}
    )
    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(Params({}), instances=instances)
        model.extend_embedder_vocab(embedding_sources)

    # Fall back to the training iterator when no validation iterator exists.
    iterator_params = config.pop("validation_iterator", None)
    if iterator_params is None:
        iterator_params = config.pop("iterator")
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)

    metrics = evaluate(model, instances, iterator, args.cuda_device, args.batch_weight_key)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for metric_name, metric_value in metrics.items():
        logger.info("%s: %s", metric_name, metric_value)

    # Optionally persist the metrics as JSON.
    if args.output_file:
        with open(args.output_file, "w") as handle:
            json.dump(metrics, handle, indent=4)
    return metrics
示例5: evaluate_from_args
# 需要导入模块: from allennlp.data.dataset_readers.dataset_reader import DatasetReader [as 别名]
# 或者: from allennlp.data.dataset_readers.dataset_reader.DatasetReader import from_params [as 别名]
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    """Load an archived model, evaluate it on ``args.input_file``, and
    return (optionally also writing to disk) the resulting metrics."""
    # These loggers are too verbose for an evaluation run.
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Restore model + config from the archive.
    archive = load_archive(args.archive_file, args.cuda_device, args.overrides, args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # A validation-specific reader takes precedence over the training reader.
    validation_reader_params = config.pop('validation_dataset_reader', None)
    if validation_reader_params is not None:
        reader = DatasetReader.from_params(validation_reader_params)
    else:
        reader = DatasetReader.from_params(config.pop('dataset_reader'))

    data_path = args.input_file
    logger.info("Reading evaluation data from %s", data_path)
    instances = reader.read(data_path)

    mapping = args.embedding_sources_mapping
    embedding_sources: Dict[str, str] = json.loads(mapping) if mapping else {}
    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(Params({}), instances=instances)
        model.extend_embedder_vocab(embedding_sources)

    iterator_config = config.pop("validation_iterator", None)
    if iterator_config is None:
        iterator_config = config.pop("iterator")
    iterator = DataIterator.from_params(iterator_config)
    iterator.index_with(model.vocab)

    metrics = evaluate(model, instances, iterator, args.cuda_device, args.batch_weight_key)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    # Dump metrics to JSON only when an output path was supplied.
    if args.output_file:
        with open(args.output_file, "w") as fh:
            json.dump(metrics, fh, indent=4)
    return metrics
示例6: evaluate_from_args
# 需要导入模块: from allennlp.data.dataset_readers.dataset_reader import DatasetReader [as 别名]
# 或者: from allennlp.data.dataset_readers.dataset_reader.DatasetReader import from_params [as 别名]
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    """Evaluate an archived model via a DataLoader and dump the metrics."""
    # Suppress verbose logging from configuration and initializer machinery.
    logging.getLogger("allennlp.common.params").disabled = True
    logging.getLogger("allennlp.nn.initializers").disabled = True
    logging.getLogger("allennlp.modules.token_embedders.embedding").setLevel(logging.INFO)

    archive = load_archive(
        args.archive_file,
        weights_file=args.weights_file,
        cuda_device=args.cuda_device,
        overrides=args.overrides,
    )
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Use the validation reader when present, otherwise the training one.
    reader_config = config.pop("validation_dataset_reader", None)
    if reader_config is None:
        reader_config = config.pop("dataset_reader")
    reader = DatasetReader.from_params(reader_config)

    eval_path = args.input_file
    logger.info("Reading evaluation data from %s", eval_path)
    instances = reader.read(eval_path)

    sources_mapping = args.embedding_sources_mapping
    embedding_sources = json.loads(sources_mapping) if sources_mapping else {}
    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(instances=instances)
        model.extend_embedder_vocab(embedding_sources)

    instances.index_with(model.vocab)

    # Prefer the validation data loader; honour a CLI batch-size override.
    loader_params = config.pop("validation_data_loader", None)
    if loader_params is None:
        loader_params = config.pop("data_loader")
    if args.batch_size:
        loader_params["batch_size"] = args.batch_size
    loader = DataLoader.from_params(dataset=instances, params=loader_params)

    metrics = evaluate(model, loader, args.cuda_device, args.batch_weight_key)
    logger.info("Finished evaluating.")
    dump_metrics(args.output_file, metrics, log=True)
    return metrics
示例7: evaluate_from_args
# 需要导入模块: from allennlp.data.dataset_readers.dataset_reader import DatasetReader [as 别名]
# 或者: from allennlp.data.dataset_readers.dataset_reader.DatasetReader import from_params [as 别名]
def evaluate_from_args(args):
    u"""Evaluate an archived model on ``args.input_file`` and return metrics."""
    # Reduce logging noise during evaluation.
    for quiet in (u'allennlp.common.params', u'allennlp.nn.initializers'):
        logging.getLogger(quiet).disabled = True
    logging.getLogger(u'allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    archive = load_archive(args.archive_file, args.cuda_device, args.overrides, args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # A dedicated validation reader wins over the training reader.
    reader_config = config.pop(u'validation_dataset_reader', None)
    if reader_config is None:
        reader_config = config.pop(u'dataset_reader')
    dataset_reader = DatasetReader.from_params(reader_config)

    evaluation_data_path = args.input_file
    logger.info(u"Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    iterator_config = config.pop(u"validation_iterator", None)
    if iterator_config is None:
        iterator_config = config.pop(u"iterator")
    iterator = DataIterator.from_params(iterator_config)
    iterator.index_with(model.vocab)

    metrics = evaluate(model, instances, iterator, args.cuda_device)

    logger.info(u"Finished evaluating.")
    logger.info(u"Metrics:")
    for metric_name, metric_value in list(metrics.items()):
        logger.info(u"%s: %s", metric_name, metric_value)

    if args.output_file:
        with open(args.output_file, u"w") as handle:
            json.dump(metrics, handle, indent=4)
    return metrics
示例8: _test_model
# 需要导入模块: from allennlp.data.dataset_readers.dataset_reader import DatasetReader [as 别名]
# 或者: from allennlp.data.dataset_readers.dataset_reader.DatasetReader import from_params [as 别名]
def _test_model(self, file_name):
    """End-to-end smoke test: train a model from the config in ``file_name``
    on a tiny fixture dataset, then check that decoding reproduces the
    reference text up to a small number of OOV tokens.
    """
    params = self.params[file_name].duplicate()

    # Pick the test fixture that matches the configured reader type.
    reader_params = params.duplicate().pop("reader", default=Params({}))
    if reader_params["type"] == "cnn_dailymail":
        reader_params["cnn_tokenized_dir"] = TEST_STORIES_DIR
        dataset_file = TEST_URLS_FILE
    elif reader_params["type"] == "ria":
        dataset_file = RIA_EXAMPLE_FILE
    else:
        # `assert False` is stripped under `python -O`, which would leave
        # `dataset_file` unbound; fail explicitly with a diagnostic instead.
        self.fail("Unsupported reader type: {}".format(reader_params["type"]))

    reader = DatasetReader.from_params(reader_params)
    tokenizer = reader._tokenizer
    dataset = reader.read(dataset_file)

    vocabulary_params = params.pop("vocabulary", default=Params({}))
    vocabulary = Vocabulary.from_params(vocabulary_params, instances=dataset)

    model_params = params.pop("model")
    model = Model.from_params(model_params, vocab=vocabulary)
    print(model)
    print("Trainable params count: ",
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    # Train on the fixture data, then switch to evaluation mode.
    iterator = DataIterator.from_params(params.pop('iterator'))
    iterator.index_with(vocabulary)
    trainer = Trainer.from_params(model, None, iterator,
                                  dataset, None, params.pop('trainer'))
    trainer.train()
    model.eval()

    predictor = Seq2SeqPredictor(model, reader)
    for article, reference_sents in reader.parse_set(dataset_file):
        ref_words = [token.text for token in tokenizer.tokenize(reference_sents)]
        decoded_words = predictor.predict(article)["predicted_tokens"]
        self.assertGreaterEqual(len(decoded_words), len(ref_words))

        # Strip OOV tokens from the prediction (and the aligned reference
        # position) before comparing; tolerate at most 4 of them.
        unk_count = 0
        while DEFAULT_OOV_TOKEN in decoded_words:
            unk_index = decoded_words.index(DEFAULT_OOV_TOKEN)
            decoded_words.pop(unk_index)
            unk_count += 1
            if unk_index < len(ref_words):
                ref_words.pop(unk_index)
        self.assertLess(unk_count, 5)
        self.assertListEqual(decoded_words[:len(ref_words)], ref_words)