This article collects typical usage examples of the Python class allennlp.data.iterators.BasicIterator. If you have been wondering what BasicIterator is, how to use it, or want to see it in context, the selected examples below should help.
The following 15 code examples of the BasicIterator class are shown, ordered by popularity by default.
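Before the individual examples, here is a minimal sketch of the usage pattern most of them share: build the iterator, attach a Vocabulary with index_with, then call the iterator to get padded tensor batches. The dataset reader and file path below are placeholders chosen for illustration, not taken from any of the examples.

# Minimal usage sketch (AllenNLP 0.x-style API); the reader and data path are placeholders.
from allennlp.data.dataset_readers import SequenceTaggingDatasetReader
from allennlp.data.iterators import BasicIterator
from allennlp.data.vocabulary import Vocabulary

reader = SequenceTaggingDatasetReader()
instances = reader.read("path/to/training_data.tsv")  # placeholder path

vocab = Vocabulary.from_instances(instances)
iterator = BasicIterator(batch_size=2)
iterator.index_with(vocab)  # the iterator needs a vocabulary to index instances

for batch in iterator(instances, num_epochs=1, shuffle=False):
    # each batch is a dict of padded tensors keyed by field name
    print({key: type(value) for key, value in batch.items()})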
Example 1: test_trainer_can_run_multiple_gpu
def test_trainer_can_run_multiple_gpu(self):
    multigpu_iterator = BasicIterator(batch_size=4)
    multigpu_iterator.index_with(self.vocab)
    trainer = Trainer(self.model, self.optimizer,
                      multigpu_iterator, self.instances, num_epochs=2,
                      cuda_device=[0, 1])
    trainer.train()

Example 2: test_trainer_can_run_multiple_gpu
def test_trainer_can_run_multiple_gpu(self):
    class MetaDataCheckWrapper(Model):
        """
        Checks that the metadata field has been correctly split across the batch dimension
        when running on multiple gpus.
        """
        def __init__(self, model):
            super().__init__(model.vocab)
            self.model = model

        def forward(self, **kwargs) -> Dict[str, torch.Tensor]:  # type: ignore # pylint: disable=arguments-differ
            assert 'metadata' in kwargs and 'tags' in kwargs, \
                f'tokens and metadata must be provided. Got {kwargs.keys()} instead.'
            batch_size = kwargs['tokens']['tokens'].size()[0]
            assert len(kwargs['metadata']) == batch_size, \
                f'metadata must be split appropriately. Expected {batch_size} elements, ' \
                f"got {len(kwargs['metadata'])} elements."
            return self.model.forward(**kwargs)

    multigpu_iterator = BasicIterator(batch_size=4)
    multigpu_iterator.index_with(self.vocab)
    trainer = Trainer(MetaDataCheckWrapper(self.model), self.optimizer,
                      multigpu_iterator, self.instances, num_epochs=2,
                      cuda_device=[0, 1])
    trainer.train()

Example 3: test_create_batches_groups_correctly
def test_create_batches_groups_correctly(self):
    # pylint: disable=protected-access
    for test_instances in (self.instances, self.lazy_instances):
        iterator = BasicIterator(batch_size=2)
        batches = list(iterator._create_batches(test_instances, shuffle=False))
        grouped_instances = [batch.instances for batch in batches]
        assert grouped_instances == [[self.instances[0], self.instances[1]],
                                     [self.instances[2], self.instances[3]],
                                     [self.instances[4]]]

Example 4: test_can_optimise_model_with_dense_and_sparse_params
def test_can_optimise_model_with_dense_and_sparse_params(self):
    optimizer_params = Params({"type": "dense_sparse_adam"})
    parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
    optimizer = Optimizer.from_params(parameters, optimizer_params)
    iterator = BasicIterator(2)
    iterator.index_with(self.vocab)
    Trainer(self.model, optimizer, iterator, self.instances).train()

Example 5: test_from_params
def test_from_params(self):
    # pylint: disable=protected-access
    params = Params({})
    iterator = BasicIterator.from_params(params)
    assert iterator._batch_size == 32  # default value

    params = Params({"batch_size": 10})
    iterator = BasicIterator.from_params(params)
    assert iterator._batch_size == 10

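Example 5 builds the iterator from Params in code; in a full AllenNLP 0.x training configuration the same object is usually declared under the "iterator" key and constructed through the DataIterator registry. A small sketch, assuming BasicIterator's registered name is "basic":

# Sketch: constructing the iterator through the registry from a config-style dict.
# Assumes BasicIterator is registered under the name "basic" (AllenNLP 0.x).
from allennlp.common import Params
from allennlp.data.iterators import DataIterator

iterator = DataIterator.from_params(Params({"type": "basic", "batch_size": 10}))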
Example 6: test_epoch_tracking_multiple_epochs
def test_epoch_tracking_multiple_epochs(self):
    iterator = BasicIterator(batch_size=2, track_epoch=True)
    iterator.index_with(self.vocab)
    all_batches = list(iterator(self.instances, num_epochs=10))
    assert len(all_batches) == 10 * 3
    for i, batch in enumerate(all_batches):
        # Should have 3 batches per epoch
        epoch = i // 3
        assert all(epoch_num == epoch for epoch_num in batch['epoch_num'])

Example 7: test_max_instances_in_memory
def test_max_instances_in_memory(self):
    # pylint: disable=protected-access
    for test_instances in (self.instances, self.lazy_instances):
        iterator = BasicIterator(batch_size=2, max_instances_in_memory=3)
        # One epoch: 5 instances -> [2, 1, 2]
        batches = list(iterator._create_batches(test_instances, shuffle=False))
        grouped_instances = [batch.instances for batch in batches]
        assert grouped_instances == [[self.instances[0], self.instances[1]],
                                     [self.instances[2]],
                                     [self.instances[3], self.instances[4]]]

Example 8: test_yield_one_epoch_iterates_over_the_data_once
def test_yield_one_epoch_iterates_over_the_data_once(self):
    for test_instances in (self.instances, self.lazy_instances):
        iterator = BasicIterator(batch_size=2)
        iterator.index_with(self.vocab)
        batches = list(iterator(test_instances, num_epochs=1))
        # We just want to get the single-token array for the text field in the instance.
        instances = [tuple(instance.detach().cpu().numpy())
                     for batch in batches
                     for instance in batch['text']["tokens"]]
        assert len(instances) == 5
        self.assert_instances_are_correct(instances)

Example 9: test_trainer_can_log_learning_rates_tensorboard
def test_trainer_can_log_learning_rates_tensorboard(self):
    iterator = BasicIterator(batch_size=4)
    iterator.index_with(self.vocab)
    trainer = Trainer(self.model, self.optimizer,
                      iterator, self.instances, num_epochs=2,
                      serialization_dir=self.TEST_DIR,
                      should_log_learning_rate=True,
                      summary_interval=2)
    trainer.train()

Example 10: test_call_iterates_over_data_forever
def test_call_iterates_over_data_forever(self):
    for test_instances in (self.instances, self.lazy_instances):
        iterator = BasicIterator(batch_size=2)
        iterator.index_with(self.vocab)
        generator = iterator(test_instances)
        batches = [next(generator) for _ in range(18)]  # going over the data 6 times
        # We just want to get the single-token array for the text field in the instance.
        instances = [tuple(instance.detach().cpu().numpy())
                     for batch in batches
                     for instance in batch['text']["tokens"]]
        assert len(instances) == 5 * 6
        self.assert_instances_are_correct(instances)

Example 11: test_with_iterator
def test_with_iterator(self):
    reader = MultiprocessDatasetReader(base_reader=self.base_reader, num_workers=2)
    instances = reader.read(self.glob)

    iterator = BasicIterator(batch_size=32)
    iterator.index_with(self.vocab)

    batches = [batch for batch in iterator(instances, num_epochs=1)]

    # 400 instances / batch_size 32 = 12 full batches + 1 batch of 16
    sizes = sorted([len(batch['tags']) for batch in batches])
    assert sizes == [16] + 12 * [32]

Example 12: test_epoch_tracking_forever
def test_epoch_tracking_forever(self):
    iterator = BasicIterator(batch_size=2, track_epoch=True)
    iterator.index_with(self.vocab)
    it = iterator(self.instances, num_epochs=None)
    all_batches = [next(it) for _ in range(30)]
    assert len(all_batches) == 30
    for i, batch in enumerate(all_batches):
        # Should have 3 batches per epoch
        epoch = i // 3
        assert all(epoch_num == epoch for epoch_num in batch['epoch_num'])

Example 13: test_elmo_bilm
def test_elmo_bilm(self):
    # get the raw data
    sentences, expected_lm_embeddings = self._load_sentences_embeddings()

    # load the test model
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file)

    # Deal with the data.
    indexer = ELMoTokenCharactersIndexer()

    # For each sentence, first create a TextField, then create an instance
    instances = []
    for batch in zip(*sentences):
        for sentence in batch:
            tokens = [Token(token) for token in sentence.split()]
            field = TextField(tokens, {'character_ids': indexer})
            instance = Instance({"elmo": field})
            instances.append(instance)

    vocab = Vocabulary()

    # Now finally we can iterate through batches.
    iterator = BasicIterator(3)
    iterator.index_with(vocab)
    for i, batch in enumerate(iterator(instances, num_epochs=1, shuffle=False)):
        lm_embeddings = elmo_bilm(batch['elmo']['character_ids'])
        top_layer_embeddings, mask = remove_sentence_boundaries(
                lm_embeddings['activations'][2],
                lm_embeddings['mask']
        )

        # check the mask lengths
        lengths = mask.data.numpy().sum(axis=1)
        batch_sentences = [sentences[k][i] for k in range(3)]
        expected_lengths = [len(sentence.split()) for sentence in batch_sentences]
        self.assertEqual(lengths.tolist(), expected_lengths)

        # get the expected embeddings and compare!
        expected_top_layer = [expected_lm_embeddings[k][i] for k in range(3)]
        for k in range(3):
            self.assertTrue(
                    numpy.allclose(
                            top_layer_embeddings[k, :lengths[k], :].data.numpy(),
                            expected_top_layer[k],
                            atol=1.0e-6
                    )
            )

Example 14: main
def main(serialization_directory, device):
    """
    serialization_directory : str, required.
        The directory containing the serialized weights.
    device: int, default = -1
        The device to run the evaluation on.
    """
    config = Params.from_file(os.path.join(serialization_directory, "config.json"))
    dataset_reader = DatasetReader.from_params(config['dataset_reader'])
    evaluation_data_path = config['validation_data_path']

    model = Model.load(config, serialization_dir=serialization_directory, cuda_device=device)

    prediction_file_path = os.path.join(serialization_directory, "predictions.txt")
    gold_file_path = os.path.join(serialization_directory, "gold.txt")
    prediction_file = open(prediction_file_path, "w+")
    gold_file = open(gold_file_path, "w+")

    # Load the evaluation data and index it.
    print("Reading evaluation data from {}".format(evaluation_data_path))
    instances = dataset_reader.read(evaluation_data_path)
    iterator = BasicIterator(batch_size=32)
    iterator.index_with(model.vocab)

    model_predictions = []
    batches = iterator(instances, num_epochs=1, shuffle=False, cuda_device=device, for_training=False)
    for batch in Tqdm.tqdm(batches):
        result = model(**batch)
        predictions = model.decode(result)
        model_predictions.extend(predictions["tags"])

    for instance, prediction in zip(instances, model_predictions):
        fields = instance.fields
        try:
            # Most sentences have a verbal predicate, but not all.
            verb_index = fields["verb_indicator"].labels.index(1)
        except ValueError:
            verb_index = None
        gold_tags = fields["tags"].labels
        sentence = fields["tokens"].tokens
        write_to_conll_eval_file(prediction_file, gold_file,
                                 verb_index, sentence, prediction, gold_tags)
    prediction_file.close()
    gold_file.close()

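Example 14's main is clearly meant to be run as a script; a hypothetical command-line entry point might look like the following (the flag names and defaults are illustrative assumptions, not part of the original script):

# Hypothetical CLI wrapper for main() above; argument names and defaults are assumptions.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Write gold and predicted tags to CoNLL-format files for evaluation.")
    parser.add_argument("serialization_directory", type=str,
                        help="Directory containing config.json and the trained model weights.")
    parser.add_argument("--device", type=int, default=-1,
                        help="CUDA device id, or -1 for CPU.")
    args = parser.parse_args()
    main(args.serialization_directory, args.device)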
Example 15: test_multiple_cursors
def test_multiple_cursors(self):
    # pylint: disable=protected-access
    lazy_instances1 = _LazyInstances(lambda: (i for i in self.instances))
    lazy_instances2 = _LazyInstances(lambda: (i for i in self.instances))

    eager_instances1 = self.instances[:]
    eager_instances2 = self.instances[:]

    for instances1, instances2 in [(eager_instances1, eager_instances2),
                                   (lazy_instances1, lazy_instances2)]:
        iterator = BasicIterator(batch_size=1, instances_per_epoch=2)
        iterator.index_with(self.vocab)

        # First epoch through dataset1
        batches = list(iterator._create_batches(instances1, shuffle=False))
        grouped_instances = [batch.instances for batch in batches]
        assert grouped_instances == [[self.instances[0]], [self.instances[1]]]

        # First epoch through dataset2
        batches = list(iterator._create_batches(instances2, shuffle=False))
        grouped_instances = [batch.instances for batch in batches]
        assert grouped_instances == [[self.instances[0]], [self.instances[1]]]

        # Second epoch through dataset1
        batches = list(iterator._create_batches(instances1, shuffle=False))
        grouped_instances = [batch.instances for batch in batches]
        assert grouped_instances == [[self.instances[2]], [self.instances[3]]]

        # Second epoch through dataset2
        batches = list(iterator._create_batches(instances2, shuffle=False))
        grouped_instances = [batch.instances for batch in batches]
        assert grouped_instances == [[self.instances[2]], [self.instances[3]]]