This article collects typical usage examples of the Python method allennlp.data.iterators.BasicIterator.index_with. If you are wondering exactly what BasicIterator.index_with does, or how and when to use it, the curated code examples below should help. You can also explore the containing class, allennlp.data.iterators.BasicIterator, for more context.
The sections below show 15 code examples of BasicIterator.index_with, sorted by popularity by default.
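Before the numbered examples, here is a minimal, self-contained sketch of the pattern they all share: build a Vocabulary, hand it to a BasicIterator via index_with, then call the iterator on a list of instances to get padded tensor batches. The toy sentences, the "text" field name, and the SingleIdTokenIndexer are illustrative assumptions rather than details taken from the examples below.

from allennlp.data import Instance, Token, Vocabulary
from allennlp.data.fields import TextField
from allennlp.data.iterators import BasicIterator
from allennlp.data.token_indexers import SingleIdTokenIndexer

# A few toy instances, each with a single text field (the field name "text" is illustrative).
indexers = {"tokens": SingleIdTokenIndexer()}
instances = [
    Instance({"text": TextField([Token(word) for word in sentence.split()], indexers)})
    for sentence in ["this is a sentence", "this is another one", "short"]
]

# Build the vocabulary from the instances and attach it to the iterator with index_with,
# so the iterator can map tokens to integer ids when it tensorizes each batch.
vocab = Vocabulary.from_instances(instances)
iterator = BasicIterator(batch_size=2)
iterator.index_with(vocab)

# Iterating yields dictionaries of padded tensors, one entry per field.
for batch in iterator(instances, num_epochs=1, shuffle=False):
    print(batch["text"]["tokens"].shape)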
Example 1: test_multiple_cursors
# Required import: from allennlp.data.iterators import BasicIterator
# Or: from allennlp.data.iterators.BasicIterator import index_with
def test_multiple_cursors(self):
    # pylint: disable=protected-access
    lazy_instances1 = _LazyInstances(lambda: (i for i in self.instances))
    lazy_instances2 = _LazyInstances(lambda: (i for i in self.instances))
    eager_instances1 = self.instances[:]
    eager_instances2 = self.instances[:]

    for instances1, instances2 in [(eager_instances1, eager_instances2),
                                   (lazy_instances1, lazy_instances2)]:
        iterator = BasicIterator(batch_size=1, instances_per_epoch=2)
        iterator.index_with(self.vocab)

        # First epoch through dataset1
        batches = list(iterator._create_batches(instances1, shuffle=False))
        grouped_instances = [batch.instances for batch in batches]
        assert grouped_instances == [[self.instances[0]], [self.instances[1]]]

        # First epoch through dataset2
        batches = list(iterator._create_batches(instances2, shuffle=False))
        grouped_instances = [batch.instances for batch in batches]
        assert grouped_instances == [[self.instances[0]], [self.instances[1]]]

        # Second epoch through dataset1
        batches = list(iterator._create_batches(instances1, shuffle=False))
        grouped_instances = [batch.instances for batch in batches]
        assert grouped_instances == [[self.instances[2]], [self.instances[3]]]

        # Second epoch through dataset2
        batches = list(iterator._create_batches(instances2, shuffle=False))
        grouped_instances = [batch.instances for batch in batches]
        assert grouped_instances == [[self.instances[2]], [self.instances[3]]]
Example 2: test_trainer_can_run_multiple_gpu
# Required import: from allennlp.data.iterators import BasicIterator
# Or: from allennlp.data.iterators.BasicIterator import index_with
def test_trainer_can_run_multiple_gpu(self):
    multigpu_iterator = BasicIterator(batch_size=4)
    multigpu_iterator.index_with(self.vocab)
    trainer = Trainer(self.model, self.optimizer,
                      multigpu_iterator, self.instances, num_epochs=2,
                      cuda_device=[0, 1])
    trainer.train()
Example 3: test_trainer_can_run_multiple_gpu
# Required import: from allennlp.data.iterators import BasicIterator
# Or: from allennlp.data.iterators.BasicIterator import index_with
def test_trainer_can_run_multiple_gpu(self):
    class MetaDataCheckWrapper(Model):
        """
        Checks that the metadata field has been correctly split across the batch dimension
        when running on multiple gpus.
        """
        def __init__(self, model):
            super().__init__(model.vocab)
            self.model = model

        def forward(self, **kwargs) -> Dict[str, torch.Tensor]:  # type: ignore # pylint: disable=arguments-differ
            assert 'metadata' in kwargs and 'tags' in kwargs, \
                f'tokens and metadata must be provided. Got {kwargs.keys()} instead.'
            batch_size = kwargs['tokens']['tokens'].size()[0]
            assert len(kwargs['metadata']) == batch_size, \
                f'metadata must be split appropriately. Expected {batch_size} elements, ' \
                f"got {len(kwargs['metadata'])} elements."
            return self.model.forward(**kwargs)

    multigpu_iterator = BasicIterator(batch_size=4)
    multigpu_iterator.index_with(self.vocab)
    trainer = Trainer(MetaDataCheckWrapper(self.model), self.optimizer,
                      multigpu_iterator, self.instances, num_epochs=2,
                      cuda_device=[0, 1])
    trainer.train()
Example 4: test_can_optimise_model_with_dense_and_sparse_params
# Required import: from allennlp.data.iterators import BasicIterator
# Or: from allennlp.data.iterators.BasicIterator import index_with
def test_can_optimise_model_with_dense_and_sparse_params(self):
    optimizer_params = Params({
            "type": "dense_sparse_adam"
    })
    parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
    optimizer = Optimizer.from_params(parameters, optimizer_params)
    iterator = BasicIterator(2)
    iterator.index_with(self.vocab)
    Trainer(self.model, optimizer, iterator, self.instances).train()
Example 5: test_epoch_tracking_multiple_epochs
# Required import: from allennlp.data.iterators import BasicIterator
# Or: from allennlp.data.iterators.BasicIterator import index_with
def test_epoch_tracking_multiple_epochs(self):
    iterator = BasicIterator(batch_size=2, track_epoch=True)
    iterator.index_with(self.vocab)

    all_batches = list(iterator(self.instances, num_epochs=10))
    assert len(all_batches) == 10 * 3
    for i, batch in enumerate(all_batches):
        # Should have 3 batches per epoch
        epoch = i // 3
        assert all(epoch_num == epoch for epoch_num in batch['epoch_num'])
Example 6: test_trainer_can_log_learning_rates_tensorboard
# Required import: from allennlp.data.iterators import BasicIterator
# Or: from allennlp.data.iterators.BasicIterator import index_with
def test_trainer_can_log_learning_rates_tensorboard(self):
    iterator = BasicIterator(batch_size=4)
    iterator.index_with(self.vocab)

    trainer = Trainer(self.model, self.optimizer,
                      iterator, self.instances, num_epochs=2,
                      serialization_dir=self.TEST_DIR,
                      should_log_learning_rate=True,
                      summary_interval=2)
    trainer.train()
Example 7: test_yield_one_epoch_iterates_over_the_data_once
# Required import: from allennlp.data.iterators import BasicIterator
# Or: from allennlp.data.iterators.BasicIterator import index_with
def test_yield_one_epoch_iterates_over_the_data_once(self):
    for test_instances in (self.instances, self.lazy_instances):
        iterator = BasicIterator(batch_size=2)
        iterator.index_with(self.vocab)
        batches = list(iterator(test_instances, num_epochs=1))
        # We just want to get the single-token array for the text field in the instance.
        instances = [tuple(instance.detach().cpu().numpy())
                     for batch in batches
                     for instance in batch['text']["tokens"]]
        assert len(instances) == 5
        self.assert_instances_are_correct(instances)
Example 8: test_with_iterator
# Required import: from allennlp.data.iterators import BasicIterator
# Or: from allennlp.data.iterators.BasicIterator import index_with
def test_with_iterator(self):
    reader = MultiprocessDatasetReader(base_reader=self.base_reader, num_workers=2)
    instances = reader.read(self.glob)

    iterator = BasicIterator(batch_size=32)
    iterator.index_with(self.vocab)

    batches = [batch for batch in iterator(instances, num_epochs=1)]

    # 400 instances / batch_size 32 = 12 full batches + 1 batch of 16
    sizes = sorted([len(batch['tags']) for batch in batches])
    assert sizes == [16] + 12 * [32]
Example 9: test_call_iterates_over_data_forever
# Required import: from allennlp.data.iterators import BasicIterator
# Or: from allennlp.data.iterators.BasicIterator import index_with
def test_call_iterates_over_data_forever(self):
    for test_instances in (self.instances, self.lazy_instances):
        iterator = BasicIterator(batch_size=2)
        iterator.index_with(self.vocab)
        generator = iterator(test_instances)
        batches = [next(generator) for _ in range(18)]  # going over the data 6 times
        # We just want to get the single-token array for the text field in the instance.
        instances = [tuple(instance.detach().cpu().numpy())
                     for batch in batches
                     for instance in batch['text']["tokens"]]
        assert len(instances) == 5 * 6
        self.assert_instances_are_correct(instances)
Example 10: test_epoch_tracking_forever
# Required import: from allennlp.data.iterators import BasicIterator
# Or: from allennlp.data.iterators.BasicIterator import index_with
def test_epoch_tracking_forever(self):
    iterator = BasicIterator(batch_size=2, track_epoch=True)
    iterator.index_with(self.vocab)

    it = iterator(self.instances, num_epochs=None)
    all_batches = [next(it) for _ in range(30)]

    assert len(all_batches) == 30
    for i, batch in enumerate(all_batches):
        # Should have 3 batches per epoch
        epoch = i // 3
        assert all(epoch_num == epoch for epoch_num in batch['epoch_num'])
Example 11: test_elmo_bilm
# Required import: from allennlp.data.iterators import BasicIterator
# Or: from allennlp.data.iterators.BasicIterator import index_with
def test_elmo_bilm(self):
    # get the raw data
    sentences, expected_lm_embeddings = self._load_sentences_embeddings()

    # load the test model
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file)

    # Deal with the data.
    indexer = ELMoTokenCharactersIndexer()

    # For each sentence, first create a TextField, then create an instance
    instances = []
    for batch in zip(*sentences):
        for sentence in batch:
            tokens = [Token(token) for token in sentence.split()]
            field = TextField(tokens, {'character_ids': indexer})
            instance = Instance({"elmo": field})
            instances.append(instance)

    vocab = Vocabulary()

    # Now finally we can iterate through batches.
    iterator = BasicIterator(3)
    iterator.index_with(vocab)
    for i, batch in enumerate(iterator(instances, num_epochs=1, shuffle=False)):
        lm_embeddings = elmo_bilm(batch['elmo']['character_ids'])
        top_layer_embeddings, mask = remove_sentence_boundaries(
                lm_embeddings['activations'][2],
                lm_embeddings['mask']
        )

        # check the mask lengths
        lengths = mask.data.numpy().sum(axis=1)
        batch_sentences = [sentences[k][i] for k in range(3)]
        expected_lengths = [
                len(sentence.split()) for sentence in batch_sentences
        ]
        self.assertEqual(lengths.tolist(), expected_lengths)

        # get the expected embeddings and compare!
        expected_top_layer = [expected_lm_embeddings[k][i] for k in range(3)]
        for k in range(3):
            self.assertTrue(
                    numpy.allclose(
                            top_layer_embeddings[k, :lengths[k], :].data.numpy(),
                            expected_top_layer[k],
                            atol=1.0e-6
                    )
            )
Example 12: main
# Required import: from allennlp.data.iterators import BasicIterator
# Or: from allennlp.data.iterators.BasicIterator import index_with
def main(serialization_directory, device):
    """
    serialization_directory : str, required.
        The directory containing the serialized weights.
    device: int, default = -1
        The device to run the evaluation on.
    """
    config = Params.from_file(os.path.join(serialization_directory, "config.json"))
    dataset_reader = DatasetReader.from_params(config['dataset_reader'])
    evaluation_data_path = config['validation_data_path']

    model = Model.load(config, serialization_dir=serialization_directory, cuda_device=device)

    prediction_file_path = os.path.join(serialization_directory, "predictions.txt")
    gold_file_path = os.path.join(serialization_directory, "gold.txt")
    prediction_file = open(prediction_file_path, "w+")
    gold_file = open(gold_file_path, "w+")

    # Load the evaluation data and index it.
    print("Reading evaluation data from {}".format(evaluation_data_path))
    instances = dataset_reader.read(evaluation_data_path)
    iterator = BasicIterator(batch_size=32)
    iterator.index_with(model.vocab)

    model_predictions = []
    batches = iterator(instances, num_epochs=1, shuffle=False, cuda_device=device, for_training=False)
    for batch in Tqdm.tqdm(batches):
        result = model(**batch)
        predictions = model.decode(result)
        model_predictions.extend(predictions["tags"])

    for instance, prediction in zip(instances, model_predictions):
        fields = instance.fields
        try:
            # Most sentences have a verbal predicate, but not all.
            verb_index = fields["verb_indicator"].labels.index(1)
        except ValueError:
            verb_index = None
        gold_tags = fields["tags"].labels
        sentence = fields["tokens"].tokens

        write_to_conll_eval_file(prediction_file, gold_file,
                                 verb_index, sentence, prediction, gold_tags)

    prediction_file.close()
    gold_file.close()
Example 13: test_maximum_samples_per_batch
# Required import: from allennlp.data.iterators import BasicIterator
# Or: from allennlp.data.iterators.BasicIterator import index_with
def test_maximum_samples_per_batch(self):
    for test_instances in (self.instances, self.lazy_instances):
        # pylint: disable=protected-access
        iterator = BasicIterator(
                batch_size=3, maximum_samples_per_batch=['num_tokens', 9]
        )
        iterator.index_with(self.vocab)
        batches = list(iterator._create_batches(test_instances, shuffle=False))
        stats = self.get_batches_stats(batches)

        # ensure all instances are in a batch
        assert stats['total_instances'] == len(self.instances)

        # ensure correct batch sizes
        assert stats['batch_lengths'] == [2, 1, 1, 1]

        # ensure correct sample sizes (<= 9)
        assert stats['sample_sizes'] == [8, 3, 9, 1]
Example 14: test_maximum_samples_per_batch_packs_tightly
# Required import: from allennlp.data.iterators import BasicIterator
# Or: from allennlp.data.iterators.BasicIterator import index_with
def test_maximum_samples_per_batch_packs_tightly(self):
    # pylint: disable=protected-access
    token_counts = [10, 4, 3]
    test_instances = self.create_instances_from_token_counts(token_counts)

    iterator = BasicIterator(
            batch_size=3, maximum_samples_per_batch=['num_tokens', 11]
    )
    iterator.index_with(self.vocab)
    batches = list(iterator._create_batches(test_instances, shuffle=False))
    stats = self.get_batches_stats(batches)

    # ensure all instances are in a batch
    assert stats['total_instances'] == len(token_counts)

    # ensure correct batch sizes
    assert stats['batch_lengths'] == [1, 2]

    # ensure correct sample sizes (<= 11)
    assert stats['sample_sizes'] == [10, 8]
Example 15: test_maximum_samples_per_batch
# Required import: from allennlp.data.iterators import BasicIterator
# Or: from allennlp.data.iterators.BasicIterator import index_with
def test_maximum_samples_per_batch(self):
    for test_instances in (self.instances, self.lazy_instances):
        # pylint: disable=protected-access
        iterator = BasicIterator(
                batch_size=3, maximum_samples_per_batch=['num_tokens', 9]
        )
        iterator.index_with(self.vocab)
        batches = list(iterator._create_batches(test_instances, shuffle=False))

        # ensure all instances are in a batch
        grouped_instances = [batch.instances for batch in batches]
        num_instances = sum(len(group) for group in grouped_instances)
        assert num_instances == len(self.instances)

        # ensure all batches are sufficiently small
        for batch in batches:
            batch_sequence_length = max(
                    [instance.get_padding_lengths()['text']['num_tokens']
                     for instance in batch.instances]
            )
            assert batch_sequence_length * len(batch.instances) <= 9