

Python BasicIterator.index_with Method Code Examples

This article collects typical usage examples of the allennlp.data.iterators.BasicIterator.index_with method in Python. If you are wondering what BasicIterator.index_with does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples of allennlp.data.iterators.BasicIterator, the class this method belongs to.


The sections below present 15 code examples of the BasicIterator.index_with method, sorted by popularity by default.
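
All of the examples share one pattern: build a Vocabulary, construct a BasicIterator, attach the vocabulary with index_with(), and then iterate over instances to get batches of tensors. Here is a minimal, self-contained sketch of that pattern. It assumes AllenNLP 0.x (the version these examples target); the toy sentences and the 'text' field name are made up purely for illustration.

    from allennlp.data import Instance, Vocabulary
    from allennlp.data.tokenizers import Token
    from allennlp.data.fields import TextField
    from allennlp.data.token_indexers import SingleIdTokenIndexer
    from allennlp.data.iterators import BasicIterator

    # Build a couple of toy instances, each with a single TextField named 'text'.
    indexers = {'tokens': SingleIdTokenIndexer()}
    instances = [
        Instance({'text': TextField([Token(w) for w in sentence.split()], indexers)})
        for sentence in ('this is a sentence', 'this is another one')
    ]

    # index_with() attaches the vocabulary the iterator uses to turn tokens into ids.
    vocab = Vocabulary.from_instances(instances)
    iterator = BasicIterator(batch_size=2)
    iterator.index_with(vocab)

    # Iterating now yields dictionaries of padded tensors keyed by field name.
    for batch in iterator(instances, num_epochs=1, shuffle=False):
        print(batch['text']['tokens'].shape)  # e.g. torch.Size([2, 4])

The examples that follow use this same pattern, but exercise it against lazy datasets, multi-GPU training, epoch tracking, and per-batch sample limits.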

Example 1: test_multiple_cursors

# Required import: from allennlp.data.iterators import BasicIterator [as alias]
# Or: from allennlp.data.iterators.BasicIterator import index_with [as alias]
    def test_multiple_cursors(self):
        # pylint: disable=protected-access
        lazy_instances1 = _LazyInstances(lambda: (i for i in self.instances))
        lazy_instances2 = _LazyInstances(lambda: (i for i in self.instances))

        eager_instances1 = self.instances[:]
        eager_instances2 = self.instances[:]

        for instances1, instances2 in [(eager_instances1, eager_instances2),
                                       (lazy_instances1, lazy_instances2)]:
            iterator = BasicIterator(batch_size=1, instances_per_epoch=2)
            iterator.index_with(self.vocab)

            # First epoch through dataset1
            batches = list(iterator._create_batches(instances1, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[0]], [self.instances[1]]]

            # First epoch through dataset2
            batches = list(iterator._create_batches(instances2, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[0]], [self.instances[1]]]

            # Second epoch through dataset1
            batches = list(iterator._create_batches(instances1, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[2]], [self.instances[3]]]

            # Second epoch through dataset2
            batches = list(iterator._create_batches(instances2, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[2]], [self.instances[3]]]
Developer: apmoore1, Project: allennlp, Lines: 34, Source: basic_iterator_test.py

Example 2: test_trainer_can_run_multiple_gpu

# Required import: from allennlp.data.iterators import BasicIterator [as alias]
# Or: from allennlp.data.iterators.BasicIterator import index_with [as alias]
    def test_trainer_can_run_multiple_gpu(self):
        multigpu_iterator = BasicIterator(batch_size=4)
        multigpu_iterator.index_with(self.vocab)
        trainer = Trainer(self.model, self.optimizer,
                          multigpu_iterator, self.instances, num_epochs=2,
                          cuda_device=[0, 1])
        trainer.train()
Developer: pyknife, Project: allennlp, Lines: 9, Source: trainer_test.py

Example 3: test_trainer_can_run_multiple_gpu

# Required import: from allennlp.data.iterators import BasicIterator [as alias]
# Or: from allennlp.data.iterators.BasicIterator import index_with [as alias]
    def test_trainer_can_run_multiple_gpu(self):

        class MetaDataCheckWrapper(Model):
            """
            Checks that the metadata field has been correctly split across the batch dimension
            when running on multiple gpus.
            """
            def __init__(self, model):
                super().__init__(model.vocab)
                self.model = model

            def forward(self, **kwargs) -> Dict[str, torch.Tensor]:  # type: ignore # pylint: disable=arguments-differ
                assert 'metadata' in kwargs and 'tags' in kwargs, \
                    f'tokens and metadata must be provided. Got {kwargs.keys()} instead.'
                batch_size = kwargs['tokens']['tokens'].size()[0]
                assert len(kwargs['metadata']) == batch_size, \
                    f'metadata must be split appropriately. Expected {batch_size} elements, ' \
                    f"got {len(kwargs['metadata'])} elements."
                return self.model.forward(**kwargs)

        multigpu_iterator = BasicIterator(batch_size=4)
        multigpu_iterator.index_with(self.vocab)
        trainer = Trainer(MetaDataCheckWrapper(self.model), self.optimizer,
                          multigpu_iterator, self.instances, num_epochs=2,
                          cuda_device=[0, 1])
        trainer.train()
Developer: ziaridoy20, Project: allennlp, Lines: 28, Source: trainer_test.py

Example 4: test_can_optimise_model_with_dense_and_sparse_params

# Required import: from allennlp.data.iterators import BasicIterator [as alias]
# Or: from allennlp.data.iterators.BasicIterator import index_with [as alias]
    def test_can_optimise_model_with_dense_and_sparse_params(self):
        optimizer_params = Params({
                "type": "dense_sparse_adam"
        })
        parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
        optimizer = Optimizer.from_params(parameters, optimizer_params)
        iterator = BasicIterator(2)
        iterator.index_with(self.vocab)
        Trainer(self.model, optimizer, iterator, self.instances).train()
Developer: apmoore1, Project: allennlp, Lines: 11, Source: optimizer_test.py

Example 5: test_epoch_tracking_multiple_epochs

# Required import: from allennlp.data.iterators import BasicIterator [as alias]
# Or: from allennlp.data.iterators.BasicIterator import index_with [as alias]
    def test_epoch_tracking_multiple_epochs(self):
        iterator = BasicIterator(batch_size=2, track_epoch=True)
        iterator.index_with(self.vocab)

        all_batches = list(iterator(self.instances, num_epochs=10))
        assert len(all_batches) == 10 * 3
        for i, batch in enumerate(all_batches):
            # Should have 3 batches per epoch
            epoch = i // 3
            assert all(epoch_num == epoch for epoch_num in batch['epoch_num'])
Developer: apmoore1, Project: allennlp, Lines: 12, Source: basic_iterator_test.py

Example 6: test_trainer_can_log_learning_rates_tensorboard

# Required import: from allennlp.data.iterators import BasicIterator [as alias]
# Or: from allennlp.data.iterators.BasicIterator import index_with [as alias]
    def test_trainer_can_log_learning_rates_tensorboard(self):
        iterator = BasicIterator(batch_size=4)
        iterator.index_with(self.vocab)

        trainer = Trainer(self.model, self.optimizer,
                          iterator, self.instances, num_epochs=2,
                          serialization_dir=self.TEST_DIR,
                          should_log_learning_rate=True,
                          summary_interval=2)

        trainer.train()
Developer: ziaridoy20, Project: allennlp, Lines: 13, Source: trainer_test.py

Example 7: test_yield_one_epoch_iterates_over_the_data_once

# Required import: from allennlp.data.iterators import BasicIterator [as alias]
# Or: from allennlp.data.iterators.BasicIterator import index_with [as alias]
    def test_yield_one_epoch_iterates_over_the_data_once(self):
        for test_instances in (self.instances, self.lazy_instances):
            iterator = BasicIterator(batch_size=2)
            iterator.index_with(self.vocab)
            batches = list(iterator(test_instances, num_epochs=1))
            # We just want to get the single-token array for the text field in the instance.
            instances = [tuple(instance.detach().cpu().numpy())
                         for batch in batches
                         for instance in batch['text']["tokens"]]
            assert len(instances) == 5
            self.assert_instances_are_correct(instances)
Developer: apmoore1, Project: allennlp, Lines: 13, Source: basic_iterator_test.py

Example 8: test_with_iterator

# Required import: from allennlp.data.iterators import BasicIterator [as alias]
# Or: from allennlp.data.iterators.BasicIterator import index_with [as alias]
    def test_with_iterator(self):
        reader = MultiprocessDatasetReader(base_reader=self.base_reader, num_workers=2)
        instances = reader.read(self.glob)

        iterator = BasicIterator(batch_size=32)
        iterator.index_with(self.vocab)

        batches = [batch for batch in iterator(instances, num_epochs=1)]

        # 400 instances / batch_size 32 = 12 full batches + 1 batch of 16
        sizes = sorted([len(batch['tags']) for batch in batches])
        assert sizes == [16] + 12 * [32]
Developer: apmoore1, Project: allennlp, Lines: 14, Source: multiprocess_dataset_reader_test.py

Example 9: test_call_iterates_over_data_forever

# Required import: from allennlp.data.iterators import BasicIterator [as alias]
# Or: from allennlp.data.iterators.BasicIterator import index_with [as alias]
    def test_call_iterates_over_data_forever(self):
        for test_instances in (self.instances, self.lazy_instances):
            iterator = BasicIterator(batch_size=2)
            iterator.index_with(self.vocab)
            generator = iterator(test_instances)
            batches = [next(generator) for _ in range(18)]  # going over the data 6 times
            # We just want to get the single-token array for the text field in the instance.
            instances = [tuple(instance.detach().cpu().numpy())
                         for batch in batches
                         for instance in batch['text']["tokens"]]
            assert len(instances) == 5 * 6
            self.assert_instances_are_correct(instances)
Developer: apmoore1, Project: allennlp, Lines: 14, Source: basic_iterator_test.py

Example 10: test_epoch_tracking_forever

# Required import: from allennlp.data.iterators import BasicIterator [as alias]
# Or: from allennlp.data.iterators.BasicIterator import index_with [as alias]
    def test_epoch_tracking_forever(self):
        iterator = BasicIterator(batch_size=2, track_epoch=True)
        iterator.index_with(self.vocab)

        it = iterator(self.instances, num_epochs=None)

        all_batches = [next(it) for _ in range(30)]

        assert len(all_batches) == 30
        for i, batch in enumerate(all_batches):
            # Should have 3 batches per epoch
            epoch = i // 3
            assert all(epoch_num == epoch for epoch_num in batch['epoch_num'])
Developer: apmoore1, Project: allennlp, Lines: 15, Source: basic_iterator_test.py

Example 11: test_elmo_bilm

# Required import: from allennlp.data.iterators import BasicIterator [as alias]
# Or: from allennlp.data.iterators.BasicIterator import index_with [as alias]
    def test_elmo_bilm(self):
        # get the raw data
        sentences, expected_lm_embeddings = self._load_sentences_embeddings()

        # load the test model
        elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file)

        # Deal with the data.
        indexer = ELMoTokenCharactersIndexer()

        # For each sentence, first create a TextField, then create an instance
        instances = []
        for batch in zip(*sentences):
            for sentence in batch:
                tokens = [Token(token) for token in sentence.split()]
                field = TextField(tokens, {'character_ids': indexer})
                instance = Instance({"elmo": field})
                instances.append(instance)

        vocab = Vocabulary()

        # Now finally we can iterate through batches.
        iterator = BasicIterator(3)
        iterator.index_with(vocab)
        for i, batch in enumerate(iterator(instances, num_epochs=1, shuffle=False)):
            lm_embeddings = elmo_bilm(batch['elmo']['character_ids'])
            top_layer_embeddings, mask = remove_sentence_boundaries(
                    lm_embeddings['activations'][2],
                    lm_embeddings['mask']
            )

            # check the mask lengths
            lengths = mask.data.numpy().sum(axis=1)
            batch_sentences = [sentences[k][i] for k in range(3)]
            expected_lengths = [
                    len(sentence.split()) for sentence in batch_sentences
            ]
            self.assertEqual(lengths.tolist(), expected_lengths)

            # get the expected embeddings and compare!
            expected_top_layer = [expected_lm_embeddings[k][i] for k in range(3)]
            for k in range(3):
                self.assertTrue(
                        numpy.allclose(
                                top_layer_embeddings[k, :lengths[k], :].data.numpy(),
                                expected_top_layer[k],
                                atol=1.0e-6
                        )
                )
Developer: pyknife, Project: allennlp, Lines: 51, Source: elmo_test.py

Example 12: main

# Required import: from allennlp.data.iterators import BasicIterator [as alias]
# Or: from allennlp.data.iterators.BasicIterator import index_with [as alias]
def main(serialization_directory, device):
    """
    serialization_directory : str, required.
        The directory containing the serialized weights.
    device: int, default = -1
        The device to run the evaluation on.
    """

    config = Params.from_file(os.path.join(serialization_directory, "config.json"))
    dataset_reader = DatasetReader.from_params(config['dataset_reader'])
    evaluation_data_path = config['validation_data_path']

    model = Model.load(config, serialization_dir=serialization_directory, cuda_device=device)

    prediction_file_path = os.path.join(serialization_directory, "predictions.txt")
    gold_file_path = os.path.join(serialization_directory, "gold.txt")
    prediction_file = open(prediction_file_path, "w+")
    gold_file = open(gold_file_path, "w+")

    # Load the evaluation data and index it.
    print("Reading evaluation data from {}".format(evaluation_data_path))
    instances = dataset_reader.read(evaluation_data_path)
    iterator = BasicIterator(batch_size=32)
    iterator.index_with(model.vocab)

    model_predictions = []
    batches = iterator(instances, num_epochs=1, shuffle=False, cuda_device=device, for_training=False)
    for batch in Tqdm.tqdm(batches):
        result = model(**batch)
        predictions = model.decode(result)
        model_predictions.extend(predictions["tags"])

    for instance, prediction in zip(instances, model_predictions):
        fields = instance.fields
        try:
            # Most sentences have a verbal predicate, but not all.
            verb_index = fields["verb_indicator"].labels.index(1)
        except ValueError:
            verb_index = None

        gold_tags = fields["tags"].labels
        sentence = fields["tokens"].tokens

        write_to_conll_eval_file(prediction_file, gold_file,
                                 verb_index, sentence, prediction, gold_tags)
    prediction_file.close()
    gold_file.close()
Developer: Jordan-Sauchuk, Project: allennlp, Lines: 49, Source: write_srl_predictions_to_conll_format.py

Example 13: test_maximum_samples_per_batch

# Required import: from allennlp.data.iterators import BasicIterator [as alias]
# Or: from allennlp.data.iterators.BasicIterator import index_with [as alias]
    def test_maximum_samples_per_batch(self):
        for test_instances in (self.instances, self.lazy_instances):
            # pylint: disable=protected-access
            iterator = BasicIterator(
                    batch_size=3, maximum_samples_per_batch=['num_tokens', 9]
            )
            iterator.index_with(self.vocab)
            batches = list(iterator._create_batches(test_instances, shuffle=False))
            stats = self.get_batches_stats(batches)

            # ensure all instances are in a batch
            assert stats['total_instances'] == len(self.instances)

            # ensure correct batch sizes
            assert stats['batch_lengths'] == [2, 1, 1, 1]

            # ensure correct sample sizes (<= 9)
            assert stats['sample_sizes'] == [8, 3, 9, 1]
Developer: apmoore1, Project: allennlp, Lines: 20, Source: basic_iterator_test.py

Example 14: test_maximum_samples_per_batch_packs_tightly

# Required import: from allennlp.data.iterators import BasicIterator [as alias]
# Or: from allennlp.data.iterators.BasicIterator import index_with [as alias]
    def test_maximum_samples_per_batch_packs_tightly(self):
        # pylint: disable=protected-access
        token_counts = [10, 4, 3]
        test_instances = self.create_instances_from_token_counts(token_counts)

        iterator = BasicIterator(
                batch_size=3, maximum_samples_per_batch=['num_tokens', 11]
        )
        iterator.index_with(self.vocab)
        batches = list(iterator._create_batches(test_instances, shuffle=False))
        stats = self.get_batches_stats(batches)

        # ensure all instances are in a batch
        assert stats['total_instances'] == len(token_counts)

        # ensure correct batch sizes
        assert stats['batch_lengths'] == [1, 2]

        # ensure correct sample sizes (<= 11)
        assert stats['sample_sizes'] == [10, 8]
Developer: apmoore1, Project: allennlp, Lines: 22, Source: basic_iterator_test.py

Example 15: test_maximum_samples_per_batch

# Required import: from allennlp.data.iterators import BasicIterator [as alias]
# Or: from allennlp.data.iterators.BasicIterator import index_with [as alias]
    def test_maximum_samples_per_batch(self):
        for test_instances in (self.instances, self.lazy_instances):
            # pylint: disable=protected-access
            iterator = BasicIterator(
                    batch_size=3, maximum_samples_per_batch=['num_tokens', 9]
            )
            iterator.index_with(self.vocab)
            batches = list(iterator._create_batches(test_instances, shuffle=False))

            # ensure all instances are in a batch
            grouped_instances = [batch.instances for batch in batches]
            num_instances = sum(len(group) for group in grouped_instances)
            assert num_instances == len(self.instances)

            # ensure all batches are sufficiently small
            for batch in batches:
                batch_sequence_length = max(
                        [instance.get_padding_lengths()['text']['num_tokens']
                         for instance in batch.instances]
                )
                assert batch_sequence_length * len(batch.instances) <= 9
Developer: ziaridoy20, Project: allennlp, Lines: 23, Source: basic_iterator_test.py


Note: The allennlp.data.iterators.BasicIterator.index_with examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are drawn from open-source projects contributed by their authors; copyright in the source code remains with the original authors, and distribution and use are governed by each project's license. Do not reproduce without permission.