This article collects typical usage examples of the Python method allennlp.data.Vocabulary.from_instances. If you are unsure what Vocabulary.from_instances does or how to call it, the curated examples below may help; you can also read further about the containing class, allennlp.data.Vocabulary.
Six code examples of Vocabulary.from_instances are shown below, ordered by popularity.
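Before the examples, here is a minimal, self-contained sketch of what Vocabulary.from_instances does: it counts the tokens contained in a collection of Instances and builds namespace-keyed token-to-index mappings. This sketch is not taken from the listings below; the sentences and the "sentence" field name are illustrative.

from allennlp.data import Instance, Token, Vocabulary
from allennlp.data.fields import TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

indexers = {'tokens': SingleIdTokenIndexer()}
instances = [
    Instance({'sentence': TextField([Token(t) for t in text.split()], indexers)})
    for text in ['the cat sat', 'the dog ran']
]
vocab = Vocabulary.from_instances(instances)
# The 'tokens' namespace holds the five distinct words plus padding/OOV entries.
print(vocab.get_vocab_size('tokens'))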
Example 1: test_batch_predictions_are_consistent

# Required imports (the original listing noted only the Vocabulary import;
# the rest are inferred from the code, with module paths as in AllenNLP 0.x):
from allennlp.common import Params
from allennlp.data import DatasetReader, Vocabulary
from allennlp.models import Model

def test_batch_predictions_are_consistent(self):
    # The CNN encoder has problems with this kind of test - it's not properly masked yet, so
    # changing the amount of padding in the batch will result in small differences in the
    # output of the encoder. Because BiDAF is so deep, these differences get magnified through
    # the network and make this test impossible. So, we'll remove the CNN encoder entirely
    # from the model for this test. If/when we fix the CNN encoder to work correctly with
    # masking, we can change this back to how the other models run this test, with just a
    # single line.
    # pylint: disable=protected-access,attribute-defined-outside-init

    # Save some state.
    saved_model = self.model
    saved_instances = self.instances

    # Modify the state, then run the test with the modified state.
    params = Params.from_file(self.param_file)
    reader = DatasetReader.from_params(params['dataset_reader'])
    # Keep only the single-id token indexer, dropping the character-level one.
    reader._token_indexers = {'tokens': reader._token_indexers['tokens']}
    self.instances = reader.read('tests/fixtures/data/squad.json')
    vocab = Vocabulary.from_instances(self.instances)
    for instance in self.instances:
        instance.index_fields(vocab)
    del params['model']['text_field_embedder']['token_characters']
    params['model']['phrase_layer']['input_size'] = 2
    self.model = Model.from_params(vocab, params['model'])

    self.ensure_batch_predictions_are_consistent()

    # Restore the state.
    self.model = saved_model
    self.instances = saved_instances
Example 2: set_up_model

# Required imports (inferred from the code, module paths as in AllenNLP 0.x):
from allennlp.common import Params
from allennlp.data import DatasetReader, Vocabulary
from allennlp.data.dataset import Batch
from allennlp.models import Model

def set_up_model(self, param_file, dataset_file):
    # pylint: disable=attribute-defined-outside-init
    self.param_file = param_file
    params = Params.from_file(self.param_file)

    # Build the dataset reader from config, read the fixture, and derive the
    # vocabulary directly from the instances.
    reader = DatasetReader.from_params(params['dataset_reader'])
    instances = reader.read(dataset_file)
    vocab = Vocabulary.from_instances(instances)
    self.vocab = vocab
    self.instances = instances
    self.model = Model.from_params(self.vocab, params['model'])

    # TODO(joelgrus) get rid of these
    # (a lot of the model tests use them, so they'll have to be changed)
    self.dataset = Batch(self.instances)
    self.dataset.index_instances(self.vocab)
Example 3: get_vocab_and_both_elmo_indexed_ids

# Required imports (inferred from the code, module paths as in AllenNLP 0.x):
from typing import List

from allennlp.data import Instance, Token, Vocabulary
from allennlp.data.dataset import Batch
from allennlp.data.fields import TextField
from allennlp.data.token_indexers import ELMoTokenCharactersIndexer, SingleIdTokenIndexer

def get_vocab_and_both_elmo_indexed_ids(batch: List[List[str]]):
    instances = []
    indexer = ELMoTokenCharactersIndexer()
    indexer2 = SingleIdTokenIndexer()
    for sentence in batch:
        tokens = [Token(token) for token in sentence]
        # Index each token both as ELMo character ids and as a single word id.
        field = TextField(tokens,
                          {'character_ids': indexer,
                           'tokens': indexer2})
        instance = Instance({"elmo": field})
        instances.append(instance)

    dataset = Batch(instances)
    vocab = Vocabulary.from_instances(instances)
    dataset.index_instances(vocab)
    return vocab, dataset.as_tensor_dict()["elmo"]
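A hypothetical call to the helper above (the sentences are made up for illustration). The returned tensor dict has one entry per indexer key in the TextField:

vocab, elmo_tensors = get_vocab_and_both_elmo_indexed_ids(
        [["The", "cat", "sat"], ["A", "dog"]])
print(elmo_tensors["character_ids"].shape)  # (batch_size, num_tokens, 50) ELMo character ids
print(elmo_tensors["tokens"].shape)         # (batch_size, num_tokens) single word ids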
Example 4: setUp

# Required imports (inferred from the code, module paths as in AllenNLP 0.x):
from allennlp.common import Params
from allennlp.data import Vocabulary
from allennlp.data.dataset_readers import SequenceTaggingDatasetReader
from allennlp.models.simple_tagger import SimpleTagger

def setUp(self):
    super(TestOptimizer, self).setUp()
    self.instances = SequenceTaggingDatasetReader().read(
            self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')
    vocab = Vocabulary.from_instances(self.instances)
    self.model_params = Params({
        "text_field_embedder": {
            "tokens": {
                "type": "embedding",
                "embedding_dim": 5
            }
        },
        "encoder": {
            "type": "lstm",
            "input_size": 5,
            "hidden_size": 7,
            "num_layers": 2
        }
    })
    self.model = SimpleTagger.from_params(vocab=vocab, params=self.model_params)
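A test body that exercises this fixture would typically build an optimizer over the model's parameters. The sketch below is illustrative and not part of the original listing; in AllenNLP 0.x, Optimizer.from_params takes a list of (name, parameter) pairs:

from allennlp.common import Params
from allennlp.training.optimizers import Optimizer

def test_optimizer_can_be_built_from_params(self):
    # Only optimize parameters that require gradients.
    parameters = [[name, param] for name, param in self.model.named_parameters()
                  if param.requires_grad]
    optimizer = Optimizer.from_params(parameters, Params({"type": "sgd", "lr": 0.01}))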
Example 5: set_up_model

# Required imports (inferred from the code, module paths as in AllenNLP 0.x):
from allennlp.common import Params
from allennlp.data import DatasetReader, Vocabulary
from allennlp.data.dataset import Batch
from allennlp.models import Model

def set_up_model(self, param_file, dataset_file):
    # pylint: disable=attribute-defined-outside-init
    self.param_file = param_file
    params = Params.from_file(self.param_file)

    reader = DatasetReader.from_params(params['dataset_reader'])
    instances = reader.read(dataset_file)
    # Use parameters for the vocabulary if they are present in the config file, so that
    # choices like "non_padded_namespaces", "min_count", etc. can be set if needed.
    if 'vocabulary' in params:
        vocab_params = params['vocabulary']
        vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
    else:
        vocab = Vocabulary.from_instances(instances)
    self.vocab = vocab
    self.instances = instances
    self.model = Model.from_params(vocab=self.vocab, params=params['model'])

    # TODO(joelgrus) get rid of these
    # (a lot of the model tests use them, so they'll have to be changed)
    self.dataset = Batch(self.instances)
    self.dataset.index_instances(self.vocab)
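For reference, a "vocabulary" section that would trigger the Vocabulary.from_params branch above might look like the sketch below. The keys shown are standard Vocabulary options, but the config is illustrative and the exact accepted value types (e.g. whether "min_count" is a single integer or a per-namespace mapping) vary across AllenNLP versions:

from allennlp.common import Params
from allennlp.data import Vocabulary

# Illustrative config: "min_count" drops rare tokens and "non_padded_namespaces"
# skips padding/OOV entries for the listed namespaces.
vocab_params = Params({
    "min_count": {"tokens": 2},
    "non_padded_namespaces": ["labels"],
})
vocab = Vocabulary.from_params(params=vocab_params, instances=instances)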
Example 6: setUp

# Required imports (inferred from the code, module paths as in AllenNLP 0.x):
import torch

from allennlp.common import Params
from allennlp.data import Vocabulary
from allennlp.data.dataset_readers import SequenceTaggingDatasetReader
from allennlp.data.iterators import BasicIterator
from allennlp.models.simple_tagger import SimpleTagger

def setUp(self):
    super(TestTrainer, self).setUp()
    self.instances = SequenceTaggingDatasetReader().read(
            self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')
    vocab = Vocabulary.from_instances(self.instances)
    self.vocab = vocab
    self.model_params = Params({
        "text_field_embedder": {
            "tokens": {
                "type": "embedding",
                "embedding_dim": 5
            }
        },
        "encoder": {
            "type": "lstm",
            "input_size": 5,
            "hidden_size": 7,
            "num_layers": 2
        }
    })
    self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params)
    self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01)
    self.iterator = BasicIterator(batch_size=2)
    # Give the iterator access to the vocabulary so it can index batches.
    self.iterator.index_with(vocab)
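A hypothetical continuation showing how these pieces would feed a Trainer; the argument values are illustrative, and the keyword signature matches the AllenNLP 0.x Trainer:

from allennlp.training import Trainer

trainer = Trainer(model=self.model,
                  optimizer=self.optimizer,
                  iterator=self.iterator,
                  train_dataset=self.instances,
                  num_epochs=2)
metrics = trainer.train()  # returns a dict of training metrics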