This article collects typical usage examples of Python's allennlp.data.Instance. If you have been wondering what exactly allennlp.data.Instance does, how to use it, or what it looks like in real code, the curated examples below may help. You can also explore the surrounding allennlp.data module for more context.
The following 15 code examples of allennlp.data.Instance are sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Python examples.
Example 1: evaluate
# Required import: from allennlp import data [as alias]
# Or: from allennlp.data import Instance [as alias]
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             output_file: str = None,
             eval_type: str = None) -> Dict[str, Any]:
    model.eval()

    iterator = data_iterator(instances, num_epochs=1)
    logger.info("Iterating over dataset")
    generator_tqdm = Tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances))
    with ExitStack() as stack:
        if output_file is None:
            file_handle = None
        else:
            file_handle = stack.enter_context(open(output_file, 'w'))
        for batch in generator_tqdm:
            model_output = model(**batch)
            metrics = model.get_metrics()
            if file_handle:
                _persist_data(file_handle, batch.get("metadata"), model_output, eval_type)
            description = ', '.join(["%s: %.2f" % (name, value) for name, value in metrics.items()]) + " ||"
            generator_tqdm.set_description(description)

    return model.get_metrics(reset=True)
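A usage sketch (my addition, not from the original source): this snippet targets the pre-1.0 AllenNLP API, where a DataIterator such as BasicIterator drives batching. The model, vocabulary, and instance names below are placeholders.

from allennlp.data.iterators import BasicIterator

# Assumes `model` and `test_instances` already exist (illustrative names).
iterator = BasicIterator(batch_size=32)
iterator.index_with(model.vocab)  # the iterator needs a vocabulary to tensorize instances
metrics = evaluate(model, test_instances, iterator, output_file="predictions.jsonl")
print(metrics)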
Example 2: json_to_labeled_instances
# Required import: from allennlp import data [as alias]
# Or: from allennlp.data import Instance [as alias]
def json_to_labeled_instances(self, inputs: JsonDict) -> List[Instance]:
    """
    Converts incoming JSON to an [`Instance`](../data/instance.md),
    runs the model on the newly created instance, and adds labels to the
    `Instance`s given by the model's output.

    # Returns

    `List[Instance]`
        A list of `Instance`s.
    """
    instance = self._json_to_instance(inputs)
    outputs = self._model.forward_on_instance(instance)
    new_instances = self.predictions_to_labeled_instances(instance, outputs)
    return new_instances
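This method lives on AllenNLP's Predictor class, so a hedged usage sketch looks like the following; the archive path is hypothetical and the input key ("sentence" here) depends on the concrete predictor.

from allennlp.predictors import Predictor

predictor = Predictor.from_path("model.tar.gz")  # hypothetical archive path
labeled = predictor.json_to_labeled_instances({"sentence": "AllenNLP makes NLP easy."})
print(len(labeled), list(labeled[0].fields.keys()))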
Example 3: evaluate
# Required import: from allennlp import data [as alias]
# Or: from allennlp.data import Instance [as alias]
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             output_file: str = None) -> Dict[str, Any]:
    model.eval()

    iterator = data_iterator(instances, num_epochs=1)
    logger.info("Iterating over dataset")
    generator_tqdm = Tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances))
    with ExitStack() as stack:
        if output_file is None:
            file_handle = None
        else:
            file_handle = stack.enter_context(open(output_file, 'w'))
        for batch in generator_tqdm:
            model_output = model(**batch)
            metrics = model.get_metrics()
            if file_handle:
                id2label = model.vocab.get_index_to_token_vocabulary("labels")
                _persist_data(file_handle, batch.get("metadata"), model_output, id2label=id2label)
            description = ', '.join(["%s: %.2f" % (name, value) for name, value in metrics.items()]) + " ||"
            generator_tqdm.set_description(description)

    return model.get_metrics()
Example 4: get_instances
# Required import: from allennlp import data [as alias]
# Or: from allennlp.data import Instance [as alias]
def get_instances(self):
    field1 = TextField(
        [Token(t) for t in ["this", "is", "a", "sentence", "."]], self.token_indexer
    )
    field2 = TextField(
        [Token(t) for t in ["this", "is", "a", "different", "sentence", "."]],
        self.token_indexer,
    )
    field3 = TextField(
        [Token(t) for t in ["here", "is", "a", "sentence", "."]], self.token_indexer
    )
    field4 = TextField([Token(t) for t in ["this", "is", "short"]], self.token_indexer)
    instances = [
        Instance({"text1": field1, "text2": field2}),
        Instance({"text1": field3, "text2": field4}),
    ]
    return instances
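A short sketch of what instances like these are typically used for next (my addition; it assumes the test-class context above, with `token_indexer` being a plain SingleIdTokenIndexer): building a vocabulary and indexing the fields against it.

from allennlp.data import Vocabulary

instances = self.get_instances()  # as defined above
vocab = Vocabulary.from_instances(instances)
for inst in instances:
    inst.index_fields(vocab)  # replaces tokens with vocabulary ids in every TextField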
Example 5: test_duplicate
# Required import: from allennlp import data [as alias]
# Or: from allennlp.data import Instance [as alias]
def test_duplicate(self):
    # Verify the `duplicate()` method works with a `PretrainedTransformerIndexer` in
    # a `TextField`. See https://github.com/allenai/allennlp/issues/4270.
    instance = Instance(
        {
            "words": TextField(
                [Token("hello")], {"tokens": PretrainedTransformerIndexer("bert-base-uncased")}
            )
        }
    )
    other = instance.duplicate()
    assert other == instance

    # Adding new fields to the original instance should not affect the duplicate.
    instance.add_field("labels", LabelField("some_label"))
    assert "labels" not in other.fields
    assert other != instance  # sanity check on the `__eq__` method.
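For context (a hedged note, not part of the original test): `duplicate()` exists because `copy.deepcopy` on an instance holding a `PretrainedTransformerIndexer` also deep-copies the underlying Hugging Face tokenizer, which is slow and can fail for some models, per the issue linked above.

import copy

fast_copy = instance.duplicate()     # shares the indexer's tokenizer object with the original
slow_copy = copy.deepcopy(instance)  # copies everything, including the transformer tokenizer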
Example 6: test_saving_and_loading_works_with_byte_encoding
# Required import: from allennlp import data [as alias]
# Or: from allennlp.data import Instance [as alias]
def test_saving_and_loading_works_with_byte_encoding(self):
    # We're going to set a vocabulary from a TextField using byte encoding, index it, save the
    # vocab, load the vocab, then index the text field again, and make sure we get the same
    # result.
    tokenizer = CharacterTokenizer(byte_encoding="utf-8")
    token_indexer = TokenCharactersIndexer(character_tokenizer=tokenizer, min_padding_length=2)
    tokens = [Token(t) for t in ["Øyvind", "für", "汉字"]]
    text_field = TextField(tokens, {"characters": token_indexer})
    dataset = Batch([Instance({"sentence": text_field})])
    vocab = Vocabulary.from_instances(dataset)
    text_field.index(vocab)
    indexed_tokens = deepcopy(text_field._indexed_tokens)

    vocab_dir = self.TEST_DIR / "vocab_save"
    vocab.save_to_files(vocab_dir)
    vocab2 = Vocabulary.from_files(vocab_dir)
    text_field2 = TextField(tokens, {"characters": token_indexer})
    text_field2.index(vocab2)
    indexed_tokens2 = deepcopy(text_field2._indexed_tokens)
    assert indexed_tokens == indexed_tokens2
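A small illustration of what byte encoding buys here (a sketch; the exact Token representation printed varies by AllenNLP version): each token is mapped to its UTF-8 bytes, so the character vocabulary can never exceed 256 entries plus padding, even with text like "汉字".

from allennlp.data.tokenizers import CharacterTokenizer

tok = CharacterTokenizer(byte_encoding="utf-8")
print(tok.tokenize("für"))  # tokens carrying byte ids (text_id), not printable characters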
Example 7: test_from_params_extend_config
# Required import: from allennlp import data [as alias]
# Or: from allennlp.data import Instance [as alias]
def test_from_params_extend_config(self):
    vocab_dir = self.TEST_DIR / "vocab_save"
    original_vocab = Vocabulary(non_padded_namespaces=["tokens"])
    original_vocab.add_token_to_namespace("a", namespace="tokens")
    original_vocab.save_to_files(vocab_dir)

    text_field = TextField(
        [Token(t) for t in ["a", "b"]], {"tokens": SingleIdTokenIndexer("tokens")}
    )
    instances = Batch([Instance({"text": text_field})])

    # If you ask to extend the vocab from `directory`, instances must be passed
    # to the Vocabulary constructor, or else there is nothing to extend to.
    params = Params({"type": "extend", "directory": vocab_dir})
    with pytest.raises(ConfigurationError):
        _ = Vocabulary.from_params(params)

    # If you ask to extend the vocab, the `directory` key must be present in params,
    # or else there is nothing to extend from.
    params = Params({"type": "extend"})
    with pytest.raises(ConfigurationError):
        _ = Vocabulary.from_params(params, instances=instances)
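For contrast, a sketch of the call that should succeed (my addition, not part of the test): with both the `directory` key and `instances` supplied, the saved vocabulary is loaded and then extended with tokens found in the instances.

params = Params({"type": "extend", "directory": vocab_dir})
extended = Vocabulary.from_params(params, instances=instances)
assert "b" in extended.get_token_to_index_vocabulary("tokens")  # "b" was not in the saved vocab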
Example 8: test_max_vocab_size_partial_dict
# Required import: from allennlp import data [as alias]
# Or: from allennlp.data import Instance [as alias]
def test_max_vocab_size_partial_dict(self):
    indexers = {
        "tokens": SingleIdTokenIndexer(),
        "token_characters": TokenCharactersIndexer(min_padding_length=3),
    }
    instance = Instance(
        {
            "text": TextField(
                [Token(w) for w in "Abc def ghi jkl mno pqr stu vwx yz".split(" ")], indexers
            )
        }
    )
    dataset = Batch([instance])
    params = Params({"max_vocab_size": {"tokens": 1}})
    vocab = Vocabulary.from_params(params=params, instances=dataset)
    assert len(vocab.get_index_to_token_vocabulary("tokens").values()) == 3  # 1 kept token + padding + OOV
    assert len(vocab.get_index_to_token_vocabulary("token_characters").values()) == 28  # 26 distinct characters + padding + OOV
Example 9: _predict_unknown
# Required import: from allennlp import data [as alias]
# Or: from allennlp.data import Instance [as alias]
def _predict_unknown(self, instance: Instance):
    """
    Maps each unknown label in each namespace to a default token
    :param instance: the instance containing a list of labels for each namespace
    """
    def replace_tokens(instance: Instance, namespace: str, token: str):
        # Replace any label that is missing from the model's vocabulary with the default token.
        if namespace not in instance.fields:
            return

        instance.fields[namespace].labels = [label
                                             if label in self._model.vocab._token_to_index[namespace]
                                             else token
                                             for label in instance.fields[namespace].labels]

    replace_tokens(instance, "lemmas", "↓0;d¦")
    replace_tokens(instance, "feats", "_")
    replace_tokens(instance, "xpos", "_")
    replace_tokens(instance, "upos", "NOUN")
    replace_tokens(instance, "head_tags", "case")
Example 10: get_vocab_and_both_elmo_indexed_ids
# Required import: from allennlp import data [as alias]
# Or: from allennlp.data import Instance [as alias]
def get_vocab_and_both_elmo_indexed_ids(batch):
    instances = []
    indexer = ELMoTokenCharactersIndexer()
    indexer2 = SingleIdTokenIndexer()
    for sentence in batch:
        tokens = [Token(token) for token in sentence]
        field = TextField(tokens,
                          {u'character_ids': indexer,
                           u'tokens': indexer2})
        instance = Instance({u"elmo": field})
        instances.append(instance)

    dataset = Batch(instances)
    vocab = Vocabulary.from_instances(instances)
    dataset.index_instances(vocab)
    return vocab, dataset.as_tensor_dict()[u"elmo"]
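A usage sketch (names and shapes illustrative, assuming the pre-1.0 AllenNLP API this snippet is written against): the returned tensor dict has one entry per indexer, and the ELMo entry holds per-token character id sequences.

sentences = [["The", "sky", "is", "blue", "."], ["Hello", "world"]]
vocab, tensors = get_vocab_and_both_elmo_indexed_ids(sentences)
print(tensors["character_ids"].shape)  # e.g. (2, 5, 50): batch x longest sentence x 50 char ids per token
print(tensors["tokens"].shape)         # (2, 5): single word ids, padded to the longest sentence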
Example 11: test_saving_and_loading_works_with_byte_encoding
# Required import: from allennlp import data [as alias]
# Or: from allennlp.data import Instance [as alias]
def test_saving_and_loading_works_with_byte_encoding(self):
    # We're going to set a vocabulary from a TextField using byte encoding, index it, save the
    # vocab, load the vocab, then index the text field again, and make sure we get the same
    # result.
    tokenizer = CharacterTokenizer(byte_encoding=u'utf-8')
    token_indexer = TokenCharactersIndexer(character_tokenizer=tokenizer)
    tokens = [Token(t) for t in [u"Øyvind", u"für", u"汉字"]]
    text_field = TextField(tokens, {u"characters": token_indexer})
    dataset = Batch([Instance({u"sentence": text_field})])
    vocab = Vocabulary.from_instances(dataset)
    text_field.index(vocab)
    indexed_tokens = deepcopy(text_field._indexed_tokens)  # pylint: disable=protected-access

    vocab_dir = self.TEST_DIR / u'vocab_save'
    vocab.save_to_files(vocab_dir)
    vocab2 = Vocabulary.from_files(vocab_dir)
    text_field2 = TextField(tokens, {u"characters": token_indexer})
    text_field2.index(vocab2)
    indexed_tokens2 = deepcopy(text_field2._indexed_tokens)  # pylint: disable=protected-access
    assert indexed_tokens == indexed_tokens2
Example 12: test_registrability
# Required import: from allennlp import data [as alias]
# Or: from allennlp.data import Instance [as alias]
def test_registrability(self):

    class MyVocabulary(object):
        @classmethod
        def from_params(cls, params, instances=None):
            # pylint: disable=unused-argument
            return MyVocabulary()

    MyVocabulary = Vocabulary.register(u'my-vocabulary')(MyVocabulary)

    params = Params({u'type': u'my-vocabulary'})
    instance = Instance(fields={})
    vocab = Vocabulary.from_params(params=params, instances=[instance])
    assert isinstance(vocab, MyVocabulary)
Example 13: predict_batch
# Required import: from allennlp import data [as alias]
# Or: from allennlp.data import Instance [as alias]
def predict_batch(self, texts):
    instances = []
    for text in texts:
        tokens = self._tokenizer.tokenize(text)
        instance = Instance({'tokens': TextField(tokens, self._token_indexers)})
        instances.append(instance)

    # Renamed from `result` to avoid shadowing the per-instance variable below.
    instance_results = self.model.forward_on_instances(instances)

    results = []
    for instance_result, text in zip(instance_results, texts):
        result = self._format_instance_result(instance_result)
        result['text'] = text
        results.append(result)
    return results
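A hedged usage sketch (the surrounding class, its tokenizer, indexers, and `_format_instance_result` are all assumed from context; `wrapper` is a hypothetical instance of that class):

texts = ["This movie was great.", "This movie was terrible."]
results = wrapper.predict_batch(texts)
for r in results:
    print(r["text"])  # each result dict carries the original text plus the formatted model output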
Example 14: text_to_instance
# Required import: from allennlp import data [as alias]
# Or: from allennlp.data import Instance [as alias]
def text_to_instance(self, tokens: List[Token], tags: List[str] = None) -> Instance:
    if len(tokens) > self._max_token_len:
        tokens = tokens[:self._max_token_len]
        print(f'Length of tokens exceeded the limit {self._max_token_len}. Truncating...')
        if tags:
            tags = tags[:self._max_token_len]

    fields = {}

    text_field = TextField(tokens, self._token_indexers)
    fields['tokens'] = text_field
    if tags:
        fields['tags'] = SequenceLabelField(tags, text_field)

    return Instance(fields)
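A usage sketch (my addition): `reader` stands for an instance of the dataset reader this method belongs to, and the tag scheme here is purely illustrative.

from allennlp.data.tokenizers import Token

tokens = [Token(w) for w in "John lives in Berlin".split()]
tags = ["U-PER", "O", "O", "U-LOC"]
instance = reader.text_to_instance(tokens, tags)
print(instance.fields["tags"].labels)  # ['U-PER', 'O', 'O', 'U-LOC']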
Example 15: _json_to_instance
# Required import: from allennlp import data [as alias]
# Or: from allennlp.data import Instance [as alias]
def _json_to_instance(self, json_dict: JsonDict) -> Instance:
    sentence = json_dict["sentence"]
    if "worlds" in json_dict:
        # This is grouped data
        worlds = json_dict["worlds"]
        if isinstance(worlds, str):
            worlds = json.loads(worlds)
    else:
        structured_rep = json_dict["structured_rep"]
        if isinstance(structured_rep, str):
            structured_rep = json.loads(structured_rep)
        worlds = [structured_rep]
    identifier = json_dict["identifier"] if "identifier" in json_dict else None
    instance = self._dataset_reader.text_to_instance(
        sentence=sentence,  # type: ignore
        structured_representations=worlds,
        identifier=identifier,
    )
    return instance
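For reference, the two JSON shapes this method accepts, as read off the code above (the field values are illustrative placeholders, not an official schema):

# Grouped: several structured worlds per sentence, possibly JSON-encoded as a string.
grouped = {"sentence": "There is a tower.", "worlds": [world_a, world_b], "identifier": "train-42"}

# Ungrouped: a single structured representation, wrapped into a one-element list internally.
single = {"sentence": "There is a tower.", "structured_rep": world_a}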