本文整理匯總了Python中allennlp.training.metrics.SpanBasedF1Measure方法的典型用法代碼示例。如果您正苦於以下問題:Python metrics.SpanBasedF1Measure方法的具體用法?Python metrics.SpanBasedF1Measure怎麽用?Python metrics.SpanBasedF1Measure使用的例子?那麽,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類allennlp.training.metrics的用法示例。
在下文中一共展示了metrics.SpanBasedF1Measure方法的12個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: __init__
# 需要導入模塊: from allennlp.training import metrics [as 別名]
# 或者: from allennlp.training.metrics import SpanBasedF1Measure [as 別名]
def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder) -> None:
    """Store the sub-modules and build the tag classifier, CRF and span F1 metric.

    Args:
        vocab: Vocabulary handed to the parent constructor; the size of its
            ``'labels'`` namespace sets both the classifier output width and
            the number of CRF tags.
        embedder: Text field embedder, kept on ``self._embedder``.
        encoder: Sequence encoder, kept on ``self._encoder``; its output
            dimension is the classifier's input width.
    """
    super().__init__(vocab)

    # The projection layer and the CRF are both sized by the label vocabulary.
    label_count = vocab.get_vocab_size('labels')

    self._embedder = embedder
    self._encoder = encoder
    self._classifier = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                       out_features=label_count)
    self._crf = ConditionalRandomField(label_count)
    self._f1 = SpanBasedF1Measure(vocab, 'labels')
示例2: __init__
# 需要導入模塊: from allennlp.training import metrics [as 別名]
# 或者: from allennlp.training.metrics import SpanBasedF1Measure [as 別名]
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             label_namespace: str = "labels",
             constraint_type: Optional[str] = None,
             include_start_end_transitions: bool = True,
             dropout: Optional[float] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build the tag projection, CRF layer and span F1 metric for the tagger.

    Args:
        vocab: Vocabulary passed to the parent constructor and to the metric.
        text_field_embedder: Embedder stored on ``self.text_field_embedder``;
            its output dimension is checked against the encoder input.
        encoder: Sequence encoder; its output dimension is the input width of
            the tag projection layer.
        label_namespace: Vocabulary namespace that holds the tags.
        constraint_type: Optional tag-encoding name. When given, it is used to
            derive CRF transition constraints and as the metric's
            ``label_encoding``; when ``None`` no constraints are built and the
            metric uses ``"BIO"``.
        include_start_end_transitions: Forwarded to ``ConditionalRandomField``.
        dropout: Optional dropout probability. Any falsy value (``None`` or
            ``0.0``) leaves ``self.dropout`` as ``None``.
        initializer: Applied to ``self`` as the final construction step.
        regularizer: Passed through to the parent constructor.
    """
    super().__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    # A falsy dropout value means no dropout layer is created at all.
    self.dropout = torch.nn.Dropout(dropout) if dropout else None
    self.tag_projection_layer = TimeDistributed(
        Linear(self.encoder.get_output_dim(), self.num_tags)
    )

    # Transition constraints are only derived when an encoding is named.
    constraints = None
    if constraint_type is not None:
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        constraints = allowed_transitions(constraint_type, labels)

    self.crf = ConditionalRandomField(
        self.num_tags,
        constraints,
        include_start_end_transitions=include_start_end_transitions,
    )

    # Without an explicit constraint type the metric falls back to "BIO".
    self.span_metric = SpanBasedF1Measure(vocab,
                                          tag_namespace=label_namespace,
                                          label_encoding=constraint_type or "BIO")

    check_dimensions_match(text_field_embedder.get_output_dim(),
                           encoder.get_input_dim(),
                           "text field embedding dim",
                           "encoder input dim")
    initializer(self)
示例3: test_bmes_span_metrics_are_computed_correctly
# 需要導入模塊: from allennlp.training import metrics [as 別名]
# 或者: from allennlp.training.metrics import SpanBasedF1Measure [as 別名]
def test_bmes_span_metrics_are_computed_correctly(self, device: str):
    """Compare overall precision/recall/F1 for BMES tags with hand-counted values."""
    # (bmes_tags) B:0, M:1, E:2, S:3.
    # Gold sequences:
    #   [S, B, M, E, S]
    #   [S, S, S, S, S]
    gold_indices = [[3, 0, 1, 2, 3], [3, 3, 3, 3, 3]]
    gold_tensor = torch.tensor(gold_indices, device=device)
    prediction_tensor = torch.rand([2, 5, 4], device=device)

    # Predicted sequences, one tag index per position:
    #   row 0: [S, B, E, S, S] -> TP: 2, FP: 2, FN: 1.
    #          (S@0 and S@4 are true positives; the B-E span and S@3 are
    #          false positives.)
    #   row 1: [B, E, S, B, E] -> TP: 1, FP: 2, FN: 4.
    #          (S@2 is the true positive; both B-E spans are false positives.)
    predicted_indices = [[3, 0, 2, 3, 3], [0, 2, 3, 0, 2]]
    for row, tag_row in enumerate(predicted_indices):
        for position, tag_index in enumerate(tag_row):
            # torch.rand draws from [0, 1), so a score of 1 wins the argmax.
            prediction_tensor[row, position, tag_index] = 1

    metric = SpanBasedF1Measure(self.vocab, "bmes_tags", label_encoding="BMES")
    metric(prediction_tensor, gold_tensor)

    # Totals over both rows: TP: 3, FP: 4, FN: 5.
    metric_dict = metric.get_metric()
    expected_scores = {
        "recall-overall": 0.375,
        "precision-overall": 0.428,
        "f1-measure-overall": 0.4,
    }
    for key, expected in expected_scores.items():
        assert_allclose(metric_dict[key], expected, rtol=0.001, atol=1e-03)
示例4: test_span_f1_accepts_tags_to_spans_function_argument
# 需要導入模塊: from allennlp.training import metrics [as 別名]
# 或者: from allennlp.training.metrics import SpanBasedF1Measure [as 別名]
def test_span_f1_accepts_tags_to_spans_function_argument(self, device: str):
    """Check that a custom ``tags_to_spans_function`` is accepted and that
    invalid constructor argument combinations raise ``ConfigurationError``."""

    def mock_tags_to_spans_function(tag_sequence, classes_to_ignore=None):
        # Returns the same single span no matter which tags are passed in.
        return [("mock", (42, 42))]

    # These tags should be ignored: the mock above produces the same span for
    # gold and predicted sequences alike, so every score is expected to be 1.
    bio_tags = ["B-ARG1", "O", "B-C-ARG1", "B-V", "B-ARGM-ADJ", "O"]
    gold_indices = [self.vocab.get_token_index(tag, "tags") for tag in bio_tags]
    gold_tensor = torch.tensor([gold_indices], device=device)
    num_tags = self.vocab.get_vocab_size("tags")
    prediction_tensor = torch.rand([1, 6, num_tags], device=device)

    metric = SpanBasedF1Measure(
        self.vocab,
        "tags",
        label_encoding=None,
        tags_to_spans_function=mock_tags_to_spans_function,
    )
    metric(prediction_tensor, gold_tensor)
    metric_dict = metric.get_metric()

    for key in ("recall-overall", "precision-overall", "f1-measure-overall"):
        assert_allclose(metric_dict[key], 1.0)

    # Each of these keyword combinations is expected to be rejected.
    invalid_configurations = [
        {"label_encoding": "INVALID"},
        {"tags_to_spans_function": mock_tags_to_spans_function},
        {"label_encoding": None, "tags_to_spans_function": None},
    ]
    for kwargs in invalid_configurations:
        with pytest.raises(ConfigurationError):
            SpanBasedF1Measure(self.vocab, **kwargs)
示例5: __init__
# 需要導入模塊: from allennlp.training import metrics [as 別名]
# 或者: from allennlp.training.metrics import SpanBasedF1Measure [as 別名]
def __init__(self, vocab,
             text_field_embedder,
             encoder,
             binary_feature_dim,
             embedding_dropout=0.0,
             initializer=InitializerApplicator(),
             regularizer=None,
             label_smoothing=None):
    """Build the layers and span metric of the semantic role labeler.

    Args:
        vocab: Vocabulary passed to the parent constructor and the metric.
        text_field_embedder: Embedder stored on ``self.text_field_embedder``.
        encoder: Sequence encoder; its input dimension must equal the text
            embedding dimension plus ``binary_feature_dim``.
        binary_feature_dim: Embedding size of the verb indicator feature.
        embedding_dropout: Probability given to the ``Dropout`` layer.
        initializer: Applied to ``self`` as the final construction step.
        regularizer: Passed through to the parent constructor.
        label_smoothing: Stored unchanged on ``self._label_smoothing``.
    """
    super(SemanticRoleLabeler, self).__init__(vocab, regularizer)

    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size(u"labels")

    # For the span based evaluation, we don't want to consider labels
    # for verb, because the verb index is provided to the model.
    self.span_metric = SpanBasedF1Measure(vocab,
                                          tag_namespace=u"labels",
                                          ignore_classes=[u"V"])

    self.encoder = encoder
    # There are exactly 2 binary features for the verb predicate embedding.
    self.binary_feature_embedding = Embedding(2, binary_feature_dim)
    projection = Linear(self.encoder.get_output_dim(), self.num_classes)
    self.tag_projection_layer = TimeDistributed(projection)
    self.embedding_dropout = Dropout(p=embedding_dropout)
    self._label_smoothing = label_smoothing

    # The encoder consumes the text embedding plus the verb indicator embedding.
    combined_input_dim = text_field_embedder.get_output_dim() + binary_feature_dim
    check_dimensions_match(combined_input_dim,
                           encoder.get_input_dim(),
                           u"text embedding dim + verb indicator embedding dim",
                           u"encoder input dim")
    initializer(self)
示例6: __init__
# 需要導入模塊: from allennlp.training import metrics [as 別名]
# 或者: from allennlp.training.metrics import SpanBasedF1Measure [as 別名]
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             seq2seq_encoder: Seq2SeqEncoder,
             initializer: InitializerApplicator) -> None:
    """Build the attention, projection layers, metrics and loss of the model.

    Args:
        vocab: Vocabulary passed to the parent constructor; its
            ``"state_change_type_labels"`` and ``"state_change_tags"``
            namespaces size the two output layers.
        text_field_embedder: Embedder stored on ``self.text_field_embedder``.
        seq2seq_encoder: Sequence encoder whose output dimension sizes the
            attention similarity and both projection layers.
        initializer: Applied to ``self`` as the final construction step.
    """
    super(ProLocalModel, self).__init__(vocab)

    type_namespace = "state_change_type_labels"
    tag_namespace = "state_change_tags"

    self.text_field_embedder = text_field_embedder
    self.seq2seq_encoder = seq2seq_encoder
    encoder_dim = seq2seq_encoder.get_output_dim()

    # Bilinear similarity between a vector of twice the encoder width and one
    # of the encoder width.
    similarity = BilinearSimilarity(2 * encoder_dim, encoder_dim)
    self.attention_layer = Attention(similarity_function=similarity, normalize=True)

    self.num_types = self.vocab.get_vocab_size(type_namespace)
    self.aggregate_feedforward = Linear(encoder_dim, self.num_types)

    # By default "O" is ignored in metric computation.
    self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=tag_namespace)

    self.num_tags = self.vocab.get_vocab_size(tag_namespace)
    # The tag projection input is two units wider than the encoder output.
    self.tag_projection_layer = TimeDistributed(Linear(encoder_dim + 2, self.num_tags))

    self._type_accuracy = CategoricalAccuracy()
    self.type_labels_vocab = self.vocab.get_index_to_token_vocabulary(type_namespace)
    # One F1 metric per type label, keyed as "type_<label>".
    self.type_f1_metrics = {
        "type_" + type_label: F1Measure(self.vocab.get_token_index(type_label, type_namespace))
        for type_label in self.type_labels_vocab.values()
    }

    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
示例7: __init__
# 需要導入模塊: from allennlp.training import metrics [as 別名]
# 或者: from allennlp.training.metrics import SpanBasedF1Measure [as 別名]
def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder) -> None:
    """Store the sub-modules and build the tag classifier and span F1 metric.

    Args:
        vocab: Vocabulary handed to the parent constructor; the size of its
            ``'labels'`` namespace is the classifier's output width.
        embedder: Text field embedder, kept on ``self._embedder``.
        encoder: Sequence encoder, kept on ``self._encoder``; its output
            dimension is the classifier's input width.
    """
    super().__init__(vocab)
    self._embedder = embedder
    self._encoder = encoder
    self._classifier = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                       out_features=vocab.get_vocab_size('labels'))
    # The encoding must be passed by keyword: per the AllenNLP API the third
    # positional parameter of SpanBasedF1Measure is ``ignore_classes``, so a
    # positional 'IOB1' would not select the IOB1 label encoding.
    self._f1 = SpanBasedF1Measure(vocab, 'labels', label_encoding='IOB1')
示例8: __init__
# 需要導入模塊: from allennlp.training import metrics [as 別名]
# 或者: from allennlp.training.metrics import SpanBasedF1Measure [as 別名]
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             calculate_span_f1: Optional[bool] = None,
             label_encoding: Optional[str] = None,
             label_namespace: str = "labels",
             verbose_metrics: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build the tag projection layer and the metrics of the tagger.

    Args:
        vocab: Vocabulary passed to the parent constructor and the metric.
        text_field_embedder: Embedder stored on ``self.text_field_embedder``;
            its output dimension is checked against the encoder input.
        encoder: Sequence encoder; its output dimension is the input width of
            the tag projection layer.
        calculate_span_f1: Optional flag. A truthy value requires
            ``label_encoding``. Note that an explicit ``False`` does not
            disable the span metric when ``label_encoding`` is given.
        label_encoding: Optional tag-encoding name forwarded to
            ``SpanBasedF1Measure``; when set, the span metric is created.
        label_namespace: Vocabulary namespace that holds the tags.
        verbose_metrics: Stored on ``self._verbose_metrics``.
        initializer: Applied to ``self`` as the final construction step.
        regularizer: Passed through to the parent constructor.

    Raises:
        ConfigurationError: If ``calculate_span_f1`` is truthy but no
            ``label_encoding`` was given.
    """
    super(SimpleTagger, self).__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self._verbose_metrics = verbose_metrics
    self.tag_projection_layer = TimeDistributed(
        Linear(self.encoder.get_output_dim(), self.num_classes)
    )

    check_dimensions_match(text_field_embedder.get_output_dim(),
                           encoder.get_input_dim(),
                           "text field embedding dim",
                           "encoder input dim")

    # We keep calculate_span_f1 as a constructor argument for API consistency
    # with the CrfTagger, even though it is redundant in this class
    # (label_encoding serves the same purpose).
    if calculate_span_f1 and not label_encoding:
        raise ConfigurationError("calculate_span_f1 is True, but "
                                 "no label_encoding was specified.")

    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }

    # The span metric is built whenever either argument asks for it.
    self._f1_metric = None
    if calculate_span_f1 or label_encoding:
        self._f1_metric = SpanBasedF1Measure(vocab,
                                             tag_namespace=label_namespace,
                                             label_encoding=label_encoding)

    initializer(self)
示例9: __init__
# 需要導入模塊: from allennlp.training import metrics [as 別名]
# 或者: from allennlp.training.metrics import SpanBasedF1Measure [as 別名]
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    calculate_span_f1: Optional[bool] = None,
    label_encoding: Optional[str] = None,
    label_namespace: str = "labels",
    verbose_metrics: bool = False,
    initializer: InitializerApplicator = InitializerApplicator(),
    **kwargs,
) -> None:
    """Build the tag projection layer and the metrics of the tagger.

    Args:
        vocab: Vocabulary passed to the parent constructor and the metric.
        text_field_embedder: Embedder stored on ``self.text_field_embedder``;
            its output dimension is checked against the encoder input.
        encoder: Sequence encoder; its output dimension is the input width of
            the tag projection layer.
        calculate_span_f1: Whether to build the span F1 metric. ``None``
            resolves to ``label_encoding is not None``; the resolved value is
            stored on ``self.calculate_span_f1``.
        label_encoding: Tag-encoding name forwarded to ``SpanBasedF1Measure``;
            required when span F1 is calculated.
        label_namespace: Vocabulary namespace that holds the tags.
        verbose_metrics: Stored on ``self._verbose_metrics``.
        initializer: Applied to ``self`` as the final construction step.
        **kwargs: Forwarded unchanged to the parent constructor.

    Raises:
        ConfigurationError: If span F1 is requested but ``label_encoding`` is
            missing or empty.
    """
    super().__init__(vocab, **kwargs)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self._verbose_metrics = verbose_metrics
    self.tag_projection_layer = TimeDistributed(
        Linear(self.encoder.get_output_dim(), self.num_classes)
    )

    check_dimensions_match(
        text_field_embedder.get_output_dim(),
        encoder.get_input_dim(),
        "text field embedding dim",
        "encoder input dim",
    )

    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3),
    }

    # We keep calculate_span_f1 as a constructor argument for API consistency
    # with the CrfTagger, even though it is redundant in this class
    # (label_encoding serves the same purpose).
    if calculate_span_f1 is None:
        calculate_span_f1 = label_encoding is not None
    self.calculate_span_f1 = calculate_span_f1

    # Requesting span F1 without naming an encoding is a configuration error.
    if calculate_span_f1 and not label_encoding:
        raise ConfigurationError(
            "calculate_span_f1 is True, but no label_encoding was specified."
        )

    self._f1_metric = None
    if calculate_span_f1:
        self._f1_metric = SpanBasedF1Measure(
            vocab, tag_namespace=label_namespace, label_encoding=label_encoding
        )

    initializer(self)
示例10: __init__
# 需要導入模塊: from allennlp.training import metrics [as 別名]
# 或者: from allennlp.training.metrics import SpanBasedF1Measure [as 別名]
def __init__(self, vocab,
             text_field_embedder,
             encoder,
             label_namespace=u"labels",
             constraint_type=None,
             feedforward=None,
             include_start_end_transitions=True,
             dropout=None,
             verbose_metrics=False,
             initializer=InitializerApplicator(),
             regularizer=None):
    """Build the tag projection, CRF layer and span F1 metric for the tagger.

    Args:
        vocab: Vocabulary passed to the parent constructor and the metric.
        text_field_embedder: Embedder stored on ``self.text_field_embedder``;
            its output dimension is checked against the encoder input.
        encoder: Sequence encoder stored on ``self.encoder``.
        label_namespace: Vocabulary namespace that holds the tags.
        constraint_type: Optional tag-encoding name. When given, it is used to
            derive CRF transition constraints and as the metric's
            ``label_encoding``; otherwise the metric uses ``u"BIO"``.
        feedforward: Optional module applied between encoder and projection;
            when present its output dimension sizes the projection and its
            input dimension is checked against the encoder output.
        include_start_end_transitions: Forwarded to ``ConditionalRandomField``.
        dropout: Optional dropout probability; any falsy value leaves
            ``self.dropout`` as ``None``.
        verbose_metrics: Stored on ``self._verbose_metrics``.
        initializer: Applied to ``self`` as the final construction step.
        regularizer: Passed through to the parent constructor.
    """
    super(CrfTagger, self).__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self._verbose_metrics = verbose_metrics
    # A falsy dropout value means no dropout layer is created at all.
    self.dropout = torch.nn.Dropout(dropout) if dropout else None
    self._feedforward = feedforward

    # The projection reads from the feedforward layer when there is one,
    # otherwise directly from the encoder.
    has_feedforward = feedforward is not None
    projection_source = feedforward if has_feedforward else self.encoder
    output_dim = projection_source.get_output_dim()
    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags))

    # Transition constraints are only derived when an encoding is named.
    constraints = None
    if constraint_type is not None:
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        constraints = allowed_transitions(constraint_type, labels)

    self.crf = ConditionalRandomField(
        self.num_tags,
        constraints,
        include_start_end_transitions=include_start_end_transitions,
    )

    # Without an explicit constraint type the metric falls back to "BIO".
    self.span_metric = SpanBasedF1Measure(vocab,
                                          tag_namespace=label_namespace,
                                          label_encoding=constraint_type or u"BIO")

    check_dimensions_match(text_field_embedder.get_output_dim(),
                           encoder.get_input_dim(),
                           u"text field embedding dim",
                           u"encoder input dim")
    if has_feedforward:
        check_dimensions_match(encoder.get_output_dim(),
                               feedforward.get_input_dim(),
                               u"encoder output dim",
                               u"feedforward input dim")
    initializer(self)
#overrides
示例11: test_span_metrics_are_computed_correctly
# 需要導入模塊: from allennlp.training import metrics [as 別名]
# 或者: from allennlp.training.metrics import SpanBasedF1Measure [as 別名]
def test_span_metrics_are_computed_correctly(self):
    """Check per-label span counts and scores, and that counts accumulate
    across repeated calls to the metric."""
    gold_labels = [u"O", u"B-ARG1", u"I-ARG1", u"O", u"B-ARG2", u"I-ARG2", u"O", u"O", u"O"]
    gold_indices = [self.vocab.get_token_index(label, u"tags") for label in gold_labels]
    # NOTE(review): the gold tensor has a single row while the predictions and
    # mask below have two -- confirm the metric really visits the masked row.
    gold_tensor = torch.Tensor([gold_indices])
    prediction_tensor = torch.rand([2, 9, self.vocab.get_vocab_size(u"tags")])

    # Test that the span measure ignores completely masked sequences by
    # passing a mask with a fully masked row.
    mask = torch.LongTensor([[1] * 9, [0] * 9])

    # Winning tag index per position, identical for both rows:
    #   positions 1-2 -> true positive  (ARG1)
    #   positions 4-5 -> false negative (ARG2 is missed)
    #   positions 7-8 -> false positive (ARG1)
    predicted_indices = [0, 1, 2, 0, 0, 0, 0, 1, 2]
    for position, tag_index in enumerate(predicted_indices):
        prediction_tensor[:, position, tag_index] = 1

    metric = SpanBasedF1Measure(self.vocab, u"tags")

    def assert_counts(num_calls):
        # (counter, expected ARG1 count per call, expected ARG2 count per call)
        expectations = (
            (metric._true_positives, 1, 0),
            (metric._false_negatives, 0, 1),
            (metric._false_positives, 1, 0),
        )
        for counts, arg1_per_call, arg2_per_call in expectations:
            assert counts[u"ARG1"] == arg1_per_call * num_calls
            assert counts[u"ARG2"] == arg2_per_call * num_calls
            assert u"O" not in list(counts.keys())

    metric(prediction_tensor, gold_tensor, mask)
    assert_counts(1)

    # Check things are accumulating correctly.
    metric(prediction_tensor, gold_tensor, mask)
    assert_counts(2)

    metric_dict = metric.get_metric()
    expected_scores = [
        (u"recall-ARG2", 0.0),
        (u"precision-ARG2", 0.0),
        (u"f1-measure-ARG2", 0.0),
        (u"recall-ARG1", 1.0),
        (u"precision-ARG1", 0.5),
        (u"f1-measure-ARG1", 0.666666666),
        (u"recall-overall", 0.5),
        (u"precision-overall", 0.5),
        (u"f1-measure-overall", 0.5),
    ]
    for key, expected in expected_scores:
        numpy.testing.assert_almost_equal(metric_dict[key], expected)
示例12: test_span_f1_matches_perl_script_for_continued_arguments
# 需要導入模塊: from allennlp.training import metrics [as 別名]
# 或者: from allennlp.training.metrics import SpanBasedF1Measure [as 別名]
def test_span_f1_matches_perl_script_for_continued_arguments(self):
    """Check that continued-argument tags (``C-ARG1``) are merged into the
    span they continue, and that the ARG1 true-positive count agrees with the
    output of the ``srl-eval.pl`` perl script."""
    bio_tags = [u"B-ARG1", u"O", u"B-C-ARG1", u"B-V", u"B-ARGM-ADJ", u"O"]
    # NOTE(review): the sentence has 7 tokens but only 6 tags are defined --
    # confirm that write_to_conll_eval_file tolerates the length difference.
    sentence = [u"Mark", u"and", u"Matt", u"were", u"running", u"fast", u"."]
    gold_indices = [self.vocab.get_token_index(x, u"tags") for x in bio_tags]
    gold_tensor = torch.Tensor([gold_indices])
    num_tokens = len(bio_tags)
    prediction_tensor = torch.rand([1, num_tokens, self.vocab.get_vocab_size(u"tags")])
    # One mask entry per tagged token, so the mask's sequence dimension
    # matches the gold and prediction tensors.
    mask = torch.LongTensor([[1] * num_tokens])

    # Make prediction so that it is exactly correct.
    for i, tag_index in enumerate(gold_indices):
        prediction_tensor[0, i, tag_index] = 1

    metric = SpanBasedF1Measure(self.vocab, u"tags")
    metric(prediction_tensor, gold_tensor, mask)
    metric_dict = metric.get_metric()

    # We merged the continued ARG1 label into a single span, so there should
    # be exactly 1 true positive for ARG1 and nothing present for C-ARG1
    assert metric._true_positives[u"ARG1"] == 1
    # The labels containing continuation references get merged into
    # the labels that they continue, so they should never appear in
    # the precision/recall counts.
    assert u"C-ARG1" not in list(metric._true_positives.keys())
    assert metric._true_positives[u"V"] == 1
    assert metric._true_positives[u"ARGM-ADJ"] == 1

    # Predictions equal the gold tags, so every score should be perfect.
    for label in (u"ARG1", u"V", u"ARGM-ADJ", u"overall"):
        for score in (u"recall", u"precision", u"f1-measure"):
            numpy.testing.assert_almost_equal(metric_dict[score + u"-" + label], 1.0)

    # Check that the number of true positive ARG1 labels is the same as the perl script's output:
    gold_file_path = os.path.join(self.TEST_DIR, u"gold_conll_eval.txt")
    prediction_file_path = os.path.join(self.TEST_DIR, u"prediction_conll_eval.txt")
    with open(gold_file_path, u"a+") as gold_file, open(prediction_file_path, u"a+") as prediction_file:
        # Use the same bio tags as prediction vs gold to make it obvious by looking
        # at the perl script output if something is wrong.
        write_to_conll_eval_file(gold_file, prediction_file, 4, sentence, bio_tags, bio_tags)

    # Run the official perl script and collect stdout.
    # NOTE(review): `unicode` is a Python 2 builtin; this snippet presumably
    # targets Python 2 or relies on a compatibility shim -- confirm.
    perl_script_command = [u"perl", unicode(self.TOOLS_ROOT / u"srl-eval.pl"),
                           prediction_file_path, gold_file_path]
    stdout = subprocess.check_output(perl_script_command, universal_newlines=True)
    stdout_lines = stdout.split(u"\n")

    # Parse the stdout of the perl script to find the ARG1 row (this happens to be line 8).
    arg1_row_tokens = [token for token in stdout_lines[8].split(u" ") if token]
    num_correct_arg1_instances_from_perl_evaluation = int(arg1_row_tokens[1])
    assert num_correct_arg1_instances_from_perl_evaluation == metric._true_positives[u"ARG1"]