This article collects typical usage examples of the Python method seqeval.metrics.f1_score. If you have been wondering what exactly metrics.f1_score does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples of the module seqeval.metrics that this method belongs to.
In total, 15 code examples of metrics.f1_score are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Python code examples.
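Before the examples, here is a minimal, self-contained sketch of how seqeval.metrics.f1_score is typically called on BIO-tagged sequences (the tag sequences below are made up purely for illustration):

from seqeval.metrics import f1_score, precision_score, recall_score, classification_report

# One inner list of tags per sentence, in BIO format.
y_true = [['O', 'B-PER', 'I-PER', 'O', 'B-LOC'], ['B-ORG', 'I-ORG', 'O']]
y_pred = [['O', 'B-PER', 'I-PER', 'O', 'O'], ['B-ORG', 'I-ORG', 'O']]

# seqeval scores at the entity level: a predicted entity only counts as correct
# when both its span and its type match the gold entity.
print('f1:        {:.4f}'.format(f1_score(y_true, y_pred)))
print('precision: {:.4f}'.format(precision_score(y_true, y_pred)))
print('recall:    {:.4f}'.format(recall_score(y_true, y_pred)))
print(classification_report(y_true, y_pred))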
Example 1: evaluate_results
# Required import: from seqeval import metrics [as alias]
# Alternatively: from seqeval.metrics import f1_score [as alias]
def evaluate_results(net, test_loader, pad_id, cuda):
    logger.info("Evaluating test samples...")
    acc = 0; out_labels = []; true_labels = []
    net.eval()
    with torch.no_grad():
        for i, data in tqdm(enumerate(test_loader), total=len(test_loader)):
            x, e1_e2_start, labels, _, _, _ = data
            attention_mask = (x != pad_id).float()
            token_type_ids = torch.zeros((x.shape[0], x.shape[1])).long()
            if cuda:
                x = x.cuda()
                labels = labels.cuda()
                attention_mask = attention_mask.cuda()
                token_type_ids = token_type_ids.cuda()
            classification_logits = net(x, token_type_ids=token_type_ids, attention_mask=attention_mask, Q=None,
                                        e1_e2_start=e1_e2_start)
            accuracy, (o, l) = evaluate_(classification_logits, labels, ignore_idx=-1)
            out_labels.append([str(i) for i in o]); true_labels.append([str(i) for i in l])
            acc += accuracy
    accuracy = acc / (i + 1)
    results = {
        "accuracy": accuracy,
        "precision": precision_score(true_labels, out_labels),
        "recall": recall_score(true_labels, out_labels),
        "f1": f1_score(true_labels, out_labels)
    }
    logger.info("***** Eval results *****")
    for key in sorted(results.keys()):
        logger.info(" %s = %s", key, str(results[key]))
    return results
Example 2: on_epoch_end
# Required import: from seqeval import metrics [as alias]
# Alternatively: from seqeval.metrics import f1_score [as alias]
def on_epoch_end(self, epoch, logs={}):
    label_true, label_pred = [], []
    for i in range(len(self.seq)):
        x_true, y_true = self.seq[i]
        lengths = self.get_lengths(y_true)
        y_pred = self.model.predict_on_batch(x_true)
        y_true = self.t.inverse_transform(y_true, lengths)
        y_pred = self.t.inverse_transform(y_pred, lengths)
        label_true.extend(y_true)
        label_pred.extend(y_pred)
    acc = accuracy_score(label_true, label_pred)
    f1 = f1_seq_score(label_true, label_pred)
    print(' - acc: {:04.2f}'.format(acc * 100))
    print(' - f1: {:04.2f}'.format(f1 * 100))
    print(sequence_report(label_true, label_pred))
    logs['f1_seq'] = np.float64(f1)
    logs['seq_acc'] = np.float64(acc)
Example 3: on_epoch_end
# Required import: from seqeval import metrics [as alias]
# Alternatively: from seqeval.metrics import f1_score [as alias]
def on_epoch_end(self, epoch, logs={}):
    label_true = []
    label_pred = []
    for i in range(len(self.seq)):
        x_true, y_true = self.seq[i]
        lengths = self.get_lengths(y_true)
        y_pred = self.model.predict_on_batch(x_true)
        y_true = self.p.inverse_transform(y_true, lengths)
        y_pred = self.p.inverse_transform(y_pred, lengths)
        label_true.extend(y_true)
        label_pred.extend(y_pred)
    score = f1_score(label_true, label_pred)
    print(' - f1: {:04.2f}'.format(score * 100))
    print(classification_report(label_true, label_pred))
    logs['f1'] = score
Example 4: score
# Required import: from seqeval import metrics [as alias]
# Alternatively: from seqeval.metrics import f1_score [as alias]
def score(self, x_test, y_test):
    """Returns the f1-micro score on the given test data and labels.

    Args:
        x_test : array-like, shape = (n_samples, sent_length)
            Test samples.
        y_test : array-like, shape = (n_samples, sent_length)
            True labels for x.

    Returns:
        score : float, f1-micro score.
    """
    if self.model:
        x_test = self.p.transform(x_test)
        lengths = map(len, y_test)
        y_pred = self.model.predict(x_test)
        y_pred = self.p.inverse_transform(y_pred, lengths)
        score = f1_score(y_test, y_pred)
        return score
    else:
        raise OSError('Could not find a model. Call load(dir_path).')
Example 5: compute_metrics
# Required import: from seqeval import metrics [as alias]
# Alternatively: from seqeval.metrics import f1_score [as alias]
def compute_metrics(metric, preds, labels):
    assert len(preds) == len(labels)
    if metric == "mcc":
        return {"mcc": matthews_corrcoef(labels, preds)}
    elif metric == "acc":
        return simple_accuracy(preds, labels)
    elif metric == "acc_f1":
        return acc_and_f1(preds, labels)
    elif metric == "pear_spear":
        return pearson_and_spearman(preds, labels)
    # TODO this metric seems very specific for NER and doesnt work for
    elif metric == "seq_f1":
        return {"seq_f1": ner_f1_score(labels, preds)}
    elif metric == "f1_macro":
        return f1_macro(preds, labels)
    elif metric == "squad":
        return squad(preds, labels)
    elif metric == "mse":
        return {"mse": mean_squared_error(preds, labels)}
    elif metric == "r2":
        return {"r2": r2_score(preds, labels)}
    elif metric == "top_n_accuracy":
        return {"top_n_accuracy": top_n_accuracy(preds, labels)}
    # elif metric == "masked_accuracy":
    #     return simple_accuracy(preds, labels, ignore=-1)
    elif metric in registered_metrics:
        metric_func = registered_metrics[metric]
        return metric_func(preds, labels)
    else:
        raise KeyError(metric)
Example 6: score
# Required import: from seqeval import metrics [as alias]
# Alternatively: from seqeval.metrics import f1_score [as alias]
def score(self, y_true, y_pred):
    """Calculate f1 score.

    Args:
        y_true (list): true sequences.
        y_pred (list): predicted sequences.

    Returns:
        score: f1 score.
    """
    score = f1_score(y_true, y_pred)
    print(' - f1: {:04.2f}'.format(score * 100))
    if self.digits:
        print(classification_report(y_true, y_pred, digits=self.digits))
    return score
Example 7: main
# Required import: from seqeval import metrics [as alias]
# Alternatively: from seqeval.metrics import f1_score [as alias]
def main():
    # load the testset
    test_X, test_Y = nagisa.utils.load_file("kwdlc.test")

    # build the tagger for kwdlc
    ner_tagger = nagisa.Tagger(vocabs='kwdlc_ner_model.vocabs',
                               params='kwdlc_ner_model.params',
                               hp='kwdlc_ner_model.hp')

    # predict
    true_Y = []
    pred_Y = []
    for words, true_y in zip(test_X, test_Y):
        pred_y = ner_tagger.decode(words)
        _pred_y = []
        _true_y = []
        for word, pred, true in zip(words, pred_y, true_y):
            _pred_y.append(pred)
            _true_y.append(true)
        true_Y.append(_true_y)
        pred_Y.append(_pred_y)

    # evaluate
    accuracy = accuracy_score(true_Y, pred_Y)
    print("accuracy: {}".format(accuracy))
    f1 = f1_score(true_Y, pred_Y)
    print("macro-f1: {}".format(f1))
    report = classification_report(true_Y, pred_Y)
    print(report)
Example 8: eval_model
# Required import: from seqeval import metrics [as alias]
# Alternatively: from seqeval.metrics import f1_score [as alias]
def eval_model(self, eval_data, output_dir=None, verbose=True, silent=False, wandb_log=True, **kwargs):
    """
    Evaluates the model on eval_data. Saves results to output_dir.

    Args:
        eval_data: eval_data should be the path to a .txt file containing the evaluation data or a pandas DataFrame.
            If a text file is used the data should be in the CoNLL format, i.e. one word per line, with sentences separated by an empty line.
            The first word of the line should be a word, and the last should be a Named Entity Tag.
            If a DataFrame is given, each sentence should be split into words, with each word assigned a tag, and with all words from the same sentence given the same sentence_id.
        output_dir: The directory where model files will be saved. If not given, self.args.output_dir will be used.
        verbose: If verbose, results will be printed to the console on completion of evaluation.
        silent: If silent, tqdm progress bars will be hidden.
        wandb_log: If True, evaluation results will be logged to wandb.
        **kwargs: Additional metrics that should be used. Pass in the metrics as keyword arguments (name of metric: function to use), e.g. f1=sklearn.metrics.f1_score.
            A metric function should take in two parameters. The first parameter will be the true labels, and the second parameter will be the predictions.

    Returns:
        result: Dictionary containing evaluation results. (eval_loss, precision, recall, f1_score)
        model_outputs: List of raw model outputs
        preds_list: List of predicted tags
    """  # noqa: ignore flake8"
    if not output_dir:
        output_dir = self.args.output_dir

    self._move_model_to_device()

    eval_dataset = self.load_and_cache_examples(eval_data, evaluate=True)
    result, model_outputs, preds_list = self.evaluate(
        eval_dataset, output_dir, verbose=verbose, silent=silent, wandb_log=wandb_log, **kwargs
    )
    self.results.update(result)

    if verbose:
        logger.info(self.results)

    return result, model_outputs, preds_list
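As a complement to the docstring above, here is a hedged sketch of what a DataFrame-style eval_data might look like; the column names sentence_id / words / labels are an assumption for illustration, not taken from the snippet itself:

import pandas as pd

# One row per token; tokens from the same sentence share a sentence_id,
# and each token carries its NER tag (column names assumed for illustration).
eval_df = pd.DataFrame({
    "sentence_id": [0, 0, 0, 1, 1],
    "words": ["John", "lives", "here", "Acme", "Corp"],
    "labels": ["B-PER", "O", "O", "B-ORG", "I-ORG"],
})

# Hypothetical call, assuming `model` is an already-trained NER model object:
# result, model_outputs, preds_list = model.eval_model(eval_df)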
Example 9: _create_training_progress_scores
# Required import: from seqeval import metrics [as alias]
# Alternatively: from seqeval.metrics import f1_score [as alias]
def _create_training_progress_scores(self, **kwargs):
    extra_metrics = {key: [] for key in kwargs}
    training_progress_scores = {
        "global_step": [],
        "precision": [],
        "recall": [],
        "f1_score": [],
        "train_loss": [],
        "eval_loss": [],
        **extra_metrics,
    }
    return training_progress_scores
Example 10: _eval_end
# Required import: from seqeval import metrics [as alias]
# Alternatively: from seqeval.metrics import f1_score [as alias]
def _eval_end(self, outputs):
    "Evaluation called for both Val and Test"
    val_loss_mean = torch.stack([x["val_loss"] for x in outputs]).mean()
    preds = np.concatenate([x["pred"] for x in outputs], axis=0)
    preds = np.argmax(preds, axis=2)
    out_label_ids = np.concatenate([x["target"] for x in outputs], axis=0)

    label_map = {i: label for i, label in enumerate(self.labels)}
    out_label_list = [[] for _ in range(out_label_ids.shape[0])]
    preds_list = [[] for _ in range(out_label_ids.shape[0])]

    for i in range(out_label_ids.shape[0]):
        for j in range(out_label_ids.shape[1]):
            if out_label_ids[i, j] != self.pad_token_label_id:
                out_label_list[i].append(label_map[out_label_ids[i][j]])
                preds_list[i].append(label_map[preds[i][j]])

    results = {
        "val_loss": val_loss_mean,
        "precision": precision_score(out_label_list, preds_list),
        "recall": recall_score(out_label_list, preds_list),
        "f1": f1_score(out_label_list, preds_list),
    }

    if self.is_logger():
        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info(" %s = %s", key, str(results[key]))

    tensorboard_logs = results
    ret = {k: v for k, v in results.items()}
    ret["log"] = tensorboard_logs
    return ret, preds_list, out_label_list
Example 11: get_slot_metrics
# Required import: from seqeval import metrics [as alias]
# Alternatively: from seqeval.metrics import f1_score [as alias]
def get_slot_metrics(preds, labels):
    assert len(preds) == len(labels)
    return {
        "slot_precision": precision_score(labels, preds),
        "slot_recall": recall_score(labels, preds),
        "slot_f1": f1_score(labels, preds)
    }
Example 12: f1_pre_rec
# Required import: from seqeval import metrics [as alias]
# Alternatively: from seqeval.metrics import f1_score [as alias]
def f1_pre_rec(labels, preds):
    return {
        "precision": precision_score(labels, preds, suffix=True),
        "recall": recall_score(labels, preds, suffix=True),
        "f1": f1_score(labels, preds, suffix=True)
    }
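Example 12 passes suffix=True, which tells seqeval that the chunk indicator comes after the entity type (tags like 'PER-B' / 'PER-I' instead of 'B-PER' / 'I-PER'). A minimal sketch, assuming tags in that suffix style:

from seqeval.metrics import f1_score

# Tags written in suffix style: TYPE-B / TYPE-I rather than B-TYPE / I-TYPE.
y_true = [['PER-B', 'PER-I', 'O', 'LOC-B']]
y_pred = [['PER-B', 'PER-I', 'O', 'O']]

print(f1_score(y_true, y_pred, suffix=True))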
Example 13: evaluate
# Required import: from seqeval import metrics [as alias]
# Alternatively: from seqeval.metrics import f1_score [as alias]
def evaluate(self, data: List[List[str]], labels: List[List[str]]) -> float:
    """Evaluate the performance of the ner model with the given data and labels, and return
    the f1 score.

    Args:
        data: List of List of str. List of tokenized (char-level) texts,
            like ``[['我', '在', '上', '海', '上', '学'], ...]``.
        labels: List of List of str. The corresponding labels, usually in BIO or BIOES
            format, like ``[['O', 'O', 'B-LOC', 'I-LOC', 'O', 'O'], ...]``.

    Returns:
        Float. The F1 score.
    """
    features, y = self.preprocessor.prepare_input(data, labels)
    pred_probs = self.model.predict(features)
    if self.preprocessor.use_bert:
        pred_probs = pred_probs[:, 1:-1, :]  # remove the <CLS> and <SEP> positions
    lengths = [min(len(label), pred_prob.shape[0])
               for label, pred_prob in zip(labels, pred_probs)]
    y_pred = self.preprocessor.label_decode(pred_probs, lengths)

    r = metrics.recall_score(labels, y_pred)
    p = metrics.precision_score(labels, y_pred)
    f1 = metrics.f1_score(labels, y_pred)

    logging.info('Recall: {}, Precision: {}, F1: {}'.format(r, p, f1))
    logging.info(metrics.classification_report(labels, y_pred))
    return f1
Example 14: on_epoch_end
# Required import: from seqeval import metrics [as alias]
# Alternatively: from seqeval.metrics import f1_score [as alias]
def on_epoch_end(self, epoch, logs=None):
    pred_probs = self.model.predict(self.valid_features)
    if self.preprocessor.use_bert:
        pred_probs = pred_probs[:, 1:-1, :]  # remove the <CLS> and <SEP> positions
    y_pred = self.preprocessor.label_decode(pred_probs, self.get_lengths(pred_probs))

    r = metrics.recall_score(self.valid_labels, y_pred)
    p = metrics.precision_score(self.valid_labels, y_pred)
    f1 = metrics.f1_score(self.valid_labels, y_pred)

    logs['val_r'] = r
    logs['val_p'] = p
    logs['val_f1'] = f1
    print('Epoch {}: val_r: {}, val_p: {}, val_f1: {}'.format(epoch + 1, r, p, f1))
    print(metrics.classification_report(self.valid_labels, y_pred))
Example 15: reduce_aggregated_logs
# Required import: from seqeval import metrics [as alias]
# Alternatively: from seqeval.metrics import f1_score [as alias]
def reduce_aggregated_logs(self, aggregated_logs):
    """Reduces aggregated logs over validation steps."""
    label_class = aggregated_logs['label_class']
    predict_class = aggregated_logs['predict_class']
    return {
        'f1':
            seqeval_metrics.f1_score(label_class, predict_class),
        'precision':
            seqeval_metrics.precision_score(label_class, predict_class),
        'recall':
            seqeval_metrics.recall_score(label_class, predict_class),
        'accuracy':
            seqeval_metrics.accuracy_score(label_class, predict_class),
    }