This article collects typical usage examples of the Python class fairseq.bleu.Scorer. If you are unsure what bleu.Scorer does or how to use it in practice, the curated examples below may help; you can also explore further usages of its containing module, fairseq.bleu.
Eight code examples of bleu.Scorer are shown below, sorted by popularity by default.
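All eight examples follow the same basic pattern: build a Scorer from the pad, eos and unk indices of the target dictionary, feed it (reference, hypothesis) pairs of integer token tensors with add(), and read the result with score() or result_string(). The minimal sketch below illustrates that pattern; the special-token indices and token ids are placeholders, not taken from any of the examples.

import torch
from fairseq import bleu

# Placeholder special-token indices; in real code these come from the target
# dictionary, e.g. dst_dict.pad(), dst_dict.eos(), dst_dict.unk().
PAD, EOS, UNK = 1, 2, 3

scorer = bleu.Scorer(PAD, EOS, UNK)

# One placeholder reference/hypothesis pair as integer token tensors.
ref = torch.IntTensor([10, 11, 12, EOS])
hyp = torch.IntTensor([10, 11, 13, EOS])

scorer.add(ref, hyp)            # reference first, hypothesis second
print(scorer.result_string(4))  # BLEU report up to 4-grams
print(scorer.score())           # BLEU score as a float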
Example 1: main
# Required module: from fairseq import bleu [as alias]
# Or: from fairseq.bleu import Scorer [as alias]
# Not shown: this example also references argparse, os, sys, dictionary and tokenizer.
def main():
    parser = argparse.ArgumentParser(description='Command-line script for BLEU scoring.')
    parser.add_argument('-s', '--sys', default='-', help='system output')
    parser.add_argument('-r', '--ref', required=True, help='references')
    parser.add_argument('-o', '--order', default=4, metavar='N',
                        type=int, help='consider ngrams up to this order')
    parser.add_argument('--ignore-case', action='store_true',
                        help='case-insensitive scoring')
    args = parser.parse_args()
    print(args)

    assert args.sys == '-' or os.path.exists(args.sys), \
        "System output file {} does not exist".format(args.sys)
    assert os.path.exists(args.ref), \
        "Reference file {} does not exist".format(args.ref)

    dict = dictionary.Dictionary()

    def readlines(fd):
        for line in fd.readlines():
            if args.ignore_case:
                yield line.lower()
            else:
                yield line

    def score(fdsys):
        with open(args.ref) as fdref:
            scorer = bleu.Scorer(dict.pad(), dict.eos(), dict.unk())
            for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)):
                sys_tok = tokenizer.Tokenizer.tokenize(sys_tok, dict)
                ref_tok = tokenizer.Tokenizer.tokenize(ref_tok, dict)
                scorer.add(ref_tok, sys_tok)
            print(scorer.result_string(args.order))

    if args.sys == '-':
        score(sys.stdin)
    else:
        with open(args.sys, 'r') as f:
            score(f)
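The script reads system output from standard input by default (-s '-'), so hypotheses can be piped in while the reference file is passed with --ref; --order caps the n-gram order of the report and --ignore-case lowercases both sides before tokenization.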
Example 2: __init__
# Required module: from fairseq import bleu [as alias]
# Or: from fairseq.bleu import Scorer [as alias]
def __init__(self, args, src_dict, dst_dict):
    super().__init__(args, src_dict, dst_dict)
    self.translator = None
    self.scorer = bleu.Scorer(dst_dict.pad(), dst_dict.eos(), dst_dict.unk())
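Here the scorer is created once, when the task object is constructed, from the target dictionary's special-token indices, and can then be reused to accumulate corpus-level BLEU across batches; the translator itself starts out as None and is presumably built lazily later.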
Example 3: evaluate_weights
# Required module: from fairseq import bleu [as alias]
# Or: from fairseq.bleu import Scorer [as alias]
# Not shown: this example also references numpy (as np), torch and vocab_constants.
def evaluate_weights(scores_info, feature_weights, length_penalty):
    scorer = bleu.Scorer(
        vocab_constants.PAD_ID, vocab_constants.EOS_ID, vocab_constants.UNK_ID
    )
    for example in scores_info:
        weighted_scores = (example["scores"] * feature_weights).sum(axis=1)
        weighted_scores /= (example["tgt_len"] ** length_penalty) + 1e-12
        top_hypo_ind = np.argmax(weighted_scores)
        top_hypo = example["hypos"][top_hypo_ind]
        ref = example["target_tokens"]
        scorer.add(torch.IntTensor(ref), torch.IntTensor(top_hypo))
    return scorer.score()
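The heart of this example is the length-penalized reranking step, which is independent of the BLEU scorer itself. The toy snippet below reproduces that arithmetic on one hypothetical scores_info entry; the field names are taken from the code above, but the values, shapes and token ids are invented for illustration.

import numpy as np

# One hypothetical scores_info entry with two hypotheses and two features.
example = {
    "scores": np.array([[0.1, 0.9],         # per-hypothesis feature scores
                        [0.8, 0.2]]),
    "tgt_len": np.array([4.0, 3.0]),         # hypothesis lengths
    "hypos": [[10, 11, 12, 2], [13, 14, 2]],
    "target_tokens": [10, 11, 12, 2],
}
feature_weights = np.array([0.5, 0.5])
length_penalty = 1.0

# Weighted sum of features, then length normalization, then argmax.
weighted = (example["scores"] * feature_weights).sum(axis=1)
weighted /= (example["tgt_len"] ** length_penalty) + 1e-12
best = int(np.argmax(weighted))
print("selected hypothesis:", example["hypos"][best])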
Example 4: main
# Required module: from fairseq import bleu [as alias]
# Or: from fairseq.bleu import Scorer [as alias]
# Not shown: this example also references argparse, os, sys, dictionary and tokenizer.
def main():
    parser = argparse.ArgumentParser(description='Command-line script for BLEU scoring.')
    parser.add_argument('-s', '--sys', default='-', help='system output')
    parser.add_argument('-r', '--ref', required=True, help='references')
    parser.add_argument('-o', '--order', default=4, metavar='N',
                        type=int, help='consider ngrams up to this order')
    parser.add_argument('--ignore-case', action='store_true',
                        help='case-insensitive scoring')
    args = parser.parse_args()
    print(args)

    assert args.sys == '-' or os.path.exists(args.sys), \
        "System output file {} does not exist".format(args.sys)
    assert os.path.exists(args.ref), \
        "Reference file {} does not exist".format(args.ref)

    dict = dictionary.Dictionary()

    def readlines(fd):
        for line in fd.readlines():
            if args.ignore_case:
                yield line.lower()
            else:
                yield line

    def score(fdsys):
        with open(args.ref) as fdref:
            scorer = bleu.Scorer(dict.pad(), dict.eos(), dict.unk())
            for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)):
                sys_tok = tokenizer.Tokenizer.tokenize(sys_tok, dict)
                ref_tok = tokenizer.Tokenizer.tokenize(ref_tok, dict)
                scorer.add(ref_tok, sys_tok)
            print(scorer.result_string(args.order))

    if args.sys == '-':
        score(sys.stdin)
    else:
        with open(args.sys, 'r') as f:
            score(f)
Example 5: compute_many
# Required module: from fairseq import bleu [as alias]
# Or: from fairseq.bleu import Scorer [as alias]
# Not shown: this example also references torch, FairseqBleuMetric, and fairseq.bleu imported as fairseqbleu.
def compute_many(
    guess: torch.Tensor, answers: torch.Tensor, pad_idx, end_idx, unk_idx
):
    """
    Return BLEU-1..4 using fairseq and tokens.
    """
    if fairseqbleu is None:
        return None
    scorer = fairseqbleu.Scorer(pad_idx, end_idx, unk_idx)
    answers = answers.cpu().int()
    guess = guess.cpu().int()
    scorer.add(answers, guess)
    return [FairseqBleuMetric(scorer.score(i) / 100.0) for i in range(1, 5)]
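A minimal usage sketch, assuming compute_many (and FairseqBleuMetric) from the example above are in scope and fairseq is installed; the special-token indices and token ids are placeholders.

import torch

PAD, EOS, UNK = 1, 2, 3                        # placeholder indices
guess = torch.LongTensor([10, 11, 13, EOS])    # model output tokens
answers = torch.LongTensor([10, 11, 12, EOS])  # reference tokens

metrics = compute_many(guess, answers, PAD, EOS, UNK)
if metrics is not None:                        # None when fairseq is unavailable
    for n, metric in enumerate(metrics, start=1):
        print("BLEU-%d:" % n, metric)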
Example 6: main
# Required module: from fairseq import bleu [as alias]
# Or: from fairseq.bleu import Scorer [as alias]
# Not shown: this example also references os, sys, get_parser, dictionary and (optionally) sacrebleu.
def main():
    parser = get_parser()
    args = parser.parse_args()
    print(args)

    assert args.sys == '-' or os.path.exists(args.sys), \
        "System output file {} does not exist".format(args.sys)
    assert os.path.exists(args.ref), \
        "Reference file {} does not exist".format(args.ref)

    dict = dictionary.Dictionary()

    def readlines(fd):
        for line in fd.readlines():
            if args.ignore_case:
                yield line.lower()
            else:
                yield line

    if args.sacrebleu:
        import sacrebleu

        def score(fdsys):
            with open(args.ref) as fdref:
                print(sacrebleu.corpus_bleu(fdsys, [fdref]))
    else:
        def score(fdsys):
            with open(args.ref) as fdref:
                scorer = bleu.Scorer(dict.pad(), dict.eos(), dict.unk())
                for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)):
                    sys_tok = dict.encode_line(sys_tok)
                    ref_tok = dict.encode_line(ref_tok)
                    scorer.add(ref_tok, sys_tok)
                print(scorer.result_string(args.order))

    if args.sys == '-':
        score(sys.stdin)
    else:
        with open(args.sys, 'r') as f:
            score(f)
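Compared with Examples 1 and 4, this version tokenizes with dict.encode_line() instead of tokenizer.Tokenizer.tokenize(), and when --sacrebleu is passed it bypasses the token-level scorer entirely and reports BLEU on the raw lines via sacrebleu.corpus_bleu().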
Example 7: cli_main
# Required module: from fairseq import bleu [as alias]
# Or: from fairseq.bleu import Scorer [as alias]
# Not shown: this example also references os, sys, get_parser, dictionary and (optionally) sacrebleu.
def cli_main():
    parser = get_parser()
    args = parser.parse_args()
    print(args)

    assert args.sys == '-' or os.path.exists(args.sys), \
        "System output file {} does not exist".format(args.sys)
    assert os.path.exists(args.ref), \
        "Reference file {} does not exist".format(args.ref)

    dict = dictionary.Dictionary()

    def readlines(fd):
        for line in fd.readlines():
            if args.ignore_case:
                yield line.lower()
            else:
                yield line

    if args.sacrebleu:
        import sacrebleu

        def score(fdsys):
            with open(args.ref) as fdref:
                print(sacrebleu.corpus_bleu(fdsys, [fdref]))
    elif args.sentence_bleu:
        def score(fdsys):
            with open(args.ref) as fdref:
                scorer = bleu.Scorer(dict.pad(), dict.eos(), dict.unk())
                for i, (sys_tok, ref_tok) in enumerate(zip(readlines(fdsys), readlines(fdref))):
                    scorer.reset(one_init=True)
                    sys_tok = dict.encode_line(sys_tok)
                    ref_tok = dict.encode_line(ref_tok)
                    scorer.add(ref_tok, sys_tok)
                    print(i, scorer.result_string(args.order))
    else:
        def score(fdsys):
            with open(args.ref) as fdref:
                scorer = bleu.Scorer(dict.pad(), dict.eos(), dict.unk())
                for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)):
                    sys_tok = dict.encode_line(sys_tok)
                    ref_tok = dict.encode_line(ref_tok)
                    scorer.add(ref_tok, sys_tok)
                print(scorer.result_string(args.order))

    if args.sys == '-':
        score(sys.stdin)
    else:
        with open(args.sys, 'r') as f:
            score(f)
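The --sentence-bleu branch reuses a single scorer but calls scorer.reset(one_init=True) before each pair and prints a per-sentence result_string; one_init presumably initializes the n-gram counts to one so that individual sentences with missing higher-order matches still receive a smoothed, non-zero score.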
Example 8: main
# Required module: from fairseq import bleu [as alias]
# Or: from fairseq.bleu import Scorer [as alias]
# Not shown: this example also references argparse, pickle, tqdm, add_args, add_args_rescore, utils, Rescorer and find_top_tokens.
def main():
    parser = argparse.ArgumentParser(
        description=("Rescore generated hypotheses with extra models")
    )
    add_args(parser)
    add_args_rescore(parser)
    args = parser.parse_args()

    assert (
        args.translation_info_export_path is not None
    ), "--translation_info_export_path is required for rescoring"
    assert args.l2r_model_path is not None, "Rescoring needs forward model"

    _, _, forward_task = utils.load_diverse_ensemble_for_inference(
        [args.l2r_model_path]
    )
    rescorer = Rescorer(args, forward_task)
    dst_dict = forward_task.tgt_dict
    base_bleu_scorer = bleu.Scorer(dst_dict.pad(), dst_dict.eos(), dst_dict.unk())
    rescoring_bleu_scorer = bleu.Scorer(dst_dict.pad(), dst_dict.eos(), dst_dict.unk())

    with open(args.translation_info_export_path, "rb") as file:
        translation_info_list = pickle.load(file)

    scores_to_export_list = []
    trans_batch_info = []
    for k in tqdm(range(0, len(translation_info_list), args.batch_size)):
        trans_batch_info = translation_info_list[k : k + args.batch_size]
        for j in range(len(trans_batch_info)):
            trans_batch_info[j]["hypos"] = [
                {"score": hypo["score"], "tokens": hypo["tokens"].cuda()}
                for hypo in trans_batch_info[j]["hypos"]
            ]
        top_tokens, scores_to_export = find_top_tokens(
            args, trans_batch_info, rescorer, dst_dict.pad()
        )
        if args.scores_info_export_path is not None:
            scores_to_export_list += scores_to_export

        for i, trans_info in enumerate(trans_batch_info):
            base_bleu_scorer.add(
                trans_info["target_tokens"].int().cpu(),
                trans_info["hypos"][0]["tokens"].int().cpu(),
            )
            rescoring_bleu_scorer.add(
                trans_info["target_tokens"].int().cpu(), top_tokens[i].int().cpu()
            )
        trans_batch_info = []

    print("| Base ", base_bleu_scorer.result_string())
    print("| Rescoring ", rescoring_bleu_scorer.result_string())

    if args.scores_info_export_path is not None:
        with open(args.scores_info_export_path, "wb") as file:
            pickle.dump(scores_to_export_list, file)
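This example keeps two scorers side by side: base_bleu_scorer scores the original top hypothesis of each beam (hypos[0]), while rescoring_bleu_scorer scores the hypothesis selected by the rescorer, so the two result_string() lines at the end give a direct before/after BLEU comparison.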