本文整理汇总了Python中sacrebleu.corpus_bleu方法的典型用法代码示例。如果您正苦于以下问题:Python sacrebleu.corpus_bleu方法的具体用法?Python sacrebleu.corpus_bleu怎么用?Python sacrebleu.corpus_bleu使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sacrebleu的用法示例。
在下文中一共展示了sacrebleu.corpus_bleu方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: calc_bleu_from_stats
# 需要导入模块: import sacrebleu [as 别名]
# 或者: from sacrebleu import corpus_bleu [as 别名]
def calc_bleu_from_stats(sentence_stats: pd.DataFrame) -> sacrebleu.BLEU:
    """Fold per-sentence n-gram statistics into a single BLEU result.

    Every column of ``sentence_stats`` is summed over the rows and the
    resulting totals are handed to ``sacrebleu.compute_bleu``.

    Args:
        sentence_stats: Table providing the columns ``correct_1_grams`` ..
            ``correct_4_grams``, ``total_1_grams`` .. ``total_4_grams``,
            ``translation_length`` and ``reference_length``.

    Returns:
        The value returned by ``sacrebleu.compute_bleu`` for the summed
        statistics.
    """
    summed = sentence_stats.sum(axis=0)
    correct_columns = (
        "correct_1_grams",
        "correct_2_grams",
        "correct_3_grams",
        "correct_4_grams",
    )
    total_columns = (
        "total_1_grams",
        "total_2_grams",
        "total_3_grams",
        "total_4_grams",
    )
    matched_ngrams = [summed[column] for column in correct_columns]
    proposed_ngrams = [summed[column] for column in total_columns]
    return sacrebleu.compute_bleu(
        correct=matched_ngrams,
        total=proposed_ngrams,
        sys_len=summed.translation_length,
        ref_len=summed.reference_length,
    )
示例2: get_all_scores
# 需要导入模块: import sacrebleu [as 别名]
# 或者: from sacrebleu import corpus_bleu [as 别名]
def get_all_scores(
    orig_sents: List[str], sys_sents: List[str], refs_sents: List[List[str]],
    lowercase: bool = False, tokenizer: str = '13a', metrics: List[str] = DEFAULT_METRICS,
):
    """Compute the requested corpus-level metrics plus quality-estimation scores.

    Args:
        orig_sents: Original (source) sentences.
        sys_sents: System output sentences.
        refs_sents: Reference sentences, forwarded unchanged to the BLEU and
            SARI helpers.
        lowercase: Forwarded to every helper that accepts ``lowercase``.
        tokenizer: Tokenizer name forwarded to every helper.
        metrics: Names of the optional metrics to compute; ``'bleu'``,
            ``'sari'``, ``'samsa'`` and ``'fkgl'`` are recognised.

    Returns:
        A dict mapping score names to values rounded to two decimals.
    """
    collected = OrderedDict()
    if 'bleu' in metrics:
        bleu_result = corpus_bleu(
            sys_sents, refs_sents, force=True, tokenize=tokenizer, lowercase=lowercase
        )
        collected['BLEU'] = bleu_result.score
    if 'sari' in metrics:
        collected['SARI'] = corpus_sari(
            orig_sents, sys_sents, refs_sents, tokenizer=tokenizer, lowercase=lowercase
        )
    if 'samsa' in metrics:
        collected['SAMSA'] = corpus_samsa(
            orig_sents, sys_sents, tokenizer=tokenizer, verbose=True, lowercase=lowercase
        )
    if 'fkgl' in metrics:
        collected['FKGL'] = corpus_fkgl(sys_sents, tokenizer=tokenizer)

    # NOTE(review): the listing this came from lost its indentation; quality
    # estimation is treated as unconditional here -- confirm against upstream.
    quality = corpus_quality_estimation(
        orig_sents, sys_sents, tokenizer=tokenizer, lowercase=lowercase
    )
    merged = add_dicts(collected, quality)

    rounded = {}
    for name, value in merged.items():
        rounded[name] = round(value, 2)
    return rounded
示例3: score_batch
# 需要导入模块: import sacrebleu [as 别名]
# 或者: from sacrebleu import corpus_bleu [as 别名]
def score_batch(self,
                hypotheses: List[List[str]],
                references: List[List[str]]) -> float:
    """Score a batch of tokenised hypotheses against tokenised references.

    Each token list is joined with single spaces, and the joined references
    are passed to ``corpus_bleu`` as one reference stream.

    Args:
        hypotheses: One list of tokens per hypothesis.
        references: One list of tokens per reference.

    Returns:
        The ``score`` attribute of the ``corpus_bleu`` result.
    """
    joined_hyps = list(map(" ".join, hypotheses))
    joined_refs = list(map(" ".join, references))
    # Scoring options all come from this object's configuration.
    options = dict(
        smooth_method=self.smooth_method,
        smooth_value=self.smooth_value,
        force=self.force,
        lowercase=self.lowercase,
        tokenize=self.tokenize,
        use_effective_order=self.use_effective_order,
    )
    return corpus_bleu(joined_hyps, [joined_refs], **options).score
# pylint: disable=invalid-name
示例4: validate
# 需要导入模块: import sacrebleu [as 别名]
# 或者: from sacrebleu import corpus_bleu [as 别名]
def validate(model, test_data, beam_size=8, alpha=0.6, max_time_step=100):
    """For development only: decode ``test_data`` and score it with BLEU and chrF.

    Args:
        model: Model handed to ``generate_batch``.
        test_data: Iterable of batches; each batch must provide a ``'target'``
            entry holding one token sequence per example.
        beam_size: Forwarded to ``generate_batch``.
        alpha: Forwarded to ``generate_batch``.
        max_time_step: Forwarded to ``generate_batch``.

    Returns:
        A ``(bleu, chrf)`` tuple: ``bleu`` is the ``score`` of
        ``sacrebleu.corpus_bleu`` and ``chrf`` is the value returned by
        ``sacrebleu.corpus_chrf``.
    """
    pp = PostProcess()
    ref_stream = []
    sys_stream = []
    for batch in test_data:
        res = generate_batch(model, batch, beam_size, alpha, max_time_step)
        sys_stream.extend(res['token'])
        ref_stream.extend(batch['target'])
    assert len(sys_stream) == len(ref_stream)

    sys_stream = [pp.post_process(tokens) for tokens in sys_stream]
    # The original comprehension bound ``i`` but joined ``o``, which is not
    # defined in that scope under Python 3; join each reference's own tokens.
    ref_stream = [' '.join(tokens) for tokens in ref_stream]
    ref_streams = [ref_stream]

    bleu = sacrebleu.corpus_bleu(sys_stream, ref_streams,
                                 force=True, lowercase=False,
                                 tokenize='none').score
    chrf = sacrebleu.corpus_chrf(sys_stream, ref_stream)
    return bleu, chrf
示例5: evaluate
# 需要导入模块: import sacrebleu [as 别名]
# 或者: from sacrebleu import corpus_bleu [as 别名]
def evaluate(self, ignore_missing=False):
    """Score the stored answers against the target segments.

    The result is stored in ``self._results`` under ``'SacreBLEU'``. When
    ``self._tokenization`` is set, a second score computed on the
    pre-tokenised text is stored under ``'BLEU score'``.

    Args:
        ignore_missing: When true, only segments whose id appears in
            ``self.answers`` are scored; otherwise a missing answer is
            scored as the empty string.
    """
    if not ignore_missing:
        segments = self._target_segments
    else:
        answered_ids = set(self.answers.keys())
        segments = {
            sid: text
            for sid, text in self._target_segments.items()
            if sid in answered_ids
        }

    answers = []
    for sid in segments:
        answers.append(self.answers.get(sid, ""))
    references = list(segments.values())

    self._results = {'SacreBLEU': corpus_bleu(answers, [references]).score}

    tokenise = self._tokenization
    if tokenise is None:
        return
    # Text is tokenised here, so sacrebleu's own tokenisation is disabled.
    tokenised_answers = [tokenise(answer) for answer in answers]
    tokenised_references = [tokenise(target) for target in references]
    tokenised_bleu = corpus_bleu(
        tokenised_answers, [tokenised_references], tokenize='none', force=True
    )
    self._results['BLEU score'] = tokenised_bleu.score
示例6: bleu
# 需要导入模块: import sacrebleu [as 别名]
# 或者: from sacrebleu import corpus_bleu [as 别名]
def bleu(targets, predictions):
    """Compute a corpus BLEU score with sacrebleu.

    Args:
        targets: list of strings, or list of lists of strings when multiple
            references are present.
        predictions: list of strings.

    Returns:
        A dict with the single key ``"bleu"`` holding the score across all
        targets and predictions.
    """
    nested = isinstance(targets[0], list)
    if nested:
        reference_streams = [list(target) for target in targets]
    else:
        # corpus_bleu expects a list of reference streams, so a flat list of
        # strings is wrapped in an outer list.
        reference_streams = [targets]

    result = sacrebleu.corpus_bleu(
        predictions,
        reference_streams,
        smooth_method="exp",
        smooth_value=0.0,
        force=False,
        lowercase=False,
        tokenize="intl",
        use_effective_order=False,
    )
    return {"bleu": result.score}
示例7: cache_stats
# 需要导入模块: import sacrebleu [as 别名]
# 或者: from sacrebleu import corpus_bleu [as 别名]
def cache_stats(self, ref, out):
    """
    Cache sufficient statistics for calculating SacreBLEU score

    Args:
      ref: A reference corpus (one token sequence per sentence)
      out: An output corpus (one token sequence per sentence)

    Returns:
      A list with one ``(counts, totals, sys_len, ref_len)`` tuple per
      sentence pair
    """
    if self.case_insensitive:
        ref = corpus_utils.lower(ref)
        out = corpus_utils.lower(out)

    cached_stats = []
    for ref_tokens, out_tokens in zip(ref, out):
        hypothesis = " ".join(out_tokens)
        reference = " ".join(ref_tokens)
        # corpus_bleu takes a sequence of hypotheses and a sequence of
        # reference streams. Passing explicit one-element lists avoids
        # relying on the library coercing bare strings.
        bleu = sacrebleu.corpus_bleu([hypothesis], [[reference]])
        cached_stats.append(
            (bleu.counts, bleu.totals, bleu.sys_len, bleu.ref_len)
        )
    return cached_stats
示例8: computeBLEU
# 需要导入模块: import sacrebleu [as 别名]
# 或者: from sacrebleu import corpus_bleu [as 别名]
def computeBLEU(outputs, targets):
    """Return the lowercased corpus BLEU score of ``outputs``.

    ``targets`` holds, for each example, its sequence of references. It is
    transposed so that entry ``i`` of the result collects the ``i``-th
    reference of every example. The reference count is taken from the
    first example.
    """
    references_per_example = len(targets[0])
    reference_streams = []
    for position in range(references_per_example):
        reference_streams.append([example[position] for example in targets])
    result = corpus_bleu(outputs, reference_streams, lowercase=True)
    return result.score
示例9: corpus_bleu
# 需要导入模块: import sacrebleu [as 别名]
# 或者: from sacrebleu import corpus_bleu [as 别名]
def corpus_bleu(sys_stream, ref_streams):
    """Return the BLEU score of ``sys_stream`` against ``ref_streams``.

    Delegates to ``_corpus_bleu`` with ``tokenize='none'`` and returns only
    the ``score`` attribute of the result.
    """
    return _corpus_bleu(sys_stream, ref_streams, tokenize='none').score
示例10: sentence_bleu
# 需要导入模块: import sacrebleu [as 别名]
# 或者: from sacrebleu import corpus_bleu [as 别名]
def sentence_bleu(hypothesis, reference):
    """Return an add-one smoothed BLEU score for one hypothesis.

    Statistics are obtained from ``_corpus_bleu``. The match counts and
    totals at indices 1 to 3 are each incremented by one (index 0 is left
    untouched), and the score is recomputed with ``compute_bleu``.
    """
    stats = _corpus_bleu(hypothesis, reference)
    for order in (1, 2, 3):
        stats.counts[order] += 1
        stats.totals[order] += 1
    smoothed = compute_bleu(
        stats.counts,
        stats.totals,
        stats.sys_len,
        stats.ref_len,
        smooth='exp',
        smooth_floor=0.0,
    )
    return smoothed.score
示例11: _inference_with_bleu
# 需要导入模块: import sacrebleu [as 别名]
# 或者: from sacrebleu import corpus_bleu [as 别名]
def _inference_with_bleu(self, generator, sample, model):
    """Run inference on ``sample`` and score the output with sacrebleu.

    The first entry of each generation result is decoded as the hypothesis
    and the pad-stripped target as the reference.

    Returns:
        The object returned by ``sacrebleu.corpus_bleu``. sacrebleu's own
        tokenisation is disabled when ``self.args.eval_tokenized_bleu`` is
        set.
    """
    import sacrebleu

    def decode(toks, escape_unk=False):
        # The default unknown string in fairseq is `<unk>`, which sacrebleu
        # tokenizes as `< unk >`, inflating BLEU scores. A more verbose
        # placeholder is used instead: it is unlikely to appear in a real
        # reference and is not split into multiple tokens. Hypothesis and
        # reference get different placeholders.
        if escape_unk:
            unk_placeholder = "UNKNOWNTOKENINREF"
        else:
            unk_placeholder = "UNKNOWNTOKENINHYP"
        text = self.tgt_dict.string(
            toks.int().cpu(),
            self.args.eval_bleu_remove_bpe,
            unk_string=unk_placeholder,
        )
        if self.tokenizer:
            text = self.tokenizer.decode(text)
        return text

    gen_out = self.inference_step(generator, [model], sample, None)
    hyps = []
    refs = []
    for index, candidates in enumerate(gen_out):
        hyps.append(decode(candidates[0]['tokens']))
        target = utils.strip_pad(sample['target'][index], self.tgt_dict.pad())
        # escape_unk: don't count <unk> as matches to the hypo
        refs.append(decode(target, escape_unk=True))

    if self.args.eval_bleu_print_samples:
        logger.info('example hypothesis: ' + hyps[0])
        logger.info('example reference: ' + refs[0])

    bleu_options = {'tokenize': 'none'} if self.args.eval_tokenized_bleu else {}
    return sacrebleu.corpus_bleu(hyps, [refs], **bleu_options)
示例12: __call__
# 需要导入模块: import sacrebleu [as 别名]
# 或者: from sacrebleu import corpus_bleu [as 别名]
def __call__(self, ref_path, hyp_path):
    """Return the BLEU score of the file at ``hyp_path`` against ``ref_path``.

    Both paths are opened with ``tf.io.gfile.GFile`` and the open file
    objects are passed straight to ``corpus_bleu`` with ``force=True``.
    """
    with tf.io.gfile.GFile(ref_path) as references:
        with tf.io.gfile.GFile(hyp_path) as hypotheses:
            score = corpus_bleu(hypotheses, [references], force=True).score
    return score
示例13: corpus_bleu
# 需要导入模块: import sacrebleu [as 别名]
# 或者: from sacrebleu import corpus_bleu [as 别名]
def corpus_bleu(sys_sents: List[str],
                refs_sents: List[List[str]],
                smooth_method: str = 'exp',
                smooth_value: float = None,
                force: bool = True,
                lowercase: bool = False,
                tokenizer: str = '13a',
                use_effective_order: bool = False):
    """Normalise all sentences, then return their sacrebleu corpus BLEU score.

    Lowercasing and tokenisation are applied up front through
    ``utils_prep.normalize``, so sacrebleu itself is called with
    ``lowercase=False`` and ``tokenize='none'``.

    Args:
        sys_sents: System output sentences.
        refs_sents: Lists of reference sentences.
        smooth_method: Forwarded to ``sacrebleu.corpus_bleu``.
        smooth_value: Forwarded to ``sacrebleu.corpus_bleu``.
        force: Forwarded to ``sacrebleu.corpus_bleu``.
        lowercase: Passed to ``utils_prep.normalize``.
        tokenizer: Passed to ``utils_prep.normalize``.
        use_effective_order: Forwarded to ``sacrebleu.corpus_bleu``.

    Returns:
        The ``score`` attribute of the sacrebleu result.
    """
    def _prepare(sentence):
        return utils_prep.normalize(sentence, lowercase, tokenizer)

    prepared_sys = list(map(_prepare, sys_sents))
    prepared_refs = []
    for ref_sents in refs_sents:
        prepared_refs.append(list(map(_prepare, ref_sents)))

    result = sacrebleu.corpus_bleu(
        prepared_sys, prepared_refs, smooth_method, smooth_value, force,
        lowercase=False, tokenize='none', use_effective_order=use_effective_order,
    )
    return result.score
示例14: sentence_bleu
# 需要导入模块: import sacrebleu [as 别名]
# 或者: from sacrebleu import corpus_bleu [as 别名]
def sentence_bleu(sys_sent: str,
                  ref_sents: List[str],
                  smooth_method: str = 'floor',
                  smooth_value: float = None,
                  lowercase: bool = False,
                  tokenizer: str = '13a',
                  use_effective_order: bool = True):
    """Score one system sentence by treating it as a one-sentence corpus.

    The sentence is wrapped in a list and every reference becomes its own
    single-element list before delegating to ``corpus_bleu`` with
    ``force=True``.

    Returns:
        Whatever ``corpus_bleu`` returns for the wrapped inputs.
    """
    reference_streams = []
    for ref in ref_sents:
        reference_streams.append([ref])
    return corpus_bleu(
        [sys_sent],
        reference_streams,
        smooth_method,
        smooth_value,
        force=True,
        lowercase=lowercase,
        tokenizer=tokenizer,
        use_effective_order=use_effective_order,
    )
示例15: validate
# 需要导入模块: import sacrebleu [as 别名]
# 或者: from sacrebleu import corpus_bleu [as 别名]
def validate(model, test_data, golden_file, beam_size=8, alpha=0.6, max_time_step=100):
    """For development only: decode ``test_data`` and score it against a golden file.

    References are read from ``golden_file + '.input_clean'``: every line
    starting with ``'# ::tokens '`` carries a JSON list of tokens, which is
    joined with spaces and lowercased.

    Args:
        model: Model handed to ``generate_batch``.
        test_data: Iterable of batches to decode.
        golden_file: Path prefix; ``.input_clean`` and ``.preproc`` are
            appended to locate the two input files.
        beam_size: Forwarded to ``generate_batch``.
        alpha: Forwarded to ``generate_batch``.
        max_time_step: Forwarded to ``generate_batch``.

    Returns:
        A ``(bleu, chrf)`` tuple: ``bleu`` is the ``score`` of
        ``sacrebleu.corpus_bleu`` and ``chrf`` is the value returned by
        ``sacrebleu.corpus_chrf``.
    """
    pp = PostProcess()

    token_prefix = '# ::tokens '
    ref_stream = []
    # A context manager closes the file even if a line fails to parse; the
    # original left the handle open.
    with open(golden_file + '.input_clean') as golden:
        for line in golden:
            if line.startswith(token_prefix):
                tokens = json.loads(line[len(token_prefix):].strip())
                ref_stream.append(' '.join(tokens).lower())

    # gold model output (only the graph and abstract are used below)
    graph, _gold_sys_stream, _, abstract = read_file(golden_file + '.preproc')
    ref_streams = [ref_stream]

    sys_stream = []
    for batch in test_data:
        res = generate_batch(model, batch, beam_size, alpha, max_time_step)
        sys_stream.extend(res['token'])
    assert len(sys_stream) == len(ref_stream)

    sys_stream = [
        pp.post_process(output, abstract[i], graph[i])
        for i, output in enumerate(sys_stream)
    ]
    bleu = sacrebleu.corpus_bleu(sys_stream, ref_streams,
                                 force=True, lowercase=True,
                                 tokenize='none').score
    chrf = sacrebleu.corpus_chrf(sys_stream, ref_stream)
    return bleu, chrf