本文整理汇总了Python中pycocoevalcap.tokenizer.ptbtokenizer.PTBTokenizer类的典型用法代码示例。如果您正苦于以下问题:Python ptbtokenizer.PTBTokenizer类的具体用法?Python ptbtokenizer.PTBTokenizer怎么用?Python ptbtokenizer.PTBTokenizer使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块pycocoevalcap.tokenizer.ptbtokenizer的用法示例。
在下文中一共展示了ptbtokenizer.PTBTokenizer类的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def __init__(self, args, task):
    """Set up the sequence generator, the text decoders and the CIDEr scorer.

    Args:
        args: Options object. ``scst_beam``, ``scst_penalty`` and
            ``max_target_positions`` are read here, and the whole object is
            forwarded to ``encoders.build_tokenizer`` and
            ``encoders.build_bpe``.
        task: Task object; its ``target_dictionary`` supplies both the
            caption dictionary and the ``eos_index`` given to the generator.
    """
    super().__init__(args, task)
    self.task = task

    generator_options = dict(
        beam=args.scst_beam,
        penalty=args.scst_penalty,
        max_pos=args.max_target_positions,
        eos_index=task.target_dictionary.eos_index,
    )
    self.generator = SimpleSequenceGenerator(**generator_options)

    # Helpers needed for decoding model output back to a string.
    self.conf_tokenizer = encoders.build_tokenizer(args)
    self.conf_decoder = encoders.build_bpe(args)
    self.captions_dict = task.target_dictionary

    # Tokenizer and scorer needed for computing CIDEr scores.
    self.tokenizer = PTBTokenizer()
    self.scorer = Cider()
示例2: get_dcc_scores
# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def get_dcc_scores(self):
    """Score the result captions of the configured images with each metric.

    Image ids come from ``self.params['image_id']``. For every id the
    reference annotations are read from ``self.coco.imgToAnns`` and the
    candidate annotations from ``self.cocoRes.imgToAnns``; both sets are
    run through ``PTBTokenizer`` before scoring.

    Returns:
        dict: Metric name -> the first value returned by that scorer's
        ``compute_score``. Bleu contributes one entry per name
        (``Bleu_1`` .. ``Bleu_4``).
    """
    imgIds = self.params['image_id']
    gts = {}
    res = {}
    for imgId in imgIds:
        gts[imgId] = self.coco.imgToAnns[imgId]
        res[imgId] = self.cocoRes.imgToAnns[imgId]

    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(df='noc_test_freq'), "CIDEr"),
        (Spice(), "SPICE"),
    ]

    score_dict = {}
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            # A scorer registered under several names returns one score per
            # name; the per-item scores are not needed in this summary.
            for sc, _per_item, m in zip(score, scores, method):
                score_dict[m] = sc
                print("%s: %0.3f" % (m, sc))
        else:
            score_dict[method] = score
            print("%s: %0.3f" % (method, score))
    return score_dict
示例3: tokenize_captions
# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def tokenize_captions(output_dir, split, coco):
    """Return the PTB-tokenized annotations of the images listed for `split`.

    Args:
        output_dir: Directory holding the ``<split>-ids.txt`` file.
        split: Split name used to build the ids file name.
        coco: Object whose ``imgToAnns`` maps an image id to its annotations.

    Returns:
        Whatever ``PTBTokenizer().tokenize`` returns for the
        ``{image_id: annotations}`` mapping built here.
    """
    ids_path = os.path.join(output_dir, f'{split}-ids.txt')
    image_ids = data.read_image_ids(ids_path, non_redundant=True)
    references = {image_id: coco.imgToAnns[image_id] for image_id in image_ids}
    return PTBTokenizer().tokenize(references)
示例4: __init__
# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def __init__(self, ground_truth_filenames=None, prediction_filename=None,
             tious=None, max_proposals=1000,
             prediction_fields=PREDICTION_FIELDS, verbose=False):
    """Validate the inputs, load the data files and choose the scorers.

    Args:
        ground_truth_filenames: Passed to ``self.import_ground_truths``;
            must be truthy.
        prediction_filename: Passed to ``self.import_prediction``; must be
            truthy.
        tious: Non-empty sized collection of tIoU values, stored as
            ``self.tious``.
        max_proposals: Stored as ``self.max_proposals``.
        prediction_fields: Stored as ``self.pred_fields``.
        verbose: When true, Bleu, METEOR, ROUGE_L and CIDEr are all set up;
            otherwise only METEOR is.

    Raises:
        IOError: If `tious` is ``None`` or empty, or if either filename
            argument is missing.
    """
    # `tious` defaults to None, so it has to be tested before len() is
    # called; otherwise the default raises TypeError instead of the intended
    # IOError. len() (not truthiness) keeps array-like inputs working.
    if tious is None or len(tious) == 0:
        raise IOError('Please input a valid tIoU.')
    if not ground_truth_filenames:
        raise IOError('Please input a valid ground truth file.')
    if not prediction_filename:
        raise IOError('Please input a valid prediction file.')

    self.verbose = verbose
    self.tious = tious
    self.max_proposals = max_proposals
    self.pred_fields = prediction_fields
    self.ground_truths = self.import_ground_truths(ground_truth_filenames)
    self.prediction = self.import_prediction(prediction_filename)
    self.tokenizer = PTBTokenizer()

    # Set up scorers; if not verbose, we only use the one we're
    # testing on: METEOR.
    if self.verbose:
        self.scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
        ]
    else:
        self.scorers = [(Meteor(), "METEOR")]
示例5: score
# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def score(self, GT, RES, IDs):
    """Tokenize the selected entries and evaluate them with every scorer.

    ``self.eval`` and ``self.imgToEval`` are reset first. Results are
    recorded through ``self.setEval`` and ``self.setImgToEvalImgs``.

    Args:
        GT: Indexable by id; ``GT[ID]`` is handed to the tokenizer as the
            reference side.
        RES: Indexable by id; ``RES[ID]`` is handed to the tokenizer as the
            candidate side.
        IDs: Ids to evaluate; each must be a valid key of `GT` and `RES`.

    Returns:
        ``self.eval`` as left by the ``self.setEval`` calls.
    """
    self.eval = {}
    self.imgToEval = {}
    gts = {}
    res = {}
    for ID in IDs:
        gts[ID] = GT[ID]
        res[ID] = RES[ID]

    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        # SPICE is disabled: (Spice(), "SPICE")
    ]

    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            # One (score, per-item scores) pair per registered name.
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, IDs, m)
                print("%s: %0.3f" % (m, sc))
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, IDs, method)
            print("%s: %0.3f" % (method, score))
    return self.eval
示例6: score
# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def score(self, GT, RES, IDs):
    """Tokenize the selected entries, score them and print a summary.

    ``self.eval`` and ``self.imgToEval`` are reset first. Results are
    recorded through ``self.setEval`` and ``self.setImgToEvalImgs``; once
    every scorer has run, each ``self.eval`` entry is printed.

    Args:
        GT: Indexable by id; ``GT[ID]`` is handed to the tokenizer as the
            reference side.
        RES: Indexable by id; ``RES[ID]`` is handed to the tokenizer as the
            candidate side.
        IDs: Ids to evaluate; each must be a valid key of `GT` and `RES`.

    Returns:
        ``self.eval`` as left by the ``self.setEval`` calls.
    """
    self.eval = {}
    self.imgToEval = {}
    gts = {}
    res = {}
    for ID in IDs:
        gts[ID] = GT[ID]
        res[ID] = RES[ID]

    # Single-argument print(...) calls behave the same on Python 2 and 3,
    # unlike the print statements they replace.
    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
    ]

    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            # One (score, per-item scores) pair per registered name.
            for sc, scs, m in zip(score, scores, method):
                self.setEval(sc, m)
                self.setImgToEvalImgs(scs, IDs, m)
        else:
            self.setEval(score, method)
            self.setImgToEvalImgs(scores, IDs, method)

    for metric, value in self.eval.items():
        print('%s: %.3f' % (metric, value))
    return self.eval
示例7: eval_div_stats
# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def eval_div_stats(preds_n, model_id, split):
    """Compute diversity statistics and mutual Bleu for sampled captions.

    Predictions are grouped by ``image_id`` and tokenized. Div-1, Div-2 and
    global Div-1 come from ``compute_div_n`` / ``compute_global_div_n``.
    For mutual Bleu, each caption position is scored in turn against the
    other captions of the same image.

    Args:
        preds_n: Sequence of prediction dicts, each with an ``'image_id'``
            key. Every dict is mutated in place: it gains an ``'id'`` key
            (its index in `preds_n`) and an ``'mBleu_2'`` key.
        model_id: Unused; kept for interface compatibility.
        split: Unused; kept for interface compatibility.

    Returns:
        dict: ``'overall'`` holds the aggregate statistics and
        ``'ImgToEval'`` maps each image id to its mean ``'mBleu_2'`` and
        its ``'individuals'`` (the prediction dicts of that image).
    """
    tokenizer = PTBTokenizer()

    capsById = {}
    for i, d in enumerate(preds_n):
        d['id'] = i
        capsById[d['image_id']] = capsById.get(d['image_id'], []) + [d]

    # dict.keys() is a non-subscriptable view on Python 3, so materialize
    # the keys before taking the first one (also valid on Python 2).
    # NOTE(review): every image is assumed to have as many captions as the
    # first one -- confirm against the caller.
    n_caps_perimg = len(capsById[list(capsById.keys())[0]])
    print(n_caps_perimg)

    _capsById = capsById  # save the untokenized version
    capsById = tokenizer.tokenize(capsById)

    div_1, _ = compute_div_n(capsById, 1)
    div_2, _ = compute_div_n(capsById, 2)
    globdiv_1, _ = compute_global_div_n(capsById, 1)
    print('Diversity Statistics are as follows: \n Div1: %.2f, Div2: %.2f, gDiv1: %d\n'%(div_1,div_2, globdiv_1))

    # compute mbleu
    scorer = Bleu(4)
    all_scrs = []
    scrperimg = np.zeros((n_caps_perimg, len(capsById)))
    for i in range(n_caps_perimg):
        tempRefsById = {}
        candsById = {}
        for k in capsById:
            # Caption i is the candidate; the remaining ones are references.
            tempRefsById[k] = capsById[k][:i] + capsById[k][i+1:]
            candsById[k] = [capsById[k][i]]
        score, scores = scorer.compute_score(tempRefsById, candsById)
        all_scrs.append(score)
        # Index 1 is stored under the 'mBleu_2' key below; presumably the
        # per-image Bleu_2 scores -- TODO confirm.
        scrperimg[i, :] = scores[1]
    all_scrs = np.array(all_scrs)

    out = {}
    out['overall'] = {'Div1': div_1, 'Div2': div_2, 'gDiv1': globdiv_1}
    for k, score in zip(range(4), all_scrs.mean(axis=0).tolist()):
        out['overall'].update({'mBLeu_%d'%(k+1): score})

    imgToEval = {}
    for i, imgid in enumerate(capsById.keys()):
        imgToEval[imgid] = {'mBleu_2': scrperimg[:, i].mean()}
        imgToEval[imgid]['individuals'] = []
        for j, d in enumerate(_capsById[imgid]):
            imgToEval[imgid]['individuals'].append(preds_n[d['id']])
            imgToEval[imgid]['individuals'][-1]['mBleu_2'] = scrperimg[j, i]
    out['ImgToEval'] = imgToEval

    print('Mean mutual Bleu scores on this set is:\nmBLeu_1, mBLeu_2, mBLeu_3, mBLeu_4')
    print(all_scrs.mean(axis=0))
    return out
示例8: eval_div_stats
# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def eval_div_stats(dataset, preds_n, model_id, split):
    """Compute diversity statistics and mutual Bleu for sampled captions.

    Predictions are grouped per ``image_id`` and tokenized. Div-1, Div-2 and
    global Div-1 are obtained from ``compute_div_n`` and
    ``compute_global_div_n``. Mutual Bleu scores each caption position
    against the other captions of the same image.

    Args:
        dataset: Not used by this function.
        preds_n: Sequence of prediction dicts with an ``'image_id'`` key.
            Each dict is modified in place (``'id'`` and ``'mBleu_2'`` keys
            are written).
        model_id: Not used by this function.
        split: Not used by this function.

    Returns:
        dict: ``'overall'`` with the aggregate statistics and
        ``'ImgToEval'`` with the per-image results.
    """
    ptb = PTBTokenizer()

    # Group predictions per image, tagging each with its index in preds_n.
    raw_caps = {}
    for idx, pred in enumerate(preds_n):
        pred['id'] = idx
        raw_caps.setdefault(pred['image_id'], []).append(pred)

    first_image = list(raw_caps.keys())[0]
    n_caps_perimg = len(raw_caps[first_image])
    print(n_caps_perimg)

    # raw_caps keeps the untokenized predictions for the final report.
    tok_caps = ptb.tokenize(raw_caps)

    div_1, _ = compute_div_n(tok_caps, 1)
    div_2, _ = compute_div_n(tok_caps, 2)
    globdiv_1, _ = compute_global_div_n(tok_caps, 1)
    print('Diversity Statistics are as follows: \n Div1: %.2f, Div2: %.2f, gDiv1: %d\n'%(div_1,div_2, globdiv_1))

    # Mutual Bleu: hold out one caption position at a time as the candidate.
    bleu = Bleu(4)
    round_scores = []
    per_image = np.zeros((n_caps_perimg, len(tok_caps)))
    for held_out in range(n_caps_perimg):
        refs = {}
        cands = {}
        for key, caps in tok_caps.items():
            refs[key] = caps[:held_out] + caps[held_out + 1:]
            cands[key] = [caps[held_out]]
        score, scores = bleu.compute_score(refs, cands)
        round_scores.append(score)
        per_image[held_out, :] = scores[1]
    all_scrs = np.array(round_scores)
    mean_scores = all_scrs.mean(axis=0)

    overall = {'Div1': div_1, 'Div2': div_2, 'gDiv1': globdiv_1}
    for order, value in zip(range(4), mean_scores.tolist()):
        overall['mBLeu_%d'%(order+1)] = value
    out = {'overall': overall}

    imgToEval = {}
    for col, imgid in enumerate(tok_caps.keys()):
        entry = {'mBleu_2': per_image[:, col].mean(), 'individuals': []}
        imgToEval[imgid] = entry
        for row, raw in enumerate(raw_caps[imgid]):
            individual = preds_n[raw['id']]
            entry['individuals'].append(individual)
            individual['mBleu_2'] = per_image[row, col]
    out['ImgToEval'] = imgToEval

    print('Mean mutual Bleu scores on this set is:\nmBLeu_1, mBLeu_2, mBLeu_3, mBLeu_4')
    print(mean_scores)
    return out
示例9: eval_self_cider
# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def eval_self_cider(dataset, preds_n, model_id, split):
    """Compute the self-CIDEr diversity score of sampled captions.

    Document frequencies are built from the tokenized annotations of every
    image id returned by ``getCOCO(dataset).getImgIds()``. The predictions
    are then grouped by image and passed to ``Cider.my_self_cider``; a
    diversity value is derived from the eigenvalues of each returned matrix.

    Args:
        dataset: Passed to ``getCOCO``.
        preds_n: Iterable of prediction dicts with an ``'image_id'`` key.
        model_id: Unused; kept for interface compatibility.
        split: Unused; kept for interface compatibility.

    Returns:
        dict: ``{'overall': {'self_cider': mean}, 'imgToEval': {...}}``,
        where each image entry holds its ``'self_cider'`` value and the
        ``'self_cider_mat'`` matrix as nested lists.
    """
    coco = getCOCO(dataset)
    valids = coco.getImgIds()

    # Get Cider_scorer
    Cider_scorer = Cider(df='corpus')

    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize({imgId: coco.imgToAnns[imgId] for imgId in valids})

    # Feed the references in (no candidate) to build document frequencies.
    for imgId in valids:
        Cider_scorer.cider_scorer += (None, gts[imgId])
    Cider_scorer.cider_scorer.compute_doc_freq()
    Cider_scorer.cider_scorer.ref_len = np.log(float(len(Cider_scorer.cider_scorer.crefs)))

    # Prepare captions
    capsById = {}
    for d in preds_n:
        capsById[d['image_id']] = capsById.get(d['image_id'], []) + [d]
    capsById = tokenizer.tokenize(capsById)

    imgIds = list(capsById.keys())
    scores = Cider_scorer.my_self_cider([capsById[imgId] for imgId in imgIds])

    def get_div(eigvals):
        # Negative eigenvalues are clipped to zero before the square roots.
        eigvals = np.clip(eigvals, 0, None)
        # eigvalsh returns eigenvalues in ascending order, so [-1] is the
        # largest one.
        return -np.log(np.sqrt(eigvals[-1]) / (np.sqrt(eigvals).sum())) / np.log(len(eigvals))

    sc_scores = [get_div(np.linalg.eigvalsh(mat / 10)) for mat in scores]
    score = np.mean(np.array(sc_scores))

    imgToEval = {}
    for i, image_id in enumerate(imgIds):
        imgToEval[image_id] = {'self_cider': sc_scores[i], 'self_cider_mat': scores[i].tolist()}
    return {'overall': {'self_cider': score}, 'imgToEval': imgToEval}
示例10: score
# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def score(self, GT, RES, IDs):
    """Tokenize the selected entries and record one score per scorer.

    ``self.eval`` is reset to an ``OrderedDict`` and ``self.imgToEval`` to
    an empty dict. Results are recorded through ``self.setEval`` and
    ``self.setImgToEvalImgs``. For Bleu only the last entry (``Bleu_4``) is
    recorded, and every value given to ``setEval`` is a string formatted
    with four decimals.

    Args:
        GT: Indexable by id; ``GT[ID]`` is handed to the tokenizer as the
            reference side.
        RES: Indexable by id; ``RES[ID]`` is handed to the tokenizer as the
            candidate side.
        IDs: Ids to evaluate; each must be a valid key of `GT` and `RES`.

    Returns:
        ``self.eval`` as left by the ``self.setEval`` calls.
    """
    self.eval = OrderedDict()
    self.imgToEval = {}
    gts = {}
    res = {}
    for ID in IDs:
        gts[ID] = GT[ID]
        res[ID] = RES[ID]

    print('tokenization...')
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    # CIDEr is listed before ROUGE_L, which sets the order in which they
    # are computed and recorded.
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Cider(), "CIDEr"),
        (Rouge(), "ROUGE_L"),
        # SPICE is disabled: (Spice(), "SPICE")
    ]

    # =================================================
    # Compute scores
    # =================================================
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            # Keep only the last of the multi-name results (Bleu_4).
            self.setEval("%.4f" % score[-1], method[-1])
            self.setImgToEvalImgs(scores[-1], IDs, method[-1])
            print("%s: %0.4f" % (method[-1], score[-1]))
        else:
            self.setEval("%.4f" % score, method)
            self.setImgToEvalImgs(scores, IDs, method)
            print("%s: %0.4f" % (method, score))
    return self.eval