当前位置: 首页>>代码示例>>Python>>正文


Python ptbtokenizer.PTBTokenizer方法代码示例

本文整理汇总了Python中pycocoevalcap.tokenizer.ptbtokenizer.PTBTokenizer方法的典型用法代码示例。如果您正苦于以下问题:Python ptbtokenizer.PTBTokenizer方法的具体用法?Python ptbtokenizer.PTBTokenizer怎么用?Python ptbtokenizer.PTBTokenizer使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pycocoevalcap.tokenizer.ptbtokenizer的用法示例。


在下文中一共展示了ptbtokenizer.PTBTokenizer方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def __init__(self, args, task):
        """Initialize the criterion: build a caption generator, the decoding
        helpers, and the CIDEr machinery (presumably for self-critical
        sequence training, given the ``scst_*`` args — TODO confirm).

        Args:
            args: command-line namespace; must provide ``scst_beam``,
                ``scst_penalty`` and ``max_target_positions``.
            task: fairseq task exposing ``target_dictionary``.
        """
        super().__init__(args, task)
        self.task = task

        # Beam-search generator used to sample candidate captions.
        self.generator = SimpleSequenceGenerator(beam=args.scst_beam,
                                                 penalty=args.scst_penalty,
                                                 max_pos=args.max_target_positions,
                                                 eos_index=task.target_dictionary.eos_index)

        # Needed for decoding model output to string
        self.conf_tokenizer = encoders.build_tokenizer(args)
        self.conf_decoder = encoders.build_bpe(args)
        self.captions_dict = task.target_dictionary

        # Tokenizer needed for computing CIDEr scores
        self.tokenizer = PTBTokenizer()
        self.scorer = Cider() 
开发者ID:krasserm,项目名称:fairseq-image-captioning,代码行数:19,代码来源:criterion.py

示例2: get_dcc_scores

# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def get_dcc_scores(self):
    """Compute captioning metrics (BLEU 1-4, METEOR, ROUGE_L, CIDEr, SPICE)
    for the image ids listed in ``self.params['image_id']``.

    Returns:
        dict mapping metric name to its corpus-level score.
    """
    imgIds = self.params['image_id']
    # Gather ground-truth and candidate annotations per image.
    gts = {imgId: self.coco.imgToAnns[imgId] for imgId in imgIds}
    res = {imgId: self.cocoRes.imgToAnns[imgId] for imgId in imgIds}

    # PTB-tokenize both sides so every scorer sees identical preprocessing.
    tokenizer = PTBTokenizer()
    gts = tokenizer.tokenize(gts)
    res = tokenizer.tokenize(res)

    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Rouge(), "ROUGE_L"),
        # CIDEr document frequencies come from the 'noc_test_freq' corpus.
        (Cider(df='noc_test_freq'), "CIDEr"),
        (Spice(), "SPICE")
    ]
    score_dict = {}
    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(gts, res)
        if isinstance(method, list):
            # Bleu returns one corpus score per n-gram order.
            for sc, m in zip(score, method):
                score_dict[m] = sc
                print("%s: %0.3f" % (m, sc))
        else:
            score_dict[method] = score
            print("%s: %0.3f" % (method, score))

    return score_dict
开发者ID:jiasenlu,项目名称:NeuralBabyTalk,代码行数:35,代码来源:coco_eval.py

示例3: tokenize_captions

# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def tokenize_captions(output_dir, split, coco):
    """PTB-tokenize the ground-truth captions for one dataset split.

    Args:
        output_dir: directory containing the '<split>-ids.txt' image-id file.
        split: dataset split name used to locate the id file.
        coco: COCO API object exposing ``imgToAnns``.

    Returns:
        dict mapping image id to its list of tokenized captions.
    """
    image_ids = data.read_image_ids(os.path.join(output_dir, f'{split}-ids.txt'), non_redundant=True)

    # Collect every annotated caption for each requested image.
    gts = {image_id: coco.imgToAnns[image_id] for image_id in image_ids}

    return PTBTokenizer().tokenize(gts)
开发者ID:krasserm,项目名称:fairseq-image-captioning,代码行数:12,代码来源:tokenize_captions_scst.py

示例4: __init__

# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def __init__(self, ground_truth_filenames=None, prediction_filename=None,
                 tious=None, max_proposals=1000,
                 prediction_fields=PREDICTION_FIELDS, verbose=False):
        """Load ground truth and predictions for dense-captioning evaluation.

        Args:
            ground_truth_filenames: path(s) to ground-truth annotation files.
            prediction_filename: path to the submission/prediction file.
            tious: iterable of temporal-IoU thresholds to evaluate at.
            max_proposals: maximum number of proposals considered.
            prediction_fields: required top-level fields of the prediction file.
            verbose: if True evaluate BLEU/METEOR/ROUGE/CIDEr; else METEOR only.

        Raises:
            IOError: if tious is missing/empty or either file argument is absent.
        """
        # Check that the gt and submission files exist and load them.
        # BUGFIX: 'not tious' also covers the default of None; the original
        # 'len(tious) == 0' crashed with a TypeError for the default instead
        # of raising the intended IOError.
        if not tious:
            raise IOError('Please input a valid tIoU.')
        if not ground_truth_filenames:
            raise IOError('Please input a valid ground truth file.')
        if not prediction_filename:
            raise IOError('Please input a valid prediction file.')

        self.verbose = verbose
        self.tious = tious
        self.max_proposals = max_proposals
        self.pred_fields = prediction_fields
        self.ground_truths = self.import_ground_truths(ground_truth_filenames)
        self.prediction = self.import_prediction(prediction_filename)
        # Shared tokenizer so references and predictions get identical preprocessing.
        self.tokenizer = PTBTokenizer()

        # Set up scorers, if not verbose, we only use the one we're
        # testing on: METEOR
        if self.verbose:
            self.scorers = [
                (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
                (Meteor(), "METEOR"),
                (Rouge(), "ROUGE_L"),
                (Cider(), "CIDEr")
            ]
        else:
            self.scorers = [(Meteor(), "METEOR")]
开发者ID:JaywongWang,项目名称:DenseVideoCaptioning,代码行数:32,代码来源:evaluate_old.py

示例5: score

# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def score(self, GT, RES, IDs):
        """Tokenize and score candidate captions against references.

        Args:
            GT: dict mapping id -> list of reference captions.
            RES: dict mapping id -> list of candidate captions.
            IDs: iterable of ids to evaluate.

        Returns:
            dict mapping metric name to its corpus-level score
            (also stored on ``self.eval``).
        """
        self.eval = {}
        self.imgToEval = {}
        # Restrict evaluation to the requested ids.
        gts = {ID: GT[ID] for ID in IDs}
        res = {ID: RES[ID] for ID in IDs}
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
            # (Spice(), "SPICE")  # optional: SPICE is slow; enable if needed
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                # Bleu returns parallel lists: one entry per n-gram order.
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, IDs, m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, IDs, method)
                print("%s: %0.3f" % (method, score))

        return self.eval
开发者ID:xiadingZ,项目名称:video-caption-openNMT.pytorch,代码行数:48,代码来源:cocoeval.py

示例6: score

# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def score(self, GT, RES, IDs):
        """Tokenize and score candidate captions against references.

        Args:
            GT: dict mapping id -> list of reference captions.
            RES: dict mapping id -> list of candidate captions.
            IDs: iterable of ids to evaluate.

        Returns:
            dict mapping metric name to its corpus-level score
            (also stored on ``self.eval``).
        """
        self.eval = {}
        self.imgToEval = {}
        gts = {}
        res = {}
        for ID in IDs:
            gts[ID] = GT[ID]
            res[ID] = RES[ID]
        # BUGFIX: parenthesized print calls run under both Python 2 and 3;
        # the original Python-2-only print statements are a SyntaxError in 3.
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                # Bleu returns parallel lists: one entry per n-gram order.
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, IDs, m)
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, IDs, method)

        for metric, score in self.eval.items():
            print('%s: %.3f' % (metric, score))
        return self.eval
开发者ID:VisionLearningGroup,项目名称:caption-guided-saliency,代码行数:46,代码来源:cocoeval.py

示例7: eval_div_stats

# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def eval_div_stats(preds_n, model_id, split):
    """Compute caption-diversity statistics over n sampled captions per image.

    Reports distinct n-gram ratios (Div1/Div2), the global distinct-unigram
    count (gDiv1), and mutual BLEU between each caption and its siblings.

    Args:
        preds_n: list of prediction dicts, each carrying an 'image_id'.
        model_id: model identifier (unused here; kept for interface parity).
        split: split name (unused here; kept for interface parity).

    Returns:
        dict with aggregate stats under 'overall' and per-image results
        under 'ImgToEval'.
    """
    tokenizer = PTBTokenizer()

    capsById = {}
    for i, d in enumerate(preds_n):
        d['id'] = i
        capsById[d['image_id']] = capsById.get(d['image_id'], []) + [d]

    # BUGFIX: dict.keys() is not subscriptable in Python 3 -- materialize a
    # list first (matches the corrected variant of this function elsewhere).
    n_caps_perimg = len(capsById[list(capsById.keys())[0]])
    print(n_caps_perimg)
    _capsById = capsById  # save the untokenized version
    capsById = tokenizer.tokenize(capsById)

    div_1, adiv_1 = compute_div_n(capsById, 1)
    div_2, adiv_2 = compute_div_n(capsById, 2)

    globdiv_1, _ = compute_global_div_n(capsById, 1)

    print('Diversity Statistics are as follows: \n Div1: %.2f, Div2: %.2f, gDiv1: %d\n'%(div_1,div_2, globdiv_1))

    # compute mbleu: hold each caption out and score it against its siblings
    scorer = Bleu(4)
    all_scrs = []
    scrperimg = np.zeros((n_caps_perimg, len(capsById)))

    for i in range(n_caps_perimg):
        tempRefsById = {}
        candsById = {}
        for k in capsById:
            tempRefsById[k] = capsById[k][:i] + capsById[k][i+1:]
            candsById[k] = [capsById[k][i]]

        score, scores = scorer.compute_score(tempRefsById, candsById)
        all_scrs.append(score)
        scrperimg[i, :] = scores[1]  # per-image Bleu_2

    all_scrs = np.array(all_scrs)

    out = {}
    out['overall'] = {'Div1': div_1, 'Div2': div_2, 'gDiv1': globdiv_1}
    for k, score in zip(range(4), all_scrs.mean(axis=0).tolist()):
        out['overall'].update({'mBLeu_%d'%(k+1): score})
    imgToEval = {}
    for i, imgid in enumerate(capsById.keys()):
        imgToEval[imgid] = {'mBleu_2': scrperimg[:, i].mean()}
        imgToEval[imgid]['individuals'] = []
        for j, d in enumerate(_capsById[imgid]):
            imgToEval[imgid]['individuals'].append(preds_n[d['id']])
            imgToEval[imgid]['individuals'][-1]['mBleu_2'] = scrperimg[j, i]
    out['ImgToEval'] = imgToEval

    print('Mean mutual Bleu scores on this set is:\nmBLeu_1, mBLeu_2, mBLeu_3, mBLeu_4')
    print(all_scrs.mean(axis=0))

    return out
开发者ID:ruotianluo,项目名称:GoogleConceptualCaptioning,代码行数:57,代码来源:eval_multi.py

示例8: eval_div_stats

# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def eval_div_stats(dataset, preds_n, model_id, split):
    """Report caption-diversity statistics for n sampled captions per image.

    Computes distinct n-gram ratios (Div1/Div2), the global distinct-unigram
    count (gDiv1), and mutual BLEU (each caption scored against its n-1
    siblings). Returns aggregate stats under 'overall' and per-image results
    under 'ImgToEval'.
    """
    tokenizer = PTBTokenizer()

    # Group the sampled captions by image, tagging each with its position
    # in preds_n so the untokenized original can be recovered later.
    caps_raw = {}
    for idx, pred in enumerate(preds_n):
        pred['id'] = idx
        caps_raw.setdefault(pred['image_id'], []).append(pred)

    n_caps_perimg = len(next(iter(caps_raw.values())))
    print(n_caps_perimg)
    caps_tok = tokenizer.tokenize(caps_raw)

    div_1, adiv_1 = compute_div_n(caps_tok, 1)
    div_2, adiv_2 = compute_div_n(caps_tok, 2)
    globdiv_1, _ = compute_global_div_n(caps_tok, 1)

    print('Diversity Statistics are as follows: \n Div1: %.2f, Div2: %.2f, gDiv1: %d\n'%(div_1,div_2, globdiv_1))

    # Mutual BLEU: hold each caption out and score it against its siblings.
    scorer = Bleu(4)
    all_scrs = []
    scrperimg = np.zeros((n_caps_perimg, len(caps_tok)))

    for held_out in range(n_caps_perimg):
        refs = {img: caps[:held_out] + caps[held_out + 1:] for img, caps in caps_tok.items()}
        cands = {img: [caps[held_out]] for img, caps in caps_tok.items()}

        score, scores = scorer.compute_score(refs, cands)
        all_scrs.append(score)
        scrperimg[held_out, :] = scores[1]

    all_scrs = np.array(all_scrs)

    out = {'overall': {'Div1': div_1, 'Div2': div_2, 'gDiv1': globdiv_1}}
    for order, mean_score in enumerate(all_scrs.mean(axis=0).tolist()[:4]):
        out['overall']['mBLeu_%d' % (order + 1)] = mean_score

    imgToEval = {}
    for col, imgid in enumerate(caps_tok.keys()):
        entry = {'mBleu_2': scrperimg[:, col].mean(), 'individuals': []}
        for row, pred in enumerate(caps_raw[imgid]):
            entry['individuals'].append(preds_n[pred['id']])
            entry['individuals'][-1]['mBleu_2'] = scrperimg[row, col]
        imgToEval[imgid] = entry
    out['ImgToEval'] = imgToEval

    print('Mean mutual Bleu scores on this set is:\nmBLeu_1, mBLeu_2, mBLeu_3, mBLeu_4')
    print(all_scrs.mean(axis=0))

    return out
开发者ID:ruotianluo,项目名称:self-critical.pytorch,代码行数:57,代码来源:eval_multi.py

示例9: eval_self_cider

# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def eval_self_cider(dataset, preds_n, model_id, split):
    """Evaluate self-CIDEr diversity of n sampled captions per image.

    Args:
        dataset: dataset identifier passed to ``getCOCO``.
        preds_n: list of prediction dicts, each carrying an 'image_id'.
        model_id: model identifier (unused here; kept for interface parity).
        split: split name (unused here; kept for interface parity).

    Returns:
        dict with the mean 'self_cider' score under 'overall' and a
        per-image breakdown under 'imgToEval'.
    """
    # NOTE: removed dead code from the original -- an unused 'cache_path'
    # local and an unreachable 'return score' after the real return.
    coco = getCOCO(dataset)
    valids = coco.getImgIds()

    # Build the CIDEr scorer's document-frequency statistics from the
    # ground-truth captions of the evaluated images.
    Cider_scorer = Cider(df='corpus')
    tokenizer = PTBTokenizer()
    gts = {imgId: coco.imgToAnns[imgId] for imgId in valids}
    gts = tokenizer.tokenize(gts)

    for imgId in valids:
        Cider_scorer.cider_scorer += (None, gts[imgId])
    Cider_scorer.cider_scorer.compute_doc_freq()
    Cider_scorer.cider_scorer.ref_len = np.log(float(len(Cider_scorer.cider_scorer.crefs)))

    # Prepare captions grouped by image.
    capsById = {}
    for d in preds_n:
        capsById[d['image_id']] = capsById.get(d['image_id'], []) + [d]

    capsById = tokenizer.tokenize(capsById)
    imgIds = list(capsById.keys())
    scores = Cider_scorer.my_self_cider([capsById[_] for _ in imgIds])

    def get_div(eigvals):
        # Eigenvalue-ratio diversity of a caption-similarity matrix; clip
        # guards against tiny negative eigenvalues from numerical error.
        eigvals = np.clip(eigvals, 0, None)
        return -np.log(np.sqrt(eigvals[-1]) / (np.sqrt(eigvals).sum())) / np.log(len(eigvals))

    sc_scores = [get_div(np.linalg.eigvalsh(_ / 10)) for _ in scores]
    score = np.mean(np.array(sc_scores))

    imgToEval = {}
    for i, image_id in enumerate(imgIds):
        imgToEval[image_id] = {'self_cider': sc_scores[i], 'self_cider_mat': scores[i].tolist()}
    return {'overall': {'self_cider': score}, 'imgToEval': imgToEval}
开发者ID:ruotianluo,项目名称:self-critical.pytorch,代码行数:44,代码来源:eval_multi.py

示例10: score

# 需要导入模块: from pycocoevalcap.tokenizer import ptbtokenizer [as 别名]
# 或者: from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer [as 别名]
def score(self, GT, RES, IDs):
        """Tokenize and score candidate captions against references.

        Stores each score formatted as a 4-decimal string; for BLEU only the
        final (Bleu_4) score is kept -- both behaviors are deliberate edits
        by the original author ("rgh").

        Args:
            GT: dict mapping id -> list of reference captions.
            RES: dict mapping id -> list of candidate captions.
            IDs: iterable of ids to evaluate.

        Returns:
            OrderedDict mapping metric name to its formatted score
            (also stored on ``self.eval``).
        """
        # OrderedDict keeps the metric insertion order stable for reporting.
        self.eval = OrderedDict()
        self.imgToEval = {}
        gts = {}
        res = {}
        for ID in IDs:
            gts[ID] = GT[ID]
            res[ID] = RES[ID]
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers (SPICE intentionally disabled)
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Cider(), "CIDEr"),
            (Rouge(), "ROUGE_L"),
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if isinstance(method, list):
                # Keep only the highest-order BLEU (Bleu_4).
                self.setEval("%.4f" % score[-1], method[-1])
                self.setImgToEvalImgs(scores[-1], IDs, method[-1])
                print("%s: %0.4f" % (method[-1], score[-1]))
            else:
                self.setEval("%.4f" % score, method)
                self.setImgToEvalImgs(scores, IDs, method)
                print("%s: %0.4f" % (method, score))

        return self.eval
开发者ID:Sundrops,项目名称:video-caption.pytorch,代码行数:62,代码来源:cocoeval.py


注:本文中的pycocoevalcap.tokenizer.ptbtokenizer.PTBTokenizer方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。