本文整理汇总了 Python 中 evaluation.t2i 方法的典型用法代码示例。如果您正苦于以下问题：Python evaluation.t2i 方法的具体用法？Python evaluation.t2i 怎么用？Python evaluation.t2i 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 evaluation 的用法示例。
在下文中一共展示了 evaluation.t2i 方法的 4 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: validate
# 需要导入模块: import evaluation [as 别名]
# 或者: from evaluation import t2i [as 别名]
def validate(opt, val_loader, model):
    """Evaluate the model on the validation set and return the recall sum.

    Encodes every validation image and caption, scores retrieval in both
    directions, reports the metrics through ``logging`` and the
    module-level ``tb_logger``, and returns the sum of the six recall
    values (used for early stopping).

    Args:
        opt: options object; ``opt.log_step`` and ``opt.measure`` are read.
        val_loader: validation data loader, forwarded to ``encode_data``.
        model: model under evaluation; ``model.Eiters`` is used as the
            tensorboard step.

    Returns:
        ``r1 + r5 + r10`` of image-to-text retrieval plus the same three
        recalls of text-to-image retrieval.
    """
    # compute the encoding for all the validation images and captions
    img_embs, cap_embs = encode_data(
        model, val_loader, opt.log_step, logging.info)

    # caption retrieval
    (r1, r5, r10, medr, meanr) = i2t(img_embs, cap_embs, measure=opt.measure)
    logging.info("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f",
                 r1, r5, r10, medr, meanr)

    # image retrieval
    # The mean rank gets its own name (meanri) so that it no longer
    # overwrites the image-to-text mean rank before tensorboard logging.
    (r1i, r5i, r10i, medri, meanri) = t2i(
        img_embs, cap_embs, measure=opt.measure)
    logging.info("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f",
                 r1i, r5i, r10i, medri, meanri)

    # sum of recalls to be used for early stopping
    currscore = r1 + r5 + r10 + r1i + r5i + r10i

    # record metrics in tensorboard
    metrics = (
        ('r1', r1),
        ('r5', r5),
        ('r10', r10),
        ('medr', medr),
        ('meanr', meanr),
        ('r1i', r1i),
        ('r5i', r5i),
        ('r10i', r10i),
        ('medri', medri),
        ('meanri', meanri),
        ('rsum', currscore),
    )
    for tag, value in metrics:
        tb_logger.log_value(tag, value, step=model.Eiters)

    return currscore
示例2: validate
# 需要导入模块: import evaluation [as 别名]
# 或者: from evaluation import t2i [as 别名]
def validate(opt, val_loader, model, tb_logger):
    """Run validation and return the sum of the six recall values.

    Switches the model to validation mode, encodes the validation images
    and captions, obtains the metrics of both retrieval directions from a
    single ``i2t`` call, reports them through ``logging`` and
    ``tb_logger``, and returns the recall sum (used for early stopping).

    Args:
        opt: options object; ``opt.log_step`` and ``opt.measure`` are read.
        val_loader: validation data loader, forwarded to ``encode_data``.
        model: model under evaluation; ``model.val_start()`` is called and
            ``model.Eiters`` is used as the tensorboard step.
        tb_logger: object exposing ``log_value(name, value, step=...)``.

    Returns:
        Sum of r1, r5 and r10 over both retrieval directions.
    """
    print("start validate")
    model.val_start()

    # encode every validation image and caption
    img_embs, cap_embs, cap_masks = encode_data(
        model, val_loader, opt.log_step, logging.info)

    # one i2t call yields the metric tuples of both retrieval directions,
    # so no separate t2i call is made here
    i2t_scores, t2i_scores = i2t(
        img_embs, cap_embs, cap_masks, measure=opt.measure, model=model)
    (i2t_r1, i2t_r5, i2t_r10, i2t_medr, i2t_meanr) = i2t_scores
    (t2i_r1, t2i_r5, t2i_r10, t2i_medr, t2i_meanr) = t2i_scores

    i2t_message = "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % (
        i2t_r1, i2t_r5, i2t_r10, i2t_medr, i2t_meanr)
    logging.info(i2t_message)
    t2i_message = "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % (
        t2i_r1, t2i_r5, t2i_r10, t2i_medr, t2i_meanr)
    logging.info(t2i_message)

    # recall sum, used for early stopping
    recall_sum = i2t_r1 + i2t_r5 + i2t_r10 + t2i_r1 + t2i_r5 + t2i_r10

    # tensorboard records, written in this exact order
    records = [
        ('i2t_r1', i2t_r1),
        ('i2t_r5', i2t_r5),
        ('i2t_r10', i2t_r10),
        ('i2t_medr', i2t_medr),
        ('i2t_meanr', i2t_meanr),
        ('t2i_r1', t2i_r1),
        ('t2i_r5', t2i_r5),
        ('t2i_r10', t2i_r10),
        ('t2i_medr', t2i_medr),
        ('t2i_meanr', t2i_meanr),
        ('rsum', recall_sum),
    ]
    for tag, value in records:
        tb_logger.log_value(tag, value, step=model.Eiters)

    return recall_sum
示例3: parse_args
# 需要导入模块: import evaluation [as 别名]
# 或者: from evaluation import t2i [as 别名]
def parse_args(argv=None):
    """Build the command-line parser and parse the arguments.

    Help texts that mention a default use argparse's ``%(default)s``
    placeholder where they previously disagreed with the real default
    (``--n_caption``, ``--text_rnn_size``, ``--optimizer``) or embedded a
    pre-formatted value (``--rootpath``), so the text cannot drift from
    the actual default.

    Args:
        argv: optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, which is what a call
            without arguments has always done.

    Returns:
        The ``argparse.Namespace`` holding the parsed options.
    """
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--rootpath', type=str, default=ROOT_PATH,
                        help='path to datasets. (default: %(default)s)')
    parser.add_argument('trainCollection', type=str, help='train collection')
    parser.add_argument('valCollection', type=str, help='validation collection')
    parser.add_argument('testCollection', type=str, help='test collection')
    parser.add_argument('--n_caption', type=int, default=20,
                        help='number of captions of each image/video (default: %(default)s)')
    parser.add_argument('--overwrite', type=int, default=0, choices=[0, 1],
                        help='overwrite existed file. (default: 0)')

    # model
    parser.add_argument('--model', type=str, default='dual_encoding',
                        help='model name. (default: dual_encoding)')
    parser.add_argument('--concate', type=str, default='full',
                        help='feature concatenation style. (full|reduced) full=level 1+2+3; reduced=level 2+3')
    parser.add_argument('--measure', type=str, default='cosine',
                        help='measure method. (default: cosine)')
    parser.add_argument('--dropout', default=0.2, type=float,
                        help='dropout rate (default: 0.2)')

    # text-side multi-level encoding
    parser.add_argument('--vocab', type=str, default='word_vocab_5',
                        help='word vocabulary. (default: word_vocab_5)')
    parser.add_argument('--word_dim', type=int, default=500,
                        help='word embedding dimension')
    parser.add_argument('--text_rnn_size', type=int, default=512,
                        help='text rnn encoder size. (default: %(default)s)')
    parser.add_argument('--text_kernel_num', default=512, type=int,
                        help='number of each kind of text kernel')
    parser.add_argument('--text_kernel_sizes', default='2-3-4', type=str,
                        help='dash-separated kernel size to use for text convolution')
    parser.add_argument('--text_norm', action='store_true',
                        help='normalize the text embeddings at last layer')

    # video-side multi-level encoding
    parser.add_argument('--visual_feature', type=str,
                        default='resnet-152-img1k-flatten0_outputos',
                        help='visual feature.')
    parser.add_argument('--visual_rnn_size', type=int, default=1024,
                        help='visual rnn encoder size')
    parser.add_argument('--visual_kernel_num', default=512, type=int,
                        help='number of each kind of visual kernel')
    parser.add_argument('--visual_kernel_sizes', default='2-3-4-5', type=str,
                        help='dash-separated kernel size to use for visual convolution')
    parser.add_argument('--visual_norm', action='store_true',
                        help='normalize the visual embeddings at last layer')

    # common space learning
    parser.add_argument('--text_mapping_layers', type=str, default='0-2048',
                        help='text fully connected layers for common space learning. (default: 0-2048)')
    parser.add_argument('--visual_mapping_layers', type=str, default='0-2048',
                        help='visual fully connected layers for common space learning. (default: 0-2048)')

    # loss
    parser.add_argument('--loss_fun', type=str, default='mrl',
                        help='loss function')
    parser.add_argument('--margin', type=float, default=0.2,
                        help='rank loss margin')
    parser.add_argument('--direction', type=str, default='all',
                        help='retrieval direction (all|t2i|i2t)')
    parser.add_argument('--max_violation', action='store_true',
                        help='use max instead of sum in the rank loss')
    parser.add_argument('--cost_style', type=str, default='sum',
                        help='cost style (sum, mean). (default: sum)')

    # optimizer
    parser.add_argument('--optimizer', type=str, default='adam',
                        help='optimizer. (default: %(default)s)')
    parser.add_argument('--learning_rate', type=float, default=0.0001,
                        help='initial learning rate')
    parser.add_argument('--lr_decay_rate', default=0.99, type=float,
                        help='learning rate decay rate. (default: 0.99)')
    parser.add_argument('--grad_clip', type=float, default=2,
                        help='gradient clipping threshold')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--val_metric', default='recall', type=str,
                        help='performance metric for validation (mir|recall)')

    # misc
    parser.add_argument('--num_epochs', default=50, type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size', default=128, type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--workers', default=5, type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--postfix', default='runs_0',
                        help='Path to save the model and Tensorboard log.')
    parser.add_argument('--log_step', default=10, type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--cv_name', default='cvpr_2019', type=str, help='')

    # argv=None lets argparse fall back to sys.argv[1:]
    return parser.parse_args(argv)
示例4: validate
# 需要导入模块: import evaluation [as 别名]
# 或者: from evaluation import t2i [as 别名]
def validate(opt, val_loader, model):
    """Evaluate the cross-attention model on the validation set.

    Encodes the validation images and captions, computes the similarity
    scores with the sharded cross-attention routine selected by
    ``opt.cross_attn``, scores retrieval in both directions, reports the
    metrics through ``logging`` and the module-level ``tb_logger``, and
    returns the sum of the six recall values (used for early stopping).

    Args:
        opt: options object; ``opt.log_step`` and ``opt.cross_attn`` are
            read here and ``opt`` is forwarded to the similarity routine.
        val_loader: validation data loader, forwarded to ``encode_data``.
        model: model under evaluation; ``model.Eiters`` is used as the
            tensorboard step.

    Returns:
        ``r1 + r5 + r10`` of image-to-text retrieval plus the same three
        recalls of text-to-image retrieval.

    Raises:
        NotImplementedError: if ``opt.cross_attn`` is neither ``'t2i'``
            nor ``'i2t'``.
    """
    # compute the encoding for all the validation images and captions
    img_embs, cap_embs, cap_lens = encode_data(
        model, val_loader, opt.log_step, logging.info)

    # Keep every 5th image embedding.
    # NOTE(review): presumably each image is repeated once per caption
    # (5 captions per image) -- confirm against the data loader.
    img_embs = numpy.array([img_embs[i] for i in range(0, len(img_embs), 5)])

    start = time.time()
    if opt.cross_attn == 't2i':
        sims = shard_xattn_t2i(img_embs, cap_embs, cap_lens, opt, shard_size=128)
    elif opt.cross_attn == 'i2t':
        sims = shard_xattn_i2t(img_embs, cap_embs, cap_lens, opt, shard_size=128)
    else:
        raise NotImplementedError(
            "unsupported cross_attn mode: %r" % (opt.cross_attn,))
    end = time.time()
    print("calculate similarity time:", end-start)

    # caption retrieval
    (r1, r5, r10, medr, meanr) = i2t(img_embs, cap_embs, cap_lens, sims)
    logging.info("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f",
                 r1, r5, r10, medr, meanr)

    # image retrieval
    # The mean rank gets its own name (meanri) so that it no longer
    # overwrites the image-to-text mean rank before tensorboard logging.
    (r1i, r5i, r10i, medri, meanri) = t2i(
        img_embs, cap_embs, cap_lens, sims)
    logging.info("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f",
                 r1i, r5i, r10i, medri, meanri)

    # sum of recalls to be used for early stopping
    currscore = r1 + r5 + r10 + r1i + r5i + r10i

    # record metrics in tensorboard
    metrics = (
        ('r1', r1),
        ('r5', r5),
        ('r10', r10),
        ('medr', medr),
        ('meanr', meanr),
        ('r1i', r1i),
        ('r5i', r5i),
        ('r10i', r10i),
        ('medri', medri),
        ('meanri', meanri),
        ('rsum', currscore),
    )
    for tag, value in metrics:
        tb_logger.log_value(tag, value, step=model.Eiters)

    return currscore