本文整理汇总了Python中evaluation.i2t方法的典型用法代码示例。如果您正苦于以下问题：Python evaluation.i2t方法的具体用法？Python evaluation.i2t怎么用？Python evaluation.i2t使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块evaluation的用法示例。
在下文中一共展示了evaluation.i2t方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: validate
# 需要导入模块: import evaluation [as 别名]
# 或者: from evaluation import i2t [as 别名]
def validate(opt, val_loader, model):
    """Evaluate the model on the validation set and return the recall sum.

    Encodes all validation images and captions with ``encode_data``, scores
    retrieval in both directions with ``i2t`` and ``t2i``, reports the
    metrics through ``logging`` and the module-level ``tb_logger``, and
    returns the sum of the six recall values.

    Args:
        opt: Options object; ``opt.log_step`` and ``opt.measure`` are read.
        val_loader: Validation data loader passed through to ``encode_data``.
        model: Model passed to ``encode_data``; ``model.Eiters`` is used as
            the step for every TensorBoard scalar.

    Returns:
        ``r1 + r5 + r10 + r1i + r5i + r10i``, used for early stopping.
    """
    # compute the encoding for all the validation images and captions
    img_embs, cap_embs = encode_data(
        model, val_loader, opt.log_step, logging.info)

    # caption retrieval
    (r1, r5, r10, medr, meanr) = i2t(img_embs, cap_embs, measure=opt.measure)
    logging.info("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (r1, r5, r10, medr, meanr))

    # image retrieval
    # The mean rank gets its own name (meanri): unpacking it into ``meanr``
    # would overwrite the image-to-text value before it is logged below.
    (r1i, r5i, r10i, medri, meanri) = t2i(
        img_embs, cap_embs, measure=opt.measure)
    logging.info("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (r1i, r5i, r10i, medri, meanri))

    # sum of recalls to be used for early stopping
    currscore = r1 + r5 + r10 + r1i + r5i + r10i

    # record metrics in tensorboard
    tb_logger.log_value('r1', r1, step=model.Eiters)
    tb_logger.log_value('r5', r5, step=model.Eiters)
    tb_logger.log_value('r10', r10, step=model.Eiters)
    tb_logger.log_value('medr', medr, step=model.Eiters)
    tb_logger.log_value('meanr', meanr, step=model.Eiters)
    tb_logger.log_value('r1i', r1i, step=model.Eiters)
    tb_logger.log_value('r5i', r5i, step=model.Eiters)
    tb_logger.log_value('r10i', r10i, step=model.Eiters)
    tb_logger.log_value('medri', medri, step=model.Eiters)
    # Text-to-image mean rank is logged under its own tag instead of a
    # second, conflicting 'meanr' scalar.
    tb_logger.log_value('meanri', meanri, step=model.Eiters)
    tb_logger.log_value('rsum', currscore, step=model.Eiters)

    return currscore
示例2: validate
# 需要导入模块: import evaluation [as 别名]
# 或者: from evaluation import i2t [as 别名]
def validate(opt, val_loader, model, tb_logger):
    """Run validation, report retrieval metrics and return the recall sum.

    Switches the model to validation mode, encodes the validation data,
    obtains both retrieval directions from a single ``i2t`` call, then logs
    every metric to ``logging`` and to ``tb_logger``.

    Args:
        opt: Options object; ``opt.log_step`` and ``opt.measure`` are read.
        val_loader: Validation data loader passed through to ``encode_data``.
        model: Model under evaluation; ``model.Eiters`` is the TensorBoard
            step for every scalar.
        tb_logger: Object exposing ``log_value(name, value, step=...)``.

    Returns:
        Sum of R@1, R@5 and R@10 over both retrieval directions, used for
        early stopping.
    """
    print("start validate")
    model.val_start()

    # Encode all validation images and captions.
    img_embs, cap_embs, cap_masks = encode_data(
        model, val_loader, opt.log_step, logging.info)

    # One call yields the image-to-text and text-to-image metric groups.
    image_to_text, text_to_image = i2t(
        img_embs, cap_embs, cap_masks, measure=opt.measure, model=model)
    i2t_r1, i2t_r5, i2t_r10, i2t_medr, i2t_meanr = image_to_text
    t2i_r1, t2i_r5, t2i_r10, t2i_medr, t2i_meanr = text_to_image

    i2t_summary = (i2t_r1, i2t_r5, i2t_r10, i2t_medr, i2t_meanr)
    logging.info("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % i2t_summary)
    t2i_summary = (t2i_r1, t2i_r5, t2i_r10, t2i_medr, t2i_meanr)
    logging.info("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % t2i_summary)

    # Early-stopping criterion: sum of the recalls of both directions.
    currscore = i2t_r1 + i2t_r5 + i2t_r10 + t2i_r1 + t2i_r5 + t2i_r10

    # TensorBoard scalars, emitted in a fixed order.
    scalars = (
        ('i2t_r1', i2t_r1),
        ('i2t_r5', i2t_r5),
        ('i2t_r10', i2t_r10),
        ('i2t_medr', i2t_medr),
        ('i2t_meanr', i2t_meanr),
        ('t2i_r1', t2i_r1),
        ('t2i_r5', t2i_r5),
        ('t2i_r10', t2i_r10),
        ('t2i_medr', t2i_medr),
        ('t2i_meanr', t2i_meanr),
        ('rsum', currscore),
    )
    for tag, value in scalars:
        tb_logger.log_value(tag, value, step=model.Eiters)

    return currscore
示例3: parse_args
# 需要导入模块: import evaluation [as 别名]
# 或者: from evaluation import i2t [as 别名]
def parse_args():
    """Define the training command-line interface and parse the arguments.

    Three positional arguments are required (train, validation and test
    collection names); everything else is optional. Help texts that show a
    default use argparse's ``%(default)s`` placeholder where the hand-written
    value had drifted from the real default.

    Returns:
        The ``argparse.Namespace`` produced by ``parser.parse_args()``.
    """
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    # %(default)s is expanded by argparse when help is rendered, so a '%'
    # character inside ROOT_PATH cannot break the help formatting.
    parser.add_argument('--rootpath', type=str, default=ROOT_PATH,
                        help='path to datasets. (default: %(default)s)')
    parser.add_argument('trainCollection', type=str, help='train collection')
    parser.add_argument('valCollection', type=str, help='validation collection')
    parser.add_argument('testCollection', type=str, help='test collection')
    # Help previously claimed "default: 1" although the default is 20.
    parser.add_argument('--n_caption', type=int, default=20,
                        help='number of captions of each image/video (default: %(default)s)')
    parser.add_argument('--overwrite', type=int, default=0, choices=[0,1],
                        help='overwrite existed file. (default: 0)')

    # model
    parser.add_argument('--model', type=str, default='dual_encoding',
                        help='model name. (default: dual_encoding)')
    parser.add_argument('--concate', type=str, default='full',
                        help='feature concatenation style. (full|reduced) full=level 1+2+3; reduced=level 2+3')
    parser.add_argument('--measure', type=str, default='cosine',
                        help='measure method. (default: cosine)')
    parser.add_argument('--dropout', default=0.2, type=float,
                        help='dropout rate (default: 0.2)')

    # text-side multi-level encoding
    parser.add_argument('--vocab', type=str, default='word_vocab_5',
                        help='word vocabulary. (default: word_vocab_5)')
    parser.add_argument('--word_dim', type=int, default=500,
                        help='word embedding dimension')
    # Help previously claimed "default: 1024" although the default is 512.
    parser.add_argument('--text_rnn_size', type=int, default=512,
                        help='text rnn encoder size. (default: %(default)s)')
    parser.add_argument('--text_kernel_num', default=512, type=int,
                        help='number of each kind of text kernel')
    parser.add_argument('--text_kernel_sizes', default='2-3-4', type=str,
                        help='dash-separated kernel size to use for text convolution')
    parser.add_argument('--text_norm', action='store_true',
                        help='normalize the text embeddings at last layer')

    # video-side multi-level encoding
    parser.add_argument('--visual_feature', type=str,
                        default='resnet-152-img1k-flatten0_outputos',
                        help='visual feature.')
    parser.add_argument('--visual_rnn_size', type=int, default=1024,
                        help='visual rnn encoder size')
    parser.add_argument('--visual_kernel_num', default=512, type=int,
                        help='number of each kind of visual kernel')
    parser.add_argument('--visual_kernel_sizes', default='2-3-4-5', type=str,
                        help='dash-separated kernel size to use for visual convolution')
    parser.add_argument('--visual_norm', action='store_true',
                        help='normalize the visual embeddings at last layer')

    # common space learning
    parser.add_argument('--text_mapping_layers', type=str, default='0-2048',
                        help='text fully connected layers for common space learning. (default: 0-2048)')
    parser.add_argument('--visual_mapping_layers', type=str, default='0-2048',
                        help='visual fully connected layers for common space learning. (default: 0-2048)')

    # loss
    parser.add_argument('--loss_fun', type=str, default='mrl',
                        help='loss function')
    parser.add_argument('--margin', type=float, default=0.2,
                        help='rank loss margin')
    parser.add_argument('--direction', type=str, default='all',
                        help='retrieval direction (all|t2i|i2t)')
    parser.add_argument('--max_violation', action='store_true',
                        help='use max instead of sum in the rank loss')
    parser.add_argument('--cost_style', type=str, default='sum',
                        help='cost style (sum, mean). (default: sum)')

    # optimizer
    # Help previously claimed "default: rmsprop" although the default is adam.
    parser.add_argument('--optimizer', type=str, default='adam',
                        help='optimizer. (default: %(default)s)')
    parser.add_argument('--learning_rate', type=float, default=0.0001,
                        help='initial learning rate')
    parser.add_argument('--lr_decay_rate', default=0.99, type=float,
                        help='learning rate decay rate. (default: 0.99)')
    parser.add_argument('--grad_clip', type=float, default=2,
                        help='gradient clipping threshold')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--val_metric', default='recall', type=str,
                        help='performance metric for validation (mir|recall)')

    # misc
    parser.add_argument('--num_epochs', default=50, type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size', default=128, type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--workers', default=5, type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--postfix', default='runs_0',
                        help='Path to save the model and Tensorboard log.')
    parser.add_argument('--log_step', default=10, type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--cv_name', default='cvpr_2019', type=str, help='')

    args = parser.parse_args()
    return args
示例4: validate
# 需要导入模块: import evaluation [as 别名]
# 或者: from evaluation import i2t [as 别名]
def validate(opt, val_loader, model):
    """Evaluate a cross-attention model on the validation set.

    Encodes the validation data, builds the similarity matrix with the
    cross-attention variant selected by ``opt.cross_attn``, scores retrieval
    in both directions, reports the metrics through ``logging`` and the
    module-level ``tb_logger``, and returns the sum of the six recall values.

    Args:
        opt: Options object; ``opt.log_step`` and ``opt.cross_attn`` are read
            here, and ``opt`` is also passed to the similarity helpers.
        val_loader: Validation data loader passed through to ``encode_data``.
        model: Model passed to ``encode_data``; ``model.Eiters`` is used as
            the step for every TensorBoard scalar.

    Returns:
        ``r1 + r5 + r10 + r1i + r5i + r10i``, used for early stopping.

    Raises:
        NotImplementedError: If ``opt.cross_attn`` is neither ``'t2i'`` nor
            ``'i2t'``.
    """
    # compute the encoding for all the validation images and captions
    img_embs, cap_embs, cap_lens = encode_data(
        model, val_loader, opt.log_step, logging.info)

    # Keep every fifth image embedding.
    # NOTE(review): presumably each image is repeated once per caption with
    # five captions per image - confirm against the data loader.
    img_embs = numpy.array([img_embs[i] for i in range(0, len(img_embs), 5)])

    start = time.time()
    if opt.cross_attn == 't2i':
        sims = shard_xattn_t2i(img_embs, cap_embs, cap_lens, opt, shard_size=128)
    elif opt.cross_attn == 'i2t':
        sims = shard_xattn_i2t(img_embs, cap_embs, cap_lens, opt, shard_size=128)
    else:
        raise NotImplementedError(
            "unsupported cross_attn: %r" % (opt.cross_attn,))
    end = time.time()
    print("calculate similarity time:", end-start)

    # caption retrieval
    (r1, r5, r10, medr, meanr) = i2t(img_embs, cap_embs, cap_lens, sims)
    logging.info("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (r1, r5, r10, medr, meanr))

    # image retrieval
    # The mean rank gets its own name (meanri): unpacking it into ``meanr``
    # would overwrite the image-to-text value before it is logged below.
    (r1i, r5i, r10i, medri, meanri) = t2i(
        img_embs, cap_embs, cap_lens, sims)
    logging.info("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (r1i, r5i, r10i, medri, meanri))

    # sum of recalls to be used for early stopping
    currscore = r1 + r5 + r10 + r1i + r5i + r10i

    # record metrics in tensorboard
    tb_logger.log_value('r1', r1, step=model.Eiters)
    tb_logger.log_value('r5', r5, step=model.Eiters)
    tb_logger.log_value('r10', r10, step=model.Eiters)
    tb_logger.log_value('medr', medr, step=model.Eiters)
    tb_logger.log_value('meanr', meanr, step=model.Eiters)
    tb_logger.log_value('r1i', r1i, step=model.Eiters)
    tb_logger.log_value('r5i', r5i, step=model.Eiters)
    tb_logger.log_value('r10i', r10i, step=model.Eiters)
    tb_logger.log_value('medri', medri, step=model.Eiters)
    # Text-to-image mean rank is logged under its own tag instead of a
    # second, conflicting 'meanr' scalar.
    tb_logger.log_value('meanri', meanri, step=model.Eiters)
    tb_logger.log_value('rsum', currscore, step=model.Eiters)

    return currscore
示例5: test_CAMP_model
# 需要导入模块: import evaluation [as 别名]
# 或者: from evaluation import i2t [as 别名]
def test_CAMP_model(config_path):
    """Build a CAMP model from a YAML config and log its test-set retrieval scores.

    Reads the ``common`` section of the config, loads the pickled vocabulary,
    constructs the model (restoring ``opt.resume`` when it is not ``None``),
    encodes the test split and logs image-to-text and text-to-image metrics.

    Args:
        config_path: Path to the YAML configuration file.
    """
    print("OK!")
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)

    # safe_load instead of yaml.load(f): calling yaml.load without a Loader
    # can construct arbitrary Python objects and raises TypeError on
    # PyYAML >= 6, where the Loader argument is mandatory.
    with open(config_path) as f:
        opt = yaml.safe_load(f)
    opt = EasyDict(opt['common'])

    vocab_file = os.path.join(opt.vocab_path,
                              '%s_vocab.pkl' % opt.data_name)
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only point vocab_path at trusted data.
    # The context manager closes the handle the original left open.
    with open(vocab_file, 'rb') as vocab_fp:
        vocab = pickle.load(vocab_fp)
    opt.vocab_size = len(vocab)

    train_logger = LogCollector()

    print("----Start init model----")
    CAMP = model.CAMP(opt)
    CAMP.logger = train_logger

    if opt.resume is not None:
        ckp = torch.load(opt.resume)
        CAMP.load_state_dict(ckp["model"])

    CAMP.train_start()
    print("----Model init success----")

    # NOTE(review): the train/val loaders are not used below; the call is
    # kept because it may have side effects - confirm, then remove.
    train_loader, val_loader = data.get_loaders(
        opt.data_name, vocab, opt.crop_size, 128, 4, opt)
    test_loader = data.get_test_loader(
        "test", opt.data_name, vocab, opt.crop_size, 128, 4, opt)

    CAMP.val_start()
    img_embs, cap_embs, cap_masks = encode_data(
        CAMP, test_loader, opt.log_step, logging.info)

    # With return_ranks=True the call yields a third element, which is not
    # used here.
    (r1, r5, r10, medr, meanr), (r1i, r5i, r10i, medri, meanri), score_matrix = i2t(
        img_embs, cap_embs, cap_masks, measure=opt.measure,
        model=CAMP, return_ranks=True)

    logging.info("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (r1, r5, r10, medr, meanr))
    logging.info("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (r1i, r5i, r10i, medri, meanri))