本文整理汇总了Python中torch.backends.cudnn.fastest方法的典型用法代码示例。如果您正苦于以下问题:Python cudnn.fastest方法的具体用法?Python cudnn.fastest怎么用?Python cudnn.fastest使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类torch.backends.cudnn
的用法示例。
在下文中一共展示了cudnn.fastest方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from torch.backends import cudnn [as 别名]
# 或者: from torch.backends.cudnn import fastest [as 别名]
def main():
    """Run inference over the test set and hand the predictions to the evaluator."""
    args = parse_args()
    cfg.set_args(args.gpu_ids)
    # Let cuDNN auto-tune and prefer the fastest kernels (input sizes are fixed).
    cudnn.fastest = True
    cudnn.benchmark = True

    tester = Tester(args.test_epoch)
    tester._make_batch_generator()
    tester._make_model()

    outputs = []
    with torch.no_grad():
        for input_img, cam_param in tqdm(tester.batch_generator):
            batch_coords = tester.model(input_img, cam_param)
            outputs.append(batch_coords.cpu().numpy())

    # evaluate: stack per-batch predictions into one array.
    all_preds = np.concatenate(outputs, axis=0)
    tester._evaluate(all_preds, cfg.result_dir)
示例2: benchmark_inference
# 需要导入模块: from torch.backends import cudnn [as 别名]
# 或者: from torch.backends.cudnn import fastest [as 别名]
def benchmark_inference(model, opts):
    """Benchmarks inference phase.

    :param obj model: A model to benchmark; must expose ``input_shape`` (shape
        of one sample, without the batch dimension) and ``name``.
    :param dict opts: A dictionary of parameters (``phase``, ``device``,
        ``world_size``, ``batch_size``, ``dtype``, ``num_warmup_batches``,
        ``num_batches``, and the cuDNN flags for GPU runs).
    :rtype: tuple
    :return: A tuple of (model_name, numpy array of per-batch times in seconds)
    :raises ValueError: If the phase is not ``'inference'`` or GPU inference is
        requested with more than one worker.
    """
    # Fix: the original ``raise "<string>"`` statements are themselves
    # TypeErrors in Python 3 -- only BaseException subclasses may be raised.
    if opts['phase'] != 'inference':
        raise ValueError("Phase in benchmark_inference func is '%s'" % opts['phase'])
    if opts['device'] == 'gpu' and opts['world_size'] != 1:
        raise ValueError("GPU inference can only be used with one GPU (world_size: %d)." % opts['world_size'])
    # Batch, Channels, Height, Width.  torch.randn replaces the deprecated
    # autograd.Variable wrapper (a no-op since PyTorch 0.4).
    data = torch.randn((opts['batch_size'],) + model.input_shape)
    if opts['device'] == 'gpu':
        # TODO: Is it good to enable cuDNN autotuning (batch size is fixed)?
        # https://github.com/soumith/cudnn.torch#modes
        # https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936
        # How many iterations do we need to get cuDNN decide what kernels to use?
        cudnn.benchmark = opts['cudnn_benchmark']
        cudnn.fastest = opts['cudnn_fastest']
        data = data.cuda()
        model = model.cuda()
    if opts['dtype'] == 'float16':
        data = data.half()
        model = model.half()
    model.eval()
    # Inference only: disable autograd bookkeeping for speed and memory.
    with torch.no_grad():
        # Warmup round lets cuDNN/allocator settle before timing starts.
        for _ in range(opts['num_warmup_batches']):
            model(data)
        # Benchmark round: time each forward pass individually.
        batch_times = np.zeros(opts['num_batches'])
        for i in range(opts['num_batches']):
            start_time = timeit.default_timer()
            model(data)
            batch_times[i] = timeit.default_timer() - start_time
    return (model.name, batch_times)
示例3: main
# 需要导入模块: from torch.backends import cudnn [as 别名]
# 或者: from torch.backends.cudnn import fastest [as 别名]
def main():
    """Evaluate the model on the test set, optionally averaging each prediction
    with the prediction on a horizontally flipped copy of the input (flip-test)."""
    args = parse_args()
    cfg.set_args(args.gpu_ids)
    # cuDNN tuning: autotune enabled, determinism not required for evaluation.
    cudnn.fastest = True
    cudnn.benchmark = True
    cudnn.deterministic = False
    cudnn.enabled = True
    tester = Tester(args.test_epoch)
    tester._make_batch_generator()
    tester._make_model()
    preds = []
    with torch.no_grad():
        for itr, input_img in enumerate(tqdm(tester.batch_generator)):
            # forward
            coord_out = tester.model(input_img)
            if cfg.flip_test:
                # Run the model on a copy flipped along dim 3 (width) and
                # mirror its x coordinates back into the original frame.
                flipped_input_img = flip(input_img, dims=3)
                flipped_coord_out = tester.model(flipped_input_img)
                flipped_coord_out[:, :, 0] = cfg.output_shape[1] - flipped_coord_out[:, :, 0] - 1
                # Swap left/right joint pairs; .clone() makes the swap
                # simultaneous instead of overwriting one side first.
                for pair in tester.flip_pairs:
                    flipped_coord_out[:, pair[0], :], flipped_coord_out[:, pair[1], :] = flipped_coord_out[:, pair[1], :].clone(), flipped_coord_out[:, pair[0], :].clone()
                # Average original and flipped predictions.
                coord_out = (coord_out + flipped_coord_out)/2.
            # Debug visualization, disabled by default.
            vis = False
            if vis:
                filename = str(itr)
                # De-normalize the first image of the batch back to uint8.
                tmpimg = input_img[0].cpu().numpy()
                tmpimg = tmpimg * np.array(cfg.pixel_std).reshape(3,1,1) + np.array(cfg.pixel_mean).reshape(3,1,1)
                tmpimg = tmpimg.astype(np.uint8)
                # Reverse channel order (presumably RGB->BGR for OpenCV) -- confirm.
                tmpimg = tmpimg[::-1, :, :]
                tmpimg = np.transpose(tmpimg,(1,2,0)).copy()
                # Scale predicted keypoints from heatmap to input resolution;
                # third row is a visibility flag set to 1 for all joints.
                tmpkps = np.zeros((3,tester.joint_num))
                tmpkps[:2,:] = coord_out[0,:,:2].cpu().numpy().transpose(1,0) / cfg.output_shape[0] * cfg.input_shape[0]
                tmpkps[2,:] = 1
                tmpimg = vis_keypoints(tmpimg, tmpkps, tester.skeleton)
                cv2.imwrite(filename + '_output.jpg', tmpimg)
            coord_out = coord_out.cpu().numpy()
            preds.append(coord_out)
    # evaluate
    preds = np.concatenate(preds, axis=0)
    tester._evaluate(preds, cfg.result_dir)
示例4: main
# 需要导入模块: from torch.backends import cudnn [as 别名]
# 或者: from torch.backends.cudnn import fastest [as 别名]
def main():
    """Training loop: per-epoch LR schedule, timed batches, and a checkpoint
    saved at the end of every epoch."""
    # argument parse and create log
    args = parse_args()
    cfg.set_args(args.gpu_ids, args.continue_train)
    # cuDNN autotuning for fixed-size inputs.
    cudnn.fastest = True
    cudnn.benchmark = True
    trainer = Trainer()
    trainer._make_batch_generator()
    trainer._make_model()
    # train
    for epoch in range(trainer.start_epoch, cfg.end_epoch):
        trainer.set_lr(epoch)
        trainer.tot_timer.tic()
        trainer.read_timer.tic()
        for itr, (input_img, joint_img, joint_vis, joints_have_depth) in enumerate(trainer.batch_generator):
            # read_timer measured data loading; gpu_timer covers forward+backward.
            trainer.read_timer.toc()
            trainer.gpu_timer.tic()
            # forward
            trainer.optimizer.zero_grad()
            target = {'coord': joint_img, 'vis': joint_vis, 'have_depth': joints_have_depth}
            loss_coord = trainer.model(input_img, target)
            # mean() reduces the returned loss to a scalar (presumably one
            # value per GPU under DataParallel -- confirm against Trainer).
            loss_coord = loss_coord.mean()
            # backward
            loss = loss_coord
            loss.backward()
            trainer.optimizer.step()
            trainer.gpu_timer.toc()
            screen = [
                'Epoch %d/%d itr %d/%d:' % (epoch, cfg.end_epoch, itr, trainer.itr_per_epoch),
                'lr: %g' % (trainer.get_lr()),
                'speed: %.2f(%.2fs r%.2f)s/itr' % (
                    trainer.tot_timer.average_time, trainer.gpu_timer.average_time, trainer.read_timer.average_time),
                '%.2fh/epoch' % (trainer.tot_timer.average_time / 3600. * trainer.itr_per_epoch),
                '%s: %.4f' % ('loss_coord', loss_coord.detach()),
            ]
            trainer.logger.info(' '.join(screen))
            # Close out the total timer and restart total/read for the next batch.
            trainer.tot_timer.toc()
            trainer.tot_timer.tic()
            trainer.read_timer.tic()
        # Checkpoint model + optimizer state once per epoch.
        trainer.save_model({
            'epoch': epoch,
            'network': trainer.model.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
        }, epoch)
示例5: main
# 需要导入模块: from torch.backends import cudnn [as 别名]
# 或者: from torch.backends.cudnn import fastest [as 别名]
def main():
    """Training loop for the root-depth model: schedules the LR each epoch,
    times every batch, and checkpoints at the end of each epoch."""
    # argument parse and create log
    args = parse_args()
    cfg.set_args(args.gpu_ids, args.continue_train)
    # cuDNN autotuning for fixed-size inputs.
    cudnn.fastest = True
    cudnn.benchmark = True

    trainer = Trainer()
    trainer._make_batch_generator()
    trainer._make_model()

    for epoch in range(trainer.start_epoch, cfg.end_epoch):
        trainer.set_lr(epoch)
        trainer.tot_timer.tic()
        trainer.read_timer.tic()
        for step, (input_img, k_value, root_img, root_vis, joints_have_depth) in enumerate(trainer.batch_generator):
            trainer.read_timer.toc()
            trainer.gpu_timer.tic()

            # Forward pass.
            trainer.optimizer.zero_grad()
            loss_coord = trainer.model(
                input_img, k_value,
                {'coord': root_img, 'vis': root_vis, 'have_depth': joints_have_depth},
            ).mean()

            # Backward pass and parameter update.
            loss_coord.backward()
            trainer.optimizer.step()
            trainer.gpu_timer.toc()

            trainer.logger.info(' '.join([
                'Epoch %d/%d itr %d/%d:' % (epoch, cfg.end_epoch, step, trainer.itr_per_epoch),
                'lr: %g' % (trainer.get_lr()),
                'speed: %.2f(%.2fs r%.2f)s/itr' % (
                    trainer.tot_timer.average_time, trainer.gpu_timer.average_time, trainer.read_timer.average_time),
                '%.2fh/epoch' % (trainer.tot_timer.average_time / 3600. * trainer.itr_per_epoch),
                '%s: %.4f' % ('loss_coord', loss_coord.detach()),
            ]))

            # Close out the total timer and restart total/read for the next batch.
            trainer.tot_timer.toc()
            trainer.tot_timer.tic()
            trainer.read_timer.tic()

        # Checkpoint model + optimizer state once per epoch.
        trainer.save_model({
            'epoch': epoch,
            'network': trainer.model.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
        }, epoch)
示例6: __init__
# 需要导入模块: from torch.backends import cudnn [as 别名]
# 或者: from torch.backends.cudnn import fastest [as 别名]
def __init__(self, ARCH, DATA, datadir, logdir, modeldir):
    """Set up the dataset parser, the segmentation model and the optional
    KNN post-processor for inference."""
    # parameters
    self.ARCH = ARCH
    self.DATA = DATA
    self.datadir = datadir
    self.logdir = logdir
    self.modeldir = modeldir

    # Load the dataset-specific parser module from the task directory.
    parser_file = (booger.TRAIN_PATH + '/tasks/semantic/dataset/' +
                   self.DATA["name"] + '/parser.py')
    parserModule = imp.load_source("parserModule", parser_file)
    self.parser = parserModule.Parser(root=self.datadir,
                                      train_sequences=self.DATA["split"]["train"],
                                      valid_sequences=self.DATA["split"]["valid"],
                                      test_sequences=self.DATA["split"]["test"],
                                      labels=self.DATA["labels"],
                                      color_map=self.DATA["color_map"],
                                      learning_map=self.DATA["learning_map"],
                                      learning_map_inv=self.DATA["learning_map_inv"],
                                      sensor=self.ARCH["dataset"]["sensor"],
                                      max_points=self.ARCH["dataset"]["max_points"],
                                      batch_size=1,
                                      workers=self.ARCH["train"]["workers"],
                                      gt=True,
                                      shuffle_train=False)

    # Build the full network (encoder + head) with autograd disabled.
    with torch.no_grad():
        self.model = Segmentator(self.ARCH,
                                 self.parser.get_n_classes(),
                                 self.modeldir)

    # Optional KNN post-processing of the predictions.
    self.post = None
    if self.ARCH["post"]["KNN"]["use"]:
        self.post = KNN(self.ARCH["post"]["KNN"]["params"],
                        self.parser.get_n_classes())

    # Move to GPU when one is available; otherwise stay on CPU.
    self.gpu = False
    self.model_single = self.model
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Infering in device: ", self.device)
    if torch.cuda.is_available() and torch.cuda.device_count() > 0:
        cudnn.benchmark = True
        cudnn.fastest = True
        self.gpu = True
        self.model.cuda()
示例7: __init__
# 需要导入模块: from torch.backends import cudnn [as 别名]
# 或者: from torch.backends.cudnn import fastest [as 别名]
def __init__(self, path):
    """Load a trained, TorchScript-traced segmentation model and its config
    from *path*, and move everything to the GPU when one is available."""
    # parameters
    self.path = path

    # config from path
    try:
        yaml_path = self.path + "/cfg.yaml"
        print("Opening config file %s" % yaml_path)
        # Fix: close the config file deterministically; the original
        # yaml.safe_load(open(...)) leaked the file handle.
        with open(yaml_path, 'r') as cfg_file:
            self.CFG = yaml.safe_load(cfg_file)
    except Exception as e:
        print(e)
        print("Error opening cfg.yaml file from trained model.")
        quit()

    # make a colorizer
    self.colorizer = Colorizer(self.CFG["dataset"]["color_map"])

    # get the data: load the dataset-specific parser module.
    parserModule = imp.load_source("parserModule",
                                   booger.TRAIN_PATH + '/tasks/segmentation/dataset/' +
                                   self.CFG["dataset"]["name"] + '/parser.py')
    self.parser = parserModule.Parser(img_prop=self.CFG["dataset"]["img_prop"],
                                      img_means=self.CFG["dataset"]["img_means"],
                                      img_stds=self.CFG["dataset"]["img_stds"],
                                      classes=self.CFG["dataset"]["labels"],
                                      train=False)

    # some useful data: image geometry and normalization statistics.
    self.data_h, self.data_w, self.data_d = self.parser.get_img_size()
    self.means, self.stds = self.parser.get_means_stds()
    self.means = torch.tensor(self.means)
    self.stds = torch.tensor(self.stds)
    self.nclasses = self.parser.get_n_classes()

    # architecture definition
    # get weights? Load the TorchScript trace of the network.
    try:
        self.pytorch_path = os.path.join(self.path, "model.pytorch")
        self.model = torch.jit.load(self.pytorch_path)
        print("Successfully Pytorch-traced model from ", self.pytorch_path)
    except Exception as e:
        print("Couldn't load Pytorch-traced network. Error: ", e)
        quit()

    # GPU? Move model and normalization tensors together so they share a device.
    self.gpu = False
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available() and torch.cuda.device_count() > 0:
        self.gpu = True
        cudnn.benchmark = True
        cudnn.fastest = True
        self.model.cuda()
        self.means = self.means.cuda()
        self.stds = self.stds.cuda()
示例8: __init__
# 需要导入模块: from torch.backends import cudnn [as 别名]
# 或者: from torch.backends.cudnn import fastest [as 别名]
def __init__(self, path):
    """Load a trained, TorchScript-traced classification model and its config
    from *path*, and move everything to the GPU when one is available."""
    # parameters
    self.path = path

    # config from path
    try:
        yaml_path = self.path + "/cfg.yaml"
        print("Opening config file %s" % yaml_path)
        # Fix: close the config file deterministically; the original
        # yaml.safe_load(open(...)) leaked the file handle.
        with open(yaml_path, 'r') as cfg_file:
            self.CFG = yaml.safe_load(cfg_file)
    except Exception as e:
        print(e)
        print("Error opening cfg.yaml file from trained model.")
        quit()

    # get the data: load the dataset-specific parser module.
    parserModule = imp.load_source("parserModule",
                                   booger.TRAIN_PATH + '/tasks/classification/dataset/' +
                                   self.CFG["dataset"]["name"] + '/parser.py')
    self.parser = parserModule.Parser(img_prop=self.CFG["dataset"]["img_prop"],
                                      img_means=self.CFG["dataset"]["img_means"],
                                      img_stds=self.CFG["dataset"]["img_stds"],
                                      classes=self.CFG["dataset"]["labels"],
                                      train=False)

    # some useful data: image geometry and normalization statistics.
    self.data_h, self.data_w, self.data_d = self.parser.get_img_size()
    self.means, self.stds = self.parser.get_means_stds()
    self.means = torch.tensor(self.means)
    self.stds = torch.tensor(self.stds)
    self.nclasses = self.parser.get_n_classes()

    # architecture definition
    # get weights? Load the TorchScript trace of the network.
    try:
        self.pytorch_path = os.path.join(self.path, "model.pytorch")
        self.model = torch.jit.load(self.pytorch_path)
        print("Successfully Pytorch-traced model from ", self.pytorch_path)
    except Exception as e:
        print("Couldn't load Pytorch-traced network. Error: ", e)
        quit()

    # GPU? Move model and normalization tensors together so they share a device.
    self.gpu = False
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available() and torch.cuda.device_count() > 0:
        self.gpu = True
        cudnn.benchmark = True
        cudnn.fastest = True
        self.model.cuda()
        self.means = self.means.cuda()
        self.stds = self.stds.cuda()