This article collects typical usage examples of the Python method tensorpack.utils.gpu.get_num_gpu. If you are asking yourself what gpu.get_num_gpu does, how to call it, and how others use it, the hand-picked code examples below may help. You can also explore the other members of the tensorpack.utils.gpu module in which this method lives.
The following shows 8 code examples of gpu.get_num_gpu, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
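Before diving in, here is a minimal, self-contained sketch of the pattern almost every example below shares: count the visible GPUs, fall back to one "tower" on CPU-only machines, and split a total batch across towers. The total batch of 256 is an arbitrary assumption for illustration:

from tensorpack.utils.gpu import get_num_gpu

nr_tower = max(get_num_gpu(), 1)         # at least one tower, even without a GPU
total_batch = 256                        # hypothetical total batch size
batch_per_tower = total_batch // nr_tower
print("towers: {}, batch per tower: {}".format(nr_tower, batch_per_tower))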
Example 1: get_config
# Required import: from tensorpack.utils import gpu [as alias]
# Or: from tensorpack.utils.gpu import get_num_gpu [as alias]
def get_config(model):
    nr_tower = max(get_num_gpu(), 1)
    assert FLAGS.batch % nr_tower == 0
    batch = FLAGS.batch // nr_tower
    logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch))

    data = QueueInput(get_dataflow(FLAGS.train_list_filename, batch))

    # learning rate
    START_LR = FLAGS.lr
    BASE_LR = START_LR * (FLAGS.batch / 256.0)
    lr_list = []
    for idx, decay_point in enumerate(FLAGS.lr_decay_points):
        lr_list.append((decay_point, BASE_LR * 0.1 ** idx))

    callbacks = [
        ScopeModelSaver(checkpoint_dir=FLAGS.RHP_savepath, scope='RHP'),
        EstimatedTimeLeft(),
        ScheduledHyperParamSetter('learning_rate', lr_list),
    ]
    if get_num_gpu() > 0:
        callbacks.append(GPUUtilizationTracker())

    return TrainConfig(
        model=model,
        data=data,
        callbacks=callbacks,
        steps_per_epoch=FLAGS.steps_per_epoch // FLAGS.batch,
        max_epoch=FLAGS.max_epoch,
        session_init=MultipleRestore()
    )
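For intuition, the lr_list built above pairs each decay point with the base rate scaled by successive powers of 0.1. A hedged sketch, assuming hypothetical flag values FLAGS.lr = 0.1, FLAGS.batch = 512, and decay points [30, 60, 90]:

START_LR = 0.1                           # hypothetical FLAGS.lr
BASE_LR = START_LR * (512 / 256.0)       # hypothetical FLAGS.batch = 512 -> 0.2
lr_list = [(p, BASE_LR * 0.1 ** i) for i, p in enumerate([30, 60, 90])]
# lr_list is roughly [(30, 0.2), (60, 0.02), (90, 0.002)]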
Example 2: get_config
# Required import: from tensorpack.utils import gpu [as alias]
# Or: from tensorpack.utils.gpu import get_num_gpu [as alias]
def get_config():
    nr_tower = max(get_num_gpu(), 1)
    batch = args.batch
    total_batch = batch * nr_tower
    assert total_batch >= 256  # otherwise the learning rate warmup is wrong.
    BASE_LR = 0.01 * (total_batch / 256.)

    logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch))
    dataset_train = get_data('train', batch)
    dataset_val = get_data('val', batch)

    infs = [ClassificationError('wrong-top1', 'val-error-top1'),
            ClassificationError('wrong-top5', 'val-error-top5')]
    callbacks = [
        ModelSaver(),
        GPUUtilizationTracker(),
        EstimatedTimeLeft(),
        ScheduledHyperParamSetter(
            'learning_rate',
            [(0, 0.01), (3, max(BASE_LR, 0.01))], interp='linear'),
        ScheduledHyperParamSetter(
            'learning_rate',
            [(30, BASE_LR * 1e-1), (60, BASE_LR * 1e-2), (80, BASE_LR * 1e-3)]),
        DataParallelInferenceRunner(
            dataset_val, infs, list(range(nr_tower))),
    ]

    input = QueueInput(dataset_train)
    input = StagingInput(input, nr_stage=1)
    return TrainConfig(
        model=Model(),
        data=input,
        callbacks=callbacks,
        steps_per_epoch=1281167 // total_batch,
        max_epoch=100,
    )
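The pair of ScheduledHyperParamSetter callbacks implements the linear learning-rate scaling rule with warmup (Goyal et al., 2017): the first linearly raises the rate from 0.01 to BASE_LR over the first 3 epochs, the second takes over with step decay. The assert guards exactly this: below a total batch of 256, BASE_LR would fall under the warmup start. A worked sketch with hypothetical numbers:

total_batch = 8 * 64                     # hypothetical: 8 towers x 64 per tower
BASE_LR = 0.01 * (total_batch / 256.)    # -> 0.02
# epochs 0-3: lr rises linearly from 0.01 to 0.02, then step decay:
# epoch 30 -> 0.002, epoch 60 -> 0.0002, epoch 80 -> 0.00002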
Example 3: get_config
# Required import: from tensorpack.utils import gpu [as alias]
# Or: from tensorpack.utils.gpu import get_num_gpu [as alias]
def get_config():
    nr_tower = max(get_num_gpu(), 1)
    batch = args.batch
    total_batch = batch * nr_tower
    if total_batch != 128:
        logger.warn("AlexNet needs to be trained with a total batch size of 128.")
    BASE_LR = 0.01 * (total_batch / 128.)

    logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch))
    dataset_train = get_data('train', batch)
    dataset_val = get_data('val', batch)

    infs = [ClassificationError('wrong-top1', 'val-error-top1'),
            ClassificationError('wrong-top5', 'val-error-top5')]
    callbacks = [
        ModelSaver(),
        GPUUtilizationTracker(),
        EstimatedTimeLeft(),
        ScheduledHyperParamSetter(
            'learning_rate',
            [(0, BASE_LR), (30, BASE_LR * 1e-1), (60, BASE_LR * 1e-2), (80, BASE_LR * 1e-3)]),
        DataParallelInferenceRunner(
            dataset_val, infs, list(range(nr_tower))),
    ]
    return TrainConfig(
        model=Model(),
        data=StagingInput(QueueInput(dataset_train)),
        callbacks=callbacks,
        steps_per_epoch=1281167 // total_batch,
        max_epoch=100,
    )
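Example 3 mirrors Example 2 but scales against AlexNet's original total batch of 128, and only warns rather than asserts on a mismatch. With hypothetical numbers:

total_batch = 2 * 64                     # hypothetical: 2 towers x 64 per tower
BASE_LR = 0.01 * (total_batch / 128.)    # -> 0.01, the classic AlexNet rate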
Example 4: train_net
# Required import: from tensorpack.utils import gpu [as alias]
# Or: from tensorpack.utils.gpu import get_num_gpu [as alias]
def train_net(net,
              session_init,
              batch_size,
              num_epochs,
              train_dataflow,
              val_dataflow):
    num_towers = max(get_num_gpu(), 1)
    batch_per_tower = batch_size // num_towers
    logger.info("Running on {} towers. Batch size per tower: {}".format(num_towers, batch_per_tower))

    num_training_samples = 1281167
    step_size = num_training_samples // batch_size
    max_iter = (num_epochs - 1) * step_size

    callbacks = [
        ModelSaver(),
        ScheduledHyperParamSetter(
            "learning_rate",
            [(0, 0.5), (max_iter, 0)],
            interp="linear",
            step_based=True),
        EstimatedTimeLeft()]

    infs = [ClassificationError("wrong-top1", "val-error-top1"),
            ClassificationError("wrong-top5", "val-error-top5")]
    if num_towers == 1:
        # single-GPU inference with queue prefetch
        callbacks.append(InferenceRunner(
            input=QueueInput(val_dataflow),
            infs=infs))
    else:
        # multi-GPU inference (with mandatory queue prefetch)
        callbacks.append(DataParallelInferenceRunner(
            input=val_dataflow,
            infs=infs,
            gpus=list(range(num_towers))))

    config = TrainConfig(
        dataflow=train_dataflow,
        model=net,
        callbacks=callbacks,
        session_init=session_init,
        steps_per_epoch=step_size,
        max_epoch=num_epochs)

    launch_train_with_config(
        config=config,
        trainer=SyncMultiGPUTrainerParameterServer(num_towers))
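Unlike the step-decay schedules in the other examples, this one anneals the learning rate linearly per training step, from 0.5 at step 0 down to 0 at max_iter. A small sketch of the implied value, assuming a hypothetical total batch of 512 and 120 epochs:

num_training_samples = 1281167           # ImageNet train-set size, as above
batch_size, num_epochs = 512, 120        # hypothetical values
step_size = num_training_samples // batch_size
max_iter = (num_epochs - 1) * step_size

def lr_at(step, start_lr=0.5):
    # linear interpolation between (0, start_lr) and (max_iter, 0)
    return start_lr * max(1.0 - step / max_iter, 0.0)

# lr_at(0) == 0.5; lr_at(max_iter // 2) == 0.25; lr_at(max_iter) == 0.0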
Example 5: get_config
# Required import: from tensorpack.utils import gpu [as alias]
# Or: from tensorpack.utils.gpu import get_num_gpu [as alias]
def get_config(model, fake=False):
    nr_tower = max(get_num_gpu(), 1)
    assert args.batch % nr_tower == 0
    batch = args.batch // nr_tower

    if fake:
        logger.info("For benchmark, batch size is fixed to 64 per tower.")
        dataset_train = FakeData(
            [[64, 224, 224, 3], [64]], 1000, random=False, dtype='uint8')
        callbacks = []
        steps_per_epoch = 100
    else:
        logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch))

        dataset_train = get_imagenet_dataflow(args.data, 'train', batch)
        dataset_val = get_imagenet_dataflow(args.data, 'val', min(64, batch))
        steps_per_epoch = 1281167 // args.batch

        BASE_LR = 0.1 * args.batch / 256.0
        logger.info("BASELR: {}".format(BASE_LR))
        callbacks = [
            ModelSaver(),
            EstimatedTimeLeft(),
            GPUUtilizationTracker(),
            ScheduledHyperParamSetter(
                'learning_rate', [(0, BASE_LR), (30, BASE_LR * 1e-1), (60, BASE_LR * 1e-2),
                                  (90, BASE_LR * 1e-3)]),
        ]
        if BASE_LR > 0.1:
            callbacks.append(
                ScheduledHyperParamSetter(
                    'learning_rate', [(0, 0.1), (5 * steps_per_epoch, BASE_LR)],
                    interp='linear', step_based=True))

        infs = [ClassificationError('wrong-top1', 'val-error-top1'),
                ClassificationError('wrong-top5', 'val-error-top5')]
        if nr_tower == 1:
            # single-GPU inference with queue prefetch
            callbacks.append(InferenceRunner(QueueInput(dataset_val), infs))
        else:
            # multi-GPU inference (with mandatory queue prefetch)
            callbacks.append(DataParallelInferenceRunner(
                dataset_val, infs, list(range(nr_tower))))

    return TrainConfig(
        model=model,
        dataflow=dataset_train,
        callbacks=callbacks,
        steps_per_epoch=steps_per_epoch,
        max_epoch=100,
    )
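The fake branch is a pure throughput benchmark: FakeData emits constant uint8 tensors shaped like ImageNet batches, so 100 steps per "epoch" measure training speed without any disk I/O. A standalone sketch of the same idea:

from tensorpack.dataflow import FakeData

# 64 fake 224x224x3 images plus 64 fake labels per datapoint, 1000 datapoints
df = FakeData([[64, 224, 224, 3], [64]], 1000, random=False, dtype='uint8')
df.reset_state()
images, labels = next(iter(df))
print(images.shape, labels.shape)        # (64, 224, 224, 3) (64,)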
Example 6: get_config
# Required import: from tensorpack.utils import gpu [as alias]
# Or: from tensorpack.utils.gpu import get_num_gpu [as alias]
def get_config(model, fake=False):
    nr_tower = max(get_num_gpu(), 1)
    assert args.batch % nr_tower == 0
    batch = args.batch // nr_tower

    logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch))
    if batch < 32 or batch > 64:
        logger.warn("Batch size per tower not in [32, 64]. This probably will lead to worse accuracy than reported.")
    if fake:
        data = QueueInput(FakeData(
            [[batch, 224, 224, 3], [batch], [batch, 224, 224, 3], [batch]], 1000, random=False, dtype='uint8'))
        callbacks = []
    else:
        data = QueueInput(get_data('train', batch))

        START_LR = 0.1
        BASE_LR = START_LR * (args.batch / 256.0)
        callbacks = [
            ModelSaver(),
            EstimatedTimeLeft(),
            ScheduledHyperParamSetter(
                'learning_rate', [
                    (0, min(START_LR, BASE_LR)), (30, BASE_LR * 1e-1), (45, BASE_LR * 1e-2),
                    (55, BASE_LR * 1e-3)]),
        ]
        if BASE_LR > START_LR:
            callbacks.append(
                ScheduledHyperParamSetter(
                    'learning_rate', [(0, START_LR), (5, BASE_LR)], interp='linear'))

        infs = [ClassificationError('wrong-top1', 'val-error-top1'),
                ClassificationError('wrong-top5', 'val-error-top5')]
        dataset_val = get_data('val', batch)
        if nr_tower == 1:
            # single-GPU inference with queue prefetch
            callbacks.append(InferenceRunner(QueueInput(dataset_val), infs))
        else:
            # multi-GPU inference (with mandatory queue prefetch)
            callbacks.append(DataParallelInferenceRunner(
                dataset_val, infs, list(range(nr_tower))))

    return AutoResumeTrainConfig(
        model=model,
        data=data,
        callbacks=callbacks,
        steps_per_epoch=100 if args.fake else 1280000 // args.batch,
        max_epoch=60,
    )
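This example (and Example 7 below) starts the step-decay schedule at min(START_LR, BASE_LR) and, only when linear scaling pushes BASE_LR above START_LR, adds a linear warmup over the first 5 epochs. A worked sketch with a hypothetical total batch of 1024:

START_LR = 0.1
BASE_LR = START_LR * (1024 / 256.0)      # hypothetical total batch of 1024 -> 0.4
# warmup: epochs 0-5 rise linearly from 0.1 to 0.4, then step decay:
# epoch 30 -> 0.04, epoch 45 -> 0.004, epoch 55 -> 0.0004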
Example 7: get_config
# Required import: from tensorpack.utils import gpu [as alias]
# Or: from tensorpack.utils.gpu import get_num_gpu [as alias]
def get_config(model):
    nr_tower = max(get_num_gpu(), 1)
    assert args.batch % nr_tower == 0
    batch = args.batch // nr_tower

    logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch))
    if batch < 32 or batch > 64:
        logger.warn("Batch size per tower not in [32, 64]. This probably will lead to worse accuracy than reported.")
    if args.fake:
        data = QueueInput(FakeData(
            [[batch, 224, 224, 3], [batch]], 1000, random=False, dtype='uint8'))
        callbacks = []
    else:
        if args.symbolic:
            data = TFDatasetInput(get_imagenet_tfdata(args.data, 'train', batch))
        else:
            data = QueueInput(get_imagenet_dataflow(args.data, 'train', batch))

        START_LR = 0.1
        BASE_LR = START_LR * (args.batch / 256.0)
        callbacks = [
            ModelSaver(),
            EstimatedTimeLeft(),
            ScheduledHyperParamSetter(
                'learning_rate', [
                    (0, min(START_LR, BASE_LR)), (30, BASE_LR * 1e-1), (60, BASE_LR * 1e-2),
                    (90, BASE_LR * 1e-3), (100, BASE_LR * 1e-4)]),
        ]
        if BASE_LR > START_LR:
            callbacks.append(
                ScheduledHyperParamSetter(
                    'learning_rate', [(0, START_LR), (5, BASE_LR)], interp='linear'))

        infs = [ClassificationError('wrong-top1', 'val-error-top1'),
                ClassificationError('wrong-top5', 'val-error-top5')]
        dataset_val = get_imagenet_dataflow(args.data, 'val', batch)
        if nr_tower == 1:
            # single-GPU inference with queue prefetch
            callbacks.append(InferenceRunner(QueueInput(dataset_val), infs))
        else:
            # multi-GPU inference (with mandatory queue prefetch)
            callbacks.append(DataParallelInferenceRunner(
                dataset_val, infs, list(range(nr_tower))))
    if get_num_gpu() > 0:
        callbacks.append(GPUUtilizationTracker())

    return TrainConfig(
        model=model,
        data=data,
        callbacks=callbacks,
        steps_per_epoch=100 if args.fake else 1281167 // args.batch,
        max_epoch=105,
    )
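Two details distinguish Example 7 from Example 6: the args.symbolic switch swaps the Python-side QueueInput pipeline for a tf.data one wrapped in TFDatasetInput, and GPUUtilizationTracker is appended outside the fake/real branch, so it is active whenever at least one GPU is visible, benchmark runs included.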
Example 8: train
# Required import: from tensorpack.utils import gpu [as alias]
# Or: from tensorpack.utils.gpu import get_num_gpu [as alias]
def train():
    # assign GPUs for training & inference
    num_gpu = get_num_gpu()
    global PREDICTOR_THREAD
    if num_gpu > 0:
        if num_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(num_gpu))[-num_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn("Without GPU this model will never learn! CPU is only useful for debug.")
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    config = TrainConfig(
        model=Model(),
        dataflow=master.get_training_dataflow(),
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            PeriodicTrigger(Evaluator(
                EVAL_EPISODE, ['state'], ['policy'], get_player),
                every_k_epochs=3),
        ],
        session_creator=sesscreate.NewSessionCreator(config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=SmartInit(args.load),
        max_epoch=1000,
    )
    trainer = SimpleTrainer() if num_gpu == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
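The tower split at the top of train() gives the last half of the GPUs (rounded up) to inference and the rest to training. A quick, purely illustrative check of the slicing arithmetic:

for num_gpu in (1, 2, 3, 4):
    predict_tower = list(range(num_gpu))[-num_gpu // 2:] if num_gpu > 1 else [0]
    train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
    print(num_gpu, train_tower, predict_tower)
# 1 [0] [0]
# 2 [0] [1]
# 3 [0] [1, 2]
# 4 [0, 1] [2, 3]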