This article collects and summarizes typical usage examples of the Python method cntk.Trainer.save_checkpoint. If you are wondering what Trainer.save_checkpoint does, how to use it, or where to find it used in practice, the hand-picked code samples below may help. You can also explore further usage examples of the containing class, cntk.Trainer.
The following shows 2 code examples of the Trainer.save_checkpoint method, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
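Before the examples, here is a minimal sketch of the save/restore pattern that both examples build on (assuming CNTK 2.x; the toy model, the random data, and the file name toy_checkpoint.dnn are placeholders for illustration, not taken from either example):

import os
import numpy as np
import cntk as C

# toy two-class model: a single Dense layer over 4 input features
feature = C.input_variable(4)
label = C.input_variable(2)
model = C.layers.Dense(2)(feature)

loss = C.cross_entropy_with_softmax(model, label)
metric = C.classification_error(model, label)
learner = C.sgd(model.parameters,
                lr=C.learning_rate_schedule(0.1, C.UnitType.minibatch))
trainer = C.Trainer(model, (loss, metric), [learner])

checkpoint = "toy_checkpoint.dnn"              # hypothetical file name
if os.path.isfile(checkpoint):
    trainer.restore_from_checkpoint(checkpoint)  # resume trainer state if a checkpoint exists

x = np.random.rand(8, 4).astype(np.float32)
y = np.eye(2, dtype=np.float32)[np.random.randint(0, 2, 8)]
trainer.train_minibatch({feature: x, label: y})

trainer.save_checkpoint(checkpoint)            # persist model weights plus trainer state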
Example 1: entrenar
# Required import: from cntk import Trainer [as alias]
# Or: from cntk.Trainer import save_checkpoint [as alias]
def entrenar(checkpoint, entrRuedas, entrOperaciones, input_dim, num_output_classes, testRuedas, testOperaciones):
    minibatch_size = 100
    epocs = 900
    minibatchIteraciones = int(len(entrOperaciones) / minibatch_size)

    # Input variables denoting the features and label data
    feature = input((input_dim), np.float32)
    label = input((num_output_classes), np.float32)

    netout = crearRed(input_dim, num_output_classes, feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_rate_schedule(0.25, UnitType.minibatch)
    # Instantiate the trainer object to drive the model training
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(log_to_file=checkpoint + ".log", num_epochs=epocs)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    # resume from an existing checkpoint, if any
    if os.path.isfile(checkpoint):
        trainer.restore_from_checkpoint(checkpoint)

    npentrRuedas = np.array(entrRuedas).astype(np.float32)
    npentrOperaciones = np.array(entrOperaciones).astype(np.float32)

    # iterate once per epoch
    for i in range(0, epocs):
        # shuffle features and labels with the same permutation
        p = np.random.permutation(len(entrRuedas))
        npentrOperaciones = npentrOperaciones[p]
        npentrRuedas = npentrRuedas[p]

        # now split the data into minibatches and train
        for j in range(0, minibatchIteraciones):
            features = npentrRuedas[j * minibatch_size:(j + 1) * minibatch_size]
            labels = npentrOperaciones[j * minibatch_size:(j + 1) * minibatch_size]
            trainer.train_minibatch({feature: features, label: labels})

        trainer.summarize_training_progress()
        # save a checkpoint at the end of every epoch
        trainer.save_checkpoint(checkpoint)

    # evaluate on the test set, averaging the error over the test minibatches
    minibatchIteraciones = int(len(testOperaciones) / minibatch_size)
    avg_error = 0
    for j in range(0, minibatchIteraciones):
        test_features = np.array(testRuedas[j * minibatch_size:(j + 1) * minibatch_size]).astype(np.float32)
        test_labels = np.array(testOperaciones[j * minibatch_size:(j + 1) * minibatch_size]).astype(np.float32)
        # test_features = np.array(entrRuedas[0:minibatch_size]).astype(np.float32)
        # test_labels = np.array(entrOperaciones[0:minibatch_size]).astype(np.float32)
        avg_error = avg_error + (trainer.test_minibatch(
            {feature: test_features, label: test_labels}) / minibatchIteraciones)
    return avg_error
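A hypothetical way to call entrenar with synthetic data is sketched below; the dimensions, the file name modelo.dnn, and the random arrays are illustrative assumptions, not part of the original project:

import numpy as np

input_dim = 30
num_output_classes = 3

# synthetic one-hot labelled data standing in for the project's real dataset
entrRuedas = np.random.rand(1000, input_dim).tolist()
entrOperaciones = np.eye(num_output_classes)[np.random.randint(0, num_output_classes, 1000)].tolist()
testRuedas = np.random.rand(200, input_dim).tolist()
testOperaciones = np.eye(num_output_classes)[np.random.randint(0, num_output_classes, 200)].tolist()

avg_error = entrenar("modelo.dnn", entrRuedas, entrOperaciones,
                     input_dim, num_output_classes, testRuedas, testOperaciones)
print("average test error: {:.4f}".format(avg_error))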
Example 2: train_and_evaluate
# Required import: from cntk import Trainer [as alias]
# Or: from cntk.Trainer import save_checkpoint [as alias]
def train_and_evaluate(create_train_reader, test_reader, network_name, max_epochs, create_dist_learner, scale_up=False):
    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
        lr_per_mb = [1.0] * 80 + [0.1] * 40 + [0.01]
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
        lr_per_mb = [0.1] * 1 + [1.0] * 80 + [0.1] * 40 + [0.01]
    else:
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # shared training parameters
    epoch_size = 50000  # for now we manually specify epoch size

    # NOTE: scaling up minibatch_size increases sample throughput. On an 8-GPU machine,
    # ResNet110 samples-per-second is ~7x that of a single GPU, compared to ~3x without
    # scaling up. However, a bigger minibatch size for the same number of samples means
    # fewer updates, and thus a higher training error. This is a trade-off between speed
    # and accuracy.
    minibatch_size = 128 * (distributed.Communicator.num_workers() if scale_up else 1)

    momentum_time_constant = -minibatch_size / np.log(0.9)
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr / minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # trainer object
    learner = create_dist_learner(momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                                               l2_regularization_weight=l2_reg_weight))
    trainer = Trainer(z, ce, pe, learner)

    total_number_of_samples = max_epochs * epoch_size
    train_reader = create_train_reader(total_number_of_samples)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: train_reader.streams.features,
        label_var: train_reader.streams.labels
    }

    log_number_of_parameters(z); print()
    progress_printer = ProgressPrinter(tag='Training')

    # perform model training
    current_epoch = 0
    updated = True
    while updated:
        data = train_reader.next_minibatch(minibatch_size, input_map=input_map)  # fetch minibatch
        updated = trainer.train_minibatch(data)                                  # update model with it
        progress_printer.update_with_trainer(trainer, with_metric=True)          # log progress
        epoch_index = int(trainer.total_number_of_samples_seen / epoch_size)
        if current_epoch != epoch_index:                                         # new epoch reached
            progress_printer.epoch_summary(with_metric=True)
            current_epoch = epoch_index
            # save one checkpoint per epoch, tagged with the epoch index
            trainer.save_checkpoint(os.path.join(model_path,
                                                 network_name + "_{}.dnn".format(current_epoch)))

    # Evaluation parameters
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while True:
        data = test_reader.next_minibatch(minibatch_size, input_map=input_map)
        if not data:
            break
        local_mb_samples = data[label_var].num_samples
        metric_numer += trainer.test_minibatch(data) * local_mb_samples
        metric_denom += local_mb_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer * 100.0) / metric_denom, metric_denom))
    print("")

    return metric_numer / metric_denom
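For context, here is a hedged sketch of how a caller might supply the create_dist_learner argument using CNTK's data-parallel distributed learner; the parameter values and the commented-out driver call are assumptions for illustration, and the construction of the CIFAR-10 readers is elided:

from cntk import distributed

def create_dist_learner(learner):
    # wrap the local momentum_sgd learner for data-parallel training;
    # num_quantization_bits=32 means no 1-bit SGD quantization
    return distributed.data_parallel_distributed_learner(learner,
                                                         num_quantization_bits=32,
                                                         distributed_after=0)

# With create_train_reader / test_reader built over the CIFAR-10 data, the call
# might look like this (a distributed run must also finalize the communicator):
# error = train_and_evaluate(create_train_reader, test_reader, 'resnet20', max_epochs=160,
#                            create_dist_learner=create_dist_learner, scale_up=True)
# distributed.Communicator.finalize()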