本文整理汇总了C++中Executor::Backward方法的典型用法代码示例。如果您正苦于以下问题:C++ Executor::Backward方法的具体用法?C++ Executor::Backward怎么用?C++ Executor::Backward使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Executor的用法示例。
在下文中一共展示了Executor::Backward方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: trainWithBuiltInRNNOp
/**
 * Trains the network returned by LSTMWithBuiltInRNNOp on the data in `file`.
 *
 * Output files share a prefix, which is `file` with its last extension
 * removed: the character dictionary is written to "<prefix>.dictionary" and
 * one checkpoint per epoch is written to "<prefix>-<epoch>.params".
 *
 * Assigns input_dim and sequence_length_max, and reads num_lstm_layer,
 * num_hidden, num_embed and dropout; all of these are declared outside this
 * function.
 *
 * @param file        Input file handed to BucketSentenceIter.
 * @param batch_size  Number of sequences per mini-batch.
 * @param max_epoch   Exclusive upper bound on the epoch index.
 * @param start_epoch -1 to initialize the parameters with RNNXavier and start
 *                    at epoch 0; otherwise the epoch whose checkpoint is
 *                    loaded, with training resuming at start_epoch + 1.
 */
void trainWithBuiltInRNNOp(const string file, int batch_size, int max_epoch, int start_epoch) {
  Context device(DeviceType::kGPU, 0);
  BucketSentenceIter dataIter(file, batch_size, device);
  string prefix = file.substr(0, file.rfind("."));
  dataIter.saveCharIndices(prefix + ".dictionary");

  input_dim = static_cast<int>(dataIter.characterSize());
  sequence_length_max = dataIter.maxSequenceLength();

  auto RNN = LSTMWithBuiltInRNNOp(num_lstm_layer, sequence_length_max, input_dim, num_hidden,
      num_embed, dropout);
  map<string, NDArray> args_map;
  args_map["data"] = NDArray(Shape(batch_size, sequence_length_max), device, false);
  // Avoiding SwapAxis, batch_size is of second dimension.
  args_map["LSTM_init_c"] = NDArray(Shape(num_lstm_layer, batch_size, num_hidden), device, false);
  args_map["LSTM_init_h"] = NDArray(Shape(num_lstm_layer, batch_size, num_hidden), device, false);
  args_map["softmax_label"] = NDArray(Shape(batch_size, sequence_length_max), device, false);
  // Host-side zero buffer used to reset both initial LSTM states before every batch.
  vector<mx_float> zeros(batch_size * num_lstm_layer * num_hidden, 0);
  Executor* exe = RNN.SimpleBind(device, args_map);

  if (start_epoch == -1) {
    // Fresh run: initialize every bound argument.
    RNNXavier xavier = RNNXavier(Xavier::gaussian, Xavier::in, 2.34);
    for (auto &arg : exe->arg_dict())
      xavier(arg.first, &arg.second);
  } else {
    // Resume from the checkpoint saved for start_epoch.
    LoadCheckpoint(prefix + "-" + to_string(start_epoch) + ".params", exe);
  }
  // -1 becomes 0 for a fresh run; a resumed run continues with the next epoch.
  start_epoch++;

  mx_float learning_rate = 0.0002;
  mx_float weight_decay = 0.000002;
  // NOTE(review): whether the caller owns the pointer returned by
  // OptimizerRegistry::Find is not visible here, so opt is left untouched.
  Optimizer* opt = OptimizerRegistry::Find("ccsgd");
  // opt->SetParam("momentum", 0.9)->SetParam("rescale_grad", 1.0 / batch_size)
  // ->SetParam("clip_gradient", 10);

  for (int epoch = start_epoch; epoch < max_epoch; ++epoch) {
    dataIter.Reset();
    auto tic = chrono::system_clock::now();
    while (dataIter.Next()) {
      auto data_batch = dataIter.GetDataBatch();
      data_batch.data.CopyTo(&exe->arg_dict()["data"]);
      data_batch.label.CopyTo(&exe->arg_dict()["softmax_label"]);
      exe->arg_dict()["LSTM_init_c"].SyncCopyFromCPU(zeros);
      exe->arg_dict()["LSTM_init_h"].SyncCopyFromCPU(zeros);
      NDArray::WaitAll();

      exe->Forward(true);
      exe->Backward();
      exe->UpdateAll(opt, learning_rate, weight_decay);
      NDArray::WaitAll();
    }
    auto toc = chrono::system_clock::now();
    cout << "Epoch[" << epoch << "] Time Cost:" <<
        chrono::duration_cast<chrono::seconds>(toc - tic).count() << " seconds ";
    // Perplexity is reported from the label/output arrays as left by the last batch.
    OutputPerplexity(&exe->arg_dict()["softmax_label"], &exe->outputs[0]);
    string filepath = prefix + "-" + to_string(epoch) + ".params";
    SaveCheckpoint(filepath, RNN, exe);
  }

  // The executor obtained from SimpleBind is not released anywhere else.
  delete exe;
}
示例2: Run
// ......... part of the code is omitted here .........
// NOTE(review): the beginning of Run() is not shown in this excerpt; the next
// line is the tail of a statement that starts in the omitted part.
Shape(2, 2), 500);
Symbol tanh3 = Activation("tanh3", conv3, ActivationActType::tanh);
Symbol pool3 = Pooling("pool3", tanh3, Shape(2, 2), PoolingPoolType::max,
false, false, PoolingPoolingConvention::valid, Shape(1, 1));
Symbol flatten = Flatten("flatten", pool3);
Symbol fc1 = FullyConnected("fc1", flatten, fc1_w, fc1_b, 500);
Symbol tanh4 = Activation("tanh4", fc1, ActivationActType::tanh);
Symbol fc2 = FullyConnected("fc2", tanh4, fc2_w, fc2_b, 10);
Symbol lenet = SoftmaxOutput("softmax", fc2, data_label);
// Log the name of every argument of the assembled network.
for (auto s : lenet.ListArguments()) {
LG << s;
}
/* set up the basic configuration */
int val_fold = 1;
int W = 28;
int H = 28;
int batch_size = 42;
int max_epoch = 100000;
float learning_rate = 1e-4;
float weight_decay = 1e-4;
/* prepare the data */
vector<float> data_vec, label_vec;
size_t data_count = GetData(&data_vec, &label_vec);
const float *dptr = data_vec.data();
const float *lptr = label_vec.data();
NDArray data_array = NDArray(Shape(data_count, 1, W, H), ctx_cpu,
false); // store in main memory, and copy to
// device memory while training
NDArray label_array =
NDArray(Shape(data_count), ctx_cpu,
false); // it's also ok if just store them all in device memory
data_array.SyncCopyFromCPU(dptr, data_count * W * H);
label_array.SyncCopyFromCPU(lptr, data_count);
data_array.WaitToRead();
label_array.WaitToRead();
// The first (1 - val_fold / 10.0) share of the samples is used for training;
// the remainder becomes the validation slices.
size_t train_num = data_count * (1 - val_fold / 10.0);
// train_data, train_label, val_data and val_label are not declared in the
// visible code; they are only assigned here.
train_data = data_array.Slice(0, train_num);
train_label = label_array.Slice(0, train_num);
val_data = data_array.Slice(train_num, data_count);
val_label = label_array.Slice(train_num, data_count);
LG << "here read fin";
/* init some of the args */
// The local declaration below is commented out, so args_map refers to a
// variable declared outside the visible code.
// map<string, NDArray> args_map;
args_map["data"] = data_array.Slice(0, batch_size).Copy(ctx_dev);
args_map["data_label"] = label_array.Slice(0, batch_size).Copy(ctx_dev);
NDArray::WaitAll();
LG << "here slice fin";
/*
* we can also feed in some of the args other than the input all by
* ourselves,
* fc2-w , fc1-b for example:
* */
// args_map["fc2_w"] =
// NDArray(mshadow::Shape2(500, 4 * 4 * 50), ctx_dev, false);
// NDArray::SampleGaussian(0, 1, &args_map["fc2_w"]);
// args_map["fc1_b"] = NDArray(mshadow::Shape1(10), ctx_dev, false);
// args_map["fc1_b"] = 0;
// NOTE(review): presumably fills args_map with the arguments not set above --
// confirm against the mxnet-cpp Symbol::InferArgsMap documentation.
lenet.InferArgsMap(ctx_dev, &args_map, args_map);
Optimizer* opt = OptimizerRegistry::Find("ccsgd");
opt->SetParam("momentum", 0.9)
->SetParam("rescale_grad", 1.0)
->SetParam("clip_gradient", 10);
// Each ITER makes one pass over the training slices in steps of batch_size.
for (int ITER = 0; ITER < max_epoch; ++ITER) {
size_t start_index = 0;
while (start_index < train_num) {
// If the last batch would run past train_num, move it back so that it still
// holds exactly batch_size samples (it then overlaps the previous batch).
// NOTE(review): assumes train_num >= batch_size; otherwise the unsigned
// subtraction would wrap around.
if (start_index + batch_size > train_num) {
start_index = train_num - batch_size;
}
args_map["data"] =
train_data.Slice(start_index, start_index + batch_size)
.Copy(ctx_dev);
args_map["data_label"] =
train_label.Slice(start_index, start_index + batch_size)
.Copy(ctx_dev);
start_index += batch_size;
NDArray::WaitAll();
// A new executor is bound for every mini-batch and deleted after the update.
Executor *exe = lenet.SimpleBind(ctx_dev, args_map);
exe->Forward(true);
exe->Backward();
exe->UpdateAll(opt, learning_rate, weight_decay);
delete exe;
}
// Report the value returned by ValAccuracy once per pass.
LG << "Iter " << ITER
<< ", accuracy: " << ValAccuracy(batch_size * 10, lenet);
}
}
示例3: MLP
// ......... part of the code is omitted here .........
// NOTE(review): the beginning of MLP() is not shown in this excerpt; sym_out,
// used below to build the executor, is defined in the omitted part.
Context ctx_dev(DeviceType::kCPU, 0);
// Input array of shape (128, 28) and label array of shape (128).
NDArray array_x(Shape(128, 28), ctx_dev, false);
NDArray array_y(Shape(128), ctx_dev, false);
// Host-side buffers used to fill the arrays; released at the end of the function.
mx_float* aptr_x = new mx_float[128 * 28];
mx_float* aptr_y = new mx_float[128];
// we make the data by hand, in 10 classes, with some pattern
for (int i = 0; i < 128; i++) {
for (int j = 0; j < 28; j++) {
aptr_x[i * 28 + j] = i % 10 * 1.0f;
}
aptr_y[i] = i % 10;
}
array_x.SyncCopyFromCPU(aptr_x, 128 * 28);
array_x.WaitToRead();
array_y.SyncCopyFromCPU(aptr_y, 128);
array_y.WaitToRead();
// init the parameters
NDArray array_w_1(Shape(512, 28), ctx_dev, false);
NDArray array_b_1(Shape(512), ctx_dev, false);
NDArray array_w_2(Shape(10, 512), ctx_dev, false);
NDArray array_b_2(Shape(10), ctx_dev, false);
// the parameters should be initialized in some kind of distribution,
// so it learns fast
// but here just give a const value by hand
array_w_1 = 0.5f;
array_b_1 = 0.0f;
array_w_2 = 0.5f;
array_b_2 = 0.0f;
// the grads: one array per parameter, with the same shape as that parameter
NDArray array_w_1_g(Shape(512, 28), ctx_dev, false);
NDArray array_b_1_g(Shape(512), ctx_dev, false);
NDArray array_w_2_g(Shape(10, 512), ctx_dev, false);
NDArray array_b_2_g(Shape(10), ctx_dev, false);
// Bind the symbolic network with the ndarray
// all the input args, in the order: x, w_1, b_1, w_2, b_2, y
std::vector<NDArray> in_args;
in_args.push_back(array_x);
in_args.push_back(array_w_1);
in_args.push_back(array_b_1);
in_args.push_back(array_w_2);
in_args.push_back(array_b_2);
in_args.push_back(array_y);
// all the grads, index-aligned with in_args
std::vector<NDArray> arg_grad_store;
arg_grad_store.push_back(NDArray()); // we don't need the grad of the input
arg_grad_store.push_back(array_w_1_g);
arg_grad_store.push_back(array_b_1_g);
arg_grad_store.push_back(array_w_2_g);
arg_grad_store.push_back(array_b_2_g);
arg_grad_store.push_back(
NDArray()); // neither do we need the grad of the loss
// how to handle the grad: kNullOp for the two entries without a gradient
// array (index 0 and 5), kWriteTo for the four parameters
std::vector<OpReqType> grad_req_type;
grad_req_type.push_back(kNullOp);
grad_req_type.push_back(kWriteTo);
grad_req_type.push_back(kWriteTo);
grad_req_type.push_back(kWriteTo);
grad_req_type.push_back(kWriteTo);
grad_req_type.push_back(kNullOp);
// No auxiliary states are supplied.
std::vector<NDArray> aux_states;
cout << "make the Executor" << endl;
Executor* exe = new Executor(sym_out, ctx_dev, in_args, arg_grad_store,
grad_req_type, aux_states);
cout << "Training" << endl;
int max_iters = 20000;
mx_float learning_rate = 0.0001;
for (int iter = 0; iter < max_iters; ++iter) {
exe->Forward(true);
// Every 100 iterations, copy the first output (128 * 10 floats) to the host
// and pass it with the labels to OutputAccuracy.
if (iter % 100 == 0) {
cout << "epoch " << iter << endl;
std::vector<NDArray>& out = exe->outputs;
float* cptr = new float[128 * 10];
out[0].SyncCopyToCPU(cptr, 128 * 10);
NDArray::WaitAll();
OutputAccuracy(cptr, aptr_y);
delete[] cptr;
}
// update the parameters
exe->Backward();
// Indices 1..4 are the four parameter arrays; the input (0) and the label (5)
// are skipped. Each parameter is reduced by learning_rate times its gradient.
for (int i = 1; i < 5; ++i) {
in_args[i] -= arg_grad_store[i] * learning_rate;
}
NDArray::WaitAll();
}
// Release the executor and the two host buffers allocated above.
delete exe;
delete[] aptr_x;
delete[] aptr_y;
}
示例4: Run
void Run() {
/*
* LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner.
* "Gradient-based learning applied to document recognition."
* Proceedings of the IEEE (1998)
* */
/*define the symbolic net*/
for (auto s : lenet.ListArguments()) {
LG << s;
}
/*setup basic configs*/
int val_fold = 3;
int batch_size = 20;
int max_epoch = 50;
float learning_rate = 4e-6;
float weight_decay = 1e-5;
/*prepare the data*/
vector<float> data_vec, label_vec;
//size_t data_count = GetData(&data_vec, &label_vec);
size_t data_count = Getdata(data_vec, label_vec);
const float *dptr = data_vec.data();
const float *lptr = label_vec.data();
NDArray data_array = NDArray(Shape(data_count, 1, W, H), ctx_cpu,
false); // store in main memory, and copy to
// device memory while training
NDArray label_array =
NDArray(Shape(data_count), ctx_cpu,
false); // it's also ok if just store them all in device memory
data_array.SyncCopyFromCPU(dptr, data_count * W * H);
label_array.SyncCopyFromCPU(lptr, data_count);
data_array.WaitToRead();
label_array.WaitToRead();
size_t train_num = data_count * (1 - val_fold / 10.0);
train_data = data_array.Slice(0, train_num);
train_label = label_array.Slice(0, train_num);
val_data = data_array.Slice(train_num, data_count);
val_label = label_array.Slice(train_num, data_count);
LG << "here read fin";
/*init some of the args*/
// map<string, NDArray> args_map;
args_map["data"] =
NDArray(Shape(batch_size, 1, W, H), ctx_dev, false);
args_map["data"] = data_array.Slice(0, batch_size).Copy(ctx_dev);
//args_map["data_label"] = label_array.Slice(0, batch_size).Copy(ctx_dev);
/*args_map["fc1_weight"] =
NDArray(Shape(500, 4 * 4 * 50), ctx_dev, false);
//SampleGaussian(0, 1, &args_map["fc1_weight"]);
args_map["fc1_weight"] = 0.3;
args_map["fc2_bias"] = NDArray(Shape(10), ctx_dev, false);
args_map["fc2_bias"] = 0;*/
NDArray::WaitAll();
LG << "here slice fin";
/*
* we can also feed in some of the args other than the input all by
* ourselves,
* fc2-w , fc1-b for example:
* */
// args_map["fc2_w"] =
// NDArray(mshadow::Shape2(500, 4 * 4 * 50), ctx_dev, false);
// NDArray::SampleGaussian(0, 1, &args_map["fc2_w"]);
// args_map["fc1_b"] = NDArray(mshadow::Shape1(10), ctx_dev, false);
// args_map["fc1_b"] = 0;
Optimizer opt("ccsgd", learning_rate, weight_decay);
opt.SetParam("momentum", 0.9)
.SetParam("rescale_grad", 1.0)
.SetParam("clip_gradient", 10);
for (int ITER = 0; ITER < max_epoch; ++ITER) {
size_t start_index = 0;
while (start_index < train_num) {
if (start_index + batch_size > train_num) {
start_index = train_num - batch_size;
}
args_map["data"] =
train_data.Slice(start_index, start_index + batch_size)
.Copy(ctx_dev);
args_map["data_label"] =
train_label.Slice(start_index, start_index + batch_size)
.Copy(ctx_dev);
start_index += batch_size;
NDArray::WaitAll();
Executor *exe = lenet.SimpleBind(ctx_dev, args_map);
exe->Forward(true);
exe->Backward();
exe->UpdateAll(&opt, learning_rate, weight_decay);
delete exe;
//.........这里部分代码省略.........