This page collects typical usage examples of the C++ method Projector::vectorNorm. If you are unsure what Projector::vectorNorm does, how to call it, or what it looks like in real code, the hand-picked examples below should help. You can also explore further usage examples of the enclosing class, Projector.
The following shows 4 code examples of the Projector::vectorNorm method, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better C++ code examples.
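Every example below uses vectorNorm() for the same purpose: the nominal step sizes (alpha_v, alpha_w, alpha_u) are divided by projector->vectorNorm(), which for tile-coding projectors typically corresponds to the number of active features per state, so that the effective learning rate does not grow with the number of tilings. The following minimal sketch illustrates that idiom; the ConstantProjector class is a hypothetical stand-in for a real Projector implementation such as TileCoderHashing and is not part of the library.

#include <iostream>

// Hypothetical stand-in for the Projector interface used in the examples below:
// vectorNorm() reports how many features are active in each projected feature
// vector, and dimension() reports the length of that vector.
class ConstantProjector
{
  public:
    ConstantProjector(double nbActiveFeatures, int nbDimensions) :
        nbActiveFeatures(nbActiveFeatures), nbDimensions(nbDimensions)
    {
    }
    double vectorNorm() const { return nbActiveFeatures; }
    int dimension() const { return nbDimensions; }

  private:
    double nbActiveFeatures;
    int nbDimensions;
};

int main()
{
  // In this sketch, 10 tilings plus a bias feature are active per state.
  ConstantProjector projector(10 + 1, 1000000);
  // Same normalization idiom as in the test methods below: dividing the nominal
  // step size by vectorNorm() keeps the effective learning rate roughly
  // independent of the number of active features.
  double alpha_v = 0.1 / projector.vectorNorm();
  double alpha_u = 0.5 / projector.vectorNorm();
  std::cout << "alpha_v = " << alpha_v << ", alpha_u = " << alpha_u << std::endl;
  return 0;
}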
Example 1: testOffPACSwingPendulum2
void SwingPendulumTest::testOffPACSwingPendulum2()
{
  Random<double>* random = new Random<double>;
  RLProblem<double>* problem = new SwingPendulum<double>;
  Hashing<double>* hashing = new MurmurHashing<double>(random, 1000000);
  Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10, 10,
      true);
  StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector,
      problem->getDiscreteActions());
  // Step sizes are scaled by the number of active features reported by the projector.
  double alpha_v = 0.1 / projector->vectorNorm();
  double alpha_w = .005 / projector->vectorNorm();
  double gamma = 0.99;
  Trace<double>* critice = new AMaxTrace<double>(projector->dimension());
  Trace<double>* criticeML = new MaxLengthTrace<double>(critice, 1000);
  GTDLambda<double>* critic = new GTDLambda<double>(alpha_v, alpha_w, gamma, 0.4, criticeML);
  double alpha_u = 0.5 / projector->vectorNorm();
  PolicyDistribution<double>* target = new BoltzmannDistribution<double>(random,
      problem->getDiscreteActions(), projector->dimension());
  Trace<double>* actore = new AMaxTrace<double>(projector->dimension());
  Trace<double>* actoreML = new MaxLengthTrace<double>(actore, 1000);
  Traces<double>* actoreTraces = new Traces<double>();
  actoreTraces->push_back(actoreML);
  ActorOffPolicy<double>* actor = new ActorLambdaOffPolicy<double>(alpha_u, gamma, 0.4, target,
      actoreTraces);
  /*Policy<double>* behavior = new RandomPolicy<double>(
      &problem->getActions());*/
  Policy<double>* behavior = new BoltzmannDistribution<double>(random,
      problem->getDiscreteActions(), projector->dimension());
  OffPolicyControlLearner<double>* control = new OffPAC<double>(behavior, critic, actor,
      toStateAction, projector);
  RLAgent<double>* agent = new LearnerAgent<double>(control);
  RLRunner<double>* sim = new RLRunner<double>(agent, problem, 5000, 200, 1);
  sim->setTestEpisodesAfterEachRun(true);
  sim->run();
  delete random;
  delete problem;
  delete hashing;
  delete projector;
  delete toStateAction;
  delete critice;
  delete criticeML;
  delete critic;
  delete actore;
  delete actoreML;
  delete actoreTraces;
  delete actor;
  delete behavior;
  delete target;
  delete control;
  delete agent;
  delete sim;
}
Example 2: testOffPACOnPolicySwingPendulum
void SwingPendulumTest::testOffPACOnPolicySwingPendulum()
{
  Random<double>* random = new Random<double>;
  RLProblem<double>* problem = new SwingPendulum<double>;
  Hashing<double>* hashing = new MurmurHashing<double>(random, 1000);
  Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10, 10,
      true);
  StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector,
      problem->getDiscreteActions());
  double alpha_v = 0.1 / projector->vectorNorm();
  double alpha_w = .0001 / projector->vectorNorm();
  double gamma = 0.99;
  double lambda = 0.4;
  Trace<double>* critice = new ATrace<double>(projector->dimension());
  GTDLambda<double>* critic = new GTDLambda<double>(alpha_v, alpha_w, gamma, lambda, critice);
  double alpha_u = 0.5 / projector->vectorNorm();
  PolicyDistribution<double>* acting = new BoltzmannDistribution<double>(random,
      problem->getDiscreteActions(), projector->dimension());
  Trace<double>* actore = new ATrace<double>(projector->dimension());
  Traces<double>* actoreTraces = new Traces<double>();
  actoreTraces->push_back(actore);
  ActorOffPolicy<double>* actor = new ActorLambdaOffPolicy<double>(alpha_u, gamma, lambda, acting,
      actoreTraces);
  // Off-PAC run on-policy: the acting distribution serves as both behavior and target policy.
  OffPolicyControlLearner<double>* control = new OffPAC<double>(acting, critic, actor,
      toStateAction, projector);
  RLAgent<double>* agent = new LearnerAgent<double>(control);
  RLRunner<double>* sim = new RLRunner<double>(agent, problem, 5000, 10, 5);
  sim->setTestEpisodesAfterEachRun(true);
  sim->run();
  sim->computeValueFunction();
  delete random;
  delete problem;
  delete hashing;
  delete projector;
  delete toStateAction;
  delete critice;
  delete critic;
  delete actore;
  delete actoreTraces;
  delete actor;
  delete acting;
  delete control;
  delete agent;
  delete sim;
}
Example 3: testTrain
void NAOTest::testTrain()
{
  // OffLine
  {
    Random<float>* random = new Random<float>;
    RLProblem<float>* problem = new MountainCar<float>(random);
    Hashing<float>* hashing = new MurmurHashing<float>(random, 1000000);
    Projector<float>* projector = new TileCoderHashing<float>(hashing, problem->dimension(), 10,
        10);
    StateToStateAction<float>* toStateAction = new StateActionTilings<float>(projector,
        problem->getDiscreteActions());
    double alpha_v = 0.05 / projector->vectorNorm();
    double alpha_w = 0.0001 / projector->vectorNorm();
    double lambda = 0.0; //0.4;
    double gamma = 0.99;
    Trace<float>* critice = new ATrace<float>(projector->dimension());
    OffPolicyTD<float>* critic = new GTDLambda<float>(alpha_v, alpha_w, gamma, lambda, critice);
    double alpha_u = 1.0 / projector->vectorNorm();
    PolicyDistribution<float>* target = new BoltzmannDistribution<float>(random,
        problem->getDiscreteActions(), projector->dimension());
    Trace<float>* actore = new ATrace<float>(projector->dimension());
    Traces<float>* actoreTraces = new Traces<float>();
    actoreTraces->push_back(actore);
    ActorOffPolicy<float>* actor = new ActorLambdaOffPolicy<float>(alpha_u, gamma, lambda, target,
        actoreTraces);
    Policy<float>* behavior = new RandomPolicy<float>(random, problem->getDiscreteActions());
    OffPolicyControlLearner<float>* control = new OffPAC<float>(behavior, critic, actor,
        toStateAction, projector);
    RLAgent<float>* agent = new LearnerAgent<float>(control);
    Simulator<float>* sim = new Simulator<float>(agent, problem, 5000, 100, 1);
    //sim->setVerbose(false);
    sim->run();
    control->persist("NAOTest_x32_M.bin");
    delete random;
    delete problem;
    delete hashing;
    delete projector;
    delete toStateAction;
    delete critice;
    delete critic;
    delete actore;
    delete actoreTraces;
    delete actor;
    delete behavior;
    delete target;
    delete control;
    delete agent;
    delete sim;
  }
  // OnLine
  {
    Random<double>* random = new Random<double>;
    RLProblem<double>* problem = new SwingPendulum<double>(random);
    Hashing<double>* hashing = new MurmurHashing<double>(random, 1000);
    Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10,
        10, false);
    StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector,
        problem->getContinuousActions());
    double alpha_v = 0.1 / projector->vectorNorm();
    double alpha_u = 0.001 / projector->vectorNorm();
    double alpha_r = .0001;
    double gamma = 1.0;
    double lambda = 0.5;
    Trace<double>* critice = new ATrace<double>(projector->dimension());
    TDLambda<double>* critic = new TDLambda<double>(alpha_v, gamma, lambda, critice);
    PolicyDistribution<double>* policyDistribution = new NormalDistributionScaled<double>(random,
        problem->getContinuousActions(), 0, 1.0, projector->dimension());
    Range<double> policyRange(-2.0, 2.0);
    Range<double> problemRange(-2.0, 2.0);
    PolicyDistribution<double>* acting = new ScaledPolicyDistribution<double>(
        problem->getContinuousActions(), policyDistribution, &policyRange, &problemRange);
    Trace<double>* actore1 = new ATrace<double>(projector->dimension());
    Trace<double>* actore2 = new ATrace<double>(projector->dimension());
    Traces<double>* actoreTraces = new Traces<double>();
    actoreTraces->push_back(actore1);
    actoreTraces->push_back(actore2);
    ActorOnPolicy<double>* actor = new ActorLambda<double>(alpha_u, gamma, lambda, acting,
        actoreTraces);
    OnPolicyControlLearner<double>* control = new AverageRewardActorCritic<double>(critic, actor,
        projector, toStateAction, alpha_r);
    RLAgent<double>* agent = new LearnerAgent<double>(control);
    Simulator<double>* sim = new Simulator<double>(agent, problem, 5000, 100, 1);
    sim->run();
    control->persist("NAOTest_x32_S.bin");
    delete random;
    delete problem;
    delete hashing;
    //......... the rest of the code is omitted here .........
Example 4: testOnPolicySwingPendulum
void SwingPendulumTest::testOnPolicySwingPendulum()
{
  Random<double>* random = new Random<double>;
  RLProblem<double>* problem = new SwingPendulum<double>;
  Hashing<double>* hashing = new MurmurHashing<double>(random, 1000);
  Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10, 10,
      false);
  StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector,
      problem->getContinuousActions());
  double alpha_v = 0.1 / projector->vectorNorm();
  double alpha_u = 0.001 / projector->vectorNorm();
  double alpha_r = .0001;
  double gamma = 1.0;
  double lambda = 0.5;
  Trace<double>* critice = new ATrace<double>(projector->dimension());
  TDLambda<double>* critic = new TDLambda<double>(alpha_v, gamma, lambda, critice);
  PolicyDistribution<double>* policyDistribution = new NormalDistributionScaled<double>(random,
      problem->getContinuousActions(), 0, 1.0, projector->dimension());
  Range<double> policyRange(-2.0, 2.0);
  Range<double> problemRange(-2.0, 2.0);
  PolicyDistribution<double>* acting = new ScaledPolicyDistribution<double>(
      problem->getContinuousActions(), policyDistribution, &policyRange, &problemRange);
  Trace<double>* actore1 = new ATrace<double>(projector->dimension());
  Trace<double>* actore2 = new ATrace<double>(projector->dimension());
  Traces<double>* actoreTraces = new Traces<double>();
  actoreTraces->push_back(actore1);
  actoreTraces->push_back(actore2);
  ActorOnPolicy<double>* actor = new ActorLambda<double>(alpha_u, gamma, lambda, acting,
      actoreTraces);
  OnPolicyControlLearner<double>* control = new AverageRewardActorCritic<double>(critic, actor,
      projector, toStateAction, alpha_r);
  RLAgent<double>* agent = new LearnerAgent<double>(control);
  RLRunner<double>* sim = new RLRunner<double>(agent, problem, 5000, 100, 10);
  sim->setVerbose(true);
  sim->run();
  sim->runEvaluate(100);
  sim->computeValueFunction();
  delete random;
  delete problem;
  delete hashing;
  delete projector;
  delete toStateAction;
  delete critice;
  delete critic;
  delete actore1;
  delete actore2;
  delete actoreTraces;
  delete actor;
  delete policyDistribution;
  delete acting;
  delete control;
  delete agent;
  delete sim;
}