This article collects typical usage examples of the C++ method Projector::dimension. If you are unsure what Projector::dimension does or how to call it, the curated examples below should help; you can also explore the other methods of the Projector class.
Six code examples of Projector::dimension follow, sorted by popularity by default.
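All six examples share one pattern: Projector::dimension() returns the length of the projected (tile-coded) feature vector, so every per-feature structure (eligibility traces, policy parameter vectors) is allocated from it, and step sizes are normalized by Projector::vectorNorm(). Below is a minimal sketch of that pattern; the header names are assumptions, and the fixture (random, problem) mirrors the examples that follow.
// A minimal sketch, not a standalone program: header names are assumed,
// everything else mirrors the examples below.
#include "Projector.h"
#include "Trace.h"

template<typename T>
void dimensionSketch(Random<T>* random, RLProblem<T>* problem)
{
// Hash 10 tilings of a 10x10 grid into a 10,000-entry feature table.
Hashing<T>* hashing = new MurmurHashing<T>(random, 10000);
Projector<T>* projector = new TileCoderHashing<T>(hashing, problem->dimension(), 10, 10, false);
// dimension() is the size of the projected feature vector, so a trace
// holding one entry per feature is sized from it.
Trace<T>* trace = new ATrace<T>(projector->dimension());
// Step sizes are conventionally divided by the number of active features.
double alpha = 0.1 / projector->vectorNorm();
(void) alpha; // silence unused-variable warnings in this sketch
delete trace;
delete projector;
delete hashing;
}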
Example 1: testSarsaOnMountainCarMaxLengthTraces
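This trace test runs the same learning routine on MountainCar three times, each time allocating a different trace type (ATrace, AMaxTrace, RTrace) with projector->dimension() and wrapping it in a MaxLengthTrace capped at 100 entries.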
void TraceTest::testSarsaOnMountainCarMaxLengthTraces()
{
Random<double>* random = new Random<double>;
RLProblem<double>* problem = new MountainCar<double>(random);
Hashing<double>* hashing = new MurmurHashing<double>(random, 10000);
Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10, 10,
false);
Trace<double>* e = new ATrace<double>(projector->dimension());
Trace<double>* trace = new MaxLengthTrace<double>(e, 100);
runTest(random, problem, projector, trace);
delete trace;
delete e;
e = new AMaxTrace<double>(projector->dimension());
trace = new MaxLengthTrace<double>(e, 100);
runTest(random, problem, projector, trace);
delete trace;
delete e;
e = new RTrace<double>(projector->dimension());
trace = new MaxLengthTrace<double>(e, 100);
runTest(random, problem, projector, trace);
delete trace;
delete e;
delete random;
delete problem;
delete hashing;
delete projector;
}
Example 2: testOffPACSwingPendulum2
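An Off-PAC (off-policy actor-critic) setup on SwingPendulum: projector->dimension() sizes the GTD(lambda) critic trace, the actor trace, and both Boltzmann policies (target and behavior), while the traces themselves are length-capped at 1000 entries.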
void SwingPendulumTest::testOffPACSwingPendulum2()
{
Random<double>* random = new Random<double>;
RLProblem<double>* problem = new SwingPendulum<double>;
Hashing<double>* hashing = new MurmurHashing<double>(random, 1000000);
Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10, 10,
true);
StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector,
problem->getDiscreteActions());
double alpha_v = 0.1 / projector->vectorNorm();
double alpha_w = 0.005 / projector->vectorNorm();
double gamma = 0.99;
Trace<double>* critice = new AMaxTrace<double>(projector->dimension());
Trace<double>* criticeML = new MaxLengthTrace<double>(critice, 1000);
GTDLambda<double>* critic = new GTDLambda<double>(alpha_v, alpha_w, gamma, 0.4, criticeML);
double alpha_u = 0.5 / projector->vectorNorm();
PolicyDistribution<double>* target = new BoltzmannDistribution<double>(random,
problem->getDiscreteActions(), projector->dimension());
Trace<double>* actore = new AMaxTrace<double>(projector->dimension());
Trace<double>* actoreML = new MaxLengthTrace<double>(actore, 1000);
Traces<double>* actoreTraces = new Traces<double>();
actoreTraces->push_back(actoreML);
ActorOffPolicy<double>* actor = new ActorLambdaOffPolicy<double>(alpha_u, gamma, 0.4, target,
actoreTraces);
/* Policy<double>* behavior = new RandomPolicy<double>(&problem->getActions()); */
Policy<double>* behavior = new BoltzmannDistribution<double>(random,
problem->getDiscreteActions(), projector->dimension());
OffPolicyControlLearner<double>* control = new OffPAC<double>(behavior, critic, actor,
toStateAction, projector);
RLAgent<double>* agent = new LearnerAgent<double>(control);
RLRunner<double>* sim = new RLRunner<double>(agent, problem, 5000, 200, 1);
sim->setTestEpisodesAfterEachRun(true);
sim->run();
delete random;
delete problem;
delete hashing;
delete projector;
delete toStateAction;
delete critice;
delete criticeML;
delete critic;
delete actore;
delete actoreML;
delete actoreTraces;
delete actor;
delete behavior;
delete target;
delete control;
delete agent;
delete sim;
}
Example 3: testOffPACOnPolicySwingPendulum
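Structurally the same Off-PAC learner as Example 2, but the single Boltzmann distribution serves as both the acting and the target policy, so the off-policy machinery is exercised in an on-policy regime.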
void SwingPendulumTest::testOffPACOnPolicySwingPendulum()
{
Random<double>* random = new Random<double>;
RLProblem<double>* problem = new SwingPendulum<double>;
Hashing<double>* hashing = new MurmurHashing<double>(random, 1000);
Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10, 10,
true);
StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector,
problem->getDiscreteActions());
double alpha_v = 0.1 / projector->vectorNorm();
double alpha_w = 0.0001 / projector->vectorNorm();
double gamma = 0.99;
double lambda = 0.4;
Trace<double>* critice = new ATrace<double>(projector->dimension());
GTDLambda<double>* critic = new GTDLambda<double>(alpha_v, alpha_w, gamma, lambda, critice);
double alpha_u = 0.5 / projector->vectorNorm();
PolicyDistribution<double>* acting = new BoltzmannDistribution<double>(random,
problem->getDiscreteActions(), projector->dimension());
Trace<double>* actore = new ATrace<double>(projector->dimension());
Traces<double>* actoreTraces = new Traces<double>();
actoreTraces->push_back(actore);
ActorOffPolicy<double>* actor = new ActorLambdaOffPolicy<double>(alpha_u, gamma, lambda, acting,
actoreTraces);
OffPolicyControlLearner<double>* control = new OffPAC<double>(acting, critic, actor,
toStateAction, projector);
RLAgent<double>* agent = new LearnerAgent<double>(control);
RLRunner<double>* sim = new RLRunner<double>(agent, problem, 5000, 10, 5);
sim->setTestEpisodesAfterEachRun(true);
sim->run();
sim->computeValueFunction();
delete random;
delete problem;
delete hashing;
delete projector;
delete toStateAction;
delete critice;
delete critic;
delete actore;
delete actoreTraces;
delete actor;
delete acting;
delete control;
delete agent;
delete sim;
}
Example 4: testTrain
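Two training runs back to back: an off-line Off-PAC learner on MountainCar (float precision) and an on-line average-reward actor-critic on SwingPendulum (double precision); each trained controller is persisted to a .bin file for the evaluation test below.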
void NAOTest::testTrain()
{
// OffLine
{
Random<float>* random = new Random<float>;
RLProblem<float>* problem = new MountainCar<float>(random);
Hashing<float>* hashing = new MurmurHashing<float>(random, 1000000);
Projector<float>* projector = new TileCoderHashing<float>(hashing, problem->dimension(), 10,
10);
StateToStateAction<float>* toStateAction = new StateActionTilings<float>(projector,
problem->getDiscreteActions());
double alpha_v = 0.05 / projector->vectorNorm();
double alpha_w = 0.0001 / projector->vectorNorm();
double lambda = 0.0; // alternatively 0.4
double gamma = 0.99;
Trace<float>* critice = new ATrace<float>(projector->dimension());
OffPolicyTD<float>* critic = new GTDLambda<float>(alpha_v, alpha_w, gamma, lambda, critice);
double alpha_u = 1.0 / projector->vectorNorm();
PolicyDistribution<float>* target = new BoltzmannDistribution<float>(random,
problem->getDiscreteActions(), projector->dimension());
Trace<float>* actore = new ATrace<float>(projector->dimension());
Traces<float>* actoreTraces = new Traces<float>();
actoreTraces->push_back(actore);
ActorOffPolicy<float>* actor = new ActorLambdaOffPolicy<float>(alpha_u, gamma, lambda, target,
actoreTraces);
Policy<float>* behavior = new RandomPolicy<float>(random, problem->getDiscreteActions());
OffPolicyControlLearner<float>* control = new OffPAC<float>(behavior, critic, actor,
toStateAction, projector);
RLAgent<float>* agent = new LearnerAgent<float>(control);
Simulator<float>* sim = new Simulator<float>(agent, problem, 5000, 100, 1);
//sim->setVerbose(false);
sim->run();
control->persist("NAOTest_x32_M.bin");
delete random;
delete problem;
delete hashing;
delete projector;
delete toStateAction;
delete critice;
delete critic;
delete actore;
delete actoreTraces;
delete actor;
delete behavior;
delete target;
delete control;
delete agent;
delete sim;
}
// OnLine
{
Random<double>* random = new Random<double>;
RLProblem<double>* problem = new SwingPendulum<double>(random);
Hashing<double>* hashing = new MurmurHashing<double>(random, 1000);
Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10,
10, false);
StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector,
problem->getContinuousActions());
double alpha_v = 0.1 / projector->vectorNorm();
double alpha_u = 0.001 / projector->vectorNorm();
double alpha_r = 0.0001;
double gamma = 1.0;
double lambda = 0.5;
Trace<double>* critice = new ATrace<double>(projector->dimension());
TDLambda<double>* critic = new TDLambda<double>(alpha_v, gamma, lambda, critice);
PolicyDistribution<double>* policyDistribution = new NormalDistributionScaled<double>(random,
problem->getContinuousActions(), 0, 1.0, projector->dimension());
Range<double> policyRange(-2.0, 2.0);
Range<double> problemRange(-2.0, 2.0);
PolicyDistribution<double>* acting = new ScaledPolicyDistribution<double>(
problem->getContinuousActions(), policyDistribution, &policyRange, &problemRange);
Trace<double>* actore1 = new ATrace<double>(projector->dimension());
Trace<double>* actore2 = new ATrace<double>(projector->dimension());
Traces<double>* actoreTraces = new Traces<double>();
actoreTraces->push_back(actore1);
actoreTraces->push_back(actore2);
ActorOnPolicy<double>* actor = new ActorLambda<double>(alpha_u, gamma, lambda, acting,
actoreTraces);
OnPolicyControlLearner<double>* control = new AverageRewardActorCritic<double>(critic, actor,
projector, toStateAction, alpha_r);
RLAgent<double>* agent = new LearnerAgent<double>(control);
Simulator<double>* sim = new Simulator<double>(agent, problem, 5000, 100, 1);
sim->run();
control->persist("NAOTest_x32_S.bin");
delete random;
delete problem;
delete hashing;
//......... remainder of this example omitted .........
Example 5: testEvaluate
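The counterpart to testTrain: the same two controllers are rebuilt with all step sizes set to zero, the weights persisted in Example 4 are restored with resurrect(), and the agents are evaluated without further learning.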
void NAOTest::testEvaluate()
{
{
Random<float>* random = new Random<float>;
RLProblem<float>* problem = new MountainCar<float>(random);
Hashing<float>* hashing = new MurmurHashing<float>(random, 1000000);
Projector<float>* projector = new TileCoderHashing<float>(hashing, problem->dimension(), 10, 10,
true);
StateToStateAction<float>* toStateAction = new StateActionTilings<float>(projector,
problem->getDiscreteActions());
Trace<float>* critice = new ATrace<float>(projector->dimension());
OffPolicyTD<float>* critic = new GTDLambda<float>(0, 0, 0, 0, critice);
PolicyDistribution<float>* target = new BoltzmannDistribution<float>(random,
problem->getDiscreteActions(), projector->dimension());
Trace<float>* actore = new ATrace<float>(projector->dimension());
Traces<float>* actoreTraces = new Traces<float>();
actoreTraces->push_back(actore);
ActorOffPolicy<float>* actor = new ActorLambdaOffPolicy<float>(0, 0, 0, target, actoreTraces);
Policy<float>* behavior = new RandomPolicy<float>(random, problem->getDiscreteActions());
OffPolicyControlLearner<float>* control = new OffPAC<float>(behavior, critic, actor,
toStateAction, projector);
RLAgent<float>* agent = new ControlAgent<float>(control);
Simulator<float>* sim = new Simulator<float>(agent, problem, 5000, 10, 10);
control->reset();
control->resurrect("NAOTest_x32_M.bin");
sim->runEvaluate(10, 10);
delete random;
delete problem;
delete hashing;
delete projector;
delete toStateAction;
delete critice;
delete critic;
delete actore;
delete actoreTraces;
delete actor;
delete behavior;
delete target;
delete control;
delete agent;
delete sim;
}
// OnLine
{
Random<double>* random = new Random<double>;
RLProblem<double>* problem = new SwingPendulum<double>(random);
Hashing<double>* hashing = new MurmurHashing<double>(random, 1000);
Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10,
10, false);
StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector,
problem->getContinuousActions());
Trace<double>* critice = new ATrace<double>(projector->dimension());
TDLambda<double>* critic = new TDLambda<double>(0, 0, 0, critice);
PolicyDistribution<double>* policyDistribution = new NormalDistributionScaled<double>(random,
problem->getContinuousActions(), 0, 1.0, projector->dimension());
Range<double> policyRange(-2.0, 2.0);
Range<double> problemRange(-2.0, 2.0);
PolicyDistribution<double>* acting = new ScaledPolicyDistribution<double>(
problem->getContinuousActions(), policyDistribution, &policyRange, &problemRange);
Trace<double>* actore1 = new ATrace<double>(projector->dimension());
Trace<double>* actore2 = new ATrace<double>(projector->dimension());
Traces<double>* actoreTraces = new Traces<double>();
actoreTraces->push_back(actore1);
actoreTraces->push_back(actore2);
ActorOnPolicy<double>* actor = new ActorLambda<double>(0, 0, 0, acting, actoreTraces);
OnPolicyControlLearner<double>* control = new AverageRewardActorCritic<double>(critic, actor,
projector, toStateAction, 0);
RLAgent<double>* agent = new ControlAgent<double>(control);
Simulator<double>* sim = new Simulator<double>(agent, problem, 5000, 10, 10);
control->reset();
control->resurrect("NAOTest_x32_S.bin");
sim->run();
delete random;
delete problem;
delete hashing;
delete projector;
delete toStateAction;
delete critice;
delete critic;
delete actore1;
delete actore2;
delete actoreTraces;
delete actor;
delete policyDistribution;
delete acting;
delete control;
//......... remainder of this example omitted .........
Example 6: testOnPolicySwingPendulum
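An on-policy average-reward actor-critic on SwingPendulum with a scaled normal (Gaussian) policy over continuous actions; projector->dimension() sizes the critic trace, the two actor traces, and the policy distribution itself.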
void SwingPendulumTest::testOnPolicySwingPendulum()
{
Random<double>* random = new Random<double>;
RLProblem<double>* problem = new SwingPendulum<double>;
Hashing<double>* hashing = new MurmurHashing<double>(random, 1000);
Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10, 10,
false);
StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector,
problem->getContinuousActions());
double alpha_v = 0.1 / projector->vectorNorm();
double alpha_u = 0.001 / projector->vectorNorm();
double alpha_r = 0.0001;
double gamma = 1.0;
double lambda = 0.5;
Trace<double>* critice = new ATrace<double>(projector->dimension());
TDLambda<double>* critic = new TDLambda<double>(alpha_v, gamma, lambda, critice);
PolicyDistribution<double>* policyDistribution = new NormalDistributionScaled<double>(random,
problem->getContinuousActions(), 0, 1.0, projector->dimension());
Range<double> policyRange(-2.0, 2.0);
Range<double> problemRange(-2.0, 2.0);
PolicyDistribution<double>* acting = new ScaledPolicyDistribution<double>(
problem->getContinuousActions(), policyDistribution, &policyRange, &problemRange);
Trace<double>* actore1 = new ATrace<double>(projector->dimension());
Trace<double>* actore2 = new ATrace<double>(projector->dimension());
Traces<double>* actoreTraces = new Traces<double>();
actoreTraces->push_back(actore1);
actoreTraces->push_back(actore2);
ActorOnPolicy<double>* actor = new ActorLambda<double>(alpha_u, gamma, lambda, acting,
actoreTraces);
OnPolicyControlLearner<double>* control = new AverageRewardActorCritic<double>(critic, actor,
projector, toStateAction, alpha_r);
RLAgent<double>* agent = new LearnerAgent<double>(control);
RLRunner<double>* sim = new RLRunner<double>(agent, problem, 5000, 100, 10);
sim->setVerbose(true);
sim->run();
sim->runEvaluate(100);
sim->computeValueFunction();
delete random;
delete problem;
delete hashing;
delete projector;
delete toStateAction;
delete critice;
delete critic;
delete actore1;
delete actore2;
delete actoreTraces;
delete actor;
delete policyDistribution;
delete acting;
delete control;
delete agent;
delete sim;
}