

C++ Projector::vectorNorm Method Code Examples

This article collects typical usage examples of the C++ method Projector::vectorNorm. If you are asking yourself what Projector::vectorNorm does, how to call it, or where it is used in practice, the curated examples below should help. You can also explore further usage examples of the enclosing class, Projector.


The following presents 4 code examples of the Projector::vectorNorm method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better C++ code examples.
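
All four examples share one idiom: the tile-coding projector produces a sparse binary feature vector, and Projector::vectorNorm() reports an upper bound on its norm (in effect, the maximum number of active features), so dividing a base step size by it keeps the magnitude of each gradient update independent of how many tilings are used. The class below is a minimal, hypothetical sketch of that contract, written for illustration only; it is not RLLib code.

#include <cstddef>

// Hypothetical sketch (not RLLib code): a tile-coding projector activates
// one binary feature per tiling, plus an optional bias feature, so the
// maximum L1 norm of its feature vector is tilings (+ 1).
class SimpleTileCoder
{
  public:
    SimpleTileCoder(std::size_t tilings, bool withBias) :
        tilings(tilings), withBias(withBias)
    {
    }

    double vectorNorm() const
    {
      return static_cast<double>(tilings + (withBias ? 1 : 0));
    }

  private:
    std::size_t tilings;
    bool withBias;
};

int main()
{
  SimpleTileCoder projector(10, true); // 10 tilings + bias, as in Example 1
  // Normalizing by vectorNorm() bounds the per-step weight change no matter
  // how many features fire at once:
  double alpha_v = 0.1 / projector.vectorNorm(); // 0.1 / 11 ≈ 0.0091
  (void) alpha_v;
  return 0;
}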

Example 1: testOffPACSwingPendulum2

void SwingPendulumTest::testOffPACSwingPendulum2()
{
  Random<double>* random = new Random<double>;
  RLProblem<double>* problem = new SwingPendulum<double>;
  Hashing<double>* hashing = new MurmurHashing<double>(random, 1000000);
  Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10, 10,
      true);
  StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector,
      problem->getDiscreteActions());

  double alpha_v = 0.1 / projector->vectorNorm();
  double alpha_w = .005 / projector->vectorNorm();
  double gamma = 0.99;
  Trace<double>* critice = new AMaxTrace<double>(projector->dimension());
  Trace<double>* criticeML = new MaxLengthTrace<double>(critice, 1000);
  GTDLambda<double>* critic = new GTDLambda<double>(alpha_v, alpha_w, gamma, 0.4, criticeML);
  double alpha_u = 0.5 / projector->vectorNorm();
  PolicyDistribution<double>* target = new BoltzmannDistribution<double>(random,
      problem->getDiscreteActions(), projector->dimension());

  Trace<double>* actore = new AMaxTrace<double>(projector->dimension());
  Trace<double>* actoreML = new MaxLengthTrace<double>(actore, 1000);
  Traces<double>* actoreTraces = new Traces<double>();
  actoreTraces->push_back(actoreML);
  ActorOffPolicy<double>* actor = new ActorLambdaOffPolicy<double>(alpha_u, gamma, 0.4, target,
      actoreTraces);

  /*Policy<double>* behavior = new RandomPolicy<double>(
   &problem->getActions());*/
  Policy<double>* behavior = new BoltzmannDistribution<double>(random,
      problem->getDiscreteActions(), projector->dimension());
  OffPolicyControlLearner<double>* control = new OffPAC<double>(behavior, critic, actor,
      toStateAction, projector);

  RLAgent<double>* agent = new LearnerAgent<double>(control);
  RLRunner<double>* sim = new RLRunner<double>(agent, problem, 5000, 200, 1);
  sim->setTestEpisodesAfterEachRun(true);
  sim->run();

  delete random;
  delete problem;
  delete hashing;
  delete projector;
  delete toStateAction;
  delete critice;
  delete criticeML;
  delete critic;
  delete actore;
  delete actoreML;
  delete actoreTraces;
  delete actor;
  delete behavior;
  delete target;
  delete control;
  delete agent;
  delete sim;
}
Developer ID: csdlrl, Project: RLLib, Lines of code: 57, Source file: SwingPendulumTest.cpp
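
A quick sanity check of the step sizes in Example 1, assuming vectorNorm() returns the number of active features of this tile coder (10 tilings plus the bias feature enabled by the trailing true, i.e. 11; this reading of the constructor arguments is an assumption, so verify it against your RLLib version):

// Assumed: projector->vectorNorm() == 11 (10 tilings + 1 bias feature),
// giving these effective step sizes:
double alpha_v = 0.1 / 11.0;   // ≈ 0.00909, critic weights
double alpha_w = 0.005 / 11.0; // ≈ 0.000455, secondary GTD(lambda) weights
double alpha_u = 0.5 / 11.0;   // ≈ 0.04545, actor weights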

Example 2: testOffPACOnPolicySwingPendulum

void SwingPendulumTest::testOffPACOnPolicySwingPendulum()
{
  Random<double>* random = new Random<double>;
  RLProblem<double>* problem = new SwingPendulum<double>;
  Hashing<double>* hashing = new MurmurHashing<double>(random, 1000);
  Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10, 10,
      true);
  StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector,
      problem->getDiscreteActions());

  double alpha_v = 0.1 / projector->vectorNorm();
  double alpha_w = .0001 / projector->vectorNorm();
  double gamma = 0.99;
  double lambda = 0.4;

  Trace<double>* critice = new ATrace<double>(projector->dimension());
  GTDLambda<double>* critic = new GTDLambda<double>(alpha_v, alpha_w, gamma, lambda, critice);
  double alpha_u = 0.5 / projector->vectorNorm();
  PolicyDistribution<double>* acting = new BoltzmannDistribution<double>(random,
      problem->getDiscreteActions(), projector->dimension());

  Trace<double>* actore = new ATrace<double>(projector->dimension());
  Traces<double>* actoreTraces = new Traces<double>();
  actoreTraces->push_back(actore);
  ActorOffPolicy<double>* actor = new ActorLambdaOffPolicy<double>(alpha_u, gamma, lambda, acting,
      actoreTraces);

  OffPolicyControlLearner<double>* control = new OffPAC<double>(acting, critic, actor,
      toStateAction, projector);

  RLAgent<double>* agent = new LearnerAgent<double>(control);
  RLRunner<double>* sim = new RLRunner<double>(agent, problem, 5000, 10, 5);
  sim->setTestEpisodesAfterEachRun(true);
  sim->run();
  sim->computeValueFunction();

  delete random;
  delete problem;
  delete hashing;
  delete projector;
  delete toStateAction;
  delete critice;
  delete critic;
  delete actore;
  delete actoreTraces;
  delete actor;
  delete acting;
  delete control;
  delete agent;
  delete sim;
}
Developer ID: csdlrl, Project: RLLib, Lines of code: 51, Source file: SwingPendulumTest.cpp

Example 3: testTrain

void NAOTest::testTrain()
{
  // OffLine
  {
    Random<float>* random = new Random<float>;
    RLProblem<float>* problem = new MountainCar<float>(random);
    Hashing<float>* hashing = new MurmurHashing<float>(random, 1000000);
    Projector<float>* projector = new TileCoderHashing<float>(hashing, problem->dimension(), 10,
        10);
    StateToStateAction<float>* toStateAction = new StateActionTilings<float>(projector,
        problem->getDiscreteActions());

    double alpha_v = 0.05 / projector->vectorNorm();
    double alpha_w = 0.0001 / projector->vectorNorm();
    double lambda = 0.0;  //0.4;
    double gamma = 0.99;
    Trace<float>* critice = new ATrace<float>(projector->dimension());
    OffPolicyTD<float>* critic = new GTDLambda<float>(alpha_v, alpha_w, gamma, lambda, critice);
    double alpha_u = 1.0 / projector->vectorNorm();
    PolicyDistribution<float>* target = new BoltzmannDistribution<float>(random,
        problem->getDiscreteActions(), projector->dimension());

    Trace<float>* actore = new ATrace<float>(projector->dimension());
    Traces<float>* actoreTraces = new Traces<float>();
    actoreTraces->push_back(actore);
    ActorOffPolicy<float>* actor = new ActorLambdaOffPolicy<float>(alpha_u, gamma, lambda, target,
        actoreTraces);

    Policy<float>* behavior = new RandomPolicy<float>(random, problem->getDiscreteActions());

    OffPolicyControlLearner<float>* control = new OffPAC<float>(behavior, critic, actor,
        toStateAction, projector);

    RLAgent<float>* agent = new LearnerAgent<float>(control);
    Simulator<float>* sim = new Simulator<float>(agent, problem, 5000, 100, 1);
    //sim->setVerbose(false);
    sim->run();
    control->persist("NAOTest_x32_M.bin");

    delete random;
    delete problem;
    delete hashing;
    delete projector;
    delete toStateAction;
    delete critice;
    delete critic;
    delete actore;
    delete actoreTraces;
    delete actor;
    delete behavior;
    delete target;
    delete control;
    delete agent;
    delete sim;
  }
  // OnLine
  {
    Random<double>* random = new Random<double>;
    RLProblem<double>* problem = new SwingPendulum<double>(random);
    Hashing<double>* hashing = new MurmurHashing<double>(random, 1000);
    Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10,
        10, false);
    StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector,
        problem->getContinuousActions());

    double alpha_v = 0.1 / projector->vectorNorm();
    double alpha_u = 0.001 / projector->vectorNorm();
    double alpha_r = .0001;
    double gamma = 1.0;
    double lambda = 0.5;

    Trace<double>* critice = new ATrace<double>(projector->dimension());
    TDLambda<double>* critic = new TDLambda<double>(alpha_v, gamma, lambda, critice);

    PolicyDistribution<double>* policyDistribution = new NormalDistributionScaled<double>(random,
        problem->getContinuousActions(), 0, 1.0, projector->dimension());
    Range<double> policyRange(-2.0, 2.0);
    Range<double> problemRange(-2.0, 2.0);
    PolicyDistribution<double>* acting = new ScaledPolicyDistribution<double>(
        problem->getContinuousActions(), policyDistribution, &policyRange, &problemRange);

    Trace<double>* actore1 = new ATrace<double>(projector->dimension());
    Trace<double>* actore2 = new ATrace<double>(projector->dimension());
    Traces<double>* actoreTraces = new Traces<double>();
    actoreTraces->push_back(actore1);
    actoreTraces->push_back(actore2);
    ActorOnPolicy<double>* actor = new ActorLambda<double>(alpha_u, gamma, lambda, acting,
        actoreTraces);

    OnPolicyControlLearner<double>* control = new AverageRewardActorCritic<double>(critic, actor,
        projector, toStateAction, alpha_r);

    RLAgent<double>* agent = new LearnerAgent<double>(control);
    Simulator<double>* sim = new Simulator<double>(agent, problem, 5000, 100, 1);
    sim->run();
    control->persist("NAOTest_x32_S.bin");

    delete random;
    delete problem;
    delete hashing;
//......... remaining code omitted .........
Developer ID: Jimmy0319, Project: RLLib, Lines of code: 101, Source file: NAOTest.cpp
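
Example 3 ends each block by saving the learned controller with control->persist(...). To reuse those weights in a later process, RLLib's control learners also expose a matching resurrect() method; the snippet below is a hedged sketch that assumes this counterpart (verify the exact name and signature against your RLLib checkout) and that all the surrounding objects are constructed exactly as in the OffLine block above.

// Hedged sketch: reload the weights saved by the OffLine block. Every
// dependency (behavior, critic, actor, toStateAction, projector) must be
// rebuilt with the same dimensions before resurrecting.
OffPolicyControlLearner<float>* restored = new OffPAC<float>(behavior, critic, actor,
    toStateAction, projector);
restored->resurrect("NAOTest_x32_M.bin"); // restore the persisted weights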

Example 4: testOnPolicySwingPendulum

void SwingPendulumTest::testOnPolicySwingPendulum()
{
  Random<double>* random = new Random<double>;
  RLProblem<double>* problem = new SwingPendulum<double>;
  Hashing<double>* hashing = new MurmurHashing<double>(random, 1000);
  Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10, 10,
      false);
  StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector,
      problem->getContinuousActions());

  double alpha_v = 0.1 / projector->vectorNorm();
  double alpha_u = 0.001 / projector->vectorNorm();
  double alpha_r = .0001;
  double gamma = 1.0;
  double lambda = 0.5;

  Trace<double>* critice = new ATrace<double>(projector->dimension());
  TDLambda<double>* critic = new TDLambda<double>(alpha_v, gamma, lambda, critice);

  PolicyDistribution<double>* policyDistribution = new NormalDistributionScaled<double>(random,
      problem->getContinuousActions(), 0, 1.0, projector->dimension());
  Range<double> policyRange(-2.0, 2.0);
  Range<double> problemRange(-2.0, 2.0);
  PolicyDistribution<double>* acting = new ScaledPolicyDistribution<double>(
      problem->getContinuousActions(), policyDistribution, &policyRange, &problemRange);

  Trace<double>* actore1 = new ATrace<double>(projector->dimension());
  Trace<double>* actore2 = new ATrace<double>(projector->dimension());
  Traces<double>* actoreTraces = new Traces<double>();
  actoreTraces->push_back(actore1);
  actoreTraces->push_back(actore2);
  ActorOnPolicy<double>* actor = new ActorLambda<double>(alpha_u, gamma, lambda, acting,
      actoreTraces);

  OnPolicyControlLearner<double>* control = new AverageRewardActorCritic<double>(critic, actor,
      projector, toStateAction, alpha_r);

  RLAgent<double>* agent = new LearnerAgent<double>(control);
  RLRunner<double>* sim = new RLRunner<double>(agent, problem, 5000, 100, 10);
  sim->setVerbose(true);
  sim->run();

  sim->runEvaluate(100);
  sim->computeValueFunction();

  delete random;
  delete problem;
  delete hashing;
  delete projector;
  delete toStateAction;
  delete critice;
  delete critic;
  delete actore1;
  delete actore2;
  delete actoreTraces;
  delete actor;
  delete policyDistribution;
  delete acting;
  delete control;
  delete agent;
  delete sim;
}
Developer ID: csdlrl, Project: RLLib, Lines of code: 62, Source file: SwingPendulumTest.cpp
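
Example 4 wraps NormalDistributionScaled in a ScaledPolicyDistribution so that actions drawn in policyRange are mapped into problemRange. The helper below is my own illustration of such a linear range mapping, not RLLib's actual implementation:

// Hypothetical helper (not RLLib code): linearly map a value from one range
// onto another, the kind of transform a scaled policy distribution applies
// between the policy's action range and the problem's action range.
double scaleAction(double a, double policyMin, double policyMax,
    double problemMin, double problemMax)
{
  double t = (a - policyMin) / (policyMax - policyMin); // normalize to [0, 1]
  return problemMin + t * (problemMax - problemMin);    // map into target range
}

With policyRange and problemRange both set to (-2.0, 2.0) as above, the mapping is the identity; it only matters when the two ranges differ.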


Note: The Projector::vectorNorm examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors. Consult each project's license before distributing or using the code, and do not republish without permission.