本文整理汇总了 C++ 中 HISTORY::Add 方法的典型用法代码示例。如果您正苦于以下问题：C++ HISTORY::Add 方法的具体用法？C++ HISTORY::Add 怎么用？C++ HISTORY::Add 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 HISTORY 的用法示例。
在下文中一共展示了 HISTORY::Add 方法的 5 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 C++ 代码示例。
示例1: DisplayValue
/// Visits every child of this node in index order and calls DisplayValue on
/// it, with the child's action appended to `history` for the duration of the
/// call.
///
/// @param history  History that each child's action is added to before the
///                 recursive call; every Add is matched by a Pop afterwards.
/// @param maxDepth The function returns immediately once history.Size() is
///                 greater than or equal to this value (compared as uint).
/// @param ostr     Output stream forwarded to the children; nothing is written
///                 to it directly here.
void VNODE::DisplayValue(HISTORY& history, int maxDepth, ostream& ostr) const
{
    // Depth limit reached: do not descend any further.
    const bool depthLimitReached = history.Size() >= static_cast<uint>(maxDepth);
    if (depthLimitReached)
        return;

    int childIndex = 0;
    while (childIndex < NumChildren)
    {
        // Second argument -1: presumably a placeholder observation --
        // TODO confirm against HISTORY::Add.
        history.Add(childIndex, -1);
        Children[childIndex].DisplayValue(history, maxDepth, ostr);
        history.Pop();
        childIndex++;
    }
}
示例2: DisplayValue
/// Visits every child of this node in index order. For each child that
/// reports Applicable(), writes its visit count to `ostr` and then calls the
/// child's DisplayValue with the child's action appended to `history`.
///
/// @param history  History that each action is added to before the child is
///                 inspected; every Add is matched by a Pop, whether or not
///                 the child was applicable.
/// @param maxDepth The function returns immediately once history.Size() is
///                 greater than or equal to this value.
/// @param ostr     Stream that receives "n=<count> " for each applicable child
///                 and is forwarded to the child.
/// @param qvalues  Optional. When non-null, the address of element `action`
///                 (fetched with the bounds-checked at()) is forwarded to the
///                 child; when null, the child overload without it is called.
void VNODE::DisplayValue(HISTORY& history, int maxDepth, ostream& ostr, const std::vector<double> *qvalues) const
{
    if (history.Size() >= maxDepth)
        return;

    for (int a = 0; a < NumChildren; a++)
    {
        history.Add(a);

        const QNODE& child = Children[a];
        if (child.Applicable())
        {
            ostr << "n=" << child.GetCount() << " ";

            if (!qvalues)
                child.DisplayValue(history, maxDepth, ostr);
            else
                child.DisplayValue(history, maxDepth, ostr, &(qvalues->at(a)));
        }

        // Pop unconditionally so the history is restored for inapplicable
        // children as well.
        history.Pop();
    }
}
示例3: DisplayPolicy
/// Picks the child whose Value.GetValue() is largest and calls DisplayPolicy
/// on that child only, with its action appended to `history` for the duration
/// of the call.
///
/// The comparison is strict, so on ties the lowest-indexed child wins. If no
/// child's value is greater than -Infinity, nothing is displayed.
///
/// @param history  History that the selected action is added to before the
///                 recursive call and popped from afterwards.
/// @param maxDepth The function returns immediately once history.Size() is
///                 greater than or equal to this value (compared as uint).
/// @param ostr     Output stream forwarded to the selected child; nothing is
///                 written to it directly here.
void VNODE::DisplayPolicy(HISTORY& history, int maxDepth, ostream& ostr) const
{
    if (history.Size() >= static_cast<uint>(maxDepth))
        return;

    int greedyAction = -1;
    double greedyValue = -Infinity;
    for (int a = 0; a < NumChildren; a++)
    {
        if (Children[a].Value.GetValue() > greedyValue)
        {
            greedyAction = a;
            greedyValue = Children[a].Value.GetValue();
        }
    }

    // No child beat the initial -Infinity: there is no action to follow.
    if (greedyAction == -1)
        return;

    // Second argument 0: presumably a placeholder observation --
    // TODO confirm against HISTORY::Add.
    history.Add(static_cast<uint>(greedyAction), 0);
    Children[greedyAction].DisplayPolicy(history, maxDepth, ostr);
    history.Pop();
}
示例4: DisplayPolicy
/// Intended to follow the best child and call DisplayPolicy on it, with the
/// child's action appended to `history` for the duration of the call.
///
/// NOTE(review): the selection logic inside the loop is commented out, so the
/// chosen action always stays -1 and, as written, the function returns
/// without displaying anything or touching `history`.
///
/// @param history  History the selected action would be added to and popped
///                 from around the recursive call.
/// @param maxDepth The function returns immediately once history.Size() is
///                 greater than or equal to this value.
/// @param ostr     Output stream that would be forwarded to the selected child.
void VNODE::DisplayPolicy(HISTORY& history, int maxDepth, ostream& ostr) const
{
    if (history.Size() >= maxDepth)
        return;

    int chosenAction = -1;
    for (int a = 0; a < NumChildren; a++)
    {
        // Disabled selection (marked XXX by the original author), kept for
        // reference; it also needs `double bestq = -Infinity;` before the loop:
        //   if (Children[a].Dirichlet.GetValue() > bestq)
        //   {
        //       chosenAction = a;
        //       bestq = Children[a].Dirichlet.GetValue();
        //   }
    }

    // With the selection disabled this guard always fires.
    if (chosenAction == -1)
        return;

    history.Add(chosenAction);
    Children[chosenAction].DisplayPolicy(history, maxDepth, ostr);
    history.Pop();
}
示例5: Run
// Runs one episode of at most ExpParams.NumSteps steps against `Real`.
//
// Actions come from a freshly constructed MCTS over `Simulator`. Each step's
// reward is added to Results.Reward and accumulated into the undiscounted and
// discounted returns. The main loop stops early when Real.Step() reports a
// terminal state, when mcts.Update() returns false ("out of particles"), or
// when timer.elapsed() exceeds ExpParams.TimeOut. If the planner ran out of
// particles, the remaining steps are played with Simulator.SelectRandom().
// At the end the elapsed time and both returns are added to Results and the
// returns are printed together with their running means.
void EXPERIMENT::Run()
{
// Started here so the measured time includes planner construction.
boost::timer timer;
MCTS mcts(Simulator, SearchParams);
double undiscountedReturn = 0.0;
double discountedReturn = 0.0;
// Multiplier applied to the next reward; scaled by Real.GetDiscount() after each step.
double discount = 1.0;
bool terminal = false;
bool outOfParticles = false;
// Declared outside the loop because the fallback loop below continues counting from it.
int t;
// NOTE(review): `state` is a raw pointer from Real.CreateStartState() and is not
// released anywhere in this function -- confirm who owns it.
STATE* state = Real.CreateStartState();
if (SearchParams.Verbose >= 1)
Real.DisplayState(*state, cout);
// Main loop: plan with MCTS, act in the real environment, then update the planner.
for (t = 0; t < ExpParams.NumSteps; t++)
{
// Both are passed to Real.Step() below, which is expected to fill them in.
int observation;
double reward;
int action = mcts.SelectAction();
terminal = Real.Step(*state, action, observation, reward);
Results.Reward.Add(reward);
undiscountedReturn += reward;
// Uses the discount in effect before this step's update.
discountedReturn += reward * discount;
discount *= Real.GetDiscount();
if (SearchParams.Verbose >= 1)
{
Real.DisplayAction(action, cout);
Real.DisplayState(*state, cout);
Real.DisplayObservation(*state, observation, cout);
Real.DisplayReward(reward, cout);
}
// The terminal step is recorded above, but the planner is not updated with it.
if (terminal)
{
cout << "Terminated" << endl;
break;
}
// A false return from Update() is treated as the planner having run out of particles.
outOfParticles = !mcts.Update(action, observation, reward);
if (outOfParticles)
break;
if (timer.elapsed() > ExpParams.TimeOut)
{
// NOTE(review): the message prints Results.Time.GetTotal(), not timer.elapsed().
// Results.Time is only added to at the end of this function, so the figure
// presumably excludes the current run -- confirm this is intended. There is
// also no space before "seconds" in the output.
cout << "Timed out after " << t << " steps in "
<< Results.Time.GetTotal() << "seconds" << endl;
break;
}
}
// Fallback: finish the episode with SelectRandom once the planner is out of particles.
if (outOfParticles)
{
cout << "Out of particles, finishing episode with SelectRandom" << endl;
// Local HISTORY object initialised from the planner's history; extended below
// with each further action/observation pair.
HISTORY history = mcts.GetHistory();
// Pre-increment: step t was already executed and recorded before the main loop
// broke, so continue from t + 1.
// NOTE(review): unlike the main loop, this loop does not check ExpParams.TimeOut.
while (++t < ExpParams.NumSteps)
{
int observation;
double reward;
// This passes real state into simulator!
// SelectRandom must only use fully observable state
// to avoid "cheating"
int action = Simulator.SelectRandom(*state, history, mcts.GetStatus());
terminal = Real.Step(*state, action, observation, reward);
Results.Reward.Add(reward);
undiscountedReturn += reward;
discountedReturn += reward * discount;
discount *= Real.GetDiscount();
if (SearchParams.Verbose >= 1)
{
Real.DisplayAction(action, cout);
Real.DisplayState(*state, cout);
Real.DisplayObservation(*state, observation, cout);
Real.DisplayReward(reward, cout);
}
if (terminal)
{
cout << "Terminated" << endl;
break;
}
// Only reached for non-terminal steps; keeps the local history in step with
// what was actually played.
history.Add(action, observation);
}
}
// Record this run's statistics, then report the returns with their running means.
Results.Time.Add(timer.elapsed());
Results.UndiscountedReturn.Add(undiscountedReturn);
Results.DiscountedReturn.Add(discountedReturn);
cout << "Discounted return = " << discountedReturn
<< ", average = " << Results.DiscountedReturn.GetMean() << endl;
cout << "Undiscounted return = " << undiscountedReturn
<< ", average = " << Results.UndiscountedReturn.GetMean() << endl;
}