当前位置: 首页>>代码示例>>C++>>正文


C++ HISTORY::Add方法代码示例

本文整理汇总了C++中HISTORY::Add方法的典型用法代码示例。如果您正苦于以下问题：C++ HISTORY::Add方法的具体用法？C++ HISTORY::Add怎么用？C++ HISTORY::Add使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类HISTORY的用法示例。


在下文中一共展示了HISTORY::Add方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: DisplayValue

// Recursively visits every child of this node.
//
// For each child index, the index is pushed onto `history` (with -1 as the
// second argument to HISTORY::Add), the child's DisplayValue is invoked with
// the same `maxDepth` and `ostr`, and the entry is popped again, so `history`
// is left as it was found when the call returns.
//
// Nothing is done when `history` already holds `maxDepth` or more entries.
// `maxDepth` is converted to `uint` before the comparison.
void VNODE::DisplayValue(HISTORY& history, int maxDepth, ostream& ostr) const
{
    const uint depthLimit = static_cast<uint>(maxDepth);
    if (history.Size() < depthLimit)
    {
        int child = 0;
        while (child < NumChildren)
        {
            history.Add(child, -1);
            Children[child].DisplayValue(history, maxDepth, ostr);
            history.Pop();
            ++child;
        }
    }
}
开发者ID:caomw,项目名称:BBRL,代码行数:12,代码来源:node.cpp

示例2: DisplayValue

// Recursively prints the visit count of every applicable child of this node.
//
// history  - action history; each child index is pushed before the child is
//            visited and popped afterwards, so it is restored on return.
// maxDepth - the function returns immediately once history.Size() reaches
//            this value.
// ostr     - stream that receives "n=<count> " for each applicable child.
// qvalues  - optional per-action values; when non-null, the address of the
//            entry for the current action (bounds-checked through at()) is
//            forwarded to the child's DisplayValue, otherwise the child is
//            called without that argument.
void VNODE::DisplayValue(HISTORY& history, int maxDepth, ostream& ostr, const std::vector<double> *qvalues) const
{
    if (history.Size() >= maxDepth)
        return;

    for (int idx = 0; idx < NumChildren; ++idx)
    {
        history.Add(idx);
        const QNODE &child = Children[idx];

        // Children that are not applicable are skipped, but the history
        // entry pushed above must still be popped below.
        if (child.Applicable())
        {
            ostr << "n=" << child.GetCount() << " ";

            if (!qvalues)
                child.DisplayValue(history, maxDepth, ostr);
            else
                child.DisplayValue(history, maxDepth, ostr, &(qvalues->at(idx)));
        }

        history.Pop();
    }
}
开发者ID:aijunbai,项目名称:thompson-sampling,代码行数:22,代码来源:node.cpp

示例3: DisplayPolicy

// Follows the highest-valued child of this node.
//
// The child whose Value.GetValue() is strictly greater than every earlier
// candidate is selected (the first one wins on ties). If such a child exists,
// its index is pushed onto `history` (with 0 as the second argument to
// HISTORY::Add), the child's DisplayPolicy is invoked, and the entry is popped
// again.
//
// Nothing is done when `history` already holds `maxDepth` or more entries
// (`maxDepth` is converted to `uint` for the comparison), or when no child's
// value compares greater than -Infinity.
void VNODE::DisplayPolicy(HISTORY& history, int maxDepth, ostream& ostr) const
{
    if (history.Size() >= static_cast<uint>(maxDepth))
        return;

    double topValue = -Infinity;
    int topAction = -1;
    for (int a = 0; a < NumChildren; ++a)
    {
        // Skip children that do not strictly improve on the best seen so far.
        if (!(Children[a].Value.GetValue() > topValue))
            continue;

        topAction = a;
        topValue = Children[a].Value.GetValue();
    }

    if (topAction == -1)
        return;

    history.Add(static_cast<uint>(topAction), 0);
    Children[topAction].DisplayPolicy(history, maxDepth, ostr);
    history.Pop();
}
开发者ID:caomw,项目名称:BBRL,代码行数:23,代码来源:node.cpp

示例4: DisplayPolicy

// Would follow the best child of this node and recurse into it, pushing the
// chosen action onto `history` for the duration of the recursive call.
//
// NOTE(review): the selection logic inside the loop is commented out (it was
// marked XXX), so `chosen` is never changed from -1. As written, the function
// therefore returns without touching `history` or `ostr`, whatever the
// children contain. Presumably the Dirichlet-based comparison is meant to be
// restored at some point -- confirm with the owner.
void VNODE::DisplayPolicy(HISTORY& history, int maxDepth, ostream& ostr) const
{
    if (history.Size() >= maxDepth)
        return;

    int chosen = -1;

    // Disabled selection, kept for reference:
    //    double bestq = -Infinity;
    for (int a = 0; a < NumChildren; ++a)
    {
        //    if (Children[a].Dirichlet.GetValue() > bestq) //XXX
        //    {
        //        chosen = a;
        //        bestq = Children[a].Dirichlet.GetValue();
        //    }
    }

    if (chosen == -1)
        return;

    history.Add(chosen);
    Children[chosen].DisplayPolicy(history, maxDepth, ostr);
    history.Pop();
}
开发者ID:aijunbai,项目名称:thompson-sampling,代码行数:23,代码来源:node.cpp

示例5: Run

// Runs one episode of at most ExpParams.NumSteps steps against `Real`.
//
// Phase 1: a fresh MCTS planner (built from `Simulator` and `SearchParams`)
//          selects each action; the action is applied to the real state and
//          the planner is updated with the resulting observation and reward.
//          The phase ends when the environment reports a terminal step, when
//          mcts.Update() returns false ("out of particles"), when the elapsed
//          time exceeds ExpParams.TimeOut, or when the step budget is used up.
// Phase 2: only if phase 1 ended out of particles, the remaining steps are
//          played with Simulator.SelectRandom(), extending a local copy of
//          the planner's history after each non-terminal step.
//
// Every reward is added to Results.Reward; at the end the elapsed time and the
// discounted / undiscounted returns are added to `Results` and printed to
// cout together with their running means.
void EXPERIMENT::Run()
{
    boost::timer timer;

    MCTS mcts(Simulator, SearchParams);

    double undiscountedReturn = 0.0;
    double discountedReturn = 0.0;
    double discount = 1.0;
    bool terminal = false;
    bool outOfParticles = false;
    int t;  // declared outside the loop: phase 2 resumes from this step index

    // NOTE(review): `state` comes from Real.CreateStartState() and is never
    // released in this function -- confirm whether the caller is expected to
    // hand it back to the simulator.
    STATE* state = Real.CreateStartState();
    if (SearchParams.Verbose >= 1)
        Real.DisplayState(*state, cout);

    for (t = 0; t < ExpParams.NumSteps; t++)
    {
        int observation;
        double reward;
        int action = mcts.SelectAction();
        terminal = Real.Step(*state, action, observation, reward);

        Results.Reward.Add(reward);
        undiscountedReturn += reward;
        discountedReturn += reward * discount;
        discount *= Real.GetDiscount();

        if (SearchParams.Verbose >= 1)
        {
            Real.DisplayAction(action, cout);
            Real.DisplayState(*state, cout);
            Real.DisplayObservation(*state, observation, cout);
            Real.DisplayReward(reward, cout);
        }

        if (terminal)
        {
            cout << "Terminated" << endl;
            break;
        }
        outOfParticles = !mcts.Update(action, observation, reward);
        if (outOfParticles)
            break;

        if (timer.elapsed() > ExpParams.TimeOut)
        {
            // Report the time elapsed in this run (the quantity that caused
            // the time-out); Results.Time only receives this run's time at
            // the end of the function.
            cout << "Timed out after " << t << " steps in "
                << timer.elapsed() << " seconds" << endl;
            break;
        }
    }

    if (outOfParticles)
    {
        cout << "Out of particles, finishing episode with SelectRandom" << endl;
        HISTORY history = mcts.GetHistory();
        // ++t skips the step on which the planner ran out of particles; that
        // step was already applied to the real state in the loop above.
        while (++t < ExpParams.NumSteps)
        {
            int observation;
            double reward;

            // This passes real state into simulator!
            // SelectRandom must only use fully observable state
            // to avoid "cheating"
            int action = Simulator.SelectRandom(*state, history, mcts.GetStatus());
            terminal = Real.Step(*state, action, observation, reward);

            Results.Reward.Add(reward);
            undiscountedReturn += reward;
            discountedReturn += reward * discount;
            discount *= Real.GetDiscount();

            if (SearchParams.Verbose >= 1)
            {
                Real.DisplayAction(action, cout);
                Real.DisplayState(*state, cout);
                Real.DisplayObservation(*state, observation, cout);
                Real.DisplayReward(reward, cout);
            }

            if (terminal)
            {
                cout << "Terminated" << endl;
                break;
            }

            history.Add(action, observation);
        }
    }

    Results.Time.Add(timer.elapsed());
    Results.UndiscountedReturn.Add(undiscountedReturn);
    Results.DiscountedReturn.Add(discountedReturn);
    cout << "Discounted return = " << discountedReturn
        << ", average = " << Results.DiscountedReturn.GetMean() << endl;
    cout << "Undiscounted return = " << undiscountedReturn
        << ", average = " << Results.UndiscountedReturn.GetMean() << endl;
}
开发者ID:Haibo-Wang-ORG,项目名称:Optimal-Planning-Under-Uncertianty,代码行数:100,代码来源:experiment.cpp


注:本文中的HISTORY::Add方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。