This article collects typical usage examples of the C++ method Agent::genPerceptAndUpdate. If you are wondering what Agent::genPerceptAndUpdate does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples of the enclosing Agent class.
Two code examples of Agent::genPerceptAndUpdate are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better C++ code examples.
Example 1: sample
// Sample one possible sequence of future events, up to 'dfr' cycles.
reward_t SearchNode::sample(Agent &agent, unsigned int dfr) {
    double newReward;
    if (dfr == 0) {
        return 0;
    } else if (m_chance_node) {
        // Generate the whole observation-reward percept according to the
        // agent's model of the environment.
        percept_t obs;
        percept_t rew;
        agent.genPerceptAndUpdate(obs, rew);
        // Compute the index of the whole percept.
        percept_t percept = (rew << agent.numObsBits()) | obs;
        if (m_child.count(percept) == 0) {
            m_child[percept] = new SearchNode(false, agent.numActions());
        }
        newReward = rew + m_child[percept]->sample(agent, dfr - 1);
    } else if (m_visits == 0) {
        // An unvisited decision node: estimate its value with a random playout.
        newReward = playout(agent, dfr);
    } else {
        // Select an action to sample and update the agent's model with it.
        action_t action = selectAction(agent, dfr);
        agent.modelUpdate(action);
        newReward = m_child[action]->sample(agent, dfr);
    }
    // Update our estimate of the future reward.
    m_mean = (1.0 / (double) (m_visits + 1)) * (newReward + m_visits * m_mean);
    ++m_visits;
    return newReward;
}
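For context, here is a minimal sketch of how sample might be driven from a root node; it is not taken from the examples above. The function name runSearch, the parameters horizon and numSims, and the saveState/restoreState/AgentSnapshot helpers are purely illustrative: since sample advances the agent's internal model through modelUpdate and genPerceptAndUpdate, the real Agent class would need some equivalent mechanism to roll the model back after each simulation.

// Hypothetical driver (assumed, not from the source): run many simulations
// from a root decision node, restoring the agent's model state in between.
void runSearch(Agent &agent, unsigned int horizon, unsigned int numSims) {
    // Root decision node, constructed as in Example 1.
    SearchNode root(false, agent.numActions());
    for (unsigned int i = 0; i < numSims; ++i) {
        // Each call to sample() mutates the agent's model, so snapshot first.
        AgentSnapshot snapshot = agent.saveState();   // hypothetical API
        root.sample(agent, horizon);
        agent.restoreState(snapshot);                 // hypothetical API
    }
    // After the loop, the mean estimates of the root's children can be
    // compared to choose the next real action (accessors omitted here).
}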
Example 2: playout
// Simulate a sequence of random actions, returning the accumulated reward.
static reward_t playout(Agent &agent, unsigned int playout_len) {
    reward_t r = 0;
    for (unsigned int i = 0; i < playout_len; ++i) {
        // Pick a random action.
        action_t a = agent.genRandomAction();
        agent.modelUpdate(a);
        // Generate a random percept distributed according to the agent's
        // internal model of the environment.
        percept_t rew;
        percept_t obs;
        agent.genPerceptAndUpdate(obs, rew);
        r = r + rew;
    }
    return r;
}
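Note how the two examples fit together: playout provides the leaf evaluation for sample. When sample reaches a decision node it has never visited (m_visits == 0), it falls back to a uniformly random rollout of length dfr, drawing percepts with genPerceptAndUpdate from the agent's learned model rather than from the real environment, and the accumulated reward of that rollout seeds the node's mean estimate m_mean.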