本文整理汇总了C++中Agent::genRandomAction方法的典型用法代码示例。如果您正苦于以下问题：C++ Agent::genRandomAction方法的具体用法？C++ Agent::genRandomAction怎么用？C++ Agent::genRandomAction使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Agent的用法示例。
在下文中一共展示了Agent::genRandomAction方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: playout
// Performs a playout of `playout_len` steps and returns the sum of the
// rewards produced along the way. In every step a random action is fed into
// the agent's model, after which the agent generates a percept from that
// model and updates itself with it.
static reward_t playout(Agent &agent, unsigned int playout_len) {
    reward_t total = 0;

    for (unsigned int remaining = playout_len; remaining > 0; --remaining) {
        // Act: draw a random action and record it in the model.
        action_t chosen = agent.genRandomAction();
        agent.modelUpdate(chosen);

        // Perceive: let the agent produce the observation/reward pair from
        // its internal model of the environment and update on it.
        percept_t obs;
        percept_t rew;
        agent.genPerceptAndUpdate(obs, rew);

        total += rew;
    }

    return total;
}
示例2: mainLoop
// The main agent/environment interaction loop.
//
// Repeats until env.isFinished() returns true or, when "terminate-age" is
// configured, until ai.age() reaches that age. Each cycle reads an observation
// and a reward from the environment, updates the agent's model with them,
// picks an action (a random one with probability explore_rate when
// exploration is enabled, otherwise the result of search(ai, mc_timelimit)),
// performs the action and records it in the agent's model. Every cycle is
// written to verboseLog and compactLog.
//
// Options read in the visible part of the function: "exploration",
// "explore-decay", "terminate-age", "mc-timelimit", "intermediate-ct" and
// "write-ct".
//
// NOTE: this listing is truncated; the end of the function is not shown.
void mainLoop(Agent &ai, Environment &env, options_t &options) {
// Determine exploration options
bool explore = options.count("exploration") > 0;
// NOTE(review): both values stay uninitialized when "exploration" is absent,
// yet explore_rate is still written to verboseLog and compactLog every cycle.
double explore_rate, explore_decay;
if (explore) {
strExtract(options["exploration"], explore_rate);
strExtract(options["explore-decay"], explore_decay);
assert(0.0 <= explore_rate && explore_rate <= 1.0);
assert(0.0 <= explore_decay && explore_decay <= 1.0);
}
// Determine termination age
bool terminate_check = options.count("terminate-age") > 0;
// Only assigned (and only read) when terminate_check is true.
age_t terminate_age;
if (terminate_check) {
strExtract(options["terminate-age"], terminate_age);
assert(0 <= terminate_age);
}
// Determine mc-timelimit
// NOTE(review): unlike the options above, "mc-timelimit" is read without a
// preceding count() check.
timelimit_t mc_timelimit;
strExtract(options["mc-timelimit"], mc_timelimit);
// If we assume that time_limit > agent.numActions() we can be sure
// that every action is selected at least once
if(mc_timelimit < ai.numActions()){
std::cerr << "WARNING: time_limit not large enough to sample all actions" << std::endl;
}
// Determine whether to write cts during the process, or only at the end.
// Intermediate writes are on by default; only the exact string "0" turns
// them off.
bool intermediate_ct = true;
if(options.count("intermediate-ct") > 0){
intermediate_ct = !(options["intermediate-ct"] == "0");
}
std::cout << "starting agent/environment interaction loop...\n";
// Agent/environment interaction loop
for (unsigned int cycle = 1; !env.isFinished(); cycle++) {
// check for agent termination
if (terminate_check && ai.age() >= terminate_age) {
verboseLog << "info: terminating agent" << std::endl;
break;
}
// Get a percept from the environment
percept_t observation = env.getObservation();
percept_t reward = env.getReward();
// Update agent's environment model with the new percept
ai.modelUpdate(observation, reward);
// Determine best exploitive action, or explore
action_t action;
bool explored = false;
// rand01() is only called when exploration is enabled (short-circuit).
if (explore && rand01() < explore_rate) {
explored = true;
action = ai.genRandomAction();
}
else {
action = search(ai, mc_timelimit);
}
// Send an action to the environment
env.performAction(action);
// Update agent's environment model with the chosen action
ai.modelUpdate(action);
// Log this turn
verboseLog << "cycle: " << cycle << std::endl;
verboseLog << "observation: " << observation << std::endl;
verboseLog << "reward: " << reward << std::endl;
verboseLog << "action: " << action << std::endl;
verboseLog << "explored: " << (explored ? "yes" : "no") << std::endl;
verboseLog << "explore rate: " << explore_rate << std::endl;
verboseLog << "total reward: " << ai.reward() << std::endl;
verboseLog << "average reward: " << ai.averageReward() << std::endl;
// Log the data in a more compact form
compactLog << cycle << ", " << observation << ", " << reward << ", "
<< action << ", " << explored << ", " << explore_rate << ", "
<< ai.reward() << ", " << ai.averageReward() << std::endl;
// Print to standard output when cycle == 2^n
// (for cycle >= 1 this bit test holds only when cycle is a power of two)
if ((cycle & (cycle - 1)) == 0) {
std::cout << "cycle: " << cycle << std::endl;
std::cout << "average reward: " << ai.averageReward() << std::endl;
if (explore) {
std::cout << "explore rate: " << explore_rate << std::endl;
}
// Write context tree file
// Skipped when "write-ct" is empty or intermediate writes are disabled.
if(options["write-ct"] != "" && intermediate_ct){
// write a ct for each 2^n cycles.
// The output file name is <write-ct><cycle>.ct
char cycle_string[256];
// NOTE(review): cycle is an unsigned int but is formatted with %d;
// %u would match the type.
sprintf(cycle_string, "%d", cycle);
std::ofstream ct((options["write-ct"] + std::string(cycle_string) + ".ct").c_str());
//......... part of the code is omitted here .........
示例3: mainLoop
// The main agent/environment interaction loop.
//
// Each cycle reads an observation and a reward from the environment, updates
// the agent's model with them, selects an action (a random one with
// probability explore_rate when exploration is enabled, otherwise the result
// of search(ai)), sends the action to the environment and records it in the
// agent's model. Every cycle is written to `log` and `compactLog`; a short
// progress report goes to standard output whenever the cycle number is a
// power of two, and a summary is printed after the loop ends.
//
// The loop ends when env.isFinished() returns true or, if the
// "terminate-lifetime" option is present, once ai.lifetime() exceeds it.
//
// Options read:
//   "exploration"        - initial exploration rate, asserted to be in [0, 1];
//                          exploration is disabled when the option is absent.
//   "explore-decay"      - factor the rate is multiplied by after each cycle,
//                          asserted to be in [0, 1]; only consulted when
//                          exploration is enabled, no decay when absent.
//   "terminate-lifetime" - optional lifetime limit (see above).
//
// ai:      the agent.
// env:     the environment.
// options: the configuration options.
void mainLoop(Agent &ai, Environment &env, options_t &options) {
    // Determine exploration options.
    // Defaults are "never explore" (0.0) and "never decay" (1.0). Giving the
    // variables defined values matters because explore_rate is written to the
    // logs on every cycle, even when exploration is disabled.
    bool explore = options.count("exploration") > 0;
    double explore_rate = 0.0;
    double explore_decay = 1.0;
    if (explore) {
        strExtract(options["exploration"], explore_rate);
        // Only parse the decay factor when it was actually supplied, so a
        // missing option keeps the "no decay" default.
        if (options.count("explore-decay") > 0) {
            strExtract(options["explore-decay"], explore_decay);
        }
        assert(0.0 <= explore_rate && explore_rate <= 1.0);
        assert(0.0 <= explore_decay && explore_decay <= 1.0);
    }

    // Determine termination lifetime (only assigned and read when the option
    // is present).
    bool terminate_check = options.count("terminate-lifetime") > 0;
    lifetime_t terminate_lifetime;
    if (terminate_check) {
        strExtract(options["terminate-lifetime"], terminate_lifetime);
        assert(0 <= terminate_lifetime);
    }

    // Agent/environment interaction loop
    for (unsigned int cycle = 1; !env.isFinished(); cycle++) {

        // Check for agent termination
        if (terminate_check && ai.lifetime() > terminate_lifetime) {
            log << "info: terminating agent" << std::endl;
            break;
        }

        // Get a percept from the environment
        percept_t observation = env.getObservation();
        percept_t reward = env.getReward();

        // Update agent's environment model with the new percept
        ai.modelUpdate(observation, reward); // TODO: implement in agent.cpp

        // Determine best exploitive action, or explore.
        // rand01() is only called when exploration is enabled (short-circuit).
        action_t action;
        bool explored = false;
        if (explore && rand01() < explore_rate) {
            explored = true;
            action = ai.genRandomAction();
        }
        else {
            action = search(ai); // TODO: implement in search.cpp
        }

        // Send an action to the environment
        env.performAction(action); // TODO: implement for each environment

        // Update agent's environment model with the chosen action
        ai.modelUpdate(action); // TODO: implement in agent.cpp

        // Log this turn
        log << "cycle: " << cycle << std::endl;
        log << "observation: " << observation << std::endl;
        log << "reward: " << reward << std::endl;
        log << "action: " << action << std::endl;
        log << "explored: " << (explored ? "yes" : "no") << std::endl;
        log << "explore rate: " << explore_rate << std::endl;
        log << "total reward: " << ai.reward() << std::endl;
        log << "average reward: " << ai.averageReward() << std::endl;

        // Log the data in a more compact form
        compactLog << cycle << ", " << observation << ", " << reward << ", "
                   << action << ", " << explored << ", " << explore_rate << ", "
                   << ai.reward() << ", " << ai.averageReward() << std::endl;

        // Print to standard output when cycle == 2^n
        // (for cycle >= 1 this bit test holds only for powers of two)
        if ((cycle & (cycle - 1)) == 0) {
            std::cout << "cycle: " << cycle << std::endl;
            std::cout << "average reward: " << ai.averageReward() << std::endl;
            if (explore) {
                std::cout << "explore rate: " << explore_rate << std::endl;
            }
        }

        // Update exploration rate
        if (explore) explore_rate *= explore_decay;
    }

    // Print summary to standard output
    std::cout << std::endl << std::endl << "SUMMARY" << std::endl;
    std::cout << "agent lifetime: " << ai.lifetime() << std::endl;
    std::cout << "average reward: " << ai.averageReward() << std::endl;
}
示例4: mainLoop
/** The main agent/environment interaction loop. Each interaction cycle begins
* with the agent receiving an observation and reward from the environment.
* Subsequently, the agent selects an action and informs the environment. The
* interactions that took place are logged to the ::logger and ::compactLogger
* streams. When the cycle equals a power of two, a summary of the interactions
* is printed to the standard output.
*
* Options read in the visible part of the function: "random-seed", "verbose",
* "exploration", "explore-decay", "terminate-age" and "learning-period".
*
* NOTE: this listing is truncated; the end of the function is not shown.
* \param ai The agent.
* \param env The environment.
* \param options The configuration options. */
void mainLoop(Agent &ai, Environment &env, options_t &options) {
// Apply random seed (Default: 0)
srand(getOption<unsigned int>(options, "random-seed", 0));
// Verbose output (Default: false)
bool verbose = getOption<bool>(options, "verbose", false);
// Determine exploration options (Default: don't explore, don't decay)
// `explore` only records whether the option was supplied; the rate itself
// is read on the following line.
bool explore = options.count("exploration") > 0;
double explore_rate = getOption<double>(options, "exploration", 0.0);
double explore_decay = getOption<double>(options, "explore-decay", 1.0);
// NOTE(review): explore_rate has no upper-bound check here, unlike
// explore_decay on the next line.
assert(0.0 <= explore_rate);
assert(0.0 <= explore_decay && explore_decay <= 1.0);
// Determine termination age (Default: don't terminate)
// terminate_age is only consulted when terminate_check is true.
bool terminate_check = options.count("terminate-age") > 0;
age_t terminate_age = getOption<age_t>(options, "terminate-age", 0);
assert(0 <= terminate_age);
// Determine the cycle after which the agent stops learning (if ever)
// A value of 0 (the default) never triggers the cut-off tested in the loop.
int learning_period = getOption<int>(options, "learning-period", 0);
assert(0 <= learning_period);
// Agent/environment interaction loop
for (int cycle = 1; !env.isFinished(); cycle++) {
// Check for agent termination
if (terminate_check && ai.age() > terminate_age) {
break;
}
// Save the current clock cycle (to compute how long this cycle took)
clock_t cycle_start = clock();
// Get a percept from the environment
percept_t observation = env.getObservation();
percept_t reward = env.getReward();
// Once the learning period has passed, exploration is switched off; the
// visible code never switches it back on.
if (learning_period > 0 && cycle > learning_period)
explore = false;
// Update agent's environment model with the new percept
ai.modelUpdate(observation, reward);
// Determine best exploitive action, or explore
action_t action;
bool explored = false;
// rand01() is only called when exploration is enabled (short-circuit).
if (explore && (rand01() < explore_rate)) { // Explore
explored = true;
action = ai.genRandomAction();
}
else { // Exploit
action = ai.search();
}
// Send an action to the environment
env.performAction(action);
// Update agent's environment model with the chosen action
ai.modelUpdate(action);
// Calculate how long this cycle took
// (clock() ticks since cycle_start, converted to seconds)
double time = double(clock() - cycle_start) / double(CLOCKS_PER_SEC);
// Log this turn
logger << cycle << ", " << observation << ", " << reward << ", "
<< action << ", " << explored << ", " << explore_rate << ", "
<< ai.totalReward() << ", " << ai.averageReward() << ", "
<< time << ", " << ai.modelSize() << std::endl;
// Print to standard output when cycle == 2^n or on verbose option
// (for cycle >= 1 the bit test holds only when cycle is a power of two)
if (verbose || (cycle & (cycle - 1)) == 0) {
std::cout << "cycle: " << cycle << std::endl;
std::cout << "average reward: " << ai.averageReward() << std::endl;
if (explore) {
std::cout << "explore rate: " << explore_rate << std::endl;
}
}
// Print environment state if verbose option is true
if (verbose) {
std::cout << env.print();
}
// Update exploration rate
if (explore) explore_rate *= explore_decay;
}
//......... part of the code is omitted here .........