This article collects typical usage examples of the C++ ActionVect::size method. If you are wondering how to use ActionVect::size in C++, what it does, or what calling it looks like in practice, the curated code samples here may help. You can also explore further usage examples of the containing class, ActionVect.
Eight code examples of the ActionVect::size method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better C++ code examples.
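For orientation, here is a minimal sketch of the pattern most of the examples below share: ask the ALE interface for its action set and iterate it via ActionVect::size(). It assumes ALE's usual definitions (ActionVect as a std::vector of Action values, ALEInterface declared in ale_interface.hpp); treat it as an illustration rather than the canonical API.

#include <cstdio>
#include <ale_interface.hpp>  // assumed ALE header; provides ALEInterface, Action, ActionVect

// Enumerate the legal actions of a loaded ROM; ActionVect::size() gives their count.
void listActions(ALEInterface& ale) {
  ActionVect actions = ale.getLegalActionSet();
  printf("Number of legal actions: %zu\n", actions.size());
  for (size_t i = 0; i < actions.size(); i++) {
    printf("  action %zu -> id %d\n", i, static_cast<int>(actions[i]));
  }
}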
Example 1: reset
void ALEState::reset(int numResetSteps) {
  // reset the rom
  m_settings->reset();
  // Reset the paddles
  default_paddles();
  // Reset the emulator
  m_osystem->console().system().reset();
  // NOOP for 60 steps
  for (int i = 0; i < 60; i++) {
    apply_action(PLAYER_A_NOOP, PLAYER_B_NOOP);
    simulate();
    // Don't count these frames
    frame_number--;
  }
  // reset for n steps
  for (int i = 0; i < numResetSteps; i++) {
    apply_action(RESET, PLAYER_B_NOOP);
    simulate();
    // Don't count these frames
    frame_number--;
  }
  // Apply necessary actions specified by the rom itself
  ActionVect startingActions = m_settings->getStartingActions();
  for (int i = 0; i < startingActions.size(); i++) {
    apply_action(startingActions[i], PLAYER_B_NOOP);
    simulate();
    frame_number--;
  }
}
Example 2: reset
/** Resets the system to its start state. */
void StellaEnvironment::reset() {
  // RNG for generating environments
  Random randGen;
  // Reset the paddles
  m_state.resetVariables(m_osystem->event());
  // Reset the emulator
  m_osystem->console().system().reset();
  // NOOP for 60 steps in the deterministic environment setting, or some random amount otherwise
  int noopSteps;
  if (m_stochastic_start)
    noopSteps = 60 + rand() % NUM_RANDOM_ENVIRONMENTS;
  else
    noopSteps = 60;
  emulate(PLAYER_A_NOOP, PLAYER_B_NOOP, noopSteps);
  // reset for n steps
  emulate(RESET, PLAYER_B_NOOP, m_num_reset_steps);
  // reset the rom (after emulating, in case the NOOPs led to reward)
  m_settings->reset();
  // Apply necessary actions specified by the rom itself
  if (m_use_starting_actions) {
    ActionVect startingActions = m_settings->getStartingActions();
    for (size_t i = 0; i < startingActions.size(); i++)
      emulate(startingActions[i], PLAYER_B_NOOP);
  }
}
Example 3: reset
/** Resets the system to its start state. */
void StellaEnvironment::reset() {
  m_state.resetEpisodeFrameNumber();
  // Reset the paddles
  m_state.resetPaddles(m_osystem->event());
  // Reset the emulator
  m_osystem->console().system().reset();
  // NOOP for 60 steps in the deterministic environment setting, or some random amount otherwise
  int noopSteps;
  noopSteps = 60;
  emulate(PLAYER_A_NOOP, PLAYER_B_NOOP, noopSteps);
  // reset for n steps
  emulate(RESET, PLAYER_B_NOOP, m_num_reset_steps);
  // reset the rom (after emulating, in case the NOOPs led to reward)
  m_settings->reset();
  // Apply necessary actions specified by the rom itself
  ActionVect startingActions = m_settings->getStartingActions();
  for (size_t i = 0; i < startingActions.size(); i++) {
    emulate(startingActions[i], PLAYER_B_NOOP);
  }
}
Example 4: evaluatePolicy
double RandomAgent::evaluatePolicy(Environment<bool>& env){
  int reward = 0;
  int totalReward = 0;
  int cumulativeReward = 0;
  int numActions;
  ActionVect actions;
  //Check if one wants to sample from all possible actions or only the valid ones:
  if(useMinActions){
    actions = env.getMinimalActionSet();
  }
  else{
    actions = env.getLegalActionSet();
  }
  numActions = actions.size();
  printf("Number of Actions: %d\n\n", numActions);
  //Repeat (for each episode):
  for(int episode = 0; episode < numEpisodesToEval; episode++){
    int step = 0;
    while(!env.game_over() && step < maxStepsInEpisode) {
      reward = env.act(actions[rand()%numActions]);
      cumulativeReward += reward;
      step++;
    }
    printf("Episode %d, Cumulative Reward: %d\n", episode + 1, cumulativeReward);
    totalReward += cumulativeReward;
    cumulativeReward = 0;
    env.reset_game(); //Start the game again when the episode is over
  }
  return double(totalReward)/numEpisodesToEval;
}
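Example 4 (and Example 6 below) picks a random action with rand() % actions.size(), which carries a slight modulo bias and relies on srand(). A small alternative sketch using C++11 <random> (my own illustration, not part of either example; it assumes the ALE header defining Action and ActionVect is included):

#include <random>

// Draw a uniformly random action from an ActionVect without modulo bias.
Action sampleUniformAction(const ActionVect& actions, std::mt19937& rng) {
  std::uniform_int_distribution<size_t> dist(0, actions.size() - 1);
  return actions[dist(rng)];
}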
Example 5: getAction
Action getAction(ActionVect av, ALEState state, ALEInterface& ale) {
  float bestReward = 0;
  Action bestAction = av[rand() % av.size()];
  // for(int i = 0; i < av.size(); i++) {
  //   float reward = ale.act(av[i]);
  //   if(reward > bestReward) {
  //     bestAction = av[i];
  //     bestReward = reward;
  //   }
  //   ale.restoreState(state);
  // }
  return bestAction;
}
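The commented-out loop in Example 5 hints at a one-step lookahead: evaluate every action in the ActionVect from a saved state and keep the one with the highest immediate reward. A possible completion of that idea is sketched below; it assumes the commonly available ALEInterface methods act(), cloneState(), and restoreState(), so read it as an illustration rather than the original author's implementation.

#include <limits>

// One-step greedy lookahead over an ActionVect (illustrative sketch; av must be non-empty).
Action greedyOneStepAction(const ActionVect& av, ALEInterface& ale) {
  ALEState saved = ale.cloneState();    // remember the current state
  Action bestAction = av[0];
  float bestReward = -std::numeric_limits<float>::infinity();
  for (size_t i = 0; i < av.size(); i++) {
    float reward = ale.act(av[i]);      // try one action
    if (reward > bestReward) {
      bestReward = reward;
      bestAction = av[i];
    }
    ale.restoreState(saved);            // rewind before trying the next action
  }
  return bestAction;
}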
Example 6: main
int main(int argc, char** argv){
  ALEInterface ale(1);
  if(argc != 4){
    printf("Usage: %s rom_file path_to_save_bits seed\n", argv[0]);
    exit(1);
  }
  int seed = atoi(argv[3]);
  ale.setInt("frame_skip", 5);
  ale.setInt("random_seed", seed);
  ale.setInt("max_num_frames_per_episode", 18000);
  ale.setFloat("repeat_action_prob", 0.00);
  ale.loadROM(argv[1]);
  string outputFile = argv[2];
  srand(seed);
  ActionVect actions = ale.getLegalActionSet();
  RAMFeatures features;
  vector<bool> F;
  ofstream outFile;
  outFile.open(outputFile);
  int reward = 0;
  F.clear();
  features.getCompleteFeatureVector(ale.getRAM(), F);
  for(int i = 0; i < F.size(); i++){
    outFile << F[i] << ",";
  }
  outFile << endl;
  while(!ale.game_over()) {
    reward += ale.act(actions[rand() % actions.size()]);
    F.clear();
    features.getCompleteFeatureVector(ale.getRAM(), F);
    for(int i = 0; i < F.size(); i++){
      outFile << F[i] << ",";
    }
    outFile << endl;
  }
  printf("Episode ended with a score of %d points\n", reward);
  outFile.close();
  return 0;
}
Example 7: PrintQValues
std::string PrintQValues(
    const std::vector<float>& q_values, const ActionVect& actions) {
  assert(!q_values.empty());
  assert(!actions.empty());
  assert(q_values.size() == actions.size());
  std::ostringstream actions_buf;
  std::ostringstream q_values_buf;
  for (auto i = 0; i < q_values.size(); ++i) {
    const auto a_str =
        boost::algorithm::replace_all_copy(
            action_to_string(actions[i]), "PLAYER_A_", "");
    const auto q_str = std::to_string(q_values[i]);
    const auto column_size = std::max(a_str.size(), q_str.size()) + 1;
    actions_buf.width(column_size);
    actions_buf << a_str;
    q_values_buf.width(column_size);
    q_values_buf << q_str;
  }
  actions_buf << std::endl;
  q_values_buf << std::endl;
  return actions_buf.str() + q_values_buf.str();
}
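A hypothetical call site for Example 7, assuming a Q-value vector aligned with ALE's minimal action set (the names below are placeholders, not part of the original code):

#include <cassert>
#include <iostream>
#include <vector>

// Hypothetical usage: print one row of action labels and one row of Q-values.
void showQValues(ALEInterface& ale, const std::vector<float>& q_values) {
  ActionVect actions = ale.getMinimalActionSet();
  assert(q_values.size() == actions.size());
  std::cout << PrintQValues(q_values, actions);
}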
Example 8: main
int main(int argc, char** argv){
  //For the use of options:
  RAMFeatures ramFeatures;
  vector<bool> FRam, FnextRam;
  vector<double> transitions((ramFeatures.getNumberOfFeatures() - 1)*2, 0);
  readParameters(argc, argv);
  readParamFiles();
  srand(seed);
  //Initializing ALE:
  ALEInterface ale(1);
  ale.setFloat("frame_skip", NUM_STEPS_PER_ACTION);
  ale.setFloat("stochasticity", STOCHASTICITY);
  ale.setInt("random_seed", seed);
  ale.setInt("max_num_frames_per_episode", MAX_LENGTH_EPISODE);
  ale.loadROM(romPath.c_str());
  //Initializing useful things to agent:
  BPROFeatures features;
  actions = ale.getLegalActionSet();
  numActions = actions.size();
  numFeatures = NUM_COLUMNS * NUM_ROWS * NUM_COLORS
      + (2 * NUM_COLUMNS - 1) * (2 * NUM_ROWS - 1) * NUM_COLORS * NUM_COLORS + 1;
  for(int i = 0; i < numActions; i++){
    //Initialize Q;
    Q.push_back(0);
    w.push_back(vector<double>(numFeatures, 0.0));
  }
  loadWeights(wgtPath);
  int reward = 0;
  double intr_reward = 0.0;
  FRam.clear();
  ramFeatures.getCompleteFeatureVector(ale.getScreen(), ale.getRAM(), FRam);
  while(!ale.game_over()){
    //Get state and features active on that state:
    F.clear();
    features.getActiveFeaturesIndices(ale.getScreen(), ale.getRAM(), F);
    updateQValues(); //Update Q-values for each possible action
    currentAction = epsilonGreedy();
    //Take action, observe reward and next state:
    reward += ale.act(actions[currentAction]);
    FnextRam.clear();
    ramFeatures.getCompleteFeatureVector(ale.getScreen(), ale.getRAM(), FnextRam);
    updateTransitionVector(FRam, FnextRam, transitions);
    //Calculating intrinsic reward:
    for(int i = 0; i < transitions.size(); i++){
      transitions[i] = (transitions[i] - mean[i])/var[i];
    }
    intr_reward = 0.0;
    for(int i = 0; i < transitions.size(); i++){
      intr_reward += option[i] * transitions[i];
    }
    printf("%f\n", intr_reward);
    FRam = FnextRam;
  }
  printf("Final score: %d\n", reward);
  return 0;
}