本文整理汇总了 C++ 中 ActionVect 类的典型用法代码示例。如果您正苦于以下问题：C++ ActionVect 类的具体用法？C++ ActionVect 怎么用？C++ ActionVect 使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 ActionVect 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 C++ 代码示例。
示例1: default_paddles
/**
 * Resets the ROM settings, the paddles and the emulated system, then drives
 * the emulator through a fixed warm-up sequence.
 *
 * The warm-up consists of 60 steps of NOOP, numResetSteps steps of RESET and
 * finally the starting actions reported by the ROM settings. frame_number is
 * decremented after every simulated step so that none of these frames are
 * counted.
 *
 * @param numResetSteps number of steps for which the RESET action is applied.
 */
void ALEState::reset(int numResetSteps) {
  // reset the rom
  m_settings->reset();
  // Reset the paddles
  default_paddles();
  // Reset the emulator
  m_osystem->console().system().reset();

  // NOOP for 60 steps
  for (int i = 0; i < 60; i++) {
    apply_action(PLAYER_A_NOOP, PLAYER_B_NOOP);
    simulate();
    // Don't count these frames
    frame_number--;
  }

  // reset for n steps
  for (int i = 0; i < numResetSteps; i++) {
    apply_action(RESET, PLAYER_B_NOOP);
    simulate();
    // Don't count these frames
    frame_number--;
  }

  // Apply necessary actions specified by the rom itself.
  // Iterating by element avoids comparing a signed index with size().
  ActionVect startingActions = m_settings->getStartingActions();
  for (auto action : startingActions) {
    apply_action(action, PLAYER_B_NOOP);
    simulate();
    // Don't count these frames
    frame_number--;
  }
}
示例2: rand
/**
 * Resets the system to its start state.
 *
 * Order of operations: reset the state variables, reset the emulated system,
 * emulate NOOPs, emulate RESET for m_num_reset_steps steps, reset the ROM
 * settings and, if enabled, play the ROM's starting actions.
 */
void StellaEnvironment::reset() {
  // RNG for generating environments.
  // NOTE(review): randGen is never referenced below (the random NOOP count
  // comes from rand()); it is kept in case its construction matters - confirm.
  Random randGen;

  // Reset the state variables (the original comment describes this as
  // resetting the paddles).
  m_state.resetVariables(m_osystem->event());

  // Reset the emulator.
  m_osystem->console().system().reset();

  // 60 NOOP steps in the deterministic setting; with stochastic starts a
  // random extra amount in [0, NUM_RANDOM_ENVIRONMENTS) is added.
  int noopSteps = m_stochastic_start ? 60 + rand() % NUM_RANDOM_ENVIRONMENTS : 60;
  emulate(PLAYER_A_NOOP, PLAYER_B_NOOP, noopSteps);

  // Hold RESET for the configured number of steps.
  emulate(RESET, PLAYER_B_NOOP, m_num_reset_steps);

  // Reset the ROM settings only after emulating, in case the NOOPs led to
  // reward.
  m_settings->reset();

  // Nothing more to do unless the ROM-specific starting actions are enabled.
  if (!m_use_starting_actions) {
    return;
  }

  ActionVect startingActions = m_settings->getStartingActions();
  for (auto action : startingActions) {
    emulate(action, PLAYER_B_NOOP);
  }
}
示例3: emulate
/** Resets the system to its start state. */
void StellaEnvironment::reset() {
m_state.resetEpisodeFrameNumber();
// Reset the paddles
m_state.resetPaddles(m_osystem->event());
// Reset the emulator
m_osystem->console().system().reset();
// NOOP for 60 steps in the deterministic environment setting, or some random amount otherwise
int noopSteps;
noopSteps = 60;
emulate(PLAYER_A_NOOP, PLAYER_B_NOOP, noopSteps);
// reset for n steps
emulate(RESET, PLAYER_B_NOOP, m_num_reset_steps);
// reset the rom (after emulating, in case the NOOPs led to reward)
m_settings->reset();
// Apply necessary actions specified by the rom itself
ActionVect startingActions = m_settings->getStartingActions();
for (size_t i = 0; i < startingActions.size(); i++) {
emulate(startingActions[i], PLAYER_B_NOOP);
}
}
示例4: evaluatePolicy
/**
 * Evaluates a random policy by playing numEpisodesToEval episodes.
 *
 * In every step an action is drawn with rand() from either the minimal or the
 * legal action set (selected by useMinActions). An episode ends when the
 * environment reports game over or after maxStepsInEpisode steps; the
 * environment is then reset with reset_game().
 *
 * @param env environment to act in.
 * @return the total reward over all episodes divided by numEpisodesToEval,
 *         or 0.0 if the environment offers no actions.
 */
double RandomAgent::evaluatePolicy(Environment<bool>& env) {
  // Check if one wants to sample from all possible actions or only the valid
  // ones.
  ActionVect actions;
  if (useMinActions) {
    actions = env.getMinimalActionSet();
  } else {
    actions = env.getLegalActionSet();
  }

  const int numActions = static_cast<int>(actions.size());
  printf("Number of Actions: %d\n\n", numActions);

  // rand() % 0 is undefined behaviour, so bail out when there is nothing to
  // sample from.
  if (actions.empty()) {
    printf("No actions available; skipping evaluation.\n");
    return 0.0;
  }

  int totalReward = 0;
  // Repeat (for each episode):
  for (int episode = 0; episode < numEpisodesToEval; episode++) {
    int cumulativeReward = 0;
    int step = 0;
    while (!env.game_over() && step < maxStepsInEpisode) {
      cumulativeReward += env.act(actions[rand() % numActions]);
      step++;
    }
    printf("Episode %d, Cumulative Reward: %d\n", episode + 1, cumulativeReward);
    totalReward += cumulativeReward;
    env.reset_game();  // Start the game again when the episode is over
  }

  return double(totalReward) / numEpisodesToEval;
}
示例5: main
int main(int argc, char** argv){
ALEInterface ale(1);
if(argc != 4){
printf("Usage: %s rom_file path_to_save_bits seed\n", argv[0]);
exit(1);
}
int seed = atoi(argv[3]);
ale.setInt("frame_skip", 5);
ale.setInt("random_seed", seed);
ale.setInt("max_num_frames_per_episode", 18000);
ale.setFloat("repeat_action_prob", 0.00);
ale.loadROM(argv[1]);
string outputFile = argv[2];
srand(seed);
ActionVect actions = ale.getLegalActionSet();
RAMFeatures features;
vector<bool> F;
ofstream outFile;
outFile.open(outputFile);
int reward = 0;
F.clear();
features.getCompleteFeatureVector(ale.getRAM(), F);
for(int i = 0; i < F.size(); i++){
outFile << F[i] << ",";
}
outFile << endl;
while(!ale.game_over()) {
reward += ale.act(actions[rand() % actions.size()]);
F.clear();
features.getCompleteFeatureVector(ale.getRAM(), F);
for(int i = 0; i < F.size(); i++){
outFile << F[i] << ",";
}
outFile << endl;
}
printf("Episode ended with a score of %d points\n", reward);
outFile.close();
return 0;
}
示例6: getAction
/**
 * Chooses the next action.
 *
 * The active code simply returns an element of av picked with rand().
 * state and ale are not used by it; they are only referenced by the disabled
 * one-step lookahead kept below for reference.
 *
 * @param av    candidate actions; must be non-empty (rand() % 0 is undefined).
 * @param state emulator state (currently unused).
 * @param ale   emulator interface (currently unused).
 * @return the selected action.
 */
Action getAction(ActionVect av, ALEState state, ALEInterface& ale) {
  // Disabled greedy one-step lookahead (would replace the random pick when it
  // finds an action with positive reward):
  //   float bestReward = 0;
  //   for(int i = 0; i < av.size(); i++) {
  //     float reward = ale.act(av[i]);
  //     if(reward > bestReward) {
  //       bestAction = av[i];
  //       bestReward = reward;
  //     }
  //     ale.restoreState(state);
  //   }
  Action bestAction = av[rand() % av.size()];
  return bestAction;
}
示例7: PrintQValues
std::string PrintQValues(
const std::vector<float>& q_values, const ActionVect& actions) {
assert(!q_values.empty());
assert(!actions.empty());
assert(q_values.size() == actions.size());
std::ostringstream actions_buf;
std::ostringstream q_values_buf;
for (auto i = 0; i < q_values.size(); ++i) {
const auto a_str =
boost::algorithm::replace_all_copy(
action_to_string(actions[i]), "PLAYER_A_", "");
const auto q_str = std::to_string(q_values[i]);
const auto column_size = std::max(a_str.size(), q_str.size()) + 1;
actions_buf.width(column_size);
actions_buf << a_str;
q_values_buf.width(column_size);
q_values_buf << q_str;
}
actions_buf << std::endl;
q_values_buf << std::endl;
return actions_buf.str() + q_values_buf.str();
}
示例8: getStartingActions
/**
 * Returns the actions to apply at start-up for this game:
 * SELECT, NOOP, SELECT, NOOP, SELECT and finally RESET.
 */
ActionVect Pong2Player025Settings::getStartingActions() {
  return ActionVect{
      SELECT, PLAYER_A_NOOP,
      SELECT, PLAYER_A_NOOP,
      SELECT,
      RESET,
  };
}
示例9: getStartingActions
/** Returns the start-up sequence for this game: a single PLAYER_A_FIRE. */
ActionVect JourneyEscapeSettings::getStartingActions() {
  return ActionVect(1, PLAYER_A_FIRE);
}
示例10: getStartingActions
/** Returns the start-up sequence for this game: one PLAYER_A_FIRE action. */
ActionVect AirRaidSettings::getStartingActions() {
  ActionVect openingMoves;
  openingMoves.insert(openingMoves.end(), 1, PLAYER_A_FIRE);
  return openingMoves;
}
示例11: getStartingActions
/** Returns the start-up sequence for this game: RESET, then PLAYER_A_LEFT. */
ActionVect SirLancelotSettings::getStartingActions() {
  ActionVect openingMoves = {RESET, PLAYER_A_LEFT};
  return openingMoves;
}
示例12: getStartingActions
/**
 * Builds the start-up action sequence: runs of JOYPAD_NOOP separated by five
 * JOYPAD_START presses, followed by a final run of JOYPAD_NOOP.
 *
 * All repeat counts are computed with integer arithmetic; the original used
 * floating-point factors (1.5, 15.1) that were implicitly truncated when
 * passed as element counts. The resulting counts are unchanged.
 */
ActionVect NBAGiveNGoSettings::getStartingActions() {
  const int num_of_nops = 100;
  const int introWait = 9 * num_of_nops;         // 900
  const int arcadeWait = 2 * num_of_nops;        // 200
  const int menuWait = 3 * num_of_nops / 2;      // 150, was 1.5 * num_of_nops
  const int finalWait = 151 * num_of_nops / 10;  // 1510, was 15.1 * num_of_nops
  const int startPresses = 5;

  ActionVect startingActions;
  // The final length is known up front, so allocate once.
  startingActions.reserve(introWait + arcadeWait + 3 * menuWait + finalWait +
                          startPresses);

  // wait for intro to end
  startingActions.insert(startingActions.end(), introWait, JOYPAD_NOOP);
  // press start to begin
  startingActions.push_back(JOYPAD_START);
  // select arcade
  startingActions.insert(startingActions.end(), arcadeWait, JOYPAD_NOOP);
  startingActions.push_back(JOYPAD_START);
  // select 1 vs cpu
  startingActions.insert(startingActions.end(), menuWait, JOYPAD_NOOP);
  startingActions.push_back(JOYPAD_START);
  // select team
  startingActions.insert(startingActions.end(), menuWait, JOYPAD_NOOP);
  startingActions.push_back(JOYPAD_START);
  // select team
  startingActions.insert(startingActions.end(), menuWait, JOYPAD_NOOP);
  startingActions.push_back(JOYPAD_START);
  // trailing wait (its purpose is not documented in the original code)
  startingActions.insert(startingActions.end(), finalWait, JOYPAD_NOOP);
  return startingActions;
}
示例13: getStartingActions
/**
 * Builds the start-up action sequence: five rounds of 100 JOYPAD_NOOPs
 * followed by a JOYPAD_START press, then 550 JOYPAD_NOOPs while waiting for
 * the race countdown.
 *
 * The trailing count is computed with integer arithmetic; the original used
 * 5.5 * num_of_nops, a floating-point value implicitly converted to an
 * element count. The resulting count is unchanged.
 */
ActionVect FZeroSettings::getStartingActions() {
  const int num_of_nops = 100;
  const int startPresses = 5;
  const int countdownWait = 11 * num_of_nops / 2;  // 550, was 5.5 * num_of_nops

  ActionVect startingActions;
  // The final length is known up front, so allocate once.
  startingActions.reserve(startPresses * (num_of_nops + 1) + countdownWait);

  // Each round waits and then presses start. The original labelled the rounds
  // "wait for intro to end / press start to begin", "select arcade",
  // "select 1 vs cpu", "select team" and "select team".
  // NOTE(review): those labels look copied from another game's settings -
  // confirm what each press actually selects here.
  for (int press = 0; press < startPresses; ++press) {
    startingActions.insert(startingActions.end(), num_of_nops, JOYPAD_NOOP);
    startingActions.push_back(JOYPAD_START);
  }

  // wait for race countdown
  startingActions.insert(startingActions.end(), countdownWait, JOYPAD_NOOP);
  return startingActions;
}
示例14: getStartingActions
/** Returns the start-up sequence for this game: a single PLAYER_A_RIGHT. */
ActionVect BeamRiderSettings::getStartingActions() {
  ActionVect openingMoves;
  openingMoves.assign(1, PLAYER_A_RIGHT);
  return openingMoves;
}
示例15: getStartingActions
/** Returns the start-up sequence for this game: a single PLAYER_A_UP. */
ActionVect PrivateEyeSettings::getStartingActions() {
  ActionVect openingMoves(1, PLAYER_A_UP);
  return openingMoves;
}