本文整理匯總了Python中rl.policy.LinearAnnealedPolicy類的典型用法代碼示例。如果您正苦於以下問題:Python policy.LinearAnnealedPolicy類的具體用法?Python policy.LinearAnnealedPolicy怎麼用?Python policy.LinearAnnealedPolicy使用的例子?那麼,這裡精選的代碼示例或許可以為您提供幫助。您也可以進一步了解該類所在模塊rl.policy的用法示例。
在下文中一共展示了policy.LinearAnnealedPolicy類的7個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點贊,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: main
# 需要導入模塊: from rl import policy [as 別名]
# 或者: from rl.policy import LinearAnnealedPolicy [as 別名]
def main():
    """Train a DQN agent on LunarLander-v2, save its weights and evaluate it.

    Exploration is an ``EpsGreedyQPolicy`` wrapped in a
    ``LinearAnnealedPolicy`` that controls the ``eps`` attribute
    (value_max=1.0, value_min=0.1, value_test=0.05, nb_steps=10000).
    The agent is fitted for 500000 steps and then tested for 5 episodes.
    """
    env_name = 'LunarLander-v2'

    # Build the environment and seed both NumPy and the environment with 42.
    env = gym.make(env_name)
    np.random.seed(42)
    env.seed(42)

    # Number of discrete actions and length of the observation vector.
    nb_actions = env.action_space.n
    obs_dim = env.observation_space.shape[0]
    print(nb_actions)

    model = build_model(obs_dim, nb_actions)
    replay_memory = SequentialMemory(limit=50000, window_length=1)

    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        nb_steps=10000,
    )
    agent = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=replay_memory,
        nb_steps_warmup=10,
        target_model_update=1e-2,
        policy=exploration,
    )
    agent.compile(Adam(lr=0.00025), metrics=['mae'])

    training_callbacks = build_callbacks(env_name)
    agent.fit(
        env,
        nb_steps=500000,
        visualize=False,
        verbose=2,
        callbacks=training_callbacks,
    )

    # Store the final weights once fitting has returned.
    agent.save_weights('dqn_{}_weights.h5f'.format(env_name), overwrite=True)

    # Run 5 rendered evaluation episodes.
    agent.test(env, nb_episodes=5, visualize=True)
示例2: main
# 需要導入模塊: from rl import policy [as 別名]
# 或者: from rl.policy import LinearAnnealedPolicy [as 別名]
def main():
    """Train a DQN agent on BreakoutDeterministic-v4, save its weights and evaluate it.

    Observations go through an ``AtariProcessor`` and the replay memory
    stacks ``WINDOW_LENGTH`` frames. Exploration is an ``EpsGreedyQPolicy``
    wrapped in a ``LinearAnnealedPolicy`` controlling ``eps``
    (value_max=1.0, value_min=0.1, value_test=0.05, nb_steps=1000000).
    The agent is fitted for 1750000 steps and then tested for 10 episodes.
    """
    ENV_NAME = 'BreakoutDeterministic-v4'
    INPUT_SHAPE = (84, 84)
    WINDOW_LENGTH = 4

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n

    # NOTE(review): build_model receives the per-frame shape only; presumably
    # it accounts for the frame-stacking window itself -- confirm.
    model = build_model(INPUT_SHAPE, num_actions)
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = AtariProcessor()

    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
                                  value_test=.05, nb_steps=1000000)
    dqn = DQNAgent(model=model, nb_actions=num_actions, policy=policy, memory=memory,
                   processor=processor, nb_steps_warmup=50000, gamma=.99,
                   target_model_update=10000, train_interval=4, delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)
    dqn.fit(env,
            nb_steps=1750000,
            log_interval=10000,
            visualize=False,
            verbose=2,
            callbacks=callbacks)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)
示例3: main
# 需要導入模塊: from rl import policy [as 別名]
# 或者: from rl.policy import LinearAnnealedPolicy [as 別名]
def main():
    """Train a DQN agent on CartPole-v0, save its weights and evaluate it.

    Exploration is an ``EpsGreedyQPolicy`` wrapped in a
    ``LinearAnnealedPolicy`` that controls the ``eps`` attribute
    (value_max=1.0, value_min=0.1, value_test=0.05, nb_steps=10000).
    The agent is fitted for 50000 steps and then tested for 5 episodes.
    """
    env_name = 'CartPole-v0'

    # Build the environment and seed both NumPy and the environment with 42.
    env = gym.make(env_name)
    np.random.seed(42)
    env.seed(42)

    # Number of discrete actions and length of the observation vector.
    action_count = env.action_space.n
    observation_size = env.observation_space.shape[0]
    print(action_count)

    q_network = build_model(observation_size, action_count)
    replay_memory = SequentialMemory(limit=50000, window_length=1)

    eps_greedy = EpsGreedyQPolicy()
    annealed_policy = LinearAnnealedPolicy(
        eps_greedy,
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        nb_steps=10000,
    )
    agent = DQNAgent(
        model=q_network,
        nb_actions=action_count,
        memory=replay_memory,
        nb_steps_warmup=10,
        target_model_update=1e-2,
        policy=annealed_policy,
    )
    agent.compile(Adam(lr=1e-3), metrics=['mae'])

    fit_callbacks = build_callbacks(env_name)
    agent.fit(
        env,
        nb_steps=50000,
        visualize=False,
        verbose=2,
        callbacks=fit_callbacks,
    )

    # Store the final weights once fitting has returned.
    agent.save_weights('dqn_{}_weights.h5f'.format(env_name), overwrite=True)

    # Run 5 rendered evaluation episodes.
    agent.test(env, nb_episodes=5, visualize=True)
示例4: main
# 需要導入模塊: from rl import policy [as 別名]
# 或者: from rl.policy import LinearAnnealedPolicy [as 別名]
def main():
    """Load saved DQN weights for LunarLander-v2 and run 10 evaluation episodes.

    The agent is constructed and compiled exactly as for training, but no
    fitting happens here: weights are read from
    ``dqn_LunarLander-v2_weights_510000.h5f`` and the agent is only tested.
    """
    env_name = 'LunarLander-v2'

    # Build the environment and seed both NumPy and the environment with 42.
    env = gym.make(env_name)
    np.random.seed(42)
    env.seed(42)

    # Number of discrete actions and length of the observation vector.
    nb_actions = env.action_space.n
    obs_dim = env.observation_space.shape[0]
    print(nb_actions)

    model = build_model(obs_dim, nb_actions)
    replay_memory = SequentialMemory(limit=50000, window_length=1)

    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        nb_steps=10000,
    )
    agent = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=replay_memory,
        nb_steps_warmup=10,
        target_model_update=1e-2,
        policy=exploration,
    )
    agent.compile(Adam(lr=1e-3), metrics=['mae'])

    # NOTE(review): the result is never used in this evaluation-only path;
    # the call is kept in case build_callbacks has side effects -- confirm.
    callbacks = build_callbacks(env_name)

    # Restore previously saved weights instead of training.
    agent.load_weights('dqn_LunarLander-v2_weights_510000.h5f')

    # Run 10 rendered evaluation episodes.
    agent.test(env, nb_episodes=10, visualize=True)
示例5: main
# 需要導入模塊: from rl import policy [as 別名]
# 或者: from rl.policy import LinearAnnealedPolicy [as 別名]
def main():
    """Load saved DQN weights for BreakoutDeterministic-v4 and run 10 evaluation episodes.

    The agent is constructed and compiled exactly as for training, but no
    fitting happens here: weights are read from
    ``dqn_BreakoutDeterministic-v4_weights_1750000.h5f`` and the agent is
    only tested.
    """
    env_name = 'BreakoutDeterministic-v4'
    frame_shape = (84, 84)
    window_length = 4

    # Build the environment and seed both NumPy and the environment with 42.
    env = gym.make(env_name)
    np.random.seed(42)
    env.seed(42)
    nb_actions = env.action_space.n

    model = build_model(frame_shape, nb_actions)
    replay_memory = SequentialMemory(limit=1000000, window_length=window_length)
    atari_processor = AtariProcessor()

    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        nb_steps=1000000,
    )
    agent = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        policy=exploration,
        memory=replay_memory,
        processor=atari_processor,
        nb_steps_warmup=50000,
        gamma=.99,
        target_model_update=10000,
        train_interval=4,
        delta_clip=1.,
    )
    agent.compile(Adam(lr=.00025), metrics=['mae'])

    # NOTE(review): the result is never used in this evaluation-only path;
    # the call is kept in case build_callbacks has side effects -- confirm.
    callbacks = build_callbacks(env_name)

    # Restore previously saved weights instead of training.
    agent.load_weights('dqn_BreakoutDeterministic-v4_weights_1750000.h5f')

    # Run 10 rendered evaluation episodes.
    agent.test(env, nb_episodes=10, visualize=True)
示例6: training_game
# 需要導入模塊: from rl import policy [as 別名]
# 或者: from rl.policy import LinearAnnealedPolicy [as 別名]
def training_game():
    """Train a DQN agent on the mini-game named by ``FLAGS.mini_game``, then test it.

    Reads the module-level names ``FLAGS``, ``_WINDOW_LENGTH`` and
    ``LOAD_MODEL``. When ``LOAD_MODEL`` is truthy, weights are loaded from
    ``dqn_<mini_game>_weights.h5f`` before fitting. After fitting, the
    weights are written back to that same file and the agent is tested for
    30 episodes.
    """
    env = Environment()

    input_shape = (FLAGS.screen_size, FLAGS.screen_size, 1)
    nb_actions = 12  # Number of actions

    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)

    processor = SC2Proc()

    # Policy: epsilon-greedy with `eps` controlled by LinearAnnealedPolicy.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr="eps", value_max=1, value_min=0.7,
                                  value_test=.0, nb_steps=1e6)

    # Agent
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   enable_double_dqn=False,
                   nb_steps_warmup=500,
                   target_model_update=1e-2,
                   policy=policy,
                   batch_size=150,
                   processor=processor)

    dqn.compile(Adam(lr=.001), metrics=["mae"])

    # TensorBoard callback. fit() copies and extends its `callbacks` argument
    # as a list, so the single callback must be wrapped in one.
    callbacks = [keras.callbacks.TensorBoard(log_dir='./Graph', histogram_freq=0,
                                             write_graph=True, write_images=False)]

    # File used both to restore weights before training and to save them after.
    name = FLAGS.mini_game
    w_file = "dqn_{}_weights.h5f".format(name)

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    dqn.fit(env, callbacks=callbacks, nb_steps=1e7, action_repetition=2,
            log_interval=1e4, verbose=2)

    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
示例7: training_game
# 需要導入模塊: from rl import policy [as 別名]
# 或者: from rl.policy import LinearAnnealedPolicy [as 別名]
def training_game():
    """Train a DQN agent on the "HallucinIce" map, checkpointing and logging, then test it.

    Reads the module-level names ``_SIZE``, ``_WINDOW_LENGTH``,
    ``SAVE_MODEL`` and ``LOAD_MODEL``. When ``SAVE_MODEL`` is truthy the
    checkpoint filename contains a ``{step}`` placeholder. When
    ``LOAD_MODEL`` is truthy, weights are loaded from
    ``dqn_HallucinIce_weights.h5f`` before fitting. After fitting, the
    weights are written to that same file and the agent is tested for
    30 episodes.
    """
    interface_format = features.AgentInterfaceFormat(
        feature_dimensions=features.Dimensions(screen=64, minimap=32)
    )
    env = Environment(
        map_name="HallucinIce",
        visualize=True,
        game_steps_per_episode=150,
        agent_interface_format=interface_format,
    )

    input_shape = (_SIZE, _SIZE, 1)
    # One action per cell of a _SIZE x _SIZE grid.
    nb_actions = _SIZE * _SIZE

    model = neural_network_model(input_shape, nb_actions)

    # Replay memory holding at most 5000 entries, with _WINDOW_LENGTH
    # observations per sample.
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)

    processor = SC2Proc()

    # Policy: epsilon-greedy action selection whose `eps` attribute is
    # controlled by LinearAnnealedPolicy (value_max=1, value_min=0.7 over
    # nb_steps=1e6; value_test=0.0).
    # NOTE(review): the original author considered a Boltzmann Q policy
    # (tau=1., clip=(-500, 500)) as an alternative here.
    greedy_policy = EpsGreedyQPolicy()
    policy = LinearAnnealedPolicy(
        greedy_policy,
        attr="eps",
        value_max=1,
        value_min=0.7,
        value_test=.0,
        nb_steps=1e6,
    )

    # Agent: DQN over the model, memory, policy and processor built above.
    dqn = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        nb_steps_warmup=500,
        target_model_update=1e-2,
        policy=policy,
        batch_size=150,
        processor=processor,
    )
    dqn.compile(Adam(lr=.001), metrics=["mae"])

    # File names for the final weights, periodic checkpoints and training log.
    name = "HallucinIce"
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"
    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"
    log_file = "training_w_{}_log.json".format(name)

    callbacks = [
        ModelIntervalCheckpoint(check_w_file, interval=1000),
        FileLogger(log_file, interval=100),
    ]

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    dqn.fit(
        env,
        callbacks=callbacks,
        nb_steps=1e7,
        action_repetition=2,
        log_interval=1e4,
        verbose=2,
    )

    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)