本文整理匯總了Python中rl.agents.dqn.DQNAgent類的典型用法代碼示例。如果您正苦於以下問題:Python dqn.DQNAgent類的具體用法?Python dqn.DQNAgent怎麼用?Python dqn.DQNAgent使用的例子?那麼,這裡精選的代碼示例或許可以為您提供幫助。您也可以進一步了解該類所在模塊rl.agents.dqn的用法示例。
在下文中一共展示了dqn.DQNAgent類的10個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: test_single_dqn_input
# 需要導入模塊: from rl.agents import dqn [as 別名]
# 或者: from rl.agents.dqn import DQNAgent [as 別名]
def test_single_dqn_input():
    """Smoke-test DQNAgent on a single-input model, with and without double DQN.

    A Flatten -> Dense(2) network over a (2, 3) input is built once; for each
    ``enable_double_dqn`` setting a fresh agent is compiled with SGD and
    fitted for 10 steps on ``MultiInputTestEnv((3,))``. The model and the
    replay memory are shared by both agents.
    """
    net = Sequential()
    net.add(Flatten(input_shape=(2, 3)))
    net.add(Dense(2))
    replay = SequentialMemory(limit=10, window_length=2)

    for use_double in (True, False):
        agent_options = dict(
            memory=replay,
            nb_actions=2,
            nb_steps_warmup=5,
            batch_size=4,
            enable_double_dqn=use_double,
        )
        agent = DQNAgent(net, **agent_options)
        agent.compile('sgd')
        agent.fit(MultiInputTestEnv((3,)), nb_steps=10)
示例2: test_multi_dqn_input
# 需要導入模塊: from rl.agents import dqn [as 別名]
# 或者: from rl.agents.dqn import DQNAgent [as 別名]
def test_multi_dqn_input():
    """Smoke-test DQNAgent on a two-input model, with and without double DQN.

    Two inputs of shape (2, 3) and (2, 4) are concatenated, flattened and fed
    to a Dense(2) head. For each ``enable_double_dqn`` setting a fresh agent
    (using a ``MultiInputProcessor`` with two inputs) is compiled with SGD and
    fitted for 10 steps on ``MultiInputTestEnv([(3,), (4,)])``. The model,
    memory and processor are shared by both agents.
    """
    first_in = Input(shape=(2, 3))
    second_in = Input(shape=(2, 4))
    merged = Concatenate()([first_in, second_in])
    flat = Flatten()(merged)
    head = Dense(2)(flat)
    net = Model(inputs=[first_in, second_in], outputs=head)

    replay = SequentialMemory(limit=10, window_length=2)
    multi_processor = MultiInputProcessor(nb_inputs=2)

    for use_double in (True, False):
        agent_options = dict(
            memory=replay,
            nb_actions=2,
            nb_steps_warmup=5,
            batch_size=4,
            processor=multi_processor,
            enable_double_dqn=use_double,
        )
        agent = DQNAgent(net, **agent_options)
        agent.compile('sgd')
        agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10)
示例3: main
# 需要導入模塊: from rl.agents import dqn [as 別名]
# 或者: from rl.agents.dqn import DQNAgent [as 別名]
def main():
    """Train a DQN agent on LunarLander-v2, save its weights, then test it.

    NumPy and the environment are both seeded with 42. The agent is fitted
    for 500000 steps, its final weights are written to
    ``dqn_LunarLander-v2_weights.h5f`` and it is then tested for 5 episodes
    with ``visualize=True``.
    """
    env_id = 'LunarLander-v2'

    # Create the environment and fix both random seeds.
    env = gym.make(env_id)
    np.random.seed(42)
    env.seed(42)

    # The action count and the first observation dimension are handed to
    # build_model.
    n_actions = env.action_space.n
    obs_dim = env.observation_space.shape[0]
    print(n_actions)
    q_network = build_model(obs_dim, n_actions)

    replay = SequentialMemory(limit=50000, window_length=1)
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        nb_steps=10000,
    )
    agent = DQNAgent(
        model=q_network,
        nb_actions=n_actions,
        memory=replay,
        nb_steps_warmup=10,
        target_model_update=1e-2,
        policy=exploration,
    )
    agent.compile(Adam(lr=0.00025), metrics=['mae'])

    fit_options = {
        'nb_steps': 500000,
        'visualize': False,
        'verbose': 2,
        'callbacks': build_callbacks(env_id),
    }
    agent.fit(env, **fit_options)

    # Persist the weights reached at the end of training.
    weights_path = 'dqn_{}_weights.h5f'.format(env_id)
    agent.save_weights(weights_path, overwrite=True)

    # Evaluate the trained agent for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=True)
示例4: main
# 需要導入模塊: from rl.agents import dqn [as 別名]
# 或者: from rl.agents.dqn import DQNAgent [as 別名]
def main():
    """Train a DQN agent on BreakoutDeterministic-v4, save its weights, then test it.

    NumPy and the environment are both seeded with 42. The agent is fitted
    for 1750000 steps, its final weights are written to
    ``dqn_BreakoutDeterministic-v4_weights.h5f`` and it is then tested for
    10 episodes with ``visualize=True``.
    """
    ENV_NAME = 'BreakoutDeterministic-v4'
    INPUT_SHAPE = (84, 84)
    WINDOW_LENGTH = 4

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n

    # build_model receives the per-frame shape; the window length is only
    # passed to the replay memory below.
    model = build_model(INPUT_SHAPE, num_actions)
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = AtariProcessor()
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
                                  value_test=.05, nb_steps=1000000)

    dqn = DQNAgent(model=model, nb_actions=num_actions, policy=policy, memory=memory,
                   processor=processor, nb_steps_warmup=50000, gamma=.99,
                   target_model_update=10000, train_interval=4, delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)
    dqn.fit(env,
            nb_steps=1750000,
            log_interval=10000,
            visualize=False,
            verbose=2,
            callbacks=callbacks)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)
示例5: main
# 需要導入模塊: from rl.agents import dqn [as 別名]
# 或者: from rl.agents.dqn import DQNAgent [as 別名]
def main():
    """Train a DQN agent on CartPole-v0, save its weights, then test it.

    NumPy and the environment are both seeded with 42. The agent is fitted
    for 50000 steps, its final weights are written to
    ``dqn_CartPole-v0_weights.h5f`` and it is then tested for 5 episodes
    with ``visualize=True``.
    """
    env_id = 'CartPole-v0'

    # Build and seed the environment.
    env = gym.make(env_id)
    np.random.seed(42)
    env.seed(42)

    # Read the sizes that build_model needs from the environment's spaces.
    action_count = env.action_space.n
    observation_size = env.observation_space.shape[0]
    print(action_count)
    network = build_model(observation_size, action_count)

    # Agent components: replay memory and annealed epsilon-greedy policy.
    buffer = SequentialMemory(limit=50000, window_length=1)
    eps_policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        nb_steps=10000,
    )
    agent_config = dict(
        model=network,
        nb_actions=action_count,
        memory=buffer,
        nb_steps_warmup=10,
        target_model_update=1e-2,
        policy=eps_policy,
    )
    agent = DQNAgent(**agent_config)
    agent.compile(Adam(lr=1e-3), metrics=['mae'])

    training_callbacks = build_callbacks(env_id)
    agent.fit(env, nb_steps=50000, visualize=False, verbose=2,
              callbacks=training_callbacks)

    # Store the final weights once training has finished.
    agent.save_weights('dqn_{}_weights.h5f'.format(env_id), overwrite=True)

    # Run 5 evaluation episodes.
    agent.test(env, nb_episodes=5, visualize=True)
示例6: main
# 需要導入模塊: from rl.agents import dqn [as 別名]
# 或者: from rl.agents.dqn import DQNAgent [as 別名]
def main():
    """Evaluate a previously trained DQN agent on LunarLander-v2.

    The agent is rebuilt the same way as for training, but no ``fit`` call is
    made: weights are loaded from ``dqn_LunarLander-v2_weights_510000.h5f``
    and the agent is tested for 10 episodes with ``visualize=True``.
    """
    ENV_NAME = 'LunarLander-v2'

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n
    state_space = env.observation_space.shape[0]
    print(num_actions)

    model = build_model(state_space, num_actions)
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
                                  value_test=.05, nb_steps=10000)
    dqn = DQNAgent(model=model, nb_actions=num_actions, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # NOTE(review): the result is never used because nothing is trained here;
    # presumably safe to drop - confirm build_callbacks has no side effects.
    callbacks = build_callbacks(ENV_NAME)

    # Load the weights of the step-510000 checkpoint instead of training.
    # The file name is derived from ENV_NAME so the two cannot drift apart.
    dqn.load_weights('dqn_{}_weights_510000.h5f'.format(ENV_NAME))

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)
示例7: main
# 需要導入模塊: from rl.agents import dqn [as 別名]
# 或者: from rl.agents.dqn import DQNAgent [as 別名]
def main():
    """Evaluate a previously trained DQN agent on BreakoutDeterministic-v4.

    The agent is rebuilt the same way as for training, but no ``fit`` call is
    made: weights are loaded from
    ``dqn_BreakoutDeterministic-v4_weights_1750000.h5f`` and the agent is
    tested for 10 episodes with ``visualize=True``.
    """
    ENV_NAME = 'BreakoutDeterministic-v4'
    INPUT_SHAPE = (84, 84)
    WINDOW_LENGTH = 4

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n

    model = build_model(INPUT_SHAPE, num_actions)
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = AtariProcessor()
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
                                  value_test=.05, nb_steps=1000000)
    dqn = DQNAgent(model=model, nb_actions=num_actions, policy=policy, memory=memory,
                   processor=processor, nb_steps_warmup=50000, gamma=.99,
                   target_model_update=10000, train_interval=4, delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    # NOTE(review): the result is never used because nothing is trained here;
    # presumably safe to drop - confirm build_callbacks has no side effects.
    callbacks = build_callbacks(ENV_NAME)

    # Load the weights of the step-1750000 checkpoint instead of training.
    # The file name is derived from ENV_NAME so the two cannot drift apart.
    dqn.load_weights('dqn_{}_weights_1750000.h5f'.format(ENV_NAME))

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)
示例8: train_dqn_model
# 需要導入模塊: from rl.agents import dqn [as 別名]
# 或者: from rl.agents.dqn import DQNAgent [as 別名]
def train_dqn_model(layers, rounds=10000, run_test=False, use_score=False):
    """Train a DQN agent on a malware gym environment and optionally test it.

    Args:
        layers: Forwarded unchanged to ``generate_dense_model``.
        rounds: Number of steps passed to ``agent.fit``.
        run_test: When true, the trained agent is also run for 100 episodes
            on the matching test environment.
        use_score: Selects the ``malware-score-*`` environments instead of
            the plain ``malware-*`` ones.

    Returns:
        A tuple ``(agent, model, history_train, history_test)``. The
        histories are the ``history`` attributes of the training and test
        environments; ``history_test`` is ``None`` unless ``run_test`` is set.
    """
    if use_score:
        train_env_id, test_env_id = 'malware-score-v0', 'malware-score-test-v0'
    else:
        train_env_id, test_env_id = 'malware-v0', 'malware-test-v0'

    env = gym.make(train_env_id)
    env.seed(123)
    nb_actions = env.action_space.n

    # Each "experience" holds a single observation (window of length 1).
    window = 1
    model_input_shape = (window,) + env.observation_space.shape
    model = generate_dense_model(model_input_shape, layers, nb_actions)

    # BoltzmannQPolicy picks actions stochastically from soft-maxed Q values.
    q_policy = BoltzmannQPolicy()
    replay = SequentialMemory(limit=32, ignore_episode_boundaries=False,
                              window_length=window)

    # DQN agent with the double-DQN and dueling-network options enabled.
    # http://arxiv.org/pdf/1312.5602.pdf
    # http://arxiv.org/abs/1509.06461
    agent = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=replay,
        nb_steps_warmup=16,
        enable_double_dqn=True,
        enable_dueling_network=True,
        dueling_type='avg',
        target_model_update=1e-2,
        policy=q_policy,
        batch_size=16,
    )

    # keras-rl accepts any built-in keras optimizer here.
    agent.compile(RMSprop(lr=1e-3), metrics=['mae'])

    # Train, then capture what the training environment recorded.
    agent.fit(env, nb_steps=rounds, visualize=False, verbose=2)
    history_train = env.history

    if not run_test:
        return agent, model, history_train, None

    # Evaluate the agent for 100 episodes on the separate test environment.
    test_env = gym.make(test_env_id)
    agent.test(test_env, nb_episodes=100, visualize=False)
    return agent, model, history_train, test_env.history
示例9: training_game
# 需要導入模塊: from rl.agents import dqn [as 別名]
# 或者: from rl.agents.dqn import DQNAgent [as 別名]
def training_game():
    """Train a DQN agent on the mini-game named by ``FLAGS.mini_game``, then test it.

    Builds the environment, model, replay memory, processor and an annealed
    epsilon-greedy policy, optionally restores weights from
    ``dqn_<mini_game>_weights.h5f`` when ``LOAD_MODEL`` is set, fits the agent
    with a TensorBoard callback, saves the final weights to that same file and
    runs 30 test episodes.
    """
    env = Environment()

    input_shape = (FLAGS.screen_size, FLAGS.screen_size, 1)
    nb_actions = 12  # Number of actions
    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)
    processor = SC2Proc()

    # Policy
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr="eps", value_max=1, value_min=0.7,
                                  value_test=.0, nb_steps=1e6)

    # Agent
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   enable_double_dqn=False,
                   nb_steps_warmup=500,
                   target_model_update=1e-2,
                   policy=policy,
                   batch_size=150,
                   processor=processor)
    dqn.compile(Adam(lr=.001), metrics=["mae"])

    # Tensorboard callback. Agent.fit documents `callbacks` as a list, so the
    # single callback is wrapped in one instead of being passed bare.
    callbacks = [keras.callbacks.TensorBoard(log_dir='./Graph', histogram_freq=0,
                                             write_graph=True, write_images=False)]

    # Weights are restored from / saved to this file.
    # NOTE(review): SAVE_MODEL is no longer consulted here. The checkpoint and
    # log file names it used to select were never passed to any callback, so
    # those unused locals were removed - confirm no periodic checkpointing was
    # intended.
    name = FLAGS.mini_game
    w_file = "dqn_{}_weights.h5f".format(name)

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    # Step counts are passed as integers, as Agent.fit documents them.
    dqn.fit(env, callbacks=callbacks, nb_steps=int(1e7), action_repetition=2,
            log_interval=int(1e4), verbose=2)

    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
示例10: training_game
# 需要導入模塊: from rl.agents import dqn [as 別名]
# 或者: from rl.agents.dqn import DQNAgent [as 別名]
def training_game():
    """Train a DQN agent on the "HallucinIce" map, then test it.

    Builds the environment, model, replay memory, processor and an annealed
    epsilon-greedy policy, optionally restores weights from
    ``dqn_HallucinIce_weights.h5f`` when ``LOAD_MODEL`` is set, fits the agent
    with checkpoint and file-logger callbacks, saves the final weights to that
    same file and runs 30 test episodes.
    """
    env = Environment(
        map_name="HallucinIce",
        visualize=True,
        game_steps_per_episode=150,
        agent_interface_format=features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=64, minimap=32)
        ),
    )

    input_shape = (_SIZE, _SIZE, 1)
    # One action per cell of a _SIZE x _SIZE grid.
    # NOTE(review): assumes _SIZE is an int so this is an integer count - confirm.
    nb_actions = _SIZE * _SIZE
    model = neural_network_model(input_shape, nb_actions)

    # window_length sets how many consecutive observations the memory groups
    # into one state.
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)
    processor = SC2Proc()

    # Policy: epsilon-greedy action selection, with the policy's `eps`
    # attribute annealed linearly from value_max to value_min over nb_steps;
    # value_test is used while testing.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr="eps", value_max=1, value_min=0.7,
                                  value_test=.0, nb_steps=1e6)

    # Agent
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=500, target_model_update=1e-2, policy=policy,
                   batch_size=150, processor=processor)
    dqn.compile(Adam(lr=.001), metrics=["mae"])

    # File names for the final weights, the periodic checkpoints and the log.
    name = "HallucinIce"
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"
    if SAVE_MODEL:
        # Use a "{step}" placeholder in the checkpoint file name.
        check_w_file = "train_w" + name + "_weights_{step}.h5f"
    log_file = "training_w_{}_log.json".format(name)

    callbacks = [
        ModelIntervalCheckpoint(check_w_file, interval=1000),
        FileLogger(log_file, interval=100),
    ]

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    # Step counts are passed as integers, as Agent.fit documents them.
    dqn.fit(env, callbacks=callbacks, nb_steps=int(1e7), action_repetition=2,
            log_interval=int(1e4), verbose=2)

    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)