本文整理匯總了Python中rl.memory.SequentialMemory方法的典型用法代碼示例。如果您正苦於以下問題:Python memory.SequentialMemory方法的具體用法?Python memory.SequentialMemory怎麽用?Python memory.SequentialMemory使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類rl.memory
的用法示例。
在下文中一共展示了memory.SequentialMemory方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: test_single_ddpg_input
# 需要導入模塊: from rl import memory [as 別名]
# 或者: from rl.memory import SequentialMemory [as 別名]
def test_single_ddpg_input():
    """DDPG smoke test: a single (2, 3) observation input for actor and critic."""
    nb_actions = 2

    # Actor: flatten the windowed observation, map straight to actions.
    actor = Sequential()
    actor.add(Flatten(input_shape=(2, 3)))
    actor.add(Dense(nb_actions))

    # Critic: concatenate the action with the flattened observation -> Q-value.
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(2, 3), name='observation_input')
    merged = Concatenate()([action_input, Flatten()(observation_input)])
    q_value = Dense(1)(merged)
    critic = Model(inputs=[action_input, observation_input], outputs=q_value)

    agent = DDPGAgent(actor=actor, critic=critic, critic_action_input=action_input,
                      memory=SequentialMemory(limit=10, window_length=2),
                      nb_actions=2, nb_steps_warmup_critic=5, nb_steps_warmup_actor=5,
                      batch_size=4)
    agent.compile('sgd')
    agent.fit(MultiInputTestEnv((3,)), nb_steps=10)
示例2: test_single_continuous_dqn_input
# 需要導入模塊: from rl import memory [as 別名]
# 或者: from rl.memory import SequentialMemory [as 別名]
def test_single_continuous_dqn_input():
    """NAF smoke test: single (2, 3) observation input for the V, mu and L nets."""
    nb_actions = 2

    # State-value network V(s).
    V_model = Sequential()
    V_model.add(Flatten(input_shape=(2, 3)))
    V_model.add(Dense(1))

    # Greedy-action network mu(s).
    mu_model = Sequential()
    mu_model.add(Flatten(input_shape=(2, 3)))
    mu_model.add(Dense(nb_actions))

    # L network consumes both observation and action; its output width covers
    # the lower-triangular entries of the NAF advantage matrix.
    L_input = Input(shape=(2, 3))
    L_input_action = Input(shape=(nb_actions,))
    merged = Concatenate()([Flatten()(L_input), L_input_action])
    tril_entries = (nb_actions * nb_actions + nb_actions) // 2
    L_out = Dense(tril_entries)(merged)
    L_model = Model(inputs=[L_input_action, L_input], outputs=L_out)

    agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model,
                     mu_model=mu_model,
                     memory=SequentialMemory(limit=10, window_length=2),
                     nb_steps_warmup=5, batch_size=4)
    agent.compile('sgd')
    agent.fit(MultiInputTestEnv((3,)), nb_steps=10)
示例3: test_double_dqn
# 需要導入模塊: from rl import memory [as 別名]
# 或者: from rl.memory import SequentialMemory [as 別名]
def test_double_dqn():
    """Train a double DQN on a deterministic two-round env and verify the
    greedy test reward hits the known optimum of 3."""
    env = TwoRoundDeterministicRewardEnv()

    # Seed every RNG involved so the final assertion is reproducible.
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # A tiny fully-connected Q-network is enough for this environment.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions,
                   memory=SequentialMemory(limit=1000, window_length=1),
                   nb_steps_warmup=50, target_model_update=1e-1, policy=policy,
                   enable_double_dqn=True)
    dqn.compile(Adam(lr=1e-3))
    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)

    # Evaluate greedily (eps=0) and expect the optimal return on average.
    policy.eps = 0.
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
示例4: test_duel_dqn
# 需要導入模塊: from rl import memory [as 別名]
# 或者: from rl.memory import SequentialMemory [as 別名]
def test_duel_dqn():
    """Train a dueling DQN on a deterministic two-round env and verify the
    greedy test reward hits the known optimum of 3."""
    env = TwoRoundDeterministicRewardEnv()

    # Seed every RNG involved so the final assertion is reproducible.
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # A tiny fully-connected Q-network; the dueling split itself is applied by
    # DQNAgent when enable_dueling_network=True.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions,
                   memory=SequentialMemory(limit=1000, window_length=1),
                   nb_steps_warmup=50, target_model_update=1e-1, policy=policy,
                   enable_double_dqn=False, enable_dueling_network=True)
    dqn.compile(Adam(lr=1e-3))
    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)

    # Evaluate greedily (eps=0) and expect the optimal return on average.
    policy.eps = 0.
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
示例5: initiate_agent
# 需要導入模塊: from rl import memory [as 別名]
# 或者: from rl.memory import SequentialMemory [as 別名]
def initiate_agent(self, nb_actions):
    """initiate a deep Q agent"""
    # Q-network: three 512-unit ReLU layers, each followed by dropout, with a
    # linear head of one output per action.
    # NOTE(review): `env` is a module-level name here (not a parameter), and
    # observation_space is passed directly as input_shape — confirm the caller
    # provides a shape tuple there.
    self.model = Sequential()
    self.model.add(Dense(512, activation='relu', input_shape=env.observation_space))  # pylint: disable=no-member
    for _ in range(2):
        self.model.add(Dropout(0.2))
        self.model.add(Dense(512, activation='relu'))
    self.model.add(Dropout(0.2))
    self.model.add(Dense(nb_actions, activation='linear'))

    # Finally, we configure and compile our agent. You can use every built-in
    # Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)  # pylint: disable=unused-variable
    policy = TrumpPolicy()  # pylint: disable=unused-variable
示例6: test_training_flag
# 需要導入模塊: from rl import memory [as 別名]
# 或者: from rl.memory import SequentialMemory [as 別名]
def test_training_flag():
    """SequentialMemory must only persist entries when training=True, while
    get_recent_state works in both modes."""
    obs_size = (3, 4)
    obs0, obs1, obs2 = (np.random.random(obs_size) for _ in range(3))
    terminal0, terminal1, terminal2 = False, True, False

    for training in (True, False):
        memory = SequentialMemory(3, window_length=2)

        # Nothing appended yet: the window is zero-padded before obs0.
        state = np.array(memory.get_recent_state(obs0))
        assert state.shape == (2,) + obs_size
        assert np.allclose(state[0], 0.)
        assert np.all(state[1] == obs0)
        assert memory.nb_entries == 0

        memory.append(obs0, 0, 0., terminal1, training=training)
        state = np.array(memory.get_recent_state(obs1))
        assert state.shape == (2,) + obs_size
        assert np.all(state[0] == obs0)
        assert np.all(state[1] == obs1)
        # Only training appends are persisted.
        assert memory.nb_entries == (1 if training else 0)

        memory.append(obs1, 0, 0., terminal2, training=training)
        state = np.array(memory.get_recent_state(obs2))
        assert state.shape == (2,) + obs_size
        # The previous append was terminal, so the window restarts: zero pad
        # again before obs2.
        assert np.allclose(state[0], 0.)
        assert np.all(state[1] == obs2)
        assert memory.nb_entries == (2 if training else 0)
示例7: test_multi_ddpg_input
# 需要導入模塊: from rl import memory [as 別名]
# 或者: from rl.memory import SequentialMemory [as 別名]
def test_multi_ddpg_input():
    """DDPG smoke test with two observation inputs routed through a
    MultiInputProcessor."""
    nb_actions = 2

    # Actor: two observation branches, concatenated and flattened, mapped to
    # actions. (Removed a dead `actor = Sequential()` that was immediately
    # overwritten by the functional Model below.)
    actor_observation_input1 = Input(shape=(2, 3), name='actor_observation_input1')
    actor_observation_input2 = Input(shape=(2, 4), name='actor_observation_input2')
    x = Concatenate()([actor_observation_input1, actor_observation_input2])
    x = Flatten()(x)
    x = Dense(nb_actions)(x)
    actor = Model(inputs=[actor_observation_input1, actor_observation_input2], outputs=x)

    # Critic: action plus both observations -> scalar Q-value.
    action_input = Input(shape=(nb_actions,), name='action_input')
    critic_observation_input1 = Input(shape=(2, 3), name='critic_observation_input1')
    critic_observation_input2 = Input(shape=(2, 4), name='critic_observation_input2')
    x = Concatenate()([critic_observation_input1, critic_observation_input2])
    x = Concatenate()([action_input, Flatten()(x)])
    x = Dense(1)(x)
    critic = Model(inputs=[action_input, critic_observation_input1, critic_observation_input2],
                   outputs=x)

    processor = MultiInputProcessor(nb_inputs=2)
    memory = SequentialMemory(limit=10, window_length=2)
    agent = DDPGAgent(actor=actor, critic=critic, critic_action_input=action_input, memory=memory,
                      nb_actions=2, nb_steps_warmup_critic=5, nb_steps_warmup_actor=5,
                      batch_size=4, processor=processor)
    agent.compile('sgd')
    agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10)
示例8: test_multi_dqn_input
# 需要導入模塊: from rl import memory [as 別名]
# 或者: from rl.memory import SequentialMemory [as 別名]
def test_multi_dqn_input():
    """DQN smoke test with two observation inputs and a MultiInputProcessor,
    run once with and once without double DQN."""
    input1 = Input(shape=(2, 3))
    input2 = Input(shape=(2, 4))
    head = Dense(2)(Flatten()(Concatenate()([input1, input2])))
    model = Model(inputs=[input1, input2], outputs=head)

    # NOTE(review): the same memory instance is shared across both loop
    # iterations, mirroring the original snippet.
    memory = SequentialMemory(limit=10, window_length=2)
    processor = MultiInputProcessor(nb_inputs=2)
    for double_dqn in (True, False):
        agent = DQNAgent(model, memory=memory, nb_actions=2, nb_steps_warmup=5,
                         batch_size=4, processor=processor,
                         enable_double_dqn=double_dqn)
        agent.compile('sgd')
        agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10)
示例9: test_multi_continuous_dqn_input
# 需要導入模塊: from rl import memory [as 別名]
# 或者: from rl.memory import SequentialMemory [as 別名]
def test_multi_continuous_dqn_input():
    """NAF smoke test with two observation inputs and a MultiInputProcessor."""
    nb_actions = 2

    def observation_net(out_units):
        # Two observation branches -> concatenate -> flatten -> Dense(out_units).
        branch1 = Input(shape=(2, 3))
        branch2 = Input(shape=(2, 4))
        hidden = Flatten()(Concatenate()([branch1, branch2]))
        return Model(inputs=[branch1, branch2], outputs=Dense(out_units)(hidden))

    V_model = observation_net(1)            # state value V(s)
    mu_model = observation_net(nb_actions)  # greedy action mu(s)

    # L network additionally consumes the action; its output width covers the
    # lower-triangular entries of the NAF advantage matrix.
    L_input1 = Input(shape=(2, 3))
    L_input2 = Input(shape=(2, 4))
    L_input_action = Input(shape=(nb_actions,))
    obs = Flatten()(Concatenate()([L_input1, L_input2]))
    merged = Concatenate()([obs, L_input_action])
    L_out = Dense((nb_actions * nb_actions + nb_actions) // 2)(merged)
    L_model = Model(inputs=[L_input_action, L_input1, L_input2], outputs=L_out)

    agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model,
                     mu_model=mu_model,
                     memory=SequentialMemory(limit=10, window_length=2),
                     nb_steps_warmup=5, batch_size=4,
                     processor=MultiInputProcessor(nb_inputs=2))
    agent.compile('sgd')
    agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10)
示例10: test_cdqn
# 需要導入模塊: from rl import memory [as 別名]
# 或者: from rl.memory import SequentialMemory [as 別名]
def test_cdqn():
    """NAF (continuous DQN) smoke test on Pendulum-v0."""
    # TODO: replace this with a simpler environment where we can actually test
    # if it finds a solution.
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]
    obs_shape = (1,) + env.observation_space.shape

    def small_mlp(out_units):
        # Flatten the windowed observation, one 16-unit ReLU layer, linear head.
        net = Sequential()
        net.add(Flatten(input_shape=obs_shape))
        net.add(Dense(16))
        net.add(Activation('relu'))
        net.add(Dense(out_units))
        return net

    V_model = small_mlp(1)            # state value V(s)
    mu_model = small_mlp(nb_actions)  # greedy action mu(s)

    # L network: action + flattened observation -> lower-triangular advantage
    # entries of the NAF formulation.
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=obs_shape, name='observation_input')
    t = Concatenate()([action_input, Flatten()(observation_input)])
    t = Dense(16)(t)
    t = Activation('relu')(t)
    t = Dense((nb_actions * nb_actions + nb_actions) // 2)(t)
    L_model = Model(inputs=[action_input, observation_input], outputs=t)

    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
    agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model,
                     memory=memory, nb_steps_warmup=50, random_process=random_process,
                     gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=1e-3))
    agent.fit(env, nb_steps=400, visualize=False, verbose=0, nb_max_episode_steps=100)
    h = agent.test(env, nb_episodes=2, visualize=False, nb_max_episode_steps=100)
    # TODO: evaluate history
示例11: test_ddpg
# 需要導入模塊: from rl import memory [as 別名]
# 或者: from rl.memory import SequentialMemory [as 別名]
def test_ddpg():
    """DDPG smoke test on Pendulum-v0."""
    # TODO: replace this with a simpler environment where we can actually test
    # if it finds a solution.
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]
    obs_shape = (1,) + env.observation_space.shape

    # Actor: small MLP from flattened observation window to the action vector.
    actor = Sequential()
    actor.add(Flatten(input_shape=obs_shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))

    # Critic: action + flattened observation -> scalar Q-value.
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=obs_shape, name='observation_input')
    t = Concatenate()([action_input, Flatten()(observation_input)])
    t = Dense(16)(t)
    t = Activation('relu')(t)
    t = Dense(1)(t)
    t = Activation('linear')(t)
    critic = Model(inputs=[action_input, observation_input], outputs=t)

    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input,
                      memory=SequentialMemory(limit=1000, window_length=1),
                      nb_steps_warmup_critic=50, nb_steps_warmup_actor=50,
                      random_process=OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3),
                      gamma=.99, target_model_update=1e-3)
    agent.compile([Adam(lr=1e-3), Adam(lr=1e-3)])
    agent.fit(env, nb_steps=400, visualize=False, verbose=0, nb_max_episode_steps=100)
    h = agent.test(env, nb_episodes=2, visualize=False, nb_max_episode_steps=100)
    # TODO: evaluate history
示例12: initiate_agent
# 需要導入模塊: from rl import memory [as 別名]
# 或者: from rl.memory import SequentialMemory [as 別名]
def initiate_agent(self, env):
    """initiate a deep Q agent"""
    tf.compat.v1.disable_eager_execution()
    self.env = env
    nb_actions = self.env.action_space.n

    # Q-network: three 512-unit ReLU layers, each followed by dropout, with a
    # linear head of one output per action.
    # NOTE(review): observation_space is passed directly as input_shape —
    # assumes the env exposes a shape tuple there; confirm against the env.
    self.model = Sequential()
    self.model.add(Dense(512, activation='relu', input_shape=env.observation_space))
    self.model.add(Dropout(0.2))
    self.model.add(Dense(512, activation='relu'))
    self.model.add(Dropout(0.2))
    self.model.add(Dense(512, activation='relu'))
    self.model.add(Dropout(0.2))
    self.model.add(Dense(nb_actions, activation='linear'))

    # Finally, we configure and compile our agent. You can use every built-in
    # Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    policy = TrumpPolicy()
    # (Removed a redundant second `nb_actions = env.action_space.n`; the value
    # was already computed above from the same env.)
    self.dqn = DQNAgent(model=self.model, nb_actions=nb_actions, memory=memory,
                        nb_steps_warmup=nb_steps_warmup,
                        target_model_update=1e-2, policy=policy,
                        processor=CustomProcessor(),
                        batch_size=batch_size, train_interval=train_interval,
                        enable_double_dqn=enable_double_dqn)
    self.dqn.compile(tf.keras.optimizers.Adam(lr=1e-3), metrics=['mae'])
示例13: play
# 需要導入模塊: from rl import memory [as 別名]
# 或者: from rl.memory import SequentialMemory [as 別名]
def play(self, nb_episodes=5, render=False):
    """Let the agent play"""
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    policy = TrumpPolicy()

    class CustomProcessor(Processor):  # pylint: disable=redefined-outer-name
        """The agent and the environment"""

        def process_state_batch(self, batch):
            """
            Given a state batch, I want to remove the second dimension, because it's
            useless and prevents me from feeding the tensor into my CNN
            """
            return np.squeeze(batch, axis=1)

        def process_info(self, info):
            # Keep only the player data; collapse it to a dummy payload when a
            # 'stack' entry is present.
            processed_info = info['player_data']
            if 'stack' in processed_info:
                processed_info = {'x': 1}
            return processed_info

    nb_actions = self.env.action_space.n
    agent_kwargs = dict(model=self.model, nb_actions=nb_actions, memory=memory,
                        nb_steps_warmup=nb_steps_warmup, target_model_update=1e-2,
                        policy=policy, processor=CustomProcessor(),
                        batch_size=batch_size, train_interval=train_interval,
                        enable_double_dqn=enable_double_dqn)
    self.dqn = DQNAgent(**agent_kwargs)
    self.dqn.compile(tf.optimizers.Adam(lr=1e-3), metrics=['mae'])  # pylint: disable=no-member
    self.dqn.test(self.env, nb_episodes=nb_episodes, visualize=render)
示例14: main
# 需要導入模塊: from rl import memory [as 別名]
# 或者: from rl.memory import SequentialMemory [as 別名]
def main():
    """Train and evaluate a DQN agent on LunarLander-v2."""
    ENV_NAME = 'LunarLander-v2'

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n
    state_space = env.observation_space.shape[0]
    print(num_actions)

    model = build_model(state_space, num_actions)
    memory = SequentialMemory(limit=50000, window_length=1)
    # Anneal epsilon from 1.0 down to 0.1 over the first 10k steps.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                  value_min=.1, value_test=.05, nb_steps=10000)
    dqn = DQNAgent(model=model, nb_actions=num_actions, memory=memory,
                   nb_steps_warmup=10, target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=0.00025), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)
    dqn.fit(env, nb_steps=500000, visualize=False, verbose=2, callbacks=callbacks)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
示例15: main
# 需要導入模塊: from rl import memory [as 別名]
# 或者: from rl.memory import SequentialMemory [as 別名]
def main():
    """Train and evaluate a DQN agent on BreakoutDeterministic-v4."""
    ENV_NAME = 'BreakoutDeterministic-v4'
    INPUT_SHAPE = (84, 84)
    WINDOW_LENGTH = 4

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n
    # (Removed the unused local `input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE`;
    # build_model receives INPUT_SHAPE directly.)
    model = build_model(INPUT_SHAPE, num_actions)

    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = AtariProcessor()
    # Anneal epsilon from 1.0 down to 0.1 over the first 1M steps.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                  value_min=.1, value_test=.05, nb_steps=1000000)
    dqn = DQNAgent(model=model, nb_actions=num_actions, policy=policy, memory=memory,
                   processor=processor, nb_steps_warmup=50000, gamma=.99,
                   target_model_update=10000, train_interval=4, delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)
    dqn.fit(env, nb_steps=1750000, log_interval=10000, visualize=False, verbose=2,
            callbacks=callbacks)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)