This article collects typical usage examples of the Python method rl.agents.DDPGAgent. If you are unsure how exactly to use agents.DDPGAgent, or are looking for working examples, the curated code samples below may help. You can also explore further usage examples from its containing module, rl.agents.
The following shows 2 code examples of the agents.DDPGAgent method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
Example 1: test_ddpg
# Required import: from rl import agents [as alias]
# Or: from rl.agents import DDPGAgent [as alias]
import random

import gym
import numpy as np
from keras.layers import Activation, Concatenate, Dense, Flatten, Input
from keras.models import Model, Sequential
from keras.optimizers import Adam

from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess


def test_ddpg():
    # TODO: replace this with a simpler environment where we can actually test if it finds a solution
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]

    # Actor network: maps observations to continuous actions.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))

    # Critic network: maps (action, observation) pairs to a scalar Q-value.
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(16)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                      memory=memory, nb_steps_warmup_critic=50, nb_steps_warmup_actor=50,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile([Adam(lr=1e-3), Adam(lr=1e-3)])  # one optimizer for the actor, one for the critic
    agent.fit(env, nb_steps=400, visualize=False, verbose=0, nb_max_episode_steps=100)
    h = agent.test(env, nb_episodes=2, visualize=False, nb_max_episode_steps=100)
    # TODO: evaluate history
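The test call above returns a Keras-style History object, but the example never inspects it (hence the TODO). As a minimal sketch, assuming keras-rl's default per-episode logging keys (in particular 'episode_reward'), the history could be checked like this:

rewards = h.history.get('episode_reward', [])  # per-episode rewards from agent.test (assumed key)
if rewards:
    print('mean test episode reward: {:.2f}'.format(sum(rewards) / len(rewards)))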
Example 2: __init__
# Required import: from rl import agents [as alias]
# Or: from rl.agents import DDPGAgent [as alias]
# (This excerpt also relies on the Keras imports shown in Example 1, plus keras.layers.Lambda.)
def __init__(self, env, *args, **kwargs):
    super(KerasDDPGAgent, self).__init__(*args, **kwargs)
    self.env = env

    # assert len(env.action_space.shape) == 1
    # TODO: is there a way to output a tuple (6, 1)?
    # Count the non-zero entries of a sampled action to infer the number of actions.
    nb_actions = sum(sum(1 for i in row if i) for row in self.env.action_space.sample())

    # TODO: terminology? feature or observation?
    observation = env.reset()
    print(">>>>>>>>>>>>>>>>>>>", observation.shape)

    # TODO: find a way to customize the network
    # Actor network: tanh output scaled by pi, so actions lie in [-pi, pi].
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + observation.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('tanh'))
    actor.add(Lambda(lambda x: x * 3.14159))
    print(actor.summary())

    # Critic network: maps (action, observation) pairs to a scalar Q-value.
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + observation.shape, name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    print(critic.summary())

    memory = SequentialMemory(limit=500000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
    self.agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                           memory=memory, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
                           random_process=random_process, gamma=.99, target_model_update=1e-3)
    self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
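Example 2 only shows the constructor of the wrapper class; the DDPGAgent stored in self.agent still has to be driven elsewhere. A hypothetical sketch of such wrapper methods follows (the train/evaluate names and step counts below are illustrative, not from the original source):

def train(self, nb_steps=100000):
    # Delegate training to the keras-rl agent built in __init__ (hypothetical helper).
    self.agent.fit(self.env, nb_steps=nb_steps, visualize=False, verbose=1)

def evaluate(self, nb_episodes=5):
    # Run evaluation episodes and return the keras-rl history object (hypothetical helper).
    return self.agent.test(self.env, nb_episodes=nb_episodes, visualize=False)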