本文整理汇总了 Python 中 buffer.Buffer.sample 方法的典型用法代码示例。如果您正苦于以下问题：Python Buffer.sample 方法的具体用法？Python Buffer.sample 怎么用？Python Buffer.sample 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 buffer.Buffer 的用法示例。
在下文中一共展示了 Buffer.sample 方法的 2 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: Q
# Required import: from buffer import Buffer [as alias]
# Or: from buffer.Buffer import sample [as alias]
# NOTE(review): this excerpt starts in the middle of a function and its
# indentation was lost during extraction. Names such as env, action, x, R, V,
# model, target_model, args and episode_reward are defined before the excerpt.
# The loop nesting mentioned in the comments below is inferred from the
# statements themselves -- confirm against the original file.
#print "mu:", u, "action:", action
#print "Q(mu):", Q(x, np.array([u])), "Q(action):", Q(x, np.array([action]))
# take the action and record reward
observation, reward, done, info = env.step(action)
episode_reward += reward
#print "reward:", reward
#print "poststate:", observation
# add experience to replay memory
# (presumably x[0] is the observation the action was chosen from -- TODO confirm)
R.add(x[0], action, reward, observation, done)
# accumulates the values returned by model.train_on_batch in the loop below
loss = 0
# perform train_repeat Q-updates
for k in range(args.train_repeat):
# draw args.batch_size stored transitions from the replay memory
preobs, actions, rewards, postobs, terminals = R.sample(args.batch_size)
# Q-update
# regression target: reward + gamma * V(post-step observation)
# NOTE(review): `terminals` is unpacked but never used, so the target also
# bootstraps from V(postobs) for transitions recorded with done=True --
# confirm this is intended.
v = V(postobs)
y = rewards + args.gamma * np.squeeze(v)
loss += model.train_on_batch([preobs, actions], y)
# copy weights to target model, averaged by tau
weights = model.get_weights()
target_weights = target_model.get_weights()
# each target weight becomes tau * online weight + (1 - tau) * old target weight
for i in range(len(weights)):
target_weights[i] = args.tau * weights[i] + (1 - args.tau) * target_weights[i]
target_model.set_weights(target_weights)
# NOTE(review): if this print is re-enabled, k is the last loop index
# (args.train_repeat - 1), not the number of updates, so loss/k is not the
# mean and divides by zero when args.train_repeat == 1.
#print "average loss:", loss/k
# NOTE(review): the excerpt is truncated here; the body of this `if` is not shown.
if done:
示例2: xrange
# Required import: from buffer import Buffer [as alias]
# Or: from buffer.Buffer import sample [as alias]
# NOTE(review): this excerpt starts in the middle of a function and its
# indentation was lost during extraction. The `if` that the `else:` below
# belongs to is not shown, and env, observation, model, target_model, mem,
# args and episode_reward are defined before the excerpt. The nesting
# mentioned in the comments below is inferred -- confirm against the original file.
# branch 1 (condition not visible): pick a random action from the action space
action = env.action_space.sample()
else:
# branch 2: pick the action with the highest predicted value for the
# current observation, which is wrapped into a batch of one for the model
s = np.array([observation])
q = model.predict_on_batch(s)
#print "q:", q
action = np.argmax(q[0])
#print "action:", action
# keep the pre-step observation, because env.step overwrites `observation`
prev_observation = observation
observation, reward, done, info = env.step(action)
episode_reward += reward
#print "reward:", reward
# store the transition; the action is wrapped in a one-element array
mem.add(prev_observation, np.array([action]), reward, observation, done)
# NOTE(review): xrange exists only in Python 2; under Python 3 this name is
# undefined and range would be needed instead.
for k in xrange(args.train_repeat):
# draw args.batch_size stored transitions from the replay memory
prestates, actions, rewards, poststates, terminals = mem.sample(args.batch_size)
# predictions of the online model for the pre-states and of the target
# model for the post-states
qpre = model.predict_on_batch(prestates)
qpost = target_model.predict_on_batch(poststates)
# overwrite only the entry of the action taken in each sample with its
# target value; all other entries keep the model's own predictions
for i in xrange(qpre.shape[0]):
if terminals[i]:
# terminal transition: the target is the reward alone
qpre[i, actions[i]] = rewards[i]
else:
# otherwise: reward plus gamma times the largest target-model value
qpre[i, actions[i]] = rewards[i] + args.gamma * np.amax(qpost[i])
# fit the online model towards the modified predictions
model.train_on_batch(prestates, qpre)
# blend the online weights into the target model:
# target = tau * online + (1 - tau) * target
weights = model.get_weights()
target_weights = target_model.get_weights()
for i in xrange(len(weights)):
target_weights[i] = args.tau * weights[i] + (1 - args.tau) * target_weights[i]
target_model.set_weights(target_weights)