本文整理汇总了Python中utils.ReplayBuffer方法的典型用法代码示例。如果您正苦于以下问题：Python utils.ReplayBuffer方法的具体用法？Python utils.ReplayBuffer怎么用？Python utils.ReplayBuffer使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类utils的用法示例。
在下文中一共展示了utils.ReplayBuffer方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: import utils [as 别名]
# 或者: from utils import ReplayBuffer [as 别名]
def __init__(self, k_level, H, state_dim, action_dim, render, threshold,
             action_bounds, action_offset, state_bounds, state_offset, lr):
    """Build one DDPG policy and one replay buffer per hierarchy level.

    The lowest level is always created and is constructed with
    ``action_dim`` / ``action_bounds`` / ``action_offset``.  Each of the
    remaining ``k_level - 1`` levels is constructed with ``state_dim`` /
    ``state_bounds`` / ``state_offset`` in those same positions
    (presumably because higher levels emit states as sub-goals -- confirm
    against the DDPG definition).

    Args:
        k_level: Total number of levels in the hierarchy.
        H: Forwarded as the last argument to every ``DDPG`` and stored on
            the instance.
        state_dim: First argument to every ``DDPG``; also the second
            argument for all levels above the lowest.
        action_dim: Second argument to the lowest-level ``DDPG``.
        render: Stored as ``self.render``; not used here.
        threshold: Stored as ``self.threshold``; not used here.
        action_bounds: Bounds passed to the lowest-level ``DDPG``.
        action_offset: Offset passed to the lowest-level ``DDPG``.
        state_bounds: Bounds passed to every higher-level ``DDPG``.
        state_offset: Offset passed to every higher-level ``DDPG``.
        lr: Forwarded to every ``DDPG``.
    """
    self.HAC = []
    self.replay_buffer = []

    def _add_level(output_dim, bounds, offset):
        # Policy first, then its buffer, so per-level construction order
        # stays interleaved.
        self.HAC.append(DDPG(state_dim, output_dim, bounds, offset, lr, H))
        self.replay_buffer.append(ReplayBuffer())

    # Lowest level.
    _add_level(action_dim, action_bounds, action_offset)

    # Every remaining level.
    for _ in range(k_level - 1):
        _add_level(state_dim, state_bounds, state_offset)

    # Configuration kept on the instance.
    self.k_level = k_level
    self.H = H
    self.action_dim = action_dim
    self.state_dim = state_dim
    self.threshold = threshold
    self.render = render

    # Logging state: one goal slot per level, plus running counters.
    self.goals = [None] * k_level
    self.reward = 0
    self.timestep = 0
示例2: train_BCQ
# 需要导入模块: import utils [as 别名]
# 或者: from utils import ReplayBuffer [as 别名]
def train_BCQ(state_dim, action_dim, max_action, device, args):
    """Train a BCQ policy from a saved replay buffer, evaluating each round.

    Loads the buffer from ``./buffers/{args.buffer_name}_{args.env}_{args.seed}``,
    then alternates ``policy.train`` and ``eval_policy`` until the iteration
    counter reaches ``args.max_timesteps``.  After every round the complete
    list of evaluation results is written with ``np.save`` to
    ``./results/BCQ_{args.env}_{args.seed}``.

    Args:
        state_dim: Passed to ``BCQ.BCQ`` and ``utils.ReplayBuffer``.
        action_dim: Passed to ``BCQ.BCQ`` and ``utils.ReplayBuffer``.
        max_action: Passed to ``BCQ.BCQ``.
        device: Passed to ``BCQ.BCQ`` and ``utils.ReplayBuffer``.
        args: Namespace providing ``env``, ``seed``, ``buffer_name``,
            ``discount``, ``tau``, ``lmbda``, ``phi``, ``eval_freq``,
            ``batch_size`` and ``max_timesteps``.

    Returns:
        None.  Results are persisted to disk and progress is printed.
    """
    # For saving files: one tag shared by the buffer and results file names.
    setting = f"{args.env}_{args.seed}"
    buffer_name = f"{args.buffer_name}_{setting}"

    # Initialize policy
    policy = BCQ.BCQ(state_dim, action_dim, max_action, device, args.discount, args.tau, args.lmbda, args.phi)

    # Load buffer
    replay_buffer = utils.ReplayBuffer(state_dim, action_dim, device)
    replay_buffer.load(f"./buffers/{buffer_name}")

    evaluations = []
    training_iters = 0

    while training_iters < args.max_timesteps:
        # The return value of train() is not used by this loop.
        policy.train(replay_buffer, iterations=int(args.eval_freq), batch_size=args.batch_size)

        evaluations.append(eval_policy(policy, args.env, args.seed))
        # The full evaluation history is re-saved after every round.
        np.save(f"./results/BCQ_{setting}", evaluations)

        # The counter advances by the raw eval_freq (not the int-truncated
        # value handed to train()), so it prints as a float when eval_freq
        # is a float.
        training_iters += args.eval_freq
        print(f"Training iterations: {training_iters}")
# Runs policy for X episodes and returns average reward
# A fixed seed is used for the eval environment