本文整理匯總了Python中utils.ReplayBuffer方法的典型用法代碼示例。如果您正苦於以下問題:Python utils.ReplayBuffer方法的具體用法?Python utils.ReplayBuffer怎麼用?Python utils.ReplayBuffer使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類utils的用法示例。
在下文中一共展示了utils.ReplayBuffer方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: __init__
# 需要導入模塊: import utils [as 別名]
# 或者: from utils import ReplayBuffer [as 別名]
def __init__(self, k_level, H, state_dim, action_dim, render, threshold,
             action_bounds, action_offset, state_bounds, state_offset, lr):
    """Build one DDPG policy and one ReplayBuffer per hierarchy level.

    The lowest level is created with ``action_dim``/``action_bounds``/
    ``action_offset``; each of the remaining ``k_level - 1`` levels is
    created with ``state_dim``/``state_bounds``/``state_offset`` in those
    positions (presumably because higher levels emit goals in state
    space -- confirm against the DDPG class).

    Args:
        k_level: Number of levels; one level is always created, and
            ``k_level - 1`` more are added on top of it.
        H: Passed unchanged to every DDPG instance and stored on ``self``.
        state_dim: First argument to every DDPG; also the second argument
            for the upper levels.
        action_dim: Second argument to the lowest-level DDPG.
        render: Stored on ``self``; not used here.
        threshold: Stored on ``self``; not used here.
        action_bounds: Bounds argument of the lowest-level DDPG.
        action_offset: Offset argument of the lowest-level DDPG.
        state_bounds: Bounds argument of the upper-level DDPGs.
        state_offset: Offset argument of the upper-level DDPGs.
        lr: Passed unchanged to every DDPG instance.
    """
    # Level 0 (lowest): sized and bounded by the action arguments.
    policies = [
        DDPG(state_dim, action_dim, action_bounds, action_offset, lr, H),
    ]
    buffers = [ReplayBuffer()]

    # Remaining levels: policy and buffer are created pairwise, in the
    # same interleaved order for every level.
    for _upper in range(k_level - 1):
        policies.append(
            DDPG(state_dim, state_dim, state_bounds, state_offset, lr, H)
        )
        buffers.append(ReplayBuffer())

    self.HAC = policies
    self.replay_buffer = buffers

    # Configuration kept for later use by other methods.
    self.k_level = k_level
    self.H = H
    self.action_dim = action_dim
    self.state_dim = state_dim
    self.threshold = threshold
    self.render = render

    # Logging state: one goal slot per level, plus running counters.
    self.goals = [None] * k_level
    self.reward = 0
    self.timestep = 0
示例2: train_BCQ
# 需要導入模塊: import utils [as 別名]
# 或者: from utils import ReplayBuffer [as 別名]
def train_BCQ(state_dim, action_dim, max_action, device, args):
    """Train a BCQ policy from a saved replay buffer, evaluating as it goes.

    Each loop iteration trains for ``int(args.eval_freq)`` iterations, runs
    ``eval_policy`` once, and rewrites the full evaluation history to
    ``./results/BCQ_{env}_{seed}`` (``np.save`` appends ``.npy``). The loop
    ends once the accumulated iteration count reaches ``args.max_timesteps``.

    Args:
        state_dim: Forwarded to ``BCQ.BCQ`` and ``utils.ReplayBuffer``.
        action_dim: Forwarded to ``BCQ.BCQ`` and ``utils.ReplayBuffer``.
        max_action: Forwarded to ``BCQ.BCQ``.
        device: Forwarded to ``BCQ.BCQ`` and ``utils.ReplayBuffer``.
        args: Namespace-like object providing ``env``, ``seed``,
            ``buffer_name``, ``discount``, ``tau``, ``lmbda``, ``phi``,
            ``max_timesteps``, ``eval_freq`` and ``batch_size``.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    # For saving files
    setting = f"{args.env}_{args.seed}"
    buffer_name = f"{args.buffer_name}_{setting}"

    # Initialize policy
    policy = BCQ.BCQ(state_dim, action_dim, max_action, device, args.discount, args.tau, args.lmbda, args.phi)

    # Load buffer
    replay_buffer = utils.ReplayBuffer(state_dim, action_dim, device)
    replay_buffer.load(f"./buffers/{buffer_name}")

    evaluations = []
    training_iters = 0

    while training_iters < args.max_timesteps:
        # The return value of train() is not needed here, so it is not bound.
        policy.train(replay_buffer, iterations=int(args.eval_freq), batch_size=args.batch_size)

        evaluations.append(eval_policy(policy, args.env, args.seed))
        # Re-saved on every pass so partial results survive an interrupted run.
        # NOTE(review): np.save does not create directories; ./results is
        # assumed to exist already -- confirm against the caller.
        np.save(f"./results/BCQ_{setting}", evaluations)

        # Deliberately adds the raw eval_freq (not the int() cast used above)
        # so the printed counter keeps its original formatting.
        training_iters += args.eval_freq
        print(f"Training iterations: {training_iters}")
# Runs policy for X episodes and returns average reward
# A fixed seed is used for the eval environment