本文整理汇总了Python中replay_buffer.ReplayBuffer类的典型用法代码示例。如果您正苦于以下问题：Python replay_buffer.ReplayBuffer的具体用法？Python replay_buffer.ReplayBuffer怎么用？Python replay_buffer.ReplayBuffer使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块replay_buffer的用法示例。
在下文中一共展示了replay_buffer.ReplayBuffer类的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: import replay_buffer [as 别名]
# 或者: from replay_buffer import ReplayBuffer [as 别名]
def __init__(self, sess, env, test_env, args):
    """Wire up the actor, the critic and the replay buffer, then initialize.

    Args:
        sess: TensorFlow session. It runs the global variable initializer
            and is also handed to ``Actor`` and ``Critic``.
        env: Training environment. The first entry of its observation and
            action space shapes becomes ``ob_dim`` and ``ac_dim``.
        test_env: Second environment, stored as ``self.test_env``.
        args: Settings object. ``args.replay_size`` is read here and the
            whole object is forwarded to ``Actor`` and ``Critic``.
    """
    self.sess, self.args = sess, args
    self.env, self.test_env = env, test_env

    # Sizes are taken from the leading axis of each space's shape.
    self.ob_dim = env.observation_space.shape[0]
    self.ac_dim = env.action_space.shape[0]

    # Networks first, then the experience replay buffer.
    self.actor = Actor(sess, env, args)
    self.critic = Critic(sess, env, args)
    capacity = args.replay_size
    self.rbuffer = ReplayBuffer(capacity, self.ob_dim, self.ac_dim)

    # Variables must be initialized before the target networks are synced.
    self._debug_print()
    self.sess.run(tf.global_variables_initializer())

    # smooth=False: per the original note this sets target = current at
    # start-up (the helper itself is defined outside this snippet).
    for network in (self.actor, self.critic):
        network.update_target_net(smooth=False)
示例2: __init__
# 需要导入模块: import replay_buffer [as 别名]
# 或者: from replay_buffer import ReplayBuffer [as 别名]
def __init__(self, env):
    """Set up a DDPG agent: session, actor/critic networks, buffer and noise.

    Args:
        env: Environment whose ``observation_space.shape[0]`` and
            ``action_space.shape[0]`` give the state and action sizes.
    """
    self.name = 'DDPG'  # name for uploading results
    self.environment = env

    # State / action sizes come from the leading axis of each space.
    self.state_dim = env.observation_space.shape[0]
    self.action_dim = env.action_space.shape[0]

    # One interactive session is shared by both networks.
    # NOTE(review): the original comment says the networks are randomly
    # initialized together with their target copies; that happens inside
    # ActorNetwork / CriticNetwork, which are not visible here.
    session = tf.InteractiveSession()
    self.sess = session
    dims = (self.state_dim, self.action_dim)
    self.actor_network = ActorNetwork(session, *dims)
    self.critic_network = CriticNetwork(session, *dims)

    # Experience replay, sized by the module-level constant.
    self.replay_buffer = ReplayBuffer(REPLAY_BUFFER_SIZE)

    # Ornstein-Uhlenbeck random process used for action exploration.
    self.exploration_noise = OUNoise(self.action_dim)
示例3: __init__
# 需要导入模块: import replay_buffer [as 别名]
# 或者: from replay_buffer import ReplayBuffer [as 别名]
def __init__(self, env):
    """Create the pieces of a DDPG agent for the given environment.

    Args:
        env: Environment providing ``observation_space`` and
            ``action_space``; element 0 of each ``shape`` is used as the
            state and action dimensionality.
    """
    self.name = 'DDPG'  # name for uploading results
    self.environment = env

    # Dimensionalities read from the environment's spaces.
    self.state_dim = env.observation_space.shape[0]
    self.action_dim = env.action_space.shape[0]

    # Actor and critic share a single interactive session.
    # NOTE(review): target networks are presumably built inside the two
    # network classes (the original comment says so) - not visible here.
    session = tf.InteractiveSession()
    self.sess = session
    network_args = (session, self.state_dim, self.action_dim)
    self.actor_network = ActorNetwork(*network_args)
    self.critic_network = CriticNetwork(*network_args)

    # Replay buffer with capacity REPLAY_BUFFER_SIZE.
    self.replay_buffer = ReplayBuffer(REPLAY_BUFFER_SIZE)

    # Ornstein-Uhlenbeck process supplying exploration noise for actions.
    self.exploration_noise = OUNoise(self.action_dim)
示例4: __init__
# 需要导入模块: import replay_buffer [as 别名]
# 或者: from replay_buffer import ReplayBuffer [as 别名]
def __init__(self, computation_graph_args, sample_trajectory_args, estimate_return_args):
    """Copy hyper-parameters from three config dicts and build two buffers.

    Args:
        computation_graph_args: Mapping with the keys ``ob_dim``,
            ``ac_dim``, ``task_dim``, ``size``, ``gru_size``, ``n_layers``,
            ``learning_rate``, ``history``, ``num_value_iters``, ``l2reg``
            and ``recurrent``.
        sample_trajectory_args: Mapping with the keys ``animate``,
            ``max_path_length``, ``min_timesteps_per_batch`` and
            ``grain_size``.
        estimate_return_args: Mapping with the keys ``gamma``,
            ``nn_critic`` and ``normalize_advantages``.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    super(Agent, self).__init__()

    # Each value is stored under an attribute named after its key; the
    # keys are processed in the same order as the original assignments.
    for key in ('ob_dim', 'ac_dim', 'task_dim'):
        setattr(self, key, computation_graph_args[key])

    # Reward and terminal flag each add one column to the meta-observation.
    self.reward_dim = 1
    self.terminal_dim = 1
    self.meta_ob_dim = self.ob_dim + self.ac_dim + self.reward_dim + self.terminal_dim
    self.scope = 'continuous_logits'

    for key in ('size', 'gru_size', 'n_layers', 'learning_rate',
                'history', 'num_value_iters', 'l2reg', 'recurrent'):
        setattr(self, key, computation_graph_args[key])

    for key in ('animate', 'max_path_length',
                'min_timesteps_per_batch', 'grain_size'):
        setattr(self, key, sample_trajectory_args[key])

    for key in ('gamma', 'nn_critic', 'normalize_advantages'):
        setattr(self, key, estimate_return_args[key])

    def new_buffer():
        # Fresh list arguments on every call so the two buffers never
        # share the same list objects.
        return ReplayBuffer(100000, [self.history, self.meta_ob_dim], [self.ac_dim], self.gru_size, self.task_dim)

    # Two buffers built with identical arguments.
    self.replay_buffer = new_buffer()
    self.val_replay_buffer = new_buffer()
示例5: __init__
# 需要导入模块: import replay_buffer [as 别名]
# 或者: from replay_buffer import ReplayBuffer [as 别名]
def __init__(
    self,
    trainer,
    exploration_data_collector: MdpPathCollector,
    remote_eval_data_collector: RemoteMdpPathCollector,
    replay_buffer: ReplayBuffer,
    batch_size,
    max_path_length,
    num_epochs,
    num_eval_steps_per_epoch,
    num_expl_steps_per_train_loop,
    num_trains_per_train_loop,
    num_train_loops_per_epoch=1,
    min_num_steps_before_training=0,
    optimistic_exp_hp=None,
):
    """Store the training-loop settings and the collaborating objects.

    Every argument is kept unchanged on the instance; nothing is validated
    or used here. Attribute names match the parameter names, except that
    ``exploration_data_collector`` is stored as ``expl_data_collector``.

    Args:
        trainer: Trainer object driven by the loop.
        exploration_data_collector: Path collector for exploration data.
        remote_eval_data_collector: Remote path collector for evaluation.
        replay_buffer: Replay buffer instance.
        batch_size: Stored as ``self.batch_size``.
        max_path_length: Stored as ``self.max_path_length``.
        num_epochs: Stored as ``self.num_epochs``.
        num_eval_steps_per_epoch: Stored under the same name.
        num_expl_steps_per_train_loop: Stored under the same name.
        num_trains_per_train_loop: Stored under the same name.
        num_train_loops_per_epoch: Stored under the same name (default 1).
        min_num_steps_before_training: Stored under the same name
            (default 0).
        optimistic_exp_hp: Stored under the same name (default ``None``).
    """
    super().__init__()

    # --- State that should not mutate after construction ---------------
    # Batch / path limits.
    self.batch_size = batch_size
    self.max_path_length = max_path_length
    # Epoch schedule.
    self.num_epochs = num_epochs
    self.num_train_loops_per_epoch = num_train_loops_per_epoch
    self.num_eval_steps_per_epoch = num_eval_steps_per_epoch
    # Per-train-loop step counts.
    self.num_expl_steps_per_train_loop = num_expl_steps_per_train_loop
    self.num_trains_per_train_loop = num_trains_per_train_loop
    self.min_num_steps_before_training = min_num_steps_before_training
    # Exploration hyper-parameters, kept as passed in.
    self.optimistic_exp_hp = optimistic_exp_hp

    # --- Mutable state ---------------------------------------------------
    self._start_epoch = 0

    # --- High-level collaborators ----------------------------------------
    # This class sets up the main training loop, so it needs references to
    # the other high-level objects of the algorithm. Those objects maintain
    # their own state and are responsible for saving and restoring it for
    # checkpointing.
    self.trainer = trainer
    self.expl_data_collector = exploration_data_collector
    self.remote_eval_data_collector = remote_eval_data_collector
    self.replay_buffer = replay_buffer
示例6: __init__
# 需要导入模块: import replay_buffer [as 别名]
# 或者: from replay_buffer import ReplayBuffer [as 别名]
def __init__(self, session,
             optimizer,
             q_network,
             state_dim,
             num_actions,
             batch_size=32,
             init_exp=0.5,  # initial exploration prob
             final_exp=0.1,  # final exploration prob
             anneal_steps=10000,  # N steps for annealing exploration
             replay_buffer_size=10000,
             store_replay_every=5,  # how frequent to store experience
             discount_factor=0.9,  # discount future rewards
             target_update_rate=0.01,
             reg_param=0.01,  # regularization constants
             max_gradient=5,  # max gradient norms
             double_q_learning=False,
             summary_writer=None,
             summary_every=100):
    """Store the Q-learning configuration, build the graph and initialize it.

    Args:
        session: TensorFlow session used to run the initializer ops.
        optimizer: Optimizer object, stored as ``self.optimizer``.
        q_network: Q-network, stored as ``self.q_network``.
        state_dim: Stored as ``self.state_dim``.
        num_actions: Stored as ``self.num_actions``.
        batch_size: Stored as ``self.batch_size``.
        init_exp: Initial exploration probability; also the starting value
            of ``self.exploration``.
        final_exp: Final exploration probability.
        anneal_steps: Number of steps for annealing exploration.
        replay_buffer_size: Passed to ``ReplayBuffer`` as ``buffer_size``.
        store_replay_every: How frequently to store experience.
        discount_factor: Discount applied to future rewards.
        target_update_rate: Stored as ``self.target_update_rate``.
        reg_param: Regularization constant.
        max_gradient: Maximum gradient norm.
        double_q_learning: Stored as ``self.double_q_learning``.
        summary_writer: Optional summary writer; when given, the session
            graph is added to it.
        summary_every: Stored as ``self.summary_every`` whether or not a
            summary writer was supplied.
    """
    # tensorflow machinery
    self.session = session
    self.optimizer = optimizer
    self.summary_writer = summary_writer

    # model components
    self.q_network = q_network
    self.replay_buffer = ReplayBuffer(buffer_size=replay_buffer_size)

    # Q learning parameters
    self.batch_size = batch_size
    self.state_dim = state_dim
    self.num_actions = num_actions
    self.exploration = init_exp
    self.init_exp = init_exp
    self.final_exp = final_exp
    self.anneal_steps = anneal_steps
    self.discount_factor = discount_factor
    self.target_update_rate = target_update_rate
    self.double_q_learning = double_q_learning

    # training parameters
    self.max_gradient = max_gradient
    self.reg_param = reg_param

    # counters
    self.store_replay_every = store_replay_every
    self.store_experience_cnt = 0
    self.train_iteration = 0

    # create and initialize variables
    self.create_variables()
    var_lists = tf.get_collection(tf.GraphKeys.VARIABLES)
    self.session.run(tf.initialize_variables(var_lists))

    # make sure all variables are initialized
    self.session.run(tf.assert_variables_initialized())

    if self.summary_writer is not None:
        # graph was not available when journalist was created
        self.summary_writer.add_graph(self.session.graph)

    # Assigned outside the `if` so the attribute always exists, even when
    # no summary writer was passed in.
    self.summary_every = summary_every
示例7: __init__
# 需要导入模块: import replay_buffer [as 别名]
# 或者: from replay_buffer import ReplayBuffer [as 别名]
def __init__(self, session,
             optimizer,
             actor_network,
             critic_network,
             state_dim,
             action_dim,
             batch_size=32,
             replay_buffer_size=10000,  # size of replay buffer
             store_replay_every=1,  # how frequent to store experience
             discount_factor=0.99,  # discount future rewards
             target_update_rate=0.01,
             reg_param=0.01,  # regularization constants
             max_gradient=5,  # max gradient norms
             noise_sigma=0.20,
             noise_theta=0.15,
             summary_writer=None,
             summary_every=100):
    """Store the actor-critic configuration, build the graph and initialize it.

    Args:
        session: TensorFlow session used to run the initializer ops.
        optimizer: Optimizer object, stored as ``self.optimizer``.
        actor_network: Stored as ``self.actor_network``.
        critic_network: Stored as ``self.critic_network``.
        state_dim: Stored as ``self.state_dim``.
        action_dim: Stored as ``self.action_dim``; also the width of the
            ``[1, action_dim]`` exploration-noise variable.
        batch_size: Stored as ``self.batch_size``.
        replay_buffer_size: Size of the replay buffer.
        store_replay_every: How frequently to store experience.
        discount_factor: Discount applied to future rewards.
        target_update_rate: Stored as ``self.target_update_rate``.
        reg_param: Regularization constant.
        max_gradient: Maximum gradient norm.
        noise_sigma: Standard deviation of the normal sample used in the
            noise update.
        noise_theta: Factor applied to the current noise value in the
            noise update.
        summary_writer: Optional summary writer; when given, the session
            graph is added to it.
        summary_every: Stored as ``self.summary_every`` whether or not a
            summary writer was supplied.
    """
    # tensorflow machinery
    self.session = session
    self.optimizer = optimizer
    self.summary_writer = summary_writer

    # model components
    self.actor_network = actor_network
    self.critic_network = critic_network
    self.replay_buffer = ReplayBuffer(buffer_size=replay_buffer_size)

    # training parameters
    self.batch_size = batch_size
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.discount_factor = discount_factor
    self.target_update_rate = target_update_rate
    self.max_gradient = max_gradient
    self.reg_param = reg_param

    # Ornstein-Uhlenbeck noise for exploration.
    # self.noise is the update op:
    #   noise_var <- noise_var - (noise_theta * noise_var - sample)
    # where `sample` is a normal draw with stddev noise_sigma.
    self.noise_var = tf.Variable(tf.zeros([1, action_dim]))
    noise_random = tf.random_normal([1, action_dim], stddev=noise_sigma)
    self.noise = self.noise_var.assign_sub((noise_theta) * self.noise_var - noise_random)

    # counters
    self.store_replay_every = store_replay_every
    self.store_experience_cnt = 0
    self.train_iteration = 0

    # create and initialize variables
    self.create_variables()
    var_lists = tf.get_collection(tf.GraphKeys.VARIABLES)
    self.session.run(tf.initialize_variables(var_lists))

    # make sure all variables are initialized
    self.session.run(tf.assert_variables_initialized())

    if self.summary_writer is not None:
        # graph was not available when journalist was created
        self.summary_writer.add_graph(self.session.graph)

    # Assigned outside the `if` so the attribute always exists, even when
    # no summary writer was passed in.
    self.summary_every = summary_every