This page collects typical usage examples of the Python class replay_buffer.ReplayBuffer. If you are wondering what replay_buffer.ReplayBuffer is for, or how to use it in your own code, the curated examples below may help. You can also browse further usage examples from the replay_buffer module that the class belongs to.
The following six code examples of replay_buffer.ReplayBuffer are sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
Example 1: __init__
# Required import: import replay_buffer [as alias]
# Or: from replay_buffer import ReplayBuffer [as alias]
def __init__(self, sess, env, test_env, args):
    self.sess = sess
    self.args = args
    self.env = env
    self.test_env = test_env
    self.ob_dim = env.observation_space.shape[0]
    self.ac_dim = env.action_space.shape[0]
    # Construct the networks and the experience replay buffer.
    self.actor = Actor(sess, env, args)
    self.critic = Critic(sess, env, args)
    self.rbuffer = ReplayBuffer(args.replay_size, self.ob_dim, self.ac_dim)
    # Initialize, then run; also set current = target to start.
    self._debug_print()
    self.sess.run(tf.global_variables_initializer())
    self.actor.update_target_net(smooth=False)
    self.critic.update_target_net(smooth=False)
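In this example the buffer is constructed from a capacity plus the observation and action dimensions, which suggests a preallocated, array-backed ring buffer. The sketch below is a minimal illustration of what such a buffer could look like; the class name and the add/sample methods are assumptions for illustration, not taken from the repository.
import numpy as np

class ArrayReplayBuffer:
    """Minimal array-backed ring-buffer sketch (hypothetical interface)."""

    def __init__(self, size, ob_dim, ac_dim):
        self.size = size
        self.num_in_buffer = 0   # number of transitions currently stored
        self.next_idx = 0        # next slot to overwrite
        self.obs = np.zeros((size, ob_dim), dtype=np.float32)
        self.acts = np.zeros((size, ac_dim), dtype=np.float32)
        self.rews = np.zeros(size, dtype=np.float32)
        self.next_obs = np.zeros((size, ob_dim), dtype=np.float32)
        self.dones = np.zeros(size, dtype=np.float32)

    def add(self, ob, ac, rew, next_ob, done):
        # Overwrite the oldest transition once the buffer is full.
        i = self.next_idx
        self.obs[i], self.acts[i], self.rews[i] = ob, ac, rew
        self.next_obs[i], self.dones[i] = next_ob, done
        self.next_idx = (i + 1) % self.size
        self.num_in_buffer = min(self.num_in_buffer + 1, self.size)

    def sample(self, batch_size):
        # Uniformly sample from the filled portion (assumes the buffer is non-empty).
        idx = np.random.randint(0, self.num_in_buffer, size=batch_size)
        return (self.obs[idx], self.acts[idx], self.rews[idx],
                self.next_obs[idx], self.dones[idx])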
Example 2: __init__
# Required import: import replay_buffer [as alias]
# Or: from replay_buffer import ReplayBuffer [as alias]
def __init__(self, env):
    self.name = 'DDPG'  # name for uploading results
    self.environment = env
    # Randomly initialize the actor and critic networks
    # together with their target networks.
    self.state_dim = env.observation_space.shape[0]
    self.action_dim = env.action_space.shape[0]
    self.sess = tf.InteractiveSession()
    self.actor_network = ActorNetwork(self.sess, self.state_dim, self.action_dim)
    self.critic_network = CriticNetwork(self.sess, self.state_dim, self.action_dim)
    # Initialize the replay buffer.
    self.replay_buffer = ReplayBuffer(REPLAY_BUFFER_SIZE)
    # Initialize an Ornstein-Uhlenbeck random process for action exploration.
    self.exploration_noise = OUNoise(self.action_dim)
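Here ReplayBuffer takes only a capacity, which is typical of deque-based implementations that store each transition as a tuple. A minimal sketch in that style, with the method names add, count, and get_batch assumed for illustration:
import random
from collections import deque

class DequeReplayBuffer:
    """Minimal deque-based replay buffer sketch (hypothetical interface)."""

    def __init__(self, buffer_size):
        self.buffer_size = buffer_size
        self.buffer = deque(maxlen=buffer_size)  # oldest transitions drop off automatically

    def add(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def count(self):
        return len(self.buffer)

    def get_batch(self, batch_size):
        # Uniform random sample of stored transitions.
        return random.sample(list(self.buffer), batch_size)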
Example 3: __init__
# Required import: import replay_buffer [as alias]
# Or: from replay_buffer import ReplayBuffer [as alias]
def __init__(self, computation_graph_args, sample_trajectory_args, estimate_return_args):
    super(Agent, self).__init__()
    self.ob_dim = computation_graph_args['ob_dim']
    self.ac_dim = computation_graph_args['ac_dim']
    self.task_dim = computation_graph_args['task_dim']
    self.reward_dim = 1
    self.terminal_dim = 1
    self.meta_ob_dim = self.ob_dim + self.ac_dim + self.reward_dim + self.terminal_dim
    self.scope = 'continuous_logits'
    self.size = computation_graph_args['size']
    self.gru_size = computation_graph_args['gru_size']
    self.n_layers = computation_graph_args['n_layers']
    self.learning_rate = computation_graph_args['learning_rate']
    self.history = computation_graph_args['history']
    self.num_value_iters = computation_graph_args['num_value_iters']
    self.l2reg = computation_graph_args['l2reg']
    self.recurrent = computation_graph_args['recurrent']
    self.animate = sample_trajectory_args['animate']
    self.max_path_length = sample_trajectory_args['max_path_length']
    self.min_timesteps_per_batch = sample_trajectory_args['min_timesteps_per_batch']
    self.grain_size = sample_trajectory_args['grain_size']
    self.gamma = estimate_return_args['gamma']
    self.nn_critic = estimate_return_args['nn_critic']
    self.normalize_advantages = estimate_return_args['normalize_advantages']
    self.replay_buffer = ReplayBuffer(100000, [self.history, self.meta_ob_dim], [self.ac_dim], self.gru_size, self.task_dim)
    self.val_replay_buffer = ReplayBuffer(100000, [self.history, self.meta_ob_dim], [self.ac_dim], self.gru_size, self.task_dim)
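The buffer in this example is given explicit shapes: a [history, meta_ob_dim] observation window and an [ac_dim] action, where meta_ob_dim is the concatenation of observation, action, reward, and terminal flag defined above. The snippet below, with made-up dimensions, only illustrates how such a meta-observation window might be assembled before being stored:
import numpy as np

# Illustrative only: building one "meta-observation" of size
# ob_dim + ac_dim + 1 + 1 and placing it in a [history, meta_ob_dim] window.
ob_dim, ac_dim, history = 4, 2, 10
ob = np.zeros(ob_dim, dtype=np.float32)
ac = np.zeros(ac_dim, dtype=np.float32)
reward, terminal = 0.0, 0.0

meta_ob = np.concatenate([ob, ac, [reward], [terminal]])          # shape: (meta_ob_dim,)
window = np.zeros((history, meta_ob.shape[0]), dtype=np.float32)  # shape: (history, meta_ob_dim)
window[-1] = meta_ob  # most recent step sits at the end of the history window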
Example 4: __init__
# Required import: import replay_buffer [as alias]
# Or: from replay_buffer import ReplayBuffer [as alias]
def __init__(
        self,
        trainer,
        exploration_data_collector: MdpPathCollector,
        remote_eval_data_collector: RemoteMdpPathCollector,
        replay_buffer: ReplayBuffer,
        batch_size,
        max_path_length,
        num_epochs,
        num_eval_steps_per_epoch,
        num_expl_steps_per_train_loop,
        num_trains_per_train_loop,
        num_train_loops_per_epoch=1,
        min_num_steps_before_training=0,
        optimistic_exp_hp=None,
):
    super().__init__()
    """
    Class state that should not mutate.
    """
    self.batch_size = batch_size
    self.max_path_length = max_path_length
    self.num_epochs = num_epochs
    self.num_eval_steps_per_epoch = num_eval_steps_per_epoch
    self.num_trains_per_train_loop = num_trains_per_train_loop
    self.num_train_loops_per_epoch = num_train_loops_per_epoch
    self.num_expl_steps_per_train_loop = num_expl_steps_per_train_loop
    self.min_num_steps_before_training = min_num_steps_before_training
    self.optimistic_exp_hp = optimistic_exp_hp
    """
    Mutable class state.
    """
    self._start_epoch = 0
    """
    This class sets up the main training loop, so it needs references to the other
    high-level objects in the algorithm. Those objects maintain their own state and
    are responsible for saving and restoring it for checkpointing.
    """
    self.trainer = trainer
    self.expl_data_collector = exploration_data_collector
    self.remote_eval_data_collector = remote_eval_data_collector
    self.replay_buffer = replay_buffer
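Here the buffer is injected through the constructor and referenced only through the abstract ReplayBuffer type. In rlkit-style training loops the buffer is usually fed with freshly collected paths and then sampled once per gradient step; the method sketch below shows that pattern under the assumption that the buffer exposes add_paths and random_batch (names not verified against this repository):
# Hypothetical sketch of one training-loop iteration for the class above;
# the collector and buffer method names are assumptions, not verified.
def _train_loop_iteration(self):
    # Collect new exploration paths and store them in the replay buffer.
    new_paths = self.expl_data_collector.collect_new_paths(
        self.max_path_length,
        self.num_expl_steps_per_train_loop,
        discard_incomplete_paths=False,
    )
    self.replay_buffer.add_paths(new_paths)

    # Train on uniformly sampled mini-batches.
    for _ in range(self.num_trains_per_train_loop):
        batch = self.replay_buffer.random_batch(self.batch_size)
        self.trainer.train(batch)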
Example 5: __init__
# Required import: import replay_buffer [as alias]
# Or: from replay_buffer import ReplayBuffer [as alias]
def __init__(self, session,
             optimizer,
             q_network,
             state_dim,
             num_actions,
             batch_size=32,
             init_exp=0.5,              # initial exploration probability
             final_exp=0.1,             # final exploration probability
             anneal_steps=10000,        # number of steps over which to anneal exploration
             replay_buffer_size=10000,
             store_replay_every=5,      # how frequently to store experience
             discount_factor=0.9,       # discount on future rewards
             target_update_rate=0.01,
             reg_param=0.01,            # regularization constant
             max_gradient=5,            # max gradient norm
             double_q_learning=False,
             summary_writer=None,
             summary_every=100):
    # TensorFlow machinery
    self.session = session
    self.optimizer = optimizer
    self.summary_writer = summary_writer
    # Model components
    self.q_network = q_network
    self.replay_buffer = ReplayBuffer(buffer_size=replay_buffer_size)
    # Q-learning parameters
    self.batch_size = batch_size
    self.state_dim = state_dim
    self.num_actions = num_actions
    self.exploration = init_exp
    self.init_exp = init_exp
    self.final_exp = final_exp
    self.anneal_steps = anneal_steps
    self.discount_factor = discount_factor
    self.target_update_rate = target_update_rate
    self.double_q_learning = double_q_learning
    # Training parameters
    self.max_gradient = max_gradient
    self.reg_param = reg_param
    # Counters
    self.store_replay_every = store_replay_every
    self.store_experience_cnt = 0
    self.train_iteration = 0
    # Create and initialize variables
    self.create_variables()
    var_lists = tf.get_collection(tf.GraphKeys.VARIABLES)
    self.session.run(tf.initialize_variables(var_lists))
    # Make sure all variables are initialized
    self.session.run(tf.assert_variables_initialized())
    if self.summary_writer is not None:
        # The graph was not available when the summary writer was created.
        self.summary_writer.add_graph(self.session.graph)
        self.summary_every = summary_every
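Two of the constructor arguments hint at how the buffer and exploration schedule are used later: store_replay_every gates how often transitions are written, and init_exp, final_exp, and anneal_steps define a linear epsilon anneal. The method sketches below show one plausible implementation of both; the method names and exact logic are assumptions, not taken from the repository:
# Hypothetical helper sketches for the agent above (names and logic assumed).
def store_experience(self, state, action, reward, next_state, done):
    # Only every `store_replay_every`-th transition (or a terminal one) is stored.
    if self.store_experience_cnt % self.store_replay_every == 0 or done:
        self.replay_buffer.add(state, action, reward, next_state, done)
    self.store_experience_cnt += 1

def anneal_exploration(self):
    # Linearly anneal epsilon from init_exp to final_exp over anneal_steps iterations.
    ratio = max((self.anneal_steps - self.train_iteration) / float(self.anneal_steps), 0.0)
    self.exploration = (self.init_exp - self.final_exp) * ratio + self.final_exp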
Example 6: __init__
# Required import: import replay_buffer [as alias]
# Or: from replay_buffer import ReplayBuffer [as alias]
def __init__(self, session,
             optimizer,
             actor_network,
             critic_network,
             state_dim,
             action_dim,
             batch_size=32,
             replay_buffer_size=10000,  # size of the replay buffer
             store_replay_every=1,      # how frequently to store experience
             discount_factor=0.99,      # discount on future rewards
             target_update_rate=0.01,
             reg_param=0.01,            # regularization constant
             max_gradient=5,            # max gradient norm
             noise_sigma=0.20,
             noise_theta=0.15,
             summary_writer=None,
             summary_every=100):
    # TensorFlow machinery
    self.session = session
    self.optimizer = optimizer
    self.summary_writer = summary_writer
    # Model components
    self.actor_network = actor_network
    self.critic_network = critic_network
    self.replay_buffer = ReplayBuffer(buffer_size=replay_buffer_size)
    # Training parameters
    self.batch_size = batch_size
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.discount_factor = discount_factor
    self.target_update_rate = target_update_rate
    self.max_gradient = max_gradient
    self.reg_param = reg_param
    # Ornstein-Uhlenbeck noise for exploration
    self.noise_var = tf.Variable(tf.zeros([1, action_dim]))
    noise_random = tf.random_normal([1, action_dim], stddev=noise_sigma)
    self.noise = self.noise_var.assign_sub((noise_theta) * self.noise_var - noise_random)
    # Counters
    self.store_replay_every = store_replay_every
    self.store_experience_cnt = 0
    self.train_iteration = 0
    # Create and initialize variables
    self.create_variables()
    var_lists = tf.get_collection(tf.GraphKeys.VARIABLES)
    self.session.run(tf.initialize_variables(var_lists))
    # Make sure all variables are initialized
    self.session.run(tf.assert_variables_initialized())
    if self.summary_writer is not None:
        # The graph was not available when the summary writer was created.
        self.summary_writer.add_graph(self.session.graph)
        self.summary_every = summary_every
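The assign_sub line implements one step of a zero-mean Ornstein-Uhlenbeck process: the noise becomes noise minus theta times noise plus Gaussian noise with standard deviation sigma. A plain NumPy version of the same update, handy for checking the behavior outside the TensorFlow graph, could look like this (names here are illustrative):
import numpy as np

def ou_step(noise, theta=0.15, sigma=0.20):
    """One step of a zero-mean Ornstein-Uhlenbeck process, mirroring
    noise_var.assign_sub(noise_theta * noise_var - noise_random) above."""
    return noise - theta * noise + sigma * np.random.randn(*noise.shape)

noise = np.zeros((1, 3), dtype=np.float32)
for _ in range(5):
    noise = ou_step(noise)  # temporally correlated exploration noise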