

Python replay_buffer.ReplayBuffer Code Examples

This page collects typical usage examples of replay_buffer.ReplayBuffer in Python. If you are wondering what replay_buffer.ReplayBuffer is for, how to call it, or what real code that uses it looks like, the curated examples below may help. You can also explore further usage examples from the replay_buffer module itself.


A total of 7 code examples of replay_buffer.ReplayBuffer are shown below, sorted by popularity by default.
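The constructors in these examples take slightly different arguments: a plain capacity (examples 2 and 3), a capacity plus observation and action dimensions (examples 1 and 4), or a buffer_size keyword (examples 6 and 7). For orientation, here is a minimal sketch of the kind of interface the snippets below rely on; the add, sample, and count method names and the deque-based storage are illustrative assumptions, not the actual replay_buffer module.

import random
from collections import deque

class ReplayBuffer(object):
    """Minimal illustrative sketch; the real replay_buffer module may differ."""

    def __init__(self, buffer_size):
        # fixed-capacity FIFO storage: oldest transitions are dropped when full
        self.buffer = deque(maxlen=buffer_size)

    def add(self, state, action, reward, next_state, done):
        # store a single transition
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # uniform random minibatch for off-policy training
        return random.sample(self.buffer, batch_size)

    def count(self):
        return len(self.buffer)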

Example 1: __init__

# Required import: import replay_buffer [as alias]
# Or: from replay_buffer import ReplayBuffer [as alias]
def __init__(self, sess, env, test_env, args):
        self.sess = sess
        self.args = args
        self.env = env
        self.test_env = test_env
        self.ob_dim = env.observation_space.shape[0]
        self.ac_dim = env.action_space.shape[0]

        # Construct the networks and the experience replay buffer.
        self.actor   = Actor(sess, env, args)
        self.critic  = Critic(sess, env, args)
        self.rbuffer = ReplayBuffer(args.replay_size, self.ob_dim, self.ac_dim)

        # Initialize then run, also setting current=target to start.
        self._debug_print()
        self.sess.run(tf.global_variables_initializer())
        self.actor.update_target_net(smooth=False)
        self.critic.update_target_net(smooth=False) 
Author: DanielTakeshi | Project: rl_algorithms | Lines: 20 | Source: ddpg.py
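Once constructed, the buffer is what decouples data collection from learning in DDPG: each environment step is stored, and updates are drawn from random minibatches. The sketch below shows one plausible training step for the class above; the add/sample/count calls and the actor/critic train methods are assumptions for illustration, not the actual rl_algorithms API.

    def train_step(self, obs, action, reward, next_obs, done, batch_size=64):
        # Hypothetical sketch; add/sample/count and the train() calls are assumed names.
        self.rbuffer.add(obs, action, reward, next_obs, done)
        if self.rbuffer.count() < batch_size:
            return  # not enough experience collected yet
        batch = self.rbuffer.sample(batch_size)
        self.critic.train(batch)               # fit Q(s, a) toward the Bellman target
        self.actor.train(batch, self.critic)   # ascend the critic's estimate of Q(s, pi(s))
        # Polyak-averaged target updates, mirroring the hard sync in the constructor above
        self.actor.update_target_net(smooth=True)
        self.critic.update_target_net(smooth=True)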

Example 2: __init__

# Required import: import replay_buffer [as alias]
# Or: from replay_buffer import ReplayBuffer [as alias]
def __init__(self, env):
        self.name = 'DDPG' # name for uploading results
        self.environment = env
        # Randomly initialize the actor and critic networks,
        # along with their target networks
        self.state_dim = env.observation_space.shape[0]
        self.action_dim = env.action_space.shape[0]
        
        self.sess = tf.InteractiveSession()

        self.actor_network = ActorNetwork(self.sess,self.state_dim,self.action_dim)
        self.critic_network = CriticNetwork(self.sess,self.state_dim,self.action_dim)
        
        # initialize replay buffer
        self.replay_buffer = ReplayBuffer(REPLAY_BUFFER_SIZE)

        # Initialize an Ornstein-Uhlenbeck random process for action exploration
        self.exploration_noise = OUNoise(self.action_dim) 
Author: jsikyoon | Project: programmable-agents_tensorflow | Lines: 20 | Source: ddpg.py

Example 3: __init__

# Required import: import replay_buffer [as alias]
# Or: from replay_buffer import ReplayBuffer [as alias]
def __init__(self, env):
        self.name = 'DDPG' # name for uploading results
        self.environment = env
        # Randomly initialize the actor and critic networks,
        # along with their target networks
        self.state_dim = env.observation_space.shape[0]
        self.action_dim = env.action_space.shape[0]

        self.sess = tf.InteractiveSession()

        self.actor_network = ActorNetwork(self.sess,self.state_dim,self.action_dim)
        self.critic_network = CriticNetwork(self.sess,self.state_dim,self.action_dim)
        
        # initialize replay buffer
        self.replay_buffer = ReplayBuffer(REPLAY_BUFFER_SIZE)

        # Initialize an Ornstein-Uhlenbeck random process for action exploration
        self.exploration_noise = OUNoise(self.action_dim) 
Author: floodsung | Project: DDPG | Lines: 20 | Source: ddpg.py
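Examples 2 and 3 pair the buffer with an OUNoise process: at interaction time the agent typically adds temporally correlated noise to the deterministic policy output, stores the transition, and only starts training once the buffer is warm. The method names and the REPLAY_START_SIZE constant in the sketch below are illustrative assumptions rather than the exact repository API.

    def noise_action(self, state):
        # deterministic action plus Ornstein-Uhlenbeck noise for exploration
        return self.actor_network.action(state) + self.exploration_noise.noise()

    def perceive(self, state, action, reward, next_state, done):
        # store the transition, then update the networks once enough data is buffered
        self.replay_buffer.add(state, action, reward, next_state, done)
        if self.replay_buffer.count() > REPLAY_START_SIZE:  # assumed warm-up threshold
            self.train()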

Example 4: __init__

# Required import: import replay_buffer [as alias]
# Or: from replay_buffer import ReplayBuffer [as alias]
def __init__(self, computation_graph_args, sample_trajectory_args, estimate_return_args):
        super(Agent, self).__init__()
        self.ob_dim = computation_graph_args['ob_dim']
        self.ac_dim = computation_graph_args['ac_dim']
        self.task_dim = computation_graph_args['task_dim']
        self.reward_dim = 1
        self.terminal_dim = 1

        self.meta_ob_dim = self.ob_dim + self.ac_dim + self.reward_dim + self.terminal_dim
        self.scope  = 'continuous_logits'
        self.size = computation_graph_args['size']
        self.gru_size = computation_graph_args['gru_size']
        self.n_layers = computation_graph_args['n_layers']
        self.learning_rate = computation_graph_args['learning_rate']
        self.history = computation_graph_args['history']
        self.num_value_iters = computation_graph_args['num_value_iters']
        self.l2reg = computation_graph_args['l2reg']
        self.recurrent = computation_graph_args['recurrent']

        self.animate = sample_trajectory_args['animate']
        self.max_path_length = sample_trajectory_args['max_path_length']
        self.min_timesteps_per_batch = sample_trajectory_args['min_timesteps_per_batch']
        self.grain_size = sample_trajectory_args['grain_size']

        self.gamma = estimate_return_args['gamma']
        self.nn_critic = estimate_return_args['nn_critic']
        self.normalize_advantages = estimate_return_args['normalize_advantages']

        self.replay_buffer = ReplayBuffer(100000, [self.history, self.meta_ob_dim], [self.ac_dim], self.gru_size, self.task_dim)
        self.val_replay_buffer = ReplayBuffer(100000, [self.history, self.meta_ob_dim], [self.ac_dim], self.gru_size, self.task_dim) 
Author: xuwd11 | Project: cs294-112_hws | Lines: 32 | Source: train_policy.py

Example 5: __init__

# Required import: import replay_buffer [as alias]
# Or: from replay_buffer import ReplayBuffer [as alias]
def __init__(
            self,
            trainer,
            exploration_data_collector: MdpPathCollector,
            remote_eval_data_collector: RemoteMdpPathCollector,
            replay_buffer: ReplayBuffer,
            batch_size,
            max_path_length,
            num_epochs,
            num_eval_steps_per_epoch,
            num_expl_steps_per_train_loop,
            num_trains_per_train_loop,
            num_train_loops_per_epoch=1,
            min_num_steps_before_training=0,
            optimistic_exp_hp=None,
    ):
        super().__init__()

        """
        The class state which should not mutate
        """
        self.batch_size = batch_size
        self.max_path_length = max_path_length
        self.num_epochs = num_epochs
        self.num_eval_steps_per_epoch = num_eval_steps_per_epoch
        self.num_trains_per_train_loop = num_trains_per_train_loop
        self.num_train_loops_per_epoch = num_train_loops_per_epoch
        self.num_expl_steps_per_train_loop = num_expl_steps_per_train_loop
        self.min_num_steps_before_training = min_num_steps_before_training
        self.optimistic_exp_hp = optimistic_exp_hp

        """
        The class mutable state
        """
        self._start_epoch = 0

        """
        This class sets up the main training loop, so it needs reference to other
        high level objects in the algorithm

        But these high level object maintains their own states
        and has their own responsibilities in saving and restoring their state for checkpointing
        """
        self.trainer = trainer

        self.expl_data_collector = exploration_data_collector
        self.remote_eval_data_collector = remote_eval_data_collector

        self.replay_buffer = replay_buffer 
Author: microsoft | Project: oac-explore | Lines: 51 | Source: rl_algorithm.py

Example 6: __init__

# Required import: import replay_buffer [as alias]
# Or: from replay_buffer import ReplayBuffer [as alias]
def __init__(self, session,
                     optimizer,
                     q_network,
                     state_dim,
                     num_actions,
                     batch_size=32,
                     init_exp=0.5,       # initial exploration prob
                     final_exp=0.1,      # final exploration prob
                     anneal_steps=10000, # N steps for annealing exploration 
                     replay_buffer_size=10000,
                     store_replay_every=5, # how frequent to store experience
                     discount_factor=0.9, # discount future rewards
                     target_update_rate=0.01,
                     reg_param=0.01, # regularization constants
                     max_gradient=5, # max gradient norms
                     double_q_learning=False,
                     summary_writer=None,
                     summary_every=100):

    # tensorflow machinery
    self.session        = session
    self.optimizer      = optimizer
    self.summary_writer = summary_writer

    # model components
    self.q_network     = q_network
    self.replay_buffer = ReplayBuffer(buffer_size=replay_buffer_size)

    # Q learning parameters
    self.batch_size      = batch_size
    self.state_dim       = state_dim
    self.num_actions     = num_actions
    self.exploration     = init_exp
    self.init_exp        = init_exp
    self.final_exp       = final_exp
    self.anneal_steps    = anneal_steps
    self.discount_factor = discount_factor
    self.target_update_rate = target_update_rate
    self.double_q_learning = double_q_learning

    # training parameters
    self.max_gradient = max_gradient
    self.reg_param    = reg_param

    # counters
    self.store_replay_every   = store_replay_every
    self.store_experience_cnt = 0
    self.train_iteration      = 0

    # create and initialize variables
    self.create_variables()
    var_lists = tf.get_collection(tf.GraphKeys.VARIABLES)
    self.session.run(tf.initialize_variables(var_lists))

    # make sure all variables are initialized
    self.session.run(tf.assert_variables_initialized())

    if self.summary_writer is not None:
      # graph was not available when journalist was created
      self.summary_writer.add_graph(self.session.graph)
      self.summary_every = summary_every 
Author: cardwing | Project: Codes-for-RL-PER | Lines: 63 | Source: neural_q_learner.py
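The constructor above keeps a store_experience_cnt counter next to store_replay_every=5, which suggests that only every fifth transition is actually written to the buffer. A plausible sketch of that gating is shown below; the method name and buffer API are assumptions, not necessarily the exact neural_q_learner.py code.

    def storeExperience(self, state, action, reward, next_state, done):
        # only persist every store_replay_every-th transition to throttle buffer writes
        if self.store_experience_cnt % self.store_replay_every == 0:
            self.replay_buffer.add(state, action, reward, next_state, done)
        self.store_experience_cnt += 1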

Example 7: __init__

# Required import: import replay_buffer [as alias]
# Or: from replay_buffer import ReplayBuffer [as alias]
def __init__(self, session,
                     optimizer,
                     actor_network,
                     critic_network,
                     state_dim,
                     action_dim,
                     batch_size=32,
                     replay_buffer_size=10000, # size of replay buffer
                     store_replay_every=1,       # how frequent to store experience
                     discount_factor=0.99,       # discount future rewards
                     target_update_rate=0.01,
                     reg_param=0.01,             # regularization constants
                     max_gradient=5,             # max gradient norms
                     noise_sigma=0.20,
                     noise_theta=0.15,
                     summary_writer=None,
                     summary_every=100):

    # tensorflow machinery
    self.session        = session
    self.optimizer      = optimizer
    self.summary_writer = summary_writer

    # model components
    self.actor_network  = actor_network
    self.critic_network = critic_network
    self.replay_buffer  = ReplayBuffer(buffer_size=replay_buffer_size)

    # training parameters
    self.batch_size         = batch_size
    self.state_dim          = state_dim
    self.action_dim         = action_dim
    self.discount_factor    = discount_factor
    self.target_update_rate = target_update_rate
    self.max_gradient       = max_gradient
    self.reg_param          = reg_param

    # Ornstein-Uhlenbeck noise for exploration
    self.noise_var = tf.Variable(tf.zeros([1, action_dim]))
    noise_random = tf.random_normal([1, action_dim], stddev=noise_sigma)
    self.noise = self.noise_var.assign_sub((noise_theta) * self.noise_var - noise_random)

    # counters
    self.store_replay_every   = store_replay_every
    self.store_experience_cnt = 0
    self.train_iteration      = 0

    # create and initialize variables
    self.create_variables()
    var_lists = tf.get_collection(tf.GraphKeys.VARIABLES)
    self.session.run(tf.initialize_variables(var_lists))

    # make sure all variables are initialized
    self.session.run(tf.assert_variables_initialized())

    if self.summary_writer is not None:
      # graph was not available when journalist was created
      self.summary_writer.add_graph(self.session.graph)
      self.summary_every = summary_every 
Author: cardwing | Project: Codes-for-RL-PER | Lines: 61 | Source: pg_ddpg.py


Note: The replay_buffer.ReplayBuffer examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their authors; copyright of the source code remains with the original authors, and distribution and use should follow the corresponding project's license. Do not reproduce without permission.