当前位置: 首页>>代码示例>>Python>>正文


Python replay_buffer.ReplayBuffer类代码示例

本文整理汇总了Python中replay_buffer.ReplayBuffer类的典型用法代码示例。如果您正苦于以下问题:Python replay_buffer.ReplayBuffer类的具体用法?Python replay_buffer.ReplayBuffer怎么用?Python replay_buffer.ReplayBuffer使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块replay_buffer的用法示例。


在下文中一共展示了replay_buffer.ReplayBuffer类的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: import replay_buffer [as 别名]
# 或者: from replay_buffer import ReplayBuffer [as 别名]
def __init__(self, sess, env, test_env, args):
        """Store the session and environments, build the networks, initialise.

        Args:
            sess: Session-like object; handed to ``Actor``/``Critic`` and used
                to run the global-variables initializer.
            env: Environment; ``observation_space.shape[0]`` and
                ``action_space.shape[0]`` supply the stored dimensions.
            test_env: Kept as ``self.test_env``; not otherwise used here.
            args: Kept as ``self.args``; ``args.replay_size`` is passed as the
                first argument to ``ReplayBuffer``.
        """
        self.sess, self.args = sess, args
        self.env, self.test_env = env, test_env

        # Leading entries of the observation / action shapes.
        self.ob_dim = obs_size = env.observation_space.shape[0]
        self.ac_dim = act_size = env.action_space.shape[0]

        # Networks first (actor, then critic), then the experience replay
        # buffer sized from the command-line style ``args``.
        self.actor = Actor(sess, env, args)
        self.critic = Critic(sess, env, args)
        self.rbuffer = ReplayBuffer(args.replay_size, obs_size, act_size)

        self._debug_print()

        # Initialise every global variable before touching the target nets.
        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)

        # smooth=False: per the original note, this sets current=target to
        # start.
        for network in (self.actor, self.critic):
            network.update_target_net(smooth=False)
开发者ID:DanielTakeshi,项目名称:rl_algorithms,代码行数:20,代码来源:ddpg.py

示例2: __init__

# 需要导入模块: import replay_buffer [as 别名]
# 或者: from replay_buffer import ReplayBuffer [as 别名]
def __init__(self, env):
        """Create the session, actor/critic networks, replay buffer and noise.

        Args:
            env: Environment; kept as ``self.environment``. Its
                ``observation_space.shape[0]`` and ``action_space.shape[0]``
                become ``self.state_dim`` and ``self.action_dim``.
        """
        # 'DDPG' is the name used when uploading results.
        self.name, self.environment = 'DDPG', env

        self.state_dim = n_state = env.observation_space.shape[0]
        self.action_dim = n_action = env.action_space.shape[0]

        self.sess = session = tf.InteractiveSession()

        # Actor and critic share the session and the same dimensions; each
        # network class is expected to set up its own target network.
        self.actor_network = ActorNetwork(session, n_state, n_action)
        self.critic_network = CriticNetwork(session, n_state, n_action)

        # Experience replay storage, sized by the module-level constant.
        self.replay_buffer = ReplayBuffer(REPLAY_BUFFER_SIZE)

        # Ornstein-Uhlenbeck random process used for action exploration.
        self.exploration_noise = OUNoise(n_action)
开发者ID:jsikyoon,项目名称:programmable-agents_tensorflow,代码行数:20,代码来源:ddpg.py

示例3: __init__

# 需要导入模块: import replay_buffer [as 别名]
# 或者: from replay_buffer import ReplayBuffer [as 别名]
def __init__(self, env):
        """Set up a DDPG agent for ``env``.

        Reads the first entry of the environment's observation and action
        space shapes, opens an interactive TensorFlow session, and constructs
        the actor network, critic network, replay buffer and exploration
        noise process.

        Args:
            env: Environment object; stored as ``self.environment``.
        """
        self.name = 'DDPG'  # name for uploading results
        self.environment = env

        obs_space, act_space = env.observation_space, env.action_space
        self.state_dim = obs_space.shape[0]
        self.action_dim = act_space.shape[0]

        self.sess = tf.InteractiveSession()

        # Both networks receive the same (session, state_dim, action_dim).
        net_args = (self.sess, self.state_dim, self.action_dim)
        self.actor_network = ActorNetwork(*net_args)
        self.critic_network = CriticNetwork(*net_args)

        # Replay buffer capacity comes from the module-level constant.
        self.replay_buffer = ReplayBuffer(REPLAY_BUFFER_SIZE)

        # Ornstein-Uhlenbeck process for action exploration.
        self.exploration_noise = OUNoise(self.action_dim)
开发者ID:floodsung,项目名称:DDPG,代码行数:20,代码来源:ddpg.py

示例4: __init__

# 需要导入模块: import replay_buffer [as 别名]
# 或者: from replay_buffer import ReplayBuffer [as 别名]
def __init__(self, computation_graph_args, sample_trajectory_args, estimate_return_args):
        """Copy hyper-parameters from three config dicts and build two buffers.

        Args:
            computation_graph_args: Mapping providing 'ob_dim', 'ac_dim',
                'task_dim', 'size', 'gru_size', 'n_layers', 'learning_rate',
                'history', 'num_value_iters', 'l2reg' and 'recurrent'.
            sample_trajectory_args: Mapping providing 'animate',
                'max_path_length', 'min_timesteps_per_batch' and 'grain_size'.
            estimate_return_args: Mapping providing 'gamma', 'nn_critic' and
                'normalize_advantages'.

        Raises:
            KeyError: If any of the keys listed above is missing.
        """
        super(Agent, self).__init__()

        # Dimensions taken from the graph config; reward and terminal flag
        # each contribute one extra entry to the meta-observation.
        for key in ('ob_dim', 'ac_dim', 'task_dim'):
            setattr(self, key, computation_graph_args[key])
        self.reward_dim = self.terminal_dim = 1

        self.meta_ob_dim = sum(
            (self.ob_dim, self.ac_dim, self.reward_dim, self.terminal_dim))
        self.scope = 'continuous_logits'

        # Remaining settings are copied verbatim, attribute name == dict key.
        graph_keys = ('size', 'gru_size', 'n_layers', 'learning_rate',
                      'history', 'num_value_iters', 'l2reg', 'recurrent')
        for key in graph_keys:
            setattr(self, key, computation_graph_args[key])

        sampling_keys = ('animate', 'max_path_length',
                         'min_timesteps_per_batch', 'grain_size')
        for key in sampling_keys:
            setattr(self, key, sample_trajectory_args[key])

        for key in ('gamma', 'nn_critic', 'normalize_advantages'):
            setattr(self, key, estimate_return_args[key])

        def _make_buffer():
            # Fresh shape lists per call so the two buffers never share them.
            return ReplayBuffer(
                100000,
                [self.history, self.meta_ob_dim],
                [self.ac_dim],
                self.gru_size,
                self.task_dim,
            )

        # Identically configured buffers; the second is the "val" one.
        self.replay_buffer = _make_buffer()
        self.val_replay_buffer = _make_buffer()
开发者ID:xuwd11,项目名称:cs294-112_hws,代码行数:32,代码来源:train_policy.py

示例5: __init__

# 需要导入模块: import replay_buffer [as 别名]
# 或者: from replay_buffer import ReplayBuffer [as 别名]
def __init__(
            self,
            trainer,
            exploration_data_collector: MdpPathCollector,
            remote_eval_data_collector: RemoteMdpPathCollector,
            replay_buffer: ReplayBuffer,
            batch_size,
            max_path_length,
            num_epochs,
            num_eval_steps_per_epoch,
            num_expl_steps_per_train_loop,
            num_trains_per_train_loop,
            num_train_loops_per_epoch=1,
            min_num_steps_before_training=0,
            optimistic_exp_hp=None,
    ):
        """Record the training-loop settings and the objects the loop drives.

        Every argument is stored on the instance; nothing else is computed.
        ``exploration_data_collector`` is stored under the shorter name
        ``self.expl_data_collector``; all other arguments keep their names.
        ``self._start_epoch`` is initialised to 0.
        """
        super().__init__()

        # --- Class state which should not mutate ---
        fixed_settings = {
            'batch_size': batch_size,
            'max_path_length': max_path_length,
            'num_epochs': num_epochs,
            'num_eval_steps_per_epoch': num_eval_steps_per_epoch,
            'num_trains_per_train_loop': num_trains_per_train_loop,
            'num_train_loops_per_epoch': num_train_loops_per_epoch,
            'num_expl_steps_per_train_loop': num_expl_steps_per_train_loop,
            'min_num_steps_before_training': min_num_steps_before_training,
            'optimistic_exp_hp': optimistic_exp_hp,
        }
        for attr_name, attr_value in fixed_settings.items():
            setattr(self, attr_name, attr_value)

        # --- Class mutable state ---
        self._start_epoch = 0

        # --- High-level collaborators ---
        # This class sets up the main training loop, so it needs references
        # to the other high-level objects in the algorithm. Those objects
        # maintain their own state and are responsible for saving and
        # restoring it for checkpointing.
        self.trainer = trainer
        self.expl_data_collector, self.remote_eval_data_collector = (
            exploration_data_collector,
            remote_eval_data_collector,
        )
        self.replay_buffer = replay_buffer
开发者ID:microsoft,项目名称:oac-explore,代码行数:51,代码来源:rl_algorithm.py

示例6: __init__

# 需要导入模块: import replay_buffer [as 别名]
# 或者: from replay_buffer import ReplayBuffer [as 别名]
def __init__(self, session,
                     optimizer,
                     q_network,
                     state_dim,
                     num_actions,
                     batch_size=32,
                     init_exp=0.5,       # initial exploration prob
                     final_exp=0.1,      # final exploration prob
                     anneal_steps=10000, # N steps for annealing exploration
                     replay_buffer_size=10000,
                     store_replay_every=5, # how frequent to store experience
                     discount_factor=0.9, # discount future rewards
                     target_update_rate=0.01,
                     reg_param=0.01, # regularization constants
                     max_gradient=5, # max gradient norms
                     double_q_learning=False,
                     summary_writer=None,
                     summary_every=100):
    """Store the Q-learner configuration and initialise its variables.

    Keeps every argument on the instance, creates a ``ReplayBuffer`` of
    ``replay_buffer_size``, calls ``self.create_variables()`` and then runs
    the variable initializer in ``session``.

    ``self.summary_every`` is only set when ``summary_writer`` is not None;
    in that case the session graph is also added to the writer.
    """
    # TensorFlow machinery.
    self.session, self.optimizer, self.summary_writer = (
        session, optimizer, summary_writer)

    # Model components.
    self.q_network = q_network
    self.replay_buffer = ReplayBuffer(buffer_size=replay_buffer_size)

    # Q-learning parameters; exploration starts at ``init_exp``.
    self.batch_size, self.state_dim, self.num_actions = (
        batch_size, state_dim, num_actions)
    self.exploration = self.init_exp = init_exp
    self.final_exp, self.anneal_steps = final_exp, anneal_steps
    self.discount_factor, self.target_update_rate = (
        discount_factor, target_update_rate)
    self.double_q_learning = double_q_learning

    # Training parameters.
    self.max_gradient, self.reg_param = max_gradient, reg_param

    # Counters.
    self.store_replay_every = store_replay_every
    self.store_experience_cnt = self.train_iteration = 0

    # Build the graph pieces, then initialise every collected variable.
    # NOTE(review): GraphKeys.VARIABLES / initialize_variables look like the
    # older TF spellings -- confirm against the TF version this file targets.
    self.create_variables()
    collected_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
    init_op = tf.initialize_variables(collected_vars)
    self.session.run(init_op)

    # Make sure all variables are initialized.
    check_op = tf.assert_variables_initialized()
    self.session.run(check_op)

    if self.summary_writer is None:
      return

    # Graph was not available when journalist was created.
    self.summary_writer.add_graph(self.session.graph)
    self.summary_every = summary_every
开发者ID:cardwing,项目名称:Codes-for-RL-PER,代码行数:63,代码来源:neural_q_learner.py

示例7: __init__

# 需要导入模块: import replay_buffer [as 别名]
# 或者: from replay_buffer import ReplayBuffer [as 别名]
def __init__(self, session,
                     optimizer,
                     actor_network,
                     critic_network,
                     state_dim,
                     action_dim,
                     batch_size=32,
                     replay_buffer_size=10000, # size of replay buffer
                     store_replay_every=1,       # how frequent to store experience
                     discount_factor=0.99,       # discount future rewards
                     target_update_rate=0.01,
                     reg_param=0.01,             # regularization constants
                     max_gradient=5,             # max gradient norms
                     noise_sigma=0.20,
                     noise_theta=0.15,
                     summary_writer=None,
                     summary_every=100):
    """Store the actor-critic configuration, build the noise op, initialise.

    Keeps the arguments on the instance (``noise_sigma``/``noise_theta`` are
    only used to build the noise op), creates a ``ReplayBuffer`` of
    ``replay_buffer_size``, calls ``self.create_variables()`` and runs the
    variable initializer in ``session``.

    ``self.summary_every`` is only set when ``summary_writer`` is not None;
    in that case the session graph is also added to the writer.
    """
    # TensorFlow machinery.
    self.session, self.optimizer, self.summary_writer = (
        session, optimizer, summary_writer)

    # Model components.
    self.actor_network, self.critic_network = actor_network, critic_network
    self.replay_buffer = ReplayBuffer(buffer_size=replay_buffer_size)

    # Training parameters.
    self.batch_size, self.state_dim, self.action_dim = (
        batch_size, state_dim, action_dim)
    self.discount_factor, self.target_update_rate = (
        discount_factor, target_update_rate)
    self.max_gradient, self.reg_param = max_gradient, reg_param

    # Ornstein-Uhlenbeck noise for exploration: each evaluation of
    # ``self.noise`` subtracts (theta * state - gaussian sample) from the
    # [1, action_dim] state variable.
    self.noise_var = tf.Variable(tf.zeros([1, action_dim]))
    gaussian_step = tf.random_normal([1, action_dim], stddev=noise_sigma)
    pull_back = noise_theta * self.noise_var
    self.noise = self.noise_var.assign_sub(pull_back - gaussian_step)

    # Counters.
    self.store_replay_every = store_replay_every
    self.store_experience_cnt = self.train_iteration = 0

    # Build the graph pieces, then initialise every collected variable.
    # NOTE(review): GraphKeys.VARIABLES / initialize_variables look like the
    # older TF spellings -- confirm against the TF version this file targets.
    self.create_variables()
    collected_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
    init_op = tf.initialize_variables(collected_vars)
    self.session.run(init_op)

    # Make sure all variables are initialized.
    check_op = tf.assert_variables_initialized()
    self.session.run(check_op)

    if self.summary_writer is None:
      return

    # Graph was not available when journalist was created.
    self.summary_writer.add_graph(self.session.graph)
    self.summary_every = summary_every
开发者ID:cardwing,项目名称:Codes-for-RL-PER,代码行数:61,代码来源:pg_ddpg.py


注:本文中的replay_buffer.ReplayBuffer方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。