当前位置: 首页>>代码示例>>Python>>正文


Python replay_memory.ReplayMemory方法代码示例

本文整理汇总了Python中replay_memory.ReplayMemory方法的典型用法代码示例。如果您正苦于以下问题：Python replay_memory.ReplayMemory方法的具体用法？Python replay_memory.ReplayMemory怎么用？Python replay_memory.ReplayMemory使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块replay_memory的用法示例。


在下文中一共展示了replay_memory.ReplayMemory方法的12个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_soak

# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def test_soak(self):
    state_shape = (50,50,6)
    rm = ReplayMemory(self.sess, buffer_size=10000, 
                      state_shape=state_shape, action_dim=2, load_factor=1.5)
    self.sess.run(tf.initialize_all_variables())
    def s_for(i):
      return np.random.random(state_shape)
    import random
    i = 0
    for e in xrange(10000):
      # add an episode to rm
      episode_len = random.choice([5,7,9,10,15])
      initial_state = s_for(i)
      action_reward_state = []
      for i in range(i+1, i+episode_len+1):
        a, r, s2 = (i*10)+7, (i*10)+8, s_for(i)
        action_reward_state.append((a, r, s2))
      rm.add_episode(initial_state, action_reward_state)
      i += episode_len + 1
      # dump
      print rm.current_stats()
      # fetch a batch, of all items, but do nothing with it.
      _ = rm.batch(idxs=range(10)) 
开发者ID:matpalm,项目名称:cartpoleplusplus,代码行数:25,代码来源:replay_memory_test.py

示例2: __init__

# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def __init__(self, env):
    """Create the replay memory and the value / NAF networks for `env`.

    Args:
      env: environment exposing `observation_space.shape` and
        `action_space.shape`; index 1 of the action space shape is used
        as the action dimension.
    """
    self.env = env
    obs_shape = self.env.observation_space.shape
    n_action_dims = self.env.action_space.shape[1]

    # Training is single-machine and synchronous for now, so experience is
    # drawn from a replay memory.
    # TODO: switch back to async training with multiple replicas (as in drivebot project)
    self.replay_memory = replay_memory.ReplayMemory(
        opts.replay_memory_size, obs_shape, n_action_dims)

    # Placeholders for s1 and s2: the state shape with a leading None
    # dimension prepended.
    batch_shape = [None]
    batch_shape.extend(obs_shape)
    s1 = tf.placeholder(dtype=tf.float32, shape=batch_shape)
    s2 = tf.placeholder(dtype=tf.float32, shape=batch_shape)

    # Base models for the value and NAF networks. The value part is built
    # separately because it has a target network. Per the original note,
    # with --share-input-state-representation the input state network of
    # value_net is reused by the naf.l_value and naf.output_actions nets.
    value_net = ValueNetwork("value", s1, opts.hidden_layers)
    self.value_net = value_net
    target_value_net = ValueNetwork("target_value", s2, opts.hidden_layers)
    self.target_value_net = target_value_net
    self.naf = NafNetwork("naf", s1, s2,
                          value_net, target_value_net,
                          n_action_dims)
开发者ID:matpalm,项目名称:cartpoleplusplus,代码行数:26,代码来源:naf_cartpole.py

示例3: main

# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def main():
    """Run a 30-episode greedy validation and append the mean reward to a log.

    The explorer is built from the NNP file named by `args.nnp`. One line,
    "<gym_env> <mean_reward>", is appended to `mean_reward.txt` inside
    `args.log_path`.
    """
    args = get_args()

    context = get_extension_context(args.extension, device_id=args.device_id)
    nn.set_default_context(context)

    from atari_utils import make_atari_deepmind
    env = make_atari_deepmind(args.gym_env, valid=True)
    print('Observation:', env.observation_space)
    print('Action:', env.action_space)
    obs_sampler = ObsSampler(args.num_frames)
    val_replay_memory = ReplayMemory(
        env.observation_space.shape,
        env.action_space.shape,
        max_memory=args.num_frames)

    # A single NNP file is evaluated, always acting greedily.
    explorer = GreedyExplorer(env.action_space.n,
                              use_nnp=True,
                              nnp_file=args.nnp,
                              name='qnet')
    validator = Validator(env, val_replay_memory, explorer, obs_sampler,
                          num_episodes=30,
                          clip_episode_step=True,
                          render=not args.no_render)

    mean_reward = validator.step()
    log_file = os.path.join(args.log_path, 'mean_reward.txt')
    with open(log_file, 'a') as f:
        summary = "{} {}".format(args.gym_env, str(mean_reward))
        print(summary, file=f)
开发者ID:sony,项目名称:nnabla-examples,代码行数:27,代码来源:eval_atari.py

示例4: __init__

# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def __init__(self,
                 rom_path=_default_rom_path,
                 frame_skip=4,
                 history_length=4,
                 resize_mode='scale',
                 resized_rows=84,
                 resized_cols=84,
                 crop_offset=8,
                 display_screen=False,
                 max_null_op=30,
                 replay_memory_size=1000000,
                 replay_start_size=100,
                 death_end_episode=True):
        """Load the ROM, allocate the screen buffer and replay memory, then start.

        `rom_path` and `display_screen` are handed to `ale_load_from_rom`.
        `resized_rows`/`resized_cols`, `history_length`, `replay_memory_size`
        and `replay_start_size` configure the ReplayMemory. The remaining
        arguments are stored on the instance unchanged. The constructor ends
        by calling `self.start()`.
        """
        super(AtariGame, self).__init__()
        self.rng = get_numpy_rng()

        # Emulator handle plus values read from it once at construction.
        self.ale = ale_load_from_rom(rom_path=rom_path, display_screen=display_screen)
        self.start_lives = self.ale.lives()
        self.action_set = self.ale.getMinimalActionSet()

        # Plain configuration, kept as given.
        self.resize_mode = resize_mode
        self.resized_rows = resized_rows
        self.resized_cols = resized_cols
        self.crop_offset = crop_offset
        self.frame_skip = frame_skip
        self.history_length = history_length
        self.max_null_op = max_null_op
        self.death_end_episode = death_end_episode

        # Two-slot uint8 screen buffer; its trailing dims are entries 1 and 0
        # (in that order) of the emulator's screen dims.
        self.screen_buffer_length = 2
        buffer_shape = (self.screen_buffer_length,
                        self.ale.getScreenDims()[1],
                        self.ale.getScreenDims()[0])
        self.screen_buffer = numpy.empty(buffer_shape, dtype='uint8')

        self.replay_memory = ReplayMemory(
            state_dim=(resized_rows, resized_cols),
            history_length=history_length,
            memory_size=replay_memory_size,
            replay_start_size=replay_start_size)
        self.start()
开发者ID:awslabs,项目名称:dynamic-training-with-apache-mxnet-on-aws,代码行数:32,代码来源:atari_game.py

示例5: __init__

# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def __init__(self, env):
    """Create the replay memory, actor/critic networks and their training ops.

    Args:
      env: environment exposing `observation_space.shape` and
        `action_space.shape`; index 1 of the action space shape is used
        as the action dimension.
    """
    self.env = env
    obs_shape = self.env.observation_space.shape
    n_action_dims = self.env.action_space.shape[1]

    # Training is single-machine and synchronous for now, so experience is
    # drawn from a replay memory. Per the original note, this replay memory
    # stores states in a Variable (i.e. potentially in GPU memory).
    # TODO: switch back to async training with multiple replicas (as in drivebot project)
    self.replay_memory = replay_memory.ReplayMemory(
        opts.replay_memory_size, obs_shape, n_action_dims)

    # Placeholders for s1 and s2: the state shape with a leading None
    # dimension prepended.
    batch_shape = [None]
    batch_shape.extend(obs_shape)
    s1 = tf.placeholder(dtype=tf.float32, shape=batch_shape)
    s2 = tf.placeholder(dtype=tf.float32, shape=batch_shape)

    # Online actor/critic followed by their target counterparts. Per the
    # original note, the target actor is never used for online sampling,
    # so it does not need explore noise.
    actor = ActorNetwork("actor", s1, n_action_dims)
    self.actor = actor
    critic = CriticNetwork("critic", actor)
    self.critic = critic
    target_actor = ActorNetwork("target_actor", s2, n_action_dims)
    self.target_actor = target_actor
    target_critic = CriticNetwork("target_critic", target_actor)
    self.target_critic = target_critic

    # Training ops: the actor is trained against the critic (for the
    # gradients), the critic against target_critic (RHS of the bellman
    # update).
    actor.init_ops_for_training(critic)
    critic.init_ops_for_training(target_critic)
开发者ID:matpalm,项目名称:cartpoleplusplus,代码行数:30,代码来源:ddpg_cartpole.py

示例6: setUp

# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def setUp(self):
    """Open a TF session, build a 3-slot ReplayMemory on it and init variables."""
    session = tf.Session()
    self.sess = session
    self.rm = ReplayMemory(session,
                           buffer_size=3,
                           state_shape=(2, 3),
                           action_dim=2,
                           load_factor=2)
    init_op = tf.initialize_all_variables()
    session.run(init_op)
开发者ID:matpalm,项目名称:cartpoleplusplus,代码行数:6,代码来源:replay_memory_test.py

示例7: _init_modules

# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def _init_modules(self):
        """Build the replay memory, both actor-critic nets, optimizer and summary ops."""
        # Replay memory.
        self.replay_memory = ReplayMemory(history_len=self.history_len,
                                          capacity=self.capacity)

        # The online and target actor-critic networks share every
        # hyper-parameter and differ only in their variable scope.
        net_kwargs = dict(input_dim=self.state_dim,
                          action_dim=self.action_dim,
                          critic_layers=self.critic_layers,
                          actor_layers=self.actor_layers,
                          actor_activation=self.actor_activation)
        self.ac_network = ActorCriticNet(scope='ac_network', **net_kwargs)
        self.target_network = ActorCriticNet(scope='target_network', **net_kwargs)

        # Optimizer.
        self.optimizer = Optimizer(config=self.config,
                                   ac_network=self.ac_network,
                                   target_network=self.target_network,
                                   replay_memory=self.replay_memory)

        # Op that updates the target network from the online one.
        self.clone_op = self.target_network.get_clone_op(self.ac_network,
                                                         tau=self.tau)

        # Tensorboard: a scalar "score" summary in the 'dpg' collection.
        score = tf.placeholder(dtype=tf.float32, shape=[], name='new_score')
        self.t_score = score
        tf.summary.scalar("score", score, collections=['dpg'])
        self.summary_op = tf.summary.merge_all('dpg')
开发者ID:PacktPublishing,项目名称:Python-Reinforcement-Learning-Projects,代码行数:32,代码来源:dpg.py

示例8: _init_modules

# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def _init_modules(self):
        """Build the replay memory, both Q-networks, optimizer and summary ops."""
        # Replay memory.
        self.replay_memory = ReplayMemory(history_len=self.num_frames,
                                          capacity=self.capacity,
                                          batch_size=self.batch_size,
                                          input_scale=self.input_scale)

        # Network input: the feedback size with the frame count appended.
        input_shape = self.feedback_size + (self.num_frames,)

        # The online and target Q-networks share every setting and differ
        # only in their variable scope.
        net_kwargs = dict(input_shape=input_shape,
                          n_outputs=len(self.actions),
                          network_type=self.config['network_type'])
        self.q_network = QNetwork(scope='q_network', **net_kwargs)
        self.target_network = QNetwork(scope='target_network', **net_kwargs)

        # Optimizer.
        self.optimizer = Optimizer(config=self.config,
                                   feedback_size=self.feedback_size,
                                   q_network=self.q_network,
                                   target_network=self.target_network,
                                   replay_memory=self.replay_memory)

        # Op that updates the target network from the online one.
        self.clone_op = self.target_network.get_clone_op(self.q_network)

        # Tensorboard: a scalar "score" summary in the 'dqn' collection.
        score = tf.placeholder(dtype=tf.float32, shape=[], name='new_score')
        self.t_score = score
        tf.summary.scalar("score", score, collections=['dqn'])
        self.summary_op = tf.summary.merge_all('dqn')
开发者ID:PacktPublishing,项目名称:Python-Reinforcement-Learning-Projects,代码行数:29,代码来源:q_learning.py

示例9: main

# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def main():
    """Play an Atari game greedily, forever, with a Q-network from an NNP file.

    When `args.nnp` is not given, the model is expected at
    `asset/<gym_env>/qnet.nnp`; if `find_local_nnp` reports it missing, it is
    downloaded from nnabla.org first. The function then validates one
    episode at a time in an endless loop and never returns.
    """
    args = get_args()

    nn.set_default_context(get_extension_context(
        args.extension, device_id=args.device_id))

    if args.nnp is None:
        local_nnp_dir = os.path.join("asset", args.gym_env)
        local_nnp_file = os.path.join(local_nnp_dir, "qnet.nnp")

        if not find_local_nnp(args.gym_env):
            logger.info("Downloading nnp data since you didn't specify...")
            # URLs always use '/' as the separator; os.path.join would
            # produce backslashes on Windows and break the download.
            nnp_uri = "/".join((
                "https://nnabla.org/pretrained-models/nnp_models/examples/dqn",
                args.gym_env,
                "qnet.nnp"))
            # makedirs also creates the missing "asset" parent, and
            # exist_ok avoids the exists-then-create race of os.mkdir.
            os.makedirs(local_nnp_dir, exist_ok=True)
            download(nnp_uri, output_file=local_nnp_file, open_file=False)
            logger.info("Download done!")

        args.nnp = local_nnp_file

    from atari_utils import make_atari_deepmind
    env = make_atari_deepmind(args.gym_env, valid=False)
    print('Observation:', env.observation_space)
    print('Action:', env.action_space)
    obs_sampler = ObsSampler(args.num_frames)
    val_replay_memory = ReplayMemory(env.observation_space.shape,
                                     env.action_space.shape,
                                     max_memory=args.num_frames)
    # just play greedily
    explorer = GreedyExplorer(
        env.action_space.n, use_nnp=True, nnp_file=args.nnp, name='qnet')
    validator = Validator(env, val_replay_memory, explorer, obs_sampler,
                          num_episodes=1, render=not args.no_render)
    while True:
        validator.step()
开发者ID:sony,项目名称:nnabla-examples,代码行数:40,代码来源:play_atari.py

示例10: test_large_var

# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def test_large_var(self):
    ### python replay_memory_test.py TestReplayMemory.test_large_var

    s = StopWatch()

    state_shape = (50, 50, 6)
    s.reset()
    rm = ReplayMemory(self.sess, buffer_size=10000, state_shape=state_shape, action_dim=2, load_factor=1.5)
    self.sess.run(tf.initialize_all_variables())
    print "cstr_and_init", s.time()

    bs1, bs1i, bs2, bs2i = rm.batch_ops()

    # build a simple, useless, net that uses state_1 & state_2 idxs
    # we want this to reduce to a single value to minimise data coming
    # back from GPU
    added_states = bs1 + bs2
    total_value = tf.reduce_sum(added_states)

    def random_s():
      return np.random.random(state_shape)

    for i in xrange(10):
      # add an episode to rm
      episode_len = random.choice([5,7,9,10,15])
      initial_state = random_s()
      action_reward_state = []
      for i in range(i+1, i+episode_len+1):
        a, r, s2 = (i*10)+7, (i*10)+8, random_s()
        action_reward_state.append((a, r, s2))
      start = time.time()
      s.reset()
      rm.add_episode(initial_state, action_reward_state)
      t = s.time()
      num_states = len(action_reward_state)+1
      print "add_episode_time", t, "#states=", num_states, "=> s/state", t/num_states
      i += episode_len + 1

      # get a random batch state
      b = rm.batch(batch_size=128)
      s.reset()
      x = self.sess.run(total_value, feed_dict={bs1i: b.state_1_idx, 
                                                bs2i: b.state_2_idx})
      print "fetch_and_run", x, s.time() 
开发者ID:matpalm,项目名称:cartpoleplusplus,代码行数:46,代码来源:replay_memory_test.py

示例11: run_trainer

# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def run_trainer(episodes, opts):
  # init replay memory
  render_shape = (opts.height, opts.width, 3)
  replay_memory = rm.ReplayMemory(opts=opts,
                                  state_shape=render_shape,
                                  action_dim=2,
                                  load_factor=1.1)
  if opts.event_log_in:
    replay_memory.reset_from_event_logs(opts.event_log_in,
                                        opts.event_log_in_num,
                                        opts.reset_smooth_reward_factor)

  # init network for training
  config = tf.ConfigProto()
  #config.gpu_options.allow_growth = True
  #config.log_device_placement = True
  config.gpu_options.per_process_gpu_memory_fraction = opts.gpu_mem_fraction
  sess = tf.Session(config=config)

  network = models.NafNetwork("naf", action_dim=2, opts=opts)

  with sess.as_default():
    # setup saver util and either load saved ckpt or init variables
    saver = ckpt_util.TrainerCkptSaver(sess, opts.ckpt_dir, opts.ckpt_save_freq)
    for v in tf.all_variables():
      if '/biases:' not in v.name:
        print >>sys.stderr, v.name, util.shape_and_product_of(v)
    network.setup_target_network()

    # while true process episodes from run_agents
    print util.dts(), "waiting for episodes"
    while True:
      start_time = time.time()
      episode = episodes.get()
      wait_time = time.time() - start_time

      start_time = time.time()
      replay_memory.add_episode(episode,
                                smooth_reward_factor=opts.smooth_reward_factor)
      losses = []
      if replay_memory.burnt_in():
        for _ in xrange(opts.batches_per_new_episode):
          batch = replay_memory.batch(opts.batch_size)
          batch_losses = network.train(batch).T[0]  # .T[0] => (B, 1) -> (B,)
          replay_memory.update_priorities(batch.idxs, batch_losses)
          network.target_value_net.update_target_weights()
          losses.extend(batch_losses)
        saver.save_if_required()
      process_time = time.time() - start_time

      stats = {"wait_time": wait_time,
               "process_time": process_time,
               "pending": episodes.qsize(),
               "replay_memory": replay_memory.stats}
      if losses:
        stats['loss'] = {"min": float(np.min(losses)),
                         "median": float(np.median(losses)),
                         "mean": float(np.mean(losses)),
                         "max": float(np.max(losses))}
      print "STATS\t%s\t%s" % (util.dts(), json.dumps(stats)) 
开发者ID:matpalm,项目名称:malmomo,代码行数:62,代码来源:run_trainer.py

示例12: main

# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def main():
    """Train a Q-learner on an Atari env, validating through a Validator.

    Builds separate training and validation environments and replay
    memories, wires them into a Trainer, and calls its `step()` once per
    epoch for `args.num_epochs` epochs.
    """
    args = get_args()

    context = get_extension_context(args.extension, device_id=args.device_id)
    nn.set_default_context(context)

    # Output location: the given log path, or OutputPath's own default.
    output_path = OutputPath(args.log_path) if args.log_path else OutputPath()
    monitor = Monitor(output_path.path)

    tbw = SummaryWriter(output_path.path)

    # Atari environments for training and validation.
    from atari_utils import make_atari_deepmind
    env = make_atari_deepmind(args.gym_env, valid=False)
    env_val = make_atari_deepmind(args.gym_env, valid=True)
    print('Observation:', env.observation_space)
    print('Action:', env.action_space)

    # Training memory holds 40000 entries (10000 * 4 frames, per the
    # original note); the validation memory holds only args.num_frames.
    val_replay_memory = ReplayMemory(env.observation_space.shape,
                                     env.action_space.shape,
                                     max_memory=args.num_frames)
    replay_memory = ReplayMemory(env.observation_space.shape,
                                 env.action_space.shape,
                                 max_memory=40000)

    learner = QLearner(q_cnn, env.action_space.n,
                       sync_freq=1000, save_freq=250000,
                       gamma=0.99, learning_rate=1e-4,
                       name_q='q', save_path=output_path)

    explorer = LinearDecayEGreedyExplorer(env.action_space.n,
                                          eps_start=1.0, eps_end=0.01,
                                          eps_steps=1e6,
                                          q_builder=q_cnn, name='q')

    sampler = Sampler(args.num_frames)
    obs_sampler = ObsSampler(args.num_frames)

    validator = Validator(env_val, val_replay_memory, explorer, obs_sampler,
                          num_episodes=args.num_val_episodes,
                          num_eval_steps=args.num_eval_steps,
                          render=args.render_val,
                          monitor=monitor, tbw=tbw)

    trainer_with_validator = Trainer(env, replay_memory, learner, sampler,
                                     explorer, obs_sampler,
                                     inter_eval_steps=args.inter_eval_steps,
                                     num_episodes=args.num_episodes,
                                     train_start=10000, batch_size=32,
                                     render=args.render_train,
                                     validator=validator,
                                     monitor=monitor, tbw=tbw)

    for _ in range(args.num_epochs):
        trainer_with_validator.step()
开发者ID:sony,项目名称:nnabla-examples,代码行数:50,代码来源:train_atari.py


注：本文中的replay_memory.ReplayMemory方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。