本文整理汇总了Python中replay_memory.ReplayMemory方法的典型用法代码示例。如果您正苦于以下问题:Python replay_memory.ReplayMemory方法的具体用法?Python replay_memory.ReplayMemory怎么用?Python replay_memory.ReplayMemory使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类replay_memory的用法示例。
在下文中一共展示了replay_memory.ReplayMemory方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_soak
# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def test_soak(self):
    """Soak test: add 10000 random-length episodes to a ReplayMemory.

    Each episode has 5, 7, 9, 10 or 15 steps of random (50, 50, 6) states.
    After every episode the memory's stats are printed and a batch of the
    first ten indexes is fetched and discarded. Nothing is asserted; the
    test only checks that the memory survives sustained use.

    NOTE(review): the indentation of the original snippet was lost; the
    stats dump and batch fetch are assumed to run once per episode.
    """
    import random

    state_shape = (50, 50, 6)
    rm = ReplayMemory(self.sess, buffer_size=10000,
                      state_shape=state_shape, action_dim=2, load_factor=1.5)
    self.sess.run(tf.initialize_all_variables())

    def s_for(i):
        # The id is accepted for symmetry with the action / reward ids but
        # is not used: every state is just uniform random noise.
        return np.random.random(state_shape)

    i = 0  # running id used to derive distinct action / reward values
    for _ in range(10000):
        # add an episode to rm
        episode_len = random.choice([5, 7, 9, 10, 15])
        initial_state = s_for(i)
        action_reward_state = []
        for step_id in range(i + 1, i + episode_len + 1):
            a, r, s2 = (step_id * 10) + 7, (step_id * 10) + 8, s_for(step_id)
            action_reward_state.append((a, r, s2))
        rm.add_episode(initial_state, action_reward_state)
        # The original reused `i` as the inner loop variable, leaving it at
        # i + episode_len before adding episode_len + 1; keep the same ids.
        i += 2 * episode_len + 1
        # dump
        print(rm.current_stats())
        # fetch a batch, of all items, but do nothing with it.
        _ = rm.batch(idxs=list(range(10)))
示例2: __init__
# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def __init__(self, env):
    """Set up the replay memory, state placeholders and value / NAF networks.

    Args:
        env: environment object; its `observation_space.shape` gives the
            state shape and `action_space.shape[1]` the action dimension.
    """
    self.env = env
    obs_shape = env.observation_space.shape
    num_action_dims = env.action_space.shape[1]

    # Single machine synchronous training for now, so train from a replay
    # memory.
    # TODO: switch back to async training with multiple replicas (as in
    # drivebot project)
    self.replay_memory = replay_memory.ReplayMemory(
        opts.replay_memory_size, obs_shape, num_action_dims)

    # s1 and s2 placeholders; the leading None leaves the batch size open.
    batched_shape = [None] + list(obs_shape)

    def _state_placeholder():
        return tf.placeholder(shape=batched_shape, dtype=tf.float32)

    s1 = _state_placeholder()
    s2 = _state_placeholder()

    # Base models for the value & naf networks. The value subportion of the
    # net is created separately because it has a target network. Note: with
    # --share-input-state-representation the input state network of the
    # value_net is reused by the naf.l_value and naf.output_actions nets.
    hidden = opts.hidden_layers
    self.value_net = ValueNetwork("value", s1, hidden)
    self.target_value_net = ValueNetwork("target_value", s2, hidden)
    self.naf = NafNetwork(
        "naf", s1, s2, self.value_net, self.target_value_net, num_action_dims)
示例3: main
# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def main():
    """Validate a stored Q-network on an Atari env and log the result.

    Builds the environment named by `args.gym_env`, runs one
    `Validator.step()` (configured for 30 episodes) with a GreedyExplorer
    loaded from `args.nnp`, and appends "<gym_env> <result>" to
    `mean_reward.txt` under `args.log_path`.
    """
    args = get_args()
    context = get_extension_context(args.extension, device_id=args.device_id)
    nn.set_default_context(context)

    from atari_utils import make_atari_deepmind
    env = make_atari_deepmind(args.gym_env, valid=True)
    print('Observation:', env.observation_space)
    print('Action:', env.action_space)

    obs_sampler = ObsSampler(args.num_frames)
    val_replay_memory = ReplayMemory(
        env.observation_space.shape,
        env.action_space.shape,
        max_memory=args.num_frames,
    )

    # The explorer loads its network from a single nnp file.
    explorer = GreedyExplorer(
        env.action_space.n, use_nnp=True, nnp_file=args.nnp, name='qnet')
    validator = Validator(
        env, val_replay_memory, explorer, obs_sampler,
        num_episodes=30,
        clip_episode_step=True,
        render=not args.no_render,
    )

    mean_reward = validator.step()

    # Append rather than overwrite so repeated runs accumulate in one file.
    result_path = os.path.join(args.log_path, 'mean_reward.txt')
    with open(result_path, 'a') as f:
        print("{} {}".format(args.gym_env, str(mean_reward)), file=f)
示例4: __init__
# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def __init__(self,
             rom_path=_default_rom_path,
             frame_skip=4, history_length=4,
             resize_mode='scale', resized_rows=84, resized_cols=84, crop_offset=8,
             display_screen=False, max_null_op=30,
             replay_memory_size=1000000,
             replay_start_size=100,
             death_end_episode=True):
    """Load the ROM into ALE, store the settings and start the game.

    The keyword arguments are stored unchanged on the instance, except
    `rom_path` / `display_screen` (passed to `ale_load_from_rom`) and
    `replay_memory_size` / `replay_start_size` (passed to `ReplayMemory`).
    """
    super(AtariGame, self).__init__()
    self.rng = get_numpy_rng()

    # Emulator handle and the values queried from it.
    self.ale = ale_load_from_rom(rom_path=rom_path, display_screen=display_screen)
    self.start_lives = self.ale.lives()
    self.action_set = self.ale.getMinimalActionSet()

    # Frame preprocessing settings.
    self.resize_mode = resize_mode
    self.resized_rows = resized_rows
    self.resized_cols = resized_cols
    self.crop_offset = crop_offset

    # Episode / stepping settings.
    self.frame_skip = frame_skip
    self.history_length = history_length
    self.max_null_op = max_null_op
    self.death_end_episode = death_end_episode

    # Raw screen buffer of two frames, indexed (frame, dims[1], dims[0]).
    self.screen_buffer_length = 2
    buffer_shape = (
        self.screen_buffer_length,
        self.ale.getScreenDims()[1],
        self.ale.getScreenDims()[0],
    )
    self.screen_buffer = numpy.empty(buffer_shape, dtype='uint8')

    self.replay_memory = ReplayMemory(
        state_dim=(resized_rows, resized_cols),
        history_length=history_length,
        memory_size=replay_memory_size,
        replay_start_size=replay_start_size,
    )
    self.start()
示例5: __init__
# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def __init__(self, env):
    """Set up the replay memory, placeholders and actor / critic networks.

    Args:
        env: environment object; its `observation_space.shape` gives the
            state shape and `action_space.shape[1]` the action dimension.
    """
    self.env = env
    obs_shape = env.observation_space.shape
    num_action_dims = env.action_space.shape[1]

    # Single machine synchronous training for now, so train from a replay
    # memory. This replay memory stores states in a Variable (ie potentially
    # in gpu memory).
    # TODO: switch back to async training with multiple replicas (as in
    # drivebot project)
    self.replay_memory = replay_memory.ReplayMemory(
        opts.replay_memory_size, obs_shape, num_action_dims)

    # s1 and s2 placeholders; the leading None leaves the batch size open.
    batched_shape = [None] + list(obs_shape)

    def _state_placeholder():
        return tf.placeholder(shape=batched_shape, dtype=tf.float32)

    s1 = _state_placeholder()
    s2 = _state_placeholder()

    # Base models for actor / critic and their corresponding target
    # networks. target_actor is never used for online sampling so it does
    # not need explore noise.
    self.actor = ActorNetwork("actor", s1, num_action_dims)
    self.critic = CriticNetwork("critic", self.actor)
    self.target_actor = ActorNetwork("target_actor", s2, num_action_dims)
    self.target_critic = CriticNetwork("target_critic", self.target_actor)

    # Training ops:
    #  - the actor needs the critic (for getting gradients)
    #  - the critic needs target_critic (for RHS of bellman update)
    self.actor.init_ops_for_training(self.critic)
    self.critic.init_ops_for_training(self.target_critic)
示例6: setUp
# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def setUp(self):
    """Open a TensorFlow session, build a small ReplayMemory and init variables.

    The memory is created with buffer_size=3, state_shape=(2, 3),
    action_dim=2 and load_factor=2, and is stored on `self.rm`.
    """
    session = tf.Session()
    self.sess = session
    self.rm = ReplayMemory(
        session,
        buffer_size=3,
        state_shape=(2, 3),
        action_dim=2,
        load_factor=2,
    )
    # Variables must be initialised after ReplayMemory has been constructed.
    session.run(tf.initialize_all_variables())
示例7: _init_modules
# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def _init_modules(self):
    """Create the replay memory, both actor-critic nets, optimizer and summaries."""
    # Replay memory
    self.replay_memory = ReplayMemory(
        history_len=self.history_len, capacity=self.capacity)

    # The online and target networks share every argument except the scope.
    net_kwargs = dict(
        input_dim=self.state_dim,
        action_dim=self.action_dim,
        critic_layers=self.critic_layers,
        actor_layers=self.actor_layers,
        actor_activation=self.actor_activation,
    )
    # Actor critic network
    self.ac_network = ActorCriticNet(scope='ac_network', **net_kwargs)
    # Target network
    self.target_network = ActorCriticNet(scope='target_network', **net_kwargs)

    # Optimizer
    self.optimizer = Optimizer(
        config=self.config,
        ac_network=self.ac_network,
        target_network=self.target_network,
        replay_memory=self.replay_memory,
    )

    # Op for updating the target network from the online network
    self.clone_op = self.target_network.get_clone_op(
        self.ac_network, tau=self.tau)

    # Tensorboard: a scalar "score" summary fed through a placeholder and
    # kept in its own 'dpg' collection.
    score_placeholder = tf.placeholder(
        dtype=tf.float32, shape=[], name='new_score')
    self.t_score = score_placeholder
    tf.summary.scalar("score", score_placeholder, collections=['dpg'])
    self.summary_op = tf.summary.merge_all('dpg')
示例8: _init_modules
# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def _init_modules(self):
    """Create the replay memory, Q / target networks, optimizer and summaries."""
    # Replay memory
    self.replay_memory = ReplayMemory(
        history_len=self.num_frames,
        capacity=self.capacity,
        batch_size=self.batch_size,
        input_scale=self.input_scale,
    )

    # Network input is the feedback size with the frame count appended as
    # the last dimension.
    input_shape = self.feedback_size + (self.num_frames,)

    def _build_q_network(scope):
        # Both networks are identical apart from their variable scope.
        return QNetwork(input_shape=input_shape,
                        n_outputs=len(self.actions),
                        network_type=self.config['network_type'],
                        scope=scope)

    # Q-network
    self.q_network = _build_q_network('q_network')
    # Target network
    self.target_network = _build_q_network('target_network')

    # Optimizer
    self.optimizer = Optimizer(
        config=self.config,
        feedback_size=self.feedback_size,
        q_network=self.q_network,
        target_network=self.target_network,
        replay_memory=self.replay_memory,
    )

    # Op for updating the target network from the Q-network
    self.clone_op = self.target_network.get_clone_op(self.q_network)

    # Tensorboard: a scalar "score" summary fed through a placeholder and
    # kept in its own 'dqn' collection.
    score_placeholder = tf.placeholder(
        dtype=tf.float32, shape=[], name='new_score')
    self.t_score = score_placeholder
    tf.summary.scalar("score", score_placeholder, collections=['dqn'])
    self.summary_op = tf.summary.merge_all('dqn')
示例9: main
# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def main():
    """Play an Atari game forever with a stored Q-network.

    When `args.nnp` is not given, the model is expected at
    `asset/<gym_env>/qnet.nnp`; it is downloaded first if
    `find_local_nnp` reports it missing. The function then loops
    forever calling `validator.step()` (one episode per step), so it never
    returns normally.
    """
    args = get_args()
    nn.set_default_context(get_extension_context(
        args.extension, device_id=args.device_id))

    if args.nnp is None:
        local_nnp_dir = os.path.join("asset", args.gym_env)
        local_nnp_file = os.path.join(local_nnp_dir, "qnet.nnp")

        if not find_local_nnp(args.gym_env):
            logger.info("Downloading nnp data since you didn't specify...")
            # Join URL parts with "/" explicitly: os.path.join uses the
            # platform separator, which is a backslash on Windows and
            # would produce an invalid URL.
            nnp_uri = "/".join((
                "https://nnabla.org/pretrained-models/nnp_models/examples/dqn",
                args.gym_env,
                "qnet.nnp"))
            if not os.path.exists(local_nnp_dir):
                # makedirs also creates the parent "asset" directory, which
                # os.mkdir would fail on when it does not exist yet.
                os.makedirs(local_nnp_dir)
            download(nnp_uri, output_file=local_nnp_file, open_file=False)
            logger.info("Download done!")

        args.nnp = local_nnp_file

    from atari_utils import make_atari_deepmind
    env = make_atari_deepmind(args.gym_env, valid=False)
    print('Observation:', env.observation_space)
    print('Action:', env.action_space)
    obs_sampler = ObsSampler(args.num_frames)
    val_replay_memory = ReplayMemory(env.observation_space.shape,
                                     env.action_space.shape,
                                     max_memory=args.num_frames)
    # just play greedily
    explorer = GreedyExplorer(
        env.action_space.n, use_nnp=True, nnp_file=args.nnp, name='qnet')
    validator = Validator(env, val_replay_memory, explorer, obs_sampler,
                          num_episodes=1, render=not args.no_render)
    # Intentionally endless: keep playing until the process is interrupted.
    while True:
        validator.step()
示例10: test_large_var
# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def test_large_var(self):
    """Time ReplayMemory construction, episode insertion and batch fetching.

    Builds a ReplayMemory for (50, 50, 6) states, then for ten random
    episodes prints how long `add_episode` took and how long it takes to
    run a small reduction over a batch of 128 state pairs. Nothing is
    asserted; timings are written to stdout.

    NOTE(review): the indentation of the original snippet was lost; the
    batch fetch is assumed to run once per episode, inside the loop.
    """
    ### python replay_memory_test.py TestReplayMemory.test_large_var
    s = StopWatch()
    state_shape = (50, 50, 6)
    s.reset()
    rm = ReplayMemory(self.sess, buffer_size=10000, state_shape=state_shape,
                      action_dim=2, load_factor=1.5)
    self.sess.run(tf.initialize_all_variables())
    print("cstr_and_init %s" % (s.time(),))

    bs1, bs1i, bs2, bs2i = rm.batch_ops()
    # build a simple, useless, net that uses state_1 & state_2 idxs
    # we want this to reduce to a single value to minimise data coming
    # back from GPU
    added_states = bs1 + bs2
    total_value = tf.reduce_sum(added_states)

    def random_s():
        return np.random.random(state_shape)

    for episode_idx in range(10):
        # add an episode to rm
        episode_len = random.choice([5, 7, 9, 10, 15])
        initial_state = random_s()
        action_reward_state = []
        # Ids are derived from the episode index, exactly as in the
        # original. Its trailing `i += episode_len + 1` had no effect
        # because the outer loop rebinds the variable, so it is dropped.
        for step_id in range(episode_idx + 1, episode_idx + episode_len + 1):
            a, r, s2 = (step_id * 10) + 7, (step_id * 10) + 8, random_s()
            action_reward_state.append((a, r, s2))
        s.reset()
        rm.add_episode(initial_state, action_reward_state)
        t = s.time()
        # +1 accounts for the initial state stored alongside the steps.
        num_states = len(action_reward_state) + 1
        print("add_episode_time %s #states= %s => s/state %s"
              % (t, num_states, t / num_states))

        # get a random batch state
        b = rm.batch(batch_size=128)
        s.reset()
        x = self.sess.run(total_value, feed_dict={bs1i: b.state_1_idx,
                                                  bs2i: b.state_2_idx})
        print("fetch_and_run %s %s" % (x, s.time()))
示例11: run_trainer
# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def run_trainer(episodes, opts):
    """Consume episodes from a queue and train a NAF network on them forever.

    Args:
        episodes: queue-like object; `get()` blocks for the next episode and
            `qsize()` reports how many are pending.
        opts: options object supplying the render size, replay / event-log
            settings, GPU memory fraction, checkpoint settings and batch
            parameters read below.

    For every episode a tab-separated "STATS" line with a JSON payload is
    printed to stdout. The function loops forever and never returns.

    NOTE(review): the indentation of the original snippet was lost; the
    `saver.save_if_required()` call is assumed to belong to the burnt-in
    branch, after the batch loop. Confirm against the upstream project.
    """
    # init replay memory
    render_shape = (opts.height, opts.width, 3)
    replay_memory = rm.ReplayMemory(opts=opts,
                                    state_shape=render_shape,
                                    action_dim=2,
                                    load_factor=1.1)
    if opts.event_log_in:
        replay_memory.reset_from_event_logs(opts.event_log_in,
                                            opts.event_log_in_num,
                                            opts.reset_smooth_reward_factor)

    # init network for training
    config = tf.ConfigProto()
    #config.gpu_options.allow_growth = True
    #config.log_device_placement = True
    config.gpu_options.per_process_gpu_memory_fraction = opts.gpu_mem_fraction
    sess = tf.Session(config=config)

    network = models.NafNetwork("naf", action_dim=2, opts=opts)

    with sess.as_default():
        # setup saver util and either load saved ckpt or init variables
        saver = ckpt_util.TrainerCkptSaver(sess, opts.ckpt_dir, opts.ckpt_save_freq)
        # List every non-bias variable on stderr.
        for v in tf.all_variables():
            if '/biases:' not in v.name:
                sys.stderr.write(
                    "%s %s\n" % (v.name, util.shape_and_product_of(v)))
        network.setup_target_network()

        # while true process episodes from run_agents
        print("%s waiting for episodes" % (util.dts(),))
        while True:
            # Time spent blocked on the queue ...
            start_time = time.time()
            episode = episodes.get()
            wait_time = time.time() - start_time

            # ... versus time spent storing the episode and training.
            start_time = time.time()
            replay_memory.add_episode(
                episode, smooth_reward_factor=opts.smooth_reward_factor)
            losses = []
            if replay_memory.burnt_in():
                for _ in range(opts.batches_per_new_episode):
                    batch = replay_memory.batch(opts.batch_size)
                    batch_losses = network.train(batch).T[0]  # .T[0] => (B, 1) -> (B,)
                    replay_memory.update_priorities(batch.idxs, batch_losses)
                    network.target_value_net.update_target_weights()
                    losses.extend(batch_losses)
                saver.save_if_required()
            process_time = time.time() - start_time

            stats = {"wait_time": wait_time,
                     "process_time": process_time,
                     "pending": episodes.qsize(),
                     "replay_memory": replay_memory.stats}
            if losses:
                # Cast to plain floats so json.dumps can serialise them.
                stats['loss'] = {"min": float(np.min(losses)),
                                 "median": float(np.median(losses)),
                                 "mean": float(np.mean(losses)),
                                 "max": float(np.max(losses))}
            print("STATS\t%s\t%s" % (util.dts(), json.dumps(stats)))
示例12: main
# 需要导入模块: import replay_memory [as 别名]
# 或者: from replay_memory import ReplayMemory [as 别名]
def main():
    """Train a Q-network on an Atari env with periodic validation.

    Builds separate training and validation environments and replay
    memories, wires learner, explorer, samplers, validator and trainer
    together, then calls `Trainer.step()` once per epoch for
    `args.num_epochs` epochs.
    """
    args = get_args()
    nn.set_default_context(get_extension_context(
        args.extension, device_id=args.device_id))

    # Fall back to OutputPath's own default when no log path is given.
    output_path = OutputPath(args.log_path) if args.log_path else OutputPath()
    monitor = Monitor(output_path.path)
    tbw = SummaryWriter(output_path.path)

    # Create an atari env.
    from atari_utils import make_atari_deepmind
    env = make_atari_deepmind(args.gym_env, valid=False)
    env_val = make_atari_deepmind(args.gym_env, valid=True)
    print('Observation:', env.observation_space)
    print('Action:', env.action_space)

    obs_shape = env.observation_space.shape
    action_shape = env.action_space.shape
    val_replay_memory = ReplayMemory(
        obs_shape, action_shape, max_memory=args.num_frames)
    # 10000 * 4 frames
    replay_memory = ReplayMemory(obs_shape, action_shape, max_memory=40000)

    num_actions = env.action_space.n
    learner = QLearner(
        q_cnn, num_actions,
        sync_freq=1000, save_freq=250000,
        gamma=0.99, learning_rate=1e-4,
        name_q='q', save_path=output_path)
    explorer = LinearDecayEGreedyExplorer(
        num_actions,
        eps_start=1.0, eps_end=0.01, eps_steps=1e6,
        q_builder=q_cnn, name='q')

    sampler = Sampler(args.num_frames)
    obs_sampler = ObsSampler(args.num_frames)

    validator = Validator(
        env_val, val_replay_memory, explorer, obs_sampler,
        num_episodes=args.num_val_episodes,
        num_eval_steps=args.num_eval_steps,
        render=args.render_val,
        monitor=monitor,
        tbw=tbw)

    trainer_with_validator = Trainer(
        env, replay_memory, learner, sampler, explorer, obs_sampler,
        inter_eval_steps=args.inter_eval_steps,
        num_episodes=args.num_episodes,
        train_start=10000,
        batch_size=32,
        render=args.render_train,
        validator=validator,
        monitor=monitor,
        tbw=tbw)

    for _ in range(args.num_epochs):
        trainer_with_validator.step()