本文整理汇总了 Python 中 baselines.logger.record_tabular 方法的典型用法代码示例。如果您正苦于以下问题：Python logger.record_tabular 方法的具体用法？Python logger.record_tabular 怎么用？Python logger.record_tabular 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 baselines.logger 的用法示例。
在下文中一共展示了 logger.record_tabular 方法的 12 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: fit
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import record_tabular [as 别名]
def fit(self, paths, targvals):
    """Run 25 update passes on the given data, logging explained variance.

    The explained variance of ``self._predict`` against the targets is
    recorded under "EVBefore" before the updates and "EVAfter" afterwards.

    Args:
        paths: Iterable of paths; each is passed through ``self._preproc``
            and the results are concatenated into one feature array.
        targvals: Sequence of target arrays, concatenated into one array.
    """
    features = np.concatenate([self._preproc(path) for path in paths])
    targets = np.concatenate(targvals)

    def _current_ev():
        # Explained variance of the current predictions on the full batch.
        return common.explained_variance(self._predict(features), targets)

    logger.record_tabular("EVBefore", _current_ev())
    for _ in range(25):
        self.do_update(features, targets)
    logger.record_tabular("EVAfter", _current_ev())
示例2: main
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import record_tabular [as 别名]
def main(policy_file, seed, n_test_rollouts, render):
    """Load a pickled policy, run test rollouts with it, and log mean statistics.

    Args:
        policy_file: Path of the pickled policy. Its ``info['env_name']``
            selects the environment-specific parameters.
        seed: Seed passed to ``set_global_seeds`` and to the rollout worker.
        n_test_rollouts: Number of times ``generate_rollouts`` is invoked.
        render: Coerced with ``bool`` and forwarded as the worker's
            ``render`` option.
    """
    set_global_seeds(seed)

    # Load policy.
    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file; only pass policy files that come from a trusted source.
    with open(policy_file, 'rb') as f:
        policy = pickle.load(f)
    env_name = policy.info['env_name']

    # Prepare params.
    # Work on a copy: updating config.DEFAULT_PARAMS in place would leak this
    # run's env-specific overrides into every later user of the defaults.
    params = dict(config.DEFAULT_PARAMS)
    if env_name in config.DEFAULT_ENV_PARAMS:
        params.update(config.DEFAULT_ENV_PARAMS[env_name])  # merge env-specific parameters in
    params['env_name'] = env_name
    params = config.prepare_params(params)
    config.log_params(params, logger=logger)

    dims = config.configure_dims(params)

    eval_params = {
        'exploit': True,
        'use_target_net': params['test_with_polyak'],
        'compute_Q': True,
        'rollout_batch_size': 1,
        'render': bool(render),
    }
    # These settings are copied unchanged from the prepared parameters.
    for name in ['T', 'gamma', 'noise_eps', 'random_eps']:
        eval_params[name] = params[name]

    evaluator = RolloutWorker(params['make_env'], policy, dims, logger, **eval_params)
    evaluator.seed(seed)

    # Run evaluation.
    evaluator.clear_history()
    for _ in range(n_test_rollouts):
        evaluator.generate_rollouts()

    # Record logs: one tabular entry per statistic, averaged with np.mean.
    for key, val in evaluator.logs('test'):
        logger.record_tabular(key, np.mean(val))
    logger.dump_tabular()
示例3: call
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import record_tabular [as 别名]
def call(self, on_policy):
    """Run one training call, on-policy or from the replay buffer.

    Args:
        on_policy: When truthy, fresh data is taken from ``runner.run()``,
            fed to ``self.episode_stats`` and (if a buffer exists) stored in
            the buffer. Otherwise a batch is read from ``buffer.get()``.
    """
    runner, model, buffer, steps = self.runner, self.model, self.buffer, self.steps

    if not on_policy:
        # Off-policy: obs, actions, rewards, mus, dones and masks come from the buffer.
        obs, actions, rewards, mus, dones, masks = buffer.get()
    else:
        enc_obs, obs, actions, rewards, mus, dones, masks = runner.run()
        self.episode_stats.feed(rewards, dones)
        if buffer is not None:
            buffer.put(enc_obs, actions, rewards, mus, dones, masks)

    # Reshape everything to the batch layout announced by the runner.
    obs = obs.reshape(runner.batch_ob_shape)
    batch = runner.nbatch
    actions = actions.reshape([batch])
    rewards = rewards.reshape([batch])
    mus = mus.reshape([batch, runner.nact])
    dones = dones.reshape([batch])
    masks = masks.reshape([runner.batch_ob_shape[0]])

    names_ops, values_ops = model.train(obs, actions, rewards, dones, mus, model.initial_state, masks, steps)

    # Only on-policy calls that land on a logging interval produce output.
    if not (on_policy and (int(steps / runner.nbatch) % self.log_interval == 0)):
        return

    logger.record_tabular("total_timesteps", steps)
    logger.record_tabular("fps", int(steps / (time.time() - self.tstart)))
    # IMP: In EpisodicLife env, during training, we get done=True at each loss of life,
    # not just at the terminal state. Thus, this is mean until end of life, not end of
    # episode. For true episode rewards, see the monitor files in the log folder.
    logger.record_tabular("mean_episode_length", self.episode_stats.mean_length())
    logger.record_tabular("mean_episode_reward", self.episode_stats.mean_reward())
    for op_name, op_value in zip(names_ops, values_ops):
        logger.record_tabular(op_name, float(op_value))
    logger.dump_tabular()
示例4: learn
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import record_tabular [as 别名]
def learn(policy, env, seed, nsteps=5, total_timesteps=int(80e6), vf_coef=0.5, ent_coef=0.01, max_grad_norm=0.5, lr=7e-4, lrschedule='linear', epsilon=1e-5, alpha=0.99, gamma=0.99, log_interval=100):
    """Alternate rollouts and model updates, logging training statistics.

    The default graph is reset and the global seeds are set first. The loop
    then runs ``total_timesteps // (env.num_envs * nsteps)`` updates. Nothing
    is returned.

    Args:
        policy: Forwarded to ``Model``.
        env: Environment exposing ``num_envs``, ``observation_space`` and
            ``action_space``.
        seed: Passed to ``set_global_seeds``.
        nsteps: Forwarded to ``Model`` and ``Runner``; also sets the batch
            size ``env.num_envs * nsteps``.
        total_timesteps: Timestep budget that determines the update count.
        vf_coef, ent_coef, max_grad_norm, lr, lrschedule, epsilon, alpha:
            Forwarded unchanged to ``Model``.
        gamma: Forwarded to ``Runner``.
        log_interval: Statistics are logged on the first update and on every
            update whose index is a multiple of this value.
    """
    tf.reset_default_graph()
    set_global_seeds(seed)

    nenvs = env.num_envs
    ob_space = env.observation_space
    ac_space = env.action_space
    model_kwargs = dict(
        policy=policy, ob_space=ob_space, ac_space=ac_space, nenvs=nenvs, nsteps=nsteps,
        ent_coef=ent_coef, vf_coef=vf_coef, max_grad_norm=max_grad_norm, lr=lr,
        alpha=alpha, epsilon=epsilon, total_timesteps=total_timesteps, lrschedule=lrschedule,
    )
    model = Model(**model_kwargs)
    runner = Runner(env, model, nsteps=nsteps, gamma=gamma)

    nbatch = nenvs * nsteps
    num_updates = total_timesteps // nbatch
    tstart = time.time()
    update = 0
    while update < num_updates:
        update += 1
        obs, states, rewards, masks, actions, values = runner.run()
        policy_loss, value_loss, policy_entropy = model.train(obs, states, rewards, masks, actions, values)

        # Throughput is measured from the start of training, on every update.
        elapsed = time.time() - tstart
        timesteps_so_far = update * nbatch
        fps = int(timesteps_so_far / elapsed)

        if update != 1 and update % log_interval != 0:
            continue

        ev = explained_variance(values, rewards)
        logger.record_tabular("nupdates", update)
        logger.record_tabular("total_timesteps", update * nbatch)
        logger.record_tabular("fps", fps)
        logger.record_tabular("policy_entropy", float(policy_entropy))
        logger.record_tabular("value_loss", float(value_loss))
        logger.record_tabular("explained_variance", float(ev))
        logger.dump_tabular()
env.close()
示例5: main
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import record_tabular [as 别名]
def main(policy_file, seed, n_test_rollouts, render):
    """Load a pickled policy, run test rollouts with it, and log mean statistics.

    Args:
        policy_file: Path of the pickled policy. Its ``info['env_name']``
            selects the environment-specific parameters.
        seed: Seed passed to ``set_global_seeds`` and to the rollout worker.
        n_test_rollouts: Number of times ``generate_rollouts`` is invoked.
        render: Coerced with ``bool`` and forwarded as the worker's
            ``render`` option.
    """
    set_global_seeds(seed)

    # Load policy.
    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file; only pass policy files that come from a trusted source.
    with open(policy_file, 'rb') as f:
        policy = pickle.load(f)
    env_name = policy.info['env_name']

    # Prepare params.
    # Work on a copy: updating config.DEFAULT_PARAMS in place would leak this
    # run's env-specific overrides into every later user of the defaults.
    params = dict(config.DEFAULT_PARAMS)
    if env_name in config.DEFAULT_ENV_PARAMS:
        params.update(config.DEFAULT_ENV_PARAMS[env_name])  # merge env-specific parameters in
    params['env_name'] = env_name
    params = config.prepare_params(params)
    config.log_params(params, logger=logger)

    dims = config.configure_dims(params)

    eval_params = {
        'exploit': True,
        'use_target_net': params['test_with_polyak'],
        'compute_Q': True,
        'rollout_batch_size': 1,
        'render': bool(render),
    }
    # These settings are copied unchanged from the prepared parameters.
    for name in ['T', 'gamma', 'noise_eps', 'random_eps']:
        eval_params[name] = params[name]

    evaluator = RolloutWorker(params['make_env'], policy, dims, logger, **eval_params)
    evaluator.seed(seed)

    # Run evaluation.
    evaluator.clear_history()
    for _ in range(n_test_rollouts):
        evaluator.generate_rollouts()

    # Record logs: one tabular entry per statistic, averaged with np.mean.
    for key, val in evaluator.logs('test'):
        logger.record_tabular(key, np.mean(val))
    logger.dump_tabular()
示例6: call
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import record_tabular [as 别名]
def call(self, on_policy):
    """Run one training call, on-policy or from the replay buffer.

    Args:
        on_policy: When truthy, fresh data is taken from ``runner.run()``,
            fed to ``self.episode_stats`` and (if a buffer exists) stored in
            the buffer. Otherwise a batch is read from ``buffer.get()``.
    """
    runner, model, buffer, steps = self.runner, self.model, self.buffer, self.steps

    if not on_policy:
        # Off-policy: obs, actions, rewards, mus, dones and masks come from the buffer.
        obs, actions, rewards, mus, dones, masks = buffer.get()
    else:
        enc_obs, obs, actions, rewards, mus, dones, masks = runner.run()
        self.episode_stats.feed(rewards, dones)
        if buffer is not None:
            buffer.put(enc_obs, actions, rewards, mus, dones, masks)

    # Reshape everything to the batch layout announced by the runner.
    obs = obs.reshape(runner.batch_ob_shape)
    batch = runner.nbatch
    actions = actions.reshape([batch])
    rewards = rewards.reshape([batch])
    mus = mus.reshape([batch, runner.nact])
    dones = dones.reshape([batch])
    masks = masks.reshape([runner.batch_ob_shape[0]])

    names_ops, values_ops = model.train(obs, actions, rewards, dones, mus, model.initial_state, masks, steps)

    # Only on-policy calls that land on a logging interval produce output.
    if not (on_policy and (int(steps / runner.nbatch) % self.log_interval == 0)):
        return

    logger.record_tabular("total_timesteps", steps)
    logger.record_tabular("fps", int(steps / (time.time() - self.tstart)))
    # IMP: In EpisodicLife env, during training, we get done=True at each loss of life,
    # not just at the terminal state. Thus, this is mean until end of life, not end of
    # episode. For true episode rewards, see the monitor files in the log folder.
    logger.record_tabular("mean_episode_length", self.episode_stats.mean_length())
    logger.record_tabular("mean_episode_reward", self.episode_stats.mean_reward())
    for op_name, op_value in zip(names_ops, values_ops):
        logger.record_tabular(op_name, float(op_value))
    logger.dump_tabular()
示例7: log_scalars
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import record_tabular [as 别名]
def log_scalars(writer, step):
    """Create a scalar-logging callback bound to ``writer`` and ``step``.

    Args:
        writer: Object whose ``add_summary(summary, step)`` receives each
            scalar summary.
        step: Step value passed along with every summary.

    Returns:
        A function ``log_scalar(name, value)`` that records the value in the
        tabular logger and writes a ``tf.Summary`` tagged ``name`` to
        ``writer``.
    """
    def log_scalar(name, value):
        logger.record_tabular(name, value)
        scalar = tf.Summary.Value(tag=name, simple_value=value)
        writer.add_summary(tf.Summary(value=[scalar]), step)

    return log_scalar
示例8: log
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import record_tabular [as 别名]
def log(self):
    """Dump a progress table when the episode count hits the print frequency.

    Nothing is logged unless ``self.t`` is positive, ``self.print_freq`` is
    set, and the number of recorded episodes is a multiple of
    ``self.print_freq``.
    """
    is_due = (
        self.t > 0
        and self.print_freq is not None
        and len(self.episode_rewards) % self.print_freq == 0
    )
    if not is_due:
        return

    # Mean over the most recent (up to) 100 episode rewards.
    mean_100ep_reward = np.mean(self.episode_rewards[-100:])
    num_episodes = len(self.episode_rewards)

    logger.record_tabular('steps', self.t)
    logger.record_tabular('episodes', num_episodes)
    logger.record_tabular('mean 100 episode reward', '{:.3f}'.format(mean_100ep_reward))

    target_pct = 100 * self.exploration_schedule.value(self.t)
    logger.record_tabular('exploration (target)', '{:.3f} %'.format(target_pct))
    current_pct = 100 * (1.0 - self.greedy_freq)
    logger.record_tabular('exploration (current)', '{:.3f} %'.format(current_pct))
    logger.dump_tabular()
示例9: learn
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import record_tabular [as 别名]
def learn(policy, env, seed, nsteps=5, total_timesteps=int(80e6), vf_coef=0.5, ent_coef=0.01, max_grad_norm=0.5,
          lr=7e-4, lrschedule='linear', epsilon=1e-5, alpha=0.99, gamma=0.99, log_interval=100):
    """Alternate rollouts and model updates, then close ``env`` and return the model.

    Runs ``total_timesteps // (env.num_envs * nsteps)`` updates. ``env`` is
    closed when the function exits, including when training raises.

    Args:
        policy: Forwarded to ``Model``.
        env: Environment exposing ``num_envs``, ``observation_space``,
            ``action_space`` and ``close()``.
        seed: Passed to ``set_global_seeds``.
        nsteps: Forwarded to ``Model`` and ``Runner``; also sets the batch
            size ``env.num_envs * nsteps``.
        total_timesteps: Timestep budget that determines the update count.
        vf_coef, ent_coef, max_grad_norm, lr, lrschedule, epsilon, alpha:
            Forwarded unchanged to ``Model``.
        gamma: Forwarded to ``Runner``.
        log_interval: Statistics are logged on the first update and on every
            update whose index is a multiple of this value.

    Returns:
        The ``Model`` instance that was trained.
    """
    # try/finally so the environment is released even if training fails.
    try:
        set_global_seeds(seed)

        nenvs = env.num_envs
        ob_space = env.observation_space
        ac_space = env.action_space
        model = Model(policy=policy, ob_space=ob_space, ac_space=ac_space, nenvs=nenvs, nsteps=nsteps,
                      ent_coef=ent_coef, vf_coef=vf_coef, max_grad_norm=max_grad_norm, lr=lr,
                      alpha=alpha, epsilon=epsilon, total_timesteps=total_timesteps, lrschedule=lrschedule)
        runner = Runner(env, model, nsteps=nsteps, gamma=gamma)

        nbatch = nenvs * nsteps
        tstart = time.time()
        for update in range(1, total_timesteps // nbatch + 1):
            obs, states, rewards, masks, actions, values = runner.run()
            _policy_loss, value_loss, policy_entropy = model.train(obs, states, rewards, masks, actions, values)

            if update % log_interval == 0 or update == 1:
                # Throughput is only needed when a row is actually logged.
                nseconds = time.time() - tstart
                fps = int((update * nbatch) / nseconds)
                ev = explained_variance(values, rewards)
                logger.record_tabular("nupdates", update)
                logger.record_tabular("total_timesteps", update * nbatch)
                logger.record_tabular("fps", fps)
                logger.record_tabular("policy_entropy", float(policy_entropy))
                logger.record_tabular("value_loss", float(value_loss))
                logger.record_tabular("explained_variance", float(ev))
                logger.dump_tabular()
        return model
    finally:
        env.close()
示例10: learn
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import record_tabular [as 别名]
def learn(policy, env, seed, nsteps=5, total_timesteps=int(80e6), vf_coef=0.5, ent_coef=0.01, max_grad_norm=0.5,
          lr=7e-4, lrschedule='linear', epsilon=1e-5, alpha=0.99, gamma=0.99, log_interval=100,
          sil_update=4, sil_beta=0.0):
    """Alternate rollouts, model updates and ``sil_train`` calls; return the model.

    Runs ``total_timesteps // (env.num_envs * nsteps)`` updates. Each update
    calls ``model.train`` followed by ``model.sil_train``. ``env`` is closed
    when the function exits, including when training raises.

    Args:
        policy: Forwarded to ``Model``.
        env: Environment exposing ``num_envs``, ``observation_space``,
            ``action_space`` and ``close()``.
        seed: Passed to ``set_global_seeds``.
        nsteps: Forwarded to ``Model``, ``Runner`` and ``EpisodeStats``; also
            sets the batch size ``env.num_envs * nsteps``.
        total_timesteps: Timestep budget that determines the update count.
        vf_coef, ent_coef, max_grad_norm, lr, lrschedule, epsilon, alpha:
            Forwarded unchanged to ``Model``.
        gamma: Forwarded to ``Runner``.
        log_interval: Statistics are logged on the first update and on every
            update whose index is a multiple of this value.
        sil_update: Forwarded to ``Model``; the ``sil_*`` statistics are only
            logged when it is positive.
        sil_beta: Forwarded to ``Model``.

    Returns:
        The ``Model`` instance that was trained.
    """
    # try/finally so the environment is released even if training fails.
    try:
        set_global_seeds(seed)

        nenvs = env.num_envs
        ob_space = env.observation_space
        ac_space = env.action_space
        model = Model(policy=policy, ob_space=ob_space, ac_space=ac_space, nenvs=nenvs, nsteps=nsteps,
                      ent_coef=ent_coef, vf_coef=vf_coef, max_grad_norm=max_grad_norm, lr=lr,
                      alpha=alpha, epsilon=epsilon, total_timesteps=total_timesteps, lrschedule=lrschedule,
                      sil_update=sil_update, sil_beta=sil_beta)
        runner = Runner(env, model, nsteps=nsteps, gamma=gamma)

        episode_stats = EpisodeStats(nsteps, nenvs)
        nbatch = nenvs * nsteps
        tstart = time.time()
        for update in range(1, total_timesteps // nbatch + 1):
            obs, states, rewards, masks, actions, values, raw_rewards = runner.run()
            # Episode statistics are fed the raw rewards, not the ones used for training.
            episode_stats.feed(raw_rewards, masks)
            _policy_loss, value_loss, policy_entropy, _v_avg = model.train(
                obs, states, rewards, masks, actions, values)
            _sil_loss, _sil_adv, sil_samples, _sil_nlogp = model.sil_train()

            if update % log_interval == 0 or update == 1:
                # Throughput is only needed when a row is actually logged.
                nseconds = time.time() - tstart
                fps = int((update * nbatch) / nseconds)
                ev = explained_variance(values, rewards)
                logger.record_tabular("nupdates", update)
                logger.record_tabular("total_timesteps", update * nbatch)
                logger.record_tabular("fps", fps)
                logger.record_tabular("policy_entropy", float(policy_entropy))
                logger.record_tabular("value_loss", float(value_loss))
                logger.record_tabular("explained_variance", float(ev))
                logger.record_tabular("episode_reward", episode_stats.mean_reward())
                logger.record_tabular("best_episode_reward", float(model.sil.get_best_reward()))
                if sil_update > 0:
                    logger.record_tabular("sil_num_episodes", float(model.sil.num_episodes()))
                    logger.record_tabular("sil_valid_samples", float(sil_samples))
                    logger.record_tabular("sil_steps", float(model.sil.num_steps()))
                logger.dump_tabular()
        return model
    finally:
        env.close()
示例11: learn
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import record_tabular [as 别名]
def learn(policy, env, seed, nsteps=5, nstack=4, total_timesteps=int(80e6), vf_coef=0.5, ent_coef=0.01,
          max_grad_norm=0.5, lr=7e-4, lrschedule='linear', epsilon=1e-5, alpha=0.99, gamma=0.99,
          log_interval=100):
    """Alternate rollouts and model updates (frame-stacking variant), then close ``env``.

    Runs ``total_timesteps // (env.num_envs * nsteps)`` updates. ``env`` is
    closed when the function exits, including when training raises. Nothing
    is returned.

    Args:
        policy: Forwarded to ``Model``.
        env: Environment exposing ``num_envs``, ``observation_space``,
            ``action_space``, ``remotes`` and ``close()``.
        seed: Passed to ``set_global_seeds``.
        nsteps: Forwarded to ``Model`` and ``Runner``; also sets the batch
            size ``env.num_envs * nsteps``.
        nstack: Forwarded to ``Model`` and ``Runner``.
        total_timesteps: Timestep budget that determines the update count.
        vf_coef, ent_coef, max_grad_norm, lr, lrschedule, epsilon, alpha:
            Forwarded unchanged to ``Model``.
        gamma: Forwarded to ``Runner``.
        log_interval: Statistics are logged on the first update and on every
            update whose index is a multiple of this value.
    """
    # try/finally so the environment is released even if training fails.
    try:
        tf.reset_default_graph()
        set_global_seeds(seed)

        nenvs = env.num_envs
        ob_space = env.observation_space
        ac_space = env.action_space
        num_procs = len(env.remotes)  # HACK
        model = Model(policy=policy, ob_space=ob_space, ac_space=ac_space, nenvs=nenvs, nsteps=nsteps,
                      nstack=nstack, num_procs=num_procs, ent_coef=ent_coef, vf_coef=vf_coef,
                      max_grad_norm=max_grad_norm, lr=lr, alpha=alpha, epsilon=epsilon,
                      total_timesteps=total_timesteps, lrschedule=lrschedule)
        runner = Runner(env, model, nsteps=nsteps, nstack=nstack, gamma=gamma)

        nbatch = nenvs * nsteps
        tstart = time.time()
        for update in range(1, total_timesteps // nbatch + 1):
            obs, states, rewards, masks, actions, values = runner.run()
            _policy_loss, value_loss, policy_entropy = model.train(obs, states, rewards, masks, actions, values)

            if update % log_interval == 0 or update == 1:
                # Throughput is only needed when a row is actually logged.
                nseconds = time.time() - tstart
                fps = int((update * nbatch) / nseconds)
                ev = explained_variance(values, rewards)
                logger.record_tabular("nupdates", update)
                logger.record_tabular("total_timesteps", update * nbatch)
                logger.record_tabular("fps", fps)
                logger.record_tabular("policy_entropy", float(policy_entropy))
                logger.record_tabular("value_loss", float(value_loss))
                logger.record_tabular("explained_variance", float(ev))
                logger.dump_tabular()
    finally:
        env.close()
示例12: learn
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import record_tabular [as 别名]
def learn(policy, env, seed, nsteps=5, total_timesteps=int(80e6), v_mix_coef=0.5, ent_coef=0.01, max_grad_norm=0.5,
          lr_alpha=7e-4, lr_beta=7e-4, lrschedule='linear', epsilon=1e-5, alpha=0.99, gamma=0.99, log_interval=100,
          v_ex_coef=1.0, r_ex_coef=0.0, r_in_coef=1.0):
    """Alternate rollouts and model updates with extrinsic and mixed value targets.

    Runs ``total_timesteps // (env.num_envs * nsteps)`` updates. Before each
    ``model.train`` call, a per-sample discounted bootstrap vector and a
    discount coefficient matrix are built from the rollout. ``env`` is closed
    when the function exits, including when training raises. Nothing is
    returned.

    Args:
        policy: Forwarded to ``Model``.
        env: Environment exposing ``num_envs``, ``observation_space``,
            ``action_space`` and ``close()``.
        seed: Passed to ``set_global_seeds``.
        nsteps: Forwarded to ``Model`` and ``Runner``; also sets the batch
            size ``env.num_envs * nsteps``.
        total_timesteps: Timestep budget that determines the update count.
        v_mix_coef, ent_coef, max_grad_norm, lr_alpha, lr_beta, lrschedule,
        epsilon, alpha, v_ex_coef: Forwarded unchanged to ``Model``.
        gamma: Forwarded to ``Runner`` and used as the discount factor for
            the bootstrap vector and coefficient matrix.
        log_interval: Statistics are logged on the first update and on every
            update whose index is a multiple of this value.
        r_ex_coef, r_in_coef: Forwarded to both ``Model`` and ``Runner``.
    """
    # try/finally so the environment is released even if training fails.
    try:
        tf.reset_default_graph()
        set_global_seeds(seed)

        nenvs = env.num_envs
        ob_space = env.observation_space
        ac_space = env.action_space
        model = Model(policy=policy, ob_space=ob_space, ac_space=ac_space, nenvs=nenvs, nsteps=nsteps,
                      ent_coef=ent_coef, v_ex_coef=v_ex_coef, max_grad_norm=max_grad_norm,
                      lr_alpha=lr_alpha, lr_beta=lr_beta, alpha=alpha, epsilon=epsilon,
                      total_timesteps=total_timesteps, lrschedule=lrschedule,
                      v_mix_coef=v_mix_coef, r_ex_coef=r_ex_coef, r_in_coef=r_in_coef)
        runner = Runner(env, model, nsteps=nsteps, gamma=gamma, r_ex_coef=r_ex_coef, r_in_coef=r_in_coef)

        nbatch = nenvs * nsteps
        tstart = time.time()
        # Rolling windows holding the 100 most recent entries of each episode statistic.
        epinfobuf = deque(maxlen=100)
        eprexbuf = deque(maxlen=100)
        eprinbuf = deque(maxlen=100)
        eplenbuf = deque(maxlen=100)
        for update in range(1, total_timesteps // nbatch + 1):
            obs, ac, policy_states, r_in, r_ex, ret_ex, ret_mix, \
                v_ex, v_mix, last_v_ex, last_v_mix, masks, dones, \
                epinfo, ep_r_ex, ep_r_in, ep_len = runner.run()

            dis_v_mix_last = np.zeros([nbatch], np.float32)
            coef_mat = np.zeros([nbatch, nbatch], np.float32)
            for i in range(nbatch):
                # Bootstrap value discounted by the number of steps left in
                # sample i's nsteps-long segment.
                dis_v_mix_last[i] = gamma ** (nsteps - i % nsteps) * last_v_mix[i // nsteps]
                coef = 1.0
                for j in range(i, nbatch):
                    # Stop at the start of the next segment.
                    if j > i and j % nsteps == 0:
                        break
                    coef_mat[i][j] = coef
                    coef *= gamma
                    if dones[j]:
                        # A done flag at j cancels the bootstrap term and ends the row.
                        dis_v_mix_last[i] = 0
                        break

            entropy = model.train(obs, policy_states[0], masks, ac, r_ex, ret_ex, v_ex, v_mix,
                                  dis_v_mix_last, coef_mat)

            epinfobuf.extend(epinfo)
            eprexbuf.extend(ep_r_ex)
            eprinbuf.extend(ep_r_in)
            eplenbuf.extend(ep_len)

            if update % log_interval == 0 or update == 1:
                # Throughput is only needed when a row is actually logged.
                nseconds = time.time() - tstart
                fps = int((update * nbatch) / nseconds)
                logger.record_tabular("nupdates", update)
                logger.record_tabular("total_timesteps", update * nbatch)
                logger.record_tabular("fps", fps)
                logger.record_tabular("entropy", float(entropy))
                v_ex_ev = explained_variance(v_ex, ret_ex)
                logger.record_tabular("v_ex_ev", float(v_ex_ev))
                v_mix_ev = explained_variance(v_mix, ret_mix)
                logger.record_tabular("v_mix_ev", float(v_mix_ev))
                logger.record_tabular("gamescoremean", safemean([info['r'] for info in epinfobuf]))
                logger.record_tabular("gamelenmean", safemean([info['l'] for info in epinfobuf]))
                logger.dump_tabular()
    finally:
        env.close()