本文整理汇总了Python中baselines.a2c.utils.EpisodeStats类的典型用法代码示例。如果您正苦于以下问题：Python utils.EpisodeStats的具体用法？Python utils.EpisodeStats怎么用？Python utils.EpisodeStats使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块baselines.a2c.utils的用法示例。
在下文中一共展示了utils.EpisodeStats方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from baselines.a2c import utils [as 别名]
# 或者: from baselines.a2c.utils import EpisodeStats [as 别名]
def __init__(self, runner, model, buffer, log_interval):
    """Store the collaborators and create a fresh episode-statistics tracker.

    Args:
        runner: Object exposing ``nsteps`` and ``nenv`` attributes; both are
            read here to size the ``EpisodeStats`` instance.
        model: Stored as ``self.model``; not used during construction.
        buffer: Stored as ``self.buffer``; not used during construction.
        log_interval: Stored as ``self.log_interval``.
            NOTE(review): presumably the number of updates between log
            lines -- confirm against the method that reads it.
    """
    self.runner, self.model, self.buffer = runner, model, buffer
    self.log_interval = log_interval

    # Left unset here; whichever method starts the run is expected to
    # fill this in (not visible in this snippet).
    self.tstart = None

    rollout_len, env_count = runner.nsteps, runner.nenv
    self.episode_stats = EpisodeStats(rollout_len, env_count)

    # Also left unset at construction time.
    self.steps = None
示例2: learn
# 需要导入模块: from baselines.a2c import utils [as 别名]
# 或者: from baselines.a2c.utils import EpisodeStats [as 别名]
def learn(policy, env, seed, nsteps=5, total_timesteps=int(80e6), vf_coef=0.5, ent_coef=0.01, max_grad_norm=0.5, lr=7e-4, lrschedule='linear', epsilon=1e-5, alpha=0.99, gamma=0.99, log_interval=100, sil_update=4, sil_beta=0.0):
    """Build a ``Model`` and ``Runner`` for ``env`` and run the training loop.

    Each update collects one rollout via ``runner.run()``, feeds the raw
    rewards and masks to an ``EpisodeStats`` tracker, calls ``model.train``
    and then ``model.sil_train``. Diagnostics are written through ``logger``
    on the first update and on every ``log_interval``-th update.

    Args:
        policy: Forwarded to ``Model`` unchanged.
        env: Environment object; must expose ``num_envs``,
            ``observation_space``, ``action_space`` and ``close()``.
        seed: Passed to ``set_global_seeds``.
        nsteps: Rollout length handed to ``Model``, ``Runner`` and
            ``EpisodeStats``; one update consumes ``num_envs * nsteps``
            timesteps.
        total_timesteps: Timestep budget; the loop runs
            ``total_timesteps // (num_envs * nsteps)`` updates.
        vf_coef, ent_coef, max_grad_norm, lr, lrschedule, epsilon, alpha:
            Forwarded to ``Model`` unchanged.
        gamma: Forwarded to ``Runner`` unchanged.
        log_interval: Number of updates between log dumps.
        sil_update: Forwarded to ``Model``; the ``sil_*`` log rows are only
            emitted when this is greater than zero.
            NOTE(review): ``model.sil_train()`` is still invoked when this
            is 0 -- presumably it is a no-op in that case; confirm in Model.
        sil_beta: Forwarded to ``Model`` unchanged.

    Returns:
        The trained ``Model`` instance.

    The environment is closed before returning, and also when model
    construction or training raises, so environment resources are not left
    open on failure.
    """
    set_global_seeds(seed)

    nenvs = env.num_envs
    ob_space = env.observation_space
    ac_space = env.action_space

    try:
        model = Model(policy=policy, ob_space=ob_space, ac_space=ac_space, nenvs=nenvs, nsteps=nsteps, ent_coef=ent_coef, vf_coef=vf_coef,
                      max_grad_norm=max_grad_norm, lr=lr, alpha=alpha, epsilon=epsilon, total_timesteps=total_timesteps, lrschedule=lrschedule, sil_update=sil_update, sil_beta=sil_beta)
        runner = Runner(env, model, nsteps=nsteps, gamma=gamma)
        episode_stats = EpisodeStats(nsteps, nenvs)

        nbatch = nenvs*nsteps
        tstart = time.time()
        for update in range(1, total_timesteps//nbatch+1):
            obs, states, rewards, masks, actions, values, raw_rewards = runner.run()
            episode_stats.feed(raw_rewards, masks)

            # Only the values that are logged below are kept; the remaining
            # return slots are not used by this loop.
            _, value_loss, policy_entropy, _ = model.train(obs, states, rewards, masks, actions, values)
            _, _, sil_samples, _ = model.sil_train()

            if update % log_interval == 0 or update == 1:
                # Throughput is only needed for logging, so the clock is read
                # here. The divisor is clamped because a coarse wall clock
                # can report zero elapsed time on the very first update.
                nseconds = time.time()-tstart
                fps = int((update*nbatch)/max(nseconds, 1e-8))

                ev = explained_variance(values, rewards)
                logger.record_tabular("nupdates", update)
                logger.record_tabular("total_timesteps", update*nbatch)
                logger.record_tabular("fps", fps)
                logger.record_tabular("policy_entropy", float(policy_entropy))
                logger.record_tabular("value_loss", float(value_loss))
                logger.record_tabular("explained_variance", float(ev))
                logger.record_tabular("episode_reward", episode_stats.mean_reward())
                logger.record_tabular("best_episode_reward", float(model.sil.get_best_reward()))
                if sil_update > 0:
                    logger.record_tabular("sil_num_episodes", float(model.sil.num_episodes()))
                    logger.record_tabular("sil_valid_samples", float(sil_samples))
                    logger.record_tabular("sil_steps", float(model.sil.num_steps()))
                logger.dump_tabular()
    finally:
        # Release the environment(s) on both the success and failure paths.
        env.close()
    return model