This article collects typical usage examples of the Python method utils.Logger.write. If you are wondering what Logger.write does, how to call it, or what code that uses it looks like, the curated examples here may help. You can also explore further usage examples of the containing class, utils.Logger.
One code example of the Logger.write method is shown below.
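Before the full example, a minimal sketch of the write call pattern may be useful. The constructor arguments, write(display=True), and close() match Example 1 below; the log() call used to stage a row of statistics before writing is an assumption about the Logger interface, and the key names are hypothetical.

from datetime import datetime
from utils import Logger

now = datetime.utcnow().strftime("%b-%d_%H-%M-%S")  # timestamp for a unique log directory
logger = Logger(logname='Hopper-v1', now=now)
logger.log({'_Episode': 20, '_MeanReward': 41.7})  # assumed API: stage key/value stats for one row
logger.write(display=True)  # write the staged row to file and echo it to stdout
logger.close()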
Example 1: main
# Required import: from utils import Logger  [as alias]
# Or: from utils.Logger import write  [as alias]
from datetime import datetime

from utils import Logger, Scaler  # Scaler is assumed to live alongside Logger
# GracefulKiller, init_gym, NNValueFunction, Policy, run_policy, add_value,
# add_disc_sum_rew, add_gae, build_train_set and log_batch_stats come from
# the surrounding training script; their exact import paths are project-specific.


def main(env_name, num_episodes, render, gamma, lam, kl_targ, batch_size):
    """ Main training loop

    Args:
        env_name: OpenAI Gym environment name, e.g. 'Hopper-v1'
        num_episodes: maximum number of episodes to run
        render: render the environment during training (bool)
        gamma: reward discount factor (float)
        lam: lambda from Generalized Advantage Estimation
        kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new)]
        batch_size: number of episodes per policy training batch
    """
    killer = GracefulKiller()
    env, obs_dim, act_dim = init_gym(env_name, render)
    obs_dim += 1  # add 1 to obs dimension for time step feature (see run_episode())
    now = datetime.utcnow().strftime("%b-%d_%H-%M-%S")  # create unique directories
    logger = Logger(logname=env_name, now=now)
    scaler = Scaler(obs_dim, env_name)
    val_func = NNValueFunction(obs_dim, env_name)
    policy = Policy(obs_dim, act_dim, kl_targ, env_name)
    # run a few episodes of untrained policy to initialize scaler:
    run_policy(env, policy, scaler, logger, episodes=5)
    episode = 0
    while episode < num_episodes:
        trajectories = run_policy(env, policy, scaler, logger, episodes=batch_size)
        episode += len(trajectories)
"""if episode > 600 and not capture:
env.ScreenCapture(5)
capture = True"""
        add_value(trajectories, val_func)  # add estimated values to episodes
        add_disc_sum_rew(trajectories, gamma)  # calculate discounted sums of rewards
        add_gae(trajectories, gamma, lam)  # calculate advantages (see the GAE sketch after this listing)
        # concatenate all episodes into single NumPy arrays
        observes, actions, advantages, disc_sum_rew = build_train_set(trajectories)
        # add various stats to training log:
        log_batch_stats(observes, actions, advantages, disc_sum_rew, logger, episode)
        policy.update(observes, actions, advantages, logger)  # update policy
        val_func.fit(observes, disc_sum_rew, logger)  # update value function
        logger.write(display=True)  # write logger results to file and stdout
        scaler.save()
        if killer.kill_now:
            if input('Terminate training (y/[n])? ') == 'y':
                break
            killer.kill_now = False
    logger.close()
    policy.close_sess()
    val_func.close_sess()
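The advantage step in Example 1 delegates to add_gae. For reference, here is a hedged sketch of what Generalized Advantage Estimation (Schulman et al., 2016) computes; the project's actual add_gae operates on trajectory dicts and may differ in details such as reward scaling, so treat the function names below as illustrative.

import numpy as np

def discount(x, gamma):
    """Discounted cumulative sum: y[t] = x[t] + gamma * y[t + 1]."""
    y = np.zeros_like(x, dtype=np.float64)
    running = 0.0
    for t in reversed(range(len(x))):
        running = x[t] + gamma * running
        y[t] = running
    return y

def gae_advantages(rewards, values, gamma, lam):
    """Generalized Advantage Estimation for one finite episode."""
    values_next = np.append(values[1:], 0.0)  # bootstrap V = 0 past the terminal step
    deltas = rewards + gamma * values_next - values  # TD residuals: r_t + gamma*V(s_{t+1}) - V(s_t)
    return discount(deltas, gamma * lam)  # A_t = sum_l (gamma*lam)^l * delta_{t+l}

# Hypothetical usage on a three-step episode:
advantages = gae_advantages(np.array([1.0, 1.0, 1.0]),
                            np.array([2.5, 1.8, 0.9]),
                            gamma=0.995, lam=0.98)

Here lam trades bias for variance: lam=0 reduces the advantages to one-step TD residuals, while lam=1 gives full Monte Carlo returns minus the value baseline.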