This article collects typical usage examples of the Python method rllab.misc.logger.log. If you are wondering how logger.log is used in practice, the curated code examples below should help. You can also browse further usage examples from the module this method belongs to, rllab.misc.logger.

The following shows 15 code examples of the logger.log method, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
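Before diving into the examples, here is a minimal sketch of how rllab.misc.logger is typically wired up so that the calls below have somewhere to write to. The output paths are hypothetical and only for illustration; in a real experiment the runner usually performs this configuration.

from rllab.misc import logger

# Register a plain-text log file and a CSV file for tabular statistics (paths are hypothetical).
logger.add_text_output('/tmp/exp/debug.log')
logger.add_tabular_output('/tmp/exp/progress.csv')

with logger.prefix('itr #0 | '):
    logger.log('Obtaining samples...')            # free-form, timestamped text message
    logger.record_tabular('AverageReturn', 0.0)   # one column of the current tabular row
    logger.dump_tabular(with_prefix=False)        # flush the row to progress.csv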
Example 1: optimize_policy

# Requires: from rllab.misc import logger [as alias]
# Or: from rllab.misc.logger import log [as alias]
def optimize_policy(self, itr, samples_data):
    # logger.log("optimizing policy")
    inputs = ext.extract(
        samples_data,
        "observations", "actions", "advantages"
    )
    agent_infos = samples_data["agent_infos"]
    state_info_list = [agent_infos[k] for k in self.policy.state_info_keys]
    inputs += tuple(state_info_list)
    if self.policy.recurrent:
        inputs += (samples_data["valids"],)
    # dist_info_list = [agent_infos[k] for k in self.policy.distribution.dist_info_keys]
    # loss_before = self.optimizer.loss(inputs)
    self.optimizer.optimize(inputs)
    # loss_after = self.optimizer.loss(inputs)
    # logger.record_tabular("LossBefore", loss_before)
    # logger.record_tabular("LossAfter", loss_after)
    # mean_kl, max_kl = self.opt_info['f_kl'](*(list(inputs) + dist_info_list))
    # logger.record_tabular('MeanKL', mean_kl)
    # logger.record_tabular('MaxKL', max_kl)

Example 2: get_logger

# Requires: from rllab.misc import logger [as alias]
# Or: from rllab.misc.logger import log [as alias]
def get_logger(logger_name, folderpath, level=logging.DEBUG):
    logger = logging.getLogger(logger_name)
    logger.setLevel(level)
    to_generate = [('info.log', logging.INFO), ('debug.log', logging.DEBUG)]
    for logname, handler_level in to_generate:
        # Create a file handler
        handler = logging.FileHandler(folderpath + '/' + logname)
        handler.setLevel(handler_level)
        # Create a logging format
        if logname == 'debug.log':
            formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        else:
            formatter = logging.Formatter('%(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger

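A quick usage sketch for the helper above (the logger name and folder are hypothetical; the folder must exist because the file handlers open files inside it): info.log receives INFO and above, while debug.log receives everything from DEBUG up.

import logging
import os

folder = '/tmp/my_experiment'                      # hypothetical output folder
os.makedirs(folder, exist_ok=True)
log = get_logger('my_experiment', folder, level=logging.DEBUG)
log.debug('written to debug.log only')
log.info('written to both info.log and debug.log')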
Example 3: fit

# Requires: from rllab.misc import logger [as alias]
# Or: from rllab.misc.logger import log [as alias]
def fit(self, paths):
    logger.log('fitting the regressor...')
    if self.recurrent:
        observations = np.array([p["observations"][:, self.obs_regressed] for p in paths])
        actions = np.array([p["actions"][:, self.act_regressed] for p in paths])
        obs_actions = np.concatenate([observations, actions], axis=2)
        if self.noisify_traj_coef:
            obs_actions += np.random.normal(loc=0.0,
                                            scale=float(np.mean(np.abs(obs_actions))) * self.noisify_traj_coef,
                                            size=np.shape(obs_actions))
        latents = np.array([p['agent_infos']['latents'] for p in paths])
        self._regressor.fit(obs_actions, latents)  # the input shapes are (traj, time, dim)
    else:
        observations = np.concatenate([p["observations"][:, self.obs_regressed] for p in paths])
        actions = np.concatenate([p["actions"][:, self.act_regressed] for p in paths])
        obs_actions = np.concatenate([observations, actions], axis=1)
        latents = np.concatenate([p['agent_infos']["latents"] for p in paths])
        if self.noisify_traj_coef:
            obs_actions += np.random.normal(loc=0.0,
                                            scale=float(np.mean(np.abs(obs_actions))) * self.noisify_traj_coef,
                                            size=np.shape(obs_actions))
        self._regressor.fit(obs_actions, latents.reshape((-1, self.latent_dim)))  # reshape targets to (N, latent_dim)
    logger.log('done fitting the regressor')

Example 4: __init__

# Requires: from rllab.misc import logger [as alias]
# Or: from rllab.misc.logger import log [as alias]
def __init__(
        self,
        # extra logging options; some of this could arguably live in the sampler
        log_individual_latents=False,   # log the progress of each individual latent
        log_deterministic=False,        # log the performance of the policy with std=0 (for each latent separately)
        log_hierarchy=False,
        bonus_evaluator=None,
        reward_coef_bonus=None,
        latent_regressor=None,
        reward_regressor_mi=0,          # kwargs to the sampler (which also processes samples)
        switch_lat_every=0,
        **kwargs):
    # some logging
    self.log_individual_latents = log_individual_latents
    self.log_deterministic = log_deterministic
    self.log_hierarchy = log_hierarchy

    sampler_cls = BatchSampler_snn
    sampler_args = {'switch_lat_every': switch_lat_every,
                    'latent_regressor': latent_regressor,
                    'bonus_evaluator': bonus_evaluator,
                    'reward_coef_bonus': reward_coef_bonus,
                    'reward_regressor_mi': reward_regressor_mi,
                    }
    super(NPO_snn, self).__init__(sampler_cls=sampler_cls, sampler_args=sampler_args, **kwargs)

Example 5: train

# Requires: from rllab.misc import logger [as alias]
# Or: from rllab.misc.logger import log [as alias]
def train(self):
    self.start_worker()
    for itr in range(self.current_itr, self.n_itr):
        with logger.prefix('itr #%d | ' % itr):
            logger.log('Obtaining samples...')
            paths = self.sampler.obtain_samples(itr)
            logger.log('Processing samples...')
            samples_data = self.sampler.process_samples(itr, paths)
            logger.log('Logging diagnostics...')
            self.log_diagnostics(paths)
            logger.log('Optimizing policy...')
            self.optimize_policy(itr, samples_data)
            logger.log('Saving snapshot...')
            params = self.get_itr_snapshot(itr, samples_data)
            self.current_itr = itr + 1
            params['algo'] = self
            # save the trajectories into the snapshot params
            if self.store_paths:
                params['paths'] = samples_data['paths']
            logger.save_itr_params(itr, params)
            logger.log('Saved')
            logger.dump_tabular(with_prefix=False)
            if self.plot:
                self.update_plot()
                if self.pause_for_plot:
                    input('Plotting evaluation run: Press Enter to '
                          'continue...')
    self.shutdown_worker()

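For dump_tabular and save_itr_params in the loop above to actually write anything, the logger has to be configured before train() is called. A minimal sketch, assuming an algo object has already been constructed; the paths are hypothetical, and in practice rllab's run_experiment_lite normally performs this setup for you:

from rllab.misc import logger

logger.add_tabular_output('/tmp/exp/progress.csv')  # rows flushed by dump_tabular
logger.set_snapshot_dir('/tmp/exp')                 # directory used by save_itr_params
logger.set_snapshot_mode('last')                    # keep only the most recent snapshot
algo.train()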
Example 6: evaluate

# Requires: from rllab.misc import logger [as alias]
# Or: from rllab.misc.logger import log [as alias]
def evaluate(self, epoch, pool):
    logger.log("Collecting samples for evaluation")
    paths = rollouts(self.env, self.policy,
                     self.max_path_length, self.n_eval_episodes)

    average_discounted_return = np.mean(
        [special.discount_return(path["rewards"], self.discount) for path in paths]
    )
    returns = [sum(path["rewards"]) for path in paths]

    all_qs = np.concatenate(self.q_averages)
    all_ys = np.concatenate(self.y_averages)
    average_q_loss = np.mean(self.qf_loss_averages)
    qfun_param_norm = self.qf.get_param_values().norm()

    logger.record_tabular('Epoch', epoch)
    logger.record_tabular('AverageReturn', np.mean(returns))
    logger.record_tabular('StdReturn', np.std(returns))
    logger.record_tabular('MaxReturn', np.max(returns))
    logger.record_tabular('MinReturn', np.min(returns))
    logger.record_tabular('AverageDiscountedReturn', average_discounted_return)
    logger.record_tabular('AverageQLoss', average_q_loss)
    logger.record_tabular('AverageQ', np.mean(all_qs))
    logger.record_tabular('AverageAbsQ', np.mean(np.abs(all_qs)))
    logger.record_tabular('AverageY', np.mean(all_ys))
    logger.record_tabular('AverageAbsY', np.mean(np.abs(all_ys)))
    logger.record_tabular('AverageAbsQYDiff', np.mean(np.abs(all_qs - all_ys)))
    logger.record_tabular('QFunParamNorm', qfun_param_norm)

    # reset the running statistics for the next epoch
    self.qf_loss_averages = []
    self.policy_surr_averages = []
    self.q_averages = []
    self.y_averages = []

Example 7: train

# Requires: from rllab.misc import logger [as alias]
# Or: from rllab.misc.logger import log [as alias]
def train(self):
    if self.init_pol_params is not None:
        self.policy.set_param_values(self.init_pol_params)
    if self.init_irl_params is not None:
        self.irl_model.set_params(self.init_irl_params)
    self.start_worker()
    start_time = time.time()

    returns = []
    for itr in range(self.start_itr, self.n_itr):
        itr_start_time = time.time()
        with logger.prefix('itr #%d | ' % itr):
            logger.log('Obtaining samples...')
            paths = self.sampler.obtain_samples(itr)

            logger.log('Processing samples...')
            # update the reward function
            paths = self.compute_irl(paths, itr=itr)
            # returns.append(self.log_avg_returns(paths))
            samples_data = self.sampler.process_samples(itr, paths)

            logger.log('Logging diagnostics...')
            self.log_diagnostics(paths)
            logger.log('Optimizing policy...')
            self.optimize_policy(itr, samples_data)
            logger.log('Saving snapshot...')
            params = self.get_itr_snapshot(itr, samples_data)  # , **kwargs)
            if self.store_paths:
                params['paths'] = samples_data['paths']
            logger.save_itr_params(itr, params)
            logger.log('Saved')
            logger.record_tabular('Time', time.time() - start_time)
            logger.record_tabular('ItrTime', time.time() - itr_start_time)
            logger.dump_tabular(with_prefix=False)
            if self.plot:
                self.update_plot()
                if self.pause_for_plot:
                    input('Plotting evaluation run: Press Enter to '
                          'continue...')
    self.shutdown_worker()
    return

Example 8: populate_task

# Requires: from rllab.misc import logger [as alias]
# Or: from rllab.misc.logger import log [as alias]
def populate_task(env, policy, scope=None):
    logger.log("Populating workers...")
    if singleton_pool.n_parallel > 1:
        singleton_pool.run_each(
            _worker_populate_task,
            [(pickle.dumps(env), pickle.dumps(policy), scope)] * singleton_pool.n_parallel
        )
    else:
        # avoid unnecessary copying
        G = _get_scoped_G(singleton_pool.G, scope)
        G.env = env
        G.policy = policy
    logger.log("Populated")

Example 9: __init__

# Requires: from rllab.misc import logger [as alias]
# Or: from rllab.misc.logger import log [as alias]
def __init__(self, env_name, record_video=True, video_schedule=None, log_dir=None, record_log=True,
             force_reset=False):
    if log_dir is None:
        if logger.get_snapshot_dir() is None:
            logger.log("Warning: skipping Gym environment monitoring since snapshot_dir not configured.")
        else:
            log_dir = os.path.join(logger.get_snapshot_dir(), "gym_log")
    Serializable.quick_init(self, locals())

    env = gym.envs.make(env_name)
    self.env = env
    self.env_id = env.spec.id

    # recording video requires the log to be recorded as well
    assert not (not record_log and record_video)

    if log_dir is None or record_log is False:
        self.monitoring = False
    else:
        if not record_video:
            video_schedule = NoVideoSchedule()
        else:
            if video_schedule is None:
                video_schedule = CappedCubicVideoSchedule()
        self.env = gym.wrappers.Monitor(self.env, log_dir, video_callable=video_schedule, force=True)
        self.monitoring = True

    self._observation_space = convert_gym_space(env.observation_space)
    logger.log("observation space: {}".format(self._observation_space))
    self._action_space = convert_gym_space(env.action_space)
    logger.log("action space: {}".format(self._action_space))
    self._horizon = env.spec.tags['wrapper_config.TimeLimit.max_episode_steps']
    self._log_dir = log_dir
    self._force_reset = force_reset

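A rough construction sketch for the wrapper above (the environment id is an arbitrary example, and the import path follows the standard rllab layout): with record_log=False and record_video=False no Gym monitor is attached, so it works even when logger.get_snapshot_dir() has not been configured.

from rllab.envs.gym_env import GymEnv

env = GymEnv('CartPole-v0', record_video=False, record_log=False)
print(env.observation_space, env.action_space, env.horizon)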
Example 10: advance_until_terminate

# Requires: from rllab.misc import logger [as alias]
# Or: from rllab.misc.logger import log [as alias]
def advance_until_terminate(self):
    skip = self.get_skip_flag()
    n_skips = 0
    old_top = self._top
    new_top = (old_top + 1) % self._max_pool_size
    while skip and old_top != new_top and n_skips < self._max_skip_episode:
        n_skips += 1
        self.advance()
        # advance until the next episode start
        while not self._initials[self._top]:
            self.advance()
        skip = self.get_skip_flag()
        new_top = self._top
    logger.log("add_sample, skipped %d episodes, top=%d->%d" % (
        n_skips, old_top, new_top))

Example 11: train

# Requires: from rllab.misc import logger [as alias]
# Or: from rllab.misc.logger import log [as alias]
def train(self, sess=None):
    created_session = True if (sess is None) else False
    if sess is None:
        sess = tf.Session()
        sess.__enter__()

    sess.run(tf.global_variables_initializer())
    self.start_worker()
    start_time = time.time()
    for itr in range(self.start_itr, self.n_itr):
        itr_start_time = time.time()
        with logger.prefix('itr #%d | ' % itr):
            logger.log("Obtaining samples...")
            paths = self.obtain_samples(itr)
            logger.log("Processing samples...")
            samples_data = self.process_samples(itr, paths)
            logger.log("Logging diagnostics...")
            self.log_diagnostics(paths)
            logger.log("Optimizing policy...")
            self.optimize_policy(itr, samples_data)
            logger.log("Saving snapshot...")
            params = self.get_itr_snapshot(itr, samples_data)  # , **kwargs)
            if self.store_paths:
                params["paths"] = samples_data["paths"]
            logger.save_itr_params(itr, params)
            logger.log("Saved")
            logger.record_tabular('Time', time.time() - start_time)
            logger.record_tabular('ItrTime', time.time() - itr_start_time)
            logger.dump_tabular(with_prefix=False)
            if self.plot:
                rollout(self.env, self.policy, animated=True, max_path_length=self.max_path_length)
                if self.pause_for_plot:
                    input("Plotting evaluation run: Press Enter to "
                          "continue...")
    self.shutdown_worker()
    if created_session:
        sess.close()

Example 12: optimize_policy

# Requires: from rllab.misc import logger [as alias]
# Or: from rllab.misc.logger import log [as alias]
def optimize_policy(self, itr, samples_data):
    all_input_values = tuple(ext.extract(
        samples_data,
        "observations", "actions", "advantages"
    ))
    agent_infos = samples_data["agent_infos"]
    state_info_list = [agent_infos[k] for k in self.policy.state_info_keys]
    dist_info_list = [agent_infos[k] for k in self.policy.distribution.dist_info_keys]
    all_input_values += tuple(state_info_list) + tuple(dist_info_list)
    if self.policy.recurrent:
        all_input_values += (samples_data["valids"],)
    logger.log("Computing loss before")
    loss_before = self.optimizer.loss(all_input_values)
    logger.log("Computing KL before")
    mean_kl_before = self.optimizer.constraint_val(all_input_values)
    logger.log("Optimizing")
    self.optimizer.optimize(all_input_values)
    logger.log("Computing KL after")
    mean_kl = self.optimizer.constraint_val(all_input_values)
    logger.log("Computing loss after")
    loss_after = self.optimizer.loss(all_input_values)
    logger.record_tabular('LossBefore', loss_before)
    logger.record_tabular('LossAfter', loss_after)
    logger.record_tabular('MeanKLBefore', mean_kl_before)
    logger.record_tabular('MeanKL', mean_kl)
    logger.record_tabular('dLoss', loss_before - loss_after)
    return dict()

Example 13: optimize_policy

# Requires: from rllab.misc import logger [as alias]
# Or: from rllab.misc.logger import log [as alias]
def optimize_policy(self, itr, samples_data):
    all_input_values = tuple(ext.extract(
        samples_data,
        "observations", "observations_next", "actions", "advantages",
    ))
    # append the empowerment estimate for each observation as an extra input column
    obs = samples_data["observations"]
    empwr = self.empw.eval(obs)
    ep = []
    for i in range(0, len(empwr)):
        ep.append(empwr[i][0])
    all_input_values += (np.array(ep).reshape(-1, 1),)
    agent_infos = samples_data["agent_infos"]
    state_info_list = [agent_infos[k] for k in self.policy.state_info_keys]
    dist_info_list = [agent_infos[k] for k in self.policy.distribution.dist_info_keys]
    all_input_values += tuple(state_info_list) + tuple(dist_info_list)
    if self.policy.recurrent:
        all_input_values += (samples_data["valids"],)
    logger.log("Computing loss before")
    loss_before = self.optimizer.loss(all_input_values)
    logger.log("Computing KL before")
    mean_kl_before = self.optimizer.constraint_val(all_input_values)
    logger.log("Optimizing")
    self.optimizer.optimize(all_input_values)
    logger.log("Computing KL after")
    mean_kl = self.optimizer.constraint_val(all_input_values)
    logger.log("Computing loss after")
    loss_after = self.optimizer.loss(all_input_values)
    logger.record_tabular('LossBefore', loss_before)
    logger.record_tabular('LossAfter', loss_after)
    logger.record_tabular('MeanKLBefore', mean_kl_before)
    logger.record_tabular('MeanKL', mean_kl)
    logger.record_tabular('dLoss', loss_before - loss_after)
    return dict()

Example 14: optimize_policy

# Requires: from rllab.misc import logger [as alias]
# Or: from rllab.misc.logger import log [as alias]
def optimize_policy(self, itr, samples_data):
    all_input_values = tuple(ext.extract(
        samples_data,
        "observations", "actions", "advantages"
    ))
    agent_infos = samples_data["agent_infos"]
    state_info_list = [agent_infos[k] for k in self.policy.state_info_keys]
    dist_info_list = [agent_infos[k] for k in self.policy.distribution.dist_info_keys]
    all_input_values += tuple(state_info_list) + tuple(dist_info_list)
    if self.policy.recurrent:
        all_input_values += (samples_data["valids"],)
    # logger.log("Computing loss before")
    loss_before = self.optimizer.loss(all_input_values)
    # logger.log("Computing KL before")
    mean_kl_before = self.optimizer.constraint_val(all_input_values)
    # logger.log("Optimizing")
    self.optimizer.optimize(all_input_values)
    # logger.log("Computing KL after")
    mean_kl = self.optimizer.constraint_val(all_input_values)
    # logger.log("Computing loss after")
    loss_after = self.optimizer.loss(all_input_values)
    # logger.record_tabular('LossBefore', loss_before)
    # logger.record_tabular('LossAfter', loss_after)
    # logger.record_tabular('MeanKLBefore', mean_kl_before)
    # logger.record_tabular('MeanKL', mean_kl)
    # logger.record_tabular('dLoss', loss_before - loss_after)
    return dict()

Example 15: optimize_policy

# Requires: from rllab.misc import logger [as alias]
# Or: from rllab.misc.logger import log [as alias]
def optimize_policy(self, itr, samples_data):
    all_input_values = tuple(ext.extract(
        samples_data,
        "observations", "actions", "advantages"
    ))
    agent_infos = samples_data["agent_infos"]
    state_info_list = [agent_infos[k] for k in self.policy.state_info_keys]
    dist_info_list = [agent_infos[k] for k in self.policy.distribution.dist_info_keys]
    all_input_values += tuple(state_info_list) + tuple(dist_info_list)
    if self.policy.recurrent:
        all_input_values += (samples_data["valids"],)
    # logger.log("Computing loss before")
    # loss_before = self.optimizer.loss(all_input_values)
    # logger.log("Computing KL before")
    # mean_kl_before = self.optimizer.constraint_val(all_input_values)
    # logger.log("Optimizing")
    self.optimizer.optimize(all_input_values)
    # logger.log("Computing KL after")
    # mean_kl = self.optimizer.constraint_val(all_input_values)
    # logger.log("Computing loss after")
    # loss_after = self.optimizer.loss(all_input_values)
    # logger.record_tabular('LossBefore', loss_before)
    # logger.record_tabular('LossAfter', loss_after)
    # logger.record_tabular('MeanKLBefore', mean_kl_before)
    # logger.record_tabular('MeanKL', mean_kl)
    # logger.record_tabular('dLoss', loss_before - loss_after)
    return dict()