本文整理汇总了 Python 中 rllab.misc.logger.record_tabular 方法的典型用法代码示例。如果您正苦于以下问题：Python logger.record_tabular 方法的具体用法？Python logger.record_tabular 怎么用？Python logger.record_tabular 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 rllab.misc.logger 的用法示例。
在下文中一共展示了logger.record_tabular方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: optimize_policy
# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def optimize_policy(self, itr, samples_data):
    """Run one optimizer step on the sampled batch.

    The optimizer input is assembled, in order, from the "observations",
    "actions" and "advantages" entries of ``samples_data``, the policy's
    state-info arrays, and (for recurrent policies only) the "valids"
    entry.

    :param itr: iteration index; not used by this method.
    :param samples_data: mapping holding the batch; read keys are
        "observations", "actions", "advantages", "agent_infos" and, when
        ``self.policy.recurrent`` is true, "valids".
    """
    optimizer_inputs = ext.extract(
        samples_data, "observations", "actions", "advantages")
    agent_infos = samples_data["agent_infos"]
    optimizer_inputs += tuple(
        agent_infos[key] for key in self.policy.state_info_keys)
    if self.policy.recurrent:
        optimizer_inputs += (samples_data["valids"],)

    # Loss-before/after and mean/max KL diagnostics (formerly reported via
    # logger.record_tabular) are disabled in this variant; only the
    # optimisation step itself is executed.
    self.optimizer.optimize(optimizer_inputs)
示例2: fit
# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def fit(self, xs, ys):
    """Fit the model to ``(xs, ys)`` and log the loss before and after.

    Records three tabular entries, each prefixed with ``"<name>_"`` when
    ``self._name`` is truthy: ``LossBefore``, ``LossAfter`` and ``dLoss``
    (before minus after).

    :param xs: input batch; reduced over axes ``(0, 1)`` when input
        normalisation is enabled.
    :param ys: target batch passed unchanged to the optimizer.
    """
    if self._normalize_inputs:
        # Refresh the normalisation statistics; the small epsilon keeps
        # the stored standard deviation strictly positive.
        reduce_axes = (0, 1)
        self._x_mean_var.set_value(
            np.mean(xs, axis=reduce_axes, keepdims=True))
        self._x_std_var.set_value(
            np.std(xs, axis=reduce_axes, keepdims=True) + 1e-8)

    optimizer_inputs = [xs, ys]
    if self._use_trust_region:
        # The trust-region optimizer additionally needs the pre-update
        # output of ``_f_prob``.
        optimizer_inputs.append(self._f_prob(xs))

    initial_loss = self._optimizer.loss(optimizer_inputs)
    tag = (self._name + "_") if self._name else ""
    logger.record_tabular(tag + 'LossBefore', initial_loss)

    self._optimizer.optimize(optimizer_inputs)

    final_loss = self._optimizer.loss(optimizer_inputs)
    logger.record_tabular(tag + 'LossAfter', final_loss)
    logger.record_tabular(tag + 'dLoss', initial_loss - final_loss)
示例3: fit
# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def fit(self, xs, ys):
    """Fit the model to ``(xs, ys)`` and log the loss before and after.

    Records ``LossBefore``, ``LossAfter`` and ``dLoss`` (before minus
    after), each prefixed with ``"<name>_"`` when ``self._name`` is
    truthy.

    :param xs: input batch; statistics are reduced over axes ``(0, 1)``
        (batches and steps, per the original author's note).
    :param ys: target batch passed unchanged to the optimizer.
    """
    if self._normalize_inputs:
        # Recompute the input-normalisation constants over batches AND
        # steps; epsilon guards against a zero standard deviation.
        batch_and_step_axes = (0, 1)
        self._x_mean_var.set_value(
            np.mean(xs, axis=batch_and_step_axes, keepdims=True))
        self._x_std_var.set_value(
            np.std(xs, axis=batch_and_step_axes, keepdims=True) + 1e-8)

    optimizer_inputs = [xs, ys]
    if self._use_trust_region:
        # Trust-region optimisation also consumes the pre-update output
        # of ``_f_p``.
        optimizer_inputs.append(self._f_p(xs))

    initial_loss = self._optimizer.loss(optimizer_inputs)
    tag = (self._name + "_") if self._name else ""
    logger.record_tabular(tag + 'LossBefore', initial_loss)

    self._optimizer.optimize(optimizer_inputs)

    final_loss = self._optimizer.loss(optimizer_inputs)
    logger.record_tabular(tag + 'LossAfter', final_loss)
    logger.record_tabular(tag + 'dLoss', initial_loss - final_loss)
示例4: fit
# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def fit(self, xs, ys):
    """Fit the model to ``(xs, ys)``, logging loss and mean KL around it.

    Records ``MeanKL_Before``, ``LossBefore``, ``LossAfter``,
    ``MeanKL_After`` and ``dLoss`` (loss before minus loss after), each
    prefixed with ``"<name>_"`` when ``self._name`` is truthy.

    :param xs: input batch; statistics are reduced over axis 0 when input
        normalisation is enabled.
    :param ys: target batch passed unchanged to the optimizer.
    """
    if self._normalize_inputs:
        # Recompute the input-normalisation constants; epsilon guards
        # against a zero standard deviation.
        self._x_mean_var.set_value(np.mean(xs, axis=0, keepdims=True))
        self._x_std_var.set_value(np.std(xs, axis=0, keepdims=True) + 1e-8)

    # Pre-update output of ``_f_p``: always needed for the mean-KL
    # logging, and fed to the optimizer only in trust-region mode.
    reference_p = self._f_p(xs)
    optimizer_inputs = [xs, ys]
    if self._use_trust_region:
        optimizer_inputs.append(reference_p)

    initial_loss = self._optimizer.loss(optimizer_inputs)
    tag = (self._name + "_") if self._name else ""
    initial_kl = self._mean_kl(xs, reference_p)
    logger.record_tabular(tag + 'MeanKL_Before', initial_kl)
    logger.record_tabular(tag + 'LossBefore', initial_loss)

    self._optimizer.optimize(optimizer_inputs)

    final_loss = self._optimizer.loss(optimizer_inputs)
    final_kl = self._mean_kl(xs, reference_p)
    logger.record_tabular(tag + 'LossAfter', final_loss)
    logger.record_tabular(tag + 'MeanKL_After', final_kl)
    logger.record_tabular(tag + 'dLoss', initial_loss - final_loss)
示例5: log_diagnostics
# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def log_diagnostics(self, paths, *args, **kwargs):
    """Log the follow-task returns, then delegate to the wrapped env.

    Under the ``Follow_`` prefix the per-path sums of the ``outer_rew``
    and ``dist_rew`` env-info entries are recorded as misc stats. Under
    the ``wrapped_`` prefix the mean per-path sum of ``inner_rew`` is
    recorded (when that entry exists on the first path) and the wrapped
    environment's own ``log_diagnostics`` is called on copies of the
    paths whose observations are truncated to the wrapped env's flat
    observation dimension.

    :param paths: sequence of path mappings with "observations" and
        "env_infos" entries.
    :param args: accepted for interface compatibility; unused here.
    :param kwargs: accepted for interface compatibility; unused here.
    """
    # Everything related to the pure "follow" reward is logged here.
    with logger.tabular_prefix('Follow_'):
        for stat_name, info_key in (('Return', 'outer_rew'),
                                    ('DistReturn', 'dist_rew')):
            per_path_totals = [
                sum(path['env_infos'][info_key]) for path in paths]
            logger.record_tabular_misc_stat(
                stat_name, per_path_totals, placement='front')

    # Shallow-copy each path and keep only the leading observation
    # columns belonging to the wrapped env.
    # NOTE (original author): this breaks if the robot observations are
    # more than one-dimensional (not a vector).
    stripped_paths = []
    for path in paths:
        trimmed = dict(path)
        trimmed['observations'] = trimmed['observations'][
            :, :self.wrapped_env.observation_space.flat_dim]
        stripped_paths.append(trimmed)

    with logger.tabular_prefix('wrapped_'):
        first_path = paths[0]
        if 'env_infos' in first_path and 'inner_rew' in first_path['env_infos']:
            inner_totals = [
                np.sum(path['env_infos']['inner_rew']) for path in paths]
            logger.record_tabular('AverageReturn', np.mean(inner_totals))
        # See swimmer_env.py for a sketch of the maze plotting.
        self.wrapped_env.log_diagnostics(stripped_paths)
示例6: evaluate
# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def evaluate(self, epoch, pool):
    """Roll out the current policy and record evaluation statistics.

    Collects ``self.n_eval_episodes`` rollouts, records return statistics
    together with Q-value / target / loss statistics accumulated on the
    instance, and finally clears those accumulators.

    :param epoch: epoch index, recorded under the ``Epoch`` key.
    :param pool: not used by this method.
    """
    logger.log("Collecting samples for evaluation")
    eval_paths = rollouts(
        self.env, self.policy, self.max_path_length, self.n_eval_episodes)

    discounted = [
        special.discount_return(path["rewards"], self.discount)
        for path in eval_paths
    ]
    average_discounted_return = np.mean(discounted)
    undiscounted = [sum(path["rewards"]) for path in eval_paths]

    q_values = np.concatenate(self.q_averages)
    targets = np.concatenate(self.y_averages)
    mean_q_loss = np.mean(self.qf_loss_averages)
    q_param_norm = self.qf.get_param_values().norm()

    record = logger.record_tabular
    record('Epoch', epoch)
    record('AverageReturn', np.mean(undiscounted))
    record('StdReturn', np.std(undiscounted))
    record('MaxReturn', np.max(undiscounted))
    record('MinReturn', np.min(undiscounted))
    record('AverageDiscountedReturn', average_discounted_return)
    record('AverageQLoss', mean_q_loss)
    record('AverageQ', np.mean(q_values))
    record('AverageAbsQ', np.mean(np.abs(q_values)))
    record('AverageY', np.mean(targets))
    record('AverageAbsY', np.mean(np.abs(targets)))
    record('AverageAbsQYDiff', np.mean(np.abs(q_values - targets)))
    record('QFunParamNorm', q_param_norm)

    # Start the next evaluation window with empty accumulators.
    self.qf_loss_averages = []
    self.policy_surr_averages = []
    self.q_averages = []
    self.y_averages = []
示例7: compute_irl
# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def compute_irl(self, paths, itr=0):
    """Replace/augment the path rewards with the learned IRL reward.

    Steps, in order:

    1. If ``self.no_reward`` is set, zero every path's ``rewards`` in
       place and record the original average return.
    2. If ``self.irl_model_wt <= 0``, return ``paths`` immediately.
    3. If ``self.train_irl`` is set, fit the IRL model on ``paths`` and
       record its mean loss.
    4. Evaluate the IRL model, record mean/max/min of the estimated
       rewards and add them (scaled by ``self.irl_model_wt``) to the path
       rewards: to the last step only when the model scores whole
       trajectories, otherwise element-wise.

    :param paths: sequence of path mappings with a ``'rewards'`` array;
        the arrays are modified in place.
    :param itr: iteration index forwarded to the IRL model.
    :return: the same ``paths`` object.
    """
    if self.no_reward:
        # Zero the original reward signal, keeping its total for logging.
        tot_rew = 0
        for path in paths:
            tot_rew += np.sum(path['rewards'])
            path['rewards'] *= 0
        logger.record_tabular(
            'OriginalTaskAverageReturn', tot_rew / float(len(paths)))

    if self.irl_model_wt <= 0:
        return paths

    if self.train_irl:
        mean_loss = self.irl_model.fit(
            paths, policy=self.policy, itr=itr,
            batch_size=self.discrim_batch_size,
            max_itrs=self.discrim_train_itrs, logger=logger)
        logger.record_tabular('IRLLoss', mean_loss)
        self.irl_params = self.irl_model.get_params()

    estimated_rewards = self.irl_model.eval(
        paths, gamma=self.discount, itr=itr)

    # Flatten once and reuse for all three statistics instead of
    # concatenating the per-path rewards three separate times.
    flat_rewards = np.concatenate(estimated_rewards)
    logger.record_tabular('IRLRewardMean', np.mean(flat_rewards))
    logger.record_tabular('IRLRewardMax', np.max(flat_rewards))
    logger.record_tabular('IRLRewardMin', np.min(flat_rewards))

    # Add the learned reward signal to the (possibly zeroed) rewards;
    # this is what the agent will use to learn its policy.
    if self.irl_model.score_trajectories:
        # TODO: should I add to reward here or after advantage computation?
        for i, path in enumerate(paths):
            path['rewards'][-1] += self.irl_model_wt * estimated_rewards[i]
    else:
        for i, path in enumerate(paths):
            path['rewards'] += self.irl_model_wt * estimated_rewards[i]
    return paths
示例8: train
# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def train(self):
    """Run the IRL-augmented batch training loop.

    Optionally restores initial policy / IRL-model parameters, starts the
    sampler worker and then, for every iteration in
    ``range(self.start_itr, self.n_itr)``: obtains samples, updates the
    reward through ``compute_irl``, processes the samples, logs
    diagnostics, optimizes the policy, saves a snapshot and dumps the
    tabular log.

    The worker is shut down in a ``finally`` clause, so it is released
    even when an iteration raises; the exception still propagates.

    :return: ``None``.
    """
    if self.init_pol_params is not None:
        self.policy.set_param_values(self.init_pol_params)
    if self.init_irl_params is not None:
        self.irl_model.set_params(self.init_irl_params)

    self.start_worker()
    try:
        start_time = time.time()
        for itr in range(self.start_itr, self.n_itr):
            itr_start_time = time.time()
            with logger.prefix('itr #%d | ' % itr):
                logger.log('Obtaining samples...')
                paths = self.sampler.obtain_samples(itr)

                logger.log('Processing samples...')
                # Update the reward function before the samples are
                # turned into training data.
                paths = self.compute_irl(paths, itr=itr)
                samples_data = self.sampler.process_samples(itr, paths)

                logger.log('Logging diagnostics...')
                self.log_diagnostics(paths)

                logger.log('Optimizing policy...')
                self.optimize_policy(itr, samples_data)

                logger.log('Saving snapshot...')
                params = self.get_itr_snapshot(itr, samples_data)
                if self.store_paths:
                    params['paths'] = samples_data['paths']
                logger.save_itr_params(itr, params)
                logger.log('Saved')

                logger.record_tabular('Time', time.time() - start_time)
                logger.record_tabular('ItrTime', time.time() - itr_start_time)
                logger.dump_tabular(with_prefix=False)

                if self.plot:
                    self.update_plot()
                    if self.pause_for_plot:
                        input('Plotting evaluation run: Press Enter to '
                              'continue...')
    finally:
        # Always release the sampler worker, including on failure.
        self.shutdown_worker()
示例9: log_diagnostics
# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def log_diagnostics(self, paths):
    """Record how many distinct goals were reached by the given paths.

    A goal counts as reached when the final observation of at least one
    path lies within ``self.goal_threshold`` (Euclidean norm) of it. The
    count is recorded under ``env:goals_reached``.

    :param paths: sequence of path mappings with an "observations" entry.
    """
    reached_indices = set()
    for path in paths:
        final_obs = path["observations"][-1]
        for goal_idx, goal in enumerate(self.goal_positions):
            distance = np.linalg.norm(final_obs - goal)
            if distance < self.goal_threshold:
                reached_indices.add(goal_idx)
    logger.record_tabular('env:goals_reached', len(reached_indices))
示例10: train
# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def train(self, sess=None):
    """Run the batch training loop inside a TensorFlow session.

    When ``sess`` is ``None`` a new ``tf.Session`` is created and entered,
    and it is closed again when training ends. Global variables are
    initialised, the sampler worker is started and then, for every
    iteration in ``range(self.start_itr, self.n_itr)``, samples are
    obtained and processed, diagnostics are logged, the policy is
    optimized, a snapshot is saved and the tabular log is dumped.

    Cleanup (worker shutdown and closing a session created here) happens
    in ``finally`` clauses, so resources are released even when an
    iteration raises; the exception still propagates.

    :param sess: optional existing session to run in; a caller-supplied
        session is never closed by this method.
    """
    created_session = sess is None
    if created_session:
        sess = tf.Session()
        sess.__enter__()

    try:
        sess.run(tf.global_variables_initializer())
        self.start_worker()
        try:
            start_time = time.time()
            for itr in range(self.start_itr, self.n_itr):
                itr_start_time = time.time()
                with logger.prefix('itr #%d | ' % itr):
                    logger.log("Obtaining samples...")
                    paths = self.obtain_samples(itr)

                    logger.log("Processing samples...")
                    samples_data = self.process_samples(itr, paths)

                    logger.log("Logging diagnostics...")
                    self.log_diagnostics(paths)

                    logger.log("Optimizing policy...")
                    self.optimize_policy(itr, samples_data)

                    logger.log("Saving snapshot...")
                    params = self.get_itr_snapshot(itr, samples_data)
                    if self.store_paths:
                        params["paths"] = samples_data["paths"]
                    logger.save_itr_params(itr, params)
                    logger.log("Saved")

                    logger.record_tabular('Time', time.time() - start_time)
                    logger.record_tabular(
                        'ItrTime', time.time() - itr_start_time)
                    logger.dump_tabular(with_prefix=False)

                    if self.plot:
                        rollout(self.env, self.policy, animated=True,
                                max_path_length=self.max_path_length)
                        if self.pause_for_plot:
                            input("Plotting evaluation run: Press Enter to "
                                  "continue...")
        finally:
            # Always release the sampler worker, including on failure.
            self.shutdown_worker()
    finally:
        # Only close a session this method created itself.
        if created_session:
            sess.close()
示例11: optimize_policy
# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def optimize_policy(self, itr, samples_data):
    """Run one constrained optimizer step and record loss / KL statistics.

    The optimizer input is assembled, in order, from "observations",
    "actions", "advantages", the policy's state-info arrays, its
    distribution-info arrays and (for recurrent policies) "valids".
    Recorded keys: ``LossBefore``, ``LossAfter``, ``MeanKLBefore``,
    ``MeanKL`` and ``dLoss`` (loss before minus loss after).

    :param itr: iteration index; not used by this method.
    :param samples_data: mapping holding the batch.
    :return: an empty ``dict``.
    """
    batch = tuple(ext.extract(
        samples_data, "observations", "actions", "advantages"))
    agent_infos = samples_data["agent_infos"]
    state_infos = tuple(
        agent_infos[key] for key in self.policy.state_info_keys)
    dist_infos = tuple(
        agent_infos[key] for key in self.policy.distribution.dist_info_keys)
    batch += state_infos + dist_infos
    if self.policy.recurrent:
        batch += (samples_data["valids"],)

    logger.log("Computing loss before")
    loss_before = self.optimizer.loss(batch)
    logger.log("Computing KL before")
    kl_before = self.optimizer.constraint_val(batch)

    logger.log("Optimizing")
    self.optimizer.optimize(batch)

    logger.log("Computing KL after")
    kl_after = self.optimizer.constraint_val(batch)
    logger.log("Computing loss after")
    loss_after = self.optimizer.loss(batch)

    for key, value in (('LossBefore', loss_before),
                       ('LossAfter', loss_after),
                       ('MeanKLBefore', kl_before),
                       ('MeanKL', kl_after)):
        logger.record_tabular(key, value)
    logger.record_tabular('dLoss', loss_before - loss_after)
    return {}
示例12: compute_irl
# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def compute_irl(self, paths, itr=0):
    """Replace/augment the path rewards with the learned IRL reward.

    Steps, in order:

    1. If ``self.no_reward`` is set, zero every path's ``rewards`` in
       place and record the original average return.
    2. If ``self.irl_model_wt <= 0``, stop here.
    3. If ``self.train_irl`` is set, fit the IRL model and record its mean
       loss; otherwise call ``self.irl_model.next_state(paths)``.
    4. Evaluate the IRL model, record mean/max/min of the result and add
       it (scaled by ``self.irl_model_wt``) to the path rewards: to the
       last step only when the model scores whole trajectories, otherwise
       element-wise.

    :param paths: sequence of path mappings with a ``'rewards'`` array;
        the arrays are modified in place.
    :param itr: iteration index forwarded to the IRL model.
    :return: ``(paths, r)`` where ``r`` is the original average return
        when ``self.no_reward`` is set and ``0`` otherwise. The same pair
        is returned on every exit path, so callers can always unpack it.
    """
    r = 0
    if self.no_reward:
        # Zero the original reward signal, keeping its average for the
        # log and for the return value.
        tot_rew = 0
        for path in paths:
            tot_rew += np.sum(path['rewards'])
            path['rewards'] *= 0
        r = tot_rew / float(len(paths))
        logger.record_tabular('OriginalTaskAverageReturn', r)

    if self.irl_model_wt <= 0:
        # Return the same (paths, r) shape as the normal exit below; a
        # bare ``paths`` here would break callers that unpack the result.
        return paths, r

    if self.train_irl:
        max_itrs = self.discrim_train_itrs
        lr = 1e-3
        mean_loss_irl = self.irl_model.fit(
            paths, policy=self.policy, empw_model=self.empw,
            t_empw_model=self.tempw, itr=itr, max_itrs=max_itrs, lr=lr,
            logger=logger)
        logger.record_tabular('IRLLoss', mean_loss_irl)
        self.__irl_params = self.irl_model.get_params()
    else:
        self.irl_model.next_state(paths)

    probs = self.irl_model.eval(
        paths, empw_model=self.empw, t_empw_model=self.tempw,
        gamma=self.discount, itr=itr)
    logger.record_tabular('IRLRewardMean', np.mean(probs))
    logger.record_tabular('IRLRewardMax', np.max(probs))
    logger.record_tabular('IRLRewardMin', np.min(probs))

    if self.irl_model.score_trajectories:
        # TODO: should I add to reward here or after advantage computation?
        for i, path in enumerate(paths):
            path['rewards'][-1] += self.irl_model_wt * probs[i]
    else:
        for i, path in enumerate(paths):
            path['rewards'] += self.irl_model_wt * probs[i]
    return paths, r
示例13: compute_qvar
# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def compute_qvar(self, paths, itr=0):
    """Fit the q-var model on ``paths`` and record its mean loss.

    Does nothing unless ``self.train_qvar`` is truthy. Otherwise the
    model is fitted for at most ``self.discrim_train_itrs`` iterations
    with a learning rate of ``1e-3``, the returned loss is recorded under
    ``Qvar_Loss`` and the model parameters are stored on the instance.

    :param paths: sampled paths forwarded to the model's ``fit``.
    :param itr: iteration index forwarded to the model's ``fit``.
    :return: ``None``.
    """
    if not self.train_qvar:
        return
    learning_rate = 1e-3
    mean_loss_q = self.qvar_model.fit(
        paths, itr=itr, max_itrs=self.discrim_train_itrs,
        lr=learning_rate, logger=logger)
    logger.record_tabular('Qvar_Loss', mean_loss_q)
    self.__qvar_params = self.qvar_model.get_params()
示例14: compute_empw
# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def compute_empw(self, paths, itr=0):
    """Fit the empowerment model on ``paths`` and record its mean loss.

    Does nothing unless ``self.train_empw`` is truthy. Otherwise the
    model is fitted for at most ``self.discrim_train_itrs`` iterations
    with a learning rate of ``1e-3`` (receiving the IRL model, ``tempw``,
    the policy and the q-var model as collaborators), the returned loss
    is recorded under ``EmpwLoss`` and the model parameters are stored on
    the instance.

    :param paths: sampled paths forwarded to the model's ``fit``.
    :param itr: iteration index forwarded to the model's ``fit``.
    :return: ``None``.
    """
    if not self.train_empw:
        return
    learning_rate = 1e-3
    mean_loss_empw = self.empw.fit(
        paths,
        irl_model=self.irl_model,
        tempw=self.tempw,
        policy=self.policy,
        qvar_model=self.qvar_model,
        itr=itr,
        max_itrs=self.discrim_train_itrs,
        lr=learning_rate,
        logger=logger,
    )
    logger.record_tabular('EmpwLoss', mean_loss_empw)
    self.__empw_params = self.empw.get_params()
示例15: optimize_policy
# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def optimize_policy(self, itr, samples_data):
    """Run one constrained optimizer step with an empowerment input.

    The optimizer input is assembled, in order, from "observations",
    "observations_next", "actions", "advantages", a column array built
    from the first component of each ``self.empw.eval`` output row, the
    policy's state-info arrays, its distribution-info arrays and (for
    recurrent policies) "valids". Recorded keys: ``LossBefore``,
    ``LossAfter``, ``MeanKLBefore``, ``MeanKL`` and ``dLoss`` (loss
    before minus loss after).

    :param itr: iteration index; not used by this method.
    :param samples_data: mapping holding the batch.
    :return: an empty ``dict``.
    """
    batch = tuple(ext.extract(
        samples_data,
        "observations", "observations_next", "actions", "advantages",
    ))

    # Evaluate the empowerment model on the observations and keep only the
    # first component of every row, reshaped into a single column.
    empowerment = self.empw.eval(samples_data["observations"])
    first_components = [row[0] for row in empowerment]
    batch += (np.array(first_components).reshape(-1, 1),)

    agent_infos = samples_data["agent_infos"]
    state_infos = tuple(
        agent_infos[key] for key in self.policy.state_info_keys)
    dist_infos = tuple(
        agent_infos[key] for key in self.policy.distribution.dist_info_keys)
    batch += state_infos + dist_infos
    if self.policy.recurrent:
        batch += (samples_data["valids"],)

    logger.log("Computing loss before")
    loss_before = self.optimizer.loss(batch)
    logger.log("Computing KL before")
    kl_before = self.optimizer.constraint_val(batch)

    logger.log("Optimizing")
    self.optimizer.optimize(batch)

    logger.log("Computing KL after")
    kl_after = self.optimizer.constraint_val(batch)
    logger.log("Computing loss after")
    loss_after = self.optimizer.loss(batch)

    for key, value in (('LossBefore', loss_before),
                       ('LossAfter', loss_after),
                       ('MeanKLBefore', kl_before),
                       ('MeanKL', kl_after)):
        logger.record_tabular(key, value)
    logger.record_tabular('dLoss', loss_before - loss_after)
    return {}