当前位置: 首页>>代码示例>>Python>>正文


Python logger.record_tabular方法代码示例

本文整理汇总了Python中rllab.misc.logger.record_tabular方法的典型用法代码示例。如果您正苦于以下问题:Python logger.record_tabular方法的具体用法?Python logger.record_tabular怎么用?Python logger.record_tabular使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在rllab.misc.logger的用法示例。


在下文中一共展示了logger.record_tabular方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: optimize_policy

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def optimize_policy(self, itr, samples_data):
        """Run a single optimizer step on the sampled batch.

        The optimizer input is built from the "observations", "actions" and
        "advantages" entries of ``samples_data``, followed by one entry of
        ``samples_data["agent_infos"]`` for every key listed in
        ``self.policy.state_info_keys`` and, when the policy is recurrent,
        the "valids" entry.

        ``itr`` is accepted but not used here. Nothing is returned.
        """
        batch = ext.extract(
            samples_data,
            "observations", "actions", "advantages"
        )
        infos = samples_data["agent_infos"]
        batch += tuple(infos[key] for key in self.policy.state_info_keys)
        if self.policy.recurrent:
            batch += (samples_data["valids"],)
        # Loss and KL tabular logging is disabled in this variant; only the
        # optimization step itself is performed.
        self.optimizer.optimize(batch)
开发者ID:thanard,项目名称:me-trpo,代码行数:23,代码来源:vpg.py

示例2: fit

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def fit(self, xs, ys):
        """Fit the regressor on ``(xs, ys)`` and log the loss before/after.

        When input normalization is enabled, the stored input mean and std
        are first recomputed from ``xs`` (reduced over axes 0 and 1). With a
        trust region enabled, the output of ``self._f_prob(xs)`` taken before
        the update is passed to the optimizer as a third input.

        Three tabular entries are recorded, prefixed with ``"<name>_"`` when
        ``self._name`` is set: ``LossBefore``, ``LossAfter`` and ``dLoss``.
        """
        if self._normalize_inputs:
            # Refresh the normalization statistics; the small epsilon keeps
            # the stored std from being exactly zero.
            reduce_axes = (0, 1)
            self._x_mean_var.set_value(
                np.mean(xs, axis=reduce_axes, keepdims=True))
            self._x_std_var.set_value(
                np.std(xs, axis=reduce_axes, keepdims=True) + 1e-8)

        inputs = [xs, ys]
        if self._use_trust_region:
            inputs.append(self._f_prob(xs))

        loss_before = self._optimizer.loss(inputs)
        prefix = (self._name + "_") if self._name else ""
        logger.record_tabular(prefix + 'LossBefore', loss_before)

        self._optimizer.optimize(inputs)

        loss_after = self._optimizer.loss(inputs)
        logger.record_tabular(prefix + 'LossAfter', loss_after)
        logger.record_tabular(prefix + 'dLoss', loss_before - loss_after)
开发者ID:florensacc,项目名称:snn4hrl,代码行数:22,代码来源:categorical_recurrent_regressor.py

示例3: fit

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def fit(self, xs, ys):
        """Train on ``(xs, ys)`` and record the loss before and after.

        If ``self._normalize_inputs`` is set, the input mean/std variables
        are recomputed from ``xs`` over axes 0 and 1 (batches and steps). If
        ``self._use_trust_region`` is set, ``self._f_p(xs)`` evaluated before
        the update is appended to the optimizer inputs.

        Records ``LossBefore``, ``LossAfter`` and ``dLoss`` in the tabular
        log, each prefixed with ``self._name + "_"`` when a name is set.
        """
        if self._normalize_inputs:
            # Same reduction for both statistics: over batches AND steps.
            stat_kwargs = dict(axis=(0, 1), keepdims=True)
            self._x_mean_var.set_value(np.mean(xs, **stat_kwargs))
            self._x_std_var.set_value(np.std(xs, **stat_kwargs) + 1e-8)

        if self._use_trust_region:
            inputs = [xs, ys, self._f_p(xs)]
        else:
            inputs = [xs, ys]

        loss_before = self._optimizer.loss(inputs)

        prefix = ""
        if self._name:
            prefix = self._name + "_"

        logger.record_tabular(prefix + 'LossBefore', loss_before)
        self._optimizer.optimize(inputs)
        loss_after = self._optimizer.loss(inputs)
        logger.record_tabular(prefix + 'LossAfter', loss_after)
        logger.record_tabular(prefix + 'dLoss', loss_before - loss_after)
开发者ID:florensacc,项目名称:snn4hrl,代码行数:22,代码来源:bernoulli_recurrent_regressor.py

示例4: fit

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def fit(self, xs, ys):
        """Train on ``(xs, ys)``; log loss and mean KL before and after.

        If ``self._normalize_inputs`` is set, the input mean/std variables
        are recomputed from ``xs`` over axis 0. ``self._f_p(xs)`` is always
        evaluated before the update because it is needed for the KL logging;
        it is passed to the optimizer only when a trust region is used.

        Tabular entries (prefixed with ``self._name + "_"`` when a name is
        set), in recording order: ``MeanKL_Before``, ``LossBefore``,
        ``LossAfter``, ``MeanKL_After``, ``dLoss``.
        """
        if self._normalize_inputs:
            # Recompute the normalizing constants for the inputs.
            mean = np.mean(xs, axis=0, keepdims=True)
            self._x_mean_var.set_value(mean)
            std = np.std(xs, axis=0, keepdims=True) + 1e-8
            self._x_std_var.set_value(std)

        # Needed for the trust region and for the mean-KL logging below.
        pre_update_p = self._f_p(xs)
        inputs = [xs, ys]
        if self._use_trust_region:
            inputs.append(pre_update_p)

        loss_before = self._optimizer.loss(inputs)
        prefix = (self._name + "_") if self._name else ""

        kl_before = self._mean_kl(xs, pre_update_p)
        logger.record_tabular(prefix + 'MeanKL_Before', kl_before)
        logger.record_tabular(prefix + 'LossBefore', loss_before)

        self._optimizer.optimize(inputs)

        loss_after = self._optimizer.loss(inputs)
        kl_after = self._mean_kl(xs, pre_update_p)
        logger.record_tabular(prefix + 'LossAfter', loss_after)
        logger.record_tabular(prefix + 'MeanKL_After', kl_after)
        logger.record_tabular(prefix + 'dLoss', loss_before - loss_after)
开发者ID:florensacc,项目名称:snn4hrl,代码行数:26,代码来源:bernoulli_mlp_regressor.py

示例5: log_diagnostics

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def log_diagnostics(self, paths, *args, **kwargs):
        """Log follow-task returns, then delegate to the wrapped environment.

        Under the ``Follow_`` tabular prefix, statistics of the per-path sums
        of ``env_infos['outer_rew']`` and ``env_infos['dist_rew']`` are
        recorded. Each path is then shallow-copied with its observations cut
        down to the first ``wrapped_env.observation_space.flat_dim`` columns,
        and the copies are handed to ``self.wrapped_env.log_diagnostics``
        under the ``wrapped_`` prefix. If the first path carries
        ``env_infos['inner_rew']``, the mean per-path sum of it is recorded
        as ``AverageReturn`` under that same prefix.

        ``*args`` and ``**kwargs`` are accepted but not used.
        """
        # The pure follow reward is logged on its own.
        with logger.tabular_prefix('Follow_'):
            outer_returns = [sum(p['env_infos']['outer_rew']) for p in paths]
            logger.record_tabular_misc_stat('Return', outer_returns, placement='front')
            dist_returns = [sum(p['env_infos']['dist_rew']) for p in paths]
            logger.record_tabular_misc_stat('DistReturn', dist_returns, placement='front')

        stripped_paths = []
        for path in paths:
            trimmed = dict(path.items())  # shallow copy; originals stay untouched
            # The column slice breaks if the robot observations are not a
            # flat vector (more than one dimension per step).
            obs_dim = self.wrapped_env.observation_space.flat_dim
            trimmed['observations'] = trimmed['observations'][:, :obs_dim]
            stripped_paths.append(trimmed)

        with logger.tabular_prefix('wrapped_'):
            first = paths[0]
            if 'env_infos' in first and 'inner_rew' in first['env_infos']:
                inner_returns = [np.sum(p['env_infos']['inner_rew']) for p in paths]
                logger.record_tabular('AverageReturn', np.mean(inner_returns))
            # See swimmer_env.py for a sketch of the maze plotting.
            self.wrapped_env.log_diagnostics(stripped_paths)
开发者ID:florensacc,项目名称:snn4hrl,代码行数:24,代码来源:follow_env.py

示例6: evaluate

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def evaluate(self, epoch, pool):
        """Roll out the current policy and log evaluation statistics.

        Collects ``self.n_eval_episodes`` rollouts of at most
        ``self.max_path_length`` steps, then records return statistics, the
        accumulated Q/target statistics and the Q-function parameter norm in
        the tabular log. Finally the running accumulators
        (``qf_loss_averages``, ``policy_surr_averages``, ``q_averages``,
        ``y_averages``) are reset to empty lists.

        ``pool`` is accepted but not used here.
        """
        logger.log("Collecting samples for evaluation")

        eval_paths = rollouts(self.env, self.policy,
                              self.max_path_length, self.n_eval_episodes)

        discounted = [
            special.discount_return(p["rewards"], self.discount)
            for p in eval_paths
        ]
        average_discounted_return = np.mean(discounted)
        returns = [sum(p["rewards"]) for p in eval_paths]

        all_qs = np.concatenate(self.q_averages)
        all_ys = np.concatenate(self.y_averages)
        average_q_loss = np.mean(self.qf_loss_averages)
        qfun_param_norm = self.qf.get_param_values().norm()

        record = logger.record_tabular
        record('Epoch', epoch)
        record('AverageReturn', np.mean(returns))
        record('StdReturn', np.std(returns))
        record('MaxReturn', np.max(returns))
        record('MinReturn', np.min(returns))
        record('AverageDiscountedReturn', average_discounted_return)
        record('AverageQLoss', average_q_loss)
        record('AverageQ', np.mean(all_qs))
        record('AverageAbsQ', np.mean(np.abs(all_qs)))
        record('AverageY', np.mean(all_ys))
        record('AverageAbsY', np.mean(np.abs(all_ys)))
        record('AverageAbsQYDiff', np.mean(np.abs(all_qs - all_ys)))
        record('QFunParamNorm', qfun_param_norm)

        # Start the next epoch with empty accumulators.
        self.qf_loss_averages = []
        self.policy_surr_averages = []
        self.q_averages = []
        self.y_averages = []
开发者ID:nosyndicate,项目名称:pytorchrl,代码行数:39,代码来源:softq.py

示例7: compute_irl

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def compute_irl(self, paths, itr=0):
        """Optionally train the IRL model and fold its rewards into ``paths``.

        Steps, in order:

        1. If ``self.no_reward`` is set, log the average original return and
           zero every ``path['rewards']`` with an in-place operator.
        2. If ``self.irl_model_wt <= 0``, return ``paths`` right away.
        3. If ``self.train_irl`` is set, fit the IRL model on ``paths``, log
           its mean loss and cache its parameters in ``self.irl_params``.
        4. Evaluate the IRL model, log mean/max/min of the estimated
           rewards, and add ``irl_model_wt * estimate`` to each path: to the
           last reward only when the model scores whole trajectories,
           otherwise to the full reward entry.

        Returns the same ``paths`` object that was passed in.
        """
        # Zero the original reward signal.
        if self.no_reward:
            original_total = 0
            for path in paths:
                original_total += np.sum(path['rewards'])
                path['rewards'] *= 0
            logger.record_tabular('OriginalTaskAverageReturn',
                                  original_total / float(len(paths)))

        if self.irl_model_wt <= 0:
            return paths

        if self.train_irl:
            mean_loss = self.irl_model.fit(
                paths,
                policy=self.policy,
                itr=itr,
                batch_size=self.discrim_batch_size,
                max_itrs=self.discrim_train_itrs,
                logger=logger,
            )
            logger.record_tabular('IRLLoss', mean_loss)
            self.irl_params = self.irl_model.get_params()

        estimated_rewards = self.irl_model.eval(paths, gamma=self.discount, itr=itr)

        flat_estimates = np.concatenate(estimated_rewards)
        logger.record_tabular('IRLRewardMean', np.mean(flat_estimates))
        logger.record_tabular('IRLRewardMax', np.max(flat_estimates))
        logger.record_tabular('IRLRewardMin', np.min(flat_estimates))

        # Replace the original reward signal with the learned one; this is
        # what the agent's policy update will see.
        # TODO: should the reward be added here or after advantage computation?
        per_trajectory = self.irl_model.score_trajectories
        for idx, path in enumerate(paths):
            bonus = self.irl_model_wt * estimated_rewards[idx]
            if per_trajectory:
                path['rewards'][-1] += bonus
            else:
                path['rewards'] += bonus
        return paths
开发者ID:nosyndicate,项目名称:pytorchrl,代码行数:41,代码来源:irl_batch_polopt.py

示例8: train

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def train(self):
        """Run the IRL batch policy-optimization loop.

        Optionally restores initial policy / IRL-model parameters, starts the
        worker, and then for every iteration in ``[start_itr, n_itr)``:
        obtains samples, updates the rewards through ``compute_irl``,
        processes the samples, logs diagnostics, optimizes the policy, saves
        an iteration snapshot and dumps the tabular log (including total and
        per-iteration wall time). If plotting is enabled the plot is updated
        after each iteration, optionally pausing for user input.

        The worker is shut down after the last iteration. Returns ``None``.
        """
        if self.init_pol_params is not None:
            self.policy.set_param_values(self.init_pol_params)
        if self.init_irl_params is not None:
            self.irl_model.set_params(self.init_irl_params)
        self.start_worker()
        train_began = time.time()

        for itr in range(self.start_itr, self.n_itr):
            itr_began = time.time()
            with logger.prefix('itr #%d | ' % itr):
                logger.log('Obtaining samples...')
                paths = self.sampler.obtain_samples(itr)

                logger.log('Processing samples...')
                # Update the reward function before the samples are processed.
                paths = self.compute_irl(paths, itr=itr)
                samples_data = self.sampler.process_samples(itr, paths)

                logger.log('Logging diagnostics...')
                self.log_diagnostics(paths)

                logger.log('Optimizing policy...')
                self.optimize_policy(itr, samples_data)

                logger.log('Saving snapshot...')
                snapshot = self.get_itr_snapshot(itr, samples_data)
                if self.store_paths:
                    snapshot['paths'] = samples_data['paths']
                logger.save_itr_params(itr, snapshot)
                logger.log('Saved')

                logger.record_tabular('Time', time.time() - train_began)
                logger.record_tabular('ItrTime', time.time() - itr_began)
                logger.dump_tabular(with_prefix=False)

                if not self.plot:
                    continue
                self.update_plot()
                if self.pause_for_plot:
                    input('Plotting evaluation run: Press Enter to continue...')

        self.shutdown_worker()
开发者ID:nosyndicate,项目名称:pytorchrl,代码行数:42,代码来源:irl_batch_polopt.py

示例9: log_diagnostics

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def log_diagnostics(self, paths):
        """Record how many distinct goals were reached by the given paths.

        A goal counts as reached when the final observation of at least one
        path lies within ``self.goal_threshold`` (norm as computed by
        ``np.linalg.norm``) of that goal position. The count is written to
        the tabular log under ``env:goals_reached``.
        """
        reached = [False for _ in range(len(self.goal_positions))]

        for path in paths:
            final_obs = path["observations"][-1]
            for idx, goal in enumerate(self.goal_positions):
                distance = np.linalg.norm(final_obs - goal)
                if distance < self.goal_threshold:
                    reached[idx] = True

        logger.record_tabular('env:goals_reached', reached.count(True))
开发者ID:nosyndicate,项目名称:pytorchrl,代码行数:13,代码来源:multigoal_env.py

示例10: train

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def train(self, sess=None):
        """Run the batch policy-optimization loop inside a TensorFlow session.

        Args:
            sess: An existing session to use. When ``None``, a new
                ``tf.Session`` is created, entered, and closed again before
                this method returns or propagates an exception.

        For every iteration in ``[start_itr, n_itr)`` the method obtains and
        processes samples, logs diagnostics, optimizes the policy, saves an
        iteration snapshot and dumps the tabular log (including total and
        per-iteration wall time). With plotting enabled, an animated rollout
        is run after each iteration, optionally pausing for user input.

        The worker is shut down, and a self-created session closed, even when
        an iteration raises, so neither is leaked on failure.
        """
        created_session = sess is None
        if created_session:
            sess = tf.Session()
            sess.__enter__()

        try:
            sess.run(tf.global_variables_initializer())
            self.start_worker()
            try:
                start_time = time.time()
                for itr in range(self.start_itr, self.n_itr):
                    itr_start_time = time.time()
                    with logger.prefix('itr #%d | ' % itr):
                        logger.log("Obtaining samples...")
                        paths = self.obtain_samples(itr)
                        logger.log("Processing samples...")
                        samples_data = self.process_samples(itr, paths)
                        logger.log("Logging diagnostics...")
                        self.log_diagnostics(paths)
                        logger.log("Optimizing policy...")
                        self.optimize_policy(itr, samples_data)
                        logger.log("Saving snapshot...")
                        params = self.get_itr_snapshot(itr, samples_data)
                        if self.store_paths:
                            params["paths"] = samples_data["paths"]
                        logger.save_itr_params(itr, params)
                        logger.log("Saved")
                        logger.record_tabular('Time', time.time() - start_time)
                        logger.record_tabular('ItrTime', time.time() - itr_start_time)
                        logger.dump_tabular(with_prefix=False)
                        if self.plot:
                            rollout(self.env, self.policy, animated=True,
                                    max_path_length=self.max_path_length)
                            if self.pause_for_plot:
                                input("Plotting evaluation run: Press Enter to "
                                      "continue...")
            finally:
                # Always release the worker once it has been started.
                self.shutdown_worker()
        finally:
            # Only close a session this method created; a caller-supplied
            # session stays under the caller's control.
            if created_session:
                sess.close()
开发者ID:ahq1993,项目名称:inverse_rl,代码行数:39,代码来源:batch_polopt.py

示例11: optimize_policy

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def optimize_policy(self, itr, samples_data):
        """Optimize the policy on one batch and log loss / KL diagnostics.

        The optimizer input tuple consists of the "observations", "actions"
        and "advantages" entries of ``samples_data``, then the agent-info
        entries named by ``self.policy.state_info_keys`` and by
        ``self.policy.distribution.dist_info_keys``, and finally the "valids"
        entry when the policy is recurrent.

        The loss and the constraint value are evaluated before and after the
        update and recorded as ``LossBefore``, ``LossAfter``,
        ``MeanKLBefore``, ``MeanKL`` and ``dLoss``.

        ``itr`` is accepted but not used here. Returns an empty dict.
        """
        inputs = tuple(ext.extract(
            samples_data,
            "observations", "actions", "advantages"
        ))
        infos = samples_data["agent_infos"]
        state_infos = tuple(infos[key] for key in self.policy.state_info_keys)
        dist_infos = tuple(
            infos[key] for key in self.policy.distribution.dist_info_keys
        )
        inputs += state_infos + dist_infos
        if self.policy.recurrent:
            inputs += (samples_data["valids"],)

        logger.log("Computing loss before")
        loss_before = self.optimizer.loss(inputs)
        logger.log("Computing KL before")
        kl_before = self.optimizer.constraint_val(inputs)

        logger.log("Optimizing")
        self.optimizer.optimize(inputs)

        logger.log("Computing KL after")
        kl_after = self.optimizer.constraint_val(inputs)
        logger.log("Computing loss after")
        loss_after = self.optimizer.loss(inputs)

        logger.record_tabular('LossBefore', loss_before)
        logger.record_tabular('LossAfter', loss_after)
        logger.record_tabular('MeanKLBefore', kl_before)
        logger.record_tabular('MeanKL', kl_after)
        logger.record_tabular('dLoss', loss_before - loss_after)
        return {}
开发者ID:ahq1993,项目名称:inverse_rl,代码行数:29,代码来源:npo.py

示例12: compute_irl

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def compute_irl(self, paths, itr=0):
        """Optionally train the IRL model and fold its rewards into ``paths``.

        Args:
            paths: Sequence of path dicts. ``path['rewards']`` is updated
                with in-place operators.
            itr: Iteration index forwarded to the IRL model.

        Returns:
            A ``(paths, r)`` tuple on every exit. ``r`` is the average
            original return per path when ``self.no_reward`` is set, and
            ``0`` otherwise.

        Steps, in order:

        1. If ``self.no_reward`` is set, log the average original return and
           zero every ``path['rewards']``.
        2. If ``self.irl_model_wt <= 0``, return without touching the IRL
           model.
        3. If ``self.train_irl`` is set, fit the IRL model, log its mean loss
           and cache its parameters; otherwise call
           ``self.irl_model.next_state(paths)``.
        4. Evaluate the IRL model, log mean/max/min of the result, and add
           ``irl_model_wt * probs[i]`` to each path: to the last reward only
           when the model scores whole trajectories, otherwise to the full
           reward entry.
        """
        r = 0
        if self.no_reward:
            tot_rew = 0
            for path in paths:
                tot_rew += np.sum(path['rewards'])
                path['rewards'] *= 0
            r = tot_rew / float(len(paths))
            logger.record_tabular('OriginalTaskAverageReturn', r)

        if self.irl_model_wt <= 0:
            # Return the same (paths, r) pair as the normal exit so that
            # callers can unpack the result uniformly.
            return paths, r

        if self.train_irl:
            max_itrs = self.discrim_train_itrs
            lr = 1e-3
            mean_loss_irl = self.irl_model.fit(
                paths, policy=self.policy, empw_model=self.empw,
                t_empw_model=self.tempw, itr=itr, max_itrs=max_itrs, lr=lr,
                logger=logger)

            logger.record_tabular('IRLLoss', mean_loss_irl)
            self.__irl_params = self.irl_model.get_params()
        else:
            self.irl_model.next_state(paths)

        probs = self.irl_model.eval(
            paths, empw_model=self.empw, t_empw_model=self.tempw,
            gamma=self.discount, itr=itr)

        logger.record_tabular('IRLRewardMean', np.mean(probs))
        logger.record_tabular('IRLRewardMax', np.max(probs))
        logger.record_tabular('IRLRewardMin', np.min(probs))

        if self.irl_model.score_trajectories:
            # TODO: should the reward be added here or after advantage computation?
            for i, path in enumerate(paths):
                path['rewards'][-1] += self.irl_model_wt * probs[i]
        else:
            for i, path in enumerate(paths):
                path['rewards'] += self.irl_model_wt * probs[i]
        return paths, r
开发者ID:ahq1993,项目名称:inverse_rl,代码行数:40,代码来源:irl_batch_polopt.py

示例13: compute_qvar

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def compute_qvar(self, paths, itr=0):
        """Fit the q-var model on ``paths`` when its training is enabled.

        Does nothing unless ``self.train_qvar`` is set. Otherwise the model
        is fitted for up to ``self.discrim_train_itrs`` iterations with a
        learning rate of ``1e-3``, the returned mean loss is recorded as
        ``Qvar_Loss``, and the model parameters are cached on ``self``.

        Returns ``None``.
        """
        if not self.train_qvar:
            return

        mean_loss_q = self.qvar_model.fit(
            paths,
            itr=itr,
            max_itrs=self.discrim_train_itrs,
            lr=1e-3,
            logger=logger,
        )
        logger.record_tabular('Qvar_Loss', mean_loss_q)
        self.__qvar_params = self.qvar_model.get_params()
开发者ID:ahq1993,项目名称:inverse_rl,代码行数:12,代码来源:irl_batch_polopt.py

示例14: compute_empw

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def compute_empw(self, paths, itr=0):
        """Fit the empowerment model on ``paths`` when its training is enabled.

        Does nothing unless ``self.train_empw`` is set. Otherwise
        ``self.empw`` is fitted for up to ``self.discrim_train_itrs``
        iterations with a learning rate of ``1e-3``, receiving the IRL model,
        ``self.tempw``, the policy and the q-var model as keyword arguments.
        The returned mean loss is recorded as ``EmpwLoss`` and the model
        parameters are cached on ``self``.

        Returns ``None``.
        """
        if not self.train_empw:
            return

        mean_loss_empw = self.empw.fit(
            paths,
            irl_model=self.irl_model,
            tempw=self.tempw,
            policy=self.policy,
            qvar_model=self.qvar_model,
            itr=itr,
            max_itrs=self.discrim_train_itrs,
            lr=1e-3,
            logger=logger,
        )
        logger.record_tabular('EmpwLoss', mean_loss_empw)
        self.__empw_params = self.empw.get_params()
开发者ID:ahq1993,项目名称:inverse_rl,代码行数:10,代码来源:irl_batch_polopt.py

示例15: optimize_policy

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import record_tabular [as 别名]
def optimize_policy(self, itr, samples_data):
        """Optimize the policy on one batch, including an empowerment column.

        The optimizer input tuple consists of the "observations",
        "observations_next", "actions" and "advantages" entries of
        ``samples_data``; then a ``(-1, 1)``-shaped array holding the first
        element of every row returned by ``self.empw.eval(observations)``;
        then the agent-info entries named by ``self.policy.state_info_keys``
        and ``self.policy.distribution.dist_info_keys``; and finally the
        "valids" entry when the policy is recurrent.

        The loss and the constraint value are evaluated before and after the
        update and recorded as ``LossBefore``, ``LossAfter``,
        ``MeanKLBefore``, ``MeanKL`` and ``dLoss``.

        ``itr`` is accepted but not used here. Returns an empty dict.
        """
        inputs = tuple(ext.extract(
            samples_data,
            "observations", "observations_next", "actions", "advantages",
        ))

        empowerment = self.empw.eval(samples_data["observations"])
        # Keep only the first component of each evaluated row, as a column.
        first_components = [row[0] for row in empowerment]
        inputs += (np.array(first_components).reshape(-1, 1),)

        infos = samples_data["agent_infos"]
        state_infos = tuple(infos[key] for key in self.policy.state_info_keys)
        dist_infos = tuple(
            infos[key] for key in self.policy.distribution.dist_info_keys
        )
        inputs += state_infos + dist_infos
        if self.policy.recurrent:
            inputs += (samples_data["valids"],)

        logger.log("Computing loss before")
        loss_before = self.optimizer.loss(inputs)
        logger.log("Computing KL before")
        kl_before = self.optimizer.constraint_val(inputs)

        logger.log("Optimizing")
        self.optimizer.optimize(inputs)

        logger.log("Computing KL after")
        kl_after = self.optimizer.constraint_val(inputs)
        logger.log("Computing loss after")
        loss_after = self.optimizer.loss(inputs)

        logger.record_tabular('LossBefore', loss_before)
        logger.record_tabular('LossAfter', loss_after)
        logger.record_tabular('MeanKLBefore', kl_before)
        logger.record_tabular('MeanKL', kl_after)
        logger.record_tabular('dLoss', loss_before - loss_after)
        return {}
开发者ID:ahq1993,项目名称:inverse_rl,代码行数:38,代码来源:irl_npo.py


注:本文中的rllab.misc.logger.record_tabular方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。