当前位置: 首页>>代码示例>>Python>>正文


Python logger.log方法代码示例

本文整理汇总了 Python 中 rllab.misc.logger.log 方法的典型用法代码示例。如果您正苦于以下问题:Python logger.log 方法的具体用法?Python logger.log 怎么用?Python logger.log 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 rllab.misc.logger 的用法示例。


在下文中一共展示了 logger.log 方法的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: optimize_policy

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import log [as 别名]
def optimize_policy(self, itr, samples_data):
        """Assemble the optimizer inputs from ``samples_data`` and run one update.

        The inputs are, in order: the "observations", "actions" and
        "advantages" entries, one "agent_infos" entry per key in
        ``self.policy.state_info_keys`` and, when the policy is recurrent,
        the "valids" entry.

        ``itr`` is not used by this method.  Loss / KL tabular diagnostics
        are currently disabled, so nothing is recorded and nothing is
        returned.
        """
        batch = ext.extract(samples_data, "observations", "actions", "advantages")
        infos = samples_data["agent_infos"]
        batch += tuple(infos[key] for key in self.policy.state_info_keys)
        if self.policy.recurrent:
            batch += (samples_data["valids"],)
        self.optimizer.optimize(batch)
开发者ID:thanard,项目名称:me-trpo,代码行数:23,代码来源:vpg.py

示例2: get_logger

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import log [as 别名]
def get_logger(logger_name, folderpath, level=logging.DEBUG):
    """Return the named logger, writing to ``info.log`` and ``debug.log``.

    Two file handlers are attached, both located inside ``folderpath``:

    * ``info.log``  -- records at INFO and above, formatted as
      ``LEVEL - message``.
    * ``debug.log`` -- records at DEBUG and above, formatted as
      ``timestamp - LEVEL - message``.

    ``logging.getLogger`` hands back the same object for the same name, so
    this function is idempotent: a file that already has a handler on the
    logger is not given a second one (which would duplicate every line).

    Args:
        logger_name: Name passed to ``logging.getLogger``.
        folderpath: Existing directory in which the log files are created.
        level: Threshold set on the logger itself (default ``logging.DEBUG``).

    Returns:
        The configured ``logging.Logger``.
    """
    import os  # local import keeps the block self-contained

    logger = logging.getLogger(logger_name)
    logger.setLevel(level)

    # FileHandler stores the absolute path of its file in ``baseFilename``.
    already_attached = {
        handler.baseFilename
        for handler in logger.handlers
        if isinstance(handler, logging.FileHandler)
    }

    targets = (
        ('info.log', logging.INFO, '%(levelname)s - %(message)s'),
        ('debug.log', logging.DEBUG,
         '%(asctime)s - %(levelname)s - %(message)s'),
    )
    for logname, handler_level, fmt in targets:
        path = os.path.join(folderpath, logname)
        if os.path.abspath(path) in already_attached:
            # A previous call already wired this file up.
            continue
        handler = logging.FileHandler(path)
        handler.setLevel(handler_level)
        handler.setFormatter(logging.Formatter(fmt))
        logger.addHandler(handler)
    return logger
开发者ID:thanard,项目名称:me-trpo,代码行数:20,代码来源:utils.py

示例3: fit

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import log [as 别名]
def fit(self, paths):
        """Fit ``self._regressor`` to predict latents from observations and actions.

        For every path the columns ``self.obs_regressed`` of "observations"
        and ``self.act_regressed`` of "actions" are selected and joined along
        the last axis.  In the recurrent case the paths are stacked, giving
        inputs of shape (traj, time, dim); otherwise all paths are
        concatenated along the first axis and the latents are reshaped to
        ``(-1, self.latent_dim)``.

        When ``self.noisify_traj_coef`` is truthy, Gaussian noise is added to
        the joined inputs before fitting.
        """

        def _noisify(features):
            # Zero-mean noise whose scale is the mean absolute feature value
            # times the coefficient; added in place.
            if self.noisify_traj_coef:
                magnitude = float(np.mean(np.abs(features)))
                features += np.random.normal(
                    loc=0.0,
                    scale=magnitude * self.noisify_traj_coef,
                    size=np.shape(features),
                )
            return features

        logger.log('fitting the regressor...')
        if not self.recurrent:
            flat_obs = np.concatenate(
                [path["observations"][:, self.obs_regressed] for path in paths])
            flat_act = np.concatenate(
                [path["actions"][:, self.act_regressed] for path in paths])
            features = np.concatenate([flat_obs, flat_act], axis=1)
            targets = np.concatenate(
                [path['agent_infos']["latents"] for path in paths])
            features = _noisify(features)
            self._regressor.fit(
                features, targets.reshape((-1, self.latent_dim)))
        else:
            stacked_obs = np.array(
                [path["observations"][:, self.obs_regressed] for path in paths])
            stacked_act = np.array(
                [path["actions"][:, self.act_regressed] for path in paths])
            features = _noisify(
                np.concatenate([stacked_obs, stacked_act], axis=2))
            targets = np.array(
                [path['agent_infos']['latents'] for path in paths])
            # Inputs here are shaped (traj, time, dim).
            self._regressor.fit(features, targets)
        logger.log('done fitting the regressor')
开发者ID:florensacc,项目名称:snn4hrl,代码行数:25,代码来源:latent_regressor.py

示例4: __init__

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import log [as 别名]
def __init__(
            self,
            log_individual_latents=False,
            log_deterministic=False,
            log_hierarchy=False,
            bonus_evaluator=None,
            reward_coef_bonus=None,
            latent_regressor=None,
            reward_regressor_mi=0,
            switch_lat_every=0,
            **kwargs):
        """Store the logging switches and initialise the parent with a ``BatchSampler_snn``.

        Args:
            log_individual_latents: Log the progress of each individual latent.
            log_deterministic: Log the performance of the policy with std=0
                (separately for each latent).
            log_hierarchy: Stored on the instance; not used in this method.
            bonus_evaluator, reward_coef_bonus, latent_regressor,
            reward_regressor_mi, switch_lat_every: Forwarded unchanged to the
                sampler through ``sampler_args``.
            **kwargs: Passed on to the parent constructor.
        """
        # Extra logging switches.
        self.log_individual_latents = log_individual_latents
        self.log_deterministic = log_deterministic
        self.log_hierarchy = log_hierarchy

        super(NPO_snn, self).__init__(
            sampler_cls=BatchSampler_snn,
            sampler_args=dict(
                switch_lat_every=switch_lat_every,
                latent_regressor=latent_regressor,
                bonus_evaluator=bonus_evaluator,
                reward_coef_bonus=reward_coef_bonus,
                reward_regressor_mi=reward_regressor_mi,
            ),
            **kwargs)
开发者ID:florensacc,项目名称:snn4hrl,代码行数:27,代码来源:npo_snn_rewards.py

示例5: train

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import log [as 别名]
def train(self):
        """Run the sample / optimize / snapshot loop for the remaining iterations.

        Iterations run from ``self.current_itr`` up to (excluding)
        ``self.n_itr``.  Each one obtains samples from ``self.sampler``,
        processes them, logs diagnostics, optimizes the policy, saves a
        snapshot through the logger and dumps the tabular output.
        ``self.current_itr`` is advanced before the snapshot is written, and
        the snapshot holds the algorithm object itself under ``'algo'``.

        ``shutdown_worker`` is called from a ``finally`` clause, so the call
        paired with ``start_worker`` is made even when an iteration raises
        (previously it was only reached after a fully successful run).
        """
        self.start_worker()
        try:
            for itr in range(self.current_itr, self.n_itr):
                with logger.prefix('itr #%d | ' % itr):
                    logger.log('Obtaining samples...')
                    paths = self.sampler.obtain_samples(itr)
                    logger.log('Processing samples...')
                    samples_data = self.sampler.process_samples(itr, paths)
                    logger.log('Logging diagnostics...')
                    self.log_diagnostics(paths)
                    logger.log('Optimizing policy...')
                    self.optimize_policy(itr, samples_data)
                    logger.log('Saving snapshot...')
                    params = self.get_itr_snapshot(itr, samples_data)
                    # Advance first so the pickled algo records the next itr.
                    self.current_itr = itr + 1
                    params['algo'] = self
                    # Save the trajectories into the snapshot when requested.
                    if self.store_paths:
                        params['paths'] = samples_data['paths']
                    logger.save_itr_params(itr, params)
                    logger.log('Saved')
                    logger.dump_tabular(with_prefix=False)
                    if self.plot:
                        self.update_plot()
                        if self.pause_for_plot:
                            input('Plotting evaluation run: Press Enter to '
                                  'continue...')
        finally:
            self.shutdown_worker()
开发者ID:nosyndicate,项目名称:pytorchrl,代码行数:31,代码来源:batch_polopt.py

示例6: evaluate

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import log [as 别名]
def evaluate(self, epoch, pool):
        """Roll out the current policy and record evaluation statistics.

        ``self.n_eval_episodes`` is passed to ``rollouts`` together with the
        environment, policy and ``self.max_path_length``.  Return statistics
        of the resulting paths are recorded through the logger, along with
        statistics of the accumulated ``q_averages``, ``y_averages`` and
        ``qf_loss_averages`` and the norm of the Q-function parameters.
        The accumulators (including ``policy_surr_averages``) are then reset
        to empty lists.

        ``pool`` is not used by this method.
        """
        logger.log("Collecting samples for evaluation")

        eval_paths = rollouts(
            self.env, self.policy, self.max_path_length, self.n_eval_episodes)

        discounted = [
            special.discount_return(p["rewards"], self.discount)
            for p in eval_paths
        ]
        mean_discounted_return = np.mean(discounted)
        episode_returns = [sum(p["rewards"]) for p in eval_paths]

        q_values = np.concatenate(self.q_averages)
        y_values = np.concatenate(self.y_averages)
        mean_q_loss = np.mean(self.qf_loss_averages)
        param_norm = self.qf.get_param_values().norm()

        record = logger.record_tabular
        record('Epoch', epoch)
        record('AverageReturn', np.mean(episode_returns))
        record('StdReturn', np.std(episode_returns))
        record('MaxReturn', np.max(episode_returns))
        record('MinReturn', np.min(episode_returns))
        record('AverageDiscountedReturn', mean_discounted_return)
        record('AverageQLoss', mean_q_loss)
        record('AverageQ', np.mean(q_values))
        record('AverageAbsQ', np.mean(np.abs(q_values)))
        record('AverageY', np.mean(y_values))
        record('AverageAbsY', np.mean(np.abs(y_values)))
        record('AverageAbsQYDiff', np.mean(np.abs(q_values - y_values)))
        record('QFunParamNorm', param_norm)

        # Start the next evaluation window with empty accumulators.
        self.qf_loss_averages = []
        self.policy_surr_averages = []
        self.q_averages = []
        self.y_averages = []
开发者ID:nosyndicate,项目名称:pytorchrl,代码行数:39,代码来源:softq.py

示例7: train

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import log [as 别名]
def train(self):
        """Run the IRL training loop from ``start_itr`` to ``n_itr``.

        Optional initial parameters are loaded into the policy and the IRL
        model first.  Each iteration then obtains samples, passes them
        through ``compute_irl`` (which updates the reward function), processes
        the result, logs diagnostics, optimizes the policy, saves a snapshot
        and records the total and per-iteration wall-clock time.

        ``shutdown_worker`` is called from a ``finally`` clause, so the call
        paired with ``start_worker`` is made even when an iteration raises
        (previously it was only reached after a fully successful run).
        """
        if self.init_pol_params is not None:
            self.policy.set_param_values(self.init_pol_params)
        if self.init_irl_params is not None:
            self.irl_model.set_params(self.init_irl_params)
        self.start_worker()
        start_time = time.time()

        try:
            for itr in range(self.start_itr, self.n_itr):
                itr_start_time = time.time()
                with logger.prefix('itr #%d | ' % itr):
                    logger.log('Obtaining samples...')
                    paths = self.sampler.obtain_samples(itr)
                    logger.log('Processing samples...')
                    # Update the reward function.
                    paths = self.compute_irl(paths, itr=itr)
                    samples_data = self.sampler.process_samples(itr, paths)

                    logger.log('Logging diagnostics...')
                    self.log_diagnostics(paths)
                    logger.log('Optimizing policy...')
                    self.optimize_policy(itr, samples_data)
                    logger.log('Saving snapshot...')
                    params = self.get_itr_snapshot(itr, samples_data)
                    if self.store_paths:
                        params['paths'] = samples_data['paths']
                    logger.save_itr_params(itr, params)
                    logger.log('Saved')
                    logger.record_tabular('Time', time.time() - start_time)
                    logger.record_tabular(
                        'ItrTime', time.time() - itr_start_time)
                    logger.dump_tabular(with_prefix=False)
                    if self.plot:
                        self.update_plot()
                        if self.pause_for_plot:
                            input('Plotting evaluation run: Press Enter to '
                                  'continue...')
        finally:
            self.shutdown_worker()

示例8: populate_task

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import log [as 别名]
def populate_task(env, policy, scope=None):
    """Make ``env`` and ``policy`` available to the sampling workers.

    With more than one parallel worker, both objects are pickled once and
    the same payload is sent to every worker through
    ``singleton_pool.run_each``.  Otherwise they are stored directly on the
    scoped ``G`` object of the pool, without copying.
    """
    logger.log("Populating workers...")
    if singleton_pool.n_parallel <= 1:
        # Single process: avoid unnecessary copying.
        scoped_g = _get_scoped_G(singleton_pool.G, scope)
        scoped_g.env = env
        scoped_g.policy = policy
    else:
        payload = (pickle.dumps(env), pickle.dumps(policy), scope)
        singleton_pool.run_each(
            _worker_populate_task,
            [payload] * singleton_pool.n_parallel,
        )
    logger.log("Populated")
开发者ID:nosyndicate,项目名称:pytorchrl,代码行数:15,代码来源:parallel_sampler.py

示例9: __init__

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import log [as 别名]
def __init__(self, env_name, record_video=True, video_schedule=None, log_dir=None, record_log=True,
                 force_reset=False):
        """Create the Gym environment ``env_name`` and optionally wrap it in a monitor.

        Args:
            env_name: Id passed to ``gym.envs.make``.
            record_video: When false, a ``NoVideoSchedule`` replaces any
                ``video_schedule`` given.  Must not be true while
                ``record_log`` is false (enforced by an ``assert``).
            video_schedule: ``video_callable`` for the monitor; defaults to a
                ``CappedCubicVideoSchedule`` when videos are recorded.
            log_dir: Monitor output directory.  When ``None``, falls back to
                ``<snapshot_dir>/gym_log`` if the logger has a snapshot
                directory; otherwise monitoring is skipped.
            record_log: When ``False``, the environment is not wrapped in a
                monitor.
            force_reset: Stored on ``self._force_reset``; not used otherwise
                in this method.
        """
        if log_dir is None:
            if logger.get_snapshot_dir() is None:
                logger.log("Warning: skipping Gym environment monitoring since snapshot_dir not configured.")
            else:
                log_dir = os.path.join(logger.get_snapshot_dir(), "gym_log")
        # NOTE(review): locals() is captured after log_dir may have been
        # reassigned above, so the resolved directory is what quick_init sees.
        # Presumably quick_init records the constructor arguments -- confirm
        # before adding locals or reordering statements above this line.
        Serializable.quick_init(self, locals())

        env = gym.envs.make(env_name)
        self.env = env
        self.env_id = env.spec.id

        # Recording video without recording a log is rejected.
        assert not (not record_log and record_video)

        # Identity check: only the literal False disables monitoring here.
        if log_dir is None or record_log is False:
            self.monitoring = False
        else:
            if not record_video:
                video_schedule = NoVideoSchedule()
            else:
                if video_schedule is None:
                    video_schedule = CappedCubicVideoSchedule()
            # force=True is passed through to the monitor wrapper.
            self.env = gym.wrappers.Monitor(self.env, log_dir, video_callable=video_schedule, force=True)
            self.monitoring = True

        # The spaces and horizon are read from the unwrapped ``env``, not
        # from the (possibly monitored) ``self.env``.
        self._observation_space = convert_gym_space(env.observation_space)
        logger.log("observation space: {}".format(self._observation_space))
        self._action_space = convert_gym_space(env.action_space)
        logger.log("action space: {}".format(self._action_space))
        # Raises KeyError if the spec carries no TimeLimit tag.
        self._horizon = env.spec.tags['wrapper_config.TimeLimit.max_episode_steps']
        self._log_dir = log_dir
        self._force_reset = force_reset 
开发者ID:nosyndicate,项目名称:pytorchrl,代码行数:35,代码来源:gym_env.py

示例10: advance_until_terminate

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import log [as 别名]
def advance_until_terminate(self):
        """Advance ``_top`` while ``get_skip_flag()`` keeps returning true.

        Each skip calls ``advance()`` at least once and then keeps calling it
        until ``_initials[_top]`` is truthy.  Skipping stops when the flag is
        false, when ``_top`` is back at its starting value, or after
        ``_max_skip_episode`` skips.  The number of skips and the start and
        end positions are logged.
        """
        should_skip = self.get_skip_flag()
        skipped = 0
        start_top = self._top
        end_top = (start_top + 1) % self._max_pool_size
        while (should_skip
               and start_top != end_top
               and skipped < self._max_skip_episode):
            skipped += 1
            # Step at least once, then on to the next truthy _initials entry.
            while True:
                self.advance()
                if self._initials[self._top]:
                    break
            should_skip = self.get_skip_flag()
            end_top = self._top
        logger.log(
            "add_sample, skipped %d episodes, top=%d->%d"
            % (skipped, start_top, end_top))

示例11: train

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import log [as 别名]
def train(self, sess=None):
        """Run the training loop inside a TensorFlow session.

        Args:
            sess: Session to use.  When ``None``, a new ``tf.Session`` is
                created and entered here, and closed again before returning.

        Global variables are initialised in the session.  Each iteration from
        ``self.start_itr`` to ``self.n_itr`` then obtains and processes
        samples, logs diagnostics, optimizes the policy, saves a snapshot and
        records the total and per-iteration wall-clock time.

        Cleanup runs in ``finally`` clauses: ``shutdown_worker`` is called
        once ``start_worker`` has returned, and a session created here is
        closed, even when an iteration raises (previously both were skipped
        on error).
        """
        created_session = sess is None
        if created_session:
            sess = tf.Session()
            # NOTE(review): the session is entered but only ever closed,
            # never exited, exactly as before -- confirm this is intended.
            sess.__enter__()

        try:
            sess.run(tf.global_variables_initializer())
            self.start_worker()
            try:
                start_time = time.time()
                for itr in range(self.start_itr, self.n_itr):
                    itr_start_time = time.time()
                    with logger.prefix('itr #%d | ' % itr):
                        logger.log("Obtaining samples...")
                        paths = self.obtain_samples(itr)
                        logger.log("Processing samples...")
                        samples_data = self.process_samples(itr, paths)
                        logger.log("Logging diagnostics...")
                        self.log_diagnostics(paths)
                        logger.log("Optimizing policy...")
                        self.optimize_policy(itr, samples_data)
                        logger.log("Saving snapshot...")
                        params = self.get_itr_snapshot(itr, samples_data)
                        if self.store_paths:
                            params["paths"] = samples_data["paths"]
                        logger.save_itr_params(itr, params)
                        logger.log("Saved")
                        logger.record_tabular('Time', time.time() - start_time)
                        logger.record_tabular(
                            'ItrTime', time.time() - itr_start_time)
                        logger.dump_tabular(with_prefix=False)
                        if self.plot:
                            rollout(self.env, self.policy, animated=True,
                                    max_path_length=self.max_path_length)
                            if self.pause_for_plot:
                                input("Plotting evaluation run: Press Enter to "
                                      "continue...")
            finally:
                self.shutdown_worker()
        finally:
            if created_session:
                sess.close()
开发者ID:ahq1993,项目名称:inverse_rl,代码行数:39,代码来源:batch_polopt.py

示例12: optimize_policy

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import log [as 别名]
def optimize_policy(self, itr, samples_data):
        """Run one optimizer update and record loss / KL before and after.

        The optimizer inputs are, in order: the "observations", "actions"
        and "advantages" entries of ``samples_data``, the "agent_infos"
        entries named by the policy's ``state_info_keys`` and by its
        distribution's ``dist_info_keys`` and, for recurrent policies, the
        "valids" entry.

        ``itr`` is not used by this method.

        Returns:
            An empty dict.
        """
        batch = tuple(ext.extract(
            samples_data, "observations", "actions", "advantages"))
        infos = samples_data["agent_infos"]
        state_infos = tuple(infos[k] for k in self.policy.state_info_keys)
        dist_infos = tuple(
            infos[k] for k in self.policy.distribution.dist_info_keys)
        batch = batch + state_infos + dist_infos
        if self.policy.recurrent:
            batch = batch + (samples_data["valids"],)

        opt = self.optimizer
        logger.log("Computing loss before")
        loss_before = opt.loss(batch)
        logger.log("Computing KL before")
        mean_kl_before = opt.constraint_val(batch)
        logger.log("Optimizing")
        opt.optimize(batch)
        logger.log("Computing KL after")
        mean_kl = opt.constraint_val(batch)
        logger.log("Computing loss after")
        loss_after = opt.loss(batch)

        stats = (
            ('LossBefore', loss_before),
            ('LossAfter', loss_after),
            ('MeanKLBefore', mean_kl_before),
            ('MeanKL', mean_kl),
            ('dLoss', loss_before - loss_after),
        )
        for key, value in stats:
            logger.record_tabular(key, value)
        return {}
开发者ID:ahq1993,项目名称:inverse_rl,代码行数:29,代码来源:npo.py

示例13: optimize_policy

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import log [as 别名]
def optimize_policy(self, itr, samples_data):
        """Run one optimizer update with an extra per-sample column from ``self.empw``.

        The optimizer inputs are, in order: the "observations",
        "observations_next", "actions" and "advantages" entries of
        ``samples_data``; a column vector holding the first element of each
        row returned by ``self.empw.eval(observations)`` (presumably an
        empowerment estimate -- confirm against ``empw``); the "agent_infos"
        entries named by the policy's ``state_info_keys`` and by its
        distribution's ``dist_info_keys``; and, for recurrent policies, the
        "valids" entry.

        Loss and KL are recorded before and after the update.  ``itr`` is
        not used by this method.

        Returns:
            An empty dict.
        """
        batch = tuple(ext.extract(
            samples_data,
            "observations", "observations_next", "actions", "advantages",
        ))

        observations = samples_data["observations"]
        evaluated = self.empw.eval(observations)
        first_entries = [row[0] for row in evaluated]
        batch = batch + (np.array(first_entries).reshape(-1, 1),)

        infos = samples_data["agent_infos"]
        state_infos = tuple(infos[k] for k in self.policy.state_info_keys)
        dist_infos = tuple(
            infos[k] for k in self.policy.distribution.dist_info_keys)
        batch = batch + state_infos + dist_infos
        if self.policy.recurrent:
            batch = batch + (samples_data["valids"],)

        opt = self.optimizer
        logger.log("Computing loss before")
        loss_before = opt.loss(batch)
        logger.log("Computing KL before")
        mean_kl_before = opt.constraint_val(batch)
        logger.log("Optimizing")
        opt.optimize(batch)
        logger.log("Computing KL after")
        mean_kl = opt.constraint_val(batch)
        logger.log("Computing loss after")
        loss_after = opt.loss(batch)

        stats = (
            ('LossBefore', loss_before),
            ('LossAfter', loss_after),
            ('MeanKLBefore', mean_kl_before),
            ('MeanKL', mean_kl),
            ('dLoss', loss_before - loss_after),
        )
        for key, value in stats:
            logger.record_tabular(key, value)
        return {}
开发者ID:ahq1993,项目名称:inverse_rl,代码行数:38,代码来源:irl_npo.py

示例14: optimize_policy

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import log [as 别名]
def optimize_policy(self, itr, samples_data):
        """Run one optimizer update, evaluating loss and constraint around it.

        The optimizer inputs are, in order: the "observations", "actions"
        and "advantages" entries of ``samples_data``, the "agent_infos"
        entries named by the policy's ``state_info_keys`` and by its
        distribution's ``dist_info_keys`` and, for recurrent policies, the
        "valids" entry.

        ``itr`` is not used by this method.

        Returns:
            An empty dict.
        """
        batch = tuple(ext.extract(
            samples_data, "observations", "actions", "advantages"))
        infos = samples_data["agent_infos"]
        state_infos = tuple(infos[k] for k in self.policy.state_info_keys)
        dist_infos = tuple(
            infos[k] for k in self.policy.distribution.dist_info_keys)
        batch = batch + state_infos + dist_infos
        if self.policy.recurrent:
            batch = batch + (samples_data["valids"],)

        opt = self.optimizer
        # NOTE(review): tabular logging of these diagnostics is disabled, so
        # the four evaluations around optimize() produce values nobody reads.
        # They are kept because their side effects are not visible from here;
        # confirm they are pure and drop them if so.
        opt.loss(batch)
        opt.constraint_val(batch)
        opt.optimize(batch)
        opt.constraint_val(batch)
        opt.loss(batch)
        return {}
开发者ID:thanard,项目名称:me-trpo,代码行数:29,代码来源:ppo.py

示例15: optimize_policy

# 需要导入模块: from rllab.misc import logger [as 别名]
# 或者: from rllab.misc.logger import log [as 别名]
def optimize_policy(self, itr, samples_data):
        """Assemble the optimizer inputs from ``samples_data`` and run one update.

        The optimizer inputs are, in order: the "observations", "actions"
        and "advantages" entries of ``samples_data``, the "agent_infos"
        entries named by the policy's ``state_info_keys`` and by its
        distribution's ``dist_info_keys`` and, for recurrent policies, the
        "valids" entry.

        Loss / KL diagnostics around the update are currently disabled.
        ``itr`` is not used by this method.

        Returns:
            An empty dict.
        """
        batch = tuple(ext.extract(
            samples_data, "observations", "actions", "advantages"))
        infos = samples_data["agent_infos"]
        state_infos = tuple(infos[k] for k in self.policy.state_info_keys)
        dist_infos = tuple(
            infos[k] for k in self.policy.distribution.dist_info_keys)
        batch = batch + state_infos + dist_infos
        if self.policy.recurrent:
            batch = batch + (samples_data["valids"],)
        self.optimizer.optimize(batch)
        return {}
开发者ID:thanard,项目名称:me-trpo,代码行数:29,代码来源:npo.py


注:本文中的rllab.misc.logger.log方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。