当前位置: 首页>>代码示例>>Python>>正文


Python mpi_moments.mpi_moments方法代码示例

本文整理汇总了Python中baselines.common.mpi_moments.mpi_moments方法的典型用法代码示例。如果您正苦于以下问题:Python mpi_moments.mpi_moments方法的具体用法?Python mpi_moments.mpi_moments怎么用?Python mpi_moments.mpi_moments使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在baselines.common.mpi_moments的用法示例。


在下文中一共展示了mpi_moments.mpi_moments方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: mpi_average

# 需要导入模块: from baselines.common import mpi_moments [as 别名]
# 或者: from baselines.common.mpi_moments import mpi_moments [as 别名]
def mpi_average(value):
    """Return the mean of `value` across MPI workers.

    A bare scalar is promoted to a one-element list, and an empty list is
    replaced by [0.] so mpi_moments always receives non-empty data.  The
    first element of mpi_moments' result is the cross-worker mean.
    """
    if isinstance(value, list):
        vals = value if value else [0.]
    else:
        vals = [value]
    return mpi_moments(np.array(vals))[0]
开发者ID:Hwhitetooth,项目名称:lirpg,代码行数:8,代码来源:train.py

示例2: mpi_mean

# 需要导入模块: from baselines.common import mpi_moments [as 别名]
# 或者: from baselines.common.mpi_moments import mpi_moments [as 别名]
def mpi_mean(value):
    """Return the scalar mean of `value` pooled over all MPI workers.

    Scalars are wrapped in a list and an empty list becomes [0.]; the
    result is the first entry of the mean vector from mpi_moments.
    """
    if isinstance(value, list):
        vals = value if value else [0.]
    else:
        vals = [value]
    return mpi_moments(np.array(vals))[0][0]
开发者ID:AdamStelmaszczyk,项目名称:learning2run,代码行数:8,代码来源:util.py

示例3: mpi_std

# 需要导入模块: from baselines.common import mpi_moments [as 别名]
# 或者: from baselines.common.mpi_moments import mpi_moments [as 别名]
def mpi_std(value):
    """Return the scalar standard deviation of `value` over all MPI workers.

    Scalars are wrapped in a list and an empty list becomes [0.]; the
    result is the first entry of the std vector (second output) from
    mpi_moments.
    """
    if isinstance(value, list):
        vals = value if value else [0.]
    else:
        vals = [value]
    return mpi_moments(np.array(vals))[1][0]
开发者ID:AdamStelmaszczyk,项目名称:learning2run,代码行数:8,代码来源:util.py

示例4: mpi_average

# 需要导入模块: from baselines.common import mpi_moments [as 别名]
# 或者: from baselines.common.mpi_moments import mpi_moments [as 别名]
def mpi_average(value):
    """Return the mean of `value` across MPI workers (her.py variant).

    A bare scalar is first promoted to a one-element list.  If every
    element of the list is falsy (this includes the empty list), the list
    is replaced by [0.] before calling mpi_moments.
    """
    vals = value if isinstance(value, list) else [value]
    if not any(vals):
        vals = [0.]
    return mpi_moments(np.array(vals))[0]
开发者ID:openai,项目名称:baselines,代码行数:8,代码来源:her.py

示例5: run

# 需要导入模块: from baselines.common import mpi_moments [as 别名]
# 或者: from baselines.common.mpi_moments import mpi_moments [as 别名]
def run(self):
        """Advance the persistent rollout by `nsteps` env steps and build a PPO batch.

        Rollout buffers persist across calls in `self.mb_stuff` (lists of
        per-step arrays: obs, increase_ent, rewards, reward_avg, actions,
        values, valids, random_resets, dones, neglogpacs, states).  Once a
        full window of nsteps + left-cut + right-cut steps exists, the oldest
        `nsteps` entries are dropped before stepping further.

        Returns a tuple of (obs, increase_ent, advantages, dones, actions,
        values, neglogpacs, valids, returns) — each mapped through the
        module-level `sf01` helper — followed by the initial recurrent
        states and the list of episode-info dicts collected from the env.
        """
        # Shift the window forward once enough history has accumulated.
        if len(self.mb_stuff[2]) >= self.nsteps+self.num_steps_to_cut_left+self.num_steps_to_cut_right:
            self.mb_stuff = [l[self.nsteps:] for l in self.mb_stuff]

        mb_obs, mb_increase_ent, mb_rewards, mb_reward_avg, mb_actions, mb_values, mb_valids, mb_random_resets, \
            mb_dones, mb_neglogpacs, mb_states = self.mb_stuff
        epinfos = []
        # Step the (vectorized) env until the window is full again.
        while len(mb_rewards) < self.nsteps+self.num_steps_to_cut_left+self.num_steps_to_cut_right:
            actions, values, states, neglogpacs = self.model.step(mb_obs[-1], mb_states[-1], mb_dones[-1], mb_increase_ent[-1])
            mb_actions.append(actions)
            mb_values.append(values)
            mb_states.append(states)
            mb_neglogpacs.append(neglogpacs)

            obs, rewards, dones, infos = self.env.step(actions)
            # np.cast was removed in NumPy 2.0; asarray+astype is the exact
            # equivalent (always yields a fresh array of the target dtype).
            mb_obs.append(np.asarray(obs).astype(self.model.train_model.X.dtype.name))
            mb_increase_ent.append(np.asarray([info.get('increase_entropy', False) for info in infos], dtype=np.uint8))
            mb_rewards.append(rewards)
            mb_dones.append(dones)
            # Transitions flagged invalid by the replay-reset wrapper are
            # masked out of the batch via mb_valids.
            mb_valids.append([(not info.get('replay_reset.invalid_transition', False)) for info in infos])
            mb_random_resets.append(np.array([info.get('replay_reset.random_reset', False) for info in infos]))

            for info in infos:
                maybeepinfo = info.get('episode')
                if maybeepinfo: epinfos.append(maybeepinfo)

        # GAE: one extra zero entry serves as the bootstrap for the last step.
        mb_advs = [np.zeros_like(mb_values[0])] * (len(mb_rewards) + 1)
        for t in reversed(range(len(mb_rewards))):
            if t < self.num_steps_to_cut_left:
                # Warm-up steps at the left edge: zero their valid mask.
                mb_valids[t] = np.zeros_like(mb_valids[t])
            else:
                if t == len(mb_values)-1:
                    next_value = self.model.value(mb_obs[-1], mb_states[-1], mb_dones[-1])
                else:
                    next_value = mb_values[t+1]
                use_next = np.logical_not(mb_dones[t+1])       # stop bootstrapping at episode ends
                adv_mask = np.logical_not(mb_random_resets[t+1])  # random resets cut advantage propagation
                delta = mb_rewards[t] + self.gamma * use_next * next_value - mb_values[t]
                mb_advs[t] = adv_mask * (delta + self.gamma * self.lam * use_next * mb_advs[t + 1])

        # Extract the trainable prefix (nsteps + left-cut entries) as arrays.
        end = self.nsteps + self.num_steps_to_cut_left
        ar_mb_obs = np.asarray(mb_obs[:end], dtype=self.model.train_model.X.dtype.name)
        ar_mb_ent = np.stack(mb_increase_ent[:end], axis=0)
        ar_mb_valids = np.asarray(mb_valids[:end], dtype=np.float32)
        ar_mb_actions = np.asarray(mb_actions[:end])
        ar_mb_values = np.asarray(mb_values[:end], dtype=np.float32)
        ar_mb_neglogpacs = np.asarray(mb_neglogpacs[:end], dtype=np.float32)
        # np.bool was removed in NumPy 1.24; the builtin bool is the
        # documented replacement and produces the same dtype.
        ar_mb_dones = np.asarray(mb_dones[:end], dtype=bool)
        ar_mb_advs = np.asarray(mb_advs[:end], dtype=np.float32)
        ar_mb_rets = ar_mb_values + ar_mb_advs

        if self.norm_adv:
            # Normalize advantages with statistics pooled across MPI workers.
            adv_mean, adv_std, _ = mpi_moments(ar_mb_advs.ravel())
            ar_mb_advs = (ar_mb_advs - adv_mean) / (adv_std + 1e-7)

        # obs, increase_ent, advantages, masks, actions, values, neglogpacs, valids, returns, states, epinfos = runner.run()
        return (*map(sf01, (ar_mb_obs, ar_mb_ent, ar_mb_advs, ar_mb_dones, ar_mb_actions, ar_mb_values, ar_mb_neglogpacs, ar_mb_valids, ar_mb_rets)),
            mb_states[0], epinfos) 
开发者ID:openai,项目名称:atari-reset,代码行数:63,代码来源:ppo.py


注:本文中的baselines.common.mpi_moments.mpi_moments方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。