本文整理汇总了 Python 中 baselines.common.running_mean_std.RunningMeanStd 类的典型用法代码示例。如果您正苦于以下问题:Python running_mean_std.RunningMeanStd 类的具体用法?Python running_mean_std.RunningMeanStd 怎么用?Python running_mean_std.RunningMeanStd 使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 baselines.common.running_mean_std 的用法示例。
在下文中一共展示了 running_mean_std.RunningMeanStd 类的 10 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: __init__
# 需要导入模块: from baselines.common import running_mean_std [as 别名]
# 或者: from baselines.common.running_mean_std import RunningMeanStd [as 别名]
def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
    """Set up per-agent observation statistics and one shared return statistic.

    The observation space is treated as a collection with one entry per
    agent. It is first used as a plain sequence (``len()`` plus integer
    indexing); if that fails, its ``.spaces`` attribute is used instead.

    Args:
        venv: Vectorized environment, forwarded to ``VecEnvWrapper.__init__``.
        ob: When truthy, ``self.ob_rms`` becomes a list holding one
            ``RunningMeanStd`` per agent, each shaped like that agent's
            observation space. Otherwise ``self.ob_rms`` is ``None``.
        ret: When truthy, ``self.ret_rms`` is a single scalar
            ``RunningMeanStd``. Otherwise it is ``None``.
        clipob: Stored as ``self.clipob``; not used in this method
            (presumably the observation clipping bound -- confirm in the
            methods that read it).
        cliprew: Stored as ``self.cliprew``; not used in this method
            (presumably the reward clipping bound -- confirm).
        gamma: Stored as ``self.gamma``; not used in this method
            (presumably the discount factor for the running return).
        epsilon: Stored as ``self.epsilon``; not used in this method.
    """
    VecEnvWrapper.__init__(self, venv)
    try:
        # Sequence-like observation space: supports len() and integer indexing.
        self.num_agents = num_agents = len(self.observation_space)
        self.ob_rms = (
            [RunningMeanStd(shape=self.observation_space[k].shape) for k in range(num_agents)]
            if ob else None
        )
    except Exception:
        # Fall back to the container's ``.spaces`` attribute. Catching
        # ``Exception`` rather than using a bare ``except`` keeps the original
        # best-effort fallback while letting KeyboardInterrupt / SystemExit
        # propagate.
        self.num_agents = num_agents = len(self.observation_space.spaces)
        self.ob_rms = (
            [RunningMeanStd(shape=self.observation_space.spaces[k].shape) for k in range(num_agents)]
            if ob else None
        )
    # A single return statistic and a single return accumulator are kept for
    # all agents (they are not tracked per agent).
    self.ret_rms = RunningMeanStd(shape=()) if ret else None
    self.clipob = clipob
    self.cliprew = cliprew
    self.ret = np.zeros(self.num_envs)
    self.gamma = gamma
    self.epsilon = epsilon
示例2: __init__
# 需要导入模块: from baselines.common import running_mean_std [as 别名]
# 或者: from baselines.common.running_mean_std import RunningMeanStd [as 别名]
def __init__(self, input_dim, hidden_dim, device):
    """Build the discriminator network, its optimizer and return statistics.

    Args:
        input_dim: Number of input features of the first linear layer.
        hidden_dim: Width of the two hidden linear layers.
        device: Device the network is moved to; also stored as ``self.device``.
    """
    super(Discriminator, self).__init__()

    self.device = device

    # Two tanh-activated hidden layers followed by a single-output linear layer.
    layers = [
        nn.Linear(input_dim, hidden_dim),
        nn.Tanh(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.Tanh(),
        nn.Linear(hidden_dim, 1),
    ]
    self.trunk = nn.Sequential(*layers).to(device)
    self.trunk.train()

    # Adam with its default hyper-parameters over all trunk parameters.
    self.optimizer = torch.optim.Adam(self.trunk.parameters())

    self.returns = None
    self.ret_rms = RunningMeanStd(shape=())
# 需要导入模块: from baselines.common import running_mean_std [as 别名]
# 或者: from baselines.common.running_mean_std import RunningMeanStd [as 别名]
def test_runningmeanstd():
    """Check that incremental updates reproduce the whole-batch mean and variance.

    Three random batches (1-D, then 2-D with two columns) are fed to a
    ``RunningMeanStd`` one after another; the resulting mean/var must match
    the statistics of the concatenated data along axis 0.
    """
    batch_groups = [
        (np.random.randn(3), np.random.randn(4), np.random.randn(5)),
        (np.random.randn(3, 2), np.random.randn(4, 2), np.random.randn(5, 2)),
    ]
    for batches in batch_groups:
        # epsilon=0.0 so that the initial pseudo-count does not bias the result.
        tracker = RunningMeanStd(epsilon=0.0, shape=batches[0].shape[1:])

        stacked = np.concatenate(batches, axis=0)
        expected = [stacked.mean(axis=0), stacked.var(axis=0)]

        for batch in batches:
            tracker.update(batch)
        observed = [tracker.mean, tracker.var]

        assert np.allclose(expected, observed)
示例4: __init__
# 需要导入模块: from baselines.common import running_mean_std [as 别名]
# 或者: from baselines.common.running_mean_std import RunningMeanStd [as 别名]
def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
    """Initialise the wrapper's running statistics and stored settings.

    Args:
        venv: Vectorized environment, forwarded to ``VecEnvWrapper.__init__``.
        ob: When truthy, create ``self.ob_rms`` shaped like the observation
            space; otherwise ``self.ob_rms`` is ``None``.
        ret: When truthy, create a scalar ``self.ret_rms``; otherwise ``None``.
        clipob: Stored as ``self.clipob`` (not used in this method).
        cliprew: Stored as ``self.cliprew`` (not used in this method).
        gamma: Stored as ``self.gamma`` (not used in this method).
        epsilon: Stored as ``self.epsilon`` (not used in this method).
    """
    VecEnvWrapper.__init__(self, venv)

    if ob:
        self.ob_rms = RunningMeanStd(shape=self.observation_space.shape)
    else:
        self.ob_rms = None

    if ret:
        self.ret_rms = RunningMeanStd(shape=())
    else:
        self.ret_rms = None

    self.clipob, self.cliprew = clipob, cliprew
    # One accumulator slot per sub-environment, starting at zero.
    self.ret = np.zeros(self.num_envs)
    self.gamma, self.epsilon = gamma, epsilon
示例5: __call__
# 需要导入模块: from baselines.common import running_mean_std [as 别名]
# 或者: from baselines.common.running_mean_std import RunningMeanStd [as 别名]
def __call__(self, x):
    """Standardise ``x`` with the running statistics and clip the result.

    Args:
        x: Array-like input; converted with ``np.asarray``.

    Returns:
        ``(x - mean) / sqrt(var + self.epsilon)`` clipped to
        ``[-self.clip, self.clip]``.
    """
    x = np.asarray(x)

    # Create the statistics lazily on first use; the leading dimension of
    # the tracked shape is 1, the rest follows the input's trailing dims.
    if self.rms is None:
        self.rms = RunningMeanStd(shape=(1,) + x.shape[1:])

    # In read-only mode the statistics are applied but not updated.
    if not self.read_only:
        self.rms.update(x)

    centered = x - self.rms.mean
    scale = np.sqrt(self.rms.var + self.epsilon)
    return np.clip(centered / scale, -self.clip, self.clip)
示例6: __init__
# 需要导入模块: from baselines.common import running_mean_std [as 别名]
# 或者: from baselines.common.running_mean_std import RunningMeanStd [as 别名]
def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8, eval=False):
    """Initialise running statistics, stored settings and the ``eval`` flag.

    Args:
        venv: Vectorized environment, forwarded to ``VecEnvWrapper.__init__``.
        ob: When truthy, create ``self.ob_rms`` shaped like the observation
            space; otherwise ``self.ob_rms`` is ``None``.
        ret: When truthy, create a scalar ``self.ret_rms``; otherwise ``None``.
        clipob: Stored as ``self.clipob`` (not used in this method).
        cliprew: Stored as ``self.cliprew`` (not used in this method).
        gamma: Stored as ``self.gamma`` (not used in this method).
        epsilon: Stored as ``self.epsilon`` (not used in this method).
        eval: Stored as ``self.eval`` (not used in this method). The name
            shadows the built-in ``eval`` inside this method; it is kept
            because it is part of the public signature.
    """
    VecEnvWrapper.__init__(self, venv)

    obs_stats = None
    if ob:
        obs_stats = RunningMeanStd(shape=self.observation_space.shape)
    self.ob_rms = obs_stats

    return_stats = None
    if ret:
        return_stats = RunningMeanStd(shape=())
    self.ret_rms = return_stats

    self.clipob = clipob
    self.cliprew = cliprew
    # One accumulator slot per sub-environment, starting at zero.
    self.ret = np.zeros(self.num_envs)
    self.gamma = gamma
    self.epsilon = epsilon
    self.eval = eval
示例7: __init__
# 需要导入模块: from baselines.common import running_mean_std [as 别名]
# 或者: from baselines.common.running_mean_std import RunningMeanStd [as 别名]
def __init__(self, venv, num_models, model_dir, include_action, num_layers, embedding_dims, ctrl_coeff=0., alive_bonus=0.):
    """Initialise the parent wrapper and add one reward statistic per model.

    All arguments are forwarded positionally, in the same order, to the
    parent class constructor. ``num_models`` additionally determines how
    many scalar ``RunningMeanStd`` instances are kept in ``self.rew_rms``.
    """
    super().__init__(
        venv,
        num_models,
        model_dir,
        include_action,
        num_layers,
        embedding_dims,
        ctrl_coeff,
        alive_bonus,
    )

    # One independent scalar running statistic per model.
    per_model_stats = []
    for _ in range(num_models):
        per_model_stats.append(RunningMeanStd(shape=()))
    self.rew_rms = per_model_stats

    # Fixed settings (not configurable through the constructor).
    self.cliprew = 10.
    self.epsilon = 1e-8
示例8: __init__
# 需要导入模块: from baselines.common import running_mean_std [as 别名]
# 或者: from baselines.common.running_mean_std import RunningMeanStd [as 别名]
def __init__(self, venv, reward_net_path, env_name):
    """Wrap ``venv`` and load a reward network from ``reward_net_path``.

    Args:
        venv: Vectorized environment, forwarded to ``VecEnvWrapper.__init__``.
        reward_net_path: Path of a checkpoint containing the state dict for
            ``AtariNet``.
        env_name: Stored as ``self.env_name`` (not used in this method).
    """
    VecEnvWrapper.__init__(self, venv)

    # Pick the device first so the checkpoint can be mapped onto it.
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    self.reward_net = AtariNet()
    # ``map_location`` lets a checkpoint that was saved from a GPU be loaded
    # on a CPU-only host; without it ``torch.load`` tries to restore tensors
    # onto their original (possibly unavailable) device.
    # NOTE: ``torch.load`` unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(reward_net_path, map_location=self.device)
    self.reward_net.load_state_dict(state_dict)
    self.reward_net.to(self.device)

    self.rew_rms = RunningMeanStd(shape=())
    self.epsilon = 1e-8
    self.cliprew = 10.
    self.env_name = env_name
示例9: __init__
# 需要导入模块: from baselines.common import running_mean_std [as 别名]
# 或者: from baselines.common.running_mean_std import RunningMeanStd [as 别名]
def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8, use_tf=False):
    """Initialise running statistics, optionally with the TF implementation.

    Args:
        venv: Vectorized environment, forwarded to ``VecEnvWrapper.__init__``.
        ob: When truthy, create ``self.ob_rms`` shaped like the observation
            space; otherwise ``self.ob_rms`` is ``None``.
        ret: When truthy, create a scalar ``self.ret_rms``; otherwise ``None``.
        clipob: Stored as ``self.clipob`` (not used in this method).
        cliprew: Stored as ``self.cliprew`` (not used in this method).
        gamma: Stored as ``self.gamma`` (not used in this method).
        epsilon: Stored as ``self.epsilon`` (not used in this method).
        use_tf: When truthy, use ``TfRunningMeanStd`` (with scopes
            ``'ob_rms'`` / ``'ret_rms'``) instead of ``RunningMeanStd``.
    """
    VecEnvWrapper.__init__(self, venv)

    # The statistics class is imported lazily so that only the selected
    # implementation is pulled in.
    if use_tf:
        from baselines.common.running_mean_std import TfRunningMeanStd

        obs_stats = None
        if ob:
            obs_stats = TfRunningMeanStd(shape=self.observation_space.shape, scope='ob_rms')
        self.ob_rms = obs_stats

        return_stats = None
        if ret:
            return_stats = TfRunningMeanStd(shape=(), scope='ret_rms')
        self.ret_rms = return_stats
    else:
        from baselines.common.running_mean_std import RunningMeanStd

        obs_stats = None
        if ob:
            obs_stats = RunningMeanStd(shape=self.observation_space.shape)
        self.ob_rms = obs_stats

        return_stats = None
        if ret:
            return_stats = RunningMeanStd(shape=())
        self.ret_rms = return_stats

    self.clipob, self.cliprew = clipob, cliprew
    # One accumulator slot per sub-environment, starting at zero.
    self.ret = np.zeros(self.num_envs)
    self.gamma, self.epsilon = gamma, epsilon
示例10: __init__
# 需要导入模块: from baselines.common import running_mean_std [as 别名]
# 或者: from baselines.common.running_mean_std import RunningMeanStd [as 别名]
def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
    """Store the wrapped environment and set up running statistics.

    Unlike a ``VecEnvWrapper``-based constructor, this one records the
    wrapped environment and its spaces directly on the instance.

    Args:
        venv: Wrapped environment; its ``observation_space`` and
            ``action_space`` are copied to private attributes.
        ob: When truthy, create ``self.ob_rms`` shaped like the observation
            space; otherwise ``self.ob_rms`` is ``None``.
        ret: When truthy, create a scalar ``self.ret_rms``; otherwise ``None``.
        clipob: Stored as ``self.clipob`` (not used in this method).
        cliprew: Stored as ``self.cliprew`` (not used in this method).
        gamma: Stored as ``self.gamma`` (not used in this method).
        epsilon: Stored as ``self.epsilon`` (not used in this method).
    """
    self.venv = venv
    self._observation_space = self.venv.observation_space
    self._action_space = venv.action_space

    if ob:
        self.ob_rms = RunningMeanStd(shape=self._observation_space.shape)
    else:
        self.ob_rms = None

    if ret:
        self.ret_rms = RunningMeanStd(shape=())
    else:
        self.ret_rms = None

    self.clipob, self.cliprew = clipob, cliprew
    # ``num_envs`` is not assigned in this method; presumably the class
    # exposes it (e.g. as a property delegating to ``venv``) -- confirm.
    self.ret = np.zeros(self.num_envs)
    self.gamma, self.epsilon = gamma, epsilon