This article collects typical usage examples of the Python stable_baselines.SAC attribute. If you are wondering what stable_baselines.SAC does and how to use it, the curated code examples below may help. You can also explore further usage examples of stable_baselines, the module in which this attribute is defined.
The following presents 12 code examples of stable_baselines.SAC, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
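Before the collected examples, here is a minimal sketch of how stable_baselines.SAC is typically instantiated and trained. It is not taken from the examples below; the environment id, step budget and hyperparameters are illustrative choices.

import gym
from stable_baselines import SAC

# SAC only supports continuous (Box) action spaces.
env = gym.make('Pendulum-v0')

# 'MlpPolicy' resolves to the SAC-specific policy registered by stable_baselines.
model = SAC('MlpPolicy', env, learning_rate=3e-4, verbose=1)
model.learn(total_timesteps=10000)

# Query the trained policy.
obs = env.reset()
action, _states = model.predict(obs, deterministic=True)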
Example 1: test_deterministic_training_common
# Required import: import stable_baselines [as alias]
# Or: from stable_baselines import SAC [as alias]
def test_deterministic_training_common(algo):
    results = [[], []]
    rewards = [[], []]
    kwargs = {'n_cpu_tf_sess': 1}
    if algo in [DDPG, TD3, SAC]:
        env_id = 'Pendulum-v0'
        kwargs.update({'action_noise': NormalActionNoise(0.0, 0.1)})
    else:
        env_id = 'CartPole-v1'
        if algo == DQN:
            kwargs.update({'learning_starts': 100})

    for i in range(2):
        model = algo('MlpPolicy', env_id, seed=SEED, **kwargs)
        model.learn(N_STEPS_TRAINING)
        env = model.get_env()
        obs = env.reset()
        for _ in range(100):
            action, _ = model.predict(obs, deterministic=False)
            obs, reward, _, _ = env.step(action)
            results[i].append(action)
            rewards[i].append(reward)

    assert sum(results[0]) == sum(results[1]), results
    assert sum(rewards[0]) == sum(rewards[1]), rewards
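The pattern above relies on seeding plus a single-threaded TensorFlow session for reproducibility. A minimal sketch of the same idea applied directly to SAC (seed value and step budget chosen arbitrarily):

from stable_baselines import SAC

# With a fixed seed and n_cpu_tf_sess=1, two identical runs should produce
# the same sequence of actions on the same environment.
model = SAC('MlpPolicy', 'Pendulum-v0', seed=0, n_cpu_tf_sess=1)
model.learn(5000)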
Example 2: test_long_episode
# Required import: import stable_baselines [as alias]
# Or: from stable_baselines import SAC [as alias]
def test_long_episode(model_class):
    """
    Check that the model does not break when the replay buffer is still empty
    after the first rollout (because the episode is not over).
    """
    # n_bits > nb_rollout_steps
    n_bits = 10
    env = BitFlippingEnv(n_bits, continuous=model_class in [DDPG, SAC, TD3],
                         max_steps=n_bits)
    kwargs = {}
    if model_class == DDPG:
        kwargs['nb_rollout_steps'] = 9  # < n_bits
    elif model_class in [DQN, SAC, TD3]:
        kwargs['batch_size'] = 8  # < n_bits
        kwargs['learning_starts'] = 0

    model = HER('MlpPolicy', env, model_class, n_sampled_goal=4, goal_selection_strategy='future',
                verbose=0, **kwargs)
    model.learn(200)
Example 3: sample_her_params
# Required import: import stable_baselines [as alias]
# Or: from stable_baselines import SAC [as alias]
def sample_her_params(trial):
    """
    Sampler for HER hyperparams.

    :param trial: (optuna.trial)
    :return: (dict)
    """
    if trial.model_class == SAC:
        hyperparams = sample_sac_params(trial)
    elif trial.model_class == DDPG:
        hyperparams = sample_ddpg_params(trial)
    elif trial.model_class == TD3:
        hyperparams = sample_td3_params(trial)

    hyperparams['random_exploration'] = trial.suggest_uniform('random_exploration', 0, 1)
    hyperparams['n_sampled_goal'] = trial.suggest_categorical('n_sampled_goal', [1, 2, 4, 6, 8])

    return hyperparams
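For context, a sampler like this is normally consumed inside an Optuna objective. The sketch below shows one hypothetical wiring: the `trial.model_class` attribute is attached by the surrounding tuning script, and `evaluate` stands in for whatever evaluation routine that script uses.

import optuna
from stable_baselines import HER, SAC
from stable_baselines.common.bit_flipping_env import BitFlippingEnv

def objective(trial):
    trial.model_class = SAC  # in the real tuning script this is set elsewhere
    hyperparams = sample_her_params(trial)
    # n_sampled_goal belongs to HER; the remaining keys go to the wrapped SAC model.
    n_sampled_goal = hyperparams.pop('n_sampled_goal')
    env = BitFlippingEnv(10, continuous=True, max_steps=10)
    model = HER('MlpPolicy', env, SAC, n_sampled_goal=n_sampled_goal, **hyperparams)
    model.learn(10000)
    return evaluate(model)  # hypothetical evaluation helper

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)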
Example 4: mpi_unavailable_error
# Required import: import stable_baselines [as alias]
# Or: from stable_baselines import SAC [as alias]
def mpi_unavailable_error(*args, **kwargs):
    raise ImportError("This algorithm requires MPI, which is not available.")

# Lazy import for PPO1 and SAC, which have optional mpi dependency
Example 5: sac
# Required import: import stable_baselines [as alias]
# Or: from stable_baselines import SAC [as alias]
def sac(batch_size, learning_rate, **kwargs):
    return _stable(
        stable_baselines.SAC,
        our_type="sac",
        callback_key="step",
        callback_mul=1,
        batch_size=batch_size,
        learning_rate=learning_rate,
        **kwargs,
    )
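The `_stable` helper above is project-specific and not shown on this page. Assuming it simply forwards its keyword arguments to the constructor, the unwrapped equivalent would look roughly like this:

import stable_baselines

# Direct construction with the same tunable arguments the wrapper exposes.
model = stable_baselines.SAC('MlpPolicy', 'Pendulum-v0',
                             batch_size=64, learning_rate=3e-4)
model.learn(total_timesteps=10000)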
Example 6: test_her
# Required import: import stable_baselines [as alias]
# Or: from stable_baselines import SAC [as alias]
def test_her(model_class, goal_selection_strategy, discrete_obs_space):
    env = BitFlippingEnv(N_BITS, continuous=model_class in [DDPG, SAC, TD3],
                         max_steps=N_BITS, discrete_obs_space=discrete_obs_space)

    # Take random actions 10% of the time
    kwargs = {'random_exploration': 0.1} if model_class in [DDPG, SAC, TD3] else {}
    model = HER('MlpPolicy', env, model_class, n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy,
                verbose=0, **kwargs)
    model.learn(1000)
Example 7: test_buffer_actions_scaling
# Required import: import stable_baselines [as alias]
# Or: from stable_baselines import SAC [as alias]
def test_buffer_actions_scaling(model_class, model_kwargs):
    """
    Test if actions are scaled to tanh co-domain before being put in a buffer
    for algorithms that use tanh-squashing, i.e., DDPG, TD3, SAC

    :param model_class: (BaseRLModel) A RL Model
    :param model_kwargs: (dict) Dictionary containing named arguments to the given algorithm
    """
    # check random and inferred actions as they possibly have different flows
    for random_coeff in [0.0, 1.0]:
        env = IdentityEnvBox(-2000, 1000)
        model = model_class("MlpPolicy", env, seed=1, random_exploration=random_coeff, **model_kwargs)
        model.learn(total_timesteps=ROLLOUT_STEPS)

        assert hasattr(model, 'replay_buffer')
        buffer = model.replay_buffer
        assert buffer.can_sample(ROLLOUT_STEPS)

        _, actions, _, _, _ = buffer.sample(ROLLOUT_STEPS)
        assert not np.any(actions > np.ones_like(actions))
        assert not np.any(actions < -np.ones_like(actions))
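The scaling being checked here is the usual linear map between the environment's Box bounds and the [-1, 1] tanh co-domain. A self-contained sketch of that transformation (just the formula, not the library's internal helper):

import numpy as np

def scale_action(action, low, high):
    # Map an action from [low, high] to [-1, 1], the form stored in the
    # replay buffer for tanh-squashing algorithms such as DDPG, TD3 and SAC.
    return 2.0 * (action - low) / (high - low) - 1.0

# For IdentityEnvBox(-2000, 1000) as above:
print(scale_action(np.array([500.0]), -2000.0, 1000.0))  # ~0.667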
Example 8: sample_sac_params
# Required import: import stable_baselines [as alias]
# Or: from stable_baselines import SAC [as alias]
def sample_sac_params(trial):
    """
    Sampler for SAC hyperparams.

    :param trial: (optuna.trial)
    :return: (dict)
    """
    gamma = trial.suggest_categorical('gamma', [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    learning_rate = trial.suggest_loguniform('lr', 1e-5, 1)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128, 256, 512])
    buffer_size = trial.suggest_categorical('buffer_size', [int(1e4), int(1e5), int(1e6)])
    learning_starts = trial.suggest_categorical('learning_starts', [0, 1000, 10000, 20000])
    train_freq = trial.suggest_categorical('train_freq', [1, 10, 100, 300])
    # gradient_steps takes too much time
    # gradient_steps = trial.suggest_categorical('gradient_steps', [1, 100, 300])
    gradient_steps = train_freq
    ent_coef = trial.suggest_categorical('ent_coef', ['auto', 0.5, 0.1, 0.05, 0.01, 0.0001])
    net_arch = trial.suggest_categorical('net_arch', ["small", "medium", "big"])

    net_arch = {
        'small': [64, 64],
        'medium': [256, 256],
        'big': [400, 300],
    }[net_arch]

    target_entropy = 'auto'
    if ent_coef == 'auto':
        target_entropy = trial.suggest_categorical('target_entropy', ['auto', -1, -10, -20, -50, -100])

    return {
        'gamma': gamma,
        'learning_rate': learning_rate,
        'batch_size': batch_size,
        'buffer_size': buffer_size,
        'learning_starts': learning_starts,
        'train_freq': train_freq,
        'gradient_steps': gradient_steps,
        'ent_coef': ent_coef,
        'target_entropy': target_entropy,
        'policy_kwargs': dict(layers=net_arch)
    }
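The returned dictionary maps directly onto SAC's constructor keywords, so a hypothetical Optuna objective can unpack it as-is (environment id, training budget and the scoring helper are illustrative assumptions):

import optuna
from stable_baselines import SAC

def objective(trial):
    hyperparams = sample_sac_params(trial)
    model = SAC('MlpPolicy', 'Pendulum-v0', verbose=0, **hyperparams)
    model.learn(total_timesteps=20000)
    return mean_evaluation_reward(model)  # hypothetical scoring helper

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)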
Example 9: __init__
# Required import: import stable_baselines [as alias]
# Or: from stable_baselines import SAC [as alias]
def __init__(self):
    super(SACModel, self).__init__(name="sac", model_class=SAC)
Example 10: main
# Required import: import stable_baselines [as alias]
# Or: from stable_baselines import SAC [as alias]
def main(load_policy=False):
    global log_dir, log_dir_policy
    if load_policy:
        log_dir_policy = '../policies/PUSHING_TD3+HER_FIXED_POSITION_DYN_RAND_FROM_FIXED_PHYSICS'
    model_class = TD3  # works also with SAC and DDPG
    action_space = 7
    fixed = True
    normalize_observations = False
    gamma = 0.9
    memory_limit = 1000000
    normalize_returns = True
    timesteps = 1500000
    discreteAction = 0
    rend = False

    env = pandaPushGymEnvHERRand(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0,
                                 isDiscrete=discreteAction, action_space=action_space,
                                 fixedPositionObj=fixed, includeVelObs=True)
    env = Monitor(env, log_dir, allow_early_resets=True)

    # Available strategies (cf paper): future, final, episode, random
    goal_selection_strategy = 'future'  # equivalent to GoalSelectionStrategy.FUTURE
    n_actions = env.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(0.5) * np.ones(n_actions))

    # Wrap the model
    model = HER(CustomPolicy, env, model_class, n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy,
                verbose=1, tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3+HER_FIXED_DYN_RAND",
                buffer_size=1000000, batch_size=256,
                random_exploration=0.3, action_noise=action_noise)

    if load_policy:
        model = HER.load("../policies/USEFUL_POLICIES/PUSHING_TD3+HER_FIXED_POSITIONbest_model.pkl", env=env,
                         n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy,
                         tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3+HER_FIXED_DYN_RAND_FROM_FIXED_PHYSICS",
                         buffer_size=1000000, batch_size=256, random_exploration=0.3, action_noise=action_noise)

    # Train the model starting from a previous policy
    model.learn(timesteps, callback=callback)
    model.save("../policies/PUSHING_FIXED_TD3_DYN_RAND")
    print("Finished train1")
Example 11: main
# Required import: import stable_baselines [as alias]
# Or: from stable_baselines import SAC [as alias]
def main(load_policy=False):
    global log_dir
    model_class = TD3  # works also with SAC and DDPG
    action_space = 7
    normalize_observations = False
    gamma = 0.9
    memory_limit = 1000000
    normalize_returns = True
    timesteps = 8000000
    rend = False
    obj_pose_rnd_std = 0

    env = pandaPushGymGoalEnv(renders=rend, use_IK=0, numControlledJoints=action_space,
                              obj_pose_rnd_std=obj_pose_rnd_std, includeVelObs=True)
    env = Monitor(env, log_dir, allow_early_resets=True)

    goal_selection_strategy = 'future'
    n_actions = env.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(0.5) * np.ones(n_actions))

    # Wrap the model
    model = HER(CustomTD3Policy, env, model_class, n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy,
                verbose=1, tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3_phase1_target_fixed",
                buffer_size=1000000, batch_size=256,
                random_exploration=0.3, action_noise=action_noise)

    if load_policy:
        model = HER.load("../policies/USEFUL_POLICIES/PUSHING_TD3+HER_FIXED_POSITIONbest_model.pkl", env=env,
                         n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy,
                         tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3_phase1_target_fixed",
                         buffer_size=1000000, batch_size=256, random_exploration=0.3, action_noise=action_noise)

    model.learn(timesteps, log_interval=100, callback=callback)
    print("Saving Policy PHASE_1")
    model.save("../policies/TD3_phase1_target_fixed")
Example 12: test_model_manipulation
# Required import: import stable_baselines [as alias]
# Or: from stable_baselines import SAC [as alias]
def test_model_manipulation(model_class, goal_selection_strategy):
    env = BitFlippingEnv(N_BITS, continuous=model_class in [DDPG, SAC, TD3], max_steps=N_BITS)
    env = DummyVecEnv([lambda: env])

    model = HER('MlpPolicy', env, model_class, n_sampled_goal=3, goal_selection_strategy=goal_selection_strategy,
                verbose=0)
    model.learn(1000)

    model_predict(model, env, n_steps=100, additional_check=None)

    model.save('./test_her.zip')
    del model

    # NOTE: HER does not support VecEnvWrapper yet
    with pytest.raises(AssertionError):
        model = HER.load('./test_her.zip', env=VecNormalize(env))

    model = HER.load('./test_her.zip')

    # Check that the model raises an error when the env
    # is not wrapped (or no env passed to the model)
    with pytest.raises(ValueError):
        model.predict(env.reset())

    env_ = BitFlippingEnv(N_BITS, continuous=model_class in [DDPG, SAC, TD3], max_steps=N_BITS)
    env_ = HERGoalEnvWrapper(env_)
    model_predict(model, env_, n_steps=100, additional_check=None)

    model.set_env(env)
    model.learn(1000)
    model_predict(model, env_, n_steps=100, additional_check=None)

    assert model.n_sampled_goal == 3
    del model

    env = BitFlippingEnv(N_BITS, continuous=model_class in [DDPG, SAC, TD3], max_steps=N_BITS)
    model = HER.load('./test_her', env=env)
    model.learn(1000)
    model_predict(model, env_, n_steps=100, additional_check=None)

    assert model.n_sampled_goal == 3

    if os.path.isfile('./test_her.zip'):
        os.remove('./test_her.zip')