Python stable_baselines.SAC Code Examples

This article collects typical usage examples of the SAC attribute of the Python package stable_baselines (the Soft Actor-Critic algorithm class exported at the top level of the package). If you are unsure what stable_baselines.SAC is or how to use it, the curated examples below may help; you can also explore the wider stable_baselines API for related usage.


The following presents 12 code examples of stable_baselines.SAC, drawn from open-source projects and sorted by popularity by default.
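Before the collected examples, a minimal sketch of the typical stable_baselines.SAC workflow may help orient the reader: construct the model on a continuous-action Gym environment, train it, then query the policy. This follows the stable-baselines 2.x API; the environment choice and step counts are illustrative assumptions, not taken from the examples below.

import gym
from stable_baselines import SAC

# Train SAC on a standard continuous-control task, then act deterministically.
env = gym.make('Pendulum-v0')
model = SAC('MlpPolicy', env, verbose=1)
model.learn(total_timesteps=10000)

obs = env.reset()
for _ in range(100):
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()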

Example 1: test_deterministic_training_common

# Required import: import stable_baselines [as alias]
# Or alternatively: from stable_baselines import SAC [as alias]
def test_deterministic_training_common(algo):
    results = [[], []]
    rewards = [[], []]
    kwargs = {'n_cpu_tf_sess': 1}  # a single-threaded TF session keeps runs reproducible
    if algo in [DDPG, TD3, SAC]:
        # Continuous-action algorithms run on Pendulum with Gaussian exploration noise
        env_id = 'Pendulum-v0'
        kwargs.update({'action_noise': NormalActionNoise(0.0, 0.1)})
    else:
        env_id = 'CartPole-v1'
        if algo == DQN:
            kwargs.update({'learning_starts': 100})

    # Train twice with the same seed; actions and rewards must match exactly,
    # even with deterministic=False, because the RNG state is identical.
    for i in range(2):
        model = algo('MlpPolicy', env_id, seed=SEED, **kwargs)
        model.learn(N_STEPS_TRAINING)
        env = model.get_env()
        obs = env.reset()
        for _ in range(100):
            action, _ = model.predict(obs, deterministic=False)
            obs, reward, _, _ = env.step(action)
            results[i].append(action)
            rewards[i].append(reward)
    assert sum(results[0]) == sum(results[1]), results
    assert sum(rewards[0]) == sum(rewards[1]), rewards
Author: Stable-Baselines-Team, Project: stable-baselines, Source: test_0deterministic.py

Example 2: test_long_episode

# Required import: import stable_baselines [as alias]
# Or alternatively: from stable_baselines import SAC [as alias]
def test_long_episode(model_class):
    """
    Check that the model does not break when the replay buffer is still empty
    after the first rollout (because the episode is not over).
    """
    # n_bits > nb_rollout_steps
    n_bits = 10
    env = BitFlippingEnv(n_bits, continuous=model_class in [DDPG, SAC, TD3],
                         max_steps=n_bits)
    kwargs = {}
    if model_class == DDPG:
        kwargs['nb_rollout_steps'] = 9  # < n_bits
    elif model_class in [DQN, SAC, TD3]:
        kwargs['batch_size'] = 8  # < n_bits
        kwargs['learning_starts'] = 0

    model = HER('MlpPolicy', env, model_class, n_sampled_goal=4, goal_selection_strategy='future',
                verbose=0, **kwargs)
    model.learn(200) 
Author: Stable-Baselines-Team, Project: stable-baselines, Source: test_her.py

Example 3: sample_her_params

# Required import: import stable_baselines [as alias]
# Or alternatively: from stable_baselines import SAC [as alias]
def sample_her_params(trial):
    """
    Sampler for HER hyperparams.

    :param trial: (optuna.trial)
    :return: (dict)
    """
    if trial.model_class == SAC:
        hyperparams = sample_sac_params(trial)
    elif trial.model_class == DDPG:
        hyperparams = sample_ddpg_params(trial)
    elif trial.model_class == TD3:
        hyperparams = sample_td3_params(trial)
    else:
        # Guard against unsupported model classes so hyperparams is never unbound
        raise ValueError("Unsupported model class for HER: {}".format(trial.model_class))

    hyperparams['random_exploration'] = trial.suggest_uniform('random_exploration', 0, 1)
    hyperparams['n_sampled_goal'] = trial.suggest_categorical('n_sampled_goal', [1, 2, 4, 6, 8])

    return hyperparams
Author: araffin, Project: rl-baselines-zoo, Source: hyperparams_opt.py
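Note that model_class is not a standard optuna.Trial attribute: the calling code must attach it to the trial before invoking the sampler. Below is a minimal sketch of that wiring, assuming a hypothetical evaluate_hyperparams helper for the actual training-and-scoring step.

import optuna
from stable_baselines import SAC

def objective(trial):
    # Attach the algorithm class the HER sampler dispatches on; optuna does
    # not provide this attribute, the caller sets it.
    trial.model_class = SAC
    hyperparams = sample_her_params(trial)
    # evaluate_hyperparams is a hypothetical helper: build a HER model with
    # these hyperparams, train briefly, and return a score to maximize.
    return evaluate_hyperparams(hyperparams)

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)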

Example 4: mpi_unavailable_error

# Required import: import stable_baselines [as alias]
# Or alternatively: from stable_baselines import SAC [as alias]
def mpi_unavailable_error(*args, **kwargs):
    raise ImportError("This algorithm requires MPI, which is not available.")


# Lazy import for PPO1 and SAC, which have optional mpi dependency 
Author: HumanCompatibleAI, Project: adversarial-policies, Source: loader.py
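The trailing comment refers to a lazy-import guard; here is a minimal sketch of that pattern, using the stub above. The exact guard in the project may differ, so treat the shape of the try/except as an assumption.

# Hedged sketch: import the MPI-dependent algorithms if possible, otherwise
# bind their names to the stub so the error is raised only on actual use.
try:
    from stable_baselines import PPO1, SAC
except ImportError:
    PPO1 = mpi_unavailable_error
    SAC = mpi_unavailable_error

This keeps the rest of the module importable on machines without mpi4py, deferring the failure until an MPI-dependent algorithm is actually requested.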

Example 5: sac

# Required import: import stable_baselines [as alias]
# Or alternatively: from stable_baselines import SAC [as alias]
def sac(batch_size, learning_rate, **kwargs):
    """Entry point that hands stable_baselines.SAC to the project's generic
    _stable trainer helper, tagged with SAC-specific metadata."""
    return _stable(
        stable_baselines.SAC,
        our_type="sac",
        callback_key="step",
        callback_mul=1,
        batch_size=batch_size,
        learning_rate=learning_rate,
        **kwargs,
    )
Author: HumanCompatibleAI, Project: adversarial-policies, Source: train.py

Example 6: test_her

# Required import: import stable_baselines [as alias]
# Or alternatively: from stable_baselines import SAC [as alias]
def test_her(model_class, goal_selection_strategy, discrete_obs_space):
    env = BitFlippingEnv(N_BITS, continuous=model_class in [DDPG, SAC, TD3],
                         max_steps=N_BITS, discrete_obs_space=discrete_obs_space)

    # Take random actions 10% of the time
    kwargs = {'random_exploration': 0.1} if model_class in [DDPG, SAC, TD3] else {}
    model = HER('MlpPolicy', env, model_class, n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy,
                verbose=0, **kwargs)
    model.learn(1000) 
Author: Stable-Baselines-Team, Project: stable-baselines, Source: test_her.py

Example 7: test_buffer_actions_scaling

# Required import: import stable_baselines [as alias]
# Or alternatively: from stable_baselines import SAC [as alias]
def test_buffer_actions_scaling(model_class, model_kwargs):
    """
    Test if actions are scaled to tanh co-domain before being put in a buffer
    for algorithms that use tanh-squashing, i.e., DDPG, TD3, SAC

    :param model_class: (BaseRLModel) A RL Model
    :param model_kwargs: (dict) Dictionary containing named arguments to the given algorithm
    """

    # check random and inferred actions as they possibly have different flows
    for random_coeff in [0.0, 1.0]:

        env = IdentityEnvBox(-2000, 1000)

        model = model_class("MlpPolicy", env, seed=1, random_exploration=random_coeff, **model_kwargs)
        model.learn(total_timesteps=ROLLOUT_STEPS)

        assert hasattr(model, 'replay_buffer')

        buffer = model.replay_buffer

        assert buffer.can_sample(ROLLOUT_STEPS)

        _, actions, _, _, _ = buffer.sample(ROLLOUT_STEPS)

        assert not np.any(actions > np.ones_like(actions))
        assert not np.any(actions < -np.ones_like(actions)) 
Author: Stable-Baselines-Team, Project: stable-baselines, Source: test_action_scaling.py
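The docstring's claim can be made concrete. For a Box action space, mapping an action from [low, high] into the tanh co-domain [-1, 1] is an affine rescaling; the helper below is a sketch of that formula (not the library's exact internal function), checked against the IdentityEnvBox(-2000, 1000) bounds used in the test.

import numpy as np

def scale_to_tanh_codomain(action, low, high):
    # Affinely map an action from [low, high] to [-1, 1].
    return 2.0 * (action - low) / (high - low) - 1.0

# The midpoint of [-2000, 1000] maps to 0; the bounds map to -1 and +1.
assert scale_to_tanh_codomain(np.float64(-500.0), -2000, 1000) == 0.0
assert scale_to_tanh_codomain(np.float64(-2000.0), -2000, 1000) == -1.0
assert scale_to_tanh_codomain(np.float64(1000.0), -2000, 1000) == 1.0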

Example 8: sample_sac_params

# Required import: import stable_baselines [as alias]
# Or alternatively: from stable_baselines import SAC [as alias]
def sample_sac_params(trial):
    """
    Sampler for SAC hyperparams.

    :param trial: (optuna.trial)
    :return: (dict)
    """
    gamma = trial.suggest_categorical('gamma', [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    learning_rate = trial.suggest_loguniform('lr', 1e-5, 1)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128, 256, 512])
    buffer_size = trial.suggest_categorical('buffer_size', [int(1e4), int(1e5), int(1e6)])
    learning_starts = trial.suggest_categorical('learning_starts', [0, 1000, 10000, 20000])
    train_freq = trial.suggest_categorical('train_freq', [1, 10, 100, 300])
    # gradient_steps takes too much time
    # gradient_steps = trial.suggest_categorical('gradient_steps', [1, 100, 300])
    gradient_steps = train_freq
    ent_coef = trial.suggest_categorical('ent_coef', ['auto', 0.5, 0.1, 0.05, 0.01, 0.0001])
    net_arch = trial.suggest_categorical('net_arch', ["small", "medium", "big"])

    net_arch = {
        'small': [64, 64],
        'medium': [256, 256],
        'big': [400, 300],
    }[net_arch]

    target_entropy = 'auto'
    if ent_coef == 'auto':
        target_entropy = trial.suggest_categorical('target_entropy', ['auto', -1, -10, -20, -50, -100])

    return {
        'gamma': gamma,
        'learning_rate': learning_rate,
        'batch_size': batch_size,
        'buffer_size': buffer_size,
        'learning_starts': learning_starts,
        'train_freq': train_freq,
        'gradient_steps': gradient_steps,
        'ent_coef': ent_coef,
        'target_entropy': target_entropy,
        'policy_kwargs': dict(layers=net_arch)
    } 
Author: araffin, Project: rl-baselines-zoo, Source: hyperparams_opt.py
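As a hedged illustration of how the sampled dictionary lines up with the SAC constructor in stable-baselines 2.x, the snippet below feeds one plausible draw from the sampler straight into SAC. The concrete values and the environment name are assumptions standing in for a real sample.

# One plausible sample from sample_sac_params; every key matches a SAC
# keyword argument, so the dict can be unpacked into the constructor.
hyperparams = {
    'gamma': 0.99,
    'learning_rate': 3e-4,
    'batch_size': 256,
    'buffer_size': int(1e6),
    'learning_starts': 1000,
    'train_freq': 1,
    'gradient_steps': 1,
    'ent_coef': 'auto',
    'target_entropy': 'auto',
    'policy_kwargs': dict(layers=[256, 256]),
}
model = SAC('MlpPolicy', 'Pendulum-v0', **hyperparams)
model.learn(total_timesteps=5000)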

Example 9: __init__

# Required import: import stable_baselines [as alias]
# Or alternatively: from stable_baselines import SAC [as alias]
def __init__(self):
    super(SACModel, self).__init__(name="sac", model_class=SAC)
Author: araffin, Project: robotics-rl-srl, Source: sac.py

Example 10: main

# Required import: import stable_baselines [as alias]
# Or alternatively: from stable_baselines import SAC [as alias]
def main(load_policy=False):
    global log_dir, log_dir_policy
    if load_policy:
        log_dir_policy = '../policies/PUSHING_TD3+HER_FIXED_POSITION_DYN_RAND_FROM_FIXED_PHYSICS'
    model_class = TD3  # works also with SAC and DDPG
    action_space = 7
    fixed = True
    normalize_observations = False
    gamma = 0.9
    memory_limit = 1000000
    normalize_returns = True
    timesteps = 1500000
    discreteAction = 0
    rend = False
    env = pandaPushGymEnvHERRand(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0,
                                 isDiscrete=discreteAction, action_space=action_space,
                                 fixedPositionObj=fixed, includeVelObs=True)

    env = Monitor(env, log_dir, allow_early_resets=True)
    # Available strategies (cf. paper): future, final, episode, random
    goal_selection_strategy = 'future'  # equivalent to GoalSelectionStrategy.FUTURE
    n_actions = env.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=0.5 * np.ones(n_actions))

    # Wrap the model
    model = HER(CustomPolicy, env, model_class, n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy,
                verbose=1, tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3+HER_FIXED_DYN_RAND",
                buffer_size=1000000, batch_size=256, random_exploration=0.3, action_noise=action_noise)

    if load_policy:
        model = HER.load("../policies/USEFUL_POLICIES/PUSHING_TD3+HER_FIXED_POSITIONbest_model.pkl", env=env,
                         n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy,
                         tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3+HER_FIXED_DYN_RAND_FROM_FIXED_PHYSICS",
                         buffer_size=1000000, batch_size=256, random_exploration=0.3, action_noise=action_noise)

    # Train the model starting from a previous policy
    model.learn(timesteps, callback=callback)
    model.save("../policies/PUSHING_FIXED_TD3_DYN_RAND")
    print("Finished train1")
Author: robotology-playground, Project: pybullet-robot-envs, Source: train_TD3_pushing_HER_Dyn_Rand.py

Example 11: main

# Required import: import stable_baselines [as alias]
# Or alternatively: from stable_baselines import SAC [as alias]
def main(load_policy=False):
    global log_dir
    model_class = TD3  # works also with SAC and DDPG
    action_space = 7
    normalize_observations = False
    gamma = 0.9
    memory_limit = 1000000
    normalize_returns = True
    timesteps = 8000000
    rend = False

    obj_pose_rnd_std = 0

    env = pandaPushGymGoalEnv(renders=rend, use_IK=0, numControlledJoints=action_space,
                              obj_pose_rnd_std=obj_pose_rnd_std, includeVelObs=True)

    env = Monitor(env, log_dir, allow_early_resets=True)

    goal_selection_strategy = 'future'
    n_actions = env.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=0.5 * np.ones(n_actions))

    # Wrap the model
    model = HER(CustomTD3Policy, env, model_class, n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy,
                verbose=1, tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3_phase1_target_fixed",
                buffer_size=1000000, batch_size=256, random_exploration=0.3, action_noise=action_noise)

    if load_policy:
        model = HER.load("../policies/USEFUL_POLICIES/PUSHING_TD3+HER_FIXED_POSITIONbest_model.pkl", env=env,
                         n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy,
                         tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3_phase1_target_fixed",
                         buffer_size=1000000, batch_size=256, random_exploration=0.3, action_noise=action_noise)

    model.learn(timesteps, log_interval=100, callback=callback)
    print("Saving Policy PHASE_1")
    model.save("../policies/TD3_phase1_target_fixed")
Author: robotology-playground, Project: pybullet-robot-envs, Source: train_TD3_pushing_HER.py

Example 12: test_model_manipulation

# Required import: import stable_baselines [as alias]
# Or alternatively: from stable_baselines import SAC [as alias]
def test_model_manipulation(model_class, goal_selection_strategy):
    env = BitFlippingEnv(N_BITS, continuous=model_class in [DDPG, SAC, TD3], max_steps=N_BITS)
    env = DummyVecEnv([lambda: env])

    model = HER('MlpPolicy', env, model_class, n_sampled_goal=3, goal_selection_strategy=goal_selection_strategy,
                verbose=0)
    model.learn(1000)

    model_predict(model, env, n_steps=100, additional_check=None)

    model.save('./test_her.zip')
    del model

    # NOTE: HER does not support VecEnvWrapper yet
    with pytest.raises(AssertionError):
        model = HER.load('./test_her.zip', env=VecNormalize(env))

    model = HER.load('./test_her.zip')

    # Check that the model raises an error when the env
    # is not wrapped (or no env passed to the model)
    with pytest.raises(ValueError):
        model.predict(env.reset())

    env_ = BitFlippingEnv(N_BITS, continuous=model_class in [DDPG, SAC, TD3], max_steps=N_BITS)
    env_ = HERGoalEnvWrapper(env_)

    model_predict(model, env_, n_steps=100, additional_check=None)

    model.set_env(env)
    model.learn(1000)

    model_predict(model, env_, n_steps=100, additional_check=None)

    assert model.n_sampled_goal == 3

    del model

    env = BitFlippingEnv(N_BITS, continuous=model_class in [DDPG, SAC, TD3], max_steps=N_BITS)
    model = HER.load('./test_her', env=env)
    model.learn(1000)

    model_predict(model, env_, n_steps=100, additional_check=None)

    assert model.n_sampled_goal == 3

    if os.path.isfile('./test_her.zip'):
        os.remove('./test_her.zip') 
Author: Stable-Baselines-Team, Project: stable-baselines, Source: test_her.py


Note: The stable_baselines.SAC examples in this article were compiled by 純淨天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are selected from open-source projects; copyright remains with the original authors. Consult each project's license before distributing or reusing the code, and do not reproduce this compilation without permission.