当前位置: 首页>>代码示例>>Python>>正文


Python vec_normalize.VecNormalize 类代码示例

本文整理汇总了 Python 中 baselines.common.vec_env.vec_normalize.VecNormalize 类的典型用法代码示例。如果您正苦于以下问题:Python vec_normalize.VecNormalize 类的具体用法?Python vec_normalize.VecNormalize 怎么用?Python vec_normalize.VecNormalize 使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 baselines.common.vec_env.vec_normalize 的用法示例。


在下文中一共展示了 vec_normalize.VecNormalize 类的 9 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: train

# 需要导入模块: from baselines.common.vec_env import vec_normalize [as 别名]
# 或者: from baselines.common.vec_env.vec_normalize import VecNormalize [as 别名]
def train(env_id, num_timesteps, seed, policy, r_ex_coef, r_in_coef, lr_alpha, lr_beta, reward_freq):
    """Run ``ppo2.learn`` on a single monitored gym environment wrapped in ``VecNormalize``.

    Relies on ``bench`` and ``logger`` being available at module scope.

    Args:
        env_id: id passed to ``gym.make``.
        num_timesteps: forwarded to ``ppo2.learn`` as ``total_timesteps``.
        seed: value handed to ``set_global_seeds``.
        policy: ``'mlp'`` selects ``MlpPolicy``; ``'mlp_int'`` selects
            ``MlpPolicyIntrinsicReward``.
        r_ex_coef, r_in_coef, lr_alpha, lr_beta, reward_freq: forwarded
            unchanged to ``ppo2.learn`` under the same keyword names.

    Raises:
        NotImplementedError: if ``policy`` is neither ``'mlp'`` nor ``'mlp_int'``.
    """
    from baselines.common import set_global_seeds
    from baselines.common.vec_env.vec_normalize import VecNormalize
    from baselines.ppo2 import ppo2
    from baselines.ppo2.policies import MlpPolicy, MlpPolicyIntrinsicReward
    import gym
    import tensorflow as tf
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

    # One thread for both intra- and inter-op parallelism.
    num_threads = 1
    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        intra_op_parallelism_threads=num_threads,
        inter_op_parallelism_threads=num_threads,
    )
    session_config.gpu_options.allow_growth = True
    # The session context is entered here and deliberately never exited.
    tf.Session(config=session_config).__enter__()

    def _monitored_env():
        # Built lazily: DummyVecEnv receives the factory, not the env itself.
        return bench.Monitor(gym.make(env_id), logger.get_dir())

    env = VecNormalize(DummyVecEnv([_monitored_env]))

    set_global_seeds(seed)

    # Resolve the policy name to its class.
    if policy == 'mlp':
        policy_cls = MlpPolicy
    elif policy == 'mlp_int':
        policy_cls = MlpPolicyIntrinsicReward
    else:
        raise NotImplementedError

    ppo2.learn(
        policy=policy_cls,
        env=env,
        nsteps=2048,
        nminibatches=32,
        lam=0.95,
        gamma=0.99,
        noptepochs=10,
        log_interval=1,
        ent_coef=0.0,
        lr_alpha=lr_alpha,
        cliprange=0.2,
        total_timesteps=num_timesteps,
        r_ex_coef=r_ex_coef,
        r_in_coef=r_in_coef,
        lr_beta=lr_beta,
        reward_freq=reward_freq,
    )
开发者ID:Hwhitetooth,项目名称:lirpg,代码行数:40,代码来源:run_mujoco.py

示例2: train

# 需要导入模块: from baselines.common.vec_env import vec_normalize [as 别名]
# 或者: from baselines.common.vec_env.vec_normalize import VecNormalize [as 别名]
def train(env_id, num_timesteps, seed):
    """Run ``ppo2.learn`` with ``MlpPolicy`` on one monitored, ``VecNormalize``-wrapped gym env.

    Relies on ``bench`` and ``logger`` being available at module scope.

    Args:
        env_id: id passed to ``gym.make``.
        num_timesteps: forwarded to ``ppo2.learn`` as ``total_timesteps``.
        seed: value handed to ``set_global_seeds``.
    """
    from baselines.common import set_global_seeds
    from baselines.common.vec_env.vec_normalize import VecNormalize
    from baselines.ppo2 import ppo2
    from baselines.ppo2.policies import MlpPolicy
    import gym
    import tensorflow as tf
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

    # One thread for both intra- and inter-op parallelism.
    num_threads = 1
    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        intra_op_parallelism_threads=num_threads,
        inter_op_parallelism_threads=num_threads,
    )
    # The session context is entered here and deliberately never exited.
    tf.Session(config=session_config).__enter__()

    def _monitored_env():
        # Built lazily: DummyVecEnv receives the factory, not the env itself.
        return bench.Monitor(gym.make(env_id), logger.get_dir())

    env = VecNormalize(DummyVecEnv([_monitored_env]))

    set_global_seeds(seed)
    ppo2.learn(
        policy=MlpPolicy,
        env=env,
        nsteps=2048,
        nminibatches=32,
        lam=0.95,
        gamma=0.99,
        noptepochs=10,
        log_interval=1,
        ent_coef=0.0,
        lr=3e-4,
        cliprange=0.2,
        total_timesteps=num_timesteps,
    )
开发者ID:bowenliu16,项目名称:rl_graph_generation,代码行数:30,代码来源:run_mujoco.py

示例3: build_env

# 需要导入模块: from baselines.common.vec_env import vec_normalize [as 别名]
# 或者: from baselines.common.vec_env.vec_normalize import VecNormalize [as 别名]
def build_env(args):
    """Construct the environment described by ``args.env`` / ``args.alg``.

    For ``atari`` and ``retro`` environment types the result depends on the
    algorithm: ``deepq`` and ``trpo_mpi`` get a single env from ``make_env``
    (``deepq`` with frame stacking requested through ``wrapper_kwargs``);
    anything else gets a vectorized env wrapped in ``VecFrameStack`` with a
    stack size of 4.

    For every other environment type a TF session is set up through
    ``get_session`` and a vectorized env with ``args.num_env or 1`` copies is
    built; ``mujoco`` envs are additionally wrapped in ``VecNormalize``.

    Args:
        args: namespace providing ``num_env``, ``alg``, ``seed``, ``env``,
            ``reward_scale`` and (atari/retro only) ``gamestate``.

    Returns:
        The constructed environment.
    """
    cpu_total = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        # On macOS only half of the reported CPU count is used.
        cpu_total //= 2
    num_envs = args.num_env or cpu_total
    algorithm = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type not in {'atari', 'retro'}:
        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            intra_op_parallelism_threads=1,
            inter_op_parallelism_threads=1,
        )
        session_config.gpu_options.allow_growth = True
        get_session(config=session_config)

        # Dict observations are flattened for every algorithm except 'her'.
        flatten = algorithm not in {'her'}
        vec_env = make_vec_env(
            env_id,
            env_type,
            args.num_env or 1,
            seed,
            reward_scale=args.reward_scale,
            flatten_dict_observations=flatten,
        )
        if env_type == 'mujoco':
            vec_env = VecNormalize(vec_env)
        return vec_env

    if algorithm == 'deepq':
        return make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
    if algorithm == 'trpo_mpi':
        return make_env(env_id, env_type, seed=seed)

    stack_depth = 4
    vec_env = make_vec_env(
        env_id,
        env_type,
        num_envs,
        seed,
        gamestate=args.gamestate,
        reward_scale=args.reward_scale,
    )
    return VecFrameStack(vec_env, stack_depth)
开发者ID:jiewwantan,项目名称:StarTrader,代码行数:35,代码来源:run.py

示例4: train

# 需要导入模块: from baselines.common.vec_env import vec_normalize [as 别名]
# 或者: from baselines.common.vec_env.vec_normalize import VecNormalize [as 别名]
def train(env_id, num_timesteps, seed, lr,
        sil_update, sil_value, sil_alpha, sil_beta):
    """Run ``ppo2_sil.learn`` with ``MlpPolicy`` on one monitored, ``VecNormalize``-wrapped gym env.

    Relies on ``bench`` and ``logger`` being available at module scope.

    Args:
        env_id: id passed to ``gym.make``.
        num_timesteps: forwarded to ``ppo2_sil.learn`` as ``total_timesteps``.
        seed: value handed to ``set_global_seeds``.
        lr: forwarded to ``ppo2_sil.learn`` as ``lr``.
        sil_update, sil_value, sil_alpha, sil_beta: forwarded unchanged to
            ``ppo2_sil.learn`` under the same keyword names.

    Returns:
        Tuple ``(model, env)``: the value returned by ``ppo2_sil.learn`` and
        the ``VecNormalize``-wrapped environment it was trained on.
    """
    from baselines.common import set_global_seeds
    from baselines.common.vec_env.vec_normalize import VecNormalize
    from baselines.ppo2 import ppo2_sil
    from baselines.ppo2.policies import MlpPolicy
    import gym
    import tensorflow as tf
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

    # One thread for both intra- and inter-op parallelism.
    num_threads = 1
    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        intra_op_parallelism_threads=num_threads,
        inter_op_parallelism_threads=num_threads,
    )
    # The session context is entered here and deliberately never exited.
    tf.Session(config=session_config).__enter__()

    def _monitored_env():
        # Built lazily: DummyVecEnv receives the factory, not the env itself.
        return bench.Monitor(gym.make(env_id), logger.get_dir(), allow_early_resets=True)

    env = VecNormalize(DummyVecEnv([_monitored_env]))

    set_global_seeds(seed)
    trained = ppo2_sil.learn(
        policy=MlpPolicy,
        env=env,
        nsteps=2048,
        nminibatches=32,
        lam=0.95,
        gamma=0.99,
        noptepochs=10,
        log_interval=1,
        ent_coef=0.0,
        lr=lr,
        cliprange=0.2,
        total_timesteps=num_timesteps,
        sil_update=sil_update,
        sil_value=sil_value,
        sil_alpha=sil_alpha,
        sil_beta=sil_beta,
    )
    return trained, env
开发者ID:junhyukoh,项目名称:self-imitation-learning,代码行数:40,代码来源:run_mujoco_sil.py

示例5: train

# 需要导入模块: from baselines.common.vec_env import vec_normalize [as 别名]
# 或者: from baselines.common.vec_env.vec_normalize import VecNormalize [as 别名]
def train(env_id, num_timesteps, seed):
    """Run ``ppo2.learn`` with ``MlpPolicy`` and hand back both the model and the env.

    Relies on ``bench`` and ``logger`` being available at module scope.

    Args:
        env_id: id passed to ``gym.make``.
        num_timesteps: forwarded to ``ppo2.learn`` as ``total_timesteps``.
        seed: value handed to ``set_global_seeds``.

    Returns:
        Tuple ``(model, env)``: the value returned by ``ppo2.learn`` and the
        ``VecNormalize``-wrapped environment it was trained on.
    """
    from baselines.common import set_global_seeds
    from baselines.common.vec_env.vec_normalize import VecNormalize
    from baselines.ppo2 import ppo2
    from baselines.ppo2.policies import MlpPolicy
    import gym
    import tensorflow as tf
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

    # One thread for both intra- and inter-op parallelism.
    num_threads = 1
    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        intra_op_parallelism_threads=num_threads,
        inter_op_parallelism_threads=num_threads,
    )
    # The session context is entered here and deliberately never exited.
    tf.Session(config=session_config).__enter__()

    def _monitored_env():
        # Built lazily: DummyVecEnv receives the factory, not the env itself.
        return bench.Monitor(gym.make(env_id), logger.get_dir(), allow_early_resets=True)

    env = VecNormalize(DummyVecEnv([_monitored_env]))

    set_global_seeds(seed)
    trained = ppo2.learn(
        policy=MlpPolicy,
        env=env,
        nsteps=2048,
        nminibatches=32,
        lam=0.95,
        gamma=0.99,
        noptepochs=10,
        log_interval=1,
        ent_coef=0.0,
        lr=3e-4,
        cliprange=0.2,
        total_timesteps=num_timesteps,
    )
    return trained, env
开发者ID:junhyukoh,项目名称:self-imitation-learning,代码行数:34,代码来源:run_mujoco.py

示例6: build_env

# 需要导入模块: from baselines.common.vec_env import vec_normalize [as 别名]
# 或者: from baselines.common.vec_env.vec_normalize import VecNormalize [as 别名]
def build_env(args):
    """Build the training environment selected by ``args.env`` and ``args.alg``.

    The environment type returned by ``get_env_type`` picks the branch:

    * ``mujoco``: a ``SubprocVecEnv`` with ``args.num_env`` workers when
      ``args.num_env`` is truthy, otherwise a one-env ``DummyVecEnv``; either
      way the result is wrapped in ``VecNormalize``.
    * ``atari``: depends on ``args.alg`` (``acer``, ``deepq``, ``trpo_mpi``,
      or anything else, which gets a ``VecFrameStack`` of depth 4).
    * ``retro``: a single monitored retro env; the game state defaults to
      ``'Level1-1'`` when ``args.gamestate`` is falsy.
    * ``classic_control``: a one-env ``DummyVecEnv`` around a monitored,
      seeded gym env.

    Args:
        args: namespace providing ``num_env``, ``alg``, ``seed``, ``env``,
            ``reward_scale`` and ``gamestate``.

    Returns:
        The constructed environment.

    Raises:
        ValueError: if the environment type is none of the four above.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        # On macOS only half of the reported CPU count is used.
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = args.seed

    env_type, env_id = get_env_type(args.env)
    if env_type == 'mujoco':
        get_session(tf.ConfigProto(allow_soft_placement=True,
                                   intra_op_parallelism_threads=1,
                                   inter_op_parallelism_threads=1))

        if args.num_env:
            # Bind ``i`` as a default argument: a plain ``lambda:`` would look
            # ``i`` up only when called, after the comprehension has finished,
            # so every worker would receive the same (last) seed offset.
            env = SubprocVecEnv([
                lambda i=i: make_mujoco_env(
                    env_id,
                    seed + i if seed is not None else None,
                    args.reward_scale,
                )
                for i in range(args.num_env)
            ])
        else:
            env = DummyVecEnv([lambda: make_mujoco_env(env_id, seed, args.reward_scale)])

        env = VecNormalize(env)

    elif env_type == 'atari':
        if alg == 'acer':
            env = make_atari_env(env_id, nenv, seed)  # , wrapper_kwargs={'clip_rewards': False})
        elif alg == 'deepq':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir())
            env = atari_wrappers.wrap_deepmind(env, frame_stack=True, scale=True)
        elif alg == 'trpo_mpi':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            # Per-rank monitor sub-directory; stays falsy when there is no log dir.
            env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
            env = atari_wrappers.wrap_deepmind(env)
            # TODO check if the second seeding is necessary, and eventually remove
            env.seed(seed)
        else:
            frame_stack_size = 4
            env = VecFrameStack(make_atari_env(env_id, nenv, seed), frame_stack_size)

    elif env_type == 'retro':
        import retro
        gamestate = args.gamestate or 'Level1-1'
        env = retro_wrappers.make_retro(game=args.env, state=gamestate, max_episode_steps=10000, use_restricted_actions=retro.Actions.DISCRETE)
        env.seed(args.seed)
        env = bench.Monitor(env, logger.get_dir())
        env = retro_wrappers.wrap_deepmind_retro(env)

    elif env_type == 'classic_control':
        def make_env():
            e = gym.make(env_id)
            e = bench.Monitor(e, logger.get_dir(), allow_early_resets=True)
            e.seed(seed)
            return e

        env = DummyVecEnv([make_env])

    else:
        raise ValueError('Unknown env_type {}'.format(env_type))

    return env
开发者ID:MaxSobolMark,项目名称:HardRLWithYoutube,代码行数:63,代码来源:run.py

示例7: build_env

# 需要导入模块: from baselines.common.vec_env import vec_normalize [as 别名]
# 或者: from baselines.common.vec_env.vec_normalize import VecNormalize [as 别名]
def build_env(args):
    """Build the environment for ``args.env`` and optionally wrap it with a custom reward.

    For ``atari`` and ``retro`` environment types the result depends on the
    algorithm: ``deepq`` and ``trpo_mpi`` get a single env from ``make_env``;
    anything else gets a vectorized env wrapped in ``VecFrameStack`` with a
    stack size of 4. Every other environment type gets a TF session set up
    through ``get_session`` and a vectorized env with ``args.num_env or 1``
    copies.

    When ``args.custom_reward`` is non-empty, the env is wrapped by the
    matching class from ``baselines.common.custom_reward_wrapper``, with the
    keyword arguments obtained from ``args.custom_reward_kwargs``. ``mujoco``
    envs are finally wrapped in ``VecNormalize`` (outside the reward wrapper).

    Args:
        args: namespace providing ``num_env``, ``alg``, ``seed``, ``env``,
            ``reward_scale``, ``custom_reward``, ``custom_reward_kwargs`` and
            (atari/retro only) ``gamestate``.

    Returns:
        The constructed environment.

    Raises:
        AssertionError: if a custom reward is requested for an env that is
            neither a ``VecEnv`` nor a ``VecEnvWrapper``, or if
            ``args.custom_reward`` names no known wrapper.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        # On macOS only half of the reported CPU count is used.
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed, gamestate=args.gamestate, reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        env = make_vec_env(env_id, env_type, args.num_env or 1, seed, reward_scale=args.reward_scale)

    if args.custom_reward != '':
        from baselines.common.vec_env import VecEnv, VecEnvWrapper
        import baselines.common.custom_reward_wrapper as W

        # Raised explicitly rather than via ``assert`` so the check is still
        # enforced when Python runs with -O (which strips assert statements).
        if not isinstance(env, (VecEnv, VecEnvWrapper)):
            raise AssertionError('custom reward wrappers require a VecEnv or VecEnvWrapper')

        # SECURITY: eval() executes arbitrary code taken from the command
        # line. Only pass trusted input here; if the kwargs are always plain
        # literals, ast.literal_eval would be a safer replacement.
        custom_reward_kwargs = eval(args.custom_reward_kwargs)

        if args.custom_reward == 'live_long':
            env = W.VecLiveLongReward(env, **custom_reward_kwargs)
        elif args.custom_reward == 'random_tf':
            env = W.VecTFRandomReward(env, **custom_reward_kwargs)
        elif args.custom_reward == 'preference':
            env = W.VecTFPreferenceReward(env, **custom_reward_kwargs)
        elif args.custom_reward == 'preference_normalized':
            env = W.VecTFPreferenceRewardNormalized(env, **custom_reward_kwargs)
        else:
            # Explicit raise: with ``assert False`` an unknown wrapper name
            # would be silently ignored under -O.
            raise AssertionError('no such wrapper exist')

    if env_type == 'mujoco':
        env = VecNormalize(env)

    return env
开发者ID:hiwonjoon,项目名称:ICML2019-TREX,代码行数:52,代码来源:run.py

示例8: build_env

# 需要导入模块: from baselines.common.vec_env import vec_normalize [as 别名]
# 或者: from baselines.common.vec_env.vec_normalize import VecNormalize [as 别名]
def build_env(args):
    """Build the environment for ``args.env`` and optionally attach a PyTorch reward wrapper.

    For ``atari`` and ``retro`` environment types the result depends on the
    algorithm: ``deepq`` and ``trpo_mpi`` get a single env from ``make_env``;
    anything else gets a vectorized env wrapped in ``VecFrameStack`` with a
    stack size of 4. Every other environment type gets a TF session set up
    through ``get_session`` and a vectorized env with ``args.num_env or 1``
    copies.

    When ``args.custom_reward`` is ``'pytorch'`` the env is wrapped in
    ``VecPyTorchAtariReward``, which receives ``args.custom_reward_path`` and
    the lower-cased part of the env id preceding ``"NoFrameskip"``. ``mujoco``
    envs are finally wrapped in ``VecNormalize``.

    Args:
        args: namespace providing ``num_env``, ``alg``, ``seed``, ``env``,
            ``reward_scale``, ``custom_reward``, ``custom_reward_kwargs``,
            ``custom_reward_path`` and (atari/retro only) ``gamestate``.

    Returns:
        The constructed environment.

    Raises:
        AssertionError: if a custom reward is requested for an env that is
            neither a ``VecEnv`` nor a ``VecEnvWrapper``, if the ``'pytorch'``
            wrapper is requested without a reward-model path, or if
            ``args.custom_reward`` names no known wrapper.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        # On macOS only half of the reported CPU count is used.
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    print(env_id)
    # Extract the env name used for masking: everything before "NoFrameskip".
    # ``partition`` leaves the id intact when the marker is absent, whereas
    # slicing with the -1 returned by ``find`` would drop the last character.
    env_name = env_id.partition("NoFrameskip")[0].lower()
    print("Env Name for Masking:", env_name)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed, gamestate=args.gamestate, reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        env = make_vec_env(env_id, env_type, args.num_env or 1, seed, reward_scale=args.reward_scale)

    if args.custom_reward != '':
        from baselines.common.vec_env import VecEnv, VecEnvWrapper
        import baselines.common.custom_reward_wrapper as W

        # Raised explicitly rather than via ``assert`` so the check is still
        # enforced when Python runs with -O (which strips assert statements).
        if not isinstance(env, (VecEnv, VecEnvWrapper)):
            raise AssertionError('custom reward wrappers require a VecEnv or VecEnvWrapper')

        # SECURITY: eval() executes arbitrary code taken from the command
        # line; only pass trusted input here. The result is not forwarded to
        # any wrapper below, but the call is kept so that a malformed
        # ``custom_reward_kwargs`` string still fails at this point.
        custom_reward_kwargs = eval(args.custom_reward_kwargs)

        if args.custom_reward == 'pytorch':
            if args.custom_reward_path == '':
                raise AssertionError('no path for reward model')
            env = W.VecPyTorchAtariReward(env, args.custom_reward_path, env_name)
        else:
            # Explicit raise: with ``assert False`` an unknown wrapper name
            # would be silently ignored under -O.
            raise AssertionError('no such wrapper exist')

    if env_type == 'mujoco':
        env = VecNormalize(env)
    # if env_type == 'atari':
    #     input("Normalizing for ATari game: okay? [Enter]")
    #     #normalize rewards but not observations for atari
    #     env = VecNormalizeRewards(env)

    return env
开发者ID:hiwonjoon,项目名称:ICML2019-TREX,代码行数:59,代码来源:run.py

示例9: build_env

# 需要导入模块: from baselines.common.vec_env import vec_normalize [as 别名]
# 或者: from baselines.common.vec_env.vec_normalize import VecNormalize [as 别名]
def build_env(args):
    """Construct the environment described by ``args.env`` / ``args.alg``.

    * ``atari``: ``acer`` gets a vectorized env; ``deepq`` and ``trpo_mpi``
      get a single seeded, monitored, DeepMind-wrapped env (``trpo_mpi``
      monitors into a per-rank sub-directory); any other algorithm gets a
      ``VecFrameStack`` of depth 4 around a vectorized env.
    * ``retro``: a single monitored retro env; the game state defaults to
      ``retro.State.DEFAULT`` when ``args.gamestate`` is falsy.
    * anything else: a TF session is set up through ``get_session`` and a
      vectorized env with ``args.num_env or 1`` copies is built; ``mujoco``
      envs are additionally wrapped in ``VecNormalize``.

    Args:
        args: namespace providing ``num_env``, ``alg``, ``seed``, ``env``,
            ``reward_scale`` and ``gamestate``.

    Returns:
        The constructed environment.
    """
    cpu_total = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        # On macOS only half of the reported CPU count is used.
        cpu_total //= 2
    num_envs = args.num_env or cpu_total
    algorithm = args.alg
    rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type == 'atari':
        if algorithm == 'acer':
            return make_vec_env(env_id, env_type, num_envs, seed)

        if algorithm == 'deepq':
            single = atari_wrappers.make_atari(env_id)
            single.seed(seed)
            single = bench.Monitor(single, logger.get_dir())
            return atari_wrappers.wrap_deepmind(single, frame_stack=True)

        if algorithm == 'trpo_mpi':
            single = atari_wrappers.make_atari(env_id)
            single.seed(seed)
            # Per-rank monitor sub-directory; stays falsy when there is no log dir.
            single = bench.Monitor(single, logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
            single = atari_wrappers.wrap_deepmind(single)
            # TODO check if the second seeding is necessary, and eventually remove
            single.seed(seed)
            return single

        stack_depth = 4
        return VecFrameStack(make_vec_env(env_id, env_type, num_envs, seed), stack_depth)

    if env_type == 'retro':
        import retro
        start_state = args.gamestate or retro.State.DEFAULT
        single = retro_wrappers.make_retro(
            game=args.env,
            state=start_state,
            max_episode_steps=10000,
            use_restricted_actions=retro.Actions.DISCRETE,
        )
        single.seed(args.seed)
        single = bench.Monitor(single, logger.get_dir())
        return retro_wrappers.wrap_deepmind_retro(single)

    get_session(tf.ConfigProto(allow_soft_placement=True,
                               intra_op_parallelism_threads=1,
                               inter_op_parallelism_threads=1))

    vec_env = make_vec_env(env_id, env_type, args.num_env or 1, seed, reward_scale=args.reward_scale)
    if env_type == 'mujoco':
        vec_env = VecNormalize(vec_env)
    return vec_env
开发者ID:hust512,项目名称:DQN-DDPG_Stock_Trading,代码行数:51,代码来源:run.py


注:本文中的baselines.common.vec_env.vec_normalize.VecNormalize方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。