当前位置: 首页>>代码示例>>Python>>正文


Python subproc_vec_env.SubprocVecEnv方法代码示例

本文整理汇总了Python中baselines.common.vec_env.subproc_vec_env.SubprocVecEnv方法的典型用法代码示例。如果您正苦于以下问题:Python subproc_vec_env.SubprocVecEnv方法的具体用法?Python subproc_vec_env.SubprocVecEnv怎么用?Python subproc_vec_env.SubprocVecEnv使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在baselines.common.vec_env.subproc_vec_env的用法示例。


在下文中一共展示了subproc_vec_env.SubprocVecEnv方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_env_after_learn

# 需要导入模块: from baselines.common.vec_env import subproc_vec_env [as 别名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 别名]
def test_env_after_learn(algo):
    """
    Check that a SubprocVecEnv can still be reset and closed after ``learn``.

    ``algo`` is the algorithm name handed to ``get_learn_function``; the
    returned learn function is invoked with ``total_timesteps=0`` on a
    single-worker SubprocVecEnv, which is then reset and closed.
    """
    def make_env():
        # acktr requires too much RAM, fails on travis -- so it gets CartPole.
        if algo == 'acktr':
            env_id = 'CartPole-v1'
        else:
            env_id = 'PongNoFrameskip-v4'
        return gym.make(env_id)

    make_session(make_default=True, graph=tf.Graph())
    vec_env = SubprocVecEnv([make_env])

    learn_fn = get_learn_function(algo)

    # Author's note: commenting out the learn call below resolves the issue,
    # though the crash itself happens at the reset() call.
    learn_fn(network='mlp', env=vec_env, total_timesteps=0, load_path=None, seed=None)

    vec_env.reset()
    vec_env.close()
开发者ID:quantumiracle,项目名称:Reinforcement_Learning_for_Traffic_Light_Control,代码行数:18,代码来源:test_env_after_learn.py

示例2: make_vec_env

# 需要导入模块: from baselines.common.vec_env import subproc_vec_env [as 别名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 别名]
def make_vec_env(env_id, env_type, num_env, seed, wrapper_kwargs=None, start_index=0, reward_scale=1.0, gamestate=None):
    """
    Build a vectorized environment out of ``num_env`` ``make_env`` thunks.

    Returns a SubprocVecEnv when ``num_env > 1`` and a single-environment
    DummyVecEnv otherwise. When a seed is given it is offset by
    ``10000 * mpi_rank`` before being used for global seeding and passed on
    to every sub-environment.
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}

    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    if seed is not None:
        seed = seed + 10000 * mpi_rank

    def make_thunk(rank):
        # Deferred constructor: the environment is only built when the
        # vec env calls the thunk.
        def _thunk():
            return make_env(
                env_id=env_id,
                env_type=env_type,
                subrank=rank,
                seed=seed,
                reward_scale=reward_scale,
                gamestate=gamestate,
                wrapper_kwargs=wrapper_kwargs,
            )
        return _thunk

    set_global_seeds(seed)

    if num_env > 1:
        thunks = [make_thunk(i + start_index) for i in range(num_env)]
        return SubprocVecEnv(thunks)
    return DummyVecEnv([make_thunk(start_index)])
开发者ID:hiwonjoon,项目名称:ICML2019-TREX,代码行数:25,代码来源:cmd_util.py

示例3: make_atari_env

# 需要导入模块: from baselines.common.vec_env import subproc_vec_env [as 别名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 别名]
def make_atari_env(env_id, num_env, seed, hparams=None, wrapper_kwargs=None, start_index=0, nsteps=5, **kwargs):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari.

    Each of the ``num_env`` workers builds ``make_atari(env_id)``, seeds it
    with ``seed + rank`` (ranks start at ``start_index``), wraps it in a
    ``Monitor`` and finally in ``wrap_deepmind(env, **wrapper_kwargs)``.

    If ``'video_log_dir'`` is present in ``kwargs``, the worker whose rank
    equals ``start_index`` is additionally wrapped in ``VideoLogMonitor``;
    in that case ``kwargs['write_attention_video']`` must be supplied too
    (a ``KeyError`` is raised inside the worker thunk otherwise).

    The resulting vec env is also stored in the module-level global
    ``my_subproc_vec_env``; the function asserts that this global is still
    ``None`` before overwriting it.
    """
    global my_subproc_vec_env

    if wrapper_kwargs is None:
        wrapper_kwargs = {}

    def make_env(rank): # pylint: disable=C0111
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            # With no logger directory configured, the falsy value of
            # logger.get_dir() is passed to Monitor instead of a path.
            env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))

            # Only the first worker records video.
            if rank == start_index and 'video_log_dir' in kwargs:
                env = VideoLogMonitor(env, kwargs['video_log_dir'] + '_rgb', write_attention_video=kwargs['write_attention_video'], hparams=hparams, nsteps=nsteps)

            return wrap_deepmind(env, **wrapper_kwargs)
        return _thunk

    set_global_seeds(seed)

    env_fns = [make_env(i + start_index) for i in range(num_env)]

    # Identity check: comparisons against the None singleton should use
    # `is`, which cannot be fooled by a custom __eq__.
    assert my_subproc_vec_env is None
    my_subproc_vec_env = SubprocVecEnv(env_fns)

    return my_subproc_vec_env
开发者ID:vik-goel,项目名称:MOREL,代码行数:27,代码来源:cmd_util.py

示例4: main

# 需要导入模块: from baselines.common.vec_env import subproc_vec_env [as 别名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 别名]
def main():
    """Run PPO until the environment throws an exception."""
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True # pylint: disable=E1101
    env_fns, _env_names = create_envs()

    def learning_rate(_):
        return 2e-4

    def clip_range(_):
        return 0.1

    with tf.Session(config=session_config):
        policy_cls = policies.CnnPolicy
        vec_env = SubprocVecEnv(env_fns)
        # Take more timesteps than we need to be sure that
        # we stop due to an exception.
        ppo2.learn(
            policy=policy_cls,
            env=vec_env,
            nsteps=4096,
            nminibatches=8,
            lam=0.95,
            gamma=0.99,
            noptepochs=3,
            log_interval=1,
            ent_coef=0.001,
            lr=learning_rate,
            cliprange=clip_range,
            total_timesteps=int(1e9),
            save_interval=10,
            save_path='checkpoints_expert_'+expert_name,
            # Start from the pretrained joint model.
            load_path='./checkpoints_joint_ppo2/00300',
        )
开发者ID:flyyufelix,项目名称:sonic_contest,代码行数:25,代码来源:ppo2_expert.py

示例5: main

# 需要导入模块: from baselines.common.vec_env import subproc_vec_env [as 别名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 别名]
def main():
    """Run PPO until the environment throws an exception."""
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True # pylint: disable=E1101
    env_fns, _env_names = create_envs()

    def learning_rate(_):
        return 2e-4

    def clip_range(_):
        return 0.1

    with tf.Session(config=session_config):
        policy_cls = policies.CnnPolicy
        vec_env = SubprocVecEnv(env_fns)
        # Take more timesteps than we need to be sure that
        # we stop due to an exception.
        ppo2.learn(
            policy=policy_cls,
            env=vec_env,
            nsteps=4096,
            nminibatches=8,
            lam=0.95,
            gamma=0.99,
            noptepochs=3,
            log_interval=1,
            ent_coef=0.01,
            lr=learning_rate,
            cliprange=clip_range,
            total_timesteps=int(1e9),
            save_interval=10,
            save_path='./checkpoints_joint_ppo2',
            load_path=None,
        )
开发者ID:flyyufelix,项目名称:sonic_contest,代码行数:25,代码来源:ppo2_joint.py

示例6: _make_atari_env

# 需要导入模块: from baselines.common.vec_env import subproc_vec_env [as 别名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 别名]
def _make_atari_env(env_id, num_env, seed, wrapper_kwargs=None, start_index=0):
    """
    Build a SubprocVecEnv of ``num_env`` monitored, DeepMind-wrapped Atari
    environments, mirroring the helper shipped with OpenAI Baselines.

    Worker ranks run from ``start_index`` upward; each worker is seeded with
    ``seed + rank`` and its Monitor allows early resets.
    """
    from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

    if wrapper_kwargs is None:
        wrapper_kwargs = {}

    def make_env(rank):  # pylint: disable=C0111
        def _thunk():
            atari_env = make_atari(env_id)
            atari_env.seed(seed + rank)
            # Keep the falsy value when no logger directory is configured.
            monitor_path = logger.get_dir()
            if monitor_path:
                monitor_path = os.path.join(logger.get_dir(), str(rank))
            monitored = Monitor(atari_env, monitor_path, allow_early_resets=True)
            return wrap_deepmind(monitored, **wrapper_kwargs)

        return _thunk

    set_global_seeds(seed)
    env_fns = [make_env(i + start_index) for i in range(num_env)]
    return SubprocVecEnv(env_fns)
开发者ID:Guillemdb,项目名称:FractalAI,代码行数:26,代码来源:baselines.py

示例7: _make_atari_env

# 需要导入模块: from baselines.common.vec_env import subproc_vec_env [as 别名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 别名]
def _make_atari_env(env_id, num_env, seed, wrapper_kwargs=None, start_index=0):
    """
    Create the same wrapped, monitored Atari SubprocVecEnv that OpenAI
    Baselines builds.

    One worker is created per rank in
    ``start_index .. start_index + num_env - 1``; each is seeded with
    ``seed + rank``, monitored with early resets allowed, and wrapped with
    ``wrap_deepmind(env, **wrapper_kwargs)``.
    """
    from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

    if wrapper_kwargs is None:
        wrapper_kwargs = {}

    def make_env(rank):  # pylint: disable=C0111
        def _thunk():
            base_env = make_atari(env_id)
            base_env.seed(seed + rank)
            # A falsy logger directory is forwarded to Monitor unchanged.
            log_target = logger.get_dir()
            if log_target:
                log_target = os.path.join(logger.get_dir(), str(rank))
            wrapped = Monitor(base_env, log_target, allow_early_resets=True)
            return wrap_deepmind(wrapped, **wrapper_kwargs)
        return _thunk

    set_global_seeds(seed)
    thunks = []
    for offset in range(num_env):
        thunks.append(make_env(offset + start_index))
    return SubprocVecEnv(thunks)
开发者ID:FragileTech,项目名称:FractalAI,代码行数:20,代码来源:baselines.py

示例8: train

# 需要导入模块: from baselines.common.vec_env import subproc_vec_env [as 别名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 别名]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_cpu):
    """
    Train on ``num_cpu`` parallel Atari environments with the module's ``learn``.

    Args:
        env_id: environment id passed to ``make_atari``.
        num_timesteps: nominal step budget; ``learn`` is given
            ``int(num_timesteps * 1.1)``.
        seed: base seed; worker ``rank`` is seeded with ``seed + rank``.
        policy: one of ``'cnn'``, ``'lstm'`` or ``'lnlstm'``.
        lrschedule: forwarded unchanged to ``learn``.
        num_cpu: number of worker subprocesses to spawn.

    Raises:
        ValueError: if ``policy`` is not one of the supported names.
    """
    # Resolve the policy before any subprocess is spawned. An unknown name
    # used to surface as an UnboundLocalError only after the workers were
    # already running, and they were never closed.
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    else:
        raise ValueError("Policy {} not implemented".format(policy))

    def make_env(rank):
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
            gym.logger.setLevel(logging.WARN)
            return wrap_deepmind(env)
        return _thunk

    set_global_seeds(seed)
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])
    try:
        learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), lrschedule=lrschedule)
    finally:
        # Always shut the worker processes down, even if learn() raises.
        env.close()
开发者ID:cxxgtxy,项目名称:deeprl-baselines,代码行数:21,代码来源:run_atari.py

示例9: train

# 需要导入模块: from baselines.common.vec_env import subproc_vec_env [as 别名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 别名]
def train(env_id, num_timesteps, seed, policy, lrschedule, num_cpu):
    """
    Train on ``num_cpu`` parallel Atari environments with the module's ``learn``.

    Args:
        env_id: environment id passed to ``make_atari``.
        num_timesteps: nominal step budget; ``learn`` is given
            ``int(num_timesteps * 1.1)``.
        seed: base seed; worker ``rank`` is seeded with ``seed + rank``.
        policy: ``'cnn'`` or ``'lstm'``. Any other value prints a message
            and returns ``None`` without doing any work.
        lrschedule: forwarded unchanged to ``learn``.
        num_cpu: number of worker subprocesses to spawn.
    """
    # Resolve the policy before any subprocess is spawned. The
    # "not implemented" branch used to return after the workers were already
    # running, leaving them alive.
    if policy == 'cnn':
        policy_fn = AcerCnnPolicy
    elif policy == 'lstm':
        policy_fn = AcerLstmPolicy
    else:
        print("Policy {} not implemented".format(policy))
        return

    def make_env(rank):
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
            gym.logger.setLevel(logging.WARN)
            return wrap_deepmind(env)
        return _thunk

    set_global_seeds(seed)
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])
    try:
        learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), lrschedule=lrschedule)
    finally:
        # Always shut the worker processes down, even if learn() raises.
        env.close()
开发者ID:cxxgtxy,项目名称:deeprl-baselines,代码行数:22,代码来源:run_atari.py

示例10: train

# 需要导入模块: from baselines.common.vec_env import subproc_vec_env [as 别名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 别名]
def train(env_id, num_frames, seed, policy, lrschedule, num_cpu):
    """
    Train on ``num_cpu`` parallel gym environments with the module's ``learn``.

    Args:
        env_id: environment id passed to ``gym.make``.
        num_frames: frame budget; converted to a timestep budget of
            ``int(num_frames / 4 * 1.1)``.
        seed: base seed; worker ``rank`` is seeded with ``seed + rank``.
        policy: one of ``'cnn'``, ``'linear'``, ``'lstm'`` or ``'lnlstm'``.
        lrschedule: forwarded unchanged to ``learn``.
        num_cpu: number of worker subprocesses to spawn.

    Raises:
        ValueError: if ``policy`` is not one of the supported names.
    """
    # Resolve the policy before any subprocess is spawned. An unknown name
    # used to surface as an UnboundLocalError only after the workers were
    # already running, and they were never closed.
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'linear':
        policy_fn = LinearPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    else:
        raise ValueError("Policy {} not implemented".format(policy))

    # divide by 4 due to frameskip, then do a little extras so episodes end
    num_timesteps = int(num_frames / 4 * 1.1)

    def make_env(rank):
        def _thunk():
            env = gym.make(env_id)
            env.seed(seed + rank)
            env = bench.Monitor(env, logger.get_dir() and
                os.path.join(logger.get_dir(), "{}.monitor.json".format(rank)))
            gym.logger.setLevel(logging.WARN)
            return wrap_deepmind(env)
        return _thunk

    set_global_seeds(seed)
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])
    try:
        learn(policy_fn, env, seed, total_timesteps=num_timesteps, lrschedule=lrschedule)
    finally:
        # Always shut the worker processes down, even if learn() raises.
        env.close()
开发者ID:wgrathwohl,项目名称:BackpropThroughTheVoidRL,代码行数:26,代码来源:run_atari.py

示例11: make_atari_env

# 需要导入模块: from baselines.common.vec_env import subproc_vec_env [as 别名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 别名]
def make_atari_env(env_id, num_env, seed, wrapper_kwargs=None, start_index=0):
    """
    Build a SubprocVecEnv of ``num_env`` monitored, DeepMind-wrapped Atari
    environments.

    Worker ranks start at ``start_index``; each worker is seeded with
    ``seed + rank``.
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}

    def make_env(rank): # pylint: disable=C0111
        def _thunk():
            atari_env = make_atari(env_id)
            atari_env.seed(seed + rank)
            # A falsy logger directory is handed to Monitor as-is.
            monitor_path = logger.get_dir()
            if monitor_path:
                monitor_path = os.path.join(logger.get_dir(), str(rank))
            monitored = Monitor(atari_env, monitor_path)
            return wrap_deepmind(monitored, **wrapper_kwargs)
        return _thunk

    set_global_seeds(seed)
    env_fns = [make_env(i + start_index) for i in range(num_env)]
    return SubprocVecEnv(env_fns)
开发者ID:Hwhitetooth,项目名称:lirpg,代码行数:16,代码来源:cmd_util.py

示例12: make_atari_env

# 需要导入模块: from baselines.common.vec_env import subproc_vec_env [as 别名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 别名]
def make_atari_env(env_id, num_env, seed, wrapper_kwargs=None, start_index=0):
    """
    Build a SubprocVecEnv of ``num_env`` monitored, DeepMind-wrapped Atari
    environments, with MPI-aware seeding and log file names.

    Each worker is seeded with ``seed + 10000*mpi_rank + rank`` (or ``None``
    when no seed is given) and its monitor file is named
    ``"<mpi_rank>.<rank>"`` inside the logger directory.
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}

    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0

    def make_env(rank): # pylint: disable=C0111
        def _thunk():
            atari_env = make_atari(env_id)

            if seed is not None:
                worker_seed = seed + 10000*mpi_rank + rank
            else:
                worker_seed = None
            atari_env.seed(worker_seed)

            # A falsy logger directory is handed to Monitor as-is.
            monitor_path = logger.get_dir()
            if monitor_path:
                monitor_path = os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank))
            monitored = Monitor(atari_env, monitor_path)
            return wrap_deepmind(monitored, **wrapper_kwargs)
        return _thunk

    set_global_seeds(seed)
    env_fns = [make_env(i + start_index) for i in range(num_env)]
    return SubprocVecEnv(env_fns)
开发者ID:MaxSobolMark,项目名称:HardRLWithYoutube,代码行数:17,代码来源:cmd_util.py

示例13: _make_a2c

# 需要导入模块: from baselines.common.vec_env import subproc_vec_env [as 别名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 别名]
def _make_a2c(env_path, num_env, seed, reward_range, base_port, unity_arguments):
    """
    Build a SubprocVecEnv of ``num_env`` wrapped Unity environments for A2C.

    Worker ``rank`` (0 .. num_env - 1) is built by calling
    ``_wrap_unity_env`` with that rank and the shared settings.
    """
    def make_env(rank):
        # Defer construction so the environment is created when the vec env
        # invokes the thunk.
        def _thunk():
            unity_env = _wrap_unity_env(env_path, seed, base_port, unity_arguments, rank, reward_range)
            return unity_env
        return _thunk

    env_fns = []
    for worker_rank in range(num_env):
        env_fns.append(make_env(worker_rank))
    return SubprocVecEnv(env_fns)
开发者ID:ArztSamuel,项目名称:DRL_DeliveryDuel,代码行数:11,代码来源:run_a2c.py

示例14: make_vec_envs

# 需要导入模块: from baselines.common.vec_env import subproc_vec_env [as 别名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 别名]
def make_vec_envs(env_name, seed, num_processes, gamma, log_dir, add_timestep,
                  device, allow_early_resets, num_frame_stack=None):
    """
    Build a vectorized environment wrapped in ``VecPyTorch``.

    ``num_processes`` thunks are created through ``make_env``; more than one
    thunk yields a SubprocVecEnv, otherwise a DummyVecEnv. Environments whose
    observation space has a one-dimensional shape are additionally wrapped in
    ``VecNormalize`` (with ``ret=False`` when ``gamma`` is None, otherwise
    with the given ``gamma``).

    ``num_frame_stack`` is accepted but currently unused.
    """
    env_fns = [
        make_env(env_name, seed, rank, log_dir, add_timestep, allow_early_resets)
        for rank in range(num_processes)
    ]

    vec_env_cls = SubprocVecEnv if len(env_fns) > 1 else DummyVecEnv
    envs = vec_env_cls(env_fns)

    if len(envs.observation_space.shape) == 1:
        normalize_kwargs = {'ret': False} if gamma is None else {'gamma': gamma}
        envs = VecNormalize(envs, **normalize_kwargs)

    envs = VecPyTorch(envs, device)

    # NOTE: frame stacking via VecPyTorchFrameStack (num_frame_stack frames,
    # or 4 for 3-D observation spaces) is disabled here.

    return envs


# Can be used to test recurrent policies for Reacher-v2 
开发者ID:montrealrobotics,项目名称:dal,代码行数:29,代码来源:envs.py

示例15: make_vec_env

# 需要导入模块: from baselines.common.vec_env import subproc_vec_env [as 别名]
# 或者: from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv [as 别名]
def make_vec_env(env_id, env_type, num_env, seed,
                 wrapper_kwargs=None,
                 env_kwargs=None,
                 start_index=0,
                 reward_scale=1.0,
                 flatten_dict_observations=True,
                 gamestate=None,
                 initializer=None,
                 force_dummy=False):
    """
    Build a vectorized environment out of ``num_env`` ``make_env`` thunks.

    A SubprocVecEnv is returned when ``force_dummy`` is false and
    ``num_env > 1``; otherwise a DummyVecEnv with the same number of
    environments. ``initializer`` is forwarded only on the SubprocVecEnv
    path; DummyVecEnv thunks always receive ``initializer=None``. When a seed
    is given it is offset by ``10000 * mpi_rank``.
    """
    if not wrapper_kwargs:
        wrapper_kwargs = {}
    if not env_kwargs:
        env_kwargs = {}

    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    if seed is not None:
        seed = seed + 10000 * mpi_rank
    logger_dir = logger.get_dir()

    def make_thunk(rank, initializer=None):
        # Deferred constructor: the environment is only built when the
        # vec env calls the thunk.
        def _thunk():
            return make_env(
                env_id=env_id,
                env_type=env_type,
                mpi_rank=mpi_rank,
                subrank=rank,
                seed=seed,
                reward_scale=reward_scale,
                gamestate=gamestate,
                flatten_dict_observations=flatten_dict_observations,
                wrapper_kwargs=wrapper_kwargs,
                env_kwargs=env_kwargs,
                logger_dir=logger_dir,
                initializer=initializer,
            )
        return _thunk

    set_global_seeds(seed)

    use_subprocesses = not force_dummy and num_env > 1
    if use_subprocesses:
        thunks = [make_thunk(i + start_index, initializer=initializer) for i in range(num_env)]
        return SubprocVecEnv(thunks)

    thunks = [make_thunk(i + start_index, initializer=None) for i in range(num_env)]
    return DummyVecEnv(thunks)
开发者ID:openai,项目名称:baselines,代码行数:40,代码来源:cmd_util.py


注:本文中的baselines.common.vec_env.subproc_vec_env.SubprocVecEnv方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。