

Python gym.make Method Code Examples

This article collects typical usage examples of the gym.make method in Python, gathered from open-source projects. If you are wondering what gym.make does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore other usage examples from the gym package.


The examples below show 15 uses of the gym.make method, ordered by popularity.
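As a quick orientation before the examples, here is a minimal sketch of the typical gym.make workflow. It is not taken from any of the projects below; it assumes the classic 'CartPole-v0' environment id and the pre-0.26 Gym API used throughout this page, where reset() returns only the observation and step() returns a 4-tuple (newer Gym/Gymnasium releases changed both signatures).

import gym

# Minimal usage sketch (assumes 'CartPole-v0' and the older 4-tuple step API).
env = gym.make('CartPole-v0')
obs = env.reset()                            # old API: reset() returns only the observation
done = False
episode_rew = 0.0
while not done:
    action = env.action_space.sample()       # random placeholder policy
    obs, rew, done, info = env.step(action)  # old API: (obs, reward, done, info)
    episode_rew += rew
print("Episode reward", episode_rew)
env.close()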

Example 1: _create_environment

# Required module: import gym [as alias]
# Or: from gym import make [as alias]
def _create_environment(config):
  """Constructor for an instance of the environment.

  Args:
    config: Object providing configurations via attributes.

  Returns:
    Wrapped OpenAI Gym environment.
  """
  if isinstance(config.env, str):
    env = gym.make(config.env)
  else:
    env = config.env()
  if config.max_length:
    env = tools.wrappers.LimitDuration(env, config.max_length)
  env = tools.wrappers.RangeNormalize(env)
  env = tools.wrappers.ClipAction(env)
  env = tools.wrappers.ConvertTo32Bit(env)
  return env 
Developer: utra-robosoccer, Project: soccer-matlab, Lines: 21, Source: train.py

Example 2: main

# Required module: import gym [as alias]
# Or: from gym import make [as alias]
def main():
    env = gym.make('CartPoleBulletEnv-v0')
    act = deepq.load("cartpole_model.pkl")

    while True:
        obs, done = env.reset(), False
        print("obs")
        print(obs)
        print("type(obs)")
        print(type(obs))
        episode_rew = 0
        while not done:
            env.render()
           
            o = obs[None]
            aa = act(o)
            a = aa[0]
            obs, rew, done, _ = env.step(a)
            episode_rew += rew
        print("Episode reward", episode_rew) 
Developer: utra-robosoccer, Project: soccer-matlab, Lines: 22, Source: enjoy_pybullet_cartpole.py

Example 3: main

# Required module: import gym [as alias]
# Or: from gym import make [as alias]
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('exp_name', type=str)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--double_q', action='store_true')
    parser.add_argument('--gpu', type=int, default=0)
    args = parser.parse_args()
    
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    
    if not(os.path.exists('data')):
        os.makedirs('data')
    
    # Get Atari games.
    task = gym.make('PongNoFrameskip-v4')

    # Run training
    seed = random.randint(0, 9999)
    print('random seed = %d' % seed)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session, args, num_timesteps=5e7) 
Developer: xuwd11, Project: cs294-112_hws, Lines: 25, Source: run_dqn_atari.py

Example 4: main

# Required module: import gym [as alias]
# Or: from gym import make [as alias]
def main():
    env = gym.make("MountainCar-v0")
    # Enabling layer_norm here is important for parameter space noise!
    model = deepq.models.mlp([64], layer_norm=True)
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-3,
        max_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.1,
        print_freq=10,
        param_noise=True
    )
    print("Saving model to mountaincar_model.pkl")
    act.save("mountaincar_model.pkl") 
Developer: Hwhitetooth, Project: lirpg, Lines: 19, Source: train_mountaincar.py

Example 5: main

# Required module: import gym [as alias]
# Or: from gym import make [as alias]
def main():
    env = gym.make("CartPole-v0")
    model = deepq.models.mlp([64])
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-3,
        max_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        print_freq=10,
        callback=callback
    )
    print("Saving model to cartpole_model.pkl")
    act.save("cartpole_model.pkl") 
Developer: Hwhitetooth, Project: lirpg, Lines: 18, Source: train_cartpole.py

Example 6: test_monitor

# Required module: import gym [as alias]
# Or: from gym import make [as alias]
def test_monitor():
    env = gym.make("CartPole-v1")
    env.seed(0)
    mon_file = "/tmp/baselines-test-%s.monitor.csv" % uuid.uuid4()
    menv = Monitor(env, mon_file)
    menv.reset()
    for _ in range(1000):
        _, _, done, _ = menv.step(0)
        if done:
            menv.reset()

    f = open(mon_file, 'rt')

    firstline = f.readline()
    assert firstline.startswith('#')
    metadata = json.loads(firstline[1:])
    assert metadata['env_id'] == "CartPole-v1"
    assert set(metadata.keys()) == {'env_id', 'gym_version', 't_start'},  "Incorrect keys in monitor metadata"

    last_logline = pandas.read_csv(f, index_col=None)
    assert set(last_logline.keys()) == {'l', 't', 'r'}, "Incorrect keys in monitor logline"
    f.close()
    os.remove(mon_file) 
Developer: Hwhitetooth, Project: lirpg, Lines: 25, Source: monitor.py

Example 7: test_cartpole

# Required module: import gym [as alias]
# Or: from gym import make [as alias]
def test_cartpole(alg):
    '''
    Test if the algorithm (with an mlp policy)
    can learn to balance the cartpole
    '''

    kwargs = common_kwargs.copy()
    kwargs.update(learn_kwargs[alg])

    learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)
    def env_fn(): 
        
        env = gym.make('CartPole-v0')
        env.seed(0)
        return env

    reward_per_episode_test(env_fn, learn_fn, 100) 
Developer: MaxSobolMark, Project: HardRLWithYoutube, Lines: 19, Source: test_cartpole.py

Example 8: make_mujoco_env

# Required module: import gym [as alias]
# Or: from gym import make [as alias]
def make_mujoco_env(env_id, seed, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    myseed = seed + 1000 * rank if seed is not None else None
    set_global_seeds(myseed)
    env = gym.make(env_id)
    logger_path = None if logger.get_dir() is None else os.path.join(logger.get_dir(), str(rank))
    env = Monitor(env, logger_path, allow_early_resets=True)
    env.seed(seed)

    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)

    return env 
Developer: MaxSobolMark, Project: HardRLWithYoutube, Lines: 19, Source: cmd_util.py

Example 9: main

# Required module: import gym [as alias]
# Or: from gym import make [as alias]
def main(env_id, policy_file, record, stochastic, extra_kwargs):
    import gym
    from gym import wrappers
    import tensorflow as tf
    from es_distributed.policies import MujocoPolicy
    import numpy as np

    env = gym.make(env_id)
    if record:
        import uuid
        env = wrappers.Monitor(env, '/tmp/' + str(uuid.uuid4()), force=True)

    if extra_kwargs:
        import json
        extra_kwargs = json.loads(extra_kwargs)

    with tf.Session():
        pi = MujocoPolicy.Load(policy_file, extra_kwargs=extra_kwargs)
        while True:
            rews, t = pi.rollout(env, render=True, random_stream=np.random if stochastic else None)
            print('return={:.4f} len={}'.format(rews.sum(), t))

            if record:
                env.close()
                return 
Developer: openai, Project: evolution-strategies-starter, Lines: 27, Source: viz.py

Example 10: make_env

# Required module: import gym [as alias]
# Or: from gym import make [as alias]
def make_env(args, seed, test):
    if args.env.startswith('Roboschool'):
        # Check gym version because roboschool does not work with gym>=0.15.6
        from distutils.version import StrictVersion
        gym_version = StrictVersion(gym.__version__)
        if gym_version >= StrictVersion('0.15.6'):
            raise RuntimeError('roboschool does not work with gym>=0.15.6')
        import roboschool  # NOQA
    env = gym.make(args.env)
    # Unwrap the TimeLimit wrapper
    assert isinstance(env, gym.wrappers.TimeLimit)
    env = env.env
    # Use different random seeds for train and test envs
    env_seed = 2 ** 32 - 1 - seed if test else seed
    env.seed(int(env_seed))
    # Cast observations to float32 because our model uses float32
    env = chainerrl.wrappers.CastObservationToFloat32(env)
    # Normalize action space to [-1, 1]^n
    env = chainerrl.wrappers.NormalizeActionSpace(env)
    if args.monitor:
        env = chainerrl.wrappers.Monitor(
            env, args.outdir, force=True, video_callable=lambda _: True)
    if args.render:
        env = chainerrl.wrappers.Render(env, mode='human')
    return env 
Developer: chainer, Project: chainerrl, Lines: 27, Source: train_soft_actor_critic_atlas.py

Example 11: test_scale_reward

# Required module: import gym [as alias]
# Or: from gym import make [as alias]
def test_scale_reward(self):
        env = chainerrl.wrappers.ScaleReward(
            gym.make(self.env_id), scale=self.scale)
        self.assertIsNone(env.original_reward)
        self.assertAlmostEqual(env.scale, self.scale)

        _ = env.reset()
        _, r, _, _ = env.step(env.action_space.sample())

        if self.env_id == 'CartPole-v1':
            # Original reward must be 1
            self.assertAlmostEqual(env.original_reward, 1)
            self.assertAlmostEqual(r, self.scale)
        elif self.env_id == 'MountainCar-v0':
            # Original reward must be -1
            self.assertAlmostEqual(env.original_reward, -1)
            self.assertAlmostEqual(r, -self.scale)
        else:
            assert False 
Developer: chainer, Project: chainerrl, Lines: 21, Source: test_scale_reward.py

Example 12: make_env

# Required module: import gym [as alias]
# Or: from gym import make [as alias]
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    env = SimpleMonitor(env)
    env = wrap_dqn(env)
    return env 
Developer: StephanZheng, Project: neural-fingerprinting, Lines: 7, Source: enjoy-adv.py

Example 13: make_env

# Required module: import gym [as alias]
# Or: from gym import make [as alias]
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    monitored_env = SimpleMonitor(env)
    env = wrap_dqn(monitored_env)
    return env, monitored_env 
Developer: StephanZheng, Project: neural-fingerprinting, Lines: 7, Source: train.py

Example 14: make_env

# Required module: import gym [as alias]
# Or: from gym import make [as alias]
def make_env(self):
        return gym.make(self.game) 
Developer: awslabs, Project: dynamic-training-with-apache-mxnet-on-aws, Lines: 4, Source: rl_data.py

Example 15: get_env

# Required module: import gym [as alias]
# Or: from gym import make [as alias]
def get_env(env_str):
  return gym.make(env_str) 
Developer: ringringyi, Project: DOTA_models, Lines: 4, Source: gym_wrapper.py


Note: The gym.make examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their respective authors, and copyright remains with the original authors; for distribution and use, please refer to each project's license. Do not reproduce without permission.