This article collects typical code examples of the gym.make method in Python. If you are wondering what gym.make does, how to call it, or what real-world usage looks like, the curated examples below may help. You can also explore further usage examples from the gym module, to which the method belongs.
The following presents 15 code examples of gym.make, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
Example 1: _create_environment
# Required imports: import gym [as alias]
# Or: from gym import make [as alias]
def _create_environment(config):
    """Constructor for an instance of the environment.

    Args:
        config: Object providing configurations via attributes.

    Returns:
        Wrapped OpenAI Gym environment.
    """
    if isinstance(config.env, str):
        env = gym.make(config.env)
    else:
        env = config.env()
    if config.max_length:
        env = tools.wrappers.LimitDuration(env, config.max_length)
    env = tools.wrappers.RangeNormalize(env)
    env = tools.wrappers.ClipAction(env)
    env = tools.wrappers.ConvertTo32Bit(env)
    return env
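A minimal usage sketch for Example 1, assuming the surrounding project's tools.wrappers module is importable; the attribute container, environment id, and max_length below are purely illustrative and not part of the original snippet:
from types import SimpleNamespace

# Hypothetical config: any object exposing `env` and `max_length` works,
# because _create_environment only reads those two attributes.
config = SimpleNamespace(env='Pendulum-v0', max_length=200)
env = _create_environment(config)
obs = env.reset()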
Example 2: main
# Required imports: import gym [as alias]
# Or: from gym import make [as alias]
def main():
    env = gym.make('CartPoleBulletEnv-v0')
    act = deepq.load("cartpole_model.pkl")

    while True:
        obs, done = env.reset(), False
        print("obs")
        print(obs)
        print("type(obs)")
        print(type(obs))
        episode_rew = 0
        while not done:
            env.render()
            o = obs[None]  # add a batch dimension for the policy
            aa = act(o)
            a = aa[0]
            obs, rew, done, _ = env.step(a)
            episode_rew += rew
        print("Episode reward", episode_rew)
Example 3: main
# Required imports: import gym [as alias]
# Or: from gym import make [as alias]
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('exp_name', type=str)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--double_q', action='store_true')
    parser.add_argument('--gpu', type=int, default=0)
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)

    if not os.path.exists('data'):
        os.makedirs('data')

    # Get Atari games.
    task = gym.make('PongNoFrameskip-v4')

    # Run training
    seed = random.randint(0, 9999)
    print('random seed = %d' % seed)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session, args, num_timesteps=5e7)
Example 4: main
# Required imports: import gym [as alias]
# Or: from gym import make [as alias]
def main():
    env = gym.make("MountainCar-v0")
    # Enabling layer_norm here is important for parameter space noise!
    model = deepq.models.mlp([64], layer_norm=True)
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-3,
        max_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.1,
        print_freq=10,
        param_noise=True
    )
    print("Saving model to mountaincar_model.pkl")
    act.save("mountaincar_model.pkl")
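After training, the saved pickle can be loaded and evaluated greedily, mirroring the pattern of Example 2; this is a hedged sketch assuming the same baselines deepq API, not part of the original example:
env = gym.make("MountainCar-v0")
act = deepq.load("mountaincar_model.pkl")
obs, done = env.reset(), False
while not done:
    env.render()
    obs, rew, done, _ = env.step(act(obs[None])[0])  # obs[None] adds a batch dimension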
Example 5: main
# Required imports: import gym [as alias]
# Or: from gym import make [as alias]
def main():
    env = gym.make("CartPole-v0")
    model = deepq.models.mlp([64])
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-3,
        max_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        print_freq=10,
        callback=callback
    )
    print("Saving model to cartpole_model.pkl")
    act.save("cartpole_model.pkl")
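The callback passed to deepq.learn is defined elsewhere in the original script. A typical definition, along the lines of the baselines CartPole training example, receives the training loop's local and global variables and returns True to stop training early; the 199-reward threshold is illustrative:
def callback(lcl, _glb):
    # Stop training once the mean reward over the last 100 episodes
    # reaches the CartPole-v0 "solved" threshold.
    is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199
    return is_solved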
Example 6: test_monitor
# Required imports: import gym [as alias]
# Or: from gym import make [as alias]
def test_monitor():
    env = gym.make("CartPole-v1")
    env.seed(0)
    mon_file = "/tmp/baselines-test-%s.monitor.csv" % uuid.uuid4()
    menv = Monitor(env, mon_file)
    menv.reset()
    for _ in range(1000):
        _, _, done, _ = menv.step(0)
        if done:
            menv.reset()

    f = open(mon_file, 'rt')
    firstline = f.readline()
    assert firstline.startswith('#')
    metadata = json.loads(firstline[1:])
    assert metadata['env_id'] == "CartPole-v1"
    assert set(metadata.keys()) == {'env_id', 'gym_version', 't_start'}, "Incorrect keys in monitor metadata"

    last_logline = pandas.read_csv(f, index_col=None)
    assert set(last_logline.keys()) == {'l', 't', 'r'}, "Incorrect keys in monitor logline"
    f.close()
    os.remove(mon_file)
Example 7: test_cartpole
# Required imports: import gym [as alias]
# Or: from gym import make [as alias]
def test_cartpole(alg):
    '''
    Test if the algorithm (with an mlp policy)
    can learn to balance the cartpole
    '''
    kwargs = common_kwargs.copy()
    kwargs.update(learn_kwargs[alg])

    learn_fn = lambda e: get_learn_function(alg)(env=e, **kwargs)

    def env_fn():
        env = gym.make('CartPole-v0')
        env.seed(0)
        return env

    reward_per_episode_test(env_fn, learn_fn, 100)
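The common_kwargs and learn_kwargs dictionaries, along with get_learn_function and reward_per_episode_test, are defined elsewhere in the test module. A plausible shape for the two dictionaries, with illustrative values only, is:
# Illustrative only: shared settings plus per-algorithm overrides.
common_kwargs = dict(
    total_timesteps=30000,
    network='mlp',
    gamma=1.0,
    seed=0,
)
learn_kwargs = {
    'deepq': dict(),
    'a2c': dict(),
    'ppo2': dict(value_network='copy'),
}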
Example 8: make_mujoco_env
# Required imports: import gym [as alias]
# Or: from gym import make [as alias]
def make_mujoco_env(env_id, seed, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    myseed = seed + 1000 * rank if seed is not None else None
    set_global_seeds(myseed)
    env = gym.make(env_id)
    logger_path = None if logger.get_dir() is None else os.path.join(logger.get_dir(), str(rank))
    env = Monitor(env, logger_path, allow_early_resets=True)
    env.seed(seed)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
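A minimal call-site sketch for Example 8; the environment id, seed, and reward scale are illustrative, and the helper assumes MPI, Monitor, logger, and set_global_seeds are already imported in the surrounding module:
env = make_mujoco_env('HalfCheetah-v2', seed=0, reward_scale=0.1)
obs = env.reset()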
Example 9: main
# Required imports: import gym [as alias]
# Or: from gym import make [as alias]
def main(env_id, policy_file, record, stochastic, extra_kwargs):
    import gym
    from gym import wrappers
    import tensorflow as tf
    from es_distributed.policies import MujocoPolicy
    import numpy as np

    env = gym.make(env_id)
    if record:
        import uuid
        env = wrappers.Monitor(env, '/tmp/' + str(uuid.uuid4()), force=True)

    if extra_kwargs:
        import json
        extra_kwargs = json.loads(extra_kwargs)

    with tf.Session():
        pi = MujocoPolicy.Load(policy_file, extra_kwargs=extra_kwargs)
        while True:
            rews, t = pi.rollout(env, render=True, random_stream=np.random if stochastic else None)
            print('return={:.4f} len={}'.format(rews.sum(), t))
            if record:
                env.close()
                return
Example 10: make_env
# Required imports: import gym [as alias]
# Or: from gym import make [as alias]
def make_env(args, seed, test):
    if args.env.startswith('Roboschool'):
        # Check gym version because roboschool does not work with gym>=0.15.6
        from distutils.version import StrictVersion
        gym_version = StrictVersion(gym.__version__)
        if gym_version >= StrictVersion('0.15.6'):
            raise RuntimeError('roboschool does not work with gym>=0.15.6')
        import roboschool  # NOQA
    env = gym.make(args.env)
    # Unwrap the TimeLimit wrapper
    assert isinstance(env, gym.wrappers.TimeLimit)
    env = env.env
    # Use different random seeds for train and test envs
    env_seed = 2 ** 32 - 1 - seed if test else seed
    env.seed(int(env_seed))
    # Cast observations to float32 because our model uses float32
    env = chainerrl.wrappers.CastObservationToFloat32(env)
    # Normalize action space to [-1, 1]^n
    env = chainerrl.wrappers.NormalizeActionSpace(env)
    if args.monitor:
        env = chainerrl.wrappers.Monitor(
            env, args.outdir, force=True, video_callable=lambda _: True)
    if args.render:
        env = chainerrl.wrappers.Render(env, mode='human')
    return env
Example 11: test_scale_reward
# Required imports: import gym [as alias]
# Or: from gym import make [as alias]
def test_scale_reward(self):
    env = chainerrl.wrappers.ScaleReward(
        gym.make(self.env_id), scale=self.scale)
    self.assertIsNone(env.original_reward)
    self.assertAlmostEqual(env.scale, self.scale)

    _ = env.reset()
    _, r, _, _ = env.step(env.action_space.sample())

    if self.env_id == 'CartPole-v1':
        # Original reward must be 1
        self.assertAlmostEqual(env.original_reward, 1)
        self.assertAlmostEqual(r, self.scale)
    elif self.env_id == 'MountainCar-v0':
        # Original reward must be -1
        self.assertAlmostEqual(env.original_reward, -1)
        self.assertAlmostEqual(r, -self.scale)
    else:
        assert False
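This test method reads self.env_id and self.scale from its test class. In chainerrl's test suite such attributes are usually injected via chainer.testing's parameterization helpers; the following is only a sketch of one way the surrounding class might be set up, with illustrative scale values:
import unittest
from chainer import testing

# Hypothetical parameterization: inject env_id and scale onto the test class.
@testing.parameterize(*testing.product({
    'env_id': ['CartPole-v1', 'MountainCar-v0'],
    'scale': [1.0, 0.1],
}))
class TestScaleReward(unittest.TestCase):
    pass  # test_scale_reward from Example 11 would be defined here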
Example 12: make_env
# Required imports: import gym [as alias]
# Or: from gym import make [as alias]
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    env = SimpleMonitor(env)
    env = wrap_dqn(env)
    return env
Example 13: make_env
# Required imports: import gym [as alias]
# Or: from gym import make [as alias]
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    monitored_env = SimpleMonitor(env)
    env = wrap_dqn(monitored_env)
    return env, monitored_env
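Examples 12 and 13 differ only in whether the monitor wrapper is returned alongside the fully wrapped environment. A hedged usage sketch, assuming the game name follows gym's Atari naming convention:
env, monitored_env = make_env("Pong")  # Example 13: keep a handle on the monitor
obs = env.reset()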
Example 14: make_env
# Required imports: import gym [as alias]
# Or: from gym import make [as alias]
def make_env(self):
    return gym.make(self.game)
Example 15: get_env
# Required imports: import gym [as alias]
# Or: from gym import make [as alias]
def get_env(env_str):
    return gym.make(env_str)