This article collects typical usage examples of the Python method gym.spaces.Discrete. If you are wondering what spaces.Discrete does, how to use it, or what code that uses it looks like, the curated examples here may help. You can also explore further usage examples from its containing module, gym.spaces.
Below are 13 code examples of the spaces.Discrete method, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
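Before the collected examples, here is a minimal sketch of the core Discrete API (my own illustration, not one of the collected examples):

from gym.spaces import Discrete

space = Discrete(5)       # the set of integers {0, 1, 2, 3, 4}
print(space.n)            # 5
action = space.sample()   # a uniformly random integer from the set
print(space.contains(3))  # True
print(space.contains(5))  # False; valid values are 0 .. n-1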
Example 1: __init__
# Required import: from gym import spaces [as alias]
# Or: from gym.spaces import Discrete [as alias]
def __init__(self, renders=True):
    # start the bullet physics server
    self._renders = renders
    if renders:
        p.connect(p.GUI)
    else:
        p.connect(p.DIRECT)

    observation_high = np.array([
        np.finfo(np.float32).max,
        np.finfo(np.float32).max,
        np.finfo(np.float32).max,
        np.finfo(np.float32).max])
    action_high = np.array([0.1])

    self.action_space = spaces.Discrete(9)
    self.observation_space = spaces.Box(-observation_high, observation_high)

    self.theta_threshold_radians = 1
    self.x_threshold = 2.4
    self._seed()
    # self.reset()
    self.viewer = None
    self._configure()
Example 2: __init__
# Required import: from gym import spaces [as alias]
# Or: from gym.spaces import Discrete [as alias]
def __init__(self):
    self._seed()
    self.viewer = None

    self.world = Box2D.b2World()
    self.moon = None
    self.lander = None
    self.particles = []

    self.prev_reward = None

    high = np.array([np.inf] * N_OBS_DIM)  # useful range is -1 .. +1, but spikes can be higher
    self.observation_space = spaces.Box(-high, high)
    self.action_space = spaces.Discrete(N_ACT_DIM)

    self.curr_step = None

    self._reset()
Example 3: observation_placeholder
# Required import: from gym import spaces [as alias]
# Or: from gym.spaces import Discrete [as alias]
def observation_placeholder(ob_space, batch_size=None, name='Ob'):
    '''
    Create a placeholder, sized appropriately for the observation space, to feed observations into.

    Parameters:
    ----------
    ob_space: gym.Space     observation space
    batch_size: int         size of the batch to be fed into input. Can be left None in most cases.
    name: str               name of the placeholder

    Returns:
    -------
    tensorflow placeholder tensor
    '''
    assert isinstance(ob_space, Discrete) or isinstance(ob_space, Box), \
        'Can only deal with Discrete and Box observation spaces for now'
    return tf.placeholder(shape=(batch_size,) + ob_space.shape, dtype=ob_space.dtype, name=name)
Example 4: encode_observation
# Required import: from gym import spaces [as alias]
# Or: from gym.spaces import Discrete [as alias]
def encode_observation(ob_space, placeholder):
    '''
    Encode the input in the way that is appropriate to the observation space.

    Parameters:
    ----------
    ob_space: gym.Space          observation space
    placeholder: tf.placeholder  observation input placeholder
    '''
    if isinstance(ob_space, Discrete):
        return tf.to_float(tf.one_hot(placeholder, ob_space.n))
    elif isinstance(ob_space, Box):
        return tf.to_float(placeholder)
    else:
        raise NotImplementedError
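To see what the two helpers above do together, here is a hedged TF1-style sketch. It assumes TensorFlow 1.x and a gym version where Discrete(n).shape is (); the session run is my illustration, not part of the original examples:

import tensorflow as tf
from gym.spaces import Discrete

ob_space = Discrete(5)                       # integer observations 0..4, scalar shape ()
ph = observation_placeholder(ob_space)       # placeholder of shape (None,), dtype int64
encoded = encode_observation(ob_space, ph)   # one-hot float tensor of shape (None, 5)

with tf.Session() as sess:
    print(sess.run(encoded, feed_dict={ph: [0, 3]}))
    # [[1. 0. 0. 0. 0.]
    #  [0. 0. 0. 1. 0.]]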
Example 5: __init__
# Required import: from gym import spaces [as alias]
# Or: from gym.spaces import Discrete [as alias]
def __init__(
        self,
        n_actions=10,
        seed=0,
        episode_len=100
):
    self.np_random = np.random.RandomState()
    self.np_random.seed(seed)
    # note: RandomState.randint's upper bound is exclusive, so this draws from 0..n_actions-2
    self.sequence = [self.np_random.randint(0, n_actions - 1) for _ in range(episode_len)]

    self.action_space = Discrete(n_actions)
    self.observation_space = Discrete(1)

    self.episode_len = episode_len
    self.time = 0
    self.reset()
Example 6: __init__
# Required import: from gym import spaces [as alias]
# Or: from gym.spaces import Discrete [as alias]
def __init__(
        self,
        seed=0,
        episode_len=None,
        no_images=None
):
    from tensorflow.examples.tutorials.mnist import input_data
    # we could use a temporary directory for this with a context manager and
    # TemporaryDirectory, but then each test that uses mnist would re-download the data;
    # this way the data is not cleaned up, but we only download it once per machine
    mnist_path = osp.join(tempfile.gettempdir(), 'MNIST_data')
    with filelock.FileLock(mnist_path + '.lock'):
        self.mnist = input_data.read_data_sets(mnist_path)

    self.np_random = np.random.RandomState()
    self.np_random.seed(seed)

    self.observation_space = Box(low=0.0, high=1.0, shape=(28, 28, 1))
    self.action_space = Discrete(10)
    self.episode_len = episode_len
    self.time = 0
    self.no_images = no_images

    self.train_mode()
    self.reset()
Example 7: get_action_type
# Required import: from gym import spaces [as alias]
# Or: from gym.spaces import Discrete [as alias]
def get_action_type(action_space):
    '''Get the action type, used to choose the probability distribution for sampling actions from NN logit output'''
    if isinstance(action_space, spaces.Box):
        shape = action_space.shape
        assert len(shape) == 1
        if shape[0] == 1:
            return 'continuous'
        else:
            return 'multi_continuous'
    elif isinstance(action_space, spaces.Discrete):
        return 'discrete'
    elif isinstance(action_space, spaces.MultiDiscrete):
        return 'multi_discrete'
    elif isinstance(action_space, spaces.MultiBinary):
        return 'multi_binary'
    else:
        raise NotImplementedError

# action_policy base methods
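As a quick, hedged sanity check of the mapping, these spot-check calls are my own and not from the source project:

from gym import spaces

assert get_action_type(spaces.Discrete(4)) == 'discrete'
assert get_action_type(spaces.Box(low=-1.0, high=1.0, shape=(1,))) == 'continuous'
assert get_action_type(spaces.Box(low=-1.0, high=1.0, shape=(3,))) == 'multi_continuous'
assert get_action_type(spaces.MultiDiscrete([3, 5])) == 'multi_discrete'
assert get_action_type(spaces.MultiBinary(4)) == 'multi_binary'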
Example 8: set_gym_space_attr
# Required import: from gym import spaces [as alias]
# Or: from gym.spaces import Discrete [as alias]
def set_gym_space_attr(gym_space):
    '''Set missing gym space attributes for standardization'''
    if isinstance(gym_space, spaces.Box):
        setattr(gym_space, 'is_discrete', False)
    elif isinstance(gym_space, spaces.Discrete):
        setattr(gym_space, 'is_discrete', True)
        setattr(gym_space, 'low', 0)
        setattr(gym_space, 'high', gym_space.n)
    elif isinstance(gym_space, spaces.MultiBinary):
        setattr(gym_space, 'is_discrete', True)
        setattr(gym_space, 'low', np.full(gym_space.n, 0))
        setattr(gym_space, 'high', np.full(gym_space.n, 2))
    elif isinstance(gym_space, spaces.MultiDiscrete):
        setattr(gym_space, 'is_discrete', True)
        setattr(gym_space, 'low', np.zeros_like(gym_space.nvec))
        setattr(gym_space, 'high', np.array(gym_space.nvec))
    else:
        raise ValueError('gym_space not recognized')
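A minimal sketch of the effect on a Discrete space (my own demonstration, assuming the function above is in scope; the printed values are what the branches assign):

from gym import spaces

space = spaces.Discrete(4)
set_gym_space_attr(space)
print(space.is_discrete)      # True
print(space.low, space.high)  # 0 4  (high mirrors Discrete.n, an exclusive bound)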
Example 9: __init__
# Required import: from gym import spaces [as alias]
# Or: from gym.spaces import Discrete [as alias]
def __init__(self, size=2, discrete=True, partially_observable=False,
             episodic=True, deterministic=False):
    self.size = size
    self.terminal_state = size
    self.episodic = episodic
    self.partially_observable = partially_observable
    self.deterministic = deterministic
    self.n_max_offset = 1
    # (s_0, ..., s_N) + terminal state + offset
    self.n_dim_obs = self.size + 1 + self.n_max_offset
    self.observation_space = spaces.Box(
        low=-np.inf, high=np.inf,
        shape=(self.n_dim_obs,), dtype=np.float32,
    )
    if discrete:
        self.action_space = spaces.Discrete(self.size)
    else:
        self.action_space = spaces.Box(
            low=-1.0, high=1.0,
            shape=(self.size,), dtype=np.float32,
        )
Example 10: __init__
# Required import: from gym import spaces [as alias]
# Or: from gym.spaces import Discrete [as alias]
def __init__(self, file_name, batch_size=128, n_step=1):
    # create an offline_env to do fake interaction with the agent
    self.num_epoch = 0
    self.num_record = 0
    self._offset = 0

    # how many records to read from the table at one time
    self.batch_size = batch_size
    # number of steps reserved for n-step DQN
    self.n_step = n_step

    # define the shapes of observation and action;
    # we follow the definitions of gym.spaces:
    # `Box` for continuous spaces, `Discrete` for discrete spaces, and `Dict` for multiple inputs.
    # The low/high limits are not actually used by the agent but are required by gym.spaces.
    self.observation_space = Box(low=-np.inf, high=np.inf, shape=(4,))
    self.action_space = Discrete(n=2)

    with open(file_name) as fr:
        self.data = fr.readlines()
    self.num_record = len(self.data)
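The comment above also mentions Dict for multiple inputs; as a hedged illustration, this combined space is my own and not part of the original example:

from gym.spaces import Box, Dict, Discrete
import numpy as np

# bundle a continuous sensor vector with a discrete mode flag
combined = Dict({
    'sensors': Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32),
    'mode': Discrete(2),
})
sample = combined.sample()  # an OrderedDict with keys 'sensors' and 'mode'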
Example 11: test_setup
# Required import: from gym import spaces [as alias]
# Or: from gym.spaces import Discrete [as alias]
def test_setup(self):
    ep = gym_env_problem.GymEnvProblem(
        base_env_name="CartPole-v0", batch_size=5)

    # Check that environments were created and that they are `batch_size` in number.
    ep.assert_common_preconditions()

    # Expectations on the observation space.
    observation_space = ep.observation_space
    self.assertIsInstance(observation_space, Box)
    self.assertEqual(observation_space.shape, (4,))
    self.assertEqual(observation_space.dtype, np.float32)

    # Expectations on the action space.
    action_space = ep.action_space
    self.assertIsInstance(action_space, Discrete)
    self.assertEqual(action_space.shape, ())
    self.assertEqual(action_space.dtype, np.int64)
    self.assertEqual(ep.num_actions, 2)

    # Reward range is infinite here.
    self.assertFalse(ep.is_reward_range_finite)
Example 12: __init__
# Required import: from gym import spaces [as alias]
# Or: from gym.spaces import Discrete [as alias]
def __init__(self, strict=False):
    self.strict = strict
    # What about metadata and spec?
    self.reward_range = (-1.0, 1.0)
    # Action space -- 9 positions that we can choose to mark.
    self.action_space = spaces.Discrete(9)
    # Observation space -- this hopefully does what we need.
    self.observation_space = spaces.Box(
        low=-1, high=1, shape=(3, 3), dtype=np.int64)
    # Set the seed.
    self.np_random = None
    self.seed()
    # Start the game.
    self.board_state = None
    self.done = False
    self.reset()
Example 13: pick_action
# Required import: from gym import spaces [as alias]
# Or: from gym.spaces import Discrete [as alias]
def pick_action(self, state: Union[int, float, np.ndarray]
                ) -> Union[int, float, np.ndarray]:
    """Pick an action given a state.

    Picks uniformly at random from all possible actions, using the environment's
    action_space.sample() method.

    Parameters
    ----------
    state: int
        An integer corresponding to a state of a DiscreteEnv.
        Not used in this agent.

    Returns
    -------
    Union[int, float, np.ndarray]
        An action
    """
    # if other spaces are needed, check whether their sample method conforms with the
    # returned type, and change if necessary.
    assert isinstance(self.env.action_space,
                      (Box, Discrete, MultiDiscrete, MultiBinary))
    return self.env.action_space.sample()
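For reference, here is a hedged illustration of what sample() returns for each space admitted by the assert above (exact values are random; dtypes may vary across gym versions):

from gym import spaces

print(spaces.Discrete(3).sample())                          # e.g. 2
print(spaces.Box(low=-1.0, high=1.0, shape=(2,)).sample())  # e.g. [ 0.31 -0.74], float32
print(spaces.MultiDiscrete([2, 3]).sample())                # e.g. [1 2]
print(spaces.MultiBinary(4).sample())                       # e.g. [0 1 1 0]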