本文整理汇总了Python中gym.spaces.discrete.Discrete类的典型用法代码示例。如果您正苦于以下问题：Python discrete.Discrete类的具体用法？Python discrete.Discrete怎么用？Python discrete.Discrete使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块gym.spaces.discrete的用法示例。
在下文中一共展示了discrete.Discrete类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_space_size
# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def get_space_size(space):
    """Return the size descriptor of a gym space.

    A ``Box`` yields its ``shape``; a ``Discrete`` yields ``[1]`` (one slot
    for the action index, deliberately not ``space.n``).

    Raises:
        NotImplementedError: if ``space`` is neither ``Box`` nor ``Discrete``.
    """
    if isinstance(space, Box):
        return space.shape
    if isinstance(space, Discrete):
        return [1]
    unsupported = type(space)
    raise NotImplementedError("Assuming to use Box or Discrete, not {}".format(unsupported))
示例2: __init__
# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self,
             env: gym.Env,
             parameters: Union[None, str, np.ndarray] = None,
             action_in_domain: bool = False,
             next_state_in_domain: bool = False):
    """
    Parameters
    ----------
    env: gym.Env
        A gym environment for which the reward function is defined.
    parameters: Union[None, str, np.ndarray]
        A numpy ndarray containing the values for all elements in the reward table.
        If value is 'random', initializes with random parameters (mean 0, standard deviation 1).
        The size of parameters must correspond to the size of the domain
        (one table value for each possible input)
    action_in_domain: bool
        Indicates whether actions are in the domain, i.e. R(s, a) or R(s, a, s')
    next_state_in_domain: bool
        Indicates whether next states are in the domain, i.e. R(s, a, s')
    """
    super(TabularRewardFunction, self).__init__(
        env, parameters, action_in_domain, next_state_in_domain)

    # this reward function is only implemented for
    # discrete state and action spaces
    assert isinstance(env.observation_space, DiscreteSpace)
    assert isinstance(env.action_space, DiscreteSpace)

    # calculate number of elements in domain:
    self.domain_size = self.env.observation_space.n
    if self.action_in_domain:
        self.domain_size *= self.env.action_space.n
    if self.next_state_in_domain:
        self.domain_size *= self.env.observation_space.n

    # Check the type before comparing: `ndarray == 'random'` is evaluated
    # elementwise by NumPy, and using that result in an `if` raises
    # "truth value of an array ... is ambiguous" for multi-element arrays.
    if isinstance(parameters, str) and parameters == 'random':
        self.parameters = np.random.standard_normal(size=self.domain_size)
    else:
        self.parameters = np.array(parameters)
        assert len(self.parameters) == self.domain_size
示例3: __init__
# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self,
             name,
             num_envs=1,
             single_process=True,
             log_dir=None,
             episode_life=True,
             seed=None):
    """Build a vectorised environment and cache its space dimensions.

    Args:
        name: environment name forwarded to ``make_env``.
        num_envs: number of environment factories to create.
        single_process: use ``DummyVecEnv`` when true, else ``SubprocVecEnv``.
        log_dir: if given, the directory is created via ``mkdir``.
        episode_life: forwarded to ``make_env``.
        seed: base seed; a random one below 1e9 is drawn when ``None``.

    Raises:
        AssertionError: if the action space is neither ``Discrete`` nor ``Box``.
    """
    if seed is None:
        seed = np.random.randint(int(1e9))
    if log_dir is not None:
        mkdir(log_dir)
    envs = [make_env(name, seed, i, episode_life) for i in range(num_envs)]
    if single_process:
        Wrapper = DummyVecEnv
    else:
        Wrapper = SubprocVecEnv
    self.env = Wrapper(envs)
    self.name = name
    self.observation_space = self.env.observation_space
    self.state_dim = int(np.prod(self.env.observation_space.shape))

    self.action_space = self.env.action_space
    if isinstance(self.action_space, Discrete):
        self.action_dim = self.action_space.n
    elif isinstance(self.action_space, Box):
        self.action_dim = self.action_space.shape[0]
    else:
        # The original `assert 'unknown action space'` asserted a non-empty
        # string, which is always truthy, so it could never fail and
        # `action_dim` was silently left unset. Fail explicitly instead.
        raise AssertionError('unknown action space')
示例4: __init__
# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self, n, goal_length, num_distractor, distractor_length, max_steps=10**6, collect_key=True, world=None):
    """Store the layout parameters, rewards and gym spaces, then reset the game.

    Args:
        n: grid size; observations have shape ``(n + 2, n + 2, 3)``.
        goal_length: length of the goal chain.
        num_distractor: number of distractor chains.
        distractor_length: length of each distractor chain.
        max_steps: step limit stored on the instance.
        collect_key: if True, keys are collected immediately when available.
        world: forwarded to ``self.reset``.
    """
    # Layout parameters
    self.n = n
    self.goal_length = goal_length
    self.num_distractor = num_distractor
    self.distractor_length = distractor_length
    self.collect_key = collect_key
    distractor_pairs = distractor_length * num_distractor
    self.num_pairs = goal_length - 1 + distractor_pairs

    # Penalties and rewards
    self.step_cost = 0
    self.reward_key = 1
    self.reward_gem = 10
    self.reward_distractor = -1

    # Rendering / episode limits
    self.viewer = None
    self.max_steps = max_steps

    # Gym spaces: uint8 RGB image observation, one action per ACTION_LOOKUP entry
    side = n + 2
    self.observation_space = Box(low=0, high=255, shape=(side, side, 3), dtype=np.uint8)
    self.action_space = Discrete(len(ACTION_LOOKUP))

    # Game initialisation; these attributes are set before reset() runs
    self.owned_key = [220, 220, 220]
    self.np_random_seed = None
    self.reset(world)

    # Episode bookkeeping, assigned after the reset exactly as before
    self.num_env_steps = 0
    self.episode_reward = 0
    self.last_frames = deque(maxlen=3)
示例5: __init__
# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self, min_value, max_value, is_discrete):
    """Describe a value range by its bounds, shape and discreteness.

    Args:
        min_value: lower bound; a scalar or a list/tuple/ndarray.
        max_value: upper bound; same kind and shape as ``min_value``.
        is_discrete: whether the space is discrete.

    Raises:
        ValueError: on mismatched shapes, empty bounds, unsupported bound
            types, or a float bound combined with ``is_discrete``.
    """
    sequence_types = (list, np.ndarray, tuple)
    self.__min_value, self.__max_value = min_value, max_value

    if self.is_scaler(min_value) or self.is_scaler(max_value):
        self.__shape = [1]
    elif not (isinstance(min_value, sequence_types) and isinstance(max_value, sequence_types)):
        raise ValueError("Unsupported type format !")
    else:
        low_shape = np.asarray(min_value).shape
        high_shape = np.asarray(max_value).shape
        if low_shape != high_shape:
            raise ValueError("Shape of min_value and max_value are mismatched !")
        if low_shape[0] == 0:
            raise ValueError("No value error")
        if len(min_value) == 1 and low_shape[0] == 1:
            # A single-entry sequence is unwrapped and stored like a scalar.
            self.__min_value = min_value[0]
            self.__max_value = max_value[0]
            self.__shape = [1]
        else:
            self.__shape = list(low_shape)

    self.__discrete = is_discrete
    has_float_bound = any(isinstance(bound, float) for bound in (min_value, max_value))
    if is_discrete and has_float_bound:
        raise ValueError('Discrete data cannot be a real number !')
示例6: convert_openai_space
# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def convert_openai_space(space):
    """Translate a gym ``Box`` or ``Discrete`` space into a ``Space``.

    A ``Box`` maps to a non-discrete ``Space`` with the box's low/high bounds;
    a ``Discrete`` maps to a discrete ``Space`` covering ``0 .. n - 1``.

    Raises:
        ValueError: for any other space type.
    """
    from gym.spaces.box import Box
    from gym.spaces.discrete import Discrete

    if isinstance(space, Box):
        return Space(space.low, space.high, False)
    if isinstance(space, Discrete):
        highest_index = space.n-1
        return Space(0, highest_index, True)
    raise ValueError("Does not support other types than Box and Discrete")
示例7: __init__
# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self,
             dim_room=(10, 10),
             max_steps=120,
             num_boxes=3,
             num_gen_steps=None):
    """Initialise the push-and-pull Sokoban variant.

    Args:
        dim_room: room dimensions as (rows, columns).
        max_steps: step limit forwarded to the parent constructor.
        num_boxes: number of boxes in the room.
        num_gen_steps: forwarded to the parent constructor.
    """
    # Local import keeps this block self-contained: it needs numpy only for
    # the explicit observation dtype below.
    import numpy as np

    super(PushAndPullSokobanEnv, self).__init__(dim_room, max_steps, num_boxes, num_gen_steps)

    # The screen is 16 pixels per room cell in each direction.
    screen_height, screen_width = (dim_room[0] * 16, dim_room[1] * 16)
    # State the dtype explicitly: observations are 0..255 RGB images, and
    # without it Box's dtype is left to a gym-version-dependent default.
    self.observation_space = Box(low=0, high=255,
                                 shape=(screen_height, screen_width, 3),
                                 dtype=np.uint8)
    self.boxes_are_on_target = [False] * num_boxes
    self.action_space = Discrete(len(ACTION_LOOKUP))

    self.reset()
示例8: __init__
# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self,
             dim_room=(10, 10),
             max_steps=120,
             num_boxes=4,
             num_gen_steps=None,
             reset=True):
    """Configure the Sokoban environment and optionally generate a room.

    Args:
        dim_room: room dimensions as (rows, columns).
        max_steps: step limit stored on the instance.
        num_boxes: number of boxes in the room.
        num_gen_steps: generation steps; when ``None`` it defaults to
            ``int(1.7 * (rows + columns))``.
        reset: when true, ``self.reset()`` is called at the end.
    """
    # General Configuration
    self.dim_room = dim_room
    # Identity check: `== None` would dispatch to a custom __eq__.
    if num_gen_steps is None:
        self.num_gen_steps = int(1.7 * (dim_room[0] + dim_room[1]))
    else:
        self.num_gen_steps = num_gen_steps

    self.num_boxes = num_boxes
    self.boxes_on_target = 0

    # Penalties and Rewards
    self.penalty_for_step = -0.1
    self.penalty_box_off_target = -1
    self.reward_box_on_target = 1
    self.reward_finished = 10
    self.reward_last = 0

    # Other Settings
    self.viewer = None
    self.max_steps = max_steps
    self.action_space = Discrete(len(ACTION_LOOKUP))
    # The screen is 16 pixels per room cell in each direction.
    screen_height, screen_width = (dim_room[0] * 16, dim_room[1] * 16)
    self.observation_space = Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)

    if reset:
        # Initialize Room
        self.reset()
示例9: __init__
# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self,
             dim_room=(10, 10),
             max_steps=120,
             num_boxes=3,
             num_gen_steps=None):
    """Initialise the two-player Sokoban variant.

    The parent constructor is called with ``reset=False``; the room is
    generated here through ``self.reset(second_player=True)``.

    Args:
        dim_room: room dimensions as (rows, columns).
        max_steps: step limit forwarded to the parent constructor.
        num_boxes: number of boxes in the room.
        num_gen_steps: forwarded to the parent constructor.
    """
    # Local import keeps this block self-contained: it needs numpy only for
    # the explicit observation dtype below.
    import numpy as np

    super(TwoPlayerSokobanEnv, self).__init__(dim_room, max_steps, num_boxes, num_gen_steps, reset=False)

    # The screen is 16 pixels per room cell in each direction.
    screen_height, screen_width = (dim_room[0] * 16, dim_room[1] * 16)
    # State the dtype explicitly: observations are 0..255 RGB images, and
    # without it Box's dtype is left to a gym-version-dependent default.
    self.observation_space = Box(low=0, high=255,
                                 shape=(screen_height, screen_width, 3),
                                 dtype=np.uint8)
    self.boxes_are_on_target = [False] * num_boxes
    self.action_space = Discrete(len(ACTION_LOOKUP))

    self.player_position = []
    self.player_positions = {0: [0, 0], 1: [1, 1]}

    self.reset(second_player=True)
示例10: action_space
# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def action_space(self):
    """Return the action space for the traffic lights.

    In discrete mode this is a ``Discrete`` space with
    ``2 ** num_traffic_lights`` actions; otherwise a float32 ``Box`` in
    [-1, 1] with one entry per traffic light.
    """
    if not self.discrete:
        per_light_shape = (self.num_traffic_lights,)
        return Box(low=-1, high=1, shape=per_light_shape, dtype=np.float32)
    return Discrete(2 ** self.num_traffic_lights)
示例11: action_space
# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def action_space(self):
    """Return the action space: two discrete actions, or one float in [-1, 1].

    In discrete mode this is ``Discrete(2)``; otherwise a float32 ``Box`` of
    shape ``(1,)`` bounded by -1 and 1.
    """
    if not self.discrete:
        return Box(low=-1, high=1, shape=(1,), dtype=np.float32)
    return Discrete(2)
示例12: __init__
# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self, size, sleep=0, dict_state=False):
    """Store the test-environment settings, expose two actions, then reset.

    Args:
        size: stored as ``self.size``.
        sleep: stored as ``self.sleep``.
        dict_state: stored as ``self.dict_state``.
    """
    self.size, self.sleep, self.dict_state = size, sleep, dict_state
    self.action_space = Discrete(2)
    # reset() runs last, once every attribute above is in place
    self.reset()
示例13: __init__
# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self, map_id: int = 0):
    """
    Parameters
    ----------
    map_id: int
        Which version of the game to play. Legal values are 0 and 1. Default is 0.
        Corresponds to the hard-coded maps MAP0 and MAP1 defined in the same module.
    """
    # pick the hard-coded layout
    # (if adding more values here, also adapt the docstring above)
    if map_id == 0:
        used_map = MAP0
    elif map_id == 1:
        used_map = MAP1
    else:
        raise NotImplementedError()

    # wall and reward arrays, plus the indices of all reward fields
    self.map_walls, self.map_rewards = get_maps(used_map)
    self.rews_where = np.where(self.map_rewards > 0)
    # list of coordinates for reward fields
    self.rew_coords = get_rew_coords(self.map_rewards)
    # number of reward fields in the selected map
    self.num_rewards = len(self.rews_where[0])

    # precompute a path between every ordered pair of reward fields,
    # using the 'pathfinding' library
    self.paths = {}
    inverted_walls = np.abs(self.map_walls - 1.0)
    matrix = np.swapaxes(inverted_walls, 0, 1).tolist()
    finder = AStarFinder(diagonal_movement=DiagonalMovement.never)
    first_coords, second_coords = self.rews_where[0], self.rews_where[1]
    for src in range(self.num_rewards):
        routes = {}
        for dst in range(self.num_rewards):
            # a fresh Grid is built for every search, as in the original
            grid = Grid(matrix=matrix)
            start = grid.node(first_coords[src], second_coords[src])
            end = grid.node(first_coords[dst], second_coords[dst])
            found, _ = finder.find_path(start, end, grid)
            routes[dst] = [[int(step[0]), int(step[1])] for step in found]
        self.paths[src] = routes

    super(MazeWorld, self).__init__()

    # observation space and action space
    self.observation_space = MultiBinary(self.num_rewards * 2)
    self.action_space = Discrete(self.num_rewards)

    self.current_state = None
    self.terminated = True
示例14: __init__
# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self, name):
    """
    Init function.

    Creates the gym environment and records the observation and action
    bounds. For a ``Discrete`` action space the action is a single
    dimension flagged ``False`` in the type list; otherwise the action space
    is treated as a ``Box`` and every dimension is flagged ``True``.

    :param name: gym task name
    """
    self.reset_task()
    self.__envir = gym.make(name)  # gym environment
    self.__envir_name = name  # environment name
    obser_space = self.__envir.observation_space
    # the number of parameters in observation
    self.__obser_size = obser_space.shape[0]
    self.total_step = 0  # step counter, starts at zero
    # policy model, it's a neural network in this example
    self.__policy_model = None
    self.__max_step = 0  # maximum stop step
    self.__stop_step = 0  # the stop step in recent trajectory

    # Observation bounds. The original filled the "low" list from `.high`
    # and the "up" list from `.low`; each list now holds the bound its name
    # says it holds.
    self.__obser_low_bound = [
        obser_space.low[i] for i in range(self.__obser_size)]
    self.__obser_up_bound = [
        obser_space.high[i] for i in range(self.__obser_size)]

    self.__action_sca = []  # environment action space, specified by gym
    self.__action_low_bound = []  # action lower bound
    self.__action_up_bound = []  # action upper bound
    action_space = self.__envir.action_space
    # if the dimension of action space is one
    if isinstance(action_space, Discrete):
        self.__action_size = 1
        self.__action_sca = [action_space.n]
        # the type of action, false means discrete
        self.__action_type = [False]
    # if action object is Box
    else:
        self.__action_size = action_space.shape[0]
        self.__action_type = [True] * self.__action_size
        self.__action_low_bound = [
            action_space.low[i] for i in range(self.__action_size)]
        self.__action_up_bound = [
            action_space.high[i] for i in range(self.__action_size)]
示例15: test_vecenv
# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def test_vecenv(size=10, num=8, sleep=0.001):
    """Check that the vector-env implementations behave identically.

    Under pytest the same action sequence is stepped through ``VectorEnv``
    and ``SubprocVectorEnv`` and their outputs are compared at each step.
    When run as a script, ``RayVectorEnv`` is added and each implementation
    is timed instead.

    Args:
        size: ``size`` of the first environment; env ``k`` gets ``size + k``.
        num: number of environments per vector env.
        sleep: forwarded to every ``MyTestEnv``.
    """
    verbose = __name__ == '__main__'
    env_fns = [
        lambda i=i: MyTestEnv(size=i, sleep=sleep)
        for i in range(size, size + num)
    ]
    venv = [
        VectorEnv(env_fns),
        SubprocVectorEnv(env_fns),
    ]
    if verbose:
        venv.append(RayVectorEnv(env_fns))
    try:
        for v in venv:
            v.seed()
        action_list = [1] * 5 + [0] * 10 + [1] * 20
        if not verbose:
            for v in venv:
                v.reset()
            for a in action_list:
                o = []
                for v in venv:
                    A, B, C, D = v.step([a] * num)
                    if sum(C):
                        A = v.reset(np.where(C)[0])
                    o.append([A, B, C, D])
                # zip(*o) groups the same output (A, B, C or D) across all
                # vector envs. Compare the first env against EVERY other
                # one: the former `range(1, len(i) - 1)` skipped the last
                # env, so with two envs nothing was ever checked.
                # NOTE(review): assumes each output supports elementwise
                # `==` followed by `.all()` - confirm for the info output.
                for outputs in zip(*o):
                    reference = outputs[0]
                    for other in outputs[1:]:
                        assert (reference == other).all()
        else:
            t = [0, 0, 0]
            for i, e in enumerate(venv):
                t[i] = time.time()
                e.reset()
                for a in action_list:
                    done = e.step([a] * num)[2]
                    if sum(done) > 0:
                        e.reset(np.where(done)[0])
                t[i] = time.time() - t[i]
            print(f'VectorEnv: {t[0]:.6f}s')
            print(f'SubprocVectorEnv: {t[1]:.6f}s')
            print(f'RayVectorEnv: {t[2]:.6f}s')
        for v in venv:
            assert v.size == list(range(size, size + num))
            assert v.env_num == num
            assert v.action_space == [Discrete(2)] * num
    finally:
        # Close every vector env even when an assertion above fails.
        for v in venv:
            v.close()