本文整理匯總了Python中gym.spaces.discrete.Discrete類的典型用法代碼示例。如果您正苦於以下問題:Python discrete.Discrete類的具體用法?Python discrete.Discrete怎麽用?Python discrete.Discrete使用的例子?那麽,這裏精選的代碼示例或許可以為您提供幫助。您也可以進一步了解該類所在模塊gym.spaces.discrete的用法示例。
在下文中一共展示了discrete.Discrete類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: get_space_size
# 需要導入模塊: from gym.spaces import discrete [as 別名]
# 或者: from gym.spaces.discrete import Discrete [as 別名]
def get_space_size(space):
    """Return the size descriptor of a ``Box`` or ``Discrete`` space.

    Parameters
    ----------
    space:
        The space object to inspect.

    Returns
    -------
    ``space.shape`` when ``space`` is a ``Box``; the one-element list
    ``[1]`` when it is a ``Discrete`` (a discrete value counts as a single
    entry here, not as ``space.n`` entries).

    Raises
    ------
    NotImplementedError
        If ``space`` is neither a ``Box`` nor a ``Discrete``.
    """
    if isinstance(space, Box):
        return space.shape
    if isinstance(space, Discrete):
        # Deliberately a single slot rather than ``space.n``.
        return [1]
    unsupported = type(space)
    raise NotImplementedError(
        "Assuming to use Box or Discrete, not {}".format(unsupported))
示例2: __init__
# 需要導入模塊: from gym.spaces import discrete [as 別名]
# 或者: from gym.spaces.discrete import Discrete [as 別名]
def __init__(self,
             env: gym.Env,
             parameters: Union[None, str, np.ndarray] = None,
             action_in_domain: bool = False,
             next_state_in_domain: bool = False):
    """
    Build a tabular reward function with one table entry per domain element.

    Parameters
    ----------
    env: gym.Env
        A gym environment for which the reward function is defined.
        Both its observation space and its action space must be discrete.
    parameters: Union[None, str, np.ndarray]
        A numpy ndarray containing the values for all elements in the
        reward table. If value is 'random', initializes with random
        parameters (mean 0, standard deviation 1).
        The size of parameters must correspond to the size of the domain
        (one table value for each possible input).
    action_in_domain: bool
        Indicates whether actions are in the domain, i.e. R(s, a) or
        R(s, a, s').
    next_state_in_domain: bool
        Indicates whether next states are in the domain, i.e. R(s, a, s').

    Raises
    ------
    AssertionError
        If either space is not discrete, or if the number of parameters
        does not match the domain size.
    """
    super(TabularRewardFunction, self).__init__(
        env, parameters, action_in_domain, next_state_in_domain)

    # This reward function is only implemented for discrete state and
    # action spaces.
    assert isinstance(env.observation_space, DiscreteSpace)
    assert isinstance(env.action_space, DiscreteSpace)

    # Number of table entries: |S|, times |A| if actions are in the
    # domain, times |S| again if next states are in the domain.
    self.domain_size = self.env.observation_space.n
    if self.action_in_domain:
        self.domain_size *= self.env.action_space.n
    if self.next_state_in_domain:
        self.domain_size *= self.env.observation_space.n

    # Check the type first: comparing an ndarray with a string is an
    # element-wise operation, and using its result as an ``if`` condition
    # can raise "truth value of an array is ambiguous".
    if isinstance(parameters, str) and parameters == 'random':
        self.parameters = np.random.standard_normal(size=self.domain_size)
    else:
        # NOTE(review): ``parameters=None`` (the default) produces a 0-d
        # array here and the ``len`` below then fails -- confirm whether
        # callers are expected to always pass explicit parameters.
        self.parameters = np.array(parameters)

    assert len(self.parameters) == self.domain_size
示例3: __init__
# 需要導入模塊: from gym.spaces import discrete [as 別名]
# 或者: from gym.spaces.discrete import Discrete [as 別名]
def __init__(self,
             name,
             num_envs=1,
             single_process=True,
             log_dir=None,
             episode_life=True,
             seed=None):
    """Create a vectorised environment wrapper and record its dimensions.

    :param name: environment name forwarded to ``make_env``
    :param num_envs: number of environment factories to build
    :param single_process: if true wrap the factories in ``DummyVecEnv``,
        otherwise in ``SubprocVecEnv``
    :param log_dir: directory created via ``mkdir`` when not ``None``
    :param episode_life: forwarded unchanged to ``make_env``
    :param seed: base seed forwarded to ``make_env``; drawn at random
        from ``[0, 1e9)`` when ``None``
    :raises AssertionError: if the action space is neither ``Discrete``
        nor ``Box``
    """
    if seed is None:
        seed = np.random.randint(int(1e9))
    if log_dir is not None:
        mkdir(log_dir)

    envs = [make_env(name, seed, i, episode_life) for i in range(num_envs)]
    Wrapper = DummyVecEnv if single_process else SubprocVecEnv
    self.env = Wrapper(envs)
    self.name = name

    self.observation_space = self.env.observation_space
    # Flattened observation size.
    self.state_dim = int(np.prod(self.env.observation_space.shape))

    self.action_space = self.env.action_space
    if isinstance(self.action_space, Discrete):
        self.action_dim = self.action_space.n
    elif isinstance(self.action_space, Box):
        self.action_dim = self.action_space.shape[0]
    else:
        # The original ``assert 'unknown action space'`` asserted a
        # non-empty string, which is always true and therefore never
        # fired; fail explicitly instead of leaving ``action_dim`` unset.
        raise AssertionError('unknown action space')
示例4: __init__
# 需要導入模塊: from gym.spaces import discrete [as 別名]
# 或者: from gym.spaces.discrete import Discrete [as 別名]
def __init__(self, n, goal_length, num_distractor, distractor_length, max_steps=10**6, collect_key=True, world=None):
    """Configure the environment, define its spaces and reset it once.

    :param n: grid size; observations have shape ``(n + 2, n + 2, 3)``
    :param goal_length: length of the goal path
    :param num_distractor: number of distractor paths
    :param distractor_length: length of each distractor path
    :param max_steps: stored as ``self.max_steps``
    :param collect_key: if True, keys are collected immediately when
        available
    :param world: forwarded to ``self.reset``
    """
    # Layout parameters
    self.n = n
    self.goal_length = goal_length
    self.num_distractor = num_distractor
    self.distractor_length = distractor_length
    distractor_pairs = distractor_length * num_distractor
    self.num_pairs = goal_length - 1 + distractor_pairs
    self.collect_key = collect_key

    # Penalties and rewards
    self.step_cost = 0
    self.reward_gem = 10
    self.reward_key = 1
    self.reward_distractor = -1

    # Other settings
    self.viewer = None
    self.max_steps = max_steps

    # Spaces: one discrete action per ACTION_LOOKUP entry, RGB image
    # observations.
    self.action_space = Discrete(len(ACTION_LOOKUP))
    side = n + 2
    self.observation_space = Box(low=0, high=255, shape=(side, side, 3), dtype=np.uint8)

    # Game initialisation. The counters below are assigned only after
    # reset() has run, exactly as in the original ordering.
    self.owned_key = [220, 220, 220]
    self.np_random_seed = None
    self.reset(world)

    self.num_env_steps = 0
    self.episode_reward = 0
    self.last_frames = deque(maxlen=3)
示例5: __init__
# 需要導入模塊: from gym.spaces import discrete [as 別名]
# 或者: from gym.spaces.discrete import Discrete [as 別名]
def __init__(self, min_value, max_value, is_discrete):
    """Describe a value range together with its shape and discreteness.

    :param min_value: lower bound, a scalar or a list/tuple/ndarray
    :param max_value: upper bound, same kind as ``min_value``
    :param is_discrete: whether the described values are discrete
    :raises ValueError: if the bound shapes differ, the bounds are empty,
        the bounds have an unsupported type, or a discrete range is given
        a ``float`` bound
    """
    self.__min_value = min_value
    self.__max_value = max_value

    sequence_types = (list, np.ndarray, tuple)
    if self.is_scaler(min_value) or self.is_scaler(max_value):
        # NOTE(review): a scalar paired with a sequence bound is accepted
        # here with shape [1]; confirm whether that should be rejected.
        self.__shape = [1]
    elif isinstance(min_value, sequence_types) and isinstance(max_value, sequence_types):
        shape = np.asarray(min_value).shape
        shape2 = np.asarray(max_value).shape
        if shape != shape2:
            raise ValueError("Shape of min_value and max_value are mismatched !")
        if shape[0] == 0:
            raise ValueError("No value error")
        self.__shape = list(shape)
        # Collapse only genuine one-element 1-D bounds to scalars. The
        # previous test (len == 1 and shape[0] == 1) also matched shapes
        # such as (1, k) and wrongly reported them as shape [1].
        if shape == (1,):
            self.__min_value = min_value[0]
            self.__max_value = max_value[0]
            self.__shape = [1]
    else:
        raise ValueError("Unsupported type format !")

    self.__discrete = is_discrete
    if is_discrete and (isinstance(min_value, float) or isinstance(max_value, float)):
        raise ValueError('Discrete data cannot be a real number !')
示例6: convert_openai_space
# 需要導入模塊: from gym.spaces import discrete [as 別名]
# 或者: from gym.spaces.discrete import Discrete [as 別名]
def convert_openai_space(space):
    """Convert a gym ``Box`` or ``Discrete`` space into a ``Space``.

    :param space: the gym space to convert
    :return: ``Space(space.low, space.high, False)`` for a ``Box``;
        ``Space(0, space.n - 1, True)`` for a ``Discrete``
    :raises ValueError: for any other space type
    """
    # Imported locally, as in the original, so gym is only required when
    # this converter is actually called.
    from gym.spaces.box import Box
    from gym.spaces.discrete import Discrete

    if isinstance(space, Box):
        lower, upper = space.low, space.high
        return Space(lower, upper, False)
    if isinstance(space, Discrete):
        largest_value = space.n - 1
        return Space(0, largest_value, True)
    raise ValueError("Does not support other types than Box and Discrete")
示例7: __init__
# 需要導入模塊: from gym.spaces import discrete [as 別名]
# 或者: from gym.spaces.discrete import Discrete [as 別名]
def __init__(self,
             dim_room=(10, 10),
             max_steps=120,
             num_boxes=3,
             num_gen_steps=None):
    """Set up the push-and-pull Sokoban variant and reset it.

    :param dim_room: room dimensions; each cell is rendered as 16x16
        pixels in the observation
    :param max_steps: forwarded to the parent constructor
    :param num_boxes: number of boxes; also the length of
        ``boxes_are_on_target``
    :param num_gen_steps: forwarded to the parent constructor
    """
    super(PushAndPullSokobanEnv, self).__init__(dim_room, max_steps, num_boxes, num_gen_steps)

    self.action_space = Discrete(len(ACTION_LOOKUP))

    rows, cols = dim_room[0], dim_room[1]
    pixel_shape = (rows * 16, cols * 16, 3)
    # NOTE(review): no dtype is passed here, so Box falls back to its
    # default dtype -- confirm whether an integer image dtype was meant.
    self.observation_space = Box(low=0, high=255, shape=pixel_shape)

    # One flag per box, initially all off target.
    self.boxes_are_on_target = [False] * num_boxes

    _ = self.reset()
示例8: __init__
# 需要導入模塊: from gym.spaces import discrete [as 別名]
# 或者: from gym.spaces.discrete import Discrete [as 別名]
def __init__(self,
             dim_room=(10, 10),
             max_steps=120,
             num_boxes=4,
             num_gen_steps=None,
             reset=True):
    """Configure the environment and optionally generate the first room.

    :param dim_room: room dimensions; each cell is rendered as 16x16
        pixels in the observation
    :param max_steps: stored as ``self.max_steps``
    :param num_boxes: number of boxes in the room
    :param num_gen_steps: number of generation steps; when ``None`` it
        defaults to ``int(1.7 * (dim_room[0] + dim_room[1]))``
    :param reset: when true, ``self.reset()`` is called at the end of
        construction
    """
    # General configuration
    self.dim_room = dim_room
    # Identity test: ``== None`` dispatches to ``__eq__`` and misbehaves
    # for array-like arguments.
    if num_gen_steps is None:
        self.num_gen_steps = int(1.7 * (dim_room[0] + dim_room[1]))
    else:
        self.num_gen_steps = num_gen_steps

    self.num_boxes = num_boxes
    self.boxes_on_target = 0

    # Penalties and rewards
    self.penalty_for_step = -0.1
    self.penalty_box_off_target = -1
    self.reward_box_on_target = 1
    self.reward_finished = 10
    self.reward_last = 0

    # Other settings
    self.viewer = None
    self.max_steps = max_steps

    # One discrete action per ACTION_LOOKUP entry; RGB image observations.
    self.action_space = Discrete(len(ACTION_LOOKUP))
    screen_height, screen_width = (dim_room[0] * 16, dim_room[1] * 16)
    self.observation_space = Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)

    if reset:
        # Initialise the room.
        _ = self.reset()
示例9: __init__
# 需要導入模塊: from gym.spaces import discrete [as 別名]
# 或者: from gym.spaces.discrete import Discrete [as 別名]
def __init__(self,
             dim_room=(10, 10),
             max_steps=120,
             num_boxes=3,
             num_gen_steps=None):
    """Set up the two-player Sokoban variant and reset it.

    The parent constructor is called with ``reset=False``; this
    constructor then performs the only reset itself, with
    ``second_player=True``.

    :param dim_room: room dimensions; each cell is rendered as 16x16
        pixels in the observation
    :param max_steps: forwarded to the parent constructor
    :param num_boxes: number of boxes; also the length of
        ``boxes_are_on_target``
    :param num_gen_steps: forwarded to the parent constructor
    """
    super(TwoPlayerSokobanEnv, self).__init__(dim_room, max_steps, num_boxes, num_gen_steps, reset=False)

    self.action_space = Discrete(len(ACTION_LOOKUP))

    rows, cols = dim_room[0], dim_room[1]
    pixel_shape = (rows * 16, cols * 16, 3)
    self.observation_space = Box(low=0, high=255, shape=pixel_shape)

    # One flag per box, initially all off target.
    self.boxes_are_on_target = [False] * num_boxes

    # Placeholder player state assigned before the reset below.
    self.player_position = []
    self.player_positions = {0: [0, 0], 1: [1, 1]}

    _ = self.reset(second_player=True)
示例10: action_space
# 需要導入模塊: from gym.spaces import discrete [as 別名]
# 或者: from gym.spaces.discrete import Discrete [as 別名]
def action_space(self):
    """See class definition.

    Returns a ``Discrete`` space with ``2 ** num_traffic_lights`` actions
    when ``self.discrete`` is truthy, otherwise a ``Box`` in ``[-1, 1]``
    with one float32 entry per traffic light.
    """
    if not self.discrete:
        per_light = (self.num_traffic_lights,)
        return Box(low=-1, high=1, shape=per_light, dtype=np.float32)
    return Discrete(2 ** self.num_traffic_lights)
示例11: action_space
# 需要導入模塊: from gym.spaces import discrete [as 別名]
# 或者: from gym.spaces.discrete import Discrete [as 別名]
def action_space(self):
    """See class definition.

    Returns ``Discrete(2)`` when ``self.discrete`` is truthy, otherwise a
    one-dimensional float32 ``Box`` bounded by ``[-1, 1]``.
    """
    if not self.discrete:
        return Box(low=-1, high=1, shape=(1,), dtype=np.float32)
    return Discrete(2)
示例12: __init__
# 需要導入模塊: from gym.spaces import discrete [as 別名]
# 或者: from gym.spaces.discrete import Discrete [as 別名]
def __init__(self, size, sleep=0, dict_state=False):
    """Store the configuration, define a two-action space and reset.

    :param size: stored as ``self.size``
    :param sleep: stored as ``self.sleep``
    :param dict_state: stored as ``self.dict_state``
    """
    self.size, self.sleep, self.dict_state = size, sleep, dict_state
    self.action_space = Discrete(2)
    # All attributes above are assigned before the initial reset.
    self.reset()
示例13: __init__
# 需要導入模塊: from gym.spaces import discrete [as 別名]
# 或者: from gym.spaces.discrete import Discrete [as 別名]
def __init__(self, map_id: int = 0):
    """
    Load one of the hard-coded maps and precompute reward-to-reward paths.

    Parameters
    ----------
    map_id: int
        Which version of the game to play. Legal values are 0 and 1.
        Default is 0. Corresponds to the hard-coded maps MAP0 and MAP1
        defined in the same module.

    Raises
    ------
    NotImplementedError
        If ``map_id`` is neither 0 nor 1.
    """
    # If adding more map ids here, also adapt the docstring above.
    if map_id not in (0, 1):
        raise NotImplementedError()
    used_map = MAP0 if map_id == 0 else MAP1

    # Arrays of walls and reward fields.
    self.map_walls, self.map_rewards = get_maps(used_map)
    self.rews_where = np.where(self.map_rewards > 0)
    # List of coordinates of the reward fields.
    self.rew_coords = get_rew_coords(self.map_rewards)
    # Number of reward fields in the chosen map.
    self.num_rewards = len(self.rews_where[0])

    # Precompute a path between every ordered pair of reward fields using
    # the 'pathfinding' library. The matrix is the axis-swapped
    # |walls - 1| array.
    walkable = np.swapaxes(np.abs(self.map_walls - 1.0), 0, 1).tolist()
    finder = AStarFinder(diagonal_movement=DiagonalMovement.never)
    first_axis, second_axis = self.rews_where[0], self.rews_where[1]

    self.paths = {}
    for src in range(self.num_rewards):
        routes = {}
        self.paths[src] = routes
        for dst in range(self.num_rewards):
            # A new Grid is built for every query, as in the original
            # (presumably because a grid keeps search state -- confirm).
            grid = Grid(matrix=walkable)
            start = grid.node(first_axis[src], second_axis[src])
            end = grid.node(first_axis[dst], second_axis[dst])
            found, _ = finder.find_path(start, end, grid)
            routes[dst] = [[int(step[0]), int(step[1])] for step in found]

    super(MazeWorld, self).__init__()

    # Observation: 2 bits per reward field; action: index of a reward
    # field.
    self.observation_space = MultiBinary(self.num_rewards * 2)
    self.action_space = Discrete(self.num_rewards)

    self.current_state = None
    self.terminated = True
示例14: __init__
# 需要導入模塊: from gym.spaces import discrete [as 別名]
# 或者: from gym.spaces.discrete import Discrete [as 別名]
def __init__(self, name):
    """
    Init function.

    Creates the gym environment and records the bounds of its observation
    space and the layout of its action space.

    :param name: gym task name
    """
    self.reset_task()
    self.__envir = gym.make(name)  # gym environment
    self.__envir_name = name  # environment name

    obser_space = self.__envir.observation_space
    # the number of parameters in observation
    obser_size = obser_space.shape[0]
    self.__obser_size = obser_size
    # Lower/upper bound of every observation parameter. The original
    # filled the low-bound list from ``.high`` and the up-bound list from
    # ``.low``, contradicting the attribute names; each list now takes the
    # matching bound.
    self.__obser_low_bound = [obser_space.low[i] for i in range(obser_size)]
    self.__obser_up_bound = [obser_space.high[i] for i in range(obser_size)]

    self.total_step = 0  # step counter, starts at zero

    # policy model, it's a neural network in this example
    self.__policy_model = None
    self.__max_step = 0  # maximum stop step
    self.__stop_step = 0  # the stop step in recent trajectory

    self.__action_low_bound = []  # action lower bound
    self.__action_up_bound = []  # action upper bound

    action_space = self.__envir.action_space
    if isinstance(action_space, Discrete):
        # A discrete action space is treated as one action parameter.
        self.__action_size = 1
        # environment action space, specified by gym
        self.__action_sca = [action_space.n]
        # the type of action, False means discrete
        self.__action_type = [False]
    else:
        # Otherwise the action object is treated as a Box.
        action_size = action_space.shape[0]
        self.__action_size = action_size
        self.__action_sca = []
        self.__action_type = [True] * action_size
        self.__action_low_bound = [action_space.low[i] for i in range(action_size)]
        self.__action_up_bound = [action_space.high[i] for i in range(action_size)]
示例15: test_vecenv
# 需要導入模塊: from gym.spaces import discrete [as 別名]
# 或者: from gym.spaces.discrete import Discrete [as 別名]
def test_vecenv(size=10, num=8, sleep=0.001):
    """Check that the vector-env implementations behave identically.

    When imported (not run as a script) every implementation is stepped
    with the same actions and the results of each step are compared
    against those of the first implementation. When run as a script a
    ``RayVectorEnv`` is added and each implementation is timed instead.

    :param size: ``size`` of the first test env; env ``k`` gets
        ``size + k``
    :param num: number of envs per vector env
    :param sleep: forwarded to every ``MyTestEnv``
    """
    verbose = __name__ == '__main__'
    # ``i=i`` binds the current value to each factory (avoids the
    # late-binding closure pitfall).
    env_fns = [
        lambda i=i: MyTestEnv(size=i, sleep=sleep)
        for i in range(size, size + num)
    ]
    venv = [
        VectorEnv(env_fns),
        SubprocVectorEnv(env_fns),
    ]
    if verbose:
        venv.append(RayVectorEnv(env_fns))
    for v in venv:
        v.seed()

    action_list = [1] * 5 + [0] * 10 + [1] * 20
    if not verbose:
        for v in venv:
            v.reset()
        for a in action_list:
            outputs = []
            for v in venv:
                first, second, done, fourth = v.step([a] * num)
                if sum(done):
                    # Reset only the finished envs and use the returned
                    # value in place of the first step output.
                    first = v.reset(np.where(done)[0])
                outputs.append([first, second, done, fourth])
            # zip(*outputs) groups the same step output across all vector
            # envs. Compare every implementation with the first one; the
            # former ``range(1, len(i) - 1)`` skipped the last env and was
            # empty for two envs, so nothing was ever asserted.
            for same_output in zip(*outputs):
                reference = same_output[0]
                for other in same_output[1:]:
                    assert (reference == other).all()
    else:
        t = [0, 0, 0]
        for i, e in enumerate(venv):
            t[i] = time.time()
            e.reset()
            for a in action_list:
                done = e.step([a] * num)[2]
                if sum(done) > 0:
                    e.reset(np.where(done)[0])
            t[i] = time.time() - t[i]
        print(f'VectorEnv: {t[0]:.6f}s')
        print(f'SubprocVectorEnv: {t[1]:.6f}s')
        print(f'RayVectorEnv: {t[2]:.6f}s')

    for v in venv:
        assert v.size == list(range(size, size + num))
        assert v.env_num == num
        assert v.action_space == [Discrete(2)] * num
    for v in venv:
        v.close()