当前位置: 首页>>代码示例>>Python>>正文


Python discrete.Discrete方法代码示例

本文整理汇总了 Python 中 gym.spaces.discrete.Discrete 类的典型用法代码示例。如果您正苦于以下问题:Python discrete.Discrete 的具体用法是什么?Python discrete.Discrete 怎么用?有哪些 Python discrete.Discrete 的使用示例?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 gym.spaces.discrete 的用法示例。


下文一共展示了 discrete.Discrete 类的 15 个代码示例,这些示例默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞,您的评价将有助于系统推荐出更好的 Python 代码示例。

示例1: get_space_size

# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def get_space_size(space):
    """Return the size descriptor used for one sample of ``space``.

    A ``Box`` yields its ``shape``. A ``Discrete`` is treated as a single
    scalar entry and yields ``[1]`` (deliberately not ``space.n``).

    Raises:
        NotImplementedError: if ``space`` is neither ``Box`` nor ``Discrete``.
    """
    if isinstance(space, Box):
        return space.shape
    if isinstance(space, Discrete):
        # One slot per sample; the number of categories is not used here.
        return [1]
    raise NotImplementedError(
        "Assuming to use Box or Discrete, not {}".format(type(space)))
开发者ID:keiohta,项目名称:tf2rl,代码行数:9,代码来源:get_replay_buffer.py

示例2: __init__

# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self,
             env: gym.Env,
             parameters: Union[None, str, np.ndarray] = None,
             action_in_domain: bool = False,
             next_state_in_domain: bool = False):
    """Set up a tabular reward function over a discrete domain.

    Parameters
    ----------
    env: gym.Env
        A gym environment for which the reward function is defined.
        Both its observation space and action space must be discrete.
    parameters: Union[None, str, np.ndarray]
        A numpy ndarray containing the values for all elements in the
        reward table. If the value is the string 'random', the table is
        initialized with random parameters (mean 0, standard deviation 1).
        The size of parameters must correspond to the size of the domain
        (one table value for each possible input).
    action_in_domain: bool
        Indicates whether actions are in the domain, i.e. R(s, a) or
        R(s, a, s').
    next_state_in_domain: bool
        Indicates whether next states are in the domain, i.e. R(s, a, s').
    """
    super(TabularRewardFunction, self).__init__(
        env, parameters, action_in_domain, next_state_in_domain)

    # This reward function is only implemented for discrete state and
    # action spaces.
    assert isinstance(env.observation_space, DiscreteSpace)
    assert isinstance(env.action_space, DiscreteSpace)

    # Number of table entries: |S|, times |A| and/or |S| again depending
    # on which inputs are part of the domain.
    self.domain_size = self.env.observation_space.n
    if self.action_in_domain:
        self.domain_size *= self.env.action_space.n
    if self.next_state_in_domain:
        self.domain_size *= self.env.observation_space.n

    # Only compare against the 'random' sentinel when we really hold a
    # string. `ndarray == 'random'` is evaluated by NumPy, which (depending
    # on the NumPy version) warns or produces an element-wise result whose
    # truth value is ambiguous.
    if isinstance(parameters, str) and parameters == 'random':
        self.parameters = np.random.standard_normal(size=self.domain_size)
    else:
        self.parameters = np.array(parameters)
    assert len(self.parameters) == self.domain_size
开发者ID:JohannesHeidecke,项目名称:irl-benchmark,代码行数:42,代码来源:reward_function.py

示例3: __init__

# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self,
             name,
             num_envs=1,
             single_process=True,
             log_dir=None,
             episode_life=True,
             seed=None):
    """Create a vectorised environment and cache its space metadata.

    Parameters
    ----------
    name:
        Environment name forwarded to ``make_env``.
    num_envs:
        Number of environment factories to build (one per index).
    single_process:
        When true the factories are wrapped in ``DummyVecEnv``, otherwise
        in ``SubprocVecEnv``.
    log_dir:
        If not ``None``, the directory is created with ``mkdir``.
    episode_life:
        Forwarded unchanged to ``make_env``.
    seed:
        Base seed forwarded to ``make_env``; a random integer below 1e9 is
        drawn when ``None``.

    Raises
    ------
    AssertionError
        If the action space is neither ``Discrete`` nor ``Box``.
    """
    if seed is None:
        seed = np.random.randint(int(1e9))
    if log_dir is not None:
        mkdir(log_dir)

    envs = [make_env(name, seed, i, episode_life) for i in range(num_envs)]
    Wrapper = DummyVecEnv if single_process else SubprocVecEnv
    self.env = Wrapper(envs)
    self.name = name

    self.observation_space = self.env.observation_space
    # Flattened observation size.
    self.state_dim = int(np.prod(self.env.observation_space.shape))

    self.action_space = self.env.action_space
    if isinstance(self.action_space, Discrete):
        self.action_dim = self.action_space.n
    elif isinstance(self.action_space, Box):
        self.action_dim = self.action_space.shape[0]
    else:
        # The previous `assert 'unknown action space'` could never fail
        # (a non-empty string is truthy), leaving `action_dim` unset.
        raise AssertionError('unknown action space')
开发者ID:ShangtongZhang,项目名称:DeepRL,代码行数:30,代码来源:envs.py

示例4: __init__

# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self, n, goal_length, num_distractor, distractor_length, max_steps=10**6, collect_key=True, world=None):
        """Configure the environment, define its spaces and perform an initial reset.

        Stores the constructor arguments on the instance, sets the reward
        constants, builds the action/observation spaces and then calls
        ``self.reset(world)``.

        :param n: size parameter; the observation is ``(n + 2, n + 2, 3)``
            (presumably the grid side length plus a one-cell border -- TODO confirm)
        :param goal_length: stored as-is; contributes ``goal_length - 1`` to ``num_pairs``
        :param num_distractor: stored as-is; multiplied with ``distractor_length`` in ``num_pairs``
        :param distractor_length: stored as-is; see ``num_distractor``
        :param max_steps: stored as ``self.max_steps`` (default ``10**6``)
        :param collect_key: if True, keys are collected immediately when available
        :param world: forwarded unchanged to ``self.reset``
        """
        self.goal_length = goal_length
        self.num_distractor = num_distractor
        self.distractor_length = distractor_length
        self.n = n
        # Derived count: goal_length - 1 plus distractor_length per distractor.
        self.num_pairs = goal_length - 1 + distractor_length * num_distractor
        self.collect_key = collect_key  # if True, keys are collected immediately when available

        # Penalties and Rewards
        self.step_cost = 0
        self.reward_gem = 10
        self.reward_key = 1
        self.reward_distractor = -1

        # Other Settings
        self.viewer = None
        self.max_steps = max_steps
        # One discrete action per entry of ACTION_LOOKUP.
        self.action_space = Discrete(len(ACTION_LOOKUP))
        # uint8 values in [0, 255] with 3 channels over an (n + 2) x (n + 2) grid.
        self.observation_space = Box(low=0, high=255, shape=(n+2, n+2, 3), dtype=np.uint8)

        # Game initialization
        # NOTE(review): looks like an RGB triple for the held key -- confirm against reset()/step().
        self.owned_key = [220, 220, 220]

        self.np_random_seed = None
        # reset() runs before the counters below are (re)initialised, so
        # whatever it assigns to those attributes is overwritten afterwards.
        self.reset(world)

        self.num_env_steps = 0
        self.episode_reward = 0

        # Bounded buffer that keeps at most the 3 most recent entries.
        self.last_frames = deque(maxlen=3)
开发者ID:nathangrinsztajn,项目名称:Box-World,代码行数:32,代码来源:box_world_env.py

示例5: __init__

# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self, min_value, max_value, is_discrete):
    """Describe a value range by its bounds, shape and discreteness.

    :param min_value: lower bound; a scalar (as judged by
        ``self.is_scaler``) or a list/tuple/ndarray
    :param max_value: upper bound; same kind and shape as ``min_value``
    :param is_discrete: whether the range is discrete
    :raises ValueError: if the bound shapes differ, the bounds are empty,
        the bound types are unsupported, or a discrete range is given a
        ``float`` bound
    """
    self.__min_value = min_value
    self.__max_value = max_value

    if self.is_scaler(min_value) or self.is_scaler(max_value):
        self.__shape = [1]
    elif isinstance(min_value, (list, np.ndarray, tuple)) and isinstance(max_value, (list, np.ndarray, tuple)):
        shape = np.asarray(min_value).shape
        if shape != np.asarray(max_value).shape:
            raise ValueError("Shape of min_value and max_value are mismatched !")

        # `not shape` covers 0-d arrays, for which `shape[0]` would raise
        # an IndexError instead of the intended ValueError.
        if not shape or shape[0] == 0:
            raise ValueError("No value error")

        self.__shape = list(shape)

        # Unwrap single-element 1-D bounds to plain scalars. Requiring the
        # full shape to be (1,) keeps (1, k) inputs from being unwrapped
        # to a k-vector while the recorded shape claims a single element.
        if shape == (1,):
            self.__min_value = min_value[0]
            self.__max_value = max_value[0]
    else:
        raise ValueError("Unsupported type format !")

    self.__discrete = is_discrete

    if is_discrete and (isinstance(min_value, float) or isinstance(max_value, float)):
        raise ValueError('Discrete data cannot be a real number !')
开发者ID:garlicdevs,项目名称:Fruit-API,代码行数:30,代码来源:priv.py

示例6: convert_openai_space

# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def convert_openai_space(space):
    """Translate a gym space into the project's ``Space`` type.

    A ``Box`` maps to a continuous ``Space`` spanning ``space.low`` to
    ``space.high``; a ``Discrete`` maps to a discrete ``Space`` spanning
    ``0`` to ``space.n - 1``.

    :raises ValueError: for any other kind of space
    """
    # Imported lazily so gym is only needed when this converter is called.
    from gym.spaces.box import Box
    from gym.spaces.discrete import Discrete

    if isinstance(space, Box):
        lower, upper = space.low, space.high
        return Space(lower, upper, False)

    if isinstance(space, Discrete):
        largest_action = space.n - 1
        return Space(0, largest_action, True)

    raise ValueError("Does not support other types than Box and Discrete")
开发者ID:garlicdevs,项目名称:Fruit-API,代码行数:11,代码来源:priv.py

示例7: __init__

# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self,
             dim_room=(10, 10),
             max_steps=120,
             num_boxes=3,
             num_gen_steps=None):
    """Initialise the push-and-pull variant and reset it once.

    Forwards all arguments to the parent constructor, then overrides the
    observation and action spaces and creates the per-box target flags.

    :param dim_room: room dimensions; the observation spans 16 units per
        room cell along each of the first two axes
    :param max_steps: forwarded to the parent constructor
    :param num_boxes: number of boxes; one target flag is kept per box
    :param num_gen_steps: forwarded to the parent constructor
    """
    super(PushAndPullSokobanEnv, self).__init__(dim_room, max_steps, num_boxes, num_gen_steps)

    # NOTE(review): no dtype is passed to Box here, so the library default
    # applies -- confirm this is intended for 0..255 image data.
    frame_shape = (dim_room[0] * 16, dim_room[1] * 16, 3)
    self.observation_space = Box(low=0, high=255, shape=frame_shape)

    # No box is on a target until the room has been generated.
    self.boxes_are_on_target = [False for _ in range(num_boxes)]
    self.action_space = Discrete(len(ACTION_LOOKUP))

    self.reset()
开发者ID:mpSchrader,项目名称:gym-sokoban,代码行数:15,代码来源:sokoban_env_pull.py

示例8: __init__

# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self,
             dim_room=(10, 10),
             max_steps=120,
             num_boxes=4,
             num_gen_steps=None,
             reset=True):
    """Configure the environment and optionally generate the first room.

    :param dim_room: room dimensions ``(dim_0, dim_1)``; the observation
        spans 16 units per room cell along each of the first two axes
    :param max_steps: stored as ``self.max_steps``
    :param num_boxes: stored as ``self.num_boxes``
    :param num_gen_steps: stored as ``self.num_gen_steps``; when ``None``
        it defaults to ``int(1.7 * (dim_room[0] + dim_room[1]))``
    :param reset: when true, ``self.reset()`` is called at the end of
        construction
    """
    # General Configuration
    self.dim_room = dim_room
    # Identity test: `== None` defers to the argument's own __eq__.
    if num_gen_steps is None:
        self.num_gen_steps = int(1.7 * (dim_room[0] + dim_room[1]))
    else:
        self.num_gen_steps = num_gen_steps

    self.num_boxes = num_boxes
    self.boxes_on_target = 0

    # Penalties and Rewards
    self.penalty_for_step = -0.1
    self.penalty_box_off_target = -1
    self.reward_box_on_target = 1
    self.reward_finished = 10
    self.reward_last = 0

    # Other Settings
    self.viewer = None
    self.max_steps = max_steps
    self.action_space = Discrete(len(ACTION_LOOKUP))
    screen_height, screen_width = (dim_room[0] * 16, dim_room[1] * 16)
    self.observation_space = Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)

    if reset:
        # Initialize Room
        _ = self.reset()
开发者ID:mpSchrader,项目名称:gym-sokoban,代码行数:36,代码来源:sokoban_env.py

示例9: __init__

# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self,
             dim_room=(10, 10),
             max_steps=120,
             num_boxes=3,
             num_gen_steps=None):
    """Initialise the two-player variant and reset it with a second player.

    The parent constructor is called with ``reset=False``; the spaces and
    player bookkeeping are set up first and the environment is then reset
    once with ``second_player=True``.

    :param dim_room: room dimensions; the observation spans 16 units per
        room cell along each of the first two axes
    :param max_steps: forwarded to the parent constructor
    :param num_boxes: number of boxes; one target flag is kept per box
    :param num_gen_steps: forwarded to the parent constructor
    """
    super(TwoPlayerSokobanEnv, self).__init__(dim_room, max_steps, num_boxes, num_gen_steps, reset=False)

    # NOTE(review): no dtype is passed to Box here, so the library default
    # applies -- confirm this is intended for 0..255 image data.
    frame_shape = (dim_room[0] * 16, dim_room[1] * 16, 3)
    self.observation_space = Box(low=0, high=255, shape=frame_shape)

    self.boxes_are_on_target = [False for _ in range(num_boxes)]
    self.action_space = Discrete(len(ACTION_LOOKUP))

    # Placeholder positions, set before the reset below is called.
    self.player_position = []
    self.player_positions = {0: [0, 0], 1: [1, 1]}

    self.reset(second_player=True)
开发者ID:mpSchrader,项目名称:gym-sokoban,代码行数:17,代码来源:sokoban_env_two_player.py

示例10: action_space

# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def action_space(self):
    """Return the action space (see class definition).

    With ``self.discrete`` set, this is a ``Discrete`` space with
    ``2 ** num_traffic_lights`` actions. Otherwise it is a float32 ``Box``
    in [-1, 1] with one entry per traffic light.
    """
    light_count = self.num_traffic_lights
    if not self.discrete:
        return Box(low=-1, high=1, shape=(light_count,), dtype=np.float32)
    return Discrete(2 ** light_count)
开发者ID:flow-project,项目名称:flow,代码行数:12,代码来源:traffic_light_grid.py

示例11: action_space

# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def action_space(self):
    """Return the action space (see class definition).

    With ``self.discrete`` set, this is a two-action ``Discrete`` space.
    Otherwise it is a float32 ``Box`` in [-1, 1] with a single entry.
    """
    if not self.discrete:
        return Box(low=-1, high=1, shape=(1,), dtype=np.float32)
    return Discrete(2)
开发者ID:flow-project,项目名称:flow,代码行数:12,代码来源:traffic_light_grid.py

示例12: __init__

# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self, size, sleep=0, dict_state=False):
    """Store the configuration, define a two-action space and reset.

    :param size: stored as ``self.size``
    :param sleep: stored as ``self.sleep`` (default 0)
    :param dict_state: stored as ``self.dict_state`` (default False)
    """
    self.size, self.sleep, self.dict_state = size, sleep, dict_state
    self.action_space = Discrete(2)
    # All attributes above are set before the first reset.
    self.reset()
开发者ID:thu-ml,项目名称:tianshou,代码行数:8,代码来源:env.py

示例13: __init__

# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self, map_id: int = 0):
    """Build the maze, precompute reward-to-reward paths and set the spaces.

    Parameters
    ----------
    map_id: int
        Which version of the game to play. Legal values are 0 and 1.
        Default is 0. Corresponds to the hard-coded maps MAP0 and MAP1
        defined in the same module.

    Raises
    ------
    NotImplementedError
        If ``map_id`` is not one of the legal values.
    """
    # When adding more maps here, also adapt the docstring above.
    if map_id not in (0, 1):
        raise NotImplementedError()
    used_map = MAP0 if map_id == 0 else MAP1

    # Arrays of walls and reward fields.
    self.map_walls, self.map_rewards = get_maps(used_map)
    self.rews_where = np.where(self.map_rewards > 0)
    # List of coordinates for reward fields.
    self.rew_coords = get_rew_coords(self.map_rewards)
    # Number of reward fields in the chosen map.
    self.num_rewards = len(self.rews_where[0])

    # Calculate all possible paths, using the 'pathfinding' library.
    # The matrix is |walls - 1| with its two axes swapped.
    walkable = np.swapaxes(np.abs(self.map_walls - 1.0), 0, 1).tolist()
    finder = AStarFinder(diagonal_movement=DiagonalMovement.never)

    def reward_node(grid, index):
        """Return the grid node at the position of reward field ``index``."""
        return grid.node(self.rews_where[0][index], self.rews_where[1][index])

    self.paths = {}
    for i in range(self.num_rewards):
        routes = self.paths[i] = {}
        for j in range(self.num_rewards):
            # A fresh grid is built for every search.
            grid = Grid(matrix=walkable)
            found, _ = finder.find_path(reward_node(grid, i), reward_node(grid, j), grid)
            routes[j] = [[int(step[0]), int(step[1])] for step in found]

    super(MazeWorld, self).__init__()

    # Observation: 2 bits per reward field; action: one per reward field.
    self.observation_space = MultiBinary(self.num_rewards * 2)
    self.action_space = Discrete(self.num_rewards)

    self.current_state = None
    self.terminated = True
开发者ID:JohannesHeidecke,项目名称:irl-benchmark,代码行数:49,代码来源:maze_world.py

示例14: __init__

# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def __init__(self, name):
    """
    Init function.

    Creates the gym environment and records the size and bounds of its
    observation space as well as the size, type and bounds of its action
    space.

    :param name: gym task name
    """
    self.reset_task()
    self.__envir = gym.make(name)  # gym environment
    self.__envir_name = name  # environment name
    self.total_step = 0  # total step counter
    # policy model, it's a neural network in this example
    self.__policy_model = None
    self.__max_step = 0  # maximum stop step
    self.__stop_step = 0  # the stop step in recent trajectory

    obser_space = self.__envir.observation_space
    # the number of parameters in observation
    self.__obser_size = obser_space.shape[0]
    # Lower bounds come from `.low` and upper bounds from `.high`; the two
    # were previously filled from the opposite attribute.
    self.__obser_low_bound = [obser_space.low[i] for i in range(self.__obser_size)]
    self.__obser_up_bound = [obser_space.high[i] for i in range(self.__obser_size)]

    action_space = self.__envir.action_space
    if isinstance(action_space, Discrete):
        # a discrete action space is handled as one single parameter
        self.__action_size = 1  # the number of parameters in action
        self.__action_sca = [action_space.n]  # number of discrete actions
        self.__action_type = [False]  # the type of action, False means discrete
        self.__action_low_bound = []  # action lower bound (unused here)
        self.__action_up_bound = []  # action upper bound (unused here)
    else:
        # otherwise the action space is assumed to be a Box
        self.__action_size = action_space.shape[0]
        self.__action_sca = []
        self.__action_type = [True] * self.__action_size
        self.__action_low_bound = [action_space.low[i] for i in range(self.__action_size)]
        self.__action_up_bound = [action_space.high[i] for i in range(self.__action_size)]
开发者ID:eyounx,项目名称:ZOOpt,代码行数:49,代码来源:gym_task.py

示例15: test_vecenv

# 需要导入模块: from gym.spaces import discrete [as 别名]
# 或者: from gym.spaces.discrete import Discrete [as 别名]
def test_vecenv(size=10, num=8, sleep=0.001):
    """Check that the vector-env implementations agree with each other.

    When imported (the normal test run), every vector env is stepped
    through the same action sequence and the results of each step are
    compared across implementations. When run as a script, each
    implementation is timed instead and the timings are printed.

    :param size: ``size`` of the first test env; env ``k`` gets ``size + k``
    :param num: number of envs per vector env
    :param sleep: forwarded to every ``MyTestEnv``
    """
    verbose = __name__ == '__main__'
    env_fns = [
        # `i=i` binds the current size; a bare closure would bind late.
        lambda i=i: MyTestEnv(size=i, sleep=sleep)
        for i in range(size, size + num)
    ]
    venv = [
        VectorEnv(env_fns),
        SubprocVectorEnv(env_fns),
    ]
    if verbose:
        venv.append(RayVectorEnv(env_fns))
    for v in venv:
        v.seed()
    action_list = [1] * 5 + [0] * 10 + [1] * 20
    if not verbose:
        for v in venv:
            v.reset()
        for a in action_list:
            o = []
            for v in venv:
                A, B, C, D = v.step([a] * num)
                if sum(C):
                    A = v.reset(np.where(C)[0])
                o.append([A, B, C, D])
            # zip(*o) groups each of the four step results across envs.
            # Every implementation is compared with the first one; the old
            # `range(1, len(i) - 1)` was empty for two envs, so nothing was
            # ever asserted.
            for per_env in zip(*o):
                for other in per_env[1:]:
                    assert (per_env[0] == other).all()
    else:
        t = [0, 0, 0]
        for idx, e in enumerate(venv):
            t[idx] = time.time()
            e.reset()
            for a in action_list:
                done = e.step([a] * num)[2]
                if sum(done) > 0:
                    e.reset(np.where(done)[0])
            t[idx] = time.time() - t[idx]
        print(f'VectorEnv: {t[0]:.6f}s')
        print(f'SubprocVectorEnv: {t[1]:.6f}s')
        print(f'RayVectorEnv: {t[2]:.6f}s')
    for v in venv:
        assert v.size == list(range(size, size + num))
        assert v.env_num == num
        assert v.action_space == [Discrete(2)] * num

    for v in venv:
        v.close()
开发者ID:thu-ml,项目名称:tianshou,代码行数:49,代码来源:test_env.py


注:本文中的gym.spaces.discrete.Discrete方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。