This article collects typical usage examples of the Python class gym.envs.toy_text.discrete.DiscreteEnv. If you have been wondering what discrete.DiscreteEnv does, how to use it, or what real code that uses it looks like, the curated examples below should help. You can also read further about its containing module, gym.envs.toy_text.
The following shows 10 code examples of discrete.DiscreteEnv, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
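DiscreteEnv is the tabular base class behind gym's toy-text environments: a subclass passes the number of states nS, the number of actions nA, a transition table of the form P[state][action] = [(probability, next_state, reward, done), ...], and an initial state distribution isd to the parent constructor, and inherits reset() and step(). Before the examples, here is a minimal, self-contained sketch; the TwoStateEnv class and its dynamics are invented purely for illustration:

import numpy as np
from gym.envs.toy_text import discrete


class TwoStateEnv(discrete.DiscreteEnv):
    """Toy two-state chain: action 1 moves right and ends the episode."""

    def __init__(self):
        n_states, n_actions = 2, 2
        # P[state][action] = [(probability, next_state, reward, done), ...]
        P = {
            0: {0: [(1.0, 0, 0.0, False)], 1: [(1.0, 1, 1.0, True)]},
            1: {0: [(1.0, 1, 0.0, True)], 1: [(1.0, 1, 0.0, True)]},
        }
        isd = np.array([1.0, 0.0])  # always start in state 0
        super(TwoStateEnv, self).__init__(n_states, n_actions, P, isd)


env = TwoStateEnv()
state = env.reset()                        # -> 0
state, reward, done, info = env.step(1)    # -> (1, 1.0, True, {'prob': 1.0})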
Example 1: domain_to_index
# Required import: from gym.envs.toy_text import discrete [as alias]
# Or: from gym.envs.toy_text.discrete import DiscreteEnv [as alias]
def domain_to_index(
        self, domain_batch: Union[State, StateAction, StateActionState]
) -> np.ndarray:
    """Convert a domain batch into a numpy ndarray of reward table indices.

    Parameters
    ----------
    domain_batch: Union[State, StateAction, StateActionState]
        A domain batch. Can also be the entire domain.

    Returns
    -------
    np.ndarray
        A numpy array of corresponding reward table indices.
    """
    assert utils.wrapper.is_unwrappable_to(self.env, DiscreteEnv)
    index = copy(domain_batch.state)
    if self.action_in_domain:
        index *= self.env.action_space.n
        index += domain_batch.action
        if self.next_state_in_domain:
            index *= self.env.observation_space.n
            index += domain_batch.next_state
    return index
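The method flattens a domain element into a row-major table index: start with the state, multiply by n_actions and add the action if actions are part of the domain, and multiply by n_states and add the next state if next states are too. A hypothetical call, assuming a 4-state, 2-action environment and a StateAction namedtuple with fields state and action as used above:

# hypothetical: 4 states, 2 actions, reward domain consists of (state, action) pairs
batch = StateAction(state=np.array([0, 3]), action=np.array([1, 0]))
indices = reward_function.domain_to_index(batch)
# indices == array([1, 6]), since index = state * n_actions + action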
Example 2: test_is_unwrappable_to
# Required import: from gym.envs.toy_text import discrete [as alias]
# Or: from gym.envs.toy_text.discrete import DiscreteEnv [as alias]
def test_is_unwrappable_to():
    assert is_unwrappable_to(make_env('FrozenLake-v0'), TimeLimit)
    assert is_unwrappable_to(make_env('FrozenLake-v0'), DiscreteEnv)
    assert is_unwrappable_to(
        feature_wrapper.make('FrozenLake-v0'), FrozenLakeFeatureWrapper)
    assert is_unwrappable_to(
        feature_wrapper.make('FrozenLake8x8-v0'), FrozenLakeFeatureWrapper)
    assert is_unwrappable_to(
        feature_wrapper.make('FrozenLake-v0'), feature_wrapper.FeatureWrapper)
    env = feature_wrapper.make('FrozenLake-v0')
    reward_function = FeatureBasedRewardFunction(env, 'random')
    env = RewardWrapper(env, reward_function)
    assert is_unwrappable_to(env, RewardWrapper)
    assert is_unwrappable_to(env, feature_wrapper.FeatureWrapper)
    assert is_unwrappable_to(env, DiscreteEnv)
    assert is_unwrappable_to(env, gym.Env)
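Several examples on this page rely on is_unwrappable_to without showing it. A plausible implementation (a sketch, not necessarily the package's actual code) simply walks the chain of gym.Wrapper instances via their .env attribute and checks isinstance against the target class:

import gym


def is_unwrappable_to(env, target_class):
    """Return True if env is an instance of target_class or wraps one."""
    if isinstance(env, target_class):
        return True
    while isinstance(env, gym.Wrapper):
        env = env.env
        if isinstance(env, target_class):
            return True
    return False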
Example 3: __init__
# Required import: from gym.envs.toy_text import discrete [as alias]
# Or: from gym.envs.toy_text.discrete import DiscreteEnv [as alias]
def __init__(self, env):
    assert is_unwrappable_to(env, DiscreteEnv)
    super(DiscreteEnvModelWrapper, self).__init__(env)
Example 4: get_transition_array
# Required import: from gym.envs.toy_text import discrete [as alias]
# Or: from gym.envs.toy_text.discrete import DiscreteEnv [as alias]
def get_transition_array(self):
    env = unwrap_env(self.env, DiscreteEnv)
    # adding +1 to account for the absorbing state
    # (reached whenever the game has ended)
    n_states = env.observation_space.n + 1
    n_actions = env.action_space.n
    transitions = np.zeros([n_states, n_actions, n_states])
    # iterate over all "from" states:
    for state, transitions_given_state in env.P.items():
        # iterate over all actions:
        for action, outcomes in transitions_given_state.items():
            # iterate over all possible outcomes:
            for probability, next_state, _, done in outcomes:
                # add transition probability T(s, a, s')
                transitions[state, action, next_state] += probability
                if done:
                    # outcome was marked as ending the game.
                    # if the game is done and state == next_state,
                    # map to the absorbing state instead
                    if state == next_state:
                        transitions[state, action, next_state] = 0
                    # map next state to the absorbing state.
                    # make sure that next state wasn't mapped to any other state yet:
                    assert np.sum(transitions[next_state, :, :-1]) == 0
                    transitions[next_state, :, -1] = 1.0
    # specify transition probabilities for the absorbing state:
    # returning to itself, for all actions.
    transitions[-1, :, -1] = 1.0
    return transitions
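Because of the extra absorbing state, the returned array has shape (n_states + 1, n_actions, n_states + 1), and every (state, action) slice should be a proper probability distribution. A quick sanity check, assuming a FrozenLake environment wrapped as in the other examples:

model = DiscreteEnvModelWrapper(make_env('FrozenLake-v0'))
transitions = model.get_transition_array()
# every row T(s, a, .) should sum to 1, including the absorbing state's rows
assert np.allclose(transitions.sum(axis=-1), 1.0)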
Example 5: get_reward_array
# Required import: from gym.envs.toy_text import discrete [as alias]
# Or: from gym.envs.toy_text.discrete import DiscreteEnv [as alias]
def get_reward_array(self):
    env = unwrap_env(self.env, DiscreteEnv)
    # adding +1 to account for the absorbing state
    # (reached whenever the game has ended)
    n_states = env.observation_space.n + 1
    n_actions = env.action_space.n
    if is_unwrappable_to(self.env, RewardWrapper):
        # get the reward function:
        reward_wrapper = unwrap_env(self.env, RewardWrapper)
        reward_function = reward_wrapper.reward_function
    else:
        reward_function = None
    rewards = np.zeros([n_states, n_actions])
    # iterate over all "from" states:
    for state, transitions_given_state in env.P.items():
        # iterate over all actions:
        for action, outcomes in transitions_given_state.items():
            # iterate over all possible outcomes:
            for probability, next_state, reward, done in outcomes:
                if reward_function is not None:
                    if done and state == next_state:
                        # don't output a reward for reaching this state
                        # if the game is over and we are already in it.
                        reward = 0
                    else:
                        rew_input = reward_wrapper.get_reward_input_for(
                            state, action, next_state)
                        reward = reward_function.reward(rew_input)
                rewards[state, action] += reward * probability
    # reward of the absorbing state is zero:
    rewards[-1, :] = 0.0
    return rewards
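The reward array has shape (n_states + 1, n_actions) and pairs naturally with the transition array from Example 4 for tabular planning. A sketch of a single Bellman backup using both (the model variable is assumed to be a DiscreteEnvModelWrapper as above):

transitions = model.get_transition_array()   # shape (S, A, S)
rewards = model.get_reward_array()           # shape (S, A)
gamma = 0.99
values = np.zeros(rewards.shape[0])
# one step of value iteration: Q(s, a) = R(s, a) + gamma * sum_s' T(s, a, s') * V(s')
q_values = rewards + gamma * transitions.dot(values)
values = q_values.max(axis=1)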
Example 6: domain
# Required import: from gym.envs.toy_text import discrete [as alias]
# Or: from gym.envs.toy_text.discrete import DiscreteEnv [as alias]
def domain(self) -> Union[State, StateAction, StateActionState]:
    """Return the entire domain of the reward function.

    Returns
    -------
    Union[State, StateAction, StateActionState]
        The domain of the reward function.
    """
    if utils.wrapper.is_unwrappable_to(self.env, DiscreteEnv):
        n_states = self.env.observation_space.n
        n_actions = self.env.action_space.n
        states = np.arange(n_states)
    elif utils.wrapper.is_unwrappable_to(self.env, MazeWorld):
        maze_env = utils.wrapper.unwrap_env(self.env, MazeWorld)
        num_rewards = maze_env.num_rewards
        n_states = num_rewards * 2**num_rewards
        n_actions = num_rewards
        states = np.array(
            [maze_env.index_to_state(i) for i in range(n_states)])
    else:
        raise NotImplementedError()
    if self.action_in_domain:
        # if the domain contains actions: extend the domain
        states = np.repeat(states, n_actions, axis=0)
        actions = np.arange(n_actions)
        actions = np.tile(actions, n_states)
        if self.next_state_in_domain:
            # if the domain contains next states: extend the domain
            states = np.repeat(states, n_states)
            actions = np.repeat(actions, n_states)
            next_states = np.arange(n_states)
            next_states = np.tile(next_states, n_states * n_actions)
            # return the adequate namedtuple:
            return StateActionState(states, actions, next_states)
        return StateAction(states, actions)
    return State(states)
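For a small DiscreteEnv the enumeration is easy to trace: np.repeat on the states and np.tile on the actions together list every (state, action) pair exactly once. A tiny illustration with made-up sizes:

n_states, n_actions = 2, 2
states = np.repeat(np.arange(n_states), n_actions)   # array([0, 0, 1, 1])
actions = np.tile(np.arange(n_actions), n_states)    # array([0, 1, 0, 1])
# together these enumerate (0, 0), (0, 1), (1, 0), (1, 1)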
Example 7: envs_known_transitions
# Required import: from gym.envs.toy_text import discrete [as alias]
# Or: from gym.envs.toy_text.discrete import DiscreteEnv [as alias]
def envs_known_transitions():
    """Return all environment ids for which transition dynamics are known."""
    result = []
    for env_id in ENV_IDS:
        if utils.wrapper.is_unwrappable_to(make_env(env_id), DiscreteEnv):
            result.append(env_id)
    return set(result)
Example 8: make_wrapped_env
# Required import: from gym.envs.toy_text import discrete [as alias]
# Or: from gym.envs.toy_text.discrete import DiscreteEnv [as alias]
def make_wrapped_env(env_id: str,
                     with_feature_wrapper: bool = False,
                     reward_function_factory: Callable = None,
                     with_model_wrapper: bool = False):
    """Make an environment, potentially wrapped in a FeatureWrapper,
    a RewardWrapper, and a BaseWorldModelWrapper.

    Parameters
    ----------
    env_id: str
        The environment's id, e.g. 'FrozenLake-v0'.
    with_feature_wrapper: bool
        Whether to use a feature wrapper.
    reward_function_factory: Callable
        A function which returns a new reward function when called. If this
        is provided, the environment will be wrapped in a RewardWrapper
        using the returned reward function.
    with_model_wrapper: bool
        Whether to use a BaseWorldModelWrapper.

    Returns
    -------
    gym.Env
        A gym environment, potentially wrapped.
    """
    assert env_id in ENV_IDS
    if with_feature_wrapper:
        assert env_id in feature_wrapper.feature_wrappable_envs()
        env = feature_wrapper.make(env_id)
    else:
        env = make_env(env_id)
    if reward_function_factory is not None:
        reward_function = reward_function_factory(env)
        assert isinstance(reward_function, BaseRewardFunction)
        env = RewardWrapper(env, reward_function)
    if with_model_wrapper:
        if utils.wrapper.is_unwrappable_to(env, DiscreteEnv):
            env = DiscreteEnvModelWrapper(env)
        elif utils.wrapper.is_unwrappable_to(env, MazeWorld):
            env = MazeModelWrapper(env)
        else:
            raise NotImplementedError()
    return env
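A hypothetical call, assuming FeatureBasedRewardFunction (seen in Example 2) is a BaseRewardFunction subclass: wrap FrozenLake with a feature wrapper, a random linear reward, and a model wrapper in one go:

def random_reward_factory(env):
    # assumption: FeatureBasedRewardFunction subclasses BaseRewardFunction
    return FeatureBasedRewardFunction(env, 'random')


env = make_wrapped_env(
    'FrozenLake-v0',
    with_feature_wrapper=True,
    reward_function_factory=random_reward_factory,
    with_model_wrapper=True)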
Example 9: __init__
# Required import: from gym.envs.toy_text import discrete [as alias]
# Or: from gym.envs.toy_text.discrete import DiscreteEnv [as alias]
def __init__(self):
    self.desc = np.asarray(MAP, dtype='c')
    self.locs = locs = [(0, 0), (0, 4), (4, 0), (4, 3)]
    nS = 500
    nR = 5
    nC = 5
    maxR = nR - 1
    maxC = nC - 1
    isd = np.zeros(nS)
    nA = 6
    P = {s: {a: [] for a in range(nA)} for s in range(nS)}
    for row in range(5):
        for col in range(5):
            for passidx in range(5):
                for destidx in range(4):
                    state = self.encode(row, col, passidx, destidx)
                    if passidx < 4 and passidx != destidx:
                        isd[state] += 1
                    for a in range(nA):
                        # defaults
                        newrow, newcol, newpassidx = row, col, passidx
                        reward = -1
                        done = False
                        taxiloc = (row, col)
                        if a == 0:
                            newrow = min(row + 1, maxR)
                        elif a == 1:
                            newrow = max(row - 1, 0)
                        if a == 2 and self.desc[1 + row, 2 * col + 2] == b":":
                            newcol = min(col + 1, maxC)
                        elif a == 3 and self.desc[1 + row, 2 * col] == b":":
                            newcol = max(col - 1, 0)
                        elif a == 4:  # pickup
                            if passidx < 4 and taxiloc == locs[passidx]:
                                newpassidx = 4
                            else:
                                reward = -10
                        elif a == 5:  # dropoff
                            if (taxiloc == locs[destidx]) and passidx == 4:
                                done = True
                                reward = 20
                            elif (taxiloc in locs) and passidx == 4:
                                newpassidx = locs.index(taxiloc)
                            else:
                                reward = -10
                        newstate = self.encode(newrow, newcol, newpassidx, destidx)
                        P[state][a].append((1.0, newstate, reward, done))
    isd /= isd.sum()
    discrete.DiscreteEnv.__init__(self, nS, nA, P, isd)
Example 10: __init__
# Required import: from gym.envs.toy_text import discrete [as alias]
# Or: from gym.envs.toy_text.discrete import DiscreteEnv [as alias]
def __init__(self):
    self.desc = np.asarray(MAP, dtype='c')
    self.locs = locs = [(0, 0), (0, 4), (4, 0), (4, 3)]
    num_states = 500
    num_rows = 5
    num_columns = 5
    max_row = num_rows - 1
    max_col = num_columns - 1
    initial_state_distrib = np.zeros(num_states)
    num_actions = 6
    P = {state: {action: []
                 for action in range(num_actions)} for state in range(num_states)}
    for row in range(num_rows):
        for col in range(num_columns):
            for pass_idx in range(len(locs) + 1):  # +1 for being inside the taxi
                for dest_idx in range(len(locs)):
                    state = self.encode(row, col, pass_idx, dest_idx)
                    if pass_idx < 4 and pass_idx != dest_idx:
                        initial_state_distrib[state] += 1
                    for action in range(num_actions):
                        # defaults
                        new_row, new_col, new_pass_idx = row, col, pass_idx
                        reward = -1  # default reward when there is no pickup/dropoff
                        done = False
                        taxi_loc = (row, col)
                        if action == 0:
                            new_row = min(row + 1, max_row)
                        elif action == 1:
                            new_row = max(row - 1, 0)
                        if action == 2 and self.desc[1 + row, 2 * col + 2] == b":":
                            new_col = min(col + 1, max_col)
                        elif action == 3 and self.desc[1 + row, 2 * col] == b":":
                            new_col = max(col - 1, 0)
                        elif action == 4:  # pickup
                            if pass_idx < 4 and taxi_loc == locs[pass_idx]:
                                new_pass_idx = 4
                            else:  # passenger not at location
                                reward = -10
                        elif action == 5:  # dropoff
                            if (taxi_loc == locs[dest_idx]) and pass_idx == 4:
                                new_pass_idx = dest_idx
                                done = True
                                reward = 20
                            elif (taxi_loc in locs) and pass_idx == 4:
                                new_pass_idx = locs.index(taxi_loc)
                            else:  # dropoff at wrong location
                                reward = -10
                        new_state = self.encode(
                            new_row, new_col, new_pass_idx, dest_idx)
                        P[state][action].append(
                            (1.0, new_state, reward, done))
    initial_state_distrib /= initial_state_distrib.sum()
    discrete.DiscreteEnv.__init__(
        self, num_states, num_actions, P, initial_state_distrib)
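Both Taxi constructors call self.encode, which is not shown on this page. In the standard gym Taxi implementation the state index is a mixed-radix encoding of (row, col, passenger location, destination); reproduced here from memory as a sketch:

def encode(self, taxi_row, taxi_col, pass_loc, dest_idx):
    # ((taxi_row * 5 + taxi_col) * 5 + pass_loc) * 4 + dest_idx
    i = taxi_row
    i *= 5
    i += taxi_col
    i *= 5
    i += pass_loc
    i *= 4
    i += dest_idx
    return i


def decode(self, i):
    # inverse of encode: recover (taxi_row, taxi_col, pass_loc, dest_idx)
    out = []
    out.append(i % 4)
    i = i // 4
    out.append(i % 5)
    i = i // 5
    out.append(i % 5)
    i = i // 5
    out.append(i)
    assert 0 <= i < 5
    return reversed(out)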