本文整理汇总了Python中dm_env.specs.Array方法的典型用法代码示例。如果您正苦于以下问题：Python specs.Array方法的具体用法？Python specs.Array怎么用？Python specs.Array使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dm_env.specs的用法示例。
在下文中一共展示了specs.Array方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _spec_to_box
# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def _spec_to_box(spec):
    """Flatten an iterable of float array specs into one float32 `spaces.Box`.

    Args:
        spec: Iterable of `specs.Array` / `specs.BoundedArray` instances whose
            dtype is float32 or float64.

    Returns:
        A `spaces.Box` whose `low` and `high` vectors are the concatenation of
        the flattened per-spec bounds. Unbounded specs contribute -inf/+inf.

    Raises:
        TypeError: If an element of `spec` is not a `specs.Array`.
    """

    def extract_min_max(s):
        """Return the (low, high) bound vectors for a single spec."""
        assert s.dtype == np.float64 or s.dtype == np.float32
        # `np.int` was only an alias of the builtin and has been removed from
        # NumPy, so use the builtin directly.
        dim = int(np.prod(s.shape))
        # Check the more specific class first: in dm_env a BoundedArray is
        # also an Array, so the order of these tests matters.
        if isinstance(s, specs.BoundedArray):
            zeros = np.zeros(dim, dtype=np.float32)
            # Adding the zero vector broadcasts scalar bounds to `dim` entries.
            return s.minimum + zeros, s.maximum + zeros
        if isinstance(s, specs.Array):
            bound = np.inf * np.ones(dim, dtype=np.float32)
            return -bound, bound
        # Fail loudly instead of returning None, which previously surfaced as
        # an opaque "cannot unpack NoneType" error in the caller.
        raise TypeError(
            'Expected a specs.Array or specs.BoundedArray, got {}.'.format(
                type(s)))

    mins, maxs = [], []
    for s in spec:
        mn, mx = extract_min_max(s)
        mins.append(mn)
        maxs.append(mx)
    low = np.concatenate(mins, axis=0)
    high = np.concatenate(maxs, axis=0)
    assert low.shape == high.shape
    return spaces.Box(low, high, dtype=np.float32)
示例2: _convert_spec_to_space
# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def _convert_spec_to_space(spec):
    """Recursively convert a dm_env spec, or a dict of specs, to a Gym space.

    Args:
        spec: A `specs.DiscreteArray`, `specs.BoundedArray`, `specs.Array`, or
            a dict whose values are themselves convertible.

    Returns:
        `spaces.Dict` for dicts, `spaces.Discrete` for discrete specs, and
        `spaces.Box` for bounded or unbounded array specs.

    Raises:
        NotImplementedError: If `spec` is none of the supported types.
        ValueError: If a bounded spec's minimum or maximum holds more than one
            element, since `ndarray.item()` only converts size-1 arrays.
    """
    if isinstance(spec, dict):
        return spaces.Dict(
            {k: _convert_spec_to_space(v) for k, v in spec.items()})

    # Most specific class first: in dm_env, DiscreteArray derives from
    # BoundedArray, which in turn derives from Array.
    if isinstance(spec, specs.DiscreteArray):
        return spaces.Discrete(spec.num_values)
    if isinstance(spec, specs.BoundedArray):
        # `np.asscalar(a)` was a thin wrapper around `a.item()` and has been
        # removed from NumPy; call `item()` directly.
        return spaces.Box(
            low=spec.minimum.item(),
            high=spec.maximum.item(),
            shape=spec.shape,
            dtype=spec.dtype)
    if isinstance(spec, specs.Array):
        return spaces.Box(
            low=-float("inf"),
            high=float("inf"),
            shape=spec.shape,
            dtype=spec.dtype)

    raise NotImplementedError(
        ("Could not convert `Array` spec of type {} to Gym space. "
         "Attempted to convert: {}").format(type(spec), spec))
示例3: default_agent
# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def default_agent(
    obs_spec: specs.Array,
    action_spec: specs.DiscreteArray,
    num_ensemble: int = 20,
) -> BootstrappedDqn:
    """Build a `BootstrappedDqn` agent with the default hyperparameters.

    Args:
        obs_spec: Observation spec, forwarded unchanged to the agent.
        action_spec: Discrete action spec; its `num_values` is passed to
            `make_ensemble` as the number of actions.
        num_ensemble: Ensemble size requested from `make_ensemble`.

    Returns:
        The constructed `BootstrappedDqn` instance.
    """
    networks = make_ensemble(
        num_actions=action_spec.num_values,
        num_ensemble=num_ensemble,
    )
    adam = snt.optimizers.Adam(learning_rate=1e-3)

    # Fixed default settings, gathered in one place for readability.
    hyperparameters = dict(
        batch_size=128,
        discount=.99,
        replay_capacity=10000,
        min_replay_size=128,
        sgd_period=1,
        target_update_period=4,
        mask_prob=0.5,
        noise_scale=0.0,
        # Decays from 1 towards 0 as `t` grows (presumably a step counter;
        # confirm against BootstrappedDqn).
        epsilon_fn=lambda t: 10 / (10 + t),
        seed=42,
    )
    return BootstrappedDqn(
        obs_spec=obs_spec,
        action_spec=action_spec,
        ensemble=networks,
        optimizer=adam,
        **hyperparameters,
    )
示例4: default_agent
# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def default_agent(obs_spec: specs.Array,
                  action_spec: specs.DiscreteArray):
    """Build a `DQN` agent with the default hyperparameters.

    Args:
        obs_spec: Observation spec; accepted for signature compatibility but
            not used.
        action_spec: Discrete action spec; its `num_values` sets the width of
            the network's output layer.

    Returns:
        The constructed `DQN` instance.
    """
    del obs_spec  # Unused.

    # Flatten the input, then apply an MLP whose last layer has one unit per
    # action.
    layers = [
        snt.Flatten(),
        snt.nets.MLP([50, 50, action_spec.num_values]),
    ]
    q_network = snt.Sequential(layers)
    adam = snt.optimizers.Adam(learning_rate=1e-3)

    hyperparameters = dict(
        batch_size=32,
        discount=0.99,
        replay_capacity=10000,
        min_replay_size=100,
        sgd_period=1,
        target_update_period=4,
        epsilon=0.05,
        seed=42,
    )
    return DQN(
        action_spec=action_spec,
        network=q_network,
        optimizer=adam,
        **hyperparameters,
    )
示例5: __init__
# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def __init__(
    self,
    obs_spec: specs.Array,
    action_spec: specs.Array,
    network: 'PolicyValueNet',
    optimizer: snt.Optimizer,
    max_sequence_length: int,
    td_lambda: float,
    discount: float,
    seed: int,
):
    """Set up a simple actor-critic agent.

    Args:
        obs_spec: Observation spec, passed to the trajectory buffer.
        action_spec: Action spec, passed to the trajectory buffer.
        network: Network object stored on the agent.
        optimizer: Optimizer object stored on the agent.
        max_sequence_length: Passed to `sequence.Buffer` as its third
            argument.
        td_lambda: Stored as `self._td_lambda`.
        discount: Stored as `self._discount`.
        seed: Seed given to `tf.random.set_seed`.
    """
    # Seed TensorFlow's global RNG before anything else is set up.
    tf.random.set_seed(seed)

    # Keep references to the learner components.
    self._optimizer = optimizer
    self._network = network

    # Keep the scalar hyperparameters.
    self._td_lambda, self._discount = td_lambda, discount

    # Windowed buffer used for learning from trajectories.
    self._buffer = sequence.Buffer(obs_spec, action_spec, max_sequence_length)
示例6: default_agent
# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def default_agent(obs_spec: specs.Array,
                  action_spec: specs.DiscreteArray) -> base.Agent:
    """Build an `ActorCriticRNN` agent with the default hyperparameters.

    Args:
        obs_spec: Observation spec, forwarded unchanged to the agent.
        action_spec: Discrete action spec; its `num_values` is the number of
            actions given to the `PolicyValueRNN` network.

    Returns:
        The constructed `ActorCriticRNN` instance.
    """
    recurrent_net = PolicyValueRNN(
        hidden_sizes=[64, 64],
        num_actions=action_spec.num_values,
    )
    adam = snt.optimizers.Adam(learning_rate=3e-3)

    hyperparameters = dict(
        max_sequence_length=32,
        td_lambda=0.9,
        discount=0.99,
        seed=42,
    )
    return ActorCriticRNN(
        obs_spec=obs_spec,
        action_spec=action_spec,
        network=recurrent_net,
        optimizer=adam,
        **hyperparameters,
    )
示例7: __init__
# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def __init__(
    self,
    obs_spec: specs.Array,
    action_spec: specs.Array,
    max_sequence_length: int,
):
    """Allocate zero-filled numpy buffers for one sequence of transitions.

    Args:
        obs_spec: Spec whose `shape` and `dtype` define one observation slot.
        action_spec: Spec whose `shape` and `dtype` define one action slot.
        max_sequence_length: Number of action/reward/discount slots to
            allocate.
    """
    self._max_sequence_length = max_sequence_length

    # The observation buffer has one more slot than the others
    # (presumably for the final observation of a sequence; confirm in the
    # buffer's other methods).
    obs_buffer_shape = (max_sequence_length + 1,) + tuple(obs_spec.shape)
    action_buffer_shape = (max_sequence_length,) + tuple(action_spec.shape)

    self._observations = np.zeros(shape=obs_buffer_shape, dtype=obs_spec.dtype)
    self._actions = np.zeros(
        shape=action_buffer_shape, dtype=action_spec.dtype)

    # Rewards and discounts are scalar per step and always float32.
    self._rewards = np.zeros(max_sequence_length, dtype=np.float32)
    self._discounts = np.zeros(max_sequence_length, dtype=np.float32)
示例8: default_agent
# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def default_agent(obs_spec: specs.Array,
                  action_spec: specs.DiscreteArray,
                  seed: int = 0) -> base.Agent:
    """Build a `DQN` agent with the default hyperparameters.

    Args:
        obs_spec: Observation spec, forwarded unchanged to the agent.
        action_spec: Discrete action spec; its `num_values` sets the width of
            the network's output layer.
        seed: Seed for the `hk.PRNGSequence` handed to the agent.

    Returns:
        The constructed `DQN` instance.
    """

    def network(inputs: jnp.ndarray) -> jnp.ndarray:
        """Flatten the inputs and map them to one value per action."""
        flattened = hk.Flatten()(inputs)
        q_mlp = hk.nets.MLP([64, 64, action_spec.num_values])
        return q_mlp(flattened)

    hyperparameters = dict(
        batch_size=32,
        discount=0.99,
        replay_capacity=10000,
        min_replay_size=100,
        sgd_period=1,
        target_update_period=4,
        epsilon=0.05,
    )
    return DQN(
        obs_spec=obs_spec,
        action_spec=action_spec,
        network=network,
        optimizer=optix.adam(1e-3),
        rng=hk.PRNGSequence(seed),
        **hyperparameters,
    )
示例9: default_agent
# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def default_agent(obs_spec: specs.Array,
                  action_spec: specs.DiscreteArray,
                  seed: int = 0) -> base.Agent:
    """Build an `ActorCritic` agent with the default hyperparameters.

    Args:
        obs_spec: Observation spec, forwarded unchanged to the agent.
        action_spec: Discrete action spec; its `num_values` sets the width of
            the policy head.
        seed: Seed for the `hk.PRNGSequence` handed to the agent.

    Returns:
        The constructed `ActorCritic` instance.
    """

    def network(inputs: jnp.ndarray) -> Tuple[Logits, Value]:
        """Shared MLP torso followed by separate policy and value heads."""
        flattened = hk.Flatten()(inputs)
        # Modules are created in the same order as before so that their
        # automatically assigned names stay the same.
        torso = hk.nets.MLP([64, 64])
        policy_head = hk.Linear(action_spec.num_values)
        value_head = hk.Linear(1)

        hidden = torso(flattened)
        logits = policy_head(hidden)
        # Drop the trailing size-1 axis produced by the value head.
        return logits, jnp.squeeze(value_head(hidden), axis=-1)

    hyperparameters = dict(
        sequence_length=32,
        discount=0.99,
        td_lambda=0.9,
    )
    return ActorCritic(
        obs_spec=obs_spec,
        action_spec=action_spec,
        network=network,
        optimizer=optix.adam(3e-3),
        rng=hk.PRNGSequence(seed),
        **hyperparameters,
    )
示例10: __init__
# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def __init__(self, factors=sprite_lib.FACTOR_NAMES):
    """Constructor.

    Outputs a list of dicts: [{object 1 factors} {object 2 factors} ...]

    Args:
        factors: Iterable of strings naming the factors to record. Every
            entry must appear in `sprite_lib.FACTOR_NAMES`.

    Raises:
        ValueError: If `factors` contains a name that is not in
            `sprite_lib.FACTOR_NAMES`.
    """
    unknown = set(factors) - set(sprite_lib.FACTOR_NAMES)
    if unknown:
        raise ValueError('Factors have to belong to {}.'.format(
            sprite_lib.FACTOR_NAMES))

    self._factors = factors
    self._num_sprites = None

    # One independent scalar float32 spec per recorded factor.
    per_object_spec = {}
    for factor in factors:
        per_object_spec[factor] = specs.Array(shape=(), dtype=np.float32)
    self._per_object_spec = per_object_spec
示例11: convert_dm_control_to_gym_space
# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def convert_dm_control_to_gym_space(dm_control_space):
    """Convert a dm_control spec, or a dict of specs, to a Gym space.

    Args:
        dm_control_space: A `specs.BoundedArray`, a `specs.Array`, or a dict
            whose values are themselves convertible.

    Returns:
        A `spaces.Box` for array specs (bounded by the spec's minimum and
        maximum when available, otherwise unbounded), or a `spaces.Dict` for
        dict inputs.

    Raises:
        NotImplementedError: If `dm_control_space` is none of the supported
            types. Previously this case silently returned None.
    """
    if isinstance(dm_control_space, dict):
        return spaces.Dict({
            key: convert_dm_control_to_gym_space(value)
            for key, value in dm_control_space.items()
        })

    # Bounded specs are tested first, so the plain-Array branch below only
    # ever sees unbounded specs and needs no extra exclusion check.
    if isinstance(dm_control_space, specs.BoundedArray):
        space = spaces.Box(low=dm_control_space.minimum,
                           high=dm_control_space.maximum,
                           dtype=dm_control_space.dtype)
        # Box infers its shape from the bounds; make sure it matches the spec.
        assert space.shape == dm_control_space.shape
        return space

    if isinstance(dm_control_space, specs.Array):
        return spaces.Box(low=-float('inf'),
                          high=float('inf'),
                          shape=dm_control_space.shape,
                          dtype=dm_control_space.dtype)

    raise NotImplementedError(
        'Could not convert spec of type {} to a Gym space: {}'.format(
            type(dm_control_space), dm_control_space))
示例12: reward_spec
# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def reward_spec(self):
    """Describe the reward returned by the environment.

    The default is a single scalar float named 'reward'.

    Returns:
        An `Array` spec, or a nested dict, list or tuple of `Array` specs.
    """
    scalar_shape = ()
    return specs.Array(shape=scalar_shape, dtype=float, name='reward')
示例13: discount_spec
# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def discount_spec(self):
    """Describe the discount returned by the environment.

    The default is a single scalar float named 'discount', bounded to the
    closed interval [0, 1].

    Returns:
        An `Array` spec, or a nested dict, list or tuple of `Array` specs.
    """
    lower, upper = 0., 1.
    return specs.BoundedArray(
        shape=(), dtype=float, minimum=lower, maximum=upper, name='discount')
示例14: observation_spec
# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def observation_spec(self):
    """Define the observations provided by the environment.

    Implementations may return a subclass of `specs.Array` that carries extra
    properties, such as minimum and maximum bounds on the values.

    Returns:
        An `Array` spec, or a nested dict, list or tuple of `Array` specs.
    """
示例15: action_spec
# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def action_spec(self):
    """Define the actions that should be provided to `step`.

    Implementations may return a subclass of `specs.Array` that carries extra
    properties, such as minimum and maximum bounds on the values.

    Returns:
        An `Array` spec, or a nested dict, list or tuple of `Array` specs.
    """