当前位置: 首页>>代码示例>>Python>>正文


Python specs.Array类代码示例

本文整理汇总了Python中dm_env.specs.Array类的典型用法代码示例。如果您正苦于以下问题：Python specs.Array类的具体用法？Python specs.Array怎么用？Python specs.Array使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块dm_env.specs的用法示例。


在下文中一共展示了specs.Array类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _spec_to_box

# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def _spec_to_box(spec):
    """Flatten an iterable of dm_env specs into one float32 ``spaces.Box``.

    Args:
        spec: Iterable of ``specs.Array`` / ``specs.BoundedArray`` instances
            whose dtype is float32 or float64.

    Returns:
        A ``spaces.Box`` whose ``low``/``high`` are the concatenation of the
        flattened bounds of every element. Unbounded ``Array`` specs
        contribute ``-inf``/``+inf``.

    Raises:
        TypeError: If an element is neither an ``Array`` nor a
            ``BoundedArray``.
    """
    def extract_min_max(s):
        """Return ``(low, high)`` vectors of length ``prod(s.shape)``."""
        assert s.dtype == np.float64 or s.dtype == np.float32
        # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the
        # drop-in replacement.
        dim = int(np.prod(s.shape))
        # BoundedArray subclasses Array, so it has to be tested first.
        if isinstance(s, specs.BoundedArray):
            zeros = np.zeros(dim, dtype=np.float32)
            # Adding zeros broadcasts scalar bounds to the flat length.
            return s.minimum + zeros, s.maximum + zeros
        if isinstance(s, specs.Array):
            bound = np.inf * np.ones(dim, dtype=np.float32)
            return -bound, bound
        # Previously this fell through and returned None, which surfaced as an
        # opaque "cannot unpack NoneType" TypeError in the caller.
        raise TypeError(
            'Unsupported spec type {}: {}'.format(type(s), s))

    mins, maxs = [], []
    for s in spec:
        mn, mx = extract_min_max(s)
        mins.append(mn)
        maxs.append(mx)
    low = np.concatenate(mins, axis=0)
    high = np.concatenate(maxs, axis=0)
    assert low.shape == high.shape
    return spaces.Box(low, high, dtype=np.float32)
开发者ID:denisyarats,项目名称:dmc2gym,代码行数:22,代码来源:wrappers.py

示例2: _convert_spec_to_space

# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def _convert_spec_to_space(spec):
    """Recursively convert a dm_env spec (or dict of specs) to a Gym space.

    Args:
        spec: A ``dict`` of specs, a ``specs.DiscreteArray``, a
            ``specs.BoundedArray`` with scalar (size-1) bounds, or a plain
            ``specs.Array``.

    Returns:
        ``spaces.Dict`` for dicts, ``spaces.Discrete`` for discrete specs,
        and ``spaces.Box`` for bounded and unbounded array specs.

    Raises:
        NotImplementedError: If ``spec`` is none of the supported types.
        ValueError: If a ``BoundedArray`` bound holds more than one element
            (raised by ``ndarray.item``).
    """
    if isinstance(spec, dict):
        return spaces.Dict(
            {k: _convert_spec_to_space(v)
             for k, v in spec.items()})
    # DiscreteArray is checked before BoundedArray, and BoundedArray before
    # Array, so the most specific spec type wins.
    if isinstance(spec, specs.DiscreteArray):
        return spaces.Discrete(spec.num_values)
    elif isinstance(spec, specs.BoundedArray):
        # ``np.asscalar`` is deprecated and no longer available in current
        # NumPy; ``ndarray.item()`` is the documented replacement.
        return spaces.Box(
            low=np.asarray(spec.minimum).item(),
            high=np.asarray(spec.maximum).item(),
            shape=spec.shape,
            dtype=spec.dtype)
    elif isinstance(spec, specs.Array):
        return spaces.Box(
            low=-float("inf"),
            high=float("inf"),
            shape=spec.shape,
            dtype=spec.dtype)

    raise NotImplementedError(
        ("Could not convert `Array` spec of type {} to Gym space. "
         "Attempted to convert: {}").format(type(spec), spec))
开发者ID:ray-project,项目名称:ray,代码行数:25,代码来源:dm_env_wrapper.py

示例3: default_agent

# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def default_agent(
    obs_spec: specs.Array,
    action_spec: specs.DiscreteArray,
    num_ensemble: int = 20,
) -> BootstrappedDqn:
  """Build a Bootstrapped DQN agent using the default hyperparameters.

  Args:
    obs_spec: Spec of the observations, forwarded to the agent.
    action_spec: Discrete action spec; `num_values` sizes the ensemble heads.
    num_ensemble: Number of ensemble members to create.

  Returns:
    A configured `BootstrappedDqn` instance.
  """
  q_ensemble = make_ensemble(
      num_actions=action_spec.num_values, num_ensemble=num_ensemble)
  adam = snt.optimizers.Adam(learning_rate=1e-3)

  # Fixed hyperparameters, kept in the same order as the constructor call.
  hyperparams = dict(
      batch_size=128,
      discount=0.99,
      replay_capacity=10000,
      min_replay_size=128,
      sgd_period=1,
      target_update_period=4,
      optimizer=adam,
      mask_prob=0.5,
      noise_scale=0.0,
      epsilon_fn=lambda t: 10 / (10 + t),
      seed=42,
  )
  return BootstrappedDqn(
      obs_spec=obs_spec,
      action_spec=action_spec,
      ensemble=q_ensemble,
      **hyperparams)
开发者ID:deepmind,项目名称:bsuite,代码行数:27,代码来源:agent.py

示例4: default_agent

# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def default_agent(obs_spec: specs.Array,
                  action_spec: specs.DiscreteArray):
  """Build a DQN agent using the default hyperparameters.

  Args:
    obs_spec: Observation spec; accepted for interface parity but not used.
    action_spec: Discrete action spec; `num_values` sets the output width.

  Returns:
    A configured `DQN` instance.
  """
  del obs_spec  # Unused.
  layers = [
      snt.Flatten(),
      snt.nets.MLP([50, 50, action_spec.num_values]),
  ]
  q_network = snt.Sequential(layers)
  adam = snt.optimizers.Adam(learning_rate=1e-3)
  hyperparams = dict(
      batch_size=32,
      discount=0.99,
      replay_capacity=10000,
      min_replay_size=100,
      sgd_period=1,
      target_update_period=4,
      optimizer=adam,
      epsilon=0.05,
      seed=42,
  )
  return DQN(action_spec=action_spec, network=q_network, **hyperparams)
开发者ID:deepmind,项目名称:bsuite,代码行数:23,代码来源:agent.py

示例5: __init__

# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def __init__(
      self,
      obs_spec: specs.Array,
      action_spec: specs.Array,
      network: 'PolicyValueNet',
      optimizer: snt.Optimizer,
      max_sequence_length: int,
      td_lambda: float,
      discount: float,
      seed: int,
  ):
    """Set up a simple actor-critic agent.

    Args:
      obs_spec: Observation spec, used to build the trajectory buffer.
      action_spec: Action spec, used to build the trajectory buffer.
      network: Policy/value network stored on the agent.
      optimizer: Optimizer stored on the agent.
      max_sequence_length: Length passed to the trajectory buffer.
      td_lambda: Lambda hyperparameter stored on the agent.
      discount: Discount hyperparameter stored on the agent.
      seed: Seed applied to TensorFlow's global random state.
    """
    # Seed TensorFlow before anything else is set up.
    tf.random.set_seed(seed)

    # Scalar hyperparameters.
    self._td_lambda, self._discount = td_lambda, discount

    # Learning components.
    self._network, self._optimizer = network, optimizer

    # Windowed buffer that accumulates trajectories for learning.
    trajectory_buffer = sequence.Buffer(
        obs_spec, action_spec, max_sequence_length)
    self._buffer = trajectory_buffer
开发者ID:deepmind,项目名称:bsuite,代码行数:26,代码来源:agent.py

示例6: default_agent

# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def default_agent(obs_spec: specs.Array,
                  action_spec: specs.DiscreteArray) -> base.Agent:
  """Build a recurrent actor-critic agent using the default hyperparameters.

  Args:
    obs_spec: Observation spec, forwarded to the agent.
    action_spec: Discrete action spec; `num_values` sizes the policy output.

  Returns:
    A configured `ActorCriticRNN` instance.
  """
  policy_value_net = PolicyValueRNN(
      hidden_sizes=[64, 64],
      num_actions=action_spec.num_values,
  )
  adam = snt.optimizers.Adam(learning_rate=3e-3)
  hyperparams = dict(
      max_sequence_length=32,
      td_lambda=0.9,
      discount=0.99,
      seed=42,
  )
  return ActorCriticRNN(
      obs_spec=obs_spec,
      action_spec=action_spec,
      network=policy_value_net,
      optimizer=adam,
      **hyperparams)
开发者ID:deepmind,项目名称:bsuite,代码行数:19,代码来源:agent.py

示例7: __init__

# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def __init__(
      self,
      obs_spec: specs.Array,
      action_spec: specs.Array,
      max_sequence_length: int,
  ):
    """Allocate zero-filled numpy buffers that will hold one sequence.

    Args:
      obs_spec: Spec giving the shape and dtype of a single observation.
      action_spec: Spec giving the shape and dtype of a single action.
      max_sequence_length: Number of actions/rewards/discounts to store; the
        observation buffer holds one extra entry.
    """
    # The observation buffer has one more slot than the other buffers.
    obs_buffer_shape = (max_sequence_length + 1,) + tuple(obs_spec.shape)
    action_buffer_shape = (max_sequence_length,) + tuple(action_spec.shape)

    self._observations = np.zeros(obs_buffer_shape, dtype=obs_spec.dtype)
    self._actions = np.zeros(action_buffer_shape, dtype=action_spec.dtype)
    self._rewards = np.zeros(shape=max_sequence_length, dtype=np.float32)
    self._discounts = np.zeros(shape=max_sequence_length, dtype=np.float32)

    self._max_sequence_length = max_sequence_length
开发者ID:deepmind,项目名称:bsuite,代码行数:18,代码来源:sequence.py

示例8: default_agent

# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def default_agent(obs_spec: specs.Array,
                  action_spec: specs.DiscreteArray,
                  seed: int = 0) -> base.Agent:
  """Initialize a DQN agent with default parameters.

  Args:
    obs_spec: Observation spec, forwarded to the agent.
    action_spec: Discrete action spec; `num_values` sets the width of the
      network's output layer.
    seed: Seed for the `hk.PRNGSequence` handed to the agent.

  Returns:
    A `DQN` agent built with the hyperparameters hard-coded below.
  """

  # NOTE(review): presumably `DQN` wraps this function with a Haiku transform,
  # since it instantiates Haiku modules inline; confirm against the agent.
  def network(inputs: jnp.ndarray) -> jnp.ndarray:
    """Flatten the inputs and map them to one value per action via an MLP."""
    flat_inputs = hk.Flatten()(inputs)
    # Two hidden layers of 64 units, then one output unit per action.
    mlp = hk.nets.MLP([64, 64, action_spec.num_values])
    action_values = mlp(flat_inputs)
    return action_values

  return DQN(
      obs_spec=obs_spec,
      action_spec=action_spec,
      network=network,
      optimizer=optix.adam(1e-3),
      batch_size=32,
      discount=0.99,
      replay_capacity=10000,
      min_replay_size=100,
      sgd_period=1,
      target_update_period=4,
      epsilon=0.05,
      rng=hk.PRNGSequence(seed),
  )
开发者ID:deepmind,项目名称:bsuite,代码行数:27,代码来源:agent.py

示例9: default_agent

# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def default_agent(obs_spec: specs.Array,
                  action_spec: specs.DiscreteArray,
                  seed: int = 0) -> base.Agent:
  """Creates an actor-critic agent with default hyperparameters.

  Args:
    obs_spec: Observation spec, forwarded to the agent.
    action_spec: Discrete action spec; `num_values` sets the width of the
      policy head.
    seed: Seed for the `hk.PRNGSequence` handed to the agent.

  Returns:
    An `ActorCritic` agent built with the hyperparameters hard-coded below.
  """

  # NOTE(review): presumably `ActorCritic` wraps this function with a Haiku
  # transform, since it instantiates Haiku modules inline; confirm.
  def network(inputs: jnp.ndarray) -> Tuple[Logits, Value]:
    """Compute policy logits and a value estimate from a shared MLP torso."""
    flat_inputs = hk.Flatten()(inputs)
    torso = hk.nets.MLP([64, 64])
    policy_head = hk.Linear(action_spec.num_values)
    value_head = hk.Linear(1)
    # Both heads read the same torso embedding.
    embedding = torso(flat_inputs)
    logits = policy_head(embedding)
    value = value_head(embedding)
    # Drop the trailing size-1 axis produced by the value head.
    return logits, jnp.squeeze(value, axis=-1)

  return ActorCritic(
      obs_spec=obs_spec,
      action_spec=action_spec,
      network=network,
      optimizer=optix.adam(3e-3),
      rng=hk.PRNGSequence(seed),
      sequence_length=32,
      discount=0.99,
      td_lambda=0.9,
  )
开发者ID:deepmind,项目名称:bsuite,代码行数:27,代码来源:agent.py

示例10: __init__

# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def __init__(self, factors=sprite_lib.FACTOR_NAMES):
    """Set up the per-object factor specs.

    The output described by these specs is a list of dicts, one per object:
    [{object 1 factors} {object 2 factors} ...]

    Args:
      factors: Iterable of strings naming the factors to record. Every entry
        must appear in sprite_lib.FACTOR_NAMES.

    Raises:
      ValueError: If any requested factor is not in sprite_lib.FACTOR_NAMES.
    """
    requested = set(factors)
    known = set(sprite_lib.FACTOR_NAMES)
    # Any leftover after removing the known names is an invalid factor.
    if requested - known:
      raise ValueError('Factors have to belong to {}.'.format(
          sprite_lib.FACTOR_NAMES))

    self._num_sprites = None
    self._factors = factors

    # Every factor is described by a scalar float32 spec.
    per_object_spec = {}
    for factor_name in factors:
      per_object_spec[factor_name] = specs.Array(shape=(), dtype=np.float32)
    self._per_object_spec = per_object_spec
开发者ID:deepmind,项目名称:spriteworld,代码行数:20,代码来源:handcrafted.py

示例11: convert_dm_control_to_gym_space

# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def convert_dm_control_to_gym_space(dm_control_space):
    r"""Translate a dm_control spec (or dict of specs) into a gym space.

    Bounded specs become a ``Box`` with the spec's own bounds, unbounded array
    specs become an infinite ``Box``, and dicts are converted recursively into
    a ``Dict`` space. Any other input yields ``None``.
    """
    # BoundedArray is handled first, so the Array branch below only ever sees
    # unbounded specs.
    if isinstance(dm_control_space, specs.BoundedArray):
        bounded_box = spaces.Box(low=dm_control_space.minimum,
                                 high=dm_control_space.maximum,
                                 dtype=dm_control_space.dtype)
        assert bounded_box.shape == dm_control_space.shape
        return bounded_box

    if isinstance(dm_control_space, specs.Array):
        return spaces.Box(low=-float('inf'),
                          high=float('inf'),
                          shape=dm_control_space.shape,
                          dtype=dm_control_space.dtype)

    if isinstance(dm_control_space, dict):
        converted = {}
        for key, value in dm_control_space.items():
            converted[key] = convert_dm_control_to_gym_space(value)
        return spaces.Dict(converted)

    return None
开发者ID:zuoxingdong,项目名称:dm2gym,代码行数:20,代码来源:dm_suite_env.py

示例12: reward_spec

# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def reward_spec(self):
    """Returns the spec of the reward emitted by the environment.

    The default describes a single scalar float named 'reward'.

    Returns:
      An `Array` spec, or a nested dict, list or tuple of `Array` specs.
    """
    scalar_shape = ()
    spec = specs.Array(shape=scalar_shape, dtype=float, name='reward')
    return spec
开发者ID:deepmind,项目名称:dm_env,代码行数:11,代码来源:_environment.py

示例13: discount_spec

# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def discount_spec(self):
    """Returns the spec of the discount emitted by the environment.

    The default describes a single scalar float named 'discount', bounded to
    the closed interval [0, 1].

    Returns:
      An `Array` spec, or a nested dict, list or tuple of `Array` specs.
    """
    lower, upper = 0., 1.
    spec = specs.BoundedArray(
        shape=(), dtype=float, minimum=lower, maximum=upper, name='discount')
    return spec
开发者ID:deepmind,项目名称:dm_env,代码行数:12,代码来源:_environment.py

示例14: observation_spec

# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def observation_spec(self):
    """Defines the observations provided by the environment.

    May use a subclass of `specs.Array` that specifies additional properties
    such as min and max bounds on the values.

    NOTE(review): the body shown here is only this docstring, so the method
    as written returns `None`; presumably concrete environments override it.
    Confirm against the enclosing class.

    Returns:
      An `Array` spec, or a nested dict, list or tuple of `Array` specs.
    """
开发者ID:deepmind,项目名称:dm_env,代码行数:11,代码来源:_environment.py

示例15: action_spec

# 需要导入模块: from dm_env import specs [as 别名]
# 或者: from dm_env.specs import Array [as 别名]
def action_spec(self):
    """Defines the actions that should be provided to `step`.

    May use a subclass of `specs.Array` that specifies additional properties
    such as min and max bounds on the values.

    NOTE(review): the body shown here is only this docstring, so the method
    as written returns `None`; presumably concrete environments override it.
    Confirm against the enclosing class.

    Returns:
      An `Array` spec, or a nested dict, list or tuple of `Array` specs.
    """
开发者ID:deepmind,项目名称:dm_env,代码行数:11,代码来源:_environment.py


注:本文中的dm_env.specs.Array方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。