This page collects typical usage examples of the Python method sandbox.rocky.tf.core.layers_powered.LayersPowered.__init__: what the method does, how to call it, and what real uses look like. You may also want to look further into the class it belongs to, sandbox.rocky.tf.core.layers_powered.LayersPowered.
The 12 code examples of LayersPowered.__init__ below are sorted by popularity by default.
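
All twelve examples follow the same pattern: build a graph of sandbox.rocky.tf.core layers, then hand the output layer(s) to LayersPowered.__init__ so the object can recover its trainable parameters by walking the layer graph upstream. A minimal sketch of that pattern (the class MyModule and its shapes are illustrative, not taken from any example below):

import sandbox.rocky.tf.core.layers as L
from sandbox.rocky.tf.core.layers_powered import LayersPowered
from rllab.core.serializable import Serializable

class MyModule(LayersPowered, Serializable):
    def __init__(self, input_dim, output_dim):
        Serializable.quick_init(self, locals())
        l_in = L.InputLayer(shape=(None, input_dim), name="input")
        l_out = L.DenseLayer(l_in, num_units=output_dim, name="output")
        # Registering the output layer is what lets LayersPowered expose
        # every upstream trainable parameter (e.g. via get_params()).
        LayersPowered.__init__(self, [l_out])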
Example 1: __init__

# Required import: from sandbox.rocky.tf.core.layers_powered import LayersPowered [as alias]
# Or: from sandbox.rocky.tf.core.layers_powered.LayersPowered import __init__ [as alias]
def __init__(
        self,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.relu,
        action_merge_layer=-2,
        output_nonlinearity=None,
        bn=False):
    Serializable.quick_init(self, locals())

    l_obs = L.InputLayer(shape=(None, env_spec.observation_space.flat_dim), name="obs")
    l_action = L.InputLayer(shape=(None, env_spec.action_space.flat_dim), name="actions")

    n_layers = len(hidden_sizes) + 1

    if n_layers > 1:
        # Normalize a possibly negative merge index into the range [0, n_layers).
        action_merge_layer = \
            (action_merge_layer % n_layers + n_layers) % n_layers
    else:
        action_merge_layer = 1

    l_hidden = l_obs

    for idx, size in enumerate(hidden_sizes):
        if bn:
            l_hidden = batch_norm(l_hidden)

        if idx == action_merge_layer:
            l_hidden = L.ConcatLayer([l_hidden, l_action])

        l_hidden = L.DenseLayer(
            l_hidden,
            num_units=size,
            nonlinearity=hidden_nonlinearity,
            name="h%d" % (idx + 1)
        )

    if action_merge_layer == n_layers:
        l_hidden = L.ConcatLayer([l_hidden, l_action])

    l_output = L.DenseLayer(
        l_hidden,
        num_units=1,
        nonlinearity=output_nonlinearity,
        name="output"
    )

    output_var = L.get_output(l_output, deterministic=True)

    self._f_qval = tensor_utils.compile_function([l_obs.input_var, l_action.input_var], output_var)
    self._output_layer = l_output
    self._obs_layer = l_obs
    self._action_layer = l_action
    self._output_nonlinearity = output_nonlinearity

    LayersPowered.__init__(self, [l_output])
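
The double modulo above maps a possibly negative action_merge_layer index onto a valid layer position. A quick worked example with the defaults (note that in Python -2 % 3 is already 1, so the extra + n_layers is defensive rather than necessary):

n_layers = len((32, 32)) + 1   # 3
action_merge_layer = -2
print((action_merge_layer % n_layers + n_layers) % n_layers)
# -> 1: actions are concatenated in just before the second hidden layer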
Example 2: __init__

# Required import: from sandbox.rocky.tf.core.layers_powered import LayersPowered [as alias]
# Or: from sandbox.rocky.tf.core.layers_powered.LayersPowered import __init__ [as alias]
def __init__(
        self,
        name,
        env_spec,
        conv_filters, conv_filter_sizes, conv_strides, conv_pads,
        hidden_sizes=[],
        hidden_nonlinearity=tf.nn.relu,
        output_nonlinearity=tf.nn.softmax,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy; other network params
        are ignored
    :return:
    """
    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    self._env_spec = env_spec
    if prob_network is None:
        prob_network = ConvNetwork(
            input_shape=env_spec.observation_space.shape,
            output_dim=env_spec.action_space.n,
            conv_filters=conv_filters,
            conv_filter_sizes=conv_filter_sizes,
            conv_strides=conv_strides,
            conv_pads=conv_pads,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=output_nonlinearity,
            name="prob_network",
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = tensor_utils.compile_function(
        [prob_network.input_layer.input_var],
        L.get_output(prob_network.output_layer)
    )

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalConvPolicy, self).__init__(env_spec)
    LayersPowered.__init__(self, [prob_network.output_layer])
Example 3: __init__

# Required import: from sandbox.rocky.tf.core.layers_powered import LayersPowered [as alias]
# Or: from sandbox.rocky.tf.core.layers_powered.LayersPowered import __init__ [as alias]
def __init__(self, name, output_dim, hidden_sizes, hidden_nonlinearity,
             output_nonlinearity, hidden_W_init=L.XavierUniformInitializer(), hidden_b_init=tf.zeros_initializer,
             output_W_init=L.XavierUniformInitializer(), output_b_init=tf.zeros_initializer,
             input_var=None, input_layer=None, input_shape=None, batch_normalization=False, weight_normalization=False,
             ):
    Serializable.quick_init(self, locals())

    with tf.variable_scope(name):
        if input_layer is None:
            l_in = L.InputLayer(shape=(None,) + input_shape, input_var=input_var, name="input")
        else:
            l_in = input_layer
        self._layers = [l_in]
        l_hid = l_in
        if batch_normalization:
            l_hid = L.batch_norm(l_hid)
        for idx, hidden_size in enumerate(hidden_sizes):
            l_hid = L.DenseLayer(
                l_hid,
                num_units=hidden_size,
                nonlinearity=hidden_nonlinearity,
                name="hidden_%d" % idx,
                W=hidden_W_init,
                b=hidden_b_init,
                weight_normalization=weight_normalization
            )
            if batch_normalization:
                l_hid = L.batch_norm(l_hid)
            self._layers.append(l_hid)
        l_out = L.DenseLayer(
            l_hid,
            num_units=output_dim,
            nonlinearity=output_nonlinearity,
            name="output",
            W=output_W_init,
            b=output_b_init,
            weight_normalization=weight_normalization
        )
        if batch_normalization:
            l_out = L.batch_norm(l_out)
        self._layers.append(l_out)
        self._l_in = l_in
        self._l_out = l_out
        # self._input_var = l_in.input_var
        self._output = L.get_output(l_out)

        LayersPowered.__init__(self, l_out)
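
Example 3 is the MLP building block that several of the later examples instantiate. A hypothetical construction call, assuming this __init__ belongs to that MLP class (the name "value_net" and all sizes are illustrative):

net = MLP("value_net", output_dim=1, hidden_sizes=(64, 64),
          hidden_nonlinearity=tf.nn.tanh, output_nonlinearity=None,
          input_shape=(10,))
# net._output is the symbolic forward pass; because __init__ registered l_out
# with LayersPowered, the dense-layer weights can be recovered from the object.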
Example 4: __init__

# Required import: from sandbox.rocky.tf.core.layers_powered import LayersPowered [as alias]
# Or: from sandbox.rocky.tf.core.layers_powered.LayersPowered import __init__ [as alias]
def __init__(
        self,
        name,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.tanh,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy; other network params
        are ignored
    :return:
    """
    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    with tf.variable_scope(name):
        if prob_network is None:
            prob_network = MLP(
                input_shape=(env_spec.observation_space.flat_dim,),
                output_dim=env_spec.action_space.n,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=tf.nn.softmax,
                name="prob_network",
            )

        self._l_prob = prob_network.output_layer
        self._l_obs = prob_network.input_layer
        self._f_prob = tensor_utils.compile_function(
            [prob_network.input_layer.input_var],
            L.get_output(prob_network.output_layer)
        )

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LayersPowered.__init__(self, [prob_network.output_layer])
Example 5: __init__

# Required import: from sandbox.rocky.tf.core.layers_powered import LayersPowered [as alias]
# Or: from sandbox.rocky.tf.core.layers_powered.LayersPowered import __init__ [as alias]
def __init__(
        self,
        name,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.relu,
        output_nonlinearity=tf.nn.tanh,
        prob_network=None,
        bn=False):
    Serializable.quick_init(self, locals())

    with tf.variable_scope(name):
        if prob_network is None:
            prob_network = MLP(
                input_shape=(env_spec.observation_space.flat_dim,),
                output_dim=env_spec.action_space.flat_dim,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=output_nonlinearity,
                # batch_normalization=True,
                name="prob_network",
            )

        self._l_prob = prob_network.output_layer
        self._l_obs = prob_network.input_layer

        # Note the deterministic=True argument: it makes sure that when getting
        # actions from single observations, we do not update params in the
        # batch normalization layers.
        # TODO: this doesn't currently work properly in the tf version, so batch_norm is left out.
        self._f_prob = tensor_utils.compile_function(
            [prob_network.input_layer.input_var],
            L.get_output(prob_network.output_layer, deterministic=True)
        )

        self.prob_network = prob_network

    super(DeterministicMLPPolicy, self).__init__(env_spec)
    LayersPowered.__init__(self, [prob_network.output_layer])
Example 6: __init__

# Required import: from sandbox.rocky.tf.core.layers_powered import LayersPowered [as alias]
# Or: from sandbox.rocky.tf.core.layers_powered.LayersPowered import __init__ [as alias]
def __init__(
        self,
        name,
        env_spec,
        hidden_dim=32,
        feature_network=None,
        state_include_action=True,
        hidden_nonlinearity=tf.tanh,
        learn_std=True,
        init_std=1.0,
        output_nonlinearity=None,
):
    """
    :param env_spec: A spec for the env.
    :param hidden_dim: dimension of hidden layer
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :return:
    """
    with tf.variable_scope(name):
        Serializable.quick_init(self, locals())
        super(GaussianGRUPolicy, self).__init__(env_spec)

        obs_dim = env_spec.observation_space.flat_dim
        action_dim = env_spec.action_space.flat_dim

        if state_include_action:
            input_dim = obs_dim + action_dim
        else:
            input_dim = obs_dim

        l_input = L.InputLayer(
            shape=(None, None, input_dim),
            name="input"
        )

        if feature_network is None:
            feature_dim = input_dim
            l_flat_feature = None
            l_feature = l_input
        else:
            feature_dim = feature_network.output_layer.output_shape[-1]
            l_flat_feature = feature_network.output_layer
            l_feature = L.OpLayer(
                l_flat_feature,
                extras=[l_input],
                name="reshape_feature",
                op=lambda flat_feature, input: tf.reshape(
                    flat_feature,
                    # tf.pack was renamed tf.stack in TensorFlow 1.0.
                    tf.pack([tf.shape(input)[0], tf.shape(input)[1], feature_dim])
                ),
                shape_op=lambda _, input_shape: (input_shape[0], input_shape[1], feature_dim)
            )

        mean_network = GRUNetwork(
            input_shape=(feature_dim,),
            input_layer=l_feature,
            output_dim=action_dim,
            hidden_dim=hidden_dim,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=output_nonlinearity,
            name="mean_network"
        )

        l_log_std = L.ParamLayer(
            mean_network.input_layer,
            num_units=action_dim,
            param=tf.constant_initializer(np.log(init_std)),
            name="output_log_std",
            trainable=learn_std,
        )

        l_step_log_std = L.ParamLayer(
            mean_network.step_input_layer,
            num_units=action_dim,
            param=l_log_std.param,
            name="step_output_log_std",
            trainable=learn_std,
        )

        self.mean_network = mean_network
        self.feature_network = feature_network
        self.l_input = l_input
        self.state_include_action = state_include_action

        flat_input_var = tf.placeholder(dtype=tf.float32, shape=(None, input_dim), name="flat_input")
        if feature_network is None:
            feature_var = flat_input_var
        else:
            feature_var = L.get_output(l_flat_feature, {feature_network.input_layer: flat_input_var})

        self.f_step_mean_std = tensor_utils.compile_function(
            [
                flat_input_var,
                mean_network.step_prev_hidden_layer.input_var,
            ],
            L.get_output([
                mean_network.step_output_layer,
                l_step_log_std,
                mean_network.step_hidden_layer,
            ], {mean_network.step_input_layer: feature_var})
            # ... the rest of this example is omitted ...
Example 7: __init__

# Required import: from sandbox.rocky.tf.core.layers_powered import LayersPowered [as alias]
# Or: from sandbox.rocky.tf.core.layers_powered.LayersPowered import __init__ [as alias]
def __init__(
        self,
        name,
        env_spec,
        hidden_dim=32,
        feature_network=None,
        prob_network=None,
        state_include_action=True,
        hidden_nonlinearity=tf.tanh,
        forget_bias=1.0,
        use_peepholes=False):
    """
    :param env_spec: A spec for the env.
    :param hidden_dim: dimension of hidden layer
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :return:
    """
    with tf.variable_scope(name):
        assert isinstance(env_spec.action_space, Discrete)
        Serializable.quick_init(self, locals())
        super(CategoricalLSTMPolicy, self).__init__(env_spec)

        obs_dim = env_spec.observation_space.flat_dim
        action_dim = env_spec.action_space.flat_dim

        if state_include_action:
            input_dim = obs_dim + action_dim
        else:
            input_dim = obs_dim

        l_input = L.InputLayer(
            shape=(None, None, input_dim),
            name="input"
        )

        if feature_network is None:
            feature_dim = input_dim
            l_flat_feature = None
            l_feature = l_input
        else:
            feature_dim = feature_network.output_layer.output_shape[-1]
            l_flat_feature = feature_network.output_layer
            l_feature = L.OpLayer(
                l_flat_feature,
                extras=[l_input],
                name="reshape_feature",
                op=lambda flat_feature, input: tf.reshape(
                    flat_feature,
                    # tf.pack was renamed tf.stack in TensorFlow 1.0.
                    tf.pack([tf.shape(input)[0], tf.shape(input)[1], feature_dim])
                ),
                shape_op=lambda _, input_shape: (input_shape[0], input_shape[1], feature_dim)
            )

        if prob_network is None:
            prob_network = LSTMNetwork(
                input_shape=(feature_dim,),
                input_layer=l_feature,
                output_dim=env_spec.action_space.n,
                hidden_dim=hidden_dim,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=tf.nn.softmax,
                forget_bias=forget_bias,
                use_peepholes=use_peepholes,
                name="prob_network"
            )

        self.prob_network = prob_network
        self.feature_network = feature_network
        self.l_input = l_input
        self.state_include_action = state_include_action

        flat_input_var = tf.placeholder(dtype=tf.float32, shape=(None, input_dim), name="flat_input")
        if feature_network is None:
            feature_var = flat_input_var
        else:
            feature_var = L.get_output(l_flat_feature, {feature_network.input_layer: flat_input_var})

        self.f_step_prob = tensor_utils.compile_function(
            [
                flat_input_var,
                prob_network.step_prev_hidden_layer.input_var,
                prob_network.step_prev_cell_layer.input_var
            ],
            L.get_output([
                prob_network.step_output_layer,
                prob_network.step_hidden_layer,
                prob_network.step_cell_layer
            ], {prob_network.step_input_layer: feature_var})
        )

        self.input_dim = input_dim
        self.action_dim = action_dim
        self.hidden_dim = hidden_dim

        self.prev_actions = None
        self.prev_hiddens = None
        self.prev_cells = None

        self.dist = RecurrentCategorical(env_spec.action_space.n)

        out_layers = [prob_network.output_layer]
        # ... the rest of this example is omitted ...
Example 8: __init__

# Required import: from sandbox.rocky.tf.core.layers_powered import LayersPowered [as alias]
# Or: from sandbox.rocky.tf.core.layers_powered.LayersPowered import __init__ [as alias]
def __init__(
        self,
        name,
        input_shape,
        output_dim,
        prob_network=None,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.tanh,
        optimizer=None,
        tr_optimizer=None,
        use_trust_region=True,
        step_size=0.01,
        normalize_inputs=True,
        no_initial_trust_region=True,
):
    """
    :param input_shape: Shape of the input data.
    :param output_dim: Dimension of output.
    :param hidden_sizes: Number of hidden units of each layer of the mean network.
    :param hidden_nonlinearity: Non-linearity used for each layer of the mean network.
    :param optimizer: Optimizer for minimizing the negative log-likelihood.
    :param use_trust_region: Whether to use trust region constraint.
    :param step_size: KL divergence constraint for each iteration
    """
    Serializable.quick_init(self, locals())

    with tf.variable_scope(name):
        if optimizer is None:
            optimizer = LbfgsOptimizer(name="optimizer")
        if tr_optimizer is None:
            tr_optimizer = ConjugateGradientOptimizer()

        self.output_dim = output_dim
        self.optimizer = optimizer
        self.tr_optimizer = tr_optimizer

        if prob_network is None:
            prob_network = MLP(
                input_shape=input_shape,
                output_dim=output_dim,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=tf.nn.softmax,
                name="prob_network"
            )

        l_prob = prob_network.output_layer

        LayersPowered.__init__(self, [l_prob])

        xs_var = prob_network.input_layer.input_var
        ys_var = tf.placeholder(dtype=tf.float32, shape=[None, output_dim], name="ys")
        old_prob_var = tf.placeholder(dtype=tf.float32, shape=[None, output_dim], name="old_prob")

        x_mean_var = tf.get_variable(
            name="x_mean",
            shape=(1,) + input_shape,
            initializer=tf.constant_initializer(0., dtype=tf.float32)
        )
        x_std_var = tf.get_variable(
            name="x_std",
            shape=(1,) + input_shape,
            initializer=tf.constant_initializer(1., dtype=tf.float32)
        )

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var

        prob_var = L.get_output(l_prob, {prob_network.input_layer: normalized_xs_var})

        old_info_vars = dict(prob=old_prob_var)
        info_vars = dict(prob=prob_var)

        dist = self._dist = Categorical(output_dim)

        mean_kl = tf.reduce_mean(dist.kl_sym(old_info_vars, info_vars))

        loss = - tf.reduce_mean(dist.log_likelihood_sym(ys_var, info_vars))

        predicted = tensor_utils.to_onehot_sym(tf.argmax(prob_var, dimension=1), output_dim)

        self.prob_network = prob_network
        self.f_predict = tensor_utils.compile_function([xs_var], predicted)
        self.f_prob = tensor_utils.compile_function([xs_var], prob_var)
        self.l_prob = l_prob

        self.optimizer.update_opt(loss=loss, target=self, network_outputs=[prob_var], inputs=[xs_var, ys_var])
        self.tr_optimizer.update_opt(loss=loss, target=self, network_outputs=[prob_var],
                                     inputs=[xs_var, ys_var, old_prob_var],
                                     leq_constraint=(mean_kl, step_size)
                                     )

        self.use_trust_region = use_trust_region
        self.name = name

        self.normalize_inputs = normalize_inputs
        self.x_mean_var = x_mean_var
        self.x_std_var = x_std_var
        self.first_optimized = not no_initial_trust_region
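
The leq_constraint=(mean_kl, step_size) pair above bounds how far the predicted distribution may move in one trust-region update. What mean_kl measures, written out in NumPy as a sketch mirroring Categorical.kl_sym (the eps term guards the logs, as in rllab's implementation):

import numpy as np

def mean_categorical_kl(old_prob, new_prob, eps=1e-8):
    # KL(old || new) per sample, averaged over the batch
    per_sample = np.sum(old_prob * (np.log(old_prob + eps) - np.log(new_prob + eps)), axis=-1)
    return per_sample.mean()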
Example 9: __init__

# Required import: from sandbox.rocky.tf.core.layers_powered import LayersPowered [as alias]
# Or: from sandbox.rocky.tf.core.layers_powered.LayersPowered import __init__ [as alias]
def __init__(
        self,
        name,
        input_shape,
        output_dim,
        mean_network=None,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.tanh,
        optimizer=None,
        use_trust_region=True,
        step_size=0.01,
        learn_std=True,
        init_std=1.0,
        adaptive_std=False,
        std_share_network=False,
        std_hidden_sizes=(32, 32),
        std_nonlinearity=None,
        normalize_inputs=True,
        normalize_outputs=True,
        subsample_factor=1.0
):
    """
    :param input_shape: Shape of the input data.
    :param output_dim: Dimension of output.
    :param hidden_sizes: Number of hidden units of each layer of the mean network.
    :param hidden_nonlinearity: Non-linearity used for each layer of the mean network.
    :param optimizer: Optimizer for minimizing the negative log-likelihood.
    :param use_trust_region: Whether to use trust region constraint.
    :param step_size: KL divergence constraint for each iteration
    :param learn_std: Whether to learn the standard deviations. Only effective if adaptive_std is False. If
        adaptive_std is True, this parameter is ignored, and the weights for the std network are always learned.
    :param adaptive_std: Whether to make the std a function of the states.
    :param std_share_network: Whether to use the same network as the mean.
    :param std_hidden_sizes: Number of hidden units of each layer of the std network. Only used if
        `std_share_network` is False. It defaults to the same architecture as the mean.
    :param std_nonlinearity: Non-linearity used for each layer of the std network. Only used if `std_share_network`
        is False. It defaults to the same non-linearity as the mean.
    """
    Serializable.quick_init(self, locals())

    with tf.variable_scope(name):
        if optimizer is None:
            if use_trust_region:
                optimizer = PenaltyLbfgsOptimizer("optimizer")
            else:
                optimizer = LbfgsOptimizer("optimizer")

        self._optimizer = optimizer
        self._subsample_factor = subsample_factor

        if mean_network is None:
            mean_network = MLP(
                name="mean_network",
                input_shape=input_shape,
                output_dim=output_dim,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=None,
            )

        l_mean = mean_network.output_layer

        if adaptive_std:
            l_log_std = MLP(
                name="log_std_network",
                input_shape=input_shape,
                input_var=mean_network.input_layer.input_var,
                output_dim=output_dim,
                hidden_sizes=std_hidden_sizes,
                hidden_nonlinearity=std_nonlinearity,
                output_nonlinearity=None,
            ).output_layer
        else:
            l_log_std = L.ParamLayer(
                mean_network.input_layer,
                num_units=output_dim,
                param=tf.constant_initializer(np.log(init_std)),
                name="output_log_std",
                trainable=learn_std,
            )

        LayersPowered.__init__(self, [l_mean, l_log_std])

        xs_var = mean_network.input_layer.input_var
        ys_var = tf.placeholder(dtype=tf.float32, name="ys", shape=(None, output_dim))
        # The source listing reused name="ys" here; renamed so the graph stays readable.
        old_means_var = tf.placeholder(dtype=tf.float32, name="old_means", shape=(None, output_dim))
        old_log_stds_var = tf.placeholder(dtype=tf.float32, name="old_log_stds", shape=(None, output_dim))

        x_mean_var = tf.Variable(
            np.zeros((1,) + input_shape, dtype=np.float32),
            name="x_mean",
        )
        x_std_var = tf.Variable(
            np.ones((1,) + input_shape, dtype=np.float32),
            name="x_std",
        )
        y_mean_var = tf.Variable(
            np.zeros((1, output_dim), dtype=np.float32),
            name="y_mean",
            # ... the rest of this example is omitted ...
Example 10: __init__

# Required import: from sandbox.rocky.tf.core.layers_powered import LayersPowered [as alias]
# Or: from sandbox.rocky.tf.core.layers_powered.LayersPowered import __init__ [as alias]
def __init__(
        self,
        name,
        env_spec,
        hidden_sizes=(32, 32),
        learn_std=True,
        init_std=1.0,
        adaptive_std=False,
        std_share_network=False,
        std_hidden_sizes=(32, 32),
        min_std=1e-6,
        std_hidden_nonlinearity=tf.nn.tanh,
        hidden_nonlinearity=tf.nn.tanh,
        output_nonlinearity=None,
        mean_network=None,
        std_network=None,
        std_parametrization='exp',
        # added arguments
        w_auxiliary=False,
        auxliary_classes=0.,
):
    """
    :param env_spec:
    :param hidden_sizes: list of sizes for the fully-connected hidden layers
    :param learn_std: Is std trainable
    :param init_std: Initial std
    :param adaptive_std:
    :param std_share_network:
    :param std_hidden_sizes: list of sizes for the fully-connected layers for std
    :param min_std: minimum threshold for the std, to avoid numerical issues
    :param std_hidden_nonlinearity:
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param output_nonlinearity: nonlinearity for the output layer
    :param mean_network: custom network for the output mean
    :param std_network: custom network for the output log std
    :param std_parametrization: how the std should be parametrized. There are a few options:
        - exp: the logarithm of the std will be stored, and an exponential transformation applied
        - softplus: the std will be computed as log(1 + exp(x))
    :return:
    """
    Serializable.quick_init(self, locals())
    assert isinstance(env_spec.action_space, Box)

    with tf.variable_scope(name):
        obs_dim = env_spec.observation_space.flat_dim
        action_dim = env_spec.action_space.flat_dim

        # create network
        if mean_network is None:
            mean_network = MLP(
                name="mean_network",
                input_shape=(obs_dim,),
                output_dim=action_dim,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=output_nonlinearity,
                w_auxiliary=w_auxiliary,
                auxliary_classes=auxliary_classes,
            )
        self._mean_network = mean_network

        l_mean = mean_network.output_layer
        obs_var = mean_network.input_layer.input_var

        if std_network is not None:
            l_std_param = std_network.output_layer
        else:
            if adaptive_std:
                std_network = MLP(
                    name="std_network",
                    input_shape=(obs_dim,),
                    input_layer=mean_network.input_layer,
                    output_dim=action_dim,
                    hidden_sizes=std_hidden_sizes,
                    hidden_nonlinearity=std_hidden_nonlinearity,
                    output_nonlinearity=None,
                )
                l_std_param = std_network.output_layer
            else:
                if std_parametrization == 'exp':
                    init_std_param = np.log(init_std)
                elif std_parametrization == 'softplus':
                    init_std_param = np.log(np.exp(init_std) - 1)
                else:
                    raise NotImplementedError
                l_std_param = L.ParamLayer(
                    mean_network.input_layer,
                    num_units=action_dim,
                    param=tf.constant_initializer(init_std_param),
                    name="output_std_param",
                    trainable=learn_std,
                )

        self.std_parametrization = std_parametrization

        if std_parametrization == 'exp':
            min_std_param = np.log(min_std)
        elif std_parametrization == 'softplus':
            min_std_param = np.log(np.exp(min_std) - 1)
        # ... the rest of this example is omitted ...
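
The 'exp' and 'softplus' branches above are inverse transforms: the stored parameter is chosen so that the transformed value equals the requested std. A small numeric check (plain NumPy, values taken from the defaults above):

import numpy as np

init_std = 1.0
exp_param = np.log(init_std)                   # 0.0, since std = exp(param)
softplus_param = np.log(np.exp(init_std) - 1)  # ~0.5413, since std = log(1 + exp(param))
assert np.isclose(np.exp(exp_param), init_std)
assert np.isclose(np.log(1 + np.exp(softplus_param)), init_std)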
Example 11: __init__

# Required import: from sandbox.rocky.tf.core.layers_powered import LayersPowered [as alias]
# Or: from sandbox.rocky.tf.core.layers_powered.LayersPowered import __init__ [as alias]
def __init__(
        self,
        name,
        input_shape,
        output_dim,
        network=None,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.tanh,
        output_nonlinearity=None,
        optimizer=None,
        normalize_inputs=True,
):
    """
    :param input_shape: Shape of the input data.
    :param output_dim: Dimension of output.
    :param hidden_sizes: Number of hidden units of each layer of the mean network.
    :param hidden_nonlinearity: Non-linearity used for each layer of the mean network.
    :param optimizer: Optimizer for minimizing the negative log-likelihood.
    """
    Serializable.quick_init(self, locals())

    with tf.variable_scope(name):
        if optimizer is None:
            optimizer = LbfgsOptimizer(name="optimizer")

        self.output_dim = output_dim
        self.optimizer = optimizer

        if network is None:
            network = MLP(
                input_shape=input_shape,
                output_dim=output_dim,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=output_nonlinearity,
                name="network"
            )

        l_out = network.output_layer

        LayersPowered.__init__(self, [l_out])

        xs_var = network.input_layer.input_var
        ys_var = tf.placeholder(dtype=tf.float32, shape=[None, output_dim], name="ys")

        x_mean_var = tf.get_variable(
            name="x_mean",
            shape=(1,) + input_shape,
            initializer=tf.constant_initializer(0., dtype=tf.float32)
        )
        x_std_var = tf.get_variable(
            name="x_std",
            shape=(1,) + input_shape,
            initializer=tf.constant_initializer(1., dtype=tf.float32)
        )

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var

        fit_ys_var = L.get_output(l_out, {network.input_layer: normalized_xs_var})

        # Mean-squared-error loss. (The source listing negated this term, which
        # would maximize the error when minimized; the leading minus belongs to
        # the log-likelihood losses of the other regressors, not to squared error.)
        loss = tf.reduce_mean(tf.square(fit_ys_var - ys_var))

        self.f_predict = tensor_utils.compile_function([xs_var], fit_ys_var)

        optimizer_args = dict(
            loss=loss,
            target=self,
            network_outputs=[fit_ys_var],
        )
        optimizer_args["inputs"] = [xs_var, ys_var]

        self.optimizer.update_opt(**optimizer_args)

        self.name = name
        self.l_out = l_out

        self.normalize_inputs = normalize_inputs
        self.x_mean_var = x_mean_var
        self.x_std_var = x_std_var
Example 12: __init__

# Required import: from sandbox.rocky.tf.core.layers_powered import LayersPowered [as alias]
# Or: from sandbox.rocky.tf.core.layers_powered.LayersPowered import __init__ [as alias]
def __init__(
        self,
        name,
        env_spec,
        hidden_dims=(32,),
        feature_network=None,
        state_include_action=True,
        hidden_nonlinearity=tf.tanh):
    """
    :param env_spec: A spec for the env.
    :param hidden_dims: dimensions of the hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :return:
    """
    with tf.variable_scope(name):
        assert isinstance(env_spec.action_space, Discrete)
        Serializable.quick_init(self, locals())
        super(RecurrentCategoricalPolicy, self).__init__(env_spec)

        obs_dim = env_spec.observation_space.flat_dim
        action_dim = env_spec.action_space.flat_dim

        if state_include_action:
            input_dim = obs_dim + action_dim
        else:
            input_dim = obs_dim

        l_input = L.InputLayer(
            shape=(None, None, input_dim),
            name="input"
        )

        if feature_network is None:
            feature_dim = input_dim
            l_flat_feature = None
            l_feature = l_input
        else:
            feature_dim = feature_network.output_layer.output_shape[-1]
            l_flat_feature = feature_network.output_layer
            l_feature = L.OpLayer(
                l_flat_feature,
                extras=[l_input],
                name="reshape_feature",
                op=lambda flat_feature, input: tf.reshape(
                    flat_feature,
                    # tf.pack was renamed tf.stack in TensorFlow 1.0.
                    tf.pack([tf.shape(input)[0], tf.shape(input)[1], feature_dim])
                ),
                shape_op=lambda _, input_shape: (input_shape[0], input_shape[1], feature_dim)
            )

        prob_network = DeepGRUNetwork(
            input_shape=(feature_dim,),
            input_layer=l_feature,
            output_dim=env_spec.action_space.n,
            hidden_dims=hidden_dims,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=tf.nn.softmax,
            name="prob_network"
        )

        self.prob_network = prob_network
        self.feature_network = feature_network
        self.l_input = l_input
        self.state_include_action = state_include_action

        flat_input_var = tf.placeholder(tf.float32, shape=(None, input_dim), name="flat_input")
        if feature_network is None:
            feature_var = flat_input_var
        else:
            feature_var = L.get_output(l_flat_feature, {feature_network.input_layer: flat_input_var})

        # Build the step feedforward function.
        inputs = [flat_input_var] \
                 + [prev_hidden.input_var for prev_hidden
                    in prob_network.step_prev_hidden_layers]
        outputs = [prob_network.step_output_layer] \
                  + prob_network.step_hidden_layers
        outputs = L.get_output(outputs, {prob_network.step_input_layer: feature_var})
        self.f_step_prob = tensor_utils.compile_function(inputs, outputs)

        # Function to fetch hidden init values.
        self.f_hid_inits = tensor_utils.compile_function([], prob_network.hid_inits)

        self.input_dim = input_dim
        self.action_dim = action_dim
        self.hidden_dims = hidden_dims

        self.prev_actions = None
        self.prev_hiddens = None

        self.dist = RecurrentCategorical(env_spec.action_space.n)

        out_layers = [prob_network.output_layer]
        if feature_network is not None:
            out_layers.append(feature_network.output_layer)

        LayersPowered.__init__(self, out_layers)
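
Note the final pattern in this last example: when a feature_network is supplied, its output layer is appended to out_layers before calling LayersPowered.__init__, so the feature extractor's parameters are picked up alongside the GRU's whenever the policy's parameters are collected or serialized. Example 7 is truncated at the analogous point, but builds the same out_layers list.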