本文整理汇总了Python中rllab.misc.ext.compile_function方法的典型用法代码示例。如果您正苦于以下问题:Python ext.compile_function方法的具体用法?Python ext.compile_function怎么用?Python ext.compile_function使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块rllab.misc.ext的用法示例。
在下文中一共展示了ext.compile_function方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: dist_info
# 需要导入模块: from rllab.misc import ext [as 别名]
# 或者: from rllab.misc.ext import compile_function [as 别名]
def dist_info(self, obs, state_infos=None):
    """
    Evaluate the distribution info for ``obs``, optionally conditioned on latents.

    When ``state_infos`` is ``None`` or empty, the call is forwarded to
    ``self._f_dist_info``. Otherwise a second function taking the observation
    plus one ``latent_<i>`` matrix per latent layer is compiled on first use,
    cached in ``self._f_dist_info_givens``, and called with the values read
    from ``state_infos``.

    :param obs: Observations, passed as the first argument of the compiled function.
    :param state_infos: Mapping holding a ``"latent_<i>"`` entry for every ``i`` in
     ``range(self._n_latent_layers)``; may be ``None`` or empty.
    :return: Whatever the selected compiled function returns.
    """
    if state_infos is None or len(state_infos) == 0:
        return self._f_dist_info(obs)

    if self._f_dist_info_givens is None:
        # First call with latents: build and cache the compiled function.
        obs_var = self._mean_network.input_var
        names = ["latent_%d" % i for i in range(self._n_latent_layers)]
        symbols = [TT.matrix(name) for name in names]
        self._f_dist_info_givens = ext.compile_function(
            inputs=[obs_var] + symbols,
            outputs=self.dist_info_sym(obs_var, dict(zip(names, symbols))),
        )

    # Positional order must match the order of the symbolic inputs above.
    latent_vals = [state_infos["latent_%d" % i] for i in range(self._n_latent_layers)]
    return self._f_dist_info_givens(obs, *latent_vals)
示例2: update_opt
# 需要导入模块: from rllab.misc import ext [as 别名]
# 或者: from rllab.misc.ext import compile_function [as 别名]
def update_opt(self, loss, target, inputs, network_outputs, extra_inputs=None):
    """
    Remember the optimization target, build the ``hf_optimizer`` and register the loss function.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.parameterized.Parameterized` class.
    :param inputs: A list of symbolic variables as inputs
    :param network_outputs: Symbolic expression forwarded to ``hf_optimizer`` as its ``s`` argument.
    :param extra_inputs: Optional list of additional symbolic inputs, appended after ``inputs``.
    :return: No return value.
    """
    self._target = target
    extra_inputs = list() if extra_inputs is None else extra_inputs

    self._hf_optimizer = hf_optimizer(
        _p=target.get_params(trainable=True),
        inputs=(inputs + extra_inputs),
        s=network_outputs,
        costs=[loss],
    )

    def build_f_loss():
        # Kept behind a callable so lazydict decides when compilation happens.
        return compile_function(inputs + extra_inputs, loss)

    self._opt_fun = lazydict(f_loss=build_f_loss)
示例3: update_opt
# 需要导入模块: from rllab.misc import ext [as 别名]
# 或者: from rllab.misc.ext import compile_function [as 别名]
def update_opt(self, f, target, inputs, reg_coeff):
    """
    Prepare a deferred function computing a flattened Hessian-vector product of ``f``.

    :param f: Symbolic scalar expression that is differentiated twice with respect to the
     trainable parameters of ``target``.
    :param target: Object exposing ``get_params(trainable=True)``.
    :param inputs: Symbolic inputs; they are concatenated with a tuple via ``+``, so a tuple
     is expected here.
    :param reg_coeff: Stored on ``self.reg_coeff``; not used inside this method.
    :return: No return value.
    """
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.get_params(trainable=True)

    constraint_grads = theano.grad(f, wrt=params, disconnected_inputs='warn')
    # One placeholder per parameter, shaped like that parameter.
    xs = tuple(ext.new_tensor_like("%s x" % p.name, p) for p in params)

    def Hx_plain():
        # Differentiate sum_i <grad_i, x_i> once more to obtain H * x.
        inner_product = TT.sum([TT.sum(g * x) for g, x in zip(constraint_grads, xs)])
        splits = TT.grad(inner_product, wrt=params, disconnected_inputs='warn')
        return TT.concatenate([TT.flatten(piece) for piece in splits])

    def build_f_Hx_plain():
        return ext.compile_function(
            inputs=inputs + xs,
            outputs=Hx_plain(),
            log_name="f_Hx_plain",
        )

    self.opt_fun = ext.lazydict(f_Hx_plain=build_f_Hx_plain)
示例4: update_opt
# 需要导入模块: from rllab.misc import ext [as 别名]
# 或者: from rllab.misc.ext import compile_function [as 别名]
def update_opt(self, loss, target, inputs, extra_inputs=None, gradients=None, *args, **kwargs):
    """
    Register deferred functions for the loss and for the (loss, flat gradient) pair.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.parameterized.Parameterized` class.
    :param inputs: A list of symbolic variables as inputs
    :param extra_inputs: Optional list of additional symbolic inputs, appended after ``inputs``.
    :param gradients: symbolic expressions for the gradients of trainable parameters of the target. By default
     this will be computed by calling theano.grad
    :param args: Accepted and ignored.
    :param kwargs: Accepted and ignored.
    :return: No return value.
    """
    self._target = target

    def get_opt_output(grads):
        # The gradient is only derived when the caller did not supply one.
        if grads is None:
            grads = theano.grad(loss, target.get_params(trainable=True))
        flat_grad = flatten_tensor_variables(grads)
        return [loss.astype('float64'), flat_grad.astype('float64')]

    extra_inputs = list() if extra_inputs is None else extra_inputs

    def build_f_loss():
        return compile_function(inputs + extra_inputs, loss)

    def build_f_opt():
        return compile_function(
            inputs=inputs + extra_inputs,
            outputs=get_opt_output(gradients),
        )

    self._opt_fun = lazydict(f_loss=build_f_loss, f_opt=build_f_opt)
示例5: update_opt
# 需要导入模块: from rllab.misc import ext [as 别名]
# 或者: from rllab.misc.ext import compile_function [as 别名]
def update_opt(self, loss, target, inputs, extra_inputs=None, gradients=None, **kwargs):
    """
    Register deferred functions that evaluate the loss and that apply one parameter update.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.parameterized.Parameterized` class.
    :param inputs: A list of symbolic variables as inputs
    :param extra_inputs: Optional list of additional symbolic inputs, appended after ``inputs``.
    :param gradients: Symbolic gradients of the trainable parameters; derived with ``theano.grad``
     (``disconnected_inputs='ignore'``) when omitted.
    :param kwargs: Accepted and ignored.
    :return: No return value.
    """
    self._target = target

    if gradients is None:
        gradients = theano.grad(loss, target.get_params(trainable=True), disconnected_inputs='ignore')

    raw_updates = self._update_method(gradients, target.get_params(trainable=True))
    # Cast every update expression to the dtype of the variable it is assigned to.
    updates = OrderedDict((var, expr.astype(var.dtype)) for var, expr in raw_updates.items())

    extra_inputs = list() if extra_inputs is None else extra_inputs

    def build_f_loss():
        return ext.compile_function(inputs + extra_inputs, loss)

    def build_f_opt():
        return ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=loss,
            updates=updates,
        )

    self._opt_fun = ext.lazydict(f_loss=build_f_loss, f_opt=build_f_opt)
示例6: update_opt
# 需要导入模块: from rllab.misc import ext [as 别名]
# 或者: from rllab.misc.ext import compile_function [as 别名]
def update_opt(self, loss, target, leq_constraint, inputs, constraint_name="constraint", *args, **kwargs):
    """
    Set up the penalized objective ``loss + penalty * constraint`` and its deferred functions.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.parameterized.Parameterized` class.
    :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs
    :param constraint_name: Stored on ``self._constraint_name``.
    :param args: Accepted and ignored.
    :param kwargs: Accepted and ignored.
    :return: No return value.
    """
    constraint_term, constraint_value = leq_constraint
    penalty_var = TT.scalar("penalty")
    penalized_loss = loss + penalty_var * constraint_term

    self._target = target
    self._max_constraint_val = constraint_value
    self._constraint_name = constraint_name

    def get_opt_output():
        grads = theano.grad(
            penalized_loss, target.get_params(trainable=True), disconnected_inputs='ignore'
        )
        flat_grad = flatten_tensor_variables(grads)
        return [penalized_loss.astype('float64'), flat_grad.astype('float64')]

    def build_f_loss():
        return compile_function(inputs, loss, log_name="f_loss")

    def build_f_constraint():
        return compile_function(inputs, constraint_term, log_name="f_constraint")

    def build_f_penalized_loss():
        return compile_function(
            inputs=inputs + [penalty_var],
            outputs=[penalized_loss, loss, constraint_term],
            log_name="f_penalized_loss",
        )

    def build_f_opt():
        return compile_function(
            inputs=inputs + [penalty_var],
            outputs=get_opt_output(),
            log_name="f_opt"
        )

    self._opt_fun = lazydict(
        f_loss=build_f_loss,
        f_constraint=build_f_constraint,
        f_penalized_loss=build_f_penalized_loss,
        f_opt=build_f_opt,
    )
示例7: __init__
# 需要导入模块: from rllab.misc import ext [as 别名]
# 或者: from rllab.misc.ext import compile_function [as 别名]
def __init__(
        self,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        num_seq_inputs=1,
        prob_network=None,
):
    """
    Build (or adopt) the probability network and compile the action-probability function.

    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param num_seq_inputs: multiplier applied to the flat observation dimension to obtain the
     input size of the default network
    :param prob_network: manually specified network for this policy, other network params
     are ignored
    :return:
    """
    # Must run before any extra local is introduced: it receives locals().
    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    if prob_network is None:
        input_dim = env_spec.observation_space.flat_dim * num_seq_inputs
        prob_network = MLP(
            input_shape=(input_dim,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    l_prob = prob_network.output_layer
    l_obs = prob_network.input_layer
    self._l_prob = l_prob
    self._l_obs = l_obs
    self._f_prob = ext.compile_function([l_obs.input_var], L.get_output(l_prob))

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [l_prob])
示例8: __init__
# 需要导入模块: from rllab.misc import ext [as 别名]
# 或者: from rllab.misc.ext import compile_function [as 别名]
def __init__(
        self,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        num_seq_inputs=1,
        prob_network=None,
):
    """
    Create the categorical MLP policy: set up its network and the compiled probability function.

    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param num_seq_inputs: factor by which the flat observation dimension is multiplied to size
     the input of the default network
    :param prob_network: manually specified network for this policy, other network params
     are ignored
    :return:
    """
    # locals() is captured here, so no additional local may be defined earlier.
    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    if prob_network is None:
        # Default network: softmax MLP over the (possibly stacked) flat observation.
        flat_input_shape = (env_spec.observation_space.flat_dim * num_seq_inputs,)
        prob_network = MLP(
            input_shape=flat_input_shape,
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer

    prob_expr = L.get_output(prob_network.output_layer)
    self._f_prob = ext.compile_function([prob_network.input_layer.input_var], prob_expr)

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])
示例9: __init__
# 需要导入模块: from rllab.misc import ext [as 别名]
# 或者: from rllab.misc.ext import compile_function [as 别名]
def __init__(
        self,
        env_spec,
        latent_dim=0,  # placeholder arguments start here: only stored on self below
        latent_name='categorical',
        bilinear_integration=False,
        resample=False,  # placeholder arguments end here
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        prob_network=None,
):
    """
    Build (or adopt) the probability network; the latent-related arguments are only recorded.

    :param env_spec: A spec for the mdp.
    :param latent_dim: stored on ``self.latent_dim``; not used to build the network
    :param latent_name: stored on ``self.latent_name``; not used to build the network
    :param bilinear_integration: stored on ``self.bilinear_integration``; not used to build the network
    :param resample: stored on ``self.resample``; not used to build the network
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params
     are ignored
    :return:
    """
    # Placeholder attributes, recorded without influencing network construction.
    # TODO: check whether get_action really needs latent_dim stored on self.
    self.latent_dim = latent_dim
    self.latent_name = latent_name
    self.bilinear_integration = bilinear_integration
    self.resample = resample
    self._set_std_to_0 = False

    # Receives locals(), so no additional local is defined before this call.
    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    if prob_network is None:
        prob_network = MLP(
            input_shape=(env_spec.observation_space.flat_dim,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    l_prob = prob_network.output_layer
    l_obs = prob_network.input_layer
    self._l_prob = l_prob
    self._l_obs = l_obs
    self._f_prob = ext.compile_function([l_obs.input_var], L.get_output(l_prob))

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [l_prob])
示例10: __init__
# 需要导入模块: from rllab.misc import ext [as 别名]
# 或者: from rllab.misc.ext import compile_function [as 别名]
def __init__(self, _p, inputs, s, costs, h=None, ha=None):
    """Build and compile the Theano functions used by the optimizer.

    _p : list of Theano shared variables
      Parameters of the model to be optimized.
    inputs : list of Theano variables
      Symbolic variables that are inputs to your graph (they should also
      include your model 'output'). Your training examples must fit these.
    s : Theano variable
      Symbolic variable with respect to which the Hessian of the objective is
      positive-definite, implicitly defining the Gauss-Newton matrix. Typically,
      it is the activation of the output layer.
    costs : list of Theano variables
      Monitoring costs, the first of which will be the optimized objective.
    h: Theano variable or None
      Structural damping is applied to this variable (typically the hidden units
      of an RNN).
    ha: Theano variable or None
      Symbolic variable that implicitly defines the Gauss-Newton matrix for the
      structural damping term (typically the activation of the hidden layer). If
      None, it will be set to `h`."""
    self.p = _p
    self.shapes = [param.get_value().shape for param in _p]
    self.sizes = [numpy.prod(shape) for shape in self.shapes]
    # Start offset of each parameter inside the concatenated flat vector.
    self.positions = numpy.cumsum([0] + self.sizes)[:-1]

    g = [T.as_tensor_variable(grad) for grad in T.grad(costs[0], _p)]  # for CudaNdarray
    self.f_gc = compile_function(inputs, g + costs)  # during gradient computation
    self.f_cost = compile_function(inputs, costs)  # for quick cost evaluation

    # Indexed by rank: one symbolic constructor per supported number of dimensions.
    symbolic_types = (T.scalar, T.vector, T.matrix, T.tensor3, T.tensor4)

    v = [symbolic_types[len(shape)]() for shape in self.shapes]
    Gv = gauss_newton_product(costs[0], _p, v, s)

    coefficient = T.scalar()  # this is lambda*mu
    givens = {}
    if h is not None:  # structural damping with cross-entropy
        # T.Rop does not support `consider_constant` yet, so use `givens`
        h_constant = symbolic_types[h.ndim]()
        cross_entropy = -h_constant * T.log(h + 1e-10) - (1 - h_constant) * T.log((1 - h) + 1e-10)
        structural_damping = coefficient * cross_entropy.sum() / h.shape[0]
        if ha is None:
            ha = h
        Gv_damping = gauss_newton_product(structural_damping, _p, v, ha)
        Gv = [plain + damped for plain, damped in zip(Gv, Gv_damping)]
        givens = {h_constant: h}

    self.function_Gv = compile_function(inputs + v + [coefficient], Gv, givens=givens)
示例11: __init__
# 需要导入模块: from rllab.misc import ext [as 别名]
# 或者: from rllab.misc.ext import compile_function [as 别名]
def __init__(
        self,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.rectify,
        hidden_W_init=LI.HeUniform(),
        hidden_b_init=LI.Constant(0.),
        output_nonlinearity=NL.tanh,
        output_W_init=LI.Uniform(-3e-3, 3e-3),
        output_b_init=LI.Uniform(-3e-3, 3e-3),
        bn=False):
    """
    Build a dense network from observations to actions and compile its forward function.

    :param env_spec: A spec for the mdp; supplies the flat observation and action dimensions.
    :param hidden_sizes: number of units of each hidden dense layer, in order
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param hidden_W_init: weight initializer of the hidden layers
    :param hidden_b_init: bias initializer of the hidden layers
    :param output_nonlinearity: nonlinearity of the output layer
    :param output_W_init: weight initializer of the output layer
    :param output_b_init: bias initializer of the output layer
    :param bn: when true, ``batch_norm`` wraps the input layer and every hidden layer
    """
    # Receives locals(), so it has to run before any other local is created.
    Serializable.quick_init(self, locals())

    def with_optional_bn(layer):
        return batch_norm(layer) if bn else layer

    l_obs = L.InputLayer(shape=(None, env_spec.observation_space.flat_dim))

    l_hidden = with_optional_bn(l_obs)
    for idx, size in enumerate(hidden_sizes):
        dense = L.DenseLayer(
            l_hidden,
            num_units=size,
            W=hidden_W_init,
            b=hidden_b_init,
            nonlinearity=hidden_nonlinearity,
            name="h%d" % idx
        )
        l_hidden = with_optional_bn(dense)

    l_output = L.DenseLayer(
        l_hidden,
        num_units=env_spec.action_space.flat_dim,
        W=output_W_init,
        b=output_b_init,
        nonlinearity=output_nonlinearity,
        name="output"
    )

    # deterministic=True is intentional: when actions are computed from single
    # observations, the batch normalization parameters must not be updated.
    action_var = L.get_output(l_output, deterministic=True)
    self._output_layer = l_output

    self._f_actions = ext.compile_function([l_obs.input_var], action_var)

    super(DeterministicMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [l_output])
示例12: __init__
# 需要导入模块: from rllab.misc import ext [as 别名]
# 或者: from rllab.misc.ext import compile_function [as 别名]
def __init__(
        self,
        name,
        env_spec,
        conv_filters, conv_filter_sizes, conv_strides, conv_pads,
        hidden_sizes=[],  # NOTE(review): mutable default; not mutated in this method, kept for interface compatibility
        hidden_nonlinearity=NL.rectify,
        output_nonlinearity=NL.softmax,
        prob_network=None,
):
    """
    Build (or adopt) the convolutional probability network and compile the probability function.

    :param name: Not referenced directly by this constructor; it is still part of the
     ``locals()`` handed to ``Serializable.quick_init``.
    :param env_spec: A spec for the mdp.
    :param conv_filters: forwarded to ``ConvNetwork`` as ``conv_filters``
    :param conv_filter_sizes: forwarded to ``ConvNetwork`` as ``conv_filter_sizes``
    :param conv_strides: forwarded to ``ConvNetwork`` as ``conv_strides``
    :param conv_pads: forwarded to ``ConvNetwork`` as ``conv_pads``
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param output_nonlinearity: nonlinearity of the output layer of the default network
     (defaults to softmax)
    :param prob_network: manually specified network for this policy, other network params
     are ignored
    :return:
    """
    # Receives locals(), so it has to run before any other local is created.
    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    self._env_spec = env_spec

    if prob_network is None:
        prob_network = ConvNetwork(
            input_shape=env_spec.observation_space.shape,
            output_dim=env_spec.action_space.n,
            conv_filters=conv_filters,
            conv_filter_sizes=conv_filter_sizes,
            conv_strides=conv_strides,
            conv_pads=conv_pads,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            # Forward the caller's choice. This used to be hard-coded to
            # NL.softmax, which silently ignored the constructor argument.
            output_nonlinearity=output_nonlinearity,
            name="prob_network",
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function(
        [prob_network.input_layer.input_var],
        L.get_output(prob_network.output_layer)
    )

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalConvPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])
示例13: init_opt
# 需要导入模块: from rllab.misc import ext [as 别名]
# 或者: from rllab.misc.ext import compile_function [as 别名]
def init_opt(self):
    """
    Build the symbolic surrogate objective, hand it to the optimizer and compile the KL function.

    The surrogate loss is the negated mean of ``log-likelihood * advantage``; for a recurrent
    policy the mean is replaced by a sum weighted by the ``valid`` mask and divided by the mask
    total. The compiled function returning ``[mean_kl, max_kl]`` is stored in
    ``self.opt_info["f_kl"]``.
    """
    is_recurrent = int(self.policy.recurrent)

    # Recurrent policies get one more leading dimension on every input.
    obs_var = self.env.observation_space.new_tensor_variable(
        'obs',
        extra_dims=1 + is_recurrent,
    )
    action_var = self.env.action_space.new_tensor_variable(
        'action',
        extra_dims=1 + is_recurrent,
    )
    advantage_var = ext.new_tensor(
        'advantage',
        ndim=1 + is_recurrent,
        dtype=theano.config.floatX
    )

    dist = self.policy.distribution
    old_dist_info_vars = {}
    for key in dist.dist_info_keys:
        old_dist_info_vars[key] = ext.new_tensor(
            'old_%s' % key,
            ndim=2 + is_recurrent,
            dtype=theano.config.floatX
        )
    old_dist_info_vars_list = [old_dist_info_vars[key] for key in dist.dist_info_keys]

    valid_var = TT.matrix('valid') if is_recurrent else None

    dist_info_vars = self.policy.dist_info_sym(obs_var, action_var)
    logli = dist.log_likelihood_sym(action_var, dist_info_vars)
    kl = dist.kl_sym(old_dist_info_vars, dist_info_vars)

    # formulate as a minimization problem
    # The gradient of the surrogate objective is the policy gradient
    input_list = [obs_var, action_var, advantage_var]
    if is_recurrent:
        surr_obj = - TT.sum(logli * advantage_var * valid_var) / TT.sum(valid_var)
        mean_kl = TT.sum(kl * valid_var) / TT.sum(valid_var)
        max_kl = TT.max(kl * valid_var)
        input_list.append(valid_var)
    else:
        surr_obj = - TT.mean(logli * advantage_var)
        mean_kl = TT.mean(kl)
        max_kl = TT.max(kl)

    self.optimizer.update_opt(surr_obj, target=self.policy, inputs=input_list)

    f_kl = ext.compile_function(
        inputs=input_list + old_dist_info_vars_list,
        outputs=[mean_kl, max_kl],
    )
    self.opt_info = dict(f_kl=f_kl)