Python ext.compile_function方法代碼示例

本文整理匯總了Python中rllab.misc.ext.compile_function方法的典型用法代碼示例。如果您正苦於以下問題：Python ext.compile_function方法的具體用法？Python ext.compile_function怎麽用？Python ext.compile_function使用的例子？那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類rllab.misc.ext的用法示例。

在下文中一共展示了ext.compile_function方法的13個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: dist_info

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import compile_function [as 別名]
def dist_info(self, obs, state_infos=None):
        if state_infos is None or len(state_infos) == 0:
            return self._f_dist_info(obs)
        if self._f_dist_info_givens is None:
            # compile function
            obs_var = self._mean_network.input_var
            latent_keys = ["latent_%d" % idx for idx in range(self._n_latent_layers)]
            latent_vars = [TT.matrix("latent_%d" % idx) for idx in range(self._n_latent_layers)]
            latent_dict = dict(list(zip(latent_keys, latent_vars)))
            self._f_dist_info_givens = ext.compile_function(
                inputs=[obs_var] + latent_vars,
                outputs=self.dist_info_sym(obs_var, latent_dict),
            )
        latent_vals = []
        for idx in range(self._n_latent_layers):
            latent_vals.append(state_infos["latent_%d" % idx])
        return self._f_dist_info_givens(*[obs] + latent_vals)

開發者ID:florensacc，項目名稱:snn4hrl，代碼行數:19，代碼來源:stochastic_gaussian_mlp_policy.py

示例2: update_opt

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import compile_function [as 別名]
def update_opt(self, loss, target, inputs, network_outputs, extra_inputs=None):
        """
        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over. It should implement methods of the
        :class:`rllab.core.paramerized.Parameterized` class.
        :param inputs: A list of symbolic variables as inputs
        :return: No return value.
        """

        self._target = target

        if extra_inputs is None:
            extra_inputs = list()

        self._hf_optimizer = hf_optimizer(
            _p=target.get_params(trainable=True),
            inputs=(inputs + extra_inputs),
            s=network_outputs,
            costs=[loss],
        )

        self._opt_fun = lazydict(
            f_loss=lambda: compile_function(inputs + extra_inputs, loss),
        )

開發者ID:bstadie，項目名稱:third_person_im，代碼行數:26，代碼來源:hessian_free_optimizer.py

示例3: update_opt

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import compile_function [as 別名]
def update_opt(self, f, target, inputs, reg_coeff):
        self.target = target
        self.reg_coeff = reg_coeff
        params = target.get_params(trainable=True)

        constraint_grads = theano.grad(
            f, wrt=params, disconnected_inputs='warn')
        xs = tuple([ext.new_tensor_like("%s x" % p.name, p) for p in params])

        def Hx_plain():
            Hx_plain_splits = TT.grad(
                TT.sum([TT.sum(g * x)
                        for g, x in zip(constraint_grads, xs)]),
                wrt=params,
                disconnected_inputs='warn'
            )
            return TT.concatenate([TT.flatten(s) for s in Hx_plain_splits])

        self.opt_fun = ext.lazydict(
            f_Hx_plain=lambda: ext.compile_function(
                inputs=inputs + xs,
                outputs=Hx_plain(),
                log_name="f_Hx_plain",
            ),
        )

開發者ID:bstadie，項目名稱:third_person_im，代碼行數:27，代碼來源:conjugate_gradient_optimizer.py

示例4: update_opt

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import compile_function [as 別名]
def update_opt(self, loss, target, inputs, extra_inputs=None, gradients=None, *args, **kwargs):
        """
        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over. It should implement methods of the
        :class:`rllab.core.paramerized.Parameterized` class.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs
        :param gradients: symbolic expressions for the gradients of trainable parameters of the target. By default
        this will be computed by calling theano.grad
        :return: No return value.
        """

        self._target = target

        def get_opt_output(gradients):
            if gradients is None:
                gradients = theano.grad(loss, target.get_params(trainable=True))
            flat_grad = flatten_tensor_variables(gradients)
            return [loss.astype('float64'), flat_grad.astype('float64')]

        if extra_inputs is None:
            extra_inputs = list()

        self._opt_fun = lazydict(
            f_loss=lambda: compile_function(inputs + extra_inputs, loss),
            f_opt=lambda: compile_function(
                inputs=inputs + extra_inputs,
                outputs=get_opt_output(gradients),
            )
        )

開發者ID:bstadie，項目名稱:third_person_im，代碼行數:32，代碼來源:lbfgs_optimizer.py

示例5: update_opt

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import compile_function [as 別名]
def update_opt(self, loss, target, inputs, extra_inputs=None, gradients=None, **kwargs):
        """
        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over. It should implement methods of the
        :class:`rllab.core.paramerized.Parameterized` class.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs
        :return: No return value.
        """

        self._target = target

        if gradients is None:
            gradients = theano.grad(loss, target.get_params(trainable=True), disconnected_inputs='ignore')
        updates = self._update_method(gradients, target.get_params(trainable=True))
        updates = OrderedDict([(k, v.astype(k.dtype)) for k, v in updates.items()])

        if extra_inputs is None:
            extra_inputs = list()

        self._opt_fun = ext.lazydict(
            f_loss=lambda: ext.compile_function(inputs + extra_inputs, loss),
            f_opt=lambda: ext.compile_function(
                inputs=inputs + extra_inputs,
                outputs=loss,
                updates=updates,
            )
        )

開發者ID:bstadie，項目名稱:third_person_im，代碼行數:30，代碼來源:first_order_optimizer.py

示例6: update_opt

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import compile_function [as 別名]
def update_opt(self, loss, target, leq_constraint, inputs, constraint_name="constraint", *args, **kwargs):
        """
        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over. It should implement methods of the
        :class:`rllab.core.paramerized.Parameterized` class.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs
        :return: No return value.
        """
        constraint_term, constraint_value = leq_constraint
        penalty_var = TT.scalar("penalty")
        penalized_loss = loss + penalty_var * constraint_term

        self._target = target
        self._max_constraint_val = constraint_value
        self._constraint_name = constraint_name

        def get_opt_output():
            flat_grad = flatten_tensor_variables(theano.grad(
                penalized_loss, target.get_params(trainable=True), disconnected_inputs='ignore'
            ))
            return [penalized_loss.astype('float64'), flat_grad.astype('float64')]

        self._opt_fun = lazydict(
            f_loss=lambda: compile_function(inputs, loss, log_name="f_loss"),
            f_constraint=lambda: compile_function(inputs, constraint_term, log_name="f_constraint"),
            f_penalized_loss=lambda: compile_function(
                inputs=inputs + [penalty_var],
                outputs=[penalized_loss, loss, constraint_term],
                log_name="f_penalized_loss",
            ),
            f_opt=lambda: compile_function(
                inputs=inputs + [penalty_var],
                outputs=get_opt_output(),
                log_name="f_opt"
            )
        )

開發者ID:bstadie，項目名稱:third_person_im，代碼行數:39，代碼來源:penalty_lbfgs_optimizer.py

示例7: init

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import compile_function [as 別名]
def __init__(
            self,
            env_spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.tanh,
            num_seq_inputs=1,
            prob_network=None,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy, other network params
        are ignored
        :return:
        """
        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)

        if prob_network is None:
            prob_network = MLP(
                input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
                output_dim=env_spec.action_space.n,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
            )

        self._l_prob = prob_network.output_layer
        self._l_obs = prob_network.input_layer
        self._f_prob = ext.compile_function([prob_network.input_layer.input_var], L.get_output(
            prob_network.output_layer))

        self._dist = Categorical(env_spec.action_space.n)

        super(CategoricalMLPPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [prob_network.output_layer])

開發者ID:bstadie，項目名稱:third_person_im，代碼行數:40，代碼來源:categorical_mlp_policy.py

示例8: init

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import compile_function [as 別名]
def __init__(
            self,
            env_spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.tanh,
            num_seq_inputs=1,
            prob_network=None,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy, other network params
        are ignored
        :return:
        """
        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)

        if prob_network is None:
            prob_network = MLP(
                input_shape=(
                    env_spec.observation_space.flat_dim * num_seq_inputs,),
                output_dim=env_spec.action_space.n,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
            )

        self._l_prob = prob_network.output_layer
        self._l_obs = prob_network.input_layer
        self._f_prob = ext.compile_function([prob_network.input_layer.input_var], L.get_output(
            prob_network.output_layer))

        self._dist = Categorical(env_spec.action_space.n)

        super(CategoricalMLPPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [prob_network.output_layer])

開發者ID:sisl，項目名稱:gail-driver，代碼行數:41，代碼來源:categorical_mlp_policy.py

示例9: init

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import compile_function [as 別名]
def __init__(
            self,
            env_spec,
            latent_dim=0,    # all this is fake
            latent_name='categorical',
            bilinear_integration=False,
            resample=False,  # until here
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.tanh,
            prob_network=None,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy, other network params
        are ignored
        :return:
        """
        #bullshit
        self.latent_dim = latent_dim  ##could I avoid needing this self for the get_action?
        self.latent_name = latent_name
        self.bilinear_integration = bilinear_integration
        self.resample = resample
        self._set_std_to_0 = False

        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)

        if prob_network is None:
            prob_network = MLP(
                input_shape=(env_spec.observation_space.flat_dim,),
                output_dim=env_spec.action_space.n,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
            )

        self._l_prob = prob_network.output_layer
        self._l_obs = prob_network.input_layer
        self._f_prob = ext.compile_function([prob_network.input_layer.input_var], L.get_output(
            prob_network.output_layer))

        self._dist = Categorical(env_spec.action_space.n)

        super(CategoricalMLPPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [prob_network.output_layer])

開發者ID:florensacc，項目名稱:snn4hrl，代碼行數:50，代碼來源:categorical_mlp_policy.py

示例10: init

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import compile_function [as 別名]
def __init__(self, _p, inputs, s, costs, h=None, ha=None):
        '''Constructs and compiles the necessary Theano functions.

        p : list of Theano shared variables
            Parameters of the model to be optimized.
        inputs : list of Theano variables
            Symbolic variables that are inputs to your graph (they should also
            include your model 'output'). Your training examples must fit these.
        s : Theano variable
            Symbolic variable with respect to which the Hessian of the objective is
            positive-definite, implicitly defining the Gauss-Newton matrix. Typically,
            it is the activation of the output layer.
        costs : list of Theano variables
            Monitoring costs, the first of which will be the optimized objective.
        h: Theano variable or None
            Structural damping is applied to this variable (typically the hidden units
            of an RNN).
        ha: Theano variable or None
            Symbolic variable that implicitly defines the Gauss-Newton matrix for the
            structural damping term (typically the activation of the hidden layer). If
            None, it will be set to `h`.'''

        self.p = _p
        self.shapes = [i.get_value().shape for i in _p]
        self.sizes = list(map(numpy.prod, self.shapes))
        self.positions = numpy.cumsum([0] + self.sizes)[:-1]

        g = T.grad(costs[0], _p)
        g = list(map(T.as_tensor_variable, g))  # for CudaNdarray
        self.f_gc = compile_function(inputs, g + costs)  # during gradient computation
        self.f_cost = compile_function(inputs, costs)  # for quick cost evaluation

        symbolic_types = T.scalar, T.vector, T.matrix, T.tensor3, T.tensor4

        v = [symbolic_types[len(i)]() for i in self.shapes]
        Gv = gauss_newton_product(costs[0], _p, v, s)

        coefficient = T.scalar()  # this is lambda*mu
        if h is not None:  # structural damping with cross-entropy
            h_constant = symbolic_types[h.ndim]()  # T.Rop does not support `consider_constant` yet, so use `givens`
            structural_damping = coefficient * (
                -h_constant * T.log(h + 1e-10) - (1 - h_constant) * T.log((1 - h) + 1e-10)).sum() / h.shape[0]
            if ha is None: ha = h
            Gv_damping = gauss_newton_product(structural_damping, _p, v, ha)
            Gv = [a + b for a, b in zip(Gv, Gv_damping)]
            givens = {h_constant: h}
        else:
            givens = {}

        self.function_Gv = compile_function(inputs + v + [coefficient], Gv, givens=givens)

開發者ID:bstadie，項目名稱:third_person_im，代碼行數:52，代碼來源:hf.py

示例11: init

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import compile_function [as 別名]
def __init__(
            self,
            env_spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.rectify,
            hidden_W_init=LI.HeUniform(),
            hidden_b_init=LI.Constant(0.),
            output_nonlinearity=NL.tanh,
            output_W_init=LI.Uniform(-3e-3, 3e-3),
            output_b_init=LI.Uniform(-3e-3, 3e-3),
            bn=False):
        Serializable.quick_init(self, locals())

        l_obs = L.InputLayer(shape=(None, env_spec.observation_space.flat_dim))

        l_hidden = l_obs
        if bn:
            l_hidden = batch_norm(l_hidden)

        for idx, size in enumerate(hidden_sizes):
            l_hidden = L.DenseLayer(
                l_hidden,
                num_units=size,
                W=hidden_W_init,
                b=hidden_b_init,
                nonlinearity=hidden_nonlinearity,
                name="h%d" % idx
            )
            if bn:
                l_hidden = batch_norm(l_hidden)

        l_output = L.DenseLayer(
            l_hidden,
            num_units=env_spec.action_space.flat_dim,
            W=output_W_init,
            b=output_b_init,
            nonlinearity=output_nonlinearity,
            name="output"
        )

        # Note the deterministic=True argument. It makes sure that when getting
        # actions from single observations, we do not update params in the
        # batch normalization layers

        action_var = L.get_output(l_output, deterministic=True)
        self._output_layer = l_output

        self._f_actions = ext.compile_function([l_obs.input_var], action_var)

        super(DeterministicMLPPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [l_output])

開發者ID:bstadie，項目名稱:third_person_im，代碼行數:53，代碼來源:deterministic_mlp_policy.py

示例12: init

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import compile_function [as 別名]
def __init__(
            self,
            name,
            env_spec,
            conv_filters, conv_filter_sizes, conv_strides, conv_pads,
            hidden_sizes=[],
            hidden_nonlinearity=NL.rectify,
            output_nonlinearity=NL.softmax,
            prob_network=None,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy, other network params
        are ignored
        :return:
        """
        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)

        self._env_spec = env_spec

        if prob_network is None:
            prob_network = ConvNetwork(
                input_shape=env_spec.observation_space.shape,
                output_dim=env_spec.action_space.n,
                conv_filters=conv_filters,
                conv_filter_sizes=conv_filter_sizes,
                conv_strides=conv_strides,
                conv_pads=conv_pads,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
                name="prob_network",
            )

        self._l_prob = prob_network.output_layer
        self._l_obs = prob_network.input_layer
        self._f_prob = ext.compile_function(
            [prob_network.input_layer.input_var],
            L.get_output(prob_network.output_layer)
        )

        self._dist = Categorical(env_spec.action_space.n)

        super(CategoricalConvPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [prob_network.output_layer])

開發者ID:bstadie，項目名稱:third_person_im，代碼行數:51，代碼來源:categorical_conv_policy.py

示例13: init_opt

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import compile_function [as 別名]
def init_opt(self):
        is_recurrent = int(self.policy.recurrent)

        obs_var = self.env.observation_space.new_tensor_variable(
            'obs',
            extra_dims=1 + is_recurrent,
        )
        action_var = self.env.action_space.new_tensor_variable(
            'action',
            extra_dims=1 + is_recurrent,
        )
        advantage_var = ext.new_tensor(
            'advantage',
            ndim=1 + is_recurrent,
            dtype=theano.config.floatX
        )
        dist = self.policy.distribution
        old_dist_info_vars = {
            k: ext.new_tensor(
                'old_%s' % k,
                ndim=2 + is_recurrent,
                dtype=theano.config.floatX
            ) for k in dist.dist_info_keys
            }
        old_dist_info_vars_list = [old_dist_info_vars[k] for k in dist.dist_info_keys]

        if is_recurrent:
            valid_var = TT.matrix('valid')
        else:
            valid_var = None

        dist_info_vars = self.policy.dist_info_sym(obs_var, action_var)
        logli = dist.log_likelihood_sym(action_var, dist_info_vars)
        kl = dist.kl_sym(old_dist_info_vars, dist_info_vars)

        # formulate as a minimization problem
        # The gradient of the surrogate objective is the policy gradient
        if is_recurrent:
            surr_obj = - TT.sum(logli * advantage_var * valid_var) / TT.sum(valid_var)
            mean_kl = TT.sum(kl * valid_var) / TT.sum(valid_var)
            max_kl = TT.max(kl * valid_var)
        else:
            surr_obj = - TT.mean(logli * advantage_var)
            mean_kl = TT.mean(kl)
            max_kl = TT.max(kl)

        input_list = [obs_var, action_var, advantage_var]
        if is_recurrent:
            input_list.append(valid_var)

        self.optimizer.update_opt(surr_obj, target=self.policy, inputs=input_list)

        f_kl = ext.compile_function(
            inputs=input_list + old_dist_info_vars_list,
            outputs=[mean_kl, max_kl],
        )
        self.opt_info = dict(
            f_kl=f_kl,
        )

開發者ID:openai，項目名稱:vime，代碼行數:61，代碼來源:vpg_expl.py

注：本文中的rllab.misc.ext.compile_function方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。

示例1: dist_info

示例2: update_opt

示例3: update_opt

示例4: update_opt

示例5: update_opt

示例6: update_opt

示例7: __init__

示例8: __init__

示例9: __init__

示例10: __init__

示例11: __init__