當前位置: 首頁>>代碼示例>>Python>>正文


Python ext.new_tensor方法代碼示例

本文整理匯總了Python中rllab.misc.ext.new_tensor方法的典型用法代碼示例。如果您正苦於以下問題:Python ext.new_tensor方法的具體用法?Python ext.new_tensor怎麼用?Python ext.new_tensor使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在rllab.misc.ext的用法示例。


在下文中一共展示了ext.new_tensor方法的9個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: new_tensor_variable

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import new_tensor [as 別名]
def new_tensor_variable(self, name, extra_dims):
        """Create a symbolic tensor for this space through ``ext.new_tensor``.

        The dtype is chosen by comparing ``self.n`` against 2**8 and 2**16:
        ``'uint8'`` when ``self.n <= 2**8``, ``'uint16'`` when
        ``self.n <= 2**16``, and ``'uint32'`` otherwise.

        :param name: name forwarded unchanged to ``ext.new_tensor``.
        :param extra_dims: the tensor is requested with ``extra_dims + 1``
            dimensions.
        :return: the value returned by ``ext.new_tensor``.
        """
        # Select the dtype first so that only one construction call is needed.
        if self.n <= 2 ** 8:
            dtype = 'uint8'
        elif self.n <= 2 ** 16:
            dtype = 'uint16'
        else:
            dtype = 'uint32'
        return ext.new_tensor(name=name, ndim=extra_dims + 1, dtype=dtype)
開發者ID:bstadie,項目名稱:third_person_im,代碼行數:21,代碼來源:discrete.py

示例2: new_tensor_variable

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import new_tensor [as 別名]
def new_tensor_variable(self, name, extra_dims):
        """Create a symbolic tensor for this space through ``ext.new_tensor``.

        ``self.n`` is compared against 2**8 and then 2**16; the first bound
        it does not exceed selects ``'uint8'`` or ``'uint16'`` respectively.
        If it exceeds both, ``'uint32'`` is used.

        :param name: name forwarded unchanged to ``ext.new_tensor``.
        :param extra_dims: the tensor is requested with ``extra_dims + 1``
            dimensions.
        :return: the value returned by ``ext.new_tensor``.
        """
        # Bounds are checked in ascending order; the for/else falls through
        # to the widest dtype when no bound matched.
        for upper_bound, dtype in ((2 ** 8, 'uint8'), (2 ** 16, 'uint16')):
            if self.n <= upper_bound:
                break
        else:
            dtype = 'uint32'
        return ext.new_tensor(name=name, ndim=extra_dims + 1, dtype=dtype)
開發者ID:sisl,項目名稱:gail-driver,代碼行數:21,代碼來源:discrete.py

示例3: new_tensor_variable

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import new_tensor [as 別名]
def new_tensor_variable(self, name, extra_dims):
        """Create a symbolic tensor through ``ext.new_tensor``.

        :param name: name forwarded unchanged to ``ext.new_tensor``.
        :param extra_dims: the tensor is requested with ``extra_dims + 1``
            dimensions.
        :return: the value returned by ``ext.new_tensor``, built with dtype
            ``self._common_dtype``.
        """
        tensor_kwargs = {
            'name': name,
            'ndim': extra_dims + 1,
            'dtype': self._common_dtype,
        }
        return ext.new_tensor(**tensor_kwargs)
開發者ID:bstadie,項目名稱:third_person_im,代碼行數:8,代碼來源:product.py

示例4: new_tensor_variable

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import new_tensor [as 別名]
def new_tensor_variable(self, name, extra_dims):
        """Create a symbolic tensor through ``ext.new_tensor``.

        :param name: name forwarded unchanged to ``ext.new_tensor``.
        :param extra_dims: the tensor is requested with ``extra_dims + 1``
            dimensions.
        :return: the value returned by ``ext.new_tensor``, built with dtype
            ``theano.config.floatX``.
        """
        tensor_kwargs = {
            'name': name,
            'ndim': extra_dims + 1,
            'dtype': theano.config.floatX,
        }
        return ext.new_tensor(**tensor_kwargs)
開發者ID:bstadie,項目名稱:third_person_im,代碼行數:8,代碼來源:box.py

示例5: new_tensor_variable

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import new_tensor [as 別名]
def new_tensor_variable(self, name, extra_dims):
        """Create a symbolic tensor through ``ext.new_tensor``.

        ``theano`` is imported inside the function rather than relying on a
        module-level import.

        :param name: name forwarded unchanged to ``ext.new_tensor``.
        :param extra_dims: the tensor is requested with ``extra_dims + 1``
            dimensions.
        :return: the value returned by ``ext.new_tensor``, built with dtype
            ``theano.config.floatX``.
        """
        import theano
        total_dims = extra_dims + 1
        float_dtype = theano.config.floatX
        return ext.new_tensor(name=name, ndim=total_dims, dtype=float_dtype)
開發者ID:shaneshixiang,項目名稱:rllabplusplus,代碼行數:9,代碼來源:box.py

示例6: new_tensor_variable

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import new_tensor [as 別名]
def new_tensor_variable(self, name, extra_dims):
        """Create a symbolic tensor through ``ext.new_tensor``.

        :param name: name forwarded unchanged to ``ext.new_tensor``.
        :param extra_dims: the tensor is requested with ``extra_dims + 1``
            dimensions.
        :return: the value returned by ``ext.new_tensor``, built with dtype
            ``self._common_dtype``.
        """
        total_dims = extra_dims + 1
        return ext.new_tensor(name=name, ndim=total_dims, dtype=self._common_dtype)
開發者ID:sisl,項目名稱:gail-driver,代碼行數:8,代碼來源:product.py

示例7: new_tensor_variable

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import new_tensor [as 別名]
def new_tensor_variable(self, name, extra_dims):
        """Create a symbolic tensor through ``ext.new_tensor``.

        :param name: name forwarded unchanged to ``ext.new_tensor``.
        :param extra_dims: the tensor is requested with ``extra_dims + 1``
            dimensions.
        :return: the value returned by ``ext.new_tensor``, built with dtype
            ``theano.config.floatX``.
        """
        total_dims = extra_dims + 1
        return ext.new_tensor(name=name, ndim=total_dims, dtype=theano.config.floatX)
開發者ID:sisl,項目名稱:gail-driver,代碼行數:8,代碼來源:box.py

示例8: init_opt

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import new_tensor [as 別名]
def init_opt(self):
        """Build the symbolic surrogate objective and the KL diagnostics.

        Creates symbolic inputs for observations, actions and advantages,
        one ``old_<key>`` tensor per entry of the policy distribution's
        ``dist_info_keys`` and, for a recurrent policy, a ``'valid'`` matrix
        used to weight every term. The negated advantage-weighted
        log-likelihood is handed to ``self.optimizer.update_opt``, and a
        compiled function returning ``[mean_kl, max_kl]`` is stored in
        ``self.opt_info`` under the key ``'f_kl'``.
        """
        recurrent = int(self.policy.recurrent)

        obs_var = self.env.observation_space.new_tensor_variable(
            'obs', extra_dims=1 + recurrent)
        action_var = self.env.action_space.new_tensor_variable(
            'action', extra_dims=1 + recurrent)
        advantage_var = ext.new_tensor(
            'advantage', ndim=1 + recurrent, dtype=theano.config.floatX)

        dist = self.policy.distribution
        # One symbolic input per distribution-info key, named "old_<key>".
        old_dist_info_vars = {}
        for key in dist.dist_info_keys:
            old_dist_info_vars[key] = ext.new_tensor(
                'old_%s' % key,
                ndim=2 + recurrent,
                dtype=theano.config.floatX
            )
        # Flat list in key order, used as extra inputs of the KL function.
        old_dist_info_vars_list = [
            old_dist_info_vars[key] for key in dist.dist_info_keys
        ]

        # The mask only exists for recurrent policies.
        mask_var = TT.matrix('valid') if recurrent else None

        dist_info_vars = self.policy.dist_info_sym(obs_var, action_var)
        logli = dist.log_likelihood_sym(action_var, dist_info_vars)
        kl = dist.kl_sym(old_dist_info_vars, dist_info_vars)

        # Formulated as a minimisation problem: the gradient of the
        # surrogate objective is the policy gradient.
        if not recurrent:
            surr_obj = - TT.mean(logli * advantage_var)
            mean_kl = TT.mean(kl)
            max_kl = TT.max(kl)
        else:
            # Mask-weighted sums normalised by the sum of the mask.
            surr_obj = - TT.sum(logli * advantage_var * mask_var) / TT.sum(mask_var)
            mean_kl = TT.sum(kl * mask_var) / TT.sum(mask_var)
            max_kl = TT.max(kl * mask_var)

        opt_inputs = [obs_var, action_var, advantage_var]
        if recurrent:
            opt_inputs.append(mask_var)

        self.optimizer.update_opt(surr_obj, target=self.policy, inputs=opt_inputs)

        kl_fn = ext.compile_function(
            inputs=opt_inputs + old_dist_info_vars_list,
            outputs=[mean_kl, max_kl],
        )
        self.opt_info = {'f_kl': kl_fn}
開發者ID:openai,項目名稱:vime,代碼行數:61,代碼來源:vpg_expl.py

示例9: init_opt

# 需要導入模塊: from rllab.misc import ext [as 別名]
# 或者: from rllab.misc.ext import new_tensor [as 別名]
def init_opt(self):
        """Build the KL-constrained surrogate loss and register it.

        Creates symbolic inputs for observations, actions and advantages,
        one ``old_<key>`` tensor per entry of the policy distribution's
        ``dist_info_keys`` and, for a recurrent policy, a ``'valid'`` matrix
        used to weight every term. The negated advantage-weighted likelihood
        ratio is passed to ``self.optimizer.update_opt`` as the loss, with
        ``(mean_kl, self.step_size)`` as the ``leq_constraint`` argument.

        :return: an empty dict.
        """
        recurrent = int(self.policy.recurrent)

        obs_var = self.env.observation_space.new_tensor_variable(
            'obs', extra_dims=1 + recurrent)
        action_var = self.env.action_space.new_tensor_variable(
            'action', extra_dims=1 + recurrent)
        advantage_var = ext.new_tensor(
            'advantage', ndim=1 + recurrent, dtype=theano.config.floatX)

        dist = self.policy.distribution
        # One symbolic input per distribution-info key, named "old_<key>".
        old_dist_info_vars = {}
        for key in dist.dist_info_keys:
            old_dist_info_vars[key] = ext.new_tensor(
                'old_%s' % key,
                ndim=2 + recurrent,
                dtype=theano.config.floatX
            )
        # Flat list in key order, appended to the optimizer inputs below.
        old_dist_info_vars_list = [
            old_dist_info_vars[key] for key in dist.dist_info_keys
        ]

        # The mask only exists for recurrent policies.
        mask_var = TT.matrix('valid') if recurrent else None

        dist_info_vars = self.policy.dist_info_sym(obs_var, action_var)
        kl = dist.kl_sym(old_dist_info_vars, dist_info_vars)
        lr = dist.likelihood_ratio_sym(
            action_var, old_dist_info_vars, dist_info_vars)

        if not recurrent:
            mean_kl = TT.mean(kl)
            surr_loss = - TT.mean(lr * advantage_var)
        else:
            # Mask-weighted sums normalised by the sum of the mask.
            mean_kl = TT.sum(kl * mask_var) / TT.sum(mask_var)
            surr_loss = - TT.sum(lr * advantage_var * mask_var) / TT.sum(mask_var)

        opt_inputs = [obs_var, action_var, advantage_var] + old_dist_info_vars_list
        if recurrent:
            # The mask goes last, after the old distribution-info inputs.
            opt_inputs.append(mask_var)

        self.optimizer.update_opt(
            loss=surr_loss,
            target=self.policy,
            leq_constraint=(mean_kl, self.step_size),
            inputs=opt_inputs,
            constraint_name="mean_kl"
        )
        return {}
開發者ID:openai,項目名稱:vime,代碼行數:61,代碼來源:npo_expl.py


注:本文中的rllab.misc.ext.new_tensor方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。