This article collects typical usage examples of the Python method rllab.misc.ext.new_tensor. If you are unsure how ext.new_tensor works, or are looking for concrete examples of calling it, the curated code samples below should help. You can also explore the containing module rllab.misc.ext for related usage.
Nine code examples of ext.new_tensor are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
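Before the examples, here is a minimal sketch of what ext.new_tensor does, assuming rllab with its Theano backend is installed; the variable name 'obs' and the dimensions below are illustrative only. The function returns a symbolic Theano tensor variable with the requested name, number of dimensions, and dtype, which callers then wire into a computation graph.

import theano
from rllab.misc import ext

# Create a symbolic 2-D float tensor, e.g. a (batch, obs_dim) batch of
# observations; nothing is allocated until a compiled function is run.
obs_var = ext.new_tensor(
    name='obs',
    ndim=2,
    dtype=theano.config.floatX,
)
print(obs_var.ndim)   # 2
print(obs_var.dtype)  # 'float64' unless theano.config.floatX is configured otherwise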
Example 1: new_tensor_variable
# Required import: from rllab.misc import ext [as alias]
# Or: from rllab.misc.ext import new_tensor [as alias]
def new_tensor_variable(self, name, extra_dims):
    # Choose the smallest unsigned integer dtype that can represent n distinct values.
    if self.n <= 2 ** 8:
        return ext.new_tensor(
            name=name,
            ndim=extra_dims + 1,
            dtype='uint8'
        )
    elif self.n <= 2 ** 16:
        return ext.new_tensor(
            name=name,
            ndim=extra_dims + 1,
            dtype='uint16'
        )
    else:
        return ext.new_tensor(
            name=name,
            ndim=extra_dims + 1,
            dtype='uint32'
        )
Example 2: new_tensor_variable
# Required import: from rllab.misc import ext [as alias]
# Or: from rllab.misc.ext import new_tensor [as alias]
def new_tensor_variable(self, name, extra_dims):
    # Same dtype tiering as Example 1: pick the narrowest unsigned integer type.
    if self.n <= 2 ** 8:
        return ext.new_tensor(
            name=name,
            ndim=extra_dims + 1,
            dtype='uint8'
        )
    elif self.n <= 2 ** 16:
        return ext.new_tensor(
            name=name,
            ndim=extra_dims + 1,
            dtype='uint16'
        )
    else:
        return ext.new_tensor(
            name=name,
            ndim=extra_dims + 1,
            dtype='uint32'
        )
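The dtype tiering used in Examples 1 and 2 can be checked in plain Python. The _FakeDiscrete class below is a hypothetical stand-in (not part of rllab) that only carries the attribute n which the tiering inspects.

class _FakeDiscrete:
    # Hypothetical stand-in for a discrete space: only the cardinality n
    # matters for dtype selection.
    def __init__(self, n):
        self.n = n

    def pick_dtype(self):
        if self.n <= 2 ** 8:
            return 'uint8'
        elif self.n <= 2 ** 16:
            return 'uint16'
        return 'uint32'

for n in (4, 300, 70000):
    print(n, _FakeDiscrete(n).pick_dtype())  # 4 uint8, 300 uint16, 70000 uint32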
Example 3: new_tensor_variable
# Required import: from rllab.misc import ext [as alias]
# Or: from rllab.misc.ext import new_tensor [as alias]
def new_tensor_variable(self, name, extra_dims):
    # The dtype is precomputed elsewhere and stored on the space as self._common_dtype.
    return ext.new_tensor(
        name=name,
        ndim=extra_dims + 1,
        dtype=self._common_dtype,
    )
Example 4: new_tensor_variable
# Required import: from rllab.misc import ext [as alias]
# Or: from rllab.misc.ext import new_tensor [as alias]
def new_tensor_variable(self, name, extra_dims):
    # Floating-point variable with extra_dims leading dimensions;
    # assumes theano is imported at module level.
    return ext.new_tensor(
        name=name,
        ndim=extra_dims + 1,
        dtype=theano.config.floatX
    )
Example 5: new_tensor_variable
# Required import: from rllab.misc import ext [as alias]
# Or: from rllab.misc.ext import new_tensor [as alias]
def new_tensor_variable(self, name, extra_dims):
    # Import locally so theano is only required when a variable is created.
    import theano
    return ext.new_tensor(
        name=name,
        ndim=extra_dims + 1,
        dtype=theano.config.floatX
    )
Example 6: new_tensor_variable
# Required import: from rllab.misc import ext [as alias]
# Or: from rllab.misc.ext import new_tensor [as alias]
def new_tensor_variable(self, name, extra_dims):
    # As in Example 3, the space's shared dtype is stored in self._common_dtype.
    return ext.new_tensor(
        name=name,
        ndim=extra_dims + 1,
        dtype=self._common_dtype,
    )
Example 7: new_tensor_variable
# Required import: from rllab.misc import ext [as alias]
# Or: from rllab.misc.ext import new_tensor [as alias]
def new_tensor_variable(self, name, extra_dims):
    # Same pattern as Example 4: a float tensor with extra_dims leading dimensions.
    return ext.new_tensor(
        name=name,
        ndim=extra_dims + 1,
        dtype=theano.config.floatX
    )
Example 8: init_opt
# Required import: from rllab.misc import ext [as alias]
# Or: from rllab.misc.ext import new_tensor [as alias]
def init_opt(self):
    # Assumes module-level imports: theano and theano.tensor as TT.
    is_recurrent = int(self.policy.recurrent)
    obs_var = self.env.observation_space.new_tensor_variable(
        'obs',
        extra_dims=1 + is_recurrent,
    )
    action_var = self.env.action_space.new_tensor_variable(
        'action',
        extra_dims=1 + is_recurrent,
    )
    advantage_var = ext.new_tensor(
        'advantage',
        ndim=1 + is_recurrent,
        dtype=theano.config.floatX
    )
    dist = self.policy.distribution
    old_dist_info_vars = {
        k: ext.new_tensor(
            'old_%s' % k,
            ndim=2 + is_recurrent,
            dtype=theano.config.floatX
        ) for k in dist.dist_info_keys
    }
    old_dist_info_vars_list = [old_dist_info_vars[k] for k in dist.dist_info_keys]
    if is_recurrent:
        valid_var = TT.matrix('valid')
    else:
        valid_var = None
    dist_info_vars = self.policy.dist_info_sym(obs_var, action_var)
    logli = dist.log_likelihood_sym(action_var, dist_info_vars)
    kl = dist.kl_sym(old_dist_info_vars, dist_info_vars)
    # Formulate as a minimization problem; the gradient of the surrogate
    # objective is the policy gradient.
    if is_recurrent:
        surr_obj = - TT.sum(logli * advantage_var * valid_var) / TT.sum(valid_var)
        mean_kl = TT.sum(kl * valid_var) / TT.sum(valid_var)
        max_kl = TT.max(kl * valid_var)
    else:
        surr_obj = - TT.mean(logli * advantage_var)
        mean_kl = TT.mean(kl)
        max_kl = TT.max(kl)
    input_list = [obs_var, action_var, advantage_var]
    if is_recurrent:
        input_list.append(valid_var)
    self.optimizer.update_opt(surr_obj, target=self.policy, inputs=input_list)
    f_kl = ext.compile_function(
        inputs=input_list + old_dist_info_vars_list,
        outputs=[mean_kl, max_kl],
    )
    self.opt_info = dict(
        f_kl=f_kl,
    )
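As a side note on the comment in Example 8 ("the gradient of the surrogate objective is the policy gradient"), the toy, non-symbolic sketch below evaluates the non-recurrent surrogate on made-up numbers; the arrays are purely illustrative and not taken from the example.

import numpy as np

# surr_obj = -mean(log pi(a_t|s_t) * A_t): minimizing it follows the
# REINFORCE policy-gradient ascent direction.
logli = np.array([-0.5, -1.2, -0.3])      # per-step log-likelihoods
advantage = np.array([1.0, -0.5, 2.0])    # per-step advantage estimates
surr_obj = -np.mean(logli * advantage)
print(round(surr_obj, 4))                 # 0.1667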
Example 9: init_opt
# Required import: from rllab.misc import ext [as alias]
# Or: from rllab.misc.ext import new_tensor [as alias]
def init_opt(self):
    # Assumes module-level imports: theano and theano.tensor as TT.
    is_recurrent = int(self.policy.recurrent)
    obs_var = self.env.observation_space.new_tensor_variable(
        'obs',
        extra_dims=1 + is_recurrent,
    )
    action_var = self.env.action_space.new_tensor_variable(
        'action',
        extra_dims=1 + is_recurrent,
    )
    advantage_var = ext.new_tensor(
        'advantage',
        ndim=1 + is_recurrent,
        dtype=theano.config.floatX
    )
    dist = self.policy.distribution
    old_dist_info_vars = {
        k: ext.new_tensor(
            'old_%s' % k,
            ndim=2 + is_recurrent,
            dtype=theano.config.floatX
        ) for k in dist.dist_info_keys
    }
    old_dist_info_vars_list = [old_dist_info_vars[k] for k in dist.dist_info_keys]
    if is_recurrent:
        valid_var = TT.matrix('valid')
    else:
        valid_var = None
    dist_info_vars = self.policy.dist_info_sym(obs_var, action_var)
    kl = dist.kl_sym(old_dist_info_vars, dist_info_vars)
    lr = dist.likelihood_ratio_sym(
        action_var, old_dist_info_vars, dist_info_vars)
    # Likelihood-ratio-weighted surrogate loss with a mean-KL trust-region
    # constraint, enforced by the constrained optimizer below.
    if is_recurrent:
        mean_kl = TT.sum(kl * valid_var) / TT.sum(valid_var)
        surr_loss = - TT.sum(lr * advantage_var * valid_var) / TT.sum(valid_var)
    else:
        mean_kl = TT.mean(kl)
        surr_loss = - TT.mean(lr * advantage_var)
    input_list = [
        obs_var,
        action_var,
        advantage_var,
    ] + old_dist_info_vars_list
    if is_recurrent:
        input_list.append(valid_var)
    self.optimizer.update_opt(
        loss=surr_loss,
        target=self.policy,
        leq_constraint=(mean_kl, self.step_size),
        inputs=input_list,
        constraint_name="mean_kl"
    )
    return dict()