This article collects typical usage examples of the Python method rllab.misc.special.discount_cumsum. If you are unsure how exactly to use special.discount_cumsum, or are looking for concrete examples of it, the code samples selected here may help. You can also explore the containing module, rllab.misc.special, in more depth.
The following presents 4 code examples of special.discount_cumsum, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
Example 1: _worker_rollout_policy
# Required import: from rllab.misc import special [as alias]
# or: from rllab.misc.special import discount_cumsum [as alias]
def _worker_rollout_policy(G, args):
    sample_std = args["sample_std"].flatten()
    cur_mean = args["cur_mean"].flatten()
    K = len(cur_mean)
    params = np.random.standard_normal(K) * sample_std + cur_mean
    G.policy.set_param_values(params)
    path = rollout(G.env, G.policy, args["max_path_length"])
    path["returns"] = discount_cumsum(path["rewards"], args["discount"])
    path["undiscounted_return"] = sum(path["rewards"])
    if args["criterion"] == "samples":
        inc = len(path["rewards"])
    elif args["criterion"] == "paths":
        inc = 1
    else:
        raise NotImplementedError
    return (params, path), inc
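The worker above draws a parameter vector from a diagonal Gaussian, runs one rollout with it, and stores both discounted and undiscounted returns on the path. discount_cumsum itself is not shown on this page; the following is only an illustrative sketch of what it computes (rllab's own implementation is vectorized, e.g. via scipy.signal.lfilter, so treat this plain loop as a readable reference, not the library code):

import numpy as np

def discount_cumsum_sketch(rewards, discount):
    # out[t] = rewards[t] + discount * rewards[t+1] + discount**2 * rewards[t+2] + ...
    out = np.zeros(len(rewards))
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + discount * running
        out[t] = running
    return out

print(discount_cumsum_sketch(np.array([1.0, 1.0, 1.0]), 0.9))  # [2.71, 1.9, 1.0]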
Example 2: sample_return
# Required import: from rllab.misc import special [as alias]
# or: from rllab.misc.special import discount_cumsum [as alias]
def sample_return(G, params, max_path_length, discount):
    # env, policy, params, max_path_length, discount = args
    # of course we make the strong assumption that there is no race condition
    G.policy.set_param_values(params)
    path = rollout(
        G.env,
        G.policy,
        max_path_length,
    )
    path["returns"] = discount_cumsum(path["rewards"], discount)
    path["undiscounted_return"] = sum(path["rewards"])
    return path
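In rllab, sample_return is dispatched to workers through the parallel sampler, which populates the global G object; an outer optimization loop (e.g. CEM) then ranks candidate parameter vectors by the returns collected this way. A hypothetical aggregation step over several returned path dicts (the dict contents below are made up purely for illustration):

import numpy as np

# Assumed: `paths` holds dicts returned by sample_return for one candidate parameter vector.
paths = [
    {"undiscounted_return": 10.0, "returns": np.array([3.0, 2.0, 1.0])},
    {"undiscounted_return": 12.0, "returns": np.array([4.0, 2.5, 1.0])},
]
score = np.mean([p["undiscounted_return"] for p in paths])
print(score)  # 11.0 -- the value an outer loop could rank this candidate by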
Example 3: _worker_rollout_policy
# Required import: from rllab.misc import special [as alias]
# or: from rllab.misc.special import discount_cumsum [as alias]
def _worker_rollout_policy(G, args):
    sample_std = args["sample_std"].flatten()
    cur_mean = args["cur_mean"].flatten()
    n_evals = args["n_evals"]
    K = len(cur_mean)
    params = np.random.standard_normal(K) * sample_std + cur_mean
    G.policy.set_param_values(params)
    paths, returns, undiscounted_returns = [], [], []
    for _ in range(n_evals):
        path = rollout(G.env, G.policy, args["max_path_length"])
        path["returns"] = discount_cumsum(path["rewards"], args["discount"])
        path["undiscounted_return"] = sum(path["rewards"])
        paths.append(path)
        returns.append(path["returns"])
        undiscounted_returns.append(path["undiscounted_return"])
    result_path = {'full_paths': paths}
    result_path['undiscounted_return'] = _get_stderr_lb(undiscounted_returns)
    result_path['returns'] = _get_stderr_lb_varyinglens(returns)
    # n_evals does not count toward the criteria below, since it is just
    # multiple evaluations of a single parameter set
    if args["criterion"] == "samples":
        inc = len(path["rewards"])
    elif args["criterion"] == "paths":
        inc = 1
    else:
        raise NotImplementedError
    return (params, result_path), inc
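The helpers _get_stderr_lb and _get_stderr_lb_varyinglens are not reproduced on this page. A plausible reading is a "mean minus standard error" lower confidence bound over the n_evals rollouts; the sketch below is only that assumption made concrete, not the project's actual helpers:

import numpy as np

def stderr_lower_bound(values):
    # Assumed stand-in for _get_stderr_lb: sample mean minus one standard error.
    values = np.asarray(values, dtype=float)
    if len(values) < 2:
        return float(values.mean())
    return float(values.mean() - values.std(ddof=1) / np.sqrt(len(values)))

print(stderr_lower_bound([10.0, 12.0, 11.0]))  # ~10.42, a pessimistic estimate of the mean return 11.0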
Example 4: compute_and_apply_importance_weights
# Required import: from rllab.misc import special [as alias]
# or: from rllab.misc.special import discount_cumsum [as alias]
def compute_and_apply_importance_weights(self, path):
    new_logli = self.algo.policy.distribution.log_likelihood(path["actions"], path["agent_infos"])
    logli_diff = new_logli - path["log_likelihood"]
    if self.algo.decision_weight_mode == 'pd':
        logli_diff = logli_diff[::-1]
        log_decision_weighted_IS_coeffs = special.discount_cumsum(logli_diff, 1)
        IS_coeff = np.exp(log_decision_weighted_IS_coeffs[::-1])
    elif self.algo.decision_weight_mode == 'pt':
        IS_coeff = np.exp(np.sum(logli_diff))
    if self.algo.clip_IS_coeff_above:
        IS_coeff = np.minimum(IS_coeff, self.algo.IS_coeff_upper_bound)
    if self.algo.clip_IS_coeff_below:
        IS_coeff = np.maximum(IS_coeff, self.algo.IS_coeff_lower_bound)
    path["IS_coeff"] = IS_coeff