This article collects typical usage examples of the Python method hrlproject.misc.HRLutils.normalize. If you are wondering what HRLutils.normalize does, or how to call it in your own code, the curated examples below may help. You can also explore further usage examples of its containing module, hrlproject.misc.HRLutils.
Four code examples of the HRLutils.normalize method are shown below, sorted by popularity by default. You can upvote the examples you find useful; this feedback helps the site recommend better Python code examples.
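HRLutils itself is not reproduced on this page. From the way the four examples call it, normalize apparently rescales a vector (a plain Python list) to unit Euclidean length. The helper below is only a minimal sketch of that assumed behaviour, written so the usage notes after each example have something concrete to refer to; the standalone name normalize and the zero-vector handling are assumptions, not the project's actual implementation.

import math

def normalize(v):
    # Assumed behaviour: rescale v to unit Euclidean length,
    # leaving an all-zero vector unchanged.
    norm = math.sqrt(sum(x * x for x in v))
    return list(v) if norm == 0 else [x / norm for x in v]

print(normalize([3.0, 4.0]))  # [0.6, 0.8]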
Example 1: gen_encoders
# Required import: from hrlproject.misc import HRLutils [as alias]
# Or: from hrlproject.misc.HRLutils import normalize [as alias]
def gen_encoders(self, N, contextD, context_scale):
    """Generate encoders for state population of learning agent.

    :param N: number of neurons in state population
    :param contextD: dimension of context vector representation
    :param context_scale: weight on context representation relative to
        state (1.0 = equal weighting)
    """

    if contextD > 0:
        contexts = MU.I(contextD)
    else:
        contexts = [[]]

    # neurons each sensitive to different combinations of stimuli
    encs = (list(MU.I(self.stateD)) +
            [o + s + c
             for o in MU.I(self.num_orientations)
             for s in MU.I(self.num_shapes)
             for c in MU.I(self.num_colours)])

    return [HRLutils.normalize(
        HRLutils.normalize(random.choice(encs)) +
        [x * context_scale for x in random.choice(contexts)])
        for _ in range(N)]
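The nested normalize calls are the key detail here: the inner call puts the randomly chosen stimulus encoder on the unit sphere before the context component (scaled by context_scale) is appended, and the outer call renormalizes the concatenated vector so that every encoder ends up unit length regardless of the context weighting. The sketch below illustrates that two-stage construction with made-up vectors and the assumed normalize helper from the sketch above; it does not reproduce the MU matrix utilities used by the real code.

stimulus = [3.0, 0.0, 4.0]  # hypothetical stimulus encoder
context = [1.0, 0.0]        # hypothetical context encoder
context_scale = 0.5

enc = normalize(stimulus)                                    # [0.6, 0.0, 0.8]
enc = normalize(enc + [x * context_scale for x in context])  # unit length in 5-D
print(enc)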
Example 2: tick
# Required import: from hrlproject.misc import HRLutils [as alias]
# Or: from hrlproject.misc.HRLutils import normalize [as alias]
def tick(self):
    cond_active = False

    for c in self.conds:
        if isinstance(c, Timer):
            # if it is a timer entry, just update the timer and check if it
            # has expired
            c.tick()
            if c.ring():
                self.reward = self.rewardval
                self.activate()
                c.reset()
                cond_active = True
        elif (self.env.is_in(self.env.state, c) and
              (self.conds[c] is None or
               HRLutils.similarity(HRLutils.normalize(self.context),
                                   self.conds[c]) > 0.3)):
            # if it is a state entry, check if the agent is in the region
            # associated with that state, and check if that region is the
            # one corresponding to the currently selected context
            self.reward = self.rewardval
            self.rewardamount += 1
            if self.rewardamount > self.rewardresetamount:
                self.activate()
                self.rewardamount = 0
            cond_active = True

    # if no termination conditions met, just give default reward
    if not cond_active:
        self.reward = self.defaultreward

    # reset rewardamount when the reset signal is sent (so that there won't
    # be any leftover rewardamount from the agent's previous decision)
    if self.t > self.resettime[0] and self.t < self.resettime[1]:
        self.rewardamount = 0

    # add a penalty if the state hasn't changed (to help prevent agent from
    # getting stuck)
    if sum(self.prev_state) != 0 and \
            HRLutils.similarity(HRLutils.normalize(self.env.state),
                                HRLutils.normalize(self.prev_state)) < 1.0:
        self.state_penalty = 0.0
    else:
        self.state_penalty += 0.0001
    self.prev_state = copy.deepcopy(self.env.state)

    self.reward = self.reward - self.state_penalty
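HRLutils.similarity is likewise not shown on this page; since both arguments are normalized first, it is presumably a dot product, making the > 0.3 test a cosine-similarity cutoff between the agent's current context signal and the context vector stored for a region, and the < 1.0 test a check that the state has moved since the last tick. The sketch below illustrates that comparison under those assumptions, reusing the normalize helper sketched above.

def similarity(a, b):
    # Assumed behaviour: plain dot product, i.e. cosine similarity
    # when both inputs are unit length.
    return sum(x * y for x, y in zip(a, b))

context = [0.9, 0.1]  # hypothetical decoded context signal
target = [1.0, 0.0]   # hypothetical context vector attached to a region
print(similarity(normalize(context), target) > 0.3)  # True -> region matches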
Example 3: termination_context
# Required import: from hrlproject.misc import HRLutils [as alias]
# Or: from hrlproject.misc.HRLutils import normalize [as alias]
def termination_context(self, c, pstc=0.01):
    self.context = max(self.contexts,
                       key=lambda x: MU.prod(HRLutils.normalize(c),
                                             HRLutils.normalize(x[1])))
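Here the decoded context signal c is snapped to the closest entry in self.contexts, which, judging from the x[1] indexing, is a list of (label, vector) pairs; MU.prod acts as a dot product between the two normalized vectors. A hypothetical plain-Python equivalent, reusing the normalize and similarity sketches above:

contexts = [("orientation", [1.0, 0.0]), ("shape", [0.0, 1.0])]  # hypothetical (label, vector) pairs
c = [0.4, 0.8]  # hypothetical decoded context signal

best = max(contexts, key=lambda x: similarity(normalize(c), normalize(x[1])))
print(best[0])  # "shape"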
Example 4: termination_action
# Required import: from hrlproject.misc import HRLutils [as alias]
# Or: from hrlproject.misc.HRLutils import normalize [as alias]
def termination_action(self, a, pstc=0.01, dimensions=3):
    self.action = HRLutils.normalize(a)