This article collects typical usage examples of the Python method hrlproject.misc.HRLutils.similarity, showing how the method is called in practice. For the surrounding utilities, see the hrlproject.misc.HRLutils class.
Five code examples of HRLutils.similarity are shown below.
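All of the examples compare vectors with HRLutils.similarity, often after passing them through HRLutils.normalize, and then threshold the result (e.g. > 0.3 or > 0.5). The snippet below is only a minimal sketch of what such helpers typically look like, assuming similarity is a plain dot product and normalize rescales to unit length; it is not the actual hrlproject implementation.

# Hypothetical sketch, not the real hrlproject.misc.HRLutils code.
import math

def similarity(vec1, vec2):
    # Dot product of two equal-length vectors (cosine similarity if both are unit length).
    return sum(a * b for a, b in zip(vec1, vec2))

def normalize(vec):
    # Scale a vector to unit length; an all-zero vector is returned unchanged.
    length = math.sqrt(sum(v * v for v in vec))
    return [v / length for v in vec] if length > 0 else list(vec)

Under that assumption, the thresholds in the examples act as angular cutoffs: a similarity of 1.0 means the two unit vectors point the same way, while 0.0 means they are orthogonal.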
Example 1: tick
# Required import: from hrlproject.misc import HRLutils [as alias]
# Or alternatively: from hrlproject.misc.HRLutils import similarity [as alias]
def tick(self):
cond_active = False
for c in self.conds:
if isinstance(c, Timer):
# if it is a timer entry, just update the timer and check if it
# has expired
c.tick()
if c.ring():
self.reward = self.rewardval
self.activate()
c.reset()
cond_active = True
elif (self.env.is_in(self.env.state, c) and
(self.conds[c] is None or
HRLutils.similarity(HRLutils.normalize(self.context),
self.conds[c]) > 0.3)):
# if it is a state entry, check if the agent is in the region
# associated with that state, and check if that region is the
# one corresponding to the currently selected context
self.reward = self.rewardval
self.rewardamount += 1
if self.rewardamount > self.rewardresetamount:
self.activate()
self.rewardamount = 0
cond_active = True
# if no termination conditions met, just give default reward
if not cond_active:
self.reward = self.defaultreward
# reset rewardamount when the reset signal is sent (so that there won't
# be any leftover rewardamount from the agent's previous decision)
if self.t > self.resettime[0] and self.t < self.resettime[1]:
self.rewardamount = 0
# add a penalty if the state hasn't changed (to help prevent agent from
# getting stuck)
if sum(self.prev_state) != 0 and \
HRLutils.similarity(HRLutils.normalize(self.env.state),
HRLutils.normalize(self.prev_state)) < 1.0:
self.state_penalty = 0.0
else:
self.state_penalty += 0.0001
self.prev_state = copy.deepcopy(self.env.state)
self.reward = self.reward - self.state_penalty
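In the elif branch of Example 1, a state-region reward is delivered only when the agent's current context lines up with the context stored for that region in self.conds. A small hypothetical illustration of that gate (the vector values and dimensions are made up, and it assumes the dot-product reading of similarity sketched above):

from hrlproject.misc import HRLutils

context = HRLutils.normalize([0.9, 0.1])  # agent's currently selected context
cond_context = [1.0, 0.0]                 # context vector associated with the reward region
if HRLutils.similarity(context, cond_context) > 0.3:
    print("context matches region; deliver rewardval")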
Example 2: calc_optimal_move
# Required import: from hrlproject.misc import HRLutils [as alias]
# Or alternatively: from hrlproject.misc.HRLutils import similarity [as alias]
def calc_optimal_move(self):
"""Calculate the optimal move for the agent to take in the current
state/context."""
# basically the same as PlaceCellEnvironment.calc_optimal_move, except
# we look at whether or not we have the package to pick a goal state
stepsize = 0.1
self.optimal_move = None
for y in [v * stepsize for v in
range(int(-self.imgsize[1] / (2 * stepsize)) + 1,
int(self.imgsize[1] / (2 * stepsize)) - 1)]:
for x in [v * stepsize for v in
range(int(-self.imgsize[0] / (2 * stepsize)) + 1,
int(self.imgsize[0] / (2 * stepsize)) - 1)]:
if ((self.is_in((x, y), "a") and not self.in_hand) or
(self.is_in((x, y), "b") and self.in_hand)):
angle = math.atan2(y - self.state[1], x - self.state[0])
pt = (math.cos(angle), math.sin(angle))
self.optimal_move = max(
self.actions, key=lambda x:-1
if self.is_in((x[1][0] * self.dx + self.state[0],
x[1][1] * self.dx + self.state[1]),
"wall")
else HRLutils.similarity(x[1], pt))[0]
return
Example 3: calc_optimal_move
# Required import: from hrlproject.misc import HRLutils [as alias]
# Or alternatively: from hrlproject.misc.HRLutils import similarity [as alias]
def calc_optimal_move(self):
"""Calculate the optimal move for the agent to take in the current
state/context."""
# basically the same as PlaceCellEnvironment.calc_optimal_move, except
# we look at the current context to find the goal
goal = [c for c in self.contexts
if self.contexts[c] == self.context][0]
stepsize = 0.1
self.optimal_move = None
for y in [v * stepsize for v in range(int(-self.imgsize[1] /
(2 * stepsize)) + 1,
int(self.imgsize[1] /
(2 * stepsize)) - 1)]:
for x in [v * stepsize for v in range(int(-self.imgsize[0] /
(2 * stepsize)) + 1,
int(self.imgsize[0] /
(2 * stepsize)) - 1)]:
if self.is_in((x, y), goal):
angle = math.atan2(y - self.state[1], x - self.state[0])
pt = (math.cos(angle), math.sin(angle))
self.optimal_move = max(
self.actions, key=lambda x:-1 if
self.is_in((x[1][0] * self.dx + self.state[0],
x[1][1] * self.dx + self.state[1]),
"wall")
else HRLutils.similarity(x[1], pt))[0]
return
Example 4: calc_optimal_move
# Required import: from hrlproject.misc import HRLutils [as alias]
# Or alternatively: from hrlproject.misc.HRLutils import similarity [as alias]
def calc_optimal_move(self):
"""Calculates the optimal move for the agent to make in the current state.
Used for debugging mainly.
"""
# grid search the image with the given stepsize
stepsize = 0.1
self.optimal_move = None
for y in [v * stepsize for v in range(int(-self.imgsize[1] / (2 * stepsize)) + 1,
int(self.imgsize[1] / (2 * stepsize)) - 1)]:
for x in [v * stepsize for v in range(int(-self.imgsize[0] / (2 * stepsize)) + 1,
int(self.imgsize[0] / (2 * stepsize)) - 1)]:
# if the pt you're looking at is in the region you're looking for
if self.is_in((x, y), "target"):
# generate a target point in the direction from current location to target
angle = math.atan2(y - self.state[1], x - self.state[0])
pt = (math.cos(angle), math.sin(angle))
# pick the action that is closest to the target point
# note: penalize actions that would involve moving through a wall
self.optimal_move = max(self.actions, key=lambda x:-1
if self.is_in((x[1][0] * self.dx + self.state[0],
x[1][1] * self.dx + self.state[1]),
"wall")
else HRLutils.similarity(x[1], pt))[0]
return
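Examples 2 to 4 all use the same selection pattern: grid-search the image with the given stepsize for a point inside the goal region, build a unit vector pt from the current position toward that point, and take the action whose displacement is most similar to pt, while assigning a key of -1 to any action that would step into a wall. The code implies each entry of self.actions is a (name, (dx, dy)) pair and the trailing [0] extracts the name; the standalone illustration below rests on that assumption and on the dot-product reading of similarity.

# Standalone illustration; the action set and helpers are hypothetical.
import math

actions = [("up", (0.0, 1.0)), ("down", (0.0, -1.0)),
           ("left", (-1.0, 0.0)), ("right", (1.0, 0.0))]

def similarity(a, b):
    # assumed dot-product helper (see the sketch near the top of this article)
    return sum(x * y for x, y in zip(a, b))

def hits_wall(action):
    return False  # stand-in for the is_in((dx + x, dy + y), "wall") check

state = (0.0, 0.0)
target = (0.3, 0.4)  # a grid point found inside the goal region
angle = math.atan2(target[1] - state[1], target[0] - state[0])
pt = (math.cos(angle), math.sin(angle))  # unit vector from state toward target

optimal_move = max(actions,
                   key=lambda a: -1 if hits_wall(a) else similarity(a[1], pt))[0]
print(optimal_move)  # "up", since pt = (0.6, 0.8) leans more toward +y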
Example 5: tick
# Required import: from hrlproject.misc import HRLutils [as alias]
# Or alternatively: from hrlproject.misc.HRLutils import similarity [as alias]
def tick(self):
# check if env is currently giving reward (we want to give pseudoreward at the same time)
if self.env.reward != 0:
if self.target_answer is None:
self.reward = 0
else:
# check if the selected action matches the correct action
self.reward = self.rewardval if HRLutils.similarity(self.target_answer, self.action) > 0.5 else -self.rewardval
else:
self.reward = 0
# update the target_answer (the action the low level should be selecting given
# the current context)
if self.context[0] == "orientation":
self.target_answer = self.env.state[:self.env.num_orientations]
elif self.context[0] == "shape":
self.target_answer = self.env.state[self.env.num_orientations:-self.env.num_colours]
else:
self.target_answer = None
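The slicing in Example 5 assumes the environment state is a flat vector laid out as orientation features, then shape features, then colour features, so that state[:num_orientations] is the orientation segment and state[num_orientations:-num_colours] is the shape segment. A hypothetical illustration of that layout:

# Hypothetical feature counts; the real values come from the environment.
num_orientations = 3
num_colours = 2
state = [0.1, 0.7, 0.2,       # orientation features
         0.0, 1.0, 0.0, 0.0,  # shape features
         0.4, 0.6]            # colour features

orientation_part = state[:num_orientations]        # [0.1, 0.7, 0.2]
shape_part = state[num_orientations:-num_colours]  # [0.0, 1.0, 0.0, 0.0]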