

Python HRLutils.similarity method code examples

This article collects typical usage examples of the Python method hrlproject.misc.HRLutils.similarity. If you are looking for concrete examples of how HRLutils.similarity is used in practice, the selected code examples below should help. You can also explore further usage examples for the containing module, hrlproject.misc.HRLutils.


Five code examples of the HRLutils.similarity method are shown below, sorted by popularity by default.
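
The implementation of HRLutils.similarity itself is not reproduced on this page. Judging from how it is called in the examples below (always on two equal-length vectors, typically unit direction vectors or normalized state/context vectors, with thresholds between 0.3 and 1.0), it most likely computes a dot product. The following is a minimal sketch based on that assumption, not the project's actual source; HRLutils.normalize is sketched the same way.

# Hypothetical sketch, inferred from the call sites below; the real
# hrlproject.misc.HRLutils implementation may differ.
import math

def similarity(vec1, vec2):
    # dot product of two equal-length vectors (equivalent to cosine
    # similarity when both vectors have unit length)
    return sum(a * b for a, b in zip(vec1, vec2))

def normalize(vec):
    # rescale a vector to unit length; leave an all-zero vector unchanged
    length = math.sqrt(sum(v * v for v in vec))
    return [v / length for v in vec] if length > 0 else list(vec)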

Example 1: tick

# Required module: from hrlproject.misc import HRLutils [as alias]
# Alternatively: from hrlproject.misc.HRLutils import similarity [as alias]
# This example also uses the standard-library copy module (import copy)
    def tick(self):
        cond_active = False
        for c in self.conds:
            if isinstance(c, Timer):
                # if it is a timer entry, just update the timer and check if it
                # has expired
                c.tick()
                if c.ring():
                    self.reward = self.rewardval
                    self.activate()
                    c.reset()
                    cond_active = True

            elif (self.env.is_in(self.env.state, c) and
                  (self.conds[c] is None or
                   HRLutils.similarity(HRLutils.normalize(self.context),
                                       self.conds[c]) > 0.3)):
                # if it is a state entry, check if the agent is in the region
                # associated with that state, and check if that region is the
                # one corresponding to the currently selected context

                self.reward = self.rewardval

                self.rewardamount += 1
                if self.rewardamount > self.rewardresetamount:
                    self.activate()
                    self.rewardamount = 0

                cond_active = True

        # if no termination conditions met, just give default reward
        if not cond_active:
            self.reward = self.defaultreward

        # reset rewardamount when the reset signal is sent (so that there won't
        # be any leftover rewardamount from the agent's previous decision)
        if self.t > self.resettime[0] and self.t < self.resettime[1]:
            self.rewardamount = 0

        # add a penalty if the state hasn't changed (to help prevent agent from
        # getting stuck)
        if sum(self.prev_state) != 0 and \
                HRLutils.similarity(HRLutils.normalize(self.env.state),
                                    HRLutils.normalize(self.prev_state)) < 1.0:
            self.state_penalty = 0.0
        else:
            self.state_penalty += 0.0001
        self.prev_state = copy.deepcopy(self.env.state)

        self.reward = self.reward - self.state_penalty
Developer: drasmuss; Project: nhrlmodel; Lines: 52; Source: terminationnode.py
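
As a quick illustration of the context check in Example 1: a state-based termination condition only fires when the agent's current context, once normalized, is sufficiently aligned with the vector stored for that condition. The snippet below isolates that check; the vectors are made up for demonstration, and the 0.3 threshold is the one used in the example above.

# Hypothetical standalone version of the context-matching check from
# Example 1 (assumes the dot-product interpretation of similarity above).
from hrlproject.misc import HRLutils

context = [0.9, 0.1, 0.0]      # context vector currently selected by the agent
cond_vector = [1.0, 0.0, 0.0]  # context vector stored for this termination condition

match = HRLutils.similarity(HRLutils.normalize(context), cond_vector) > 0.3
print(match)  # True: the normalized context points mostly along cond_vector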

Example 2: calc_optimal_move

# Required module: from hrlproject.misc import HRLutils [as alias]
# Alternatively: from hrlproject.misc.HRLutils import similarity [as alias]
# This example also uses the standard-library math module (import math)
    def calc_optimal_move(self):
        """Calculate the optimal move for the agent to take in the current
        state/context."""

        # basically the same as PlaceCellEnvironment.calc_optimal_move, except
        # we look at whether or not we have the package to pick a goal state

        stepsize = 0.1
        self.optimal_move = None
        for y in [v * stepsize for v in
                  range(int(-self.imgsize[1] / (2 * stepsize)) + 1,
                        int(self.imgsize[1] / (2 * stepsize)) - 1)]:
            for x in [v * stepsize for v in
                      range(int(-self.imgsize[0] / (2 * stepsize)) + 1,
                            int(self.imgsize[0] / (2 * stepsize)) - 1)]:
                if ((self.is_in((x, y), "a") and not self.in_hand) or
                        (self.is_in((x, y), "b") and self.in_hand)):
                    # unit vector pointing from the current location toward
                    # the goal point
                    angle = math.atan2(y - self.state[1], x - self.state[0])
                    pt = (math.cos(angle), math.sin(angle))

                    # pick the action closest to that direction, penalizing
                    # actions that would move the agent into a wall
                    self.optimal_move = max(
                        self.actions, key=lambda a: -1
                        if self.is_in((a[1][0] * self.dx + self.state[0],
                                       a[1][1] * self.dx + self.state[1]),
                                      "wall")
                        else HRLutils.similarity(a[1], pt))[0]

                    return
Developer: drasmuss; Project: nhrlmodel; Lines: 29; Source: deliveryenvironment.py
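
Examples 2-4 all use the same action-selection idiom: build a unit vector pointing from the current state toward a goal point, then take the action whose own direction vector has the highest similarity to it, while assigning a score of -1 to any action that would step into a wall. The sketch below pulls that idiom out into a self-contained function; is_wall and the sample actions are hypothetical stand-ins for the environments' self.is_in(..., "wall") check and self.actions list.

# Hypothetical, self-contained version of the action-selection idiom shared
# by Examples 2-4.
import math

def pick_move(actions, state, goal, dx, is_wall):
    # unit vector pointing from the current state toward the goal
    angle = math.atan2(goal[1] - state[1], goal[0] - state[0])
    target = (math.cos(angle), math.sin(angle))

    def score(action):
        name, direction = action
        next_pt = (state[0] + direction[0] * dx, state[1] + direction[1] * dx)
        if is_wall(next_pt):
            return -1  # strongly penalize moves that would end in a wall
        # dot-product similarity between the action direction and the target
        return sum(a * b for a, b in zip(direction, target))

    return max(actions, key=score)[0]

# usage with four compass-direction actions and no walls
actions = [("up", (0, 1)), ("down", (0, -1)), ("left", (-1, 0)), ("right", (1, 0))]
print(pick_move(actions, state=(0.0, 0.0), goal=(1.0, 0.2), dx=0.1,
                is_wall=lambda pt: False))  # "right"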

Example 3: calc_optimal_move

# Required module: from hrlproject.misc import HRLutils [as alias]
# Alternatively: from hrlproject.misc.HRLutils import similarity [as alias]
# This example also uses the standard-library math module (import math)
    def calc_optimal_move(self):
        """Calculate the optimal move for the agent to take in the current
        state/context."""

        # basically the same as PlaceCellEnvironment.calc_optimal_move, except
        # we look at the current context to find the goal

        goal = [c for c in self.contexts
                if self.contexts[c] == self.context][0]

        stepsize = 0.1
        self.optimal_move = None
        for y in [v * stepsize for v in range(int(-self.imgsize[1] /
                                                  (2 * stepsize)) + 1,
                                              int(self.imgsize[1] /
                                                  (2 * stepsize)) - 1)]:
            for x in [v * stepsize for v in range(int(-self.imgsize[0] /
                                                      (2 * stepsize)) + 1,
                                                  int(self.imgsize[0] /
                                                      (2 * stepsize)) - 1)]:
                if self.is_in((x, y), goal):
                    # unit vector pointing from the current location toward
                    # the goal point
                    angle = math.atan2(y - self.state[1], x - self.state[0])
                    pt = (math.cos(angle), math.sin(angle))

                    # pick the action closest to that direction, penalizing
                    # actions that would move the agent into a wall
                    self.optimal_move = max(
                        self.actions, key=lambda a: -1
                        if self.is_in((a[1][0] * self.dx + self.state[0],
                                       a[1][1] * self.dx + self.state[1]),
                                      "wall")
                        else HRLutils.similarity(a[1], pt))[0]
                    return
Developer: drasmuss; Project: nhrlmodel; Lines: 32; Source: contextenvironment.py

Example 4: calc_optimal_move

# Required module: from hrlproject.misc import HRLutils [as alias]
# Alternatively: from hrlproject.misc.HRLutils import similarity [as alias]
# This example also uses the standard-library math module (import math)
    def calc_optimal_move(self):
        """Calculates the optimal move for the agent to make in the current state.

        Used for debugging mainly.
        """

        # grid search the image with the given stepsize
        stepsize = 0.1
        self.optimal_move = None
        for y in [v * stepsize for v in
                  range(int(-self.imgsize[1] / (2 * stepsize)) + 1,
                        int(self.imgsize[1] / (2 * stepsize)) - 1)]:
            for x in [v * stepsize for v in
                      range(int(-self.imgsize[0] / (2 * stepsize)) + 1,
                            int(self.imgsize[0] / (2 * stepsize)) - 1)]:
                # if the pt you're looking at is in the region you're looking for
                if self.is_in((x, y), "target"):
                    # generate a target point in the direction from current location to target
                    angle = math.atan2(y - self.state[1], x - self.state[0])
                    pt = (math.cos(angle), math.sin(angle))

                    # pick the action that is closest to the target point
                    # note: penalize actions that would involve moving through a wall
                    self.optimal_move = max(
                        self.actions, key=lambda a: -1
                        if self.is_in((a[1][0] * self.dx + self.state[0],
                                       a[1][1] * self.dx + self.state[1]),
                                      "wall")
                        else HRLutils.similarity(a[1], pt))[0]
                    return
Developer: Seanny123; Project: HRL_1.0; Lines: 29; Source: placecell_bmp.py

Example 5: tick

# Required module: from hrlproject.misc import HRLutils [as alias]
# Alternatively: from hrlproject.misc.HRLutils import similarity [as alias]
    def tick(self):
        # check if env is currently giving reward (we want to give pseudoreward at the same time)
        if self.env.reward != 0:
            if self.target_answer is None:
                self.reward = 0
            else:
                # check if the selected action matches the correct action
                # check if the selected action matches the correct action
                self.reward = (self.rewardval
                               if HRLutils.similarity(self.target_answer,
                                                      self.action) > 0.5
                               else -self.rewardval)
        else:
            self.reward = 0

            # update the target_answer (the action the low level should be selecting given
            # the current context)
            if self.context[0] == "orientation":
                self.target_answer = self.env.state[:self.env.num_orientations]
            elif self.context[0] == "shape":
                self.target_answer = self.env.state[self.env.num_orientations:-self.env.num_colours]
            else:
                self.target_answer = None
Developer: Seanny123; Project: HRL_1.0; Lines: 21; Source: badre_pseudoreward.py
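
The pseudoreward rule in Example 5 reduces to a single comparison: the agent receives +rewardval when the action it selected is sufficiently similar to the target answer, and -rewardval otherwise. The snippet below shows that comparison in isolation; the vectors are made up for demonstration, and the 0.5 threshold is the one used in the example above.

# Hypothetical illustration of the pseudoreward check from Example 5
# (assumes the dot-product interpretation of similarity above).
from hrlproject.misc import HRLutils

rewardval = 1.0
target_answer = [0.0, 1.0, 0.0]  # action the low level should be selecting
action = [0.1, 0.9, 0.0]         # action it actually selected

reward = (rewardval
          if HRLutils.similarity(target_answer, action) > 0.5
          else -rewardval)
print(reward)  # 1.0: the selected action overlaps strongly with the target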


Note: The hrlproject.misc.HRLutils.similarity method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors. Please refer to the corresponding project's license before distributing or using the code. Do not reproduce without permission.