當前位置: 首頁>>代碼示例>>Python>>正文


Python misc.HRLutils類代碼示例

本文整理匯總了Python中hrlproject.misc.HRLutils的典型用法代碼示例。如果您正苦於以下問題:Python HRLutils類的具體用法?Python HRLutils怎麼用?Python HRLutils使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。


在下文中一共展示了HRLutils類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: combine_files

def combine_files(path=None, max_files=10):
    """Merge numbered data files into one combined data file.

    Loads ``<path>.<i>.txt`` for every ``i`` in ``range(max_files)`` via
    ``HRLutils.load_data``, silently skipping indices that raise IOError.
    Each loaded file is treated as a list of records, and each record as a
    list of two-element entries.  Records are concatenated position-wise
    across files; the first element of every entry is shifted by the first
    element of the last entry merged so far into record 0 (presumably a
    timestamp, so that later files continue where earlier ones ended --
    TODO confirm against HRLutils.load_data).

    The result is written with ``HRLutils.save_data`` to
    ``<path>_combined.txt``.  If no file could be loaded, nothing is written.

    :param path: common file prefix; defaults to
        ``../../data/delivery/flat/dataoutput_2``
    :param max_files: number of file indices to try (default 10)
    """
    if path is None:
        path = os.path.join("..", "..", "data", "delivery", "flat",
                            "dataoutput_2")

    data = []
    for i in range(max_files):
        try:
            data.append(HRLutils.load_data(path + ".%s.txt" % i))
        except IOError:
            # missing/unreadable index: just move on to the next one
            continue

    print("found %s files to combine" % len(data))
    if not data:
        # nothing loaded, so there is no record layout to copy from data[0]
        return
    print("%s records" % len(data[0]))

    starttime = 0.0
    newdata = [[] for _ in data[0]]
    for d in data:
        if len(d) != len(newdata):
            print("uh oh, number of records is wrong")
            print("%s %s" % (len(d), len(newdata)))

        # zip stops at the shorter side, so a file with surplus records can
        # no longer index past the end of newdata
        for combined, record in zip(newdata, d):
            for entry in record:
                combined.append([entry[0] + starttime, entry[1]])

        # the next file is offset by the last value merged into record 0;
        # keep the previous offset if record 0 is still empty
        if newdata and newdata[0]:
            starttime = newdata[0][-1][0]

    HRLutils.save_data(path + "_combined.txt", newdata)
開發者ID:Seanny123,項目名稱:HRL_1.0,代碼行數:25,代碼來源:datatools.py

示例2: gen_encoders

    def gen_encoders(self, N, contextD, context_scale):
        """Build the encoders for the learning agent's state population.

        Each encoder is a randomly chosen, normalized state vector followed
        by a randomly chosen context vector scaled by ``context_scale``; the
        concatenation is normalized again.

        :param N: number of neurons in state population (one encoder each)
        :param contextD: dimension of context vector representation; when it
            is not positive, no context part is appended
        :param context_scale: weight on context representation relative to
            state (1.0 = equal weighting)
        :returns: list of ``N`` encoders
        """

        context_vecs = MU.I(contextD) if contextD > 0 else [[]]

        # candidate state vectors: the rows of the stateD identity matrix,
        # plus one vector per (orientation, shape, colour) combination
        candidates = list(MU.I(self.stateD))
        for o in MU.I(self.num_orientations):
            for s in MU.I(self.num_shapes):
                for c in MU.I(self.num_colours):
                    candidates.append(o + s + c)

        encoders = []
        for _ in range(N):
            # draw the state part first, then the context part
            state_part = HRLutils.normalize(random.choice(candidates))
            context_part = [x * context_scale
                            for x in random.choice(context_vecs)]
            encoders.append(HRLutils.normalize(state_part + context_part))
        return encoders
開發者ID:drasmuss,項目名稱:nhrlmodel,代碼行數:25,代碼來源:badreenvironment.py

示例3: tick

    def tick(self):
        """Check every termination condition and update ``self.reward``.

        Keys of ``self.conds`` are either ``Timer`` objects or region labels:

        * a ``Timer`` is ticked; when it rings, the reward is set to
          ``self.rewardval``, ``self.activate()`` is called and the timer is
          reset.
        * any other key counts as met when ``self.env.state`` lies in that
          region and either no context vector is stored for it (``None``) or
          the normalized current context has similarity > 0.3 with the stored
          vector.  The reward is then set to ``self.rewardval`` and
          ``self.rewardamount`` is incremented; once it exceeds
          ``self.rewardresetamount``, ``self.activate()`` is called and the
          counter is cleared.

        If no condition is met the reward is ``self.defaultreward``.  Finally
        ``self.state_penalty`` (reset when the state changed, otherwise grown
        by 0.0001) is subtracted from the reward.
        """
        cond_active = False
        for c in self.conds:
            if isinstance(c, Timer):
                # if it is a timer entry, just update the timer and check if it
                # has expired
                c.tick()
                if c.ring():
                    self.reward = self.rewardval
                    self.activate()
                    c.reset()
                    cond_active = True

            elif (self.env.is_in(self.env.state, c) and
                  (self.conds[c] is None or
                   HRLutils.similarity(HRLutils.normalize(self.context),
                                       self.conds[c]) > 0.3)):
                # if it is a state entry, check if the agent is in the region
                # associated with that state, and check if that region is the
                # one corresponding to the currently selected context

                self.reward = self.rewardval

                # count consecutive rewarded ticks; activate() only fires
                # once the count passes rewardresetamount
                self.rewardamount += 1
                if self.rewardamount > self.rewardresetamount:
                    self.activate()
                    self.rewardamount = 0

                cond_active = True

        # if no termination conditions met, just give default reward
        if not cond_active:
            self.reward = self.defaultreward

        # reset rewardamount when the reset signal is sent (so that there won't
        # be any leftover rewardamount from the agent's previous decision)
        if self.t > self.resettime[0] and self.t < self.resettime[1]:
            self.rewardamount = 0

        # add a penalty if the state hasn't changed (to help prevent agent from
        # getting stuck)
        # the penalty is cleared only when prev_state has a non-zero sum and
        # the normalized states differ (similarity below 1.0)
        if sum(self.prev_state) != 0 and \
                HRLutils.similarity(HRLutils.normalize(self.env.state),
                                    HRLutils.normalize(self.prev_state)) < 1.0:
            self.state_penalty = 0.0
        else:
            self.state_penalty += 0.0001
        # deep copy so later changes to env.state don't alter the snapshot
        self.prev_state = copy.deepcopy(self.env.state)

        self.reward = self.reward - self.state_penalty
開發者ID:drasmuss,項目名稱:nhrlmodel,代碼行數:50,代碼來源:terminationnode.py

示例4: __init__

    def __init__(self, N, d, name="PositiveBias"):
        """Builds the PositiveBias network.

        The network contains two arrays: ``neg_thresh``, which receives the
        exposed "input" termination, and ``bias_pop``, which is driven by a
        constant ``biaslevel`` input, inhibited per dimension by the output
        of ``neg_thresh`` and inhibited as a whole by the exposed "learn"
        termination.  The ``X`` origin of ``bias_pop`` is exposed as "X".

        :param N: base number of neurons
        :param d: dimension of input signal
        :param name: name for network
        """

        self.name = name
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)

        tauPSC = 0.007
        biaslevel = 0.03  # the value to be output for negative inputs

        # threshold the input signal to detect positive values
        nfac = HRLutils.node_fac()
        nfac.setIntercept(IndicatorPDF(0, 0.1))
        neg_thresh = net.make_array("neg_thresh", N, d, encoders=[[1]],
                                    node_factory=nfac)
        neg_thresh.addDecodedTermination("input", MU.I(d), tauPSC, False)

        # create a population that tries to output biaslevel across
        # all dimensions
        bias_input = net.make_input("bias_input", [biaslevel])

        # eval points run from 0 in steps of 0.01 up to (excluding)
        # 2 * biaslevel; range() needs an int, and biaslevel * 200 is a float
        num_eval_points = int(round(biaslevel * 200))
        bias_pop = net.make_array("bias_pop", N, d,
                                  node_factory=HRLutils.node_fac(),
                                  eval_points=[[x * 0.01] for x in
                                               range(num_eval_points)])

        net.connect(bias_input, bias_pop, pstc=tauPSC)

        # the individual dimensions of bias_pop are then inhibited by the
        # output of neg_thresh (so any positive values don't get the bias)
        # one transform row per neuron: -10 on that neuron's own dimension
        neurons_per_dim = bias_pop.getNeurons() // d
        net.connect(neg_thresh, bias_pop, pstc=tauPSC,
                    func=lambda x: [1.0] if x[0] > 0 else [0.0],
                    transform=[[-10 if i == k else 0 for k in range(d)]
                               for i in range(d)
                               for _ in range(neurons_per_dim)])

        # the whole population is inhibited by the learn signal, so that it
        # outputs 0 if the system isn't supposed to be learning
        bias_pop.addTermination("learn", [[-10] for _ in
                                          range(bias_pop.getNeurons())],
                                tauPSC, False)

        self.exposeTermination(neg_thresh.getTermination("input"), "input")
        self.exposeTermination(bias_pop.getTermination("learn"), "learn")
        self.exposeOrigin(bias_pop.getOrigin("X"), "X")
開發者ID:drasmuss,項目名稱:nhrlmodel,代碼行數:48,代碼來源:positivebias.py

示例5: calc_optimal_move

    def calc_optimal_move(self):
        """Work out the best action for the current state and context.

        The goal region is the label in ``self.contexts`` whose value equals
        ``self.context``.  The map is scanned row by row on a 0.1 grid; at
        the first grid point inside the goal, ``self.optimal_move`` is set to
        the name of the action most similar to the direction from the current
        state towards that point (actions that would land in a "wall" score
        -1).  If no grid point is in the goal, ``self.optimal_move`` is None.
        """

        # label whose stored context matches the active one
        goal = [label for label in self.contexts
                if self.contexts[label] == self.context][0]

        stepsize = 0.1
        self.optimal_move = None

        def grid_coords(extent):
            # grid positions along one axis, centred on zero
            half_steps = extent / (2 * stepsize)
            return [v * stepsize
                    for v in range(int(-half_steps) + 1, int(half_steps) - 1)]

        for y in grid_coords(self.imgsize[1]):
            for x in grid_coords(self.imgsize[0]):
                if not self.is_in((x, y), goal):
                    continue

                # unit vector pointing from the agent towards the goal point
                angle = math.atan2(y - self.state[1], x - self.state[0])
                pt = (math.cos(angle), math.sin(angle))

                def score(action):
                    direction = action[1]
                    landing = (direction[0] * self.dx + self.state[0],
                               direction[1] * self.dx + self.state[1])
                    if self.is_in(landing, "wall"):
                        return -1
                    return HRLutils.similarity(direction, pt)

                self.optimal_move = max(self.actions, key=score)[0]
                return
開發者ID:drasmuss,項目名稱:nhrlmodel,代碼行數:30,代碼來源:contextenvironment.py

示例6: calc_optimal_move

    def calc_optimal_move(self):
        """Work out the best action for the current state and context.

        Region "a" is the goal while ``self.in_hand`` is false and region "b"
        while it is true.  The map is scanned row by row on a 0.1 grid; at
        the first grid point inside the goal, ``self.optimal_move`` is set to
        the name of the action most similar to the direction from the current
        state towards that point (actions that would land in a "wall" score
        -1).  If no grid point qualifies, ``self.optimal_move`` is None.
        """

        stepsize = 0.1
        self.optimal_move = None

        def grid_coords(extent):
            # grid positions along one axis, centred on zero
            half_steps = extent / (2 * stepsize)
            return [v * stepsize
                    for v in range(int(-half_steps) + 1, int(half_steps) - 1)]

        def is_goal(point):
            # "a" counts only when in_hand is false, "b" only when it is true
            return ((self.is_in(point, "a") and not self.in_hand) or
                    (self.is_in(point, "b") and self.in_hand))

        for y in grid_coords(self.imgsize[1]):
            for x in grid_coords(self.imgsize[0]):
                if not is_goal((x, y)):
                    continue

                # unit vector pointing from the agent towards the goal point
                angle = math.atan2(y - self.state[1], x - self.state[0])
                pt = (math.cos(angle), math.sin(angle))

                def score(action):
                    direction = action[1]
                    landing = (direction[0] * self.dx + self.state[0],
                               direction[1] * self.dx + self.state[1])
                    if self.is_in(landing, "wall"):
                        return -1
                    return HRLutils.similarity(direction, pt)

                self.optimal_move = max(self.actions, key=score)[0]
                return
開發者ID:drasmuss,項目名稱:nhrlmodel,代碼行數:27,代碼來源:deliveryenvironment.py

示例7: calc_optimal_move

    def calc_optimal_move(self):
        """Find the best action for the agent in its current state.

        Mainly a debugging aid.  The map is scanned row by row on a 0.1 grid;
        at the first grid point inside the "target" region,
        ``self.optimal_move`` is set to the name of the best-scoring action.
        If no grid point is in the target, ``self.optimal_move`` is None.
        """

        stepsize = 0.1
        self.optimal_move = None

        def axis_points(extent):
            # grid-search positions along one axis of the image
            half_steps = extent / (2 * stepsize)
            first = int(-half_steps) + 1
            last = int(half_steps) - 1
            return [v * stepsize for v in range(first, last)]

        for y in axis_points(self.imgsize[1]):
            for x in axis_points(self.imgsize[0]):
                # only points inside the region we are looking for matter
                if not self.is_in((x, y), "target"):
                    continue

                # target point in the direction from current location to
                # the target
                angle = math.atan2(y - self.state[1], x - self.state[0])
                pt = (math.cos(angle), math.sin(angle))

                def closeness(action):
                    # penalize actions that would move through a wall
                    step = action[1]
                    landing = (step[0] * self.dx + self.state[0],
                               step[1] * self.dx + self.state[1])
                    if self.is_in(landing, "wall"):
                        return -1
                    return HRLutils.similarity(step, pt)

                # pick the action that is closest to the target point
                self.optimal_move = max(self.actions, key=closeness)[0]
                return
開發者ID:Seanny123,項目名稱:HRL_1.0,代碼行數:27,代碼來源:placecell_bmp.py

示例8: test_terminationnode

def test_terminationnode():
    """Build a test network around a TerminationNode and open it.

    Creates a DeliveryEnvironment from ``contextmap.bmp`` and a
    TerminationNode with two region conditions and one Timer condition,
    prints the node's ``conds``, drives its "context" termination from a
    time-varying input, then adds the network to nengo and views it.
    """
    net = nef.Network("testTerminationNode")

    actions = [
        ("up", [0, 1]),
        ("right", [1, 0]),
        ("down", [0, -1]),
        ("left", [-1, 0]),
    ]

    # pixel colour value -> region label
    colormap = {-16777216: "wall", -1: "floor", -256: "a", -2088896: "b"}
    env = deliveryenvironment.DeliveryEnvironment(
        actions, HRLutils.datafile("contextmap.bmp"), colormap=colormap,
        imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    # region labels map to a context vector, the Timer entry to None
    conds = {"a": [0, 1],
             "b": [1, 0],
             terminationnode.Timer((30, 30)): None}
    term_node = terminationnode.TerminationNode(conds, env, contextD=2,
                                                rewardval=1)
    net.add(term_node)

    print(term_node.conds)

    # context input values keyed by time
    context_schedule = {0.0: [0, 0.1], 0.5: [1, 0], 1.0: [0, 1]}
    context_input = net.make_input("contextinput", context_schedule)
    net.connect(context_input, term_node.getTermination("context"))

    net.add_to_nengo()
    net.view()
開發者ID:drasmuss,項目名稱:nhrlmodel,代碼行數:26,代碼來源:test.py

示例9: saveParams

    def saveParams(self, prefix):
        """Save the network's parameters to data files named after ``prefix``.

        With ``self.neuron_learning`` set, the "actionvals" and
        "old_actionvals" nodes save their own weights.  Otherwise the
        decoders of the "vals" origin of "state_pop" and "old_state_pop" are
        written to text files.  The encoders of "state_pop" are always
        written.

        :param prefix: prefix for the file names (resolved through
            ``HRLutils.datafile``)
        """

        def write_matrix(suffix, rows):
            # one row per line, values separated by single spaces, and no
            # newline after the last row
            with open(HRLutils.datafile(prefix + suffix), "w") as out:
                out.write("\n".join([" ".join([str(v) for v in row])
                                     for row in rows]))

        # save connection weights
        if self.neuron_learning:
            for node_name in ("actionvals", "old_actionvals"):
                self.getNode(node_name).saveWeights(prefix)
        else:
            decoder_files = (("state_pop", "_state_decoders.txt"),
                             ("old_state_pop", "_old_state_decoders.txt"))
            for pop_name, suffix in decoder_files:
                origin = self.getNode(pop_name).getOrigin("vals")
                write_matrix(suffix, origin.getDecoders())

        # save state encoders
        write_matrix("_state_encoders.txt",
                     self.getNode("state_pop").getEncoders())
開發者ID:Seanny123,項目名稱:HRL_1.0,代碼行數:18,代碼來源:Qnetwork.py

示例10: loadParams

    def loadParams(self, prefix):
        """Load the network's parameters from data files named after ``prefix``.

        With ``self.neuron_learning`` set, the "actionvals" and
        "old_actionvals" nodes load their own weights.  Otherwise the
        decoders of the "vals" origin of "state_pop" and "old_state_pop" are
        read from ``<prefix>_state_decoders.txt`` and
        ``<prefix>_old_state_decoders.txt``.  The encoders in
        ``<prefix>_state_encoders.txt`` are always read and applied to both
        populations.

        Files hold one matrix row per line with whitespace-separated floats;
        blank lines (such as a trailing newline) are ignored.

        :param prefix: prefix for the file names (resolved through
            ``HRLutils.datafile``)
        :raises ValueError: if a value in a file cannot be parsed as a float
        """
        print("loading params: %s" % prefix)

        def read_matrix(suffix):
            # split() with no argument tolerates repeated spaces and the
            # line's trailing newline; whitespace-only lines are skipped so
            # they don't reach float('')
            with open(HRLutils.datafile(prefix + suffix)) as f:
                return [[float(x) for x in line.split()]
                        for line in f if line.strip()]

        # load connection weights
        if self.neuron_learning:
            self.getNode("actionvals").loadWeights(prefix)
            self.getNode("old_actionvals").loadWeights(prefix)
        else:
            self.getNode("state_pop").getOrigin("vals").setDecoders(
                read_matrix("_state_decoders.txt"))
            self.getNode("old_state_pop").getOrigin("vals").setDecoders(
                read_matrix("_old_state_decoders.txt"))

        # load state encoders
        enc = read_matrix("_state_encoders.txt")
        self.getNode("state_pop").setEncoders(enc)
        # note we assume that state_pop and old_state_pop use the same
        # encoders
        self.getNode("old_state_pop").setEncoders(enc)
開發者ID:Seanny123,項目名稱:HRL_1.0,代碼行數:21,代碼來源:Qnetwork.py

示例11: test_bmp

def test_bmp():
    """Print the distinct pixel values of ``contextmap.bmp``.

    Values are listed as ints in order of first appearance in the array
    returned by ``getRGB`` for the whole image.
    """
    from javax.imageio import ImageIO
    from java.io import File

    img = ImageIO.read(File(HRLutils.datafile("contextmap.bmp")))
    width = img.getWidth()
    height = img.getHeight()
    pixels = img.getRGB(0, 0, width, height, None, 0, width)

    unique_colours = []
    seen = set()  # membership check only; the list keeps the order
    for raw in pixels:
        colour = int(raw)
        if colour in seen:
            continue
        seen.add(colour)
        unique_colours.append(colour)

    print(unique_colours)
開發者ID:drasmuss,項目名稱:nhrlmodel,代碼行數:13,代碼來源:test.py

示例12: __init__

    def __init__(self, actions, mapname, colormap, name="PlaceCellEnvironment",
                 imgsize=(1.0, 1.0), dx=0.01, placedev=0.1, num_places=None):
        """Initialize environment variables.

        Sets up the base ``EnvironmentTemplate`` with a 2-dimensional state,
        loads the map image, generates the place cells, picks a random start
        location and creates the "place" and "optimal_move" origins.

        :param actions: actions available to the system, as a list of tuples
            (action_name, action_vector)
        :param mapname: name of file describing environment map (resolved
            through ``HRLutils.datafile``)
        :param colormap: dict mapping pixel colours to labels
        :param name: name for environment
        :param imgsize: size of the space represented by the map image; each
            element is converted to float (presumably (width, height) --
            TODO confirm)
        :param dx: distance agent moves each timestep
        :param placedev: standard deviation of gaussian place cell
            activations; also passed to ``gen_placecells`` as ``min_spread``
        :param num_places: number of placecells to use (if None it will attempt
            to fill the space)
        """

        EnvironmentTemplate.__init__(self, name, 2, actions)

        # parameters
        self.colormap = colormap
        self.rewardamount = 0  # number of timesteps spent in reward

        # number of timesteps to spend in reward before agent is reset
        # note: convenient to express this as time_in_reward / dt
        # (here 0.6 / 0.001, i.e. 600 timesteps)
        self.rewardresetamount = 0.6 / 0.001

        self.num_actions = len(actions)
        self.imgsize = [float(x) for x in imgsize]
        self.dx = dx
        self.placedev = placedev
        self.num_places = num_places
        self.optimal_move = None
        self.defaultreward = -0.075

        # load environment
        self.map = ImageIO.read(File(HRLutils.datafile(mapname)))

        # generate place cells
        self.gen_placecells(min_spread=1.0 * placedev)

        # initial conditions
        # start somewhere that is neither "wall" nor "target"
        self.state = self.random_location(avoid=["wall", "target"])
        self.place_activations = [0 for _ in self.placecells]

        self.create_origin("place", lambda: self.place_activations)

        # note: making the value small, so that the noise node will give us
        # some random exploration as well
        # output is 0.1 for the action named by optimal_move and 0.0 for the
        # others
        self.create_origin("optimal_move",
                           lambda: [0.1 if self.optimal_move == a[0] else 0.0
                                    for a in self.actions])
開發者ID:drasmuss,項目名稱:nhrlmodel,代碼行數:51,代碼來源:placecell_bmp.py

示例13: saveWeights

    def saveWeights(self, prefix):
        """Save the connection weights to file.

        For every node whose name starts with "action", the weights of each
        node termination of its "learning" termination are written to
        ``<prefix>_<self.name>_<node name>.txt`` (resolved through
        ``HRLutils.datafile``).  The first line of each file is
        ``HRLutils.SEED``; every following line is one weight row with
        space-separated values.

        :param prefix: prefix for the file names
        """

        prefix = prefix + "_" + self.name
        for n in self.getNodes():
            if not n.getName().startswith("action"):
                continue

            term = n.getTermination("learning")
            weights = [t.getWeights() for t in term.getNodeTerminations()]

            filename = HRLutils.datafile(prefix + "_" + n.getName() + ".txt")
            # `with` closes the file even if one of the writes raises
            with open(filename, "w") as f:
                f.write(str(HRLutils.SEED) + "\n")
                for row in weights:
                    f.write(" ".join([str(x) for x in row]) + "\n")
開發者ID:Seanny123,項目名稱:HRL_1.0,代碼行數:14,代碼來源:actionvalues.py

示例14: run_gridworld

def run_gridworld(args, seed=None):
    """Build the gridworld network and open it in the interactive viewer.

    Creates an SMDPAgent and a GridWorldEnvironment (map ``smallgrid.txt``),
    wires the environment's origins to the agent's terminations and the
    agent's outputs back to the environment, then adds the network to nengo
    and shows it in a timeview with a GridWorldWatch.

    :param args: dict of extra keyword arguments forwarded to SMDPAgent
    :param seed: if not None, passed to ``HRLutils.set_seed`` first
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    # not read again within this function
    seed = HRLutils.SEED

    net = nef.Network("run_gridworld")

    stateN = 400
    stateD = 2
    actions = [
        ("up", [0, 1]),
        ("right", [1, 0]),
        ("down", [0, -1]),
        ("left", [-1, 0]),
    ]

    agent = smdpagent.SMDPAgent(stateN, stateD, actions, stateradius=3,
                                **args)
    net.add(agent)

    env = gridworldenvironment.GridWorldEnvironment(
        stateD, actions, HRLutils.datafile("smallgrid.txt"), cartesian=True,
        delay=(0.6, 0.9), datacollection=False)
    net.add(env)

    # (environment origin, agent termination); "reset" drives three inputs
    env_to_agent = [
        ("state", "state_input"),
        ("reward", "reward"),
        ("reset", "reset"),
        ("learn", "learn"),
        ("reset", "save_state"),
        ("reset", "save_action"),
    ]
    for origin, termination in env_to_agent:
        net.connect(env.getOrigin(origin), agent.getTermination(termination))

    # (agent origin, environment termination)
    agent_to_env = [
        ("action_output", "action"),
        ("Qs", "Qs"),
    ]
    for origin, termination in agent_to_env:
        net.connect(agent.getOrigin(origin), env.getTermination(termination))

    net.add_to_nengo()
    view = timeview.View(net.network, update_frequency=5)
    view.add_watch(gridworldwatch.GridWorldWatch())
    view.restore()
開發者ID:drasmuss,項目名稱:nhrlmodel,代碼行數:36,代碼來源:run.py

示例15: test_actionvalues

def test_actionvalues():
    """Build a test network around an ActionValues node and open it.

    A 2-D state population feeds its "AXON" origin into the ActionValues
    node's "state" termination.  The node is initialised with the decoders
    of a constant-function origin on the state population, and the learning
    rate is 0.0.  A constant ``[0, 0]`` input drives the state population.
    """
    net = nef.Network("testActionValues")

    stateN = 200
    N = 100
    stateD = 2
    stateradius = 1.0
    statelength = math.sqrt(2 * stateradius ** 2)
    init_Qs = 0.5
    learningrate = 0.0
    Qradius = 1
    tauPSC = 0.007
    actions = [
        ("up", [0, 1]),
        ("right", [1, 0]),
        ("down", [0, -1]),
        ("left", [-1, 0]),
    ]

    # state
    half_width = int(stateradius)
    eval_points = []
    for x in range(-half_width, half_width):
        for y in range(-half_width, half_width):
            eval_points.append([x / statelength, y / statelength])

    state_pop = net.make("state_pop", stateN, stateD, radius=statelength,
                         node_factory=HRLutils.node_fac(),
                         eval_points=eval_points)
    state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    state_pop.addDecodedTermination("state_input", MU.I(stateD), tauPSC, False)

    # set up action nodes
    # decoders of a constant init_Qs function seed the ActionValues node
    init_origin = state_pop.addDecodedOrigin(
        "init_decoders", [ConstantFunction(stateD, init_Qs)], "AXON")
    decoders = init_origin.getDecoders()

    actionvals = actionvalues.ActionValues(
        "testActionValues", N, stateN, actions, learningrate,
        Qradius=Qradius, init_decoders=decoders)
    net.add(actionvals)

    net.connect(state_pop.getOrigin("AXON"),
                actionvals.getTermination("state"))

    # input
    inp = net.make_input("input", [0, 0])
    net.connect(inp, state_pop.getTermination("state_input"))

    net.add_to_nengo()
    net.view()
開發者ID:drasmuss,項目名稱:nhrlmodel,代碼行數:46,代碼來源:test.py


注:本文中的hrlproject.misc.HRLutils類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。