當前位置: 首頁>>代碼示例>>Python>>正文


Python HRLutils.node_fac方法代碼示例

本文整理匯總了Python中hrlproject.misc.HRLutils.node_fac方法的典型用法代碼示例。如果您正苦於以下問題:Python HRLutils.node_fac方法的具體用法?Python HRLutils.node_fac怎麼用?Python HRLutils.node_fac使用的例子?那麼，這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在hrlproject.misc.HRLutils的用法示例。


在下文中一共展示了HRLutils.node_fac方法的8個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: __init__

# 需要導入模塊: from hrlproject.misc import HRLutils [as 別名]
# 或者: from hrlproject.misc.HRLutils import node_fac [as 別名]
    def __init__(self, N, d, name="PositiveBias"):
        """Builds the PositiveBias network.

        Exposes an "input" termination (on the thresholding population), a
        "learn" termination (on the bias population) and an "X" origin (the
        bias population's output).

        :param N: base number of neurons
        :param d: dimension of input signal
        :param name: name for network
        """

        self.name = name
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)

        tauPSC = 0.007
        biaslevel = 0.03  # the value to be output for negative inputs

        # threshold the input signal to detect positive values
        nfac = HRLutils.node_fac()
        nfac.setIntercept(IndicatorPDF(0, 0.1))
        neg_thresh = net.make_array("neg_thresh", N, d, encoders=[[1]],
                                    node_factory=nfac)
        neg_thresh.addDecodedTermination("input", MU.I(d), tauPSC, False)

        # create a population that tries to output biaslevel across
        # all dimensions
        bias_input = net.make_input("bias_input", [biaslevel])

        # evaluation points cover [0, 2 * biaslevel) in steps of 0.01; range()
        # needs an integer bound, so convert the float product explicitly
        # instead of relying on implicit float handling
        num_eval_points = int(round(biaslevel * 200))
        bias_pop = net.make_array("bias_pop", N, d,
                                  node_factory=HRLutils.node_fac(),
                                  eval_points=[[x * 0.01] for x in
                                               range(num_eval_points)])

        net.connect(bias_input, bias_pop, pstc=tauPSC)

        # number of neurons per dimension of bias_pop; floor division keeps
        # this an integer regardless of the interpreter's "/" semantics
        neurons_per_dim = bias_pop.getNeurons() // d

        # the individual dimensions of bias_pop are then inhibited by the
        # output of neg_thresh (so any positive values don't get the bias).
        # the transform has one row per neuron: -10 in the column of that
        # neuron's own dimension and 0 elsewhere
        net.connect(neg_thresh, bias_pop, pstc=tauPSC,
                    func=lambda x: [1.0] if x[0] > 0 else [0.0],
                    transform=[[-10 if i == k else 0 for k in range(d)]
                               for i in range(d)
                               for _ in range(neurons_per_dim)])

        # the whole population is inhibited by the learn signal, so that it
        # outputs 0 if the system isn't supposed to be learning
        bias_pop.addTermination("learn", [[-10] for _ in
                                          range(bias_pop.getNeurons())],
                                tauPSC, False)

        self.exposeTermination(neg_thresh.getTermination("input"), "input")
        self.exposeTermination(bias_pop.getTermination("learn"), "learn")
        self.exposeOrigin(bias_pop.getOrigin("X"), "X")
開發者ID:drasmuss,項目名稱:nhrlmodel,代碼行數:50,代碼來源:positivebias.py

示例2: test_actionvalues

# 需要導入模塊: from hrlproject.misc import HRLutils [as 別名]
# 或者: from hrlproject.misc.HRLutils import node_fac [as 別名]
def test_actionvalues():
    """Build a small network that feeds a state population into ActionValues.

    A 2-D state population is created, a constant-function decoded origin on
    it supplies the initial decoders for an ActionValues network, a constant
    input drives the state population, and finally net.add_to_nengo() and
    net.view() are called.
    """
    net = nef.Network("testActionValues")

    # population sizes and dimensions
    num_state_neurons = 200
    num_action_neurons = 100
    state_dims = 2

    # radii, learning and filter parameters
    state_radius = 1.0
    state_length = math.sqrt(2 * state_radius ** 2)
    initial_q = 0.5
    learning_rate = 0.0
    q_radius = 1
    tau_psc = 0.007

    actions = [
        ("up", [0, 1]),
        ("right", [1, 0]),
        ("down", [0, -1]),
        ("left", [-1, 0]),
    ]

    # state: evaluation points lie on an integer grid scaled by state_length
    bound = int(state_radius)
    grid_points = []
    for gx in range(-bound, bound):
        for gy in range(-bound, bound):
            grid_points.append([gx / state_length, gy / state_length])

    state_pop = net.make("state_pop", num_state_neurons, state_dims,
                         radius=state_length,
                         node_factory=HRLutils.node_fac(),
                         eval_points=grid_points)
    state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    state_pop.addDecodedTermination("state_input", MU.I(state_dims),
                                    tau_psc, False)

    # set up action nodes: decoders of a constant function give every
    # action value the same starting point
    init_origin = state_pop.addDecodedOrigin(
        "init_decoders", [ConstantFunction(state_dims, initial_q)], "AXON")
    init_decoders = init_origin.getDecoders()

    action_net = actionvalues.ActionValues(
        "testActionValues", num_action_neurons, num_state_neurons, actions,
        learning_rate, Qradius=q_radius, init_decoders=init_decoders)
    net.add(action_net)

    state_axons = state_pop.getOrigin("AXON")
    net.connect(state_axons, action_net.getTermination("state"))

    # input
    constant_input = net.make_input("input", [0, 0])
    net.connect(constant_input, state_pop.getTermination("state_input"))

    net.add_to_nengo()
    net.view()
開發者ID:drasmuss,項目名稱:nhrlmodel,代碼行數:48,代碼來源:test.py

示例3: __init__

# 需要導入模塊: from hrlproject.misc import HRLutils [as 別名]
# 或者: from hrlproject.misc.HRLutils import node_fac [as 別名]
    def __init__(self, num_actions, Qradius=1.0, rewardradius=1.0, discount=0.3):
        """Builds the ErrorNetwork.

        :param num_actions: the number of actions available to the system
        :param Qradius: expected radius of Q values
        :param rewardradius: expected radius of reward signal
        :param discount: discount factor
        """

        self.name = "ErrorNetwork"
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)

        N = 50
        tauPSC = 0.007
        errorcap = 0.1 #soft cap on error magnitude (large errors seem to cause problems 
            #with overly-generalizing the learning)

        #set up relays
        vals_relay = net.make("vals_relay", 1, num_actions, mode="direct")
        vals_relay.fixMode()
        vals_relay.addDecodedTermination("input", MU.I(num_actions), 0.001, False)

        old_vals_relay = net.make("old_vals_relay", 1, num_actions, mode="direct")
        old_vals_relay.fixMode()
        old_vals_relay.addDecodedTermination("input", MU.I(num_actions), 0.001, False)

        curr_bg_relay = net.make("curr_bg_relay", 1, num_actions, mode="direct")
        curr_bg_relay.fixMode()
        curr_bg_relay.addDecodedTermination("input", MU.I(num_actions), 0.001, False)

        saved_bg_relay = net.make("saved_bg_relay", 1, num_actions, mode="direct")
        saved_bg_relay.fixMode()
        saved_bg_relay.addDecodedTermination("input", MU.I(num_actions), 0.001, False)


        #select out only the currently chosen Q value
        gatedQ = net.make_array("gatedQ", N, num_actions, node_factory=HRLutils.node_fac(), radius=Qradius)
        gatedQ.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        net.connect(vals_relay, gatedQ, pstc=tauPSC)

        net.connect(curr_bg_relay, gatedQ,
                    transform=[[-3 if i != k else 0 for k in range(num_actions)]
                               for i in range(num_actions) for _ in range(gatedQ.getNeurons() / num_actions)],
                    pstc=tauPSC)

        currQ = net.make("currQ", 1, 1, mode="direct")
        currQ.fixMode()
        net.connect(gatedQ, currQ, transform=[[1 for _ in range(num_actions)]], pstc=0.001)

        #select out only the previously chosen Q value
        gatedstoreQ = net.make_array("gatedstoreQ", N, num_actions, node_factory=HRLutils.node_fac(), radius=Qradius)
        gatedstoreQ.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        net.connect(old_vals_relay, gatedstoreQ, pstc=tauPSC)

        net.connect(saved_bg_relay, gatedstoreQ,
                    transform=[[-3 if i != k else 0 for k in range(num_actions)]
                               for i in range(num_actions) for _ in range(gatedstoreQ.getNeurons() / num_actions)],
                    pstc=tauPSC)

        storeQ = net.make("storeQ", 1, 1, mode="direct")
        storeQ.fixMode()
        net.connect(gatedstoreQ, storeQ, transform=[[1 for _ in range(num_actions)]], pstc=0.001)

        #create error calculation network
        error = errorcalc2.ErrorCalc2(discount, rewardradius=rewardradius, Qradius=Qradius)
        net.add(error)

        net.connect(currQ, error.getTermination("currQ"))
        net.connect(storeQ, error.getTermination("storeQ"))

        #gate error by learning signal and saved BG output (we only want error when the
        #system is supposed to be learning, and we only want error related to the action
        #that was selected)
        gatederror = net.make_array("gatederror", N * 2, num_actions, radius=errorcap, node_factory=HRLutils.node_fac())
        gatederror.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        net.connect(error, gatederror, transform=[[1.0 / Qradius] for _ in range(num_actions)], pstc=tauPSC)
            #scale the error by Qradius, so that we don't get super huge errors (screws up the gating)

        learninggate = net.make("learninggate", N, 1, node_factory=HRLutils.node_fac())
        learninggate.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
        learninggate.addTermination("gate", [[-10] for _ in range(N)], tauPSC, False)

        net.connect(learninggate, gatederror, func=lambda x: [1.0],
                    transform=[[-12] for _ in range(gatederror.getNeurons())], pstc=tauPSC)

        net.connect(saved_bg_relay, gatederror,
                    transform=[[-12 if i != k else 0 for k in range(num_actions)]
                               for i in range(num_actions) for _ in range(gatederror.getNeurons() / num_actions)],
                    pstc=tauPSC)

        #add a positive bias to the error anywhere the Q values are negative (to stop
        #Q values from getting too negative, which screws up the action selection)
        posbias = positivebias.PositiveBias(N, num_actions)
        net.add(posbias)
        net.connect(old_vals_relay, posbias.getTermination("input"))
        net.connect(learninggate, posbias.getTermination("learn"), func=lambda x: [1.0])

#.........這裏部分代碼省略.........
開發者ID:Seanny123,項目名稱:HRL_1.0,代碼行數:103,代碼來源:errornetwork.py

示例4: __init__

# 需要導入模塊: from hrlproject.misc import HRLutils [as 別名]
# 或者: from hrlproject.misc.HRLutils import node_fac [as 別名]
    def __init__(self, discount, rewardradius=1.0, Qradius=1.0):
        """Construct the ErrorCalc2 network.

        Exposes "reward", "reset", "currQ" and "storeQ" terminations and an
        "X" origin taken from the error population.

        :param discount: discount factor, controls rate of integration
        :param rewardradius: expected radius of reward value
        :param Qradius: expected radius of Q values
        """

        self.name = "ErrorCalc"
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)

        fast_pstc = 0.007
        slow_pstc = 0.1
        base_neurons = 50

        def make_relay(label):
            # one-dimensional direct-mode population with an "input"
            # termination
            relay = net.make(label, 1, 1, node_factory=HRLutils.node_fac(),
                             mode="direct", radius=Qradius)
            relay.fixMode()
            relay.addDecodedTermination("input", [[1]], 0.001, False)
            return relay

        def attach_reset(mem):
            # the memory targets zero and transfers when the reset
            # population is active
            net.connect(zero_source, mem.getTermination("target"))
            net.connect(reset_pop, mem.getTermination("transfer"))

        # relay for current Q input
        curr_q_relay = make_relay("currQ")

        # input population for resetting the network. this population will
        # begin outputting a value once the reset signal exceeds the
        # threshold, and that output will then be used to reset the rest of
        # the network
        reset_factory = HRLutils.node_fac()
        reset_factory.setIntercept(IndicatorPDF(0.3, 1.0))
        reset_pop = net.make("reset", base_neurons, 1, encoders=[[1]],
                             node_factory=reset_factory)
        reset_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
        reset_pop.addDecodedTermination("input", [[1]], fast_pstc, False)

        # relay for stored previous value of Q
        store_q_relay = make_relay("storeQ")

        # calculate "discount" by integrating output of storeQ
        discount_mem = memory.Memory("acc_storeQ", base_neurons * 8, 1,
                                     inputscale=50)
        net.add(discount_mem)

        zero_source = net.make_input("zero_input", [0])
        attach_reset(discount_mem)

        # threshold storeQ value so it won't go below zero. that is, if we
        # have negative Q values, we don't want to have a negative discount,
        # or that will just drive the highest (negative) Q value upwards, and
        # it will always be selected. negative Q values are instead pushed
        # upwards by the PositiveBias mechanism.
        thresh_points = [[x * 0.001] for x in range(1000)]
        q_thresh = net.make("Qthresh", base_neurons * 2, 1, encoders=[[1]],
                            eval_points=thresh_points, radius=Qradius,
                            intercept=(0, 1))
        net.connect(store_q_relay, q_thresh, pstc=fast_pstc)
        net.connect(q_thresh, discount_mem, pstc=slow_pstc,
                    transform=[[discount * slow_pstc]],
                    func=lambda x: max(x[0], 0.0))

        # accumulate reward
        reward_mem = memory.Memory("reward", base_neurons * 4, 1,
                                   radius=rewardradius, inputscale=50)
        net.add(reward_mem)
        reward_mem.addDecodedTermination("input", [[slow_pstc]], slow_pstc,
                                         False)
        attach_reset(reward_mem)

        # put reward, currQ, storeQ, and discount together to calculate
        # error: currQ and reward are added, storeQ and the accumulated
        # discount are subtracted
        error_pop = net.make("error", base_neurons * 2, 1,
                             node_factory=HRLutils.node_fac())

        net.connect(curr_q_relay, error_pop, pstc=fast_pstc)
        net.connect(reward_mem, error_pop, pstc=fast_pstc)
        net.connect(store_q_relay, error_pop, pstc=fast_pstc,
                    transform=[[-1]])
        net.connect(discount_mem, error_pop, pstc=fast_pstc,
                    transform=[[-1]])

        self.exposeTermination(reward_mem.getTermination("input"), "reward")
        self.exposeTermination(reset_pop.getTermination("input"), "reset")
        self.exposeTermination(curr_q_relay.getTermination("input"), "currQ")
        self.exposeTermination(store_q_relay.getTermination("input"),
                               "storeQ")
        self.exposeOrigin(error_pop.getOrigin("X"), "X")
開發者ID:Seanny123,項目名稱:HRL_1.0,代碼行數:81,代碼來源:errorcalc2.py

示例5: __init__

# 需要導入模塊: from hrlproject.misc import HRLutils [as 別名]
# 或者: from hrlproject.misc.HRLutils import node_fac [as 別名]
    def __init__(self, name, N, d, radius=1.0, inputscale=1.0, recurweight=1.0,
                 direct_storage=False):
        """Construct the Memory network.

        Exposes a "transfer" termination (the gate on the inhibitory
        population), a "target" termination and an "X" origin.

        :param name: name of network
        :param N: base number of neurons
        :param d: dimension of stored value
        :param radius: radius of stored value
        :param inputscale: controls how fast the stored value moves to the
            target
        :param recurweight: controls the preservation of the stored value
        :param direct_storage: if True, use directmode for the memory
        """

        self.name = name
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)
        self.dimension = d
        self.radius = radius

        fast_pstc = 0.007
        slow_pstc = 0.1

        def scaled_identity(value):
            # MU.diag over a length-d list holding `value` in every slot
            return MU.diag([value] * d)

        # population that will store the value
        if direct_storage:
            store_pop = net.make("storage", 1, d, mode="direct")
            store_pop.fixMode()
        else:
            store_points = [[x * 0.001] for x in range(-1000, 1000)]
            store_pop = net.make_array("storage", N, d,
                                       node_factory=HRLutils.node_fac(),
                                       eval_points=store_points)

        # recurrent connection holds the stored value
        net.connect(store_pop, store_pop,
                    transform=scaled_identity(recurweight), pstc=slow_pstc)

        # the difference population will represent (target - stored_value),
        # which when used as input to storage will drive the stored value
        # to target
        diff_pop = net.make_array("storageinput", N, d,
                                  node_factory=HRLutils.node_fac())
        diff_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        # note: store everything in -1 -- 1 range by dividing by radius
        diff_pop.addDecodedTermination("target",
                                       scaled_identity(1.0 / radius),
                                       fast_pstc, False)

        # scale the difference by inputscale to control rate at which
        # the stored value moves to the target
        net.connect(diff_pop, store_pop, pstc=slow_pstc,
                    transform=scaled_identity(inputscale * slow_pstc))

        # subtract currently stored value
        net.connect(store_pop, diff_pop, pstc=fast_pstc,
                    transform=scaled_identity(-1))

        # we want to open the input gate when the transfer signal arrives (to
        # transfer the difference into storage). using a double inhibition
        # setup (rather than just feeding it e.g. the inverse of the transfer
        # signal) so that we get a nice clean zero

        # this inhibits the difference population (to block input to the
        # storage)
        gate_pop = net.make("transferinhib", N, 1,
                            node_factory=HRLutils.node_fac())
        gate_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        gate_weights = [[-10] for _ in range(gate_pop.getNeurons())]
        gate_pop.addTermination("gate", gate_weights, fast_pstc, False)

        inhibit_weights = [[-10] for _ in range(diff_pop.getNeurons())]
        net.connect(gate_pop, diff_pop, pstc=fast_pstc,
                    transform=inhibit_weights)

        # this drives the gate population (so that by default it will
        # block any input). inhibiting it will thus remove the inhibition
        # on the difference population, and change the stored value
        bias_source = net.make_input("biasinput", [1])
        net.connect(bias_source, gate_pop, pstc=fast_pstc)

        # output population (to undo radius scaling)
        out_pop = net.make("storageoutput", 1, d, mode="direct")
        out_pop.fixMode()
        net.connect(store_pop, out_pop, pstc=0.001,
                    transform=scaled_identity(radius))

        self.exposeTermination(gate_pop.getTermination("gate"), "transfer")
        self.exposeTermination(diff_pop.getTermination("target"), "target")
        self.exposeOrigin(out_pop.getOrigin("X"), "X")
開發者ID:drasmuss,項目名稱:nhrlmodel,代碼行數:96,代碼來源:memory.py

示例6: __init__

# 需要導入模塊: from hrlproject.misc import HRLutils [as 別名]
# 或者: from hrlproject.misc.HRLutils import node_fac [as 別名]
    def __init__(self, actions, Qradius=1, noiselevel=0.03):
        """Builds the BGNetwork.

        :param actions: actions available to the system
            :type actions: list of tuples (action_name,action_vector)
        :param Qradius: expected radius of Q values
        :param noiselevel: standard deviation of noise added to Q values for
            exploration
        """

        self.name = "BGNetwork"
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)

        self.N = 50
        self.d = len(actions)
        self.mut_inhib = 1.0  # mutual inhibition between actions
        self.tauPSC = 0.007

        # make basal ganglia
        netbg = nef.Network("bg")

        bginput = netbg.make("bginput", 1, self.d, mode="direct")
        bginput.fixMode()
        bginput.addDecodedTermination("input",
                                      MU.diag([1.0 / Qradius for _ in
                                               range(self.d)]), 0.001, False)
        # divide by Q radius to get values back into 0 -- 1 range

        bgoutput = netbg.make("bgoutput", 1, self.d, mode="direct")
        bgoutput.fixMode()

        basalganglia.make_basal_ganglia(netbg, bginput, bgoutput,
                                        dimensions=self.d, neurons=200)
        bg = netbg.network
        net.add(bg)
        bg.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        bg.exposeTermination(bginput.getTermination("input"), "input")
        bg.exposeOrigin(bgoutput.getOrigin("X"), "X")

        # insert noise (used to give some randomness to drive exploration)
        noiselevel = net.make_input("noiselevel", [noiselevel])

        noise = noisenode.NoiseNode(1, dimension=len(actions))
        net.add(noise)

        net.connect(noiselevel, noise.getTermination("scale"))
        net.connect(noise.getOrigin("noise"), "bg.bginput", pstc=0.001)

        # add bias to shift everything up to 0.5--1.5
        biasinput = net.make_input("biasinput", [0.5])
        net.connect(biasinput, "bg.bginput",
                    transform=[[1] for _ in range(self.d)], pstc=0.001)

        # invert BG output (so the "selected" action will have a positive value
        # and the rest zero)
        invert = thalamus.make(net, name="invert", neurons=self.N,
                               dimensions=self.d, useQuick=False)
        invert.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
        net.connect(bg, invert.getTermination("bg_input"))

        # add mutual inhibition
        net.connect(invert.getOrigin("xBiased"), invert, pstc=self.tauPSC,
                    transform=[[0 if i == j else -self.mut_inhib
                                for j in range(self.d)]
                               for i in range(self.d)])

        # threshold output values so that you get a nice clean 0 for
        # non-selected and 1 for selected
        threshf = HRLutils.node_fac()
        threshold = 0.1
        threshf.setIntercept(IndicatorPDF(threshold, 1.0))
        val_threshold = net.make_array("val_threshold", self.N * 2, self.d,
                                       node_factory=threshf, encoders=[[1]])
        val_threshold.addDecodedOrigin(
            "output",
            [PiecewiseConstantFunction([threshold], [0, 1])
             for _ in range(self.d)], "AXON", True)

        net.connect(invert.getOrigin("xBiased"), val_threshold,
                    pstc=self.tauPSC)

        # output action (action vectors weighted by BG output)
        weight_actions = net.make_array("weight_actions", 50,
                                        len(actions[0][1]), intercept=(0, 1))
        net.connect(val_threshold.getOrigin("output"), weight_actions,
                    transform=MU.transpose([actions[i][1]
                                            for i in range(self.d)]),
                    pstc=0.007)

        # save the BG output (selected action and selected action value)
        save_relay = net.make("save_relay", 1, 1, mode="direct")
        save_relay.fixMode()
        save_relay.addDecodedTermination("input", [[1]], 0.001, False)

        saved_action = memory.Memory("saved_action", self.N * 2,
                                     len(actions[0][1]), inputscale=75)
        net.add(saved_action)
        net.connect(weight_actions, saved_action.getTermination("target"))
        net.connect(save_relay, saved_action.getTermination("transfer"))
#.........這裏部分代碼省略.........
開發者ID:drasmuss,項目名稱:nhrlmodel,代碼行數:103,代碼來源:bgnetwork.py

示例7: __init__

# 需要導入模塊: from hrlproject.misc import HRLutils [as 別名]
# 或者: from hrlproject.misc.HRLutils import node_fac [as 別名]
    def __init__(self, stateN, stateD, state_encoders, actions, learningrate,
                stateradius=1.0, Qradius=1.0,
                load_weights=None, state_evals=None, state_threshold=0.0):
        """Builds the QNetwork.

        :param stateN: number of neurons to use to represent state
        :param stateD: dimension of state vector
        :param state_encoders: encoders to use for neurons in state population
        :param actions: actions available to the system
            :type actions: list of tuples (action_name,action_vector)
        :param learningrate: learningrate for action value learning rule
        :param stateradius: expected radius of state values
        :param Qradius: expected radius of Q values
        :param load_weights: filename to load Q value weights from
        :param state_evals: evaluation points to use for state population.
            This is used when initializing the Q values (may be necessary if the
            input states don't tend to fall in the hypersphere).
        :param state_threshold: threshold of state neurons (minimum intercept)
        """

        self.name = "QNetwork"
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)

        N = 50
        tauPSC = 0.007
        num_actions = len(actions)
        init_Qs = 0.2 #initial value for all Q values
        self.neuron_learning = False
        # if True, use neuron--neuron weight learning,
        # otherwise, use decoder learning

        # set up relays
        state_relay = net.make("state_relay", 1, stateD, mode="direct")
        state_relay.fixMode() # This apparently fixes the simulator mode to the curremt mode, so I'm guessing we just don't want it over-ridden by an over-zealous config file.
        state_relay.addDecodedTermination("input", MU.I(stateD), 0.001, False)

        # create state population
        state_fac = HRLutils.node_fac()
        state_fac.setIntercept(IndicatorPDF(state_threshold, 1.0))

        print("making the state_pop")
        state_pop = net.make("state_pop", stateN, stateD,
                              radius=stateradius,
                              node_factory=state_fac,
                              encoders=state_encoders,
                              eval_points=state_evals)
        state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        net.connect(state_relay, state_pop, pstc=tauPSC)

        # store the state value (used to drive population encoding previous state)
        print("create the saved state memory")
        saved_state = memory.Memory("saved_state", N * 4, stateD, inputscale=50, radius=stateradius,
                                    direct_storage=True)
        net.add(saved_state)

        net.connect(state_relay, saved_state.getTermination("target"))

        # create population representing previous state
        old_state_pop = net.make("old_state_pop", stateN, stateD,
                              radius=stateradius,
                              node_factory=state_fac,
                              encoders=state_encoders,
                              eval_points=state_evals)
        old_state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        net.connect(saved_state, old_state_pop, pstc=tauPSC)

        print("setup the action nodes")
        # set up action nodes
        if self.neuron_learning:
            # use ActionValues network to compute Q values

            # current Q values
            decoders = state_pop.addDecodedOrigin("init_decoders",
                                                  [ConstantFunction(stateD, init_Qs)], "AXON").getDecoders()
            actionvals = actionvalues.ActionValues("actionvals", N, stateN, actions, learningrate,
                                                   Qradius=Qradius, init_decoders=decoders)
            net.add(actionvals)

            net.connect(state_pop.getOrigin("AXON"), actionvals.getTermination("state"))

            # Q values of previous state
            decoders = old_state_pop.addDecodedOrigin("init_decoders",
                                                      [ConstantFunction(stateD, init_Qs)], "AXON").getDecoders()
            old_actionvals = actionvalues.ActionValues("old_actionvals", N, stateN, actions, learningrate,
                                                       Qradius=Qradius, init_decoders=decoders)
            net.add(old_actionvals)

            net.connect(old_state_pop.getOrigin("AXON"), old_actionvals.getTermination("state"))
        else:
            # just use decoder on state population to compute Q values

            # current Q values
            origin = state_pop.addDecodedOrigin("vals",
                                        [ConstantFunction(num_actions, init_Qs) for _ in range(num_actions)],
                                        "AXON")
            state_dlnode = decoderlearningnode.DecoderLearningNode(state_pop, origin, learningrate,
                                                                   num_actions, name="state_learningnode")
            net.add(state_dlnode)
#.........這裏部分代碼省略.........
開發者ID:Seanny123,項目名稱:HRL_1.0,代碼行數:103,代碼來源:Qnetwork.py

示例8: __init__

# 需要導入模塊: from hrlproject.misc import HRLutils [as 別名]
# 或者: from hrlproject.misc.HRLutils import node_fac [as 別名]
    def __init__(self, name, N, stateN, actions, learningrate, Qradius=1.0, init_decoders=None):
        """Build ActionValues network.

        One population is created per action. Each gets a modulatory "error"
        termination, a learned "learning" termination, and a connection back
        to a shared direct-mode "output" relay. The per-action terminations
        are grouped and exposed as "state" and "error"; the relay's origin is
        exposed as "X".

        :param name: name of Network
        :param N: base number of neurons
        :param stateN: number of neurons in state population
        :param actions: actions available to the system
            :type actions: list of tuples (action_name,action_vector)
        :param learningrate: learning rate for PES rule
        :param Qradius: expected radius of Q values
        :param init_decoders: if specified, will be used to initialize the connection
            weights to whatever function is specified by the decoders
        """

        self.name = name
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)

        self.N = N
        self.learningrate = learningrate
        self.supervision = 1.0  # don't use the unsupervised stuff at all

        self.tauPSC = 0.007

        num_actions = len(actions)
        modterms = []
        learnterms = []

        # relays
        output = net.make("output", 1, num_actions, mode="direct")
        output.fixMode()

        for i, action in enumerate(actions):
            # create one population corresponding to each action
            act_pop = net.make("action_" + action[0], self.N * 4, 1, node_factory=HRLutils.node_fac())
            act_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

            # add error termination; the modulatory transform is set so that
            # it selects one dimension (this action's) of the error signal
            modterm = act_pop.addDecodedTermination(
                "error", [[1 if k == i else 0 for k in range(num_actions)]],
                0.005, True)

            # create learning termination
            if init_decoders is not None:
                weights = MU.prod(act_pop.getEncoders(), MU.transpose(init_decoders))
            else:
                # NOTE: the comprehension variables must not be called `i`:
                # under Python 2 / Jython, list-comprehension variables leak
                # into the enclosing scope and would overwrite the action
                # index used for the output transform below
                weights = [[random.uniform(-1e-3, 1e-3) for _ in range(stateN)]
                           for _ in range(act_pop.getNeurons())]
            learningterm = act_pop.addHPESTermination("learning", weights, 0.005, False, None)

            # initialize the learning rule
            net.learn(act_pop, learningterm, modterm, rate=self.learningrate, supervisionRatio=self.supervision)

            # connect each action back to output relay
            # note, we learn all the Q values with radius 1, then just
            # multiply by the desired Q radius here
            net.connect(act_pop.getOrigin("X"), output,
                        transform=[[Qradius] if k == i else [0] for k in range(num_actions)],
                        pstc=0.001)

            modterms.append(modterm)
            learnterms.append(learningterm)

        # use EnsembleTerminations to group the individual action terminations into one multi-dimensional termination
        self.exposeTermination(EnsembleTermination(self, "state", learnterms), "state")
        self.exposeTermination(EnsembleTermination(self, "error", modterms), "error")

        self.exposeOrigin(output.getOrigin("X"), "X")
開發者ID:Seanny123,項目名稱:HRL_1.0,代碼行數:65,代碼來源:actionvalues.py


注:本文中的hrlproject.misc.HRLutils.node_fac方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。