本文整理匯總了Python中hrlproject.misc.HRLutils.node_fac方法的典型用法代碼示例。如果您正苦於以下問題:Python HRLutils.node_fac方法的具體用法?Python HRLutils.node_fac怎麼用?Python HRLutils.node_fac使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類hrlproject.misc.HRLutils的用法示例。
在下文中一共展示了HRLutils.node_fac方法的8個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: __init__
# 需要導入模塊: from hrlproject.misc import HRLutils [as 別名]
# 或者: from hrlproject.misc.HRLutils import node_fac [as 別名]
def __init__(self, N, d, name="PositiveBias"):
    """Builds the PositiveBias network.

    Exposes an "input" termination (thresholded to detect positive values),
    a "learn" termination that inhibits the whole bias population, and an
    "X" origin carrying the bias output.

    :param N: base number of neurons
    :param d: dimension of input signal
    :param name: name for network
    """
    self.name = name
    net = nef.Network(self, seed=HRLutils.SEED, quick=False)

    tauPSC = 0.007
    biaslevel = 0.03  # the value to be output for negative inputs

    # threshold the input signal to detect positive values
    nfac = HRLutils.node_fac()
    nfac.setIntercept(IndicatorPDF(0, 0.1))
    neg_thresh = net.make_array("neg_thresh", N, d, encoders=[[1]],
                                node_factory=nfac)
    neg_thresh.addDecodedTermination("input", MU.I(d), tauPSC, False)

    # create a population that tries to output biaslevel across
    # all dimensions
    bias_input = net.make_input("bias_input", [biaslevel])
    # range() requires an integer bound; biaslevel * 200 is a float, which
    # only some interpreters silently truncate, so convert it explicitly.
    num_eval_points = int(round(biaslevel * 200))
    bias_pop = net.make_array("bias_pop", N, d,
                              node_factory=HRLutils.node_fac(),
                              eval_points=[[x * 0.01] for x in
                                           range(num_eval_points)])
    net.connect(bias_input, bias_pop, pstc=tauPSC)

    # the individual dimensions of bias_pop are then inhibited by the
    # output of neg_thresh (so any positive values don't get the bias).
    # Floor division keeps the row count an integer regardless of the
    # interpreter's division semantics.
    neurons_per_dim = bias_pop.getNeurons() // d
    net.connect(neg_thresh, bias_pop, pstc=tauPSC,
                func=lambda x: [1.0] if x[0] > 0 else [0.0],
                transform=[[-10 if i == k else 0 for k in range(d)]
                           for i in range(d)
                           for _ in range(neurons_per_dim)])

    # the whole population is inhibited by the learn signal, so that it
    # outputs 0 if the system isn't supposed to be learning
    bias_pop.addTermination("learn", [[-10] for _ in
                                      range(bias_pop.getNeurons())],
                            tauPSC, False)

    self.exposeTermination(neg_thresh.getTermination("input"), "input")
    self.exposeTermination(bias_pop.getTermination("learn"), "learn")
    self.exposeOrigin(bias_pop.getOrigin("X"), "X")
示例2: test_actionvalues
# 需要導入模塊: from hrlproject.misc import HRLutils [as 別名]
# 或者: from hrlproject.misc.HRLutils import node_fac [as 別名]
def test_actionvalues():
    """Assemble a small network around ActionValues and open it in Nengo.

    A 2-D state population feeds an ActionValues network through its "AXON"
    origin; a constant input node drives the state population. The network
    is then added to Nengo and displayed.
    """
    network = nef.Network("testActionValues")

    # sizes and constants used throughout
    n_state_neurons = 200
    n_action_neurons = 100
    state_dims = 2
    state_radius = 1.0
    state_length = math.sqrt(2 * state_radius ** 2)
    initial_q = 0.5
    learning_rate = 0.0
    q_radius = 1
    tau_psc = 0.007
    action_list = [
        ("up", [0, 1]),
        ("right", [1, 0]),
        ("down", [0, -1]),
        ("left", [-1, 0]),
    ]

    # state population, evaluated on a coarse grid scaled by state_length
    state_factory = HRLutils.node_fac()
    grid = range(-int(state_radius), int(state_radius))
    grid_points = [[gx / state_length, gy / state_length]
                   for gx in grid
                   for gy in grid]
    state_pop = network.make("state_pop", n_state_neurons, state_dims,
                             radius=state_length,
                             node_factory=state_factory,
                             eval_points=grid_points)
    state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    state_pop.addDecodedTermination("state_input", MU.I(state_dims),
                                    tau_psc, False)

    # decoders of a constant function give the initial action values
    init_origin = state_pop.addDecodedOrigin(
        "init_decoders", [ConstantFunction(state_dims, initial_q)], "AXON")
    initial_decoders = init_origin.getDecoders()

    # action-value network driven by the state population's spikes
    action_net = actionvalues.ActionValues(
        "testActionValues", n_action_neurons, n_state_neurons, action_list,
        learning_rate, Qradius=q_radius, init_decoders=initial_decoders)
    network.add(action_net)
    network.connect(state_pop.getOrigin("AXON"),
                    action_net.getTermination("state"))

    # constant input into the state population
    state_source = network.make_input("input", [0, 0])
    network.connect(state_source, state_pop.getTermination("state_input"))

    network.add_to_nengo()
    network.view()
示例3: __init__
# 需要導入模塊: from hrlproject.misc import HRLutils [as 別名]
# 或者: from hrlproject.misc.HRLutils import node_fac [as 別名]
def __init__(self, num_actions, Qradius=1.0, rewardradius=1.0, discount=0.3):
    """Builds the ErrorNetwork.

    The visible part of the constructor creates direct-mode relays for the
    current/old Q values and the current/saved BG output, gates each set of
    Q values down to a single value, feeds those into an ErrorCalc2
    sub-network, gates the resulting error, and adds a PositiveBias
    sub-network. The remainder of the method is omitted in this listing.

    :param num_actions: the number of actions available to the system
    :param Qradius: expected radius of Q values
    :param rewardradius: expected radius of reward signal
    :param discount: discount factor
    """
    self.name = "ErrorNetwork"
    net = nef.Network(self, seed=HRLutils.SEED, quick=False)
    N = 50
    tauPSC = 0.007
    errorcap = 0.1  # soft cap on error magnitude (large errors seem to cause
    # problems with overly-generalizing the learning)

    # set up relays (one-neuron direct-mode populations, each with an
    # identity "input" termination)
    vals_relay = net.make("vals_relay", 1, num_actions, mode="direct")
    vals_relay.fixMode()
    vals_relay.addDecodedTermination("input", MU.I(num_actions), 0.001, False)
    old_vals_relay = net.make("old_vals_relay", 1, num_actions, mode="direct")
    old_vals_relay.fixMode()
    old_vals_relay.addDecodedTermination("input", MU.I(num_actions), 0.001, False)
    curr_bg_relay = net.make("curr_bg_relay", 1, num_actions, mode="direct")
    curr_bg_relay.fixMode()
    curr_bg_relay.addDecodedTermination("input", MU.I(num_actions), 0.001, False)
    saved_bg_relay = net.make("saved_bg_relay", 1, num_actions, mode="direct")
    saved_bg_relay.fixMode()
    saved_bg_relay.addDecodedTermination("input", MU.I(num_actions), 0.001, False)

    # select out only the currently chosen Q value
    gatedQ = net.make_array("gatedQ", N, num_actions, node_factory=HRLutils.node_fac(), radius=Qradius)
    gatedQ.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    net.connect(vals_relay, gatedQ, pstc=tauPSC)
    # one transform row per neuron: the rows generated for action i carry -3
    # in every column except column i, so each action's neurons are inhibited
    # by the BG output of all *other* actions.
    # NOTE(review): getNeurons() / num_actions relies on integer division
    # (Python 2 / Jython semantics) -- confirm before running elsewhere.
    net.connect(curr_bg_relay, gatedQ,
                transform=[[-3 if i != k else 0 for k in range(num_actions)]
                           for i in range(num_actions) for _ in range(gatedQ.getNeurons() / num_actions)],
                pstc=tauPSC)
    currQ = net.make("currQ", 1, 1, mode="direct")
    currQ.fixMode()
    # sum across all dimensions of gatedQ into a single value
    net.connect(gatedQ, currQ, transform=[[1 for _ in range(num_actions)]], pstc=0.001)

    # select out only the previously chosen Q value (same construction as
    # above, driven by the old values and the saved BG output)
    gatedstoreQ = net.make_array("gatedstoreQ", N, num_actions, node_factory=HRLutils.node_fac(), radius=Qradius)
    gatedstoreQ.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    net.connect(old_vals_relay, gatedstoreQ, pstc=tauPSC)
    net.connect(saved_bg_relay, gatedstoreQ,
                transform=[[-3 if i != k else 0 for k in range(num_actions)]
                           for i in range(num_actions) for _ in range(gatedstoreQ.getNeurons() / num_actions)],
                pstc=tauPSC)
    storeQ = net.make("storeQ", 1, 1, mode="direct")
    storeQ.fixMode()
    net.connect(gatedstoreQ, storeQ, transform=[[1 for _ in range(num_actions)]], pstc=0.001)

    # create error calculation network
    error = errorcalc2.ErrorCalc2(discount, rewardradius=rewardradius, Qradius=Qradius)
    net.add(error)
    net.connect(currQ, error.getTermination("currQ"))
    net.connect(storeQ, error.getTermination("storeQ"))

    # gate error by learning signal and saved BG output (we only want error
    # when the system is supposed to be learning, and we only want error
    # related to the action that was selected)
    gatederror = net.make_array("gatederror", N * 2, num_actions, radius=errorcap, node_factory=HRLutils.node_fac())
    gatederror.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    net.connect(error, gatederror, transform=[[1.0 / Qradius] for _ in range(num_actions)], pstc=tauPSC)
    # the connection above scales the error by Qradius, so that we don't get
    # super huge errors (screws up the gating)
    learninggate = net.make("learninggate", N, 1, node_factory=HRLutils.node_fac())
    learninggate.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    learninggate.addTermination("gate", [[-10] for _ in range(N)], tauPSC, False)
    # learninggate projects a constant 1.0 (the decoded function ignores its
    # input) with weight -12 onto every gatederror neuron
    net.connect(learninggate, gatederror, func=lambda x: [1.0],
                transform=[[-12] for _ in range(gatederror.getNeurons())], pstc=tauPSC)
    # same per-action inhibition pattern as for gatedQ, with weight -12
    net.connect(saved_bg_relay, gatederror,
                transform=[[-12 if i != k else 0 for k in range(num_actions)]
                           for i in range(num_actions) for _ in range(gatederror.getNeurons() / num_actions)],
                pstc=tauPSC)

    # add a positive bias to the error anywhere the Q values are negative (to
    # stop Q values from getting too negative, which screws up the action
    # selection)
    posbias = positivebias.PositiveBias(N, num_actions)
    net.add(posbias)
    net.connect(old_vals_relay, posbias.getTermination("input"))
    net.connect(learninggate, posbias.getTermination("learn"), func=lambda x: [1.0])
    # ......... part of the code is omitted here .........
示例4: __init__
# 需要導入模塊: from hrlproject.misc import HRLutils [as 別名]
# 或者: from hrlproject.misc.HRLutils import node_fac [as 別名]
def __init__(self, discount, rewardradius=1.0, Qradius=1.0):
    """Builds the ErrorCalc2 network.

    Exposes "reward", "reset", "currQ" and "storeQ" terminations and an "X"
    origin taken from the internal "error" population.

    :param discount: discount factor, controls rate of integration
    :param rewardradius: expected radius of reward value
    :param Qradius: expected radius of Q values
    """
    self.name = "ErrorCalc"
    network = nef.Network(self, seed=HRLutils.SEED, quick=False)

    base_neurons = 50
    fast_pstc = 0.007
    integration_pstc = 0.1

    # direct-mode relay carrying the current Q value
    curr_q = network.make("currQ", 1, 1, node_factory=HRLutils.node_fac(),
                          mode="direct", radius=Qradius)
    curr_q.fixMode()
    curr_q.addDecodedTermination("input", [[1]], 0.001, False)

    # reset population: its intercepts start at 0.3, so it only begins
    # outputting a value once the reset signal exceeds that threshold; the
    # output is then used to reset the rest of the network
    reset_factory = HRLutils.node_fac()
    reset_factory.setIntercept(IndicatorPDF(0.3, 1.0))
    reset_pop = network.make("reset", base_neurons, 1, encoders=[[1]],
                             node_factory=reset_factory)
    reset_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    reset_pop.addDecodedTermination("input", [[1]], fast_pstc, False)

    # direct-mode relay carrying the stored previous Q value
    store_q = network.make("storeQ", 1, 1, node_factory=HRLutils.node_fac(),
                           mode="direct", radius=Qradius)
    store_q.fixMode()
    store_q.addDecodedTermination("input", [[1]], 0.001, False)

    # the "discount" term is obtained by integrating the output of storeQ
    discount_acc = memory.Memory("acc_storeQ", base_neurons * 8, 1,
                                 inputscale=50)
    network.add(discount_acc)
    zero_source = network.make_input("zero_input", [0])
    network.connect(zero_source, discount_acc.getTermination("target"))
    network.connect(reset_pop, discount_acc.getTermination("transfer"))

    # threshold the storeQ value so it won't go below zero: with negative Q
    # values we don't want a negative discount, or that would just drive the
    # highest (negative) Q value upwards and it would always be selected.
    # Negative Q values are instead pushed upwards by the PositiveBias
    # mechanism.
    nonneg_points = [[step * 0.001] for step in range(1000)]
    q_thresh = network.make("Qthresh", base_neurons * 2, 1, encoders=[[1]],
                            eval_points=nonneg_points,
                            radius=Qradius, intercept=(0, 1))
    network.connect(store_q, q_thresh, pstc=fast_pstc)
    network.connect(q_thresh, discount_acc, pstc=integration_pstc,
                    transform=[[discount * integration_pstc]],
                    func=lambda x: max(x[0], 0.0))

    # reward accumulator
    reward_acc = memory.Memory("reward", base_neurons * 4, 1,
                               radius=rewardradius, inputscale=50)
    network.add(reward_acc)
    reward_acc.addDecodedTermination("input", [[integration_pstc]],
                                     integration_pstc, False)
    network.connect(zero_source, reward_acc.getTermination("target"))
    network.connect(reset_pop, reward_acc.getTermination("transfer"))

    # error = currQ + reward - storeQ - accumulated discount
    error_pop = network.make("error", base_neurons * 2, 1,
                             node_factory=HRLutils.node_fac())
    network.connect(curr_q, error_pop, pstc=fast_pstc)
    network.connect(reward_acc, error_pop, pstc=fast_pstc)
    network.connect(store_q, error_pop, pstc=fast_pstc, transform=[[-1]])
    network.connect(discount_acc, error_pop, pstc=fast_pstc,
                    transform=[[-1]])

    # public interface of the network
    self.exposeTermination(reward_acc.getTermination("input"), "reward")
    self.exposeTermination(reset_pop.getTermination("input"), "reset")
    self.exposeTermination(curr_q.getTermination("input"), "currQ")
    self.exposeTermination(store_q.getTermination("input"), "storeQ")
    self.exposeOrigin(error_pop.getOrigin("X"), "X")
示例5: __init__
# 需要導入模塊: from hrlproject.misc import HRLutils [as 別名]
# 或者: from hrlproject.misc.HRLutils import node_fac [as 別名]
def __init__(self, name, N, d, radius=1.0, inputscale=1.0, recurweight=1.0,
             direct_storage=False):
    """Builds the Memory network.

    Exposes a "target" termination (the value to move towards), a
    "transfer" termination (gates whether the stored value is updated) and
    an "X" origin carrying the stored value.

    :param name: name of network
    :param N: base number of neurons
    :param d: dimension of stored value
    :param radius: radius of stored value
    :param inputscale: controls how fast the stored value moves to the
        target
    :param recurweight: controls the preservation of the stored value
    :param direct_storage: if True, use directmode for the memory
    """
    self.name = name
    network = nef.Network(self, seed=HRLutils.SEED, quick=False)
    self.dimension = d
    self.radius = radius

    fast_pstc = 0.007
    slow_pstc = 0.1

    # population holding the value, with a recurrent connection to itself
    if direct_storage:
        store = network.make("storage", 1, d, mode="direct")
        store.fixMode()
    else:
        store = network.make_array(
            "storage", N, d,
            node_factory=HRLutils.node_fac(),
            eval_points=[[step * 0.001] for step in range(-1000, 1000)])
    network.connect(store, store,
                    transform=MU.diag([recurweight] * d),
                    pstc=slow_pstc)

    # "storageinput" represents (target - stored_value); feeding it into
    # the store drives the stored value towards the target
    store_in = network.make_array("storageinput", N, d,
                                  node_factory=HRLutils.node_fac())
    store_in.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    # everything is kept in the -1 -- 1 range by dividing by radius
    to_unit_range = MU.diag([1.0 / radius for _dim in range(d)])
    store_in.addDecodedTermination("target", to_unit_range, fast_pstc, False)

    # inputscale sets how quickly the store moves towards the target
    drive = MU.diag([inputscale * slow_pstc for _dim in range(d)])
    network.connect(store_in, store, pstc=slow_pstc, transform=drive)

    # subtract the currently stored value
    network.connect(store, store_in, pstc=fast_pstc,
                    transform=MU.diag([-1] * d))

    # The input gate opens when the transfer signal arrives (moving
    # storageinput into the store). A double-inhibition arrangement is used
    # instead of e.g. feeding in the inverse of the transfer signal, so
    # that the gated value is a clean zero. "transferinhib" inhibits
    # storageinput, blocking input to the store.
    gate_pop = network.make("transferinhib", N, 1,
                            node_factory=HRLutils.node_fac())
    gate_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    gate_weights = [[-10] for _neuron in range(gate_pop.getNeurons())]
    gate_pop.addTermination("gate", gate_weights, fast_pstc, False)
    block_weights = [[-10] for _neuron in range(store_in.getNeurons())]
    network.connect(gate_pop, store_in, pstc=fast_pstc,
                    transform=block_weights)

    # a constant bias keeps transferinhib active (input blocked by default);
    # inhibiting transferinhib lifts the inhibition on storageinput and
    # lets the stored value change
    bias = network.make_input("biasinput", [1])
    network.connect(bias, gate_pop, pstc=fast_pstc)

    # output relay that undoes the radius scaling
    store_out = network.make("storageoutput", 1, d, mode="direct")
    store_out.fixMode()
    network.connect(store, store_out, pstc=0.001,
                    transform=MU.diag([radius] * d))

    self.exposeTermination(gate_pop.getTermination("gate"), "transfer")
    self.exposeTermination(store_in.getTermination("target"), "target")
    self.exposeOrigin(store_out.getOrigin("X"), "X")
示例6: __init__
# 需要導入模塊: from hrlproject.misc import HRLutils [as 別名]
# 或者: from hrlproject.misc.HRLutils import node_fac [as 別名]
def __init__(self, actions, Qradius=1, noiselevel=0.03):
    """Builds the BGNetwork.

    The visible part of the constructor builds a basal ganglia sub-network,
    adds noise and a constant bias to its input, inverts and thresholds its
    output, weights the action vectors by that output, and sets up a Memory
    for the selected action. The remainder of the method is omitted in this
    listing.

    :param actions: actions available to the system
    :type actions: list of tuples (action_name,action_vector)
    :param Qradius: expected radius of Q values
    :param noiselevel: standard deviation of noise added to Q values for
        exploration
    """
    self.name = "BGNetwork"
    net = nef.Network(self, seed=HRLutils.SEED, quick=False)
    self.N = 50
    self.d = len(actions)  # one dimension per action
    self.mut_inhib = 1.0  # mutual inhibition between actions
    self.tauPSC = 0.007

    # make basal ganglia (built in its own "bg" network, added to net below)
    netbg = nef.Network("bg")
    bginput = netbg.make("bginput", 1, self.d, mode="direct")
    bginput.fixMode()
    bginput.addDecodedTermination("input",
                                  MU.diag([1.0 / Qradius for _ in
                                           range(self.d)]), 0.001, False)
    # divide by Q radius to get values back into 0 -- 1 range
    bgoutput = netbg.make("bgoutput", 1, self.d, mode="direct")
    bgoutput.fixMode()
    basalganglia.make_basal_ganglia(netbg, bginput, bgoutput,
                                    dimensions=self.d, neurons=200)
    bg = netbg.network
    net.add(bg)
    bg.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    bg.exposeTermination(bginput.getTermination("input"), "input")
    bg.exposeOrigin(bgoutput.getOrigin("X"), "X")

    # insert noise (used to give some randomness to drive exploration)
    # NOTE: this rebinds the ``noiselevel`` parameter to the input node; the
    # numeric value is no longer available under that name afterwards.
    noiselevel = net.make_input("noiselevel", [noiselevel])
    noise = noisenode.NoiseNode(1, dimension=len(actions))
    net.add(noise)
    net.connect(noiselevel, noise.getTermination("scale"))
    net.connect(noise.getOrigin("noise"), "bg.bginput", pstc=0.001)

    # add bias to shift everything up to 0.5--1.5
    biasinput = net.make_input("biasinput", [0.5])
    net.connect(biasinput, "bg.bginput",
                transform=[[1] for _ in range(self.d)], pstc=0.001)

    # invert BG output (so the "selected" action will have a positive value
    # and the rest zero)
    invert = thalamus.make(net, name="invert", neurons=self.N,
                           dimensions=self.d, useQuick=False)
    invert.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    net.connect(bg, invert.getTermination("bg_input"))

    # add mutual inhibition (zero on the diagonal, -mut_inhib elsewhere)
    net.connect(invert.getOrigin("xBiased"), invert, pstc=self.tauPSC,
                transform=[[0 if i == j else -self.mut_inhib
                            for j in range(self.d)]
                           for i in range(self.d)])

    # threshold output values so that you get a nice clean 0 for
    # non-selected and 1 for selected
    threshf = HRLutils.node_fac()
    threshold = 0.1
    threshf.setIntercept(IndicatorPDF(threshold, 1.0))
    val_threshold = net.make_array("val_threshold", self.N * 2, self.d,
                                   node_factory=threshf, encoders=[[1]])
    # decoded "output" origin: a step function per dimension, 0 below the
    # threshold and 1 above it
    val_threshold.addDecodedOrigin(
        "output",
        [PiecewiseConstantFunction([threshold], [0, 1])
         for _ in range(self.d)], "AXON", True)
    net.connect(invert.getOrigin("xBiased"), val_threshold,
                pstc=self.tauPSC)

    # output action (action vectors weighted by BG output)
    weight_actions = net.make_array("weight_actions", 50,
                                    len(actions[0][1]), intercept=(0, 1))
    net.connect(val_threshold.getOrigin("output"), weight_actions,
                transform=MU.transpose([actions[i][1]
                                        for i in range(self.d)]),
                pstc=0.007)

    # save the BG output (selected action and selected action value)
    save_relay = net.make("save_relay", 1, 1, mode="direct")
    save_relay.fixMode()
    save_relay.addDecodedTermination("input", [[1]], 0.001, False)
    saved_action = memory.Memory("saved_action", self.N * 2,
                                 len(actions[0][1]), inputscale=75)
    net.add(saved_action)
    net.connect(weight_actions, saved_action.getTermination("target"))
    net.connect(save_relay, saved_action.getTermination("transfer"))
    # ......... part of the code is omitted here .........
示例7: __init__
# 需要導入模塊: from hrlproject.misc import HRLutils [as 別名]
# 或者: from hrlproject.misc.HRLutils import node_fac [as 別名]
def __init__(self, stateN, stateD, state_encoders, actions, learningrate,
             stateradius=1.0, Qradius=1.0,
             load_weights=None, state_evals=None, state_threshold=0.0):
    """Builds the QNetwork.

    The visible part of the constructor creates the current and previous
    state populations (the latter driven by a Memory of the state) and then
    sets up Q-value computation either via ActionValues networks or via a
    learned decoded origin. The remainder of the method is omitted in this
    listing.

    :param stateN: number of neurons to use to represent state
    :param stateD: dimension of state vector
    :param state_encoders: encoders to use for neurons in state population
    :param actions: actions available to the system
    :type actions: list of tuples (action_name,action_vector)
    :param learningrate: learningrate for action value learning rule
    :param stateradius: expected radius of state values
    :param Qradius: expected radius of Q values
    :param load_weights: filename to load Q value weights from
    :param state_evals: evaluation points to use for state population.
        This is used when initializing the Q values (may be necessary if the
        input states don't tend to fall in the hypersphere).
    :param state_threshold: threshold of state neurons (minimum intercept)
    """
    self.name = "QNetwork"
    net = nef.Network(self, seed=HRLutils.SEED, quick=False)
    N = 50
    tauPSC = 0.007
    num_actions = len(actions)
    init_Qs = 0.2  # initial value for all Q values
    self.neuron_learning = False
    # if True, use neuron--neuron weight learning,
    # otherwise, use decoder learning

    # set up relays
    state_relay = net.make("state_relay", 1, stateD, mode="direct")
    state_relay.fixMode()  # NOTE(review): apparently pins the simulator mode to the current mode, presumably so a config file cannot override it -- confirm.
    state_relay.addDecodedTermination("input", MU.I(stateD), 0.001, False)

    # create state population (the same node factory, encoders and eval
    # points are reused for old_state_pop below)
    state_fac = HRLutils.node_fac()
    state_fac.setIntercept(IndicatorPDF(state_threshold, 1.0))
    print("making the state_pop")
    state_pop = net.make("state_pop", stateN, stateD,
                         radius=stateradius,
                         node_factory=state_fac,
                         encoders=state_encoders,
                         eval_points=state_evals)
    state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    net.connect(state_relay, state_pop, pstc=tauPSC)

    # store the state value (used to drive population encoding previous state)
    print("create the saved state memory")
    saved_state = memory.Memory("saved_state", N * 4, stateD, inputscale=50, radius=stateradius,
                                direct_storage=True)
    net.add(saved_state)
    net.connect(state_relay, saved_state.getTermination("target"))

    # create population representing previous state
    old_state_pop = net.make("old_state_pop", stateN, stateD,
                             radius=stateradius,
                             node_factory=state_fac,
                             encoders=state_encoders,
                             eval_points=state_evals)
    old_state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    net.connect(saved_state, old_state_pop, pstc=tauPSC)

    print("setup the action nodes")
    # set up action nodes
    if self.neuron_learning:
        # use ActionValues network to compute Q values
        # current Q values: decoders of a constant function (init_Qs) are
        # used to initialize the ActionValues connection weights
        decoders = state_pop.addDecodedOrigin("init_decoders",
                                              [ConstantFunction(stateD, init_Qs)], "AXON").getDecoders()
        actionvals = actionvalues.ActionValues("actionvals", N, stateN, actions, learningrate,
                                               Qradius=Qradius, init_decoders=decoders)
        net.add(actionvals)
        net.connect(state_pop.getOrigin("AXON"), actionvals.getTermination("state"))

        # Q values of previous state
        decoders = old_state_pop.addDecodedOrigin("init_decoders",
                                                  [ConstantFunction(stateD, init_Qs)], "AXON").getDecoders()
        old_actionvals = actionvalues.ActionValues("old_actionvals", N, stateN, actions, learningrate,
                                                   Qradius=Qradius, init_decoders=decoders)
        net.add(old_actionvals)
        net.connect(old_state_pop.getOrigin("AXON"), old_actionvals.getTermination("state"))
    else:
        # just use decoder on state population to compute Q values
        # current Q values
        # NOTE(review): ConstantFunction is constructed with num_actions as
        # its first argument here, but with stateD in the neuron-learning
        # branch above -- confirm which dimension is intended.
        origin = state_pop.addDecodedOrigin("vals",
                                            [ConstantFunction(num_actions, init_Qs) for _ in range(num_actions)],
                                            "AXON")
        state_dlnode = decoderlearningnode.DecoderLearningNode(state_pop, origin, learningrate,
                                                               num_actions, name="state_learningnode")
        net.add(state_dlnode)
    # ......... part of the code is omitted here .........
示例8: __init__
# 需要導入模塊: from hrlproject.misc import HRLutils [as 別名]
# 或者: from hrlproject.misc.HRLutils import node_fac [as 別名]
def __init__(self, name, N, stateN, actions, learningrate, Qradius=1.0, init_decoders=None):
    """Build ActionValues network.

    Creates one population per action, each with a modulatory "error"
    termination and a learned "learning" termination, and routes every
    population's output into its own dimension of a shared output relay.
    Exposes grouped "state" and "error" terminations and an "X" origin.

    :param name: name of Network
    :param N: base number of neurons
    :param stateN: number of neurons in state population
    :param actions: actions available to the system
    :type actions: list of tuples (action_name,action_vector)
    :param learningrate: learning rate for PES rule
    :param Qradius: expected radius of Q values
    :param init_decoders: if specified, will be used to initialize the
        connection weights to whatever function is specified by the decoders
    """
    self.name = name
    net = nef.Network(self, seed=HRLutils.SEED, quick=False)

    self.N = N
    self.learningrate = learningrate
    self.supervision = 1.0  # don't use the unsupervised stuff at all
    self.tauPSC = 0.007

    modterms = []
    learnterms = []
    num_actions = len(actions)

    # relays
    output = net.make("output", 1, num_actions, mode="direct")
    output.fixMode()

    # NOTE: the loop index is deliberately never reused as a comprehension
    # variable below. Under Python 2 / Jython scoping, list-comprehension
    # variables leak into the enclosing scope, so reusing the index for the
    # random weight matrix would overwrite it and make the output transform
    # select the wrong row.
    for action_idx, action in enumerate(actions):
        # create one population corresponding to each action
        act_pop = net.make("action_" + action[0], self.N * 4, 1, node_factory=HRLutils.node_fac())
        act_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        # add error termination; the modulatory transform selects this
        # action's dimension of the error signal
        modterm = act_pop.addDecodedTermination(
            "error",
            [[1 if col == action_idx else 0 for col in range(num_actions)]],
            0.005, True)

        # create learning termination
        if init_decoders is not None:
            weights = MU.prod(act_pop.getEncoders(), MU.transpose(init_decoders))
        else:
            # small random initial weights, one row per neuron
            weights = [[random.uniform(-1e-3, 1e-3) for _col in range(stateN)]
                       for _row in range(act_pop.getNeurons())]
        learningterm = act_pop.addHPESTermination("learning", weights, 0.005, False, None)

        # initialize the learning rule
        net.learn(act_pop, learningterm, modterm, rate=self.learningrate,
                  supervisionRatio=self.supervision)

        # connect each action back to output relay.
        # note, we learn all the Q values with radius 1, then just multiply
        # by the desired Q radius here
        net.connect(act_pop.getOrigin("X"), output,
                    transform=[[Qradius] if row == action_idx else [0]
                               for row in range(num_actions)],
                    pstc=0.001)

        modterms.append(modterm)
        learnterms.append(learningterm)

    # use EnsembleTerminations to group the individual action terminations
    # into one multi-dimensional termination
    self.exposeTermination(EnsembleTermination(self, "state", learnterms), "state")
    self.exposeTermination(EnsembleTermination(self, "error", modterms), "error")
    self.exposeOrigin(output.getOrigin("X"), "X")