This page collects typical usage examples of the Python class HRLutils from hrlproject.misc. If you are unsure what HRLutils does or how to use it, the curated examples below may help.
Fifteen code examples of HRLutils are shown, ordered by popularity by default.
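The examples rely on a handful of HRLutils helpers — normalize, similarity, datafile, node_fac, SEED/set_seed, load_data/save_data — whose implementations are not shown on this page. For orientation only, here is a minimal plain-Python sketch of what the two vector helpers plausibly compute; the names match the examples, but the bodies are assumptions rather than the project's actual code.

import math

def normalize(v):
    # assumed behaviour: scale a vector to unit length (zero vectors pass through)
    length = math.sqrt(sum(x * x for x in v))
    return [x / length for x in v] if length > 0 else list(v)

def similarity(a, b):
    # assumed behaviour: dot product, i.e. cosine similarity for unit-length vectors
    return sum(x * y for x, y in zip(a, b))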
Example 1: combine_files
def combine_files():
    path = os.path.join("..", "..", "data", "delivery", "flat", "dataoutput_2")

    data = []
    for i in range(10):
        try:
            data += [HRLutils.load_data(path + ".%s.txt" % i)]
        except IOError:
            continue

    print "found %s files to combine" % len(data)
    print len(data[0]), "records"

    starttime = 0.0
    newdata = [[] for _ in data[0]]
    for d in data:
        if len(d) != len(newdata):
            print "uh oh, number of records is wrong"
            print len(d), len(newdata)

        for i, record in enumerate(d):
            for entry in record:
                newdata[i] += [[entry[0] + starttime, entry[1]]]

        starttime = newdata[0][-1][0]

    HRLutils.save_data(path + "_combined.txt", newdata)
Example 2: gen_encoders
def gen_encoders(self, N, contextD, context_scale):
    """Generate encoders for state population of learning agent.

    :param N: number of neurons in state population
    :param contextD: dimension of context vector representation
    :param context_scale: weight on context representation relative to
        state (1.0 = equal weighting)
    """

    if contextD > 0:
        contexts = MU.I(contextD)
    else:
        contexts = [[]]

    # neurons each sensitive to different combinations of stimuli
    encs = (list(MU.I(self.stateD)) +
            [o + s + c
             for o in MU.I(self.num_orientations)
             for s in MU.I(self.num_shapes)
             for c in MU.I(self.num_colours)])

    return [HRLutils.normalize(
        HRLutils.normalize(random.choice(encs)) +
        [x * context_scale for x in random.choice(contexts)])
        for _ in range(N)]
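As a rough, standalone illustration of the mixing performed above (not part of the original code): each encoder is a normalized stimulus direction concatenated with a scaled, randomly chosen context axis, then renormalized. This sketch reuses the assumed normalize helper from the introduction; the axes and scale are hypothetical values.

import random

def mix_encoder(stim_axes, context_axes, context_scale):
    # pick one stimulus axis and one context axis, weight the context part,
    # then renormalize the combined vector
    enc = normalize(random.choice(stim_axes)) + \
        [x * context_scale for x in random.choice(context_axes)]
    return normalize(enc)

# hypothetical 2-D state axes and 2-D context, equally weighted
encoders = [mix_encoder([[1, 0], [0, 1]], [[1, 0], [0, 1]], 1.0)
            for _ in range(5)]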
Example 3: tick
def tick(self):
    cond_active = False

    for c in self.conds:
        if isinstance(c, Timer):
            # if it is a timer entry, just update the timer and check if it
            # has expired
            c.tick()
            if c.ring():
                self.reward = self.rewardval
                self.activate()
                c.reset()
                cond_active = True

        elif (self.env.is_in(self.env.state, c) and
              (self.conds[c] is None or
               HRLutils.similarity(HRLutils.normalize(self.context),
                                   self.conds[c]) > 0.3)):
            # if it is a state entry, check if the agent is in the region
            # associated with that state, and check if that region is the
            # one corresponding to the currently selected context
            self.reward = self.rewardval

            self.rewardamount += 1
            if self.rewardamount > self.rewardresetamount:
                self.activate()
                self.rewardamount = 0

            cond_active = True

    # if no termination conditions met, just give default reward
    if not cond_active:
        self.reward = self.defaultreward

    # reset rewardamount when the reset signal is sent (so that there won't
    # be any leftover rewardamount from the agent's previous decision)
    if self.t > self.resettime[0] and self.t < self.resettime[1]:
        self.rewardamount = 0

    # add a penalty if the state hasn't changed (to help prevent agent from
    # getting stuck)
    if sum(self.prev_state) != 0 and \
            HRLutils.similarity(HRLutils.normalize(self.env.state),
                                HRLutils.normalize(self.prev_state)) < 1.0:
        self.state_penalty = 0.0
    else:
        self.state_penalty += 0.0001
    self.prev_state = copy.deepcopy(self.env.state)

    self.reward = self.reward - self.state_penalty
Example 4: __init__
def __init__(self, N, d, name="PositiveBias"):
    """Builds the PositiveBias network.

    :param N: base number of neurons
    :param d: dimension of input signal
    :param name: name for network
    """

    self.name = name

    net = nef.Network(self, seed=HRLutils.SEED, quick=False)

    tauPSC = 0.007
    biaslevel = 0.03  # the value to be output for negative inputs

    # threshold the input signal to detect positive values
    nfac = HRLutils.node_fac()
    nfac.setIntercept(IndicatorPDF(0, 0.1))
    neg_thresh = net.make_array("neg_thresh", N, d, encoders=[[1]],
                                node_factory=nfac)
    neg_thresh.addDecodedTermination("input", MU.I(d), tauPSC, False)

    # create a population that tries to output biaslevel across
    # all dimensions
    bias_input = net.make_input("bias_input", [biaslevel])
    bias_pop = net.make_array("bias_pop", N, d,
                              node_factory=HRLutils.node_fac(),
                              eval_points=[[x * 0.01] for x in
                                           range(0, biaslevel * 200)])
    net.connect(bias_input, bias_pop, pstc=tauPSC)

    # the individual dimensions of bias_pop are then inhibited by the
    # output of neg_thresh (so any positive values don't get the bias)
    net.connect(neg_thresh, bias_pop, pstc=tauPSC,
                func=lambda x: [1.0] if x[0] > 0 else [0.0],
                transform=[[-10 if i == k else 0 for k in range(d)]
                           for i in range(d) for _ in
                           range(bias_pop.getNeurons() / d)])

    # the whole population is inhibited by the learn signal, so that it
    # outputs 0 if the system isn't supposed to be learning
    bias_pop.addTermination("learn", [[-10] for _ in
                                      range(bias_pop.getNeurons())],
                            tauPSC, False)

    self.exposeTermination(neg_thresh.getTermination("input"), "input")
    self.exposeTermination(bias_pop.getTermination("learn"), "learn")
    self.exposeOrigin(bias_pop.getOrigin("X"), "X")
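A hypothetical way to place the network above inside a parent nef model, using the same scripting calls that appear in later examples on this page (net.add, net.make_input, net.connect, getTermination); the sizes and input values are made up for illustration, and this assumes a PositiveBias instance can be added to a network like the other nodes shown here.

net = nef.Network("bias_demo")

pb = PositiveBias(50, 4)  # hypothetical: 50 neurons per array element, 4-D input
net.add(pb)

sig = net.make_input("signal", [0.5, -0.2, 0.1, -0.4])
learn = net.make_input("learn_signal", [0])  # drives the inhibitory "learn" termination

net.connect(sig, pb.getTermination("input"))
net.connect(learn, pb.getTermination("learn"))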
Example 5: calc_optimal_move
def calc_optimal_move(self):
    """Calculate the optimal move for the agent to take in the current
    state/context."""

    # basically the same as PlaceCellEnvironment.calc_optimal_move, except
    # we look at the current context to find the goal
    goal = [c for c in self.contexts
            if self.contexts[c] == self.context][0]

    stepsize = 0.1
    self.optimal_move = None
    for y in [v * stepsize for v in
              range(int(-self.imgsize[1] / (2 * stepsize)) + 1,
                    int(self.imgsize[1] / (2 * stepsize)) - 1)]:
        for x in [v * stepsize for v in
                  range(int(-self.imgsize[0] / (2 * stepsize)) + 1,
                        int(self.imgsize[0] / (2 * stepsize)) - 1)]:
            if self.is_in((x, y), goal):
                angle = math.atan2(y - self.state[1], x - self.state[0])
                pt = (math.cos(angle), math.sin(angle))

                self.optimal_move = max(
                    self.actions, key=lambda x: -1 if
                    self.is_in((x[1][0] * self.dx + self.state[0],
                                x[1][1] * self.dx + self.state[1]),
                               "wall")
                    else HRLutils.similarity(x[1], pt))[0]
                return
Example 6: calc_optimal_move
def calc_optimal_move(self):
    """Calculate the optimal move for the agent to take in the current
    state/context."""

    # basically the same as PlaceCellEnvironment.calc_optimal_move, except
    # we look at whether or not we have the package to pick a goal state
    stepsize = 0.1
    self.optimal_move = None
    for y in [v * stepsize for v in
              range(int(-self.imgsize[1] / (2 * stepsize)) + 1,
                    int(self.imgsize[1] / (2 * stepsize)) - 1)]:
        for x in [v * stepsize for v in
                  range(int(-self.imgsize[0] / (2 * stepsize)) + 1,
                        int(self.imgsize[0] / (2 * stepsize)) - 1)]:
            if ((self.is_in((x, y), "a") and not self.in_hand) or
                    (self.is_in((x, y), "b") and self.in_hand)):
                angle = math.atan2(y - self.state[1], x - self.state[0])
                pt = (math.cos(angle), math.sin(angle))

                self.optimal_move = max(
                    self.actions, key=lambda x: -1
                    if self.is_in((x[1][0] * self.dx + self.state[0],
                                   x[1][1] * self.dx + self.state[1]),
                                  "wall")
                    else HRLutils.similarity(x[1], pt))[0]
                return
Example 7: calc_optimal_move
def calc_optimal_move(self):
    """Calculates the optimal move for the agent to make in the current state.
    Used for debugging mainly.
    """

    # grid search the image with the given stepsize
    stepsize = 0.1
    self.optimal_move = None
    for y in [v * stepsize for v in
              range(int(-self.imgsize[1] / (2 * stepsize)) + 1,
                    int(self.imgsize[1] / (2 * stepsize)) - 1)]:
        for x in [v * stepsize for v in
                  range(int(-self.imgsize[0] / (2 * stepsize)) + 1,
                        int(self.imgsize[0] / (2 * stepsize)) - 1)]:
            # if the pt you're looking at is in the region you're
            # looking for
            if self.is_in((x, y), "target"):
                # generate a target point in the direction from current
                # location to target
                angle = math.atan2(y - self.state[1], x - self.state[0])
                pt = (math.cos(angle), math.sin(angle))

                # pick the action that is closest to the target point
                # note: penalize actions that would involve moving
                # through a wall
                self.optimal_move = max(
                    self.actions, key=lambda x: -1
                    if self.is_in((x[1][0] * self.dx + self.state[0],
                                   x[1][1] * self.dx + self.state[1]),
                                  "wall")
                    else HRLutils.similarity(x[1], pt))[0]
                return
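The three calc_optimal_move variants above (Examples 5-7) share one selection rule: among the discrete actions, pick the one whose direction vector best matches the bearing from the current position to the goal, unless a step of size dx in that direction would land in a wall. Below is a self-contained sketch of just that rule, with is_in passed in as a stand-in for the environment's region test and similarity taken from the assumed helper in the introduction; it is an illustration, not the original implementation.

import math

def pick_action(actions, state, goal_pt, dx, is_in):
    # bearing from the current position towards the goal point
    angle = math.atan2(goal_pt[1] - state[1], goal_pt[0] - state[0])
    target = (math.cos(angle), math.sin(angle))

    def score(action):
        _, vec = action
        nxt = (vec[0] * dx + state[0], vec[1] * dx + state[1])
        if is_in(nxt, "wall"):
            return -1  # same wall penalty the examples above use
        return similarity(vec, target)

    # return the name of the best-scoring action, as in the originals
    return max(actions, key=score)[0]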
Example 8: test_terminationnode
def test_terminationnode():
    net = nef.Network("testTerminationNode")

    actions = [("up", [0, 1]), ("right", [1, 0]), ("down", [0, -1]),
               ("left", [-1, 0])]
    env = deliveryenvironment.DeliveryEnvironment(
        actions,
        HRLutils.datafile("contextmap.bmp"),
        colormap={-16777216: "wall", -1: "floor", -256: "a", -2088896: "b"},
        imgsize=(5, 5),
        dx=0.001,
        placedev=0.5)
    net.add(env)

    term_node = terminationnode.TerminationNode(
        {"a": [0, 1], "b": [1, 0], terminationnode.Timer((30, 30)): None},
        env, contextD=2, rewardval=1)
    net.add(term_node)

    print term_node.conds

    context_input = net.make_input("contextinput",
                                   {0.0: [0, 0.1], 0.5: [1, 0], 1.0: [0, 1]})
    net.connect(context_input, term_node.getTermination("context"))

    net.add_to_nengo()
    net.view()
Example 9: saveParams
def saveParams(self, prefix):
    # save connection weights
    if self.neuron_learning:
        self.getNode("actionvals").saveWeights(prefix)
        self.getNode("old_actionvals").saveWeights(prefix)
    else:
        dec = self.getNode("state_pop").getOrigin("vals").getDecoders()
        with open(HRLutils.datafile(prefix + "_state_decoders.txt"),
                  "w") as f:
            f.write("\n".join([" ".join([str(x) for x in d]) for d in dec]))

        dec = self.getNode("old_state_pop").getOrigin("vals").getDecoders()
        with open(HRLutils.datafile(prefix + "_old_state_decoders.txt"),
                  "w") as f:
            f.write("\n".join([" ".join([str(x) for x in d]) for d in dec]))

    # save state encoders
    enc = self.getNode("state_pop").getEncoders()
    with open(HRLutils.datafile(prefix + "_state_encoders.txt"), "w") as f:
        f.write("\n".join([" ".join([str(x) for x in e]) for e in enc]))
Example 10: loadParams
def loadParams(self, prefix):
    print "loading params: %s" % prefix

    # load connection weights
    if self.neuron_learning:
        self.getNode("actionvals").loadWeights(prefix)
        self.getNode("old_actionvals").loadWeights(prefix)
    else:
        with open(HRLutils.datafile(prefix + "_state_decoders.txt")) as f:
            self.getNode("state_pop").getOrigin("vals").setDecoders(
                [[float(x) for x in d.split(" ")] for d in f.readlines()])

        with open(HRLutils.datafile(prefix +
                                    "_old_state_decoders.txt")) as f:
            self.getNode("old_state_pop").getOrigin("vals").setDecoders(
                [[float(x) for x in d.split(" ")] for d in f.readlines()])

    # load state encoders
    # note: we assume that state_pop and old_state_pop use the same encoders
    with open(HRLutils.datafile(prefix + "_state_encoders.txt")) as f:
        enc = [[float(x) for x in e.split(" ")] for e in f.readlines()]
    self.getNode("state_pop").setEncoders(enc)
    self.getNode("old_state_pop").setEncoders(enc)
Example 11: test_bmp
def test_bmp():
    from javax.imageio import ImageIO
    from java.io import File

    img = ImageIO.read(File(HRLutils.datafile("contextmap.bmp")))

    colours = [int(val) for val in
               img.getRGB(0, 0, img.getWidth(), img.getHeight(),
                          None, 0, img.getWidth())]

    unique_colours = []
    for c in colours:
        if c not in unique_colours:
            unique_colours += [c]

    print unique_colours
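The integers printed by test_bmp, and used as colormap keys elsewhere on this page (e.g. -16777216 for "wall"), are packed 32-bit ARGB values returned by BufferedImage.getRGB. A small helper, not part of the original code, makes them readable:

def argb_to_rgb(c):
    # mask to 32 bits, then pull out the red, green and blue bytes
    c &= 0xFFFFFFFF
    return ((c >> 16) & 0xFF, (c >> 8) & 0xFF, c & 0xFF)

print argb_to_rgb(-16777216)  # (0, 0, 0): "wall" is black
print argb_to_rgb(-1)         # (255, 255, 255): "floor" is white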
Example 12: __init__
def __init__(self, actions, mapname, colormap, name="PlaceCellEnvironment",
             imgsize=(1.0, 1.0), dx=0.01, placedev=0.1, num_places=None):
    """Initialize environment variables.

    :param actions: actions available to the system
    :type actions: list of tuples (action_name,action_vector)
    :param mapname: name of file describing environment map
    :param colormap: dict mapping pixel colours to labels
    :param name: name for environment
    :param imgsize: width of space represented by the map image
    :param dx: distance agent moves each timestep
    :param placedev: standard deviation of gaussian place cell activations
    :param num_places: number of placecells to use (if None it will attempt
        to fill the space)
    """

    EnvironmentTemplate.__init__(self, name, 2, actions)

    # parameters
    self.colormap = colormap
    self.rewardamount = 0  # number of timesteps spent in reward

    # number of timesteps to spend in reward before agent is reset
    # note: convenient to express this as time_in_reward / dt
    self.rewardresetamount = 0.6 / 0.001

    self.num_actions = len(actions)
    self.imgsize = [float(x) for x in imgsize]
    self.dx = dx
    self.placedev = placedev
    self.num_places = num_places
    self.optimal_move = None
    self.defaultreward = -0.075

    # load environment
    self.map = ImageIO.read(File(HRLutils.datafile(mapname)))

    # generate place cells
    self.gen_placecells(min_spread=1.0 * placedev)

    # initial conditions
    self.state = self.random_location(avoid=["wall", "target"])
    self.place_activations = [0 for _ in self.placecells]

    self.create_origin("place", lambda: self.place_activations)

    # note: making the value small, so that the noise node will give us
    # some random exploration as well
    self.create_origin("optimal_move",
                       lambda: [0.1 if self.optimal_move == a[0] else 0.0
                                for a in self.actions])
Example 13: saveWeights
def saveWeights(self, prefix):
    """Save the connection weights to file."""

    prefix = prefix + "_" + self.name
    for n in self.getNodes():
        if n.getName().startswith("action"):
            term = n.getTermination("learning")
            weights = [t.getWeights() for t in term.getNodeTerminations()]

            f = open(HRLutils.datafile(prefix + "_" + n.getName() +
                                       ".txt"), "w")
            f.write(str(HRLutils.SEED) + "\n")
            for row in weights:
                f.write(" ".join([str(x) for x in row]) + "\n")
            f.close()
Example 14: run_gridworld
def run_gridworld(args, seed=None):
    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("run_gridworld")

    stateN = 400
    stateD = 2
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]

    agent = smdpagent.SMDPAgent(stateN, stateD, actions, stateradius=3,
                                **args)
    net.add(agent)

    env = gridworldenvironment.GridWorldEnvironment(
        stateD, actions, HRLutils.datafile("smallgrid.txt"), cartesian=True,
        delay=(0.6, 0.9), datacollection=False)
    net.add(env)

    net.connect(env.getOrigin("state"), agent.getTermination("state_input"))
    net.connect(env.getOrigin("reward"), agent.getTermination("reward"))
    net.connect(env.getOrigin("reset"), agent.getTermination("reset"))
    net.connect(env.getOrigin("learn"), agent.getTermination("learn"))
    net.connect(env.getOrigin("reset"), agent.getTermination("save_state"))
    net.connect(env.getOrigin("reset"), agent.getTermination("save_action"))

    net.connect(agent.getOrigin("action_output"), env.getTermination("action"))
    net.connect(agent.getOrigin("Qs"), env.getTermination("Qs"))

    net.add_to_nengo()

    view = timeview.View(net.network, update_frequency=5)
    view.add_watch(gridworldwatch.GridWorldWatch())
    view.restore()
Example 15: test_actionvalues
def test_actionvalues():
    net = nef.Network("testActionValues")

    stateN = 200
    N = 100
    stateD = 2
    stateradius = 1.0
    statelength = math.sqrt(2 * stateradius ** 2)
    init_Qs = 0.5
    learningrate = 0.0
    Qradius = 1
    tauPSC = 0.007

    actions = [("up", [0, 1]), ("right", [1, 0]), ("down", [0, -1]),
               ("left", [-1, 0])]

    # state
    state_pop = net.make(
        "state_pop",
        stateN,
        stateD,
        radius=statelength,
        node_factory=HRLutils.node_fac(),
        eval_points=[
            [x / statelength, y / statelength]
            for x in range(-int(stateradius), int(stateradius))
            for y in range(-int(stateradius), int(stateradius))
        ],
    )
    state_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])
    state_pop.addDecodedTermination("state_input", MU.I(stateD), tauPSC, False)

    # set up action nodes
    decoders = state_pop.addDecodedOrigin(
        "init_decoders", [ConstantFunction(stateD, init_Qs)], "AXON"
    ).getDecoders()

    actionvals = actionvalues.ActionValues(
        "testActionValues", N, stateN, actions, learningrate,
        Qradius=Qradius, init_decoders=decoders
    )
    net.add(actionvals)

    net.connect(state_pop.getOrigin("AXON"), actionvals.getTermination("state"))

    # input
    inp = net.make_input("input", [0, 0])
    net.connect(inp, state_pop.getTermination("state_input"))

    net.add_to_nengo()
    net.view()