This page collects typical usage examples of the Python method pybrain.rl.agents.LearningAgent.reset. If you have been wondering how LearningAgent.reset works and where to use it, the curated examples below should help. You can also explore further usage examples of the containing class, pybrain.rl.agents.LearningAgent.
The following shows 15 code examples of LearningAgent.reset, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python examples.
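All 15 examples share the same lifecycle: the agent observes and acts inside an Experiment, then learn() updates its module from the collected history and reset() clears that history before the next training round. As a quick orientation, here is a minimal, self-contained sketch of that pattern, condensed from the maze setup of Examples 3 and 10 (the 5x5 maze layout and goal position are illustrative choices for this sketch, not taken from any example below):

import numpy as np
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q
from pybrain.rl.learners.valuebased import ActionValueTable
from pybrain.rl.environments.mazes import Maze, MDPMazeTask
from pybrain.rl.experiments import Experiment

# 1 = wall, 0 = free cell; the goal is placed at cell (1, 3)
structure = np.array([[1, 1, 1, 1, 1],
                      [1, 0, 0, 0, 1],
                      [1, 0, 1, 0, 1],
                      [1, 0, 0, 0, 1],
                      [1, 1, 1, 1, 1]])
environment = Maze(structure, (1, 3))
controller = ActionValueTable(structure.size, 4)  # one row per state, one column per action
controller.initialize(1.)
agent = LearningAgent(controller, Q())
experiment = Experiment(MDPMazeTask(environment), agent)

for _ in range(50):
    experiment.doInteractions(100)  # collect experience into the agent's history
    agent.learn()                   # update the action-value table from that history
    agent.reset()                   # clear the history so stale samples don't leak into the next round

Calling reset() after every learn() is the pattern nearly all of the examples below follow; Example 11 notes it could also be called more sporadically.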
Example 1: train
# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or alternatively: from pybrain.rl.agents.LearningAgent import reset [as alias]
def train():
    # Make the environment
    environment = TwentyFortyEightEnvironment()
    # The task is the game this time
    task = environment
    # Make the reinforcement learning agent (use a network because inputs are continuous)
    network = ActionValueNetwork(task.nSenses, task.nActions)
    # Use NFQ to train the network (Q is the table-based variant)
    learner = NFQ()
    learner.gamma = GAMMA
    agent = LearningAgent(network, learner)
    # Set up an experiment
    experiment = EpisodicExperiment(task, agent)
    # Train the learner
    meanScores = []
    for i in xrange(LEARNING_EPOCHS):
        experiment.doEpisodes(GAMES_PER_EPOCH)
        print "Iteration ", i, " With mean score ", task.meanScore, "Max block achieved ", environment.maxGameBlock
        meanScores.append(task.meanScore)
        agent.learn()
        agent.reset()
    params = {"learningEpochs": LEARNING_EPOCHS, "gamesPerEpoch": GAMES_PER_EPOCH, "gamma": GAMMA}
    return meanScores, params, agent
Example 2: main
# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or alternatively: from pybrain.rl.agents.LearningAgent import reset [as alias]
def main():
    # if os.path.exists('./agent.dump'):
    #     with open('./agent.dump') as f:
    #         agent = pickle.load(f)
    # else:
    controller = ActionValueNetwork(9, 4)
    learner = NFQ()
    agent = LearningAgent(controller, learner)
    score_list = []
    for i in range(10000):
        score = play(agent)
        score_list.append(score)
        # agent.learn() used to raise here:
        #   TypeError: only length-1 arrays can be converted to Python scalars
        #   (in pybrain/rl/learners/valuebased/q.py)
        # => switching the learner from Q to NFQ fixed it.
        # => http://stackoverflow.com/questions/23755927/pybrain-training-a-actionvaluenetwork-doesnt-properly-work
        #agent.learn()
        agent.reset()
        #data = [[0,0,0,0], [0,0,0,0], [0,0,0,2], [0,0,0,2]]
        data = [[0,0,2], [0,0,0], [0,0,2]]
        agent.integrateObservation(numpy.array(data).ravel())
        move = agent.getAction()
        print i, int(numpy.mean(score_list)), max(score_list), move
    with open('./agent.dump', 'w') as f:
        pickle.dump(agent, f)
    with open('./score.dump', 'w') as f:
        pickle.dump(score_list, f)
Example 3: test_maze
# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or alternatively: from pybrain.rl.agents.LearningAgent import reset [as alias]
def test_maze():
    # simplified version of the reinforcement learning tutorial example
    structure = np.array([[1, 1, 1, 1, 1],
                          [1, 0, 0, 0, 1],
                          [1, 0, 1, 0, 1],
                          [1, 0, 1, 0, 1],
                          [1, 1, 1, 1, 1]])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)
    for i in range(30):
        experiment.doInteractions(30)
        agent.learn()
        agent.reset()
    controller.params.reshape(shape.prod(), 4).max(1).reshape(*shape)
    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list('NESW'))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(' #'))[structure])
    print('Maze map:')
    print('\n'.join(''.join(row) for row in maze))
    print('Greedy policy:')
    print('\n'.join(''.join(row) for row in greedy_policy))
    assert '\n'.join(''.join(row) for row in greedy_policy) == 'NNNNN\nNSNNN\nNSNNN\nNEENN\nNNNNN'
Example 4: Team
# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or alternatively: from pybrain.rl.agents.LearningAgent import reset [as alias]
class Team(object):
    def __init__(self, living, task, learner=ENAC()):
        self.living = living
        self.task = task
        self.last_reward = 0
        self.agent = LearningAgent(self.living.brain, learner)
        self.oldparams = self.living.brain.params

    def Interaction(self):
        self.agent.integrateObservation(self.task.getObservation())
        self.task.performAction(self.agent.getAction())
        self.last_reward = self.task.getReward()
        self.agent.giveReward(self.last_reward)
        finished = self.task.isFinished()
        if finished:
            #print task.cumreward
            self.agent.newEpisode()
            self.task.reset()
        return self.last_reward, finished

    def Learn(self, episodes=1):
        self.agent.learn(episodes)
        self.agent.reset()
        newparams = self.living.brain.params.copy()  # get_all_weights(eater.brain)[:]
        # squared parameter change, as a crude measure of how much was learned
        dif = 0
        j = 0
        for i in newparams:
            dif += (self.oldparams[j] - newparams[j])**2
            j += 1
        self.oldparams = newparams
        return dif
Example 5: q_learning_table
# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or alternatively: from pybrain.rl.agents.LearningAgent import reset [as alias]
def q_learning_table():
    controller = ActionValueTable(36, 4)
    learner = Q()
    controller.initialize(1.)
    agent = LearningAgent(controller, learner)
    score_list = []
    turn_list = []
    # +100 iterations to match the training budget of the neural-network version
    for i in range(600):
        print_state(agent.module.getValue, 'table')
        score, turn = play(agent, 'table')
        score_list.append(score)
        turn_list.append(turn)
        agent.learn()
        agent.reset()
        print i, int(numpy.mean(score_list)), max(score_list), score, turn
    with open('./agent.dump', 'w') as f:
        pickle.dump(agent, f)
    with open('./score.dump', 'w') as f:
        pickle.dump([score_list, turn_list], f)
Example 6: Pause
# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or alternatively: from pybrain.rl.agents.LearningAgent import reset [as alias]
class QAlgorithm:
    def Pause(self):  # if the menu says pause, pause execution
        while self.state == 1:
            time.sleep(.05)
        return True

    def Quit(self):  # if the menu says quit, stop running
        self.process.terminate()
        return False

    def Start(self):  # starts the bot
        if self.process == None:
            self.runBot()
            #self.process = multiprocessing.Process(target=self.runBot, args=[])
            #self.process.start()
        return True

    def CheckState(self):  # checks what state the menu says to be in
        if self.state == 0:
            self.Start()
        elif self.state == 1:
            self.Pause()
        elif self.state == 2:
            self.Quit()

    def GameOver(self):  # checks whether the state requires the bot to pause or quit, or whether the game is over
        return self.CheckState() or self.sr.checkEndGame(self.endBox, self.gameOver)

    def __init__(self, rewardBox, box, gameOver, endGame, scoreArea):
        self.reward = rewardBox
        self.bbox = box
        self.environment = TEnviroment(box)  # custom environment class
        if os.path.isfile("bot.txt"):
            self.controller = pickle.load(open("bot.txt", "rb"))
        else:
            self.controller = ActionValueNetwork(50**2, 4)  # arguments: (framerate * maxPlaytime, number of actions)
        self.learner = Q()
        gf = {0: self.GameOver}
        self.agent = LearningAgent(self.controller, self.learner)
        self.task = TTask(self.environment, scoreArea, gf)  # needs a custom task
        self.experiment = EpisodicExperiment(self.task, self.agent)
        self.process = None
        self.endBox = endGame

    def runBot(self):  # runs the bot for a single episode
        self.experiment.doEpisodes()
        self.agent.learn()
        self.agent.reset()
        file = open("bot.txt", "wb+")
        pickle.dump(self.controller, file)
Example 7: learn
# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or alternatively: from pybrain.rl.agents.LearningAgent import reset [as alias]
def learn(self, number_of_iterations):
    learner = Q(0.2, 0.8)
    task = CartMovingTask(self.environment)
    self.controller = ActionValueTable(
        reduce(lambda x, y: x * y, map(lambda x: len(x), self.ranges)), self.force_granularity
    )
    self.controller.initialize(1.0)
    agent = LearningAgent(self.controller, learner)
    experiment = Experiment(task, agent)
    for i in range(number_of_iterations):
        experiment.doInteractions(1)
        agent.learn()
        agent.reset()
    with open("test.pcl", "w+") as f:
        pickle.dump(self.controller, f)
Example 8: maze
# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or alternatively: from pybrain.rl.agents.LearningAgent import reset [as alias]
def maze():
    # import sys, time
    pylab.gray()
    pylab.ion()
    # The goal appears to be in the upper right
    structure = [
        "!!!!!!!!!!",
        "! ! ! ! !",
        "! !! ! ! !",
        "! ! !",
        "! !!!!!! !",
        "! ! ! !",
        "! ! !!!! !",
        "! !",
        "! !!!!! !",
        "! ! !",
        "!!!!!!!!!!",
    ]
    structure = np.array([[ord(c) - ord(" ") for c in row] for row in structure])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.0)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)
    for i in range(100):
        experiment.doInteractions(100)
        agent.learn()
        agent.reset()
        # one row per state, one column per action; max(1) plots the biggest action value for each square
        pylab.pcolor(controller.params.reshape(shape.prod(), 4).max(1).reshape(*shape))
        pylab.draw()
    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list("NESW"))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(" #"))[structure])
    print("Maze map:")
    print("\n".join("".join(row) for row in maze))
    print("Greedy policy:")
    print("\n".join("".join(row) for row in greedy_policy))
Example 9: main
# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or alternatively: from pybrain.rl.agents.LearningAgent import reset [as alias]
def main():
    # Storing every 2048 board state in a table is hopeless:
    # there are on the order of 14**16 distinct states.
    #controller = ActionValueTable(16, 4)
    #learner = Q()
    #controller.initialize(1.)
    controller = ActionValueNetwork(16, 4)
    learner = NFQ()
    #learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(controller, learner)
    score_list = []
    for i in range(10000):
        # if os.path.exists('./agent.dump'):
        #     with open('./agent.dump') as f:
        #         agent = pickle.load(f)
        print i, 'playing ...'
        score = play(agent)
        score_list.append(score)
        # agent.learn() used to raise here:
        #   TypeError: only length-1 arrays can be converted to Python scalars
        #   (in pybrain/rl/learners/valuebased/q.py)
        # => switching the learner from Q to NFQ fixed it.
        # => http://stackoverflow.com/questions/23755927/pybrain-training-a-actionvaluenetwork-doesnt-properly-work
        print i, 'learning ...'
        agent.learn()
        agent.reset()
        print i, 'evaluate sample ...'
        data = [[0,0,0,0], [0,0,0,0], [0,0,0,2], [0,0,0,2]]
        agent.integrateObservation(numpy.array(data).ravel())
        move = agent.getAction()
        print " ", i, int(numpy.mean(score_list)), max(score_list), move
        if i % 20 == 0:
            print i, 'saving ...'
            with open('./agent.dump', 'w') as f:
                pickle.dump(agent, f)
            with open('./score.dump', 'w') as f:
                pickle.dump(score_list, f)
Example 10: explore_maze
# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or alternatively: from pybrain.rl.agents.LearningAgent import reset [as alias]
def explore_maze():
    # simplified version of the reinforcement learning tutorial example
    structure = [
        list("!!!!!!!!!!"),
        list("! ! ! ! !"),
        list("! !! ! ! !"),
        list("! ! !"),
        list("! !!!!!! !"),
        list("! ! ! !"),
        list("! ! !!!! !"),
        list("! !"),
        list("! !!!!! !"),
        list("! ! !"),
        list("!!!!!!!!!!"),
    ]
    structure = np.array([[ord(c) - ord(" ") for c in row] for row in structure])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.0)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)
    for i in range(30):
        experiment.doInteractions(30)
        agent.learn()
        agent.reset()
    controller.params.reshape(shape.prod(), 4).max(1).reshape(*shape)
    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list("NESW"))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(" #"))[structure])
    print("Maze map:")
    print("\n".join("".join(row) for row in maze))
    print("Greedy policy:")
    print("\n".join("".join(row) for row in greedy_policy))
    assert "\n".join("".join(row) for row in greedy_policy) == "NNNNN\nNSNNN\nNSNNN\nNEENN\nNNNNN"
Example 11: indim
# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or alternatively: from pybrain.rl.agents.LearningAgent import reset [as alias]
    self.env.reset()

    @property
    def indim(self):
        return self.env.indim

    @property
    def outdim(self):
        return self.env.outdim

env = TetrisEnv(10, 20)  # Tetris
task = TetrisTask(env)
QNet = ActionValueNetwork(10*20 + 11, 6)
learner = NFQ()  # Q()?
learner._setExplorer(EpsilonGreedyExplorer(0.2, decay=0.99))
agent = LearningAgent(QNet, learner)
experiment = EpisodicExperiment(task, agent)
while True:
    experiment.doEpisodes(1)
    agent.learn()
    agent.reset()  # or call more sporadically...?
    task.reset()
Example 12: ActionValueTable
# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or alternatively: from pybrain.rl.agents.LearningAgent import reset [as alias]
# define action-value table
# number of states is:
#
#    current value: 1-21
#
# number of actions:
#
#    Stand=0, Hit=1
av_table = ActionValueTable(21, 2)
av_table.initialize(0.)

# define Q-learning agent
learner = Q(0.5, 0.0)
learner._setExplorer(EpsilonGreedyExplorer(0.0))
agent = LearningAgent(av_table, learner)

# define the environment
env = BlackjackEnv()

# define the task
task = BlackjackTask(env)

# finally, define experiment
experiment = Experiment(task, agent)

# ready to go, start the process
while True:
    experiment.doInteractions(1)
    agent.learn()
    agent.reset()
Example 13: run
# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or alternatively: from pybrain.rl.agents.LearningAgent import reset [as alias]
#......... (part of the code omitted here) .........
    # pprint(vars(agent.learner))
    testagent = LearningAgent(module, None)
    experiment = EpisodicExperiment(task, agent)
    testexperiment = EpisodicExperiment(testtask, testagent)

    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)

    performance = []
    if plot:
        pf_fig = plt.figure()

    m = parameters["MaxTotalEpisodes"] / parameters["EpisodesPerLearn"]
    for episode in range(0, m):
        # one learning step after one episode of world-interaction
        experiment.doEpisodes(parameters["EpisodesPerLearn"])
        #agent.learn(1)
        #renderer.drawPlot()

        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True
        if episode % parameters["TestAfter"] == 0:
            #print "Evaluating at episode: ", episode
            #experiment.agent = testagent
            #r = mean([sum(x) for x in testexperiment.doEpisodes(parameters["TestWith"])])
            #for i in range(0, parameters["TestWith"]):
            #    y = testexperiment.doEpisodes(1)
            #    print (agent.learner._allEvaluated)
            #
            # from pprint import pprint
            # pprint(vars(task))
            l = parameters["TestWith"]
            task.N = parameters["MaxRunsPerEpisodeTest"]
            experiment.doEpisodes(l)
            task.N = parameters["MaxRunsPerEpisode"]
            resList = (agent.learner._allEvaluations)[-l:-1]
            # print agent.learner._allEvaluations
            from scipy import array
            rLen = len(resList)
            avReward = array(resList).sum() / rLen
            # print avReward
            # print resList
            # exit(0)
            # print("Parameters:", agent.learner._bestFound())
            # print(
            #     " Evaluation:", episode,
            #     " BestReward:", agent.learner.bestEvaluation,
            #     " AverageReward:", avReward)
            # if agent.learner.bestEvaluation == 0:
            #     print resList[-20:-1]
            #     print "done"
            #     break
            performance.append(avReward)
            env.delay = False
            testagent.reset()
            #experiment.agent = agent
            # performance.append(r)
            if plot:
                plotPerformance(performance, pf_fig)
            # print "reward avg", r
            # print "explorer epsilon", learner.explorer.epsilon
            # print "num episodes", agent.history.getNumSequences()
            # print "update step", len(performance)
    # print "done"
    return performance

#print "network", json.dumps(module.bn.net.E, indent=2)
#import sumatra.parameters as p
#import sys
#parameter_file = sys.argv[1]
#parameters = p.SimpleParameterSet(parameter_file)
#
#run(["BalanceTask", parameters])
Example 14: run
# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or alternatively: from pybrain.rl.agents.LearningAgent import reset [as alias]
#......... (part of the code omitted here) .........
    agent = OptimizationAgent(module, PGPE(storeAllEvaluations=True, storeAllEvaluated=True, maxEvaluations=None, verbose=False))
    testagent = LearningAgent(module, None)
    pgpeexperiment = EpisodicExperiment(task, agent)
    randexperiment = EpisodicExperiment(task, bagent)

    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)

    performance = []
    if plot:
        pf_fig = plt.figure()

    m = parameters["MaxTotalEpisodes"] / parameters["EpisodesPerLearn"]

    ## train pgpe
    for episode in range(0, 50):
        # one learning step after one episode of world-interaction
        y = pgpeexperiment.doEpisodes(1)

    be, bf = agent.learner._bestFound()
    print be, bf
    print "generate data"
    be.numActions = 1
    gdagent = LearningAgent(be, blearner)
    experiment = EpisodicExperiment(task, gdagent)
    for episode in range(0, 1000):
        # print episode, " of 1000"
        # one learning step after one episode of world-interaction
        y = experiment.doEpisodes(1)
        # print y
        x = randexperiment.doEpisodes(1)
        # print len(y[0])
        #renderer.drawPlot()

        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True
        l = 5
        resList = (agent.learner._allEvaluations)[-l:-1]
        # print agent.learner._allEvaluations
        from scipy import array
        rLen = len(resList)
        avReward = array(resList).sum() / rLen
        # print avReward
        # print resList
        # exit(0)
        # print("Parameters:", agent.learner._bestFound())
        # print(
        #     " Evaluation:", episode,
        #     " BestReward:", agent.learner.bestEvaluation,
        #     " AverageReward:", avReward)
        # if agent.learner.bestEvaluation == 0:
        #     print resList[-20:-1]
        #     print "done"
        #     break
        #print resList
        performance.append(avReward)
        env.delay = False
        testagent.reset()
        #experiment.agent = agent
        # performance.append(r)
        if plot:
            plotPerformance(performance, pf_fig)
        # print "reward avg", r
        # print "explorer epsilon", learner.explorer.epsilon
        # print "num episodes", agent.history.getNumSequences()
        # print "update step", len(performance)

    blearner.add_ds(rlearner.dataset)
    blearner.learn()
    #blearner.learnX(agent.learner._allEvaluated)
    print "done"
    return performance
示例15: while
# 需要导入模块: from pybrain.rl.agents import LearningAgent [as 别名]
# 或者: from pybrain.rl.agents.LearningAgent import reset [as 别名]
# Without the next line, the pyplot plot won't actually show up.
plt.pause(0.001)
performance = []
if not render:
pf_fig = plt.figure()
while(True):
# one learning step after one episode of world-interaction
experiment.doEpisodes(1)
agent.learn(1)
# test performance (these real-world experiences are not used for training)
if render:
env.delay = True
experiment.agent = testagent
r = mean([sum(x) for x in experiment.doEpisodes(5)])
env.delay = False
testagent.reset()
experiment.agent = agent
performance.append(r)
if not render:
plotPerformance(performance, pf_fig)
print("reward avg", r)
print("explorer epsilon", learner.explorer.epsilon)
print("num episodes", agent.history.getNumSequences())
print("update step", len(performance))