本文整理汇总了Python中pybrain.rl.agents.LearningAgent.giveReward方法的典型用法代码示例。如果您正苦于以下问题:Python LearningAgent.giveReward方法的具体用法?Python LearningAgent.giveReward怎么用?Python LearningAgent.giveReward使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pybrain.rl.agents.LearningAgent
的用法示例。
在下文中一共展示了LearningAgent.giveReward方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: Team
# 需要导入模块: from pybrain.rl.agents import LearningAgent [as 别名]
# 或者: from pybrain.rl.agents.LearningAgent import giveReward [as 别名]
class Team(object):
    """Couple a living entity's brain with a task via a PyBrain LearningAgent.

    Tracks the reward of the most recent interaction and measures how far the
    brain's parameters drift after each learning step.
    """

    def __init__(self, living, task, learner=None):
        """
        living  -- object exposing a `.brain` network, used as the agent's module
        task    -- PyBrain task providing observations, actions and rewards
        learner -- RL learner; defaults to a fresh ENAC() per Team.
                   (The original `learner=ENAC()` default was evaluated once at
                   class-definition time, so all Teams built without an explicit
                   learner shared a single stateful learner instance -- the
                   mutable-default-argument pitfall.)
        """
        self.living = living
        self.task = task
        self.last_reward = 0
        if learner is None:
            learner = ENAC()
        self.agent = LearningAgent(self.living.brain, learner)
        # Snapshot of the brain parameters; Learn() diffs against this.
        self.oldparams = self.living.brain.params

    def Interaction(self):
        """Run one observe/act/reward cycle.

        Returns (last_reward, finished). When the task reports it is finished,
        the agent is told a new episode starts and the task is reset.
        """
        self.agent.integrateObservation(self.task.getObservation())
        self.task.performAction(self.agent.getAction())
        self.last_reward = self.task.getReward()
        self.agent.giveReward(self.last_reward)
        finished = self.task.isFinished()
        if finished:
            self.agent.newEpisode()
            self.task.reset()
        return self.last_reward, finished

    def Learn(self, episodes=1):
        """Run `episodes` learning steps, reset the agent, and return the
        squared Euclidean distance the brain parameters moved."""
        self.agent.learn(episodes)
        self.agent.reset()
        newparams = self.living.brain.params.copy()
        # Sum of squared per-parameter changes (replaces the original's
        # manual index counter that iterated values but indexed by j).
        dif = sum((old - new) ** 2 for old, new in zip(self.oldparams, newparams))
        self.oldparams = newparams
        return dif
示例2: ActionValueNetwork
# 需要导入模块: from pybrain.rl.agents import LearningAgent [as 别名]
# 或者: from pybrain.rl.agents.LearningAgent import giveReward [as 别名]
"""
Program: NFQ_EXAMPLE.PY
Date: Thursday, March 1 2012
Description: Test NFQ on my cartpole simulation.
"""
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
from cartpole import CartPole
import numpy as np

# Q-value network over a 4-dimensional observation and 2 discrete actions.
module = ActionValueNetwork(4,2)
# Neural Fitted Q-iteration learner; epsilon-greedy exploration at 40%.
learner = NFQ()
learner.explorer.epsilon = 0.4
agent = LearningAgent(module, learner)
env = CartPole()

# cnt counts steps survived in the current episode (episode length).
cnt = 0
for i in range(1000):
    env.reset()
    # Python 2 print statement; `cnt` here is the PREVIOUS episode's length.
    print "Episode: %d, Count: %d" % (i,cnt)
    cnt = 0
    # Run one episode until the cartpole simulation reports failure.
    while not env.failure():
        agent.integrateObservation(env.observation())
        action = agent.getAction()
        pstate, paction, reward, state = env.move(action)
        cnt += 1
        agent.giveReward(reward)
    # One learning pass per episode over the collected experience.
    agent.learn(1)
示例3: xrange
# 需要导入模块: from pybrain.rl.agents import LearningAgent [as 别名]
# 或者: from pybrain.rl.agents.LearningAgent import giveReward [as 别名]
# Train the agent on XOR: the state encodes the two input bits, the reward
# is +1 for a correct answer and -1 for a wrong one.
for x in xrange(1,100):
    # The training
    listxor = random.choice([[0, 0],[0, 1], [1, 0], [1, 1]])
    # Encode the two bits as a single state index in 0..3 (bit0 + 2*bit1).
    qstate = listxor[0] + listxor[1]*2
    resultxor = listxor[0]^listxor[1]
    agent.integrateObservation([qstate])
    action = agent.getAction()
    if int(action) == resultxor:
        reward = 1
    else:
        reward = -1
    print "xor(",listxor,") = ", resultxor, " || action = " , action[0], "reward = ", reward
    agent.giveReward(reward) # +1 for a good answer, -1 for a bad one.
    agent.learn()
print "finished"
# A debugger...
# Test: query the learned greedy action for each input pair.
# NOTE(review): getMaxAction is passed the XOR *result* (0 or 1), not the
# encoded state index bit0 + 2*bit1 used during training -- looks like a
# bug; only states 0 and 1 are ever queried. Verify against intent.
print "test : "
print "[0, 0] ", agent.learner.module.getMaxAction(0^0) # module.getMaxAction([0, 0])
print "[0, 1] ", agent.learner.module.getMaxAction(0^1)
print "[1, 0] ", agent.learner.module.getMaxAction(1^0)
print "[1, 1] ", agent.learner.module.getMaxAction(1^1)