本文整理汇总了Python中pybrain.rl.agents.LearningAgent.learning方法的典型用法代码示例。如果您正苦于以下问题：Python LearningAgent.learning方法的具体用法？Python LearningAgent.learning怎么用？Python LearningAgent.learning使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pybrain.rl.agents.LearningAgent的用法示例。
在下文中一共展示了LearningAgent.learning方法的2个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: someEpisodes
# 需要导入模块: from pybrain.rl.agents import LearningAgent [as 别名]
# 或者: from pybrain.rl.agents.LearningAgent import learning [as 别名]
def someEpisodes(game_env, net, discountFactor=0.99, maxSteps=100, avgOver=1, returnEvents=False):
    """Score a fixed network policy by its average fitness over several rollouts.

    The agent is built around ``net`` with learning and logging switched off,
    so the network is only evaluated, never trained. ``avgOver`` rollouts of
    one episode each are played; each rollout adds the discounted sum of its
    rewards plus a tiny bonus (1e-6 per distinct entry in
    ``game_env._allEvents``) that separates policies whose rewards are
    otherwise identical.

    Args:
        game_env: Environment to play on; recording is enabled and it is
            reset once before the rollouts start.
        net: Network handed to ``LearningAgent``; reset once before play.
        discountFactor: The reward at step ``t`` is weighted by
            ``discountFactor ** t``.
        maxSteps: Value assigned to ``task.maxSteps``.
        avgOver: Number of rollouts the fitness is averaged over.
        returnEvents: When true, ``game_env._allEvents`` is returned as well.

    Returns:
        The averaged fitness, or ``(fitness, game_env._allEvents)`` when
        ``returnEvents`` is true.
    """
    task = GameTask(game_env)
    game_env.recordingEnabled = True
    game_env.reset()
    net.reset()
    task.maxSteps = maxSteps

    # Evaluation only: the agent must neither learn nor log.
    agent = LearningAgent(net)
    agent.learning = False
    agent.logging = False
    experiment = EpisodicExperiment(task, agent)

    total = 0
    for _ in range(avgOver):
        reward_seqs = experiment.doEpisodes(1)
        # Small exploration bonus, only relevant when rewards tie.
        total += len(set(game_env._allEvents)) * 1e-6
        # The true, discounted reward of this rollout.
        total += sum(
            sum(reward * discountFactor ** t for t, reward in enumerate(seq))
            for seq in reward_seqs
        )
    total /= avgOver

    if returnEvents:
        return total, game_env._allEvents
    return total
示例2: TestEnv
# 需要导入模块: from pybrain.rl.agents import LearningAgent [as 别名]
# 或者: from pybrain.rl.agents.LearningAgent import learning [as 别名]
import sys, time
from pybrain.rl.learners.valuebased import ActionValueNetwork
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q, SARSA, NFQ
from pybrain.rl.experiments.episodic import EpisodicExperiment
from pybrain.rl.environments import Task
from tasktest import TestTask
from envtest import TestEnv
# Wire up the environment, its task, and an NFQ learner that trains an
# action-value network, then run episodic training in an endless loop.
env = TestEnv()
task = TestTask(env)
# NOTE(review): presumably 200 is the state dimension and 3 the number of
# actions -- confirm against TestEnv/TestTask.
controller = ActionValueNetwork(200, 3)
learner = NFQ()
agent = LearningAgent(controller, learner)
experiment = EpisodicExperiment(task, agent)

i = 0
# NOTE(review): this loop has no exit condition; it runs until interrupted.
while True:
    # Collect a batch of 10 episodes, then train on them and clear the agent.
    experiment.doEpisodes(10)
    # Single-argument parenthesised print emits the same text on Python 2 and 3.
    print("Learning")
    agent.learn()
    agent.reset()
    i += 1
    print("Cycle: %d" % i)
    # After 60 cycles stop learning; episodes keep being run.
    if i > 60:
        agent.learning = False