This article collects typical usage examples of the EpisodicExperiment.agent method from the Python package pybrain.rl.experiments. If you are unsure what EpisodicExperiment.agent does, how to call it, or want to see it used in real code, the curated examples below should help. You may also want to read more about the class it belongs to, pybrain.rl.experiments.EpisodicExperiment.
The following shows 3 code examples of the EpisodicExperiment.agent method, sorted by popularity by default.
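Before the examples, here is a minimal sketch of where the agent attribute comes from. It is not taken from the examples below; the cart-pole balancing task bundled with PyBrain is used only as an illustrative stand-in. The agent passed to the EpisodicExperiment constructor is stored on the experiment, and assigning experiment.agent later swaps the active agent:

from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import ENAC
from pybrain.rl.experiments import EpisodicExperiment

env = CartPoleEnvironment()
task = BalanceTask(env, 200)                    # episodic task wrapping the environment
net = buildNetwork(task.outdim, 3, env.indim)   # small policy network
agent = LearningAgent(net, ENAC())              # policy-gradient learner

experiment = EpisodicExperiment(task, agent)    # this agent becomes experiment.agent
rewards = experiment.doEpisodes(5)              # list with one reward sequence per episode
experiment.agent = agent                        # reassigning the attribute swaps the active agent

Examples 1 and 2 below rely on exactly this reassignment to alternate between a learning agent and an evaluation agent.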
Example 1: range
# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import agent [as alias]
perform_cumrewards = []
for irehearsal in range(7000):
    # Learn.
    # ------
    r = exp.doEpisodes(1)

    # Discounted reward.
    cumreward = exp.task.getTotalReward()
    # print 'cumreward: %.4f; nsteps: %i; learningRate: %.4f' % (
    #         cumreward, len(r[0]), exp.agent.learner.learningRate)

    if irehearsal % 50 == 0:
        # Perform (no learning).
        # ----------------------
        # Swap out the agent.
        exp.agent = performance_agent

        # Perform.
        r = exp.doEpisodes(1)
        perform_cumreward = task.getTotalReward()
        perform_cumrewards.append(perform_cumreward)
        print "PERFORMANCE: cumreward:", perform_cumreward, "nsteps:", len(r[0])

        # Swap back the learning agent.
        performance_agent.reset()
        exp.agent = agent

        ax1.cla()
        ax1.plot(perform_cumrewards, ".--")

        # Wheel trajectories.
        update_wheel_trajectories()
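The pattern in Example 1 — temporarily assigning a non-learning agent to exp.agent, measuring the episode reward, then restoring the learning agent — can be factored into a small helper. The sketch below is an illustration, not part of the original code; the parameter names are hypothetical and it assumes the experiment's task exposes getTotalReward() as in the example:

def evaluate_without_learning(exp, learning_agent, eval_agent):
    """Run one episode with eval_agent on exp and return the total reward.

    Hypothetical helper: exp is an EpisodicExperiment, learning_agent is the
    agent being trained, eval_agent is an agent that does not learn (like
    performance_agent in Example 1).
    """
    exp.agent = eval_agent              # swap out the learning agent
    exp.doEpisodes(1)
    total = exp.task.getTotalReward()   # reward accumulated over the episode
    eval_agent.reset()                  # clear the evaluation agent's history
    exp.agent = learning_agent          # swap the learning agent back in
    return total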
Example 2: while
# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import agent [as alias]
# Without the next line, the pyplot plot won't actually show up.
plt.pause(0.001)
performance = []
if not render:
    pf_fig = plt.figure()

while(True):
    # one learning step after one episode of world-interaction
    experiment.doEpisodes(1)
    agent.learn(1)

    # test performance (these real-world experiences are not used for training)
    if render:
        env.delay = True
    experiment.agent = testagent
    r = mean([sum(x) for x in experiment.doEpisodes(5)])
    env.delay = False
    testagent.reset()
    experiment.agent = agent

    performance.append(r)
    if not render:
        plotPerformance(performance, pf_fig)

    print("reward avg", r)
    print("explorer epsilon", learner.explorer.epsilon)
    print("num episodes", agent.history.getNumSequences())
    print("update step", len(performance))
Example 3: run
# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import agent [as alias]
def run(nao, pad):
    # ################################
    # choose bottom cam, so nao can see object when standing next to it
    nao.camera.selectCam(1)

    env = grabbingEnvironment(nao)
    #env.connect(nao)
    task = grabbingTask(env)

    net = buildNetwork(len(task.getObservation()), 8, env.indim, bias=True, recurrent=True)
    print env.indim
    #net = ActionValueNetwork(5,4)
    #, outclass=TanhLayer)
    #, hiddenclass=TanhLayer, outclass=TanhLayer

    # not correct right now..
    # TODO: train into RL Modules, dataset needs to be merged with exploration data
    #generateTraining.generateTraining().runDeltaMovements(nao,net,env,pad)

    #module = ActionValueNetwork(3, 3)
    #module = NeuronLayer(40)
    #agent = LearningAgent(net, SARSA())
    #learner = PolicyGradientLearner()
    #learner._setExplorer(StateDependentExplorer(3,3))
    #learner._setModule(module)
    #agent = LearningAgent(module, learner)
    #agent = LearningAgent(net, ENAC())
    #agent = LearningAgent(net, Reinforce())
    #learner = NFQ()
    #learner.explorer.epsilon = 0.4
    #agent = LearningAgent(net, learner)

    testagent = OptimizationAgent(net, None, env)

    #agent = LearningAgent(module, Q())
    #agent = LearningAgent(module, QLambda())

    learner = grabbingPGPE(storeAllEvaluations=True, verbose=True, epsilon=1.0,
                           deltamax=5.0, sigmaLearningRate=0.1, learningRate=0.2)
    agent = OptimizationAgent(net, learner, env)
    #agent = OptimizationAgent(net, SimpleSPSA(storeAllEvaluations = True, verbose = True))
    #agent = OptimizationAgent(net, HillClimber(storeAllEvaluations = True, verbose = True))
    #agent = OptimizationAgent(net, RandomSearch(storeAllEvaluations = True, verbose = True))

    experiment = EpisodicExperiment(task, agent)
    # only for optimizationAgent
    #experiment.doOptimization = True

    # only for simulator!
    nao.fractionMaxSpeed = 1.0

    print "#env"
    print " sensors:", env.outdim
    print " actions:", env.indim
    print " discreteStates:", env.discreteStates
    print " discreteActions:", env.discreteActions
    print
    print "#task"
    print " sensor_limits:", task.sensor_limits
    print " actor_limits:", task.actor_limits
    print " epilen: ", task.epiLen
    print "#EpisodicTask"
    print " discount:", task.discount
    print " batchsize:", task.batchSize
    print
    print "#PGPE"
    print " exploration type:", grabbingPGPE().exploration
    print " LearningRate:", grabbingPGPE().learningRate
    print " sigmaLearningRate:", grabbingPGPE().sigmaLearningRate
    print " epsilon:", grabbingPGPE().epsilon
    print " wDecay:", grabbingPGPE().wDecay
    print " momentum:", grabbingPGPE().momentum
    print " rprop:", grabbingPGPE().rprop
# # switch this to True if you want to see the cart balancing the pole (slower)
# render = False
#
# plt.ion()
#
# env = CartPoleEnvironment()
# if render:
# renderer = CartPoleRenderer()
# env.setRenderer(renderer)
# renderer.start()
#
# module = ActionValueNetwork(4, 3)
#......... the rest of the code is omitted here .........
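Example 3 is cut off before the training loop, and the omitted part cannot be recovered from the excerpt. Purely as a hedged sketch of how such a setup is usually driven: once an EpisodicExperiment has been built around an OptimizationAgent, further calls to doEpisodes hand control to the optimizer (the commented-out experiment.doOptimization line above hints at this), and a learner created with storeAllEvaluations=True keeps every episode's fitness. The loop length and the _allEvaluations attribute below are assumptions, not the original code:

# Hypothetical continuation, not the omitted code from Example 3.
for epoch in range(100):
    # each call lets the PGPE optimizer evaluate further episodes
    experiment.doEpisodes(task.batchSize)

# storeAllEvaluations=True makes PyBrain's black-box optimizers record every
# fitness value; _allEvaluations is assumed here to hold that list
print "episodes evaluated:", len(learner._allEvaluations)
print "best fitness so far:", max(learner._allEvaluations)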