

Python EpisodicExperiment.agent Method Code Examples

This article collects typical usage examples of the Python method pybrain.rl.experiments.EpisodicExperiment.agent. If you are unsure what EpisodicExperiment.agent does or how to call it, the curated examples below should help. For broader context, see the other usage examples of pybrain.rl.experiments.EpisodicExperiment.


Three code examples of the EpisodicExperiment.agent method are shown below, drawn from open-source projects on GitHub.
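All three examples share one pattern: build an EpisodicExperiment, let a learning agent collect episodes, and temporarily reassign the experiment's agent attribute to measure performance without learning. Here is a minimal sketch of that pattern, assuming PyBrain's bundled cart-pole environment and ENAC learner as stand-ins for the environments and learners the examples use:

# A minimal sketch of the agent-swap pattern (assumption: PyBrain's bundled
# cart-pole environment and ENAC learner stand in for the examples' own).
from pybrain.rl.agents import LearningAgent
from pybrain.rl.environments.cartpole import BalanceTask, CartPoleEnvironment
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.rl.learners import ENAC
from pybrain.tools.shortcuts import buildNetwork

env = CartPoleEnvironment()
task = BalanceTask(env, maxsteps=200)
net = buildNetwork(task.outdim, task.indim, bias=False)

agent = LearningAgent(net, ENAC())      # learns from collected episodes
testagent = LearningAgent(net, None)    # shares the net, never learns

experiment = EpisodicExperiment(task, agent)
for episode in range(50):
    experiment.doEpisodes(1)            # gather one training episode
    agent.learn()                       # update the policy

    experiment.agent = testagent        # swap in the evaluation agent
    rewards = experiment.doEpisodes(3)  # roll out without learning
    print("mean return:", sum(sum(r) for r in rewards) / len(rewards))
    testagent.reset()                   # clear the logged history
    experiment.agent = agent            # swap the learner back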

Example 1: range

# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Alternatively: from pybrain.rl.experiments.EpisodicExperiment import agent [as alias]
perform_cumrewards = []
for irehearsal in range(7000):

    # Learn.
    # ------
    r = exp.doEpisodes(1)
    # Discounted reward.
    cumreward = exp.task.getTotalReward()
    # print('cumreward: %.4f; nsteps: %i; learningRate: %.4f' % (
    #     cumreward, len(r[0]), exp.agent.learner.learningRate))

    if irehearsal % 50 == 0:
        # Perform (no learning).
        # ----------------------
        # Swap out the agent.
        exp.agent = performance_agent

        # Perform.
        r = exp.doEpisodes(1)
        perform_cumreward = task.getTotalReward()
        perform_cumrewards.append(perform_cumreward)
        print "PERFORMANCE: cumreward:", perform_cumreward, "nsteps:", len(r[0])

        # Swap back the learning agent.
        performance_agent.reset()
        exp.agent = agent

        ax1.cla()
        ax1.plot(perform_cumrewards, ".--")
        # Wheel trajectories.
        update_wheel_trajectories()
Developer: avain | Project: pybrain | Lines: 33 | Source: bicycle.py
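The snippet references ax1 and update_wheel_trajectories, which bicycle.py defines elsewhere. A hypothetical reconstruction of that plotting scaffolding (the subplot layout and the stub body are guesses, not the file's actual code):

import matplotlib.pyplot as plt

plt.ion()                       # interactive mode: update plots without blocking
fig = plt.figure()
ax1 = fig.add_subplot(2, 1, 1)  # cumulative performance rewards
ax2 = fig.add_subplot(2, 1, 2)  # wheel trajectories

def update_wheel_trajectories():
    # Stub: the original presumably plots the bicycle's wheel paths on ax2.
    plt.pause(0.001)            # let the GUI event loop redraw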

Example 2: while

# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Alternatively: from pybrain.rl.experiments.EpisodicExperiment import agent [as alias]
    # Without the next line, the pyplot plot won't actually show up.
    plt.pause(0.001)

performance = []

if not render:
    pf_fig = plt.figure()

while True:
    # One learning step after one episode of world-interaction.
    experiment.doEpisodes(1)
    agent.learn(1)

    # test performance (these real-world experiences are not used for training)
    if render:
        env.delay = True
    experiment.agent = testagent
    r = mean([sum(x) for x in experiment.doEpisodes(5)])
    env.delay = False
    testagent.reset()
    experiment.agent = agent

    performance.append(r)
    if not render:
        plotPerformance(performance, pf_fig)

    print("reward avg", r)
    print("explorer epsilon", learner.explorer.epsilon)
    print("num episodes", agent.history.getNumSequences())
    print("update step", len(performance))
Developer: vascobailao | Project: PYTHON | Lines: 32 | Source: NFQ.py
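This loop matches PyBrain's NFQ cart-pole example and presupposes setup along the following lines (a sketch: the names mirror the snippet, but the exact values are assumptions; mean comes from scipy, and plotPerformance is the helper excerpted at the top of the snippet):

from pybrain.rl.agents import LearningAgent
from pybrain.rl.environments.cartpole import CartPoleEnvironment, DiscreteBalanceTask
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.rl.learners.valuebased import ActionValueNetwork, NFQ

env = CartPoleEnvironment()
task = DiscreteBalanceTask(env, 100)      # cap episodes at 100 steps (assumed)
module = ActionValueNetwork(4, 3)         # 4 state dims, 3 discrete actions
learner = NFQ()
learner.explorer.epsilon = 0.4            # epsilon-greedy exploration rate (assumed)
agent = LearningAgent(module, learner)
testagent = LearningAgent(module, None)   # evaluation-only: no learner attached
experiment = EpisodicExperiment(task, agent)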

Example 3: run

# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Alternatively: from pybrain.rl.experiments.EpisodicExperiment import agent [as alias]
def run(nao, pad):
    # ################################
    # Choose the bottom camera, so the Nao can see the object when standing next to it.
    nao.camera.selectCam(1)
    
    env = grabbingEnvironment(nao)
    #env.connect(nao)

    task = grabbingTask(env)

    net = buildNetwork(len(task.getObservation()), 8, env.indim, bias=True, recurrent=True)
    print(env.indim)
    # Alternative modules/agents tried during development (kept for reference):
    #   net = ActionValueNetwork(5, 4)
    #   module = ActionValueNetwork(3, 3)
    #   module = NeuronLayer(40)
    #   agent = LearningAgent(net, SARSA())
    #   learner = PolicyGradientLearner()
    #   learner._setExplorer(StateDependentExplorer(3, 3))
    #   learner._setModule(module)
    #   agent = LearningAgent(module, learner)
    #   agent = LearningAgent(net, ENAC())
    #   agent = LearningAgent(net, Reinforce())
    #   learner = NFQ()
    #   learner.explorer.epsilon = 0.4
    #   agent = LearningAgent(net, learner)

    # TODO: not correct right now; the training dataset needs to be merged
    # with exploration data before it can be fed into the RL modules.
    #generateTraining.generateTraining().runDeltaMovements(nao, net, env, pad)

    testagent = OptimizationAgent(net, None, env)
    learner = grabbingPGPE(storeAllEvaluations=True, verbose=True, epsilon=1.0,
                           deltamax=5.0, sigmaLearningRate=0.1, learningRate=0.2)
    agent = OptimizationAgent(net, learner, env)
    # Alternative learners (kept for reference):
    #   agent = LearningAgent(module, Q())
    #   agent = LearningAgent(module, QLambda())
    #   agent = OptimizationAgent(net, SimpleSPSA(storeAllEvaluations=True, verbose=True))
    #   agent = OptimizationAgent(net, HillClimber(storeAllEvaluations=True, verbose=True))
    #   agent = OptimizationAgent(net, RandomSearch(storeAllEvaluations=True, verbose=True))
    
    experiment = EpisodicExperiment(task, agent)
    # only for optimizationAgent
    #experiment.doOptimization = True

    # only for simulator!
    nao.fractionMaxSpeed = 1.0



    print "#env"
    print "  sensors:", env.outdim
    print "  actions:", env.indim
    print "  discreteStates:", env.discreteStates
    print "  discreteActions:", env.discreteActions
    
    print
    print "#task"
    print "  sensor_limits:", task.sensor_limits
    print "  actor_limits:", task.actor_limits
    print "  epilen: ", task.epiLen
    print "#EpisodicTask"
    print "  discount:", task.discount
    print "  batchsize:", task.batchSize
    

    print
    print "#PGPE"
    print "  exploration type:", grabbingPGPE().exploration
    print "  LearningRate:", grabbingPGPE().learningRate
    print "  sigmaLearningRate:", grabbingPGPE().sigmaLearningRate
    print "  epsilon:", grabbingPGPE().epsilon
    print "  wDecay:", grabbingPGPE().wDecay
    print "  momentum:", grabbingPGPE().momentum
    print "  rprop:", grabbingPGPE().rprop



#    # switch this to True if you want to see the cart balancing the pole (slower)
#    render = False
#
#    plt.ion()
#
#    env = CartPoleEnvironment()
#    if render:
#        renderer = CartPoleRenderer()
#        env.setRenderer(renderer)
#        renderer.start()
#
#    module = ActionValueNetwork(4, 3)
#......... (remainder of the code omitted) .........
Developer: c0de2014 | Project: nao-control | Lines: 103 | Source: grabbingTest.py
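Stripped of the NAO-specific pieces, the core of this example is the OptimizationAgent pattern: when the agent wraps a black-box optimizer, doEpisodes drives whole-policy evaluations rather than step-level value updates. A minimal sketch, assuming stock PyBrain PGPE and the cart-pole task as stand-ins for the custom grabbingPGPE and grabbing task:

from pybrain.optimization import PGPE
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.environments.cartpole import BalanceTask, CartPoleEnvironment
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.tools.shortcuts import buildNetwork

env = CartPoleEnvironment()
task = BalanceTask(env, maxsteps=200)
net = buildNetwork(task.outdim, 8, task.indim, bias=True)

agent = OptimizationAgent(net, PGPE(storeAllEvaluations=True))
experiment = EpisodicExperiment(task, agent)
experiment.doEpisodes(50)  # each episode is one PGPE policy evaluation
# _allEvaluations is an internal list PGPE keeps when storeAllEvaluations=True.
print("best evaluation:", max(agent.learner._allEvaluations))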


Note: The pybrain.rl.experiments.EpisodicExperiment.agent examples in this article were collected from open-source projects hosted on platforms such as GitHub; the code remains the copyright of its original authors. Consult each project's license before reusing or redistributing it.