

Python LearningAgent.integrateObservation Method Code Examples

This article collects typical usage examples of the Python method pybrain.rl.agents.LearningAgent.integrateObservation. If you are trying to work out what LearningAgent.integrateObservation does, how to call it, or how it is used in practice, the hand-picked examples below should help. You can also browse further usage examples of the containing class, pybrain.rl.agents.LearningAgent.


Below are 5 code examples of LearningAgent.integrateObservation, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
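All of the examples below follow PyBrain's standard agent interaction cycle: feed the current state to the agent with integrateObservation(), query getAction(), report the outcome with giveReward(), and periodically call learn(). Here is a minimal, self-contained sketch of that cycle; DummyEnv and its observe()/step() methods are hypothetical placeholders standing in for a real environment, not part of PyBrain.

# Minimal sketch of the interaction cycle used by every example below.
# DummyEnv is a hypothetical stand-in for a real environment.
import numpy
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork

class DummyEnv(object):
    def observe(self):
        return numpy.random.rand(4)                    # 4 state features
    def step(self, action):
        return 1.0 if int(action[0]) == 0 else -1.0    # toy reward

env = DummyEnv()
agent = LearningAgent(ActionValueNetwork(4, 2), NFQ())  # 4 inputs, 2 discrete actions

for episode in range(5):
    agent.newEpisode()
    for step in range(20):
        agent.integrateObservation(env.observe())  # step 1: pass the state to the agent
        action = agent.getAction()                 # step 2: ask for an action
        agent.giveReward(env.step(action))         # step 3: report the resulting reward
    agent.learn()                                  # fit NFQ on the collected transitions
    agent.reset()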

Example 1: main

# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or: from pybrain.rl.agents.LearningAgent import integrateObservation [as alias]
def main():
    # if os.path.exists('./agent.dump'):
    #     with open('./agent.dump') as f:
    #         agent = pickle.load(f)
    # else:
    controller = ActionValueNetwork(9, 4)
    learner = NFQ()
    agent = LearningAgent(controller, learner)

    score_list = []
    for i in range(10000):

        score = play(agent)
        score_list.append(score)

        # Note: with the Q learner,
        #   TypeError: only length-1 arrays can be converted to Python scalars
        #   was raised in pybrain/rl/learners/valuebased/q.py;
        #   switching the learner from Q to NFQ made it work.
        #   => http://stackoverflow.com/questions/23755927/pybrain-training-a-actionvaluenetwork-doesnt-properly-work

        #agent.learn()
        agent.reset()

        #data =[[0,0,0,0], [0,0,0,0], [0,0,0,2], [0,0,0,2]]
        data =[[0,0,2], [0,0,0], [0,0,2]]
        agent.integrateObservation(numpy.array(data).ravel())
        move = agent.getAction()
        print i, int(numpy.mean(score_list)), max(score_list), move

        with open('./agent.dump', 'w') as f:
            pickle.dump(agent, f)
        with open('./score.dump', 'w') as f:
            pickle.dump(score_list, f)
Author: kokukuma | Project: reinforcement_learning_2048 | Lines: 36 | Source: pybrain_rl_simple.py
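The note in this example (repeated in Example 3) reflects the usual PyBrain pairing: the tabular Q learner is meant for an ActionValueTable over a small, enumerable state space, while an ActionValueNetwork over a large or continuous state space is trained with NFQ. A minimal sketch of the two pairings (the sizes are illustrative only):

# Typical PyBrain module/learner pairings (sizes are illustrative).
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.valuebased import Q, NFQ, ActionValueTable, ActionValueNetwork

# Small, enumerable state space: a lookup table trained with tabular Q-learning.
table = ActionValueTable(16, 4)     # 16 states, 4 actions
table.initialize(0.)
tabular_agent = LearningAgent(table, Q(0.5, 0.9))

# Large or continuous state space: a network trained with neural fitted Q-iteration.
network = ActionValueNetwork(9, 4)  # 9 input features (3x3 board cells), 4 moves
neural_agent = LearningAgent(network, NFQ())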

Example 2: Team

# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or: from pybrain.rl.agents.LearningAgent import integrateObservation [as alias]
class Team(object):
    def __init__(self, living, task, learner = ENAC()):
        self.living = living
        self.task = task
        self.last_reward = 0
        self.agent = LearningAgent(self.living.brain, learner)
        self.oldparams = self.living.brain.params.copy()  # copy, so later diffs are not taken against a live reference
    def Interaction(self):
        self.agent.integrateObservation(self.task.getObservation())
        self.task.performAction(self.agent.getAction())
        self.last_reward = self.task.getReward()
        self.agent.giveReward(self.last_reward)
        
        finished = self.task.isFinished()
        if finished:
            #print task.cumreward
            self.agent.newEpisode()
            self.task.reset()
        return self.last_reward, finished
    
    def Learn(self, episodes = 1):    
        self.agent.learn(episodes)
        self.agent.reset()
                        
        newparams = self.living.brain.params.copy()  # get_all_weights(eater.brain)[:]
        dif = 0
        for j in range(len(newparams)):
            dif += (self.oldparams[j] - newparams[j]) ** 2
        self.oldparams = newparams
        return dif
Author: ahirner | Project: Autonomous_Agent_Testbed | Lines: 34 | Source: test_new.py
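Example 2 pairs LearningAgent with the policy-gradient learner ENAC rather than a value-based learner; in that setup the module is a plain feed-forward network whose outputs are the actions. Below is a minimal, self-contained sketch of that pairing. The network shape and the random observations/rewards are placeholders only, not taken from the project.

# Minimal sketch of the policy-gradient pairing used by the Team class above:
# a plain feed-forward network as the policy, trained with ENAC.
import numpy
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import ENAC

brain = buildNetwork(4, 8, 2)        # 4 observation inputs, 2 continuous action outputs
agent = LearningAgent(brain, ENAC())

for episode in range(5):
    for step in range(10):
        agent.integrateObservation(numpy.random.rand(4))  # dummy observation
        agent.getAction()                                  # action comes from the network
        agent.giveReward(float(numpy.random.rand()))       # dummy reward
    agent.newEpisode()                                     # close the episode, as Team does
agent.learn()                                              # one policy-gradient update
agent.reset()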

Example 3: main

# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or: from pybrain.rl.agents.LearningAgent import integrateObservation [as alias]
def main():

    # Storing a table entry for every 2048 state is not realistic --
    #   there are on the order of 14^16 distinct board states.
    #controller = ActionValueTable(16, 4)
    #learner = Q()
    #controller.initialize(1.)

    controller = ActionValueNetwork(16, 4)
    learner = NFQ()
    #learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(controller, learner)

    score_list = []
    for i in range(10000):
        # if os.path.exists('./agent.dump'):
        #     with open('./agent.dump') as f:
        #         agent = pickle.load(f)

        print i, 'playing ...'
        score = play(agent)
        score_list.append(score)

        # Note: with the Q learner,
        #   TypeError: only length-1 arrays can be converted to Python scalars
        #   was raised in pybrain/rl/learners/valuebased/q.py;
        #   switching the learner from Q to NFQ made it work.
        #   => http://stackoverflow.com/questions/23755927/pybrain-training-a-actionvaluenetwork-doesnt-properly-work
        print i, 'learning ...'
        agent.learn()
        agent.reset()

        print i, 'evaluate sample ...'
        data =[[0,0,0,0], [0,0,0,0], [0,0,0,2], [0,0,0,2]]
        agent.integrateObservation(numpy.array(data).ravel())
        move = agent.getAction()
        print "                           ",i, int(numpy.mean(score_list)) , max(score_list), move

        if i % 20 == 0:
            print i, 'saving ...'
            with open('./agent.dump', 'w') as f:
                pickle.dump(agent, f)
            with open('./score.dump', 'w') as f:
                pickle.dump(score_list, f)
Author: kokukuma | Project: reinforcement_learning_2048 | Lines: 46 | Source: pybrain_rl.py
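Since Example 3 pickles the agent every 20 iterations, a later session can reload it and ask for a greedy move on an arbitrary board. A short sketch of that, assuming agent.dump was written by the loop above and that exploration should be switched off for evaluation:

# Sketch: reload a pickled agent and ask it for a greedy move on a sample board.
import pickle
import numpy
from pybrain.rl.explorers import EpsilonGreedyExplorer

with open('./agent.dump') as f:
    agent = pickle.load(f)

agent.learner._setExplorer(EpsilonGreedyExplorer(0.0))   # no random exploration at eval time
agent.reset()                                            # clear any leftover episode state
board = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 2], [0, 0, 0, 2]]
agent.integrateObservation(numpy.array(board).ravel())
print agent.getAction()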

Example 4: ActionValueNetwork

# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or: from pybrain.rl.agents.LearningAgent import integrateObservation [as alias]
"""
Program: NFQ_EXAMPLE.PY
Date: Thursday, March  1 2012
Description: Test NFQ on my cartpole simulation.
"""

from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.valuebased import NFQ, ActionValueNetwork
from cartpole import CartPole
import numpy as np

module = ActionValueNetwork(4,2)
learner = NFQ()
learner.explorer.epsilon = 0.4
agent = LearningAgent(module, learner)

env = CartPole()
cnt = 0
for i in range(1000):
    
    env.reset()
    print "Episode: %d, Count: %d" % (i,cnt)
    cnt = 0
    while not env.failure():
        agent.integrateObservation(env.observation())
        action = agent.getAction()
        pstate, paction, reward, state = env.move(action)
        cnt += 1
        agent.giveReward(reward)
    agent.learn(1)

Author: stober | Project: td | Lines: 31 | Source: nfq_example.py
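The cartpole module here is the author's own simulation, not part of PyBrain; from the way it is called, it appears to expose reset(), observation(), failure(), and move(action). A hedged skeleton of that assumed interface is shown below; the dynamics are placeholders only, included just to show what the training loop expects from the environment.

# Assumed interface of the author's CartPole environment, inferred from the calls above.
# The dynamics below are placeholders, not the real simulation.
import numpy as np

class CartPole(object):
    def __init__(self):
        self.state = np.zeros(4)                  # [x, x_dot, theta, theta_dot]

    def reset(self):
        self.state = np.random.uniform(-0.05, 0.05, 4)

    def observation(self):
        return self.state                         # what gets passed to integrateObservation

    def failure(self):
        return abs(self.state[2]) > 0.2           # e.g. pole angle beyond a threshold

    def move(self, action):
        previous = self.state.copy()
        self.state = self.state + np.random.uniform(-0.01, 0.01, 4)  # placeholder dynamics
        reward = 0.0 if self.failure() else 1.0
        return previous, action, reward, self.state   # (pstate, paction, reward, state)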

Example 5: ActionValueTable

# Required import: from pybrain.rl.agents import LearningAgent [as alias]
# Or: from pybrain.rl.agents.LearningAgent import integrateObservation [as alias]
# The parameters of your algorithm
av_table = ActionValueTable(4, 2)
av_table.initialize(0.) # initialize every table entry to 0
learner = Q(0.5, 0.0) # Q-learning learner (alpha=0.5, gamma=0.0)
learner._setExplorer(EpsilonGreedyExplorer(0.0))
agent = LearningAgent(av_table, learner)



for x in xrange(1,100):
    # The training 
    listxor = random.choice([[0, 0],[0, 1], [1, 0], [1, 1]])
    qstate = listxor[0] + listxor[1]*2
    resultxor = listxor[0]^listxor[1]

    agent.integrateObservation([qstate])
    action = agent.getAction()


    if int(action) == resultxor:
        reward = 1
    else:
        reward = -1

    print "xor(",listxor,") = ", resultxor, " || action = " , action[0], "reward = ", reward    

    agent.giveReward(reward) # +1 for a correct answer, -1 otherwise
    agent.learn()

print "finished"
Author: BenderV | Project: ml-xor | Lines: 32 | Source: reinforcement_avt.py
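After training, the learned policy can be read back from the table to check whether XOR was actually picked up. A short sketch, assuming the av_table and training loop above have already run in the same session (getMaxAction returns the action with the highest learned value for a state):

# Sketch: inspect the greedy action for each XOR input after training.
for a in (0, 1):
    for b in (0, 1):
        qstate = a + b * 2                       # same state encoding as the training loop
        greedy = av_table.getMaxAction(qstate)   # action with the highest learned value
        print "xor(%d, %d) -> predicted %d, true %d" % (a, b, greedy, a ^ b)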


Note: The pybrain.rl.agents.LearningAgent.integrateObservation method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by various developers; copyright remains with the original authors, and distribution and use should follow each project's license. Please do not republish without permission.