当前位置: 首页>>代码示例>>Python>>正文


Python agents.LearningAgent类代码示例

本文整理汇总了 Python 中 pybrain.rl.agents.LearningAgent 的典型用法代码示例。如果您正苦于以下问题：Python LearningAgent 类的具体用法？Python LearningAgent 怎么用？Python LearningAgent 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了 LearningAgent 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: train

def train():
    """Train an NFQ agent on the 2048 environment and collect per-epoch scores.

    Each epoch plays ``GAMES_PER_EPOCH`` episodes, prints and records
    ``task.meanScore``, then calls ``agent.learn()`` and ``agent.reset()``.

    Returns:
        tuple: ``(meanScores, params, agent)`` -- the list of mean scores (one
        entry per epoch), a dict of the hyper-parameters used, and the agent.
    """

    # Make the environment
    environment = TwentyFortyEightEnvironment()

    # The task is the game this time
    task = environment

    # Make the reinforcement learning agent (use a network because inputs are continuous)
    network = ActionValueNetwork(task.nSenses, task.nActions)

    # NFQ is the Q-learning variant for networks (plain Q is for tables)
    learner = NFQ()
    learner.gamma = GAMMA

    agent = LearningAgent(network, learner)

    # Set up an experiment
    experiment = EpisodicExperiment(task, agent)

    # Train the Learner
    meanScores = []
    for i in range(LEARNING_EPOCHS):
        experiment.doEpisodes(GAMES_PER_EPOCH)
        # A single pre-formatted argument prints the same text under both
        # Python 2 and Python 3 (spacing matches the old print statement).
        print("Iteration  %s  With mean score  %s Max block achieved  %s"
              % (i, task.meanScore, environment.maxGameBlock))
        meanScores.append(task.meanScore)
        agent.learn()
        agent.reset()

    params = {"learningEpochs": LEARNING_EPOCHS, "gamesPerEpoch": GAMES_PER_EPOCH, "gamma": GAMMA}
    return meanScores, params, agent
开发者ID:Aggregates,项目名称:MI_HW2,代码行数:31,代码来源:RLNFQ.py

示例2: q_learning_table

def q_learning_table():
    """Play 600 games with a tabular Q-learning agent, checkpointing each game.

    After every game the agent learns and is reset, a progress line
    (index, mean score, best score, last score, last turn count) is printed,
    and the agent plus the score/turn histories are pickled to
    ``./agent.dump`` and ``./score.dump``.
    """
    controller = ActionValueTable(36, 4)
    learner = Q()
    controller.initialize(1.)

    agent = LearningAgent(controller, learner)

    score_list = []
    turn_list = []
    # "+100 for the training share of the neural side" (translated from the
    # original Japanese note; presumably explains the 600 -- TODO confirm)
    for i in range(600):
        print_state(agent.module.getValue, 'table')

        score, turn = play(agent, 'table')
        score_list.append(score)
        turn_list.append(turn)

        agent.learn()
        agent.reset()

        # Single formatted argument: identical output on Python 2 and 3.
        print("%s %s %s %s %s"
              % (i, int(numpy.mean(score_list)), max(score_list), score, turn))

        # Pickle streams are bytes, so the files must be opened in binary
        # mode (text mode fails on Python 3 and can corrupt data on Windows).
        with open('./agent.dump', 'wb') as f:
            pickle.dump(agent, f)
        with open('./score.dump', 'wb') as f:
            pickle.dump([score_list, turn_list], f)
开发者ID:kokukuma,项目名称:reinforcement_learning_2048,代码行数:26,代码来源:pybrain_rl_simple2.py

示例3: getAction

	def getAction(self):
		"""Return the next action, honouring a pending supervisor suggestion.

		With no suggestion in ``self.nextAction`` the action comes from
		``LearningAgent.getAction``. A pending suggestion is taken
		unconditionally when ``self.tolerance`` is None; otherwise it is taken
		only while ``expectedReward * (1 + tolerance)`` exceeds the module's
		value for the suggested action. A suggestion that is taken is cleared;
		a rejected one stays pending. The chosen action is also stored in
		``self.lastaction``.
		"""
		# Identity tests instead of ``== None``: equality against None is
		# ambiguous when the action is array-like.
		use_suggestion = self.nextAction is not None
		if use_suggestion and self.tolerance is not None:
			# supervisor's suggestion, subject to the tolerance check
			threshold = self.expectedReward * (1 + self.tolerance)
			use_suggestion = threshold > self.module.getActionValue(self.nextAction)

		if use_suggestion:
			action = self.nextAction
			self.nextAction = None
		else:
			# independent action (Boltzmann or Q-Learning, per the original note)
			action = LearningAgent.getAction(self)

		self.lastaction = action
		return action
开发者ID:pieschtz,项目名称:learning-on-traffic-lights,代码行数:30,代码来源:lowlevelagent.py

示例4: test_maze

def test_maze():
    """Train tabular Q-learning on a small maze and check the greedy policy.

    Runs 30 rounds of 30 interactions, learning and resetting the agent after
    each round, then prints the maze and the greedy policy and asserts the
    policy against the expected layout.
    """
    # simplified version of the reinforcement learning tutorial example
    structure = np.array([[1, 1, 1, 1, 1],
                          [1, 0, 0, 0, 1],
                          [1, 0, 1, 0, 1],
                          [1, 0, 1, 0, 1],
                          [1, 1, 1, 1, 1]])
    shape = np.array(structure.shape)
    n_states = shape.prod()
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(n_states, 4)
    controller.initialize(1.)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    for i in range(30):
        experiment.doInteractions(30)
        agent.learn()
        agent.reset()

    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(n_states, 4), 1)
    greedy_policy = np.flipud(np.array(list('NESW'))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(' #'))[structure])
    # Build the policy text once; it is both printed and asserted on.
    policy_text = '\n'.join(''.join(row) for row in greedy_policy)
    print('Maze map:')
    print('\n'.join(''.join(row) for row in maze))
    print('Greedy policy:')
    print(policy_text)
    assert policy_text == 'NNNNN\nNSNNN\nNSNNN\nNEENN\nNNNNN'
开发者ID:gabrielhuang,项目名称:pybrain,代码行数:31,代码来源:test_maze.py

示例5: initExperiment

def initExperiment(alg, optimistic=True):
    """Build a maze Experiment driven by a tabular agent using learner ``alg``.

    Args:
        alg: Zero-argument callable returning the learner, e.g. SARSA, Q or
            QLambda.
        optimistic: When truthy the value table is initialised to 1.0,
            otherwise to 0.0.

    Returns:
        The Experiment, with an empty ``allRewards`` list attached.
    """
    # task wrapping the maze environment
    maze_task = MDPMazeTask(Maze(envmatrix, (7, 7)))

    # value table; optimistic initialisation starts every entry at one
    value_table = ActionValueTable(81, 4)
    value_table.initialize(1. if optimistic else 0.)

    # standard exploration is e-greedy; a different explorer (e.g.
    # BoltzmannExplorer()) can be assigned to the learner's ``explorer``
    online_agent = LearningAgent(value_table, alg())
    online_agent.batchMode = False

    result = Experiment(maze_task, online_agent)
    result.allRewards = []
    return result
开发者ID:bgrant,项目名称:portfolio,代码行数:25,代码来源:td.py

示例6: run_bbox

def run_bbox(verbose=False):
    """Run a tabular Q-learning agent against the bbox level, one step at a time.

    Loads the training level (or resets it when one is already loaded), builds
    an action-value table sized from the level, and then alternates a single
    interaction with ``agent.learn()`` while ``environment.finish_flag`` is
    truthy. Finishes with ``bbox.finish(verbose=1)``.

    Args:
        verbose: Currently unused; the bbox calls are made with ``verbose=1``.
    """
    if bbox.is_level_loaded():
        bbox.reset_level()
    else:
        bbox.load_level("../levels/train_level.data", verbose=1)

    # Read the dimensions on both paths. They used to be queried only right
    # after load_level(), so a run on an already loaded level built the table
    # from -1 placeholders.
    n_features = bbox.get_num_of_features()
    n_actions = bbox.get_num_of_actions()

    av_table = ActionValueTable(n_features, n_actions)
    av_table.initialize(0.2)
    print(av_table._params)
    learner = Q(0.5, 0.1)
    learner._setExplorer(EpsilonGreedyExplorer(0.4))
    agent = LearningAgent(av_table, learner)
    environment = GameEnvironment()
    task = GameTask(environment)
    experiment = Experiment(task, agent)

    # NOTE(review): loops while finish_flag is truthy -- confirm the flag's
    # polarity against GameEnvironment.
    while environment.finish_flag:
        experiment.doInteractions(1)
        agent.learn()

    bbox.finish(verbose=1)
开发者ID:tsvvladimir95,项目名称:simple_bot,代码行数:26,代码来源:bot.py

示例7: __init__

 def __init__(self, name, num_states, num_actions, epsilon=0.3, gamma=0.99, alpha=0.95):
     """Create a Q-learning agent backed by a randomly initialised value table.

     Args:
         name: Passed on to ``Agent.__init__``.
         num_states: Number of rows of the action-value table.
         num_actions: Number of columns of the action-value table.
         epsilon: Assigned to the learner's explorer ``epsilon``.
         gamma: Forwarded to ``Q`` as ``gamma``.
         alpha: Forwarded to ``Q`` as ``alpha``.
     """
     # table with one uniform random start value per (state, action) pair
     self.controller = table = ActionValueTable(num_states, num_actions)
     table.initialize(np.random.rand(num_states * num_actions))

     self.learner = q_learner = Q(gamma=gamma, alpha=alpha)
     q_learner.batchMode = False
     q_learner.explorer.epsilon = epsilon

     # initialise both base classes explicitly
     LearningAgent.__init__(self, table, q_learner)
     Agent.__init__(self, name)
开发者ID:phelps-sg,项目名称:py-abm,代码行数:8,代码来源:abm.py

示例8: __init__

	def __init__(self, _id, module, learner=None):
		"""Set up the traffic-light agent for ``_id`` and its bookkeeping state.

		Looks up the edges of the first and third controlled lanes, clears the
		load/average histories and the supervisor hints, then initialises the
		``LearningAgent`` base with ``module`` and ``learner``.
		"""
		# class variables
		self.id = _id
		first_lane = trafficlights.getControlledLanes(self.id)[0]
		self.horizontal_edge = lane.getEdgeID(first_lane)
		third_lane = trafficlights.getControlledLanes(str(_id))[2]
		self.vertical_edge = lane.getEdgeID(third_lane)
		# "parent class variables" per the original (Portuguese) comment
		self.horizontalLoad, self.verticalLoad = [], []
		self.averageHorizontal, self.averageVertical = [], []
		# supervisor hints start unset
		self.nextAction = self.expectedReward = self.tolerance = None
		LearningAgent.__init__(self, module, learner)
开发者ID:pieschtz,项目名称:learning-on-traffic-lights,代码行数:14,代码来源:lowlevelagent.py

示例9: learn

def learn(client):
	"""Drive the car behind ``client`` forever, learning after every step.

	Never returns: the interaction loop has no exit condition.
	"""
	network = ActionValueNetwork(4, 1)
	policy_learner = Reinforce()
	driver = LearningAgent(network, policy_learner)

	car_task = CarTask(CarEnvironment(client))
	loop = ContinuousExperiment(car_task, driver)

	while True:
		loop.doInteractionsAndLearn(1)
		# NOTE(review): doInteractionsAndLearn may already trigger learning,
		# which would make this call redundant -- confirm
		driver.learn()
开发者ID:alongubkin,项目名称:talkingcar,代码行数:14,代码来源:client_.py

示例10: __init__

 def __init__(self, module, learner = None):
     '''
     Initialise the LearningAgent base, then the rule list and state tables.
     '''
     LearningAgent.__init__(self, module, learner)
     self.__input = {}
     self.__buffer = {}
     # BackOffRule() is currently disabled; only BackOffRule2 is registered
     self.__rules = [BackOffRule2(), LocomotionPrimitives()]
     # seed the per-agent state from the class-level values
     self.__states = {
         "driveBackStartTime": AgentMind.__driveBackStartTime,
         "__lostTrackTurnStartTime": AgentMind.__lostTrackTurnStartTime,
     }
开发者ID:dtbinh,项目名称:Lingadrome,代码行数:14,代码来源:AgentMind.py

示例11: Pause

class QAlgorithm:
  """Q-learning game bot whose run state is driven by a menu.

  ``CheckState`` dispatches on ``self.state``: 0 starts the bot, 1 pauses it,
  2 quits. NOTE(review): ``state`` and ``sr`` are read but never assigned in
  this class -- presumably attached by the owning menu code; confirm.
  """

  def Pause(self):
    """Block, polling every 50 ms, while the menu state is 1; return True."""
    while self.state == 1:
      time.sleep(.05)
    return True

  def Quit(self):
    """Terminate the bot process if there is one; always return False."""
    # ``process`` stays None while the multiprocessing start in Start() is
    # disabled, so guard against calling terminate() on None.
    if self.process is not None:
      self.process.terminate()
    return False

  def Start(self):
    """Run the bot (in-process) when no bot process exists; return True."""
    if self.process is None:
      self.runBot()
      #self.process = multiprocessing.Process(target=self.runBot, args= [])
      #self.process.start()
    return True

  def CheckState(self):
    """Act on the state requested by the menu; returns None."""
    if self.state == 0:
      self.Start()
    elif self.state == 1:
      self.Pause()
    elif self.state == 2:
      self.Quit()

  def GameOver(self):
    """Apply the menu state, then report whether the end-game check fires."""
    # CheckState() returns None, so the result is always the end-game check.
    return self.CheckState() or self.sr.checkEndGame(self.endBox, self.gameOver)

  def __init__(self, rewardBox, box, gameOver, endGame, scoreArea):
    """Wire up environment, controller, learner, agent, task and experiment.

    The controller is restored from ``bot.txt`` when that file exists,
    otherwise a fresh ``ActionValueNetwork`` is created.
    """
    self.reward = rewardBox
    self.bbox = box
    # GameOver() reads self.gameOver, so the argument has to be kept.
    self.gameOver = gameOver
    self.environment = TEnviroment(box)  # custom environment class
    if os.path.isfile("bot.txt"):
      # NOTE: unpickling can execute arbitrary code; only load a bot.txt
      # written by this program.
      with open("bot.txt", "rb") as saved:
        self.controller = pickle.load(saved)
    else:
      # Arguments (framerate*maxPlaytime, number of actions)
      self.controller = ActionValueNetwork(50**2, 4)
    self.learner = Q()
    gf = {0: self.GameOver}
    self.agent = LearningAgent(self.controller, self.learner)
    self.task = TTask(self.environment, scoreArea, gf)  # needs custom task
    self.experiment = EpisodicExperiment(self.task, self.agent)
    self.process = None
    self.endBox = endGame

  def runBot(self):
    """Run a single episode, learn from it, and save the controller."""
    self.experiment.doEpisodes()
    self.agent.learn()
    self.agent.reset()
    # Context manager so the dump is flushed and the handle closed.
    with open("bot.txt", "wb+") as out:
      pickle.dump(self.controller, out)
开发者ID:Diesel9012,项目名称:GameLearningAI,代码行数:50,代码来源:QAlgorithm.py

示例12: learn

 def learn(self, number_of_iterations):
     """Train a fresh Q-table on the cart task and pickle it to ``test.pcl``.

     Args:
         number_of_iterations: How many single-interaction learn/reset
             rounds to run.

     The new table is stored in ``self.controller``.
     """
     learner = Q(0.2, 0.8)
     task = CartMovingTask(self.environment)
     # Table rows = product of the lengths of self.ranges. A plain loop
     # avoids the builtin ``reduce``, which does not exist on Python 3.
     state_count = 1
     for value_range in self.ranges:
         state_count *= len(value_range)
     self.controller = ActionValueTable(state_count, self.force_granularity)
     self.controller.initialize(1.0)
     agent = LearningAgent(self.controller, learner)
     experiment = Experiment(task, agent)
     for _ in range(number_of_iterations):
         experiment.doInteractions(1)
         agent.learn()
         agent.reset()
     # Pickle data is binary; text mode breaks on Python 3 and on Windows.
     with open("test.pcl", "wb") as f:
         pickle.dump(self.controller, f)
开发者ID:pawel-k,项目名称:pendulum,代码行数:15,代码来源:ReinforcedController.py

示例13: maze

    def maze():
        """Train tabular Q-learning on a text maze, plotting values as it learns.

        Runs 100 rounds of 100 interactions. After each round the best action
        value per cell is drawn with pylab; at the end the maze and the greedy
        policy are printed.
        """
        # import sys, time
        pylab.gray()
        pylab.ion()
        # The goal appears to be in the upper right
        structure = [
            "!!!!!!!!!!",
            "! !  ! ! !",
            "! !! ! ! !",
            "!    !   !",
            "! !!!!!! !",
            "! ! !    !",
            "! ! !!!! !",
            "!        !",
            "! !!!!!  !",
            "!   !    !",
            "!!!!!!!!!!",
        ]
        # "!" -> 1 (wall), " " -> 0 (free)
        structure = np.array([[ord(c) - ord(" ") for c in row] for row in structure])
        shape = np.array(structure.shape)
        n_states = shape.prod()
        environment = Maze(structure, tuple(shape - 2))
        controller = ActionValueTable(n_states, 4)
        controller.initialize(1.0)
        learner = Q()
        agent = LearningAgent(controller, learner)
        task = MDPMazeTask(environment)
        experiment = Experiment(task, agent)

        for i in range(100):
            experiment.doInteractions(100)
            agent.learn()
            agent.reset()
            # 4 actions per state, one state per maze cell. The sizes come
            # from the maze itself: the former hard-coded 81 / 9x9 did not
            # match this 11x10 grid and made reshape raise.
            # max(1) gives/plots the biggest action value for that square
            pylab.pcolor(controller.params.reshape(n_states, 4).max(1).reshape(*shape))
            pylab.draw()

        # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
        greedy_policy = np.argmax(controller.params.reshape(n_states, 4), 1)
        greedy_policy = np.flipud(np.array(list("NESW"))[greedy_policy].reshape(shape))
        maze = np.flipud(np.array(list(" #"))[structure])
        print("Maze map:")
        print("\n".join("".join(row) for row in maze))
        print("Greedy policy:")
        print("\n".join("".join(row) for row in greedy_policy))
开发者ID:nvaller,项目名称:pug-ann,代码行数:45,代码来源:example.py

示例14: run

def run():
    """Train a tabular Q-learning blackjack agent and print its value table.

    number of states is:
    current value: 0-20

    number of actions:
    Stand=0, Hit=1

    Runs ``NB_ITERATION`` single interactions, learning whenever the task's
    last reward is non-zero, then prints a Markdown table comparing the value
    of standing and hitting for each of the first ``MAX_VAL`` states.
    """

    # define action value table
    # NOTE(review): the second argument is the number of actions; passing
    # MIN_VAL looks odd given the two actions listed above -- confirm.
    av_table = ActionValueTable(MAX_VAL, MIN_VAL)
    av_table.initialize(0.)

    # define Q-learning agent
    q_learner = Q(Q_ALPHA, Q_GAMMA)
    q_learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(av_table, q_learner)

    # define the environment
    env = BlackjackEnv()

    # define the task
    task = BlackjackTask(env, verbosity=VERBOSE)

    # finally, define experiment
    experiment = Experiment(task, agent)

    # ready to go, start the process
    for _ in range(NB_ITERATION):
        experiment.doInteractions(1)
        if task.lastreward != 0:
            if VERBOSE:
                print("Agent learn")
            agent.learn()

    # Every print takes one pre-built string, so the output is the same on
    # Python 2 and Python 3.
    print('|First State|Choice 0 (Stand)|Choice 1 (Hit)|Relative value of Standing over Hitting|')
    print('|:-------:|:-------|:-----|:-----|')
    for i in range(MAX_VAL):
        values = av_table.getActionValues(i)
        stand, hit = values[0], values[1]
        print('| %s | %s | %s | %s |' % ((i + 1), stand, hit, stand - hit))
开发者ID:Petlefeu,项目名称:Q_Blackjack,代码行数:43,代码来源:main.py

示例15: __init__

    def __init__(self, x, y, brain, learner, env):
        """Create an animat at ``(x, y)`` that learns with ``brain.net``.

        Args:
            x: Initial x position.
            y: Initial y position.
            brain: Object whose ``net`` attribute is used as the agent module.
            learner: Learner passed to ``LearningAgent``.
            env: Environment reference stored on the instance.
        """
        LearningAgent.__init__(self, brain.net, learner)

        # identity and wiring
        self.cellType = 3
        self.brain = brain
        self.module = brain.net
        self.learner = learner
        self.env = env
        self.color = cell.BLACK

        # position and lifetime counters
        self.x, self.y = x, y
        self.num_interactions = 0
        self.age = 0
        self.colddown = 0

        # start at the first speed setting with full energy
        self.speed = self.Speeds[0]
        self.energy = self.MaxEnergy

        # sensors start at zero; [-1, -1] is the initial target value
        self.food_sensor = 0
        self.hunger_sensor = 0
        self.target = [-1, -1]
开发者ID:xjie0403,项目名称:communication-swarm-intelligence,代码行数:19,代码来源:stupid_animat.py


注:本文中的pybrain.rl.agents.LearningAgent类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。