This article collects typical usage examples of the Python method pybrain.rl.experiments.Experiment.doInteractions. If you are wondering what exactly Experiment.doInteractions does, how to use it, or where to find working examples of it, the hand-picked code samples below should help. You can also read more about the class this method belongs to, pybrain.rl.experiments.Experiment.
A total of 15 code examples of Experiment.doInteractions are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
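All of the examples follow the same basic pattern: wrap an Environment in a Task, wrap a value table or network in a LearningAgent, hand both to an Experiment, and then call experiment.doInteractions(n) to run n agent-environment steps, typically followed by agent.learn() and agent.reset(). Below is a minimal, self-contained sketch of that loop using the maze environment from the PyBrain tutorial; the 5x5 maze, goal cell, batch size, and episode count are illustrative choices, not part of the API.

import numpy as np
from pybrain.rl.environments.mazes import Maze, MDPMazeTask
from pybrain.rl.learners.valuebased import ActionValueTable
from pybrain.rl.learners import Q
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import Experiment

# 1 = wall, 0 = free cell
structure = np.array([[1, 1, 1, 1, 1],
                      [1, 0, 0, 0, 1],
                      [1, 0, 1, 0, 1],
                      [1, 0, 0, 0, 1],
                      [1, 1, 1, 1, 1]])
environment = Maze(structure, (3, 3))             # goal at row 3, column 3
controller = ActionValueTable(structure.size, 4)  # one row per state, 4 actions
controller.initialize(1.0)
agent = LearningAgent(controller, Q())            # tabular Q-learning
experiment = Experiment(MDPMazeTask(environment), agent)

for episode in range(50):
    experiment.doInteractions(100)  # run 100 agent-environment interactions
    agent.learn()                   # update the Q-table from the stored history
    agent.reset()                   # clear the history before the next batch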
Example 1: testMaze
# Required import: from pybrain.rl.experiments import Experiment [as alias]
# Or: from pybrain.rl.experiments.Experiment import doInteractions [as alias]
def testMaze():
    # simplified version of the reinforcement learning tutorial example
    structure = np.array([[1, 1, 1, 1, 1],
                          [1, 0, 0, 0, 1],
                          [1, 0, 1, 0, 1],
                          [1, 0, 1, 0, 1],
                          [1, 1, 1, 1, 1]])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)
    for i in range(3):
        experiment.doInteractions(40)
    controller.params.reshape(shape.prod(), 4).max(1).reshape(*shape)
    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list('NESW'))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(' #'))[structure])
    print('Maze map:')
    print('\n'.join(''.join(row) for row in maze))
    print('Greedy policy:')
    print('\n'.join(''.join(row) for row in greedy_policy))
    assert '\n'.join(''.join(row) for row in greedy_policy) == 'NNNNN\nNSNNN\nNSNNN\nNEENN\nNNNNN'
Example 2: testNet
# Required import: from pybrain.rl.experiments import Experiment [as alias]
# Or: from pybrain.rl.experiments.Experiment import doInteractions [as alias]
def testNet(learner, moduleNet, env, maxPlaneStartDist, stepSize, numAngs, thermRadius):
    # Turn off exploration
    from pybrain.rl.explorers.discrete.egreedy import EpsilonGreedyExplorer
    learner._setExplorer(EpsilonGreedyExplorer(0))
    agent = LearningAgent(moduleNet, learner)

    # Move the plane back to the start by resetting the environment
    env = contEnv.contThermEnvironment(maxPlaneStartDist, stepSize, numAngs, thermRadius)
    from simpleThermalTask import SimpThermTask
    task = SimpThermTask(env)

    from pybrain.rl.experiments import Experiment
    experiment = Experiment(task, agent)

    # Have the plane move 100 times, and plot the position of the plane (hopefully it moves to the high reward area)
    testIter = 100
    trainResults = [env.distPlane()]
    for i in range(testIter):
        experiment.doInteractions(1)
        trainResults.append(env.distPlane())

    # Plot the training results
    import matplotlib.pyplot as plt
    plt.figure(1)
    plt.plot(trainResults, 'o')
    plt.ylabel('Distance from center of thermal')
    plt.xlabel('Interaction iteration')
    plt.title('Test Results for Neural Fitted Q Learner')
    plt.show()
Example 3: run_bbox
# Required import: from pybrain.rl.experiments import Experiment [as alias]
# Or: from pybrain.rl.experiments.Experiment import doInteractions [as alias]
def run_bbox(verbose=False):
    n_features = n_actions = max_time = -1

    if bbox.is_level_loaded():
        bbox.reset_level()
    else:
        bbox.load_level("../levels/train_level.data", verbose=1)
        n_features = bbox.get_num_of_features()
        n_actions = bbox.get_num_of_actions()
        max_time = bbox.get_max_time()

    av_table = ActionValueTable(n_features, n_actions)
    av_table.initialize(0.2)
    print av_table._params
    learner = Q(0.5, 0.1)
    learner._setExplorer(EpsilonGreedyExplorer(0.4))
    agent = LearningAgent(av_table, learner)

    environment = GameEnvironment()
    task = GameTask(environment)
    experiment = Experiment(task, agent)

    while environment.finish_flag:
        experiment.doInteractions(1)
        agent.learn()

    bbox.finish(verbose=1)
Example 4: learn
# Required import: from pybrain.rl.experiments import Experiment [as alias]
# Or: from pybrain.rl.experiments.Experiment import doInteractions [as alias]
def learn(self, number_of_iterations):
    learner = Q(0.2, 0.8)
    task = CartMovingTask(self.environment)
    self.controller = ActionValueTable(
        reduce(lambda x, y: x * y, map(lambda x: len(x), self.ranges)), self.force_granularity
    )
    self.controller.initialize(1.0)
    agent = LearningAgent(self.controller, learner)
    experiment = Experiment(task, agent)
    for i in range(number_of_iterations):
        experiment.doInteractions(1)
        agent.learn()
        agent.reset()
    with open("test.pcl", "w+") as f:
        pickle.dump(self.controller, f)
Example 5: maze
# Required import: from pybrain.rl.experiments import Experiment [as alias]
# Or: from pybrain.rl.experiments.Experiment import doInteractions [as alias]
def maze():
    # import sys, time
    pylab.gray()
    pylab.ion()
    # The goal appears to be in the upper right
    structure = [
        "!!!!!!!!!!",
        "! ! ! ! !",
        "! !! ! ! !",
        "! ! !",
        "! !!!!!! !",
        "! ! ! !",
        "! ! !!!! !",
        "! !",
        "! !!!!! !",
        "! ! !",
        "!!!!!!!!!!",
    ]
    structure = np.array([[ord(c) - ord(" ") for c in row] for row in structure])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.0)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)
    for i in range(100):
        experiment.doInteractions(100)
        agent.learn()
        agent.reset()
        # 4 actions, 81 locations/states (9x9 grid)
        # max(1) gives/plots the biggest objective function value for that square
        pylab.pcolor(controller.params.reshape(81, 4).max(1).reshape(9, 9))
        pylab.draw()
    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list("NESW"))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(" #"))[structure])
    print("Maze map:")
    print("\n".join("".join(row) for row in maze))
    print("Greedy policy:")
    print("\n".join("".join(row) for row in greedy_policy))
Example 6: run
# Required import: from pybrain.rl.experiments import Experiment [as alias]
# Or: from pybrain.rl.experiments.Experiment import doInteractions [as alias]
def run():
    """
    number of states is:
        current value: 0-20
    number of actions:
        Stand=0, Hit=1 """
    # define action value table
    av_table = ActionValueTable(MAX_VAL, MIN_VAL)
    av_table.initialize(0.)

    # define Q-learning agent
    q_learner = Q(Q_ALPHA, Q_GAMMA)
    q_learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(av_table, q_learner)

    # define the environment
    env = BlackjackEnv()

    # define the task
    task = BlackjackTask(env, verbosity=VERBOSE)

    # finally, define experiment
    experiment = Experiment(task, agent)

    # ready to go, start the process
    for _ in range(NB_ITERATION):
        experiment.doInteractions(1)
        if task.lastreward != 0:
            if VERBOSE:
                print "Agent learn"
            agent.learn()

    print '|First State|Choice 0 (Stand)|Choice 1 (Hit)|Relative value of Standing over Hitting|'
    print '|:-------:|:-------|:-----|:-----|'
    for i in range(MAX_VAL):
        print '| %s | %s | %s | %s |' % (
            (i + 1),
            av_table.getActionValues(i)[0],
            av_table.getActionValues(i)[1],
            av_table.getActionValues(i)[0] - av_table.getActionValues(i)[1]
        )
Example 7: __init__
# Required import: from pybrain.rl.experiments import Experiment [as alias]
# Or: from pybrain.rl.experiments.Experiment import doInteractions [as alias]
class RL:
    def __init__(self):
        self.av_table = ActionValueTable(4, 5)
        self.av_table.initialize(0.1)
        learner = SARSA()
        learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, learner)
        env = HASSHEnv()
        task = HASSHTask(env)
        self.experiment = Experiment(task, self.agent)

    def go(self):
        global rl_params
        rassh.core.constants.rl_params = self.av_table.params.reshape(4, 5)[0]
        self.experiment.doInteractions(1)
        self.agent.learn()
Example 8: explore_maze
# Required import: from pybrain.rl.experiments import Experiment [as alias]
# Or: from pybrain.rl.experiments.Experiment import doInteractions [as alias]
def explore_maze():
    # simplified version of the reinforcement learning tutorial example
    structure = [
        list("!!!!!!!!!!"),
        list("! ! ! ! !"),
        list("! !! ! ! !"),
        list("! ! !"),
        list("! !!!!!! !"),
        list("! ! ! !"),
        list("! ! !!!! !"),
        list("! !"),
        list("! !!!!! !"),
        list("! ! !"),
        list("!!!!!!!!!!"),
    ]
    structure = np.array([[ord(c) - ord(" ") for c in row] for row in structure])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.0)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)
    for i in range(30):
        experiment.doInteractions(30)
        agent.learn()
        agent.reset()
    controller.params.reshape(shape.prod(), 4).max(1).reshape(*shape)
    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list("NESW"))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(" #"))[structure])
    print("Maze map:")
    print("\n".join("".join(row) for row in maze))
    print("Greedy policy:")
    print("\n".join("".join(row) for row in greedy_policy))
    assert "\n".join("".join(row) for row in greedy_policy) == "NNNNN\nNSNNN\nNSNNN\nNEENN\nNNNNN"
Example 9: PlayYourCardsRight
# Required import: from pybrain.rl.experiments import Experiment [as alias]
# Or: from pybrain.rl.experiments.Experiment import doInteractions [as alias]
class PlayYourCardsRight(Feature):
    def __init__(self, text_to_speech, speech_to_text):
        Feature.__init__(self)

        # setup AV Table
        self.av_table = GameTable(13, 2)
        if(self.av_table.loadParameters() == False):
            self.av_table.initialize(0.)

        # setup a Q-Learning agent
        learner = Q(0.5, 0.0)
        learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, learner)

        # setup game interaction
        self.game_interaction = GameInteraction(text_to_speech, speech_to_text)

        # setup environment
        environment = GameEnvironment(self.game_interaction)

        # setup task
        task = GameTask(environment, self.game_interaction)

        # setup experiment
        self.experiment = Experiment(task, self.agent)

    @property
    def is_speaking(self):
        return self.game_interaction.is_speaking

    def _thread(self, args):
        # let's play our cards right!
        while not self.is_stop:
            self.experiment.doInteractions(1)
            self.agent.learn()
            self.av_table.saveParameters()
Example 10: ActionValueTable
# Required import: from pybrain.rl.experiments import Experiment [as alias]
# Or: from pybrain.rl.experiments.Experiment import doInteractions [as alias]
# define action-value table
# number of states is:
#
# current value: 1-21
#
# number of actions:
#
# Stand=0, Hit=1
av_table = ActionValueTable(21, 2)
av_table.initialize(0.)
# define Q-learning agent
learner = Q(0.5, 0.0)
learner._setExplorer(EpsilonGreedyExplorer(0.0))
agent = LearningAgent(av_table, learner)
# define the environment
env = BlackjackEnv()
# define the task
task = BlackjackTask(env)
# finally, define experiment
experiment = Experiment(task, agent)
# ready to go, start the process
while True:
    experiment.doInteractions(1)
    agent.learn()
    agent.reset()
Example 11: Q
# Required import: from pybrain.rl.experiments import Experiment [as alias]
# Or: from pybrain.rl.experiments.Experiment import doInteractions [as alias]
)
predTable.initialize(0.)

predLearner = Q(ALPHA, GAMMA)
predLearner._setExplorer(EpsilonGreedyExplorer(EPSILON))
predAgent = LearningAgent(predTable, predLearner)

predEnv = PredatorEnvironment(world)
predTask = PredatorTask(predEnv)
predExp = Experiment(predTask, predAgent)

try:
    for t in xrange(MAX_TIME):
        print 't = %d' % t
        world.t = t
        predExp.doInteractions(1)
        predAgent.learn()
        print 'Colors vs. Q-table:'
        table_print(predTable._params, PredatorInteraction.NSTATES)
        print
except KeyboardInterrupt:
    pass
finally:
    print 'Background: %s' % BKGD_COLOR
    print 'Colors vs. Final Q-table:'
    table_print(predTable._params, PredatorInteraction.NSTATES)
    print

counts = {'ate': {}, 'poison': 0, 'death': 0, 'poisondeath': 0, 'rejected': {}}
Example 12: PropensityTable
# Required import: from pybrain.rl.experiments import Experiment [as alias]
# Or: from pybrain.rl.experiments.Experiment import doInteractions [as alias]
table = PropensityTable(payouts.shape[0])
table.initialize(500.0)
#learner = RothErev(experimentation=0.55, recency=0.3)
learner = VariantRothErev(experimentation=0.65, recency=0.3)
learner.explorer = BoltzmannExplorer(tau=100.0, decay=0.9995)
agent = LearningAgent(table, learner)
experiment = Experiment(task, agent)
epis = int(1e1)
batch = 2
avgRewards = scipy.zeros(epis)
allActions = scipy.zeros(epis * batch)
c = 0
for i in range(epis):
    experiment.doInteractions(batch)
    avgRewards[i] = scipy.mean(agent.history["reward"])
    allActions[c:c + batch] = agent.history["action"].flatten() + 1
    agent.learn()
    agent.reset()
    c += batch
pylab.figure(figsize=(16, 6))
#pylab.plot(avgRewards)
pylab.plot(allActions)
pylab.show()
Example 13: Q
# Required import: from pybrain.rl.experiments import Experiment [as alias]
# Or: from pybrain.rl.experiments.Experiment import doInteractions [as alias]
# controller.initialize(0.)
# learner = Q(0.5, 0.8) # alpha 0.5, gamma 0.8
learner = Q() # default alpha 0.5, gamma 0.99
# learner._setExplorer(EpsilonGreedyExplorer(0.5))
agent = LearningAgent(controller, learner)
task = ChainTask(env)
exp = Experiment(task, agent)
reward = 0
xs = []
ys = []
import matplotlib.pyplot as plt
for i in xrange(5000):
    exp.doInteractions(1)
    agent.learn()
    reward += agent.lastreward
    if i % 100 == 0:
        xs.append(i)
        ys.append(reward)
        print i
        # print learner.laststate, learner.lastaction, learner.lastreward
        # print controller.params.reshape(5, 2)
print "TOTAL REWARD:", reward
print ys
Example 14: Environment
# Required import: from pybrain.rl.experiments import Experiment [as alias]
# Or: from pybrain.rl.experiments.Experiment import doInteractions [as alias]
import pickle
import time
# Create environment
sub_env = Environment(20, 20)
world = World(sub_env)
# Brain for the animat, we have already trained the data
f = open('neuro.net', 'r')
trained_net = pickle.load(f)
brain = BrainController(trained_net)
# Learning method we use
#learner = PolicyGradientLearner()
learner = ENAC()
learner._setLearningRate(0.2)
# Create an animat
animat = StupidAnimat(trained_net, learner, sub_env)
# Establish a task
task = InteractTask(world, animat)
brain.validate_net()
experiment = Experiment(task, animat)
while True:
    experiment.doInteractions(10000)
    animat.learn()
    animat.reset()
    brain.validate_net()
    time.sleep(3)
Example 15: __init__
# Required import: from pybrain.rl.experiments import Experiment [as alias]
# Or: from pybrain.rl.experiments.Experiment import doInteractions [as alias]
class Player:
    def __init__(self):
        self.environment = GameEnv()
        av_table = ActionValueTable(self.environment.outdim, self.environment.indim)
        av_table.initialize(0.)  # todo: save & restore agents state
        learner = Q()
        learner._setExplorer(EpsilonGreedyExplorer())
        agent = LearningAgent(av_table, learner)
        self.agent = agent
        self.task = GameTask(self.environment)
        self.experiment = Experiment(self.task, self.agent)

    def name(self, index):
        self.me = index
        [self.opp1, self.opp2] = [i for i in range(3) if i != self.me]

    def hand(self, card):
        self.environment.reset()
        self.environment.setHand(card)
        self.environment.setStack(300)

    def bet1(self, min):
        self.environment.setPhase('bet-1')
        self.environment.setMinBet(min)
        self.experiment.doInteractions(1)
        bet = self.environment.getTranslatedAction()
        return bet

    def bet1_info(self, bets):
        opp1_bet = bets[self.opp1]
        opp2_bet = bets[self.opp2]
        self.environment.setOpponentsBets(opp1_bet, opp2_bet)

    def call1(self, current_bet):
        self.environment.setPhase('call-1')
        self.environment.setToCall(current_bet)
        self.experiment.doInteractions(1)
        is_calling = self.environment.getTranslatedAction()
        return is_calling

    def call1_info(self, in_game):
        opp1_in_game = in_game[self.opp1]
        opp2_in_game = in_game[self.opp2]
        self.environment.setOpponentsFolded(not opp1_in_game, not opp2_in_game)

    def bet2(self, min):
        self.environment.setPhase('bet-2')
        self.environment.setMinBet(min)
        self.experiment.doInteractions(1)
        bet = self.environment.getTranslatedAction()
        return bet

    def bet2_info(self, bets):
        opp1_bet = bets[self.opp1]
        opp2_bet = bets[self.opp2]
        self.environment.setOpponentsBets(opp1_bet, opp2_bet)

    def call2(self, current_bet):
        self.environment.setPhase('call-1')
        self.environment.setToCall(current_bet)
        self.experiment.doInteractions(1)
        is_calling = self.environment.getTranslatedAction()
        return is_calling

    def call2_info(self, in_game):
        opp1_in_game = in_game[self.opp1]
        opp2_in_game = in_game[self.opp2]

    def showdown(self, hand):
        opp1_hand = hand[self.opp1]
        opp2_hand = hand[self.opp2]

    def result(self, winnings):
        my_winnings = winnings[self.me]
        opp1_winnings = winnings[self.opp1]
        opp2_winnings = winnings[self.opp2]
        self.environment.setPhase('results')
        self.task.setWinnings(my_winnings)
        self.experiment.doInteractions(1)
        self.agent.learn()
        self.agent.reset()