本文整理匯總了Python中pybrain.rl.experiments.Experiment.doInteractions方法的典型用法代碼示例。如果您正苦於以下問題:Python Experiment.doInteractions方法的具體用法?Python Experiment.doInteractions怎麼用?Python Experiment.doInteractions使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類pybrain.rl.experiments.Experiment的用法示例。
在下文中一共展示了Experiment.doInteractions方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: testMaze
# 需要導入模塊: from pybrain.rl.experiments import Experiment [as 別名]
# 或者: from pybrain.rl.experiments.Experiment import doInteractions [as 別名]
def testMaze():
    """Train a tabular Q-learner on a small 5x5 maze and check its greedy policy.

    Simplified version of the reinforcement learning tutorial example.
    The agent interacts with the maze in three batches of 40 steps and
    learns from its recorded history after each batch.  The greedy action
    of every state is then printed as one of the letters ``NESW`` and
    compared against a fixed expected policy.

    NOTE(review): exploration is random, so the exact policy asserted at
    the end is not guaranteed on every run -- confirm whether the caller
    seeds the random number generator.

    Raises:
        AssertionError: if the learned greedy policy differs from the
            expected one.
    """
    # Cells equal to 1 are rendered as '#', cells equal to 0 as ' ' (see below).
    structure = np.array([[1, 1, 1, 1, 1],
                          [1, 0, 0, 0, 1],
                          [1, 0, 1, 0, 1],
                          [1, 0, 1, 0, 1],
                          [1, 1, 1, 1, 1]])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    # One table row per grid cell, one column per action (4 actions).
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    for _ in range(3):
        experiment.doInteractions(40)
        # doInteractions only records observations, actions and rewards;
        # the table is updated when learn() is called.  Without it the
        # table keeps its initial value and every greedy action is 'N'.
        agent.learn()
        agent.reset()

    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list('NESW'))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(' #'))[structure])

    policy_text = '\n'.join(''.join(row) for row in greedy_policy)
    print('Maze map:')
    print('\n'.join(''.join(row) for row in maze))
    print('Greedy policy:')
    print(policy_text)
    assert policy_text == 'NNNNN\nNSNNN\nNSNNN\nNEENN\nNNNNN'
示例2: testNet
# 需要導入模塊: from pybrain.rl.experiments import Experiment [as 別名]
# 或者: from pybrain.rl.experiments.Experiment import doInteractions [as 別名]
def testNet(learner, moduleNet, env, maxPlaneStartDist, stepSize, numAngs, thermRadius):
    """Run the learned controller without exploration and plot the plane's distance.

    The learner's explorer is replaced by an epsilon-greedy explorer with
    epsilon 0, a new thermal environment is created, and the agent then
    performs 100 single interactions.  The value of ``env.distPlane()``
    before the first step and after every step is plotted.

    Args:
        learner: learner whose explorer is replaced (mutated in place).
        moduleNet: module used as the agent's controller.
        env: ignored; it is immediately replaced by a new environment.
        maxPlaneStartDist, stepSize, numAngs, thermRadius: forwarded
            unchanged to ``contEnv.contThermEnvironment``.
    """
    # Exploration off: epsilon is set to 0.
    from pybrain.rl.explorers.discrete.egreedy import EpsilonGreedyExplorer
    learner._setExplorer(EpsilonGreedyExplorer(0))
    greedy_agent = LearningAgent(moduleNet, learner)

    # A freshly built environment moves the plane back to the start.
    env = contEnv.contThermEnvironment(maxPlaneStartDist, stepSize, numAngs, thermRadius)
    from simpleThermalTask import SimpThermTask
    thermal_task = SimpThermTask(env)
    from pybrain.rl.experiments import Experiment
    runner = Experiment(thermal_task, greedy_agent)

    # Record the distance once up front, then after each of the 100 moves
    # (hopefully the plane moves to the high reward area).
    n_steps = 100
    distances = [env.distPlane()]
    for _ in range(n_steps):
        runner.doInteractions(1)
        distances.append(env.distPlane())

    # Show the recorded distances.
    import matplotlib.pyplot as plt
    plt.figure(1)
    plt.plot(distances, 'o')
    plt.ylabel('Distance from center of thermal')
    plt.xlabel('Interaction iteration')
    plt.title('Test Results for Neural Fitted Q Learner')
    plt.show()
示例3: run_bbox
# 需要導入模塊: from pybrain.rl.experiments import Experiment [as 別名]
# 或者: from pybrain.rl.experiments.Experiment import doInteractions [as 別名]
def run_bbox(verbose=False):
    """Play one black-box level with a tabular Q-learning agent.

    Loads the training level (or resets it when one is already loaded),
    builds an action-value table sized from the level's feature and action
    counts, and then alternates single interactions with learning for as
    long as ``environment.finish_flag`` is truthy.

    Args:
        verbose: accepted but not used by this function.
    """
    if not bbox.is_level_loaded():
        bbox.load_level("../levels/train_level.data", verbose=1)
    else:
        bbox.reset_level()

    feature_count = bbox.get_num_of_features()
    action_count = bbox.get_num_of_actions()
    bbox.get_max_time()  # queried as before; the value is not used here

    table = ActionValueTable(feature_count, action_count)
    table.initialize(0.2)
    print(table._params)

    q_learner = Q(0.5, 0.1)
    q_learner._setExplorer(EpsilonGreedyExplorer(0.4))
    agent = LearningAgent(table, q_learner)

    environment = GameEnvironment()
    experiment = Experiment(GameTask(environment), agent)

    # NOTE(review): learn() is assumed to belong to the loop body -- the
    # extracted source lost its indentation; confirm against the original.
    while environment.finish_flag:
        experiment.doInteractions(1)
        agent.learn()

    bbox.finish(verbose=1)
示例4: learn
# 需要導入模塊: from pybrain.rl.experiments import Experiment [as 別名]
# 或者: from pybrain.rl.experiments.Experiment import doInteractions [as 別名]
def learn(self, number_of_iterations):
    """Train a tabular Q-learning controller and pickle it to ``test.pcl``.

    A new action-value table is created with one row per combination of
    the entries in ``self.ranges`` (the product of their lengths) and
    ``self.force_granularity`` columns.  The agent then performs
    ``number_of_iterations`` rounds of one interaction followed by
    learning and a history reset.

    Args:
        number_of_iterations: number of interact/learn/reset rounds.

    Side effects:
        Sets ``self.controller`` and overwrites the file ``test.pcl``.
    """
    # reduce is a builtin only on Python 2; functools.reduce works on both.
    from functools import reduce
    from operator import mul

    learner = Q(0.2, 0.8)
    task = CartMovingTask(self.environment)
    state_count = reduce(mul, (len(r) for r in self.ranges))
    self.controller = ActionValueTable(state_count, self.force_granularity)
    self.controller.initialize(1.0)
    agent = LearningAgent(self.controller, learner)
    experiment = Experiment(task, agent)

    for _ in range(number_of_iterations):
        experiment.doInteractions(1)
        agent.learn()
        agent.reset()

    # Pickle data is bytes: the file must be opened in binary mode
    # (text mode raises TypeError on Python 3).
    with open("test.pcl", "wb") as f:
        pickle.dump(self.controller, f)
示例5: maze
# 需要導入模塊: from pybrain.rl.experiments import Experiment [as 別名]
# 或者: from pybrain.rl.experiments.Experiment import doInteractions [as 別名]
def maze():
    """Train a tabular Q-learner on a text-defined maze and plot its value map.

    The maze is given as rows of characters; each character is converted
    to ``ord(c) - ord(" ")``, so a space becomes 0.  The agent runs 100
    rounds of 100 interactions, learning after every round, and after each
    round the best action value of every cell is drawn with pylab.
    Finally the maze and the greedy policy (letters ``NESW``) are printed.

    Raises:
        ValueError: if the maze rows do not all have the same width.
    """
    pylab.gray()
    pylab.ion()
    # The goal appears to be in the upper right
    # NOTE(review): these rows have different lengths as written --
    # interior runs of spaces look collapsed by text extraction.  Restore
    # the original spacing so that every row has the same width.
    structure = [
        "!!!!!!!!!!",
        "! ! ! ! !",
        "! !! ! ! !",
        "! ! !",
        "! !!!!!! !",
        "! ! ! !",
        "! ! !!!! !",
        "! !",
        "! !!!!! !",
        "! ! !",
        "!!!!!!!!!!",
    ]
    row_widths = sorted(set(len(row) for row in structure))
    if len(row_widths) != 1:
        # A ragged list cannot become a 2-D grid; fail with a clear message
        # instead of an obscure error further down.
        raise ValueError("maze rows must all have the same width, got widths %s" % row_widths)

    structure = np.array([[ord(c) - ord(" ") for c in row] for row in structure])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    # One table row per grid cell, one column per action (4 actions).
    state_count = shape.prod()
    controller = ActionValueTable(state_count, 4)
    controller.initialize(1.0)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    for _ in range(100):
        experiment.doInteractions(100)
        agent.learn()
        agent.reset()
        # max(1) gives/plots the biggest action value for each cell.  The
        # grid size comes from the maze itself rather than a fixed 9x9,
        # so the reshape matches the table for any maze size.
        best_values = controller.params.reshape(state_count, 4).max(1).reshape(*shape)
        pylab.pcolor(best_values)
        pylab.draw()

    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(state_count, 4), 1)
    greedy_policy = np.flipud(np.array(list("NESW"))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(" #"))[structure])
    print("Maze map:")
    print("\n".join("".join(row) for row in maze))
    print("Greedy policy:")
    print("\n".join("".join(row) for row in greedy_policy))
示例6: run
# 需要導入模塊: from pybrain.rl.experiments import Experiment [as 別名]
# 或者: from pybrain.rl.experiments.Experiment import doInteractions [as 別名]
def run():
    """Train a Q-learning agent on the blackjack task and print its action values.

    States are the current hand value (0-20); the two reported actions are
    Stand (0) and Hit (1).  The agent performs ``NB_ITERATION`` single
    interactions and learns only after interactions whose reward is
    non-zero.  Afterwards a Markdown table with the value of each action
    per state is printed.
    """
    # action value table
    av_table = ActionValueTable(MAX_VAL, MIN_VAL)
    av_table.initialize(0.)

    # Q-learning agent; the explorer's epsilon is 0.0
    q_learner = Q(Q_ALPHA, Q_GAMMA)
    q_learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(av_table, q_learner)

    # environment, task and experiment
    task = BlackjackTask(BlackjackEnv(), verbosity=VERBOSE)
    experiment = Experiment(task, agent)

    for _ in range(NB_ITERATION):
        experiment.doInteractions(1)
        if task.lastreward == 0:
            continue
        if VERBOSE:
            print("Agent learn")
        agent.learn()

    print('|First State|Choice 0 (Stand)|Choice 1 (Hit)|Relative value of Standing over Hitting|')
    print('|:-------:|:-------|:-----|:-----|')
    for state in range(MAX_VAL):
        values = av_table.getActionValues(state)
        stand_value = values[0]
        hit_value = values[1]
        print('| %s | %s | %s | %s |' % (
            (state + 1),
            stand_value,
            hit_value,
            stand_value - hit_value
        ))
示例7: __init__
# 需要導入模塊: from pybrain.rl.experiments import Experiment [as 別名]
# 或者: from pybrain.rl.experiments.Experiment import doInteractions [as 別名]
class RL:
    """SARSA agent with a 4x5 action-value table wired to the HASSH task."""

    def __init__(self):
        """Build the table, the learner, the agent and the experiment."""
        self.av_table = ActionValueTable(4, 5)
        self.av_table.initialize(0.1)

        sarsa = SARSA()
        sarsa._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, sarsa)

        hassh_task = HASSHTask(HASSHEnv())
        self.experiment = Experiment(hassh_task, self.agent)

    def go(self):
        """Publish the table's first row, then do one interaction and learn.

        The first row of the table (viewed as 4x5) is stored in
        ``rassh.core.constants.rl_params`` before the interaction runs.
        """
        first_row = self.av_table.params.reshape(4, 5)[0]
        rassh.core.constants.rl_params = first_row
        self.experiment.doInteractions(1)
        self.agent.learn()
示例8: explore_maze
# 需要導入模塊: from pybrain.rl.experiments import Experiment [as 別名]
# 或者: from pybrain.rl.experiments.Experiment import doInteractions [as 別名]
def explore_maze():
    """Train a tabular Q-learner on a text-defined maze and print its greedy policy.

    Simplified version of the reinforcement learning tutorial example.
    Each maze character is converted to ``ord(c) - ord(" ")``, so a space
    becomes 0.  The agent runs 30 rounds of 30 interactions, learning and
    resetting its history after every round.  The maze and the greedy
    action of every cell (letters ``NESW``) are then printed.

    No fixed policy is asserted: a 5-row policy string cannot match this
    11-row grid, and exploration is random.

    Raises:
        ValueError: if the maze rows do not all have the same width.
    """
    # NOTE(review): these rows have different lengths as written --
    # interior runs of spaces look collapsed by text extraction.  Restore
    # the original spacing so that every row has the same width.
    structure = [
        list("!!!!!!!!!!"),
        list("! ! ! ! !"),
        list("! !! ! ! !"),
        list("! ! !"),
        list("! !!!!!! !"),
        list("! ! ! !"),
        list("! ! !!!! !"),
        list("! !"),
        list("! !!!!! !"),
        list("! ! !"),
        list("!!!!!!!!!!"),
    ]
    row_widths = sorted(set(len(row) for row in structure))
    if len(row_widths) != 1:
        # A ragged list cannot become a 2-D grid; fail with a clear message
        # instead of an obscure error further down.
        raise ValueError("maze rows must all have the same width, got widths %s" % row_widths)

    structure = np.array([[ord(c) - ord(" ") for c in row] for row in structure])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    # One table row per grid cell, one column per action (4 actions).
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.0)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    for _ in range(30):
        experiment.doInteractions(30)
        agent.learn()
        agent.reset()

    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list("NESW"))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(" #"))[structure])
    print("Maze map:")
    print("\n".join("".join(row) for row in maze))
    print("Greedy policy:")
    print("\n".join("".join(row) for row in greedy_policy))
示例9: PlayYourCardsRight
# 需要導入模塊: from pybrain.rl.experiments import Experiment [as 別名]
# 或者: from pybrain.rl.experiments.Experiment import doInteractions [as 別名]
class PlayYourCardsRight(Feature):
    """Card-game feature played by a Q-learning agent through speech I/O."""

    def __init__(self, text_to_speech, speech_to_text):
        """Wire up the table, agent, game interaction, task and experiment.

        Args:
            text_to_speech: forwarded to ``GameInteraction``.
            speech_to_text: forwarded to ``GameInteraction``.
        """
        Feature.__init__(self)

        # 13 x 2 table; start from zeros only when loading reports False.
        table = GameTable(13, 2)
        self.av_table = table
        if table.loadParameters() == False:
            table.initialize(0.)

        # Q-learning agent; the explorer's epsilon is 0.0.
        q_learner = Q(0.5, 0.0)
        q_learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(table, q_learner)

        # The same interaction object is shared by environment and task.
        interaction = GameInteraction(text_to_speech, speech_to_text)
        self.game_interaction = interaction
        game_task = GameTask(GameEnvironment(interaction), interaction)
        self.experiment = Experiment(game_task, self.agent)

    @property
    def is_speaking(self):
        """Whether the game interaction reports that it is speaking."""
        return self.game_interaction.is_speaking

    def _thread(self, args):
        """Play rounds until ``self.is_stop`` becomes true.

        Args:
            args: not used.
        """
        # NOTE(review): saving inside the loop is assumed -- the extracted
        # source lost its indentation; confirm against the original.
        while not self.is_stop:
            self.experiment.doInteractions(1)
            self.agent.learn()
            self.av_table.saveParameters()
示例10: ActionValueTable
# 需要導入模塊: from pybrain.rl.experiments import Experiment [as 別名]
# 或者: from pybrain.rl.experiments.Experiment import doInteractions [as 別名]
# Blackjack script: a Q-learning agent that plays and learns forever.
#
# define action-value table
# number of states is:
#
# current value: 1-21
#
# number of actions:
#
# Stand=0, Hit=1
av_table = ActionValueTable(21, 2)
av_table.initialize(0.)
# define Q-learning agent; the explorer's epsilon is 0.0
learner = Q(0.5, 0.0)
learner._setExplorer(EpsilonGreedyExplorer(0.0))
agent = LearningAgent(av_table, learner)
# define the environment
env = BlackjackEnv()
# define the task
task = BlackjackTask(env)
# finally, define experiment
experiment = Experiment(task, agent)
# ready to go, start the process
# The loop has no exit condition: one interaction, then learn from it and
# clear the agent's recorded history, repeated until the process is stopped.
while True:
    experiment.doInteractions(1)
    agent.learn()
    agent.reset()
示例11: Q
# 需要導入模塊: from pybrain.rl.experiments import Experiment [as 別名]
# 或者: from pybrain.rl.experiments.Experiment import doInteractions [as 別名]
)
# Predator script fragment (Python 2): predTable is created by the
# statement that ends just above this point.
predTable.initialize(0.)
# Q-learner with an epsilon-greedy explorer; ALPHA, GAMMA and EPSILON are
# defined outside this fragment.
predLearner = Q(ALPHA, GAMMA)
predLearner._setExplorer(EpsilonGreedyExplorer(EPSILON))
predAgent = LearningAgent(predTable, predLearner)
predEnv = PredatorEnvironment(world)
predTask = PredatorTask(predEnv)
predExp = Experiment(predTask, predAgent)
# NOTE(review): the nesting below is reconstructed -- the extracted source
# lost its indentation.  The per-step table dump is assumed to be inside
# the for loop; confirm against the original.
try:
    for t in xrange(MAX_TIME):
        print 't = %d' % t
        # Expose the current step number to the world object.
        world.t = t
        predExp.doInteractions(1)
        predAgent.learn()
        print 'Colors vs. Q-table:'
        table_print(predTable._params, PredatorInteraction.NSTATES)
        print
except KeyboardInterrupt:
    # Ctrl-C ends the run early; the final table is still printed below.
    pass
finally:
    print 'Background: %s' % BKGD_COLOR
    print 'Colors vs. Final Q-table:'
    table_print(predTable._params, PredatorInteraction.NSTATES)
    print
# NOTE(review): assumed to be a top-level statement after the try block.
counts = {'ate' : {}, 'poison' : 0, 'death' : 0, 'poisondeath' : 0, 'rejected' : {}}
示例12: PropensityTable
# 需要導入模塊: from pybrain.rl.experiments import Experiment [as 別名]
# 或者: from pybrain.rl.experiments.Experiment import doInteractions [as 別名]
# Roth-Erev script fragment: payouts and task are defined outside this
# fragment.  The table gets one entry per row of payouts.
table = PropensityTable(payouts.shape[0])
table.initialize(500.0)
#learner = RothErev(experimentation=0.55, recency=0.3)
learner = VariantRothErev(experimentation=0.65, recency=0.3)
learner.explorer = BoltzmannExplorer(tau=100.0, decay=0.9995)
agent = LearningAgent(table, learner)
experiment = Experiment(task, agent)
# 10 episodes of 2 interactions each.
epis = int(1e1)
batch = 2
# One mean reward per episode; one recorded action per interaction.
avgRewards = scipy.zeros(epis)
allActions = scipy.zeros(epis * batch)
# c is the write offset into allActions for the current episode.
c = 0
for i in range(epis):
    experiment.doInteractions(batch)
    # Read the history before reset() is called below.
    avgRewards[i] = scipy.mean(agent.history["reward"])
    # Recorded actions are stored with 1 added to each value.
    allActions[c:c + batch] = agent.history["action"].flatten() + 1
    agent.learn()
    agent.reset()
    c += batch
pylab.figure(figsize=(16, 6))
#pylab.plot(avgRewards)
pylab.plot(allActions)
pylab.show()
示例13: Q
# 需要導入模塊: from pybrain.rl.experiments import Experiment [as 別名]
# 或者: from pybrain.rl.experiments.Experiment import doInteractions [as 別名]
# Chain-task script fragment (Python 2): controller and env are defined
# outside this fragment.
# controller.initialize(0.)
# learner = Q(0.5, 0.8) # alpha 0.5, gamma 0.8
learner = Q() # default alpha 0.5, gamma 0.99
# learner._setExplorer(EpsilonGreedyExplorer(0.5))
agent = LearningAgent(controller, learner)
task = ChainTask(env)
exp = Experiment(task, agent)
# Running total of agent.lastreward over all steps.
reward = 0
# Step indices and the running total, sampled every 100 steps.
xs = []
ys = []
import matplotlib.pyplot as plt
# NOTE(review): the nesting below is reconstructed -- the extracted source
# lost its indentation.  "print i" is assumed to belong to the
# every-100-steps branch; confirm against the original.
for i in xrange(5000):
    exp.doInteractions(1)
    agent.learn()
    reward += agent.lastreward
    if i%100 == 0:
        xs.append(i)
        ys.append(reward)
        print i
    # print learner.laststate, learner.lastaction, learner.lastreward
    # print controller.params.reshape(5, 2)
print "TOTAL REWARD:", reward
print ys
示例14: Environment
# 需要導入模塊: from pybrain.rl.experiments import Experiment [as 別名]
# 或者: from pybrain.rl.experiments.Experiment import doInteractions [as 別名]
import pickle
import time
# Animat script: load a previously trained network and keep refining it.
#
# Create environment
sub_env = Environment(20, 20)
world = World(sub_env)
# Brain for the animat, we have already trained the data.
# Pickle data is bytes, so the file is opened in binary mode, and the
# with-block closes the handle once loading is done.
# NOTE(security): pickle.load can execute arbitrary code; only load
# 'neuro.net' when it comes from a trusted source.
with open('neuro.net', 'rb') as f:
    trained_net = pickle.load(f)
brain = BrainController(trained_net)
# Learning method we use
#learner = PolicyGradientLearner()
learner = ENAC()
learner._setLearningRate(0.2)
# Create an animat
animat = StupidAnimat(trained_net, learner, sub_env)
# Establish a task
task = InteractTask(world, animat)
brain.validate_net()
experiment = Experiment(task, animat)
# The loop has no exit condition: 10000 interactions, learn, clear the
# recorded history, validate the network, pause 3 seconds, and repeat
# until the process is stopped.
while True:
    experiment.doInteractions(10000)
    animat.learn()
    animat.reset()
    brain.validate_net()
    time.sleep(3)
示例15: __init__
# 需要導入模塊: from pybrain.rl.experiments import Experiment [as 別名]
# 或者: from pybrain.rl.experiments.Experiment import doInteractions [as 別名]
class Player:
    """Participant in a three-player betting game, driven by a Q-learning agent.

    The game driver calls the methods in sequence: ``name``, ``hand``,
    then the betting/calling rounds (``bet1``, ``call1``, ``bet2``,
    ``call2``) with their ``*_info`` notifications, ``showdown`` and
    finally ``result``.  Each decision is made by configuring the
    environment, running one experiment interaction and reading back the
    action the environment translated.
    """

    def __init__(self):
        """Create the environment, the action-value table, the agent and the experiment."""
        self.environment = GameEnv()
        av_table = ActionValueTable(self.environment.outdim, self.environment.indim)
        av_table.initialize(0.)  # todo: save & restore agents state
        learner = Q()
        learner._setExplorer(EpsilonGreedyExplorer())
        self.agent = LearningAgent(av_table, learner)
        self.task = GameTask(self.environment)
        self.experiment = Experiment(self.task, self.agent)

    def _decide(self):
        """Run one interaction and return the environment's translated action."""
        self.experiment.doInteractions(1)
        return self.environment.getTranslatedAction()

    def name(self, index):
        """Record this player's seat and the two opponents' seats (0-2)."""
        self.me = index
        [self.opp1, self.opp2] = [i for i in range(3) if i != self.me]

    def hand(self, card):
        """Start a new hand: reset the environment, set the card and a stack of 300."""
        self.environment.reset()
        self.environment.setHand(card)
        self.environment.setStack(300)

    def bet1(self, min):
        """Return the agent's bet for the first betting round, given the minimum bet."""
        self.environment.setPhase('bet-1')
        self.environment.setMinBet(min)
        return self._decide()

    def bet1_info(self, bets):
        """Pass the opponents' first-round bets to the environment."""
        opp1_bet = bets[self.opp1]
        opp2_bet = bets[self.opp2]
        self.environment.setOpponentsBets(opp1_bet, opp2_bet)

    def call1(self, current_bet):
        """Return the agent's call decision for the first round."""
        self.environment.setPhase('call-1')
        self.environment.setToCall(current_bet)
        return self._decide()

    def call1_info(self, in_game):
        """Tell the environment which opponents folded after the first call."""
        opp1_in_game = in_game[self.opp1]
        opp2_in_game = in_game[self.opp2]
        self.environment.setOpponentsFolded(not opp1_in_game, not opp2_in_game)

    def bet2(self, min):
        """Return the agent's bet for the second betting round, given the minimum bet."""
        self.environment.setPhase('bet-2')
        self.environment.setMinBet(min)
        return self._decide()

    def bet2_info(self, bets):
        """Pass the opponents' second-round bets to the environment."""
        opp1_bet = bets[self.opp1]
        opp2_bet = bets[self.opp2]
        self.environment.setOpponentsBets(opp1_bet, opp2_bet)

    def call2(self, current_bet):
        """Return the agent's call decision for the second round."""
        # Second round uses its own phase name, mirroring 'bet-1'/'bet-2'.
        # NOTE(review): confirm that the environment accepts 'call-2'.
        self.environment.setPhase('call-2')
        self.environment.setToCall(current_bet)
        return self._decide()

    def call2_info(self, in_game):
        """Receive who is still in the game after the second call.

        The values are read but not forwarded to the environment.
        """
        opp1_in_game = in_game[self.opp1]
        opp2_in_game = in_game[self.opp2]

    def showdown(self, hand):
        """Receive the opponents' hands; the values are read but not used."""
        opp1_hand = hand[self.opp1]
        opp2_hand = hand[self.opp2]

    def result(self, winnings):
        """Feed this player's winnings to the task, then learn and reset the agent."""
        my_winnings = winnings[self.me]
        # The opponents' winnings are read but not used.
        opp1_winnings = winnings[self.opp1]
        opp2_winnings = winnings[self.opp2]
        self.environment.setPhase('results')
        self.task.setWinnings(my_winnings)
        self.experiment.doInteractions(1)
        self.agent.learn()
        self.agent.reset()