本文整理汇总了 Python 中 pybrain.rl.learners.valuebased.ActionValueTable.initialize 方法的典型用法代码示例。如果您正苦于以下问题:Python ActionValueTable.initialize 方法的具体用法?Python ActionValueTable.initialize 怎么用?Python ActionValueTable.initialize 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pybrain.rl.learners.valuebased.ActionValueTable 的用法示例。
在下文中一共展示了ActionValueTable.initialize方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from pybrain.rl.learners.valuebased import ActionValueTable [as 别名]
# 或者: from pybrain.rl.learners.valuebased.ActionValueTable import initialize [as 别名]
class SpadesPlayer:
    """A Spades player holding a hand of cards and a PyBrain action-value table."""

    def __init__(self, game_deck, game_env):
        """Draw an initial hand and create the learning-related attributes.

        Args:
            game_deck: Deck handed to ``SpadesDeckTest.SpadesDeckTest.draw_hand``
                whenever a hand is drawn.
            game_env: Environment stored on the player and wrapped in a
                ``SpadesTask.SpadesTask``.
        """
        self.gameDeck = game_deck
        self.hand = SpadesDeckTest.SpadesDeckTest.draw_hand(self.gameDeck)
        self.gamesWon = 0
        self.gamesTied = 0
        # Table constructed with the arguments (4, 1); every entry starts at 0.0.
        self.av_table = ActionValueTable(4, 1)
        self.av_table.initialize(0.0)
        self.env = game_env
        self.task = SpadesTask.SpadesTask(game_env)
        # Agent and learner are not created here; they stay None until set elsewhere.
        self.agent = None
        self.learner = None

    def get_value(self):
        """Return the player's current hand (the same object, not a copy)."""
        return self.hand

    def play_card(self, cardindex):
        """Take the card at position ``cardindex`` out of the hand.

        Args:
            cardindex: Index into ``self.hand`` of the card to play.

        Returns:
            A shallow copy of the card that was at ``cardindex``.

        Raises:
            IndexError: If ``cardindex`` is out of range for the hand.
        """
        print(cardindex)
        played = copy.copy(self.hand[cardindex])
        # Delete by position rather than by value: ``remove`` drops the first
        # element that compares equal, which is the wrong slot when the hand
        # contains equal cards.
        del self.hand[cardindex]
        return played

    def get_new_hand(self):
        """Replace the current hand with a freshly drawn one from the deck."""
        self.hand = SpadesDeckTest.SpadesDeckTest.draw_hand(self.gameDeck)
示例2: initExperiment
# 需要导入模块: from pybrain.rl.learners.valuebased import ActionValueTable [as 别名]
# 或者: from pybrain.rl.learners.valuebased.ActionValueTable import initialize [as 别名]
def initExperiment(alg, optimistic=True):
    """Assemble a maze experiment driven by a table-based learning agent.

    Args:
        alg: Zero-argument callable that returns the learner; the original
            comment suggests SARSA, Q or QLambda.
        optimistic: When truthy the value table is filled with 1.0,
            otherwise with 0.0.

    Returns:
        The ``Experiment`` object, with an empty ``allRewards`` list attached.
    """
    # Environment and task; (7, 7) is passed as Maze's second argument
    # (presumably the goal cell -- confirm against the Maze API).
    maze_env = Maze(envmatrix, (7, 7))
    maze_task = MDPMazeTask(maze_env)

    # Value table constructed with (81, 4), filled with the chosen start value.
    start_value = 1. if optimistic else 0.
    value_table = ActionValueTable(81, 4)
    value_table.initialize(start_value)

    # The learner comes from the caller-supplied factory. Per the original
    # note, exploration is e-greedy by default and a different explorer
    # (e.g. BoltzmannExplorer) could be assigned to learner.explorer instead.
    agent = LearningAgent(value_table, alg())
    agent.batchMode = False

    exp = Experiment(maze_task, agent)
    exp.allRewards = []
    return exp
示例3: q_learning_table
# 需要导入模块: from pybrain.rl.learners.valuebased import ActionValueTable [as 别名]
# 或者: from pybrain.rl.learners.valuebased.ActionValueTable import initialize [as 别名]
def q_learning_table():
    """Play 600 games with a table-based Q-learning agent and dump the results.

    After each game the agent learns and is reset, and a progress line is
    printed: game index, truncated mean score so far, best score so far,
    and the score and turn count of the game just played.

    Side effects:
        Writes the pickled agent to ``./agent.dump`` and the pickled
        ``[score_list, turn_list]`` pair to ``./score.dump``.
    """
    controller = ActionValueTable(36, 4)
    learner = Q()
    controller.initialize(1.)
    agent = LearningAgent(controller, learner)

    score_list = []
    turn_list = []
    # Translated original note: "+100 for the training done on the neural side".
    for i in range(600):
        print_state(agent.module.getValue, 'table')
        score, turn = play(agent, 'table')
        score_list.append(score)
        turn_list.append(turn)
        agent.learn()
        agent.reset()
        # Space-separated progress line; built as one string so the output is
        # the same under the Python 2 print statement and Python 3 print().
        progress = (i, int(numpy.mean(score_list)), max(score_list), score, turn)
        print(' '.join(str(item) for item in progress))

    # pickle produces bytes, so the files must be opened in binary mode
    # (text mode raises on Python 3).
    with open('./agent.dump', 'wb') as f:
        pickle.dump(agent, f)
    with open('./score.dump', 'wb') as f:
        pickle.dump([score_list, turn_list], f)
示例4: testMaze
# 需要导入模块: from pybrain.rl.learners.valuebased import ActionValueTable [as 别名]
# 或者: from pybrain.rl.learners.valuebased.ActionValueTable import initialize [as 别名]
def testMaze():
    """Run Q-learning on a 5x5 maze and check the resulting greedy policy.

    Simplified version of the reinforcement learning tutorial example.
    Prints the maze map and the greedy policy, then asserts the policy
    matches the expected layout.
    """
    walls = np.array([[1, 1, 1, 1, 1],
                      [1, 0, 0, 0, 1],
                      [1, 0, 1, 0, 1],
                      [1, 0, 1, 0, 1],
                      [1, 1, 1, 1, 1]])
    dims = np.array(walls.shape)
    n_cells = dims.prod()

    env = Maze(walls, tuple(dims - 2))
    table = ActionValueTable(n_cells, 4)
    table.initialize(1.)
    agent = LearningAgent(table, Q())
    experiment = Experiment(MDPMazeTask(env), agent)

    for _ in range(3):
        experiment.doInteractions(40)
        # Best value per cell, laid out on the grid; the result is not used.
        table.params.reshape(n_cells, 4).max(1).reshape(*dims)

    # Per the original note: (0, 0) is upper left and (0, N) is upper right,
    # so the matrices are flipped upside down to match the NESW action order.
    best_action = np.argmax(table.params.reshape(n_cells, 4), 1)
    action_letters = np.array(list('NESW'))
    policy_rows = np.flipud(action_letters[best_action].reshape(dims))
    maze_rows = np.flipud(np.array(list(' #'))[walls])

    print('Maze map:')
    print('\n'.join(''.join(cells) for cells in maze_rows))
    print('Greedy policy:')
    print('\n'.join(''.join(cells) for cells in policy_rows))
    policy_text = '\n'.join(''.join(cells) for cells in policy_rows)
    assert policy_text == 'NNNNN\nNSNNN\nNSNNN\nNEENN\nNNNNN'
示例5: run_bbox
# 需要导入模块: from pybrain.rl.learners.valuebased import ActionValueTable [as 别名]
# 或者: from pybrain.rl.learners.valuebased.ActionValueTable import initialize [as 别名]
def run_bbox(verbose=False):
    """Run a Q-learning agent against the bbox level until the environment stops.

    Args:
        verbose: Accepted for interface compatibility; the body does not
            read it (the bbox calls pass ``verbose=1`` explicitly).

    Side effects:
        Loads or resets the bbox level, prints the initial table parameters,
        and calls ``bbox.finish`` when the loop ends.
    """
    if bbox.is_level_loaded():
        bbox.reset_level()
    else:
        bbox.load_level("../levels/train_level.data", verbose=1)

    # Query the level's dimensions on both paths, so the table is never built
    # from placeholder values when the level was already loaded.
    n_features = bbox.get_num_of_features()
    n_actions = bbox.get_num_of_actions()
    max_time = bbox.get_max_time()  # not used below; call kept as in the original

    av_table = ActionValueTable(n_features, n_actions)
    av_table.initialize(0.2)
    print(av_table._params)

    learner = Q(0.5, 0.1)
    learner._setExplorer(EpsilonGreedyExplorer(0.4))
    agent = LearningAgent(av_table, learner)

    environment = GameEnvironment()
    task = GameTask(environment)
    experiment = Experiment(task, agent)

    # One interaction followed by one learning step, for as long as the
    # environment's finish_flag stays truthy.
    while environment.finish_flag:
        experiment.doInteractions(1)
        agent.learn()

    bbox.finish(verbose=1)
示例6: IntelligentAgent
# 需要导入模块: from pybrain.rl.learners.valuebased import ActionValueTable [as 别名]
# 或者: from pybrain.rl.learners.valuebased.ActionValueTable import initialize [as 别名]
class IntelligentAgent(Agent, LearningAgent):
    """An agent that learns through a value-based RL algorithm."""

    def __init__(self, name, num_states, num_actions, epsilon=0.3, gamma=0.99, alpha=0.95):
        """Create the value table and Q learner, then initialise both base classes.

        Args:
            name: Passed to ``Agent.__init__``.
            num_states: First argument to ``ActionValueTable``.
            num_actions: Second argument to ``ActionValueTable``.
            epsilon: Value assigned to the learner's ``explorer.epsilon``.
            gamma: ``gamma`` keyword for ``Q``.
            alpha: ``alpha`` keyword for ``Q``.
        """
        # Table starts from random values, one per (state, action) entry.
        self.controller = table = ActionValueTable(num_states, num_actions)
        table.initialize(np.random.rand(num_states * num_actions))

        self.learner = q_learner = Q(gamma=gamma, alpha=alpha)
        q_learner.batchMode = False
        q_learner.explorer.epsilon = epsilon

        LearningAgent.__init__(self, table, q_learner)
        Agent.__init__(self, name)

    def choose_action(self):
        """Return the first element of ``getAction()``."""
        action = self.getAction()
        return action[0]
示例7: __init__
# 需要导入模块: from pybrain.rl.learners.valuebased import ActionValueTable [as 别名]
# 或者: from pybrain.rl.learners.valuebased.ActionValueTable import initialize [as 别名]
def __init__(self, name, clientID, sensorHandle, bodyHandle):
    """Store the agent's identity and handles and build its PyBrain components.

    Args:
        name: Agent name; also fed to the mind as its "name" input.
        clientID: Client ID of the Dummy object.
        sensorHandle: Proximity sensor handle of the V-Rep agent.
        bodyHandle: BubbleRob body handle.
    """
    self.resetParameters()

    # PyBrain side: a value table built with (150, 5), filled with 1.0,
    # paired with a Q learner inside the agent's mind.
    q_table = ActionValueTable(150, 5)
    q_table.initialize(1.)
    q_learner = Q()
    self.__mind = AgentMind(q_table, q_learner)
    self.__controller = q_table

    # Identity and simulator handles.
    self.__name = name
    self.__clientID = clientID
    self.__sensorHandle = sensorHandle
    self.__bodyHandle = bodyHandle
    self.__mind.setInput("name", name)

    # PyBrain environment and the task that wraps it.
    locomotion_env = LocomotionEnvironment()
    self.__pybrainEnvironment = locomotion_env
    self.__pybrainTask = LocomotionTask(self.__pybrainEnvironment)
示例8: initExperiment
# 需要导入模块: from pybrain.rl.learners.valuebased import ActionValueTable [as 别名]
# 或者: from pybrain.rl.learners.valuebased.ActionValueTable import initialize [as 别名]
def initExperiment(learnalg='Q', history=None, binEdges='10s',
                   scriptfile='./rlRunExperiment_v2.pl',
                   resetscript='./rlResetExperiment.pl'):
    """Build an Omnet experiment, optionally resuming from saved history.

    Args:
        learnalg: ``'Q'`` for a table-based Q learner or ``'NFQ'`` for a
            network-based NFQ learner.
        history: ``None`` for a fresh start, otherwise a mapping read for the
            keys ``'data'``, ``'rewards'``, ``'nruns'`` and (for ``'Q'``)
            ``'av_table'``.
        binEdges: ``'10s'``, ``'30s'``, ``'lessperturbed'`` or ``None``;
            selects which bin-edge set is passed to the environment and task.
        scriptfile: Passed to ``OmnetEnvironment``.
        resetscript: Passed to ``OmnetEnvironment``.

    Returns:
        The ``Experiment``, with ``nruns`` set to 0 or to the saved count.

    Raises:
        Exception: If ``binEdges`` or ``learnalg`` is not a recognised value.
    """
    resumed = history is not None

    # Pick the bin-edge set named by binEdges.
    if binEdges == '10s':
        centerBinEdges = centerBinEdges_10s
    elif binEdges == '30s':
        centerBinEdges = centerBinEdges_30s
    elif binEdges == 'lessperturbed':
        centerBinEdges = centerBinEdges_10s_lessperturbed
    elif binEdges is None:
        centerBinEdges = None
    else:
        raise Exception("No bins for given binEdges setting")

    # Environment and task, restoring saved data/rewards when resuming.
    env = OmnetEnvironment(centerBinEdges, scriptfile, resetscript)
    if resumed:
        env.data = history['data']
    task = OmnetTask(env, centerBinEdges)
    if resumed:
        task.allrewards = history['rewards']

    # Controller and learner.
    if learnalg == 'Q':
        nstates = env.numSensorBins ** env.numSensors
        if resumed:
            av_table = history['av_table']
        else:
            av_table = ActionValueTable(nstates, env.numActions)
            av_table.initialize(1.)
        learner = Q(0.1, 0.9)  # alpha, gamma
        learner._setExplorer(EpsilonGreedyExplorer(0.05))  # epsilon
    elif learnalg == 'NFQ':
        av_table = ActionValueNetwork(env.numSensors, env.numActions)
        learner = NFQ()
    else:
        raise Exception("learnalg unknown")

    experiment = Experiment(task, LearningAgent(av_table, learner))
    experiment.nruns = history['nruns'] if resumed else 0
    return experiment
示例9: maze
# 需要导入模块: from pybrain.rl.learners.valuebased import ActionValueTable [as 别名]
# 或者: from pybrain.rl.learners.valuebased.ActionValueTable import initialize [as 别名]
def maze():
    """Train a Q-learning agent on a text-defined maze, plotting values as it learns.

    Runs 100 rounds of 100 interactions; after each round the agent learns,
    is reset, and the best action value per cell is drawn with pylab.
    Finally prints the maze map and the greedy policy.
    """
    pylab.gray()
    pylab.ion()
    # The goal appears to be in the upper right
    # NOTE(review): the rows below are not all the same length in this copy
    # (interior whitespace looks collapsed). np.array needs equal-width rows
    # to produce a 2-D grid -- verify against the original maze.
    structure = [
        "!!!!!!!!!!",
        "! ! ! ! !",
        "! !! ! ! !",
        "! ! !",
        "! !!!!!! !",
        "! ! ! !",
        "! ! !!!! !",
        "! !",
        "! !!!!! !",
        "! ! !",
        "!!!!!!!!!!",
    ]
    # '!' maps to 1 and ' ' maps to 0 (offset from the space character).
    structure = np.array([[ord(c) - ord(" ") for c in row] for row in structure])
    shape = np.array(structure.shape)
    n_states = shape.prod()

    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(n_states, 4)
    controller.initialize(1.0)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    for _ in range(100):
        experiment.doInteractions(100)
        agent.learn()
        agent.reset()
        # 4 actions per state, one state per grid cell. max(1) gives the
        # biggest value for each cell. The sizes come from the maze itself
        # rather than a hard-coded 81 / 9x9, which does not match this grid.
        pylab.pcolor(controller.params.reshape(n_states, 4).max(1).reshape(*shape))
        pylab.draw()

    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(n_states, 4), 1)
    greedy_policy = np.flipud(np.array(list("NESW"))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(" #"))[structure])
    print("Maze map:")
    print("\n".join("".join(row) for row in maze))
    print("Greedy policy:")
    print("\n".join("".join(row) for row in greedy_policy))
示例10: initialize
# 需要导入模块: from pybrain.rl.learners.valuebased import ActionValueTable [as 别名]
# 或者: from pybrain.rl.learners.valuebased.ActionValueTable import initialize [as 别名]
def initialize(self, grid):
    """
    Seed all the (s, a) pairs with the no-traffic travel time.

    Every entry starts at -inf; entries reachable through an incoming edge
    are then set to minus (shortest-path length + edge weight), and every
    action from the final node's states is set to 0.

    NOTE(review): ``g``, ``task`` and ``const`` are not defined in this
    method; they are presumably module-level names -- confirm.
    """
    # Not every action is possible from every state, hence the -inf default.
    ActionValueTable.initialize(self, float("-inf"))

    for node, base_time in grid.all_shortest_path_lengths():
        for edge in grid.grid.in_edges([node]):
            for period in xrange(const.PERIODS):
                # The state involves the node previous to the current node.
                state = task.get_state(g.node_number(edge[0]), period)
                action = g.action(edge)
                value = - base_time - grid.grid.get_edge_data(*edge)["weight"]
                self.updateValue(state, action, value)

    # Q(s_final, a) is 0 for all actions.
    for period in xrange(const.PERIODS):
        final_state = task.get_state(const.NODES - 1, period)
        for action in xrange(const.POSSIBLE_ACTIONS):
            self.updateValue(final_state, action, 0)
示例11: run
# 需要导入模块: from pybrain.rl.learners.valuebased import ActionValueTable [as 别名]
# 或者: from pybrain.rl.learners.valuebased.ActionValueTable import initialize [as 别名]
def run():
    """Train a Q-learning blackjack agent and print its action values as a table.

    Per the original notes:
        number of states: the current value, 0-20
        number of actions: Stand=0, Hit=1

    The agent learns only after interactions whose reward is non-zero.
    The final table lists, for each state, the value of standing, the value
    of hitting, and the difference between them.
    """
    # Define the action value table.
    # NOTE(review): the second argument is named MIN_VAL; confirm it holds
    # the intended second table dimension.
    av_table = ActionValueTable(MAX_VAL, MIN_VAL)
    av_table.initialize(0.)

    # Define the Q-learning agent.
    q_learner = Q(Q_ALPHA, Q_GAMMA)
    q_learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(av_table, q_learner)

    # Environment, task and experiment.
    env = BlackjackEnv()
    task = BlackjackTask(env, verbosity=VERBOSE)
    experiment = Experiment(task, agent)

    # Ready to go, start the process.
    for _ in range(NB_ITERATION):
        experiment.doInteractions(1)
        if task.lastreward != 0:
            if VERBOSE:
                print("Agent learn")
            agent.learn()

    print('|First State|Choice 0 (Stand)|Choice 1 (Hit)|Relative value of Standing over Hitting|')
    print('|:-------:|:-------|:-----|:-----|')
    for i in range(MAX_VAL):
        # Look the row up once instead of once per printed column.
        values = av_table.getActionValues(i)
        stand_value = values[0]
        hit_value = values[1]
        print('| %s | %s | %s | %s |' % (
            (i + 1),
            stand_value,
            hit_value,
            stand_value - hit_value,
        ))
示例12: testValueBased
# 需要导入模块: from pybrain.rl.learners.valuebased import ActionValueTable [as 别名]
# 或者: from pybrain.rl.learners.valuebased.ActionValueTable import initialize [as 别名]
def testValueBased(self):
    """Test value-based learner.

    Builds one task and one table-based SARSA agent per generator in
    ``self.case``, then runs 1000 rounds of 24 interactions, letting every
    agent learn and reset after each round.
    """
    market = SmartMarket(self.case)
    experiment = MarketExperiment([], [], market)

    for generator in self.case.generators:
        environment = DiscreteMarketEnvironment([generator], market)
        n_states, n_actions = (10, 10)
        experiment.tasks.append(ProfitTask(environment, n_states, n_actions))

        # Tabular controller; the original notes an ActionValueNetwork
        # (dimState=1, numActions=4) as an alternative.
        table = ActionValueTable(n_states, n_actions)
        table.initialize(1.0)

        # SARSA here; Q() and QLambda() are listed as alternatives. The
        # default explorer is e-greedy (BoltzmannExplorer is the noted option).
        sarsa = SARSA()
        experiment.agents.append(LearningAgent(table, sarsa))

    for _ in range(1000):
        # Interact with the environment in batch mode.
        experiment.doInteractions(24)
        for learning_agent in experiment.agents:
            learning_agent.learn()
            learning_agent.reset()
示例13: __init__
# 需要导入模块: from pybrain.rl.learners.valuebased import ActionValueTable [as 别名]
# 或者: from pybrain.rl.learners.valuebased.ActionValueTable import initialize [as 别名]
class RL:
    """Wires a SARSA agent with an action-value table to the HASSH environment."""

    def __init__(self):
        """Create the table, learner, agent, environment, task and experiment."""
        table = ActionValueTable(4, 5)
        table.initialize(0.1)
        self.av_table = table

        sarsa = SARSA()
        # Explorer created with 0.0 as its argument.
        sarsa._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, sarsa)

        hassh_task = HASSHTask(HASSHEnv())
        self.experiment = Experiment(hassh_task, self.agent)

    def go(self):
        """Publish the table's first row, run one interaction, then learn.

        The first row of the parameters (viewed as 4x5) is stored in
        ``rassh.core.constants.rl_params`` before the interaction runs.
        """
        # Declared as in the original; this method itself only assigns the
        # attribute on rassh.core.constants.
        global rl_params
        first_row = self.av_table.params.reshape(4, 5)[0]
        rassh.core.constants.rl_params = first_row
        self.experiment.doInteractions(1)
        self.agent.learn()
示例14: __init__
# 需要导入模块: from pybrain.rl.learners.valuebased import ActionValueTable [as 别名]
# 或者: from pybrain.rl.learners.valuebased.ActionValueTable import initialize [as 别名]
def __init__(self):
    """Create the Q-learning agent, Beaver environment, task and experiment.

    The table size, initial table value and the learner/explorer settings
    all come from constants defined outside this method.
    """
    self.interactionscount = 0

    # Action-value table, filled with the configured initial value.
    value_table = ActionValueTable(
        DerivedConstants.NUM_STATES,
        DerivedConstants.NUM_ACTIONS,
    )
    value_table.initialize(INITIAL_ACTION_VALUE_TABLE_VALUE)

    # Q-learning agent with an epsilon-greedy explorer.
    q_learner = Q(ALPHA, GAMMA)
    q_learner._setExplorer(EpsilonGreedyExplorer(EPSILON))
    self.agent = LearningAgent(value_table, q_learner)

    # Environment, the task wrapping it, and the experiment tying task and
    # agent together.
    self.environment = BeaverEnv()
    self.task = BeaverTask(self.environment)
    self.experiment = Experiment(self.task, self.agent)
示例15: explore_maze
# 需要导入模块: from pybrain.rl.learners.valuebased import ActionValueTable [as 别名]
# 或者: from pybrain.rl.learners.valuebased.ActionValueTable import initialize [as 别名]
def explore_maze():
    """Train a Q-learning agent on a text-defined maze and print its greedy policy.

    Simplified version of the reinforcement learning tutorial example.
    Runs 30 rounds of 30 interactions, with a learn/reset after each round,
    then prints the maze map and the greedy policy.
    """
    # NOTE(review): the rows below are not all the same length in this copy
    # (interior whitespace looks collapsed). np.array needs equal-width rows
    # to produce a 2-D grid -- verify against the original maze.
    structure = [
        list("!!!!!!!!!!"),
        list("! ! ! ! !"),
        list("! !! ! ! !"),
        list("! ! !"),
        list("! !!!!!! !"),
        list("! ! ! !"),
        list("! ! !!!! !"),
        list("! !"),
        list("! !!!!! !"),
        list("! ! !"),
        list("!!!!!!!!!!"),
    ]
    # '!' maps to 1 and ' ' maps to 0 (offset from the space character).
    structure = np.array([[ord(c) - ord(" ") for c in row] for row in structure])
    shape = np.array(structure.shape)
    n_states = shape.prod()

    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(n_states, 4)
    controller.initialize(1.0)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    for _ in range(30):
        experiment.doInteractions(30)
        agent.learn()
        agent.reset()

    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(n_states, 4), 1)
    greedy_policy = np.flipud(np.array(list("NESW"))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(" #"))[structure])
    print("Maze map:")
    print("\n".join("".join(row) for row in maze))
    print("Greedy policy:")
    print("\n".join("".join(row) for row in greedy_policy))
    # No assertion on the policy text: the previous check compared against a
    # 5-row string from a 5x5 maze, which an 11-row maze can never match.