本文整理匯總了Python中pybrain.rl.learners.valuebased.ActionValueTable.getActionValues方法的典型用法代碼示例。如果您正苦於以下問題:Python ActionValueTable.getActionValues方法的具體用法?Python ActionValueTable.getActionValues怎麼用?Python ActionValueTable.getActionValues使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類pybrain.rl.learners.valuebased.ActionValueTable的用法示例。
在下文中一共展示了ActionValueTable.getActionValues方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: run
# 需要導入模塊: from pybrain.rl.learners.valuebased import ActionValueTable [as 別名]
# 或者: from pybrain.rl.learners.valuebased.ActionValueTable import getActionValues [as 別名]
def run():
    """Train a Q-learning agent on the blackjack task and print its value table.

    State/action layout as noted by the original author:
        number of states:  current value, 0-20
        number of actions: Stand=0, Hit=1

    The function takes no arguments and returns nothing. It relies on names
    defined elsewhere in the module (MAX_VAL, MIN_VAL, Q_ALPHA, Q_GAMMA,
    VERBOSE, NB_ITERATION, BlackjackEnv, BlackjackTask and the PyBrain
    classes) and writes a Markdown-style table to standard output.
    """
    # Action-value table, initialised to zero everywhere.
    # PyBrain's signature is ActionValueTable(numStates, numActions).
    # NOTE(review): the constant names read like value bounds, but they are
    # used here as the state count and the action count. The table printed
    # below indexes actions 0 and 1, so MIN_VAL must be at least 2 - confirm.
    av_table = ActionValueTable(MAX_VAL, MIN_VAL)
    av_table.initialize(0.)

    # Q-learning agent. Epsilon is 0.0, so the explorer is given no
    # exploration probability.
    q_learner = Q(Q_ALPHA, Q_GAMMA)
    q_learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(av_table, q_learner)

    # Environment, task and experiment wiring.
    env = BlackjackEnv()
    task = BlackjackTask(env, verbosity=VERBOSE)
    experiment = Experiment(task, agent)

    # One interaction per iteration; the agent only learns after an
    # interaction that produced a non-zero reward.
    # NOTE(review): presumably a zero reward means the hand is still in
    # progress - confirm against BlackjackTask.
    for _ in range(NB_ITERATION):
        experiment.doInteractions(1)
        if task.lastreward != 0:
            if VERBOSE:
                print("Agent learn")
            agent.learn()

    # Report the learned values as a Markdown table. States are shown
    # 1-based, while the table itself is indexed from 0.
    print('|First State|Choice 0 (Stand)|Choice 1 (Hit)|Relative value of Standing over Hitting|')
    print('|:-------:|:-------|:-----|:-----|')
    for state in range(MAX_VAL):
        # Fetch the row once instead of once per printed column.
        values = av_table.getActionValues(state)
        stand_value = values[0]
        hit_value = values[1]
        print('| %s | %s | %s | %s |' % (
            state + 1,
            stand_value,
            hit_value,
            stand_value - hit_value,
        ))
示例2: runMainProg
# 需要導入模塊: from pybrain.rl.learners.valuebased import ActionValueTable [as 別名]
# 或者: from pybrain.rl.learners.valuebased.ActionValueTable import getActionValues [as 別名]
def _print_action_values(av_table, num_states):
    """Print the action values stored in av_table for states 0..num_states-1."""
    for state in range(num_states):
        # The double spaces reproduce what the original comma-separated
        # Python 2 print statement emitted between its items.
        print("The AV Value At  %s  is:  %s"
              % (state, av_table.getActionValues(state)))


def runMainProg():
    """Play 10000 blackjack games with a Q-learning agent and report results.

    Builds a 32-state, 2-action value table, prints it, runs the games via
    playGame, prints the win/tie counters, then prints the table again so
    the values before and after play can be compared.

    Relies on names defined elsewhere in the module: the Blackjack* classes,
    playGame, and the counters GamesAgentWon, GamesDealerWon, GamesTied and
    TotalGames (presumably updated by playGame - confirm).
    """
    num_states = 32
    num_actions = 2
    num_games = 10000

    # Action-value table, initialised to zero and dumped before any play.
    av_table = ActionValueTable(num_states, num_actions)
    av_table.initialize(0.)
    _print_action_values(av_table, num_states)

    # Q-learning agent; PyBrain's signature is Q(alpha, gamma).
    learner = Q(0.5, 0.0)
    # NOTE(review): "(0, 0)" passes two positional arguments and may be a
    # typo for "0.0". Epsilon is 0 either way, so it is kept as written.
    learner._setExplorer(EpsilonGreedyExplorer(0, 0))
    agent = LearningAgent(av_table, learner)

    # The deck is passed to both the environment and the dealer.
    theDeck = BlackjackCardDeck()
    env = BlackjackEnv(theDeck)
    env.createHand()
    dealer = BlackjackDealer(theDeck)

    task = BlackjackTask(env)
    experiment = Experiment(task, agent)

    # Run the games.
    for _ in range(num_games):
        playGame(dealer, task, env, experiment, agent)

    # Summary counters (module-level names, only read here).
    print("Games Agent Won:  %s" % (GamesAgentWon,))
    print("Games Dealer won:  %s" % (GamesDealerWon,))
    print("Games Tied:  %s" % (GamesTied,))
    print("Total Games Played:  %s" % (TotalGames,))

    # Dump the table again after play.
    _print_action_values(av_table, num_states)