本文整理匯總了Python中rlglue.types.Reward_observation_terminal類的典型用法代碼示例。如果您正苦於以下問題：Python Reward_observation_terminal類的具體用法？Python Reward_observation_terminal怎麼用？Python Reward_observation_terminal使用的例子？那麼，這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了Reward_observation_terminal類的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: env_step
def env_step(self, thisAction):
    """Advance the game by one step and package the outcome.

    Moves the player, asks the field for the reward at the player's
    position, reads the game-over flag, redraws the field and builds an
    observation from ``self.img_state``.

    Args:
        thisAction: Action object. It is handed whole to
            ``self.player.update`` and its ``intArray[0]`` is forwarded to
            ``self.field.decision``.

    Returns:
        A ``Reward_observation_terminal`` with ``r`` set to the value
        returned by ``self.field.decision``, ``o`` set to the new
        ``Observation`` and ``terminal`` set to the value returned by
        ``self.field.get_gameover()``.
    """
    # Move the player.
    self.player.update(thisAction)

    # Compute the score after the move; int(v + 0.5) converts the player's
    # coordinates to integer cell indices.
    cell_x = int(self.player.x + 0.5)
    cell_y = int(self.player.y + 0.5)
    reward = self.field.decision(cell_x, cell_y, thisAction.intArray[0])
    game_over = self.field.get_gameover()

    # Draw the field.
    self.draw_field()

    # Observation: 128 leading zeros followed by img_state flattened one level.
    observation = Observation()
    flat_image = [value for row in self.img_state for value in row]
    observation.intArray = np.append(np.zeros(128), flat_image)

    result = Reward_observation_terminal()
    result.r = reward
    result.o = observation
    result.terminal = game_over
    return result
示例2: env_step
def env_step(self, thisAction):
    """Move along a 21-state chain (0..20) and report the result.

    Action ``0`` decrements ``self.currentState`` and action ``1``
    increments it; any other value leaves it unchanged. Reaching state 0
    (or below) ends the episode with reward -1; reaching state 20 (or
    above) ends it with reward +1. Otherwise the reward is 0 and the
    episode continues.

    Args:
        thisAction: Action object; only ``intArray[0]`` is read.

    Returns:
        A ``Reward_observation_terminal`` whose observation's ``intArray``
        holds the single (clamped) current state.
    """
    move = thisAction.intArray[0]
    if move == 0:
        self.currentState -= 1
    elif move == 1:
        self.currentState += 1

    reward, finished = 0, 0
    if self.currentState <= 0:
        # Fell off the low end: clamp and penalise.
        self.currentState, reward, finished = 0, -1, 1
    elif self.currentState >= 20:
        # Reached the high end: clamp and reward.
        self.currentState, reward, finished = 20, 1, 1

    observation = Observation()
    observation.intArray = [self.currentState]

    result = Reward_observation_terminal()
    result.r = reward
    result.o = observation
    result.terminal = finished
    return result
示例3: env_step
def env_step(self, action):
    """Return a scripted reward/observation pair based on the step count.

    While ``self.stepCount`` is below 5 the shared observation ``self.o``
    carries only the current step count in ``intArray``, the counter is
    incremented, the reward is 1.0 and the step that brings the counter
    to 5 is flagged terminal. From then on a fixed set of double, char
    and int values is written into ``self.o`` and the reward is -2.0.

    Args:
        action: Not read by this method.

    Returns:
        A ``Reward_observation_terminal`` whose ``o`` is ``self.o`` (the
        same object is reused and mutated on every call).
    """
    result = Reward_observation_terminal()

    if self.stepCount >= 5:
        # Fixed payload once the scripted five steps have been served.
        self.o.doubleArray = [0.0078125, -0.0078125, 0.0, 0.0078125e150, -0.0078125e150]
        self.o.charArray = ['g', 'F', '?', ' ', '&']
        self.o.intArray = [173, -173, 2147483647, 0, -2147483648]
        finished = False
        result.r = -2.0
    else:
        self.o.doubleArray = []
        self.o.charArray = []
        self.o.intArray = [self.stepCount]
        self.stepCount += 1
        # Terminal exactly on the step that brings the counter to 5.
        finished = self.stepCount == 5
        result.r = 1.0

    result.o = self.o
    result.terminal = finished
    return result
示例4: env_step
def env_step(self, action):
    """Forward one step to the wrapped environment and wrap its result.

    Args:
        action: Incoming action; converted with ``self.get_action`` before
            being passed to ``self.environment.step``.

    Returns:
        A ``Reward_observation_terminal`` carrying the reward and terminal
        flag returned by the wrapped environment, and the observation
        produced by ``self.create_observation`` from the returned state.
    """
    converted = self.get_action(action)
    outcome = self.environment.step(converted)
    # step() is expected to yield exactly (state, reward, terminal).
    next_state, step_reward, is_terminal = outcome

    result = Reward_observation_terminal()
    result.r = step_reward
    result.o = self.create_observation(next_state)
    result.terminal = is_terminal
    return result
示例5: env_step
def env_step(self, action):
    """Return the empty or non-empty observation depending on the episode.

    Even values of ``self.whichEpisode`` select ``self.emptyObservation``;
    odd values select ``self.nonEmptyObservation``. The reward and
    terminal fields are left at whatever ``Reward_observation_terminal()``
    initialises them to.

    Args:
        action: Not read by this method.

    Returns:
        A ``Reward_observation_terminal`` with only ``o`` assigned.
    """
    result = Reward_observation_terminal()
    even_episode = self.whichEpisode % 2 == 0
    result.o = self.emptyObservation if even_episode else self.nonEmptyObservation
    return result
示例6: env_step
def env_step(self, thisAction):
    """Apply an integer action and report reward, state and termination.

    Args:
        thisAction: Action object; ``intArray[0]`` is passed to
            ``self.takeAction``.

    Returns:
        A ``Reward_observation_terminal`` where ``r`` is the reward
        returned by ``self.takeAction``, the observation's ``doubleArray``
        is ``self.state.tolist()`` and ``terminal`` is the returned
        episode-over flag converted with ``int``.
    """
    reward, finished = self.takeAction(thisAction.intArray[0])

    observation = Observation()
    # self.state is converted to a plain list via its tolist() method.
    observation.doubleArray = self.state.tolist()

    result = Reward_observation_terminal()
    result.r = reward
    result.o = observation
    result.terminal = int(finished)
    return result
示例7: env_step
def env_step(self, thisAction):
    """Apply an integer action in a task that never signals termination.

    Args:
        thisAction: Action object; ``intArray[0]`` is coerced with ``int``
            and passed to ``self.takeAction``.

    Returns:
        A ``Reward_observation_terminal`` where ``r`` is the value
        returned by ``self.takeAction``, the observation's ``intArray`` is
        ``self.getState()`` and ``terminal`` is always 0.
    """
    reward = self.takeAction(int(thisAction.intArray[0]))

    observation = Observation()
    observation.intArray = self.getState()

    result = Reward_observation_terminal()
    result.r = reward
    result.o = observation
    result.terminal = 0  # this step function never ends the episode
    return result
示例8: env_step
def env_step(self, thisAction):
    """Apply an integer action and report the resulting observation.

    Args:
        thisAction: Action object; ``intArray[0]`` is passed to
            ``self.takeAction``.

    Returns:
        A ``Reward_observation_terminal`` holding the observation and
        reward returned by ``self.takeAction`` (the observation is used
        as returned, without wrapping), with ``terminal`` set to the
        value of ``mdptetris.isgameover()``.
    """
    observation, step_reward = self.takeAction(thisAction.intArray[0])

    result = Reward_observation_terminal()
    result.r = step_reward
    result.o = observation
    result.terminal = mdptetris.isgameover()
    return result
示例9: env_step
def env_step(self, thisAction):
    """Move the agent with a continuous action and report the result.

    Args:
        thisAction: Action object; ``doubleArray[0]`` is passed to
            ``self.updatePosition``.

    Returns:
        A ``Reward_observation_terminal`` where the observation's
        ``doubleArray`` is ``[self.agentRow, self.agentCol]``, ``r`` is
        ``self.calculateReward`` applied to the value returned by
        ``self.updatePosition``, and ``terminal`` is
        ``self.checkCurrentTerminal()``.
    """
    boundary_hit = self.updatePosition(thisAction.doubleArray[0])

    # Position is read after the update so it reflects the new location.
    observation = Observation()
    observation.doubleArray = [self.agentRow, self.agentCol]

    result = Reward_observation_terminal()
    result.r = self.calculateReward(boundary_hit)
    result.o = observation
    result.terminal = self.checkCurrentTerminal()
    return result
示例10: env_step
def env_step(self, thisAction):
    """Apply an integer action in a task that never signals termination.

    Args:
        thisAction: Action object; ``intArray[0]`` is passed to
            ``self.takeAction``.

    Returns:
        A ``Reward_observation_terminal`` where ``r`` is the reward
        returned by ``self.takeAction``, the observation's ``doubleArray``
        is a one-element list wrapping the returned observation value,
        and ``terminal`` is always 0.
    """
    raw_obs, step_reward = self.takeAction(thisAction.intArray[0])

    observation = Observation()
    observation.doubleArray = [raw_obs]  # wrap the single value in a list

    result = Reward_observation_terminal()
    result.r = step_reward
    result.o = observation
    result.terminal = 0
    return result
示例11: env_step
def env_step(self, action):
    """Return a fixed-reward step whose payload size depends on the episode.

    On even values of ``self.whichEpisode`` the shared observation
    ``self.o`` is filled with 50000-element arrays and the episode ends
    when ``self.stepCount`` reaches 200. On odd values the arrays hold 5
    elements and the episode ends when the counter reaches 5000. The
    reward is always 1.0.

    Args:
        action: Not read by this method.

    Returns:
        A ``Reward_observation_terminal`` whose ``o`` is ``self.o`` (the
        same object is reused and overwritten on every call) and whose
        ``terminal`` is 1 on the final step, 0 otherwise.
    """
    self.stepCount = self.stepCount + 1

    # The two episode flavours differ only in these two constants.
    if self.whichEpisode % 2 == 0:
        array_len, final_step = 50000, 200
    else:
        array_len, final_step = 5, 5000

    self.o.intArray = list(range(array_len))
    # Cheating, might break something: doubleArray is filled with ints.
    self.o.doubleArray = list(range(array_len))

    ro = Reward_observation_terminal()
    ro.r = 1.0
    ro.o = self.o
    ro.terminal = 1 if self.stepCount == final_step else 0
    return ro
示例12: env_step
def env_step(self, thisAction):
    """Apply a two-component continuous action and report the result.

    Args:
        thisAction: Action object whose ``doubleArray`` must contain
            exactly two values; the array is passed to
            ``self.takeAction``.

    Returns:
        A ``Reward_observation_terminal`` where the observation's
        ``doubleArray`` is ``self.getState().tolist()``, ``r`` is the
        reward from ``self.getReward()`` and ``terminal`` is the returned
        termination flag converted with ``int``.

    Raises:
        AssertionError: If ``thisAction.doubleArray`` does not have
            exactly two elements.
    """
    # Validate the action. The message now matches the length that is
    # actually checked (it previously claimed 4).
    assert len(thisAction.doubleArray) == 2, "Expected 2 double actions."

    self.takeAction(thisAction.doubleArray)

    theObs = Observation()
    theObs.doubleArray = self.getState().tolist()

    theReward, terminate = self.getReward()

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = int(terminate)
    return returnRO
示例13: env_step
def env_step(self, thisAction):
    """Apply an integer action and end the episode at the goal or on empty fuel.

    The episode is over when ``self.isAtGoal()`` is true, or when a fuel
    location is configured (``self.fuel_loc is not None``) and
    ``self.fuel`` has dropped below zero.

    Args:
        thisAction: Action object; ``intArray[0]`` is passed to
            ``self.takeAction``.

    Returns:
        A ``Reward_observation_terminal`` where ``r`` is the value
        returned by ``self.takeAction``, ``o`` is
        ``self.makeObservation()`` and ``terminal`` is 1 when the episode
        is over, 0 otherwise.
    """
    intAction = thisAction.intArray[0]
    theReward = self.takeAction(intAction)

    # The comparison belongs inside the parentheses: fuel only matters
    # when a fuel location exists. (Previously the whole `and` expression
    # was compared to 0, which compared a boolean when fuel_loc was None.)
    episodeOver = 0
    if self.isAtGoal() or (self.fuel_loc is not None and self.fuel < 0):
        episodeOver = 1

    theObs = self.makeObservation()

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = episodeOver
    return returnRO
示例14: env_step
def env_step(self, thisAction):
    """Validate and apply one of four integer actions, then report the result.

    Args:
        thisAction: Action object whose ``intArray`` must hold exactly one
            value in the range 0..3; that value is passed to
            ``self.updatePosition``.

    Returns:
        A ``Reward_observation_terminal`` where the observation's
        ``intArray`` holds the single value ``self.calculateFlatState()``,
        ``r`` is ``self.calculateReward()`` and ``terminal`` is
        ``self.checkCurrentTerminal()``.

    Raises:
        AssertionError: If the action does not contain exactly one
            integer, or that integer is outside 0..3.
    """
    # Make sure the action is valid.
    actions = thisAction.intArray
    assert len(actions) == 1, "Expected 1 integer action."
    assert 0 <= actions[0] < 4, "Expected action to be in [0,3]"

    self.updatePosition(actions[0])

    observation = Observation()
    observation.intArray = [self.calculateFlatState()]

    result = Reward_observation_terminal()
    result.r = self.calculateReward()
    result.o = observation
    result.terminal = self.checkCurrentTerminal()
    return result
示例15: env_step
def env_step(self, thisAction):
    """Render one frame, dispatch pending pygame events, return a zero step.

    The screen is cleared and then shows either the game-over message,
    the "Paused" message, or the full playfield (divider line, next
    stone, score panel, background grid, board and current stone).
    Pending pygame events are then handled: the ``USEREVENT+1`` event
    drops the stone, ``QUIT`` calls ``self.quit()``, and key presses are
    dispatched through the ``key_actions`` mapping.

    Args:
        thisAction: Not read by this method.

    Returns:
        A ``Reward_observation_terminal`` with reward 0, terminal flag 0
        and an observation whose ``intArray`` is ``np.zeros(50816)``.
    """
    self.screen.fill((0, 0, 0))

    if self.gameover:
        self.center_msg("""Game Over!\nYour score: %d Press space to continue""" % self.score)
    elif self.paused:
        self.center_msg("Paused")
    else:
        # Vertical divider between the board and the side panel.
        pygame.draw.line(self.screen,
                         (255, 255, 255),
                         (self.rlim + 1, 0),
                         (self.rlim + 1, self.height - 1))
        self.disp_msg("Next:", (self.rlim + cell_size, 2))
        self.disp_msg("Score: %d\n\nLevel: %d\nLines: %d" % (self.score, self.level, self.lines),
                      (self.rlim + cell_size, cell_size * 5))
        self.draw_matrix(self.bground_grid, (0, 0))
        self.draw_matrix(self.board, (0, 0))
        self.draw_matrix(self.stone, (self.stone_x, self.stone_y))
        self.draw_matrix(self.next_stone, (cols + 1, 2))
    # NOTE(review): indentation was lost in the source listing; the
    # display update is assumed to run for every frame, not only while
    # playing -- confirm against the original file.
    pygame.display.update()

    for event in pygame.event.get():
        if event.type == pygame.USEREVENT + 1:
            self.drop(False)
        elif event.type == pygame.QUIT:
            self.quit()
        elif event.type == pygame.KEYDOWN:
            for key in key_actions:
                # Look the key constant up by name instead of eval()-ing
                # a constructed expression.
                if event.key == getattr(pygame, "K_" + key):
                    key_actions[key]()

    episodeOver = 0
    theReward = 0

    theObs = Observation()
    theObs.intArray = np.zeros(50816)

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = episodeOver
    return returnRO