

Python types.Reward_observation_terminal Class Code Examples

This article collects typical usage examples of the rlglue.types.Reward_observation_terminal class in Python. If you are asking what the Reward_observation_terminal class does, how to use it, or what code that uses it looks like, the curated class examples below should help.


The following presents 15 code examples of the Reward_observation_terminal class, sorted by popularity by default.
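Before the examples, here is a minimal sketch of the pattern they all share: an env_step implementation packs the step's reward, the next observation, and the episode-terminal flag into a single Reward_observation_terminal return value. The one-dimensional walk logic and the env_step_sketch name below are hypothetical illustrations; only the rlglue.types classes and their r, o, terminal, and intArray fields come from the examples themselves.

    from rlglue.types import Observation, Reward_observation_terminal

    def env_step_sketch(state, action_int):
        # Hypothetical dynamics: a one-dimensional walk toward a goal at 10.
        state = state - 1 if action_int == 0 else state + 1
        terminal = 1 if state >= 10 else 0
        reward = 1.0 if terminal else 0.0

        obs = Observation()
        obs.intArray = [state]       # observation payload

        rot = Reward_observation_terminal()
        rot.r = reward               # scalar reward for this step
        rot.o = obs                  # the Observation instance
        rot.terminal = terminal     # 0/1 (or bool) episode-over flag
        return rot

All fifteen examples below follow this shape, differing only in how the reward, observation, and terminal flag are computed.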

Example 1: env_step

    def env_step(self,thisAction):

        # Move the player
        self.player.update(thisAction)
      
        # Compute the score after the move
        theReward = self.field.decision(int(self.player.x+0.5), int(self.player.y+0.5), thisAction.intArray[0])
        #print("Reward:%d" %theReward)
        episodeOver = self.field.get_gameover()
        #print("EdgeTracer:episodeOver %03d" %episodeOver)
      
        # Draw the field
        self.draw_field()

        returnObs=Observation()
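        # Prepend 128 zeros, then append the flattened 2-D screen image as one int array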
        returnObs.intArray=np.append(np.zeros(128), [ item for innerlist in self.img_state for item in innerlist ])
        #scipy.misc.imsave('l_screen.png', img_src)
        #scipy.misc.imsave('r_screen.png', img_afn)

        returnRO=Reward_observation_terminal()
        returnRO.r=theReward
        returnRO.o=returnObs
        returnRO.terminal=episodeOver
 
        return returnRO
Developer ID: hashima, Project: DQN_Framework, Lines of code: 25, Source: env.py

Example 2: env_step

    def env_step(self,thisAction):
        episodeOver=0
        theReward=0

        if thisAction.intArray[0]==0:
            self.currentState=self.currentState-1
        if thisAction.intArray[0]==1:
            self.currentState=self.currentState+1

        if self.currentState <= 0:
            self.currentState=0
            theReward=-1
            episodeOver=1

        if self.currentState >= 20:
            self.currentState=20
            theReward=1
            episodeOver=1

        theObs=Observation()
        theObs.intArray=[self.currentState]

        returnRO=Reward_observation_terminal()
        returnRO.r=theReward
        returnRO.o=theObs
        returnRO.terminal=episodeOver

        return returnRO
Developer ID: AAHays, Project: python-rl, Lines of code: 28, Source: skeleton_environment.py

Example 3: env_step

	def env_step(self,action):
		ro=Reward_observation_terminal()
		terminal=False

		if self.stepCount < 5:
			self.o.doubleArray=[]
			self.o.charArray=[]
			self.o.intArray=[self.stepCount]
	
			self.stepCount=self.stepCount+1
				
			if self.stepCount==5:
				terminal=True

			ro.r=1.0

		else:
			self.o.doubleArray=[0.0078125,-0.0078125,0.0,0.0078125e150,-0.0078125e150]
			self.o.charArray=['g','F','?',' ','&']
			self.o.intArray=[173,-173,2147483647,0,-2147483648]

			ro.r=-2.0

		ro.o=self.o
		ro.terminal=terminal
		return ro	
Developer ID: junzhez, Project: rl_glue_python3_codec, Lines of code: 26, Source: test_1_environment.py

Example 4: env_step

    def env_step(self, action):
        state, reward, terminal = self.environment.step(self.get_action(action))

        rot = Reward_observation_terminal()
        rot.r = reward
        rot.o = self.create_observation(state)
        rot.terminal = terminal
        return rot
Developer ID: ProjectGameTheory, Project: PyALE, Lines of code: 8, Source: RLGlueEnvironment.py

Example 5: env_step

	def env_step(self,action):
		ro=Reward_observation_terminal()
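		# r and terminal are left at their constructor defaults; this test only varies the observation payload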
		
		if self.whichEpisode % 2 == 0:
			ro.o=self.emptyObservation
		else:
			ro.o=self.nonEmptyObservation

		return ro	
Developer ID: junzhez, Project: rl_glue_python3_codec, Lines of code: 9, Source: test_empty_environment.py

Example 6: env_step

    def env_step(self,thisAction):
        intAction = thisAction.intArray[0]
        theReward, episodeOver = self.takeAction(intAction)

        theObs = Observation()
        theObs.doubleArray = self.state.tolist()
        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = int(episodeOver)

        return returnRO
Developer ID: AAHays, Project: python-rl, Lines of code: 12, Source: bicycle.py

Example 7: env_step

    def env_step(self,thisAction):
        intAction = int(thisAction.intArray[0])
        theReward = self.takeAction(intAction)
        theObs = Observation()
        theObs.intArray = self.getState()

        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = 0

        return returnRO
Developer ID: AAHays, Project: python-rl, Lines of code: 12, Source: chain.py

Example 8: env_step

    def env_step(self,thisAction):
        intAction = thisAction.intArray[0]
        obs, reward = self.takeAction(intAction)

        theObs = obs

        returnRO = Reward_observation_terminal()
        returnRO.r = reward
        returnRO.o = theObs
        returnRO.terminal = mdptetris.isgameover()

        return returnRO
Developer ID: AAHays, Project: python-rl, Lines of code: 12, Source: tetris.py

Example 9: env_step

    def env_step(self, thisAction):
        # print self.agentRow, self.agentCol
        hitBoundary = self.updatePosition(thisAction.doubleArray[0])

        theObs = Observation()
        theObs.doubleArray = [self.agentRow, self.agentCol]

        returnRO = Reward_observation_terminal()
        returnRO.r = self.calculateReward(hitBoundary)
        returnRO.o = theObs
        returnRO.terminal = self.checkCurrentTerminal()

        return returnRO
Developer ID: tknandu, Project: ContinuousMaze, Lines of code: 13, Source: environment.py

Example 10: env_step

	def env_step(self,thisAction):
		intAction = thisAction.intArray[0]
		obs, reward = self.takeAction(intAction)

		theObs = Observation()
		theObs.doubleArray = [obs]
		
		returnRO = Reward_observation_terminal()
		returnRO.r = reward
		returnRO.o = theObs
		returnRO.terminal = 0

		return returnRO
Developer ID: AAHays, Project: python-rl, Lines of code: 13, Source: pomdp.py

Example 11: env_step

    def env_step(self,action):
        self.stepCount=self.stepCount+1

        if self.whichEpisode % 2 == 0:
            self.o.intArray=list(range(0,50000))
            #cheating, might break something
            self.o.doubleArray=list(range(0,50000))
            terminal=0
            if self.stepCount==200:
                terminal=1
            ro=Reward_observation_terminal()
            ro.r=1.0
            ro.o=self.o
            ro.terminal=terminal
            return ro

        self.o.intArray=list(range(0,5))
        #cheating, might break something
        self.o.doubleArray=list(range(0,5))
        terminal=0
        if self.stepCount==5000:
            terminal=1
        ro=Reward_observation_terminal()
        ro.r=1.0
        ro.o=self.o
        ro.terminal=terminal
        return ro
Developer ID: steckdenis, Project: rlglue-py3, Lines of code: 27, Source: test_speed_environment.py

Example 12: env_step

    def env_step(self,thisAction):
        # validate the action 
        assert len(thisAction.doubleArray)==2,"Expected 2 double actions."
        
        self.takeAction(thisAction.doubleArray)
        
        theObs = Observation()
        theObs.doubleArray = self.getState().tolist()
        
        theReward,terminate = self.getReward()
        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = int(terminate)

        return returnRO
Developer ID: hughhugh, Project: dqn-vrep, Lines of code: 16, Source: env_vrep.py

Example 13: env_step

	def env_step(self,thisAction):
		episodeOver = 0
		theReward = -1.0
		intAction = thisAction.intArray[0]

		theReward = self.takeAction(intAction)

		if self.isAtGoal() or (self.fuel_loc is not None and self.fuel < 0):
			episodeOver = 1

		theObs = self.makeObservation()
		returnRO = Reward_observation_terminal()
		returnRO.r = theReward
		returnRO.o = theObs
		returnRO.terminal = episodeOver

		return returnRO
Developer ID: AAHays, Project: python-rl, Lines of code: 17, Source: taxi.py

Example 14: env_step

	def env_step(self,thisAction):
		# Make sure the action is valid 
		assert len(thisAction.intArray)==1,"Expected 1 integer action."
		assert thisAction.intArray[0]>=0, "Expected action to be in [0,3]"
		assert thisAction.intArray[0]<4, "Expected action to be in [0,3]"
		
		self.updatePosition(thisAction.intArray[0])

		theObs=Observation()
		theObs.intArray=[self.calculateFlatState()]

		returnRO=Reward_observation_terminal()
		returnRO.r=self.calculateReward()
		returnRO.o=theObs
		returnRO.terminal=self.checkCurrentTerminal()

		return returnRO
Developer ID: zydeon, Project: rl-comp2014, Lines of code: 17, Source: sample_mines_environment.py

Example 15: env_step

	def env_step(self,thisAction):
		self.screen.fill((0,0,0))
		if self.gameover:
			self.center_msg("""Game Over!\nYour score: %d Press space to continue""" % self.score)
		else:
			if self.paused:
				self.center_msg("Paused")
			else:
				pygame.draw.line(self.screen,
					(255,255,255),
					(self.rlim+1, 0),
					(self.rlim+1, self.height-1))
				self.disp_msg("Next:", (
					self.rlim+cell_size,
					2))
				self.disp_msg("Score: %d\n\nLevel: %d\nLines: %d" % (self.score, self.level, self.lines),(self.rlim+cell_size, cell_size*5))
				self.draw_matrix(self.bground_grid, (0,0))
				self.draw_matrix(self.board, (0,0))
				self.draw_matrix(self.stone,
					(self.stone_x, self.stone_y))
				self.draw_matrix(self.next_stone,
					(cols+1,2))
		pygame.display.update()
			
		for event in pygame.event.get():
			if event.type == pygame.USEREVENT+1:
				self.drop(False)
			elif event.type == pygame.QUIT:
				self.quit()
			elif event.type == pygame.KEYDOWN:
				for key in key_actions:
					if event.key == eval("pygame.K_"+key):
						key_actions[key]()

		episodeOver=0
		theReward=0

		theObs=Observation()
		theObs.intArray=np.zeros(50816, dtype=int)  # integer dtype to match intArray
		
		returnRO=Reward_observation_terminal()
		returnRO.r=theReward
		returnRO.o=theObs
		returnRO.terminal=episodeOver
		
		return returnRO
Developer ID: ProjectRune, Project: DQN_Tetris, Lines of code: 46, Source: tetris.py


Note: the rlglue.types.Reward_observation_terminal class examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and distribution and use should follow each project's license. Do not reproduce without permission.