

Python Reward_observation_terminal.o Method Code Examples

This article collects typical usage examples of the Reward_observation_terminal.o method from rlglue.types in Python. If you are wondering what Reward_observation_terminal.o does, how to use it, or what calling it looks like in practice, the curated code examples below may help. You can also explore further usage examples of rlglue.types.Reward_observation_terminal, the class this method belongs to.


The following presents 15 code examples of the Reward_observation_terminal.o method, sorted by popularity by default.
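Before the individual examples, here is a minimal sketch of how the o field of Reward_observation_terminal is typically set inside an environment's env_step method. The field names match the RL-Glue Python codec types used throughout the examples below; the observation value itself is a made-up placeholder.

from rlglue.types import Observation, Reward_observation_terminal

# Build an observation describing the current state (placeholder value).
obs = Observation()
obs.intArray = [42]

# Package reward, observation, and terminal flag for the RL-Glue step return.
ro = Reward_observation_terminal()
ro.r = 1.0        # reward for this step
ro.o = obs        # the o field carries the Observation back to the agent
ro.terminal = 0   # set to 1 when the episode ends

# An environment's env_step(action) would return ro to the experiment program.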

Example 1: env_step

# Required import: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import o [as alias]
        def env_step(self,action):
                self.stepCount=self.stepCount+1
                
                if self.whichEpisode % 2 == 0:
                        self.o.intArray=list(range(0,50000))
                        #cheating, might break something
                        self.o.doubleArray=list(range(0,50000))
                        terminal=0
                        if self.stepCount==200:
                                terminal=1
                        ro=Reward_observation_terminal()
                        ro.r=1.0
                        ro.o=self.o
                        ro.terminal=terminal
                        return ro

                self.o.intArray=list(range(0,5))
                #cheating, might break something
                self.o.doubleArray=list(range(0,5))
                terminal=0
                if self.stepCount==5000:
                        terminal=1
                ro=Reward_observation_terminal()
                ro.r=1.0
                ro.o=self.o
                ro.terminal=terminal
                return ro
Author: steckdenis, Project: rlglue-py3, Lines: 29, Source: test_speed_environment.py

Example 2: env_step

# Required import: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import o [as alias]
	def env_step(self,action):
		ro=Reward_observation_terminal()
		
		if self.whichEpisode % 2 == 0:
			ro.o=self.emptyObservation
		else:
			ro.o=self.nonEmptyObservation

		return ro	
Author: junzhez, Project: rl_glue_python3_codec, Lines: 11, Source: test_empty_environment.py

Example 3: env_step

# Required import: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import o [as alias]
    def env_step(self,thisAction):
        episodeOver=0
        theReward=0

        if thisAction.intArray[0]==0:
            self.currentState=self.currentState-1
        if thisAction.intArray[0]==1:
            self.currentState=self.currentState+1

        if self.currentState <= 0:
            self.currentState=0
            theReward=-1
            episodeOver=1

        if self.currentState >= 20:
            self.currentState=20
            theReward=1
            episodeOver=1

        theObs=Observation()
        theObs.intArray=[self.currentState]

        returnRO=Reward_observation_terminal()
        returnRO.r=theReward
        returnRO.o=theObs
        returnRO.terminal=episodeOver

        return returnRO
Author: AAHays, Project: python-rl, Lines: 30, Source: skeleton_environment.py

Example 4: env_step

# Required import: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import o [as alias]
    def env_step(self,thisAction):

        # Move the player
        self.player.update(thisAction)
      
        # Compute the score after the move
        theReward = self.field.decision(int(self.player.x+0.5), int(self.player.y+0.5), thisAction.intArray[0])
        #print("Reward:%d" %theReward)
        episodeOver = self.field.get_gameover()
        #print("EdgeTracer:episodeOver %03d" %episodeOver)
      
        # Draw the field
        self.draw_field()

        returnObs=Observation()
        returnObs.intArray=np.append(np.zeros(128), [ item for innerlist in self.img_state for item in innerlist ])
        #scipy.misc.imsave('l_screen.png', img_src)
        #scipy.misc.imsave('r_screen.png', img_afn)

        returnRO=Reward_observation_terminal()
        returnRO.r=theReward
        returnRO.o=returnObs
        returnRO.terminal=episodeOver
 
        return returnRO
Author: hashima, Project: DQN_Framework, Lines: 27, Source: env.py

Example 5: env_step

# Required import: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import o [as alias]
	def env_step(self,action):
		ro=Reward_observation_terminal()
		terminal=False

		if self.stepCount < 5:
			self.o.doubleArray=[]
			self.o.charArray=[]
			self.o.intArray=[self.stepCount]
	
			self.stepCount=self.stepCount+1
				
			if self.stepCount==5:
				terminal=True

			ro.r=1.0

		else:
			self.o.doubleArray=[0.0078125,-0.0078125,0.0,0.0078125e150,-0.0078125e150]
			self.o.charArray=['g','F','?',' ','&']
			self.o.intArray=[173,-173,2147483647,0,-2147483648]

			ro.r=-2.0

		ro.o=self.o
		ro.terminal=terminal
		return ro	
Author: junzhez, Project: rl_glue_python3_codec, Lines: 28, Source: test_1_environment.py

Example 6: env_step

# Required import: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import o [as alias]
    def env_step(self, action):
        action = action.intArray

        if len(action) != 3:
            print(action, len(action))

        assert len(action) == self.simulationParameterObj.nbrReaches, "Expected " + str(
            self.simulationParameterObj.nbrReaches) + " integer action."

        if not InvasiveUtility.is_action_allowable(action, self.state):
            theObs = Observation()
            InvasiveUtility.is_action_allowable(action, self.state)
            #map(int, results)
            theObs.intArray = [-1]
            returnRO = Reward_observation_terminal()
            returnRO.r = self.Bad_Action_Penalty
            returnRO.o = theObs
            return returnRO

        cost_state_unit = InvasiveUtility.get_unit_invaded_reaches(self.state,
            self.simulationParameterObj.habitatSize) * self.actionParameterObj.costPerReach
        stateCost = cost_state_unit + InvasiveUtility.get_invaded_reaches(
            self.state) * self.actionParameterObj.costPerTree

        stateCost = stateCost + InvasiveUtility.get_empty_slots(self.state) * self.actionParameterObj.emptyCost

        costAction = InvasiveUtility.get_budget_cost_actions(action, self.state, self.actionParameterObj)

        if costAction > self.actionParameterObj.budget:
            theObs = Observation()
            InvasiveUtility.is_action_allowable(action, self.state)
            #map(int, results)
            theObs.intArray = [-1]
            returnRO = Reward_observation_terminal()
            returnRO.r = self.Bad_Action_Penalty
            returnRO.o = theObs
            return returnRO

        nextState = simulateNextState(self.state, action, self.simulationParameterObj,
            self.actionParameterObj, self.dispertionTable, self.germinationObj)
        self.state = nextState
        theObs = Observation()
        theObs.intArray = self.state
        returnRO = Reward_observation_terminal()
        returnRO.r = -1 * (costAction + stateCost)
        returnRO.o = theObs
        return returnRO
Author: Wojje, Project: Reinforcement-Learning-Competition-2014, Lines: 49, Source: InvasiveEnvironment.py

Example 7: env_step

# Required import: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import o [as alias]
    def env_step(self, action):
        state, reward, terminal = self.environment.step(self.get_action(action))

        rot = Reward_observation_terminal()
        rot.r = reward
        rot.o = self.create_observation(state)
        rot.terminal = terminal
        return rot
Author: ProjectGameTheory, Project: PyALE, Lines: 10, Source: RLGlueEnvironment.py

Example 8: env_step

# Required import: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import o [as alias]
    def env_step(self,thisAction):
        intAction = int(thisAction.intArray[0])
        theReward = self.takeAction(intAction)
        theObs = Observation()
        theObs.intArray = self.getState()

        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = 0

        return returnRO
Author: AAHays, Project: python-rl, Lines: 14, Source: chain.py

Example 9: env_step

# Required import: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import o [as alias]
    def env_step(self,thisAction):
        intAction = thisAction.intArray[0]
        obs, reward = self.takeAction(intAction)

        theObs = obs

        returnRO = Reward_observation_terminal()
        returnRO.r = reward
        returnRO.o = theObs
        returnRO.terminal = mdptetris.isgameover()

        return returnRO
Author: AAHays, Project: python-rl, Lines: 14, Source: tetris.py

Example 10: env_step

# Required import: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import o [as alias]
    def env_step(self,thisAction):
        intAction = thisAction.intArray[0]
        theReward, episodeOver = self.takeAction(intAction)

        theObs = Observation()
        theObs.doubleArray = self.state.tolist()
        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = int(episodeOver)

        return returnRO
Author: AAHays, Project: python-rl, Lines: 14, Source: bicycle.py

Example 11: env_step

# Required import: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import o [as alias]
    def env_step(self, thisAction):
        # print self.agentRow, self.agentCol
        hitBoundary = self.updatePosition(thisAction.doubleArray[0])

        theObs = Observation()
        theObs.doubleArray = [self.agentRow, self.agentCol]

        returnRO = Reward_observation_terminal()
        returnRO.r = self.calculateReward(hitBoundary)
        returnRO.o = theObs
        returnRO.terminal = self.checkCurrentTerminal()

        return returnRO
Author: tknandu, Project: ContinuousMaze, Lines: 15, Source: environment.py

Example 12: env_step

# Required import: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import o [as alias]
	def env_step(self,thisAction):
		intAction = thisAction.intArray[0]
		obs, reward = self.takeAction(intAction)

		theObs = Observation()
		theObs.doubleArray = [obs]
		
		returnRO = Reward_observation_terminal()
		returnRO.r = reward
		returnRO.o = theObs
		returnRO.terminal = 0

		return returnRO
Author: AAHays, Project: python-rl, Lines: 15, Source: pomdp.py

Example 13: env_step

# Required import: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import o [as alias]
    def env_step(self,thisAction):
        # validate the action 
        assert len(thisAction.doubleArray)==2, "Expected 2 double actions."
        
        self.takeAction(thisAction.doubleArray)
        
        theObs = Observation()
        theObs.doubleArray = self.getState().tolist()
        
        theReward,terminate = self.getReward()
        returnRO = Reward_observation_terminal()
        returnRO.r = theReward
        returnRO.o = theObs
        returnRO.terminal = int(terminate)

        return returnRO
Author: hughhugh, Project: dqn-vrep, Lines: 18, Source: env_vrep.py

Example 14: env_step

# Required import: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import o [as alias]
	def env_step(self,thisAction):
		# Make sure the action is valid 
		assert len(thisAction.intArray)==1,"Expected 1 integer action."
		assert thisAction.intArray[0]>=0, "Expected action to be in [0,3]"
		assert thisAction.intArray[0]<4, "Expected action to be in [0,3]"
		
		self.updatePosition(thisAction.intArray[0])

		theObs=Observation()
		theObs.intArray=[self.calculateFlatState()]

		returnRO=Reward_observation_terminal()
		returnRO.r=self.calculateReward()
		returnRO.o=theObs
		returnRO.terminal=self.checkCurrentTerminal()

		return returnRO
Author: zydeon, Project: rl-comp2014, Lines: 19, Source: sample_mines_environment.py

Example 15: env_step

# Required import: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import o [as alias]
	def env_step(self,thisAction):
		self.screen.fill((0,0,0))
		if self.gameover:
			self.center_msg("""Game Over!\nYour score: %d Press space to continue""" % self.score)
		else:
			if self.paused:
				self.center_msg("Paused")
			else:
				pygame.draw.line(self.screen,
					(255,255,255),
					(self.rlim+1, 0),
					(self.rlim+1, self.height-1))
				self.disp_msg("Next:", (
					self.rlim+cell_size,
					2))
				self.disp_msg("Score: %d\n\nLevel: %d\nLines: %d" % (self.score, self.level, self.lines),(self.rlim+cell_size, cell_size*5))
				self.draw_matrix(self.bground_grid, (0,0))
				self.draw_matrix(self.board, (0,0))
				self.draw_matrix(self.stone,
					(self.stone_x, self.stone_y))
				self.draw_matrix(self.next_stone,
					(cols+1,2))
		pygame.display.update()
			
		for event in pygame.event.get():
			if event.type == pygame.USEREVENT+1:
				self.drop(False)
			elif event.type == pygame.QUIT:
				self.quit()
			elif event.type == pygame.KEYDOWN:
				for key in key_actions:
					if event.key == eval("pygame.K_"+key):
						key_actions[key]()

		episodeOver=0
		theReward=0

		theObs=Observation()
		theObs.intArray=np.zeros(50816)
		
		returnRO=Reward_observation_terminal()
		returnRO.r=theReward
		returnRO.o=theObs
		returnRO.terminal=episodeOver
		
		return returnRO
Author: ProjectRune, Project: DQN_Tetris, Lines: 48, Source: tetris.py


Note: The rlglue.types.Reward_observation_terminal.o method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets are selected from open-source projects contributed by various developers; copyright in the source code belongs to the original authors. For distribution and use, please follow the license of the corresponding project; do not reproduce without permission.