This page collects typical usage examples of the terminal field of Python's rlglue.types.Reward_observation_terminal class. If you are wondering what Reward_observation_terminal.terminal does, how to use it, or what it looks like in real code, the curated examples below may help. You can also explore the containing class, rlglue.types.Reward_observation_terminal, for further context.
Seven code examples of Reward_observation_terminal.terminal are shown below, sorted by popularity by default.
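All seven examples share one pattern: the environment's env_step builds a Reward_observation_terminal, assigns its r (reward), o (Observation), and terminal fields, and returns it to RL-Glue. A minimal sketch of that pattern (the transition logic and self.currentState are illustrative placeholders, not taken from any example below):

from rlglue.types import Observation, Reward_observation_terminal

def env_step(self, action):
    # Placeholder logic; a real environment computes the next state and reward here.
    obs = Observation()
    obs.intArray = [self.currentState]   # hypothetical state variable
    ro = Reward_observation_terminal()
    ro.r = 0.0                           # reward for this step
    ro.o = obs                           # observation after the step
    ro.terminal = 0                      # set to 1 (or True) when the episode ends
    return ro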
Example 1: env_step
# Required imports: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import terminal [as alias]
def env_step(self,action):
    self.stepCount=self.stepCount+1

    if self.whichEpisode % 2 == 0:
        self.o.intArray=list(range(0,50000))
        #cheating, might break something
        self.o.doubleArray=list(range(0,50000))
        terminal=0
        if self.stepCount==200:
            terminal=1
        ro=Reward_observation_terminal()
        ro.r=1.0
        ro.o=self.o
        ro.terminal=terminal
        return ro

    self.o.intArray=list(range(0,5))
    #cheating, might break something
    self.o.doubleArray=list(range(0,5))
    terminal=0
    if self.stepCount==5000:
        terminal=1
    ro=Reward_observation_terminal()
    ro.r=1.0
    ro.o=self.o
    ro.terminal=terminal
    return ro
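This appears to be a codec stress test: on even-numbered episodes it returns 50,000-element observations and terminates after 200 steps, on odd-numbered ones 5-element observations for 5,000 steps. Either way, terminal stays 0 until the final step, where it flips to 1.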
Example 2: env_step
# Required imports: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import terminal [as alias]
def env_step(self,thisAction):
    episodeOver=0
    theReward=0

    if thisAction.intArray[0]==0:
        self.currentState=self.currentState-1
    if thisAction.intArray[0]==1:
        self.currentState=self.currentState+1

    if self.currentState <= 0:
        self.currentState=0
        theReward=-1
        episodeOver=1
    if self.currentState >= 20:
        self.currentState=20
        theReward=1
        episodeOver=1

    theObs=Observation()
    theObs.intArray=[self.currentState]

    returnRO=Reward_observation_terminal()
    returnRO.r=theReward
    returnRO.o=theObs
    returnRO.terminal=episodeOver
    return returnRO
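A minimal corridor environment: action 0 moves the agent left, action 1 right, and the episode terminates with reward -1 at state 0 or +1 at state 20. Note that a plain int (0 or 1) is a perfectly valid value for the terminal field.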
Example 3: env_step
# Required imports: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import terminal [as alias]
def env_step(self,action):
    ro=Reward_observation_terminal()
    terminal=False

    if self.stepCount < 5:
        self.o.doubleArray=[]
        self.o.charArray=[]
        self.o.intArray=[self.stepCount]
        self.stepCount=self.stepCount+1
        if self.stepCount==5:
            terminal=True
        ro.r=1.0
    else:
        self.o.doubleArray=[0.0078125,-0.0078125,0.0,0.0078125e150,-0.0078125e150]
        self.o.charArray=['g','F','?',' ','&']
        self.o.intArray=[173,-173,2147483647,0,-2147483648]
        ro.r=-2.0

    ro.o=self.o
    ro.terminal=terminal
    return ro
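This example tests the wire format rather than a learning problem: terminal flips to True on the fifth step, and the else branch deliberately returns boundary values (doubles on the order of ±1e148, the extremes of the signed 32-bit integer range, and awkward characters) to exercise serialization.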
Example 4: env_step
# Required imports: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import terminal [as alias]
def env_step(self,thisAction):
    # Move the player
    self.player.update(thisAction)
    # Compute the score after the move
    theReward = self.field.decision(int(self.player.x+0.5), int(self.player.y+0.5), thisAction.intArray[0])
    #print("Reward:%d" %theReward)
    episodeOver = self.field.get_gameover()
    #print("EdgeTracer:episodeOver %03d" %episodeOver)
    # Draw the field
    self.draw_field()

    returnObs=Observation()
    returnObs.intArray=np.append(np.zeros(128), [ item for innerlist in self.img_state for item in innerlist ])
    #scipy.misc.imsave('l_screen.png', img_src)
    #scipy.misc.imsave('r_screen.png', img_afn)

    returnRO=Reward_observation_terminal()
    returnRO.r=theReward
    returnRO.o=returnObs
    returnRO.terminal=episodeOver
    return returnRO
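The observation here prepends 128 zero entries to the flattened self.img_state image, presumably mirroring an Atari/ALE-style RAM-plus-screen layout; the terminal flag comes straight from the game-over state of the field.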
Example 5: env_step
# Required imports: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import terminal [as alias]
def env_step(self, action):
    state, reward, terminal = self.environment.step(self.get_action(action))
    rot = Reward_observation_terminal()
    rot.r = reward
    rot.o = self.create_observation(state)
    rot.terminal = terminal
    return rot
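Example 5 is a thin adapter around an existing environment object; its get_action and create_observation helpers are not shown on this page. A minimal sketch of what create_observation might look like, assuming the wrapped environment returns a flat numeric state vector (hypothetical, not the author's actual code):

def create_observation(self, state):
    # Hypothetical helper: pack a flat numeric state vector into an Observation.
    obs = Observation()
    obs.doubleArray = [float(x) for x in state]
    return obs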
Example 6: env_step
# Required imports: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import terminal [as alias]
def env_step(self,thisAction):
    intAction = thisAction.intArray[0]
    theReward, episodeOver = self.takeAction(intAction)

    theObs = Observation()
    theObs.doubleArray = self.state.tolist()

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = int(episodeOver)
    return returnRO
Example 7: env_step
# Required imports: from rlglue.types import Reward_observation_terminal [as alias]
# Or: from rlglue.types.Reward_observation_terminal import terminal [as alias]
def env_step(self,thisAction):
    intAction = int(thisAction.intArray[0])
    theReward = self.takeAction(intAction)

    theObs = Observation()
    theObs.intArray = self.getState()

    returnRO = Reward_observation_terminal()
    returnRO.r = theReward
    returnRO.o = theObs
    returnRO.terminal = 0
    return returnRO
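In this last example terminal is hard-coded to 0, i.e. a continuing task that never signals the end of an episode on its own. For context, the flag is typically consumed by the experiment program; a minimal sketch using the RL-Glue Python codec's RLGlue module (the 100-step budget is arbitrary, and this loop is an illustration, not part of the example above):

import rlglue.RLGlue as RLGlue

RLGlue.RL_init()
RLGlue.RL_start()
for step in range(100):
    roat = RLGlue.RL_step()   # Reward_observation_action_terminal
    if roat.terminal:         # set from the environment's ro.terminal
        break                 # episode over
RLGlue.RL_cleanup()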