当前位置: 首页>>代码示例>>Python>>正文


Python Memory.getMemory方法代码示例

本文整理汇总了Python中memory.Memory.getMemory方法的典型用法代码示例。如果您正苦于以下问题:Python Memory.getMemory方法的具体用法?Python Memory.getMemory怎么用?Python Memory.getMemory使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在memory.Memory的用法示例。


在下文中一共展示了Memory.getMemory方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from memory import Memory [as 别名]
# 或者: from memory.Memory import getMemory [as 别名]

#.........这里部分代码省略.........
                action = self.getMaxIndex(qValues)
            else :
                action = self.getMaxIndex(qValues2)
        return action

    def selectActionAverage(self, qValues, qValues2, explorationRate):
        """Epsilon-greedy action selection over the element-wise average of
        two Q-value estimates.

        With probability ``explorationRate`` a uniformly random action is
        returned; otherwise the action with the highest averaged Q-value.

        Bug fix: the original loop iterated ``range(0, len(qValues) - 1)``,
        which silently dropped the last action's Q-value, so the last action
        could never be selected greedily.
        """
        rand = random.random()
        if rand < explorationRate:
            return np.random.randint(0, self.output_size)
        # Average the two estimates over ALL actions.
        avgQValues = [(v1 + v2) / 2.0 for v1, v2 in zip(qValues, qValues2)]
        return self.getMaxIndex(avgQValues)

    def selectActionAdded(self, qValues, qValues2, explorationRate):
        """Epsilon-greedy selection over the sum of two Q-value estimates.

        With probability ``explorationRate`` a uniformly random action is
        returned; otherwise the argmax of ``qValues + qValues2``.

        NOTE(review): ``+`` is element-wise only when the inputs are numpy
        arrays; with plain Python lists it concatenates — confirm callers
        always pass arrays.
        """
        if random.random() < explorationRate:
            return np.random.randint(0, self.output_size)
        combined = qValues + qValues2
        return self.getMaxIndex(combined)

    def selectActionMostPreferred(self, qValues, qValues2, qValues3, explorationRate):
        """Majority vote among three Q-value estimates.

        With probability ``explorationRate`` a uniformly random action is
        returned; otherwise each estimate votes for its greedy action and the
        action with the most votes wins.

        Generalization: the original hard-coded a 2-action vote array
        (``[0, 0]``) even though exploration already used
        ``self.output_size``; the vote array now covers all actions.
        Ties are broken toward the lowest action index, matching the
        original's preference for action 0.
        """
        rand = random.random()
        if rand < explorationRate:
            return np.random.randint(0, self.output_size)
        votes = [0] * self.output_size
        for estimate in (qValues, qValues2, qValues3):
            votes[self.getMaxIndex(estimate)] += 1
        # max() returns the first maximum -> lowest index wins ties.
        return max(range(self.output_size), key=lambda a: votes[a])

    def selectActionByProbability(self, qValues, bias):
        """Sample an action index with probability proportional to
        ``(qValue + shift) ** bias``.

        All Q-values are shifted so the smallest becomes slightly positive
        before exponentiation — a negative base raised to a fractional power
        would produce an invalid weight.

        Bug fix: the original computed the shift with an order-dependent
        running update that could leave some shifted values negative
        (e.g. ``[-1, -2]`` produced a shift of 1, leaving ``-2 + 1 < 0``).
        The correct shift is ``-min(qValues)``.
        """
        shiftBy = max(0.0, -min(qValues)) + 1e-06

        weights = [(value + shiftBy) ** bias for value in qValues]
        total = float(sum(weights))

        # Cumulative distribution over actions.
        qValueProbabilities = []
        running = 0.0
        for w in weights:
            running += w / total
            qValueProbabilities.append(running)
        # Guard against float round-off so the loop below always returns.
        qValueProbabilities[len(qValueProbabilities) - 1] = 1

        rand = random.random()
        for i, threshold in enumerate(qValueProbabilities):
            if rand <= threshold:
                return i

    def addMemory(self, state, action, reward, newState, isFinal):
        """Store one (state, action, reward, newState, done) transition.

        Thin delegation to the replay buffer held in ``self.memory``
        (a ``memory.Memory`` instance, per the surrounding example).
        """
        self.memory.addMemory(state, action, reward, newState, isFinal)

    def learnOnLastState(self):
        """Return the most recently stored transition, or ``None`` when the
        replay memory is empty (implicit ``None`` from falling off the end)."""
        size = self.memory.getCurrentSize()
        if size < 1:
            return None
        return self.memory.getMemory(size - 1)

    def learnOnMiniBatch(self, miniBatchSize, modelNr=0):
        """Run one supervised update of ``self.models[modelNr]`` on a random
        replay mini-batch.

        Does nothing until the replay memory holds more than
        ``self.learnStart`` transitions. For each sampled transition the
        current Q-value predictions serve as the regression target, with the
        taken action's entry replaced by the bootstrapped target from
        ``self.calculateTarget``.

        Performance fix: rows are accumulated in Python lists and converted
        to arrays once, instead of the original per-sample ``np.append``,
        which reallocated and copied both batch arrays on every iteration
        (O(n^2) in the batch size).
        """
        if self.memory.getCurrentSize() <= self.learnStart:
            return
        miniBatch = self.memory.getMiniBatch(miniBatchSize)
        X_rows = []
        Y_rows = []
        for sample in miniBatch:
            state = sample['state']

            qValues = self.getQValues(state)
            qValuesNewState = self.getQValues(sample['newState'])
            targetValue = self.calculateTarget(
                qValuesNewState, sample['reward'], sample['isFinal'])

            # Target vector: copy of the current predictions with only the
            # taken action's value moved toward the bootstrapped target.
            Y_sample = np.array(qValues, dtype=np.float64)
            Y_sample[sample['action']] = targetValue

            X_rows.append(np.asarray(state, dtype=np.float64))
            Y_rows.append(Y_sample)

        # Preserve the original (0, n) shapes if the mini-batch is empty.
        if X_rows:
            X_batch = np.asarray(X_rows, dtype=np.float64)
            Y_batch = np.asarray(Y_rows, dtype=np.float64)
        else:
            X_batch = np.empty((0, self.input_size), dtype=np.float64)
            Y_batch = np.empty((0, self.output_size), dtype=np.float64)
        self.models[modelNr].fit(X_batch, Y_batch, batch_size=1, verbose=0)
开发者ID:vyraun,项目名称:deep-q-learning,代码行数:104,代码来源:deepq.py


注:本文中的memory.Memory.getMemory方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。