This article collects typical usage examples of the Python memory.Memory.getMemory method. If you have been wondering how Memory.getMemory is used in practice, what it does, or what real code calling it looks like, the curated example below may help. You can also read more about the enclosing class, memory.Memory.
One code example of Memory.getMemory is shown below (examples on this site are sorted by popularity by default).
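The Memory class itself is not shown on this page. As a reference point, here is a minimal sketch of a replay buffer that would satisfy every call the example makes (getCurrentSize, getMemory, addMemory, getMiniBatch, and dict-shaped samples with state/action/reward/newState/isFinal keys). It is an inferred illustration based on those call sites, not the actual memory.Memory implementation:

import random

class Memory:
    def __init__(self, size):
        self.size = size      # maximum number of stored transitions
        self.memory = []      # each transition is stored as a dict

    def getCurrentSize(self):
        return len(self.memory)

    def getMemory(self, index):
        # Return the transition stored at the given index.
        return self.memory[index]

    def addMemory(self, state, action, reward, newState, isFinal):
        # Drop the oldest transition once the buffer is full.
        if len(self.memory) >= self.size:
            self.memory.pop(0)
        self.memory.append({'state': state, 'action': action,
                            'reward': reward, 'newState': newState,
                            'isFinal': isFinal})

    def getMiniBatch(self, miniBatchSize):
        # Uniform sample without replacement, capped at the buffer size.
        return random.sample(self.memory, min(miniBatchSize, len(self.memory)))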
Example 1: __init__
# Required import: from memory import Memory [as alias]
# Alternatively: from memory.Memory import getMemory [as alias]
import random
import numpy as np
from memory import Memory
#......... part of the code is omitted here .........
            action = self.getMaxIndex(qValues)
        else:
            action = self.getMaxIndex(qValues2)
        return action
    def selectActionAverage(self, qValues, qValues2, explorationRate):
        # Epsilon-greedy: explore with probability explorationRate, otherwise
        # act greedily on the element-wise average of both networks' Q-values.
        rand = random.random()
        if rand < explorationRate:
            action = np.random.randint(0, self.output_size)
        else:
            avgQValues = []
            for i in range(len(qValues)):
                avgQValues.append((qValues[i] + qValues2[i]) / 2.0)
            action = self.getMaxIndex(avgQValues)
        return action
    def selectActionAdded(self, qValues, qValues2, explorationRate):
        # Epsilon-greedy on the element-wise sum of both networks' Q-values
        # (assumes numpy arrays, so + adds element-wise).
        rand = random.random()
        if rand < explorationRate:
            action = np.random.randint(0, self.output_size)
        else:
            addedQValues = qValues + qValues2
            action = self.getMaxIndex(addedQValues)
        return action
    def selectActionMostPreferred(self, qValues, qValues2, qValues3, explorationRate):
        # Majority vote among three networks; note the tally below
        # hard-codes a two-action output space.
        rand = random.random()
        if rand < explorationRate:
            action = np.random.randint(0, self.output_size)
        else:
            action1 = self.getMaxIndex(qValues)
            action2 = self.getMaxIndex(qValues2)
            action3 = self.getMaxIndex(qValues3)
            actionsChosen = [0, 0]
            actionsChosen[action1] += 1
            actionsChosen[action2] += 1
            actionsChosen[action3] += 1
            if actionsChosen[0] > actionsChosen[1]:
                action = 0
            else:
                action = 1
        return action
    def selectActionByProbability(self, qValues, bias):
        # Sample an action with probability proportional to (qValue ** bias).
        # First find a shift that makes every Q-value strictly positive.
        qValueSum = 0
        shiftBy = 0
        for value in qValues:
            if value + shiftBy < 0:
                shiftBy = -(value + shiftBy)
        shiftBy += 1e-06
        for value in qValues:
            qValueSum += (value + shiftBy) ** bias
        # Build the cumulative distribution over actions.
        probabilitySum = 0
        qValueProbabilities = []
        for value in qValues:
            probability = ((value + shiftBy) ** bias) / float(qValueSum)
            qValueProbabilities.append(probability + probabilitySum)
            probabilitySum += probability
        qValueProbabilities[len(qValueProbabilities) - 1] = 1
        # Draw a uniform sample and return the first bin it falls into.
        rand = random.random()
        i = 0
        for value in qValueProbabilities:
            if rand <= value:
                return i
            i += 1
    def addMemory(self, state, action, reward, newState, isFinal):
        self.memory.addMemory(state, action, reward, newState, isFinal)
    def learnOnLastState(self):
        # Return the most recent transition, or None if the memory is empty.
        if self.memory.getCurrentSize() >= 1:
            return self.memory.getMemory(self.memory.getCurrentSize() - 1)
    def learnOnMiniBatch(self, miniBatchSize, modelNr=0):
        # Start learning only once enough transitions have been collected.
        if self.memory.getCurrentSize() > self.learnStart:
            miniBatch = self.memory.getMiniBatch(miniBatchSize)
            X_batch = np.empty((0, self.input_size), dtype=np.float64)
            Y_batch = np.empty((0, self.output_size), dtype=np.float64)
            for sample in miniBatch:
                isFinal = sample['isFinal']
                state = sample['state']
                action = sample['action']
                reward = sample['reward']
                newState = sample['newState']
                qValues = self.getQValues(state)
                qValuesNewState = self.getQValues(newState)
                # Bellman target for the action actually taken; all other
                # outputs keep their current predictions.
                targetValue = self.calculateTarget(qValuesNewState, reward, isFinal)
                X_batch = np.append(X_batch, np.array([state]), axis=0)
                Y_sample = qValues.copy()
                Y_sample[action] = targetValue
                Y_batch = np.append(Y_batch, np.array([Y_sample]), axis=0)
            self.models[modelNr].fit(X_batch, Y_batch, batch_size=1, verbose=0)
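For context, here is a hedged sketch of how these methods might be driven from a training loop. The names `agent` and `env` are illustrative assumptions (the class's __init__ and the surrounding environment are omitted above); `env` stands in for any Gym-style environment whose step method returns (newState, reward, isFinal, info):

# Hypothetical driver loop; not part of the original example.
state = env.reset()
for step in range(1000):
    qValues = agent.getQValues(state)
    # Sample an action with probability proportional to its (shifted) Q-value.
    action = agent.selectActionByProbability(qValues, bias=1.0)
    newState, reward, isFinal, info = env.step(action)
    agent.addMemory(state, action, reward, newState, isFinal)
    agent.learnOnMiniBatch(32)    # trains model 0 once the memory exceeds learnStart
    state = env.reset() if isFinal else newState

With bias=1.0 the selection is directly proportional to the shifted Q-values; raising bias above 1 sharpens the distribution toward the greedy action.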