This article collects typical usage examples of the Python method memory.Memory.save. If you have been wondering what exactly Memory.save does, how to call it, or where to find examples of it, the curated code samples below may help. You can also explore further usage examples of the containing class, memory.Memory.
A total of 2 code examples of Memory.save are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
Example 1: DQNAgent
# Required import: from memory import Memory [as alias]
# Alternatively: from memory.Memory import save [as alias]
#......... part of the code is omitted here .........
    # Second hidden layer
    model.add(Dense(32))
    model.add(Activation('relu'))
    # Third hidden layer
    model.add(Dense(16))
    model.add(Activation('relu'))
    # Output layer: one linear unit per action, i.e. one Q-value estimate per action
    model.add(Dense(self.action_dim))
    model.add(Activation('linear'))
    # Adam is the optimizer; the loss function is MSE
    adam = Adam(lr=self.LEARNING_RATE)
    model.compile(loss='mse', optimizer=adam)
    return model
def _update_target_model_(self):
    # Copy weights from the predictor NN to the target network.
    self.target_model.set_weights(self.model.get_weights())
def decide(self, curstate, testmode=False):
    """ Accepts the current state as input and returns the action to take """
    # Do not use the epsilon-greedy exploration policy during test trials
    if not testmode:
        if (random.random() <= self.eps) or (not self.started_learning):
            return random.randint(0, self.action_dim - 1)
    # Convert the state to a matrix with one row
    s = np.array([self._preprocess_state_(curstate)])
    # Return the action with the maximum predicted Q value.
    return np.argmax(self.model.predict(s)[0])
def observe(self, prevstate, action, reward, curstate, done):
    """ Accepts an observation (s, a, r, s', done) as input and stores it in the
    memory buffer for experience replay """
    # Normalize both states
    prevstate_normalized = self._preprocess_state_(prevstate)
    curstate_normalized = self._preprocess_state_(curstate)
    # Save a single observation as a <curr_state, action, reward, next_state, done> tuple
    self.memory.save(prevstate_normalized, action, reward, curstate_normalized, done)
    if done:
        # Episode finished, so decay epsilon
        self.eps *= self.epsdecay
    if self.steps % self.update_target_freq == 0:
        # Time to update the weights of the target network
        self._update_target_model_()
    # Increment step count
    self.steps += 1
def learn(self):
    # Do not learn while the number of observations in the buffer is too low
    if self.memory.getsize() <= self.minsamples:
        return 0.0
    # Start training
    if not self.started_learning:
        self.started_learning = True
    # Compute a batch of inputs and targets for training the predictor DQN.
    X, y = self._compute_training_batch_()
    # Do one learning step (epochs=1) with the given (X, y)
    history = self.model.fit(X, y, batch_size=self.samplesize, epochs=1, verbose=False)
    # Return the loss of this training step.
    return history.history['loss'][-1]
def _compute_training_batch_(self):
    # Get a random sample of the specified size from the buffer
    s, a, r, s1, done = self.memory.sample(self.samplesize)
    # Convert plain lists of states to numpy matrices
    s = np.array(s)
    s1 = np.array(s1)
    # Get the prediction for s with the predictor DQN.
    q = self.model.predict(s)
    # Get the prediction for s1 with the target DQN (or with the predictor DQN if no separate target network is used).
    q1 = self.target_model.predict(s1)
    # The input batch X is simply s
    X = s
    # Make space for storing the targets.
    y = np.zeros((self.samplesize, self.action_dim))
    # Iterate over each observation in the random sample
    for i in range(self.samplesize):
        reward = r[i]
        action = a[i]
        target = q[i]
        # We can only improve the target for the action taken in the observation <s, a, r, s'>
        target_for_action = reward
        if not done[i]:
            # If not terminal, add the discounted future reward under the current policy
            target_for_action += self.gamma * max(q1[i])
        # (for a terminal state the target is just the immediate reward)
        # Store the computed target for this action
        target[action] = target_for_action
        # Assign the computed target row for observation i
        y[i, :] = target
    return X, y
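The DQNAgent above exercises only three methods of the buffer: Memory.save to append a transition, Memory.getsize to check the fill level, and Memory.sample to draw a random minibatch. The actual memory.Memory implementation is not shown on this page; the following is a minimal sketch of a compatible replay buffer, assuming a deque-backed store (only the method names and call signatures are taken from the example above).
import random
from collections import deque

class ReplayMemory:
    """Illustrative stand-in for memory.Memory as used by DQNAgent; the real class may differ."""
    def __init__(self, capacity=100000):
        # Oldest transitions are discarded once capacity is reached
        self.buffer = deque(maxlen=capacity)

    def save(self, state, action, reward, next_state, done):
        # Store one <s, a, r, s', done> transition, mirroring DQNAgent.observe()
        self.buffer.append((state, action, reward, next_state, done))

    def getsize(self):
        return len(self.buffer)

    def sample(self, n):
        # Return five parallel lists, matching how _compute_training_batch_() unpacks memory.sample()
        batch = random.sample(self.buffer, n)
        s, a, r, s1, done = zip(*batch)
        return list(s), list(a), list(r), list(s1), list(done)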
Example 2: read_user_input
# Required import: from memory import Memory [as alias]
# Alternatively: from memory.Memory import save [as alias]
from memory import Memory

def read_user_input(prompt='> ', end_program='end_program'):
    # Read lines until an empty line is entered and join them into one utterance
    line_list = []
    line = input(prompt).strip()
    while line != '':
        if line == end_program:
            return end_program
        line_list.append(line)
        line = input(prompt).strip()
    return ' '.join(line_list)

def converse(converse_memory, seed_index='', end_program='end_program'):
    prev_index = seed_index.strip()
    while True:
        # Generate a reply from memory, then read the user's next utterance
        utterance, prev_index = converse_memory.construct(prev_index)
        print(utterance)
        user_input = read_user_input(end_program=end_program)
        if user_input == end_program:
            break
        prev_index = converse_memory.remember(user_input, prev_index)
    return prev_index

if __name__ == "__main__":
    converse_memory = Memory()
    converse_memory.read('', '../data/converse_preload.txt')
    prev_index = converse(converse_memory, '')
    # Persist the conversation memory to disk when the session ends
    converse_memory.save('../data/converse_memory.pkl')
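Note that in this second example Memory.save takes a single file path ('../data/converse_memory.pkl') rather than a transition tuple, so here it persists the whole memory object to disk. The implementation is not shown on this page; a minimal sketch of what such a method could look like follows, where the use of the pickle module and the self.data attribute are assumptions made purely for illustration.
import pickle

def save(self, path):
    # Hypothetical body: write the memory's internal store to a pickle file,
    # e.g. '../data/converse_memory.pkl'; the attribute name self.data is assumed.
    with open(path, 'wb') as f:
        pickle.dump(self.data, f)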