

Python Memory.save Method: Code Examples

This article collects typical code examples of the Python method memory.Memory.save. If you have been struggling with questions such as: What exactly does Memory.save do? How is Memory.save called? What does Memory.save look like in real code? Then the hand-picked examples below may help. You can also explore further usage examples of the containing class, memory.Memory.


Below are 2 code examples of the Memory.save method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Python code examples.

Example 1: DQNAgent

# Required import: from memory import Memory [as alias]
# Or alternatively: from memory.Memory import save [as alias]

#......... part of the code is omitted here .........

        # Second hidden layer
        model.add(Dense(32))
        model.add(Activation('relu'))

        # Third hidden layer
        model.add(Dense(16))
        model.add(Activation('relu'))

        # Output layer; its dimension equals the number of possible actions
        model.add(Dense(self.action_dim))
        model.add(Activation('linear'))

        # Adam is the optimizer; the loss function is mean squared error (MSE)
        adam = Adam(lr=self.LEARNING_RATE)
        model.compile(loss='mse', optimizer=adam)
        return model

    def _update_target_model_(self):
        # Copy weights from predictor NN to target network.
        self.target_model.set_weights(self.model.get_weights())

    def decide(self, curstate, testmode=False):
        """ Accepts current state as input and returns action to take """
        # Do not do eps greedy policy for test trials
        if not testmode:
            if (random.random() <= self.eps) or (not self.started_learning):
                return random.randint(0, self.action_dim-1)
        # convert state to a matrix with one row
        s = np.array([self._preprocess_state_(curstate)])
        # Return the action with maximum predicted Q value.
        return np.argmax(self.model.predict(s)[0])

    def observe(self, prevstate, action, reward, curstate, done):
        """ Accepts an observation (s,a,r,s',done) as input, store them in memory buffer for
            experience replay """
        # Normalize both states
        prevstate_normalized = self._preprocess_state_(prevstate)
        curstate_normalized  = self._preprocess_state_(curstate)

        # Save a single observation in the format <curr_state, action, reward, next_state, done>
        self.memory.save(prevstate_normalized, action, reward, curstate_normalized, done)
        if done:
            # Finished episode, so time to decay epsilon
            self.eps *= self.epsdecay
        if self.steps % self.update_target_freq == 0:
            # Time to update the weights of target network
            self._update_target_model_()
        # Increment step count
        self.steps += 1

    def learn(self):
        # Do not learn if number of observations in buffer is low
        if self.memory.getsize() <= self.minsamples:
            return 0.0
        # Start training
        if not self.started_learning:
            self.started_learning = True
        # Compute a batch of inputs and targets for training the predictor DQN.
        X, y = self._compute_training_batch_()
        # Do one learning step (epochs=1) with the given (X, y)
        history = self.model.fit(X, y, batch_size=self.samplesize, epochs=1, verbose=False)
        # Return the loss of this training step.
        return history.history['loss'][-1]

    def _compute_training_batch_(self):
        # Get a random sample of specified size from the buffer
        s, a, r, s1, done = self.memory.sample(self.samplesize)
        # Convert plain list of states to numpy matrices
        s  = np.array(s)
        s1 = np.array(s1)
        # Get prediction of s with predictor DQN.
        q  = self.model.predict(s)
        # Get prediction of s1 with the target DQN.
        q1 = self.target_model.predict(s1)

        # Input batch X has been computed (s)
        X = s
        # Make space for storing targets.
        y = np.zeros((self.samplesize, self.action_dim))
        # Iterate over each observation in the random sample
        for i in range(self.samplesize):
            reward = r[i]
            action = a[i]
            target = q[i]
            # Only the target for the action taken in <s, a, r, s', done> can be improved.
            # For a terminal state the target is just the immediate reward.
            target_for_action = reward
            if not done[i]:
                # Otherwise add the discounted future reward estimated by the target network
                target_for_action += ( self.gamma*max(q1[i]) )
            # Store the computed target for the taken action
            target[action] = target_for_action
            # Assign the computed target row for observation i
            y[i, :] = target
        return X, y
Developer ID: rding0731, Project: rding.github.io, Lines of code: 104, Source file: agent.py
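
Example 1 calls self.memory.save(...), self.memory.sample(...) and self.memory.getsize(), but the Memory class itself is not part of the snippet. The following is a minimal sketch of a replay buffer that would satisfy those three calls; the deque-based storage and the capacity parameter are assumptions, not the original memory.Memory implementation.

import random
from collections import deque

class Memory:
    """Minimal replay-buffer sketch matching the calls made by DQNAgent above."""

    def __init__(self, capacity=50000):
        # capacity is a hypothetical argument; the original constructor is not shown
        self.buffer = deque(maxlen=capacity)

    def save(self, state, action, reward, next_state, done):
        # Store one transition <s, a, r, s', done>
        self.buffer.append((state, action, reward, next_state, done))

    def getsize(self):
        # Number of stored transitions
        return len(self.buffer)

    def sample(self, n):
        # Draw n random transitions and unpack them into parallel lists
        batch = random.sample(self.buffer, n)
        s, a, r, s1, done = map(list, zip(*batch))
        return s, a, r, s1, done

With such a buffer, a training episode would typically alternate agent.decide(state), agent.observe(s, a, r, s1, done) and agent.learn() until done is True.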

Example 2: read_user_input

# Required import: from memory import Memory [as alias]
# Or alternatively: from memory.Memory import save [as alias]
from memory import Memory


def read_user_input(prompt='> ', end_program='end_program'):
    line_list = []
    line = input(prompt).strip()
    while line != '':
        if line == end_program:
            return end_program
        line_list.append(line)
        line = input(prompt).strip()
    return ' '.join(line_list)


def converse(converse_memory, seed_index='', end_program='end_program'):
    prev_index = seed_index.strip()
    while True:
        utterance, prev_index = converse_memory.construct(prev_index)
        print(utterance)
        user_input = read_user_input(end_program=end_program)
        if user_input == end_program:
            break
        prev_index = converse_memory.remember(user_input, prev_index)
    return prev_index

if __name__ == "__main__":
    converse_memory = Memory()
    converse_memory.read('', '../data/converse_preload.txt')
    prev_index = converse(converse_memory, '')
    converse_memory.save('../data/converse_memory.pkl')
Developer ID: mobatmedia, Project: exploration, Lines of code: 32, Source file: converse.py
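
Example 2 ends by persisting the conversation memory to ../data/converse_memory.pkl but never shows it being loaded again. Below is a hedged sketch of how a later session might restore it, assuming Memory.save pickles the whole object (suggested by the .pkl extension); the load_memory helper and the import from converse.py are hypothetical, and if Memory offers its own load/read method that should be preferred.

import pickle

# Hypothetical: reuse the conversation loop from Example 2's source file, converse.py
from converse import converse

def load_memory(path='../data/converse_memory.pkl'):
    # Assumes Memory.save wrote the object with pickle.dump; unpickling requires
    # the memory module (and its Memory class) to be importable
    with open(path, 'rb') as f:
        return pickle.load(f)

if __name__ == "__main__":
    converse_memory = load_memory()
    converse(converse_memory, '')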


Note: The memory.Memory.save examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by their respective developers, and copyright remains with the original authors; for redistribution and use, please refer to the corresponding project's license. Do not reproduce without permission.