

Python Memory.add Method Code Examples

This article collects typical usage examples of the memory.Memory.add method in Python. If you are unsure what Memory.add does, how to call it, or what real uses of it look like, the curated code examples below should help. You can also look further into usage examples of the memory.Memory class that this method belongs to.


The following shows 3 code examples of the Memory.add method, sorted by popularity by default.
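Before the examples, here is a minimal sketch of the kind of Memory interface the snippets below rely on: a fixed-capacity replay buffer whose add method stores one transition and whose sample method returns a random minibatch. The class body shown here is only an illustrative assumption, not the actual memory.Memory implementation from any of the referenced projects.

# Illustrative sketch only; not the actual memory.Memory class used in the examples below.
import random
from collections import deque

class Memory:
    def __init__(self, size, batch_size):
        self.buffer = deque(maxlen=size)   # oldest transitions are dropped once the buffer is full
        self.batch_size = batch_size

    def add(self, state, action, reward, next_state, terminal):
        # Store one transition tuple
        self.buffer.append((state, action, reward, next_state, terminal))

    def sample(self):
        # Uniformly sample a minibatch and unzip it into per-field tuples
        batch = random.sample(list(self.buffer), self.batch_size)
        states, actions, rewards, next_states, terminals = zip(*batch)
        return states, actions, rewards, next_states, terminals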

Example 1: zip

# Required import: from memory import Memory [as alias]
# Or: from memory.Memory import add [as alias]
# (This fragment comes from the training loop in ddpg.py and relies on names defined earlier in that file, such as sess, env, eta_noise, and the placeholder tensors.)
        # Run the actor to get an action and the critic to get its Q-value for the current environment state
        action, q = sess.run([train_actor_output, train_critic_current_action], feed_dict={k: [[v]] for k, v in zip(state_placeholders, env_state)})

        action = action[0]

        # During training, perturb the action with reflected Ornstein-Uhlenbeck exploration noise (the third channel is zeroed before noise is applied)
        action = action if testing else eta_noise.reflected_ou(action * np.array([1, 1, 0, 1]), theta=[.15, .15, .75, .15], sigma=[.10, .10, .10, .10], min=-1, max=1)

        assert action.shape == env.action_space.sample().shape, (action.shape, env.action_space.sample().shape)

        max_xvel = 20
        max_yvel = 8
        max_yawrate = 0.2
        max_altitude = 15
        # Clip the raw action to [-1, 1], scale each channel to its command range, and shift the altitude channel downward
        action = np.clip(action, -1, 1) * np.array([max_xvel, max_yvel, max_yawrate, max_altitude / 4.0]) - np.array([0, 0, 0, max_altitude])

        env_next_state, env_reward, env_done, env_info = env.step(action)
        # Store the transition in the replay buffer with a high initial priority
        replay_buffer.add(env_state, env_reward, action, env_done, priority=300)

        env_state = env_next_state

        total_reward += env_reward

        if training:
            # Sample a prioritized minibatch of stored transitions for training
            states_batch, action_batch, reward_batch, next_states_batch, done_batch, indexes = replay_buffer.sample(BATCH_SIZE, prioritized=True)

            feed = {
                action_placeholder: action_batch,
                reward_placeholder: reward_batch,
                done_placeholder: done_batch
            }

            feed.update({k: v for k, v in zip(state_placeholders, states_batch)})
Developer: superjax, Project: NNOA, Lines of code: 33, Source file: ddpg.py
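Example 1 stores (state, reward, action, done) with an explicit priority=300 and later samples with prioritized=True, which points to priority-weighted sampling of transitions. The following is a rough sketch of that idea under assumed names and a simplified interface; the actual buffer in the NNOA project may store different fields and weight samples differently.

# Rough sketch of priority-weighted sampling; the class name and interface are assumptions.
import numpy as np

class PrioritizedMemory:
    def __init__(self, size):
        self.size = size
        self.data = []        # stored transitions
        self.priorities = []  # one priority per stored transition

    def add(self, state, reward, action, done, priority=1.0):
        if len(self.data) >= self.size:
            # Drop the oldest transition once the buffer is full
            self.data.pop(0)
            self.priorities.pop(0)
        self.data.append((state, reward, action, done))
        self.priorities.append(float(priority))

    def sample(self, batch_size, prioritized=True):
        # Sample indexes in proportion to priority (or uniformly when prioritized=False)
        p = np.asarray(self.priorities, dtype=np.float64)
        probs = p / p.sum() if prioritized else None
        indexes = np.random.choice(len(self.data), size=batch_size, p=probs)
        batch = [self.data[i] for i in indexes]
        return batch, indexes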

Example 2: __init__

# Required import: from memory import Memory [as alias]
# Or: from memory.Memory import add [as alias]
# (This excerpt also assumes numpy as np, Python's random module as rand, an older TensorFlow API imported as tf (pre-1.0, given tf.mul/tf.sub), and the project's Buffer and ConvNet classes.)
class DQN:

    def __init__(self, env, params):
        self.env = env
        params.actions = env.actions()
        self.num_actions = env.actions()
        self.episodes = params.episodes
        self.steps = params.steps
        self.train_steps = params.train_steps
        self.update_freq = params.update_freq
        self.save_weights = params.save_weights
        self.history_length = params.history_length
        self.discount = params.discount
        self.eps = params.init_eps
        self.eps_delta = (params.init_eps - params.final_eps) / params.final_eps_frame  # per-step decrement for linear epsilon annealing
        self.replay_start_size = params.replay_start_size
        self.eps_endt = params.final_eps_frame
        self.random_starts = params.random_starts
        self.batch_size = params.batch_size
        self.ckpt_file = params.ckpt_dir+'/'+params.game

        self.global_step = tf.Variable(0, trainable=False)
        if params.lr_anneal:
            self.lr = tf.train.exponential_decay(params.lr, self.global_step, params.lr_anneal, 0.96, staircase=True)
        else:
            self.lr = params.lr

        self.buffer = Buffer(params)
        self.memory = Memory(params.size, self.batch_size)

        with tf.variable_scope("train") as self.train_scope:
            self.train_net = ConvNet(params, trainable=True)
        with tf.variable_scope("target") as self.target_scope:
            self.target_net = ConvNet(params, trainable=False)

        self.optimizer = tf.train.RMSPropOptimizer(self.lr, params.decay_rate, 0.0, self.eps)

        self.actions = tf.placeholder(tf.float32, [None, self.num_actions])
        self.q_target = tf.placeholder(tf.float32, [None])
        # Q-value of the chosen action, picked out via the one-hot actions mask, and its TD error against the target
        self.q_train = tf.reduce_max(tf.mul(self.train_net.y, self.actions), reduction_indices=1)
        self.diff = tf.sub(self.q_target, self.q_train)

        half = tf.constant(0.5)
        # Huber-style loss when clip_delta is set: quadratic for small errors, linear beyond the clip range
        if params.clip_delta > 0:
            abs_diff = tf.abs(self.diff)
            clipped_diff = tf.clip_by_value(abs_diff, 0, 1)
            linear_part = abs_diff - clipped_diff
            quadratic_part = tf.square(clipped_diff)
            self.diff_square = tf.mul(half, tf.add(quadratic_part, linear_part))
        else:
            self.diff_square = tf.mul(half, tf.square(self.diff))

        if params.accumulator == 'sum':
            self.loss = tf.reduce_sum(self.diff_square)
        else:
            self.loss = tf.reduce_mean(self.diff_square)

        # backprop with RMS loss
        self.task = self.optimizer.minimize(self.loss, global_step=self.global_step)

    def randomRestart(self):
        # Restart the environment and take a few random actions so episodes start from varied states
        self.env.restart()
        for _ in range(self.random_starts):
            action = rand.randrange(self.num_actions)
            reward = self.env.act(action)
            state = self.env.getScreen()
            terminal = self.env.isTerminal()
            self.buffer.add(state)

            if terminal:
                self.env.restart()

    def trainEps(self, train_step):
        # Linearly anneal epsilon over the first eps_endt training steps
        if train_step < self.eps_endt:
            return self.eps - train_step * self.eps_delta
        else:
            return self.eps_endt  # note: this returns the annealing horizon, not the final epsilon; params.final_eps was probably intended

    def observe(self, exploration_rate):
        # Epsilon-greedy: explore with a random action, otherwise act greedily on the train network's Q-values
        if rand.random() < exploration_rate:
            a = rand.randrange(self.num_actions)
        else:
            x = self.buffer.getInput()
            action_values = self.train_net.y.eval( feed_dict={ self.train_net.x: x } )
            a = np.argmax(action_values)
        
        state = self.buffer.getState()
        action = np.zeros(self.num_actions)
        action[a] = 1.0
        reward = self.env.act(a)
        screen = self.env.getScreen()
        self.buffer.add(screen)
        next_state = self.buffer.getState()
        terminal = self.env.isTerminal()

        # Store the full transition for experience replay
        self.memory.add(state, action, reward, next_state, terminal)
        
        return state, action, reward, next_state, terminal

    def doMinibatch(self, sess, successes, failures):
#......... part of the code omitted here .........
Developer: chagge, Project: Game-AI, Lines of code: 103, Source file: dqn.py
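Example 2 stores full (state, action, reward, next_state, terminal) transitions with memory.add and trains against a q_target placeholder. The body of doMinibatch is omitted above, so the helper below is only an assumption of how such targets are typically computed from sampled transitions, not the project's actual code.

# Illustrative helper, not the omitted doMinibatch from the project above.
import numpy as np

def build_q_targets(rewards, next_q_values, terminals, discount):
    # Bellman targets: r + discount * max_a' Q_target(s', a'), cut off at terminal states
    max_next_q = np.max(next_q_values, axis=1)
    not_terminal = 1.0 - np.asarray(terminals, dtype=np.float32)
    return np.asarray(rewards, dtype=np.float32) + discount * max_next_q * not_terminal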

Example 3: __init__

# Required import: from memory import Memory [as alias]
# Or: from memory.Memory import add [as alias]
# (This excerpt also assumes numpy as np; the model and env objects are provided by the caller.)
class Agent:
	def __init__(self, env, model, epsilon=.9, min_epsilon=.1, epsilon_decay=1e-3):
		self.env = env
		self.model = model
		self.epsilon = epsilon
		self.min_epsilon = min_epsilon
		self.epsilon_decay = epsilon_decay
		self.episode = 0
		self.positiveMemory = Memory(model=self.model, episode_max_size=20)
		self.negativeMemory = Memory(model=self.model, episode_max_size=10)

	def play(self):
		terminal = False
		observation = self.env.reset()
		X = np.zeros((2,) + observation.shape)
		X[0] = observation
		X[1] = observation

		total_reward = 0
		while terminal == False and total_reward < 200:
			y = self.model.predict(X)
			action = np.argmax(y)

			observation, reward, terminal, info = self.env.executeAction(action)
			total_reward += reward

			X[0] = X[1]
			X[1] = observation

		return total_reward

	def learn(self, overfit=False, games=1, warmup=0, skip_frames=4):
		self.episode += 1.
		epsilon = max(self.min_epsilon, self.epsilon - self.episode * self.epsilon_decay)

		total_reward = 0
		qs = []
		predictions = None

		if warmup > 0:
			print "Adding %d warmup games"%(warmup)
			games += warmup

		for game in range(1, games + 1):
			print "Game %d/%d..."%(game, games)
			terminal = False
			observation = self.env.reset()
			framebuffer = np.zeros((skip_frames,) + observation.shape)
			framebuffer[-1] = observation
			frame = 0
			action = np.random.randint(0, 2)
			episode = []
			while terminal == False:
				frame += 1

				# On skipped frames just repeat the previous action; decisions are only re-made every skip_frames-th frame
				if frame%skip_frames != 0:
					observation, reward, terminal, info = self.env.executeAction(action)

				if frame%skip_frames == 0 or reward != 0 or terminal:
					X = framebuffer.copy()
					y = self.model.predict(X)
					qs.append(max(y))
					if predictions is None:
						predictions = np.zeros_like(y)
					predictions[np.argmax(y)] += 1

					if frame%skip_frames == 0:
						if np.random.rand() <= epsilon:
							action = np.random.randint(0, len(y))
						else:
							action = np.argmax(y)

						observation, reward, terminal, info = self.env.executeAction(action)

					total_reward += reward

					y[action] = 1. # encourage current action, for now
					episode.append((X, y, action, reward, terminal))

					# As soon as a point is scored, flush the buffered steps into the positive or negative memory
					if reward == 1:
						self.positiveMemory.add(episode, positive=True)
						episode = []
					if reward == -1:
						self.negativeMemory.add(episode, positive=False)
						episode = []

				# Shift the frame buffer left and append the newest observation
				framebuffer[0:skip_frames-1] = framebuffer[1:]
				framebuffer[-1] = observation

		print "Score %.1f"%(total_reward / games)

		X_pos, y_pos = self.positiveMemory.sample(nbr_positive=(games-warmup)*25)
		X_neg, y_neg = self.negativeMemory.sample(nbr_negative=(games-warmup)*100)

		if not X_pos is None:
			print "Sample %d positive and %d negative memories"%(len(y_pos), len(y_neg))
			X_t = np.concatenate((X_pos, X_neg))
			y_t = np.concatenate((y_pos, y_neg))
		else:
			print "Sample %d negative memories"%(len(y_neg))
#......... part of the code omitted here .........
Developer: blazer82, Project: ai, Lines of code: 103, Source file: agent.py
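Example 3 keeps two Memory instances and adds whole episodes to them, split into positive and negative outcomes. A minimal sketch of such an episode-level buffer is shown below; the class name, constructor, and sample signature are assumptions (the real class in the blazer82/ai project also takes the model, and its sample uses nbr_positive/nbr_negative keywords rather than a single count).

# Rough sketch of an episode-level memory with an add(episode, positive=...) interface.
import random
import numpy as np

class EpisodeMemory:
    def __init__(self, episode_max_size=20):
        self.episodes = []                       # each entry is a list of (X, y, action, reward, terminal) steps
        self.episode_max_size = episode_max_size

    def add(self, episode, positive=True):
        # The positive flag is kept only for interface parity in this sketch
        if not episode:
            return
        self.episodes.append(list(episode))
        if len(self.episodes) > self.episode_max_size:
            self.episodes.pop(0)                 # keep only the most recent episodes

    def sample(self, nbr_samples):
        # Flatten stored episodes into steps and draw a random subset of inputs and targets
        steps = [step for episode in self.episodes for step in episode]
        if not steps:
            return None, None
        picked = random.sample(steps, min(nbr_samples, len(steps)))
        X = np.stack([s[0] for s in picked])
        y = np.stack([s[1] for s in picked])
        return X, y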


Note: The memory.Memory.add method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors. Please refer to the corresponding project's license before distributing or using the code, and do not reproduce this article without permission.