This article collects typical usage examples of the Python method agent.Agent.play. If you are wondering exactly how to use Agent.play, or what it looks like in real code, the hand-picked examples below may help. You can also explore further usage examples of the enclosing class agent.Agent.
A total of 7 code examples of the Agent.play method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
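All seven examples follow roughly the same pattern: build an environment and a model/network, wrap them in an Agent, optionally train, and finally call play(...) to run episodes. The minimal, self-contained sketch below illustrates only that calling pattern; ToyEnvironment and the Agent shown here are hypothetical stand-ins written for this page, not the actual classes used in the projects quoted below.

import random

class ToyEnvironment(object):
    # Toy environment: each episode lasts 10 steps, every step yields a random reward.
    def reset(self):
        self.steps_left = 10

    def step(self, action):
        self.steps_left -= 1
        return random.random(), self.steps_left == 0  # (reward, done)

class Agent(object):
    def __init__(self, env, num_actions=4):
        self.env = env
        self.num_actions = num_actions

    def choose_action(self):
        # A real agent would act epsilon-greedily on its Q-network here.
        return random.randrange(self.num_actions)

    def play(self, num_episodes=1):
        # Run complete episodes and return the score of the last one.
        score = 0.0
        for _ in range(num_episodes):
            self.env.reset()
            score, done = 0.0, False
            while not done:
                reward, done = self.env.step(self.choose_action())
                score += reward
        return score

agent = Agent(env=ToyEnvironment())
print(agent.play(num_episodes=5))

The real Agent classes below take additional collaborators (replay memory, saver, monitor) and expose training-oriented keyword arguments on play(), but the construct-then-play flow is the same.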
Example 1: run
# Required module import: from agent import Agent [as alias]
# Or alternatively: from agent.Agent import play [as alias]
def run(args):
    # Log both to file and to stdout.
    logging.basicConfig(filename=args.LOG_FILE, level=logging.DEBUG)
    logging.getLogger().addHandler(logging.StreamHandler())
    # Set up the game environment with frame skipping and image preprocessing.
    game_handler = GameStateHandler(random_seed=123, frame_skip=args.FRAME_SKIP, use_sdl=False,
                                    image_processing=lambda x: crop_and_resize(x, args.IMAGE_HEIGHT, args.IMAGE_WIDTH))
    game_handler.loadROM(args.ROM_FILE)
    height, width = game_handler.getScreenDims()
    logging.info('Screen resolution is %dx%d' % (height, width))
    num_actions = game_handler.num_actions
    # Build the Q-network, replay memory and monitoring, then wrap them in an Agent.
    net = theano_qnetwork.DeepQNetwork(args.IMAGE_HEIGHT, args.IMAGE_WIDTH, num_actions, args.STATE_FRAMES, args.DISCOUNT_FACTOR)
    replay_memory = ReplayMemoryManager(args.IMAGE_HEIGHT, args.IMAGE_WIDTH, args.STATE_FRAMES, args.REPLAY_MEMORY_SIZE)
    monitor = Monitoring(log_train_step_every=100, smooth_episode_scores_over=50)
    agent = Agent(game_handler, net, replay_memory, None, monitor, args.TRAIN_FREQ, batch_size=args.BATCH_SIZE)
    start_epsilon = args.START_EPSILON
    exploring_duration = args.EXPLORING_DURATION
    # Fill the replay memory first, then train by playing with a decaying epsilon.
    agent.populate_replay_memory(args.MIN_REPLAY_MEMORY)
    agent.play(train_steps_limit=args.LEARNING_BEYOND_EXPLORING + args.EXPLORING_DURATION, start_eps=start_epsilon,
               final_eps=args.FINAL_EPSILON, exploring_duration=exploring_duration)
Example 2: main
# Required module import: from agent import Agent [as alias]
# Or alternatively: from agent.Agent import play [as alias]
def main():
    game_width = 12
    game_height = 9
    nb_frames = 4
    actions = ((-1, 0), (1, 0), (0, -1), (0, 1), (0, 0))
    # Recipe of deep reinforcement learning model
    model = Sequential()
    model.add(Convolution2D(
        16,
        nb_row=3,
        nb_col=3,
        activation='relu',
        input_shape=(nb_frames, game_height, game_width)))
    model.add(Convolution2D(32, nb_row=3, nb_col=3, activation='relu'))
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dense(len(actions)))
    model.compile(RMSprop(), 'MSE')
    agent = Agent(
        model, nb_frames, snake_game, actions, size=(game_width, game_height))
    agent.train(nb_epochs=10000, batch_size=64, gamma=0.8, save_model=True)
    agent.play(nb_rounds=10)
Example 3: GymEnvironment
# Required module import: from agent import Agent [as alias]
# Or alternatively: from agent.Agent import play [as alias]
antarg.add_argument("--exploration_decay_steps", type=float, default=1000000, help="How many steps to decay the exploration rate.")
antarg.add_argument("--exploration_rate_test", type=float, default=0.05, help="Exploration rate used during testing.")
antarg.add_argument("--train_frequency", type=int, default=4, help="Perform training after this many game steps.")
antarg.add_argument("--train_repeat", type=int, default=1, help="Number of times to sample minibatch during training.")
antarg.add_argument("--random_starts", type=int, default=30, help="Perform max this number of dummy actions after game restart, to produce more random game dynamics.")

mainarg = parser.add_argument_group('Main loop')
mainarg.add_argument("--load_weights", help="Load network from file.")
mainarg.add_argument("--save_weights_prefix", help="Save network to given file. Epoch and extension will be appended.")

comarg = parser.add_argument_group('Common')
comarg.add_argument("output_folder", help="Where to write results to.")
comarg.add_argument("--num_episodes", type=int, default=100, help="Number of episodes to test.")
comarg.add_argument("--random_seed", type=int, help="Random seed for repeatable experiments.")
args = parser.parse_args()

if args.random_seed:
    random.seed(args.random_seed)

env = GymEnvironment(args.env_id, args)
net = DeepQNetwork(env.numActions(), args)
mem = None
agent = Agent(env, mem, net, args)

if args.load_weights:
    print "Loading weights from %s" % args.load_weights
    net.load_weights(args.load_weights)

env.gym.monitor.start(args.output_folder, force=True)
agent.play(args.num_episodes)
env.gym.monitor.close()
Example 4: ReplayMemory
# Required module import: from agent import Agent [as alias]
# Or alternatively: from agent.Agent import play [as alias]
else:
    assert False, "Unknown environment " + args.environment

mem = ReplayMemory(args.replay_size, args)
net = DeepQNetwork(env.numActions(), args)
agent = Agent(env, mem, net, args)
stats = Statistics(agent, net, mem, env, args)

if args.load_weights:
    logger.info("Loading weights from %s" % args.load_weights)
    net.load_weights(args.load_weights)

if args.play_games:
    logger.info("Playing for %d game(s)" % args.play_games)
    stats.reset()
    agent.play(args.play_games)
    stats.write(0, "play")
    if args.visualization_file:
        from visualization import visualize
        # use states recorded during gameplay. NB! Check that the buffer size can accommodate one game!
        states = [agent.mem.getState(i) for i in xrange(agent.history_length, agent.mem.current - agent.random_starts)]
        logger.info("Collected %d game states" % len(states))
        import numpy as np
        states = np.array(states)
        states = states / 255.
        visualize(net.model, states, args.visualization_filters, args.visualization_file)
    sys.exit()

if args.random_steps:
    # populate replay memory with random steps
    logger.info("Populating replay memory with %d random moves" % args.random_steps)
Example 5: Model
# Required module import: from agent import Agent [as alias]
# Or alternatively: from agent.Agent import play [as alias]
from agent import Agent
from environment import Environment
from model import Model
import matplotlib.pyplot as plt

model = Model(batch_size=1024, lr=1e-4, load='model.h5')
env = Environment(env_type=Environment.TYPE_PONG, render=True)
agent = Agent(env=env, model=model)

episode = 0
scores = []
losses = []
qs = []
eps = []

while True:
    episode += 1
    score = agent.play()
    print "#%d score: %d" % (episode, score)
Example 6: main
# Required module import: from agent import Agent [as alias]
# Or alternatively: from agent.Agent import play [as alias]
def main(_=None):
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        d = os.path.dirname(args.log_file)
        if not os.path.exists(d):
            os.makedirs(d)
        if not args.continue_training:
            with open(args.log_file, 'w') as f:
                f.write('')
        logging.basicConfig(filename=args.log_file, level=logging.DEBUG)
        logging.getLogger().addHandler(logging.StreamHandler())
        game_handler = GameStateHandler(
            args.rom_directory + args.rom_name,
            random_seed=args.random_seed,
            frame_skip=args.frame_skip,
            use_sdl=args.use_sdl,
            repeat_action_probability=args.repeat_action_probability,
            minimum_actions=args.minimum_action_set,
            test_mode=args.test_mode,
            image_processing=lambda x: crop_and_resize(x, args.image_height, args.image_width, args.cut_top))
        num_actions = game_handler.num_actions
        if args.optimizer == 'rmsprop':
            optimizer = tf.train.RMSPropOptimizer(
                learning_rate=args.learning_rate,
                decay=args.decay,
                momentum=0.0,
                epsilon=args.rmsprop_epsilon)
        # Choose between a single-GPU (plain or double) DQN and a multi-GPU double DQN.
        if not args.multi_gpu:
            if args.double_dqn:
                net = qnetwork.DualDeepQNetwork(args.image_height, args.image_width, sess, num_actions,
                                                args.state_frames, args.discount_factor, args.target_net_refresh_rate,
                                                net_type=args.net_type, optimizer=optimizer)
            else:
                net = qnetwork.DeepQNetwork(args.image_height, args.image_width, sess, num_actions, args.state_frames,
                                            args.discount_factor, net_type=args.net_type, optimizer=optimizer)
        else:
            net = multi_gpu_qnetwork.MultiGPUDualDeepQNetwork(args.image_height, args.image_width, sess, num_actions,
                                                              args.state_frames, args.discount_factor,
                                                              optimizer=optimizer, gpus=[0, 1, 2, 3])
        saver = Saver(sess, args.data_dir, args.continue_training)
        if saver.replay_memory_found():
            replay_memory = saver.get_replay_memory()
        else:
            if args.test_mode:
                logging.error('NO SAVED NETWORKS IN TEST MODE!!!')
            replay_memory = ReplayMemoryManager(args.image_height, args.image_width, args.state_frames,
                                                args.replay_memory_size, reward_clip_min=args.reward_clip_min,
                                                reward_clip_max=args.reward_clip_max)
        # todo: add parameters to handle monitor
        monitor = Monitoring(log_train_step_every=100, smooth_episode_scores_over=50)
        agent = Agent(
            game_handler=game_handler,
            qnetwork=net,
            replay_memory=replay_memory,
            saver=saver,
            monitor=monitor,
            train_freq=args.train_freq,
            test_mode=args.test_mode,
            batch_size=args.batch_size,
            save_every_x_episodes=args.saving_freq)
        sess.run(tf.initialize_all_variables())
        saver.restore(args.data_dir)
        # Resume the linear epsilon decay from the frame count of the restored checkpoint.
        start_epsilon = max(args.final_epsilon,
                            args.start_epsilon - saver.get_start_frame() * (args.start_epsilon - args.final_epsilon) / args.exploration_duration)
        exploring_duration = max(args.exploration_duration - saver.get_start_frame(), 1)
        if args.test_mode:
            agent.populate_replay_memory(args.state_frames, force_early_stop=True)
            agent.play_in_test_mode(args.epsilon_in_test_mode)
        else:
            agent.populate_replay_memory(args.min_replay_memory)
            agent.play(train_steps_limit=args.number_of_train_steps, start_eps=start_epsilon,
                       final_eps=args.final_epsilon, exploring_duration=exploring_duration)
Example 7: Model
# Required module import: from agent import Agent [as alias]
# Or alternatively: from agent.Agent import play [as alias]
from agent import Agent
from environment import Environment
from model import Model

model = Model(batch_size=128, lr=1e-2, load=None)
env = Environment(env_type=Environment.TYPE_CART_POLE, render=False, monitor=False)
agent = Agent(env=env, model=model)

episode = 0
first_reward = 0

while True:
    episode += 1
    reward = agent.learn(overfit=False)
    if first_reward == 0:
        first_reward = reward
    print "Reward delta: %d" % (reward - first_reward)
    if reward >= 200:
        print "SOLVED after %d episodes!" % (episode)
        p = 0
        while reward > 150:
            p += 1
            reward = agent.play()
            print reward
            if p > 120:
                env.close()
                exit()