当前位置: 首页>>代码示例>>Python>>正文


Python a3c.A3C属性代码示例

本文整理汇总了Python中a3c.A3C属性的典型用法代码示例。如果您正苦于以下问题:Python a3c.A3C属性的具体用法?Python a3c.A3C怎么用?Python a3c.A3C使用的例子?那么恭喜您, 这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在a3c的用法示例。


在下文中一共展示了a3c.A3C属性的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: train

# 需要导入模块: import a3c [as 别名]
# 或者: from a3c import A3C [as 别名]
def train(args, server):
    """Run one A3C training worker inside a distributed TensorFlow job.

    Args:
        args: parsed CLI namespace; this function reads ``task``,
            ``log_dir`` and ``env``.
        server: a ``tf.train.Server`` whose target the session connects to.

    Task 0 is responsible for initializing the shared (global) variables;
    every other task polls until that initialization is visible, then
    starts its own training loop.
    """
    # Keep each worker process single-threaded at the OpenMP level.
    os.environ['OMP_NUM_THREADS'] = '1'
    # Distinct per-task seed so workers explore differently.
    set_random_seed(args.task * 17)

    log_dir = os.path.join(args.log_dir, '{}/train'.format(args.env))
    if not tf.gfile.Exists(log_dir):
        tf.gfile.MakeDirs(log_dir)

    game, parameter = new_environment(args.env)
    agent = A3C(game, log_dir, parameter.get(),
                agent_index=args.task, callback=None)

    # Shared variables are everything outside each worker's "local" scope.
    shared_vars = [v for v in tf.global_variables()
                   if not v.name.startswith("local")]
    ready_op = tf.report_uninitialized_variables(shared_vars)
    config = tf.ConfigProto(
        device_filters=["/job:ps",
                        "/job:worker/task:{}/cpu:0".format(args.task)])

    with tf.Session(target=server.target, config=config) as sess:
        saver = tf.train.Saver()
        summary_path = os.path.join(log_dir, 'log_%d' % args.task)
        writer = tf.summary.FileWriter(delete_dir(summary_path),
                                       sess.graph_def)
        agent.set_summary_writer(writer)

        if args.task == 0:
            sess.run(tf.global_variables_initializer())
        else:
            # Wait until task 0 has initialized the shared variables.
            while len(sess.run(ready_op)) > 0:
                print("Waiting for task 0 initializing the global variables.")
                time.sleep(1)
        agent.run(sess, saver)
开发者ID:PacktPublishing,项目名称:Python-Reinforcement-Learning-Projects,代码行数:30,代码来源:worker.py

示例2: test

# 需要导入模块: import a3c [as 别名]
# 或者: from a3c import A3C [as 别名]
def test(args, server):
    """Load the best saved A3C checkpoint and evaluate it for 10 episodes.

    Args:
        args: parsed CLI namespace; this function reads ``task``,
            ``log_dir`` and ``env``.
        server: a ``tf.train.Server`` whose target the session connects to.
    """
    log_dir = os.path.join(args.log_dir, '{}/train'.format(args.env))
    # Build the environment in test mode and render via game.draw.
    game, parameter = new_environment(name=args.env, test=True)
    agent = A3C(game, log_dir, parameter.get(),
                agent_index=args.task, callback=game.draw)

    config = tf.ConfigProto(
        device_filters=["/job:ps",
                        "/job:worker/task:{}/cpu:0".format(args.task)])
    with tf.Session(target=server.target, config=config) as sess:
        saver = tf.train.Saver()
        agent.load(sess, saver, model_name='best_a3c_model.ckpt')
        agent.evaluate(sess, n_episode=10, saver=None, verbose=True)
开发者ID:PacktPublishing,项目名称:Python-Reinforcement-Learning-Projects,代码行数:13,代码来源:test.py

示例3: run_a3c

# 需要导入模块: import a3c [as 别名]
# 或者: from a3c import A3C [as 别名]
def run_a3c(processes, make_env, model_opt, phi, t_max=1, beta=1e-2,
            profile=False, steps=8 * 10 ** 7, eval_frequency=10 ** 6,
            eval_n_runs=10, args={}):

    # Prevent numpy from using multiple threads
    os.environ['OMP_NUM_THREADS'] = '1'

    outdir = prepare_output_dir(args, None)

    print('Output files are saved in {}'.format(outdir))

    n_actions = 20 * 20

    model, opt = model_opt()

    shared_params = async.share_params_as_shared_arrays(model)
    shared_states = async.share_states_as_shared_arrays(opt)

    max_score = mp.Value('f', np.finfo(np.float32).min)
    counter = mp.Value('l', 0)
    start_time = time.time()

    # Write a header line first
    with open(os.path.join(outdir, 'scores.txt'), 'a+') as f:
        column_names = ('steps', 'elapsed', 'mean', 'median', 'stdev')
        print('\t'.join(column_names), file=f)

    def run_func(process_idx):
        env = make_env(process_idx, test=False)
        model, opt = model_opt()
        async.set_shared_params(model, shared_params)
        async.set_shared_states(opt, shared_states)

        agent = a3c.A3C(model, opt, t_max, 0.99, beta=beta,
                        process_idx=process_idx, phi=phi)

        if profile:
            train_loop_with_profile(process_idx, counter, make_env, max_score,
                                    args, agent, env, start_time,
                                    outdir=outdir)
        else:
            train_loop(process_idx, counter, make_env, max_score,
                       args, agent, env, start_time, outdir=outdir)

    async.run_async(processes, run_func) 
开发者ID:muupan,项目名称:async-rl,代码行数:47,代码来源:run_a3c.py

示例4: run

# 需要导入模块: import a3c [as 别名]
# 或者: from a3c import A3C [as 别名]
def run(args):
    """Restore the trained A3C policy, evaluate 10 episodes, record GIFs.

    Args:
        args: parsed CLI namespace; this function reads ``env_id``,
            ``visualise``, ``intrinsic_type``, ``bptt`` and ``log_dir``.
    """
    env = create_env(args.env_id)
    trainer = A3C(env, None, args.visualise, args.intrinsic_type, args.bptt)

    # Variable names that start with "local" are not saved in checkpoints.
    variables_to_save = [v for v in tf.global_variables()
                         if not v.name.startswith("local")]
    init_op = tf.variables_initializer(variables_to_save)
    init_all_op = tf.global_variables_initializer()
    saver = FastSaver(variables_to_save)

    trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  tf.get_variable_scope().name)
    logger.info('Trainable vars:')
    for var in trainable:
        logger.info('  %s %s', var.name, var.get_shape())

    def init_fn(ses):
        # Supervisor calls this when no checkpoint is available.
        logger.info("Initializing all parameters.")
        ses.run(init_all_op)

    logdir = os.path.join(args.log_dir, 'train')
    summary_writer = tf.summary.FileWriter(logdir)
    logger.info("Events directory: %s", logdir)

    # Supervisor restores from `logdir` if a checkpoint exists; periodic
    # saving/summaries are disabled since this is evaluation only.
    sv = tf.train.Supervisor(
        is_chief=True,
        logdir=logdir,
        saver=saver,
        summary_op=None,
        init_op=init_op,
        init_fn=init_fn,
        summary_writer=summary_writer,
        ready_op=tf.report_uninitialized_variables(variables_to_save),
        global_step=None,
        save_model_secs=0,
        save_summaries_secs=0)

    video_dir = os.path.join(args.log_dir,
                             'test_videos_' + args.intrinsic_type)
    if not os.path.exists(video_dir):
        os.makedirs(video_dir)
    video_filename = video_dir + "/%s_%02d_%d.gif"
    print("Video saved at %s" % video_dir)

    with sv.managed_session() as sess, sess.as_default():
        trainer.start(sess, summary_writer)
        rewards = []
        lengths = []
        for episode in range(10):
            frames, reward, length = trainer.evaluate(sess)
            rewards.append(reward)
            lengths.append(length)
            imageio.mimsave(video_filename % (args.env_id, episode, reward),
                            frames, fps=30)

        print('Evaluation: avg. reward %.2f    avg.length %.2f' %
              (sum(rewards) / 10.0, sum(lengths) / 10.0))

    # Ask for all the services to stop.
    sv.stop()
开发者ID:clvrai,项目名称:FeatureControlHRL-Tensorflow,代码行数:58,代码来源:test.py


注:本文中的a3c.A3C属性示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。