

Python tf_util.get_session method code examples

This article collects typical usage examples of the Python method baselines.common.tf_util.get_session. If you are wondering how to use tf_util.get_session, how to call it, or what real-world examples of it look like, the curated code samples below should help. You can also explore further usage examples from the module it belongs to, baselines.common.tf_util.


The following shows 12 code examples of the tf_util.get_session method, sorted by popularity by default.
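
Before the individual examples, here is a minimal call sketch (not taken from any of the projects below). The config argument mirrors its use in Example 3; whether get_session reuses an already created default session or builds a new one is treated here as an assumption about the baselines implementation.

import tensorflow as tf
from baselines.common import tf_util

# Optional session configuration, as also passed in Example 3 below.
config = tf.ConfigProto(inter_op_parallelism_threads=1)
sess = tf_util.get_session(config=config)  # one shared tf.Session for the process
print(sess.run(tf.constant(1.0)))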

Example 1: __init__

# Module to import: from baselines.common import tf_util [as alias]
# Or: from baselines.common.tf_util import get_session [as alias]
def __init__(self, epsilon=1e-4, shape=(), scope=''):
        sess = get_session()

        self._new_mean = tf.placeholder(shape=shape, dtype=tf.float64)
        self._new_var = tf.placeholder(shape=shape, dtype=tf.float64)
        self._new_count = tf.placeholder(shape=(), dtype=tf.float64)

        
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            self._mean  = tf.get_variable('mean',  initializer=np.zeros(shape, 'float64'),      dtype=tf.float64)
            self._var   = tf.get_variable('std',   initializer=np.ones(shape, 'float64'),       dtype=tf.float64)    
            self._count = tf.get_variable('count', initializer=np.full((), epsilon, 'float64'), dtype=tf.float64)

        self.update_ops = tf.group([
            self._var.assign(self._new_var),
            self._mean.assign(self._new_mean),
            self._count.assign(self._new_count)
        ])

        sess.run(tf.variables_initializer([self._mean, self._var, self._count]))
        self.sess = sess
        self._set_mean_var_count() 
Developer ID: MaxSobolMark, Project: HardRLWithYoutube, Lines: 24, Source file: running_mean_std.py
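
A hedged usage sketch for the snippet above (not part of the original file): assuming the __init__ belongs to a class such as TfRunningMeanStd in baselines.common.running_mean_std, an assumption based on the attribute names, new statistics can be pushed into the TensorFlow variables through the placeholders and update_ops defined above. The real class would normally combine batch statistics with the running ones in an update method; this sketch only shows the feed mechanism.

import numpy as np
from baselines.common.running_mean_std import TfRunningMeanStd  # assumed class name

rms = TfRunningMeanStd(epsilon=1e-4, shape=(4,), scope='obs_rms')

batch = np.random.randn(32, 4)
# Overwrite the stored statistics directly; only attributes visible in the snippet are used.
rms.sess.run(rms.update_ops, feed_dict={
    rms._new_mean: batch.mean(axis=0),
    rms._new_var: batch.var(axis=0),
    rms._new_count: float(batch.shape[0]),
})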

Example 2: __init__

# Module to import: from baselines.common import tf_util [as alias]
# Or: from baselines.common.tf_util import get_session [as alias]
def __init__(self, epsilon=1e-4, shape=(), scope=''):
        sess = get_session()

        self._new_mean = tf.placeholder(shape=shape, dtype=tf.float64)
        self._new_var = tf.placeholder(shape=shape, dtype=tf.float64)
        self._new_count = tf.placeholder(shape=(), dtype=tf.float64)


        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            self._mean  = tf.get_variable('mean',  initializer=np.zeros(shape, 'float64'),      dtype=tf.float64)
            self._var   = tf.get_variable('std',   initializer=np.ones(shape, 'float64'),       dtype=tf.float64)
            self._count = tf.get_variable('count', initializer=np.full((), epsilon, 'float64'), dtype=tf.float64)

        self.update_ops = tf.group([
            self._var.assign(self._new_var),
            self._mean.assign(self._new_mean),
            self._count.assign(self._new_count)
        ])

        sess.run(tf.variables_initializer([self._mean, self._var, self._count]))
        self.sess = sess
        self._set_mean_var_count() 
Developer ID: quantumiracle, Project: Reinforcement_Learning_for_Traffic_Light_Control, Lines: 24, Source file: running_mean_std.py

Example 3: test_nonfreeze

# Module to import: from baselines.common import tf_util [as alias]
# Or: from baselines.common.tf_util import get_session [as alias]
def test_nonfreeze():
    np.random.seed(0)
    tf.set_random_seed(0)

    a = tf.Variable(np.random.randn(3).astype('float32'))
    b = tf.Variable(np.random.randn(2,5).astype('float32'))
    loss = tf.reduce_sum(tf.square(a)) + tf.reduce_sum(tf.sin(b))

    stepsize = 1e-2
    # for some reason the session config with inter_op_parallelism_threads was causing
    # nested sess.run calls to freeze
    config = tf.ConfigProto(inter_op_parallelism_threads=1)
    sess = U.get_session(config=config)
    update_op = MpiAdamOptimizer(comm=MPI.COMM_WORLD, learning_rate=stepsize).minimize(loss)
    sess.run(tf.global_variables_initializer())
    losslist_ref = []
    for i in range(100):
        l,_ = sess.run([loss, update_op])
        print(i, l)
        losslist_ref.append(l) 
Developer ID: openai, Project: baselines, Lines: 22, Source file: mpi_adam_optimizer.py

Example 4: add_all_summary

# Module to import: from baselines.common import tf_util [as alias]
# Or: from baselines.common.tf_util import get_session [as alias]
def add_all_summary(self, writer, values, iter):
        # Note that the order of the incoming ```values``` should be the same as that of the
        #            ```scalar_keys``` given in ```__init__```
        if np.sum(np.isnan(values)+0) != 0:
            return
        sess = U.get_session()
        keys = self.scalar_summaries_ph + self.histogram_summaries_ph
        feed_dict = {}
        for k, v in zip(keys, values):
            feed_dict.update({k: v})
        summaries_str = sess.run(self.summaries, feed_dict)
        writer.add_summary(summaries_str, iter) 
Developer ID: Hwhitetooth, Project: lirpg, Lines: 14, Source file: statistics.py
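
The method above expects the enclosing class to have already built scalar_summaries_ph, histogram_summaries_ph, and a merged summaries op. The following self-contained sketch uses illustrative names that are not from the original class, but shows the same pattern: scalar placeholders merged into one summary op, evaluated with a positional feed, and written to TensorBoard.

import tensorflow as tf
from baselines.common import tf_util as U

loss_ph = tf.placeholder(tf.float32, name='loss')
reward_ph = tf.placeholder(tf.float32, name='reward')
summaries = tf.summary.merge([
    tf.summary.scalar('loss', loss_ph),
    tf.summary.scalar('reward', reward_ph),
])

sess = U.get_session()
writer = tf.summary.FileWriter('/tmp/tb_logs', sess.graph)  # illustrative log directory
summary_str = sess.run(summaries, {loss_ph: 0.5, reward_ph: 1.2})
writer.add_summary(summary_str, 10)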

Example 5: _serialize_variables

# Module to import: from baselines.common import tf_util [as alias]
# Or: from baselines.common.tf_util import get_session [as alias]
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()    
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)} 
Developer ID: MaxSobolMark, Project: HardRLWithYoutube, Lines: 7, Source file: test_serialization.py
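
A hedged sketch of how _serialize_variables can be used in a save/load round-trip check (not part of the original test file). save_variables and load_variables from baselines.common.tf_util are called with just a path, matching how they are bound in Example 11; their exact signatures are treated as an assumption here.

import os
import tempfile
import numpy as np
from baselines.common.tf_util import save_variables, load_variables

before = _serialize_variables()
with tempfile.TemporaryDirectory() as td:
    path = os.path.join(td, 'model')
    save_variables(path)   # write all variables of the current session
    load_variables(path)   # restore them into the same session
after = _serialize_variables()

for name, value in before.items():
    np.testing.assert_allclose(value, after[name])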

Example 6: _serialize_variables

# Module to import: from baselines.common import tf_util [as alias]
# Or: from baselines.common.tf_util import get_session [as alias]
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)} 
Developer ID: quantumiracle, Project: Reinforcement_Learning_for_Traffic_Light_Control, Lines: 7, Source file: test_serialization.py

Example 7: add_all_summary

# Module to import: from baselines.common import tf_util [as alias]
# Or: from baselines.common.tf_util import get_session [as alias]
def add_all_summary(self, writer, values, iter):
        # Note that the order of the incoming ```values``` should be the same as that of the
        #            ```scalar_keys``` given in ```__init__```
        if np.sum(np.isnan(values)+0) != 0:
            return
        sess = U.get_session()
        keys = self.scalar_summaries_ph + self.histogram_summaries_ph
        feed_dict = {}
        for k, v in zip(keys, values):
            feed_dict.update({k:v})
        summaries_str = sess.run(self.summaries, feed_dict)
        writer.add_summary(summaries_str, iter) 
Developer ID: wenh123, Project: NoisyNet-DQN, Lines: 14, Source file: statistics.py

Example 8: load_visual_foresight

# Module to import: from baselines.common import tf_util [as alias]
# Or: from baselines.common.tf_util import get_session [as alias]
def load_visual_foresight(game_name):
    sess = U.get_session()
    from baselines.deepq.prediction.tfacvp.model import ActionConditionalVideoPredictionModel
    gen_dir = './atari-visual-foresight/'
    model_path = os.path.join(gen_dir, '{}/model.ckpt'.format(game_name))
    mean_path = os.path.join(gen_dir, '{}/mean.npy'.format(game_name))
    game_screen_mean = np.load(mean_path)
    with tf.variable_scope('G'):
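        # NOTE: `env` is not a parameter of this function; it is assumed to be defined in the enclosing module scope.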
        foresight = ActionConditionalVideoPredictionModel(num_act=env.action_space.n, num_channel=1, is_train=False)
        foresight.restore(sess, model_path, 'G')
    return foresight, game_screen_mean 
Developer ID: yenchenlin, Project: rl-attack-detection, Lines: 13, Source file: enjoy.py

Example 9: load

# Module to import: from baselines.common import tf_util [as alias]
# Or: from baselines.common.tf_util import get_session [as alias]
def load(path):
    with open(path, "rb") as f:
        model_data = cloudpickle.load(f)
    sess = U.get_session()
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)

        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))
    #return ActWrapper(act, act_params) 
Developer ID: alexsax, Project: midlevel-reps, Lines: 15, Source file: pposgd_simple.py

Example 10: load

# Module to import: from baselines.common import tf_util [as alias]
# Or: from baselines.common.tf_util import get_session [as alias]
def load(path):
    with open(path, "rb") as f:
        model_data = cloudpickle.load(f)
    sess = U.get_session()
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)

        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))
    # return ActWrapper(act, act_params) 
Developer ID: alexsax, Project: midlevel-reps, Lines: 15, Source file: pposgd_fuse.py

Example 11: __init__

# Module to import: from baselines.common import tf_util [as alias]
# Or: from baselines.common.tf_util import get_session [as alias]
def __init__(self, policy, env, nsteps,
            ent_coef=0.01, vf_coef=0.5, max_grad_norm=0.5, lr=7e-4,
            alpha=0.99, epsilon=1e-5, total_timesteps=int(80e6), lrschedule='linear'):

        sess = tf_util.get_session()
        nenvs = env.num_envs
        nbatch = nenvs*nsteps


        with tf.variable_scope('a2c_model', reuse=tf.AUTO_REUSE):
            step_model = policy(nenvs, 1, sess)
            train_model = policy(nbatch, nsteps, sess)

        A = tf.placeholder(train_model.action.dtype, train_model.action.shape)
        ADV = tf.placeholder(tf.float32, [nbatch])
        R = tf.placeholder(tf.float32, [nbatch])
        LR = tf.placeholder(tf.float32, [])

        neglogpac = train_model.pd.neglogp(A)
        entropy = tf.reduce_mean(train_model.pd.entropy())

        pg_loss = tf.reduce_mean(ADV * neglogpac)
        vf_loss = losses.mean_squared_error(tf.squeeze(train_model.vf), R)

        loss = pg_loss - entropy*ent_coef + vf_loss * vf_coef

        params = find_trainable_variables("a2c_model")
        grads = tf.gradients(loss, params)
        if max_grad_norm is not None:
            grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
        grads = list(zip(grads, params))
        trainer = tf.train.RMSPropOptimizer(learning_rate=LR, decay=alpha, epsilon=epsilon)
        _train = trainer.apply_gradients(grads)

        lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule)

        def train(obs, states, rewards, masks, actions, values):
            advs = rewards - values
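            # advance the learning-rate schedule once per sample in the batch; the last value is used for this update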
            for step in range(len(obs)):
                cur_lr = lr.value()

            td_map = {train_model.X:obs, A:actions, ADV:advs, R:rewards, LR:cur_lr}
            if states is not None:
                td_map[train_model.S] = states
                td_map[train_model.M] = masks
            policy_loss, value_loss, policy_entropy, _ = sess.run(
                [pg_loss, vf_loss, entropy, _train],
                td_map
            )
            return policy_loss, value_loss, policy_entropy


        self.train = train
        self.train_model = train_model
        self.step_model = step_model
        self.step = step_model.step
        self.value = step_model.value
        self.initial_state = step_model.initial_state
        self.save = functools.partial(tf_util.save_variables, sess=sess)
        self.load = functools.partial(tf_util.load_variables, sess=sess)
        tf.global_variables_initializer().run(session=sess) 
Developer ID: MaxSobolMark, Project: HardRLWithYoutube, Lines: 63, Source file: a2c.py
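
A minimal usage note (not from the original file): self.save and self.load are partials over tf_util.save_variables and tf_util.load_variables bound to this model's session, so each takes just a checkpoint path. Below, model stands for an instance of the enclosing class and the path is illustrative.

model.save('/tmp/a2c_checkpoint')  # writes the model's variables via tf_util.save_variables
model.load('/tmp/a2c_checkpoint')  # restores them via tf_util.load_variables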

Example 12: play

# Module to import: from baselines.common import tf_util [as alias]
# Or: from baselines.common.tf_util import get_session [as alias]
def play(env, act, craft_adv_obs, stochastic, video_path, game_name, attack, defense):
    if defense == 'foresight':
        vf, game_screen_mean = load_visual_foresight(game_name)
        pred_obs = deque(maxlen=4)

    num_episodes = 0
    video_recorder = None
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)

    t = 0
    obs = env.reset()
    while True:
        #env.unwrapped.render()
        video_recorder.capture_frame()

        # Attack
        if craft_adv_obs is not None:
            # Craft adv. examples
            adv_obs = craft_adv_obs(np.array(obs)[None], stochastic=stochastic)[0]
            action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        # Defense
        if t > 4 and defense == 'foresight':
            pred_obs.append(
                foresee(U.get_session(), old_obs, old_action, np.array(obs), game_screen_mean, vf,
                        env.action_space.n, t)
            )
            if len(pred_obs) == 4:
                action = act(np.stack(pred_obs, axis=2)[None], stochastic=stochastic)[0]

        old_obs = obs
        old_action = action

        # RL loop
        obs, rew, done, info = env.step(action)
        t += 1
        if done:
            t = 0
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"]) 
Developer ID: yenchenlin, Project: rl-attack-detection, Lines: 53, Source file: enjoy.py


Note: The baselines.common.tf_util.get_session method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub/MSDocs. The code snippets were selected from open-source projects contributed by various developers; the copyright of the source code belongs to the original authors. For distribution and use, please refer to the license of the corresponding project. Do not reproduce without permission.