This article collects typical usage examples of the Python method baselines.common.tf_util.get_session. If you are unsure how to call tf_util.get_session or what it is used for, the curated code examples below may help. You can also explore further usage examples of the enclosing module, baselines.common.tf_util.
Twelve code examples of tf_util.get_session are shown below, sorted by popularity by default.
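Before the examples, here is a minimal sketch of the pattern they all share: get_session() returns the current default TensorFlow session, creating one (optionally with a custom config) if none exists yet. This is a hedged illustration assuming TensorFlow 1.x and a standard baselines checkout; the tensors are purely illustrative.

import numpy as np
import tensorflow as tf
from baselines.common.tf_util import get_session

# Reuses the default session if one exists, otherwise creates one.
sess = get_session()
x = tf.constant(np.ones(3, dtype=np.float32))
print(sess.run(tf.reduce_sum(x)))  # -> 3.0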
Example 1: __init__
# Required import: from baselines.common import tf_util
# or: from baselines.common.tf_util import get_session
def __init__(self, epsilon=1e-4, shape=(), scope=''):
    sess = get_session()

    self._new_mean = tf.placeholder(shape=shape, dtype=tf.float64)
    self._new_var = tf.placeholder(shape=shape, dtype=tf.float64)
    self._new_count = tf.placeholder(shape=(), dtype=tf.float64)

    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        self._mean = tf.get_variable('mean', initializer=np.zeros(shape, 'float64'), dtype=tf.float64)
        self._var = tf.get_variable('std', initializer=np.ones(shape, 'float64'), dtype=tf.float64)
        self._count = tf.get_variable('count', initializer=np.full((), epsilon, 'float64'), dtype=tf.float64)

    self.update_ops = tf.group([
        self._var.assign(self._new_var),
        self._mean.assign(self._new_mean),
        self._count.assign(self._new_count)
    ])

    sess.run(tf.variables_initializer([self._mean, self._var, self._count]))
    self.sess = sess
    self._set_mean_var_count()
Example 2: __init__
# Required import: from baselines.common import tf_util
# or: from baselines.common.tf_util import get_session
def __init__(self, epsilon=1e-4, shape=(), scope=''):
    sess = get_session()

    self._new_mean = tf.placeholder(shape=shape, dtype=tf.float64)
    self._new_var = tf.placeholder(shape=shape, dtype=tf.float64)
    self._new_count = tf.placeholder(shape=(), dtype=tf.float64)

    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        self._mean = tf.get_variable('mean', initializer=np.zeros(shape, 'float64'), dtype=tf.float64)
        self._var = tf.get_variable('std', initializer=np.ones(shape, 'float64'), dtype=tf.float64)
        self._count = tf.get_variable('count', initializer=np.full((), epsilon, 'float64'), dtype=tf.float64)

    self.update_ops = tf.group([
        self._var.assign(self._new_var),
        self._mean.assign(self._new_mean),
        self._count.assign(self._new_count)
    ])

    sess.run(tf.variables_initializer([self._mean, self._var, self._count]))
    self.sess = sess
    self._set_mean_var_count()
Author: quantumiracle · Project: Reinforcement_Learning_for_Traffic_Light_Control · Lines: 24 · Source file: running_mean_std.py
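Examples 1 and 2 only build the TensorFlow side of a running mean/std tracker (in baselines this lives in running_mean_std.py); the _set_mean_var_count helper and the update logic are not shown. The sketch below is a hypothetical driver for update_ops: it simply overwrites the stored statistics with batch statistics, whereas the real class merges old and new statistics before assigning. The class name and feed values are assumptions.

# Hypothetical usage of the ops defined above; the class name and the
# direct overwrite of the statistics are illustrative only.
rms = TfRunningMeanStd(shape=(4,), scope='obs_rms')
batch = np.random.randn(32, 4)
rms.sess.run(rms.update_ops, feed_dict={
    rms._new_mean: batch.mean(axis=0),
    rms._new_var: batch.var(axis=0),
    rms._new_count: float(len(batch)),
})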
Example 3: test_nonfreeze
# Required import: from baselines.common import tf_util
# or: from baselines.common.tf_util import get_session
def test_nonfreeze():
    np.random.seed(0)
    tf.set_random_seed(0)

    a = tf.Variable(np.random.randn(3).astype('float32'))
    b = tf.Variable(np.random.randn(2, 5).astype('float32'))
    loss = tf.reduce_sum(tf.square(a)) + tf.reduce_sum(tf.sin(b))

    stepsize = 1e-2
    # For some reason the session config with inter_op_parallelism_threads was causing
    # nested sess.run calls to freeze.
    config = tf.ConfigProto(inter_op_parallelism_threads=1)
    sess = U.get_session(config=config)

    update_op = MpiAdamOptimizer(comm=MPI.COMM_WORLD, learning_rate=stepsize).minimize(loss)
    sess.run(tf.global_variables_initializer())

    losslist_ref = []
    for i in range(100):
        l, _ = sess.run([loss, update_op])
        print(i, l)
        losslist_ref.append(l)
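The snippet stops after collecting losslist_ref. A simple follow-up check, not part of the original test, could assert that MpiAdamOptimizer actually drives the loss down; since gradients are averaged over MPI.COMM_WORLD, the same script also runs under mpirun with multiple ranks.

# Hypothetical sanity check appended after the loop above:
assert losslist_ref[-1] < losslist_ref[0], \
    "loss should decrease over 100 MpiAdam steps"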
Example 4: add_all_summary
# Required import: from baselines.common import tf_util
# or: from baselines.common.tf_util import get_session
def add_all_summary(self, writer, values, iter):
    # Note that the order of the incoming `values` must match that of the
    # `scalar_keys` given in `__init__`.
    if np.sum(np.isnan(values) + 0) != 0:
        return
    sess = U.get_session()
    keys = self.scalar_summaries_ph + self.histogram_summaries_ph
    feed_dict = {}
    for k, v in zip(keys, values):
        feed_dict.update({k: v})
    summaries_str = sess.run(self.summaries, feed_dict)
    writer.add_summary(summaries_str, iter)
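add_all_summary relies on self.scalar_summaries_ph, self.histogram_summaries_ph and self.summaries being built elsewhere, which the excerpt does not show. A hypothetical constructor compatible with it might look like the sketch below; the attribute names mirror the method above, everything else is an assumption rather than the original class.

# Hypothetical __init__ for the class that owns add_all_summary:
def __init__(self, scalar_keys=(), histogram_keys=()):
    self.scalar_keys = list(scalar_keys)
    self.scalar_summaries_ph = [
        tf.placeholder(tf.float32, shape=[], name=k + '_ph') for k in scalar_keys]
    self.histogram_summaries_ph = [
        tf.placeholder(tf.float32, name=k + '_hist_ph') for k in histogram_keys]
    scalar_summaries = [
        tf.summary.scalar(k, ph)
        for k, ph in zip(scalar_keys, self.scalar_summaries_ph)]
    histogram_summaries = [
        tf.summary.histogram(k, ph)
        for k, ph in zip(histogram_keys, self.histogram_summaries_ph)]
    self.summaries = tf.summary.merge(scalar_summaries + histogram_summaries)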
Example 5: _serialize_variables
# Required import: from baselines.common import tf_util
# or: from baselines.common.tf_util import get_session
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)}
Example 6: _serialize_variables
# Required import: from baselines.common import tf_util
# or: from baselines.common.tf_util import get_session
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)}
Author: quantumiracle · Project: Reinforcement_Learning_for_Traffic_Light_Control · Lines: 7 · Source file: test_serialization.py
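A common reason to snapshot variables like this is to verify that a save/load round-trip preserves them. The sketch below is a hedged example using tf_util.save_variables and tf_util.load_variables from baselines; the temporary-path handling is illustrative.

import os
import tempfile
import numpy as np
from baselines.common.tf_util import save_variables, load_variables

with tempfile.TemporaryDirectory() as td:
    path = os.path.join(td, "weights")
    before = _serialize_variables()
    save_variables(path)
    load_variables(path)
    after = _serialize_variables()
    for name, value in before.items():
        np.testing.assert_allclose(value, after[name])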
Example 7: add_all_summary
# Required import: from baselines.common import tf_util
# or: from baselines.common.tf_util import get_session
def add_all_summary(self, writer, values, iter):
    # Note that the order of the incoming `values` must match that of the
    # `scalar_keys` given in `__init__`.
    if np.sum(np.isnan(values) + 0) != 0:
        return
    sess = U.get_session()
    keys = self.scalar_summaries_ph + self.histogram_summaries_ph
    feed_dict = {}
    for k, v in zip(keys, values):
        feed_dict.update({k: v})
    summaries_str = sess.run(self.summaries, feed_dict)
    writer.add_summary(summaries_str, iter)
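To get these summaries onto disk for TensorBoard, the writer argument is normally a tf.summary.FileWriter. A short hedged usage sketch follows; stats_logger is a hypothetical instance of the class that defines add_all_summary.

writer = tf.summary.FileWriter('./logs', tf.get_default_graph())
values = [123.0, 0.7]  # one entry per key handed to __init__
stats_logger.add_all_summary(writer, values, iter=0)
writer.flush()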
Example 8: load_visual_foresight
# Required import: from baselines.common import tf_util
# or: from baselines.common.tf_util import get_session
def load_visual_foresight(game_name):
    sess = U.get_session()
    from baselines.deepq.prediction.tfacvp.model import ActionConditionalVideoPredictionModel

    gen_dir = './atari-visual-foresight/'
    model_path = os.path.join(gen_dir, '{}/model.ckpt'.format(game_name))
    mean_path = os.path.join(gen_dir, '{}/mean.npy'.format(game_name))
    game_screen_mean = np.load(mean_path)

    with tf.variable_scope('G'):
        # `env` is not a parameter here; it is assumed to be defined at module scope.
        foresight = ActionConditionalVideoPredictionModel(
            num_act=env.action_space.n, num_channel=1, is_train=False)
        foresight.restore(sess, model_path, 'G')

    return foresight, game_screen_mean
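Note that env is a free variable inside load_visual_foresight: the function assumes a module-level environment whose action space sizes the prediction network, and that pretrained checkpoints live under ./atari-visual-foresight/<game_name>/. A hedged usage sketch, with the game name purely illustrative:

# Assumes a global `env` already exists and that
# ./atari-visual-foresight/Breakout/ contains model.ckpt and mean.npy.
foresight_model, screen_mean = load_visual_foresight('Breakout')
# foresight_model predicts the next frame from the current frame and action;
# screen_mean is used to normalize frames before prediction.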
Example 9: load
# Required import: from baselines.common import tf_util
# or: from baselines.common.tf_util import get_session
def load(path):
    with open(path, "rb") as f:
        model_data = cloudpickle.load(f)
    sess = U.get_session()
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)
        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))
    # return ActWrapper(act, act_params)
Example 10: load
# Required import: from baselines.common import tf_util
# or: from baselines.common.tf_util import get_session
def load(path):
    with open(path, "rb") as f:
        model_data = cloudpickle.load(f)
    sess = U.get_session()
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)
        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))
    # return ActWrapper(act, act_params)
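Both load variants restore weights into the current default session via U.load_state; the commented-out return hints that the full deepq version wraps the result in an ActWrapper, which this excerpt omits. A hedged usage sketch:

# Hypothetical call; "model.pkl" is whatever path the training run saved to.
load("model.pkl")
sess = U.get_session()  # the restored weights now live in this session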
Example 11: __init__
# Required import: from baselines.common import tf_util
# or: from baselines.common.tf_util import get_session
def __init__(self, policy, env, nsteps,
             ent_coef=0.01, vf_coef=0.5, max_grad_norm=0.5, lr=7e-4,
             alpha=0.99, epsilon=1e-5, total_timesteps=int(80e6), lrschedule='linear'):

    sess = tf_util.get_session()
    nenvs = env.num_envs
    nbatch = nenvs * nsteps

    with tf.variable_scope('a2c_model', reuse=tf.AUTO_REUSE):
        # step_model is used for sampling, train_model for optimization.
        step_model = policy(nenvs, 1, sess)
        train_model = policy(nbatch, nsteps, sess)

    A = tf.placeholder(train_model.action.dtype, train_model.action.shape)
    ADV = tf.placeholder(tf.float32, [nbatch])
    R = tf.placeholder(tf.float32, [nbatch])
    LR = tf.placeholder(tf.float32, [])

    # Policy-gradient, value and entropy terms of the A2C loss.
    neglogpac = train_model.pd.neglogp(A)
    entropy = tf.reduce_mean(train_model.pd.entropy())
    pg_loss = tf.reduce_mean(ADV * neglogpac)
    vf_loss = losses.mean_squared_error(tf.squeeze(train_model.vf), R)
    loss = pg_loss - entropy * ent_coef + vf_loss * vf_coef

    # Gradients, optional clipping, and the RMSProp update.
    params = find_trainable_variables("a2c_model")
    grads = tf.gradients(loss, params)
    if max_grad_norm is not None:
        grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
    grads = list(zip(grads, params))
    trainer = tf.train.RMSPropOptimizer(learning_rate=LR, decay=alpha, epsilon=epsilon)
    _train = trainer.apply_gradients(grads)

    lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule)

    def train(obs, states, rewards, masks, actions, values):
        # Advantage = discounted rewards - value estimates.
        advs = rewards - values
        for step in range(len(obs)):
            cur_lr = lr.value()

        td_map = {train_model.X: obs, A: actions, ADV: advs, R: rewards, LR: cur_lr}
        if states is not None:
            td_map[train_model.S] = states
            td_map[train_model.M] = masks
        policy_loss, value_loss, policy_entropy, _ = sess.run(
            [pg_loss, vf_loss, entropy, _train],
            td_map
        )
        return policy_loss, value_loss, policy_entropy

    self.train = train
    self.train_model = train_model
    self.step_model = step_model
    self.step = step_model.step
    self.value = step_model.value
    self.initial_state = step_model.initial_state
    self.save = functools.partial(tf_util.save_variables, sess=sess)
    self.load = functools.partial(tf_util.load_variables, sess=sess)
    tf.global_variables_initializer().run(session=sess)
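A hedged sketch of how this A2C Model is typically constructed and driven; policy_fn, vec_env and the rollout tuple are assumptions standing in for baselines' policy-builder and Runner machinery, not part of the excerpt.

# policy_fn(nbatch, nsteps, sess) must return an object exposing
# X, S, M, pd, vf, action, step, value and initial_state, as assumed above.
model = Model(policy=policy_fn, env=vec_env, nsteps=5)

obs, states, rewards, masks, actions, values = rollout  # hypothetical batch
policy_loss, value_loss, policy_entropy = model.train(
    obs, states, rewards, masks, actions, values)
model.save('a2c_checkpoint')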
Example 12: play
# Required import: from baselines.common import tf_util
# or: from baselines.common.tf_util import get_session
def play(env, act, craft_adv_obs, stochastic, video_path, game_name, attack, defense):
    if defense == 'foresight':
        vf, game_screen_mean = load_visual_foresight(game_name)
        pred_obs = deque(maxlen=4)

    num_episodes = 0
    video_recorder = None
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    t = 0
    obs = env.reset()

    while True:
        # env.unwrapped.render()
        video_recorder.capture_frame()

        # Attack
        if craft_adv_obs is not None:
            # Craft adversarial examples
            adv_obs = craft_adv_obs(np.array(obs)[None], stochastic=stochastic)[0]
            action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        # Defense
        if t > 4 and defense == 'foresight':
            pred_obs.append(
                foresee(U.get_session(), old_obs, old_action, np.array(obs), game_screen_mean, vf,
                        env.action_space.n, t)
            )
            if len(pred_obs) == 4:
                action = act(np.stack(pred_obs, axis=2)[None], stochastic=stochastic)[0]

        old_obs = obs
        old_action = action

        # RL loop
        obs, rew, done, info = env.step(action)
        t += 1

        if done:
            t = 0
            obs = env.reset()

        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # Save video of the first episode only.
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])
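A hedged example of invoking play without any attack or defense; act would come from a trained deepq model, and every argument value here is illustrative.

play(env, act,
     craft_adv_obs=None,   # no adversarial perturbation
     stochastic=True,
     video_path=None,      # disables video recording
     game_name='Pong',
     attack=None,
     defense=None)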