本文整理汇总了Python中baselines.a2c.utils.conv_to_fc方法的典型用法代码示例。如果您正苦于以下问题:Python utils.conv_to_fc方法的具体用法?Python utils.conv_to_fc怎么用?Python utils.conv_to_fc使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块baselines.a2c.utils的用法示例。
在下文中一共展示了utils.conv_to_fc方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from baselines.a2c import utils [as 别名]
# 或者: from baselines.a2c.utils import conv_to_fc [as 别名]
def __init__(self, params, ob_space, ac_space, nbatch, nsteps):  # pylint: disable=W0613
    """Build a policy network whose weights are read from ``params``.

    No variables are created here: each layer looks up its weight and bias
    tensors in ``params`` under the keys ``'policy/<layer>/w:0'`` and
    ``'policy/<layer>/b:0'`` for the layers ``c1``, ``c2``, ``c3``, ``fc1``
    and ``pi``.

    Args:
        params: Mapping from the key names above to weight/bias tensors.
        ob_space: Observation space; only ``ob_space.shape`` is read.
        ac_space: Action space, handed to ``make_pdtype``.
        nbatch: Leading dimension of the observation placeholder.
        nsteps: Unused; kept so the signature stays compatible with callers.

    Sets:
        self.pdtype: Result of ``make_pdtype(ac_space)``.
        self.pd: Distribution built from the flat ``pi`` output.
        self.X: The float32 observation placeholder named ``'Ob'``.
    """
    ob_shape = (nbatch,) + ob_space.shape
    X = tf.placeholder(tf.float32, ob_shape, name='Ob')  # obs

    # NOTE(review): the extent of this name scope is assumed to end after
    # ``pi``; it only affects op names, not the computed values.
    with tf.name_scope('policy_new'):
        activ = tf.nn.relu
        h = X / 255.0
        # Three VALID-padded conv layers with spatial strides 4, 2 and 1.
        for layer, stride in (('c1', 4), ('c2', 2), ('c3', 1)):
            w = params['policy/%s/w:0' % layer]
            b = params['policy/%s/b:0' % layer]
            h = activ(tf.nn.conv2d(h, w, [1, stride, stride, 1], 'VALID') + b)
        flat = conv_to_fc(h)
        hidden = activ(tf.nn.xw_plus_b(flat, params['policy/fc1/w:0'], params['policy/fc1/b:0']))
        pi = tf.nn.xw_plus_b(hidden, params['policy/pi/w:0'], params['policy/pi/b:0'])

    self.pdtype = make_pdtype(ac_space)
    self.pd = self.pdtype.pdfromflat(pi)
    self.X = X
示例2: nature_cnn
# 需要导入模块: from baselines.a2c import utils [as 别名]
# 或者: from baselines.a2c.utils import conv_to_fc [as 别名]
def nature_cnn(unscaled_images):
    """
    CNN from Nature paper.

    The input is cast to float32 and divided by 255, passed through three
    ReLU-activated ``conv`` layers (scopes ``c1``, ``c2``, ``c3``), flattened
    with ``conv_to_fc`` and fed to a 512-unit ReLU ``fc`` layer (``fc1``),
    whose output is returned.
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)

    images = tf.cast(unscaled_images, tf.float32) / 255.

    conv1 = conv(images, 'c1', nf=32, rf=8, stride=4, init_scale=gain)
    conv1 = relu(conv1)
    conv2 = conv(conv1, 'c2', nf=64, rf=4, stride=2, init_scale=gain)
    conv2 = relu(conv2)
    conv3 = conv(conv2, 'c3', nf=64, rf=3, stride=1, init_scale=gain)
    conv3 = relu(conv3)

    flat = conv_to_fc(conv3)
    dense = fc(flat, 'fc1', nh=512, init_scale=gain)
    return relu(dense)
示例3: nature_cnn
# 需要导入模块: from baselines.a2c import utils [as 别名]
# 或者: from baselines.a2c.utils import conv_to_fc [as 别名]
def nature_cnn(unscaled_images, **conv_kwargs):
    """
    CNN from Nature paper.

    Args:
        unscaled_images: Input tensor; cast to float32 and divided by 255.
        **conv_kwargs: Extra keyword arguments forwarded to every ``conv``
            call (they are not forwarded to the final ``fc`` layer).

    Returns:
        The ReLU output of the 512-unit ``fc1`` layer.
    """
    relu = tf.nn.relu
    init_scale = np.sqrt(2)

    # (scope, number of filters, filter size, stride) for each conv layer.
    conv_layers = (
        ('c1', 32, 8, 4),
        ('c2', 64, 4, 2),
        ('c3', 64, 3, 1),
    )

    features = tf.cast(unscaled_images, tf.float32) / 255.
    for name, nf, rf, stride in conv_layers:
        features = relu(conv(features, name, nf=nf, rf=rf, stride=stride,
                             init_scale=init_scale, **conv_kwargs))

    flattened = conv_to_fc(features)
    return relu(fc(flattened, 'fc1', nh=512, init_scale=init_scale))
示例4: cnn_small
# 需要导入模块: from baselines.a2c import utils [as 别名]
# 或者: from baselines.a2c.utils import conv_to_fc [as 别名]
def cnn_small(**conv_kwargs):
    """Return a builder for a small two-conv-layer CNN.

    Args:
        **conv_kwargs: Extra keyword arguments forwarded to each ``conv`` call.

    Returns:
        A function ``network_fn(X)`` that builds the network on ``X`` and
        returns the pair ``(features, None)``.
    """
    def network_fn(X):
        relu = tf.nn.relu
        gain = np.sqrt(2)

        # Cast to float32 and divide by 255 before the conv stack.
        net = tf.cast(X, tf.float32) / 255.

        net = conv(net, 'c1', nf=8, rf=8, stride=4, init_scale=gain, **conv_kwargs)
        net = relu(net)
        net = conv(net, 'c2', nf=16, rf=4, stride=2, init_scale=gain, **conv_kwargs)
        net = relu(net)

        net = conv_to_fc(net)
        net = relu(fc(net, 'fc1', nh=128, init_scale=gain))
        return net, None

    return network_fn
示例5: cnn_small
# 需要导入模块: from baselines.a2c import utils [as 别名]
# 或者: from baselines.a2c.utils import conv_to_fc [as 别名]
def cnn_small(**conv_kwargs):
    """Create a network function for a small CNN (two conv layers + one fc).

    Args:
        **conv_kwargs: Extra keyword arguments passed through to ``conv``.

    Returns:
        ``network_fn(X)``, which returns the ReLU output of the 128-unit
        ``fc1`` layer built on top of ``X``.
    """
    def network_fn(X):
        activ = tf.nn.relu
        init_scale = np.sqrt(2)

        out = tf.cast(X, tf.float32) / 255.
        # (scope, number of filters, filter size, stride)
        for scope, nf, rf, stride in (('c1', 8, 8, 4), ('c2', 16, 4, 2)):
            out = activ(conv(out, scope, nf=nf, rf=rf, stride=stride,
                             init_scale=init_scale, **conv_kwargs))

        flat = conv_to_fc(out)
        return activ(fc(flat, 'fc1', nh=128, init_scale=init_scale))

    return network_fn
示例6: nature_cnn
# 需要导入模块: from baselines.a2c import utils [as 别名]
# 或者: from baselines.a2c.utils import conv_to_fc [as 别名]
def nature_cnn(unscaled_images, first_layer_mode='', trainable=True):
    """Build the fully connected head on top of ``nature_cnn_h3``.

    Args:
        unscaled_images: Input tensor, forwarded to ``nature_cnn_h3``.
        first_layer_mode: Forwarded unchanged to ``nature_cnn_h3``.
        trainable: Forwarded to ``nature_cnn_h3`` and to the ``fc1`` layer.

    Returns:
        The ReLU output of the 512-unit ``fc1`` layer.
    """
    # Only the first element returned by nature_cnn_h3 is used here.
    conv_out, _ = nature_cnn_h3(unscaled_images, first_layer_mode, trainable)
    flat = conv_to_fc(conv_out)
    hidden = fc(flat, 'fc1', nh=512, init_scale=np.sqrt(2), trainable=trainable)
    return tf.nn.relu(hidden)
示例7: __init__
# 需要导入模块: from baselines.a2c import utils [as 别名]
# 或者: from baselines.a2c.utils import conv_to_fc [as 别名]
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, nlstm=256, reuse=False):
    """Build a conv + ``lnlstm`` policy/value graph and its run helpers.

    Args:
        sess: Session used by ``step`` and ``value`` to run the graph.
        ob_space: Observation space; ``shape`` is unpacked as (nh, nw, nc).
        ac_space: Action space; ``ac_space.n`` sizes the ``pi`` layer.
        nenv: Number of environments.
        nsteps: Steps per environment; the batch size is ``nenv * nsteps``.
        nstack: Multiplier applied to the channel dimension of the input.
        nlstm: Size passed to ``lnlstm`` as ``nh``; the state is ``2 * nlstm`` wide.
        reuse: Passed to ``tf.variable_scope("model", ...)``.
    """
    nbatch = nenv * nsteps
    height, width, channels = ob_space.shape
    nact = ac_space.n
    state_width = nlstm * 2

    X = tf.placeholder(tf.uint8, (nbatch, height, width, channels * nstack))  # obs
    M = tf.placeholder(tf.float32, [nbatch])  # mask (done t-1)
    S = tf.placeholder(tf.float32, [nenv, state_width])  # states

    gain = np.sqrt(2)
    with tf.variable_scope("model", reuse=reuse):
        scaled = tf.cast(X, tf.float32) / 255.
        c1 = conv(scaled, 'c1', nf=32, rf=8, stride=4, init_scale=gain)
        c2 = conv(c1, 'c2', nf=64, rf=4, stride=2, init_scale=gain)
        c3 = conv(c2, 'c3', nf=64, rf=3, stride=1, init_scale=gain)
        flat = conv_to_fc(c3)
        dense = fc(flat, 'fc1', nh=512, init_scale=gain)

        # Convert batch tensors to sequences for the LSTM, then back again.
        seq_inputs = batch_to_seq(dense, nenv, nsteps)
        seq_masks = batch_to_seq(M, nenv, nsteps)
        lstm_out, snew = lnlstm(seq_inputs, seq_masks, S, 'lstm1', nh=nlstm)
        lstm_out = seq_to_batch(lstm_out)

        pi = fc(lstm_out, 'pi', nact, act=lambda x: x)
        vf = fc(lstm_out, 'v', 1, act=lambda x: x)

    v0 = vf[:, 0]
    a0 = sample(pi)
    self.initial_state = np.zeros((nenv, state_width), dtype=np.float32)

    def step(ob, state, mask):
        # Returns (actions, values, new state) for the given inputs.
        feed = {X: ob, S: state, M: mask}
        actions, values, new_state = sess.run([a0, v0, snew], feed)
        return actions, values, new_state

    def value(ob, state, mask):
        # Returns only the value estimates.
        feed = {X: ob, S: state, M: mask}
        return sess.run(v0, feed)

    self.X = X
    self.M = M
    self.S = S
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
示例8: __init__
# 需要导入模块: from baselines.a2c import utils [as 别名]
# 或者: from baselines.a2c.utils import conv_to_fc [as 别名]
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reuse=False):
    """Build a conv network with a softmax policy head and a Q-value head.

    Args:
        sess: Session used by ``step``, ``out`` and ``act`` to run the graph.
        ob_space: Observation space; ``shape`` is unpacked as (nh, nw, nc).
        ac_space: Action space; ``ac_space.n`` sizes both heads.
        nenv: Number of environments.
        nsteps: Steps per environment; the batch size is ``nenv * nsteps``.
        nstack: Multiplier applied to the channel dimension of the input.
        reuse: Passed to ``tf.variable_scope("model", ...)``.
    """
    nbatch = nenv * nsteps
    height, width, channels = ob_space.shape
    nact = ac_space.n

    X = tf.placeholder(tf.uint8, (nbatch, height, width, channels * nstack))  # obs

    gain = np.sqrt(2)
    with tf.variable_scope("model", reuse=reuse):
        scaled = tf.cast(X, tf.float32) / 255.
        c1 = conv(scaled, 'c1', nf=32, rf=8, stride=4, init_scale=gain)
        c2 = conv(c1, 'c2', nf=64, rf=4, stride=2, init_scale=gain)
        c3 = conv(c2, 'c3', nf=64, rf=3, stride=1, init_scale=gain)
        flat = conv_to_fc(c3)
        dense = fc(flat, 'fc1', nh=512, init_scale=gain)

        pi_logits = fc(dense, 'pi', nact, act=lambda x: x, init_scale=0.01)
        pi = tf.nn.softmax(pi_logits)
        q = fc(dense, 'q', nact, act=lambda x: x)

    a = sample(pi_logits)  # could change this to use self.pi instead

    def step(ob, *args, **kwargs):
        # returns actions, mus, states
        actions, mus = sess.run([a, pi], {X: ob})
        return actions, mus, []  # dummy state

    def out(ob, *args, **kwargs):
        # Returns the policy probabilities and the Q-values.
        probs, q_values = sess.run([pi, q], {X: ob})
        return probs, q_values

    def act(ob, *args, **kwargs):
        # Returns sampled actions only.
        return sess.run(a, {X: ob})

    self.initial_state = []  # not stateful
    self.X = X
    self.pi = pi  # actual policy params now
    self.q = q
    self.step = step
    self.out = out
    self.act = act