本文整理汇总了Python中baselines.common.distributions.make_pdtype方法的典型用法代码示例。如果您正苦于以下问题:Python distributions.make_pdtype方法的具体用法?Python distributions.make_pdtype怎么用?Python distributions.make_pdtype使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类baselines.common.distributions
的用法示例。
在下文中一共展示了distributions.make_pdtype方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from baselines.common import distributions [as 别名]
# 或者: from baselines.common.distributions import make_pdtype [as 别名]
def __init__(self, params, ob_space, ac_space, nbatch, nsteps): #pylint: disable=W0613
nenv = nbatch // nsteps
ob_shape = (nbatch,) + ob_space.shape
nact = ac_space.n
X = tf.placeholder(tf.float32, ob_shape, name='Ob') #obs
with tf.name_scope('policy_new'):
activ = tf.nn.relu
h1 = activ(tf.nn.conv2d(X/255.0, params['policy/c1/w:0'], [1, 4, 4, 1], 'VALID') + params['policy/c1/b:0'])
h2 = activ(tf.nn.conv2d(h1, params['policy/c2/w:0'], [1, 2, 2, 1], 'VALID') + params['policy/c2/b:0'])
h3 = activ(tf.nn.conv2d(h2, params['policy/c3/w:0'], [1, 1, 1, 1], 'VALID') + params['policy/c3/b:0'])
h3 = conv_to_fc(h3)
h4 = activ(tf.nn.xw_plus_b(h3, params['policy/fc1/w:0'], params['policy/fc1/b:0']))
pi = tf.nn.xw_plus_b(h4, params['policy/pi/w:0'], params['policy/pi/b:0'])
self.pdtype = make_pdtype(ac_space)
self.pd = self.pdtype.pdfromflat(pi)
self.X = X
示例2: __init__
# 需要导入模块: from baselines.common import distributions [as 别名]
# 或者: from baselines.common.distributions import make_pdtype [as 别名]
def __init__(self, params, ob_space, ac_space, nbatch, nsteps): #pylint: disable=W0613
ob_shape = (nbatch,) + ob_space.shape
X = tf.placeholder(tf.float32, ob_shape, name='Ob') #obs
with tf.name_scope('policy_new'):
activ = tf.tanh
h1 = activ(tf.nn.xw_plus_b(X, params['policy/pi_fc1/w:0'], params['policy/pi_fc1/b:0']))
h2 = activ(tf.nn.xw_plus_b(h1, params['policy/pi_fc2/w:0'], params['policy/pi_fc2/b:0']))
pi = tf.nn.xw_plus_b(h2, params['policy/pi/w:0'], params['policy/pi/b:0'])
logstd = params['policy/logstd:0']
pdparam = tf.concat([pi, pi * 0.0 + logstd], axis=1)
self.pdtype = make_pdtype(ac_space)
self.pd = self.pdtype.pdfromflat(pdparam)
self.X = X
示例3: __init__
# 需要导入模块: from baselines.common import distributions [as 别名]
# 或者: from baselines.common.distributions import make_pdtype [as 别名]
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False, **conv_kwargs): #pylint: disable=W0613
nh, nw, nc = ob_space.shape
ob_shape = (nbatch, nh, nw, nc)
self.pdtype = make_pdtype(ac_space)
X = tf.placeholder(tf.uint8, ob_shape) #obs
with tf.variable_scope("model", reuse=reuse):
h = nature_cnn(X, **conv_kwargs)
vf = fc(h, 'v', 1)[:,0]
self.pd, self.pi = self.pdtype.pdfromlatent(h, init_scale=0.01)
a0 = self.pd.sample()
neglogp0 = self.pd.neglogp(a0)
self.initial_state = None
def step(ob, *_args, **_kwargs):
a, v, neglogp = sess.run([a0, vf, neglogp0], {X:ob})
return a, v, self.initial_state, neglogp
def value(ob, *_args, **_kwargs):
return sess.run(vf, {X:ob})
self.X = X
self.vf = vf
self.step = step
self.value = value
示例4: __init__
# 需要导入模块: from baselines.common import distributions [as 别名]
# 或者: from baselines.common.distributions import make_pdtype [as 别名]
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False, **conv_kwargs): #pylint: disable=W0613
self.pdtype = make_pdtype(ac_space)
X, processed_x = observation_input(ob_space, nbatch)
with tf.variable_scope("model", reuse=reuse):
h = nature_cnn(processed_x, **conv_kwargs)
vf = fc(h, 'v', 1)[:,0]
self.pd, self.pi = self.pdtype.pdfromlatent(h, init_scale=0.01)
a0 = self.pd.sample()
neglogp0 = self.pd.neglogp(a0)
self.initial_state = None
def step(ob, *_args, **_kwargs):
a, v, neglogp = sess.run([a0, vf, neglogp0], {X:ob})
return a, v, self.initial_state, neglogp
def value(ob, *_args, **_kwargs):
return sess.run(vf, {X:ob})
self.X = X
self.vf = vf
self.step = step
self.value = value
示例5: _init
# 需要导入模块: from baselines.common import distributions [as 别名]
# 或者: from baselines.common.distributions import make_pdtype [as 别名]
def _init(self, ob_space, ac_space, hid_size, num_hid_layers, gaussian_fixed_var=True):
assert isinstance(ob_space, gym.spaces.Box)
self.pdtype = pdtype = make_pdtype(ac_space)
sequence_length = None
ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))
with tf.variable_scope("obfilter"):
self.ob_rms = RunningMeanStd(shape=ob_space.shape)
obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std, -5.0, 5.0)
last_out = obz
for i in range(num_hid_layers):
last_out = tf.nn.tanh(dense(last_out, hid_size, "vffc%i" % (i+1), weight_init=U.normc_initializer(1.0)))
self.vpred = dense(last_out, 1, "vffinal", weight_init=U.normc_initializer(1.0))[:, 0]
last_out = obz
for i in range(num_hid_layers):
last_out = tf.nn.tanh(dense(last_out, hid_size, "polfc%i" % (i+1), weight_init=U.normc_initializer(1.0)))
if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
mean = dense(last_out, pdtype.param_shape()[0]//2, "polfinal", U.normc_initializer(0.01))
logstd = tf.get_variable(name="logstd", shape=[1, pdtype.param_shape()[0]//2], initializer=tf.zeros_initializer())
pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
else:
pdparam = dense(last_out, pdtype.param_shape()[0], "polfinal", U.normc_initializer(0.01))
self.pd = pdtype.pdfromflat(pdparam)
self.state_in = []
self.state_out = []
# change for BC
stochastic = U.get_placeholder(name="stochastic", dtype=tf.bool, shape=())
ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
self.ac = ac
self._act = U.function([stochastic, ob], [ac, self.vpred])
示例6: _init
# 需要导入模块: from baselines.common import distributions [as 别名]
# 或者: from baselines.common.distributions import make_pdtype [as 别名]
def _init(self, ob_space, ac_space):
assert isinstance(ob_space, gym.spaces.Box)
self.pdtype = pdtype = make_pdtype(ac_space)
sequence_length = None
ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))
obscaled = ob / 255.0
with tf.variable_scope("pol"):
x = obscaled
x = tf.nn.relu(U.conv2d(x, 8, "l1", [8, 8], [4, 4], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 16, "l2", [4, 4], [2, 2], pad="VALID"))
x = U.flattenallbut0(x)
x = tf.nn.relu(tf.layers.dense(x, 128, name='lin', kernel_initializer=U.normc_initializer(1.0)))
logits = tf.layers.dense(x, pdtype.param_shape()[0], name='logits', kernel_initializer=U.normc_initializer(0.01))
self.pd = pdtype.pdfromflat(logits)
with tf.variable_scope("vf"):
x = obscaled
x = tf.nn.relu(U.conv2d(x, 8, "l1", [8, 8], [4, 4], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 16, "l2", [4, 4], [2, 2], pad="VALID"))
x = U.flattenallbut0(x)
x = tf.nn.relu(tf.layers.dense(x, 128, name='lin', kernel_initializer=U.normc_initializer(1.0)))
self.vpred = tf.layers.dense(x, 1, name='value', kernel_initializer=U.normc_initializer(1.0))
self.vpredz = self.vpred
self.state_in = []
self.state_out = []
stochastic = tf.placeholder(dtype=tf.bool, shape=())
ac = self.pd.sample()
self._act = U.function([stochastic, ob], [ac, self.vpred])
示例7: _init
# 需要导入模块: from baselines.common import distributions [as 别名]
# 或者: from baselines.common.distributions import make_pdtype [as 别名]
def _init(self, ob_space, ac_space, kind):
assert isinstance(ob_space, gym.spaces.Box)
self.pdtype = pdtype = make_pdtype(ac_space)
sequence_length = None
ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))
x = ob / 255.0
if kind == 'small': # from A3C paper
x = tf.nn.relu(U.conv2d(x, 16, "l1", [8, 8], [4, 4], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 32, "l2", [4, 4], [2, 2], pad="VALID"))
x = U.flattenallbut0(x)
x = tf.nn.relu(tf.layers.dense(x, 256, name='lin', kernel_initializer=U.normc_initializer(1.0)))
elif kind == 'large': # Nature DQN
x = tf.nn.relu(U.conv2d(x, 32, "l1", [8, 8], [4, 4], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 64, "l2", [4, 4], [2, 2], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 64, "l3", [3, 3], [1, 1], pad="VALID"))
x = U.flattenallbut0(x)
x = tf.nn.relu(tf.layers.dense(x, 512, name='lin', kernel_initializer=U.normc_initializer(1.0)))
else:
raise NotImplementedError
logits = tf.layers.dense(x, pdtype.param_shape()[0], name='logits', kernel_initializer=U.normc_initializer(0.01))
self.pd = pdtype.pdfromflat(logits)
self.vpred = tf.layers.dense(x, 1, name='value', kernel_initializer=U.normc_initializer(1.0))[:,0]
self.state_in = []
self.state_out = []
stochastic = tf.placeholder(dtype=tf.bool, shape=())
ac = self.pd.sample() # XXX
self._act = U.function([stochastic, ob], [ac, self.vpred])
示例8: _init
# 需要导入模块: from baselines.common import distributions [as 别名]
# 或者: from baselines.common.distributions import make_pdtype [as 别名]
def _init(self, ob_space, ac_space, hid_size, num_hid_layers, gaussian_fixed_var=True):
assert isinstance(ob_space, gym.spaces.Box)
self.pdtype = pdtype = make_pdtype(ac_space)
sequence_length = None
ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))
with tf.variable_scope("obfilter"):
self.ob_rms = RunningMeanStd(shape=ob_space.shape)
with tf.variable_scope('vf'):
obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std, -5.0, 5.0)
last_out = obz
for i in range(num_hid_layers):
last_out = tf.nn.tanh(tf.layers.dense(last_out, hid_size, name="fc%i"%(i+1), kernel_initializer=U.normc_initializer(1.0)))
self.vpred = tf.layers.dense(last_out, 1, name='final', kernel_initializer=U.normc_initializer(1.0))[:,0]
with tf.variable_scope('pol'):
last_out = obz
for i in range(num_hid_layers):
last_out = tf.nn.tanh(tf.layers.dense(last_out, hid_size, name='fc%i'%(i+1), kernel_initializer=U.normc_initializer(1.0)))
if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
mean = tf.layers.dense(last_out, pdtype.param_shape()[0]//2, name='final', kernel_initializer=U.normc_initializer(0.01))
logstd = tf.get_variable(name="logstd", shape=[1, pdtype.param_shape()[0]//2], initializer=tf.zeros_initializer())
pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
else:
pdparam = tf.layers.dense(last_out, pdtype.param_shape()[0], name='final', kernel_initializer=U.normc_initializer(0.01))
self.pd = pdtype.pdfromflat(pdparam)
self.state_in = []
self.state_out = []
stochastic = tf.placeholder(dtype=tf.bool, shape=())
ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
self._act = U.function([stochastic, ob], [ac, self.vpred])
示例9: __init__
# 需要导入模块: from baselines.common import distributions [as 别名]
# 或者: from baselines.common.distributions import make_pdtype [as 别名]
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, nlstm=256, reuse=False):
nenv = nbatch // nsteps
nh, nw, nc = ob_space.shape
ob_shape = (nbatch, nh, nw, nc)
nact = ac_space.n
X = tf.placeholder(tf.uint8, ob_shape) #obs
M = tf.placeholder(tf.float32, [nbatch]) #mask (done t-1)
S = tf.placeholder(tf.float32, [nenv, nlstm*2]) #states
with tf.variable_scope("model", reuse=reuse):
h = nature_cnn(X)
xs = batch_to_seq(h, nenv, nsteps)
ms = batch_to_seq(M, nenv, nsteps)
h5, snew = lnlstm(xs, ms, S, 'lstm1', nh=nlstm)
h5 = seq_to_batch(h5)
pi = fc(h5, 'pi', nact)
vf = fc(h5, 'v', 1)
self.pdtype = make_pdtype(ac_space)
self.pd = self.pdtype.pdfromflat(pi)
v0 = vf[:, 0]
a0 = self.pd.sample()
neglogp0 = self.pd.neglogp(a0)
self.initial_state = np.zeros((nenv, nlstm*2), dtype=np.float32)
def step(ob, state, mask):
return sess.run([a0, v0, snew, neglogp0], {X:ob, S:state, M:mask})
def value(ob, state, mask):
return sess.run(v0, {X:ob, S:state, M:mask})
self.X = X
self.M = M
self.S = S
self.pi = pi
self.vf = vf
self.step = step
self.value = value