本文整理汇总了Python中baselines.common.tf_util.flattenallbut0方法的典型用法代码示例。如果您正苦于以下问题:Python tf_util.flattenallbut0方法的具体用法?Python tf_util.flattenallbut0怎么用?Python tf_util.flattenallbut0使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类baselines.common.tf_util
的用法示例。
在下文中一共展示了tf_util.flattenallbut0方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _init
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import flattenallbut0 [as 别名]
def _init(self, ob_space, ac_space):
assert isinstance(ob_space, gym.spaces.Box)
self.pdtype = pdtype = make_pdtype(ac_space)
sequence_length = None
ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))
obscaled = ob / 255.0
with tf.variable_scope("pol"):
x = obscaled
x = tf.nn.relu(U.conv2d(x, 8, "l1", [8, 8], [4, 4], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 16, "l2", [4, 4], [2, 2], pad="VALID"))
x = U.flattenallbut0(x)
x = tf.nn.relu(tf.layers.dense(x, 128, name='lin', kernel_initializer=U.normc_initializer(1.0)))
logits = tf.layers.dense(x, pdtype.param_shape()[0], name='logits', kernel_initializer=U.normc_initializer(0.01))
self.pd = pdtype.pdfromflat(logits)
with tf.variable_scope("vf"):
x = obscaled
x = tf.nn.relu(U.conv2d(x, 8, "l1", [8, 8], [4, 4], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 16, "l2", [4, 4], [2, 2], pad="VALID"))
x = U.flattenallbut0(x)
x = tf.nn.relu(tf.layers.dense(x, 128, name='lin', kernel_initializer=U.normc_initializer(1.0)))
self.vpred = tf.layers.dense(x, 1, name='value', kernel_initializer=U.normc_initializer(1.0))
self.vpredz = self.vpred
self.state_in = []
self.state_out = []
stochastic = tf.placeholder(dtype=tf.bool, shape=())
ac = self.pd.sample()
self._act = U.function([stochastic, ob], [ac, self.vpred])
示例2: _init
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import flattenallbut0 [as 别名]
def _init(self, ob_space, ac_space, kind):
assert isinstance(ob_space, gym.spaces.Box)
self.pdtype = pdtype = make_pdtype(ac_space)
sequence_length = None
ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))
x = ob / 255.0
if kind == 'small': # from A3C paper
x = tf.nn.relu(U.conv2d(x, 16, "l1", [8, 8], [4, 4], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 32, "l2", [4, 4], [2, 2], pad="VALID"))
x = U.flattenallbut0(x)
x = tf.nn.relu(tf.layers.dense(x, 256, name='lin', kernel_initializer=U.normc_initializer(1.0)))
elif kind == 'large': # Nature DQN
x = tf.nn.relu(U.conv2d(x, 32, "l1", [8, 8], [4, 4], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 64, "l2", [4, 4], [2, 2], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 64, "l3", [3, 3], [1, 1], pad="VALID"))
x = U.flattenallbut0(x)
x = tf.nn.relu(tf.layers.dense(x, 512, name='lin', kernel_initializer=U.normc_initializer(1.0)))
else:
raise NotImplementedError
logits = tf.layers.dense(x, pdtype.param_shape()[0], name='logits', kernel_initializer=U.normc_initializer(0.01))
self.pd = pdtype.pdfromflat(logits)
self.vpred = tf.layers.dense(x, 1, name='value', kernel_initializer=U.normc_initializer(1.0))[:,0]
self.state_in = []
self.state_out = []
stochastic = tf.placeholder(dtype=tf.bool, shape=())
ac = self.pd.sample() # XXX
self._act = U.function([stochastic, ob], [ac, self.vpred])
示例3: _init
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import flattenallbut0 [as 别名]
def _init(self, ob_space, ac_space):
assert isinstance(ob_space, gym.spaces.Box)
self.pdtype = pdtype = make_pdtype(ac_space)
sequence_length = None
ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))
obscaled = ob / 255.0
with tf.variable_scope("pol"):
x = obscaled
x = tf.nn.relu(U.conv2d(x, 8, "l1", [8, 8], [4, 4], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 16, "l2", [4, 4], [2, 2], pad="VALID"))
x = U.flattenallbut0(x)
x = tf.nn.relu(U.dense(x, 128, 'lin', U.normc_initializer(1.0)))
logits = U.dense(x, pdtype.param_shape()[0], "logits", U.normc_initializer(0.01))
self.pd = pdtype.pdfromflat(logits)
with tf.variable_scope("vf"):
x = obscaled
x = tf.nn.relu(U.conv2d(x, 8, "l1", [8, 8], [4, 4], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 16, "l2", [4, 4], [2, 2], pad="VALID"))
x = U.flattenallbut0(x)
x = tf.nn.relu(U.dense(x, 128, 'lin', U.normc_initializer(1.0)))
self.vpred = U.dense(x, 1, "value", U.normc_initializer(1.0))
self.vpredz = self.vpred
self.state_in = []
self.state_out = []
stochastic = tf.placeholder(dtype=tf.bool, shape=())
ac = self.pd.sample() # XXX
self._act = U.function([stochastic, ob], [ac, self.vpred])
示例4: _init
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import flattenallbut0 [as 别名]
def _init(self, ob_space, ac_space, kind):
assert isinstance(ob_space, gym.spaces.Box)
self.pdtype = pdtype = make_pdtype(ac_space)
sequence_length = None
ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))
x = ob / 255.0
if kind == 'small': # from A3C paper
x = tf.nn.relu(U.conv2d(x, 16, "l1", [8, 8], [4, 4], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 32, "l2", [4, 4], [2, 2], pad="VALID"))
x = U.flattenallbut0(x)
x = tf.nn.relu(U.dense(x, 256, 'lin', U.normc_initializer(1.0)))
elif kind == 'large': # Nature DQN
x = tf.nn.relu(U.conv2d(x, 32, "l1", [8, 8], [4, 4], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 64, "l2", [4, 4], [2, 2], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 64, "l3", [3, 3], [1, 1], pad="VALID"))
x = U.flattenallbut0(x)
x = tf.nn.relu(U.dense(x, 512, 'lin', U.normc_initializer(1.0)))
else:
raise NotImplementedError
logits = U.dense(x, pdtype.param_shape()[0], "logits", U.normc_initializer(0.01))
self.pd = pdtype.pdfromflat(logits)
self.vpred = U.dense(x, 1, "value", U.normc_initializer(1.0))[:,0]
self.state_in = []
self.state_out = []
stochastic = tf.placeholder(dtype=tf.bool, shape=())
ac = self.pd.sample() # XXX
self._act = U.function([stochastic, ob], [ac, self.vpred])
示例5: _init
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import flattenallbut0 [as 别名]
def _init(self, ob_space, ac_space):
assert isinstance(ob_space, gym.spaces.Box)
self.pdtype = pdtype = make_pdtype(ac_space)
sequence_length = None
ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))
obscaled = ob / 255.0
with tf.variable_scope("pol"):
x = obscaled
x = tf.nn.relu(U.conv2d(x, 8, "l1", [8, 8], [4, 4], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 16, "l2", [4, 4], [2, 2], pad="VALID"))
x = U.flattenallbut0(x)
x = tf.nn.relu(U.dense(x, 128, 'lin', U.normc_initializer(1.0)))
logits = U.dense(x, pdtype.param_shape()[0], "logits", U.normc_initializer(0.01))
self.pd = pdtype.pdfromflat(logits)
with tf.variable_scope("vf"):
x = obscaled
x = tf.nn.relu(U.conv2d(x, 8, "l1", [8, 8], [4, 4], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 16, "l2", [4, 4], [2, 2], pad="VALID"))
x = U.flattenallbut0(x)
x = tf.nn.relu(U.dense(x, 128, 'lin', U.normc_initializer(1.0)))
self.vpred = U.dense(x, 1, "value", U.normc_initializer(1.0))
self.vpredz = self.vpred
self.state_in = []
self.state_out = []
stochastic = tf.placeholder(dtype=tf.bool, shape=())
ac = self.pd.sample()
self._act = U.function([stochastic, ob], [ac, self.vpred])
示例6: _init
# 需要导入模块: from baselines.common import tf_util [as 别名]
# 或者: from baselines.common.tf_util import flattenallbut0 [as 别名]
def _init(self, ob_space, sensor_space, ac_space, hid_size, num_hid_layers, kind):
assert isinstance(ob_space, gym.spaces.Box)
assert isinstance(sensor_space, gym.spaces.Box)
self.pdtype = pdtype = make_pdtype(ac_space)
sequence_length = None
ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))
ob_sensor = U.get_placeholder(name="ob_sensor", dtype=tf.float32, shape=[sequence_length] + list(sensor_space.shape))
## Obfilter on sensor output
with tf.variable_scope("obfilter"):
self.ob_rms = RunningMeanStd(shape=sensor_space.shape)
obz_sensor = tf.clip_by_value((ob_sensor - self.ob_rms.mean) / self.ob_rms.std, -5.0, 5.0)
#x = tf.nn.relu(tf.layers.dense(x, 256, name='lin', kernel_initializer=U.normc_initializer(1.0)))
## Adapted from mlp_policy
last_out = obz_sensor
for i in range(num_hid_layers):
last_out = tf.nn.tanh(tf.layers.dense(last_out, hid_size, name="vffc%i"%(i+1), kernel_initializer=U.normc_initializer(1.0)))
y = tf.layers.dense(last_out, 64, name="vffinal", kernel_initializer=U.normc_initializer(1.0))
#y = ob_sensor
#y = obz_sensor
#y = tf.nn.relu(U.dense(y, 64, 'lin_ob', U.normc_initializer(1.0)))
x = ob / 255.0
if kind == 'small': # from A3C paper
x = tf.nn.relu(U.conv2d(x, 16, "l1", [8, 8], [4, 4], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 32, "l2", [4, 4], [2, 2], pad="VALID"))
x = U.flattenallbut0(x)
x = tf.nn.relu(tf.layers.dense(x, 256, name='lin', kernel_initializer=U.normc_initializer(1.0)))
elif kind == 'large': # Nature DQN
x = tf.nn.relu(U.conv2d(x, 32, "l1", [8, 8], [4, 4], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 64, "l2", [4, 4], [2, 2], pad="VALID"))
x = tf.nn.relu(U.conv2d(x, 64, "l3", [3, 3], [1, 1], pad="VALID"))
x = U.flattenallbut0(x)
x = tf.nn.relu(tf.layers.dense(x, 64, name='lin', kernel_initializer=U.normc_initializer(1.0)))
else:
raise NotImplementedError
print(x.shape, y.shape)
x = tf.concat([x,y], 1)
## Saver
# self.saver = tf.train.Saver()
logits = tf.layers.dense(x, pdtype.param_shape()[0], name="logits", kernel_initializer=U.normc_initializer(0.01))
self.pd = pdtype.pdfromflat(logits)
self.vpred = tf.layers.dense(x, 1, name="value", kernel_initializer=U.normc_initializer(1.0))[:, 0]
self.state_in = []
self.state_out = []
stochastic = tf.placeholder(dtype=tf.bool, shape=())
ac = self.pd.sample() # XXX
self._act = U.function([stochastic, ob, ob_sensor], [ac, self.vpred, logits])