This article collects typical usage examples of the Python class theano.sandbox.cuda.rng_curand.CURAND_RandomStreams. If you are wondering what CURAND_RandomStreams is for, how to use it, or what it looks like in real code, the curated class examples here may help.
The sections below show 15 code examples of the CURAND_RandomStreams class, sorted by popularity by default.
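Before the examples, here is a minimal sketch of the basic pattern they all share: seed a GPU-side generator, build symbolic draws, and compile a function that refreshes a shared variable. This sketch is not taken from the examples below; it assumes an older, CUDA-enabled Theano installation that still ships the theano.sandbox.cuda backend and must be run with the GPU device selected (e.g. THEANO_FLAGS=device=gpu).

import numpy
import theano
from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams

# Seed the GPU-side cuRAND generator.
rng = CURAND_RandomStreams(234)

# Build symbolic draws; `uniform` and `normal` take the output shape.
u = rng.uniform(size=(1000,))   # uniform variates on [0, 1]
n = rng.normal(size=(1000,))    # standard normal variates

# Keeping the destination in a shared variable keeps the data on the GPU.
dest = theano.shared(numpy.zeros(1000, dtype=theano.config.floatX))
refresh = theano.function([], [], updates={dest: u + n})
refresh()   # each call fills `dest` with fresh variates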
Example 1: compare_speed
def compare_speed():
    # To run this speed comparison
    #   cd <directory of this file>
    #   THEANO_FLAGS=device=gpu \
    #     python -c 'import test_rng_curand; test_rng_curand.compare_speed()'
    mrg = MRG_RandomStreams()
    crn = CURAND_RandomStreams(234)
    N = 1000 * 100
    dest = theano.shared(numpy.zeros(N, dtype=theano.config.floatX))
    mrg_u = theano.function([], [], updates={dest: mrg.uniform((N,))},
                            profile='mrg uniform')
    crn_u = theano.function([], [], updates={dest: crn.uniform((N,))},
                            profile='crn uniform')
    mrg_n = theano.function([], [], updates={dest: mrg.normal((N,))},
                            profile='mrg normal')
    crn_n = theano.function([], [], updates={dest: crn.normal((N,))},
                            profile='crn normal')
    for f in mrg_u, crn_u, mrg_n, crn_n:
        # don't time the first call, it has some startup cost
        print('DEBUGPRINT')
        print('----------')
        theano.printing.debugprint(f)
    for i in range(100):
        for f in mrg_u, crn_u, mrg_n, crn_n:
            # don't time the first call, it has some startup cost
            f.fn.time_thunks = (i > 0)
            f()
Example 2: check_uniform_basic
def check_uniform_basic(shape_as_symbolic, dim_as_symbolic=False):
    """
    check_uniform_basic(shape_as_symbolic, dim_as_symbolic=False)

    Runs a basic sanity check on the `uniform` method of a
    `CURAND_RandomStreams` object.

    Checks that variates

     * are in the range [0, 1]
     * have a mean in the right neighbourhood (near 0.5)
     * are of the specified shape
     * successive calls produce different arrays of variates

    Parameters
    ----------
    shape_as_symbolic : boolean
        If `True`, test the case that the shape tuple is a symbolic
        variable rather than known at compile-time.
    dim_as_symbolic : boolean
        If `True`, test the case that an element of the shape
        tuple is a Theano symbolic. Irrelevant if `shape_as_symbolic`
        is `True`.
    """
    rng = CURAND_RandomStreams(234)
    if shape_as_symbolic:
        # instantiate a TensorConstant with the value (10, 10)
        shape = constant((10, 10))
    else:
        # Only one dimension is symbolic, with the others known
        if dim_as_symbolic:
            shape = (10, constant(10))
        else:
            shape = (10, 10)
    u0 = rng.uniform(shape)
    u1 = rng.uniform(shape)
    f0 = theano.function([], u0, mode=mode_with_gpu)
    f1 = theano.function([], u1, mode=mode_with_gpu)
    v0list = [f0() for i in range(3)]
    v1list = [f1() for i in range(3)]
    # print v0list
    # print v1list
    # assert that elements are different in a few ways
    assert numpy.all(v0list[0] != v0list[1])
    assert numpy.all(v1list[0] != v1list[1])
    assert numpy.all(v0list[0] != v1list[0])
    for v in v0list:
        assert v.shape == (10, 10)
        assert v.min() >= 0
        assert v.max() <= 1
        assert v.min() < v.max()
        assert .25 <= v.mean() <= .75
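The two boolean flags documented above select among three ways of passing the shape. A hypothetical driver that exercises all three cases (assuming check_uniform_basic and its mode_with_gpu helper are importable from the test module) could look like:

# shape fully known at compile time: (10, 10)
check_uniform_basic(shape_as_symbolic=False)
# one element of the shape tuple is symbolic: (10, constant(10))
check_uniform_basic(shape_as_symbolic=False, dim_as_symbolic=True)
# the whole shape tuple is a symbolic constant: constant((10, 10))
check_uniform_basic(shape_as_symbolic=True)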
Example 3: Training
class Training(Layer):
    def __init__(self, rng, W=None, m=1.0, n_samples=50, shape=None, batch_size=1000):
        if W is None:
            W = numpy.asarray(rng.uniform(
                low=-numpy.sqrt(6. / (shape[0] + shape[1])),
                high=numpy.sqrt(6. / (shape[0] + shape[1])),
                size=(shape[0], shape[1])), dtype=theano.config.floatX)
        self.W = theano.shared(value=W, name='Hashtag_emb', borrow=True)
        self.batch_size = batch_size
        self.n_ht = W.shape[0]
        self.m = m
        self.n_samples = n_samples
        self.csrng = CURAND_RandomStreams(123)
        mask = self.csrng.uniform(size=(self.n_samples, 1), low=0.0, high=1.0,
                                  dtype=theano.config.floatX)
        self.rfun = theano.function([], mask.argsort(axis=0))
        self.alpha = T.constant(1.0 / numpy.arange(start=1, stop=self.n_ht + 1, step=1))
        self.weights = [self.W]
        self.biases = []

    def __repr__(self):
        return "{}: W_shape: {}, m={}, n_samples={}, n_ht={}".format(
            self.__class__.__name__, self.W.shape.eval(), self.m, self.n_samples, self.n_ht)

    def output_func(self, input):
        # cosine sim
        self.f = T.tensordot(input.dimshuffle(0, 'x', 1), self.W.dimshuffle('x', 0, 1),
                             axes=[[1, 2], [0, 2]])
        self.y_pred = T.argmax(self.f, axis=0)
        return self.y_pred

    def get_tag_neg(self, f, f_y):
        cand = f[(f > f_y - self.m).nonzero()]
        rnk = cand.shape[0] - 1  # due to i != y
        if rnk == 0:
            return 0
        l = T.sum(self.alpha[T.arange(rnk)])
        return l / rnk

    def _warp_loss_cost(self, y, i):
        f_y = self.f[T.arange(y.shape[0]), y]
        s = self.m - f_y + self.f[T.arange(i.shape[0]), i]
        return T.maximum(0.0, s)

    def warp_loss_cost(self, y, idx):
        f_y = self.f[T.arange(y.shape[0]), y]
        f_yy = T.repeat(f_y.dimshuffle(0, 'x'), self.f.shape[1], axis=1)
        f_idx = T.maximum(0.0, f_yy - self.f + self.m)
        idx = f_idx.argsort(axis=1)[:, 0]
        s = self.m - f_y + self.f[T.arange(idx.shape[0]), idx]
        return T.maximum(0.0, s)

    def training_cost(self, y, i):
        return T.mean(self.warp_loss_cost(y, i))
Example 4: __init__
def __init__(self, rng, W=None, m=1.0, n_samples=50, shape=None, batch_size=1000):
    if W is None:
        W = numpy.asarray(rng.uniform(
            low=-numpy.sqrt(6. / (shape[0] + shape[1])),
            high=numpy.sqrt(6. / (shape[0] + shape[1])),
            size=(shape[0], shape[1])), dtype=theano.config.floatX)
    self.W = theano.shared(value=W, name='Hashtag_emb', borrow=True)
    self.batch_size = batch_size
    self.n_ht = W.shape[0]
    self.m = m
    self.n_samples = n_samples
    self.csrng = CURAND_RandomStreams(123)
    mask = self.csrng.uniform(size=(self.n_samples, 1), low=0.0, high=1.0,
                              dtype=theano.config.floatX)
    self.rfun = theano.function([], mask.argsort(axis=0))
    self.alpha = T.constant(1.0 / numpy.arange(start=1, stop=self.n_ht + 1, step=1))
    self.weights = [self.W]
    self.biases = []
Example 5: __init__
def __init__(self, rng, clean_input=None, fuzzy_input=None, \
             in_dim=0, out_dim=0, activation=None, input_noise=0., \
             W=None, b_h=None, b_v=None):
    # Setup a shared random generator for this layer
    #self.rng = theano.tensor.shared_randomstreams.RandomStreams( \
    #        rng.randint(100000))
    self.rng = CURAND_RandomStreams(rng.randint(1000000))
    # Grab the layer input and perturb it with some sort of noise. This
    # is, afterall, a _denoising_ autoencoder...
    self.clean_input = clean_input
    self.noisy_input = self._get_noisy_input(fuzzy_input, input_noise)
    # Set some basic layer properties
    self.activation = activation
    self.in_dim = in_dim
    self.out_dim = out_dim
    # Get some random initial weights and biases, if not given
    if W is None:
        W_init = np.asarray(0.01 * rng.standard_normal( \
            size=(in_dim, out_dim)), dtype=theano.config.floatX)
        W = theano.shared(value=W_init, name='W')
    if b_h is None:
        b_init = np.zeros((out_dim,), dtype=theano.config.floatX)
        b_h = theano.shared(value=b_init, name='b_h')
    if b_v is None:
        b_init = np.zeros((in_dim,), dtype=theano.config.floatX)
        b_v = theano.shared(value=b_init, name='b_v')
    # Grab pointers to the now-initialized weights and biases
    self.W = W
    self.b_h = b_h
    self.b_v = b_v
    # Put the learnable/optimizable parameters into a list
    self.params = [self.W, self.b_h, self.b_v]
    # Beep boop... layer construction complete...
    return
Example 6: HiddenLayer
class HiddenLayer(object):
    def __init__(self, rng, input, in_dim, out_dim, \
                 activation=None, pool_size=0, \
                 drop_rate=0., input_noise=0., bias_noise=0., \
                 W=None, b=None, \
                 use_bias=True, name=""):
        # Setup a shared random generator for this layer
        #self.srng = theano.tensor.shared_randomstreams.RandomStreams( \
        #        rng.randint(100000))
        self.srng = CURAND_RandomStreams(rng.randint(1000000))
        self.clean_input = input
        # Add gaussian noise to the input (if desired)
        if (input_noise > 1e-4):
            self.fuzzy_input = input + \
                    (input_noise * self.srng.normal(size=input.shape, \
                    dtype=theano.config.floatX))
        else:
            self.fuzzy_input = input
        # Apply masking noise to the input (if desired)
        if (drop_rate > 1e-4):
            self.noisy_input = self._drop_from_input(self.fuzzy_input, drop_rate)
        else:
            self.noisy_input = self.fuzzy_input
        # Set some basic layer properties
        self.pool_size = pool_size
        self.in_dim = in_dim
        self.out_dim = out_dim
        if self.pool_size <= 1:
            self.filt_count = self.out_dim
        else:
            self.filt_count = self.out_dim * self.pool_size
        self.pool_count = self.filt_count / max(self.pool_size, 1)
        if activation:
            self.activation = activation
        else:
            if self.pool_size <= 1:
                self.activation = lambda x: relu_actfun(x)
            else:
                self.activation = lambda x: \
                        maxout_actfun(x, self.pool_size, self.filt_count)
        # Get some random initial weights and biases, if not given
        if W is None:
            if self.pool_size <= 1:
                # Generate random initial filters in a typical way
                W_init = np.asarray(0.04 * rng.standard_normal( \
                        size=(self.in_dim, self.filt_count)), \
                        dtype=theano.config.floatX)
            else:
                # Generate groups of random filters to pool over such that
                # intra-group correlations are stronger than inter-group
                # correlations, to encourage pooling over similar filters...
                filters = []
                for g_num in range(self.pool_count):
                    g_filt = 0.01 * rng.standard_normal(size=(self.in_dim, 1))
                    for f_num in range(self.pool_size):
                        f_filt = g_filt + (0.005 * rng.standard_normal( \
                                size=(self.in_dim, 1)))
                        filters.append(f_filt)
                W_init = np.hstack(filters).astype(theano.config.floatX)
            W = theano.shared(value=W_init, name="{0:s}_W".format(name))
        if b is None:
            b_init = np.zeros((self.filt_count,), dtype=theano.config.floatX)
            b = theano.shared(value=b_init, name="{0:s}_b".format(name))
        # Set layer weights and biases
        self.W = W
        self.b = b
        # Compute linear "pre-activation" for this layer
        if use_bias:
            self.linear_output = T.dot(self.noisy_input, self.W) + self.b
        else:
            self.linear_output = T.dot(self.noisy_input, self.W)
        # Add noise to the pre-activation features (if desired)
        self.noisy_linear = self.linear_output + \
                (bias_noise * self.srng.normal(size=self.linear_output.shape, \
                dtype=theano.config.floatX))
        # Apply activation function
        self.output = self.activation(self.noisy_linear)
        # Compute some properties of the activations, probably to regularize
        self.act_l2_sum = T.sum(self.output**2.) / self.output.size
        self.row_l1_sum = T.sum(abs(row_normalize(self.output))) / \
                self.output.shape[0]
        self.col_l1_sum = T.sum(abs(col_normalize(self.output))) / \
                self.output.shape[1]
        # Conveniently package layer parameters
        if use_bias:
            self.params = [self.W, self.b]
#......... part of the code omitted here .........
Example 7: __init__
def __init__(self, rng, input, in_dim, W=None, b=None, W_scale=1.0):
    # Setup a shared random generator for this layer
    self.rng = RandStream(rng.randint(1000000))
    self.input = input
    self.in_dim = in_dim
    # Get some random initial weights and biases, if not given
    if W is None:
        # Generate random initial filters in a typical way
        W_init = 1.0 * np.asarray(rng.normal( \
                size=(self.in_dim, 1)), \
                dtype=theano.config.floatX)
        W = theano.shared(value=(W_scale * W_init))
    if b is None:
        b_init = np.zeros((1,), dtype=theano.config.floatX)
        b = theano.shared(value=b_init)
    # Set layer weights and biases
    self.W = W
    self.b = b
    # Compute linear "pre-activation" for this layer
    self.linear_output = 20.0 * T.tanh((T.dot(self.input, self.W) + self.b) / 20.0)
    # Apply activation function
    self.output = self.linear_output
    # Compute squared sum of outputs, for regularization
    self.act_l2_sum = T.sum(self.output**2.0) / self.output.shape[0]
    # Conveniently package layer parameters
    self.params = [self.W, self.b]
    # little layer construction complete...
    return
Example 8: __init__
def __init__(self, rng, in_dim, out_dim, \
             W_mean=None, b_mean=None, \
             W_logvar=None, b_logvar=None, \
             name="", W_scale=1.0):
    # setup a shared random generator for this network
    self.rng = RandStream(rng.randint(1000000))
    # set some basic layer properties
    self.in_dim = in_dim
    self.out_dim = out_dim
    # initialize weights and biases for mean estimate
    if W_mean is None:
        # Generate initial filters using orthogonal random trick
        W_shape = (self.in_dim, self.out_dim)
        if W_scale > 0.1:
            W_scale = W_scale * (1.0 / np.sqrt(self.in_dim))
        W_init = W_scale * npr.normal(0.0, 1.0, W_shape)
        W_init = W_init.astype(theano.config.floatX)
        W_mean = theano.shared(value=W_init, \
                name="{0:s}_W_mean".format(name))
    if b_mean is None:
        b_init = np.zeros((self.out_dim,), \
                dtype=theano.config.floatX)
        b_mean = theano.shared(value=b_init, \
                name="{0:s}_b_mean".format(name))
    # grab handles for easy access
    self.W_mean = W_mean
    self.b_mean = b_mean
    # initialize weights and biases for log-variance estimate
    if W_logvar is None:
        # Generate initial filters using orthogonal random trick
        W_shape = (self.in_dim, self.out_dim)
        W_scale = W_scale * (1.0 / np.sqrt(self.in_dim))
        W_init = W_scale * npr.normal(0.0, 1.0, W_shape)
        #W_init = ortho_matrix(shape=W_shape, gain=W_scale)
        W_init = W_init.astype(theano.config.floatX)
        W_logvar = theano.shared(value=W_init, \
                name="{0:s}_W_logvar".format(name))
    if b_logvar is None:
        b_init = np.zeros((self.out_dim,), \
                dtype=theano.config.floatX)
        b_logvar = theano.shared(value=b_init, \
                name="{0:s}_b_logvar".format(name))
    # grab handles for easy access
    self.W_logvar = W_logvar
    self.b_logvar = b_logvar
    # Conveniently package layer parameters
    self.mlp_params = [self.W_mean, self.b_mean, \
                       self.W_logvar, self.b_logvar]
    # Layer construction complete...
    return
Example 9: __init__
def __init__(self, rand_dim, out_dim,
             apply_bn=True, init_func=None,
             rand_type='normal', final_relu=True,
             mod_name='dm_uni'):
    self.rand_dim = rand_dim
    self.out_dim = out_dim
    self.apply_bn = apply_bn
    self.mod_name = mod_name
    self.rand_type = rand_type
    self.final_relu = final_relu
    self.rng = RandStream(123)
    if init_func is None:
        self.init_func = inits.Normal(scale=0.02)
    else:
        self.init_func = init_func
    self._init_params()  # initialize parameters
    return
Example 10: DiscLayer
class DiscLayer(object):
    def __init__(self, rng, input, in_dim, W=None, b=None):
        # Setup a shared random generator for this layer
        self.rng = RandStream(rng.randint(1000000))
        self.input = input
        self.in_dim = in_dim
        # Get some random initial weights and biases, if not given
        if W is None:
            # Generate random initial filters in a typical way
            W_init = 0.01 * np.asarray(rng.normal( \
                    size=(self.in_dim, 1)), \
                    dtype=theano.config.floatX)
            W = theano.shared(value=W_init)
        if b is None:
            b_init = np.zeros((1,), dtype=theano.config.floatX)
            b = theano.shared(value=b_init)
        # Set layer weights and biases
        self.W = W
        self.b = b
        # Compute linear "pre-activation" for this layer
        self.linear_output = 20.0 * T.tanh((T.dot(self.input, self.W) + self.b) / 20.0)
        # Apply activation function
        self.output = self.linear_output
        # Compute squared sum of outputs, for regularization
        self.act_l2_sum = T.sum(self.output**2.0) / self.output.shape[0]
        # Conveniently package layer parameters
        self.params = [self.W, self.b]
        # little layer construction complete...
        return

    def _noisy_params(self, P, noise_lvl=0.):
        """Noisy weights, like convolving energy surface with a gaussian."""
        P_nz = P + self.rng.normal(size=P.shape, avg=0.0, std=noise_lvl, \
                dtype=theano.config.floatX)
        return P_nz
Example 11: __init__
def __init__(self, rng=None, \
             x_in=None, x_out=None, \
             p_s0_given_z=None, \
             p_hi_given_si=None, \
             p_sip1_given_si_hi=None, \
             q_z_given_x=None, \
             q_hi_given_x_si=None, \
             obs_dim=None, \
             z_dim=None, h_dim=None, \
             ir_steps=4, params=None, \
             shared_param_dicts=None):
    # setup a rng for this GIPair
    self.rng = RandStream(rng.randint(100000))
    # grab the user-provided parameters
    self.params = params
    self.x_type = self.params['x_type']
    assert((self.x_type == 'bernoulli') or (self.x_type == 'gaussian'))
    if 'obs_transform' in self.params:
        assert((self.params['obs_transform'] == 'sigmoid') or \
                (self.params['obs_transform'] == 'none'))
        if self.params['obs_transform'] == 'sigmoid':
            self.obs_transform = lambda x: T.nnet.sigmoid(20.0 * T.tanh(0.05 * x))
        else:
            self.obs_transform = lambda x: x
    else:
        self.obs_transform = lambda x: T.nnet.sigmoid(20.0 * T.tanh(0.05 * x))
    if self.x_type == 'bernoulli':
        self.obs_transform = lambda x: T.nnet.sigmoid(20.0 * T.tanh(0.05 * x))
    self.shared_param_dicts = shared_param_dicts
    # record the dimensions of various spaces relevant to this model
    self.obs_dim = obs_dim
    self.z_dim = z_dim
    self.h_dim = h_dim
    self.ir_steps = ir_steps
    # grab handles to the relevant InfNets
    self.q_z_given_x = q_z_given_x
    self.q_hi_given_x_si = q_hi_given_x_si
    self.p_s0_given_z = p_s0_given_z
    self.p_hi_given_si = p_hi_given_si
    self.p_sip1_given_si_hi = p_sip1_given_si_hi
    # record the symbolic variables that will provide inputs to the
    # computation graph created to describe this MultiStageModel
    self.x_in = x_in
    self.x_out = x_out
    self.hi_zmuv = T.tensor3()  # for ZMUV Gaussian samples to use in scan
    # setup switching variable for changing between sampling/training
    zero_ary = to_fX( np.zeros((1,)) )
    self.train_switch = theano.shared(value=zero_ary, name='msm_train_switch')
    self.set_train_switch(1.0)
    # setup a variable for controlling dropout noise
    self.drop_rate = theano.shared(value=zero_ary, name='msm_drop_rate')
    self.set_drop_rate(0.0)
    # this weight balances l1 vs. l2 penalty on posterior KLds
    self.lam_kld_l1l2 = theano.shared(value=zero_ary, name='msm_lam_kld_l1l2')
    self.set_lam_kld_l1l2(1.0)
    if self.shared_param_dicts is None:
        # initialize "optimizable" parameters specific to this MSM
        init_vec = to_fX( np.zeros((self.z_dim,)) )
        self.p_z_mean = theano.shared(value=init_vec, name='msm_p_z_mean')
        self.p_z_logvar = theano.shared(value=init_vec, name='msm_p_z_logvar')
        init_vec = to_fX( np.zeros((self.obs_dim,)) )
        self.obs_logvar = theano.shared(value=zero_ary, name='msm_obs_logvar')
        self.bounded_logvar = 8.0 * T.tanh((1.0/8.0) * self.obs_logvar)
        self.shared_param_dicts = {}
        self.shared_param_dicts['p_z_mean'] = self.p_z_mean
        self.shared_param_dicts['p_z_logvar'] = self.p_z_logvar
        self.shared_param_dicts['obs_logvar'] = self.obs_logvar
    else:
        self.p_z_mean = self.shared_param_dicts['p_z_mean']
        self.p_z_logvar = self.shared_param_dicts['p_z_logvar']
        self.obs_logvar = self.shared_param_dicts['obs_logvar']
        self.bounded_logvar = 8.0 * T.tanh((1.0/8.0) * self.obs_logvar)
    # setup a function for computing reconstruction log likelihood
    if self.x_type == 'bernoulli':
        self.log_prob_func = lambda xo, xh: \
                (-1.0 * log_prob_bernoulli(xo, xh))
    else:
        self.log_prob_func = lambda xo, xh: \
                (-1.0 * log_prob_gaussian2(xo, xh, \
                log_vars=self.bounded_logvar))
    # get a drop mask that drops things with probability p
    drop_scale = 1. / (1. - self.drop_rate[0])
    drop_rnd = self.rng.uniform(size=self.x_out.shape, \
            low=0.0, high=1.0, dtype=theano.config.floatX)
    drop_mask = drop_scale * (drop_rnd > self.drop_rate[0])
    #############################
    # Setup self.z and self.s0. #
    #############################
    print("Building MSM step 0...")
    drop_x = drop_mask * self.x_in
    self.q_z_mean, self.q_z_logvar, self.z = \
#......... part of the code omitted here .........
Example 12: InfNet
class InfNet(object):
    """
    A net that tries to infer an approximate posterior for some observation,
    given some deep, directed generative model. The output of this network
    comprises two constructs: an approximate mean vector and an approximate
    standard deviation vector (i.e. diagonal matrix) for a Gaussian posterior.

    Parameters:
        rng: a numpy.random RandomState object
        Xd: symbolic input matrix for inputs
        params: a dict of parameters describing the desired network:
            vis_drop: drop rate to use on observable variables
            hid_drop: drop rate to use on hidden layer activations
                -- note: vis_drop/hid_drop are optional, with defaults 0.0/0.0
            input_noise: standard dev for noise on the input of this net
            bias_noise: standard dev for noise on the biases of hidden layers
            shared_config: list of "layer descriptions" for shared part
            mu_config: list of "layer descriptions" for mu part
            sigma_config: list of "layer descriptions" for sigma part
            activation: "function handle" for the desired non-linearity
            init_scale: scaling factor for hidden layer weights (__ * 0.01)
        shared_param_dicts: parameters for the MLP controlled by this InfNet
    """
    def __init__(self, \
                 rng=None, \
                 Xd=None, \
                 params=None, \
                 shared_param_dicts=None):
        # Setup a shared random generator for this network
        self.rng = RandStream(rng.randint(1000000))
        # Grab the symbolic input matrix
        self.Xd = Xd
        #####################################################
        # Process user-supplied parameters for this network #
        #####################################################
        self.params = params
        if 'build_theano_funcs' in params:
            self.build_theano_funcs = params['build_theano_funcs']
        else:
            self.build_theano_funcs = True
        if 'vis_drop' in params:
            self.vis_drop = params['vis_drop']
        else:
            self.vis_drop = 0.0
        if 'hid_drop' in params:
            self.hid_drop = params['hid_drop']
        else:
            self.hid_drop = 0.0
        if 'input_noise' in params:
            self.input_noise = params['input_noise']
        else:
            self.input_noise = 0.0
        if 'bias_noise' in params:
            self.bias_noise = params['bias_noise']
        else:
            self.bias_noise = 0.0
        if 'init_scale' in params:
            self.init_scale = params['init_scale']
        else:
            self.init_scale = 1.0
        if 'sigma_init_scale' in params:
            self.sigma_init_scale = params['sigma_init_scale']
        else:
            self.sigma_init_scale = 1.0
        # Check if the params for this net were given a priori. This option
        # will be used for creating "clones" of an inference network, with all
        # of the network parameters shared between clones.
        if shared_param_dicts is None:
            # This is not a clone, and we will need to make a dict for
            # referring to the parameters of each network layer
            self.shared_param_dicts = {'shared': [], 'mu': [], 'sigma': []}
            self.is_clone = False
        else:
            # This is a clone, and its layer parameters can be found by
            # referring to the given param dict (i.e. shared_param_dicts).
            self.shared_param_dicts = shared_param_dicts
            self.is_clone = True
        # Get the configuration/prototype for this network. The config is a
        # list of layer descriptions, including a description for the input
        # layer, which is typically just the dimension of the inputs. So, the
        # depth of the mlp is one less than the number of layer configs.
        self.shared_config = params['shared_config']
        self.mu_config = params['mu_config']
        self.sigma_config = params['sigma_config']
        if 'activation' in params:
            self.activation = params['activation']
        else:
            self.activation = relu_actfun
        #########################################
        # Initialize the shared part of network #
        #########################################
        self.shared_layers = []
        layer_def_pairs = zip(self.shared_config[:-1], self.shared_config[1:])
        layer_num = 0
        # Construct input to the inference network
        next_input = self.Xd
        for in_def, out_def in layer_def_pairs:
            first_layer = (layer_num == 0)
            last_layer = (layer_num == (len(layer_def_pairs) - 1))
            l_name = "share_layer_{0:d}".format(layer_num)
#......... part of the code omitted here .........
Example 13: WalkoutModel
class WalkoutModel(object):
    """
    Controller for training a forwards-backwards chainy model.

    Parameters:
        rng: numpy.random.RandomState (for reproducibility)
        x_out: the goal state for forwards-backwards walking process
        p_z_given_x: InfNet for stochastic part of step
        p_x_given_z: HydraNet for deterministic part of step
        params: REQUIRED PARAMS SHOWN BELOW
                x_dim: dimension of observations to construct
                z_dim: dimension of latent space for policy wobble
                walkout_steps: number of steps to walk out
                x_type: can be "bernoulli" or "gaussian"
                x_transform: can be 'none' or 'sigmoid'
    """
    def __init__(self, rng=None,
                 x_out=None, \
                 p_z_given_x=None, \
                 p_x_given_z=None, \
                 params=None, \
                 shared_param_dicts=None):
        # setup a rng for this WalkoutModel
        self.rng = RandStream(rng.randint(100000))
        # grab the user-provided parameters
        self.params = params
        self.x_dim = self.params['x_dim']
        self.z_dim = self.params['z_dim']
        self.walkout_steps = self.params['walkout_steps']
        self.x_type = self.params['x_type']
        self.shared_param_dicts = shared_param_dicts
        if 'x_transform' in self.params:
            assert((self.params['x_transform'] == 'sigmoid') or \
                    (self.params['x_transform'] == 'none'))
            if self.params['x_transform'] == 'sigmoid':
                self.x_transform = lambda x: T.nnet.sigmoid(x)
            else:
                self.x_transform = lambda x: x
        else:
            self.x_transform = lambda x: T.nnet.sigmoid(x)
        if self.x_type == 'bernoulli':
            self.x_transform = lambda x: T.nnet.sigmoid(x)
        assert((self.x_type == 'bernoulli') or (self.x_type == 'gaussian'))
        assert((self.step_type == 'add') or (self.step_type == 'jump'))
        # grab handles to the relevant networks
        self.p_z_given_x = p_z_given_x
        self.p_x_given_z = p_x_given_z
        # record the symbolic variables that will provide inputs to the
        # computation graph created for this WalkoutModel
        self.x_out = x_out           # target output for generation
        self.zi_zmuv = T.tensor3()   # ZMUV gauss noise for walk-out wobble
        if self.shared_param_dicts is None:
            # initialize the parameters "owned" by this model
            zero_ary = to_fX( np.zeros((1,)) )
            self.obs_logvar = theano.shared(value=zero_ary, name='obs_logvar')
            self.bounded_logvar = 8.0 * T.tanh((1.0/8.0) * self.obs_logvar[0])
            self.shared_param_dicts = {}
            self.shared_param_dicts['obs_logvar'] = self.obs_logvar
        else:
            # grab the parameters required by this model from a given dict
            self.obs_logvar = self.shared_param_dicts['obs_logvar']
            self.bounded_logvar = 8.0 * T.tanh((1.0/8.0) * self.obs_logvar[0])

        ###############################################################
        # Setup the forwards (i.e. training) walk-out loop using scan #
        ###############################################################
        def forwards_loop(xi_zmuv, zi_zmuv, xi_fw, zi_fw):
            # get samples of next zi, according to the forwards model
            zi_fw_mean, zi_fw_logvar = self.p_z_given_x.apply(xi_fw, \
                    do_samples=False)
            zi_fw = zi_fw_mean + (T.exp(0.5 * zi_fw_logvar) * zi_zmuv)
            # check reverse direction probability p(xi_fw | zi_fw)
            xi_bw_mean, xi_bw_logvar = self.p_x_given_z.apply(zi_fw, \
                    do_samples=False)
            xi_bw_mean = self.x_transform(xi_bw_mean)
            nll_xi_bw = log_prob_gaussian2(xi_fw, xi_bw_mean, \
                    log_vars=xi_bw_logvar, mask=None)
            nll_xi_bw = nll_xi_bw.flatten()
            # get samples of next xi, according to the forwards model
            xi_fw_mean, xi_fw_logvar = self.p_x_given_z.apply(zi_fw, \
                    do_samples=False)
            xi_fw_mean = self.x_transform(xi_fw_mean)
            xi_fw = xi_fw_mean + (T.exp(0.5 * xi_fw_logvar) * xi_zmuv)
            # check reverse direction probability p(zi_fw | xi_fw)
            zi_bw_mean, zi_bw_logvar = self.p_z_given_x.apply(xi_fw, \
                    do_samples=False)
            nll_zi_bw = log_prob_gaussian2(zi_fw, zi_bw_mean, \
                    log_vars=zi_bw_logvar, mask=None)
            nll_zi_bw = nll_zi_bw.flatten()
            # each loop iteration produces the following values:
            #   xi_fw: xi generated fom zi by forwards walk
            #   zi_fw: zi generated fom xi by forwards walk
#......... part of the code omitted here .........
Example 14: MultiStageModel
class MultiStageModel(object):
    """
    Controller for training a multi-step iterative refinement model.

    Parameters:
        rng: numpy.random.RandomState (for reproducibility)
        x_in: the input data to encode
        x_out: the target output to decode
        p_s0_given_z: InfNet for initializing "canvas" state
        p_hi_given_si: InfNet for hi given si
        p_sip1_given_si_hi: HydraNet for sip1 given si and hi
        q_z_given_x: InfNet for z given x
        q_hi_given_x_si: InfNet for hi given x and si
        obs_dim: dimension of the observations to generate
        z_dim: dimension of the "initial" latent space
        h_dim: dimension of the "primary" latent space
        ir_steps: number of "iterative refinement" steps to perform
        params: REQUIRED PARAMS SHOWN BELOW
                x_type: can be "bernoulli" or "gaussian"
                obs_transform: can be 'none' or 'sigmoid'
    """
    def __init__(self, rng=None, \
                 x_in=None, x_out=None, \
                 p_s0_given_z=None, \
                 p_hi_given_si=None, \
                 p_sip1_given_si_hi=None, \
                 q_z_given_x=None, \
                 q_hi_given_x_si=None, \
                 obs_dim=None, \
                 z_dim=None, h_dim=None, \
                 ir_steps=4, params=None, \
                 shared_param_dicts=None):
        # setup a rng for this GIPair
        self.rng = RandStream(rng.randint(100000))
        # grab the user-provided parameters
        self.params = params
        self.x_type = self.params['x_type']
        assert((self.x_type == 'bernoulli') or (self.x_type == 'gaussian'))
        if 'obs_transform' in self.params:
            assert((self.params['obs_transform'] == 'sigmoid') or \
                    (self.params['obs_transform'] == 'none'))
            if self.params['obs_transform'] == 'sigmoid':
                self.obs_transform = lambda x: T.nnet.sigmoid(20.0 * T.tanh(0.05 * x))
            else:
                self.obs_transform = lambda x: x
        else:
            self.obs_transform = lambda x: T.nnet.sigmoid(20.0 * T.tanh(0.05 * x))
        if self.x_type == 'bernoulli':
            self.obs_transform = lambda x: T.nnet.sigmoid(20.0 * T.tanh(0.05 * x))
        self.shared_param_dicts = shared_param_dicts
        # record the dimensions of various spaces relevant to this model
        self.obs_dim = obs_dim
        self.z_dim = z_dim
        self.h_dim = h_dim
        self.ir_steps = ir_steps
        # grab handles to the relevant InfNets
        self.q_z_given_x = q_z_given_x
        self.q_hi_given_x_si = q_hi_given_x_si
        self.p_s0_given_z = p_s0_given_z
        self.p_hi_given_si = p_hi_given_si
        self.p_sip1_given_si_hi = p_sip1_given_si_hi
        # record the symbolic variables that will provide inputs to the
        # computation graph created to describe this MultiStageModel
        self.x_in = x_in
        self.x_out = x_out
        self.hi_zmuv = T.tensor3()  # for ZMUV Gaussian samples to use in scan
        # setup switching variable for changing between sampling/training
        zero_ary = to_fX( np.zeros((1,)) )
        self.train_switch = theano.shared(value=zero_ary, name='msm_train_switch')
        self.set_train_switch(1.0)
        # setup a variable for controlling dropout noise
        self.drop_rate = theano.shared(value=zero_ary, name='msm_drop_rate')
        self.set_drop_rate(0.0)
        # this weight balances l1 vs. l2 penalty on posterior KLds
        self.lam_kld_l1l2 = theano.shared(value=zero_ary, name='msm_lam_kld_l1l2')
        self.set_lam_kld_l1l2(1.0)
        if self.shared_param_dicts is None:
            # initialize "optimizable" parameters specific to this MSM
            init_vec = to_fX( np.zeros((self.z_dim,)) )
            self.p_z_mean = theano.shared(value=init_vec, name='msm_p_z_mean')
            self.p_z_logvar = theano.shared(value=init_vec, name='msm_p_z_logvar')
            init_vec = to_fX( np.zeros((self.obs_dim,)) )
            self.obs_logvar = theano.shared(value=zero_ary, name='msm_obs_logvar')
            self.bounded_logvar = 8.0 * T.tanh((1.0/8.0) * self.obs_logvar)
            self.shared_param_dicts = {}
            self.shared_param_dicts['p_z_mean'] = self.p_z_mean
            self.shared_param_dicts['p_z_logvar'] = self.p_z_logvar
            self.shared_param_dicts['obs_logvar'] = self.obs_logvar
        else:
            self.p_z_mean = self.shared_param_dicts['p_z_mean']
            self.p_z_logvar = self.shared_param_dicts['p_z_logvar']
            self.obs_logvar = self.shared_param_dicts['obs_logvar']
            self.bounded_logvar = 8.0 * T.tanh((1.0/8.0) * self.obs_logvar)
#......... part of the code omitted here .........
Example 15: SimpleInfNet
class SimpleInfNet(object):
    def __init__(self, rng, in_dim, out_dim, \
                 W_mean=None, b_mean=None, \
                 W_logvar=None, b_logvar=None, \
                 name="", W_scale=1.0):
        # setup a shared random generator for this network
        self.rng = RandStream(rng.randint(1000000))
        # set some basic layer properties
        self.in_dim = in_dim
        self.out_dim = out_dim
        # initialize weights and biases for mean estimate
        if W_mean is None:
            # Generate initial filters using orthogonal random trick
            W_shape = (self.in_dim, self.out_dim)
            if W_scale > 0.1:
                W_scale = W_scale * (1.0 / np.sqrt(self.in_dim))
            W_init = W_scale * npr.normal(0.0, 1.0, W_shape)
            W_init = W_init.astype(theano.config.floatX)
            W_mean = theano.shared(value=W_init, \
                    name="{0:s}_W_mean".format(name))
        if b_mean is None:
            b_init = np.zeros((self.out_dim,), \
                    dtype=theano.config.floatX)
            b_mean = theano.shared(value=b_init, \
                    name="{0:s}_b_mean".format(name))
        # grab handles for easy access
        self.W_mean = W_mean
        self.b_mean = b_mean
        # initialize weights and biases for log-variance estimate
        if W_logvar is None:
            # Generate initial filters using orthogonal random trick
            W_shape = (self.in_dim, self.out_dim)
            W_scale = W_scale * (1.0 / np.sqrt(self.in_dim))
            W_init = W_scale * npr.normal(0.0, 1.0, W_shape)
            #W_init = ortho_matrix(shape=W_shape, gain=W_scale)
            W_init = W_init.astype(theano.config.floatX)
            W_logvar = theano.shared(value=W_init, \
                    name="{0:s}_W_logvar".format(name))
        if b_logvar is None:
            b_init = np.zeros((self.out_dim,), \
                    dtype=theano.config.floatX)
            b_logvar = theano.shared(value=b_init, \
                    name="{0:s}_b_logvar".format(name))
        # grab handles for easy access
        self.W_logvar = W_logvar
        self.b_logvar = b_logvar
        # Conveniently package layer parameters
        self.mlp_params = [self.W_mean, self.b_mean, \
                           self.W_logvar, self.b_logvar]
        # Layer construction complete...
        return

    def get_bias(self):
        """
        Get the bias at output layer.
        """
        out_bias = self.b_mean
        return out_bias

    def apply(self, x, do_samples=True):
        """
        Apply this SimpleInfNet to some input.
        """
        z_mean = T.dot(x, self.W_mean) + self.b_mean
        z_logvar = T.dot(x, self.W_logvar) + self.b_logvar
        z_samples = z_mean + ( (T.exp(0.5*z_logvar)) * \
                DCG(self.rng.normal(size=z_mean.shape, avg=0.0, std=1.0, \
                dtype=theano.config.floatX)) )
        # wrap them up for easy returnage
        result = [z_mean, z_logvar]
        if do_samples:
            result.append(z_samples)
        return result
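As a rough usage sketch for the SimpleInfNet class above, not part of the original example: it assumes, as in the source module, that RandStream aliases CURAND_RandomStreams, np is numpy, npr is numpy.random, T is theano.tensor, and DCG is a disconnected-gradient wrapper around the sampling noise; the dimensions below are arbitrary.

import numpy as np
import theano
import theano.tensor as T

rng = np.random.RandomState(1234)            # seeds the layer's CURAND stream
net = SimpleInfNet(rng, in_dim=20, out_dim=10, name='demo')

x = T.matrix('x')
z_mean, z_logvar, z_samples = net.apply(x, do_samples=True)

# compile a sampler for the approximate Gaussian posterior
sample_fn = theano.function([x], [z_mean, z_logvar, z_samples])
batch = np.random.randn(5, 20).astype(theano.config.floatX)
means, logvars, samples = sample_fn(batch)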