This article collects typical usage examples of the Python method theano.sandbox.cuda.rng_curand.CURAND_RandomStreams.normal. If you are wondering what exactly CURAND_RandomStreams.normal does and how to use it, the curated code samples below may help. You can also read further about the class this method belongs to, theano.sandbox.cuda.rng_curand.CURAND_RandomStreams.
The following shows 15 code examples of the CURAND_RandomStreams.normal method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
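Before the examples, here is a minimal sketch of the basic calling pattern, assembled from the calls that appear in the examples below. It assumes Theano is configured to run on a CUDA GPU (e.g. THEANO_FLAGS=device=gpu), since CURAND requires CUDA; the seed and shape are arbitrary illustrations.

import theano
from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams

rng = CURAND_RandomStreams(234)  # seed the generator with any integer
# build a symbolic variable holding draws from N(avg, std**2)
samples = rng.normal(size=(10, 10), avg=0.0, std=1.0,
                     dtype=theano.config.floatX)
f = theano.function([], samples)
print(f().shape)  # -> (10, 10)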
Example 1: compare_speed
# Required import: from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams [as alias]
# Or: from theano.sandbox.cuda.rng_curand.CURAND_RandomStreams import normal [as alias]
def compare_speed():
# To run this speed comparison
# cd <directory of this file>
# THEANO_FLAGS=device=gpu \
# python -c 'import test_rng_curand; test_rng_curand.compare_speed()'
mrg = MRG_RandomStreams()
crn = CURAND_RandomStreams(234)
N = 1000 * 100
dest = theano.shared(numpy.zeros(N, dtype=theano.config.floatX))
mrg_u = theano.function([], [], updates={dest: mrg.uniform((N,))},
profile='mrg uniform')
crn_u = theano.function([], [], updates={dest: crn.uniform((N,))},
profile='crn uniform')
mrg_n = theano.function([], [], updates={dest: mrg.normal((N,))},
profile='mrg normal')
crn_n = theano.function([], [], updates={dest: crn.normal((N,))},
profile='crn normal')
for f in mrg_u, crn_u, mrg_n, crn_n:
# print the compiled graph of each function for inspection
print('DEBUGPRINT')
print('----------')
theano.printing.debugprint(f)
for i in range(100):
for f in mrg_u, crn_u, mrg_n, crn_n:
# don't time the first call, it has some startup cost
f.fn.time_thunks = (i > 0)
f()
Example 2: check_normal_basic
# Required import: from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams [as alias]
# Or: from theano.sandbox.cuda.rng_curand.CURAND_RandomStreams import normal [as alias]
def check_normal_basic(shape_as_symbolic, dim_as_symbolic=False):
"""
check_normal_basic(shape_as_symbolic, dim_as_symbolic=False)
Runs a basic sanity check on the `normal` method of a
`CURAND_RandomStreams` object.
Checks that variates
* have a mean in the right neighbourhood (near 0)
* are of the specified shape
* successive calls produce different arrays of variates
Parameters
----------
shape_as_symbolic : boolean
If `True`, test the case that the shape tuple is a symbolic
variable rather than known at compile-time.
dim_as_symbolic : boolean
If `True`, test the case that an element of the shape
tuple is a Theano symbolic. Irrelevant if `shape_as_symbolic`
is `True`.
"""
rng = CURAND_RandomStreams(234)
if shape_as_symbolic:
# instantiate a TensorConstant with the value (10, 10)
shape = constant((10, 10))
else:
if dim_as_symbolic:
# Only one dimension is symbolic, with the others known
shape = (10, constant(10))
else:
shape = (10, 10)
u0 = rng.normal(shape)
u1 = rng.normal(shape)
f0 = theano.function([], u0, mode=mode_with_gpu)
f1 = theano.function([], u1, mode=mode_with_gpu)
v0list = [f0() for i in range(3)]
v1list = [f1() for i in range(3)]
# print v0list
# print v1list
# assert that elements are different in a few ways
assert numpy.all(v0list[0] != v0list[1])
assert numpy.all(v1list[0] != v1list[1])
assert numpy.all(v0list[0] != v1list[0])
for v in v0list:
assert v.shape == (10, 10)
assert v.min() < v.max()
assert -.5 <= v.mean() <= .5
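For reference, a small driver that exercises the three configurations described in the docstring would simply call the function above with each flag combination:

check_normal_basic(shape_as_symbolic=True)
check_normal_basic(shape_as_symbolic=False, dim_as_symbolic=True)
check_normal_basic(shape_as_symbolic=False, dim_as_symbolic=False)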
Example 3: DiscLayer
# Required import: from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams [as alias]
# Or: from theano.sandbox.cuda.rng_curand.CURAND_RandomStreams import normal [as alias]
class DiscLayer(object):
def __init__(self, rng, input, in_dim, W=None, b=None, W_scale=1.0):
# Setup a shared random generator for this layer
self.rng = RandStream(rng.randint(1000000))
self.input = input
self.in_dim = in_dim
# Get some random initial weights and biases, if not given
if W is None:
# Generate random initial filters in a typical way
W_init = 1.0 * np.asarray(rng.normal( \
size=(self.in_dim, 1)), \
dtype=theano.config.floatX)
W = theano.shared(value=(W_scale*W_init))
if b is None:
b_init = np.zeros((1,), dtype=theano.config.floatX)
b = theano.shared(value=b_init)
# Set layer weights and biases
self.W = W
self.b = b
# Compute linear "pre-activation" for this layer
self.linear_output = 20.0 * T.tanh((T.dot(self.input, self.W) + self.b) / 20.0)
# Apply activation function
self.output = self.linear_output
# Compute squared sum of outputs, for regularization
self.act_l2_sum = T.sum(self.output**2.0) / self.output.shape[0]
# Conveniently package layer parameters
self.params = [self.W, self.b]
# little layer construction complete...
return
def _noisy_params(self, P, noise_lvl=0.):
"""Noisy weights, like convolving energy surface with a gaussian."""
P_nz = P + DCG(self.rng.normal(size=P.shape, avg=0.0, std=noise_lvl, \
dtype=theano.config.floatX))
return P_nz
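A hypothetical construction of this layer, assuming RandStream is the CURAND_RandomStreams alias used in this repository and DCG is its GPU-cast helper; the input dimension is an illustrative value:

import numpy.random as npr
import theano.tensor as T

rng = npr.RandomState(1234)   # numpy RNG used for weight initialization
x = T.matrix('x')             # symbolic input of shape (batch, in_dim)
disc = DiscLayer(rng, input=x, in_dim=256)
score = disc.output           # bounded linear response, one column
# disc.params == [disc.W, disc.b]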
Example 4: GenConvModule
# Required import: from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams [as alias]
# Or: from theano.sandbox.cuda.rng_curand.CURAND_RandomStreams import normal [as alias]
class GenConvModule(object):
"""
Module of one "fractionally strided" convolution layer followed by one
regular convolution layer. Inputs to the fractionally strided convolution
can optionally be augmented with some random values.
Params:
filt_shape: shape for convolution filters -- should be square and odd
in_chans: number of channels in the inputs to module
out_chans: number of channels in the outputs from module
rand_chans: number of random channels to augment input
use_rand: flag for whether or not to augment inputs
apply_bn_1: flag for whether to batch normalize following first conv
apply_bn_2: flag for whether to batch normalize following second conv
us_stride: upsampling ratio in the fractionally strided convolution
use_pooling: whether to use unpooling or fractional striding
init_func: function for initializing module parameters
mod_name: text name for identifying module in theano graph
rand_type: whether to use Gaussian or uniform randomness
"""
def __init__(self, filt_shape, in_chans, out_chans, rand_chans,
use_rand=True, apply_bn_1=True, apply_bn_2=True,
us_stride=2, use_pooling=True,
init_func=None, mod_name='gm_conv',
rand_type='normal'):
assert ((filt_shape[0] % 2) > 0), "filter dim should be odd (not even)"
self.filt_dim = filt_shape[0]
self.in_chans = in_chans
self.out_chans = out_chans
self.rand_chans = rand_chans
self.use_rand = use_rand
self.apply_bn_1 = apply_bn_1
self.apply_bn_2 = apply_bn_2
self.us_stride = us_stride
self.use_pooling = use_pooling
self.mod_name = mod_name
self.rand_type = rand_type
self.rng = RandStream(123)
if init_func is None:
self.init_func = inits.Normal(scale=0.02)
else:
self.init_func = init_func
self._init_params() # initialize parameters
return
def _init_params(self):
"""
Initialize parameters for the layers in this generator module.
"""
if self.use_rand:
# random values will be stacked on exogenous input
self.w1 = self.init_func((self.out_chans, (self.in_chans+self.rand_chans), self.filt_dim, self.filt_dim),
"{}_w1".format(self.mod_name))
else:
# random values won't be stacked on exogenous input
self.w1 = self.init_func((self.out_chans, self.in_chans, self.filt_dim, self.filt_dim),
"{}_w1".format(self.mod_name))
self.w2 = self.init_func((self.out_chans, self.out_chans, self.filt_dim, self.filt_dim),
"{}_w2".format(self.mod_name))
self.params = [self.w1, self.w2]
# make gains and biases for transforms that will get batch normed
if self.apply_bn_1:
gain_ifn = inits.Normal(loc=1., scale=0.02)
bias_ifn = inits.Constant(c=0.)
self.g1 = gain_ifn((self.out_chans), "{}_g1".format(self.mod_name))
self.b1 = bias_ifn((self.out_chans), "{}_b1".format(self.mod_name))
self.params.extend([self.g1, self.b1])
if self.apply_bn_2:
gain_ifn = inits.Normal(loc=1., scale=0.02)
bias_ifn = inits.Constant(c=0.)
self.g2 = gain_ifn((self.out_chans), "{}_g2".format(self.mod_name))
self.b2 = bias_ifn((self.out_chans), "{}_b2".format(self.mod_name))
self.params.extend([self.g2, self.b2])
return
def apply(self, input, rand_vals=None):
"""
Apply this generator module to some input.
"""
batch_size = input.shape[0]
bm = int((self.filt_dim - 1) / 2) # use "same" mode convolutions
ss = self.us_stride # stride for "learned upsampling"
if self.use_pooling:
# "unpool" the input if desired
input = input.repeat(ss, axis=2).repeat(ss, axis=3)
# get shape for random values that will augment input
rand_shape = (batch_size, self.rand_chans, input.shape[2], input.shape[3])
if self.use_rand:
# augment input with random channels
if rand_vals is None:
if self.rand_type == 'normal':
rand_vals = self.rng.normal(size=rand_shape, avg=0.0, std=1.0, \
dtype=theano.config.floatX)
else:
rand_vals = self.rng.uniform(size=rand_shape, low=-1.0, high=1.0, \
dtype=theano.config.floatX)
rand_vals = rand_vals.reshape(rand_shape)
# stack random values on top of input
full_input = T.concatenate([rand_vals, input], axis=1)
else:
#......... part of this code omitted .........
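A hypothetical instantiation of this module, using only parameters documented in the docstring above (channel counts and filter size are illustrative, and the example's default initializer is used); since the apply method is truncated here, its return value is only sketched in a comment:

gen_mod = GenConvModule(filt_shape=(5, 5), in_chans=128, out_chans=64,
                        rand_chans=16, use_rand=True,
                        apply_bn_1=True, apply_bn_2=True,
                        us_stride=2, use_pooling=True,
                        mod_name='gm_conv_1', rand_type='normal')
# given a symbolic 4-tensor h of shape (batch, 128, H, W):
# out = gen_mod.apply(h)   # upsample 2x, augment with 16 noise channels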
Example 5: DAELayer
# Required import: from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams [as alias]
# Or: from theano.sandbox.cuda.rng_curand.CURAND_RandomStreams import normal [as alias]
class DAELayer(object):
def __init__(self, rng, clean_input=None, fuzzy_input=None, \
in_dim=0, out_dim=0, activation=None, input_noise=0., \
W=None, b_h=None, b_v=None, W_scale=1.0):
# Setup a shared random generator for this layer
self.rng = RandStream(rng.randint(1000000))
# Grab the layer input and perturb it with some sort of noise. This
# is, after all, a _denoising_ autoencoder...
self.clean_input = clean_input
self.noisy_input = self._get_noisy_input(fuzzy_input, input_noise)
# Set some basic layer properties
self.activation = activation
self.in_dim = in_dim
self.out_dim = out_dim
# Get some random initial weights and biases, if not given
if W is None:
W_init = np.asarray(1.0 * DCG(rng.standard_normal( \
size=(in_dim, out_dim)), dtype=theano.config.floatX))
W = theano.shared(value=(W_scale*W_init), name='W')
if b_h is None:
b_init = np.zeros((out_dim,), dtype=theano.config.floatX)
b_h = theano.shared(value=b_init, name='b_h')
if b_v is None:
b_init = np.zeros((in_dim,), dtype=theano.config.floatX)
b_v = theano.shared(value=b_init, name='b_v')
# Grab pointers to the now-initialized weights and biases
self.W = W
self.b_h = b_h
self.b_v = b_v
# Put the learnable/optimizable parameters into a list
self.params = [self.W, self.b_h, self.b_v]
# Beep boop... layer construction complete...
return
def compute_costs(self, lam_l1=None):
"""Compute reconstruction and activation sparsity costs."""
# Get noise-perturbed encoder/decoder parameters
W_nz = self._noisy_params(self.W, 0.01)
b_nz = self.b_h #self._noisy_params(self.b_h, 0.05)
# Compute hidden and visible activations
A_v, A_h = self._compute_activations(self.noisy_input, \
W_nz, b_nz, self.b_v)
# Compute reconstruction error cost
recon_cost = T.sum((self.clean_input - A_v)**2.0) / \
self.clean_input.shape[0]
# Compute sparsity penalty (over both population and lifetime)
row_l1_sum = T.sum(abs(row_normalize(A_h))) / A_h.shape[0]
col_l1_sum = T.sum(abs(col_normalize(A_h))) / A_h.shape[1]
sparse_cost = lam_l1[0] * (row_l1_sum + col_l1_sum)
return [recon_cost, sparse_cost]
def _compute_hidden_acts(self, X, W, b_h):
"""Compute activations of encoder (at hidden layer)."""
A_h = self.activation(T.dot(X, W) + b_h)
return A_h
def _compute_activations(self, X, W, b_h, b_v):
"""Compute activations of decoder (at visible layer)."""
A_h = self._compute_hidden_acts(X, W, b_h)
A_v = T.dot(A_h, W.T) + b_v
return [A_v, A_h]
def _noisy_params(self, P, noise_lvl=0.):
"""Noisy weights, like convolving energy surface with a gaussian."""
if noise_lvl > 1e-3:
P_nz = P + DCG(self.rng.normal(size=P.shape, avg=0.0, std=noise_lvl, \
dtype=theano.config.floatX))
else:
P_nz = P
return P_nz
def _get_noisy_input(self, input, p):
"""p is the probability of dropping elements of input."""
drop_rnd = self.rng.uniform(input.shape, low=0.0, high=1.0, \
dtype=theano.config.floatX)
drop_mask = drop_rnd > p
# Cast mask from int to float32, to keep things on GPU
noisy_input = input * DCG(drop_mask)
return noisy_input
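A hypothetical construction of this layer, with illustrative sizes and noise level; it assumes the repository's helpers (RandStream, DCG, row_normalize, col_normalize) are in scope, and any elementwise Theano function can serve as the activation:

import numpy.random as npr
import theano.tensor as T

rng = npr.RandomState(1234)
x = T.matrix('x')
dae = DAELayer(rng, clean_input=x, fuzzy_input=x,
               in_dim=784, out_dim=256,
               activation=T.nnet.sigmoid, input_noise=0.1)
recon_cost, sparse_cost = dae.compute_costs(lam_l1=[0.05])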
Example 6: TwoStageModel2
# Required import: from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams [as alias]
# Or: from theano.sandbox.cuda.rng_curand.CURAND_RandomStreams import normal [as alias]
#......... part of this code omitted .........
self.train_switch = theano.shared(value=zero_ary, name='tsm_train_switch')
self.set_train_switch(1.0)
if self.shared_param_dicts is None:
# initialize "optimizable" parameters specific to this TSM
init_vec = to_fX( np.zeros((1,self.z_dim)) )
self.p_z_mean = theano.shared(value=init_vec, name='tsm_p_z_mean')
self.p_z_logvar = theano.shared(value=init_vec, name='tsm_p_z_logvar')
self.obs_logvar = theano.shared(value=zero_ary, name='tsm_obs_logvar')
self.bounded_logvar = 8.0 * T.tanh((1.0/8.0) * self.obs_logvar)
self.shared_param_dicts = {}
self.shared_param_dicts['p_z_mean'] = self.p_z_mean
self.shared_param_dicts['p_z_logvar'] = self.p_z_logvar
self.shared_param_dicts['obs_logvar'] = self.obs_logvar
else:
self.p_z_mean = self.shared_param_dicts['p_z_mean']
self.p_z_logvar = self.shared_param_dicts['p_z_logvar']
self.obs_logvar = self.shared_param_dicts['obs_logvar']
self.bounded_logvar = 8.0 * T.tanh((1.0/8.0) * self.obs_logvar)
##############################################
# Setup the TwoStageModel's main computation. #
##############################################
print("Building TSM...")
# samples of "hidden" latent state (from q)
h_q_mean, h_q_logvar, h_q = \
self.q_h_given_x.apply(self.x_in, do_samples=True)
# samples of "prior" latent state (from q)
z_q_mean, z_q_logvar, z_q = \
self.q_z_given_h.apply(h_q, do_samples=True)
# samples of "prior" latent state (from p)
z_p_mean = self.p_z_mean.repeat(z_q.shape[0], axis=0)
z_p_logvar = self.p_z_logvar.repeat(z_q.shape[0], axis=0)
zmuv = self.rng.normal(size=z_q.shape, avg=0.0, std=1.0, \
dtype=theano.config.floatX)
z_p = (T.exp(0.5*z_p_logvar) * zmuv) + z_p_mean
# samples from z -- switched between q/p
self.z = (self.train_switch[0] * z_q) + \
((1.0 - self.train_switch[0]) * z_p)
# samples of "hidden" latent state (from p)
h_p_mean, h_p_logvar, h_p = \
self.p_h_given_z.apply(self.z, do_samples=True)
# samples from h -- switched between q/p
self.h = (self.train_switch[0] * h_q) + \
((1.0 - self.train_switch[0]) * h_p)
# compute KLds for "prior" and "hidden" latent distributions
self.kld_z_q2p = gaussian_kld(z_q_mean, z_q_logvar, \
z_p_mean, z_p_logvar)
self.kld_z_p2q = gaussian_kld(z_p_mean, z_p_logvar, \
z_q_mean, z_q_logvar)
self.kld_h_q2p = gaussian_kld(h_q_mean, h_q_logvar, \
h_p_mean, h_p_logvar)
self.kld_h_p2q = gaussian_kld(h_p_mean, h_p_logvar, \
h_q_mean, h_q_logvar)
# p_x_given_h generates an observation x conditioned on the "hidden"
# latent variables h.
self.x_gen, _ = self.p_x_given_h.apply(self.h, do_samples=False)
######################################################################
# ALL SYMBOLIC VARS NEEDED FOR THE OBJECTIVE SHOULD NOW BE AVAILABLE #
######################################################################
# shared var learning rate for generator and inferencer
zero_ary = to_fX( np.zeros((1,)) )
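The sampling pattern at the heart of this example, drawing z ~ N(mean, exp(logvar)) from zero-mean unit-variance (ZMUV) noise, recurs throughout the remaining examples. A minimal standalone sketch of the trick, with illustrative shapes:

import theano
import theano.tensor as T
from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams

rng = CURAND_RandomStreams(123)
z_mean = T.matrix('z_mean')
z_logvar = T.matrix('z_logvar')
# reparameterization: z = mean + sigma * eps, with eps ~ N(0, 1)
zmuv = rng.normal(size=z_mean.shape, avg=0.0, std=1.0,
                  dtype=theano.config.floatX)
z = z_mean + (T.exp(0.5 * z_logvar) * zmuv)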
Example 7: GPSImputer
# Required import: from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams [as alias]
# Or: from theano.sandbox.cuda.rng_curand.CURAND_RandomStreams import normal [as alias]
#......... part of this code omitted .........
new_lam = zero_ary + lam_kld_q
self.lam_kld_q.set_value(to_fX(new_lam))
new_lam = zero_ary + lam_kld_g
self.lam_kld_g.set_value(to_fX(new_lam))
return
def set_lam_l2w(self, lam_l2w=1e-3):
"""
Set the relative strength of l2 regularization on network params.
"""
zero_ary = np.zeros((1,))
new_lam = zero_ary + lam_l2w
self.lam_l2w.set_value(to_fX(new_lam))
return
def set_train_switch(self, switch_val=0.0):
"""
Set the switch for changing between training and sampling behavior.
"""
if (switch_val < 0.5):
switch_val = 0.0
else:
switch_val = 1.0
zero_ary = np.zeros((1,))
new_val = zero_ary + switch_val
self.train_switch.set_value(to_fX(new_val))
return
def _construct_zi_zmuv(self, xi, br):
"""
Construct the necessary (symbolic) samples for computing through this
GPSImputer for input (symbolic) matrix xi.
"""
zi_zmuv = self.rng.normal( \
size=(self.imp_steps, xi.shape[0]*br, self.z_dim), \
avg=0.0, std=1.0, dtype=theano.config.floatX)
return zi_zmuv
def _construct_nll_costs(self, si, xo, xm):
"""
Construct the negative log-likelihood part of free energy.
"""
# average log-likelihood over the refinement sequence
xh = self._si_as_x(si)
xm_inv = 1.0 - xm # we will measure nll only where xm_inv is 1
if self.x_type == 'bernoulli':
ll_costs = log_prob_bernoulli(xo, xh, mask=xm_inv)
else:
ll_costs = log_prob_gaussian2(xo, xh, \
log_vars=self.bounded_logvar, mask=xm_inv)
nll_costs = -ll_costs.flatten()
return nll_costs
def _construct_kld_costs(self, p=1.0):
"""
Construct the policy KL-divergence part of cost to minimize.
"""
kld_pis = []
kld_qis = []
kld_gis = []
for i in range(self.imp_steps):
kld_pis.append(T.sum(self.kldi_p2q[i]**p, axis=1))
kld_qis.append(T.sum(self.kldi_q2p[i]**p, axis=1))
kld_gis.append(T.sum(self.kldi_p2g[i]**p, axis=1))
# compute the batch-wise costs
kld_pi = sum(kld_pis)
Example 8: HiddenLayer
# Required import: from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams [as alias]
# Or: from theano.sandbox.cuda.rng_curand.CURAND_RandomStreams import normal [as alias]
class HiddenLayer(object):
def __init__(self, rng, input, in_dim, out_dim, \
activation=None, pool_size=0, \
drop_rate=0., input_noise=0., bias_noise=0., \
W=None, b=None, \
use_bias=True, name=""):
# Setup a shared random generator for this layer
#self.srng = theano.tensor.shared_randomstreams.RandomStreams( \
# rng.randint(100000))
self.srng = CURAND_RandomStreams(rng.randint(1000000))
self.clean_input = input
# Add gaussian noise to the input (if desired)
if (input_noise > 1e-4):
self.fuzzy_input = input + \
(input_noise * self.srng.normal(size=input.shape, \
dtype=theano.config.floatX))
else:
self.fuzzy_input = input
# Apply masking noise to the input (if desired)
if (drop_rate > 1e-4):
self.noisy_input = self._drop_from_input(self.fuzzy_input, drop_rate)
else:
self.noisy_input = self.fuzzy_input
# Set some basic layer properties
self.pool_size = pool_size
self.in_dim = in_dim
self.out_dim = out_dim
if self.pool_size <= 1:
self.filt_count = self.out_dim
else:
self.filt_count = self.out_dim * self.pool_size
self.pool_count = self.filt_count // max(self.pool_size, 1)
if activation:
self.activation = activation
else:
if self.pool_size <= 1:
self.activation = lambda x: relu_actfun(x)
else:
self.activation = lambda x: \
maxout_actfun(x, self.pool_size, self.filt_count)
# Get some random initial weights and biases, if not given
if W is None:
if self.pool_size <= 1:
# Generate random initial filters in a typical way
W_init = np.asarray(0.04 * rng.standard_normal( \
size=(self.in_dim, self.filt_count)), \
dtype=theano.config.floatX)
else:
# Generate groups of random filters to pool over such that
# intra-group correlations are stronger than inter-group
# correlations, to encourage pooling over similar filters...
filters = []
for g_num in range(self.pool_count):
g_filt = 0.01 * rng.standard_normal(size=(self.in_dim,1))
for f_num in range(self.pool_size):
f_filt = g_filt + (0.005 * rng.standard_normal( \
size=(self.in_dim,1)))
filters.append(f_filt)
W_init = np.hstack(filters).astype(theano.config.floatX)
W = theano.shared(value=W_init, name="{0:s}_W".format(name))
if b is None:
b_init = np.zeros((self.filt_count,), dtype=theano.config.floatX)
b = theano.shared(value=b_init, name="{0:s}_b".format(name))
# Set layer weights and biases
self.W = W
self.b = b
# Compute linear "pre-activation" for this layer
if use_bias:
self.linear_output = T.dot(self.noisy_input, self.W) + self.b
else:
self.linear_output = T.dot(self.noisy_input, self.W)
# Add noise to the pre-activation features (if desired)
self.noisy_linear = self.linear_output + \
(bias_noise * self.srng.normal(size=self.linear_output.shape, \
dtype=theano.config.floatX))
# Apply activation function
self.output = self.activation(self.noisy_linear)
# Compute some properties of the activations, probably to regularize
self.act_l2_sum = T.sum(self.output**2.) / self.output.size
self.row_l1_sum = T.sum(abs(row_normalize(self.output))) / \
self.output.shape[0]
self.col_l1_sum = T.sum(abs(col_normalize(self.output))) / \
self.output.shape[1]
# Conveniently package layer parameters
if use_bias:
self.params = [self.W, self.b]
#......... part of this code omitted .........
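A hypothetical construction of the HiddenLayer from Example 8 with maxout pooling, based on the constructor signature shown there (sizes and noise levels are illustrative):

import numpy.random as npr
import theano.tensor as T

rng = npr.RandomState(1234)
x = T.matrix('x')
hidden = HiddenLayer(rng, input=x, in_dim=784, out_dim=512,
                     pool_size=4,   # maxout over groups of 4 filters
                     drop_rate=0.2, input_noise=0.1, bias_noise=0.05,
                     name="h1")
y = hidden.output  # symbolic post-activation output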
Example 9: GenNet
# Required import: from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams [as alias]
# Or: from theano.sandbox.cuda.rng_curand.CURAND_RandomStreams import normal [as alias]
#......... part of this code omitted .........
self.output = self._construct_post_samples() * self.output_mask
self.out_dim = self.mlp_layers[-1].out_dim
C_init = np.zeros((self.out_dim,self.out_dim)).astype(theano.config.floatX)
m_init = np.zeros((self.out_dim,)).astype(theano.config.floatX)
self.dist_mean = theano.shared(m_init, name='gn_dist_mean')
self.dist_cov = theano.shared(C_init, name='gn_dist_cov')
# Get simple regularization penalty to moderate activation dynamics
self.act_reg_cost = lam_l2a * self._act_reg_cost()
# Construct a sampler for drawing independent samples from this model's
# isotropic Gaussian prior, and a sampler for the model distribution.
self.sample_from_prior = self._construct_prior_sampler()
self.sample_from_model = self._construct_model_sampler()
# Construct a function for passing points from the latent/prior space
# through the transform induced by the current model parameters.
self.transform_prior = self._construct_transform_prior()
return
def _act_reg_cost(self):
"""
Apply L2 regularization to the activations in this network.
"""
act_sq_sums = []
for layer in self.mlp_layers:
act_sq_sums.append(layer.act_l2_sum)
full_act_sq_sum = T.sum(act_sq_sums)
return full_act_sq_sum
def _construct_post_samples(self):
"""
Draw a single sample from each of the approximate posteriors encoded
in self.output_mu and self.output_sigma.
"""
post_samples = self.output_mu + (self.output_sigma * \
self.rng.normal(size=self.output_sigma.shape, avg=0.0, std=1.0, \
dtype=theano.config.floatX))
return post_samples
def _construct_prior_sampler(self):
"""
Draw independent samples from this model's isotropic Gaussian prior.
"""
samp_count = T.lscalar()
prior_samples = self.prior_sigma * self.rng.normal( \
size=(samp_count, self.latent_dim), avg=0.0, std=1.0, \
dtype=theano.config.floatX)
prior_sampler = theano.function([samp_count], outputs=prior_samples)
return prior_sampler
def _construct_model_sampler(self):
"""
Draw independent samples from this model's distribution.
"""
samp_count = T.lscalar()
prior_samples = self.prior_sigma * self.rng.normal( \
size=(samp_count, self.latent_dim), avg=0.0, std=1.0, \
dtype=theano.config.floatX)
prior_sampler = theano.function([samp_count], outputs=self.output, \
givens={self.Xp: prior_samples})
return prior_sampler
def _construct_transform_prior(self):
"""
Apply the transform induced by the current model parameters to some
set of points in the latent/prior space.
"""
feedforward = theano.function([self.Xp], outputs=self.output)
Example 10: InfNet
# Required import: from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams [as alias]
# Or: from theano.sandbox.cuda.rng_curand.CURAND_RandomStreams import normal [as alias]
#......... part of this code omitted .........
"""
Pass input X through this InfNet and get the resulting Gaussian
conditional distribution.
"""
# pass activations through the shared layers
shared_acts = [X]
for layer in self.shared_layers:
r0, r1, layer_acts = layer.apply(shared_acts[-1])
shared_acts.append(layer_acts)
# pass activations through the mean estimating layers
mu_acts = [shared_acts[-1]]
for layer in self.mu_layers:
r0, r1, layer_acts = layer.apply(mu_acts[-1])
mu_acts.append(layer_acts)
layer_acts, r0, r1 = self.mu_layers[-1].apply(mu_acts[-2])
mu_acts[-1] = layer_acts # use linear output at last layer
# pass activations through the logvar estimating layers
sigma_acts = [shared_acts[-1]]
for layer in self.sigma_layers:
r0, r1, layer_acts = layer.apply(sigma_acts[-1])
sigma_acts.append(layer_acts)
layer_acts, r0, r1 = self.sigma_layers[-1].apply(sigma_acts[-2])
sigma_acts[-1] = layer_acts # use linear output at last layer
# construct the outputs we will want to access
output_mean = mu_acts[-1]
output_logvar = sigma_acts[-1]
# wrap them up for easy returnage
result = [output_mean, output_logvar]
if do_samples:
output_samples = output_mean + \
( (self.sigma_scale[0] * T.exp(0.5*output_logvar)) * \
self.rng.normal(size=output_mean.shape, avg=0.0, std=1.0, \
dtype=theano.config.floatX) )
result.append(output_samples)
return result
def apply_shared(self, X):
"""
Pass input X through this InfNet's shared layers.
"""
# pass activations through the shared layers
shared_acts = [X]
for layer in self.shared_layers:
r0, r1, layer_acts = layer.apply(shared_acts[-1])
shared_acts.append(layer_acts)
result = shared_acts[-1]
return result
def train_rica(self, X, lr, lam):
"""
CONSTRUCT FUNCTIONS FOR RICA PRETRAINING INPUT LAYER
"""
if self.rica_func is None:
l_rate = T.scalar()
lam_l1 = T.scalar()
X_in = T.matrix('in_X_in')
W_in = self.W_rica + self.rng.normal(size=self.W_rica.shape, \
avg=0.0, std=0.01, dtype=theano.config.floatX)
X_enc = X_in
H_rec = T.dot(X_enc, W_in)
X_rec = T.dot(H_rec, W_in.T)
recon_cost = T.sum((X_enc - X_rec)**2.0) / X_enc.shape[0]
spars_cost = lam_l1 * (T.sum(soft_abs(H_rec)) / H_rec.shape[0])
rica_cost = recon_cost + spars_cost
Example 11: InfNet
# Required import: from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams [as alias]
# Or: from theano.sandbox.cuda.rng_curand.CURAND_RandomStreams import normal [as alias]
#......... part of this code omitted .........
self.sample_posterior = None
self.mean_posterior = None
return
def set_sigma_scale(self, sigma_scale=1.0):
"""
Set the posterior sigma rescaling shared parameter to some value.
"""
zero_ary = np.zeros((1,))
new_scale = zero_ary + sigma_scale
self.sigma_scale.set_value(new_scale.astype(theano.config.floatX))
return
def _act_reg_cost(self):
"""
Apply L2 regularization to the activations in each net.
"""
act_sq_sums = []
for layer in self.shared_layers:
act_sq_sums.append(layer.act_l2_sum)
for layer in self.mu_layers:
act_sq_sums.append(layer.act_l2_sum)
for layer in self.sigma_layers:
act_sq_sums.append(layer.act_l2_sum)
full_act_sq_sum = T.sum(act_sq_sums)
return full_act_sq_sum
def _construct_post_samples(self):
"""
Draw a single sample from each of the approximate posteriors encoded
in self.output_mean and self.output_sigma.
"""
post_samples = self.output_mean + (self.output_sigma * \
self.rng.normal(size=self.output_sigma.shape, avg=0.0, std=1.0, \
dtype=theano.config.floatX))
return post_samples
def _construct_kld_cost(self):
"""
Compute (analytically) the KL divergence between each approximate
posterior encoded by self.mu/self.sigma and the isotropic Gaussian
distribution with mean 0 and standard deviation self.prior_sigma.
"""
prior_mu = 0.0
prior_logvar = np.log(self.prior_sigma**2.0)
post_klds = gaussian_kld(self.output_mean, self.output_logvar, \
prior_mu, prior_logvar)
kld_cost = T.sum(post_klds, axis=1, keepdims=True)
return kld_cost
def _construct_sample_posterior(self):
"""
Construct a sampler that draws a single sample from the inferred
posterior for some set of inputs.
"""
psample = theano.function([self.Xd], \
outputs=self.output)
return psample
def init_biases(self, b_init=0.0, b_std=1e-2):
"""
Initialize the biases in all hidden layers to some constant.
"""
for layer in self.shared_layers:
b_vec = (0.0 * layer.b.get_value(borrow=False)) + b_init
b_vec = b_vec + (b_std * npr.randn(*b_vec.shape))
Example 12: HiddenLayer
# Required import: from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams [as alias]
# Or: from theano.sandbox.cuda.rng_curand.CURAND_RandomStreams import normal [as alias]
class HiddenLayer(object):
def __init__(self, rng, input, in_dim, out_dim, \
activation=None, pool_size=0, \
drop_rate=0., input_noise=0., bias_noise=0., \
W=None, b=None, b_in=None, s_in=None, \
name="", W_scale=1.0):
# Setup a shared random generator for this layer
self.rng = RandStream(rng.randint(1000000))
# setup shared parameters controlling input noise, bias noise, and dropout
zero_ary = np.zeros((1,)).astype(theano.config.floatX)
self.input_noise = theano.shared(value=(zero_ary+input_noise), \
name="{0:s}_input_noise".format(name))
self.bias_noise = theano.shared(value=(zero_ary+bias_noise), \
name="{0:s}_bias_noise".format(name))
self.drop_rate = theano.shared(value=(zero_ary+drop_rate), \
name="{0:s}_drop_rate".format(name))
# setup scale and bias params for the input
if b_in is None:
# input biases are always initialized to zero
ary = np.zeros((in_dim,), dtype=theano.config.floatX)
b_in = theano.shared(value=ary, name="{0:s}_b_in".format(name))
if s_in is None:
# input scales are always initialized to one
ary = 0.541325 * np.ones((in_dim,), dtype=theano.config.floatX)
s_in = theano.shared(value=ary, name="{0:s}_s_in".format(name))
self.b_in = b_in
self.s_in = s_in
# Set some basic layer properties
self.pool_size = pool_size
self.in_dim = in_dim
self.out_dim = out_dim
if self.pool_size <= 1:
self.filt_count = self.out_dim
else:
self.filt_count = self.out_dim * self.pool_size
self.pool_count = self.filt_count // max(self.pool_size, 1)
if activation is None:
activation = relu_actfun
if self.pool_size <= 1:
self.activation = activation
else:
self.activation = lambda x: \
maxout_actfun(x, self.pool_size, self.filt_count)
# Get some random initial weights and biases, if not given
if W is None:
# Generate initial filters using orthogonal random trick
W_shape = (self.in_dim, self.filt_count)
#W_scale = W_scale * (1.0 / np.sqrt(self.in_dim))
#W_init = W_scale * npr.normal(0.0, 1.0, W_shape)
W_init = ortho_matrix(shape=(self.in_dim, self.filt_count), \
gain=W_scale)
#W_init = 0.01 * npr.normal(0.0, 1.0, W_shape)
W_init = W_init.astype(theano.config.floatX)
W = theano.shared(value=W_init, name="{0:s}_W".format(name))
if b is None:
b_init = np.zeros((self.filt_count,), dtype=theano.config.floatX)
b = theano.shared(value=b_init, name="{0:s}_b".format(name))
# Set layer weights and biases
self.W = W
self.b = b
# Feedforward through the layer
use_in = input_noise > 0.001
use_bn = bias_noise > 0.001
use_drop = drop_rate > 0.001
self.linear_output, self.noisy_linear, self.output = \
self.apply(input, use_in=use_in, use_bn=use_bn, \
use_drop=use_drop)
# Compute some properties of the activations, probably to regularize
self.act_l2_sum = T.sum(self.noisy_linear**2.) / self.output.size
# Conveniently package layer parameters
self.params = [self.W, self.b, self.b_in, self.s_in]
self.shared_param_dicts = { \
'W': self.W, \
'b': self.b, \
'b_in': self.b_in, \
's_in': self.s_in }
# Layer construction complete...
return
def apply(self, input, use_in=False, use_bn=False, use_drop=False):
"""
Apply feedforward to this input, returning several partial results.
"""
# Add gaussian noise to the input (if desired)
#fancy_input = T.nnet.softplus(self.s_in) * (input + self.b_in)
fancy_input = input
if use_in:
fuzzy_input = fancy_input + self.input_noise[0] * \
self.rng.normal(size=fancy_input.shape, avg=0.0, std=1.0, \
dtype=theano.config.floatX)
else:
#......... part of this code omitted .........
Example 13: SimpleInfNet
# Required import: from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams [as alias]
# Or: from theano.sandbox.cuda.rng_curand.CURAND_RandomStreams import normal [as alias]
class SimpleInfNet(object):
def __init__(self, rng, in_dim, out_dim, \
W_mean=None, b_mean=None, \
W_logvar=None, b_logvar=None, \
name="", W_scale=1.0):
# setup a shared random generator for this network
self.rng = RandStream(rng.randint(1000000))
# set some basic layer properties
self.in_dim = in_dim
self.out_dim = out_dim
# initialize weights and biases for mean estimate
if W_mean is None:
# Generate initial filters using orthogonal random trick
W_shape = (self.in_dim, self.out_dim)
if W_scale > 0.1:
W_scale = W_scale * (1.0 / np.sqrt(self.in_dim))
W_init = W_scale * npr.normal(0.0, 1.0, W_shape)
W_init = W_init.astype(theano.config.floatX)
W_mean = theano.shared(value=W_init, \
name="{0:s}_W_mean".format(name))
if b_mean is None:
b_init = np.zeros((self.out_dim,), \
dtype=theano.config.floatX)
b_mean = theano.shared(value=b_init, \
name="{0:s}_b_mean".format(name))
# grab handles for easy access
self.W_mean = W_mean
self.b_mean = b_mean
# initialize weights and biases for log-variance estimate
if W_logvar is None:
# Generate initial filters using orthogonal random trick
W_shape = (self.in_dim, self.out_dim)
W_scale = W_scale * (1.0 / np.sqrt(self.in_dim))
W_init = W_scale * npr.normal(0.0, 1.0, W_shape)
#W_init = ortho_matrix(shape=W_shape, gain=W_scale)
W_init = W_init.astype(theano.config.floatX)
W_logvar = theano.shared(value=W_init, \
name="{0:s}_W_logvar".format(name))
if b_logvar is None:
b_init = np.zeros((self.out_dim,), \
dtype=theano.config.floatX)
b_logvar = theano.shared(value=b_init, \
name="{0:s}_b_logvar".format(name))
# grab handles for easy access
self.W_logvar = W_logvar
self.b_logvar = b_logvar
# Conveniently package layer parameters
self.mlp_params = [self.W_mean, self.b_mean, \
self.W_logvar, self.b_logvar]
# Layer construction complete...
return
def get_bias(self):
"""
Get the bias at output layer.
"""
out_bias = self.b_mean
return out_bias
def apply(self, x, do_samples=True):
"""
Apply this SimpleInfNet to some input.
"""
z_mean = T.dot(x, self.W_mean) + self.b_mean
z_logvar = T.dot(x, self.W_logvar) + self.b_logvar
z_samples = z_mean + ( (T.exp(0.5*z_logvar)) * \
DCG(self.rng.normal(size=z_mean.shape, avg=0.0, std=1.0, \
dtype=theano.config.floatX)) )
# wrap them up for easy returnage
result = [z_mean, z_logvar]
if do_samples:
result.append(z_samples)
return result
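A hypothetical end-to-end use of this network, constructed with the signature above (dimensions are illustrative; RandStream and DCG come from the example's surrounding imports):

import numpy.random as npr
import theano.tensor as T

rng = npr.RandomState(1234)
x = T.matrix('x')
inf_net = SimpleInfNet(rng, in_dim=512, out_dim=64, name="q_z")
z_mean, z_logvar, z_samples = inf_net.apply(x, do_samples=True)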
Example 14: ConvPoolLayer
# Required import: from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams [as alias]
# Or: from theano.sandbox.cuda.rng_curand.CURAND_RandomStreams import normal [as alias]
class ConvPoolLayer(object):
"""
A simple convolution --> max-pooling layer.
The (symbolic) input to this layer must be a theano.tensor.dtensor4 shaped
like (batch_size, chan_count, im_dim_1, im_dim_2).
filt_def should be a 4-tuple like (filt_count, in_chans, filt_def_1, filt_def_2)
pool_def should be a 2-tuple like (pool_dim, pool_stride)
"""
def __init__(self, rng, input=None, filt_def=None, pool_def=(2, 2), \
activation=None, drop_rate=0., input_noise=0., bias_noise=0., \
W=None, b=None, name="", W_scale=1.0):
# Setup a shared random generator for this layer
#self.rng = theano.tensor.shared_randomstreams.RandomStreams( \
# rng.randint(100000))
self.rng = CURAND_RandomStreams(rng.randint(1000000))
self.clean_input = input
# Add gaussian noise to the input (if desired)
if (input_noise > 1e-4):
self.fuzzy_input = input + self.rng.normal(size=input.shape, \
avg=0.0, std=input_noise, dtype=theano.config.floatX)
else:
self.fuzzy_input = input
# Apply masking noise to the input (if desired)
if (drop_rate > 1e-4):
self.noisy_input = self._drop_from_input(self.fuzzy_input, drop_rate)
else:
self.noisy_input = self.fuzzy_input
# Set the activation function for the conv filters
if activation:
self.activation = activation
else:
self.activation = lambda x: relu_actfun(x)
# initialize weights with random weights
W_init = 0.01 * np.asarray(rng.normal( \
size=filt_def), dtype=theano.config.floatX)
self.W = theano.shared(value=(W_scale*W_init), \
name="{0:s}_W".format(name))
# the bias is a 1D tensor -- one bias per output feature map
b_init = np.zeros((filt_def[0],), dtype=theano.config.floatX) + 0.1
self.b = theano.shared(value=b_init, name="{0:s}_b".format(name))
# convolve input feature maps with filters
input_c01b = self.noisy_input.dimshuffle(1, 2, 3, 0) # bc01 to c01b
filters_c01b = self.W.dimshuffle(1, 2, 3, 0) # bc01 to c01b
conv_op = FilterActs(stride=1, partial_sum=1)
contig_input = gpu_contiguous(input_c01b)
contig_filters = gpu_contiguous(filters_c01b)
conv_out_c01b = conv_op(contig_input, contig_filters)
if (bias_noise > 1e-4):
noisy_conv_out_c01b = conv_out_c01b + self.rng.normal( \
size=conv_out_c01b.shape, avg=0.0, std=bias_noise, \
dtype=theano.config.floatX)
else:
noisy_conv_out_c01b = conv_out_c01b
# downsample each feature map individually, using maxpooling
pool_op = MaxPool(ds=pool_def[0], stride=pool_def[1])
mp_out_c01b = pool_op(noisy_conv_out_c01b)
mp_out_bc01 = mp_out_c01b.dimshuffle(3, 0, 1, 2) # c01b to bc01
# add the bias term. Since the bias is a vector (1D array), we first
# reshape it to a tensor of shape (1,n_filters,1,1). Each bias will
# thus be broadcasted across mini-batches and feature map
# width & height
self.noisy_linear_output = mp_out_bc01 + self.b.dimshuffle('x', 0, 'x', 'x')
self.linear_output = self.noisy_linear_output
self.output = self.activation(self.noisy_linear_output)
# store parameters of this layer
self.params = [self.W, self.b]
return
def _drop_from_input(self, input, p):
"""p is the probability of dropping elements of input."""
# get a drop mask that drops things with probability p
drop_rnd = self.rng.uniform(size=input.shape, low=0.0, high=1.0, \
dtype=theano.config.floatX)
drop_mask = drop_rnd > p
# get a scaling factor to keep expectations fixed after droppage
drop_scale = 1. / (1. - p)
# apply dropout mask and rescaling factor to the input
droppy_input = drop_scale * input * drop_mask
return droppy_input
def _noisy_params(self, P, noise_lvl=0.):
"""Noisy weights, like convolving energy surface with a gaussian."""
P_nz = P + self.rng.normal(size=P.shape, avg=0.0, std=noise_lvl, \
dtype=theano.config.floatX)
#......... part of this code omitted .........
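A hypothetical instantiation following the shape conventions in the docstring above (a batch of RGB images, 96 5x5 filters, 2x2 max-pooling; all values illustrative):

import numpy.random as npr
import theano.tensor as T

rng = npr.RandomState(1234)
x = T.tensor4('x')  # (batch_size, chan_count, im_dim_1, im_dim_2)
conv = ConvPoolLayer(rng, input=x,
                     filt_def=(96, 3, 5, 5),  # (filt_count, in_chans, f1, f2)
                     pool_def=(2, 2),         # (pool_dim, pool_stride)
                     name="conv1")
feats = conv.output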
Example 15: GenNet
# Required import: from theano.sandbox.cuda.rng_curand import CURAND_RandomStreams [as alias]
# Or: from theano.sandbox.cuda.rng_curand.CURAND_RandomStreams import normal [as alias]
#......... part of this code omitted .........
#self.output = self.mlp_layers[-1].noisy_linear
self.output = T.nnet.sigmoid(self.mlp_layers[-1].noisy_linear)
self.out_dim = self.mlp_layers[-1].out_dim
C_init = np.zeros((self.out_dim,self.out_dim)).astype(theano.config.floatX)
m_init = np.zeros((self.out_dim,)).astype(theano.config.floatX)
self.dist_mean = theano.shared(m_init, name='gn_dist_mean')
self.dist_cov = theano.shared(C_init, name='gn_dist_cov')
# Get simple regularization penalty to moderate activation dynamics
self.act_reg_cost = lam_l2a * self._act_reg_cost()
# Construct a sampler for drawing independent samples from this model's
# isotropic Gaussian prior, and a sampler for the model distribution.
self.sample_from_prior = self._construct_prior_sampler()
self.sample_from_model = self._construct_model_sampler()
# Construct a function for passing points from the latent/prior space
# through the transform induced by the current model parameters.
self.transform_prior = self._construct_transform_prior()
return
def _act_reg_cost(self):
"""
Apply L2 regularization to the activations in this network.
"""
act_sq_sums = []
for layer in self.mlp_layers:
act_sq_sums.append(layer.act_l2_sum)
full_act_sq_sum = T.sum(act_sq_sums)
return full_act_sq_sum
def _construct_prior_sampler(self):
"""
Draw independent samples from this model's isotropic Gaussian prior.
"""
samp_count = T.lscalar()
prior_samples = self.prior_sigma * self.rng.normal( \
size=(samp_count, self.latent_dim), avg=0.0, std=1.0, \
dtype=theano.config.floatX)
prior_sampler = theano.function([samp_count], outputs=prior_samples)
return prior_sampler
def _construct_model_sampler(self):
"""
Draw independent samples from this model's distribution.
"""
samp_count = T.lscalar()
prior_samples = self.prior_sigma * self.rng.normal( \
size=(samp_count, self.latent_dim), avg=0.0, std=1.0, \
dtype=theano.config.floatX)
prior_sampler = theano.function([samp_count], outputs=self.output, \
givens={self.Xp: prior_samples})
return prior_sampler
def _construct_transform_prior(self):
"""
Apply the transform induced by the current model parameters to some
set of points in the latent/prior space.
"""
feedforward = theano.function([self.Xp], outputs=self.output)
return feedforward
def _batch_moments(self):
"""
Compute covariance and mean of the current sample outputs.
"""
mu = T.mean(self.output, axis=0, keepdims=True)
sigma = T.dot((self.output.T - mu.T), (self.output - mu))
return [mu, sigma]