This page collects typical usage examples of the Python method blocks.bricks.recurrent.LSTM.initialize. If you are wondering what LSTM.initialize does, how to call it, or what working code that uses it looks like, the curated examples here should help. You can also explore further usage examples of the containing class, blocks.bricks.recurrent.LSTM.
The following shows 15 code examples of the LSTM.initialize method, ordered by popularity by default.
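Before the examples, here is a minimal sketch of the pattern they all share: construct the LSTM brick with weights_init/biases_init, build the symbolic graph with apply, and then call initialize() to actually draw the parameter values. The dimensions and initializer settings below are illustrative assumptions, not taken from any particular example on this page.

import numpy
import theano
from theano import tensor
from blocks.bricks import Tanh
from blocks.bricks.recurrent import LSTM
from blocks.initialization import IsotropicGaussian, Constant

# An LSTM brick with hidden state size `dim`; its input must carry
# 4 * dim features (one slice per gate), shaped (time, batch, 4 * dim).
dim = 3
lstm = LSTM(dim=dim, activation=Tanh(),
            weights_init=IsotropicGaussian(0.01),
            biases_init=Constant(0.))

# apply() only builds the symbolic graph; initialize() is what actually
# samples the parameters from weights_init / biases_init.
x = tensor.tensor3('x')
h, c = lstm.apply(x)
lstm.initialize()

f = theano.function([x], h)
print(f(numpy.ones((5, 2, 4 * dim), dtype=theano.config.floatX)).shape)  # (5, 2, 3)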
Example 1: main
# Module to import: from blocks.bricks.recurrent import LSTM [as alias]
# Or: from blocks.bricks.recurrent.LSTM import initialize [as alias]
def main(max_seq_length, lstm_dim, batch_size, num_batches, num_epochs):
    dataset_train = IterableDataset(generate_data(max_seq_length, batch_size,
                                                  num_batches))
    dataset_test = IterableDataset(generate_data(max_seq_length, batch_size,
                                                 100))

    stream_train = DataStream(dataset=dataset_train)
    stream_test = DataStream(dataset=dataset_test)

    x = T.tensor3('x')
    y = T.matrix('y')

    # we need to provide data for the LSTM layer of size 4 * lstm_dim, see
    # LSTM layer documentation for the explanation
    x_to_h = Linear(1, lstm_dim * 4, name='x_to_h',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
    lstm = LSTM(lstm_dim, name='lstm',
                weights_init=IsotropicGaussian(),
                biases_init=Constant(0.0))
    h_to_o = Linear(lstm_dim, 1, name='h_to_o',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))

    x_transform = x_to_h.apply(x)
    h, c = lstm.apply(x_transform)

    # only values of hidden units of the last timeframe are used for
    # the classification
    y_hat = h_to_o.apply(h[-1])
    y_hat = Logistic().apply(y_hat)

    cost = BinaryCrossEntropy().apply(y, y_hat)
    cost.name = 'cost'

    lstm.initialize()
    x_to_h.initialize()
    h_to_o.initialize()

    cg = ComputationGraph(cost)

    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Adam())
    test_monitor = DataStreamMonitoring(variables=[cost],
                                        data_stream=stream_test, prefix="test")
    train_monitor = TrainingDataMonitoring(variables=[cost], prefix="train",
                                           after_epoch=True)

    main_loop = MainLoop(algorithm, stream_train,
                         extensions=[test_monitor, train_monitor,
                                     FinishAfter(after_n_epochs=num_epochs),
                                     Printing(), ProgressBar()])
    main_loop.run()

    print 'Learned weights:'
    for layer in (x_to_h, lstm, h_to_o):
        print "Layer '%s':" % layer.name
        for param in layer.parameters:
            print param.name, ': ', param.get_value()
        print
Example 2: __init__
# Module to import: from blocks.bricks.recurrent import LSTM [as alias]
# Or: from blocks.bricks.recurrent.LSTM import initialize [as alias]
def __init__(self, input_size, hidden_size, output_size):
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size

    x = tensor.tensor3('x', dtype=floatX)
    y = tensor.tensor3('y', dtype=floatX)

    x_to_lstm = Linear(name="x_to_lstm", input_dim=input_size, output_dim=4 * hidden_size,
                       weights_init=IsotropicGaussian(), biases_init=Constant(0))
    lstm = LSTM(dim=hidden_size, name="lstm", weights_init=IsotropicGaussian(), biases_init=Constant(0))
    lstm_to_output = Linear(name="lstm_to_output", input_dim=hidden_size, output_dim=output_size,
                            weights_init=IsotropicGaussian(), biases_init=Constant(0))

    x_transform = x_to_lstm.apply(x)
    h, c = lstm.apply(x_transform)
    y_hat = lstm_to_output.apply(h)
    y_hat = Logistic(name="y_hat").apply(y_hat)

    self.cost = BinaryCrossEntropy(name="cost").apply(y, y_hat)

    x_to_lstm.initialize()
    lstm.initialize()
    lstm_to_output.initialize()

    self.computation_graph = ComputationGraph(self.cost)
Example 3: __init__
# Module to import: from blocks.bricks.recurrent import LSTM [as alias]
# Or: from blocks.bricks.recurrent.LSTM import initialize [as alias]
def __init__(self, input1_size, input2_size, lookup1_dim=200, lookup2_dim=200, hidden_size=512):
    self.hidden_size = hidden_size
    self.input1_size = input1_size
    self.input2_size = input2_size
    self.lookup1_dim = lookup1_dim
    self.lookup2_dim = lookup2_dim

    x1 = tensor.lmatrix('durations')
    x2 = tensor.lmatrix('syllables')
    y = tensor.lmatrix('pitches')

    lookup1 = LookupTable(dim=self.lookup1_dim, length=self.input1_size, name='lookup1',
                          weights_init=initialization.Uniform(width=0.01),
                          biases_init=Constant(0))
    lookup1.initialize()
    lookup2 = LookupTable(dim=self.lookup2_dim, length=self.input2_size, name='lookup2',
                          weights_init=initialization.Uniform(width=0.01),
                          biases_init=Constant(0))
    lookup2.initialize()
    merge = Merge(['lookup1', 'lookup2'], [self.lookup1_dim, self.lookup2_dim], self.hidden_size,
                  weights_init=initialization.Uniform(width=0.01),
                  biases_init=Constant(0))
    merge.initialize()
    recurrent_block = LSTM(dim=self.hidden_size, activation=Tanh(),
                           weights_init=initialization.Uniform(width=0.01))  #RecurrentStack([LSTM(dim=self.hidden_size, activation=Tanh())] * 3)
    recurrent_block.initialize()
    linear = Linear(input_dim=self.hidden_size, output_dim=self.input1_size,
                    weights_init=initialization.Uniform(width=0.01),
                    biases_init=Constant(0))
    linear.initialize()
    softmax = NDimensionalSoftmax()

    l1 = lookup1.apply(x1)
    l2 = lookup2.apply(x2)
    m = merge.apply(l1, l2)
    h = recurrent_block.apply(m)
    a = linear.apply(h)
    y_hat = softmax.apply(a, extra_ndim=1)
    # ValueError: x must be 1-d or 2-d tensor of floats. Got TensorType(float64, 3D)
    self.Cost = softmax.categorical_cross_entropy(y, a, extra_ndim=1).mean()

    self.ComputationGraph = ComputationGraph(self.Cost)

    self.Model = Model(y_hat)
Example 4: example4
# Module to import: from blocks.bricks.recurrent import LSTM [as alias]
# Or: from blocks.bricks.recurrent.LSTM import initialize [as alias]
def example4():
    """LSTM -> crashes when the LSTM is initialized."""
    x = tensor.tensor3('x')
    dim = 3

    # gate_inputs = theano.function([x],x*4)
    gate_inputs = Linear(input_dim=dim, output_dim=dim*4, name="linear",
                         weights_init=initialization.Identity(), biases_init=Constant(2))
    lstm = LSTM(dim=dim, activation=Tanh(),
                weights_init=IsotropicGaussian(), biases_init=Constant(0))

    gate_inputs.initialize()
    hg = gate_inputs.apply(x)

    #print(gate_inputs.parameters)
    #print(gate_inputs.parameters[1].get_value())

    lstm.initialize()
    h, cells = lstm.apply(hg)
    print(lstm.parameters)

    f = theano.function([x], h)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))
    print(f(4*np.ones((dim, 1, dim), dtype=theano.config.floatX)))
    print("Good Job!")

    # lstm_output =

    # Initial State
    h0 = tensor.matrix('h0')
    c = tensor.matrix('cells')
    h, c1 = lstm.apply(inputs=x, states=h0, cells=c)  # lstm.apply(states=h0,cells=cells,inputs=gate_inputs)

    f = theano.function([x, h0, c], h)
    print("a")
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX),
            np.ones((1, 3), dtype=theano.config.floatX),
            np.ones((1, 3), dtype=theano.config.floatX)))
Example 5: build_theano_functions
# Module to import: from blocks.bricks.recurrent import LSTM [as alias]
# Or: from blocks.bricks.recurrent.LSTM import initialize [as alias]
def build_theano_functions(self) :
    #import pdb ; pdb.set_trace()
    x = T.fmatrix('x')
    s = T.fvector('s')
    mu = T.fvector('mu')
    mu = T.reshape(mu, (self.number_of_mix, 1))
    pi = T.fvector('pi')

    lstm = LSTM(
        dim=self.input_dim/4,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(1))
    lstm.initialize()
    h, c = lstm.apply(x)
    h = h[0][0][-1]

    LL = T.sum(pi*(1./(T.sqrt(2.*np.pi)*s))*T.exp(
        -0.5*(h-mu)**2/T.reshape(s, (self.number_of_mix, 1))**2.).sum(axis=1))
    cost = -T.log(LL)

    #cg = ComputationGraph(cost)
    #self.cg = cg
    #parameters = cg.parameters
    model = Model(cost)
    self.model = model
    parameters = model.parameters

    grads = T.grad(cost, parameters)
    updates = []
    for i in range(len(grads)) :
        updates.append(tuple([parameters[i], parameters[i] - self.lr*grads[i]]))

    gradf = theano.function([x, s, mu, pi], [cost], updates=updates)
    f = theano.function([x], [h])
    return gradf, f
Example 6: TestLSTM
# Module to import: from blocks.bricks.recurrent import LSTM [as alias]
# Or: from blocks.bricks.recurrent.LSTM import initialize [as alias]
class TestLSTM(unittest.TestCase):
    def setUp(self):
        self.lstm = LSTM(dim=3, weights_init=Constant(2),
                         biases_init=Constant(0))
        self.lstm.initialize()

    def test_one_step(self):
        h0 = tensor.matrix('h0')
        c0 = tensor.matrix('c0')
        x = tensor.matrix('x')
        h1, c1 = self.lstm.apply(x, h0, c0, iterate=False)
        next_h = theano.function(inputs=[x, h0, c0], outputs=[h1])

        h0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]],
                                   dtype=theano.config.floatX)
        c0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]],
                                   dtype=theano.config.floatX)
        x_val = 0.1 * numpy.array([range(12), range(12, 24)],
                                  dtype=theano.config.floatX)
        W_state_val = 2 * numpy.ones((3, 12), dtype=theano.config.floatX)
        W_cell_to_in = 2 * numpy.ones((3,), dtype=theano.config.floatX)
        W_cell_to_out = 2 * numpy.ones((3,), dtype=theano.config.floatX)
        W_cell_to_forget = 2 * numpy.ones((3,), dtype=theano.config.floatX)

        # omitting biases because they are zero
        activation = numpy.dot(h0_val, W_state_val) + x_val

        def sigmoid(x):
            return 1. / (1. + numpy.exp(-x))

        i_t = sigmoid(activation[:, :3] + c0_val * W_cell_to_in)
        f_t = sigmoid(activation[:, 3:6] + c0_val * W_cell_to_forget)
        next_cells = f_t * c0_val + i_t * numpy.tanh(activation[:, 6:9])
        o_t = sigmoid(activation[:, 9:12] +
                      next_cells * W_cell_to_out)
        h1_val = o_t * numpy.tanh(next_cells)

        assert_allclose(h1_val, next_h(x_val, h0_val, c0_val)[0],
                        rtol=1e-6)

    def test_many_steps(self):
        x = tensor.tensor3('x')
        mask = tensor.matrix('mask')
        h, c = self.lstm.apply(x, mask=mask, iterate=True)
        calc_h = theano.function(inputs=[x, mask], outputs=[h])

        x_val = (0.1 * numpy.asarray(
            list(itertools.islice(itertools.permutations(range(12)), 0, 24)),
            dtype=theano.config.floatX))
        x_val = numpy.ones((24, 4, 12),
                           dtype=theano.config.floatX) * x_val[:, None, :]
        mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
        mask_val[12:24, 3] = 0
        h_val = numpy.zeros((25, 4, 3), dtype=theano.config.floatX)
        c_val = numpy.zeros((25, 4, 3), dtype=theano.config.floatX)
        W_state_val = 2 * numpy.ones((3, 12), dtype=theano.config.floatX)
        W_cell_to_in = 2 * numpy.ones((3,), dtype=theano.config.floatX)
        W_cell_to_out = 2 * numpy.ones((3,), dtype=theano.config.floatX)
        W_cell_to_forget = 2 * numpy.ones((3,), dtype=theano.config.floatX)

        def sigmoid(x):
            return 1. / (1. + numpy.exp(-x))

        for i in range(1, 25):
            activation = numpy.dot(h_val[i-1], W_state_val) + x_val[i-1]
            i_t = sigmoid(activation[:, :3] + c_val[i-1] * W_cell_to_in)
            f_t = sigmoid(activation[:, 3:6] + c_val[i-1] * W_cell_to_forget)
            c_val[i] = f_t * c_val[i-1] + i_t * numpy.tanh(activation[:, 6:9])
            o_t = sigmoid(activation[:, 9:12] +
                          c_val[i] * W_cell_to_out)
            h_val[i] = o_t * numpy.tanh(c_val[i])
            h_val[i] = (mask_val[i - 1, :, None] * h_val[i] +
                        (1 - mask_val[i - 1, :, None]) * h_val[i - 1])
            c_val[i] = (mask_val[i - 1, :, None] * c_val[i] +
                        (1 - mask_val[i - 1, :, None]) * c_val[i - 1])
        h_val = h_val[1:]
        assert_allclose(h_val, calc_h(x_val, mask_val)[0], rtol=1e-04)

        # Also test that initial state is a parameter
        initial1, initial2 = VariableFilter(roles=[INITIAL_STATE])(
            ComputationGraph(h))
        assert is_shared_variable(initial1)
        assert is_shared_variable(initial2)
        assert {initial1.name, initial2.name} == {
            'initial_state', 'initial_cells'}
Example 7: build_theano_functions
# Module to import: from blocks.bricks.recurrent import LSTM [as alias]
# Or: from blocks.bricks.recurrent.LSTM import initialize [as alias]
def build_theano_functions(self):
    x = T.fmatrix('time_sequence')
    x = x.reshape((self.batch_dim, self.sequence_dim, self.time_dim))

    y = x[:,1:self.sequence_dim,:]
    x = x[:,:self.sequence_dim-1,:]

    # if we try to include the spectrogram features
    spec_dims = 0
    if self.image_size is not None :
        print "Convolution activated"
        self.init_conv()
        spec = T.ftensor4('spectrogram')
        spec_features, spec_dims = self.conv.build_conv_layers(spec)
        print "Conv final dims =", spec_dims
        spec_dims = np.prod(spec_dims)
        spec_features = spec_features.reshape(
            (self.batch_dim, self.sequence_dim-1, spec_dims))
        x = T.concatenate([x, spec_features], axis=2)

    layers_input = [x]
    dims = np.array([self.time_dim + spec_dims])
    for dim in self.lstm_layers_dim :
        dims = np.append(dims, dim)
    print "Dimensions =", dims

    # layer is just an index of the layer
    for layer in range(len(self.lstm_layers_dim)) :

        # before the cell, input, forget and output gates, x needs to
        # be transformed
        linear = Linear(dims[layer],
                        dims[layer+1]*4,
                        weights_init=Orthogonal(self.orth_scale),
                        biases_init=Constant(0),
                        name="linear"+str(layer))
        linear.initialize()
        lstm_input = linear.apply(layers_input[layer])

        # the lstm wants batch X sequence X time
        lstm = LSTM(
            dim=dims[layer+1],
            weights_init=IsotropicGaussian(mean=0., std=0.5),
            biases_init=Constant(1),
            name="lstm"+str(layer))
        lstm.initialize()
        # hack to use Orthogonal on lstm w_state
        lstm.W_state.set_value(
            self.orth_scale*Orthogonal().generate(np.random, lstm.W_state.get_value().shape))
        h, _dummy = lstm.apply(lstm_input)

        layers_input.append(h)

    # this is where Alex Graves' paper starts
    print "Last linear transform dim :", dims[1:].sum()
    output_transform = Linear(dims[1:].sum(),
                              self.output_dim,
                              weights_init=Orthogonal(self.orth_scale),
                              use_bias=False,
                              name="output_transform")
    output_transform.initialize()
    if len(self.lstm_layers_dim) == 1 :
        print "hallo there, only one layer speaking"
        y_hat = output_transform.apply(layers_input[-1])
    else :
        y_hat = output_transform.apply(T.concatenate(layers_input[1:], axis=2))

    # transforms to find each gmm params (mu, pi, sig)
    # small hack to softmax a 3D tensor
    pis = T.reshape(
        T.nnet.softmax(
            T.reshape(y_hat[:,:,:self.gmm_dim], ((self.sequence_dim-1)*self.batch_dim, self.gmm_dim))),
        (self.batch_dim, (self.sequence_dim-1), self.gmm_dim))
    sig = T.exp(y_hat[:,:,self.gmm_dim:self.gmm_dim*2])+1e-6
    mus = y_hat[:,:,self.gmm_dim*2:]

    pis = pis[:,:,:,np.newaxis]
    mus = mus[:,:,:,np.newaxis]
    sig = sig[:,:,:,np.newaxis]
    y = y[:,:,np.newaxis,:]

    y = T.patternbroadcast(y, (False, False, True, False))
    mus = T.patternbroadcast(mus, (False, False, False, True))
    sig = T.patternbroadcast(sig, (False, False, False, True))

    # sum likelihood with targets
    # see blog for this crazy Pr() = sum log sum prod
    # axes :: (batch, sequence, mixture, time)
    expo_term = -0.5*((y-mus)**2)/sig**2
    coeff = T.log(T.maximum(1./(T.sqrt(2.*np.pi)*sig), EPS))
    #coeff = T.log(1./(T.sqrt(2.*np.pi)*sig))
    sequences = coeff + expo_term
    log_sequences = T.log(pis + EPS) + T.sum(sequences, axis=3, keepdims=True)

    log_sequences_max = T.max(log_sequences, axis=2, keepdims=True)

    LL = -(log_sequences_max + T.log(EPS + T.sum(T.exp(log_sequences - log_sequences_max), axis=2, keepdims=True))).mean()
    LL.name = "summed_likelihood"

    model = Model(LL)
#......... (the rest of the code is omitted) .........
Example 8: main
# Module to import: from blocks.bricks.recurrent import LSTM [as alias]
# Or: from blocks.bricks.recurrent.LSTM import initialize [as alias]
def main():
    x = T.tensor3('features')
    m = T.matrix('features_mask')
    y = T.imatrix('targets')

    #rnn = SimpleRecurrent(
    #    dim = 50,
    #    activation=Tanh(),
    #    weights_init = Uniform(std=0.01),
    #    biases_init = Constant(0.)
    #)
    #rnn = GatedRecurrent(
    #    dim = 50,
    #    activation=Tanh(),
    #    weights_init = Uniform(std=0.01),
    #    biases_init = Constant(0.)
    #)

    embedding_size = 300
    #glove_version = "vectors.6B.100d.txt"
    glove_version = "glove.6B.300d.txt"

    #fork = Fork(weights_init=IsotropicGaussian(0.02),
    #            biases_init=Constant(0.),
    #            input_dim=embedding_size,
    #            output_dims=[embedding_size]*3,
    #            output_names=['inputs', 'reset_inputs', 'update_inputs']
    #            )

    rnn = LSTM(
        dim = embedding_size,
        activation=Tanh(),
        weights_init = IsotropicGaussian(std=0.02),
    )
    rnn.initialize()
    #fork.initialize()

    wstd = 0.02

    score_layer = Linear(
        input_dim = 128,
        output_dim = 1,
        weights_init = IsotropicGaussian(std=wstd),
        biases_init = Constant(0.),
        name="linear2")
    score_layer.initialize()

    gloveMapping = Linear(
        input_dim = embedding_size,
        output_dim = embedding_size,
        weights_init = IsotropicGaussian(std=wstd),
        biases_init = Constant(0.0),
        name="gloveMapping"
    )
    gloveMapping.initialize()
    o = gloveMapping.apply(x)
    o = Rectifier(name="rectivfyglove").apply(o)

    forget_bias = np.zeros((embedding_size*4), dtype=theano.config.floatX)
    forget_bias[embedding_size:embedding_size*2] = 4.0
    toLSTM = Linear(
        input_dim = embedding_size,
        output_dim = embedding_size*4,
        weights_init = IsotropicGaussian(std=wstd),
        biases_init = Constant(forget_bias),
        #biases_init = Constant(0.0),
        name="ToLSTM"
    )
    toLSTM.initialize()

    rnn_states, rnn_cells = rnn.apply(toLSTM.apply(o) * T.shape_padright(m), mask=m)
    #inputs, reset_inputs, update_inputs = fork.apply(x)
    #rnn_states = rnn.apply(inputs=inputs, reset_inputs=reset_inputs, update_inputs=update_inputs, mask=m)
    #rnn_out = rnn_states[:, -1, :]
    rnn_out = (rnn_states * m.dimshuffle(0, 1, 'x')).sum(axis=1) / m.sum(axis=1).dimshuffle(0, 'x')
    #rnn_out = (rnn_states).mean(axis=1)# / m.sum(axis=1)

    hidden = Linear(
        input_dim = embedding_size,
        output_dim = 128,
        weights_init = Uniform(std=0.01),
        biases_init = Constant(0.))
    hidden.initialize()
    o = hidden.apply(rnn_out)
    o = Rectifier().apply(o)

    hidden = Linear(
        input_dim = 128,
        output_dim = 128,
        weights_init = IsotropicGaussian(std=0.02),
        biases_init = Constant(0.),
        name="hiddenmap2")
    hidden.initialize()
    o = hidden.apply(o)
    o = Rectifier(name="rec2").apply(o)

    o = score_layer.apply(o)
#......... (the rest of the code is omitted) .........
Example 9: main
# Module to import: from blocks.bricks.recurrent import LSTM [as alias]
# Or: from blocks.bricks.recurrent.LSTM import initialize [as alias]
def main():
    x = T.imatrix('features')
    m = T.matrix('features_mask')
    y = T.imatrix('targets')
    #x_int = x.astype(dtype='int32').T
    x_int = x.T

    train_dataset = IMDB('train')
    n_voc = len(train_dataset.dict.keys())
    n_h = 2
    lookup = LookupTable(
        length=n_voc+2,
        dim = n_h*4,
        weights_init = Uniform(std=0.01),
        biases_init = Constant(0.)
    )
    lookup.initialize()

    #rnn = SimpleRecurrent(
    #    dim = n_h,
    #    activation=Tanh(),
    #    weights_init = Uniform(std=0.01),
    #    biases_init = Constant(0.)
    #)
    rnn = LSTM(
        dim = n_h,
        activation=Tanh(),
        weights_init = Uniform(std=0.01),
        biases_init = Constant(0.)
    )
    rnn.initialize()

    score_layer = Linear(
        input_dim = n_h,
        output_dim = 1,
        weights_init = Uniform(std=0.01),
        biases_init = Constant(0.))
    score_layer.initialize()

    embedding = lookup.apply(x_int) * T.shape_padright(m.T)
    #embedding = lookup.apply(x_int) + m.T.mean()*0
    #embedding = lookup.apply(x_int) + m.T.mean()*0

    rnn_states = rnn.apply(embedding, mask=m.T)
    #rnn_states, rnn_cells = rnn.apply(embedding)
    rnn_out_mean_pooled = rnn_states[-1]
    #rnn_out_mean_pooled = rnn_states.mean()

    probs = Sigmoid().apply(
        score_layer.apply(rnn_out_mean_pooled))

    cost = - (y * T.log(probs) + (1-y) * T.log(1 - probs)).mean()
    cost.name = 'cost'

    misclassification = (y * (probs < 0.5) + (1-y) * (probs > 0.5)).mean()
    misclassification.name = 'misclassification'

    # =================

    cg = ComputationGraph([cost])
    params = cg.parameters

    algorithm = GradientDescent(
        cost = cost,
        params=params,
        step_rule = CompositeRule([
            StepClipping(threshold=10),
            Adam(),
            #AdaDelta(),
        ])
    )

    # ========

    test_dataset = IMDB('test')
    batch_size = 64
    n_train = train_dataset.num_examples

    train_stream = DataStream(
        dataset=train_dataset,
        iteration_scheme=ShuffledScheme(
            examples=n_train,
            batch_size=batch_size)
    )
    train_padded = Padding(
        data_stream=train_stream,
        mask_sources=('features',)
        #mask_sources=[]
    )

    test_stream = DataStream(
        dataset=test_dataset,
        iteration_scheme=ShuffledScheme(
            examples=n_train,
            batch_size=batch_size)
    )
    test_padded = Padding(
        data_stream=test_stream,
        mask_sources=('features',)
#......... (the rest of the code is omitted) .........
Example 10: main
# Module to import: from blocks.bricks.recurrent import LSTM [as alias]
# Or: from blocks.bricks.recurrent.LSTM import initialize [as alias]
def main(num_epochs=100):
    x = tensor.matrix('features')
    m = tensor.matrix('features_mask')

    x_int = x.astype(dtype='int32').T
    train_dataset = TextFile('inspirational.txt')
    train_dataset.indexables[0] = numpy.array(sorted(
        train_dataset.indexables[0], key=len
    ))

    n_voc = len(train_dataset.dict.keys())

    init_probs = numpy.array(
        [sum(filter(lambda idx: idx == w,
                    [s[0] for s in train_dataset.indexables[
                        train_dataset.sources.index('features')]]
                    )) for w in xrange(n_voc)],
        dtype=theano.config.floatX
    )
    init_probs = init_probs / init_probs.sum()

    n_h = 100
    linear_embedding = LookupTable(
        length=n_voc,
        dim=4*n_h,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    linear_embedding.initialize()
    rnn = LSTM(
        dim=n_h,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    rnn.initialize()
    score_layer = Linear(
        input_dim=n_h,
        output_dim=n_voc,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    score_layer.initialize()

    embedding = (linear_embedding.apply(x_int[:-1])
                 * tensor.shape_padright(m.T[1:]))
    rnn_out = rnn.apply(inputs=embedding, mask=m.T[1:])
    probs = softmax(
        sequence_map(score_layer.apply, rnn_out[0], mask=m.T[1:])[0]
    )

    idx_mask = m.T[1:].nonzero()
    cost = CategoricalCrossEntropy().apply(
        x_int[1:][idx_mask[0], idx_mask[1]],
        probs[idx_mask[0], idx_mask[1]]
    )
    cost.name = 'cost'
    misclassification = MisclassificationRate().apply(
        x_int[1:][idx_mask[0], idx_mask[1]],
        probs[idx_mask[0], idx_mask[1]]
    )
    misclassification.name = 'misclassification'

    cg = ComputationGraph([cost])
    params = cg.parameters

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=CompositeRule(
            [StepClipping(10.),
             Adam()])
    )

    train_data_stream = Padding(
        data_stream=DataStream(
            dataset=train_dataset,
            iteration_scheme=BatchwiseShuffledScheme(
                examples=train_dataset.num_examples,
                batch_size=10,
            )
        ),
        mask_sources=('features',)
    )

    model = Model(cost)

    extensions = []
    extensions.append(Timing())
    extensions.append(FinishAfter(after_n_epochs=num_epochs))
    extensions.append(TrainingDataMonitoring(
        [cost, misclassification],
        prefix='train',
        after_epoch=True))

    batch_size = 10
    length = 30
    trng = MRG_RandomStreams(18032015)
    u = trng.uniform(size=(length, batch_size, n_voc))
    gumbel_noise = -tensor.log(-tensor.log(u))
    init_samples = (tensor.log(init_probs).dimshuffle(('x', 0))
#......... (the rest of the code is omitted) .........
Example 11: build_theano_functions
# Module to import: from blocks.bricks.recurrent import LSTM [as alias]
# Or: from blocks.bricks.recurrent.LSTM import initialize [as alias]
def build_theano_functions(self) :
    x = T.ftensor3('x')  # shape of input : batch X time X value
    y = T.ftensor4('y')

    layers_input = [x]
    dims = np.array([self.time_dim])
    for dim in self.lstm_layers_dim :
        dims = np.append(dims, dim)
    print "Dimensions =", dims

    # layer is just an index of the layer
    for layer in range(len(self.lstm_layers_dim)) :

        # before the cell, input, forget and output gates, x needs to
        # be transformed
        linear = Linear(dims[layer],
                        dims[layer+1]*4,
                        weights_init=Orthogonal(self.orth_scale),
                        #weights_init=IsotropicGaussian(mean=1.,std=1),
                        biases_init=Constant(0),
                        name="linear"+str(layer))
        linear.initialize()
        lstm_input = linear.apply(layers_input[layer])

        # the lstm wants batch X time X value
        lstm = LSTM(
            dim=dims[layer+1],
            weights_init=IsotropicGaussian(mean=0., std=0.5),
            biases_init=Constant(1),
            name="lstm"+str(layer))
        lstm.initialize()
        # hack to use Orthogonal on lstm w_state
        lstm.W_state.set_value(
            self.orth_scale*Orthogonal().generate(np.random, lstm.W_state.get_value().shape))
        h, _dummy = lstm.apply(lstm_input)

        layers_input.append(h)

    # this is where Alex Graves' paper starts
    print "Last linear transform dim :", dims[1:].sum()
    output_transform = Linear(dims[1:].sum(),
                              self.output_dim,
                              weights_init=Orthogonal(self.orth_scale),
                              #weights_init=IsotropicGaussian(mean=0., std=1),
                              use_bias=False,
                              name="output_transform")
    output_transform.initialize()
    if len(self.lstm_layers_dim) == 1 :
        print "hallo there, only one layer speaking"
        y_hat = output_transform.apply(layers_input[-1])
    else :
        y_hat = output_transform.apply(T.concatenate(layers_input[1:], axis=2))

    # transforms to find each gmm params (mu, pi, sig)
    # small hack to softmax a 3D tensor
    #pis = T.reshape(
    #    T.nnet.softmax(
    #        T.nnet.sigmoid(
    #            T.reshape(y_hat[:,:,0:self.gmm_dim], (self.time_dim*self.batch_dim, self.gmm_dim)))),
    #    (self.batch_dim, self.time_dim, self.gmm_dim))
    pis = T.reshape(
        T.nnet.softmax(
            T.reshape(y_hat[:,:,:self.gmm_dim], (self.sequence_dim*self.batch_dim, self.gmm_dim))),
        (self.batch_dim, self.sequence_dim, self.gmm_dim))
    sig = T.exp(y_hat[:,:,self.gmm_dim:self.gmm_dim*2])+1e-6
    #sig = T.nnet.relu(y_hat[:,:,self.gmm_dim:self.gmm_dim*2])+0.1
    #mus = 2.*T.tanh(y_hat[:,:,self.gmm_dim*2:])
    mus = y_hat[:,:,self.gmm_dim*2:]

    pis = pis[:,:,:,np.newaxis]
    mus = mus[:,:,:,np.newaxis]
    sig = sig[:,:,:,np.newaxis]
    #y = y[:,:,np.newaxis,:]

    y = T.patternbroadcast(y, (False, False, True, False))
    mus = T.patternbroadcast(mus, (False, False, False, True))
    sig = T.patternbroadcast(sig, (False, False, False, True))

    # sum likelihood with targets
    # see blog for this crazy Pr() = sum log sum prod
    # axes :: (batch, sequence, mixture, time)
    expo_term = -0.5*((y-mus)**2)/sig**2
    coeff = T.log(T.maximum(1./(T.sqrt(2.*np.pi)*sig), EPS))
    #coeff = T.log(1./(T.sqrt(2.*np.pi)*sig))
    sequences = coeff + expo_term
    log_sequences = T.log(pis + EPS) + T.sum(sequences, axis=3, keepdims=True)

    log_sequences_max = T.max(log_sequences, axis=2, keepdims=True)

    LL = -(log_sequences_max + T.log(EPS + T.sum(T.exp(log_sequences - log_sequences_max), axis=2, keepdims=True))).mean()

    model = Model(LL)
    self.model = model
    parameters = model.parameters

    grads = T.grad(LL, parameters)
    updates = []
    lr = T.scalar('lr')
    for i in range(len(grads)) :
        #updates.append(tuple([parameters[i], parameters[i] - self.lr*grads[i]]))
#......... (the rest of the code is omitted) .........
Example 12: main
# Module to import: from blocks.bricks.recurrent import LSTM [as alias]
# Or: from blocks.bricks.recurrent.LSTM import initialize [as alias]
def main(num_epochs=100):
    x = tensor.matrix('features')
    m = tensor.matrix('features_mask')
    y = tensor.imatrix('targets')
    x_int = x.astype(dtype='int32').T

    train_dataset = IMDB()
    idx_sort = numpy.argsort(
        [len(s) for s in
         train_dataset.indexables[
             train_dataset.sources.index('features')]]
    )
    n_voc = len(train_dataset.dict.keys())
    for idx in xrange(len(train_dataset.sources)):
        train_dataset.indexables[idx] = train_dataset.indexables[idx][idx_sort]

    n_h = 100
    linear_embedding = LookupTable(
        length=n_voc,
        dim=4 * n_h,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    linear_embedding.initialize()
    lstm_biases = numpy.zeros(4 * n_h).astype(dtype=theano.config.floatX)
    lstm_biases[n_h:(2 * n_h)] = 4.
    rnn = LSTM(
        dim=n_h,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    rnn.initialize()
    score_layer = Linear(
        input_dim=n_h,
        output_dim=1,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    score_layer.initialize()

    embedding = linear_embedding.apply(x_int) * tensor.shape_padright(m.T)
    rnn_out = rnn.apply(embedding)
    rnn_out_mean_pooled = rnn_out[0][-1]

    probs = Sigmoid().apply(
        score_layer.apply(rnn_out_mean_pooled))

    cost = - (y * tensor.log(probs)
              + (1 - y) * tensor.log(1 - probs)
              ).mean()
    cost.name = 'cost'

    misclassification = (y * (probs < 0.5)
                         + (1 - y) * (probs > 0.5)
                         ).mean()
    misclassification.name = 'misclassification'

    cg = ComputationGraph([cost])
    params = cg.parameters

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=CompositeRule(
            components=[StepClipping(threshold=10.),
                        Adam()
                        ]
        )
    )

    n_train = int(numpy.floor(.8 * train_dataset.num_examples))
    n_valid = int(numpy.floor(.1 * train_dataset.num_examples))
    train_data_stream = Padding(
        data_stream=DataStream(
            dataset=train_dataset,
            iteration_scheme=BatchwiseShuffledScheme(
                examples=range(n_train),
                batch_size=10,
            )
        ),
        mask_sources=('features',)
    )
    valid_data_stream = Padding(
        data_stream=DataStream(
            dataset=train_dataset,
            iteration_scheme=BatchwiseShuffledScheme(
                examples=range(n_train, n_train + n_valid),
                batch_size=10,
            )
        ),
        mask_sources=('features',)
    )
    test_data_stream = Padding(
        data_stream=DataStream(
            dataset=train_dataset,
            iteration_scheme=BatchwiseShuffledScheme(
                examples=range(n_train + n_valid,
                               train_dataset.num_examples),
                batch_size=10,
            )
#......... (the rest of the code is omitted) .........
Example 13: build_theano_functions
# Module to import: from blocks.bricks.recurrent import LSTM [as alias]
# Or: from blocks.bricks.recurrent.LSTM import initialize [as alias]
def build_theano_functions(self) :
    x = T.ftensor3('x')  # shape of input : batch X time X value
    y = T.ftensor3('y')
    z = T.ftensor3('z')

    layers_input = [x]
    dims = np.array([self.input_dim])
    for dim in self.lstm_layers_dim :
        dims = np.append(dims, dim)
    print "Dimensions =", dims

    # layer is just an index of the layer
    for layer in range(len(self.lstm_layers_dim)) :

        # before the cell, input, forget and output gates, x needs to
        # be transformed
        linear = Linear(dims[layer],
                        dims[layer+1]*4,
                        #weights_init=Uniform(mean=data_mean, std=1),
                        weights_init=IsotropicGaussian(mean=1., std=1),
                        biases_init=Constant(0),
                        name="linear"+str(layer))
        linear.initialize()
        lstm_input = linear.apply(layers_input[layer])

        # the lstm wants batch X time X value
        lstm = LSTM(
            dim=dims[layer+1],
            weights_init=IsotropicGaussian(mean=0., std=0.5),
            biases_init=Constant(1),
            name="lstm"+str(layer))
        lstm.initialize()
        # hack to use Orthogonal on lstm w_state
        lstm.W_state.set_value(Orthogonal().generate(np.random, lstm.W_state.get_value().shape))
        h, _dummy = lstm.apply(lstm_input)

        layers_input.append(h)

    # the idea is to have one gaussian parametrize every frequency bin
    print "Last linear transform dim :", dims[1:].sum()
    output_transform = Linear(dims[1:].sum(),
                              self.output_dim,
                              weights_init=IsotropicGaussian(mean=0., std=1),
                              biases_init=Constant(0),
                              #use_bias=False,
                              name="output_transform")
    output_transform.initialize()
    if len(self.lstm_layers_dim) == 1 :
        print "hallo there, only one layer speaking"
        y_hat = output_transform.apply(layers_input[-1])
    else :
        y_hat = output_transform.apply(T.concatenate(layers_input[1:], axis=2))

    sig = T.nnet.relu(y_hat[:,:,:self.output_dim/2])+0.05
    mus = y_hat[:,:,self.output_dim/2:]

    # sum likelihood with targets
    # sum inside log across mixtures, sum outside log across time
    inside_expo = -0.5*((y-mus)**2)/sig**2
    expo = T.exp(inside_expo)
    coeff = 1./(T.sqrt(2.*np.pi)*sig)
    inside_log = T.log(coeff*expo)
    inside_log_max = T.max(inside_log, axis=2, keepdims=True)
    LL = -(inside_log_max + T.log(T.sum(T.exp(inside_log - inside_log_max), axis=2, keepdims=True))).sum()

    #zinside_expo = -0.5*((z-mus)**2)/sig**2
    #zexpo = T.exp(zinside_expo)
    #zcoeff = pis*(1./(T.sqrt(2.*np.pi)*sig))
    #zinside_log = (zcoeff*zexpo).sum(axis=2)
    #zLL = -(T.log(zinside_log)).sum()

    model = Model(LL)
    self.model = model
    parameters = model.parameters

    grads = T.grad(LL, parameters)
    updates = []
    lr = T.scalar('lr')
    for i in range(len(grads)) :
        #updates.append(tuple([parameters[i], parameters[i] - self.lr*grads[i]]))
        updates.append(tuple([parameters[i], parameters[i] - lr*grads[i]]))

    #gradf = theano.function([x, y],[LL],updates=updates, mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=False))
    if self.debug :
        gradf = theano.function([x, y, lr], [LL, mus, sig], updates=updates)
    else :
        #gradf = theano.function([x, y, z],[zLL],updates=updates)
        gradf = theano.function([x, y, lr], [LL], updates=updates)
    f = theano.function([x], [sig, mus])
    return gradf, f
Example 14: build_theano_functions
# Module to import: from blocks.bricks.recurrent import LSTM [as alias]
# Or: from blocks.bricks.recurrent.LSTM import initialize [as alias]
def build_theano_functions(self, data_mean, data_std) :
    x = T.ftensor3('x')  # shape of input : batch X time X value
    y = T.ftensor3('y')

    # before the cell, input, forget and output gates, x needs to
    # be transformed
    linear_transforms = []
    for transform in ['c','i','f','o'] :
        linear_transforms.append(
            Linear(self.input_dim,
                   self.lstm_dim,
                   weights_init=Uniform(mean=data_mean, std=data_std),
                   #weights_init=IsotropicGaussian(mean=1.,std=1),
                   biases_init=Constant(data_mean),
                   name=transform+"_transform")
        )
    for transform in linear_transforms :
        transform.initialize()

    linear_applications = []
    for transform in linear_transforms :
        linear_applications.append(
            transform.apply(x))
    lstm_input = T.concatenate(linear_applications, axis=2)

    # the lstm wants batch X time X value
    lstm = LSTM(
        dim=self.lstm_dim,
        weights_init=IsotropicGaussian(mean=0.5, std=1),
        biases_init=Constant(1))
    lstm.initialize()
    h, _dummy = lstm.apply(lstm_input)

    # this is where Alex Graves' paper starts
    output_transform = Linear(self.lstm_dim,
                              self.output_dim,
                              #weights_init=Uniform(mean=data_mean, std=data_std),
                              weights_init=IsotropicGaussian(mean=0., std=1),
                              biases_init=Constant(1),
                              name="output_transform")
    output_transform.initialize()
    y_hat = output_transform.apply(h)

    # transforms to find each gmm params (mu, pi, sig)
    #pis = NDimensionalSoftmax.apply(y_hat[:,:,0:self.gmm_dim])
    # small hack to softmax a 3D tensor
    pis = T.reshape(
        T.nnet.softmax(
            T.reshape(y_hat[:,:,0:self.gmm_dim], (self.time_dim*self.batch_dim, self.gmm_dim)))
        , (self.batch_dim, self.time_dim, self.gmm_dim))
    #sig = T.exp(y_hat[:,:,self.gmm_dim:self.gmm_dim*2])
    sig = T.nnet.relu(y_hat[:,:,self.gmm_dim:self.gmm_dim*2])+0.1
    mus = y_hat[:,:,self.gmm_dim*2:]

    pis = pis[:,:,:,np.newaxis]
    mus = mus[:,:,:,np.newaxis]
    sig = sig[:,:,:,np.newaxis]
    y = y[:,:,np.newaxis,:]
    #sig=theano.printing.Print()(sig)

    # sum likelihood with targets
    # sum inside log across mixtures, sum outside log across time
    #LL = -T.log((pis*(1./(T.sqrt(2.*np.pi)*sig))*T.exp(-0.5*((y-mus)**2)/sig**2)).sum(axis=2)).sum()
    expo = T.exp(-0.5*((y-mus)**2)/sig**2)
    test_expo = theano.function([x, y], [expo, mus, sig])
    return test_expo

    coeff = pis*(1./(T.sqrt(2.*np.pi)*sig))
    inside_log = (coeff*expo).sum(axis=2)
    LL = -(T.log(inside_log)).sum()

    model = Model(LL)
    self.model = model
    parameters = model.parameters

    grads = T.grad(LL, parameters)
    updates = []
    for i in range(len(grads)) :
        updates.append(tuple([parameters[i], parameters[i] - self.lr*grads[i]]))

    #gradf = theano.function([x, y],[LL],updates=updates, mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=False))
    gradf = theano.function([x, y], [LL], updates=updates)
    f = theano.function([x], [pis, sig, mus])
    return gradf, f
Example 15: main
# Module to import: from blocks.bricks.recurrent import LSTM [as alias]
# Or: from blocks.bricks.recurrent.LSTM import initialize [as alias]
def main(model_path, recurrent_type):
    dataset_options = dict(dictionary=char2code, level="character",
                           preprocess=_lower)
    dataset = OneBillionWord("training", [99], **dataset_options)
    data_stream = dataset.get_example_stream()
    data_stream = Filter(data_stream, _filter_long)
    data_stream = Mapping(data_stream, _make_target,
                          add_sources=('target',))
    data_stream = Batch(data_stream, iteration_scheme=ConstantScheme(100))
    data_stream = Padding(data_stream)
    data_stream = Mapping(data_stream, _transpose)

    features = tensor.lmatrix('features')
    features_mask = tensor.matrix('features_mask')
    target = tensor.lmatrix('target')
    target_mask = tensor.matrix('target_mask')

    dim = 100
    lookup = LookupTable(len(all_chars), dim,
                         weights_init=IsotropicGaussian(0.01),
                         biases_init=Constant(0.))
    if recurrent_type == 'lstm':
        rnn = LSTM(dim / 4, Tanh(),
                   weights_init=IsotropicGaussian(0.01),
                   biases_init=Constant(0.))
    elif recurrent_type == 'simple':
        rnn = SimpleRecurrent(dim, Tanh())
        rnn = Bidirectional(rnn,
                            weights_init=IsotropicGaussian(0.01),
                            biases_init=Constant(0.))
    else:
        raise ValueError('Not known RNN type')
    rnn.initialize()
    lookup.initialize()
    y_hat = rnn.apply(lookup.apply(features), mask=features_mask)
    print len(all_chars)

    linear = Linear(2 * dim, len(all_chars),
                    weights_init=IsotropicGaussian(0.01),
                    biases_init=Constant(0.))
    linear.initialize()
    y_hat = linear.apply(y_hat)
    seq_lenght = y_hat.shape[0]
    batch_size = y_hat.shape[1]
    y_hat = Softmax().apply(y_hat.reshape((seq_lenght * batch_size, -1))).reshape(y_hat.shape)

    cost = CategoricalCrossEntropy().apply(
        target.flatten(),
        y_hat.reshape((-1, len(all_chars)))) * seq_lenght * batch_size
    cost.name = 'cost'
    cost_per_character = cost / features_mask.sum()
    cost_per_character.name = 'cost_per_character'
    cg = ComputationGraph([cost, cost_per_character])
    model = Model(cost)
    algorithm = GradientDescent(step_rule=Adam(), cost=cost,
                                params=cg.parameters)
    train_monitor = TrainingDataMonitoring(
        [cost, cost_per_character], prefix='train',
        after_batch=True)
    extensions = [train_monitor, Printing(every_n_batches=40),
                  Dump(model_path, every_n_batches=200),
                  #Checkpoint('rnn.pkl', every_n_batches=200)
                  ]
    main_loop = MainLoop(model=model, algorithm=algorithm,
                         data_stream=data_stream, extensions=extensions)
    main_loop.run()