This page collects typical usage examples of the Python method blocks.bricks.parallel.Fork.initialize. If you have been wondering what Fork.initialize does, how to call it, or what real code that uses it looks like, the hand-picked examples below should help. You can also look further into usage examples of the containing class, blocks.bricks.parallel.Fork.
Eight code examples of the Fork.initialize method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python examples.
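Before diving into the examples, here is a minimal, self-contained sketch of the pattern they all share: construct a Fork, call initialize(), then apply() it to a Theano variable. It assumes blocks and Theano are installed; the names and dimensions are invented for illustration and are not taken from any specific example below.

import numpy
import theano
from theano import tensor
from blocks.bricks import Linear
from blocks.bricks.parallel import Fork
from blocks.initialization import IsotropicGaussian, Constant

# Fork a 4-dimensional input into two named outputs of different widths.
x = tensor.matrix('x')
fork = Fork(output_names=['linear', 'gates'],
            input_dim=4, output_dims=[4, 8],
            prototype=Linear(),
            weights_init=IsotropicGaussian(0.01),
            biases_init=Constant(0))
fork.initialize()                       # allocate and initialize the forked Linear bricks
linear_part, gate_part = fork.apply(x)  # one output per entry in output_names

f = theano.function([x], [linear_part, gate_part])
a, b = f(numpy.ones((2, 4), dtype=theano.config.floatX))
print(a.shape, b.shape)                 # expected: (2, 4) (2, 8)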
Example 1: example2
# Required import: from blocks.bricks.parallel import Fork [as alias]
# Or alternatively: from blocks.bricks.parallel.Fork import initialize [as alias]
def example2():
    """GRU"""
    # Assumes the surrounding imports of the original file, e.g.:
    # import numpy as np; import theano; from theano import tensor;
    # from blocks.bricks import Linear; from blocks.bricks.recurrent import GatedRecurrent;
    # from blocks import initialization; from blocks.initialization import Constant
    x = tensor.tensor3('x')
    dim = 3

    fork = Fork(input_dim=dim, output_dims=[dim, dim * 2], name='fork',
                output_names=["linear", "gates"],
                weights_init=initialization.Identity(),
                biases_init=Constant(0))
    gru = GatedRecurrent(dim=dim, weights_init=initialization.Identity(),
                         biases_init=Constant(0))
    fork.initialize()
    gru.initialize()

    linear, gate_inputs = fork.apply(x)
    h = gru.apply(linear, gate_inputs)

    f = theano.function([x], h)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))

    doubler = Linear(
        input_dim=dim, output_dim=dim, weights_init=initialization.Identity(2),
        biases_init=initialization.Constant(0))
    doubler.initialize()

    lin, gate = fork.apply(doubler.apply(x))
    h_doubler = gru.apply(lin, gate)

    f = theano.function([x], h_doubler)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))
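A quick follow-up to Example 1 (a sketch, not part of the original snippet): the fork produces exactly the two sequences that GatedRecurrent.apply expects, a linear part of width dim and a gate part of width 2 * dim (update and reset gates). Reusing x, fork and dim from inside the function above (run as a continuation of its body, with the same numpy/theano imports), the shapes can be checked directly:

f_fork = theano.function([x], fork.apply(x))
lin_out, gate_out = f_fork(np.ones((dim, 1, dim), dtype=theano.config.floatX))
print(lin_out.shape, gate_out.shape)  # expected: (3, 1, 3) and (3, 1, 6)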
Example 2: build_fork_lookup
# Required import: from blocks.bricks.parallel import Fork [as alias]
# Or alternatively: from blocks.bricks.parallel.Fork import initialize [as alias]
def build_fork_lookup(vocab_size, args):
    x = tensor.lmatrix('features')
    virtual_dim = 6
    time_length = 5
    mini_batch_size = 2
    skip_connections = True
    layers = 3

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(virtual_dim)

    print(output_names)
    print(output_dims)

    lookup = LookupTable(length=vocab_size, dim=virtual_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    fork = Fork(output_names=output_names, input_dim=time_length,
                output_dims=output_dims,
                prototype=FeedforwardSequence(
                    [lookup.apply]))

    # Return list of 3D Tensor, one for each layer
    # (Batch X Time X embedding_dim)
    pre_rnn = fork.apply(x)
    fork.initialize()

    f = theano.function([x], pre_rnn)
    return f
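Two side notes on Example 2 (my reading, not from the original author): the fork is applied before fork.initialize() is called, which blocks allows because applying a brick only allocates its parameters while initialize() fills in their values, so initialization merely has to happen before the compiled function runs; and the args parameter is never used inside the function. Under those assumptions, a purely illustrative way to exercise the returned function might be:

import numpy as np
f = build_fork_lookup(vocab_size=10, args=None)
batch = np.random.randint(0, 10, size=(2, 5)).astype('int64')  # (batch=2, time=5) indices
print([out.shape for out in f(batch)])  # one (2, 5, 6) array per forked output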
Example 3: build_model_vanilla
# Required import: from blocks.bricks.parallel import Fork [as alias]
# Or alternatively: from blocks.bricks.parallel.Fork import initialize [as alias]
# ... (part of the code is omitted here) ...
    transitions = [SimpleRecurrent(dim=state_dim, activation=Tanh())
                   for _ in range(layers)]

    rnn = RecurrentStack(transitions, skip_connections=skip_connections)

    # If skip_connections: dim = layers * state_dim
    # else: dim = state_dim
    output_layer = Linear(
        input_dim=skip_connections * layers *
        state_dim + (1 - skip_connections) * state_dim,
        output_dim=vocab_size, name="output_layer")

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn = fork.apply(x)

    # Give a name to the input of each layer
    if skip_connections:
        for t in range(len(pre_rnn)):
            pre_rnn[t].name = "pre_rnn_" + str(t)
    else:
        pre_rnn.name = "pre_rnn"

    # Prepare inputs for the RNN
    kwargs = OrderedDict()
    init_states = {}
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if skip_connections:
            kwargs['inputs' + suffix] = pre_rnn[d]
        elif d == 0:
            kwargs['inputs'] = pre_rnn
        init_states[d] = theano.shared(
            numpy.zeros((args.mini_batch_size, state_dim)).astype(floatX),
            name='state0_%d' % d)
        kwargs['states' + suffix] = init_states[d]

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, **kwargs)

    # We have
    # h = [state, state_1, state_2 ...] if layers > 1
    # h = state if layers == 1
    # If we have skip connections, concatenate all the states
    # Else only consider the state of the highest layer
    last_states = {}
    if layers > 1:
        # Save all the last states
        for d in range(layers):
            last_states[d] = h[d][-1, :, :]
        if skip_connections:
            h = tensor.concatenate(h, axis=2)
        else:
            h = h[-1]
    else:
        last_states[0] = h[-1, :, :]
    h.name = "hidden_state"

    # The updates of the hidden states
    updates = []
    for d in range(layers):
        updates.append((init_states[d], last_states[d]))

    presoft = output_layer.apply(h[context:, :, :])

    # Define the cost
    # Compute the probability distribution
    time, batch, feat = presoft.shape
    presoft.name = 'presoft'

    cross_entropy = Softmax().categorical_cross_entropy(
        y[context:, :].flatten(),
        presoft.reshape((batch * time, feat)))
    cross_entropy = cross_entropy / tensor.log(2)
    cross_entropy.name = "cross_entropy"

    # TODO: add regularisation for the cost
    # the log(1) is here in order to differentiate the two variables
    # for monitoring
    cost = cross_entropy + tensor.log(1)
    cost.name = "regularized_cost"

    # Initialize the model
    logger.info('Initializing...')

    fork.initialize()

    rnn.weights_init = initialization.Orthogonal()
    rnn.biases_init = initialization.Constant(0)
    rnn.initialize()

    output_layer.weights_init = initialization.IsotropicGaussian(0.1)
    output_layer.biases_init = initialization.Constant(0)
    output_layer.initialize()

    return cost, cross_entropy, updates
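A note on the cost block just above (my reading, not from the original author): dividing the categorical cross-entropy by tensor.log(2) converts it from nats to bits per symbol, and adding tensor.log(1) adds exactly zero; as the TODO comment hints, it only exists so that "regularized_cost" and "cross_entropy" are distinct Theano variables for monitoring until real regularisation is added.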
Example 4: get_prernn
# Required import: from blocks.bricks.parallel import Fork [as alias]
# Or alternatively: from blocks.bricks.parallel.Fork import initialize [as alias]
def get_prernn(args):
    # time x batch
    x_mask = tensor.fmatrix('mask')

    # Compute the state dim
    if args.rnn_type == 'lstm':
        state_dim = 4 * args.state_dim
    else:
        state_dim = args.state_dim

    # Prepare the arguments for the fork
    output_names = []
    output_dims = []
    for d in range(args.layers):
        if d > 0:
            suffix = RECURRENTSTACK_SEPARATOR + str(d)
        else:
            suffix = ''
        if d == 0 or args.skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(state_dim)

    # Prepare the brick to be forked (LookupTable or Linear)
    # Check if the dataset provides indices (in the case of a
    # fixed vocabulary, x is 2D tensor) or if it gives raw values
    # (x is 3D tensor)
    if has_indices(args.dataset):
        features = args.mini_batch_size
        x = tensor.lmatrix('features')
        vocab_size = get_output_size(args.dataset)
        lookup = LookupTable(length=vocab_size, dim=state_dim)
        lookup.weights_init = initialization.IsotropicGaussian(0.1)
        lookup.biases_init = initialization.Constant(0)
        forked = FeedforwardSequence([lookup.apply])
        if not has_mask(args.dataset):
            x_mask = tensor.ones_like(x, dtype=floatX)
    else:
        x = tensor.tensor3('features', dtype=floatX)
        if args.used_inputs is not None:
            x = tensor.set_subtensor(x[args.used_inputs:, :, :],
                                     tensor.zeros_like(x[args.used_inputs:,
                                                         :, :],
                                                       dtype=floatX))
        features = get_output_size(args.dataset)
        forked = Linear(input_dim=features, output_dim=state_dim)
        forked.weights_init = initialization.IsotropicGaussian(0.1)
        forked.biases_init = initialization.Constant(0)
        if not has_mask(args.dataset):
            x_mask = tensor.ones_like(x[:, :, 0], dtype=floatX)

    # Define the fork
    fork = Fork(output_names=output_names, input_dim=features,
                output_dims=output_dims,
                prototype=forked)
    fork.initialize()

    # Apply the fork
    prernn = fork.apply(x)

    # Give a name to the input of each layer
    if args.skip_connections:
        for t in range(len(prernn)):
            prernn[t].name = "pre_rnn_" + str(t)
    else:
        prernn.name = "pre_rnn"

    return prernn, x_mask
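The pattern worth noting in get_prernn: the brick to be forked carries its own weights_init and biases_init, and a single fork.initialize() call then initializes every forked copy, so the Fork itself never needs its own initialization schemes. A minimal illustration of the same pattern with a Linear prototype (a sketch assuming blocks propagates configuration as in the example above; names and dimensions are made up):

from blocks.bricks import Linear
from blocks.bricks.parallel import Fork
from blocks import initialization

proto = Linear(name='to_inputs')
proto.weights_init = initialization.IsotropicGaussian(0.1)
proto.biases_init = initialization.Constant(0)

fork = Fork(output_names=['inputs', 'inputs_1'],
            input_dim=8, output_dims=[16, 16],
            prototype=proto)
fork.initialize()   # initializes both forked copies of `proto`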
Example 5: build_fork_lookup
# Required import: from blocks.bricks.parallel import Fork [as alias]
# Or alternatively: from blocks.bricks.parallel.Fork import initialize [as alias]
def build_fork_lookup(vocab_size, time_length, args):
    x = tensor.lmatrix('features')
    virtual_dim = 6
    state_dim = 6
    skip_connections = False
    layers = 1

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(virtual_dim)

    lookup = LookupTable(length=vocab_size, dim=virtual_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    fork = Fork(output_names=output_names, input_dim=time_length,
                output_dims=output_dims,
                prototype=FeedforwardSequence(
                    [lookup.apply]))

    # Note that this order of the periods makes faster modules flow into
    # slower ones, which is the opposite of the original paper
    transitions = [ClockworkBase(dim=state_dim, activation=Tanh(),
                                 period=2 ** i) for i in range(layers)]

    rnn = RecurrentStack(transitions, skip_connections=skip_connections)

    # Return list of 3D Tensor, one for each layer
    # (Batch X Time X embedding_dim)
    pre_rnn = fork.apply(x)

    # Give time as the first index for each element in the list:
    # (Time X Batch X embedding_dim)
    if layers > 1 and skip_connections:
        for t in range(len(pre_rnn)):
            pre_rnn[t] = pre_rnn[t].dimshuffle(1, 0, 2)
    else:
        pre_rnn = pre_rnn.dimshuffle(1, 0, 2)

    f_pre_rnn = theano.function([x], pre_rnn)

    # Prepare inputs for the RNN
    kwargs = OrderedDict()
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            if skip_connections:
                kwargs['inputs' + suffix] = pre_rnn[d]
            else:
                kwargs['inputs' + suffix] = pre_rnn

    print(kwargs)

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, **kwargs)

    fork.initialize()

    rnn.weights_init = initialization.Orthogonal()
    rnn.biases_init = initialization.Constant(0)
    rnn.initialize()

    f_h = theano.function([x], h)
    return f_pre_rnn, f_h
Example 6: main
# Required import: from blocks.bricks.parallel import Fork [as alias]
# Or alternatively: from blocks.bricks.parallel.Fork import initialize [as alias]
def main():
    nvis, nhid, nlat, learn_prior = 784, 200, 100, False
    theano_rng = MRG_RandomStreams(134663)

    # Initialize prior
    prior_mu = shared_floatx(numpy.zeros(nlat), name='prior_mu')
    prior_log_sigma = shared_floatx(numpy.zeros(nlat), name='prior_log_sigma')
    if learn_prior:
        add_role(prior_mu, PARAMETER)
        add_role(prior_log_sigma, PARAMETER)

    # Initialize encoding network
    encoding_network = MLP(activations=[Rectifier()],
                           dims=[nvis, nhid],
                           weights_init=IsotropicGaussian(std=0.001),
                           biases_init=Constant(0))
    encoding_network.initialize()
    encoding_parameter_mapping = Fork(
        output_names=['mu_phi', 'log_sigma_phi'], input_dim=nhid,
        output_dims=dict(mu_phi=nlat, log_sigma_phi=nlat), prototype=Linear(),
        weights_init=IsotropicGaussian(std=0.001), biases_init=Constant(0))
    encoding_parameter_mapping.initialize()

    # Initialize decoding network
    decoding_network = MLP(activations=[Rectifier()],
                           dims=[nlat, nhid],
                           weights_init=IsotropicGaussian(std=0.001),
                           biases_init=Constant(0))
    decoding_network.initialize()
    decoding_parameter_mapping = Linear(
        input_dim=nhid, output_dim=nvis, name='mu_theta',
        weights_init=IsotropicGaussian(std=0.001),
        biases_init=Constant(0))
    decoding_parameter_mapping.initialize()

    # Encode / decode
    x = tensor.matrix('features')
    h_phi = encoding_network.apply(x)
    mu_phi, log_sigma_phi = encoding_parameter_mapping.apply(h_phi)
    epsilon = theano_rng.normal(size=mu_phi.shape, dtype=mu_phi.dtype)
    epsilon.name = 'epsilon'
    z = mu_phi + epsilon * tensor.exp(log_sigma_phi)
    z.name = 'z'
    h_theta = decoding_network.apply(z)
    mu_theta = decoding_parameter_mapping.apply(h_theta)

    # Compute cost
    kl_term = (
        prior_log_sigma - log_sigma_phi
        + 0.5 * (
            tensor.exp(2 * log_sigma_phi) + (mu_phi - prior_mu) ** 2
        ) / tensor.exp(2 * prior_log_sigma)
        - 0.5
    ).sum(axis=1)
    kl_term.name = 'kl_term'
    kl_term_mean = kl_term.mean()
    kl_term_mean.name = 'avg_kl_term'
    reconstruction_term = - (
        x * tensor.nnet.softplus(-mu_theta)
        + (1 - x) * tensor.nnet.softplus(mu_theta)).sum(axis=1)
    reconstruction_term.name = 'reconstruction_term'
    reconstruction_term_mean = -reconstruction_term.mean()
    reconstruction_term_mean.name = 'avg_reconstruction_term'
    cost = -(reconstruction_term - kl_term).mean()
    cost.name = 'nll_upper_bound'

    # Datasets and data streams
    mnist_train = MNIST(
        'train', start=0, stop=50000, binary=True, sources=('features',))
    train_loop_stream = DataStream(
        dataset=mnist_train,
        iteration_scheme=SequentialScheme(mnist_train.num_examples, 100))
    train_monitor_stream = DataStream(
        dataset=mnist_train,
        iteration_scheme=SequentialScheme(mnist_train.num_examples, 500))
    mnist_valid = MNIST(
        'train', start=50000, stop=60000, binary=True, sources=('features',))
    valid_monitor_stream = DataStream(
        dataset=mnist_valid,
        iteration_scheme=SequentialScheme(mnist_valid.num_examples, 500))
    mnist_test = MNIST('test', binary=True, sources=('features',))
    test_monitor_stream = DataStream(
        dataset=mnist_test,
        iteration_scheme=SequentialScheme(mnist_test.num_examples, 500))

    # Get parameters
    computation_graph = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(computation_graph.variables)

    # Training loop
    step_rule = RMSProp(learning_rate=1e-3, decay_rate=0.95)
    algorithm = GradientDescent(cost=cost, params=params, step_rule=step_rule)
    monitored_quantities = [cost, reconstruction_term_mean, kl_term_mean]
    main_loop = MainLoop(
        model=None, data_stream=train_loop_stream, algorithm=algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=200),
            DataStreamMonitoring(
                monitored_quantities, train_monitor_stream, prefix="train"),
            # ... (part of the code is omitted here) ...
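One detail in Example 6 worth calling out (my note, not from the original author): output_dims may be given as a dict keyed by the output names (here mu_phi and log_sigma_phi), and apply() still returns the outputs in output_names order. A stripped-down sketch of just that piece, with invented dimensions:

from theano import tensor
from blocks.bricks import Linear
from blocks.bricks.parallel import Fork
from blocks.initialization import IsotropicGaussian, Constant

h = tensor.matrix('h')
mapping = Fork(output_names=['mu', 'log_sigma'],
               input_dim=200,
               output_dims=dict(mu=100, log_sigma=100),
               prototype=Linear(),
               weights_init=IsotropicGaussian(0.01),
               biases_init=Constant(0))
mapping.initialize()
mu, log_sigma = mapping.apply(h)   # returned in output_names order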
Example 7: build_model_soft
# Required import: from blocks.bricks.parallel import Fork [as alias]
# Or alternatively: from blocks.bricks.parallel.Fork import initialize [as alias]
# ... (part of the code is omitted here) ...
    # dim = layers * state_dim
    output_layer = Linear(
        input_dim=layers * state_dim,
        output_dim=vocab_size, name="output_layer")

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn = fork.apply(x)

    # Give a name to the input of each layer
    if skip_connections:
        for t in range(len(pre_rnn)):
            pre_rnn[t].name = "pre_rnn_" + str(t)
    else:
        pre_rnn.name = "pre_rnn"

    # Prepare inputs for the RNN
    kwargs = OrderedDict()
    init_states = {}
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if skip_connections:
            kwargs['inputs' + suffix] = pre_rnn[d]
        elif d == 0:
            kwargs['inputs' + suffix] = pre_rnn
        init_states[d] = theano.shared(
            numpy.zeros((args.mini_batch_size, state_dim)).astype(floatX),
            name='state0_%d' % d)
        kwargs['states' + suffix] = init_states[d]

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, **kwargs)

    # Now we have:
    # h = [state, state_1, gate_value_1, state_2, gate_value_2, state_3, ...]

    # Extract gate_values
    gate_values = h[2::2]
    new_h = [h[0]]
    new_h.extend(h[1::2])
    h = new_h

    # Now we have:
    # h = [state, state_1, state_2, ...]
    # gate_values = [gate_value_1, gate_value_2, gate_value_3]
    for i, gate_value in enumerate(gate_values):
        gate_value.name = "gate_value_" + str(i)

    # Save all the last states
    last_states = {}
    for d in range(layers):
        last_states[d] = h[d][-1, :, :]

    # Concatenate all the states
    if layers > 1:
        h = tensor.concatenate(h, axis=2)
    h.name = "hidden_state"

    # The updates of the hidden states
    updates = []
    for d in range(layers):
        updates.append((init_states[d], last_states[d]))

    presoft = output_layer.apply(h[context:, :, :])

    # Define the cost
    # Compute the probability distribution
    time, batch, feat = presoft.shape
    presoft.name = 'presoft'

    cross_entropy = Softmax().categorical_cross_entropy(
        y[context:, :].flatten(),
        presoft.reshape((batch * time, feat)))
    cross_entropy = cross_entropy / tensor.log(2)
    cross_entropy.name = "cross_entropy"

    # TODO: add regularisation for the cost
    # the log(1) is here in order to differentiate the two variables
    # for monitoring
    cost = cross_entropy + tensor.log(1)
    cost.name = "regularized_cost"

    # Initialize the model
    logger.info('Initializing...')

    fork.initialize()

    rnn.weights_init = initialization.Orthogonal()
    rnn.biases_init = initialization.Constant(0)
    rnn.initialize()

    output_layer.weights_init = initialization.IsotropicGaussian(0.1)
    output_layer.biases_init = initialization.Constant(0)
    output_layer.initialize()

    return cost, cross_entropy, updates, gate_values
Example 8: main
# Required import: from blocks.bricks.parallel import Fork [as alias]
# Or alternatively: from blocks.bricks.parallel.Fork import initialize [as alias]
def main():
    x = T.tensor3('features')
    m = T.matrix('features_mask')
    y = T.imatrix('targets')

    x = m.mean() + x  # stupid mask not always needed...

    # embedding_size = 300
    # glove_version = "glove.6B.300d.txt"
    embedding_size = 50
    glove_version = "vectors.6B.50d.txt"
    wstd = 0.02

    conv1 = Conv1D(filter_length=5, num_filters=128, input_dim=embedding_size,
                   weights_init=IsotropicGaussian(std=wstd),
                   biases_init=Constant(0.0))
    conv1.initialize()
    o = conv1.apply(x)
    o = Rectifier(name="conv1red").apply(o)
    o = MaxPooling1D(pooling_length=5
                     # , step=2
                     ).apply(o)

    conv2 = Conv1D(filter_length=5, num_filters=128, input_dim=128,
                   weights_init=IsotropicGaussian(std=wstd),
                   biases_init=Constant(0.0),
                   step=3,
                   name="conv2")
    conv2.initialize()
    o = conv2.apply(o)
    o = Rectifier(name="conv2rec").apply(o)

    conv2 = Conv1D(filter_length=5, num_filters=128, input_dim=128,
                   weights_init=IsotropicGaussian(std=wstd),
                   biases_init=Constant(0.0),
                   step=3,
                   name="conv3")
    conv2.initialize()
    o = conv2.apply(o)
    o = Rectifier(name="conv3rec").apply(o)

    fork = Fork(weights_init=IsotropicGaussian(0.02),
                biases_init=Constant(0.),
                input_dim=128,
                output_dims=[128] * 3,
                output_names=['inputs', 'reset_inputs', 'update_inputs'])
    fork.initialize()

    inputs, reset_inputs, update_inputs = fork.apply(o)

    out = o.mean(axis=1)

    # gru = GatedRecurrent(dim=128,
    #                      weights_init=IsotropicGaussian(0.02),
    #                      biases_init=IsotropicGaussian(0.0))
    # gru.initialize()
    # states = gru.apply(inputs=inputs, reset_inputs=reset_inputs,
    #                    update_inputs=update_inputs)
    # out = states[:, -1, :]

    hidden = Linear(
        input_dim=128,
        output_dim=128,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.))
    hidden.initialize()
    o = hidden.apply(out)
    o = Rectifier().apply(o)

    # hidden = Linear(
    #     input_dim=128,
    #     output_dim=128,
    #     weights_init=IsotropicGaussian(std=0.02),
    #     biases_init=Constant(0.),
    #     name="hiddenmap2")
    # hidden.initialize()
    # o = hidden.apply(o)
    # o = Rectifier(name="rec2").apply(o)

    score_layer = Linear(
        input_dim=128,
        output_dim=1,
        weights_init=IsotropicGaussian(std=wstd),
        biases_init=Constant(0.),
        name="linear2")
    score_layer.initialize()
    o = score_layer.apply(o)

    probs = Sigmoid().apply(o)

    cost = - (y * T.log(probs) + (1 - y) * T.log(1 - probs)).mean()
    cost.name = 'cost'
    misclassification = (y * (probs < 0.5) + (1 - y) * (probs > 0.5)).mean()
    misclassification.name = 'misclassification'

    # print (rnn_states * m.dimshuffle(0, 1, 'x')).sum(axis=1).shape.eval(
    # ... (part of the code is omitted here) ...