This article collects typical usage examples of the Python method blocks.bricks.sequence_generators.SequenceGenerator.generate. If you have been wondering what SequenceGenerator.generate does, how to use it, or where to find working examples, the curated snippets below may help. You can also read further about the containing class, blocks.bricks.sequence_generators.SequenceGenerator.
The following 15 code examples of SequenceGenerator.generate are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
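Before diving into the examples, a minimal sketch of the typical call pattern may help orientation. It is assembled from the snippets below and assumes a Theano/Blocks environment; the dimensions are illustrative, and it uses the newer Readout/feedback_brick API (several examples below use the older LinearReadout/feedbacker names from earlier Blocks versions).

from blocks.bricks import Tanh
from blocks.bricks.recurrent import GatedRecurrent
from blocks.bricks.sequence_generators import (
    SequenceGenerator, Readout, SoftmaxEmitter, LookupFeedback)
from blocks.graph import ComputationGraph
from blocks.initialization import Constant, IsotropicGaussian, Orthogonal

# Illustrative sizes; any positive values work for a shape check.
num_outputs, dim = 50, 20

transition = GatedRecurrent(name="transition", activation=Tanh(), dim=dim,
                            weights_init=Orthogonal())
generator = SequenceGenerator(
    Readout(readout_dim=num_outputs, source_names=["states"],
            emitter=SoftmaxEmitter(name="emitter"),
            feedback_brick=LookupFeedback(num_outputs, dim, name="feedback"),
            name="readout"),
    transition,
    weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
    name="generator")
generator.initialize()

# generate() returns symbolic (states, outputs, costs). The sampling
# updates produced by the underlying scan must reach theano.function;
# wrapping the outputs in a ComputationGraph takes care of that.
states, outputs, costs = generator.generate(
    n_steps=10, batch_size=4, iterate=True)
sample_fn = ComputationGraph([states, outputs, costs]).get_theano_function()
states_val, outputs_val, costs_val = sample_fn()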
Example 1: test_sequence_generator
# Required import: from blocks.bricks.sequence_generators import SequenceGenerator [as alias]
# Or: from blocks.bricks.sequence_generators.SequenceGenerator import generate [as alias]
def test_sequence_generator():
# Disclaimer: here we only check shapes, not values.
output_dim = 1
dim = 20
batch_size = 30
n_steps = 10
transition = GatedRecurrent(
name="transition", activation=Tanh(), dim=dim,
weights_init=Orthogonal())
generator = SequenceGenerator(
LinearReadout(readout_dim=output_dim, source_names=["states"],
emitter=TestEmitter(name="emitter"), name="readout"),
transition,
weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
name="generator")
generator.initialize()
y = tensor.tensor3('y')
mask = tensor.matrix('mask')
costs = generator.cost(y, mask)
assert costs.ndim == 2
costs_val = theano.function([y, mask], [costs])(
numpy.zeros((n_steps, batch_size, output_dim), dtype=floatX),
numpy.ones((n_steps, batch_size), dtype=floatX))[0]
assert costs_val.shape == (n_steps, batch_size)
states, outputs, costs = [variable.eval() for variable in
generator.generate(
iterate=True, batch_size=batch_size,
n_steps=n_steps)]
assert states.shape == (n_steps, batch_size, dim)
assert outputs.shape == (n_steps, batch_size, output_dim)
assert costs.shape == (n_steps, batch_size)
Example 2: test_integer_sequence_generator
# Required import: from blocks.bricks.sequence_generators import SequenceGenerator [as alias]
# Or: from blocks.bricks.sequence_generators.SequenceGenerator import generate [as alias]
def test_integer_sequence_generator():
# Disclaimer: here we only check shapes, not values.
readout_dim = 5
feedback_dim = 3
dim = 20
batch_size = 30
n_steps = 10
transition = GatedRecurrent(
name="transition", activation=Tanh(), dim=dim,
weights_init=Orthogonal())
generator = SequenceGenerator(
LinearReadout(readout_dim=readout_dim, source_names=["states"],
emitter=SoftmaxEmitter(name="emitter"),
feedbacker=LookupFeedback(readout_dim, feedback_dim),
name="readout"),
transition,
weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
name="generator")
generator.initialize()
y = tensor.lmatrix('y')
mask = tensor.matrix('mask')
costs = generator.cost(y, mask)
assert costs.ndim == 2
costs_val = theano.function([y, mask], [costs])(
numpy.zeros((n_steps, batch_size), dtype='int64'),
numpy.ones((n_steps, batch_size), dtype=floatX))[0]
assert costs_val.shape == (n_steps, batch_size)
states, outputs, costs = generator.generate(
iterate=True, batch_size=batch_size, n_steps=n_steps)
states_val, outputs_val, costs_val = theano.function(
[], [states, outputs, costs],
updates=costs.owner.inputs[0].owner.tag.updates)()
assert states_val.shape == (n_steps, batch_size, dim)
assert outputs_val.shape == (n_steps, batch_size)
assert outputs_val.dtype == 'int64'
assert costs_val.shape == (n_steps, batch_size)
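The `updates=costs.owner.inputs[0].owner.tag.updates` call above digs the sampling updates out of the scan node by hand. A hedged, equivalent sketch using the pattern from Example 10 below: wrap the symbolic results in a ComputationGraph and let it collect the updates (`states`, `outputs`, `costs`, and `theano` are the names already in scope in this example).

from blocks.graph import ComputationGraph

# Gather the scan/sampling updates from the graph instead of walking
# the owner chain of the cost variable.
cg = ComputationGraph([states, outputs, costs])
sample_fn = theano.function([], [states, outputs, costs],
                            updates=cg.updates)
states_val, outputs_val, costs_val = sample_fn()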
Example 3: PyramidLayer
# Required import: from blocks.bricks.sequence_generators import SequenceGenerator [as alias]
# Or: from blocks.bricks.sequence_generators.SequenceGenerator import generate [as alias]
#......... part of the code omitted here .........
[hidden_size_mlp_theta]*depth_theta
activations_context = [Rectifier()]*depth_context
dims_context = [frame_size] + [hidden_size_mlp_context]*(depth_context-1) + \
[context_size]
mlp_x = MLP(activations = activations_x,
dims = dims_x,
name = "mlp_x")
feedback = DeepTransitionFeedback(mlp = mlp_x)
transition = [GatedRecurrent(dim=hidden_size_recurrent,
use_bias = True,
name = "gru_{}".format(i) ) for i in range(depth_transition)]
transition = RecurrentStack( transition,
name="transition", skip_connections = True)
self.transition = transition
mlp_theta = MLP( activations = activations_theta,
dims = dims_theta,
name = "mlp_theta")
mlp_gmm = GMMMLP(mlp = mlp_theta,
dim = target_size,
k = k,
const = 0.00001,
name = "gmm_wrap")
gmm_emitter = GMMEmitter(gmmmlp = mlp_gmm,
output_size = frame_size, k = k)
source_names = [name for name in transition.apply.states if 'states' in name]
attention = SimpleSequenceAttention(
state_names = source_names,
state_dims = [hidden_size_recurrent],
attended_dim = context_size,
name = "attention")
# Verify source names
readout = Readout(
readout_dim = hidden_size_recurrent,
source_names =source_names + ['feedback'] + ['glimpses'],
emitter=gmm_emitter,
feedback_brick = feedback,
name="readout")
self.generator = SequenceGenerator(readout=readout,
transition=transition,
attention = attention,
name = "generator")
self.mlp_context = MLP(activations = activations_context,
dims = dims_context)
self.children = [self.generator, self.mlp_context]
self.final_states = []
def monitoring_vars(self, cg):
readout = self.generator.readout
readouts = VariableFilter( applications = [readout.readout],
name_regex = "output")(cg.variables)[0]
mu, sigma, coeff = readout.emitter.components(readouts)
min_sigma = sigma.min().copy(name="sigma_min")
mean_sigma = sigma.mean().copy(name="sigma_mean")
max_sigma = sigma.max().copy(name="sigma_max")
min_mu = mu.min().copy(name="mu_min")
mean_mu = mu.mean().copy(name="mu_mean")
max_mu = mu.max().copy(name="mu_max")
monitoring_vars = [mean_sigma, min_sigma,
min_mu, max_mu, mean_mu, max_sigma]
return monitoring_vars
@application
def cost(self, x, context, **kwargs):
cost_matrix = self.generator.cost_matrix(
x, attended=self.mlp_context.apply(context),
**kwargs)
return cost_matrix.mean()
@application
def generate(self, context):
return self.generator.generate(
attended = self.mlp_context.apply(context),
n_steps = context.shape[0],
batch_size = context.shape[1],
iterate = True)
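A hedged usage sketch for the class above: `layer` stands for a hypothetical, initialized PyramidLayer instance, and the (timesteps, batch, frame_size) context shape is an assumption read off from how mlp_context is applied.

from theano import tensor
from blocks.graph import ComputationGraph

# Hypothetical sampling setup; `layer` is an initialized PyramidLayer.
context = tensor.tensor3('context')
sampling_graph = ComputationGraph(layer.generate(context))
sample_fn = sampling_graph.get_theano_function()
# The returned values follow SequenceGenerator.generate's convention.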
Example 4: function
# Required import: from blocks.bricks.sequence_generators import SequenceGenerator [as alias]
# Or: from blocks.bricks.sequence_generators.SequenceGenerator import generate [as alias]
if '/generator/readout/emitter/mlp/' in k:
v = parameters2.pop(k)
parameters2[k.replace('/generator/readout/emitter/mlp/',
'/generator/readout/emitter/gmm_emitter/gmmmlp/mlp/') ] = v
model.set_parameter_values(parameters2)
#print function([f0, sp, voiced], cost_matrix, updates = extra_updates)(x_tr[0],x_tr[1],x_tr[2])
#generator.generate(n_steps=steps, batch_size=n_samples, iterate=True, **states)
#states = {}
sample = ComputationGraph(generator.generate(n_steps=steps,
batch_size=n_samples, iterate=True))
sample_fn = sample.get_theano_function()
outputs_bp = sample_fn()[-2]
for this_sample in range(n_samples):
print "Iteration: ", this_sample
outputs = outputs_bp
sampled_f0 = outputs[:,:,-2]
sampled_voiced = outputs[:,:,-1]
print sampled_voiced.mean()
print sampled_f0.max(), sampled_f0.min()
outputs = outputs[:,:,:-2]
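The key-rewriting loop at the top of this example is a general pattern for loading parameters that were saved under an older brick hierarchy. A hedged restatement as a reusable helper (the function name is ours, not part of Blocks; building a new dict also avoids mutating the mapping while iterating over it):

def remap_parameter_paths(parameters, old_prefix, new_prefix):
    """Rewrite saved parameter paths before Model.set_parameter_values."""
    remapped = {}
    for key, value in parameters.items():
        remapped[key.replace(old_prefix, new_prefix)] = value
    return remapped

# Mirroring the loop above:
# parameters2 = remap_parameter_paths(
#     parameters2, '/generator/readout/emitter/mlp/',
#     '/generator/readout/emitter/gmm_emitter/gmmmlp/mlp/')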
Example 5: Decoder
# Required import: from blocks.bricks.sequence_generators import SequenceGenerator [as alias]
# Or: from blocks.bricks.sequence_generators.SequenceGenerator import generate [as alias]
class Decoder(Initializable):
def __init__(self, vocab_size, embedding_dim, state_dim,
representation_dim, **kwargs):
super(Decoder, self).__init__(**kwargs)
self.vocab_size = vocab_size
self.embedding_dim = embedding_dim
self.state_dim = state_dim
self.representation_dim = representation_dim
self.transition = GRUInitialState(
attended_dim=state_dim, dim=state_dim,
activation=Tanh(), name='decoder')
self.attention = SequenceContentAttention(
state_names=self.transition.apply.states,
attended_dim=representation_dim,
match_dim=state_dim, name="attention")
readout = Readout(
source_names=['states', 'feedback', self.attention.take_glimpses.outputs[0]],
readout_dim=self.vocab_size,
emitter=SoftmaxEmitter(initial_output=-1),
feedback_brick=LookupFeedbackWMT15(vocab_size, embedding_dim),
post_merge=InitializableFeedforwardSequence(
[Bias(dim=state_dim, name='maxout_bias').apply,
Maxout(num_pieces=2, name='maxout').apply,
Linear(input_dim=state_dim / 2, output_dim=embedding_dim,
use_bias=False, name='softmax0').apply,
Linear(input_dim=embedding_dim, name='softmax1').apply]),
merged_dim=state_dim)
self.sequence_generator = SequenceGenerator(
readout=readout,
transition=self.transition,
attention=self.attention,
fork=Fork([name for name in self.transition.apply.sequences
if name != 'mask'], prototype=Linear())
)
self.children = [self.sequence_generator]
@application(inputs=['representation', 'source_sentence_mask',
'target_sentence_mask', 'target_sentence'],
outputs=['cost'])
def cost(self, representation, source_sentence_mask,
target_sentence, target_sentence_mask):
source_sentence_mask = source_sentence_mask.T
target_sentence = target_sentence.T
target_sentence_mask = target_sentence_mask.T
# Get the cost matrix
cost = self.sequence_generator.cost_matrix(
**{'mask': target_sentence_mask,
'outputs': target_sentence,
'attended': representation,
'attended_mask': source_sentence_mask}
)
return (cost * target_sentence_mask).sum() / target_sentence_mask.shape[1]
@application
def generate(self, source_sentence, representation):
return self.sequence_generator.generate(
n_steps=2 * source_sentence.shape[1],
batch_size=source_sentence.shape[0],
attended=representation,
attended_mask=tensor.ones(source_sentence.shape).T)
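A hedged sketch of driving the Decoder above for sampling; the instance name `decoder` and the symbolic input shapes are assumptions consistent with the generate() signature in the class.

from theano import tensor
from blocks.graph import ComputationGraph

# Hypothetical wiring: source_sentence as a (batch, length) integer
# matrix, representation as the attended encoder states.
source_sentence = tensor.lmatrix('source_sentence')
representation = tensor.tensor3('representation')
generated = decoder.generate(source_sentence, representation)
sampling_fn = ComputationGraph(generated).get_theano_function()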
Example 6: ComputationGraph
# Required import: from blocks.bricks.sequence_generators import SequenceGenerator [as alias]
# Or: from blocks.bricks.sequence_generators.SequenceGenerator import generate [as alias]
cg = ComputationGraph(cost)
model = Model(cost)
transition_matrix = VariableFilter(
theano_name_regex = "state_to_state")(cg.parameters)
for matr in transition_matrix:
matr.set_value(0.98*np.eye(hidden_size_recurrent, dtype = floatX))
readouts = VariableFilter( applications = [generator.readout.readout],
name_regex = "output")(cg.variables)[0]
mean, sigma, corr, weight, penup = emitter.components(readouts)
emit = generator.generate(
n_steps = 400,
batch_size = 8,
iterate = True
)[-2]
function([x, x_mask], cost)(x_tr[0],x_tr[1])
emit_fn = ComputationGraph(emit).get_theano_function()
emit_fn()
min_sigma = sigma.min(axis=(0,2)).copy(name="sigma_min")
mean_sigma = sigma.mean(axis=(0,2)).copy(name="sigma_mean")
max_sigma = sigma.max(axis=(0,2)).copy(name="sigma_max")
min_mean = mean.min(axis=(0,2)).copy(name="mu_min")
mean_mean = mean.mean(axis=(0,2)).copy(name="mu_mean")
Example 7: ComputationGraph
# Required import: from blocks.bricks.sequence_generators import SequenceGenerator [as alias]
# Or: from blocks.bricks.sequence_generators.SequenceGenerator import generate [as alias]
generator.transition.push_initialization_config()
generator.initialize()
##############
# Test model
##############
cost_matrix = generator.cost_matrix(x,
attended = mlp_context.apply(context))
cost = cost_matrix.mean()
cost.name = "nll"
emit = generator.generate(
attended = mlp_context.apply(context),
n_steps = context.shape[0],
batch_size = context.shape[1],
iterate = True
)[-4]
cg = ComputationGraph(cost)
model = Model(cost)
#################
# Algorithm
#################
n_batches = 139  # 139*16
algorithm = GradientDescent(
cost=cost, parameters=cg.parameters,
step_rule=CompositeRule([StepClipping(10.0), Adam(lr)]))
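The excerpt stops after constructing the training algorithm. A hedged sketch of the usual Blocks continuation; `data_stream` and the particular extensions are assumptions, not part of this excerpt.

from blocks.extensions import FinishAfter, Printing
from blocks.extensions.monitoring import TrainingDataMonitoring
from blocks.main_loop import MainLoop

main_loop = MainLoop(
    algorithm=algorithm,
    data_stream=data_stream,  # a Fuel data stream, assumed to exist
    model=model,
    extensions=[
        TrainingDataMonitoring([cost], after_batch=True),
        Printing(after_epoch=True),
        FinishAfter(after_n_epochs=10)])
main_loop.run()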
Example 8: main
# Required import: from blocks.bricks.sequence_generators import SequenceGenerator [as alias]
# Or: from blocks.bricks.sequence_generators.SequenceGenerator import generate [as alias]
def main():
logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")
parser = argparse.ArgumentParser(
"Case study of language modeling with RNN",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
"mode", choices=["train", "sample"],
help="The mode to run. Use `train` to train a new model"
" and `sample` to sample a sequence generated by an"
" existing one.")
parser.add_argument(
"prefix", default="sine",
help="The prefix for model, timing and state files")
parser.add_argument(
"state", nargs="?", default="",
help="Changes to Groundhog state")
parser.add_argument("--path", help="Path to a language dataset")
parser.add_argument("--dict", help="Path to the dataset dictionary")
parser.add_argument("--restart", help="Start anew")
parser.add_argument(
"--reset", action="store_true", default=False,
help="Reset the hidden state between batches")
parser.add_argument(
"--steps", type=int, default=100,
help="Number of steps to plot for the 'sample' mode"
" OR training sequence length for the 'train' mode.")
args = parser.parse_args()
logger.debug("Args:\n" + str(args))
dim = 200
num_chars = 50
transition = GatedRecurrent(
name="transition", activation=Tanh(), dim=dim,
weights_init=Orthogonal())
generator = SequenceGenerator(
LinearReadout(readout_dim=num_chars, source_names=["states"],
emitter=SoftmaxEmitter(name="emitter"),
feedbacker=LookupFeedback(
num_chars, dim, name='feedback'),
name="readout"),
transition,
weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
name="generator")
generator.allocate()
logger.debug("Parameters:\n" +
pprint.pformat(
[(key, value.get_value().shape) for key, value
in Selector(generator).get_params().items()],
width=120))
if args.mode == "train":
batch_size = 1
seq_len = args.steps
generator.initialize()
# Build cost computation graph that uses the saved hidden states.
# An issue: for Groundhog this is completely transparent, which is
# why it does not carry the hidden state over the period when
# validation is done. We should find a way to fix this in the future.
x = tensor.lmatrix('x')
init_states = shared_floatx_zeros((batch_size, dim),
name='init_states')
reset = tensor.scalar('reset')
cost = ComputationGraph(
generator.cost(x, states=init_states * reset).sum())
# TODO: better search routine
states = [v for v in cost.variables
if hasattr(v.tag, 'application_call')
and v.tag.application_call.brick == generator.transition
and (v.tag.application_call.application ==
generator.transition.apply)
and v.tag.role == VariableRole.OUTPUT
and v.tag.name == 'states']
assert len(states) == 1
states = states[0]
gh_model = GroundhogModel(generator, cost)
gh_model.properties.append(
('bpc', cost.outputs[0] * numpy.log(2) / seq_len))
gh_model.properties.append(('mean_init_state', init_states.mean()))
gh_model.properties.append(('reset', reset))
if not args.reset:
gh_model.updates.append((init_states, states[-1]))
state = GroundhogState(args.prefix, batch_size,
learning_rate=0.0001).as_dict()
changes = eval("dict({})".format(args.state))
state.update(changes)
def output_format(x, y, reset):
return dict(x=x[:, None], reset=reset)
train, valid, test = [
LMIterator(batch_size=batch_size,
use_infinite_loop=mode == 'train',
path=args.path,
#......... part of the code omitted here .........
Example 9: ComputationGraph
# Required import: from blocks.bricks.sequence_generators import SequenceGenerator [as alias]
# Or: from blocks.bricks.sequence_generators.SequenceGenerator import generate [as alias]
cost = cost_matrix.sum(axis=0).mean()
cost.name = "nll"
cg = ComputationGraph(cost)
model = Model(cost)
transition_matrix = VariableFilter(theano_name_regex="state_to_state")(cg.parameters)
for matr in transition_matrix:
matr.set_value(0.98 * np.eye(hidden_size_recurrent, dtype=floatX))
readouts = VariableFilter(applications=[generator.readout.readout], name_regex="output")(cg.variables)[0]
mean, sigma, corr, weight, penup = emitter.components(readouts)
emit = generator.generate(
n_steps=400, iterate=True, attended=embed, attended_mask=context_mask, batch_size=embed.shape[1]
)[-4]
function([x, x_mask, context, context_mask], cost)(x_tr[0], x_tr[1], x_tr[2], x_tr[3])
emit_fn = ComputationGraph(emit).get_theano_function()
emit_fn(x_tr[3], x_tr[2])[0].shape
min_sigma = sigma.min(axis=(0, 2)).copy(name="sigma_min")
mean_sigma = sigma.mean(axis=(0, 2)).copy(name="sigma_mean")
max_sigma = sigma.max(axis=(0, 2)).copy(name="sigma_max")
min_mean = mean.min(axis=(0, 2)).copy(name="mu_min")
mean_mean = mean.mean(axis=(0, 2)).copy(name="mu_mean")
max_mean = mean.max(axis=(0, 2)).copy(name="mu_max")
min_corr = corr.min().copy(name="corr_min")
Example 10: test_sequence_generator_with_lm
# Required import: from blocks.bricks.sequence_generators import SequenceGenerator [as alias]
# Or: from blocks.bricks.sequence_generators.SequenceGenerator import generate [as alias]
def test_sequence_generator_with_lm():
floatX = theano.config.floatX
rng = numpy.random.RandomState(1234)
readout_dim = 5
feedback_dim = 3
dim = 20
batch_size = 30
n_steps = 10
transition = GatedRecurrent(dim=dim, activation=Tanh(),
weights_init=Orthogonal())
language_model = SequenceGenerator(
Readout(readout_dim=readout_dim, source_names=["states"],
emitter=SoftmaxEmitter(theano_seed=1234),
feedback_brick=LookupFeedback(readout_dim, dim,
name='feedback')),
SimpleRecurrent(dim, Tanh()),
name='language_model')
generator = SequenceGenerator(
Readout(readout_dim=readout_dim, source_names=["states", "lm_states"],
emitter=SoftmaxEmitter(theano_seed=1234),
feedback_brick=LookupFeedback(readout_dim,
feedback_dim)),
transition,
language_model=language_model,
weights_init=IsotropicGaussian(0.1), biases_init=Constant(0),
seed=1234)
generator.initialize()
# Test 'cost_matrix' method
y = tensor.lmatrix('y')
y.tag.test_value = numpy.zeros((15, batch_size), dtype='int64')
mask = tensor.matrix('mask')
mask.tag.test_value = numpy.ones((15, batch_size))
costs = generator.cost_matrix(y, mask)
assert costs.ndim == 2
costs_fun = theano.function([y, mask], [costs])
y_test = rng.randint(readout_dim, size=(n_steps, batch_size))
m_test = numpy.ones((n_steps, batch_size), dtype=floatX)
costs_val = costs_fun(y_test, m_test)[0]
assert costs_val.shape == (n_steps, batch_size)
assert_allclose(costs_val.sum(), 483.153, rtol=1e-5)
# Test 'cost' method
cost = generator.cost(y, mask)
assert cost.ndim == 0
cost_val = theano.function([y, mask], cost)(y_test, m_test)
assert_allclose(cost_val, 16.105, rtol=1e-5)
# Test 'AUXILIARY' variable 'per_sequence_element' in 'cost' method
cg = ComputationGraph([cost])
var_filter = VariableFilter(roles=[AUXILIARY])
aux_var_name = '_'.join([generator.name, generator.cost.name,
'per_sequence_element'])
cost_per_el = [el for el in var_filter(cg.variables)
if el.name == aux_var_name][0]
assert cost_per_el.ndim == 0
cost_per_el_val = theano.function([y, mask], [cost_per_el])(y_test, m_test)
assert_allclose(cost_per_el_val, 1.61051, rtol=1e-5)
# Test generate
states, outputs, lm_states, costs = generator.generate(
iterate=True, batch_size=batch_size, n_steps=n_steps)
cg = ComputationGraph([states, outputs, costs])
states_val, outputs_val, costs_val = theano.function(
[], [states, outputs, costs],
updates=cg.updates)()
assert states_val.shape == (n_steps, batch_size, dim)
assert outputs_val.shape == (n_steps, batch_size)
assert outputs_val.dtype == 'int64'
assert costs_val.shape == (n_steps, batch_size)
assert_allclose(states_val.sum(), -4.88367, rtol=1e-5)
assert_allclose(costs_val.sum(), 486.681, rtol=1e-5)
assert outputs_val.sum() == 627
# Test that the cost is agnostic to masked-out positions
cost1 = costs_fun([[1], [2]], [[1], [1]])[0]
cost2 = costs_fun([[3, 1], [4, 2], [2, 0]],
[[1, 1], [1, 1], [1, 0]])[0]
assert_allclose(cost1.sum(), cost2[:, 1].sum(), rtol=1e-5)
Example 11: test_attention_transition
# Required import: from blocks.bricks.sequence_generators import SequenceGenerator [as alias]
# Or: from blocks.bricks.sequence_generators.SequenceGenerator import generate [as alias]
def test_attention_transition():
inp_dim = 2
inp_len = 10
attended_dim = 3
attended_len = 11
batch_size = 4
n_steps = 30
transition = TestTransition(dim=inp_dim, attended_dim=attended_dim,
name="transition")
attention = SequenceContentAttention(transition.apply.states,
match_dim=inp_dim, name="attention")
mixer = Mixer([name for name in transition.apply.sequences
if name != 'mask'],
attention.take_look.outputs[0],
name="mixer")
att_trans = AttentionTransition(transition, attention, mixer,
name="att_trans")
att_trans.weights_init = IsotropicGaussian(0.01)
att_trans.biases_init = Constant(0)
att_trans.initialize()
attended = tensor.tensor3("attended")
attended_mask = tensor.matrix("attended_mask")
inputs = tensor.tensor3("inputs")
inputs_mask = tensor.matrix("inputs_mask")
states, glimpses, weights = att_trans.apply(
input_=inputs, mask=inputs_mask,
attended=attended, attended_mask=attended_mask)
assert states.ndim == 3
assert glimpses.ndim == 3
assert weights.ndim == 3
input_vals = numpy.zeros((inp_len, batch_size, inp_dim),
dtype=floatX)
input_mask_vals = numpy.ones((inp_len, batch_size),
dtype=floatX)
attended_vals = numpy.zeros((attended_len, batch_size, attended_dim),
dtype=floatX)
attended_mask_vals = numpy.ones((attended_len, batch_size),
dtype=floatX)
func = theano.function([inputs, inputs_mask, attended, attended_mask],
[states, glimpses, weights])
states_vals, glimpses_vals, weight_vals = func(
input_vals, input_mask_vals,
attended_vals, attended_mask_vals)
assert states_vals.shape == input_vals.shape
assert glimpses_vals.shape == (inp_len, batch_size, attended_dim)
assert weight_vals.shape == (inp_len, batch_size, attended_len)
# Test SequenceGenerator using AttentionTransition
generator = SequenceGenerator(
LinearReadout(readout_dim=inp_dim, source_names=["state"],
emitter=TestEmitter(name="emitter"),
name="readout"),
transition=transition,
attention=attention,
weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
name="generator")
outputs = tensor.tensor3('outputs')
costs = generator.cost(outputs, attended=attended,
attended_mask=attended_mask)
costs_vals = costs.eval({outputs: input_vals,
attended: attended_vals,
attended_mask: attended_mask_vals})
assert costs_vals.shape == (inp_len, batch_size)
results = (
generator.generate(n_steps=n_steps, batch_size=attended.shape[1],
attended=attended, attended_mask=attended_mask))
assert len(results) == 5
states_vals, outputs_vals, glimpses_vals, weights_vals, costs_vals = (
theano.function([attended, attended_mask], results)
(attended_vals, attended_mask_vals))
assert states_vals.shape == (n_steps, batch_size, inp_dim)
assert states_vals.shape == outputs_vals.shape
assert glimpses_vals.shape == (n_steps, batch_size, attended_dim)
assert weights_vals.shape == (n_steps, batch_size, attended_len)
assert costs_vals.shape == (n_steps, batch_size)
Example 12: main
# Required import: from blocks.bricks.sequence_generators import SequenceGenerator [as alias]
# Or: from blocks.bricks.sequence_generators.SequenceGenerator import generate [as alias]
#......... part of the code omitted here .........
Linear(input_dim=config['hidden_tgt'],
output_dim=config['embed_tgt'],
use_bias=False,
name='softmax0').apply,
Linear(input_dim=config['embed_tgt'],
name='softmax1').apply]),
merged_dim=config['hidden_tgt'])
decoder = SequenceGenerator(
readout=readout,
transition=transition,
attention=attention,
weights_init=IsotropicGaussian(0.01),
biases_init=Constant(0),
name="generator",
fork=Fork(
[name for name in transition.apply.sequences if name != 'mask'],
prototype=Linear()),
add_contexts=True)
decoder.transition.weights_init = Orthogonal()
#printchildren(encoder, 1)
# Initialize model
logger.info('Initializing model')
embedder.initialize()
transformer.initialize()
encoder.initialize()
decoder.initialize()
# Apply model
embedded = embedder.apply(source_sentence)
transformed = transformer.apply(embedded)
encoded = encoder.apply(transformed)[0]
generated = decoder.generate(
n_steps=2*source_sentence.shape[1],
batch_size=source_sentence.shape[0],
attended = encoded.dimshuffle(1,0,2),
attended_mask=tensor.ones(source_sentence.shape).T
)
print 'Generated: ', generated
# generator_generate_outputs
#samples = generated[1] # For GRU
samples = generated[2] # For LSTM
samples.name = 'samples'
#samples_cost = generated[4] # For GRU
samples_cost = generated[5] # For LSTM
samples_cost.name = 'sampling_cost'
cost = decoder.cost(
mask = target_sentence_mask.T,
outputs = target_sentence.T,
attended = encoded.dimshuffle(1,0,2),
attended_mask = source_sentence_mask.T)
cost.name = 'target_cost'
cost.tag.aggregation_scheme = TakeLast(cost)
model = Model(cost)
logger.info('Creating computational graph')
cg = ComputationGraph(cost)
# apply dropout for regularization
if config['dropout'] < 1.0: # dropout is applied to the output of maxout in ghog
logger.info('Applying dropout')
dropout_inputs = [x for x in cg.intermediary_variables if x.name == 'maxout_apply_output']
cg = apply_dropout(cg, dropout_inputs, config['dropout'])
########
Example 13: Identity
# Required import: from blocks.bricks.sequence_generators import SequenceGenerator [as alias]
# Or: from blocks.bricks.sequence_generators.SequenceGenerator import generate [as alias]
post_merge = Identity(),
merged_dim = dimension,
name="readout")
generator = SequenceGenerator(
readout=readout,
transition=transition,
fork = Fork(['inputs'], prototype=Identity()),
weights_init = initialization.Identity(1.),
biases_init = initialization.Constant(0.),
name="generator")
generator.push_initialization_config()
generator.transition.transition.weights_init = initialization.Identity(2.)
generator.initialize()
results = generator.generate(n_steps=n_steps,
batch_size=1, iterate=True,
return_initial_states = True)
results_cg = ComputationGraph(results)
results_tf = results_cg.get_theano_function()
generated_sequence_t = results_tf()[1]
generated_sequence_t.shape=(n_steps+1, dimension)
print generated_sequence_t
print generated_sequence
Example 14: NoLookupDecoder
# Required import: from blocks.bricks.sequence_generators import SequenceGenerator [as alias]
# Or: from blocks.bricks.sequence_generators.SequenceGenerator import generate [as alias]
#......... part of the code omitted here .........
attention_sources (string): Defines the sources used by the
attention model 's' for decoder
states, 'f' for feedback
readout_sources (string): Defines the sources used in the
readout network. 's' for decoder
states, 'f' for feedback, 'a' for
attention (context vector)
memory (string): Which external memory should be used
(cf. ``_initialize_attention``)
memory_size (int): Size of the external memory structure
seq_len (int): Maximum sentence length
init_strategy (string): How to initialize the RNN state
(cf. ``GRUInitialState``)
theano_seed: Random seed
"""
super(NoLookupDecoder, self).__init__(**kwargs)
self.vocab_size = vocab_size
self.embedding_dim = embedding_dim
self.state_dim = state_dim
self.representation_dim = representation_dim
self.theano_seed = theano_seed
# Initialize gru with special initial state
self.transition = GRUInitialState(
attended_dim=state_dim,
init_strategy=init_strategy,
dim=state_dim,
activation=Tanh(),
name='decoder')
# Initialize the attention mechanism
att_dim = att_dim if att_dim > 0 else state_dim
self.attention, src_names = _initialize_attention(attention_strategy,
seq_len,
self.transition,
representation_dim,
att_dim,
attention_sources,
readout_sources,
memory,
memory_size)
# Initialize the readout, note that SoftmaxEmitter emits -1 for
# initial outputs which is used by LookupFeedBackWMT15
maxout_dim = maxout_dim if maxout_dim > 0 else state_dim
readout = Readout(
source_names=src_names,
readout_dim=embedding_dim,
emitter=NoLookupEmitter(initial_output=-1,
readout_dim=embedding_dim,
cost_brick=SquaredError()),
# cost_brick=CategoricalCrossEntropy()),
feedback_brick=TrivialFeedback(output_dim=embedding_dim),
post_merge=InitializableFeedforwardSequence(
[Bias(dim=maxout_dim, name='maxout_bias').apply,
Maxout(num_pieces=2, name='maxout').apply,
Linear(input_dim=maxout_dim / 2, output_dim=embedding_dim,
use_bias=False, name='softmax0').apply,
Logistic(name='softmax1').apply]),
merged_dim=maxout_dim)
# Build sequence generator accordingly
self.sequence_generator = SequenceGenerator(
readout=readout,
transition=self.transition,
attention=self.attention,
fork=Fork([name for name in self.transition.apply.sequences
if name != 'mask'], prototype=Linear())
)
self.children = [self.sequence_generator]
@application(inputs=['representation', 'representation_mask',
'target_sentence_mask', 'target_sentence'],
outputs=['cost'])
def cost(self, representation, representation_mask,
target_sentence, target_sentence_mask):
target_sentence = target_sentence.T
target_sentence_mask = target_sentence_mask.T
# Get the cost matrix
cost = self.sequence_generator.cost_matrix(**{
'mask': target_sentence_mask,
'outputs': target_sentence,
'attended': representation,
'attended_mask': representation_mask}
)
return (cost * target_sentence_mask).sum() / \
target_sentence_mask.shape[1]
@application
def generate(self, source_shape, representation, **kwargs):
return self.sequence_generator.generate(
n_steps=2 * source_shape[1],
batch_size=source_shape[0],
attended=representation,
attended_mask=tensor.ones(source_shape).T,
**kwargs)
Example 15: main
# Required import: from blocks.bricks.sequence_generators import SequenceGenerator [as alias]
# Or: from blocks.bricks.sequence_generators.SequenceGenerator import generate [as alias]
def main():
logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")
parser = argparse.ArgumentParser(
"Case study of generating a Markov chain with RNN.",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
"mode", choices=["train", "sample"],
help="The mode to run. Use `train` to train a new model"
" and `sample` to sample a sequence generated by an"
" existing one.")
parser.add_argument(
"save_path", default="sine",
help="The part to save PyLearn2 model")
parser.add_argument(
"--steps", type=int, default=100,
help="Number of steps to plot")
parser.add_argument(
"--reset", action="store_true", default=False,
help="Start training from scratch")
args = parser.parse_args()
num_states = ChainDataset.num_states
if args.mode == "train":
# Experiment configuration
rng = numpy.random.RandomState(1)
batch_size = 50
seq_len = 100
dim = 10
feedback_dim = 8
# Build the bricks and initialize them
transition = GatedRecurrent(name="transition", activation=Tanh(),
dim=dim)
generator = SequenceGenerator(
LinearReadout(readout_dim=num_states, source_names=["states"],
emitter=SoftmaxEmitter(name="emitter"),
feedbacker=LookupFeedback(
num_states, feedback_dim, name='feedback'),
name="readout"),
transition,
weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
name="generator")
generator.push_initialization_config()
transition.weights_init = Orthogonal()
generator.initialize()
logger.debug("Parameters:\n" +
pprint.pformat(
[(key, value.get_value().shape) for key, value
in Selector(generator).get_params().items()],
width=120))
logger.debug("Markov chain entropy: {}".format(
ChainDataset.entropy))
logger.debug("Expected min error: {}".format(
-ChainDataset.entropy * seq_len * batch_size))
if os.path.isfile(args.save_path) and not args.reset:
model = Pylearn2Model.load(args.save_path)
else:
model = Pylearn2Model(generator)
# Build the cost computation graph.
# Note: would be probably nicer to make cost part of the model.
x = tensor.ltensor3('x')
cost = Pylearn2Cost(model.brick.cost(x[:, :, 0]).sum())
dataset = ChainDataset(rng, seq_len)
sgd = SGD(learning_rate=0.0001, cost=cost,
batch_size=batch_size, batches_per_iter=10,
monitoring_dataset=dataset,
monitoring_batch_size=batch_size,
monitoring_batches=1,
learning_rule=Pylearn2LearningRule(
SGDLearningRule(),
dict(training_objective=cost.cost)))
train = Pylearn2Train(dataset, model, algorithm=sgd,
save_path=args.save_path, save_freq=10)
train.main_loop()
elif args.mode == "sample":
model = Pylearn2Model.load(args.save_path)
generator = model.brick
sample = ComputationGraph(generator.generate(
n_steps=args.steps, batch_size=1, iterate=True)).function()
states, outputs, costs = [data[:, 0] for data in sample()]
numpy.set_printoptions(precision=3, suppress=True)
print("Generation cost:\n{}".format(costs.sum()))
freqs = numpy.bincount(outputs).astype(floatX)
freqs /= freqs.sum()
print("Frequencies:\n {} vs {}".format(freqs,
ChainDataset.equilibrium))
trans_freqs = numpy.zeros((num_states, num_states), dtype=floatX)
#......... part of the code omitted here .........