本文整理汇总了Python中blocks.bricks.sequence_generators.SequenceGenerator.cost_matrix方法的典型用法代码示例。如果您正苦于以下问题:Python SequenceGenerator.cost_matrix方法的具体用法?Python SequenceGenerator.cost_matrix怎么用?Python SequenceGenerator.cost_matrix使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类blocks.bricks.sequence_generators.SequenceGenerator的用法示例。
在下文中一共展示了SequenceGenerator.cost_matrix方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: build_model
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import cost_matrix [as 别名]
def build_model(alphabet_size, config):
    """Build a stacked-LSTM sequence generator and its training cost.

    Parameters
    ----------
    alphabet_size : int
        Used as the readout dimension and as both arguments of the
        lookup feedback brick.
    config : dict
        Reads ``'lstm_layers'``, ``'lstm_dim_<i>'`` for every layer index
        ``i`` and ``'lstm_init_width'``. If ``'batch_length'`` is present,
        ``'batch_overlap'`` is read as well and only the last
        ``batch_length - batch_overlap`` entries along the second axis of
        the cost matrix and of the mask contribute to the cost.

    Returns
    -------
    tuple
        ``(generator, cost)``; ``cost`` is named ``'bits_per_character'``.
    """
    layers = config['lstm_layers']
    uniform_width = config['lstm_init_width']
    dimensions = [config['lstm_dim_' + str(i)] for i in range(layers)]

    stack = [LSTM(dim=dim, use_bias=True,
                  weights_init=Uniform(width=uniform_width),
                  forget_init=Constant(1.))
             for dim in dimensions]
    recurrent_stack = RecurrentStack(stack, name='transition')

    # The readout is fed only by the states of the last layer in the stack.
    readout = Readout(readout_dim=alphabet_size,
                      source_names=['states#' + str(layers - 1)],
                      emitter=SoftmaxEmitter(name='emitter'),
                      feedback_brick=LookupFeedback(alphabet_size,
                                                    feedback_dim=alphabet_size,
                                                    name='feedback'),
                      name='readout')
    generator = SequenceGenerator(readout=readout,
                                  transition=recurrent_stack,
                                  weights_init=Uniform(width=uniform_width),
                                  biases_init=Constant(0),
                                  name='generator')
    generator.push_initialization_config()
    generator.initialize()

    x = tensor.lmatrix('features')
    mask = tensor.fmatrix('features_mask')
    cost_matrix = generator.cost_matrix(x, mask=mask)

    # Scaling by log2(e) converts the cost to base 2
    # (assumes cost_matrix is expressed in nats -- TODO confirm).
    log2e = math.log(math.e, 2)

    # Both configurations share one formula; only the window along the
    # second axis differs.
    if 'batch_length' in config:
        length = config['batch_length'] - config['batch_overlap']
        window = slice(-length, None)
    else:
        window = slice(None)
    cost = log2e * aggregation.mean(cost_matrix[:, window].sum(),
                                    mask[:, window].sum())
    cost.name = 'bits_per_character'
    return generator, cost
示例2: Decoder
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import cost_matrix [as 别名]
class Decoder(Initializable):
    """Decoder brick wrapping a ``SequenceGenerator`` without attention.

    Parameters
    ----------
    vocab_size : int
        Readout dimension of the generator.
    embedding_dim : int
        Stored on the instance; not otherwise used in this class.
    state_dim : int
        Dimension of the gated recurrent transition and of the merged
        readout input.
    theano_seed : optional
        Stored on the instance; not otherwise used in this class.
    """

    def __init__(self, vocab_size, embedding_dim,
                 state_dim, theano_seed=None, **kwargs):
        super(Decoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.theano_seed = theano_seed

        self.transition = GatedRecurrent(dim=state_dim,
                                         activation=Tanh(), name='decoder')
        readout = Readout(
            source_names=['states'],
            readout_dim=self.vocab_size,
            merged_dim=state_dim)
        # Fork into every sequence input of the transition except the mask.
        self.sequence_generator = SequenceGenerator(
            readout=readout,
            transition=self.transition,
            fork=Fork([name for name in self.transition.apply.sequences
                       if name != 'mask'], prototype=Linear()))
        self.children = [self.sequence_generator]

    @application(inputs=['representation', 'source_sentence_mask',
                         'target_sentence_mask', 'target_sentence'],
                 outputs=['cost'])
    def cost(self, representation, source_sentence_mask,
             target_sentence, target_sentence_mask):
        """Return the masked generation cost of ``target_sentence``.

        ``representation`` and ``source_sentence_mask`` are accepted for
        interface compatibility but are not used by this decoder.
        The target sentence and its mask are transposed before being
        handed to the sequence generator.
        """
        target_sentence = target_sentence.T
        target_sentence_mask = target_sentence_mask.T

        cost = self.sequence_generator.cost_matrix(
            mask=target_sentence_mask,
            outputs=target_sentence)

        # The application declares a 'cost' output, so a value has to be
        # returned: masked sum divided by the second axis of the
        # transposed mask (presumably the batch size -- TODO confirm).
        return ((cost * target_sentence_mask).sum() /
                target_sentence_mask.shape[1])
示例3: test_with_attention
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import cost_matrix [as 别名]
def test_with_attention():
    """Test a sequence generator with continuous outputs and attention."""
    rng = numpy.random.RandomState(1234)

    inp_dim = 2
    inp_len = 10
    attended_dim = 3
    attended_len = 11
    batch_size = 4
    n_steps = 30

    # For values
    def rand(size):
        return rng.uniform(size=size).astype(floatX)

    # For masks
    def generate_mask(length, batch_size):
        mask = numpy.ones((length, batch_size), dtype=floatX)
        # To make it look like real data: zero out a random-length tail
        # of every column, always keeping at least the first step.
        for i in range(batch_size):
            mask[1 + rng.randint(0, length - 1):, i] = 0.0
        return mask

    output_vals = rand((inp_len, batch_size, inp_dim))
    output_mask_vals = generate_mask(inp_len, batch_size)
    attended_vals = rand((attended_len, batch_size, attended_dim))
    attended_mask_vals = generate_mask(attended_len, batch_size)

    transition = TestTransition(
        dim=inp_dim, attended_dim=attended_dim, activation=Identity())
    attention = SequenceContentAttention(
        state_names=transition.apply.states, match_dim=inp_dim)
    generator = SequenceGenerator(
        Readout(
            readout_dim=inp_dim,
            source_names=[transition.apply.states[0],
                          attention.take_glimpses.outputs[0]],
            emitter=TestEmitter()),
        transition=transition,
        attention=attention,
        weights_init=IsotropicGaussian(0.1), biases_init=Constant(0),
        add_contexts=False, seed=1234)
    generator.initialize()

    # Test 'cost_matrix' method
    attended = tensor.tensor3("attended")
    attended_mask = tensor.matrix("attended_mask")
    outputs = tensor.tensor3('outputs')
    mask = tensor.matrix('mask')
    costs = generator.cost_matrix(outputs, mask,
                                  attended=attended,
                                  attended_mask=attended_mask)
    costs_vals = costs.eval({outputs: output_vals,
                             mask: output_mask_vals,
                             attended: attended_vals,
                             attended_mask: attended_mask_vals})
    assert costs_vals.shape == (inp_len, batch_size)
    assert_allclose(costs_vals.sum(), 13.5042, rtol=1e-5)

    # Test `generate` method
    results = (
        generator.generate(n_steps=n_steps, batch_size=attended.shape[1],
                           attended=attended, attended_mask=attended_mask))
    assert len(results) == 5
    states_vals, outputs_vals, glimpses_vals, weights_vals, costs_vals = (
        theano.function([attended, attended_mask], results)
        (attended_vals, attended_mask_vals))
    assert states_vals.shape == (n_steps, batch_size, inp_dim)
    assert states_vals.shape == outputs_vals.shape
    assert glimpses_vals.shape == (n_steps, batch_size, attended_dim)
    assert weights_vals.shape == (n_steps, batch_size, attended_len)
    assert costs_vals.shape == (n_steps, batch_size)
    assert_allclose(states_vals.sum(), 23.4172, rtol=1e-5)
    # There is no generation cost in this case, since generation is
    # deterministic
    assert_allclose(costs_vals.sum(), 0.0, rtol=1e-5)
    assert_allclose(weights_vals.sum(), 120.0, rtol=1e-5)
    assert_allclose(glimpses_vals.sum(), 199.2402, rtol=1e-5)
    assert_allclose(outputs_vals.sum(), -11.6008, rtol=1e-5)
示例4: main
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import cost_matrix [as 别名]
def main(mode, save_path, steps, num_batches):
    """Train a Markov-chain sequence generator or sample from a saved one.

    Parameters
    ----------
    mode : str
        ``"train"`` builds and runs the training main loop;
        ``"sample"`` loads a pickled main loop and prints statistics of
        one generated sequence. Any other value raises ``AssertionError``.
    save_path : str
        Checkpoint destination in train mode, pickle source in sample mode.
    steps : int
        Number of generation steps in sample mode.
    num_batches : int
        Number of batches after which training finishes.
    """
    num_states = MarkovChainDataset.num_states

    if mode == "train":
        # Experiment configuration
        rng = numpy.random.RandomState(1)
        batch_size = 50
        seq_len = 100
        dim = 10
        feedback_dim = 8

        # Build the bricks and initialize them
        transition = GatedRecurrent(name="transition", dim=dim,
                                    activation=Tanh())
        generator = SequenceGenerator(
            Readout(readout_dim=num_states, source_names=["states"],
                    emitter=SoftmaxEmitter(name="emitter"),
                    feedback_brick=LookupFeedback(
                        num_states, feedback_dim, name='feedback'),
                    name="readout"),
            transition,
            weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
            name="generator")
        generator.push_initialization_config()
        # Set after the generator-wide push, before initialization.
        transition.weights_init = Orthogonal()
        generator.initialize()

        # Give an idea of what's going on.
        logger.info("Parameters:\n" +
                    pprint.pformat(
                        [(key, value.get_value().shape) for key, value
                         in Selector(generator).get_params().items()],
                        width=120))
        logger.info("Markov chain entropy: {}".format(
            MarkovChainDataset.entropy))
        logger.info("Expected min error: {}".format(
            -MarkovChainDataset.entropy * seq_len))

        # Build the cost computation graph.
        x = tensor.lmatrix('data')
        cost = aggregation.mean(generator.cost_matrix(x[:, :]).sum(),
                                x.shape[1])
        cost.name = "sequence_log_likelihood"

        algorithm = GradientDescent(
            cost=cost, params=list(Selector(generator).get_params().values()),
            step_rule=Scale(0.001))
        main_loop = MainLoop(
            algorithm=algorithm,
            data_stream=DataStream(
                MarkovChainDataset(rng, seq_len),
                iteration_scheme=ConstantScheme(batch_size)),
            model=Model(cost),
            extensions=[FinishAfter(after_n_batches=num_batches),
                        TrainingDataMonitoring([cost], prefix="this_step",
                                               after_batch=True),
                        TrainingDataMonitoring([cost], prefix="average",
                                               every_n_batches=100),
                        Checkpoint(save_path, every_n_batches=500),
                        Printing(every_n_batches=100)])
        main_loop.run()
    elif mode == "sample":
        # NOTE(review): unpickling executes arbitrary code; only load
        # checkpoints from a trusted source.
        with open(save_path, "rb") as source:
            main_loop = cPickle.load(source)
        # Training stores ``Model(cost)`` on the main loop, so the
        # generator brick has to be fetched from the model's top bricks.
        generator = main_loop.model.get_top_bricks()[-1]

        sample = ComputationGraph(generator.generate(
            n_steps=steps, batch_size=1, iterate=True)).get_theano_function()

        # batch_size is 1, so keep only the first batch entry.
        states, outputs, costs = [data[:, 0] for data in sample()]

        numpy.set_printoptions(precision=3, suppress=True)
        print("Generation cost:\n{}".format(costs.sum()))

        freqs = numpy.bincount(outputs).astype(theano.config.floatX)
        freqs /= freqs.sum()
        print("Frequencies:\n {} vs {}".format(freqs,
                                               MarkovChainDataset.equilibrium))

        # Count transitions between consecutive outputs, then normalise
        # every row to obtain empirical transition frequencies.
        trans_freqs = numpy.zeros((num_states, num_states),
                                  dtype=theano.config.floatX)
        for a, b in zip(outputs, outputs[1:]):
            trans_freqs[a, b] += 1
        trans_freqs /= trans_freqs.sum(axis=1)[:, None]
        print("Transition frequencies:\n{}\nvs\n{}".format(
            trans_freqs, MarkovChainDataset.trans_prob))
    else:
        # Raised explicitly so the check survives ``python -O``.
        raise AssertionError("unknown mode: {!r}".format(mode))
示例5: SequenceGenerator
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import cost_matrix [as 别名]
name="readout")
# Build the generator from a readout and a transition defined earlier in
# the script (not visible in this excerpt).
generator = SequenceGenerator(readout=readout,
transition=transition,
name = "generator")
# Generator-wide initialization settings, pushed down to the children.
generator.weights_init = IsotropicGaussian(0.01)
generator.biases_init = Constant(0.)
generator.push_initialization_config()
# The transition biases are set after the generator-wide push and then
# pushed from the transition itself
# (positional arguments are presumably (std, mean) -- TODO confirm).
generator.transition.biases_init = IsotropicGaussian(0.01,1)
generator.transition.push_initialization_config()
generator.initialize()
# The training cost is the mean over all entries of the cost matrix.
cost_matrix = generator.cost_matrix(x)
cost = cost_matrix.mean()
cost.name = "sequence_log_likelihood"
cg = ComputationGraph(cost)
model = Model(cost)
#################
# Algorithm
#################
n_batches = 500
# Gradient descent over every parameter in the cost graph; steps are
# clipped at 10.0 before the Adam rule is applied.
algorithm = GradientDescent(
cost=cost, parameters=cg.parameters,
step_rule=CompositeRule([StepClipping(10.0), Adam(lr)]))
示例6: IsotropicGaussian
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import cost_matrix [as 别名]
# Generator-wide initialization settings, pushed down to the children.
generator.weights_init = IsotropicGaussian(0.01)
generator.biases_init = Constant(0.)
generator.push_initialization_config()

# The transition biases are set after the generator-wide push and then
# pushed from the transition itself.
generator.transition.biases_init = IsotropicGaussian(0.01, 1)
generator.transition.push_initialization_config()
generator.initialize()

# One zero-filled shared variable per output name of the transition's
# ``apply``; they are passed to ``cost_matrix`` as keyword arguments.
states = {name: shared_floatx_zeros((batch_size, hidden_size_recurrent))
          for name in generator.transition.apply.outputs}

cost_matrix = generator.cost_matrix(x, **states)

from theano import function

# Adding zero times ``start_flag`` makes it an input of the cost graph.
cost = cost_matrix.mean() + 0.*start_flag
cost.name = "nll"

cg = ComputationGraph(cost)
model = Model(cost)

# Overwrite every parameter whose Theano name matches "state_to_state"
# with a scaled identity matrix.
transition_matrix = VariableFilter(
    theano_name_regex="state_to_state")(cg.parameters)
for matr in transition_matrix:
    matr.set_value(0.98*numpy.eye(hidden_size_recurrent, dtype=floatX))
示例7: NoLookupDecoder
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import cost_matrix [as 别名]
#.........这里部分代码省略.........
attention_sources (string): Defines the sources used by the
attention model 's' for decoder
states, 'f' for feedback
readout_sources (string): Defines the sources used in the
readout network. 's' for decoder
states, 'f' for feedback, 'a' for
attention (context vector)
memory (string): Which external memory should be used
(cf. ``_initialize_attention``)
memory_size (int): Size of the external memory structure
seq_len (int): Maximum sentence length
init_strategy (string): How to initialize the RNN state
(cf. ``GRUInitialState``)
theano_seed: Random seed
"""
super(NoLookupDecoder, self).__init__(**kwargs)
self.vocab_size = vocab_size
self.embedding_dim = embedding_dim
self.state_dim = state_dim
self.representation_dim = representation_dim
self.theano_seed = theano_seed
# Initialize gru with special initial state
self.transition = GRUInitialState(
attended_dim=state_dim,
init_strategy=init_strategy,
dim=state_dim,
activation=Tanh(),
name='decoder')
# Initialize the attention mechanism
att_dim = att_dim if att_dim > 0 else state_dim
self.attention,src_names = _initialize_attention(attention_strategy,
seq_len,
self.transition,
representation_dim,
att_dim,
attention_sources,
readout_sources,
memory,
memory_size)
# Initialize the readout, note that SoftmaxEmitter emits -1 for
# initial outputs which is used by LookupFeedBackWMT15
maxout_dim = maxout_dim if maxout_dim > 0 else state_dim
readout = Readout(
source_names=src_names,
readout_dim=embedding_dim,
emitter=NoLookupEmitter(initial_output=-1,
readout_dim=embedding_dim,
cost_brick=SquaredError()),
# cost_brick=CategoricalCrossEntropy()),
feedback_brick=TrivialFeedback(output_dim=embedding_dim),
post_merge=InitializableFeedforwardSequence(
[Bias(dim=maxout_dim, name='maxout_bias').apply,
Maxout(num_pieces=2, name='maxout').apply,
Linear(input_dim=maxout_dim / 2, output_dim=embedding_dim,
use_bias=False, name='softmax0').apply,
Logistic(name='softmax1').apply]),
merged_dim=maxout_dim)
# Build sequence generator accordingly
self.sequence_generator = SequenceGenerator(
readout=readout,
transition=self.transition,
attention=self.attention,
fork=Fork([name for name in self.transition.apply.sequences
if name != 'mask'], prototype=Linear())
)
self.children = [self.sequence_generator]
@application(inputs=['representation', 'representation_mask',
                     'target_sentence_mask', 'target_sentence'],
             outputs=['cost'])
def cost(self, representation, representation_mask,
         target_sentence, target_sentence_mask):
    """Return the masked generation cost of ``target_sentence``.

    The target sentence and its mask are transposed before being handed
    to the sequence generator; ``representation`` and
    ``representation_mask`` are passed through unchanged as the attended
    sequence and its mask.
    """
    target_sentence = target_sentence.T
    target_sentence_mask = target_sentence_mask.T

    # Get the cost matrix
    cost = self.sequence_generator.cost_matrix(
        mask=target_sentence_mask,
        outputs=target_sentence,
        attended=representation,
        attended_mask=representation_mask)

    # Masked sum divided by the second axis of the transposed mask
    # (presumably the batch size -- TODO confirm).
    return ((cost * target_sentence_mask).sum() /
            target_sentence_mask.shape[1])
@application
def generate(self, source_shape, representation, **kwargs):
    """Generate sequences attending to ``representation``.

    Generation runs for twice ``source_shape[1]`` steps with a batch size
    of ``source_shape[0]``. The attended mask is all ones (no source
    position is masked out). Extra keyword arguments are forwarded to
    the sequence generator.
    """
    return self.sequence_generator.generate(
        n_steps=2 * source_shape[1],
        batch_size=source_shape[0],
        attended=representation,
        attended_mask=tensor.ones(source_shape).T,
        **kwargs)
示例8: SequenceGenerator
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import cost_matrix [as 别名]
# Build the generator from a readout, transition and attention defined
# earlier in the script (not visible in this excerpt).
generator = SequenceGenerator(readout=readout,
transition=transition,
attention = attention,
name = "generator")
generator.weights_init = IsotropicGaussian(0.01)
generator.biases_init = Constant(0.)
generator.initialize()
# The context MLP is initialized separately with the same settings.
mlp_context.weights_init = IsotropicGaussian(0.01)
mlp_context.biases_init = Constant(0.)
mlp_context.initialize()
#ipdb.set_trace()
# The attended sequence is the output of the context MLP.
cost_matrix = generator.cost_matrix(x, x_mask,
attended = mlp_context.apply(context))
# Total cost divided by the sum of the mask
# (presumably the number of unmasked positions -- TODO confirm).
cost = cost_matrix.sum()/x_mask.sum()
cost.name = "sequence_log_likelihood"
cg = ComputationGraph(cost)
model = Model(cost)
#################
# Algorithm
#################
# Gradient descent over every parameter in the cost graph; steps are
# clipped at 10.0 before the Adam rule is applied.
algorithm = GradientDescent(
cost=cost, parameters=cg.parameters,
step_rule=CompositeRule([StepClipping(10.0), Adam(lr)]))
train_monitor = TrainingDataMonitoring(
示例9: Decoder
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import cost_matrix [as 别名]
class Decoder(Initializable):
    """Attention-based decoder brick built around a ``SequenceGenerator``.

    Parameters
    ----------
    vocab_size : int
        Readout dimension and size of the feedback lookup.
    embedding_dim : int
        Dimension of the feedback lookup and of the intermediate
        ``softmax0`` projection.
    state_dim : int
        Dimension of the recurrent transition, of the attention match
        layer and of the merged readout input.
    representation_dim : int
        Dimension of the attended representation.
    """

    def __init__(self, vocab_size, embedding_dim, state_dim,
                 representation_dim, **kwargs):
        super(Decoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.representation_dim = representation_dim

        self.transition = GRUInitialState(
            attended_dim=state_dim, dim=state_dim,
            activation=Tanh(), name='decoder')
        self.attention = SequenceContentAttention(
            state_names=self.transition.apply.states,
            attended_dim=representation_dim,
            match_dim=state_dim, name="attention")

        readout = Readout(
            source_names=['states', 'feedback',
                          self.attention.take_glimpses.outputs[0]],
            readout_dim=self.vocab_size,
            emitter=SoftmaxEmitter(initial_output=-1),
            feedback_brick=LookupFeedbackWMT15(vocab_size, embedding_dim),
            post_merge=InitializableFeedforwardSequence(
                [Bias(dim=state_dim, name='maxout_bias').apply,
                 Maxout(num_pieces=2, name='maxout').apply,
                 # Maxout with two pieces halves the dimension; floor
                 # division keeps it an integer on Python 3 as well.
                 Linear(input_dim=state_dim // 2, output_dim=embedding_dim,
                        use_bias=False, name='softmax0').apply,
                 Linear(input_dim=embedding_dim, name='softmax1').apply]),
            merged_dim=state_dim)

        # Fork into every sequence input of the transition except the mask.
        self.sequence_generator = SequenceGenerator(
            readout=readout,
            transition=self.transition,
            attention=self.attention,
            fork=Fork([name for name in self.transition.apply.sequences
                       if name != 'mask'], prototype=Linear())
        )
        self.children = [self.sequence_generator]

    @application(inputs=['representation', 'source_sentence_mask',
                         'target_sentence_mask', 'target_sentence'],
                 outputs=['cost'])
    def cost(self, representation, source_sentence_mask,
             target_sentence, target_sentence_mask):
        """Return the masked generation cost of ``target_sentence``.

        The source mask, the target sentence and the target mask are
        transposed before being handed to the sequence generator.
        """
        source_sentence_mask = source_sentence_mask.T
        target_sentence = target_sentence.T
        target_sentence_mask = target_sentence_mask.T

        # Get the cost matrix
        cost = self.sequence_generator.cost_matrix(
            mask=target_sentence_mask,
            outputs=target_sentence,
            attended=representation,
            attended_mask=source_sentence_mask)

        # Masked sum divided by the second axis of the transposed mask
        # (presumably the batch size -- TODO confirm).
        return ((cost * target_sentence_mask).sum() /
                target_sentence_mask.shape[1])

    @application
    def generate(self, source_sentence, representation):
        """Generate sequences attending to ``representation``.

        Generation runs for twice ``source_sentence.shape[1]`` steps with
        a batch size of ``source_sentence.shape[0]``; the attended mask
        is all ones.
        """
        return self.sequence_generator.generate(
            n_steps=2 * source_sentence.shape[1],
            batch_size=source_sentence.shape[0],
            attended=representation,
            attended_mask=tensor.ones(source_sentence.shape).T)
示例10: SequenceGenerator
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import cost_matrix [as 别名]
name="readout")
# Build the generator from a readout and a transition defined earlier in
# the script (not visible in this excerpt).
generator = SequenceGenerator(readout=readout,
                              transition=transition,
                              name="generator")

generator.weights_init = IsotropicGaussian(0.01)
generator.biases_init = Constant(0.001)
generator.push_initialization_config()

#generator.transition.weights_init = initialization.Identity(0.98)
#generator.transition.biases_init = IsotropicGaussian(0.01,0.9)
generator.transition.push_initialization_config()
generator.initialize()

# Sum the cost matrix over its first axis, then average what remains.
cost_matrix = generator.cost_matrix(x, x_mask)
cost = cost_matrix.sum(axis=0).mean()
cost.name = "nll"

cg = ComputationGraph(cost)
model = Model(cost)

# Overwrite every parameter whose Theano name matches "state_to_state"
# with a scaled identity matrix.
transition_matrix = VariableFilter(
    theano_name_regex="state_to_state")(cg.parameters)
for matr in transition_matrix:
    matr.set_value(0.98*np.eye(hidden_size_recurrent, dtype=floatX))

# First variable produced by the readout application whose name matches
# "output"; the emitter splits it into its components.
readouts = VariableFilter(applications=[generator.readout.readout],
                          name_regex="output")(cg.variables)[0]
mean, sigma, corr, weight, penup = emitter.components(readouts)
示例11: shared_floatx_zeros
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import cost_matrix [as 别名]
generator.transition.push_initialization_config()
generator.initialize()

# One zero-filled shared variable per output name of the transition's
# ``apply``. They are currently not passed to ``cost_matrix`` below.
states = {name: shared_floatx_zeros((batch_size, hidden_size_recurrent))
          for name in generator.transition.apply.outputs}

# Take one batch from the stream to run the shape checks below.
x_tr = next(data_stream.get_epoch_iterator())

# Debug output: shape of the context MLP output for that batch.
# ``print(...)`` with a single argument behaves the same on Python 2 and 3.
print(function([f0, voiced], mlp_context.apply(context))(x_tr[0], x_tr[2]).shape)

cost_matrix = generator.cost_matrix(x, attended=mlp_context.apply(context))  # , **states)

# Debug output: shape of the cost matrix for that batch.
print(function([f0, x, voiced], cost_matrix)(x_tr[0], x_tr[1], x_tr[2]).shape)

# Adding zero times ``start_flag`` makes it an input of the cost graph.
cost = cost_matrix.mean() + 0.*start_flag
cost.name = "nll"

cg = ComputationGraph(cost)
model = Model(cost)

# Overwrite every parameter whose Theano name matches "state_to_state"
# with a scaled identity matrix.
transition_matrix = VariableFilter(
    theano_name_regex="state_to_state")(cg.parameters)
for matr in transition_matrix:
    matr.set_value(0.98*numpy.eye(hidden_size_recurrent, dtype=floatX))

from play.utils import regex_final_value
示例12: main
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import cost_matrix [as 别名]
def main(mode, save_path, steps, num_batches, load_params):
chars = (list(string.ascii_uppercase) + list(range(10)) +
[' ', '.', ',', '\'', '"', '!', '?', '<UNK>'])
char_to_ind = {char: i for i, char in enumerate(chars)}
ind_to_char = {v: k for k, v in char_to_ind.iteritems()}
train_dataset = TextFile(['/Tmp/serdyuk/data/wsj_text_train'],
char_to_ind, bos_token=None, eos_token=None,
level='character')
valid_dataset = TextFile(['/Tmp/serdyuk/data/wsj_text_valid'],
char_to_ind, bos_token=None, eos_token=None,
level='character')
vocab_size = len(char_to_ind)
logger.info('Dictionary size: {}'.format(vocab_size))
if mode == 'continue':
continue_training(save_path)
return
elif mode == "sample":
main_loop = load(open(save_path, "rb"))
generator = main_loop.model.get_top_bricks()[-1]
sample = ComputationGraph(generator.generate(
n_steps=steps, batch_size=1, iterate=True)).get_theano_function()
states, outputs, costs = [data[:, 0] for data in sample()]
print("".join([ind_to_char[s] for s in outputs]))
numpy.set_printoptions(precision=3, suppress=True)
print("Generation cost:\n{}".format(costs.sum()))
freqs = numpy.bincount(outputs).astype(floatX)
freqs /= freqs.sum()
trans_freqs = numpy.zeros((vocab_size, vocab_size), dtype=floatX)
for a, b in zip(outputs, outputs[1:]):
trans_freqs[a, b] += 1
trans_freqs /= trans_freqs.sum(axis=1)[:, None]
return
# Experiment configuration
batch_size = 20
dim = 650
feedback_dim = 650
valid_stream = valid_dataset.get_example_stream()
valid_stream = Batch(valid_stream,
iteration_scheme=ConstantScheme(batch_size))
valid_stream = Padding(valid_stream)
valid_stream = Mapping(valid_stream, _transpose)
# Build the bricks and initialize them
transition = GatedRecurrent(name="transition", dim=dim,
activation=Tanh())
generator = SequenceGenerator(
Readout(readout_dim=vocab_size, source_names=transition.apply.states,
emitter=SoftmaxEmitter(name="emitter"),
feedback_brick=LookupFeedback(
vocab_size, feedback_dim, name='feedback'),
name="readout"),
transition,
weights_init=Uniform(std=0.04), biases_init=Constant(0),
name="generator")
generator.push_initialization_config()
transition.weights_init = Orthogonal()
transition.push_initialization_config()
generator.initialize()
# Build the cost computation graph.
features = tensor.lmatrix('features')
features_mask = tensor.matrix('features_mask')
cost_matrix = generator.cost_matrix(
features, mask=features_mask)
batch_cost = cost_matrix.sum()
cost = aggregation.mean(
batch_cost,
features.shape[1])
cost.name = "sequence_log_likelihood"
char_cost = aggregation.mean(
batch_cost, features_mask.sum())
char_cost.name = 'character_log_likelihood'
ppl = 2 ** (cost / numpy.log(2))
ppl.name = 'ppl'
bits_per_char = char_cost / tensor.log(2)
bits_per_char.name = 'bits_per_char'
length = features.shape[0]
length.name = 'length'
model = Model(batch_cost)
if load_params:
params = load_parameter_values(save_path)
model.set_parameter_values(params)
if mode == "train":
# Give an idea of what's going on.
logger.info("Parameters:\n" +
pprint.pformat(
[(key, value.get_value().shape) for key, value
in Selector(generator).get_parameters().items()],
#.........这里部分代码省略.........
示例13: test_sequence_generator_with_lm
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import cost_matrix [as 别名]
def test_sequence_generator_with_lm():
    """Test a sequence generator that is given a language model.

    The readout reads both ``"states"`` and ``"lm_states"``. The test
    covers ``cost_matrix``, ``cost``, the auxiliary per-element cost
    variable, ``generate`` and the mask-agnostic behaviour of the cost.
    """
    floatX = theano.config.floatX
    rng = numpy.random.RandomState(1234)

    readout_dim = 5
    feedback_dim = 3
    dim = 20
    batch_size = 30
    n_steps = 10

    transition = GatedRecurrent(dim=dim, activation=Tanh(),
                                weights_init=Orthogonal())
    language_model = SequenceGenerator(
        Readout(readout_dim=readout_dim, source_names=["states"],
                emitter=SoftmaxEmitter(theano_seed=1234),
                feedback_brick=LookupFeedback(readout_dim, dim,
                                              name='feedback')),
        SimpleRecurrent(dim, Tanh()),
        name='language_model')
    generator = SequenceGenerator(
        Readout(readout_dim=readout_dim, source_names=["states", "lm_states"],
                emitter=SoftmaxEmitter(theano_seed=1234),
                feedback_brick=LookupFeedback(readout_dim,
                                              feedback_dim)),
        transition,
        language_model=language_model,
        weights_init=IsotropicGaussian(0.1), biases_init=Constant(0),
        seed=1234)
    generator.initialize()

    # Test 'cost_matrix' method
    y = tensor.lmatrix('y')
    y.tag.test_value = numpy.zeros((15, batch_size), dtype='int64')
    mask = tensor.matrix('mask')
    mask.tag.test_value = numpy.ones((15, batch_size))

    costs = generator.cost_matrix(y, mask)
    assert costs.ndim == 2
    costs_fun = theano.function([y, mask], [costs])
    y_test = rng.randint(readout_dim, size=(n_steps, batch_size))
    m_test = numpy.ones((n_steps, batch_size), dtype=floatX)
    costs_val = costs_fun(y_test, m_test)[0]
    assert costs_val.shape == (n_steps, batch_size)
    assert_allclose(costs_val.sum(), 483.153, rtol=1e-5)

    # Test 'cost' method
    cost = generator.cost(y, mask)
    assert cost.ndim == 0
    cost_val = theano.function([y, mask], cost)(y_test, m_test)
    assert_allclose(cost_val, 16.105, rtol=1e-5)

    # Test 'AUXILIARY' variable 'per_sequence_element' in 'cost' method
    cg = ComputationGraph([cost])
    var_filter = VariableFilter(roles=[AUXILIARY])
    aux_var_name = '_'.join([generator.name, generator.cost.name,
                             'per_sequence_element'])
    cost_per_el = [el for el in var_filter(cg.variables)
                   if el.name == aux_var_name][0]
    assert cost_per_el.ndim == 0
    cost_per_el_val = theano.function([y, mask], [cost_per_el])(y_test, m_test)
    assert_allclose(cost_per_el_val, 1.61051, rtol=1e-5)

    # Test generate
    states, outputs, lm_states, costs = generator.generate(
        iterate=True, batch_size=batch_size, n_steps=n_steps)
    cg = ComputationGraph([states, outputs, costs])
    states_val, outputs_val, costs_val = theano.function(
        [], [states, outputs, costs],
        updates=cg.updates)()
    assert states_val.shape == (n_steps, batch_size, dim)
    assert outputs_val.shape == (n_steps, batch_size)
    assert outputs_val.dtype == 'int64'
    assert costs_val.shape == (n_steps, batch_size)
    assert_allclose(states_val.sum(), -4.88367, rtol=1e-5)
    assert_allclose(costs_val.sum(), 486.681, rtol=1e-5)
    assert outputs_val.sum() == 627

    # Test masks agnostic results of cost: the second column of the
    # padded batch holds the same two unmasked tokens as the short batch.
    cost1 = costs_fun([[1], [2]], [[1], [1]])[0]
    cost2 = costs_fun([[3, 1], [4, 2], [2, 0]],
                      [[1, 1], [1, 1], [1, 0]])[0]
    assert_allclose(cost1.sum(), cost2[:, 1].sum(), rtol=1e-5)
示例14: main_rnn
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import cost_matrix [as 别名]
def main_rnn(config):
    """Train a four-layer LSTM sequence generator on an MFCC stream.

    Parameters
    ----------
    config : dict
        Reads ``'target_size'``, ``'lstm_hidden_size'``,
        ``'learning_rate'``, ``'batch_size'``, ``'source_size'``,
        ``'num_examples'`` and ``'num_batches'``.
    """
    x = tensor.tensor3('features')

    emitter = TestEmitter()
    print(type(emitter.cost))

    n_samples = config['target_size']
    hidden_size = config['lstm_hidden_size']

    transition = RecurrentStack(
        [LSTM(hidden_size) for _ in range(4)],
        name="transition", skip_connections=False)

    # Only the 'states' outputs of the stack feed the readout.
    source_names = [name for name in transition.apply.states
                    if 'states' in name]

    readout = Readout(emitter, readout_dim=hidden_size,
                      source_names=source_names, feedback_brick=None,
                      merge=None, merge_prototype=None, post_merge=None,
                      merged_dim=None)

    seqgen = SequenceGenerator(readout, transition, attention=None,
                               add_contexts=False)

    seqgen.weights_init = IsotropicGaussian(0.01)
    seqgen.biases_init = Constant(0.)
    seqgen.push_initialization_config()

    # The transition biases are set after the generator-wide push and
    # then pushed from the transition itself.
    seqgen.transition.biases_init = IsotropicGaussian(0.01, 1)
    seqgen.transition.push_initialization_config()
    seqgen.initialize()

    states = seqgen.transition.apply.outputs
    print('states', states)

    # One zero-filled shared variable per transition output name, passed
    # to ``cost_matrix`` as keyword arguments.
    states = {name: shared_floatx_zeros((n_samples, hidden_size))
              for name in states}

    cost_matrix = seqgen.cost_matrix(x, **states)
    cost = cost_matrix.mean()
    cost.name = "nll"

    cg = ComputationGraph(cost)
    # NOTE(review): the model is built but not handed to the main loop.
    model = Model(cost)

    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Scale(learning_rate=config['learning_rate']))

    # Getting the stream
    train_stream = MFCC.get_stream(config['batch_size'],
                                   config['source_size'],
                                   config['target_size'],
                                   config['num_examples'])

    # Monitoring stuff
    extensions = [Timing(),
                  FinishAfter(after_n_batches=config['num_batches']),
                  TrainingDataMonitoring([cost], prefix="train",
                                         every_n_batches=1),
                  ProgressBar(),
                  Printing(every_n_batches=1)]

    main_loop = MainLoop(
        algorithm,
        train_stream,
        extensions=extensions)
    main_loop.run()
示例15: test_sequence_generator
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import cost_matrix [as 别名]
def test_sequence_generator():
    """Test a sequence generator with no contexts and continuous outputs.

    Such sequence generators can be used to model e.g. dynamical systems.

    """
    rng = numpy.random.RandomState(1234)

    output_dim = 1
    dim = 20
    batch_size = 30
    n_steps = 10

    transition = SimpleRecurrent(activation=Tanh(), dim=dim,
                                 weights_init=Orthogonal())
    generator = SequenceGenerator(
        Readout(readout_dim=output_dim, source_names=["states"],
                emitter=TestEmitter()),
        transition,
        weights_init=IsotropicGaussian(0.1), biases_init=Constant(0.0),
        seed=1234)
    generator.initialize()

    # Test 'cost_matrix' method
    y = tensor.tensor3('y')
    mask = tensor.matrix('mask')
    costs = generator.cost_matrix(y, mask)
    assert costs.ndim == 2
    y_test = rng.uniform(size=(n_steps, batch_size, output_dim)).astype(floatX)
    m_test = numpy.ones((n_steps, batch_size), dtype=floatX)
    costs_val = theano.function([y, mask], [costs])(y_test, m_test)[0]
    assert costs_val.shape == (n_steps, batch_size)
    assert_allclose(costs_val.sum(), 115.593, rtol=1e-5)

    # Test 'cost' method
    cost = generator.cost(y, mask)
    assert cost.ndim == 0
    cost_val = theano.function([y, mask], [cost])(y_test, m_test)
    assert_allclose(cost_val, 3.8531, rtol=1e-5)

    # Test 'AUXILIARY' variable 'per_sequence_element' in 'cost' method
    cg = ComputationGraph([cost])
    var_filter = VariableFilter(roles=[AUXILIARY])
    aux_var_name = '_'.join([generator.name, generator.cost.name,
                             'per_sequence_element'])
    cost_per_el = [el for el in var_filter(cg.variables)
                   if el.name == aux_var_name][0]
    assert cost_per_el.ndim == 0
    cost_per_el_val = theano.function([y, mask], [cost_per_el])(y_test, m_test)
    assert_allclose(cost_per_el_val, 0.38531, rtol=1e-5)

    # Test 'generate' method
    states, outputs, costs = [variable.eval() for variable in
                              generator.generate(
                                  states=rng.uniform(
                                      size=(batch_size, dim)).astype(floatX),
                                  iterate=True, batch_size=batch_size,
                                  n_steps=n_steps)]
    assert states.shape == (n_steps, batch_size, dim)
    assert outputs.shape == (n_steps, batch_size, output_dim)
    assert costs.shape == (n_steps, batch_size)
    assert_allclose(outputs.sum(), -0.33683, rtol=1e-5)
    assert_allclose(states.sum(), 15.7909, rtol=1e-5)
    # There is no generation cost in this case, since generation is
    # deterministic
    assert_allclose(costs.sum(), 0.0)