本文整理汇总了Python中blocks.bricks.sequence_generators.SequenceGenerator.push_initialization_config方法的典型用法代码示例。如果您正苦于以下问题:Python SequenceGenerator.push_initialization_config方法的具体用法?Python SequenceGenerator.push_initialization_config怎么用?Python SequenceGenerator.push_initialization_config使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类blocks.bricks.sequence_generators.SequenceGenerator的用法示例。
在下文中一共展示了SequenceGenerator.push_initialization_config方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: getRnnGenerator
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import push_initialization_config [as 别名]
def getRnnGenerator(vocab_size, hidden_dim, input_dim=512):
    """Build and initialize a sequence generator over a simple recurrent layer.

    The readout has dimension ``vocab_size``, reads the transition's
    ``states`` and uses a ``SoftmaxEmitter``.  Feedback goes through a
    ``LookupFeedback`` brick built from ``(vocab_size, input_dim)``.  The
    recurrent transition has ``hidden_dim`` units and a ``Tanh`` activation.

    Parameters
    ----------
    vocab_size : size of the vocabulary (readout dimension).
    hidden_dim : dimension of the recurrent transition.
    input_dim : feedback dimension, 512 by default.

    Returns
    -------
    The initialized ``SequenceGenerator`` brick.
    """
    emitter = SoftmaxEmitter(name="emitter")
    feedback = LookupFeedback(vocab_size, input_dim, name='feedback')
    readout = Readout(
        readout_dim=vocab_size,
        source_names=["states"],
        emitter=emitter,
        feedback_brick=feedback,
        name="readout",
    )
    recurrent = MySimpleRecurrent(
        name="transition",
        activation=Tanh(),
        dim=hidden_dim,
    )
    rnn_generator = SequenceGenerator(
        readout,
        recurrent,
        weights_init=IsotropicGaussian(0.01),
        biases_init=Constant(0),
        name="generator",
    )

    # Push the generator-level init schemes to the children first, then
    # assign the transition's weight initializer explicitly before the
    # parameters are actually initialized.
    rnn_generator.push_initialization_config()
    rnn_generator.transition.weights_init = IsotropicGaussian(0.01)
    rnn_generator.initialize()
    return rnn_generator
示例2: build_model
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import push_initialization_config [as 别名]
def build_model(alphabet_size, config):
    """Build a stacked-LSTM sequence generator and its bits-per-character cost.

    Parameters
    ----------
    alphabet_size : readout dimension and lookup-feedback size.
    config : mapping read for ``lstm_layers``, ``lstm_dim_<i>`` (one entry
        per layer), ``lstm_init_width`` and, optionally, ``batch_length``
        together with ``batch_overlap``.

    Returns
    -------
    ``(generator, cost)`` where ``cost`` is named ``bits_per_character``.
    """
    num_layers = config['lstm_layers']
    layer_dims = [config['lstm_dim_' + str(i)] for i in range(num_layers)]
    init_width = config['lstm_init_width']

    lstm_layers = [
        LSTM(dim=layer_dim, use_bias=True,
             weights_init=Uniform(width=init_width),
             forget_init=Constant(1.))
        for layer_dim in layer_dims
    ]
    recurrent_stack = RecurrentStack(lstm_layers, name='transition')

    # The readout only looks at the states of the topmost LSTM layer.
    top_states = 'states#' + str(num_layers - 1)
    emitter = SoftmaxEmitter(name='emitter')
    feedback = LookupFeedback(alphabet_size,
                              feedback_dim=alphabet_size,
                              name='feedback')
    readout = Readout(readout_dim=alphabet_size,
                      source_names=[top_states],
                      emitter=emitter,
                      feedback_brick=feedback,
                      name='readout')

    generator = SequenceGenerator(readout=readout,
                                  transition=recurrent_stack,
                                  weights_init=Uniform(width=init_width),
                                  biases_init=Constant(0),
                                  name='generator')
    generator.push_initialization_config()
    generator.initialize()

    x = tensor.lmatrix('features')
    mask = tensor.fmatrix('features_mask')
    cost_matrix = generator.cost_matrix(x, mask=mask)

    # Factor that converts natural-log costs to base 2 (log2(e)).
    log2e = math.log(math.e, 2)

    if 'batch_length' in config:
        # Only the trailing, non-overlapping part of each batch is scored.
        length = config['batch_length'] - config['batch_overlap']
        scored_costs = cost_matrix[:, -length:]
        scored_mask = mask[:, -length:]
    else:
        scored_costs = cost_matrix[:, :]
        scored_mask = mask[:, :]

    cost = log2e * aggregation.mean(scored_costs.sum(), scored_mask.sum())
    cost.name = 'bits_per_character'
    return generator, cost
示例3: Readout
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import push_initialization_config [as 别名]
state_names=source_names, state_dims=[hidden_size_recurrent], attended_dim=context_size, name="attention"
)
# Readout brick: besides `source_names` it also consumes the "feedback" and
# "glimpses" sources; `emitter` and `feedback` are defined above this excerpt.
readout = Readout(
readout_dim=hidden_size_recurrent,
source_names=source_names + ["feedback"] + ["glimpses"],
emitter=emitter,
feedback_brick=feedback,
name="readout",
)
# Combine the readout with the (out-of-view) transition and attention bricks.
generator = SequenceGenerator(readout=readout, transition=transition, attention=attention, name="generator")
# Set the generator-level initialization schemes and push them to the children.
generator.weights_init = IsotropicGaussian(0.01)
generator.biases_init = Constant(0.0)
generator.push_initialization_config()
# Override only the transition's bias initializer after the first push, then
# push again from the transition so the override reaches its own children.
# NOTE(review): the arguments are presumably (std=0.01, mean=1) - confirm
# against the IsotropicGaussian signature.
generator.transition.biases_init = IsotropicGaussian(0.01, 1)
generator.transition.push_initialization_config()
generator.initialize()
# `lookup` is configured and initialized separately from the generator.
lookup.weights_init = IsotropicGaussian(0.001)
lookup.biases_init = Constant(0.0)
lookup.initialize()
# states = {}
# Names of every output of the transition's `apply`, except "step".
states = [state for state in generator.transition.apply.outputs if state != "step"]
# ipdb.set_trace()
示例4: main
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import push_initialization_config [as 别名]
def main():
    """Command-line entry point: train or sample an RNN Markov-chain generator.

    Parses ``mode`` (``train`` or ``sample``), a file ``prefix`` and
    ``--steps``, then builds a ``SequenceGenerator`` over a ``GatedRecurrent``
    transition.  In ``train`` mode the generator is initialized and trained
    through ``GroundhogModel``/``SGD``/``MainLoop``.  In ``sample`` mode the
    parameters are loaded from ``<prefix>model.npz`` and symbol and
    transition frequencies of one generated sequence are printed next to the
    reference values exposed by ``ChainIterator``.

    Raises
    ------
    AssertionError
        If ``mode`` is neither ``train`` nor ``sample`` (not reachable
        through argparse, which restricts the choices).
    """
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, so passing it positionally
    # would replace the program name in the usage line.
    parser = argparse.ArgumentParser(
        description="Case study of generating a Markov chain with RNN.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "mode", choices=["train", "sample"],
        help="The mode to run. Use `train` to train a new model"
             " and `sample` to sample a sequence generated by an"
             " existing one.")
    # NOTE(review): without nargs="?" this positional is required, so the
    # default is never used as a value; kept as is to preserve the CLI.
    parser.add_argument(
        "prefix", default="sine",
        help="The prefix for model, timing and state files")
    parser.add_argument(
        "--steps", type=int, default=100,
        help="Number of steps to plot")
    args = parser.parse_args()

    dim = 10
    num_states = ChainIterator.num_states
    feedback_dim = 8

    transition = GatedRecurrent(name="transition", activation=Tanh(), dim=dim)
    generator = SequenceGenerator(
        LinearReadout(readout_dim=num_states, source_names=["states"],
                      emitter=SoftmaxEmitter(name="emitter"),
                      feedbacker=LookupFeedback(
                          num_states, feedback_dim, name='feedback'),
                      name="readout"),
        transition,
        weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
        name="generator")
    # Parameters are allocated up front so their shapes can be logged in
    # both modes.
    generator.allocate()
    logger.debug("Parameters:\n" +
                 pprint.pformat(
                     [(key, value.get_value().shape) for key, value
                      in Selector(generator).get_params().items()],
                     width=120))

    if args.mode == "train":
        rng = numpy.random.RandomState(1)
        batch_size = 50

        # Push the generator-level schemes first, then override the
        # transition's weight initializer before initializing.
        generator.push_initialization_config()
        transition.weights_init = Orthogonal()
        generator.initialize()
        logger.debug("transition.weights_init={}".format(
            transition.weights_init))

        cost = generator.cost(tensor.lmatrix('x')).sum()
        gh_model = GroundhogModel(generator, cost)
        state = GroundhogState(args.prefix, batch_size,
                               learning_rate=0.0001).as_dict()
        data = ChainIterator(rng, 100, batch_size)
        trainer = SGD(gh_model, state, data)
        main_loop = MainLoop(data, None, None, gh_model, trainer, state, None)
        main_loop.main()
    elif args.mode == "sample":
        load_params(generator, args.prefix + "model.npz")

        sample = ComputationGraph(generator.generate(
            n_steps=args.steps, batch_size=1, iterate=True)).function()

        # Keep only column 0 of each result (batch_size is 1).
        states, outputs, costs = [data[:, 0] for data in sample()]

        numpy.set_printoptions(precision=3, suppress=True)
        print("Generation cost:\n{}".format(costs.sum()))

        freqs = numpy.bincount(outputs).astype(floatX)
        freqs /= freqs.sum()
        print("Frequencies:\n {} vs {}".format(freqs,
                                               ChainIterator.equilibrium))

        # Count consecutive (previous, next) symbol pairs, then normalize
        # each row to get empirical transition frequencies.
        trans_freqs = numpy.zeros((num_states, num_states), dtype=floatX)
        for a, b in zip(outputs, outputs[1:]):
            trans_freqs[a, b] += 1
        trans_freqs /= trans_freqs.sum(axis=1)[:, None]
        print("Transition frequencies:\n{}\nvs\n{}".format(
            trans_freqs, ChainIterator.trans_prob))
    else:
        # Raised explicitly so the check survives ``python -O``, which
        # strips ``assert`` statements.
        raise AssertionError("unknown mode: {!r}".format(args.mode))
示例5: main
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import push_initialization_config [as 别名]
def main(mode, save_path, steps, num_batches):
    """Train an RNN Markov-chain generator or sample from a saved main loop.

    Parameters
    ----------
    mode : ``"train"`` or ``"sample"``.
    save_path : path the ``Checkpoint`` extension writes to in train mode
        and the pickled main loop is read from in sample mode.
    steps : number of generation steps (``n_steps``) in sample mode.
    num_batches : training stops after this many batches
        (``FinishAfter(after_n_batches=...)``).

    Raises
    ------
    AssertionError
        If ``mode`` is neither ``"train"`` nor ``"sample"``.
    """
    num_states = MarkovChainDataset.num_states

    if mode == "train":
        # Experiment configuration
        rng = numpy.random.RandomState(1)
        batch_size = 50
        seq_len = 100
        dim = 10
        feedback_dim = 8

        # Build the bricks and initialize them
        transition = GatedRecurrent(name="transition", dim=dim,
                                    activation=Tanh())
        generator = SequenceGenerator(
            Readout(readout_dim=num_states, source_names=["states"],
                    emitter=SoftmaxEmitter(name="emitter"),
                    feedback_brick=LookupFeedback(
                        num_states, feedback_dim, name='feedback'),
                    name="readout"),
            transition,
            weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
            name="generator")
        # Push the generator-level schemes first, then override the
        # transition's weight initializer before initializing.
        generator.push_initialization_config()
        transition.weights_init = Orthogonal()
        generator.initialize()

        # Give an idea of what's going on.
        logger.info("Parameters:\n" +
                    pprint.pformat(
                        [(key, value.get_value().shape) for key, value
                         in Selector(generator).get_params().items()],
                        width=120))
        logger.info("Markov chain entropy: {}".format(
            MarkovChainDataset.entropy))
        logger.info("Expected min error: {}".format(
            -MarkovChainDataset.entropy * seq_len))

        # Build the cost computation graph.
        x = tensor.lmatrix('data')
        cost = aggregation.mean(generator.cost_matrix(x[:, :]).sum(),
                                x.shape[1])
        cost.name = "sequence_log_likelihood"

        algorithm = GradientDescent(
            cost=cost, params=list(Selector(generator).get_params().values()),
            step_rule=Scale(0.001))
        main_loop = MainLoop(
            algorithm=algorithm,
            data_stream=DataStream(
                MarkovChainDataset(rng, seq_len),
                iteration_scheme=ConstantScheme(batch_size)),
            model=Model(cost),
            extensions=[FinishAfter(after_n_batches=num_batches),
                        TrainingDataMonitoring([cost], prefix="this_step",
                                               after_batch=True),
                        TrainingDataMonitoring([cost], prefix="average",
                                               every_n_batches=100),
                        Checkpoint(save_path, every_n_batches=500),
                        Printing(every_n_batches=100)])
        main_loop.run()
    elif mode == "sample":
        # The file is closed deterministically instead of being left to the
        # garbage collector.
        # NOTE: unpickling executes arbitrary code - only load checkpoints
        # from a trusted source.
        with open(save_path, "rb") as checkpoint:
            main_loop = cPickle.load(checkpoint)
        generator = main_loop.model

        sample = ComputationGraph(generator.generate(
            n_steps=steps, batch_size=1, iterate=True)).get_theano_function()

        # Keep only column 0 of each result (batch_size is 1).
        states, outputs, costs = [data[:, 0] for data in sample()]

        numpy.set_printoptions(precision=3, suppress=True)
        print("Generation cost:\n{}".format(costs.sum()))

        freqs = numpy.bincount(outputs).astype(theano.config.floatX)
        freqs /= freqs.sum()
        print("Frequencies:\n {} vs {}".format(freqs,
                                               MarkovChainDataset.equilibrium))

        # Count consecutive (previous, next) symbol pairs, then normalize
        # each row to get empirical transition frequencies.
        trans_freqs = numpy.zeros((num_states, num_states),
                                  dtype=theano.config.floatX)
        for a, b in zip(outputs, outputs[1:]):
            trans_freqs[a, b] += 1
        trans_freqs /= trans_freqs.sum(axis=1)[:, None]
        print("Transition frequencies:\n{}\nvs\n{}".format(
            trans_freqs, MarkovChainDataset.trans_prob))
    else:
        # Raised explicitly so an unknown mode is not silently ignored under
        # ``python -O``, which strips ``assert`` statements.
        raise AssertionError("unknown mode: {!r}".format(mode))
示例6: train
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import push_initialization_config [as 别名]
def train():
    """Resume or start training of a character-level generator, then run it.

    When ``trainingdata.tar`` exists it is loaded and used as the main loop.
    Otherwise ``warpeace_input.txt`` is encoded to ``warpeace.hdf5``, a
    ``SequenceGenerator`` over a ``GatedRecurrent`` transition is built and
    initialized, and a new ``MainLoop`` is created that checkpoints to
    ``trainingdata.tar`` every 10 epochs.
    """
    checkpoint_path = 'trainingdata.tar'

    if os.path.isfile(checkpoint_path):
        with open(checkpoint_path, 'rb') as checkpoint:
            main = load(checkpoint)
    else:
        hidden_size = 512
        filename = 'warpeace.hdf5'

        encoder = HDF5CharEncoder('warpeace_input.txt', 1000)
        encoder.write(filename)
        alphabet_len = encoder.length

        x = theano.tensor.lmatrix('x')

        feedback = LookupFeedback(alphabet_len, hidden_size, name='feedback')
        emitter = RandomSoftmaxEmitter()
        readout = Readout(
            readout_dim=alphabet_len,
            feedback_brick=feedback,
            source_names=['states'],
            emitter=emitter,
            name='readout',
        )

        transition = GatedRecurrent(activation=Tanh(), dim=hidden_size)
        transition.weights_init = IsotropicGaussian(0.01)

        gen = SequenceGenerator(
            readout=readout,
            transition=transition,
            weights_init=IsotropicGaussian(0.01),
            biases_init=Constant(0),
            name='sequencegenerator',
        )
        gen.push_initialization_config()
        gen.initialize()

        cost = gen.cost(outputs=x)
        cost.name = 'cost'
        cg = ComputationGraph(cost)

        algorithm = GradientDescent(
            cost=cost,
            parameters=cg.parameters,
            step_rule=Scale(0.5),
        )

        train_set = encoder.get_dataset()
        scheme = SequentialScheme(train_set.num_examples, batch_size=128)
        train_stream = DataStream.default_stream(
            train_set, iteration_scheme=scheme)

        extensions = [
            FinishAfter(),
            Printing(),
            Checkpoint(checkpoint_path, every_n_epochs=10),
            ShowOutput(every_n_epochs=10),
        ]
        main = MainLoop(
            model=Model(cost),
            data_stream=train_stream,
            algorithm=algorithm,
            extensions=extensions,
        )

    # NOTE(review): the source lost its indentation; running the loop after
    # both branches is assumed, since a loaded loop would otherwise be unused.
    main.run()
示例7: Readout
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import push_initialization_config [as 别名]
# Readout over the "states#2" source with a softmax emitter; feedback is a
# lookup whose feedback_dim equals alphabet_size.
readout = Readout(readout_dim = alphabet_size,
source_names=["states#2"],
emitter=SoftmaxEmitter(name="emitter"),
feedback_brick=LookupFeedback(alphabet_size,
feedback_dim=alphabet_size,
name="feedback"),
name="readout")
# `rnn` is the transition brick, built above this excerpt.
seq_gen = SequenceGenerator(readout=readout,
transition=rnn,
weights_init=IsotropicGaussian(0.01),
biases_init=Constant(0),
name="generator")
# Push the generator-level schemes, then replace the rnn's own weights_init
# before initialize() is called.
# NOTE(review): nothing here pushes the Orthogonal scheme on to rnn's
# children - confirm that is intended if rnn is a composite brick.
seq_gen.push_initialization_config()
rnn.weights_init = Orthogonal()
seq_gen.initialize()
# Adapted from markov_tutorial (original note: "z markov_tutorial").
x = tensor.lvector('features')
# Turn the vector into a two-dimensional variable whose second axis has size 1.
x = x.reshape( (x.shape[0], 1) )
# Summed cost matrix, averaged with x.shape[1] as the denominator.
cost = aggregation.mean(seq_gen.cost_matrix(x[:,:]).sum(), x.shape[1])
cost.name = "sequence_log_likelihood"
cost_cg = ComputationGraph(cost)
# Disabled debugging aid: render the unoptimized symbolic graph to a PNG.
# theano.printing.pydotprint(cost, outfile="./pics/symbolic_graph_unopt.png", var_with_name_simple=True)
algorithm = GradientDescent(
cost=cost,
parameters=list(Selector(seq_gen).get_parameters().values()),
示例8: main
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import push_initialization_config [as 别名]
def main(mode, save_path, steps, num_batches, load_params):
chars = (list(string.ascii_uppercase) + list(range(10)) +
[' ', '.', ',', '\'', '"', '!', '?', '<UNK>'])
char_to_ind = {char: i for i, char in enumerate(chars)}
ind_to_char = {v: k for k, v in char_to_ind.iteritems()}
train_dataset = TextFile(['/Tmp/serdyuk/data/wsj_text_train'],
char_to_ind, bos_token=None, eos_token=None,
level='character')
valid_dataset = TextFile(['/Tmp/serdyuk/data/wsj_text_valid'],
char_to_ind, bos_token=None, eos_token=None,
level='character')
vocab_size = len(char_to_ind)
logger.info('Dictionary size: {}'.format(vocab_size))
if mode == 'continue':
continue_training(save_path)
return
elif mode == "sample":
main_loop = load(open(save_path, "rb"))
generator = main_loop.model.get_top_bricks()[-1]
sample = ComputationGraph(generator.generate(
n_steps=steps, batch_size=1, iterate=True)).get_theano_function()
states, outputs, costs = [data[:, 0] for data in sample()]
print("".join([ind_to_char[s] for s in outputs]))
numpy.set_printoptions(precision=3, suppress=True)
print("Generation cost:\n{}".format(costs.sum()))
freqs = numpy.bincount(outputs).astype(floatX)
freqs /= freqs.sum()
trans_freqs = numpy.zeros((vocab_size, vocab_size), dtype=floatX)
for a, b in zip(outputs, outputs[1:]):
trans_freqs[a, b] += 1
trans_freqs /= trans_freqs.sum(axis=1)[:, None]
return
# Experiment configuration
batch_size = 20
dim = 650
feedback_dim = 650
valid_stream = valid_dataset.get_example_stream()
valid_stream = Batch(valid_stream,
iteration_scheme=ConstantScheme(batch_size))
valid_stream = Padding(valid_stream)
valid_stream = Mapping(valid_stream, _transpose)
# Build the bricks and initialize them
transition = GatedRecurrent(name="transition", dim=dim,
activation=Tanh())
generator = SequenceGenerator(
Readout(readout_dim=vocab_size, source_names=transition.apply.states,
emitter=SoftmaxEmitter(name="emitter"),
feedback_brick=LookupFeedback(
vocab_size, feedback_dim, name='feedback'),
name="readout"),
transition,
weights_init=Uniform(std=0.04), biases_init=Constant(0),
name="generator")
generator.push_initialization_config()
transition.weights_init = Orthogonal()
transition.push_initialization_config()
generator.initialize()
# Build the cost computation graph.
features = tensor.lmatrix('features')
features_mask = tensor.matrix('features_mask')
cost_matrix = generator.cost_matrix(
features, mask=features_mask)
batch_cost = cost_matrix.sum()
cost = aggregation.mean(
batch_cost,
features.shape[1])
cost.name = "sequence_log_likelihood"
char_cost = aggregation.mean(
batch_cost, features_mask.sum())
char_cost.name = 'character_log_likelihood'
ppl = 2 ** (cost / numpy.log(2))
ppl.name = 'ppl'
bits_per_char = char_cost / tensor.log(2)
bits_per_char.name = 'bits_per_char'
length = features.shape[0]
length.name = 'length'
model = Model(batch_cost)
if load_params:
params = load_parameter_values(save_path)
model.set_parameter_values(params)
if mode == "train":
# Give an idea of what's going on.
logger.info("Parameters:\n" +
pprint.pformat(
[(key, value.get_value().shape) for key, value
in Selector(generator).get_parameters().items()],
#.........这里部分代码省略.........
示例9: main_rnn
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import push_initialization_config [as 别名]
def main_rnn(config):
    """Build a 4-layer LSTM sequence generator and train it on the MFCC stream.

    ``config`` is read for ``target_size``, ``lstm_hidden_size``,
    ``learning_rate``, ``batch_size``, ``source_size``, ``num_examples`` and
    ``num_batches``.  The cost is the mean of the generator's cost matrix,
    named ``nll``, minimized with ``GradientDescent`` using a ``Scale`` step
    rule.
    """
    x = tensor.tensor3('features')
    y = tensor.matrix('targets')  # declared but not used by the cost below

    emitter = TestEmitter()
    print(type(emitter.cost))

    steps = 2  # only relevant to a sampling graph, which is not built here
    n_samples = config['target_size']
    hidden_size = config['lstm_hidden_size']

    lstm_layers = [LSTM(hidden_size) for _ in range(4)]
    transition = RecurrentStack(lstm_layers,
                                name="transition", skip_connections=False)

    # Keep only those state names of the stack that contain 'states'.
    source_names = [name for name in transition.apply.states
                    if 'states' in name]
    readout = Readout(emitter,
                      readout_dim=hidden_size,
                      source_names=source_names,
                      feedback_brick=None,
                      merge=None,
                      merge_prototype=None,
                      post_merge=None,
                      merged_dim=None)

    seqgen = SequenceGenerator(readout, transition,
                               attention=None, add_contexts=False)
    seqgen.weights_init = IsotropicGaussian(0.01)
    seqgen.biases_init = Constant(0.)
    seqgen.push_initialization_config()
    # Override the transition's bias initializer after the first push and
    # push again from the transition.
    seqgen.transition.biases_init = IsotropicGaussian(0.01, 1)
    seqgen.transition.push_initialization_config()
    seqgen.initialize()

    states = seqgen.transition.apply.outputs
    print('states',states)
    # One zero-filled shared variable per transition output, passed to
    # cost_matrix as keyword arguments.
    states = {name: shared_floatx_zeros((n_samples, hidden_size))
              for name in states}

    cost_matrix = seqgen.cost_matrix(x, **states)
    cost = cost_matrix.mean()
    cost.name = "nll"

    cg = ComputationGraph(cost)
    model = Model(cost)  # constructed but not handed to the main loop

    algorithm = GradientDescent(
        cost=cost,
        parameters=cg.parameters,
        step_rule=Scale(learning_rate=config['learning_rate']))

    # Getting the stream
    train_stream = MFCC.get_stream(config['batch_size'],
                                   config['source_size'],
                                   config['target_size'],
                                   config['num_examples'])

    # Monitoring stuff
    extensions = [
        Timing(),
        FinishAfter(after_n_batches=config['num_batches']),
        TrainingDataMonitoring([cost], prefix="train", every_n_batches=1),
        ProgressBar(),
        Printing(every_n_batches=1),
    ]

    main_loop = MainLoop(algorithm, train_stream, extensions=extensions)
    main_loop.run()
示例10: main
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import push_initialization_config [as 别名]
def main():
logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")
parser = argparse.ArgumentParser(
"Case study of generating a Markov chain with RNN.",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
"mode", choices=["train", "sample"],
help="The mode to run. Use `train` to train a new model"
" and `sample` to sample a sequence generated by an"
" existing one.")
parser.add_argument(
"save_path", default="sine",
help="The part to save PyLearn2 model")
parser.add_argument(
"--steps", type=int, default=100,
help="Number of steps to plot")
parser.add_argument(
"--reset", action="store_true", default=False,
help="Start training from scratch")
args = parser.parse_args()
num_states = ChainDataset.num_states
if args.mode == "train":
# Experiment configuration
rng = numpy.random.RandomState(1)
batch_size = 50
seq_len = 100
dim = 10
feedback_dim = 8
# Build the bricks and initialize them
transition = GatedRecurrent(name="transition", activation=Tanh(),
dim=dim)
generator = SequenceGenerator(
LinearReadout(readout_dim=num_states, source_names=["states"],
emitter=SoftmaxEmitter(name="emitter"),
feedbacker=LookupFeedback(
num_states, feedback_dim, name='feedback'),
name="readout"),
transition,
weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
name="generator")
generator.push_initialization_config()
transition.weights_init = Orthogonal()
generator.initialize()
logger.debug("Parameters:\n" +
pprint.pformat(
[(key, value.get_value().shape) for key, value
in Selector(generator).get_params().items()],
width=120))
logger.debug("Markov chain entropy: {}".format(
ChainDataset.entropy))
logger.debug("Expected min error: {}".format(
-ChainDataset.entropy * seq_len * batch_size))
if os.path.isfile(args.save_path) and not args.reset:
model = Pylearn2Model.load(args.save_path)
else:
model = Pylearn2Model(generator)
# Build the cost computation graph.
# Note: would be probably nicer to make cost part of the model.
x = tensor.ltensor3('x')
cost = Pylearn2Cost(model.brick.cost(x[:, :, 0]).sum())
dataset = ChainDataset(rng, seq_len)
sgd = SGD(learning_rate=0.0001, cost=cost,
batch_size=batch_size, batches_per_iter=10,
monitoring_dataset=dataset,
monitoring_batch_size=batch_size,
monitoring_batches=1,
learning_rule=Pylearn2LearningRule(
SGDLearningRule(),
dict(training_objective=cost.cost)))
train = Pylearn2Train(dataset, model, algorithm=sgd,
save_path=args.save_path, save_freq=10)
train.main_loop()
elif args.mode == "sample":
model = Pylearn2Model.load(args.save_path)
generator = model.brick
sample = ComputationGraph(generator.generate(
n_steps=args.steps, batch_size=1, iterate=True)).function()
states, outputs, costs = [data[:, 0] for data in sample()]
numpy.set_printoptions(precision=3, suppress=True)
print("Generation cost:\n{}".format(costs.sum()))
freqs = numpy.bincount(outputs).astype(floatX)
freqs /= freqs.sum()
print("Frequencies:\n {} vs {}".format(freqs,
ChainDataset.equilibrium))
trans_freqs = numpy.zeros((num_states, num_states), dtype=floatX)
#.........这里部分代码省略.........
示例11: main
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import push_initialization_config [as 别名]
def main(mode, save_path, steps, time_budget, reset):
    """Train an RNN Markov-chain generator with Pylearn2 or sample from it.

    Parameters
    ----------
    mode : ``"train"`` or ``"sample"``.
    save_path : file the Pylearn2 model is saved to and loaded from.
    steps : number of generation steps (``n_steps``) in sample mode.
    time_budget : forwarded to ``Pylearn2Train.main_loop``.
    reset : when true, a new model is created even if ``save_path`` exists.

    Raises
    ------
    AssertionError
        If ``mode`` is neither ``"train"`` nor ``"sample"``.
    """
    num_states = ChainDataset.num_states

    if mode == "train":
        # Experiment configuration
        rng = numpy.random.RandomState(1)
        batch_size = 50
        seq_len = 100
        dim = 10
        feedback_dim = 8

        # Build the bricks and initialize them
        transition = GatedRecurrent(name="transition", activation=Tanh(),
                                    dim=dim)
        generator = SequenceGenerator(
            LinearReadout(readout_dim=num_states, source_names=["states"],
                          emitter=SoftmaxEmitter(name="emitter"),
                          feedbacker=LookupFeedback(
                              num_states, feedback_dim, name='feedback'),
                          name="readout"),
            transition,
            weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
            name="generator")
        # Push the generator-level schemes first, then override the
        # transition's weight initializer before initializing.
        generator.push_initialization_config()
        transition.weights_init = Orthogonal()
        generator.initialize()

        logger.info("Parameters:\n" +
                    pprint.pformat(
                        [(key, value.get_value().shape) for key, value
                         in Selector(generator).get_params().items()],
                        width=120))
        logger.info("Markov chain entropy: {}".format(
            ChainDataset.entropy))
        logger.info("Expected min error: {}".format(
            -ChainDataset.entropy * seq_len * batch_size))

        # Reuse a previously saved model unless a reset was requested.
        if os.path.isfile(save_path) and not reset:
            model = Pylearn2Model.load(save_path)
        else:
            model = Pylearn2Model(generator)

        # Build the cost computation graph.
        # Note: would be probably nicer to make cost part of the model.
        x = tensor.ltensor3('x')
        cost = Pylearn2Cost(model.brick.cost(x[:, :, 0]).sum())

        dataset = ChainDataset(rng, seq_len)
        sgd = SGD(learning_rate=0.0001, cost=cost,
                  batch_size=batch_size, batches_per_iter=10,
                  monitoring_dataset=dataset,
                  monitoring_batch_size=batch_size,
                  monitoring_batches=1,
                  learning_rule=Pylearn2LearningRule(
                      SGDLearningRule(),
                      dict(training_objective=cost.cost)))
        train = Pylearn2Train(dataset, model, algorithm=sgd,
                              save_path=save_path, save_freq=10)
        train.main_loop(time_budget=time_budget)
    elif mode == "sample":
        model = Pylearn2Model.load(save_path)
        generator = model.brick

        sample = ComputationGraph(generator.generate(
            n_steps=steps, batch_size=1, iterate=True)).function()

        # Keep only column 0 of each result (batch_size is 1).
        states, outputs, costs = [data[:, 0] for data in sample()]

        numpy.set_printoptions(precision=3, suppress=True)
        print("Generation cost:\n{}".format(costs.sum()))

        freqs = numpy.bincount(outputs).astype(floatX)
        freqs /= freqs.sum()
        print("Frequencies:\n {} vs {}".format(freqs,
                                               ChainDataset.equilibrium))

        # Count consecutive (previous, next) symbol pairs, then normalize
        # each row to get empirical transition frequencies.
        trans_freqs = numpy.zeros((num_states, num_states), dtype=floatX)
        for a, b in zip(outputs, outputs[1:]):
            trans_freqs[a, b] += 1
        trans_freqs /= trans_freqs.sum(axis=1)[:, None]
        print("Transition frequencies:\n{}\nvs\n{}".format(
            trans_freqs, ChainDataset.trans_prob))
    else:
        # Raised explicitly so an unknown mode is not silently ignored under
        # ``python -O``, which strips ``assert`` statements.
        raise AssertionError("unknown mode: {!r}".format(mode))