本文整理汇总了Python中blocks.bricks.sequence_generators.SequenceGenerator.initialize方法的典型用法代码示例。如果您正苦于以下问题:Python SequenceGenerator.initialize方法的具体用法?Python SequenceGenerator.initialize怎么用?Python SequenceGenerator.initialize使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类blocks.bricks.sequence_generators.SequenceGenerator的用法示例。
在下文中一共展示了SequenceGenerator.initialize方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_sequence_generator
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import initialize [as 别名]
def test_sequence_generator():
    """Smoke-test a SequenceGenerator built on a GatedRecurrent transition.

    Disclaimer: only the shapes of the results are checked, not their values.
    """
    n_steps = 10
    batch_size = 30
    dim = 20
    output_dim = 1

    # Assemble the bricks: the recurrent transition first, then the readout.
    transition = GatedRecurrent(
        name="transition", activation=Tanh(), dim=dim,
        weights_init=Orthogonal())
    readout = LinearReadout(
        readout_dim=output_dim, source_names=["states"],
        emitter=TestEmitter(name="emitter"), name="readout")
    generator = SequenceGenerator(
        readout, transition,
        weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
        name="generator")
    generator.initialize()

    # The cost graph is expected to be 2-dimensional: (n_steps, batch_size).
    y = tensor.tensor3('y')
    mask = tensor.matrix('mask')
    costs = generator.cost(y, mask)
    assert costs.ndim == 2
    compute_costs = theano.function([y, mask], [costs])
    targets = numpy.zeros((n_steps, batch_size, output_dim), dtype=floatX)
    full_mask = numpy.ones((n_steps, batch_size), dtype=floatX)
    costs_val = compute_costs(targets, full_mask)[0]
    assert costs_val.shape == (n_steps, batch_size)

    # Evaluate every variable returned by ``generate`` and check its shape.
    generated = generator.generate(
        iterate=True, batch_size=batch_size, n_steps=n_steps)
    states, outputs, costs = [variable.eval() for variable in generated]
    assert states.shape == (n_steps, batch_size, dim)
    assert outputs.shape == (n_steps, batch_size, output_dim)
    assert costs.shape == (n_steps, batch_size)
示例2: getRnnGenerator
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import initialize [as 别名]
def getRnnGenerator(vocab_size,hidden_dim,input_dim=512):
    """Build and initialize an RNN sequence generator over a vocabulary.

    The vocabulary size must be known up front because it sets the readout
    dimension and is the first argument of the feedback lookup.

    Parameters
    ----------
    vocab_size
        Readout dimension; also passed as the first argument of
        ``LookupFeedback``.
    hidden_dim
        ``dim`` of the ``MySimpleRecurrent`` transition.
    input_dim
        Second positional argument of ``LookupFeedback`` (presumably the
        feedback embedding size -- TODO confirm). Defaults to 512, like
        Karpathy.

    Returns
    -------
    The initialized ``SequenceGenerator``.
    """
    emitter = SoftmaxEmitter(name="emitter")
    feedback = LookupFeedback(vocab_size, input_dim, name='feedback')
    readout = Readout(
        readout_dim=vocab_size,
        source_names=["states"],  # transition.apply.states ???
        emitter=emitter,
        feedback_brick=feedback,
        name="readout")
    transition = MySimpleRecurrent(
        name="transition", activation=Tanh(), dim=hidden_dim)

    generator = SequenceGenerator(
        readout,
        transition,
        weights_init=IsotropicGaussian(0.01),
        biases_init=Constant(0),
        name="generator")

    # Push the generator-level initializers down to the children, then set
    # the transition's weight initializer explicitly before initializing.
    generator.push_initialization_config()
    generator.transition.weights_init = IsotropicGaussian(0.01)
    generator.initialize()
    return generator
示例3: build_model
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import initialize [as 别名]
def build_model(alphabet_size, config):
    """Build a stacked-LSTM sequence generator and its cost variable.

    Parameters
    ----------
    alphabet_size
        Readout dimension; also used for both arguments of the feedback
        lookup.
    config
        Mapping read for ``'lstm_layers'``, ``'lstm_dim_<i>'`` for each
        layer index, and ``'lstm_init_width'``. If ``'batch_length'`` is
        present, ``'batch_overlap'`` is read as well and the cost is
        restricted to the last ``batch_length - batch_overlap`` entries
        along the second axis.

    Returns
    -------
    ``(generator, cost)`` where ``cost`` is named ``'bits_per_character'``.
    """
    n_layers = config['lstm_layers']
    layer_dims = [config['lstm_dim_' + str(i)] for i in range(n_layers)]
    init_width = config['lstm_init_width']

    lstm_layers = [
        LSTM(dim=layer_dim, use_bias=True,
             weights_init=Uniform(width=init_width),
             forget_init=Constant(1.))
        for layer_dim in layer_dims
    ]
    recurrent_stack = RecurrentStack(lstm_layers, name='transition')

    feedback = LookupFeedback(alphabet_size,
                              feedback_dim=alphabet_size,
                              name='feedback')
    readout = Readout(readout_dim=alphabet_size,
                      source_names=['states#' + str(n_layers - 1)],
                      emitter=SoftmaxEmitter(name='emitter'),
                      feedback_brick=feedback,
                      name='readout')

    generator = SequenceGenerator(readout=readout,
                                  transition=recurrent_stack,
                                  weights_init=Uniform(width=init_width),
                                  biases_init=Constant(0),
                                  name='generator')
    generator.push_initialization_config()
    generator.initialize()

    x = tensor.lmatrix('features')
    mask = tensor.fmatrix('features_mask')
    cost_matrix = generator.cost_matrix(x, mask=mask)

    # Conversion factor from nats to bits.
    log2e = math.log(math.e, 2)

    # Select either the trailing window or the full second axis.
    if 'batch_length' in config:
        length = config['batch_length'] - config['batch_overlap']
        window = slice(-length, None)
    else:
        window = slice(None)
    total_cost = cost_matrix[:, window].sum()
    total_mask = mask[:, window].sum()
    cost = log2e * aggregation.mean(total_cost, total_mask)
    cost.name = 'bits_per_character'
    return generator, cost
示例4: test_integer_sequence_generator
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import initialize [as 别名]
def test_integer_sequence_generator():
    """Smoke-test a SequenceGenerator with a softmax emitter.

    Disclaimer: only the shapes (and the output dtype) are checked, not the
    values.
    """
    n_steps = 10
    batch_size = 30
    dim = 20
    feedback_dim = 3
    readout_dim = 5

    transition = GatedRecurrent(
        name="transition", activation=Tanh(), dim=dim,
        weights_init=Orthogonal())
    readout = LinearReadout(
        readout_dim=readout_dim, source_names=["states"],
        emitter=SoftmaxEmitter(name="emitter"),
        feedbacker=LookupFeedback(readout_dim, feedback_dim),
        name="readout")
    generator = SequenceGenerator(
        readout, transition,
        weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
        name="generator")
    generator.initialize()

    # Cost of integer targets under a full mask.
    y = tensor.lmatrix('y')
    mask = tensor.matrix('mask')
    costs = generator.cost(y, mask)
    assert costs.ndim == 2
    compute_costs = theano.function([y, mask], [costs])
    targets = numpy.zeros((n_steps, batch_size), dtype='int64')
    full_mask = numpy.ones((n_steps, batch_size), dtype=floatX)
    costs_val = compute_costs(targets, full_mask)[0]
    assert costs_val.shape == (n_steps, batch_size)

    # Generation: the updates are taken from the tag of the node that
    # produced the costs and handed to the compiled function.
    states, outputs, costs = generator.generate(
        iterate=True, batch_size=batch_size, n_steps=n_steps)
    scan_updates = costs.owner.inputs[0].owner.tag.updates
    run_generation = theano.function(
        [], [states, outputs, costs], updates=scan_updates)
    states_val, outputs_val, costs_val = run_generation()
    assert states_val.shape == (n_steps, batch_size, dim)
    assert outputs_val.shape == (n_steps, batch_size)
    assert outputs_val.dtype == 'int64'
    assert costs_val.shape == (n_steps, batch_size)
示例5: test_recurrentstack_sequence_generator
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import initialize [as 别名]
def test_recurrentstack_sequence_generator():
    """Test RecurrentStack behaviour inside a SequenceGenerator.

    Builds a generator whose transition is a ``RecurrentStack`` of ``depth``
    LSTM layers and checks that filtering the cost graph by the
    ``INNER_OUTPUT`` role yields exactly one variable per layer.
    """
    output_dim = 1
    dim = 20
    depth = 2

    transitions = [LSTM(dim=dim) for _ in range(depth)]
    transition = RecurrentStack(transitions, fast=True,
                                weights_init=Constant(2),
                                biases_init=Constant(0))
    generator = SequenceGenerator(
        Readout(readout_dim=output_dim,
                source_names=["states_%d" % (depth - 1)],
                emitter=TestEmitter()),
        transition,
        weights_init=IsotropicGaussian(0.1), biases_init=Constant(0.0),
        seed=1234)
    generator.initialize()

    y = tensor.tensor3('y')
    cost = generator.cost(y)

    # Check that all states can be accessed and not just the state connected
    # to readout.
    cg = ComputationGraph(cost)
    # NOTE(review): INNER_OUTPUT is not imported inside this function; it
    # must already be available at module level -- confirm.
    dropout_target = VariableFilter(roles=[INNER_OUTPUT])(cg.variables)
    assert_equal(len(dropout_target), depth)
示例6: test_sequence_generator
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import initialize [as 别名]
def test_sequence_generator():
    """Test a sequence generator with no contexts and continuous outputs.

    Such sequence generators can be used to model e.g. dynamical systems.
    The test exercises ``cost_matrix``, ``cost``, the auxiliary
    per-sequence-element cost variable, and ``generate``.
    """
    rng = numpy.random.RandomState(1234)

    n_steps = 10
    batch_size = 30
    dim = 20
    output_dim = 1

    transition = SimpleRecurrent(activation=Tanh(), dim=dim,
                                 weights_init=Orthogonal())
    readout = Readout(readout_dim=output_dim, source_names=["states"],
                      emitter=TestEmitter())
    generator = SequenceGenerator(
        readout, transition,
        weights_init=IsotropicGaussian(0.1), biases_init=Constant(0.0),
        seed=1234)
    generator.initialize()

    # Test 'cost_matrix' method
    y = tensor.tensor3('y')
    mask = tensor.matrix('mask')
    costs = generator.cost_matrix(y, mask)
    assert costs.ndim == 2
    y_test = rng.uniform(
        size=(n_steps, batch_size, output_dim)).astype(floatX)
    m_test = numpy.ones((n_steps, batch_size), dtype=floatX)
    cost_matrix_fn = theano.function([y, mask], [costs])
    costs_val = cost_matrix_fn(y_test, m_test)[0]
    assert costs_val.shape == (n_steps, batch_size)
    assert_allclose(costs_val.sum(), 115.593, rtol=1e-5)

    # Test 'cost' method
    cost = generator.cost(y, mask)
    assert cost.ndim == 0
    cost_fn = theano.function([y, mask], [cost])
    cost_val = cost_fn(y_test, m_test)
    assert_allclose(cost_val, 3.8531, rtol=1e-5)

    # Test 'AUXILIARY' variable 'per_sequence_element' in 'cost' method
    cg = ComputationGraph([cost])
    var_filter = VariableFilter(roles=[AUXILIARY])
    aux_var_name = '_'.join([generator.name, generator.cost.name,
                             'per_sequence_element'])
    matching = [el for el in var_filter(cg.variables)
                if el.name == aux_var_name]
    cost_per_el = matching[0]
    assert cost_per_el.ndim == 0
    cost_per_el_fn = theano.function([y, mask], [cost_per_el])
    cost_per_el_val = cost_per_el_fn(y_test, m_test)
    assert_allclose(cost_per_el_val, 0.38531, rtol=1e-5)

    # Test 'generate' method, starting from random initial states.
    initial_states = rng.uniform(size=(batch_size, dim)).astype(floatX)
    generated = generator.generate(
        states=initial_states,
        iterate=True, batch_size=batch_size,
        n_steps=n_steps)
    states, outputs, costs = [variable.eval() for variable in generated]
    assert states.shape == (n_steps, batch_size, dim)
    assert outputs.shape == (n_steps, batch_size, output_dim)
    assert costs.shape == (n_steps, batch_size)
    assert_allclose(outputs.sum(), -0.33683, rtol=1e-5)
    assert_allclose(states.sum(), 15.7909, rtol=1e-5)
    # There is no generation cost in this case, since generation is
    # deterministic
    assert_allclose(costs.sum(), 0.0)
示例7: test_with_attention
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import initialize [as 别名]
def test_with_attention():
    """Test a sequence generator with continuous outputs and attention."""
    rng = numpy.random.RandomState(1234)

    inp_dim = 2
    inp_len = 10
    attended_dim = 3
    attended_len = 11
    batch_size = 4
    n_steps = 30

    def rand(size):
        """Draw uniform random values of the given shape as ``floatX``."""
        return rng.uniform(size=size).astype(floatX)

    def generate_mask(length, batch_size):
        """Return a ``(length, batch_size)`` mask of ones with zeroed tails.

        Each column is zeroed from a random position onwards, to make it
        look like real data.
        """
        mask = numpy.ones((length, batch_size), dtype=floatX)
        for column in range(batch_size):
            cut = 1 + rng.randint(0, length - 1)
            mask[cut:, column] = 0.0
        return mask

    # Random inputs; the draw order below fixes the expected sums.
    output_vals = rand((inp_len, batch_size, inp_dim))
    output_mask_vals = generate_mask(inp_len, batch_size)
    attended_vals = rand((attended_len, batch_size, attended_dim))
    attended_mask_vals = generate_mask(attended_len, batch_size)

    transition = TestTransition(
        dim=inp_dim, attended_dim=attended_dim, activation=Identity())
    attention = SequenceContentAttention(
        state_names=transition.apply.states, match_dim=inp_dim)
    readout = Readout(
        readout_dim=inp_dim,
        source_names=[transition.apply.states[0],
                      attention.take_glimpses.outputs[0]],
        emitter=TestEmitter())
    generator = SequenceGenerator(
        readout,
        transition=transition,
        attention=attention,
        weights_init=IsotropicGaussian(0.1), biases_init=Constant(0),
        add_contexts=False, seed=1234)
    generator.initialize()

    # Test 'cost_matrix' method
    attended = tensor.tensor3("attended")
    attended_mask = tensor.matrix("attended_mask")
    outputs = tensor.tensor3('outputs')
    mask = tensor.matrix('mask')
    costs = generator.cost_matrix(outputs, mask,
                                  attended=attended,
                                  attended_mask=attended_mask)
    feed = {outputs: output_vals,
            mask: output_mask_vals,
            attended: attended_vals,
            attended_mask: attended_mask_vals}
    costs_vals = costs.eval(feed)
    assert costs_vals.shape == (inp_len, batch_size)
    assert_allclose(costs_vals.sum(), 13.5042, rtol=1e-5)

    # Test `generate` method
    results = generator.generate(
        n_steps=n_steps, batch_size=attended.shape[1],
        attended=attended, attended_mask=attended_mask)
    assert len(results) == 5
    run_generation = theano.function([attended, attended_mask], results)
    (states_vals, outputs_vals, glimpses_vals,
     weights_vals, costs_vals) = run_generation(
        attended_vals, attended_mask_vals)
    assert states_vals.shape == (n_steps, batch_size, inp_dim)
    assert states_vals.shape == outputs_vals.shape
    assert glimpses_vals.shape == (n_steps, batch_size, attended_dim)
    assert weights_vals.shape == (n_steps, batch_size, attended_len)
    assert costs_vals.shape == (n_steps, batch_size)
    assert_allclose(states_vals.sum(), 23.4172, rtol=1e-5)
    # There is no generation cost in this case, since generation is
    # deterministic
    assert_allclose(costs_vals.sum(), 0.0, rtol=1e-5)
    assert_allclose(weights_vals.sum(), 120.0, rtol=1e-5)
    assert_allclose(glimpses_vals.sum(), 199.2402, rtol=1e-5)
    assert_allclose(outputs_vals.sum(), -11.6008, rtol=1e-5)
示例8: main
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import initialize [as 别名]
def main():
logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")
parser = argparse.ArgumentParser(
"Case study of language modeling with RNN",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
"mode", choices=["train", "sample"],
help="The mode to run. Use `train` to train a new model"
" and `sample` to sample a sequence generated by an"
" existing one.")
parser.add_argument(
"prefix", default="sine",
help="The prefix for model, timing and state files")
parser.add_argument(
"state", nargs="?", default="",
help="Changes to Groundhog state")
parser.add_argument("--path", help="Path to a language dataset")
parser.add_argument("--dict", help="Path to the dataset dictionary")
parser.add_argument("--restart", help="Start anew")
parser.add_argument(
"--reset", action="store_true", default=False,
help="Reset the hidden state between batches")
parser.add_argument(
"--steps", type=int, default=100,
help="Number of steps to plot for the 'sample' mode"
" OR training sequence length for the 'train' mode.")
args = parser.parse_args()
logger.debug("Args:\n" + str(args))
dim = 200
num_chars = 50
transition = GatedRecurrent(
name="transition", activation=Tanh(), dim=dim,
weights_init=Orthogonal())
generator = SequenceGenerator(
LinearReadout(readout_dim=num_chars, source_names=["states"],
emitter=SoftmaxEmitter(name="emitter"),
feedbacker=LookupFeedback(
num_chars, dim, name='feedback'),
name="readout"),
transition,
weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
name="generator")
generator.allocate()
logger.debug("Parameters:\n" +
pprint.pformat(
[(key, value.get_value().shape) for key, value
in Selector(generator).get_params().items()],
width=120))
if args.mode == "train":
batch_size = 1
seq_len = args.steps
generator.initialize()
# Build cost computation graph that uses the saved hidden states.
# An issue: for Groundhog this is completely transparent, that's
# why it does not carry the hidden state over the period when
# validation in done. We should find a way to fix in the future.
x = tensor.lmatrix('x')
init_states = shared_floatx_zeros((batch_size, dim),
name='init_states')
reset = tensor.scalar('reset')
cost = ComputationGraph(
generator.cost(x, states=init_states * reset).sum())
# TODO: better search routine
states = [v for v in cost.variables
if hasattr(v.tag, 'application_call')
and v.tag.application_call.brick == generator.transition
and (v.tag.application_call.application ==
generator.transition.apply)
and v.tag.role == VariableRole.OUTPUT
and v.tag.name == 'states']
assert len(states) == 1
states = states[0]
gh_model = GroundhogModel(generator, cost)
gh_model.properties.append(
('bpc', cost.outputs[0] * numpy.log(2) / seq_len))
gh_model.properties.append(('mean_init_state', init_states.mean()))
gh_model.properties.append(('reset', reset))
if not args.reset:
gh_model.updates.append((init_states, states[-1]))
state = GroundhogState(args.prefix, batch_size,
learning_rate=0.0001).as_dict()
changes = eval("dict({})".format(args.state))
state.update(changes)
def output_format(x, y, reset):
return dict(x=x[:, None], reset=reset)
train, valid, test = [
LMIterator(batch_size=batch_size,
use_infinite_loop=mode == 'train',
path=args.path,
#.........这里部分代码省略.........
示例9: main
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import initialize [as 别名]
def main():
    """Command-line entry point: train or sample a Markov-chain RNN.

    In ``train`` mode the generator is initialized and trained through the
    Groundhog main loop; in ``sample`` mode parameters are loaded from
    ``<prefix>model.npz`` and statistics of a generated chain are printed.
    """
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(
        "Case study of generating a Markov chain with RNN.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "mode", choices=["train", "sample"],
        help="The mode to run. Use `train` to train a new model"
             " and `sample` to sample a sequence generated by an"
             " existing one.")
    # NOTE(review): ``prefix`` is a required positional, so argparse never
    # applies this default.
    parser.add_argument(
        "prefix", default="sine",
        help="The prefix for model, timing and state files")
    parser.add_argument(
        "--steps", type=int, default=100,
        help="Number of steps to plot")
    args = parser.parse_args()

    dim = 10
    num_states = ChainIterator.num_states
    feedback_dim = 8

    transition = GatedRecurrent(name="transition", activation=Tanh(), dim=dim)
    readout = LinearReadout(
        readout_dim=num_states, source_names=["states"],
        emitter=SoftmaxEmitter(name="emitter"),
        feedbacker=LookupFeedback(
            num_states, feedback_dim, name='feedback'),
        name="readout")
    generator = SequenceGenerator(
        readout, transition,
        weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
        name="generator")
    generator.allocate()

    param_shapes = [
        (key, value.get_value().shape)
        for key, value in Selector(generator).get_params().items()]
    logger.debug("Parameters:\n" + pprint.pformat(param_shapes, width=120))

    if args.mode == "train":
        rng = numpy.random.RandomState(1)
        batch_size = 50

        # Push generator-level initializers down, then override the
        # transition's weight initializer before initializing.
        generator.push_initialization_config()
        transition.weights_init = Orthogonal()
        generator.initialize()
        logger.debug("transition.weights_init={}".format(
            transition.weights_init))

        cost = generator.cost(tensor.lmatrix('x')).sum()
        gh_model = GroundhogModel(generator, cost)
        state = GroundhogState(args.prefix, batch_size,
                               learning_rate=0.0001).as_dict()
        data = ChainIterator(rng, 100, batch_size)
        trainer = SGD(gh_model, state, data)
        main_loop = MainLoop(data, None, None, gh_model, trainer, state, None)
        main_loop.main()
    elif args.mode == "sample":
        load_params(generator, args.prefix + "model.npz")

        generated = generator.generate(
            n_steps=args.steps, batch_size=1, iterate=True)
        sample = ComputationGraph(generated).function()
        # Keep only the first (and only) batch element of each result.
        states, outputs, costs = [data[:, 0] for data in sample()]

        numpy.set_printoptions(precision=3, suppress=True)
        print("Generation cost:\n{}".format(costs.sum()))

        # Empirical state frequencies versus the chain's equilibrium.
        freqs = numpy.bincount(outputs).astype(floatX)
        freqs /= freqs.sum()
        print("Frequencies:\n {} vs {}".format(freqs,
                                               ChainIterator.equilibrium))

        # Empirical transition frequencies, normalized per source state.
        trans_freqs = numpy.zeros((num_states, num_states), dtype=floatX)
        for current, following in zip(outputs, outputs[1:]):
            trans_freqs[current, following] += 1
        trans_freqs /= trans_freqs.sum(axis=1)[:, None]
        print("Transition frequencies:\n{}\nvs\n{}".format(
            trans_freqs, ChainIterator.trans_prob))
    else:
        assert False
示例10: main
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import initialize [as 别名]
def main(mode, save_path, steps, num_batches):
    """Train a Markov-chain sequence generator, or sample from a saved one.

    Parameters
    ----------
    mode
        ``"train"`` to build, initialize and train the generator, or
        ``"sample"`` to load a pickled main loop and print statistics of a
        generated chain. Any other value hits ``assert False``.
    save_path
        Path used for checkpoints in train mode and read (as a pickle) in
        sample mode.
    steps
        Number of steps to generate in sample mode.
    num_batches
        Number of batches after which training finishes.
    """
    num_states = MarkovChainDataset.num_states

    if mode == "train":
        # Experiment configuration
        rng = numpy.random.RandomState(1)
        batch_size = 50
        seq_len = 100
        dim = 10
        feedback_dim = 8

        # Build the bricks and initialize them
        transition = GatedRecurrent(name="transition", dim=dim,
                                    activation=Tanh())
        generator = SequenceGenerator(
            Readout(readout_dim=num_states, source_names=["states"],
                    emitter=SoftmaxEmitter(name="emitter"),
                    feedback_brick=LookupFeedback(
                        num_states, feedback_dim, name='feedback'),
                    name="readout"),
            transition,
            weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
            name="generator")
        generator.push_initialization_config()
        transition.weights_init = Orthogonal()
        generator.initialize()

        # Give an idea of what's going on.
        logger.info("Parameters:\n" +
                    pprint.pformat(
                        [(key, value.get_value().shape) for key, value
                         in Selector(generator).get_params().items()],
                        width=120))
        logger.info("Markov chain entropy: {}".format(
            MarkovChainDataset.entropy))
        logger.info("Expected min error: {}".format(
            -MarkovChainDataset.entropy * seq_len))

        # Build the cost computation graph.
        x = tensor.lmatrix('data')
        cost = aggregation.mean(generator.cost_matrix(x[:, :]).sum(),
                                x.shape[1])
        cost.name = "sequence_log_likelihood"

        algorithm = GradientDescent(
            cost=cost, params=list(Selector(generator).get_params().values()),
            step_rule=Scale(0.001))
        main_loop = MainLoop(
            algorithm=algorithm,
            data_stream=DataStream(
                MarkovChainDataset(rng, seq_len),
                iteration_scheme=ConstantScheme(batch_size)),
            model=Model(cost),
            extensions=[FinishAfter(after_n_batches=num_batches),
                        TrainingDataMonitoring([cost], prefix="this_step",
                                               after_batch=True),
                        TrainingDataMonitoring([cost], prefix="average",
                                               every_n_batches=100),
                        Checkpoint(save_path, every_n_batches=500),
                        Printing(every_n_batches=100)])
        main_loop.run()
    elif mode == "sample":
        # SECURITY NOTE: unpickling executes arbitrary code; only load
        # checkpoints from a trusted source.
        # The file is opened in a ``with`` block so the handle is closed
        # even if unpickling fails.
        with open(save_path, "rb") as checkpoint:
            main_loop = cPickle.load(checkpoint)
        # NOTE(review): in train mode ``model`` is built as ``Model(cost)``;
        # confirm that it exposes ``generate`` as used below.
        generator = main_loop.model

        sample = ComputationGraph(generator.generate(
            n_steps=steps, batch_size=1, iterate=True)).get_theano_function()

        # Keep only the first (and only) batch element of each result.
        states, outputs, costs = [data[:, 0] for data in sample()]

        numpy.set_printoptions(precision=3, suppress=True)
        print("Generation cost:\n{}".format(costs.sum()))

        freqs = numpy.bincount(outputs).astype(theano.config.floatX)
        freqs /= freqs.sum()
        print("Frequencies:\n {} vs {}".format(freqs,
                                               MarkovChainDataset.equilibrium))

        trans_freqs = numpy.zeros((num_states, num_states),
                                  dtype=theano.config.floatX)
        for a, b in zip(outputs, outputs[1:]):
            trans_freqs[a, b] += 1
        trans_freqs /= trans_freqs.sum(axis=1)[:, None]
        print("Transition frequencies:\n{}\nvs\n{}".format(
            trans_freqs, MarkovChainDataset.trans_prob))
    else:
        assert False
示例11: main
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import initialize [as 别名]
def main():
    """Command-line entry point: train or plot an RNN for 1d sequences.

    In ``train`` mode the generator is initialized and trained through the
    Groundhog main loop on sequences produced by ``--function``. In ``plot``
    mode parameters are loaded from ``<prefix>model.npz``, a sequence is
    generated for the values given in ``--params`` and plotted against the
    analytical function.
    """
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(
        "Case study of generating simple 1d sequences with RNN.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "mode", choices=["train", "plot"],
        help="The mode to run. Use `train` to train a new model"
             " and `plot` to plot a sequence generated by an"
             " existing one.")
    parser.add_argument(
        "prefix", default="sine",
        help="The prefix for model, timing and state files")
    parser.add_argument(
        "--input-noise", type=float, default=0.0,
        help="Adds Gaussian noise of given intensity to the "
             " training sequences.")
    parser.add_argument(
        "--function", default="lambda a, x: numpy.sin(a * x)",
        help="An analytical description of the sequence family to learn."
             " The arguments before the last one are considered parameters.")
    parser.add_argument(
        "--steps", type=int, default=100,
        help="Number of steps to plot")
    parser.add_argument(
        "--params",
        help="Parameter values for plotting")
    args = parser.parse_args()

    # SECURITY NOTE: ``--function`` is evaluated as Python source, so this
    # script must only be run with trusted command-line arguments.
    function = eval(args.function)
    # All arguments except the last one are parameters of the family.
    # NOTE(review): ``inspect.getargspec`` was removed in Python 3.11; it is
    # kept here so the script still runs on Python 2.
    num_params = len(inspect.getargspec(function).args) - 1

    class Emitter(TrivialEmitter):
        @application
        def cost(self, readouts, outputs):
            """Compute MSE."""
            return ((readouts - outputs) ** 2).sum(axis=readouts.ndim - 1)

    transition = GatedRecurrent(
        name="transition", activation=Tanh(), dim=10,
        weights_init=Orthogonal())
    with_params = AddParameters(transition, num_params, "params",
                                name="with_params")
    generator = SequenceGenerator(
        LinearReadout(readout_dim=1, source_names=["states"],
                      emitter=Emitter(name="emitter"), name="readout"),
        with_params,
        weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
        name="generator")
    generator.allocate()
    logger.debug("Parameters:\n" +
                 pprint.pformat(
                     [(key, value.get_value().shape) for key, value
                      in Selector(generator).get_params().items()],
                     width=120))

    if args.mode == "train":
        seed = 1
        rng = numpy.random.RandomState(seed)
        batch_size = 10
        generator.initialize()
        cost = ComputationGraph(
            generator.cost(tensor.tensor3('x'),
                           params=tensor.matrix("params")).sum())
        cost = apply_noise(cost, cost.inputs, args.input_noise)

        gh_model = GroundhogModel(generator, cost)
        state = GroundhogState(args.prefix, batch_size,
                               learning_rate=0.0001).as_dict()
        data = SeriesIterator(rng, function, 100, batch_size)
        trainer = SGD(gh_model, state, data)
        main_loop = MainLoop(data, None, None, gh_model, trainer, state, None)
        main_loop.load()
        main_loop.main()
    elif args.mode == "plot":
        load_params(generator, args.prefix + "model.npz")

        params = tensor.matrix("params")
        sample = theano.function([params], generator.generate(
            params=params, n_steps=args.steps, batch_size=1))

        # Build a real list: on Python 3 ``map`` returns an iterator, which
        # ``numpy.array`` would not expand into a numeric array.
        param_values = numpy.array(
            [float(value) for value in args.params.split()],
            dtype=floatX)
        states, outputs, _ = sample(param_values[None, :])
        actual = outputs[:, 0, 0]
        desired = numpy.array([function(*(list(param_values) + [T]))
                               for T in range(args.steps)])
        print("MSE: {}".format(((actual - desired) ** 2).sum()))

        pyplot.plot(numpy.hstack([actual[:, None], desired[:, None]]))
        pyplot.show()
    else:
        assert False
示例12: train
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import initialize [as 别名]
def train():
    """Resume training from ``trainingdata.tar`` or start a new run.

    If the checkpoint file exists, the main loop is loaded from it.
    Otherwise the text is encoded to HDF5, a GatedRecurrent sequence
    generator is built and a fresh main loop is created. The main loop is
    then run.
    """
    checkpoint_path = 'trainingdata.tar'

    if os.path.isfile(checkpoint_path):
        with open(checkpoint_path, 'rb') as f:
            main_loop = load(f)
    else:
        hidden_size = 512
        filename = 'warpeace.hdf5'

        # Encode the raw text and write the dataset file.
        encoder = HDF5CharEncoder('warpeace_input.txt', 1000)
        encoder.write(filename)
        alphabet_len = encoder.length

        x = theano.tensor.lmatrix('x')

        feedback = LookupFeedback(alphabet_len, hidden_size, name='feedback')
        readout = Readout(
            readout_dim=alphabet_len,
            feedback_brick=feedback,
            source_names=['states'],
            emitter=RandomSoftmaxEmitter(),
            name='readout')

        transition = GatedRecurrent(activation=Tanh(), dim=hidden_size)
        transition.weights_init = IsotropicGaussian(0.01)

        gen = SequenceGenerator(readout=readout,
                                transition=transition,
                                weights_init=IsotropicGaussian(0.01),
                                biases_init=Constant(0),
                                name='sequencegenerator')
        gen.push_initialization_config()
        gen.initialize()

        cost = gen.cost(outputs=x)
        cost.name = 'cost'
        cg = ComputationGraph(cost)

        algorithm = GradientDescent(cost=cost,
                                    parameters=cg.parameters,
                                    step_rule=Scale(0.5))

        train_set = encoder.get_dataset()
        scheme = SequentialScheme(train_set.num_examples, batch_size=128)
        train_stream = DataStream.default_stream(
            train_set, iteration_scheme=scheme)

        extensions = [
            FinishAfter(),
            Printing(),
            Checkpoint('trainingdata.tar', every_n_epochs=10),
            ShowOutput(every_n_epochs=10),
        ]
        main_loop = MainLoop(
            model=Model(cost),
            data_stream=train_stream,
            algorithm=algorithm,
            extensions=extensions)

    # NOTE(review): the run is assumed to apply to both branches, so that a
    # loaded main loop is resumed as well -- confirm against the original.
    main_loop.run()
示例13: SequenceGenerator
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import initialize [as 别名]
source_names=["states#2"],
emitter=SoftmaxEmitter(name="emitter"),
feedback_brick=LookupFeedback(alphabet_size,
feedback_dim=alphabet_size,
name="feedback"),
name="readout")
seq_gen = SequenceGenerator(readout=readout,
transition=rnn,
weights_init=IsotropicGaussian(0.01),
biases_init=Constant(0),
name="generator")
seq_gen.push_initialization_config()
rnn.weights_init = Orthogonal()
seq_gen.initialize()
# z markov_tutorial
x = tensor.lvector('features')
x = x.reshape( (x.shape[0], 1) )
cost = aggregation.mean(seq_gen.cost_matrix(x[:,:]).sum(), x.shape[1])
cost.name = "sequence_log_likelihood"
cost_cg = ComputationGraph(cost)
# theano.printing.pydotprint(cost, outfile="./pics/symbolic_graph_unopt.png", var_with_name_simple=True)
algorithm = GradientDescent(
cost=cost,
parameters=list(Selector(seq_gen).get_parameters().values()),
step_rule=Scale(0.001))
示例14: main
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import initialize [as 别名]
def main(mode, save_path, steps, time_budget, reset):
    """Train a Markov-chain generator with Pylearn2, or sample from one.

    Parameters
    ----------
    mode
        ``"train"`` or ``"sample"``; any other value hits ``assert False``.
    save_path
        Where the Pylearn2 model is saved to and loaded from.
    steps
        Number of steps to generate in sample mode.
    time_budget
        Passed to the Pylearn2 training main loop.
    reset
        If true, an existing file at ``save_path`` is ignored in train mode
        and a new model is created.
    """
    num_states = ChainDataset.num_states

    if mode == "train":
        # Experiment configuration
        rng = numpy.random.RandomState(1)
        batch_size = 50
        seq_len = 100
        dim = 10
        feedback_dim = 8

        # Build the bricks and initialize them
        transition = GatedRecurrent(name="transition", activation=Tanh(),
                                    dim=dim)
        readout = LinearReadout(
            readout_dim=num_states, source_names=["states"],
            emitter=SoftmaxEmitter(name="emitter"),
            feedbacker=LookupFeedback(
                num_states, feedback_dim, name='feedback'),
            name="readout")
        generator = SequenceGenerator(
            readout, transition,
            weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
            name="generator")
        generator.push_initialization_config()
        transition.weights_init = Orthogonal()
        generator.initialize()

        param_shapes = [
            (key, value.get_value().shape)
            for key, value in Selector(generator).get_params().items()]
        logger.info("Parameters:\n" +
                    pprint.pformat(param_shapes, width=120))
        logger.info("Markov chain entropy: {}".format(
            ChainDataset.entropy))
        logger.info("Expected min error: {}".format(
            -ChainDataset.entropy * seq_len * batch_size))

        # Reuse a saved model unless a reset was requested.
        resume = os.path.isfile(save_path) and not reset
        if resume:
            model = Pylearn2Model.load(save_path)
        else:
            model = Pylearn2Model(generator)

        # Build the cost computation graph.
        # Note: would be probably nicer to make cost part of the model.
        x = tensor.ltensor3('x')
        cost = Pylearn2Cost(model.brick.cost(x[:, :, 0]).sum())

        dataset = ChainDataset(rng, seq_len)
        learning_rule = Pylearn2LearningRule(
            SGDLearningRule(),
            dict(training_objective=cost.cost))
        sgd = SGD(learning_rate=0.0001, cost=cost,
                  batch_size=batch_size, batches_per_iter=10,
                  monitoring_dataset=dataset,
                  monitoring_batch_size=batch_size,
                  monitoring_batches=1,
                  learning_rule=learning_rule)
        train = Pylearn2Train(dataset, model, algorithm=sgd,
                              save_path=save_path, save_freq=10)
        train.main_loop(time_budget=time_budget)
    elif mode == "sample":
        model = Pylearn2Model.load(save_path)
        generator = model.brick

        generated = generator.generate(
            n_steps=steps, batch_size=1, iterate=True)
        sample = ComputationGraph(generated).function()
        # Keep only the first (and only) batch element of each result.
        states, outputs, costs = [data[:, 0] for data in sample()]

        numpy.set_printoptions(precision=3, suppress=True)
        print("Generation cost:\n{}".format(costs.sum()))

        # Empirical state frequencies versus the chain's equilibrium.
        freqs = numpy.bincount(outputs).astype(floatX)
        freqs /= freqs.sum()
        print("Frequencies:\n {} vs {}".format(freqs,
                                               ChainDataset.equilibrium))

        # Empirical transition frequencies, normalized per source state.
        trans_freqs = numpy.zeros((num_states, num_states), dtype=floatX)
        for current, following in zip(outputs, outputs[1:]):
            trans_freqs[current, following] += 1
        trans_freqs /= trans_freqs.sum(axis=1)[:, None]
        print("Transition frequencies:\n{}\nvs\n{}".format(
            trans_freqs, ChainDataset.trans_prob))
    else:
        assert False
示例15: test_integer_sequence_generator
# 需要导入模块: from blocks.bricks.sequence_generators import SequenceGenerator [as 别名]
# 或者: from blocks.bricks.sequence_generators.SequenceGenerator import initialize [as 别名]
def test_integer_sequence_generator():
    """Test a sequence generator with integer outputs.

    Such sequence generators can be used to e.g. model language. The test
    exercises ``cost_matrix``, ``cost``, the auxiliary per-sequence-element
    cost variable, ``generate``, and checks that the cost does not depend
    on masked-out positions.
    """
    rng = numpy.random.RandomState(1234)

    n_steps = 10
    batch_size = 30
    dim = 20
    feedback_dim = 3
    readout_dim = 5

    transition = GatedRecurrent(dim=dim, activation=Tanh(),
                                weights_init=Orthogonal())
    readout = Readout(
        readout_dim=readout_dim, source_names=["states"],
        emitter=SoftmaxEmitter(theano_seed=1234),
        feedback_brick=LookupFeedback(readout_dim, feedback_dim))
    generator = SequenceGenerator(
        readout, transition,
        weights_init=IsotropicGaussian(0.1), biases_init=Constant(0),
        seed=1234)
    generator.initialize()

    # Test 'cost_matrix' method
    y = tensor.lmatrix('y')
    mask = tensor.matrix('mask')
    costs = generator.cost_matrix(y, mask)
    assert costs.ndim == 2
    costs_fun = theano.function([y, mask], [costs])
    y_test = rng.randint(readout_dim, size=(n_steps, batch_size))
    m_test = numpy.ones((n_steps, batch_size), dtype=floatX)
    costs_val = costs_fun(y_test, m_test)[0]
    assert costs_val.shape == (n_steps, batch_size)
    assert_allclose(costs_val.sum(), 482.827, rtol=1e-5)

    # Test 'cost' method
    cost = generator.cost(y, mask)
    assert cost.ndim == 0
    cost_fun = theano.function([y, mask], [cost])
    cost_val = cost_fun(y_test, m_test)
    assert_allclose(cost_val, 16.0942, rtol=1e-5)

    # Test 'AUXILIARY' variable 'per_sequence_element' in 'cost' method
    cg = ComputationGraph([cost])
    var_filter = VariableFilter(roles=[AUXILIARY])
    aux_var_name = '_'.join([generator.name, generator.cost.name,
                             'per_sequence_element'])
    matching = [el for el in var_filter(cg.variables)
                if el.name == aux_var_name]
    cost_per_el = matching[0]
    assert cost_per_el.ndim == 0
    cost_per_el_fun = theano.function([y, mask], [cost_per_el])
    cost_per_el_val = cost_per_el_fun(y_test, m_test)
    assert_allclose(cost_per_el_val, 1.60942, rtol=1e-5)

    # Test generate; the graph's updates are passed to the compiled function.
    states, outputs, costs = generator.generate(
        iterate=True, batch_size=batch_size, n_steps=n_steps)
    cg = ComputationGraph(states + outputs + costs)
    run_generation = theano.function(
        [], [states, outputs, costs],
        updates=cg.updates)
    states_val, outputs_val, costs_val = run_generation()
    assert states_val.shape == (n_steps, batch_size, dim)
    assert outputs_val.shape == (n_steps, batch_size)
    assert outputs_val.dtype == 'int64'
    assert costs_val.shape == (n_steps, batch_size)
    assert_allclose(states_val.sum(), -17.91811, rtol=1e-5)
    assert_allclose(costs_val.sum(), 482.863, rtol=1e-5)
    assert outputs_val.sum() == 630

    # Test masks agnostic results of cost
    short_targets = [[1], [2]]
    short_mask = [[1], [1]]
    padded_targets = [[3, 1], [4, 2], [2, 0]]
    padded_mask = [[1, 1], [1, 1], [1, 0]]
    cost1 = costs_fun(short_targets, short_mask)[0]
    cost2 = costs_fun(padded_targets, padded_mask)[0]
    assert_allclose(cost1.sum(), cost2[:, 1].sum(), rtol=1e-5)