This page collects typical usage examples of the Python method blocks.model.Model.get_top_bricks. If you are wondering what Model.get_top_bricks does and how to use it in practice, the curated code examples below should help. You can also explore further usage examples of its containing class, blocks.model.Model.
Seven code examples of the Model.get_top_bricks method are shown below, sorted by popularity by default.
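Before the examples, here is a minimal sketch of the pattern they all share: wrap a Theano output or cost variable in a Model, then call get_top_bricks() to obtain the top-level bricks (those not nested inside any other brick) and initialize them. This is an illustration only; the brick sizes and names below are made up and do not come from any example on this page.

from theano import tensor
from blocks.bricks import MLP, Tanh
from blocks.initialization import Constant, IsotropicGaussian
from blocks.model import Model

x = tensor.matrix('x')
mlp = MLP([Tanh(), Tanh()], [784, 100, 10], name="mlp")  # illustrative sizes
y = mlp.apply(x)

model = Model(y)
# get_top_bricks() returns the bricks reachable from the model's outputs
# that are not children of any other brick -- here just [mlp].
for brick in model.get_top_bricks():
    brick.weights_init = IsotropicGaussian(0.01)
    brick.biases_init = Constant(0.)
    brick.initialize()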
Example 1: main
# Required import: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_top_bricks [as alias]
def main():
    import configurations
    from stream import DStream
    logger = logging.getLogger(__name__)
    cfig = getattr(configurations, 'get_config_penn')()
    rnnlm = Rnnlm(cfig['vocabsize'], cfig['nemb'], cfig['nhids'])
    rnnlm.weights_init = IsotropicGaussian(0.1)
    rnnlm.biases_init = Constant(0.)
    rnnlm.push_initialization_config()
    rnnlm.generator.transition.weights_init = Orthogonal()

    sentence = tensor.lmatrix('sentence')
    sentence_mask = tensor.matrix('sentence_mask')
    batch_cost = rnnlm.cost(sentence, sentence_mask).sum()
    batch_size = sentence.shape[1].copy(name='batch_size')
    cost = aggregation.mean(batch_cost, batch_size)
    cost.name = "sequence_log_likelihood"
    logger.info("Cost graph is built")

    model = Model(cost)
    parameters = model.get_parameter_dict()
    logger.info("Parameters:\n" +
                pprint.pformat(
                    [(key, value.get_value().shape) for key, value
                     in parameters.items()],
                    width=120))

    for brick in model.get_top_bricks():
        brick.initialize()

    cg = ComputationGraph(cost)
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=CompositeRule([StepClipping(10.0), Scale(0.01)]))

    gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
    step_norm = aggregation.mean(algorithm.total_step_norm)
    monitored_vars = [cost, gradient_norm, step_norm]

    train_monitor = TrainingDataMonitoring(variables=monitored_vars,
                                           after_batch=True,
                                           before_first_epoch=True,
                                           prefix='tra')

    extensions = [train_monitor, Timing(), Printing(after_batch=True),
                  FinishAfter(after_n_epochs=1000),
                  Printing(every_n_batches=1)]

    train_stream = DStream(datatype='train', config=cfig)
    main_loop = MainLoop(model=model,
                         data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
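A note on the initialization pattern above: assigning weights_init and biases_init on the top-level brick only records the schemes; push_initialization_config() propagates them to child bricks, which is what lets the example then override the transition's scheme with Orthogonal(). No parameter values are actually written until brick.initialize() is called on each brick returned by model.get_top_bricks().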
Example 2: test_model
# Required import: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_top_bricks [as alias]
def test_model():
    x = tensor.matrix('x')
    mlp1 = MLP([Tanh(), Tanh()], [10, 20, 30], name="mlp1")
    mlp2 = MLP([Tanh()], [30, 40], name="mlp2")
    h1 = mlp1.apply(x)
    h2 = mlp2.apply(h1)
    model = Model(h2)
    assert model.get_top_bricks() == [mlp1, mlp2]
    # The order of parameters returned is deterministic but
    # not sensible.
    assert list(model.get_parameter_dict().items()) == [
        ('/mlp2/linear_0.b', mlp2.linear_transformations[0].b),
        ('/mlp1/linear_1.b', mlp1.linear_transformations[1].b),
        ('/mlp1/linear_0.b', mlp1.linear_transformations[0].b),
        ('/mlp1/linear_0.W', mlp1.linear_transformations[0].W),
        ('/mlp1/linear_1.W', mlp1.linear_transformations[1].W),
        ('/mlp2/linear_0.W', mlp2.linear_transformations[0].W)]

    # Test getting and setting parameter values
    mlp3 = MLP([Tanh()], [10, 10])
    mlp3.allocate()
    model3 = Model(mlp3.apply(x))
    parameter_values = {
        '/mlp/linear_0.W': 2 * numpy.ones((10, 10),
                                          dtype=theano.config.floatX),
        '/mlp/linear_0.b': 3 * numpy.ones(10, dtype=theano.config.floatX)}
    model3.set_parameter_values(parameter_values)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[0].get_value() == 2)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[1].get_value() == 3)
    got_parameter_values = model3.get_parameter_values()
    assert len(got_parameter_values) == len(parameter_values)
    for name, value in parameter_values.items():
        assert_allclose(value, got_parameter_values[name])

    # Test that an exception is raised if parameter shapes don't match
    def helper():
        parameter_values = {
            '/mlp/linear_0.W': 2 * numpy.ones((11, 11),
                                              dtype=theano.config.floatX),
            '/mlp/linear_0.b': 3 * numpy.ones(11,
                                              dtype=theano.config.floatX)}
        model3.set_parameter_values(parameter_values)
    assert_raises(ValueError, helper)

    # Test name conflict handling
    mlp4 = MLP([Tanh()], [10, 10])

    def helper():
        Model(mlp4.apply(mlp3.apply(x)))
    assert_raises(ValueError, helper)
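Because get_parameter_values() returns a plain name-to-ndarray dict and set_parameter_values() accepts the same mapping (as the test above demonstrates), a parameter checkpoint round-trip can be sketched with nothing more than pickle. This is only a sketch: 'params.pkl' is a hypothetical path, and model3 refers to the model built in the test.

import pickle

# Save the current parameter values (dict of name -> numpy array).
with open('params.pkl', 'wb') as f:
    pickle.dump(model3.get_parameter_values(), f)

# Later, restore them; shapes must match or ValueError is raised,
# as the test above also checks.
with open('params.pkl', 'rb') as f:
    model3.set_parameter_values(pickle.load(f))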
Example 3: main
# Required import: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_top_bricks [as alias]
def main(mode, save_path, num_batches, data_path=None):
    reverser = WordReverser(100, len(char2code), name="reverser")
    if mode == "train":
        # Data processing pipeline
        dataset_options = dict(dictionary=char2code, level="character",
                               preprocess=_lower)
        if data_path:
            dataset = TextFile(data_path, **dataset_options)
        else:
            dataset = OneBillionWord("training", [99], **dataset_options)
        data_stream = dataset.get_example_stream()
        data_stream = Filter(data_stream, _filter_long)
        data_stream = Mapping(data_stream, reverse_words,
                              add_sources=("targets",))
        data_stream = Batch(data_stream, iteration_scheme=ConstantScheme(10))
        data_stream = Padding(data_stream)
        data_stream = Mapping(data_stream, _transpose)

        # Initialization settings
        reverser.weights_init = IsotropicGaussian(0.1)
        reverser.biases_init = Constant(0.0)
        reverser.push_initialization_config()
        reverser.encoder.weights_init = Orthogonal()
        reverser.generator.transition.weights_init = Orthogonal()

        # Build the cost computation graph
        chars = tensor.lmatrix("features")
        chars_mask = tensor.matrix("features_mask")
        targets = tensor.lmatrix("targets")
        targets_mask = tensor.matrix("targets_mask")
        batch_cost = reverser.cost(
            chars, chars_mask, targets, targets_mask).sum()
        batch_size = chars.shape[1].copy(name="batch_size")
        cost = aggregation.mean(batch_cost, batch_size)
        cost.name = "sequence_log_likelihood"
        logger.info("Cost graph is built")

        # Give an idea of what's going on
        model = Model(cost)
        parameters = model.get_parameter_dict()
        logger.info("Parameters:\n" +
                    pprint.pformat(
                        [(key, value.get_value().shape) for key, value
                         in parameters.items()],
                        width=120))

        # Initialize parameters
        for brick in model.get_top_bricks():
            brick.initialize()

        # Define the training algorithm.
        cg = ComputationGraph(cost)
        algorithm = GradientDescent(
            cost=cost, parameters=cg.parameters,
            step_rule=CompositeRule([StepClipping(10.0), Scale(0.01)]))

        # Fetch variables useful for debugging
        generator = reverser.generator
        (energies,) = VariableFilter(
            applications=[generator.readout.readout],
            name_regex="output")(cg.variables)
        (activations,) = VariableFilter(
            applications=[generator.transition.apply],
            name=generator.transition.apply.states[0])(cg.variables)
        max_length = chars.shape[0].copy(name="max_length")
        cost_per_character = aggregation.mean(
            batch_cost, batch_size * max_length).copy(
                name="character_log_likelihood")
        min_energy = energies.min().copy(name="min_energy")
        max_energy = energies.max().copy(name="max_energy")
        mean_activation = abs(activations).mean().copy(
            name="mean_activation")
        observables = [
            cost, min_energy, max_energy, mean_activation,
            batch_size, max_length, cost_per_character,
            algorithm.total_step_norm, algorithm.total_gradient_norm]
        for name, parameter in parameters.items():
            observables.append(parameter.norm(2).copy(name + "_norm"))
            observables.append(algorithm.gradients[parameter].norm(2).copy(
                name + "_grad_norm"))

        # Construct the main loop and start training!
        average_monitoring = TrainingDataMonitoring(
            observables, prefix="average", every_n_batches=10)
        main_loop = MainLoop(
            model=model,
            data_stream=data_stream,
            algorithm=algorithm,
            extensions=[
                Timing(),
                TrainingDataMonitoring(observables, after_batch=True),
                average_monitoring,
                FinishAfter(after_n_batches=num_batches)
                # This shows a way to handle NaN emerging during
                # training: simply finish it.
                .add_condition(["after_batch"], _is_nan),
                # Saving the model and the log separately is convenient,
                # because loading the whole pickle takes quite some time.
                Checkpoint(save_path, every_n_batches=500,
#......... part of the code omitted here .........
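The snippet above relies on a helper _is_nan that the excerpt does not show. In Blocks, an extension condition callback receives the main loop log; a definition consistent with that usage would be the following. Treat it as a plausible reconstruction, not part of the excerpt.

import math

def _is_nan(log):
    # Stop training as soon as the monitored gradient norm turns NaN.
    return math.isnan(log.current_row['total_gradient_norm'])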
Example 4: main
# Required import: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_top_bricks [as alias]
#......... part of the code omitted here .........
# Build the cost computation graph [steps, batch_size, 3]
x = T.tensor3('features', dtype=floatX)
if debug:
    x.tag.test_value = np.ones((max_length, batch_size, 3)).astype(floatX)
x = x[:max_length, :, :]  # has to be after setting test_value
cost = generator.cost(x)
cost.name = "sequence_log_likelihood"

# Give an idea of what's going on
model = Model(cost)
params = model.get_params()
logger.info("Parameters:\n" +
            pprint.pformat(
                [(key, value.get_value().shape) for key, value
                 in params.items()],
                width=120))

model_size = 0
for v in params.itervalues():
    s = v.get_value().shape
    model_size += s[0] * (s[1] if len(s) > 1 else 1)
logger.info("Total number of parameters %d" % model_size)

#------------------------------------------------------------
extensions = []
if old_model_name == 'continue':
    extensions.append(LoadFromDump(jobname))
elif old_model_name:
    # or you can just load the weights without state using:
    old_params = LoadFromDump(old_model_name).manager.load_parameters()
    model.set_param_values(old_params)
else:
    # Initialize parameters
    for brick in model.get_top_bricks():
        brick.initialize()

if sample:
    assert old_model_name and old_model_name != 'continue'
    Sample(generator, steps=max_length, path=old_model_name).do(None)
    exit(0)

#------------------------------------------------------------
# Define the training algorithm.
cg = ComputationGraph(cost)
if dropout > 0.:
    from blocks.roles import INPUT, OUTPUT
    dropout_target = VariableFilter(roles=[OUTPUT],
                                    bricks=transitions,
                                    name_regex='states')(cg.variables)
    print('# dropout %d' % len(dropout_target))
    cg = apply_dropout(cg, dropout_target, dropout)
    opt_cost = cg.outputs[0]
else:
    opt_cost = cost

if step_method == 'adam':
    step_rule = Adam(learning_rate)
elif step_method == 'rmsprop':
    step_rule = RMSProp(learning_rate, decay_rate=0.95)
elif step_method == 'adagrad':
    step_rule = AdaGrad(learning_rate)
elif step_method == 'adadelta':
    step_rule = AdaDelta()
elif step_method == 'scale':
    step_rule = Scale(learning_rate)
else:
Example 5: open
# Required import: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_top_bricks [as alias]
vocabs = pkl.load(f)
word_vocab, rel_vocab = vocabs['word'], vocabs['rel']
with open('dataset/trainXY.json') as f:
    train = json.load(f)
train = wrap_stream(train)
with open('dataset/testXY.json') as f:
    test = json.load(f)
test = wrap_stream(test)

model = LSTMModel(len(vocabs['word']), n_mem, len(vocabs['rel']))
cg = ComputationGraph(model.cost)
bricks_model = Model(model.cost)
for brick in bricks_model.get_top_bricks():
    brick.initialize()
model.lookup.W.set_value(
    vocabs['word'].get_embeddings().astype(theano.config.floatX))
if dropout:
    pass
    # logger.info('Applying dropout of {}'.format(dropout))
    # lstm_dropout = [v for v in cg.intermediary_variables
    #                 if v.name in {'W_cell_to_in', 'W_cell_to_out'}]
    # cg = apply_dropout(cg, lstm_dropout, drop_prob=dropout)

# summary of what's going on
parameters = bricks_model.get_parameter_dict()
logger.info("Parameters:\n" +
            pprint.pformat(
                [(key, value.get_value().shape, value.get_value().mean())
                 for key, value in parameters.items()],
Example 6: Model
# Required import: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_top_bricks [as alias]
from blocks.bricks import Tanh, Softmax, Linear, MLP, Identity, Rectifier
from blocks.bricks.lookup import LookupTable

import data
from paramsaveload import SaveLoadParams

config = importlib.import_module('.deepmind_attentive_reader', 'config')

path = os.path.join(os.getenv("DATAPATH"), "deepmind-qa/cnn/questions/training")
vocab_path = os.path.join(os.getenv("DATAPATH"),
                          "deepmind-qa/cnn/stats/training/vocab.txt")
ds, stream = data.setup_datastream(path, vocab_path, config)

model_path = "~/code/deepmind_qa/deepmind_attentive_reader_epoch2step33900.pkl"
m = config.Model(config, ds.vocab_size)
model = Model(m.sgd_cost)
SaveLoadParams(path=model_path, model=model).do_load()
bricks = model.get_top_bricks()
print "brick load completed..."

def get_prediction_function():
    question = tensor.imatrix('question')
    question_mask = tensor.imatrix('question_mask')
    context = tensor.imatrix('context')
    context_mask = tensor.imatrix('context_mask')
    answer = tensor.ivector('answer')
    candidates = tensor.imatrix('candidates')
    candidates_mask = tensor.imatrix('candidates_mask')

    """
    question = question.dimshuffle(1, 0)
    question_mask = question_mask.dimshuffle(1, 0)
    context = context.dimshuffle(1, 0)
Example 7: main
# Required import: from blocks.model import Model [as alias]
# Or: from blocks.model.Model import get_top_bricks [as alias]
def main(mode, save_path, num_batches, data_path=None):
    reverser = WordReverser(100, len(char2code), name="reverser")
    if mode == "train":
        # Data processing pipeline
        dataset_options = dict(dictionary=char2code, level="character",
                               preprocess=_lower)
        if data_path:
            dataset = TextFile(data_path, **dataset_options)
        else:
            dataset = OneBillionWord("training", [99], **dataset_options)
        data_stream = dataset.get_example_stream()
        data_stream = Filter(data_stream, _filter_long)
        data_stream = Mapping(data_stream, reverse_words,
                              add_sources=("targets",))
        data_stream = Batch(data_stream, iteration_scheme=ConstantScheme(10))
        data_stream = Padding(data_stream)
        data_stream = Mapping(data_stream, _transpose)

        # Initialization settings
        reverser.weights_init = IsotropicGaussian(0.1)
        reverser.biases_init = Constant(0.0)
        reverser.push_initialization_config()
        reverser.encoder.weights_init = Orthogonal()
        reverser.generator.transition.weights_init = Orthogonal()

        # Build the cost computation graph
        chars = tensor.lmatrix("features")
        chars_mask = tensor.matrix("features_mask")
        targets = tensor.lmatrix("targets")
        targets_mask = tensor.matrix("targets_mask")
        batch_cost = reverser.cost(
            chars, chars_mask, targets, targets_mask).sum()
        batch_size = chars.shape[1].copy(name="batch_size")
        cost = aggregation.mean(batch_cost, batch_size)
        cost.name = "sequence_log_likelihood"
        logger.info("Cost graph is built")

        # Give an idea of what's going on
        model = Model(cost)
        parameters = model.get_parameter_dict()
        logger.info("Parameters:\n" +
                    pprint.pformat(
                        [(key, value.get_value().shape) for key, value
                         in parameters.items()],
                        width=120))

        # Initialize parameters
        for brick in model.get_top_bricks():
            brick.initialize()

        # Define the training algorithm.
        cg = ComputationGraph(cost)
        algorithm = GradientDescent(
            cost=cost, parameters=cg.parameters,
            step_rule=CompositeRule([StepClipping(10.0), Scale(0.01)]))

        # Fetch variables useful for debugging
        generator = reverser.generator
        (energies,) = VariableFilter(
            applications=[generator.readout.readout],
            name_regex="output")(cg.variables)
        (activations,) = VariableFilter(
            applications=[generator.transition.apply],
            name=generator.transition.apply.states[0])(cg.variables)
        max_length = chars.shape[0].copy(name="max_length")
        cost_per_character = aggregation.mean(
            batch_cost, batch_size * max_length).copy(
                name="character_log_likelihood")
        min_energy = energies.min().copy(name="min_energy")
        max_energy = energies.max().copy(name="max_energy")
        mean_activation = abs(activations).mean().copy(
            name="mean_activation")
        observables = [
            cost, min_energy, max_energy, mean_activation,
            batch_size, max_length, cost_per_character,
            algorithm.total_step_norm, algorithm.total_gradient_norm]
        for name, parameter in parameters.items():
            observables.append(parameter.norm(2)
                               .copy(name=name + "_norm"))
            observables.append(algorithm.gradients[parameter].norm(2)
                               .copy(name=name + "_grad_norm"))

        # Construct the main loop and start training!
        average_monitoring = TrainingDataMonitoring(
            observables, prefix="average", every_n_batches=10)
        main_loop = MainLoop(
            model=model,
            data_stream=data_stream,
            algorithm=algorithm,
            extensions=[
                Timing(),
                TrainingDataMonitoring(observables, after_batch=True),
                average_monitoring,
                FinishAfter(after_n_batches=num_batches)
                # This shows a way to handle NaN emerging during
                # training: simply finish it.
                .add_condition(["after_batch"], _is_nan),
                # Saving the model and the log separately is convenient,
                # because loading the whole pickle takes quite some time.
                Checkpoint(save_path, every_n_batches=500,
#......... part of the code omitted here .........