Python cost.CategoricalCrossEntropy Class Code Examples

This article collects typical usage examples of the Python class blocks.bricks.cost.CategoricalCrossEntropy. If you are struggling with questions such as what exactly the CategoricalCrossEntropy class does, or how to use it in practice, the curated class code examples below may help.


Below are 15 code examples of the CategoricalCrossEntropy class, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
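
Before the full examples, here is a minimal, self-contained sketch of the pattern they all share: build a model that outputs class probabilities, then apply CategoricalCrossEntropy to the targets and those probabilities. It assumes a working Blocks/Theano installation; the network shape and the source names 'features'/'targets' are illustrative choices for this sketch, not requirements of the API.

# Minimal usage sketch (assumes Blocks and Theano are installed; the network
# dimensions and the source names 'features'/'targets' are illustrative only).
from theano import tensor
from blocks.bricks import MLP, Tanh, Softmax
from blocks.bricks.cost import CategoricalCrossEntropy, MisclassificationRate
from blocks.initialization import IsotropicGaussian, Constant

x = tensor.matrix('features')    # batch of flattened input vectors
y = tensor.lmatrix('targets')    # integer class labels, shape (batch, 1)

mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
          weights_init=IsotropicGaussian(0.01),
          biases_init=Constant(0))
mlp.initialize()
probs = mlp.apply(x)             # class probabilities from the softmax layer

# Cost bricks take the targets first and the predictions second.
cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
cost.name = 'cross_entropy'
error_rate = MisclassificationRate().apply(y.flatten(), probs)
error_rate.name = 'error_rate'

Note that the cost brick's apply method takes the targets first and the predictions second. The examples below differ mainly in how the probabilities are produced (an MLP, an LSTM with attention, an attention-sum reader, a PixelCNN) and in whether the targets arrive as an int vector or as an lmatrix that must be flattened first.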

Example 1: setup_model

def setup_model(configs):
    tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5)
    # shape: T x B x C x X x Y
    input_ = tensor5("features")
    tensor3 = theano.tensor.TensorType(config.floatX, (False,) * 3)
    locs = tensor3("locs")
    # shape: B x Classes
    target = T.ivector("targets")

    model = LSTMAttention(configs, weights_init=Glorot(), biases_init=Constant(0))
    model.initialize()

    (h, c, location, scale, alpha, patch, down_sampled_input, conved_part_1, conved_part_2, pre_lstm) = model.apply(
        input_, locs
    )

    model.location = location
    model.scale = scale
    model.alpha = alpha
    model.patch = patch

    classifier = MLP(
        [Rectifier(), Softmax()], configs["classifier_dims"], weights_init=Glorot(), biases_init=Constant(0)
    )
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    cost.name = "CE"
    error_rate = MisclassificationRate().apply(target, probabilities)
    error_rate.name = "ER"
    model.cost = cost
    model.error_rate = error_rate
    model.probabilities = probabilities

    if configs["load_pretrained"]:
        blocks_model = Model(model.cost)
        all_params = blocks_model.parameters
        with open("VGG_CNN_params.npz") as f:
            loaded = np.load(f)
            all_conv_params = loaded.keys()
            for param in all_params:
                if param.name in loaded.keys():
                    assert param.get_value().shape == loaded[param.name].shape
                    param.set_value(loaded[param.name])
                    all_conv_params.pop(all_conv_params.index(param.name))
        print "the following parameters did not match: " + str(all_conv_params)

    if configs["test_model"]:
        print "TESTING THE MODEL: CHECK THE INPUT SIZE!"
        cg = ComputationGraph(model.cost)
        f = theano.function(cg.inputs, [model.cost], on_unused_input="ignore", allow_input_downcast=True)
        data = configs["get_streams"](configs["batch_size"])[0].get_epoch_iterator().next()
        f(data[1], data[0], data[2])

        print "Test passed! ;)"

    model.monitorings = [cost, error_rate]

    return model
Developer: negar-rostamzadeh, Project: rna, Lines: 60, Source: cooking.py

Example 2: main

def main(save_to, num_epochs, bokeh=False):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(
        cost=cost, params=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      DataStream(mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if bokeh:
        extensions.append(Plot(
            'MNIST example',
            channels=[
                ['test_final_cost',
                 'test_misclassificationrate_apply_error_rate'],
                ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        DataStream(mnist_train,
                   iteration_scheme=SequentialScheme(
                       mnist_train.num_examples, 50)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()
Developer: kyunghyuncho, Project: blocks, Lines: 55, Source: mnist.py

Example 3: apply

def apply(self, input_, target):
    mlp = MLP(self.non_lins, self.dims,
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0),
              name=self.name)
    mlp.initialize()
    probs = mlp.apply(T.flatten(input_, outdim=2))
    probs.name = 'probs'
    cost = CategoricalCrossEntropy().apply(target.flatten(), probs)
    cost.name = "CE"
    self.outputs = {}
    self.outputs['probs'] = probs
    self.outputs['cost'] = cost
Developer: wavelets, Project: LSTM-Attention, Lines: 13, Source: model.py

Example 4: main

def main(save_to, num_epochs, batch_size):
    mlp = MLP([Tanh(), Tanh(), Tanh(), Softmax()], [3072, 4096, 1024, 512, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tt.tensor4('features', dtype='float32')
    y = tt.vector('label', dtype='int32')

    probs = mlp.apply(x.reshape((-1,3072)))
    cost = CategoricalCrossEntropy().apply(y, probs)
    error_rate = MisclassificationRate().apply(y, probs)

    cg = ComputationGraph([cost])
    ws = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * sum(([(w**2).sum() for w in ws]))
    cost.name = 'final_cost'

    train_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10', is_train=True)
    valid_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10', is_train=False)

    train_stream = train_dataset.get_stream(batch_size)
    valid_stream = valid_dataset.get_stream(batch_size)

    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Adam(learning_rate=0.001))
    extensions = [Timing(),
                  LogExtension('/home/belohlavek/ALI/mlp.log'),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring([cost, error_rate], valid_stream, prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate, aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    main_loop = MainLoop(algorithm,
                         train_stream,
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
Developer: oplatek, Project: ALI, Lines: 43, Source: cifar_mlp.py

Example 5: main

def main(job_id, params, config_file='params.ec'):
    config = ConfigParser.ConfigParser()
    config.readfp(open('./configs/{}'.format(config_file)))

    pr = pprint.PrettyPrinter(indent=4)
    pr.pprint(config)

    net_name = config.get('hyperparams', 'net_name', 'adni')
    struct_name = net_name.split('_')[0]

    max_epoch = int(config.get('hyperparams', 'max_iter', 100))
    base_lr = float(config.get('hyperparams', 'base_lr', 0.01))
    train_batch = int(config.get('hyperparams', 'train_batch', 256))
    valid_batch = int(config.get('hyperparams', 'valid_batch', 512))
    test_batch = int(config.get('hyperparams', 'valid_batch', 512))

    W_sd = float(config.get('hyperparams', 'W_sd', 0.01))
    W_mu = float(config.get('hyperparams', 'W_mu', 0.0))
    b_sd = float(config.get('hyperparams', 'b_sd', 0.01))
    b_mu = float(config.get('hyperparams', 'b_mu', 0.0))

    hidden_units = int(config.get('hyperparams', 'hidden_units', 32))
    input_dropout_ratio = float(config.get('hyperparams', 'input_dropout_ratio', 0.2))
    dropout_ratio = float(config.get('hyperparams', 'dropout_ratio', 0.2))
    weight_decay = float(config.get('hyperparams', 'weight_decay', 0.001))
    max_norm = float(config.get('hyperparams', 'max_norm', 100.0))
    solver = config.get('hyperparams', 'solver_type', 'rmsprop')
    data_file = config.get('hyperparams', 'data_file')
    side = config.get('hyperparams', 'side', 'b')

    input_dim = input_dims[struct_name]

    # Spearmint optimization parameters:
    if params:
        base_lr = float(params['base_lr'][0])
        dropout_ratio = float(params['dropout_ratio'][0])
        hidden_units = params['hidden_units'][0]
        weight_decay = params['weight_decay'][0]

    if 'adagrad' in solver:
        solver_type = CompositeRule([AdaGrad(learning_rate=base_lr), VariableClipping(threshold=max_norm)])
    else:
        solver_type = CompositeRule([RMSProp(learning_rate=base_lr), VariableClipping(threshold=max_norm)])


    data_file = config.get('hyperparams', 'data_file')

    if 'b' in side:
        train = H5PYDataset(data_file, which_set='train')
        valid = H5PYDataset(data_file, which_set='valid')
        test = H5PYDataset(data_file, which_set='test')
        x_l = tensor.matrix('l_features')
        x_r = tensor.matrix('r_features')
        x = tensor.concatenate([x_l, x_r], axis=1)

    else:
        train = H5PYDataset(data_file, which_set='train', sources=['{}_features'.format(side), 'targets'])
        valid = H5PYDataset(data_file, which_set='valid', sources=['{}_features'.format(side), 'targets'])
        test = H5PYDataset(data_file, which_set='test', sources=['{}_features'.format(side), 'targets'])
        x = tensor.matrix('{}_features'.format(side))

    y = tensor.lmatrix('targets')


    # Define a feed-forward net with an input, two hidden layers, and a softmax output:
    model = MLP(activations=[
        Rectifier(name='h1'),
        Rectifier(name='h2'),
        Softmax(name='output'),
    ],
                dims=[
                    input_dim[side],
                    hidden_units,
                    hidden_units,
                    2],
                weights_init=IsotropicGaussian(std=W_sd, mean=W_mu),
                biases_init=IsotropicGaussian(b_sd, b_mu))

    # Don't forget to initialize params:
    model.initialize()

    # y_hat is the output of the neural net with x as its inputs
    y_hat = model.apply(x)

    # Define a cost function to optimize, and a classification error rate.
    # Also apply the outputs from the net and corresponding targets:
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = 'error'

    # This is the model: before applying dropout
    model = Model(cost)

    # Need to define the computation graph for the cost func:
    cost_graph = ComputationGraph([cost])

    # This returns a list of weight vectors for each layer
    W = VariableFilter(roles=[WEIGHT])(cost_graph.variables)

    # Add some regularization to this model:
#......... part of the code omitted .........
Developer: TIGRLab, Project: NI-ML, Lines: 101, Source: ffnet.py

Example 6: Linear

                  update_prob=update_prob, name="rnn")
h1, sd = rnn.apply(pre_rnn[:, :, :h_dim],
                   pre_rnn[:, :, h_dim:],
                   drops, is_for_test)
h1_to_o = Linear(name='h1_to_o',
                 input_dim=h_dim,
                 output_dim=y_dim)
pre_softmax = h1_to_o.apply(h1)
softmax = Softmax()
shape = pre_softmax.shape
softmax_out = softmax.apply(pre_softmax.reshape((-1, y_dim)))
softmax_out = softmax_out.reshape(shape)
softmax_out.name = 'softmax_out'

# comparing only last time-step
cost = CategoricalCrossEntropy().apply(y, softmax_out[-1])
cost.name = 'CrossEntropy'
error_rate = MisclassificationRate().apply(y, softmax_out[-1])
error_rate.name = 'error_rate'

# Initialization
for brick in (x_to_h1, h1_to_o, rnn):
    brick.weights_init = Glorot()
    brick.biases_init = Constant(0)
    brick.initialize()

train_stream = get_stream('train', batch_size, h_dim, False)
data = train_stream.get_epoch_iterator(as_dict=True).next()
cg = ComputationGraph(cost)
f = theano.function(cg.inputs, cost)
print f(data['y'], data['x'], data['is_for_test'], data['drops'])
Developer: mohammadpz, Project: LSTM_Dropout, Lines: 31, Source: main.py

Example 7: BinaryCrossEntropy

            sampler = q_sampler,
            classifier=classifier_mlp)
draw.initialize()

#------------------------------------------------------------------------
x = tensor.matrix(u'features')
y = tensor.lmatrix(u'targets')
#y = theano.tensor.extra_ops.to_one_hot(tensor.lmatrix(u'targets'),2)

#probs, h_enc, c_enc, i_dec, h_dec, c_dec, center_y, center_x, delta = draw.reconstruct(x)
probs, h_enc, c_enc, center_y, center_x, delta = draw.reconstruct(x)

trim_probs = probs[-1,:,:] #Only take information from the last iteration
labels = y.flatten()
#cost = BinaryCrossEntropy().apply(labels, trim_probs)
cost = CategoricalCrossEntropy().apply(labels, trim_probs)  # use the flattened labels, matching error_rate below
error_rate = MisclassificationRate().apply(labels, trim_probs)
cost.name = "CCE"

#------------------------------------------------------------
cg = ComputationGraph([cost])
params = VariableFilter(roles=[PARAMETER])(cg.variables)

algorithm = GradientDescent(
    cost=cost, 
    parameters=params,
    step_rule=CompositeRule([
        StepClipping(10.), 
        Adam(learning_rate),
    ]),
    on_unused_sources='ignore',
Developer: drewlinsley, Project: draw_classify, Lines: 31, Source: fast-train-draw-sketch-classifier.py

Example 8: create_model


#......... part of the code omitted .........

        # bidirectional encoder that translates context
        context_encoder = self.create_bidi_encoder("context_encoder", embedding_dims, hidden_states)

        # bidirectional encoder for question
        question_encoder = self.create_bidi_encoder("question_encoder", embedding_dims, hidden_states)

        # Initialize the components (where not done upon creation)
        lookup.initialize()



        ###################
        # Wiring the components together
        #
        # Where present, the 3 letters at the end of the variable name identify its dimensions:
        # b ... position of the example within the batch
        # t ... position of the word within the document/question
        # f ... features of the embedding vector
        ###################

        ### Read the context document
        # Map token indices to word embeddings
        context_embedding_tbf = lookup.apply(context_bt.T)

        # Read the embedded context document using the bidirectional GRU and produce the contextual embedding of each word
        memory_encoded_btf = context_encoder.apply(context_embedding_tbf, context_mask_bt.T).dimshuffle(1,0,2)
        memory_encoded_btf.name = "memory_encoded_btf"

        ### Correspondingly, read the query
        x_embedded_tbf = lookup.apply(question_bt.T)
        x_encoded_btf = question_encoder.apply(x_embedded_tbf, question_mask_bt.T).dimshuffle(1,0,2)
        # The query encoding is a concatenation of the final states of the forward and backward GRU encoder
        x_forward_encoded_bf = x_encoded_btf[:,-1,0:hidden_states]
        x_backward_encoded_bf = x_encoded_btf[:,0,hidden_states:hidden_states*2]
        query_representation_bf = tt.concatenate([x_forward_encoded_bf,x_backward_encoded_bf],axis=1)

        # Compute the attention on each word in the context as a dot product of its contextual embedding and the query
        mem_attention_presoft_bt = tt.batched_dot(query_representation_bf, memory_encoded_btf.dimshuffle(0,2,1))

        # TODO is this pre-masking necessary?
        mem_attention_presoft_masked_bt = tt.mul(mem_attention_presoft_bt,context_mask_bt)

        # Normalize the attention using softmax
        mem_attention_bt = SoftmaxWithMask(name="memory_query_softmax").apply(mem_attention_presoft_masked_bt,context_mask_bt)

        if self.args.weighted_att:
            # compute weighted attention over original word vectors
            att_weighted_responses_bf = theano.tensor.batched_dot(mem_attention_bt, context_embedding_tbf.dimshuffle(1,0,2))


            # compare desired response to all candidate responses
            # select relevant candidate answer words
            candidates_embeddings_bfi = lookup.apply(candidates_bi).dimshuffle(0,2,1)

            # convert it to output symbol probabilities
            y_hat_presoft = tt.batched_dot(att_weighted_responses_bf, candidates_embeddings_bfi)
            y_hat = SoftmaxWithMask(name="output_softmax").apply(y_hat_presoft,candidates_bi_mask)

        else:
            # Sum the attention of each candidate word across the whole context document,
            # this is the key innovation of the model

            # TODO: Get rid of sentence-by-sentence processing?
            # TODO: Rewrite into matrix notation instead of scans?
            def sum_prob_of_word(word_ix, sentence_ixs, sentence_attention_probs):
                word_ixs_in_sentence = tt.eq(sentence_ixs,word_ix).nonzero()[0]
                return sentence_attention_probs[word_ixs_in_sentence].sum()

            def sum_probs_single_sentence(candidate_indices_i, sentence_ixs_t, sentence_attention_probs_t):
                result, updates = theano.scan(
                    fn=sum_prob_of_word,
                    sequences=[candidate_indices_i],
                    non_sequences=[sentence_ixs_t, sentence_attention_probs_t])
                return result

            def sum_probs_batch(candidate_indices_bt,sentence_ixs_bt, sentence_attention_probs_bt):
                result, updates = theano.scan(
                    fn=sum_probs_single_sentence,
                    sequences=[candidate_indices_bt, sentence_ixs_bt, sentence_attention_probs_bt],
                    non_sequences=None)
                return result

            # Sum the attention of each candidate word across the whole context document
            y_hat = sum_probs_batch(candidates_bi, context_bt, mem_attention_bt)
        y_hat.name = "y_hat"

        # We use the convention that ground truth is always at index 0, so the following are the target answers
        y = y.zeros_like()

        # We use Cross Entropy as the training objective
        cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
        cost.name = "cost"


        predicted_response_index = tt.argmax(y_hat,axis=1)
        accuracy = tt.eq(y,predicted_response_index).mean()
        accuracy.name = "accuracy"

        return cost, accuracy, mem_attention_bt, y_hat, context_bt, candidates_bi, candidates_bi_mask, y, context_mask_bt, question_bt, question_mask_bt
Developer: BinbinBian, Project: asreader, Lines: 101, Source: as_reader.py

Example 9: LookupTable

Xs = tensor.imatrix("context")
y = tensor.ivector('center')

w1 = LookupTable(name="w1", length=VOCAB_DIM, dim=EMBEDDING_DIM)
w2 = Linear(name='w2', input_dim=EMBEDDING_DIM, output_dim=VOCAB_DIM)

hidden = tensor.mean(w1.apply(Xs), axis=1)
y_hat = Softmax().apply(w2.apply(hidden))

w1.weights_init = w2.weights_init = IsotropicGaussian(0.01)
w1.biases_init = w2.biases_init = Constant(0)
w1.initialize()
w2.initialize()

cost = CategoricalCrossEntropy().apply(y, y_hat)

cg = ComputationGraph(cost)
W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)

cost = cost + 0.005 * (W1 ** 2).sum() + 0.005 * (W2 ** 2).sum()
cost.name = "loss"


#
# the actual training of the model
#
main = MainLoop(data_stream = DataStream.default_stream(
                    dataset,
                    iteration_scheme=SequentialScheme(dataset.num_instances, batch_size=512)),
                algorithm = GradientDescent(
Developer: Rene90, Project: dl4nlp, Lines: 30, Source: word2vec.py

Example 10: Linear

# In[5]:

from blocks.bricks import Linear, Logistic, Softmax


# In[10]:

hidden_layer_size = 100
input_to_hidden = Linear(name='input_to_hidden', input_dim=117, output_dim=hidden_layer_size)
h = Logistic().apply(input_to_hidden.apply(x))
hidden_to_output = Linear(name='hidden_to_output', input_dim=hidden_layer_size, output_dim=2)
y_hat = Softmax().apply(hidden_to_output.apply(h))

y = tensor.lmatrix('targets')
from blocks.bricks.cost import CategoricalCrossEntropy, MisclassificationRate
cost = CategoricalCrossEntropy().apply(y, y_hat)
error_rate = MisclassificationRate().apply(y.argmax(axis=1), y_hat)
error_rate.name = "error_rate"

# >>> from blocks.roles import WEIGHT
from blocks.graph import ComputationGraph
# >>> from blocks.filter import VariableFilter
cg = ComputationGraph(cost)
# >>> W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
# >>> cost = cost + 0.005 * (W1 ** 2).sum() + 0.005 * (W2 ** 2).sum()
# >>> cost.name = 'cost_with_regularization'
cost.name = 'cost_simple_xentropy'

from blocks.initialization import IsotropicGaussian, Constant
input_to_hidden.weights_init = hidden_to_output.weights_init = IsotropicGaussian(0.01)
input_to_hidden.biases_init = hidden_to_output.biases_init = Constant(0)
Developer: Copper-Head, Project: cogsys-deep-learning, Lines: 31, Source: mushrooms.py

Example 11: Softmax

probs = Softmax().apply(n5)

statistics_list=[(M1,S1,a1), (M2,S2,a2), (M3,S3,a3), (M4,S4,a4), (M5,S5,a5)]

# initialize_variables
# for variable (M,S) in variables:
# 	compute M and S in the whole data.

if normalization == 'bn2':
    for m,s,var in statistics_list:
        var.tag.aggregation_scheme = MeanAndVariance(var, var.shape[0], axis = 0)
        init_mn, init_var = DatasetEvaluator([var]).evaluate(stream_train)[var.name]
        m.set_value(init_mn.astype(floatX))
        s.set_value(sqrt(init_var).astype(floatX))

cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
cost.name = 'cost'
error_rate = MisclassificationRate().apply(y.flatten(), probs)
error_rate.name = 'error_rate'

cg = ComputationGraph([cost])
    
parameters = cg.parameters
# add gradient descent to M,S
if normalization == 'bn2':
    for m,s,var in statistics_list:
        parameters.extend([m,s])

algorithm = GradientDescent(
    cost=cost, parameters=parameters, step_rule=Adam(0.01))
Developer: anirudh9119, Project: SpeechSyn, Lines: 30, Source: bn2.py

Example 12: main

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("every_n_batches", type=int, default=[1], nargs=1)
    args = parser.parse_args()
    print("We were asked to sync with legion at every_n_batches = %s" % str(args.every_n_batches[0]))


    # The rest is a copy paste from the blocks tutorial, except for the inclusion of the sync extension
    # at the creation of the MainLoop blocks object.
    x = tensor.matrix('features')

    input_to_hidden = Linear(name='input_to_hidden', input_dim=784, output_dim=100)
    h = Rectifier().apply(input_to_hidden.apply(x))
    hidden_to_output = Linear(name='hidden_to_output', input_dim=100, output_dim=10)
    y_hat = Softmax().apply(hidden_to_output.apply(h))

    y = tensor.lmatrix('targets')

    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)

    cg = ComputationGraph(cost)

    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    
    cost = cost + 0.005 * (W1 ** 2).sum() + 0.005 * (W2 ** 2).sum()
    cost.name = 'cost_with_regularization'

    input_to_hidden.weights_init = hidden_to_output.weights_init = IsotropicGaussian(0.01)
    input_to_hidden.biases_init = hidden_to_output.biases_init = Constant(0)
    input_to_hidden.initialize()
    hidden_to_output.initialize()

    mnist = MNIST(("train",))
    data_stream = Flatten(
        DataStream.default_stream(
            mnist,
            iteration_scheme=SequentialScheme(mnist.num_examples, batch_size=256)))

    algorithm = GradientDescent(
        cost=cost,
        params=cg.parameters,
        step_rule=Scale(learning_rate=0.1)
    )

    mnist_test = MNIST(("test",))
    
    data_stream_test = Flatten(DataStream.default_stream(
        mnist_test,
        iteration_scheme=SequentialScheme(mnist_test.num_examples,
                                          batch_size=1024)))

    monitor = DataStreamMonitoring(variables=[cost],
                                   data_stream=data_stream_test,
                                   prefix="test")

    # Except for this line
    b1, b2 = VariableFilter(roles=[BIAS])(cg.variables)
    
    main_loop = MainLoop(data_stream=data_stream,
                         algorithm=algorithm,
                         extensions=[monitor,
                                     FinishAfter(after_n_epochs=500),
                                     Printing(),
                                     # And the inclusion of the legion sync module, SharedParamsRateLimited:
                                     SharedParamsRateLimited(
                                         params={"W1": W1,
                                                 "W2": W2,
                                                 "b1": b1,
                                                 "b2": b2
                                                 },
                                         alpha=.5,
                                         beta=.5,
                                         every_n_batches=args.every_n_batches[0],
                                         maximum_rate=0.1)])
    main_loop.run()
Developer: JulesGM, Project: legion, Lines: 75, Source: tuto0.py

Example 13: main

def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Tanh(), Softmax()], [784, 100, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost, error_rate])
    cost.name = 'final_cost'
    test_cost = cost

    for_dropout = VariableFilter(roles=[INPUT], 
        bricks=mlp.linear_transformations[1:])(cg.variables)
    dropout_graph = apply_dropout(cg, for_dropout, 0.5)
    dropout_graph = apply_dropout(dropout_graph, [x], 0.1)
    dropout_cost, dropout_error_rate = dropout_graph.outputs

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    algorithm = GradientDescent(
        cost=dropout_cost, parameters=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      Flatten(
                          DataStream.default_stream(
                              mnist_test,
                              iteration_scheme=SequentialScheme(
                                  mnist_test.num_examples, 500)),
                          which_sources=('features',)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [dropout_cost, dropout_error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(Plot(
            'MNIST example',
            channels=[
                ['test_final_cost',
                 'test_misclassificationrate_apply_error_rate'],
                ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(
            DataStream.default_stream(
                mnist_train,
                iteration_scheme=SequentialScheme(
                    mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(dropout_cost),
        extensions=extensions)

    main_loop.run()
Developer: nke001, Project: blocks_tutorial, Lines: 66, Source: mnist_ex2_solution.py

Example 14: _create_main_loop

    def _create_main_loop(self):
        # hyper parameters
        hp = self.params
        batch_size = hp['batch_size']
        biases_init = Constant(0)
        batch_normalize = hp['batch_normalize']

        ### Build fprop
        tensor5 = T.TensorType(config.floatX, (False,)*5)
        X = tensor5("images")
        #X = T.tensor4("images")
        y = T.lvector('targets')

        gnet_params = OrderedDict()
        #X_shuffled = X[:, :, :, :, [2, 1, 0]]
        #X_shuffled = gpu_contiguous(X.dimshuffle(0, 1, 4, 2, 3)) * 255

        X = X[:, :, :, :, [2, 1, 0]]
        X_shuffled = X.dimshuffle((0, 1, 4, 2, 3)) * 255
        X_r = X_shuffled.reshape((X_shuffled.shape[0],
                                  X_shuffled.shape[1]*X_shuffled.shape[2],
                                  X_shuffled.shape[3], X_shuffled.shape[4]))
        X_r = X_r - (np.array([104, 117, 123])[None, :, None, None]).astype('float32')


        expressions, input_data, param = stream_layer_exp(inputs = ('data', X_r),
                                                          mode='rgb')
        res = expressions['outloss']
        y_hat = res.flatten(ndim=2)

        import pdb; pdb.set_trace()

        ### Build Cost
        cost = CategoricalCrossEntropy().apply(y, y_hat)
        cost = T.cast(cost, theano.config.floatX)
        cost.name = 'cross_entropy'

        y_pred = T.argmax(y_hat, axis=1)
        misclass = T.cast(T.mean(T.neq(y_pred, y)), theano.config.floatX)
        misclass.name = 'misclass'

        monitored_channels = []
        monitored_quantities = [cost, misclass, y_hat, y_pred]
        model = Model(cost)

        training_cg = ComputationGraph(monitored_quantities)
        inference_cg = ComputationGraph(monitored_quantities)

        ### Get evaluation function
        #training_eval = training_cg.get_theano_function(additional_updates=bn_updates)
        training_eval = training_cg.get_theano_function()
        #inference_eval = inference_cg.get_theano_function()


        # Dataset
        test = JpegHDF5Dataset('test',
                               #name='jpeg_data_flows.hdf5',
                               load_in_memory=True)
        #mean = np.load(os.path.join(os.environ['UCF101'], 'mean.npy'))
        import pdb; pdb.set_trace()

        ### Eval
        labels = np.zeros(test.num_video_examples)
        y_hat = np.zeros((test.num_video_examples, 101))
        labels_flip = np.zeros(test.num_video_examples)
        y_hat_flip = np.zeros((test.num_video_examples, 101))

        ### Important to shuffle list for batch normalization statistic
        #rng = np.random.RandomState()
        #examples_list = range(test.num_video_examples)
        #import pdb; pdb.set_trace()
        #rng.shuffle(examples_list)

        nb_frames=1

        for i in xrange(24):
            scheme = HDF5SeqScheme(test.video_indexes,
                                   examples=test.num_video_examples,
                                   batch_size=batch_size,
                                   f_subsample=i,
                                   nb_subsample=25,
                                   frames_per_video=nb_frames)
            #for crop in ['upleft', 'upright', 'downleft', 'downright', 'center']:
            for crop in ['center']:
                stream = JpegHDF5Transformer(
                    input_size=(240, 320), crop_size=(224, 224),
                    #input_size=(256, 342), crop_size=(224, 224),
                    crop_type=crop,
                    translate_labels = True,
                    flip='noflip', nb_frames = nb_frames,
                    data_stream=ForceFloatX(DataStream(
                            dataset=test, iteration_scheme=scheme)))
                stream_flip = JpegHDF5Transformer(
                    input_size=(240, 320), crop_size=(224, 224),
                    #input_size=(256, 342), crop_size=(224, 224),
                    crop_type=crop,
                    translate_labels = True,
                    flip='flip', nb_frames = nb_frames,
                    data_stream=ForceFloatX(DataStream(
                            dataset=test, iteration_scheme=scheme)))
#......... part of the code omitted .........
Developer: BinbinBian, Project: LSTM-Attention, Lines: 101, Source: conv_full.py

Example 15: create_network

def create_network(inputs=None, batch=batch_size):
    if inputs is None:
        inputs = T.tensor4('features')
    x = T.cast(inputs,'float32')
    x = x / 255. if dataset != 'binarized_mnist' else x

    # GatedPixelCNN
    gated = GatedPixelCNN(
        name='gated_layer_0',
        filter_size=7,
        image_size=(img_dim,img_dim),
        num_filters=h*n_channel,
        num_channels=n_channel,
        batch_size=batch,
        weights_init=IsotropicGaussian(std=0.02, mean=0),
        biases_init=Constant(0.02),
        res=False
    )
    gated.initialize()
    x_v, x_h = gated.apply(x, x)

    for i in range(n_layer):
        gated = GatedPixelCNN(
            name='gated_layer_{}'.format(i+1),
            filter_size=3,
            image_size=(img_dim,img_dim),
            num_channels=h*n_channel,
            batch_size=batch,
            weights_init=IsotropicGaussian(std=0.02, mean=0),
            biases_init=Constant(0.02),
            res=True
        )
        gated.initialize()
        x_v, x_h = gated.apply(x_v, x_h)

    conv_list = []
    conv_list.extend([Rectifier(), ConvolutionalNoFlip((1,1), h*n_channel, mask_type='B', name='1x1_conv_1')])
    #conv_list.extend([Rectifier(), ConvolutionalNoFlip((1,1), h*n_channel, mask='B', name='1x1_conv_2')])
    conv_list.extend([Rectifier(), ConvolutionalNoFlip(*third_layer, mask_type='B', name='output_layer')])

    sequence = ConvolutionalSequence(
        conv_list,
        num_channels=h*n_channel,
        batch_size=batch,
        image_size=(img_dim,img_dim),
        border_mode='half',
        weights_init=IsotropicGaussian(std=0.02, mean=0),
        biases_init=Constant(0.02),
        tied_biases=False
    )
    sequence.initialize()
    x = sequence.apply(x_h)
    if MODE == '256ary':
        x = x.reshape((-1, 256, n_channel, img_dim, img_dim)).dimshuffle(0,2,3,4,1)
        x = x.reshape((-1, 256))
        x_hat = Softmax().apply(x)
        inp = T.cast(inputs, 'int64').flatten()
        cost = CategoricalCrossEntropy().apply(inp, x_hat) * img_dim * img_dim
        cost_bits_dim = categorical_crossentropy(log_softmax(x), inp)
    else:
        x_hat = Logistic().apply(x)
        cost = BinaryCrossEntropy().apply(inputs, x_hat) * img_dim * img_dim
        #cost = T.nnet.binary_crossentropy(x_hat, inputs)
        #cost = cost.sum() / inputs.shape[0]
        cost_bits_dim = -(inputs * T.log2(x_hat) + (1.0 - inputs) * T.log2(1.0 - x_hat)).mean()

    cost_bits_dim.name = "nnl_bits_dim"
    cost.name = 'loglikelihood_nat'
    return cost, cost_bits_dim
Developer: aalitaiga, Project: Generative-models, Lines: 69, Source: gatedpixelblocks.py


Note: The examples of the blocks.bricks.cost.CategoricalCrossEntropy class in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets are selected from open-source projects contributed by various developers, and copyright of the source code belongs to the original authors. For distribution and use, please refer to the license of the corresponding project; do not reproduce without permission.