当前位置: 首页>>代码示例>>Python>>正文


Python OrderedDict.keys方法代码示例

本文整理汇总了Python中theano.compat.python2x.OrderedDict.keys方法的典型用法代码示例。如果您正苦于以下问题:Python OrderedDict.keys方法的具体用法?Python OrderedDict.keys怎么用?Python OrderedDict.keys使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在theano.compat.python2x.OrderedDict的用法示例。


在下文中一共展示了OrderedDict.keys方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# 需要导入模块: from theano.compat.python2x import OrderedDict [as 别名]
# 或者: from theano.compat.python2x.OrderedDict import keys [as 别名]
def main():
    """Exercise monitor out-services on a shared variable perturbed every call.

    A shared matrix named 'W' receives an ``add_uniform`` update on each
    function call.  Its 'mean' and 'var' statistics (taken from
    ``get_stats``) are wrapped in monitors, collapsed into
    ``(name, expression, out_service)`` tuples for the train and valid
    phases, compiled into two Theano functions, and evaluated for ten epochs
    each.  Every value produced is handed to the matching out-service via
    ``service.write``.  Timing information is emitted with ``log.debug``.
    """
    weights = theano.shared(T.zeros(shape=(88, 100), dtype=theano.config.floatX).eval(), name='W')
    updates = [(weights, add_uniform(input=weights, noise_level=.02))]

    # Only the variance and mean statistics are monitored below.  Distinct
    # names keep the shared variable and the builtins min/max unshadowed.
    stats = get_stats(weights)
    var_stat = stats.pop('var')
    mean_stat = stats.pop('mean')

    mean_monitor = Monitor('mean', mean_stat, train=True, valid=True, out_service=FileService('outs/mean.txt'))
    var_monitor = Monitor('var', var_stat, out_service=FileService('outs/var.txt'))

    w_channel = MonitorsChannel('W', monitors=mean_monitor)

    stat_channel = MonitorsChannel('stats', monitors=[var_monitor])

    monitors = [w_channel, stat_channel]

    # Each collapsed item is indexed as (name, expression, out_service).
    train_collapsed_raw = collapse_channels(monitors, train=True)
    train_collapsed = OrderedDict([(item[0], item[1]) for item in train_collapsed_raw])
    train_services = OrderedDict([(item[0], item[2]) for item in train_collapsed_raw])
    valid_collapsed_raw = collapse_channels(monitors, valid=True)
    valid_collapsed = OrderedDict([(item[0], item[1]) for item in valid_collapsed_raw])
    valid_services = OrderedDict([(item[0], item[2]) for item in valid_collapsed_raw])

    log.debug('compiling...')
    # Materialise the values as real lists: on Python 3 ``dict.values()`` is a
    # view object, not the list of output variables theano.function expects.
    f = theano.function(inputs=[], outputs=list(train_collapsed.values()), updates=updates)
    f2 = theano.function(inputs=[], outputs=list(valid_collapsed.values()), updates=updates)
    log.debug('done')

    t1 = time.time()

    for epoch in range(10):
        t = time.time()
        log.debug(epoch)
        vals = f()
        # Pair each monitor name with the value computed for it this epoch.
        m = OrderedDict(zip(train_collapsed.keys(), vals))
        for name, service in train_services.items():
            if name in m:
                service.write(m[name], TRAIN)
        log.debug('----- '+make_time_units_string(time.time()-t))

    for epoch in range(10):
        t = time.time()
        log.debug(epoch)
        vals = f2()
        m = OrderedDict(zip(valid_collapsed.keys(), vals))
        for name, service in valid_services.items():
            if name in m:
                service.write(m[name], VALID)
        log.debug('----- ' + make_time_units_string(time.time() - t))

    log.debug("TOTAL TIME "+make_time_units_string(time.time()-t1))
开发者ID:52nlp,项目名称:OpenDeep,代码行数:59,代码来源:test_fileservice.py

示例2: main

# 需要导入模块: from theano.compat.python2x import OrderedDict [as 别名]
# 或者: from theano.compat.python2x.OrderedDict import keys [as 别名]
def main():
    """Stream the 'mean' and 'max' statistics of a noisy shared matrix to a plot.

    A shared matrix named 'W' is perturbed by ``add_uniform`` on every
    function call.  Monitors for its mean (train and valid) and max are
    collapsed per phase, compiled into Theano functions, and evaluated for
    100 epochs per phase; each epoch's values are pushed to the ``Plot``
    object with ``update_plots``.
    """
    weights = theano.shared(T.zeros(shape=(88, 100), dtype=theano.config.floatX).eval(), name='W')
    noise_updates = [(weights, add_uniform(input=weights, noise_level=.02))]

    stats = get_stats(weights)
    # Pop every statistic in the fixed order l1, l2, min, max, var, std, mean;
    # only 'max' and 'mean' are used afterwards.
    for unused_key in ('l1', 'l2', 'min'):
        stats.pop(unused_key)
    max_stat = stats.pop('max')
    for unused_key in ('var', 'std'):
        stats.pop(unused_key)
    mean_stat = stats.pop('mean')

    mean_monitor = Monitor('mean', mean_stat, train=True, valid=True)
    max_monitor = Monitor('max', max_stat)
    channels = [
        MonitorsChannel('W', monitors=mean_monitor),
        MonitorsChannel('stats', monitors=[max_monitor]),
    ]

    def name_to_expression(**phase):
        # Collapse the channels for one phase and keep only name -> expression.
        collapsed = collapse_channels(channels, **phase)
        return OrderedDict([(name, expression) for name, expression, _ in collapsed])

    train_exprs = name_to_expression(train=True)
    valid_exprs = name_to_expression(valid=True)

    plot = Plot(bokeh_doc_name='test_plots', monitor_channels=channels, open_browser=True)

    log.debug('compiling...')
    train_fn = theano.function(inputs=[], outputs=list(train_exprs.values()), updates=noise_updates)
    valid_fn = theano.function(inputs=[], outputs=list(valid_exprs.values()), updates=noise_updates)
    log.debug('done')

    started = time.time()

    def run_phase(compiled_fn, exprs):
        # Evaluate one phase for 100 epochs, plotting the values of each epoch.
        for epoch in range(100):
            epoch_started = time.time()
            log.debug(epoch)
            values = compiled_fn()
            plot.update_plots(epoch, OrderedDict(zip(exprs.keys(), values)))
            time.sleep(0.02)
            log.debug('----- ' + make_time_units_string(time.time() - epoch_started))

    run_phase(train_fn, train_exprs)
    run_phase(valid_fn, valid_exprs)

    log.debug("TOTAL TIME " + make_time_units_string(time.time() - started))
开发者ID:gburachas,项目名称:OpenDeep,代码行数:57,代码来源:monitor_bokeh_server.py

示例3: get_updates

# 需要导入模块: from theano.compat.python2x import OrderedDict [as 别名]
# 或者: from theano.compat.python2x.OrderedDict import keys [as 别名]
    def get_updates(self, grads):
        """Build the update dictionary from running averages of squared gradients and steps.

        For every parameter two zero-initialised shared accumulators are
        created (and appended to ``self.parameters``): one for the decayed
        mean of squared gradients, one for the decayed mean of squared
        steps.  The step is the gradient scaled by the ratio of the two RMS
        values, each stabilised with ``self.epsilon``.

        Parameters
        ----------
        grads : mapping or iterable of pairs
            Parameter -> gradient expression; converted with ``OrderedDict``.

        Returns
        -------
        OrderedDict
            Updates for both accumulators and for the parameter itself,
            inserted in that order for each parameter.
        """
        grads = OrderedDict(grads)
        updates = OrderedDict()

        def zero_accumulator(prefix, param):
            # Fresh shared variable shaped like `param`, filled with zeros.
            zeros = theano._asarray(param.get_value() * 0., dtype=theano.config.floatX)
            return theano.shared(zeros, name=prefix + param.name, borrow=False)

        for param, grad in grads.items():
            # E[g^2]_{t-1}
            avg_sq_grad = zero_accumulator('mean_square_grad_', param)
            self.parameters.append(avg_sq_grad)
            # E[(\Delta x)^2]_{t-1}
            avg_sq_step = zero_accumulator('mean_square_dx_', param)
            self.parameters.append(avg_sq_step)

            # Decayed accumulation of the squared gradient.
            next_avg_sq_grad = self.decay * avg_sq_grad + (1 - self.decay) * T.sqr(grad)

            # Step = -(RMS of previous steps / RMS of gradients) * gradient.
            rms_prev_step = T.sqrt(avg_sq_step + self.epsilon)
            rms_grad = T.sqrt(next_avg_sq_grad + self.epsilon)
            step = - rms_prev_step / rms_grad * grad

            # Decayed accumulation of the squared step.
            next_avg_sq_step = self.decay * avg_sq_step + (1 - self.decay) * T.sqr(step)

            updates[avg_sq_grad] = next_avg_sq_grad
            updates[avg_sq_step] = next_avg_sq_step
            updates[param] = param + step

        return updates
开发者ID:MarcCote,项目名称:TheanoNADE,代码行数:31,代码来源:momentums.py

示例4: __init__

# 需要导入模块: from theano.compat.python2x import OrderedDict [as 别名]
# 或者: from theano.compat.python2x.OrderedDict import keys [as 别名]
    def __init__(self, valid=None, invalid=None, valid_equivalent=None):
        """
        Record which variables may and may not appear in a graph.

        valid: iterable of variables allowed in the graph computing outputs
            (defaults to empty).
        invalid: iterable of variables NOT allowed in that graph (defaults
            to empty).
        valid_equivalent: dictionary mapping some invalid variables to valid
            ones that can be used instead (defaults to an empty
            OrderedDict).  A copy is stored; its keys are added to the
            invalid set and its values to the valid set.
        """
        valid = [] if valid is None else valid
        invalid = [] if invalid is None else invalid
        if valid_equivalent is None:
            valid_equivalent = OrderedDict()

        # Variables that are allowed / forbidden in the graph.
        self.valid = set(valid)
        self.invalid = set(invalid)

        # Invalid variable -> valid replacement (private copy of the mapping).
        self.valid_equivalent = valid_equivalent.copy()
        for forbidden, replacement in valid_equivalent.items():
            self.valid.add(replacement)
            self.invalid.add(forbidden)
开发者ID:Jackwangyang,项目名称:Theano,代码行数:27,代码来源:scan_utils.py

示例5: get_funcs

# 需要导入模块: from theano.compat.python2x import OrderedDict [as 别名]
# 或者: from theano.compat.python2x.OrderedDict import keys [as 别名]
    def get_funcs(self, learning_rate, grads, inp, cost, errors, lr_scalers=None):
        """
        Build the two Theano functions for gradient descent + momentum.

        Parameters
        ----------
        learning_rate : Theano variable
            Learning rate coefficient.  It is the only input of the returned
            ``f_update``, so presumably a symbolic scalar -- TODO confirm.
        grads : dict
            A dictionary mapping from the model's parameters to their
            gradients.
        inp : list
            Inputs passed to ``theano.function`` for ``f_grad_shared``.
        cost, errors : Theano expressions
            Returned by ``f_grad_shared`` ahead of the two norms.
        lr_scalers : dict, optional
            A dictionary mapping from the model's parameters to a learning
            rate multiplier.  Parameters that are absent (or ``None`` for
            the whole argument) use a multiplier of 1.

        Returns
        -------
        f_grad_shared : function
            Outputs ``[cost, errors, gnorm, pnorm]`` and stores each gradient
            in a per-parameter shared variable.
        f_update : function
            Takes the learning rate and applies the momentum update using
            the stored gradients.
        """
        # The default is None; use an empty mapping so the lookup below works.
        if lr_scalers is None:
            lr_scalers = {}

        # One zero-initialised shared buffer per parameter, in `grads` order.
        # Built from pairs (not from a dict literal) so the order is kept.
        gshared = OrderedDict((p, sharedX(p.get_value() * 0.,
                                          name='%s_grad' % p.name))
                              for p in grads)

        # Updates that copy each symbolic gradient into its shared buffer.
        gsup = [(gshared[p], g) for p, g in grads.items()]

        def get_norms(variables):
            # Square root of the summed squares over all given variables.
            return T.sqrt(sum((v ** 2).sum() for v in variables))

        gnorm = get_norms(grads.values())
        pnorm = get_norms(grads.keys())
        f_grad_shared = theano.function(inp,
                                        [cost, errors, gnorm, pnorm],
                                        updates=gsup)
        updates = OrderedDict()

        # Iterate (parameter, stored gradient) pairs; iterating the keys alone
        # cannot be unpacked into two names.
        for param, grad in gshared.items():
            vel = sharedX(param.get_value() * 0.)
            assert param.dtype == vel.dtype
            assert grad.dtype == param.dtype
            if param.name is not None:
                vel.name = 'vel_' + param.name

            scaled_lr = learning_rate * lr_scalers.get(param, 1.)
            updates[vel] = self.momentum * vel - scaled_lr * grad

            inc = updates[vel]
            if self.nesterov_momentum:
                # Nesterov variant: apply the momentum step once more on top
                # of the freshly computed velocity.
                inc = self.momentum * inc - scaled_lr * grad

            assert inc.dtype == vel.dtype
            updates[param] = param + inc

        f_update = theano.function([learning_rate],
                                   [],
                                   updates=updates,
                                   on_unused_input='ignore')

        return f_grad_shared, f_update
开发者ID:BKJackson,项目名称:Attentive_reader,代码行数:53,代码来源:learning_rule.py

示例6: get_gradients

# 需要导入模块: from theano.compat.python2x import OrderedDict [as 别名]
# 或者: from theano.compat.python2x.OrderedDict import keys [as 别名]

#.........这里部分代码省略.........
            L(v, q) = log P(v) + sum_h q(h) log P(h, v) - sum_h q(h) log P(v) + const
            L(v, q) = sum_h q(h) log P(h, v) + const
            L(v, q) = sum_h q(h) -E(h, v) - log Z + const

            so the cost we want to minimize is
            expected_energy + log Z + const


            Note: for the RBM, this bound is exact, since the KL divergence goes to 0.
        """

        variational_params = flatten(q)

        # The gradients of the expected energy under q are easy, we can just do that in theano
        expected_energy_q = model.expected_energy(X, q).mean()
        params = list(model.get_params())
        gradients = OrderedDict(safe_zip(params, T.grad(expected_energy_q, params,
            consider_constant = variational_params,
            disconnected_inputs = 'ignore')))

        """
        d/d theta log Z = (d/d theta Z) / Z
                        = (d/d theta sum_h sum_v exp(-E(v,h)) ) / Z
                        = (sum_h sum_v - exp(-E(v,h)) d/d theta E(v,h) ) / Z
                        = - sum_h sum_v P(v,h)  d/d theta E(v,h)
        """

        layer_to_chains = model.make_layer_to_state(self.num_chains)

        def recurse_check(l):
            if isinstance(l, (list, tuple)):
                for elem in l:
                    recurse_check(elem)
            else:
                assert l.get_value().shape[0] == self.num_chains

        recurse_check(layer_to_chains.values())

        model.layer_to_chains = layer_to_chains

        # Note that we replace layer_to_chains with a dict mapping to the new
        # state of the chains
        updates, layer_to_chains = model.get_sampling_updates(layer_to_chains,
                self.theano_rng, num_steps=self.num_gibbs_steps,
                return_layer_to_updated = True)


        if self.toronto_neg:
            # Ruslan Salakhutdinov's undocumented negative phase from
            # http://www.mit.edu/~rsalakhu/code_DBM/dbm_mf.m
            # IG copied it here without fully understanding it, so it
            # only applies to exactly the same model structure as
            # in that code.

            assert isinstance(model.visible_layer, dbm.BinaryVector)
            assert isinstance(model.hidden_layers[0], dbm.BinaryVectorMaxPool)
            assert model.hidden_layers[0].pool_size == 1
            assert isinstance(model.hidden_layers[1], dbm.BinaryVectorMaxPool)
            assert model.hidden_layers[1].pool_size == 1
            assert isinstance(model.hidden_layers[2], dbm.Softmax)
            assert len(model.hidden_layers) == 3

            V_samples = layer_to_chains[model.visible_layer]
            H1_samples, H2_samples, Y_samples = [layer_to_chains[layer] for layer in model.hidden_layers]

            H1_mf = model.hidden_layers[0].mf_update(state_below=model.visible_layer.upward_state(V_samples),
                                                    state_above=model.hidden_layers[1].downward_state(H2_samples),
                                                    layer_above=model.hidden_layers[1])
            Y_mf = model.hidden_layers[2].mf_update(state_below=model.hidden_layers[1].upward_state(H2_samples))
            H2_mf = model.hidden_layers[1].mf_update(state_below=model.hidden_layers[0].upward_state(H1_mf),
                                                    state_above=model.hidden_layers[2].downward_state(Y_mf),
                                                    layer_above=model.hidden_layers[2])

            expected_energy_p = model.energy(V_samples, [H1_mf, H2_mf, Y_samples]).mean()

            constants = flatten([V_samples, H1_mf, H2_mf, Y_samples])

            neg_phase_grads = OrderedDict(safe_zip(params, T.grad(-expected_energy_p, params, consider_constant = constants)))
        else:
            warnings.warn("""TODO: reduce variance of negative phase by integrating out
                    the even-numbered layers. The Rao-Blackwellize method can do this
                    for you when expected gradient = gradient of expectation, but doing
                    this in general is trickier.""")
            #layer_to_chains = model.rao_blackwellize(layer_to_chains)
            expected_energy_p = model.energy(layer_to_chains[model.visible_layer],
                    [layer_to_chains[layer] for layer in model.hidden_layers]).mean()

            samples = flatten(layer_to_chains.values())
            for i, sample in enumerate(samples):
                if sample.name is None:
                    sample.name = 'sample_'+str(i)

            neg_phase_grads = OrderedDict(safe_zip(params, T.grad(-expected_energy_p, params, consider_constant
                = samples, disconnected_inputs='ignore')))


        for param in list(gradients.keys()):
            gradients[param] = neg_phase_grads[param] + gradients[param]

        return gradients, updates
开发者ID:tempbottle,项目名称:pylearn2,代码行数:104,代码来源:dbm.py

示例7: Optimizer

# 需要导入模块: from theano.compat.python2x import OrderedDict [as 别名]
# 或者: from theano.compat.python2x.OrderedDict import keys [as 别名]

#.........这里部分代码省略.........
        if not self.model:
            log.error("No self.model for the Optimizer!")
            raise AssertionError("Needs to be initialized with a Model! (Or something went wrong if train() "
                                 "was called from the Model. Try initializing the Optimizer with the model param "
                                 "and calling optimizer.train().")

        #########################
        # gradients and updates #
        #########################
        # grab the model parameters to use during training
        self.params = self.model.get_params()
        # Now create the training cost function for the model to use while training - update parameters
        # gradient!
        gradients = grad(cost=self.loss_expression, wrt=list(self.params.values()))
        # now create the dictionary mapping the parameter with its gradient
        gradients = OrderedDict(
            [(param, g) for param, g in zip(list(self.params.values()), gradients)]
        )
        # clip gradients if we want.
        gradients = clip_gradients(gradients, self.grad_clip, self.hard_clip)

        # Calculate the optimizer updates each run
        # This is where the magic happens for a lot of sub-implementations of SGD!
        # It tells how to update the params each training epoch
        gradient_updates = self.get_updates(gradients)

        # Combine the updates from the model also if applicable
        updates = self.model.get_updates()
        if updates:
            updates.update(gradient_updates)
        else:
            updates = gradient_updates

        log.info("%s params: %s", self.model._classname, str(list(self.params.keys())))

        ############
        # monitors #
        ############
        # deal with the monitor channels if they were given (or take them from the plot)
        if monitor_channels is None and plot is not None and len(plot.channels) > 0:
            monitor_channels = plot.channels
        self.train_monitors_dict = {}
        self.valid_monitors_dict = {}
        self.test_monitors_dict = {}
        self.train_monitors_outservice_dict = {}
        self.valid_monitors_outservice_dict = {}
        self.test_monitors_outservice_dict = {}
        if monitor_channels:
            # collapse the appropriate monitors into their (name, expression, out_service) tuples
            train_collapsed = collapse_channels(monitor_channels, train=True)
            valid_collapsed = collapse_channels(monitor_channels, valid=True)
            test_collapsed  = collapse_channels(monitor_channels, test=True)
            # get name: expression dictionary
            self.train_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in train_collapsed])
            self.valid_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in valid_collapsed])
            self.test_monitors_dict  = OrderedDict([(name, expression) for name, expression, _ in test_collapsed])
            # get name: outservice dictionary
            self.train_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in train_collapsed])
            self.valid_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in valid_collapsed])
            self.test_monitors_outservice_dict  = OrderedDict([(name, out) for name, _, out in test_collapsed])
        # finally deal with an outservice provided to monitor training cost
        self.train_outservice = train_outservice
        # remove redundant files made by the fileservice for the train monitor.
        # TODO: THIS FEELS LIKE A HACK. I don't like it.
        if isinstance(self.train_outservice, FileService):
            os.remove(self.train_outservice.valid_filename)
开发者ID:adammenges,项目名称:OpenDeep,代码行数:70,代码来源:optimizer.py

示例8: __init__

# 需要导入模块: from theano.compat.python2x import OrderedDict [as 别名]
# 或者: from theano.compat.python2x.OrderedDict import keys [as 别名]

#.........这里部分代码省略.........
            cache_name = 'BatchGradientDescent.param_to_cache[%s]' % param_name
            self.param_to_cache[param] = sharedX(param.get_value(borrow=False), name=cache_name)
            cache_updates[self.param_to_cache[param]] = param
            cached = self.param_to_cache[param]
            g = self.param_to_grad_shared[param]
            if lr_scalers is not None and param in lr_scalers:
                scaled_alpha = alpha * lr_scalers[param]
            else:
                scaled_alpha = alpha
            mul = scaled_alpha * g
            diff = cached - mul
            goto_updates[param] = diff
        self._cache_values = function([], updates = cache_updates, mode=self.theano_function_mode, name='BatchGradientDescent._cache_values')
        assert isinstance(param_constrainers, (list, tuple))
        for param_constrainer in param_constrainers:
            param_constrainer(goto_updates)
        self._goto_alpha = function([alpha], updates=goto_updates,
                mode=self.theano_function_mode, name='BatchGradientDescent._goto_alpha')

        norm = T.sqrt(sum([T.sqr(elem).sum() for elem in self.param_to_grad_shared.values()]))
        norm.name = 'BatchGradientDescent.norm'
        normalize_grad_updates = OrderedDict()
        for grad_shared in self.param_to_grad_shared.values():
            normalize_grad_updates[grad_shared] = grad_shared / norm

        # useful for monitoring
        self.ave_grad_size = sharedX(0.)
        self.new_weight = sharedX(1.)
        normalize_grad_updates[self.ave_grad_size] = self.new_weight * norm + (1.-self.new_weight) * self.ave_grad_size

        self._normalize_grad = function([], norm, updates=normalize_grad_updates, mode=self.theano_function_mode,
                name='BatchGradientDescent._normalize_grad')

        if self.conjugate:
            grad_shared = self.param_to_grad_shared.values()

            grad_to_old_grad = OrderedDict()
            for elem in grad_shared:
                grad_to_old_grad[elem] = sharedX(elem.get_value(), 'old_'+elem.name)

            self._store_old_grad = function([norm], updates = OrderedDict([(grad_to_old_grad[g], g * norm)
                for g in grad_to_old_grad]), mode=self.theano_function_mode,
                name='BatchGradientDescent._store_old_grad')

            grad_ordered = list(grad_to_old_grad.keys())
            old_grad_ordered = [ grad_to_old_grad[g] for g in grad_ordered]

            def dot_product(x, y):
                return sum([ (x_elem * y_elem).sum() for x_elem, y_elem in safe_zip(x, y) ])

            beta_pr = (dot_product(grad_ordered, grad_ordered) - dot_product(grad_ordered, old_grad_ordered)) / \
                    (1e-7+dot_product(old_grad_ordered, old_grad_ordered))
            assert beta_pr.ndim == 0

            beta = T.maximum(beta_pr, 0.)

            """

            beta_pr is the Polak-Ribiere formula for beta.
            According to wikipedia, the beta to use for NCG is "a matter of heuristics or taste"
            but max(0, beta_pr) is "a popular choice... which provides direction reset automatically."
            (ie, it is meant to revert to steepest descent when you have traveled far enough that
            the objective function is behaving non-quadratically enough that the conjugate gradient
            formulas aren't working anymore)

            http://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method

            """

            assert grad not in grad_to_old_grad

            make_conjugate_updates = [(g, g + beta * grad_to_old_grad[g]) for g in grad_ordered]

            mode = self.theano_function_mode
            if mode is not None and hasattr(mode, 'record'):
                for v, u in make_conjugate_updates:
                    mode.record.handle_line('BatchGradientDescent._make_conjugate var ' \
                            + var_descriptor(v) + '\n')
                    mode.record.handle_line('BatchGradientDescent._make_conjugate update ' \
                            + var_descriptor(u) + '\n')

            self._make_conjugate = function([], updates=make_conjugate_updates,
                    mode=self.theano_function_mode, name='BatchGradientDescent._make_conjugate')

            if mode is not None and hasattr(mode, 'record'):
                for output in self._make_conjugate.maker.fgraph.outputs:
                    mode.record.handle_line('BatchGradientDescent._make_conjugate output ' \
                            + var_descriptor(output) + '\n')


        if tol is None:
            if objective.dtype == "float32":
                self.tol = 1e-6
            else:
                self.tol = 3e-7
        else:
            self.tol = tol

        self.ave_step_size = sharedX(0.)
        self.ave_grad_mult = sharedX(0.)
开发者ID:JakeMick,项目名称:pylearn2,代码行数:104,代码来源:batch_gradient_descent.py

示例9: Monitor

# 需要导入模块: from theano.compat.python2x import OrderedDict [as 别名]
# 或者: from theano.compat.python2x.OrderedDict import keys [as 别名]

#.........这里部分代码省略.........
                                    batch_size=b,
                                    num_batches=n,
                                    topo=self.topo,
                                    targets=self.require_label,
                                    rng=sd)

            actual_ne = 0
            for X in myiterator:
                if self.require_label:
                    X, y = X
                    self.run_prereqs(X,y,d)
                    a(X, y)
                else:
                    self.run_prereqs(X, None, d)
                    a(X)
                if X.ndim == 2:
                    actual_batch_size = X.shape[0]
                else:
                    actual_batch_size = X.shape[d.get_topo_batch_axis()]
                actual_ne += actual_batch_size
            # end for X
            if actual_ne != ne:
                raise RuntimeError("At compile time, your iterator said it had "
                        + str(ne) + " examples total, but at runtime it gave us "
                        + str(actual_ne) + ".")
        # end for d


        log.info("Monitoring step:")
        log.info("\tEpochs seen: %d" % self._epochs_seen)
        log.info("\tBatches seen: %d" % self._num_batches_seen)
        log.info("\tExamples seen: %d" % self._examples_seen)
        t = time.time() - self.t0
        for channel_name in sorted(self.channels.keys(), key=number_aware_alphabetical_key):
            channel = self.channels[channel_name]
            channel.time_record.append(t)
            channel.batch_record.append(self._num_batches_seen)
            channel.example_record.append(self._examples_seen)
            channel.epoch_record.append(self._epochs_seen)
            val = channel.val_shared.get_value()
            channel.val_record.append(val)
            # TODO: use logging infrastructure so that user can configure
            # formatting
            if abs(val) < 1e4:
                val_str = str(val)
            else:
                val_str = '%.3e' % val

            log.info("\t%s: %s" % (channel_name, val_str))

    def run_prereqs(self, X, y, dataset):
        """Call every prerequisite registered for `dataset` with `(X, y)`.

        Does nothing when `dataset` has no entry in `self.prereqs`.
        """
        if dataset in self.prereqs:
            for hook in self.prereqs[dataset]:
                hook(X, y)

    def get_batches_seen(self):
        """Return the stored batch counter.

        The count is only meaningful if the learning code has been calling
        Monitor.report_batch correctly.
        """
        batches_seen = self._num_batches_seen
        return batches_seen

    def get_epochs_seen(self):
        """Return the stored epoch counter."""
        epochs_seen = self._epochs_seen
        return epochs_seen

    def get_examples_seen(self):
开发者ID:alito,项目名称:pylearn2,代码行数:70,代码来源:monitor.py

示例10: __init__

# 需要导入模块: from theano.compat.python2x import OrderedDict [as 别名]
# 或者: from theano.compat.python2x.OrderedDict import keys [as 别名]

#.........这里部分代码省略.........
            if param.name is None:
                param_name = 'anon_param'
            else:
                param_name = param.name
            cache_name = 'BatchGradientDescent.param_to_cache[%s]' % param_name
            self.param_to_cache[param] = sharedX(param.get_value(borrow=False), name=cache_name)
            cache_updates[self.param_to_cache[param]] = param
            cached = self.param_to_cache[param]
            g = self.param_to_grad_shared[param]
            if lr_scalers is not None and param in lr_scalers:
                scaled_alpha = alpha * lr_scalers[param]
            else:
                scaled_alpha = alpha
            mul = scaled_alpha * g
            diff = cached - mul
            goto_updates[param] = diff
        self._cache_values = function([], updates = cache_updates, mode=self.theano_function_mode, name='BatchGradientDescent._cache_values')
        assert isinstance(param_constrainers, (list, tuple))
        for param_constrainer in param_constrainers:
            param_constrainer(goto_updates)
        self._goto_alpha = function([alpha], updates=goto_updates,
                mode=self.theano_function_mode, name='BatchGradientDescent._goto_alpha')

        norm = T.sqrt(sum([T.sqr(elem).sum() for elem in self.param_to_grad_shared.values()]))
        norm.name = 'BatchGradientDescent.norm'
        normalize_grad_updates = OrderedDict()
        for grad_shared in self.param_to_grad_shared.values():
            normalize_grad_updates[grad_shared] = grad_shared / norm

        # useful for monitoring
        self.ave_grad_size = sharedX(0.)
        self.new_weight = sharedX(1.)
        normalize_grad_updates[self.ave_grad_size] = self.new_weight * norm + (1.-self.new_weight) * self.ave_grad_size

        self._normalize_grad = function([], norm, updates=normalize_grad_updates, mode=self.theano_function_mode,
                name='BatchGradientDescent._normalize_grad')

        if self.conjugate:
            grad_shared = self.param_to_grad_shared.values()

            grad_to_old_grad = OrderedDict()
            for elem in grad_shared:
                grad_to_old_grad[elem] = sharedX(elem.get_value(), 'old_'+elem.name)

            self._store_old_grad = function([norm], updates = OrderedDict([(grad_to_old_grad[g_], g_ * norm)
                for g_ in grad_to_old_grad]), mode=self.theano_function_mode,
                name='BatchGradientDescent._store_old_grad')

            grad_ordered = list(grad_to_old_grad.keys())
            old_grad_ordered = [grad_to_old_grad[g_] for g_ in grad_ordered]

            def dot_product(x, y):
                return sum([ (x_elem * y_elem).sum() for x_elem, y_elem in safe_zip(x, y) ])

            beta_pr = (dot_product(grad_ordered, grad_ordered) - dot_product(grad_ordered, old_grad_ordered)) / \
                    (1e-7+dot_product(old_grad_ordered, old_grad_ordered))
            assert beta_pr.ndim == 0

            beta = T.maximum(beta_pr, 0.)

            #beta_pr is the Polak-Ribiere formula for beta.
            #According to wikipedia, the beta to use for NCG is "a matter of heuristics or taste"
            #but max(0, beta_pr) is "a popular choice... which provides direction reset automatically."
            #(ie, it is meant to revert to steepest descent when you have traveled far enough that
            #the objective function is behaving non-quadratically enough that the conjugate gradient
            #formulas aren't working anymore)

            #http://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method

            assert grad not in grad_to_old_grad

            make_conjugate_updates = [(g_, g_ + beta * grad_to_old_grad[g_]) for g_ in grad_ordered]

            mode = self.theano_function_mode
            if mode is not None and hasattr(mode, 'record'):
                for v, u in make_conjugate_updates:
                    mode.record.handle_line('BatchGradientDescent._make_conjugate var ' \
                            + var_descriptor(v) + '\n')
                    mode.record.handle_line('BatchGradientDescent._make_conjugate update ' \
                            + var_descriptor(u) + '\n')

            self._make_conjugate = function([], updates=make_conjugate_updates,
                    mode=self.theano_function_mode, name='BatchGradientDescent._make_conjugate')

            if mode is not None and hasattr(mode, 'record'):
                for output in self._make_conjugate.maker.fgraph.outputs:
                    mode.record.handle_line('BatchGradientDescent._make_conjugate output ' \
                            + var_descriptor(output) + '\n')


        if tol is None:
            if objective.dtype == "float32":
                self.tol = 1e-6
            else:
                self.tol = 3e-7
        else:
            self.tol = tol

        self.ave_step_size = sharedX(0.)
        self.ave_grad_mult = sharedX(0.)
开发者ID:BloodNg,项目名称:pylearn2,代码行数:104,代码来源:batch_gradient_descent.py

示例11: Monitor

# 需要导入模块: from theano.compat.python2x import OrderedDict [as 别名]
# 或者: from theano.compat.python2x.OrderedDict import keys [as 别名]

#.........这里部分代码省略.........
                                    data_specs=self._flat_data_specs,
                                    return_tuple=True,
                                    rng=sd)

            # If self._flat_data_specs is empty, no channel needs data,
            # so we do not need to call the iterator in order to average
            # the monitored values across different batches, we only
            # have to call them once.
            if len(self._flat_data_specs[1]) == 0:
                X = ()
                self.run_prereqs(X, d)
                a(*X)

            else:
                actual_ne = 0
                for X in myiterator:
                    # X is a flat (not nested) tuple
                    self.run_prereqs(X, d)
                    a(*X)
                    actual_ne += self._flat_data_specs[0].np_batch_size(X)
                # end for X
                if actual_ne != ne:
                    raise RuntimeError("At compile time, your iterator said "
                                       "it had %d examples total, but at "
                                       "runtime it gave us %d." %
                                       (ne, actual_ne))
        # end for d

        log.info("Monitoring step:")
        log.info("\tEpochs seen: %d" % self._epochs_seen)
        log.info("\tBatches seen: %d" % self._num_batches_seen)
        log.info("\tExamples seen: %d" % self._examples_seen)
        t = time.time() - self.t0
        for channel_name in sorted(self.channels.keys(),
                                   key=number_aware_alphabetical_key):
            channel = self.channels[channel_name]
            channel.time_record.append(t)
            channel.batch_record.append(self._num_batches_seen)
            channel.example_record.append(self._examples_seen)
            channel.epoch_record.append(self._epochs_seen)
            val = channel.val_shared.get_value()
            channel.val_record.append(val)
            # TODO: use logging infrastructure so that user can configure
            # formatting
            if abs(val) < 1e4:
                val_str = str(val)
            else:
                val_str = '%.3e' % val

            log.info("\t%s: %s" % (channel_name, val_str))

    def run_prereqs(self, data, dataset):
        """
        Runs all "prerequistie functions" on a batch of data. Always
        called right before computing the monitoring channels on that
        batch.

        Parameters
        ----------
        data : tuple or Variable
            a member of the Space used as input to the monitoring
            functions
        dataset : Dataset
            the Dataset the data was drawn from
        """
        if dataset not in self.prereqs:
开发者ID:goller,项目名称:pylearn2,代码行数:70,代码来源:monitor.py

示例12: Optimizer

# 需要导入模块: from theano.compat.python2x import OrderedDict [as 别名]
# 或者: from theano.compat.python2x.OrderedDict import keys [as 别名]

#.........这里部分代码省略.........
            # gradient!
            gradients, _ = self.model.get_gradient(cost=train_cost)

            # Calculate the optimizer updates each run
            # This is where the magic happens for a lot of sub-implementations of SGD, including AdaDelta!
            # It tells how to update the params each training epoch
            gradient_updates = self.get_updates(gradients)

            # Combine the updates from the model also if applicable
            train_updates = self.model.get_updates()
            if train_updates:
                train_updates.update(gradient_updates)
            else:
                train_updates = gradient_updates

            # Compile the training function!
            log.info('Compiling f_learn %d/%d function for model %s...', i + 1, len(train_costs),
                     str(type(self.model)))
            t = time.time()

            f_learn = function(inputs=[data_idx, data_end_idx],
                               updates=train_updates,
                               outputs=train_cost,
                               givens=train_givens,
                               name='f_learn_%d' % i)

            log.info('f_learn compilation took %s', make_time_units_string(time.time() - t))
            self.train_functions.append(f_learn)

        # grab the expression(s) to use to monitor different model values during training
        log.debug("Compiling monitor functions...")
        monitor_t = time.time()
        self.monitors = OrderedDict(self.model.get_monitors())
        self.monitor_names = self.monitors.keys()
        if len(self.monitors.keys()) > 0:
            self.train_monitor_function = function(
                inputs=[data_idx, data_end_idx],
                updates=self.model.get_updates(),
                outputs=self.monitors.values(),
                givens=train_givens,
                name="train_monitor_function"
            )
        if len(self.monitors.keys()) > 0:
            self.valid_monitor_function = function(
                inputs=[data_idx, data_end_idx],
                updates=self.model.get_updates(),
                outputs=self.monitors.values(),
                givens=valid_givens,
                name="valid_monitor_function"
            )
        if len(self.monitors.keys()) > 0:
            self.test_monitor_function = function(
                inputs=[data_idx, data_end_idx],
                updates=self.model.get_updates(),
                outputs=self.monitors.values(),
                givens=test_givens,
                name="test_monitor_function"
            )
        log.debug("Compilation done. Took %s", make_time_units_string(time.time() - monitor_t))

        self.noise_switches = raise_to_list(self.model.get_noise_switch())

        ##################
        # start training #
        ##################
        # make sure to deal with a list of train_cost functions - for layer-wise pretraining!
开发者ID:chagge,项目名称:OpenDeep,代码行数:70,代码来源:optimizer.py

示例13: Optimizer

# 需要导入模块: from theano.compat.python2x import OrderedDict [as 别名]
# 或者: from theano.compat.python2x.OrderedDict import keys [as 别名]

#.........这里部分代码省略.........
                    log.info("STOPPING EARLY FROM KEYBOARDINTERRUPT")
                    self.STOP = True

            # save params
            if self.best_params is not None:
                log.debug("Restoring best model parameters...")
                set_shared_values(self.params, self.best_params)
            log.debug("Saving model parameters...")
            self.model.save_params('trained_epoch_' + str(self.epoch_counter) + '.pkl')

            log.info("------------TRAIN TIME TOOK %s---------", make_time_units_string(time.time() - t))

        log.info("------------TOTAL %s TRAIN TIME TOOK %s---------",
                 str(type(self.model)), make_time_units_string(time.time() - start_time))


    def _perform_one_epoch(self, f_learn, plot=None):
        """
        Run one training epoch with the given learn function.

        Increments ``self.epoch_counter``, optionally sets the noise switches
        to 1, calls ``f_learn`` once per entry of ``self.train_batches``,
        averages the returned cost and monitor values over the batches, logs
        them, writes them to the configured outservices, forwards them to
        ``plot`` and finally sets the noise switches to 0 when validation or
        testing is enabled.

        Parameters
        ----------
        f_learn : callable
            Called as ``f_learn(batch_start, batch_end)``. The first output is
            recorded as the training cost; the remaining outputs are paired,
            in order, with the keys of ``self.train_monitors_dict``.
        plot : object, optional
            When truthy, its ``update_plots(epoch=..., monitors=...)`` method
            is called with the mean monitor values plus the mean training
            cost stored under ``TRAIN_COST_KEY``.
        """
        self.epoch_counter += 1
        # NOTE(review): `t` and `switch_vals` are not read in the visible part
        # of this method; they are kept in case later code relies on them.
        t = time.time()
        log.info('EPOCH %s', str(self.epoch_counter))

        # set the noise switches on for training function! (this is where things like dropout happen)
        switch_vals = []
        if len(self.noise_switches) > 0 and (self.valid_flag or self.test_flag or self.epoch_counter == 1):
            log.debug("Turning on %s noise switches", str(len(self.noise_switches)))
            switch_vals = [switch.get_value() for switch in self.noise_switches]
            # plain loop: set_value is called only for its side effect
            for switch in self.noise_switches:
                switch.set_value(1.)

        # train
        train_costs = []
        # monitor names are fixed for the whole epoch, so look them up once
        monitor_names = list(self.train_monitors_dict)
        train_monitors = {name: [] for name in monitor_names}
        for batch_start, batch_end in self.train_batches:
            _outs = raise_to_list(f_learn(batch_start, batch_end))
            train_costs.append(_outs[0])
            # handle any user defined monitors (outputs after the cost)
            if train_monitors:
                for name, val in zip(monitor_names, _outs[1:]):
                    train_monitors[name].append(val)

        # get the mean values for the batches
        mean_train = numpy.mean(train_costs, 0)
        current_mean_monitors = {name: numpy.mean(vals, 0) for name, vals in train_monitors.items()}
        # log the mean values!
        log.info('Train cost: %s', trunc(mean_train))
        if current_mean_monitors:
            log.info('Train monitors: %s', str(current_mean_monitors))
        # send the values to their outservices
        if self.train_outservice:
            self.train_outservice.write(mean_train, TRAIN)
        for name, service in self.train_monitors_outservice_dict.items():
            if name in current_mean_monitors and service:
                service.write(current_mean_monitors[name], TRAIN)
        # if there is a plot, also send them over!
        if plot:
            current_mean_monitors[TRAIN_COST_KEY] = mean_train
            plot.update_plots(epoch=self.epoch_counter, monitors=current_mean_monitors)

        # set the noise switches off for valid and test sets! we assume unseen data is noisy anyway :)
        if len(self.noise_switches) > 0 and (self.valid_flag or self.test_flag):
            log.debug("Turning off %s noise switches", str(len(self.noise_switches)))
            for switch in self.noise_switches:
                switch.set_value(0.)
开发者ID:52nlp,项目名称:OpenDeep,代码行数:69,代码来源:optimizer.py


注:本文中的theano.compat.python2x.OrderedDict.keys方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。