

Python DataSpecsMapping.nest Method Code Examples

This article collects typical usage examples of the Python method pylearn2.utils.data_specs.DataSpecsMapping.nest. If you have been wondering what DataSpecsMapping.nest does, how to call it, or where to find real-world uses of it, the curated code examples here should help. You can also explore further usage examples of the enclosing class, pylearn2.utils.data_specs.DataSpecsMapping.


Below are 15 code examples of the DataSpecsMapping.nest method, sorted by popularity by default.
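
Before working through the examples, it helps to see the basic round trip in isolation: a DataSpecsMapping is built from a nested (space, source) pair, flatten() turns that nested structure (and any data arranged the same way) into a flat tuple of unique elements, and nest() is the inverse, rebuilding the original nesting. The sketch below is a minimal illustration using only calls that appear in the examples; the particular spaces and source names are illustrative assumptions, not taken from any one project.

# Minimal sketch of the flatten/nest round trip. The spaces and source
# names here are illustrative assumptions.
from pylearn2.space import CompositeSpace, VectorSpace
from pylearn2.utils.data_specs import DataSpecsMapping

# Nested data_specs: a composite space paired with nested source names.
nested_space = CompositeSpace([VectorSpace(dim=3),
                               CompositeSpace([VectorSpace(dim=10),
                                               VectorSpace(dim=7)])])
nested_source = ('features', ('target', 'features'))

mapping = DataSpecsMapping((nested_space, nested_source))

# flatten() yields flat tuples of the unique (space, source) pairs ...
flat_space = mapping.flatten(nested_space, return_tuple=True)
flat_source = mapping.flatten(nested_source, return_tuple=True)

# ... from which one symbolic variable per element can be built;
# nest() then reassembles them into the nested layout that cost and
# model methods expect.
flat_batch = tuple(space.make_theano_batch(name=source)
                   for space, source in zip(flat_space, flat_source))
nested_batch = mapping.nest(flat_batch)  # (v_features, (v_target, v_features))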

Example 1: test_nest_specs

# Required import: from pylearn2.utils.data_specs import DataSpecsMapping [as alias]
# Or: from pylearn2.utils.data_specs.DataSpecsMapping import nest [as alias]
def test_nest_specs():
    x1 = TT.matrix("x1")
    x2 = TT.matrix("x2")
    x3 = TT.matrix("x3")
    x4 = TT.matrix("x4")

    for nested_space, nested_source, nested_data in [
        (VectorSpace(dim=10), "target", x2),
        (CompositeSpace([VectorSpace(dim=3), VectorSpace(dim=9)]), ("features", "features"), (x1, x4)),
        (
            CompositeSpace([VectorSpace(dim=3), CompositeSpace([VectorSpace(dim=10), VectorSpace(dim=7)])]),
            ("features", ("target", "features")),
            (x1, (x2, x3)),
        ),
    ]:

        mapping = DataSpecsMapping((nested_space, nested_source))
        flat_space = mapping.flatten(nested_space)
        flat_source = mapping.flatten(nested_source)
        flat_data = mapping.flatten(nested_data)

        renested_space = mapping.nest(flat_space)
        renested_source = mapping.nest(flat_source)
        renested_data = mapping.nest(flat_data)

        assert_equal(renested_space, nested_space)
        assert_equal(renested_source, nested_source)
        assert_equal(renested_data, nested_data)
Developer: Bowen-C, Project: pylearn2, Lines: 30, Source: test_data_specs.py

Example 2: test_nest_specs

# Required import: from pylearn2.utils.data_specs import DataSpecsMapping [as alias]
# Or: from pylearn2.utils.data_specs.DataSpecsMapping import nest [as alias]
def test_nest_specs():
    x1 = TT.matrix('x1')
    x2 = TT.matrix('x2')
    x3 = TT.matrix('x3')
    x4 = TT.matrix('x4')

    for nested_space, nested_source, nested_data in [
            (VectorSpace(dim=10), 'target', x2),
            (CompositeSpace([VectorSpace(dim=3), VectorSpace(dim=9)]),
                ('features', 'features'),
                (x1, x4)),
            (CompositeSpace([VectorSpace(dim=3),
                             CompositeSpace([VectorSpace(dim=10),
                                             VectorSpace(dim=7)])]),
                ('features', ('target', 'features')),
                (x1, (x2, x3))),
            ]:

        mapping = DataSpecsMapping((nested_space, nested_source))
        flat_space = mapping.flatten(nested_space)
        flat_source = mapping.flatten(nested_source)
        flat_data = mapping.flatten(nested_data)

        renested_space = mapping.nest(flat_space)
        renested_source = mapping.nest(flat_source)
        renested_data = mapping.nest(flat_data)

        assert_equal(renested_space, nested_space)
        assert_equal(renested_source, nested_source)
        assert_equal(renested_data, nested_data)
Developer: 123fengye741, Project: pylearn2, Lines: 32, Source: test_data_specs.py

Example 3: test_variational_cd

# Required import: from pylearn2.utils.data_specs import DataSpecsMapping [as alias]
# Or: from pylearn2.utils.data_specs.DataSpecsMapping import nest [as alias]
def test_variational_cd():

    # Verifies that VariationalCD works well with make_layer_to_symbolic_state
    visible_layer = BinaryVector(nvis=100)
    hidden_layer = BinaryVectorMaxPool(detector_layer_dim=500,
                                       pool_size=1,
                                       layer_name='h',
                                       irange=0.05,
                                       init_bias=-2.0)
    model = DBM(visible_layer=visible_layer,
                hidden_layers=[hidden_layer],
                batch_size=100,
                niter=1)

    cost = VariationalCD(num_chains=100, num_gibbs_steps=2)

    data_specs = cost.get_data_specs(model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

    theano_args = []
    for space, source in safe_zip(space_tuple, source_tuple):
        name = '%s' % (source)
        arg = space.make_theano_batch(name=name)
        theano_args.append(arg)
    theano_args = tuple(theano_args)
    nested_args = mapping.nest(theano_args)

    grads, updates = cost.get_gradients(model, nested_args)
Developer: BloodNg, Project: pylearn2, Lines: 32, Source: test_dbm.py

Example 4: setup

# Required import: from pylearn2.utils.data_specs import DataSpecsMapping [as alias]
# Or: from pylearn2.utils.data_specs.DataSpecsMapping import nest [as alias]
    def setup(self, model, dataset):
        """
        Allows the training algorithm to do some preliminary configuration
        *before* we actually start training the model. The dataset is provided
        in case other derived training algorithms need to modify model based on
        the dataset.

        Parameters
        ----------
        model: a Python object representing the model to train, loosely
        implementing the interface of models.model.Model.

        dataset: a pylearn2.datasets.dataset.Dataset object used to draw
        training data
        """
        self.model = model

        self.monitor = Monitor.get_monitor(model)

        if self.monitoring_dataset is not None:
            # Get the data specifications needed by the model
            space, source = model.get_monitoring_data_specs()

            # Create Theano variables for each of the individual components
            # of that data. Usually, it will be X for inputs and Y for targets.
            # First, we need to find these components, and put them in a tuple
            mapping = DataSpecsMapping((space, source))
            space_tuple = mapping.flatten(space, return_tuple=True)
            source_tuple = mapping.flatten(source, return_tuple=True)
            # Then, build a flat tuple of these Theano variables
            ipt = tuple(sp.make_theano_batch(name='monitor_%s' % src)
                    for (sp, src) in safe_zip(space_tuple, source_tuple))
            # Finally, organize them back into a structure expected by the
            # monitoring channels of the model
            nested_ipt = mapping.nest(ipt)

            self.monitor.add_dataset(dataset=self.monitoring_dataset,
                                mode="sequential",
                                batch_size=self.batch_size,
                                num_batches=self.monitoring_batches)

            channels = model.get_monitoring_channels(nested_ipt)
            if not isinstance(channels, dict):
                raise TypeError("model.get_monitoring_channels must return a "
                                "dictionary, but it returned " + str(channels))
            for name in channels:
                J = channels[name]
                if isinstance(J, tuple):
                    assert len(J) == 2
                    J, prereqs = J
                else:
                    prereqs = None

                self.monitor.add_channel(name=name,
                                         ipt=nested_ipt,
                                         val=J,
                                         prereqs=prereqs,
                                         data_specs=(space, source))
        self.first = True
        self.bSetup = True
Developer: Alienfeel, Project: pylearn2, Lines: 62, Source: default.py

Example 5: train

# Required import: from pylearn2.utils.data_specs import DataSpecsMapping [as alias]
# Or: from pylearn2.utils.data_specs.DataSpecsMapping import nest [as alias]
    def train(self, dataset):
        if not hasattr(self, 'sgd_update'):
            raise Exception("train called without first calling setup")

        # Make sure none of the parameters have bad values
        for param in self.params:
            value = param.get_value(borrow=True)
            if np.any(np.isnan(value)) or np.any(np.isinf(value)):
                raise Exception("NaN in " + param.name)

        self.first = False
        rng = self.rng
        if not is_stochastic(self.train_iteration_mode):
            rng = None

        data_specs = self.cost.get_data_specs(self.model)

        # The iterator should be built from flat data specs, so it returns
        # flat, non-redundant tuples of data.
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)
        if len(space_tuple) == 0:
            # No data will be returned by the iterator, and it is impossible
            # to know the size of the actual batch.
            # It is not decided yet what the right thing to do should be.
            raise NotImplementedError("Unable to train with SGD, because "
                    "the cost does not actually use data from the data set. "
                    "data_specs: %s" % str(data_specs))
        flat_data_specs = (CompositeSpace(space_tuple), source_tuple)

        iterator = dataset.iterator(mode=self.train_iteration_mode,
                batch_size=self.batch_size,
                data_specs=flat_data_specs, return_tuple=True,
                rng = rng, num_batches = self.batches_per_iter)

        on_load_batch = self.on_load_batch
        for batch in iterator:
            for callback in on_load_batch:
                callback(mapping.nest(batch))
            self.sgd_update(*batch)
            # iterator might return a smaller batch if dataset size
            # isn't divisible by batch_size
            # Note: if data_specs[0] is a NullSpace, there is no way to know
            # how many examples would actually have been in the batch,
            # since it was empty, so actual_batch_size would be reported as 0.
            actual_batch_size = flat_data_specs[0].np_batch_size(batch)
            self.monitor.report_batch(actual_batch_size)
            for callback in self.update_callbacks:
                callback(self)

        # Make sure none of the parameters have bad values
        for param in self.params:
            value = param.get_value(borrow=True)
            if np.any(np.isnan(value)) or np.any(np.isinf(value)):
                raise Exception("NaN in " + param.name)
Developer: ahmed26, Project: pylearn2, Lines: 58, Source: sgd.py
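
In the loop above, the iterator yields flat tuples (built from flat_data_specs), and mapping.nest(batch) restores the nesting that the on_load_batch callbacks expect. The same round trip works on concrete arrays, not only on symbolic variables. The following sketch imitates that step with made-up numpy batches; the spaces, sources, and shapes are illustrative assumptions.

# Sketch of the re-nesting step above, with numpy arrays standing in for
# real iterator output. Spaces, sources, and shapes are assumptions.
import numpy as np

from pylearn2.space import CompositeSpace, VectorSpace
from pylearn2.utils.data_specs import DataSpecsMapping

nested_space = CompositeSpace([VectorSpace(dim=4),
                               CompositeSpace([VectorSpace(dim=2),
                                               VectorSpace(dim=3)])])
nested_source = ('features', ('targets', 'features'))
mapping = DataSpecsMapping((nested_space, nested_source))

# Round-trip a concrete nested batch: flatten it the way an iterator
# built from flat data specs would deliver it, then nest it back for
# callbacks that expect the nested layout.
x = np.zeros((5, 4), dtype='float32')
y = np.zeros((5, 2), dtype='float32')
z = np.zeros((5, 3), dtype='float32')
flat_batch = mapping.flatten((x, (y, z)))
renested = mapping.nest(flat_batch)
assert renested[0] is x and renested[1][0] is y and renested[1][1] is z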

Example 6: train

# Required import: from pylearn2.utils.data_specs import DataSpecsMapping [as alias]
# Or: from pylearn2.utils.data_specs.DataSpecsMapping import nest [as alias]
    def train(self, dataset):
        """
        .. todo::

            WRITEME
        """
        assert self.bSetup
        model = self.model

        rng = self.rng
        train_iteration_mode = "shuffled_sequential"
        if not is_stochastic(train_iteration_mode):
            rng = None

        data_specs = self.cost.get_data_specs(self.model)
        # The iterator should be built from flat data specs, so it returns
        # flat, non-redundant tuples of data.
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)
        if len(space_tuple) == 0:
            # No data will be returned by the iterator, and it is impossible
            # to know the size of the actual batch.
            # It is not decided yet what the right thing to do should be.
            raise NotImplementedError(
                "Unable to train with BGD, because "
                "the cost does not actually use data from the data set. "
                "data_specs: %s" % str(data_specs)
            )
        flat_data_specs = (CompositeSpace(space_tuple), source_tuple)

        iterator = dataset.iterator(
            mode=train_iteration_mode,
            batch_size=self.batch_size,
            num_batches=self.batches_per_iter,
            data_specs=flat_data_specs,
            return_tuple=True,
            rng=rng,
        )

        mode = self.theano_function_mode
        for data in iterator:
            if "targets" in source_tuple and mode is not None and hasattr(mode, "record"):
                Y = data[source_tuple.index("targets")]
                stry = str(Y).replace("\n", " ")
                mode.record.handle_line("data Y " + stry + "\n")

            for on_load_batch in self.on_load_batch:
                on_load_batch(mapping.nest(data))

            self.before_step(model)
            self.optimizer.minimize(*data)
            self.after_step(model)
            actual_batch_size = flat_data_specs[0].np_batch_size(data)
            model.monitor.report_batch(actual_batch_size)
Developer: pangyuteng, Project: chalearn2014, Lines: 57, Source: bgd.py

Example 7: setup

# Required import: from pylearn2.utils.data_specs import DataSpecsMapping [as alias]
# Or: from pylearn2.utils.data_specs.DataSpecsMapping import nest [as alias]
    def setup(self):
        self.X = T.matrix('X')
        self.Y = T.matrix('Y')

        # Taken from pylearn2/training_algorithms/sgd.py


        data_specs = self.cost.get_data_specs(self.model)
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

        # Build a flat tuple of Theano Variables, one for each space.
        # We want that so that if the same space/source is specified
        # more than once in data_specs, only one Theano Variable
        # is generated for it, and the corresponding value is passed
        # only once to the compiled Theano function.
        theano_args = []
        for space, source in safe_zip(space_tuple, source_tuple):
            name = '%s[%s]' % (self.__class__.__name__, source)
            arg = space.make_theano_batch(name=name, batch_size = self.batch_size)
            theano_args.append(arg)
        print 'BATCH SIZE=',self.batch_size
        theano_args = tuple(theano_args)

        # Methods of `self.cost` need args to be passed in a format compatible
        # with data_specs
        nested_args = mapping.nest(theano_args)
        print self.cost
        fixed_var_descr = self.cost.get_fixed_var_descr(self.model, nested_args)
        print self.cost
        self.on_load_batch = fixed_var_descr.on_load_batch
        params = list(self.model.get_params())
        self.X = nested_args[0]
        self.Y = nested_args[1]
        init_grads, updates = self.cost.get_gradients(self.model, nested_args)

        params = self.model.get_params()
        # We need to replace parameters with purely symbolic variables in case some are shared
        # Create gradient and cost functions
        self.params = params
        symbolic_params = [self._convert_variable(param) for param in params]
        givens = dict(zip(params, symbolic_params))
        costfn = self.model.cost_from_X((self.X, self.Y))
        gradfns = [init_grads[param] for param in params]
        print 'Compiling function...'
        self.theano_f_df = theano.function(inputs=symbolic_params + [self.X, self.Y], outputs=[costfn] + gradfns, givens=givens)
        print 'done'
Developer: NuelASRB, Project: Sum-of-Functions-Optimizer, Lines: 53, Source: model_gradient.py
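
The long comment in this example ("only one Theano Variable is generated for it ...") refers to the de-duplication that flatten() performs when an identical (space, source) pair occurs more than once in the nested specs. The sketch below illustrates the expected behavior; it assumes that equal VectorSpace/source pairs are collapsed, and the spaces and names are made up.

# Sketch of the de-duplication described in the comments above: a pair
# listed twice should be emitted once by flatten(), and nest() should
# reuse the single variable in both of the original positions.
from pylearn2.space import CompositeSpace, VectorSpace
from pylearn2.utils.data_specs import DataSpecsMapping

space = VectorSpace(dim=6)
nested_space = CompositeSpace([space, space])
nested_source = ('features', 'features')  # identical (space, source) twice
mapping = DataSpecsMapping((nested_space, nested_source))

space_tuple = mapping.flatten(nested_space, return_tuple=True)
source_tuple = mapping.flatten(nested_source, return_tuple=True)
print(len(space_tuple))  # expected: 1

batch = space_tuple[0].make_theano_batch(name=source_tuple[0])
nested = mapping.nest((batch,))
print(nested[0] is nested[1])  # expected: True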

Example 8: setup

# Required import: from pylearn2.utils.data_specs import DataSpecsMapping [as alias]
# Or: from pylearn2.utils.data_specs.DataSpecsMapping import nest [as alias]
    def setup(self, model, dataset, algorithm):
        self.origin = model.get_param_vector()

        cost = algorithm.cost
        # Cargo cult the boilerplate needed to evaluate the cost function
        # =======================================
        data_specs = cost.get_data_specs(model)
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

        # Build a flat tuple of Theano Variables, one for each space.
        # We want that so that if the same space/source is specified
        # more than once in data_specs, only one Theano Variable
        # is generated for it, and the corresponding value is passed
        # only once to the compiled Theano function.
        theano_args = []
        for space, source in safe_zip(space_tuple, source_tuple):
            name = '%s[%s]' % (self.__class__.__name__, source)
            arg = space.make_theano_batch(name=name,
                                          batch_size=self.batch_size)
            theano_args.append(arg)
        theano_args = tuple(theano_args)

        # Methods of `cost` need args to be passed in a format compatible
        # with data_specs
        nested_args = mapping.nest(theano_args)
        fixed_var_descr = cost.get_fixed_var_descr(model, nested_args)
        self.on_load_batch = fixed_var_descr.on_load_batch

        cost_value = cost.expr(model, nested_args,
                                    ** fixed_var_descr.fixed_vars)
        # End cargo culting
        # ======================

        print "Compiling cost function..."
        cost_fn = function(theano_args, cost_value)
        self.cost_fn = cost_fn
Developer: cc13ny, Project: galatea, Lines: 40, Source: __init__.py

Example 9: setup

# Required import: from pylearn2.utils.data_specs import DataSpecsMapping [as alias]
# Or: from pylearn2.utils.data_specs.DataSpecsMapping import nest [as alias]
    def setup(self, model, dataset):
        """
        Compiles the theano functions needed for the train method.
        """
        if self.cost is None:
            self.cost = model.get_default_cost()

        inf_params = [param for param in model.get_params()
                      if np.any(np.isinf(param.get_value()))]
        if len(inf_params) > 0:
            raise ValueError("These params are Inf: "+str(inf_params))
        if any([np.any(np.isnan(param.get_value()))
                for param in model.get_params()]):
            nan_params = [param for param in model.get_params()
                          if np.any(np.isnan(param.get_value()))]
            raise ValueError("These params are NaN: "+str(nan_params))
        self.model = model

        batch_size = self.batch_size
        if hasattr(model, "force_batch_size"):
            if model.force_batch_size > 0:
                if batch_size is not None:
                    if batch_size != model.force_batch_size:
                        if self.set_batch_size:
                            model.set_batch_size(batch_size)
                        else:
                            raise ValueError("batch_size argument to SGD " +
                                             "conflicts with model's " +
                                             "force_batch_size attribute")
                else:
                    self.batch_size = model.force_batch_size
        model._test_batch_size = self.batch_size
        self.monitor = Monitor.get_monitor(model)
        self.monitor._sanity_check()

        data_specs = self.cost.get_data_specs(self.model)
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

        # Build a flat tuple of Theano Variables, one for each space.
        # We want that so that if the same space/source is specified
        # more than once in data_specs, only one Theano Variable
        # is generated for it, and the corresponding value is passed
        # only once to the compiled Theano function.
        theano_args = []
        for space, source in safe_zip(space_tuple, source_tuple):
            name = '%s[%s]' % (self.__class__.__name__, source)
            arg = space.make_theano_batch(name=name,
                                          batch_size=self.batch_size)
            theano_args.append(arg)
        theano_args = tuple(theano_args)

        # Methods of `self.cost` need args to be passed in a format compatible
        # with data_specs
        nested_args = mapping.nest(theano_args)
        fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
        self.on_load_batch = fixed_var_descr.on_load_batch

        cost_value = self.cost.expr(model, nested_args,
                                    ** fixed_var_descr.fixed_vars)

        if cost_value is not None and cost_value.name is None:
            # Concatenate the name of all tensors in theano_args !?
            cost_value.name = 'objective'

        # Set up monitor to model the objective value, learning rate,
        # momentum (if applicable), and extra channels defined by
        # the cost
        learning_rate = self.learning_rate
        if self.monitoring_dataset is not None:
            self.monitor.setup(
                    dataset=self.monitoring_dataset,
                    cost=self.cost,
                    batch_size=self.batch_size,
                    num_batches=self.monitoring_batches,
                    extra_costs=self.monitoring_costs,
                    mode=self.monitor_iteration_mode
                    )
            dataset_name = self.monitoring_dataset.keys()[0]
            monitoring_dataset = self.monitoring_dataset[dataset_name]
            #TODO: have Monitor support non-data-dependent channels
            self.monitor.add_channel(name='learning_rate',
                                     ipt=None,
                                     val=learning_rate,
                                     data_specs=(NullSpace(), ''),
                                     dataset=monitoring_dataset)

            if self.learning_rule:
                self.learning_rule.add_channels_to_monitor(
                        self.monitor,
                        monitoring_dataset)

        params = list(model.get_params())
        assert len(params) > 0
        for i, param in enumerate(params):
            if param.name is None:
                param.name = 'sgd_params[%d]' % i

        grads, updates = self.cost.get_gradients(model, nested_args,
#.........(rest of the code omitted).........
Developer: yosinski, Project: pylearn2, Lines: 103, Source: sgd.py

Example 10: setup

# Required import: from pylearn2.utils.data_specs import DataSpecsMapping [as alias]
# Or: from pylearn2.utils.data_specs.DataSpecsMapping import nest [as alias]
    def setup(self, dataset, cost, batch_size, num_batches=None,
              extra_costs=None, mode='sequential', obj_prereqs=None,
              cost_monitoring_args=None):
        """
        Sets up the monitor for a cost minimization problem.
        Adds channels defined by both the model and the cost for
        the specified dataset(s), as well as a channel called
        'objective' defined by the cost's __call__ method.

        Parameters
        ----------
        dataset : pylearn2.datasets.Dataset
            Dataset or dictionary mapping string names to Datasets.
            If string names are used, then for every dataset, each
            channel defined by the model or cost will be replicated
            with that dataset's name followed by an underscore as the
            prefix. For example, if your cost defines a channel called
            'misclass', and datasets is
            {'train' : train_dataset, 'valid' : valid_dataset},
            you will get channels called 'train_misclass' and
            'valid_misclass'.
        cost : pylearn2.costs.Cost
            The cost being optimized by training. The value of the cost
            will appear as the `objective` channel. Its
            `get_monitoring_channels` method will also be used to
            supply other channels.
        extra_costs : OrderedDict, optional
            A dictionary mapping channel names to Cost objects.
            Their value will appear as the specified channel name.
            They will also provide more monitoring channels via their
            `get_monitoring_channels` method.
        obj_prereqs : None, or list of functions
            Functions to pass as prerequisites to the `objective` channel.
        cost_monitoring_args : dict
            Dictionary of kwargs that will be passed to
            `cost.get_monitoring_channels()`
            (but not for the extra_costs).
        """

        if dataset is None:
            return
        if isinstance(dataset, Dataset):
            dataset = {'': dataset}
        else:
            assert isinstance(dataset, dict)
            assert all(isinstance(key, str) for key in dataset)
            assert all(isinstance(dataset[key], Dataset) for key in dataset)

        if extra_costs is None:
            costs = {}
        else:
            assert isinstance(extra_costs, (OrderedDict, dict))
            costs = extra_costs
        assert '' not in costs
        costs[''] = cost

        if cost_monitoring_args is None:
            cost_monitoring_args = {}

        model = self.model

        # Build a composite data_specs containing the specs for all costs,
        # then the specs of the model
        cost_names = sorted(costs.keys())
        spaces = []
        sources = []
        for c in cost_names:
            c_space, c_source = costs[c].get_data_specs(model)
            spaces.append(c_space)
            sources.append(c_source)

        # Ask the model for the data_specs needed
        m_space, m_source = model.get_monitoring_data_specs()
        spaces.append(m_space)
        sources.append(m_source)

        nested_space = CompositeSpace(spaces)
        nested_sources = tuple(sources)

        # Flatten this data_specs, so we build only one symbolic Theano
        # variable for each of the unique (space, source) pairs.
        mapping = DataSpecsMapping((nested_space, nested_sources))
        space_tuple = mapping.flatten(nested_space, return_tuple=True)
        source_tuple = mapping.flatten(nested_sources, return_tuple=True)
        ipt = tuple(space.make_theano_batch(name='monitor_%s' % source,
                                            batch_size=None)
                    for (space, source) in safe_zip(space_tuple, source_tuple))

        # Build a nested tuple from ipt, to dispatch the appropriate parts
        # of the ipt batch to each cost
        nested_ipt = mapping.nest(ipt)

        custom_channels = {}
        for i, cost_name in enumerate(cost_names):
            if cost_name == '':
                prefix = ''
            else:
                prefix = cost_name + '_'
            cost = costs[cost_name]
            cost_ipt = nested_ipt[i]
#.........(rest of the code omitted).........
Developer: goller, Project: pylearn2, Lines: 103, Source: monitor.py

Example 11: Monitor

# Required import: from pylearn2.utils.data_specs import DataSpecsMapping [as alias]
# Or: from pylearn2.utils.data_specs.DataSpecsMapping import nest [as alias]
class Monitor(object):
    """
    A class for monitoring Models while they are being trained.

    A monitor object records the number of minibatches and number of
    examples the model has trained, as well as any number of "channels"
    that track quantities of interest (examples: the objective
    function, measures of hidden unit activity, reconstruction error,
    sum of squared second derivatives, average norm of the weight
    vectors, etc.)

    Parameters
    ----------
    model : `pylearn2.models.model.Model`
    """

    def __init__(self, model):
        self.training_succeeded = False
        self.model = model
        self.channels = OrderedDict()
        self._num_batches_seen = 0
        self._examples_seen = 0
        self._epochs_seen = 0
        self._datasets = []
        self._iteration_mode = []
        self._batch_size = []
        self._num_batches = []
        self._dirty = True
        self._rng_seed = []
        self.names_to_del = ['theano_function_mode']
        self.t0 = time.time()
        self.theano_function_mode = None

        # Initialize self._nested_data_specs, self._data_specs_mapping,
        # and self._flat_data_specs
        self._build_data_specs()

    def _build_data_specs(self):
        """
        Computes a nested data_specs for input and all channels

        Also computes the mapping to flatten it. This function is
        called from redo_theano.
        """
        # Ask the model what it needs
        m_space, m_source = self.model.get_monitoring_data_specs()
        input_spaces = [m_space]
        input_sources = [m_source]
        for channel in self.channels.values():
            space = channel.data_specs[0]
            assert isinstance(space, Space)
            input_spaces.append(space)
            input_sources.append(channel.data_specs[1])

        nested_space = CompositeSpace(input_spaces)
        nested_source = tuple(input_sources)

        self._nested_data_specs = (nested_space, nested_source)
        self._data_specs_mapping = DataSpecsMapping(self._nested_data_specs)

        flat_space = self._data_specs_mapping.flatten(nested_space,
                                                      return_tuple=True)
        flat_source = self._data_specs_mapping.flatten(nested_source,
                                                       return_tuple=True)
        self._flat_data_specs = (CompositeSpace(flat_space), flat_source)

    def set_theano_function_mode(self, mode):
        """
        .. todo::

            WRITEME

        Parameters
        ----------
        mode : theano.compile.Mode
            Theano functions for the monitoring channels will be
            compiled and run using this mode.
        """
        if self.theano_function_mode != mode:
            self._dirty = True
            self.theano_function_mode = mode

    def add_dataset(self, dataset, mode='sequential', batch_size=None,
                    num_batches=None, seed=None):
        """
        Determines the data used to calculate the values of each channel.

        Parameters
        ----------
        dataset : object
            A `pylearn2.datasets.Dataset` object.
        mode : str or object, optional
            Iteration mode; see the docstring of the `iterator` method
            on `pylearn2.datasets.Dataset` for details.
        batch_size : int, optional
            The size of an individual batch. Optional if `mode` is
            'sequential' and `num_batches` is specified (batch size
            will be calculated based on full dataset size).
        num_batches : int, optional
            The total number of batches. Unnecessary if `mode` is
#.........(rest of the code omitted).........
Developer: goller, Project: pylearn2, Lines: 103, Source: monitor.py

Example 12: setup

# Required import: from pylearn2.utils.data_specs import DataSpecsMapping [as alias]
# Or: from pylearn2.utils.data_specs.DataSpecsMapping import nest [as alias]
    def setup(self, dataset, cost, batch_size, num_batches=None,
              extra_costs=None, mode='sequential', obj_prereqs=None,
              cost_monitoring_args=None):

        if dataset is None:
            return
        if isinstance(dataset, Dataset):
            dataset = {'': dataset}
        else:
            assert isinstance(dataset, dict)
            assert all(isinstance(key, str) for key in dataset)
            assert all(isinstance(dataset[key], Dataset) for key in dataset)

        if extra_costs is None:
            costs = {}
        else:
            assert isinstance(extra_costs, (OrderedDict, dict))
            costs = extra_costs
        assert '' not in costs
        costs[''] = cost

        if cost_monitoring_args is None:
            cost_monitoring_args = {}

        model = self.model

        # Build a composite data_specs containing the specs for all costs,
        # then the specs of the model
        cost_names = sorted(costs.keys())
        spaces = []
        sources = []
        for c in cost_names:
            c_space, c_source = costs[c].get_data_specs(model)
            spaces.append(c_space)
            sources.append(c_source)

        # Ask the model for the data_specs needed
        m_space, m_source = model.get_monitoring_data_specs()
        spaces.append(m_space)
        sources.append(m_source)

        nested_space = CompositeSpace(spaces)
        nested_sources = tuple(sources)

        # Flatten this data_specs, so we build only one symbolic Theano
        # variable for each of the unique (space, source) pairs.
        mapping = DataSpecsMapping((nested_space, nested_sources))
        space_tuple = mapping.flatten(nested_space, return_tuple=True)
        source_tuple = mapping.flatten(nested_sources, return_tuple=True)
        ipt = tuple(space.make_theano_batch(name='monitor_%s' % source,
                                            batch_size=None)
                    for (space, source) in safe_zip(space_tuple, source_tuple))

        # Build a nested tuple from ipt, to dispatch the appropriate parts
        # of the ipt batch to each cost
        nested_ipt = mapping.nest(ipt)

        # custom_channels = {}
        # for i, cost_name in enumerate(cost_names):
        #     if cost_name == '':
        #         prefix = ''
        #     else:
        #         prefix = cost_name + '_'
        #     cost = costs[cost_name]
        #     cost_ipt = nested_ipt[i]
        #     raw_channels = cost.get_monitoring_channels(model, cost_ipt)
        #     channels = {}
        #     for name in raw_channels:
        #         # We need three things: the value itself (raw_channels[name]),
        #         # the input variables (cost_ipt), and the data_specs for
        #         # these input variables ((spaces[i], sources[i]))
        #         channels[prefix + name] = (raw_channels[name],
        #                                    cost_ipt,
        #                                    (spaces[i], sources[i]))
        #     custom_channels.update(channels)
        #
        # # Use the last inputs from nested_ipt for the model
        # model_channels = model.get_monitoring_channels(nested_ipt[-1])
        # channels = {}
        # for name in model_channels:
        #     # Note: some code used to consider that model_channels[name]
        #     # could be a a (channel, prereqs) pair, this is not supported.
        #     channels[name] = (model_channels[name],
        #                       nested_ipt[-1],
        #                       (spaces[-1], sources[-1]))
        # custom_channels.update(channels)

        if is_stochastic(mode):
            seed = [[2013, 2, 22]]
        else:
            seed = None

        for dataset_name in dataset:
            cur_dataset = dataset[dataset_name]
            self.add_dataset(dataset=cur_dataset,
                             mode=mode,
                             batch_size=batch_size,
                             num_batches=num_batches,
                             seed=seed)
            if dataset_name == '':
#.........(rest of the code omitted).........
Developer: HiQiQi, Project: src, Lines: 103, Source: myMonitor.py

Example 13: setup

# Required import: from pylearn2.utils.data_specs import DataSpecsMapping [as alias]
# Or: from pylearn2.utils.data_specs.DataSpecsMapping import nest [as alias]
    def setup(self, model, dataset):
        """
        Compiles the theano functions needed for the train method.

        Parameters
        ----------
        model : a Model instance
        dataset : Dataset
        """
        if self.cost is None:
            self.cost = model.get_default_cost()

        inf_params = [param for param in model.get_params()
                      if np.any(np.isinf(param.get_value()))]
        if len(inf_params) > 0:
            raise ValueError("These params are Inf: "+str(inf_params))
        if any([np.any(np.isnan(param.get_value()))
                for param in model.get_params()]):
            nan_params = [param for param in model.get_params()
                          if np.any(np.isnan(param.get_value()))]
            raise ValueError("These params are NaN: "+str(nan_params))
        self.model = model

        self._synchronize_batch_size(model)
        model._test_batch_size = self.batch_size
        self.monitor = Monitor.get_monitor(model)
        self.monitor._sanity_check()

        # test if force batch size and batch size
        if getattr(model, "force_batch_size", False) and \
           any(dataset.get_design_matrix().shape[0] % self.batch_size != 0 for
               dataset in self.monitoring_dataset.values()) and \
           not has_uniform_batch_size(self.monitor_iteration_mode):

            raise ValueError("Dataset size is not a multiple of batch size."
                             "You should set monitor_iteration_mode to "
                             "even_sequential, even_shuffled_sequential or "
                             "even_batchwise_shuffled_sequential")

        data_specs = self.cost.get_data_specs(self.model)
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

        # Build a flat tuple of Theano Variables, one for each space.
        # We want that so that if the same space/source is specified
        # more than once in data_specs, only one Theano Variable
        # is generated for it, and the corresponding value is passed
        # only once to the compiled Theano function.
        theano_args = []
        for space, source in safe_zip(space_tuple, source_tuple):
            name = '%s[%s]' % (self.__class__.__name__, source)
            arg = space.make_theano_batch(name=name,
                                          batch_size=self.batch_size)
            theano_args.append(arg)
        theano_args = tuple(theano_args)

        # Methods of `self.cost` need args to be passed in a format compatible
        # with data_specs
        nested_args = mapping.nest(theano_args)
        fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
        self.on_load_batch = fixed_var_descr.on_load_batch

        cost_value = self.cost.expr(model, nested_args,
                                    ** fixed_var_descr.fixed_vars)

        if cost_value is not None and cost_value.name is None:
            # Concatenate the name of all tensors in theano_args !?
            cost_value.name = 'objective'

        # Set up monitor to model the objective value, learning rate,
        # momentum (if applicable), and extra channels defined by
        # the cost
        learning_rate = self.learning_rate
        if self.monitoring_dataset is not None:
            if (self.monitoring_batch_size is None and
                    self.monitoring_batches is None):
                self.monitoring_batch_size = self.batch_size
                self.monitoring_batches = self.batches_per_iter
            self.monitor.setup(dataset=self.monitoring_dataset,
                               cost=self.cost,
                               batch_size=self.monitoring_batch_size,
                               num_batches=self.monitoring_batches,
                               extra_costs=self.monitoring_costs,
                               mode=self.monitor_iteration_mode)
            dataset_name = self.monitoring_dataset.keys()[0]
            monitoring_dataset = self.monitoring_dataset[dataset_name]
            #TODO: have Monitor support non-data-dependent channels
            self.monitor.add_channel(name='learning_rate',
                                     ipt=None,
                                     val=learning_rate,
                                     data_specs=(NullSpace(), ''),
                                     dataset=monitoring_dataset)

            if self.learning_rule:
                self.learning_rule.add_channels_to_monitor(
                        self.monitor,
                        monitoring_dataset)

        params = list(model.get_params())
#.........(rest of the code omitted).........
Developer: AdityoSanjaya, Project: adversarial, Lines: 103, Source: sgd_alt.py

Example 14: setup

# Required import: from pylearn2.utils.data_specs import DataSpecsMapping [as alias]
# Or: from pylearn2.utils.data_specs.DataSpecsMapping import nest [as alias]
    def setup(self, model, dataset):
        """
        Allows the training algorithm to do some preliminary configuration
        *before* we actually start training the model. The dataset is provided
        in case other derived training algorithms need to modify model based on
        the dataset.

        Parameters
        ----------
        model : object
            A Python object representing the model to train. Loosely
            implementing the interface of models.model.Model.
        dataset : pylearn2.datasets.dataset.Dataset
            Dataset object used to draw training data
        """
        self.model = model

        if self.cost is None:
            self.cost = model.get_default_cost()

        try:
            if self.cost.is_stochastic():
                raise TypeError("BGD is not compatible with stochastic "
                                "costs.")
        except NotImplementedError:
            warnings.warn("BGD is not compatible with stochastic costs "
                          "and cannot determine whether the current cost is "
                          "stochastic.")

        if self.batch_size is None:
            self.batch_size = model.force_batch_size
        else:
            batch_size = self.batch_size
            if self.set_batch_size:
                model.set_batch_size(batch_size)
            elif hasattr(model, 'force_batch_size'):
                if not (model.force_batch_size is None or
                        model.force_batch_size <= 0 or
                        batch_size == model.force_batch_size):
                    raise ValueError("batch_size is %d but " +
                                     "model.force_batch_size is %d" %
                                     (batch_size, model.force_batch_size))

        self.monitor = Monitor.get_monitor(model)
        self.monitor.set_theano_function_mode(self.theano_function_mode)

        data_specs = self.cost.get_data_specs(model)
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

        # Build a flat tuple of Theano Variables, one for each space,
        # named according to the sources.
        theano_args = []
        for space, source in safe_zip(space_tuple, source_tuple):
            name = 'BGD_[%s]' % source
            arg = space.make_theano_batch(name=name)
            theano_args.append(arg)
        theano_args = tuple(theano_args)

        # Methods of `self.cost` need args to be passed in a format compatible
        # with their data_specs
        nested_args = mapping.nest(theano_args)
        fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
        self.on_load_batch = fixed_var_descr.on_load_batch

        cost_value = self.cost.expr(model, nested_args,
                                    ** fixed_var_descr.fixed_vars)
        grads, grad_updates = self.cost.get_gradients(
            model, nested_args, ** fixed_var_descr.fixed_vars)

        assert isinstance(grads, OrderedDict)
        assert isinstance(grad_updates, OrderedDict)

        if cost_value is None:
            raise ValueError("BGD is incompatible with " + str(self.cost) +
                             " because it is intractable, but BGD uses the " +
                             "cost function value to do line searches.")

        # obj_prereqs has to be a list of function f called with f(*data),
        # where data is a data tuple coming from the iterator.
        # this function enables capturing "mapping" and "f", while
        # enabling the "*data" syntax
        def capture(f, mapping=mapping):
            new_f = lambda *args: f(mapping.flatten(args, return_tuple=True))
            return new_f

        obj_prereqs = [capture(f) for f in fixed_var_descr.on_load_batch]

        if self.monitoring_dataset is not None:
            if (self.monitoring_batch_size is None and
                    self.monitoring_batches is None):
                self.monitoring_batch_size = self.batch_size
                self.monitoring_batches = self.batches_per_iter
            self.monitor.setup(
                dataset=self.monitoring_dataset,
                cost=self.cost,
                batch_size=self.monitoring_batch_size,
                num_batches=self.monitoring_batches,
                obj_prereqs=obj_prereqs,
#.........(rest of the code omitted).........
Developer: 123fengye741, Project: pylearn2, Lines: 103, Source: bgd.py

Example 15: setup_impl

# Required import: from pylearn2.utils.data_specs import DataSpecsMapping [as alias]
# Or: from pylearn2.utils.data_specs.DataSpecsMapping import nest [as alias]
    def setup_impl(self, model, dataset, algorithm):
        cost = algorithm.cost

        root = model.get_param_vector()

        dim = root.size

        rng = self.rng


        points = rng.randn(self.num_points, self.num_basis_vectors)
        points = points.astype(root.dtype)
        points *= self.scale

        if self.include_root:
            points[0, :] = 0.

        if not hasattr(self, 'cost_fn'):
            # Cargo cult the boilerplate needed to evaluate the cost function
            # =======================================
            data_specs = cost.get_data_specs(model)
            mapping = DataSpecsMapping(data_specs)
            space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
            source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

            # Build a flat tuple of Theano Variables, one for each space.
            # We want that so that if the same space/source is specified
            # more than once in data_specs, only one Theano Variable
            # is generated for it, and the corresponding value is passed
            # only once to the compiled Theano function.
            theano_args = []
            for space, source in safe_zip(space_tuple, source_tuple):
                name = '%s[%s]' % (self.__class__.__name__, source)
                arg = space.make_theano_batch(name=name,
                                              batch_size=self.batch_size)
                theano_args.append(arg)
            theano_args = tuple(theano_args)

            # Methods of `cost` need args to be passed in a format compatible
            # with data_specs
            nested_args = mapping.nest(theano_args)
            fixed_var_descr = cost.get_fixed_var_descr(model, nested_args)
            self.on_load_batch = fixed_var_descr.on_load_batch

            cost_value = cost.expr(model, nested_args,
                                        ** fixed_var_descr.fixed_vars)
            # End cargo culting
            # ======================

            print "Compiling cost function..."
            cost_fn = function(theano_args, cost_value)
            self.cost_fn = cost_fn
        else:
            cost_fn = self.cost_fn

        cost_values = np.zeros(self.num_points)


        data = list(dataset.get_batch_design(self.batch_size,
            include_labels=True))
        from pylearn2.utils.one_hot import one_hot
        data[1] = one_hot(data[1])


        if self.method == 'gaussian':
            basis = rng.randn(dim, self.num_basis_vectors).astype(root.dtype)
        elif self.method == 'element':
            basis = np.zeros((dim, self.num_basis_vectors)).astype(root.dtype)
            for i in xrange(self.num_basis_vectors):
                basis[rng.randint(dim), i] = 1.
        elif self.method == 'gradient':
            if not hasattr(self, 'grad_fn'):
                self.grad_fn = function(theano_args, grad(cost_value, model.get_params()))
            grad_fn = self.grad_fn

            basis = np.zeros((dim, self.num_basis_vectors)).astype(root.dtype)
            for i in xrange(self.num_basis_vectors):
                ipt = list(dataset.get_batch_design(1, include_labels=True))
                label = ipt[1]
                assert label.size == 1
                label = label[0]
                one_hot = np.zeros((1, 10,),dtype='float32')
                one_hot[0, label] = 1
                ipt[1] = one_hot
                g = grad_fn(*ipt)
                basis[:,i] = np.concatenate([e.reshape(e.size) for e in g], axis=0)
        else:
            assert False

        basis /= np.sqrt(np.square(basis).sum(axis=0))

        # Orthogonalize basis
        for i in xrange(self.num_basis_vectors):
            v = basis[:, i].copy()
            for j in xrange(i):
                u = basis[:, j].copy()
                v -= np.dot(u, v) * u
            norm = np.sqrt(np.square(v).sum())
            assert norm > 1e-4
            v /= norm
#.........(rest of the code omitted).........
Developer: cc13ny, Project: galatea, Lines: 103, Source: __init__.py


Note: the pylearn2.utils.data_specs.DataSpecsMapping.nest examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their authors, and copyright in the source code remains with the original authors; consult the corresponding project's License before distributing or using it. Do not reproduce without permission.