Python splitters.Splitter Class Code Examples

This article collects typical usage examples of the Python class mvpa2.generators.splitters.Splitter. If you are wondering what the Splitter class does, how to use it, or where to find usage examples, the curated class code examples below should help.


Fifteen code examples of the Splitter class are presented below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
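
All of the examples share the same two-step pattern: a Partitioner first annotates every sample of a dataset with a partition attribute (named 'partitions' by default), and a Splitter then slices the dataset into one sub-dataset per unique value of that attribute. The following minimal sketch of that pattern is illustrative only and is not taken from any of the projects cited below; it assumes a working mvpa2 installation and uses mvpa2's normal_feature_dataset helper to build a toy dataset.

    # Minimal usage sketch (illustrative only): NFoldPartitioner marks each
    # sample with a 'partitions' attribute (by default 1 = training,
    # 2 = testing), and Splitter yields one sub-dataset per unique value.
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.generators.splitters import Splitter
    from mvpa2.misc.data_generators import normal_feature_dataset

    ds = normal_feature_dataset(perlabel=10, nlabels=2, nchunks=5,
                                nfeatures=4)
    nfs = NFoldPartitioner()             # leave-one-chunk-out partitioning
    spl = Splitter(attr='partitions')    # split on the generated attribute
    for partitioned in nfs.generate(ds):
        train, test = list(spl.generate(partitioned))
        # train holds the samples with partitions == 1,
        # test holds the left-out chunk with partitions == 2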

Example 1: test_custom_split

    def test_custom_split(self):
        # simulate a half splitter
        hs = CustomPartitioner([(None,[0,1,2,3,4]),(None,[5,6,7,8,9])])
        spl = Splitter(attr='partitions')
        splits = [ list(spl.generate(p)) for p in hs.generate(self.data) ]
        self.failUnless(len(splits) == 2)

        for i,p in enumerate(splits):
            self.failUnless( len(p) == 2 )
            self.failUnless( p[0].nsamples == 50 )
            self.failUnless( p[1].nsamples == 50 )

        assert_array_equal(splits[0][1].sa['chunks'].unique, [0, 1, 2, 3, 4])
        assert_array_equal(splits[0][0].sa['chunks'].unique, [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][1].sa['chunks'].unique, [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][0].sa['chunks'].unique, [0, 1, 2, 3, 4])


        # check fully customized split with working and validation set specified
        cs = CustomPartitioner([([0,3,4],[5,9])])
        # we want to discard the unselected partition of the data,
        # hence the attr_values/ignore_values arguments below;
        # these two splitters should do exactly the same thing
        splitters = (Splitter(attr='partitions', attr_values=[1,2]),
                     Splitter(attr='partitions', ignore_values=(0,)))
        for spl in splitters:
            splits = [ list(spl.generate(p)) for p in cs.generate(self.data) ]
            self.failUnless(len(splits) == 1)

            for i,p in enumerate(splits):
                self.failUnless( len(p) == 2 )
                self.failUnless( p[0].nsamples == 30 )
                self.failUnless( p[1].nsamples == 20 )

            self.failUnless((splits[0][1].sa['chunks'].unique == [5, 9]).all())
            self.failUnless((splits[0][0].sa['chunks'].unique == [0, 3, 4]).all())
Developer: psederberg, Project: PyMVPA, Lines: 35, Source: test_splitter.py

Example 2: test_repeated_features

    def test_repeated_features(self):
        class CountFeatures(Measure):
            is_trained = True
            def _call(self, ds):
                return Dataset([ds.nfeatures],
                                fa={'nonbogus_targets': list(ds.fa['nonbogus_targets'].unique)})

        cf = CountFeatures()
        spl = Splitter('fa.nonbogus_targets')
        nsplits = len(list(spl.generate(self.dataset)))
        assert_equal(nsplits, 3)
        rm = RepeatedMeasure(cf, spl, concat_as='features')
        res = rm(self.dataset)
        assert_equal(res.shape, (1, nsplits))
        # due to https://github.com/numpy/numpy/issues/641 we are
        # using the list(set(...)) construct, so the order of
        # nonbogus_targets.unique can vary from run to run; thus there
        # is no guarantee that we would get 18 first, which would be a
        # questionable assumption anyway, so we perform checks
        # that do not require any specific order.
        # And yet, due to another issue
        # https://github.com/numpy/numpy/issues/3759,
        # we can't just use == None for the bool mask
        None_fa = np.array([x == None for x in res.fa.nonbogus_targets])
        assert_array_equal(res.samples[0, None_fa], [18])
        assert_array_equal(res.samples[0, ~None_fa], [1, 1])

        if sys.version_info[0] < 3:
            # with python2 order seems to be consistent
            assert_array_equal(res.samples[0], [18, 1, 1])
Developer: andreirusu, Project: PyMVPA, Lines: 30, Source: test_datameasure.py

Example 3: test_label_splitter

    def test_label_splitter(self):
        oes = OddEvenPartitioner(attr='targets')
        spl = Splitter(attr='partitions')

        splits = [ list(spl.generate(p)) for p in oes.generate(self.data) ]

        assert_array_equal(splits[0][0].sa['targets'].unique, [0,2])
        assert_array_equal(splits[0][1].sa['targets'].unique, [1,3])
        assert_array_equal(splits[1][0].sa['targets'].unique, [1,3])
        assert_array_equal(splits[1][1].sa['targets'].unique, [0,2])
Developer: psederberg, Project: PyMVPA, Lines: 10, Source: test_splitter.py

Example 4: test_simplest_cv_pat_gen

    def test_simplest_cv_pat_gen(self):
        # create the generator
        nfs = NFoldPartitioner(cvtype=1)
        spl = Splitter(attr='partitions')
        # now get the xval pattern sets (one-fold CV)
        xvpat = [ list(spl.generate(p)) for p in nfs.generate(self.data) ]

        self.failUnless( len(xvpat) == 10 )

        for i,p in enumerate(xvpat):
            self.failUnless( len(p) == 2 )
            self.failUnless( p[0].nsamples == 90 )
            self.failUnless( p[1].nsamples == 10 )
            self.failUnless( p[1].chunks[0] == i )
Developer: psederberg, Project: PyMVPA, Lines: 14, Source: test_splitter.py

Example 5: test_slicing

    def test_slicing(self):
        hs = HalfPartitioner()
        spl = Splitter(attr="partitions")
        splits = list(hs.generate(self.data))
        for s in splits:
            # partitioned dataset shares the data
            assert_true(s.samples.base is self.data.samples)
        splits = [list(spl.generate(p)) for p in hs.generate(self.data)]

        # with numpy 1.7.0b1 "chaining" was deprecated, so let's create a
        # check function appropriate for the given numpy version
        _a = np.arange(5)
        __a = _a[:4][:3]
        if __a.base is _a:
            # 1.7.0b1
            def is_the_same_base(x, base=self.data.samples):
                return x.base is base

        elif __a.base.base is _a:
            # prior 1.7.0b1
            def is_the_same_base(x, base=self.data.samples):
                return x.base.base is base

        else:
            raise RuntimeError("Uknown handling of .base by numpy")

        for s in splits:
            # we get slicing all the time
            assert_true(is_the_same_base(s[0].samples))
            assert_true(is_the_same_base(s[1].samples))
        spl = Splitter(attr="partitions", noslicing=True)
        splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
        for s in splits:
            # no slicing at all
            assert_false(s[0].samples.base is self.data.samples)
            assert_false(s[1].samples.base is self.data.samples)
        nfs = NFoldPartitioner()
        spl = Splitter(attr="partitions")
        splits = [list(spl.generate(p)) for p in nfs.generate(self.data)]
        for i, s in enumerate(splits):
            # training set gets sliced only in the first and last split
            if i == 0 or i == len(splits) - 1:
                assert_true(is_the_same_base(s[0].samples))
            else:
                assert_true(s[0].samples.base is None)
            # we get slicing all the time
            assert_true(is_the_same_base(s[1].samples))
        step_ds = Dataset(np.random.randn(20, 2), sa={"chunks": np.tile([0, 1], 10)})
        oes = OddEvenPartitioner()
        spl = Splitter(attr="partitions")
        splits = list(oes.generate(step_ds))
        for s in splits:
            # partitioned dataset shares the data
            assert_true(s.samples.base is step_ds.samples)
        splits = [list(spl.generate(p)) for p in oes.generate(step_ds)]
        assert_equal(len(splits), 2)
        for s in splits:
            # we get slicing all the time
            assert_true(is_the_same_base(s[0].samples, step_ds.samples))
            assert_true(is_the_same_base(s[1].samples, step_ds.samples))
Developer: pckillerbrici, Project: PyMVPA, Lines: 60, Source: test_splitter.py

Example 6: test_repeated_features

    def test_repeated_features(self):
        print self.dataset
        print self.dataset.fa.nonbogus_targets
        class CountFeatures(Measure):
            is_trained = True
            def _call(self, ds):
                return ds.nfeatures

        cf = CountFeatures()
        spl = Splitter('fa.nonbogus_targets')
        nsplits = len(list(spl.generate(self.dataset)))
        assert_equal(nsplits, 3)
        rm = RepeatedMeasure(cf, spl, concat_as='features')
        res = rm(self.dataset)
        assert_equal(res.shape, (1, nsplits))
        assert_array_equal(res.samples[0], [18,1,1])
Developer: arnaudsj, Project: PyMVPA, Lines: 16, Source: test_datameasure.py

Example 7: test_counted_splitting

    def test_counted_splitting(self):
        spl = Splitter(attr='partitions')
        # if count > #chunks, it should still result in only #chunks splits
        nchunks = len(self.data.sa['chunks'].unique)
        for strategy in Partitioner._STRATEGIES:
            for count, target in [ (nchunks*2, nchunks),
                                   (nchunks, nchunks),
                                   (nchunks-1, nchunks-1),
                                   (3, 3),
                                   (0, 0),
                                   (1, 1)
                                   ]:
                nfs = NFoldPartitioner(cvtype=1, count=count,
                                       selection_strategy=strategy)
                splits = [ list(spl.generate(p)) for p in nfs.generate(self.data) ]
                self.failUnless(len(splits) == target)
                chosenchunks = [int(s[1].uniquechunks) for s in splits]

                # Test if configuration matches as well
                nsplits_cfg = len(nfs.get_partition_specs(self.data))
                self.failUnlessEqual(nsplits_cfg, target)

                # Check if "lastsplit" dsattr was assigned appropriately
                nsplits = len(splits)
                if nsplits > 0:
                    # dummy-proof testing of last split
                    for ds_ in splits[-1]:
                        self.failUnless(ds_.a.lastpartitionset)
                    # test all now (the original bare comparison here was a
                    # no-op; it needs to be wrapped in an assertion)
                    for isplit, split in enumerate(splits):
                        for ds_ in split:
                            self.failUnlessEqual(ds_.a.lastpartitionset,
                                                 isplit == nsplits - 1)

                # Check results of different strategies
                if strategy == 'first':
                    self.failUnlessEqual(chosenchunks, range(target))
                elif strategy == 'equidistant':
                    if target == 3:
                        self.failUnlessEqual(chosenchunks, [0, 3, 7])
                elif strategy == 'random':
                    # none is selected twice
                    self.failUnless(len(set(chosenchunks)) == len(chosenchunks))
                    self.failUnless(target == len(chosenchunks))
                else:
                    raise RuntimeError, "Add unittest for strategy %s" \
                          % strategy
Developer: psederberg, Project: PyMVPA, Lines: 46, Source: test_splitter.py

Example 8: _forward_dataset

 def _forward_dataset(self, ds):
     if self.__chunks_attr is None:
         return self._forward_dataset_helper(ds)
     else:
         # strip down dataset to speedup local processing
         if self.__attr_strategy == "remove":
             keep_sa = []
         else:
             keep_sa = None
         proc_ds = ds.copy(deep=False, sa=keep_sa, fa=[], a=[])
         # process all chunks individually
         # use a custom splitter to speed up splitting
         spl = Splitter(self.__chunks_attr)
         dses = [self._forward_dataset_helper(d) for d in spl.generate(proc_ds)]
         # and merge them again
         mds = vstack(dses)
         # put back attributes
         mds.fa.update(ds.fa)
         mds.a.update(ds.a)
         return mds
Developer: robbisg, Project: PyMVPA, Lines: 20, Source: filters.py

Example 9: test_svms

    def test_svms(self, clf):
        knows_probabilities = \
            'probabilities' in clf.ca.keys() and clf.params.probability
        enable_ca = ['estimates']
        if knows_probabilities:
            enable_ca += ['probabilities']

        clf.ca.change_temporarily(enable_ca = enable_ca)
        spl = Splitter('train', count=2)
        traindata, testdata = list(spl.generate(datasets['uni2small']))
        clf.train(traindata)
        predicts = clf.predict(testdata.samples)
        # estimates should differ from predictions for the SVMs we have
        self.assertTrue(np.any(predicts != clf.ca.estimates))

        if knows_probabilities and clf.ca.is_set('probabilities'):
            # XXX test more thoroughly what we are getting here ;-)
            self.assertEqual( len(clf.ca.probabilities),
                                  len(testdata.samples)  )
        clf.ca.reset_changed_temporarily()
Developer: Anhmike, Project: PyMVPA, Lines: 20, Source: test_clf.py

Example 10: test_exclude_targets_combinations

def test_exclude_targets_combinations():
    partitioner = ChainNode(
        [NFoldPartitioner(), ExcludeTargetsCombinationsPartitioner(k=2, targets_attr="targets", space="partitions")],
        space="partitions",
    )
    from mvpa2.misc.data_generators import normal_feature_dataset

    ds = normal_feature_dataset(snr=0.0, nlabels=4, perlabel=3, nchunks=3, nonbogus_features=[0, 1, 2, 3], nfeatures=4)
    partitions = list(partitioner.generate(ds))
    assert_equal(len(partitions), 3 * 6)
    splitter = Splitter("partitions")
    combs = []
    comb_chunks = []
    for p in partitions:
        trds, teds = list(splitter.generate(p))[:2]
        comb = tuple(np.unique(teds.targets))
        combs.append(comb)
        comb_chunks.append(comb + tuple(np.unique(teds.chunks)))
    assert_equal(len(set(combs)), 6)  # just 6 possible combinations of 2 out of 4
    assert_equal(len(set(comb_chunks)), 3 * 6)  # all unique
Developer: hanke, Project: PyMVPA, Lines: 20, Source: test_generators.py

Example 11: test_half_split

    def test_half_split(self):
        hs = HalfPartitioner()
        spl = Splitter(attr='partitions')

        splits = [ list(spl.generate(p)) for p in hs.generate(self.data) ]

        self.failUnless(len(splits) == 2)

        for i,p in enumerate(splits):
            self.failUnless( len(p) == 2 )
            self.failUnless( p[0].nsamples == 50 )
            self.failUnless( p[1].nsamples == 50 )

        assert_array_equal(splits[0][1].sa['chunks'].unique, [0, 1, 2, 3, 4])
        assert_array_equal(splits[0][0].sa['chunks'].unique, [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][1].sa['chunks'].unique, [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][0].sa['chunks'].unique, [0, 1, 2, 3, 4])

        # check that it also works when applied again to one half of the data
        moresplits = [ list(spl.generate(p)) for p in hs.generate(splits[0][0])]

        for split in moresplits:
            self.failUnless(split[0] != None)
            self.failUnless(split[1] != None)
Developer: psederberg, Project: PyMVPA, Lines: 24, Source: test_splitter.py

Example 12: test_odd_even_split

    def test_odd_even_split(self):
        oes = OddEvenPartitioner()
        spl = Splitter(attr='partitions')

        splits = [ list(spl.generate(p)) for p in oes.generate(self.data) ]

        self.assertTrue(len(splits) == 2)

        for i,p in enumerate(splits):
            self.assertTrue( len(p) == 2 )
            self.assertTrue( p[0].nsamples == 50 )
            self.assertTrue( p[1].nsamples == 50 )

        assert_array_equal(splits[0][1].sa['chunks'].unique, [1, 3, 5, 7, 9])
        assert_array_equal(splits[0][0].sa['chunks'].unique, [0, 2, 4, 6, 8])
        assert_array_equal(splits[1][0].sa['chunks'].unique, [1, 3, 5, 7, 9])
        assert_array_equal(splits[1][1].sa['chunks'].unique, [0, 2, 4, 6, 8])

        # check if it works on pure odd and even chunk ids
        moresplits = [ list(spl.generate(p)) for p in oes.generate(splits[0][0])]

        for split in moresplits:
            self.assertTrue(split[0] != None)
            self.assertTrue(split[1] != None)
Developer: PepGardiola, Project: PyMVPA, Lines: 24, Source: test_splitter.py

Example 13: test_slicing

 def test_slicing(self):
     hs = HalfPartitioner()
     spl = Splitter(attr='partitions')
     splits = list(hs.generate(self.data))
     for s in splits:
         # partitioned dataset shares the data
         assert_true(s.samples.base is self.data.samples)
     splits = [ list(spl.generate(p)) for p in hs.generate(self.data) ]
     for s in splits:
         # we get slicing all the time
         assert_true(s[0].samples.base.base is self.data.samples)
         assert_true(s[1].samples.base.base is self.data.samples)
     spl = Splitter(attr='partitions', noslicing=True)
     splits = [ list(spl.generate(p)) for p in hs.generate(self.data) ]
     for s in splits:
         # no slicing at all
         assert_false(s[0].samples.base is self.data.samples)
         assert_false(s[1].samples.base is self.data.samples)
     nfs = NFoldPartitioner()
     spl = Splitter(attr='partitions')
     splits = [ list(spl.generate(p)) for p in nfs.generate(self.data) ]
     for i, s in enumerate(splits):
         # training set gets sliced only in the first and last split
         if i == 0 or i == len(splits) - 1:
             assert_true(s[0].samples.base.base is self.data.samples)
         else:
             assert_true(s[0].samples.base is None)
         # we get slicing all the time
         assert_true(s[1].samples.base.base is self.data.samples)
     step_ds = Dataset(np.random.randn(20,2),
                       sa={'chunks': np.tile([0,1], 10)})
     oes = OddEvenPartitioner()
     spl = Splitter(attr='partitions')
     splits = list(oes.generate(step_ds))
     for s in splits:
         # partitioned dataset shares the data
         assert_true(s.samples.base is step_ds.samples)
     splits = [ list(spl.generate(p)) for p in oes.generate(step_ds) ]
     assert_equal(len(splits), 2)
     for s in splits:
         # we get slicing all the time
         assert_true(s[0].samples.base.base is step_ds.samples)
         assert_true(s[1].samples.base.base is step_ds.samples)
Developer: psederberg, Project: PyMVPA, Lines: 43, Source: test_splitter.py

Example 14: test_splitter

def test_splitter():
    ds = give_data()
    # split with defaults
    spl1 = Splitter('chunks')
    assert_raises(NotImplementedError, spl1, ds)

    splits = list(spl1.generate(ds))
    assert_equal(len(splits), len(ds.sa['chunks'].unique))

    for split in splits:
        # it should have performed basic slicing!
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.sa['chunks'].unique), 1)
        assert_true('lastsplit' in split.a)
    assert_true(splits[-1].a.lastsplit)

    # now again, more customized
    spl2 = Splitter('targets', attr_values=[0, 1, 1, 2, 3, 3, 3], count=4,
                    noslicing=True)
    splits = list(spl2.generate(ds))
    assert_equal(len(splits), 4)
    for split in splits:
        # it should NOT have performed basic slicing!
        assert_false(split.samples.base is ds.samples)
        assert_equal(len(split.sa['targets'].unique), 1)
        assert_equal(len(split.sa['chunks'].unique), 10)
    assert_true(splits[-1].a.lastsplit)

    # two should be identical
    assert_array_equal(splits[1].samples, splits[2].samples)

    # now go wild and split by feature attribute
    ds.fa['roi'] = np.repeat([0,1], 5)
    # splitter should auto-detect that this is a feature attribute
    spl3 = Splitter('roi')
    splits = list(spl3.generate(ds))
    assert_equal(len(splits), 2)
    for split in splits:
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.fa['roi'].unique), 1)
        assert_equal(split.shape, (100, 5))

    # and finally test chained splitters
    cspl = ChainNode([spl2, spl3, spl1])
    splits = list(cspl.generate(ds))
    # 4 target splits and 2 roi splits each and 10 chunks each
    assert_equal(len(splits), 80)
Developer: Soletmons, Project: PyMVPA, Lines: 47, Source: test_generators.py

Example 15: _sl_call

    def _sl_call(self, dataset, roi_ids, nproc):
        """Call to GNBSearchlight
        """
        # Local bindings
        gnb = self.gnb
        params = gnb.params
        generator = self.generator
        errorfx = self.errorfx
        qe = self.queryengine

        ## if False:
        ##     class A(Learner):
        ##         pass
        ##     self = A()
        ##     import numpy as np
        ##     from mvpa2.clfs.gnb import GNB
        ##     from mvpa2.generators.partition import NFoldPartitioner
        ##     from mvpa2.misc.errorfx import mean_mismatch_error
        ##     from mvpa2.testing.datasets import datasets as tdatasets
        ##     from mvpa2.datasets import Dataset
        ##     from mvpa2.misc.neighborhood import IndexQueryEngine, Sphere
        ##     from mvpa2.clfs.distance import absmin_distance
        ##     import time
        ##     if __debug__:
        ##         from mvpa2.base import debug
        ##         debug.active += ['SLC.*']
        ##         # XXX is it that ugly?
        ##         debug.active.pop(debug.active.index('SLC_'))
        ##         debug.metrics += ['reltime']
        ##     dataset = tdatasets['3dlarge'].copy()
        ##     dataset.fa['voxel_indices'] = dataset.fa.myspace
        ##     sphere = Sphere(radius=1,
        ##                     distance_func=absmin_distance)
        ##     qe = IndexQueryEngine(myspace=sphere)

        ##     # Francisco's data
        ##     #dataset = ds_fp
        ##     qe = IndexQueryEngine(voxel_indices=sphere)

        ##     qe.train(dataset)
        ##     roi_ids = np.arange(dataset.nfeatures)
        ##     gnb = GNB()
        ##     params = gnb.params
        ##     generator = NFoldPartitioner()
        ##     errorfx = mean_mismatch_error

        if __debug__:
            time_start = time.time()

        targets_sa_name = gnb.get_space()
        targets_sa = dataset.sa[targets_sa_name]

        if __debug__:
            debug_slc_ = 'SLC_' in debug.active

        # get the dataset information into easy vars
        X = dataset.samples
        if len(X.shape) != 2:
            raise ValueError, \
                  'Unlike GNB, GNBSearchlight (for now) operates on already ' \
                  'flattened datasets'
        labels = targets_sa.value
        ulabels = targets_sa.unique
        nlabels = len(ulabels)
        label2index = dict((l, il) for il, l in enumerate(ulabels))
        labels_numeric = np.array([label2index[l] for l in labels])
        ulabels_numeric = [label2index[l] for l in ulabels]
        # set the feature dimensions
        nsamples = len(X)
        nrois = len(roi_ids)
        s_shape = X.shape[1:]           # shape of a single sample
        # The shape of results
        r_shape = (nrois,) + X.shape[2:]

        #
        # Everything toward optimization ;)
        #
        # Silly Yarik thinks that it might be worthwhile to pre-compute
        # statistics for each feature within a block of the samples
        # which always come together in splits -- most often it is a
        # (chunk, label) combination, but since we simply use a
        # generator -- who knows! Therefore let's figure out what those
        # blocks are and operate on them instead of the original samples.
        #
        # After additional thinking about this -- probably it would
        # yield just minor additional improvements (i.e. not worth it),
        # but since it is coded already -- let it be so

        # 1. Query generator for the splits we will have
        if __debug__:
            debug('SLC',
                  'Phase 1. Initializing partitions using %s on %s'
                  % (generator, dataset))

        # Let's just create a dummy ds which will store the actual sample
        # indices for us
        # XXX we could make it even more lightweight I guess...
        dataset_indicies = Dataset(np.arange(nsamples), sa=dataset.sa)
        splitter = Splitter(attr=generator.get_space())
        splits = list(tuple(splitter.generate(ds_))
#......... part of the code omitted here .........
Developer: arnaudsj, Project: PyMVPA, Lines: 101, Source: gnbsearchlight.py


Note: The mvpa2.generators.splitters.Splitter class examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. For distribution and use, please refer to the corresponding project's license; do not reproduce without permission.