本文整理汇总了Python中mvpa2.generators.splitters.Splitter.generate方法的典型用法代码示例。如果您正苦于以下问题:Python Splitter.generate方法的具体用法?Python Splitter.generate怎么用?Python Splitter.generate使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类mvpa2.generators.splitters.Splitter的用法示例。
在下文中一共展示了Splitter.generate方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_custom_split
# 需要导入模块: from mvpa2.generators.splitters import Splitter [as 别名]
# 或者: from mvpa2.generators.splitters.Splitter import generate [as 别名]
def test_custom_split(self):
    """Check Splitter output on CustomPartitioner-generated partitions."""
    # emulate a half-splitter through an explicit chunk assignment
    halves = CustomPartitioner([(None, [0, 1, 2, 3, 4]),
                                (None, [5, 6, 7, 8, 9])])
    splitter = Splitter(attr='partitions')
    splits = [list(splitter.generate(part))
              for part in halves.generate(self.data)]
    self.assertTrue(len(splits) == 2)
    for pair in splits:
        self.assertTrue(len(pair) == 2)
        self.assertTrue(pair[0].nsamples == 50)
        self.assertTrue(pair[1].nsamples == 50)
    assert_array_equal(splits[0][1].sa['chunks'].unique, [0, 1, 2, 3, 4])
    assert_array_equal(splits[0][0].sa['chunks'].unique, [5, 6, 7, 8, 9])
    assert_array_equal(splits[1][1].sa['chunks'].unique, [5, 6, 7, 8, 9])
    assert_array_equal(splits[1][0].sa['chunks'].unique, [0, 1, 2, 3, 4])
    # fully customized split with explicit working and validation chunks
    custom = CustomPartitioner([([0, 3, 4], [5, 9])])
    # the unselected partition of the data must be discarded, hence the
    # attr_values / ignore_values variants -- both splitters should do
    # exactly the same thing
    splitters = (Splitter(attr='partitions', attr_values=[1, 2]),
                 Splitter(attr='partitions', ignore_values=(0,)))
    for splitter in splitters:
        splits = [list(splitter.generate(part))
                  for part in custom.generate(self.data)]
        self.assertTrue(len(splits) == 1)
        for pair in splits:
            self.assertTrue(len(pair) == 2)
            self.assertTrue(pair[0].nsamples == 30)
            self.assertTrue(pair[1].nsamples == 20)
        self.assertTrue((splits[0][1].sa['chunks'].unique == [5, 9]).all())
        self.assertTrue((splits[0][0].sa['chunks'].unique == [0, 3, 4]).all())
示例2: test_slicing
# 需要导入模块: from mvpa2.generators.splitters import Splitter [as 别名]
# 或者: from mvpa2.generators.splitters.Splitter import generate [as 别名]
def test_slicing(self):
    """Check that splits are views of the source samples where possible.

    Contiguous partitions should share memory with the original dataset;
    ``noslicing=True`` must force copies instead.
    """
    hs = HalfPartitioner()
    spl = Splitter(attr="partitions")
    splits = list(hs.generate(self.data))
    for s in splits:
        # the partitioned dataset still shares the underlying data array
        assert_true(s.samples.base is self.data.samples)
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
    # with numpy 1.7.0b1 "chaining" of .base was deprecated, so build a
    # check function appropriate for the numpy version actually in use
    _a = np.arange(5)
    __a = _a[:4][:3]
    if __a.base is _a:
        # numpy >= 1.7.0b1: .base points straight at the original array
        def is_the_same_base(x, base=self.data.samples):
            return x.base is base
    elif __a.base.base is _a:
        # numpy prior to 1.7.0b1: one extra level of .base indirection
        def is_the_same_base(x, base=self.data.samples):
            return x.base.base is base
    else:
        raise RuntimeError("Uknown handling of .base by numpy")
    for s in splits:
        # we get slicing all the time
        assert_true(is_the_same_base(s[0].samples))
        assert_true(is_the_same_base(s[1].samples))
    spl = Splitter(attr="partitions", noslicing=True)
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
    for s in splits:
        # no slicing at all -- samples must not be views of the source
        assert_false(s[0].samples.base is self.data.samples)
        assert_false(s[1].samples.base is self.data.samples)
    nfs = NFoldPartitioner()
    spl = Splitter(attr="partitions")
    splits = [list(spl.generate(p)) for p in nfs.generate(self.data)]
    for i, s in enumerate(splits):
        # the training part is a view only in the first and last split
        # (only there the selection is a contiguous range)
        if i == 0 or i == len(splits) - 1:
            assert_true(is_the_same_base(s[0].samples))
        else:
            assert_true(s[0].samples.base is None)
        # the testing part is sliced every time
        assert_true(is_the_same_base(s[1].samples))
    step_ds = Dataset(np.random.randn(20, 2), sa={"chunks": np.tile([0, 1], 10)})
    oes = OddEvenPartitioner()
    spl = Splitter(attr="partitions")
    splits = list(oes.generate(step_ds))
    for s in splits:
        # partitioned dataset shares the data with the step dataset
        assert_true(s.samples.base is step_ds.samples)
    splits = [list(spl.generate(p)) for p in oes.generate(step_ds)]
    assert_equal(len(splits), 2)
    for s in splits:
        # step-pattern selections should still come out as views
        assert_true(is_the_same_base(s[0].samples, step_ds.samples))
        assert_true(is_the_same_base(s[1].samples, step_ds.samples))
示例3: test_repeated_features
# 需要导入模块: from mvpa2.generators.splitters import Splitter [as 别名]
# 或者: from mvpa2.generators.splitters.Splitter import generate [as 别名]
def test_repeated_features(self):
    """RepeatedMeasure concatenating per-split feature counts as features."""
    class CountFeatures(Measure):
        # trivial measure: report the number of features of each split
        is_trained = True
        def _call(self, ds):
            return Dataset([ds.nfeatures],
                           fa={'nonbogus_targets':
                               list(ds.fa['nonbogus_targets'].unique)})
    counter = CountFeatures()
    splitter = Splitter('fa.nonbogus_targets')
    nsplits = len(list(splitter.generate(self.dataset)))
    assert_equal(nsplits, 3)
    repeated = RepeatedMeasure(counter, splitter, concat_as='features')
    res = repeated(self.dataset)
    # one sample, one feature (the count) per split
    assert_equal(res.shape, (1, nsplits))
    # due to https://github.com/numpy/numpy/issues/641 we are using a
    # list(set(...)) construct, so the order of nonbogus_targets.unique
    # can vary from run to run -- there is no guarantee that 18 comes
    # first (a questionable assumption anyway); therefore perform checks
    # which do not require any specific order.
    # And yet due to another issue
    # https://github.com/numpy/numpy/issues/3759
    # we can't just == None for the bool mask
    none_mask = np.array([x == None for x in res.fa.nonbogus_targets])
    assert_array_equal(res.samples[0, none_mask], [18])
    assert_array_equal(res.samples[0, ~none_mask], [1, 1])
    if sys.version_info[0] < 3:
        # with python2 the order seems to be consistent
        assert_array_equal(res.samples[0], [18, 1, 1])
示例4: test_splitter
# 需要导入模块: from mvpa2.generators.splitters import Splitter [as 别名]
# 或者: from mvpa2.generators.splitters.Splitter import generate [as 别名]
def test_splitter():
    """Exercise Splitter: defaults, custom values/count, and fa-based splits."""
    ds = give_data()
    # split with defaults -- one split per unique 'chunks' value
    by_chunks = Splitter('chunks')
    assert_raises(NotImplementedError, by_chunks, ds)
    splits = list(by_chunks.generate(ds))
    assert_equal(len(splits), len(ds.sa['chunks'].unique))
    for split in splits:
        # it should have performed basic slicing!
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.sa['chunks'].unique), 1)
        assert_true('lastsplit' in split.a)
    assert_true(splits[-1].a.lastsplit)
    # now again, more customized
    by_targets = Splitter('targets', attr_values=[0, 1, 1, 2, 3, 3, 3],
                          count=4, noslicing=True)
    splits = list(by_targets.generate(ds))
    assert_equal(len(splits), 4)
    for split in splits:
        # it should NOT have performed basic slicing!
        assert_false(split.samples.base is ds.samples)
        assert_equal(len(split.sa['targets'].unique), 1)
        assert_equal(len(split.sa['chunks'].unique), 10)
    assert_true(splits[-1].a.lastsplit)
    # the duplicated attr value (1) must yield two identical splits
    assert_array_equal(splits[1].samples, splits[2].samples)
    # now go wild and split by a feature attribute
    ds.fa['roi'] = np.repeat([0, 1], 5)
    # the splitter should auto-detect that this is a feature attribute
    by_roi = Splitter('roi')
    splits = list(by_roi.generate(ds))
    assert_equal(len(splits), 2)
    for split in splits:
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.fa['roi'].unique), 1)
        assert_equal(split.shape, (100, 5))
    # and finally test chained splitters
    chained = ChainNode([by_targets, by_roi, by_chunks])
    splits = list(chained.generate(ds))
    # 4 target splits x 2 roi splits x 10 chunk splits each
    assert_equal(len(splits), 80)
示例5: test_label_splitter
# 需要导入模块: from mvpa2.generators.splitters import Splitter [as 别名]
# 或者: from mvpa2.generators.splitters.Splitter import generate [as 别名]
def test_label_splitter(self):
    """Odd/even partitioning on targets yields complementary label sets."""
    partitioner = OddEvenPartitioner(attr='targets')
    splitter = Splitter(attr='partitions')
    splits = [list(splitter.generate(part))
              for part in partitioner.generate(self.data)]
    # first split: even targets vs odd targets; second split reversed
    assert_array_equal(splits[0][0].sa['targets'].unique, [0, 2])
    assert_array_equal(splits[0][1].sa['targets'].unique, [1, 3])
    assert_array_equal(splits[1][0].sa['targets'].unique, [1, 3])
    assert_array_equal(splits[1][1].sa['targets'].unique, [0, 2])
示例6: test_slicing
# 需要导入模块: from mvpa2.generators.splitters import Splitter [as 别名]
# 或者: from mvpa2.generators.splitters.Splitter import generate [as 别名]
def test_slicing(self):
    """Check that splits are views of the source samples where possible.

    Older variant assuming pre-1.7.0b1 numpy ``.base`` chaining
    (hence the direct ``.base.base`` comparisons).
    """
    hs = HalfPartitioner()
    spl = Splitter(attr='partitions')
    splits = list(hs.generate(self.data))
    for s in splits:
        # the partitioned dataset still shares the underlying data array
        assert_true(s.samples.base is self.data.samples)
    splits = [ list(spl.generate(p)) for p in hs.generate(self.data) ]
    for s in splits:
        # we get slicing all the time
        assert_true(s[0].samples.base.base is self.data.samples)
        assert_true(s[1].samples.base.base is self.data.samples)
    spl = Splitter(attr='partitions', noslicing=True)
    splits = [ list(spl.generate(p)) for p in hs.generate(self.data) ]
    for s in splits:
        # no slicing at all -- samples must not be views of the source
        assert_false(s[0].samples.base is self.data.samples)
        assert_false(s[1].samples.base is self.data.samples)
    nfs = NFoldPartitioner()
    spl = Splitter(attr='partitions')
    splits = [ list(spl.generate(p)) for p in nfs.generate(self.data) ]
    for i, s in enumerate(splits):
        # the training part is a view only in the first and last split
        # (only there the selection is a contiguous range)
        if i == 0 or i == len(splits) - 1:
            assert_true(s[0].samples.base.base is self.data.samples)
        else:
            assert_true(s[0].samples.base is None)
        # the testing part is sliced every time
        assert_true(s[1].samples.base.base is self.data.samples)
    step_ds = Dataset(np.random.randn(20,2),
                      sa={'chunks': np.tile([0,1], 10)})
    oes = OddEvenPartitioner()
    spl = Splitter(attr='partitions')
    splits = list(oes.generate(step_ds))
    for s in splits:
        # partitioned dataset shares the data with the step dataset
        assert_true(s.samples.base is step_ds.samples)
    splits = [ list(spl.generate(p)) for p in oes.generate(step_ds) ]
    assert_equal(len(splits), 2)
    for s in splits:
        # step-pattern selections should still come out as views
        assert_true(s[0].samples.base.base is step_ds.samples)
        assert_true(s[1].samples.base.base is step_ds.samples)
示例7: test_simplest_cv_pat_gen
# 需要导入模块: from mvpa2.generators.splitters import Splitter [as 别名]
# 或者: from mvpa2.generators.splitters.Splitter import generate [as 别名]
def test_simplest_cv_pat_gen(self):
    """Basic one-fold cross-validation pattern generation."""
    # create the partition generator
    folds = NFoldPartitioner(cvtype=1)
    splitter = Splitter(attr='partitions')
    # materialize the cross-validation pattern sets (one-fold CV)
    xval_sets = [list(splitter.generate(part))
                 for part in folds.generate(self.data)]
    self.assertTrue(len(xval_sets) == 10)
    for fold, pair in enumerate(xval_sets):
        self.assertTrue(len(pair) == 2)
        self.assertTrue(pair[0].nsamples == 90)
        self.assertTrue(pair[1].nsamples == 10)
        self.assertTrue(pair[1].chunks[0] == fold)
示例8: test_half_split
# 需要导入模块: from mvpa2.generators.splitters import Splitter [as 别名]
# 或者: from mvpa2.generators.splitters.Splitter import generate [as 别名]
def test_half_split(self):
    """HalfPartitioner + Splitter must yield two complementary halves."""
    hs = HalfPartitioner()
    spl = Splitter(attr='partitions')
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
    self.failUnless(len(splits) == 2)
    for p in splits:
        self.failUnless(len(p) == 2)
        self.failUnless(p[0].nsamples == 50)
        self.failUnless(p[1].nsamples == 50)
    assert_array_equal(splits[0][1].sa['chunks'].unique, [0, 1, 2, 3, 4])
    assert_array_equal(splits[0][0].sa['chunks'].unique, [5, 6, 7, 8, 9])
    assert_array_equal(splits[1][1].sa['chunks'].unique, [5, 6, 7, 8, 9])
    assert_array_equal(splits[1][0].sa['chunks'].unique, [0, 1, 2, 3, 4])
    # check if it works on pure odd and even chunk ids
    moresplits = [list(spl.generate(p)) for p in hs.generate(splits[0][0])]
    for split in moresplits:
        # FIX: identity comparison with None (was ``!= None``)
        self.failUnless(split[0] is not None)
        self.failUnless(split[1] is not None)
示例9: test_odd_even_split
# 需要导入模块: from mvpa2.generators.splitters import Splitter [as 别名]
# 或者: from mvpa2.generators.splitters.Splitter import generate [as 别名]
def test_odd_even_split(self):
    """OddEvenPartitioner + Splitter yields complementary odd/even halves."""
    oes = OddEvenPartitioner()
    spl = Splitter(attr='partitions')
    splits = [list(spl.generate(p)) for p in oes.generate(self.data)]
    self.assertTrue(len(splits) == 2)
    for p in splits:
        self.assertTrue(len(p) == 2)
        self.assertTrue(p[0].nsamples == 50)
        self.assertTrue(p[1].nsamples == 50)
    assert_array_equal(splits[0][1].sa['chunks'].unique, [1, 3, 5, 7, 9])
    assert_array_equal(splits[0][0].sa['chunks'].unique, [0, 2, 4, 6, 8])
    assert_array_equal(splits[1][0].sa['chunks'].unique, [1, 3, 5, 7, 9])
    assert_array_equal(splits[1][1].sa['chunks'].unique, [0, 2, 4, 6, 8])
    # check if it works on pure odd and even chunk ids
    moresplits = [list(spl.generate(p)) for p in oes.generate(splits[0][0])]
    for split in moresplits:
        # FIX: identity comparison with None (was ``!= None``)
        self.assertTrue(split[0] is not None)
        self.assertTrue(split[1] is not None)
示例10: test_repeated_features
# 需要导入模块: from mvpa2.generators.splitters import Splitter [as 别名]
# 或者: from mvpa2.generators.splitters.Splitter import generate [as 别名]
def test_repeated_features(self):
    """RepeatedMeasure over a feature-attribute Splitter counts features.

    FIX: removed two leftover py2-only ``print`` debug statements that
    made the block invalid under Python 3.
    """
    class CountFeatures(Measure):
        # trivial measure: report the number of features of each split
        is_trained = True
        def _call(self, ds):
            return ds.nfeatures
    cf = CountFeatures()
    spl = Splitter('fa.nonbogus_targets')
    nsplits = len(list(spl.generate(self.dataset)))
    assert_equal(nsplits, 3)
    rm = RepeatedMeasure(cf, spl, concat_as='features')
    res = rm(self.dataset)
    # one sample, one feature (the count) per split
    assert_equal(res.shape, (1, nsplits))
    assert_array_equal(res.samples[0], [18, 1, 1])
示例11: test_counted_splitting
# 需要导入模块: from mvpa2.generators.splitters import Splitter [as 别名]
# 或者: from mvpa2.generators.splitters.Splitter import generate [as 别名]
def test_counted_splitting(self):
    """Check ``count`` and ``selection_strategy`` handling in NFoldPartitioner."""
    spl = Splitter(attr='partitions')
    # count > #chunks must be capped at the number of chunks (10 splits)
    nchunks = len(self.data.sa['chunks'].unique)
    for strategy in Partitioner._STRATEGIES:
        for count, target in [(nchunks * 2, nchunks),
                              (nchunks, nchunks),
                              (nchunks - 1, nchunks - 1),
                              (3, 3),
                              (0, 0),
                              (1, 1)]:
            nfs = NFoldPartitioner(cvtype=1, count=count,
                                   selection_strategy=strategy)
            splits = [list(spl.generate(p)) for p in nfs.generate(self.data)]
            self.failUnless(len(splits) == target)
            chosenchunks = [int(s[1].uniquechunks) for s in splits]
            # test if the configuration matches as well
            nsplits_cfg = len(nfs.get_partition_specs(self.data))
            self.failUnlessEqual(nsplits_cfg, target)
            # check if "lastpartitionset" dsattr was assigned appropriately
            nsplits = len(splits)
            if nsplits > 0:
                # dummy-proof testing of last split
                for ds_ in splits[-1]:
                    self.failUnless(ds_.a.lastpartitionset)
                # test all now
                # FIX: the original line was a bare comparison expression
                # whose result was discarded -- it asserted nothing
                for isplit, split in enumerate(splits):
                    for ds_ in split:
                        self.failUnlessEqual(ds_.a.lastpartitionset,
                                             isplit == nsplits - 1)
            # check results of the different selection strategies
            if strategy == 'first':
                self.failUnlessEqual(chosenchunks, range(target))
            elif strategy == 'equidistant':
                if target == 3:
                    self.failUnlessEqual(chosenchunks, [0, 3, 7])
            elif strategy == 'random':
                # none is selected twice
                self.failUnless(len(set(chosenchunks)) == len(chosenchunks))
                self.failUnless(target == len(chosenchunks))
            else:
                # FIX: py2-only ``raise E, msg`` syntax replaced with the
                # call form, valid on both py2 and py3
                raise RuntimeError("Add unittest for strategy %s"
                                   % strategy)
示例12: test_exclude_targets_combinations
# 需要导入模块: from mvpa2.generators.splitters import Splitter [as 别名]
# 或者: from mvpa2.generators.splitters.Splitter import generate [as 别名]
def test_exclude_targets_combinations():
    """ChainNode of NFold + ExcludeTargetsCombinations covers all target pairs."""
    partitioner = ChainNode(
        [NFoldPartitioner(),
         ExcludeTargetsCombinationsPartitioner(k=2,
                                               targets_attr="targets",
                                               space="partitions")],
        space="partitions",
    )
    from mvpa2.misc.data_generators import normal_feature_dataset
    ds = normal_feature_dataset(snr=0.0, nlabels=4, perlabel=3, nchunks=3,
                                nonbogus_features=[0, 1, 2, 3], nfeatures=4)
    partitions = list(partitioner.generate(ds))
    assert_equal(len(partitions), 3 * 6)
    splitter = Splitter("partitions")
    seen_combos = []
    seen_combo_chunks = []
    for partition in partitions:
        train_ds, test_ds = list(splitter.generate(partition))[:2]
        combo = tuple(np.unique(test_ds.targets))
        seen_combos.append(combo)
        seen_combo_chunks.append(combo + tuple(np.unique(test_ds.chunks)))
    # just 6 possible combinations of 2 targets out of 4
    assert_equal(len(set(seen_combos)), 6)
    # and every (combination, chunk) pairing is unique
    assert_equal(len(set(seen_combo_chunks)), 3 * 6)
示例13: _forward_dataset
# 需要导入模块: from mvpa2.generators.splitters import Splitter [as 别名]
# 或者: from mvpa2.generators.splitters.Splitter import generate [as 别名]
def _forward_dataset(self, ds):
    """Forward-map a dataset, either whole or chunk-by-chunk."""
    if self.__chunks_attr is None:
        # no chunking configured -- process the dataset in one go
        return self._forward_dataset_helper(ds)
    # strip the dataset down to speed up local (per-chunk) processing
    keep_sa = [] if self.__attr_strategy == "remove" else None
    lean_ds = ds.copy(deep=False, sa=keep_sa, fa=[], a=[])
    # a dedicated splitter makes chunk-wise iteration fast
    chunk_splitter = Splitter(self.__chunks_attr)
    processed = [self._forward_dataset_helper(chunk)
                 for chunk in chunk_splitter.generate(lean_ds)]
    # merge the processed chunks again and restore stripped attributes
    merged = vstack(processed)
    merged.fa.update(ds.fa)
    merged.a.update(ds.a)
    return merged
示例14: test_svms
# 需要导入模块: from mvpa2.generators.splitters import Splitter [as 别名]
# 或者: from mvpa2.generators.splitters.Splitter import generate [as 别名]
def test_svms(self, clf):
    """Smoke-test SVM training, prediction, and probability outputs."""
    knows_probabilities = ('probabilities' in clf.ca.keys()
                          and clf.params.probability)
    enable_ca = ['estimates']
    if knows_probabilities:
        enable_ca += ['probabilities']
    clf.ca.change_temporarily(enable_ca=enable_ca)
    splitter = Splitter('train', count=2)
    traindata, testdata = list(splitter.generate(datasets['uni2small']))
    clf.train(traindata)
    predictions = clf.predict(testdata.samples)
    # estimates should differ from predictions for the SVMs we have
    self.assertTrue(np.any(predictions != clf.ca.estimates))
    if knows_probabilities and clf.ca.is_set('probabilities'):
        # XXX test more thoroughly what we are getting here ;-)
        self.assertEqual(len(clf.ca.probabilities), len(testdata.samples))
    clf.ca.reset_changed_temporarily()
示例15: test_n_group_split
# 需要导入模块: from mvpa2.generators.splitters import Splitter [as 别名]
# 或者: from mvpa2.generators.splitters.Splitter import generate [as 别名]
def test_n_group_split(self):
    """Test NGroupPartitioner alongside with the reversal of the
    order of spit out datasets
    """
    # Test 2 groups like HalfSplitter first
    hs = NGroupPartitioner(2)
    for isreversed, splitter in enumerate((hs, hs)):
        if isreversed:
            spl = Splitter(attr='partitions', reverse=True)
        else:
            spl = Splitter(attr='partitions')
        splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
        self.failUnless(len(splits) == 2)
        for p in splits:
            self.failUnless(len(p) == 2)
            self.failUnless(p[0].nsamples == 50)
            self.failUnless(p[1].nsamples == 50)
        # with reverse=True the working/validation roles swap places
        assert_array_equal(splits[0][1 - isreversed].sa['chunks'].unique,
                           [0, 1, 2, 3, 4])
        assert_array_equal(splits[0][isreversed].sa['chunks'].unique,
                           [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][1 - isreversed].sa['chunks'].unique,
                           [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][isreversed].sa['chunks'].unique,
                           [0, 1, 2, 3, 4])
    # check if it works on pure odd and even chunk ids
    moresplits = [list(spl.generate(p)) for p in hs.generate(splits[0][0])]
    for split in moresplits:
        # FIX: identity comparison with None (was ``!= None``)
        self.failUnless(split[0] is not None)
        self.failUnless(split[1] is not None)
    # now test more groups
    s5 = NGroupPartitioner(5)
    # get the splits
    for isreversed, s5splitter in enumerate((s5, s5)):
        if isreversed:
            spl = Splitter(attr='partitions', reverse=True)
        else:
            spl = Splitter(attr='partitions')
        splits = [list(spl.generate(p))
                  for p in s5splitter.generate(self.data)]
        # must have 5 splits (FIX: the original comment claimed 10,
        # contradicting the check below)
        self.failUnless(len(splits) == 5)
        # check split content
        assert_array_equal(splits[0][1 - isreversed].sa['chunks'].unique,
                           [0, 1])
        assert_array_equal(splits[0][isreversed].sa['chunks'].unique,
                           [2, 3, 4, 5, 6, 7, 8, 9])
        assert_array_equal(splits[1][1 - isreversed].sa['chunks'].unique,
                           [2, 3])
        assert_array_equal(splits[1][isreversed].sa['chunks'].unique,
                           [0, 1, 4, 5, 6, 7, 8, 9])
        # ...
        assert_array_equal(splits[4][1 - isreversed].sa['chunks'].unique,
                           [8, 9])
        assert_array_equal(splits[4][isreversed].sa['chunks'].unique,
                           [0, 1, 2, 3, 4, 5, 6, 7])
    # Test for too many groups
    def splitcall(spl, dat):
        # helper materializing the generator so assertRaises can trigger it
        return list(spl.generate(dat))
    s20 = NGroupPartitioner(20)
    self.assertRaises(ValueError, splitcall, s20, self.data)