本文整理匯總了 Python 中 mvpa2.generators.partition.NFoldPartitioner 類的典型用法代碼示例。如果您正苦於以下問題:Python NFoldPartitioner 類的具體用法?Python NFoldPartitioner 怎麽用?Python NFoldPartitioner 使用的例子?那麽,這裏精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了 NFoldPartitioner 類的 12 個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的 Python 代碼示例。
示例1: blocked_detection_n_equals_1
def blocked_detection_n_equals_1(mech_vec_list, mech_nm_list):
# Build a dataset from the given mechanism vectors/names, partition it by
# 'targets' with NFoldPartitioner(cvtype=1), and compute ``Ms`` through
# mar.compute_Ms for the held-out label of a split.  No value is returned;
# results are only printed or stored in a local dictionary.
#
# NOTE(review): indentation was lost in this listing.  The `break` below must
# belong to the `for l_wdata, l_vdata in splits` loop, but it cannot be told
# from here whether the statements following it are unreachable loop body or
# code that runs once after the loop -- confirm against the original file.
data, _ = mar.create_blocked_dataset_semantic_classes(mech_vec_list, mech_nm_list, append_robot = False)
nfs = NFoldPartitioner(cvtype=1, attr='targets') # 1-fold ?
spl = splitters.Splitter(attr='partitions')
# Each entry of `splits` is the list of datasets the Splitter produced for
# one partitioning; it is unpacked below as a (l_wdata, l_vdata) pair.
splits = [list(spl.generate(x)) for x in nfs.generate(data)]
## splitter = NFoldSplitter(cvtype=1)
## label_splitter = NFoldSplitter(cvtype=1, attr='labels')
mean_thresh_known_mech_dict = {}
for l_wdata, l_vdata in splits:
mean_thresh_known_mech_list = []
# Ms is computed from the full dataset for the first target of l_vdata.
Ms = mar.compute_Ms(data, l_vdata.targets[0], plot=True)
break
mechs = l_vdata.uniquechunks
for m in mechs:
n_std = 0.
# All samples whose chunk equals the current mechanism `m`.
all_trials = l_vdata.samples[np.where(l_vdata.chunks == m)]
le = all_trials.shape[1]
for i in range(all_trials.shape[0]):
one_trial = all_trials[i,:].reshape(1,le)
mn_list, std_list = mar.estimate_theta(one_trial, Ms, plot=False)
mn_arr, std_arr = np.array(mn_list), np.array(std_list)
# Track the largest absolute deviation of the trials from the estimate,
# expressed in units of std_arr.
n_std = max(n_std, np.max(np.abs(all_trials - mn_arr) / std_arr))
mean_thresh_known_mech_dict[m] = (Ms, n_std) # store on a per mechanism granularity
print 'n_std for', m, ':', n_std
print 'max error force for', m, ':', np.max(n_std*std_arr[2:])
示例2: test_slicing
def test_slicing(self):
    """Check when partitioning/splitting hands out views of the data.

    Exercises HalfPartitioner, NFoldPartitioner and OddEvenPartitioner
    together with ``Splitter`` (with and without ``noslicing=True``) and
    inspects the ``.base`` of the resulting sample arrays.
    """
    half_partitioner = HalfPartitioner()
    splitter = Splitter(attr="partitions")
    partitioned = list(half_partitioner.generate(self.data))
    for part_ds in partitioned:
        # partitioned dataset shared the data
        assert_true(part_ds.samples.base is self.data.samples)
    pairs = [list(splitter.generate(p))
             for p in half_partitioner.generate(self.data)]

    # with numpy 1.7.0b1 "chaining" was deprecated so let's create
    # check function appropriate for the given numpy version
    probe = np.arange(5)
    probe_view = probe[:4][:3]
    if probe_view.base is probe:
        # 1.7.0b1
        hops = 1
    elif probe_view.base.base is probe:
        # prior 1.7.0b1
        hops = 2
    else:
        raise RuntimeError("Uknown handling of .base by numpy")

    def is_the_same_base(x, base=self.data.samples):
        # Follow ``.base`` as many times as this numpy needs for a
        # slice-of-a-slice, then compare identity with the expected owner.
        owner = x
        for _ in range(hops):
            owner = owner.base
        return owner is base

    for pair in pairs:
        # we get slicing all the time
        first, second = pair[0], pair[1]
        assert_true(is_the_same_base(first.samples))
        assert_true(is_the_same_base(second.samples))

    splitter = Splitter(attr="partitions", noslicing=True)
    pairs = [list(splitter.generate(p))
             for p in half_partitioner.generate(self.data)]
    for pair in pairs:
        # we no slicing at all
        first, second = pair[0], pair[1]
        assert_false(first.samples.base is self.data.samples)
        assert_false(second.samples.base is self.data.samples)

    nfold_partitioner = NFoldPartitioner()
    splitter = Splitter(attr="partitions")
    pairs = [list(splitter.generate(p))
             for p in nfold_partitioner.generate(self.data)]
    last_index = len(pairs) - 1
    for index, pair in enumerate(pairs):
        # training only first and last split
        if index in (0, last_index):
            assert_true(is_the_same_base(pair[0].samples))
        else:
            assert_true(pair[0].samples.base is None)
        # we get slicing all the time
        assert_true(is_the_same_base(pair[1].samples))

    step_ds = Dataset(np.random.randn(20, 2),
                      sa={"chunks": np.tile([0, 1], 10)})
    odd_even = OddEvenPartitioner()
    splitter = Splitter(attr="partitions")
    partitioned = list(odd_even.generate(step_ds))
    for part_ds in partitioned:
        # partitioned dataset shared the data
        assert_true(part_ds.samples.base is step_ds.samples)
    pairs = [list(splitter.generate(p)) for p in odd_even.generate(step_ds)]
    assert_equal(len(pairs), 2)
    for pair in pairs:
        # we get slicing all the time
        first, second = pair[0], pair[1]
        assert_true(is_the_same_base(first.samples, step_ds.samples))
        assert_true(is_the_same_base(second.samples, step_ds.samples))
示例3: test_simplest_cv_pat_gen
def test_simplest_cv_pat_gen(self):
    """Check the splits produced by ``NFoldPartitioner(cvtype=1)``.

    Expects 10 partitionings of ``self.data``; each must split into exactly
    two datasets with 90 and 10 samples, and the first chunk value of the
    second dataset must equal the index of the partitioning.
    """
    # create the generator
    nfs = NFoldPartitioner(cvtype=1)
    spl = Splitter(attr='partitions')
    # now get the xval pattern sets (One-Fold CV)
    xvpat = [list(spl.generate(p)) for p in nfs.generate(self.data)]
    # assertEqual replaces the deprecated ``failUnless(a == b)`` form: the
    # alias no longer exists in Python 3.12+, and assertEqual reports both
    # values on failure.
    self.assertEqual(len(xvpat), 10)
    for i, p in enumerate(xvpat):
        self.assertEqual(len(p), 2)
        self.assertEqual(p[0].nsamples, 90)
        self.assertEqual(p[1].nsamples, 10)
        self.assertEqual(p[1].chunks[0], i)
示例4: test_counted_splitting
def test_counted_splitting(self):
    """Check that ``count`` limits the number of NFoldPartitioner splits.

    For every strategy in ``Partitioner._STRATEGIES`` and several ``count``
    values this verifies the number of generated splits, the number of
    partition specs, the ``lastpartitionset`` dataset attribute and which
    chunks ended up in the second dataset of each split.
    """
    spl = Splitter(attr='partitions')
    # count > #chunks, should result in 10 splits
    nchunks = len(self.data.sa['chunks'].unique)
    # (requested count, expected number of splits)
    cases = [
        (nchunks * 2, nchunks),
        (nchunks, nchunks),
        (nchunks - 1, nchunks - 1),
        (3, 3),
        (0, 0),
        (1, 1),
    ]
    for strategy in Partitioner._STRATEGIES:
        for count, target in cases:
            nfs = NFoldPartitioner(cvtype=1, count=count,
                                   selection_strategy=strategy)
            splits = [list(spl.generate(p))
                      for p in nfs.generate(self.data)]
            self.assertEqual(len(splits), target)
            chosenchunks = [int(s[1].uniquechunks) for s in splits]

            # Test if configuration matches as well
            nsplits_cfg = len(nfs.get_partition_specs(self.data))
            self.assertEqual(nsplits_cfg, target)

            # Check if "lastsplit" dsattr was assigned appropriately
            nsplits = len(splits)
            if nsplits > 0:
                # dummy-proof testing of last split
                for ds_ in splits[-1]:
                    self.assertTrue(ds_.a.lastpartitionset)
                # test all now
                # This used to be a bare comparison whose result was
                # discarded, so nothing was verified; it is now asserted.
                # NOTE(review): assumes the flag is falsy on every split
                # but the last -- confirm against Partitioner.generate.
                for isplit, split in enumerate(splits):
                    for ds_ in split:
                        self.assertEqual(bool(ds_.a.lastpartitionset),
                                         isplit == nsplits - 1)

            # Check results of different strategies
            if strategy == 'first':
                # list(...) keeps the comparison valid where range() is
                # not a list.
                self.assertEqual(chosenchunks, list(range(target)))
            elif strategy == 'equidistant':
                if target == 3:
                    self.assertEqual(chosenchunks, [0, 3, 7])
            elif strategy == 'random':
                # none is selected twice
                self.assertEqual(len(set(chosenchunks)), len(chosenchunks))
                self.assertEqual(target, len(chosenchunks))
            else:
                # Call syntax instead of ``raise X, "msg"`` so the module
                # also parses on Python 3.
                raise RuntimeError("Add unittest for strategy %s"
                                   % strategy)
示例5: test_slicing
def test_slicing(self):
    """Check when partitioning/splitting hands out views of the data.

    Exercises HalfPartitioner, NFoldPartitioner and OddEvenPartitioner
    together with ``Splitter`` (with and without ``noslicing=True``) and
    inspects the ``.base`` of the resulting sample arrays.
    """
    # How ``.base`` is reported for a slice of a slice depends on the numpy
    # version (changed with 1.7.0b1), so a hard-coded ``.base.base`` only
    # works on older releases.  Probe the behaviour once and build a
    # matching check.
    probe = np.arange(5)
    probe_view = probe[:4][:3]
    if probe_view.base is probe:
        # 1.7.0b1 and later: .base points straight at the owning array
        def is_the_same_base(x, base=self.data.samples):
            return x.base is base
    elif probe_view.base is not None and probe_view.base.base is probe:
        # prior to 1.7.0b1: .base points at the intermediate view
        def is_the_same_base(x, base=self.data.samples):
            return x.base.base is base
    else:
        raise RuntimeError("Unknown handling of .base by numpy")

    hs = HalfPartitioner()
    spl = Splitter(attr='partitions')
    splits = list(hs.generate(self.data))
    for s in splits:
        # partitioned dataset shared the data
        assert_true(s.samples.base is self.data.samples)
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
    for s in splits:
        # we get slicing all the time
        assert_true(is_the_same_base(s[0].samples))
        assert_true(is_the_same_base(s[1].samples))

    spl = Splitter(attr='partitions', noslicing=True)
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
    for s in splits:
        # we no slicing at all
        assert_false(s[0].samples.base is self.data.samples)
        assert_false(s[1].samples.base is self.data.samples)

    nfs = NFoldPartitioner()
    spl = Splitter(attr='partitions')
    splits = [list(spl.generate(p)) for p in nfs.generate(self.data)]
    for i, s in enumerate(splits):
        # training only first and last split
        if i == 0 or i == len(splits) - 1:
            assert_true(is_the_same_base(s[0].samples))
        else:
            assert_true(s[0].samples.base is None)
        # we get slicing all the time
        assert_true(is_the_same_base(s[1].samples))

    step_ds = Dataset(np.random.randn(20, 2),
                      sa={'chunks': np.tile([0, 1], 10)})
    oes = OddEvenPartitioner()
    spl = Splitter(attr='partitions')
    splits = list(oes.generate(step_ds))
    for s in splits:
        # partitioned dataset shared the data
        assert_true(s.samples.base is step_ds.samples)
    splits = [list(spl.generate(p)) for p in oes.generate(step_ds)]
    assert_equal(len(splits), 2)
    for s in splits:
        # we get slicing all the time
        assert_true(is_the_same_base(s[0].samples, step_ds.samples))
        assert_true(is_the_same_base(s[1].samples, step_ds.samples))
示例6: _test_gideon_weird_case
def _test_gideon_weird_case(self):
"""'The utter collapse' -- communicated by Peter J. Kohler
Desire to collapse all samples per each category in training
and testing sets, thus resulting only in a single
sample/category per training and per testing. As it is now,
CrossValidation on MappedClassifier would not work
observations: chance distribution obviously gets wide, but
also gets skewed to anti-learning on nfolds like 4.
"""
# NOTE(review): indentation was lost in this listing, so the exact extent
# of the `if False:` / `else:` branches and of the loops below cannot be
# confirmed from here -- check against the original file.
from mvpa2.mappers.fx import mean_group_sample
from mvpa2.clfs.knn import kNN
clf = kNN()
print "HERE"
ds = datasets['uni2large'].copy()
# Keep only the samples whose chunk value is below 9.
ds = ds[ds.sa.chunks < 9]
accs = []
for i in xrange(10): # # of random samples
# Replace the samples with fresh Gaussian noise of the same shape.
ds.samples = np.random.randn(*ds.shape)
if False: # this would have been a native way IF we allowed change of number of samples
clf2 = MappedClassifier(clf=kNN(), #clf,
mapper=mean_group_sample(['targets', 'partitions']))
cv = CrossValidation(clf2, NFoldPartitioner(4), postproc=None,
enable_ca=['stats'])
print cv(ds)
else:
from mvpa2.clfs.transerror import ConfusionMatrix
partitioner = NFoldPartitioner(6)
# Averages samples within each ('targets', 'partitions') group.
meaner = mean_group_sample(['targets', 'partitions'])
cm = ConfusionMatrix()
te = TransferMeasure(clf, Splitter('partitions'),
postproc=BinaryFxNode(mean_mismatch_error,
'targets'),
enable_ca = ['stats']
)
for part in partitioner.generate(ds):
ds_meaned = meaner(part)
# `error` is not used afterwards; the call also populates te.ca.stats,
# which is accumulated into `cm` on the next line.
error = np.asscalar(te(ds_meaned))
cm += te.ca.stats
print i, cm.stats['ACC']
accs.append(cm.stats['ACC'])
示例7: test_gnbsearchlight_permutations
def test_gnbsearchlight_permutations():
# Builds a GNB searchlight and a regular cross-validated searchlight, each
# with a Monte-Carlo null distribution obtained by permuting 'targets', and
# checks that the regular searchlight's null-distribution samples vary.
#
# NOTE(review): indentation was lost in this listing, so the extent of the
# `if __debug__:` and `if False:` blocks cannot be confirmed from here.
import mvpa2
from mvpa2.base.node import ChainNode
from mvpa2.clfs.gnb import GNB
from mvpa2.generators.base import Repeater
from mvpa2.generators.partition import NFoldPartitioner, OddEvenPartitioner
#import mvpa2.generators.permutation
#reload(mvpa2.generators.permutation)
from mvpa2.generators.permutation import AttributePermutator
from mvpa2.testing.datasets import datasets
from mvpa2.measures.base import CrossValidation
from mvpa2.measures.gnbsearchlight import sphere_gnbsearchlight
from mvpa2.measures.searchlight import sphere_searchlight
from mvpa2.mappers.fx import mean_sample
from mvpa2.misc.errorfx import mean_mismatch_error
from mvpa2.clfs.stats import MCNullDist
from mvpa2.testing.tools import assert_raises, ok_, assert_array_less
# mvpa2.debug.active = ['APERM', 'SLC'] #, 'REPM']
# mvpa2.debug.metrics += ['pid']
# Number of repetitions handed to the Repeater instances below.
count = 10
# 2 when the 'pprocess' external is reported as available, otherwise 1.
nproc = 1 + int(mvpa2.externals.exists('pprocess'))
ds = datasets['3dsmall'].copy()
ds.fa['voxel_indices'] = ds.fa.myspace
# Keyword arguments shared by every searchlight constructed in this test.
slkwargs = dict(radius=3, space='voxel_indices', enable_ca=['roi_sizes'],
center_ids=[1, 10, 70, 100])
mvpa2.seed(mvpa2._random_seed)
clf = GNB()
splt = NFoldPartitioner(cvtype=2, attr='chunks')
repeater = Repeater(count=count)
permutator = AttributePermutator('targets', limit={'partitions': 1}, count=1)
# Null-hypothesis searchlight: partitioner chained with the permutator.
null_sl = sphere_gnbsearchlight(clf, ChainNode([splt, permutator], space=splt.get_space()),
postproc=mean_sample(), errorfx=mean_mismatch_error,
**slkwargs)
distr_est = MCNullDist(repeater, tail='left', measure=null_sl,
enable_ca=['dist_samples'])
sl = sphere_gnbsearchlight(clf, splt,
reuse_neighbors=True,
null_dist=distr_est, postproc=mean_sample(),
errorfx=mean_mismatch_error,
**slkwargs)
if __debug__: # assert is done only without -O mode
# Calling the GNB searchlight on ds is expected to raise NotImplementedError.
assert_raises(NotImplementedError, sl, ds)
# "ad-hoc searchlights can't handle yet varying targets across partitions"
if False:
# after above limitation is removed -- enable
sl_map = sl(ds)
sl_null_prob = sl.ca.null_prob.samples.copy()
# Re-seed with the same seed that was used before the GNB searchlight setup.
mvpa2.seed(mvpa2._random_seed)
### 'normal' Searchlight
clf = GNB()
splt = NFoldPartitioner(cvtype=2, attr='chunks')
repeater = Repeater(count=count)
permutator = AttributePermutator('targets', limit={'partitions': 1}, count=1)
# rng=np.random.RandomState(0)) # to trigger failure since the same np.random state
# would be reused across all pprocesses
null_cv = CrossValidation(clf, ChainNode([splt, permutator], space=splt.get_space()),
postproc=mean_sample())
null_sl_normal = sphere_searchlight(null_cv, nproc=nproc, **slkwargs)
distr_est_normal = MCNullDist(repeater, tail='left', measure=null_sl_normal,
enable_ca=['dist_samples'])
cv = CrossValidation(clf, splt, errorfx=mean_mismatch_error,
enable_ca=['stats'], postproc=mean_sample() )
sl = sphere_searchlight(cv, nproc=nproc, null_dist=distr_est_normal, **slkwargs)
sl_map_normal = sl(ds)
sl_null_prob_normal = sl.ca.null_prob.samples.copy()
# For every feature -- we should get some variance in estimates In
# case of failure they are all really close to each other (up to
# numerical precision), so variance will be close to 0
# Both sides are negated so that assert_array_less expresses "variance > 1e-5".
assert_array_less(-np.var(distr_est_normal.ca.dist_samples.samples[0],
axis=1), -1e-5)
for s in distr_est_normal.ca.dist_samples.samples[0]:
ok_(len(np.unique(s)) > 1)
示例8: test_gideon_weird_case
def test_gideon_weird_case(self):
"""Test if MappedClassifier could handle a mapper altering number of samples
'The utter collapse' -- communicated by Peter J. Kohler
Desire to collapse all samples per each category in training
and testing sets, thus resulting only in a single
sample/category per training and per testing.
It is a peculiar scenario which pin points the problem that so
far mappers assumed not to change number of samples
"""
# NOTE(review): indentation was lost in this listing, so the extent of the
# loop and of the `if False:` / `if __debug__ ...:` blocks cannot be
# confirmed from here -- check against the original file.
from mvpa2.mappers.fx import mean_group_sample
from mvpa2.clfs.knn import kNN
from mvpa2.mappers.base import ChainMapper
ds = datasets['uni2large'].copy()
#ds = ds[ds.sa.chunks < 9]
accs = []
k = 1 # for kNN
nf = 1 # for NFoldPartitioner
for i in xrange(1): # # of random runs
# Replace the samples with fresh Gaussian noise of the same shape.
ds.samples = np.random.randn(*ds.shape)
#
# There are 3 ways to accomplish needed goal
#
# 0. Hard way: overcome the problem by manually
# pre-splitting/meaning in a loop
from mvpa2.clfs.transerror import ConfusionMatrix
partitioner = NFoldPartitioner(nf)
# Averages samples within each ('targets', 'partitions') group.
meaner = mean_group_sample(['targets', 'partitions'])
cm = ConfusionMatrix()
te = TransferMeasure(kNN(k), Splitter('partitions'),
postproc=BinaryFxNode(mean_mismatch_error,
'targets'),
enable_ca = ['stats']
)
# Per-partition errors; the reference the later approaches are compared to.
errors = []
for part in partitioner.generate(ds):
ds_meaned = meaner(part)
errors.append(np.asscalar(te(ds_meaned)))
cm += te.ca.stats
#print i, cm.stats['ACC']
accs.append(cm.stats['ACC'])
if False: # not yet working -- see _tent/allow_ch_nsamples
# branch for attempt to make it work
# 1. This is a "native way" IF we allow change of number
# of samples via _call to be done by MappedClassifier
# while operating solely on the mapped dataset
clf2 = MappedClassifier(clf=kNN(k), #clf,
mapper=mean_group_sample(['targets', 'partitions']))
cv = CrossValidation(clf2, NFoldPartitioner(nf), postproc=None,
enable_ca=['stats'])
# meaning all should be ok since we should have balanced
# sets across all chunks here
errors_native = cv(ds)
self.assertEqual(np.max(np.abs(errors_native.samples[:,0] - errors)),
0)
# 2. Work without fixes to MappedClassifier allowing
# change of # of samples
#
# CrossValidation will operate on a chain mapper which
# would perform necessary meaning first before dealing with
# kNN cons: .stats would not be exposed since ChainMapper
# doesn't expose them from ChainMapper (yet)
if __debug__ and 'ENFORCE_CA_ENABLED' in debug.active:
raise SkipTest("Known to fail while trying to enable "
"training_stats for the ChainMapper")
cv2 = CrossValidation(ChainMapper([mean_group_sample(['targets', 'partitions']),
kNN(k)],
space='targets'),
NFoldPartitioner(nf),
postproc=None)
errors_native2 = cv2(ds)
# The chain-mapper errors must match the manually computed ones exactly.
self.assertEqual(np.max(np.abs(errors_native2.samples[:,0] - errors)),
0)
# All of the ways should provide the same results
#print i, np.max(np.abs(errors_native.samples[:,0] - errors)), \
# np.max(np.abs(errors_native2.samples[:,0] - errors))
if False: # just to investigate the distribution if we have enough iterations
import pylab as pl
uaccs = np.unique(accs)
# Spacing between consecutive distinct accuracies (rounded to 4 decimals);
# np.asscalar requires that to be a single value.
step = np.asscalar(np.unique(np.round(uaccs[1:] - uaccs[:-1], 4)))
bins = np.linspace(0., 1., np.round(1./step+1))
xx = pl.hist(accs, bins=bins, align='left')
pl.xlim((0. - step/2, 1.+step/2))
示例9: setup_classifier
def setup_classifier(**kwargs):
'''
Thinked!
'''
for arg in kwargs:
if arg == 'clf_type':
clf_type = kwargs[arg]
if arg == 'fsel':
f_sel = kwargs[arg]
if arg == 'cv_type':
cv_approach = kwargs[arg]
if arg == 'cv_folds':
if np.int(kwargs[arg]) == 0:
cv_type = np.float(kwargs[arg])
else:
cv_type = np.int(kwargs[arg])
if arg == 'permutations':
permutations = np.int(kwargs[arg])
if arg == 'cv_attribute':
attribute = kwargs[arg]
cv_n = cv_type
################# Classifier #######################
if clf_type == 'SVM':
clf = LinearCSVMC(C=1, probability=1, enable_ca=['probabilities'])
elif clf_type == 'GNB':
clf = GNB()
elif clf_type == 'LDA':
clf = LDA()
elif clf_type == 'QDA':
clf = QDA()
elif clf_type == 'SMLR':
clf = SMLR()
elif clf_type == 'RbfSVM':
sk_clf = SVC(gamma=0.1, C=1)
clf = SKLLearnerAdapter(sk_clf, enable_ca=['probabilities'])
elif clf_type == 'GP':
clf = GPR()
else:
clf = LinearCSVMC(C=1, probability=1, enable_ca=['probabilities'])
############## Feature Selection #########################
if f_sel == 'True':
logger.info('Feature Selection selected.')
fsel = SensitivityBasedFeatureSelection(OneWayAnova(),
FractionTailSelector(0.05,
mode='select',
tail='upper'))
fclf = FeatureSelectionClassifier(clf, fsel)
elif f_sel == 'Fixed':
logger.info('Fixed Feature Selection selected.')
fsel = SensitivityBasedFeatureSelection(OneWayAnova(),
FixedNElementTailSelector(100,
mode='select',
tail='upper'))
fclf = FeatureSelectionClassifier(clf, fsel)
elif f_sel == 'PCA':
from mvpa2.mappers.skl_adaptor import SKLTransformer
from sklearn.decomposition import PCA
logger.info('Fixed Feature Selection selected.')
fsel = SKLTransformer(PCA(n_components=45))
fclf = FeatureSelectionClassifier(clf, fsel)
else:
fclf = clf
######################### Permutations #############################
if permutations != 0:
if __debug__:
debug.active += ["STATMC"]
repeater = Repeater(count=permutations)
permutator = AttributePermutator('targets', limit={'partitions': 1},
count=1)
partitioner = NFoldPartitioner(cvtype=cv_n, attr=attribute)
null_cv = CrossValidation(
clf,
ChainNode([partitioner, permutator],
space=partitioner.get_space()),
errorfx=mean_mismatch_error)
distr_est = MCNullDist(repeater, tail='left', measure=null_cv,
enable_ca=['dist_samples'])
#postproc = mean_sample()
else:
distr_est = None
#postproc = None
########################################################
if cv_approach == 'n_fold':
if cv_type != 0:
splitter_used = NFoldPartitioner(cvtype=cv_type, attr=attribute)
else:
splitter_used = NFoldPartitioner(cvtype=1, attr=attribute)
else:
#.........這裏部分代碼省略.........
示例10: test_analyzer_with_split_classifier
def test_analyzer_with_split_classifier(self, clfds):
"""Test analyzers in split classifier
"""
clf, ds = clfds # unroll the tuple
# We need to skip some LARSes here
_sclf = str(clf)
if 'LARS(' in _sclf and "type='stepwise'" in _sclf:
# ADD KnownToFail thingie from NiPy
return
# To don't waste too much time testing lets limit to 3 splits
nsplits = 3
partitioner = NFoldPartitioner(count=nsplits)
mclf = SplitClassifier(clf=clf,
partitioner=partitioner,
enable_ca=['training_stats',
'stats'])
sana = mclf.get_sensitivity_analyzer(# postproc=absolute_features(),
pass_attr=['fa.nonbogus_targets'],
enable_ca=["sensitivities"])
ulabels = ds.uniquetargets
nlabels = len(ulabels)
# Can't rely on splitcfg since count-limit is done in __call__
assert(nsplits == len(list(partitioner.generate(ds))))
sens = sana(ds)
assert('nonbogus_targets' in sens.fa) # were they passsed?
# TODO: those few do not expose biases
if not len(set(clf.__tags__).intersection(('lars', 'glmnet', 'gpr'))):
assert('biases' in sens.sa)
# print sens.sa.biases
# It should return either ...
# nlabels * nsplits
req_nsamples = [ nlabels * nsplits ]
if nlabels == 2:
# A single sensitivity in case of binary
req_nsamples += [ nsplits ]
else:
# and for pairs in case of multiclass
req_nsamples += [ (nlabels * (nlabels - 1) / 2) * nsplits ]
# and for 1-vs-1 embedded within Multiclass operating on
# pairs (e.g. SMLR)
req_nsamples += [req_nsamples[-1] * 2]
# Also for regression_based -- they can do multiclass
# but only 1 sensitivity is provided
if 'regression_based' in clf.__tags__:
req_nsamples += [ nsplits ]
# # of features should correspond
self.assertEqual(sens.shape[1], ds.nfeatures)
# # of samples/sensitivities should also be reasonable
self.assertTrue(sens.shape[0] in req_nsamples)
# Check if labels are present
self.assertTrue('splits' in sens.sa)
self.assertTrue('targets' in sens.sa)
# should be 1D -- otherwise dtype object
self.assertTrue(sens.sa.targets.ndim == 1)
sens_ulabels = sens.sa['targets'].unique
# Some labels might be pairs(tuples) so ndarray would be of
# dtype object and we would need to get them all
if sens_ulabels.dtype is np.dtype('object'):
sens_ulabels = np.unique(
reduce(lambda x, y: x + y, [list(x) for x in sens_ulabels]))
assert_array_equal(sens_ulabels, ds.sa['targets'].unique)
errors = [x.percent_correct
for x in sana.clf.ca.stats.matrices]
# lets go through all sensitivities and see if we selected the right
# features
#if 'meta' in clf.__tags__ and len(sens.samples[0].nonzero()[0])<2:
if '5%' in clf.descr \
or (nlabels > 2 and 'regression_based' in clf.__tags__):
# Some meta classifiers (5% of ANOVA) are too harsh ;-)
# if we get less than 2 features with on-zero sensitivities we
# cannot really test
# Also -- regression based classifiers performance for multiclass
# is expected to suck in general
return
if cfg.getboolean('tests', 'labile', default='yes'):
for conf_matrix in [sana.clf.ca.training_stats] \
+ sana.clf.ca.stats.matrices:
self.assertTrue(
conf_matrix.percent_correct >= 70,
msg="We must have trained on each one more or " \
"less correctly. Got %f%% correct on %d labels" %
(conf_matrix.percent_correct,
nlabels))
# Since now we have per split and possibly per label -- lets just find
# mean per each feature per label across splits
sensm = FxMapper('samples', lambda x: np.sum(x),
uattrs=['targets']).forward(sens)
sensgm = maxofabs_sample().forward(sensm) # global max of abs of means
#.........這裏部分代碼省略.........
示例11: test_factorialpartitioner
def test_factorialpartitioner():
    """Compare FactorialPartitioner with an NFoldPartitioner + Sifter chain.

    Also checks the single-superordinate case against a plain
    NFoldPartitioner, the warning and partitions produced for unbalanced
    subordinate classes, and the partitions of a small dummy dataset.
    """

    def collect_partitions(generator, dataset):
        # Partition attribute of every dataset yielded by the generator.
        return [part.sa.partitions for part in generator.generate(dataset)]

    # Test against sifter and chainmap implemented in test_usecases
    # -- code below copied from test_usecases --
    # Let's simulate the beast -- 6 categories total grouped into 3
    # super-ordinate, and actually without any 'superordinate' effect
    # since subordinate categories independent
    ds = normal_feature_dataset(
        nlabels=6,
        snr=100,  # pure signal! ;)
        perlabel=30,
        nfeatures=6,
        nonbogus_features=range(6),
        nchunks=5,
    )
    ds.sa["subord"] = ds.sa.targets.copy()
    # 3 superord categories
    ds.sa["superord"] = ["super%d" % (int(label[1]) % 3,) for label in ds.targets]
    # let's override original targets just to be sure that we aren't relying on them
    ds.targets[:] = 0

    # let's make two other datasets to test later
    # one superordinate category only
    ds_1super = ds.copy()
    ds_1super.sa["superord"] = ["super1"] * len(ds_1super.targets)

    # one superordinate category has only one subordinate
    ds_unbalanced = Dataset(
        range(4),
        sa={"subord": [0, 0, 1, 2], "superord": [1, 1, 2, 2]},
    )

    # so we split based on superord
    nfold_over_subord = NFoldPartitioner(len(ds.sa["superord"].unique), attr="subord")
    # so it should select only those splits where we took 1 from
    # each of the superord categories leaving things in balance
    balancing_sifter = Sifter(
        [
            ("partitions", 2),
            ("superord", {"uvalues": ds.sa["superord"].unique, "balanced": True}),
        ]
    )
    npart = ChainNode([nfold_over_subord, balancing_sifter], space="partitions")

    # now the new implementation
    factpart = FactorialPartitioner(NFoldPartitioner(attr="subord"), attr="superord")

    partitions_npart = collect_partitions(npart, ds)
    partitions_factpart = collect_partitions(factpart, ds)
    assert_array_equal(np.sort(partitions_npart), np.sort(partitions_factpart))

    # now let's check it behaves correctly if we have only one superord class
    nfold = NFoldPartitioner(attr="subord")
    partitions_nfold = collect_partitions(nfold, ds_1super)
    partitions_factpart = collect_partitions(factpart, ds_1super)
    assert_array_equal(np.sort(partitions_nfold), np.sort(partitions_factpart))

    # smoke test for unbalanced subord classes
    warning_msg = (
        "One or more superordinate attributes do not have the same "
        "number of subordinate attributes. This could yield to "
        "unbalanced partitions."
    )
    with assert_warnings([(RuntimeWarning, warning_msg)]):
        partitions_factpart = collect_partitions(factpart, ds_unbalanced)

    expected_partitions = [np.array([2, 2, 2, 1]), np.array([2, 2, 1, 2])]
    expected_superord = [([2], [1, 1, 2]), ([2], [1, 1, 2])]
    expected_subord = [([2], [0, 0, 1]), ([1], [0, 0, 2])]
    expectations = zip(
        partitions_factpart, expected_partitions, expected_superord, expected_subord
    )
    for got, wanted, wanted_super, wanted_sub in expectations:
        assert_array_equal(got, wanted)
        # samples labelled 1 and samples labelled 2 in this partitioning
        part_one = ds_unbalanced[got == 1]
        part_two = ds_unbalanced[got == 2]
        assert_array_equal(
            (part_one.sa.superord.tolist(), part_two.sa.superord.tolist()),
            wanted_super,
        )
        assert_array_equal(
            (part_one.sa.subord.tolist(), part_two.sa.subord.tolist()),
            wanted_sub,
        )

    # now let's test on a dummy dataset
    ds_dummy = Dataset(range(4), sa={"subord": range(4), "superord": [1, 2] * 2})
    partitions_factpart = collect_partitions(factpart, ds_dummy)
    assert_array_equal(
        partitions_factpart,
        [[2, 2, 1, 1], [2, 1, 1, 2], [1, 2, 2, 1], [1, 1, 2, 2]],
    )
示例12: generate_roc_curve
def generate_roc_curve(mech_vec_list, mech_nm_list,
semantic_range = np.arange(0.2, 2.7, 0.3),
mech_range = np.arange(0.2, 6.5, 0.7),
n_prev_trials = 1, prev_c = 'r',
plot_prev=True, sem_c = 'b', sem_m = '+',
plot_semantic=True, semantic_label='operating 1st time and \n known mechanism class'):
t_nm_list, t_mech_vec_list = [], []
for i, nm in enumerate(mech_nm_list):
## print 'nm:', nm
if 'known' in nm:
continue
t_nm_list.append(nm)
t_mech_vec_list.append(mech_vec_list[i])
data, _ = mar.create_blocked_dataset_semantic_classes(t_mech_vec_list, t_nm_list, append_robot = False)
## label_splitter = NFoldSplitter(cvtype=1, attr='labels')
thresh_dict = ut.load_pickle('blocked_thresh_dict.pkl') # human + robot data
mean_charlie_dict = thresh_dict['mean_charlie']
mean_known_mech_dict = thresh_dict['mean_known_mech']
#---------------- semantic class prior -------------
if plot_semantic:
fp_l_l = []
mn_l_l = []
err_l_l = []
mech_fp_l_l = []
mech_mn_l_l = []
mech_err_l_l = []
nfs = NFoldPartitioner(cvtype=1, attr='targets') # 1-fold ?
label_splitter = splitters.Splitter(attr='partitions')
splits = [list(label_splitter.generate(x)) for x in nfs.generate(data)]
# Grouping by labels
for l_wdata, l_vdata in splits: #label_splitter(data):
print "Number of data: ", len(l_vdata.chunks)
# Why zero??? Do we want specific chunk? -> changed into 10
lab = l_vdata.targets[0] # all same label
chunk = l_vdata.chunks[0] # chunk should be independant!!
trials = l_vdata.samples
if lab == 'Refrigerator':
lab = 'Fridge'
## tot_mean = None
## tot_std = None
## for chunk in l_vdata.chunks:
## _, mean, std = mean_charlie_dict[chunk] # mean except the specified chunk in same class
## if tot_mean is None:
## tot_mean = mean
## tot_std = std
## else:
## tot_mean += mean
## tot_std += std
## print chunk, mean[0], tot_mean[0]
## mean = tot_mean/float(len(l_vdata.chunks))
## std = tot_std/float(len(l_vdata.chunks))
## print mean[0], tot_mean[0], float(len(l_vdata.chunks))
## sys.exit()
# Select evaluation chunk for the ROC ?
## _, mean, std = mean_charlie_dict[lab]
_, mean, std = mean_charlie_dict[chunk]
# cutting into the same length
min_len = min(len(mean), trials.shape[1])
trials = trials[:,:min_len]
mean = mean[:min_len]
std = std[:min_len] #???
mn_list = []
fp_list, err_list = [], []
for n in semantic_range:
err = (mean + n*std) - trials
#false_pos = np.sum(np.any(err<0, 1))
#tot = trials.shape[0]
false_pos = np.sum(err<0) # Count false cases
tot = trials.shape[0] * trials.shape[1]
fp_list.append(false_pos/(tot*0.01))
err = err[np.where(err>0)]
err_list.append(err.flatten())
mn_list.append(np.mean(err))
err_l_l.append(err_list)
fp_l_l.append(fp_list)
mn_l_l.append(mn_list)
ll = [[] for i in err_l_l[0]] # why 0?
for i,e in enumerate(err_l_l): # labels
for j,l in enumerate(ll): # multiplier range
l.append(e[j])
std_list = []
#.........這裏部分代碼省略.........