本文整理匯總了Python中mvpa2.base.node.ChainNode類的典型用法代碼示例。如果您正苦於以下問題：Python ChainNode類的具體用法？Python ChainNode怎麼用？Python ChainNode使用的例子？那麼，這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了ChainNode類的14個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: test_sifter_with_balancing
def test_sifter_with_balancing():
    """Check Sifter with a ``balanced`` target specification.

    Builds a 6-sample dataset (one sample per chunk, three 'c' and three
    'p' targets), verifies the unsifted partition count, verifies that an
    unknown sifter specification raises ``ValueError``, and finally checks
    that sifting on balanced testing targets leaves 9 partitionings which
    all contain both targets in the testing and the training portion.
    """
    chunk_ids = [0, 1, 2, 3, 4, 5]
    target_labels = ['c', 'c', 'c', 'p', 'p', 'p']
    ds = Dataset(samples=np.arange(12).reshape((-1, 2)),
                 sa={'chunks': chunk_ids, 'targets': target_labels})

    # Baseline without a sifter: picking 4 out of the 6 chunks gives
    # 6*5*4*3/(4!) = 15 partitionings.
    unsifted = ChainNode([NFoldPartitioner(cvtype=4, attr='chunks')])
    assert_equal(len(list(unsifted.generate(ds))), 15)

    # A specification with an unknown key is expected to raise ValueError
    # once generation is attempted.
    def sift_with_bad_spec(dataset):
        return list(Sifter([('targets', dict(wrong=1))]).generate(dataset))

    assert_raises(ValueError, sift_with_bad_spec, ds)

    # Same 4-chunk partitioning, but keep only the partitionings whose
    # testing portion (partitions == 2) has a balanced number of 'c' and
    # 'p' entries.
    balanced_spec = dict(uvalues=['c', 'p'], balanced=True)
    par = ChainNode([
        NFoldPartitioner(cvtype=4, attr='chunks'),
        Sifter([('partitions', 2), ('targets', balanced_spec)]),
    ])
    dss = list(par.generate(ds))
    assert_equal(len(dss), 9)

    for partitioned in dss:
        labels = partitioned.sa.partitions
        testing = ds[labels == 2]
        assert_array_equal(np.unique(testing.sa.targets), ['c', 'p'])
        # both targets must also remain present in the training portion
        training = ds[labels == 1]
        assert_array_equal(np.unique(training.sa.targets), ['c', 'p'])
示例2: _test_edmund_chong_20120907
def _test_edmund_chong_20120907():  # pragma: no cover
    """Run a cross-validation with a Monte-Carlo null distribution.

    The leading underscore keeps the function from being collected as a
    test.  It wires together a balanced partitioner, a permutator limited
    to partition 1, and a null-distribution estimator, then runs the
    cross-validation on the 'uni2small' testing dataset.
    """
    # the wildcard import stays commented out to avoid syntax warnings
    # while compiling
    # from mvpa2.suite import *
    from mvpa2.testing.datasets import datasets

    repeater = Repeater(count=20)

    folds = NFoldPartitioner(cvtype=1)
    balancer = Balancer(attr='targets',
                        count=1,  # for real data > 1
                        limit='partitions',
                        apply_selection=True)
    partitioner = ChainNode([folds, balancer], space='partitions')

    clf = LinearCSVMC()  # choice of classifier
    permutator = AttributePermutator('targets',
                                     limit={'partitions': 1},
                                     count=1)

    # cross-validation on permuted data, used as the measure of the
    # null-distribution estimator below
    permuting_generator = ChainNode([partitioner, permutator],
                                    space=partitioner.get_space())
    null_cv = CrossValidation(clf,
                              permuting_generator,
                              errorfx=mean_mismatch_error)
    distr_est = MCNullDist(repeater,
                           tail='left',
                           measure=null_cv,
                           enable_ca=['dist_samples'])

    cvte = CrossValidation(clf,
                           partitioner,
                           errorfx=mean_mismatch_error,
                           null_dist=distr_est,
                           enable_ca=['stats'])
    errors = cvte(datasets['uni2small'])
示例3: test_exclude_targets_combinations_subjectchunks
def test_exclude_targets_combinations_subjectchunks():
    """Check chaining NFold over subjects with chunk-combination exclusion.

    The dataset has 9 samples with 3 'subjects' and 3 'chunks' values.
    The test expects 9 partitionings, each testing on exactly one
    (subject, chunk) pair that does not occur in the matching training
    portion, with all 3 x 3 pairs covered.
    """
    from itertools import product

    partitioner = ChainNode([NFoldPartitioner(attr='subjects'),
                             ExcludeTargetsCombinationsPartitioner(
                                 k=1,
                                 targets_attr='chunks',
                                 space='partitions')],
                            space='partitions')
    # targets do not need even to be defined!
    ds = Dataset(np.arange(18).reshape(9, 2),
                 sa={'chunks': np.arange(9) // 3,
                     'subjects': np.arange(9) % 3})
    dss = list(partitioner.generate(ds))
    assert_equal(len(dss), 9)

    testing_subjs, testing_chunks = [], []
    for ds_ in dss:
        testing_partition = ds_.sa.partitions == 2
        training_partition = ds_.sa.partitions == 1
        # Each testing portion must hold a single subject and a single
        # chunk.  ndarray.item() raises ValueError for anything but a
        # one-element array, so this doubles as an implicit test.
        # (np.asscalar, used previously, was removed in NumPy 1.23.)
        testing_subj = np.unique(ds_.sa.subjects[testing_partition]).item()
        testing_subjs.append(testing_subj)
        testing_chunk = np.unique(ds_.sa.chunks[testing_partition]).item()
        testing_chunks.append(testing_chunk)
        # and those must not appear for training
        ok_(testing_subj not in ds_.sa.subjects[training_partition])
        ok_(testing_chunk not in ds_.sa.chunks[training_partition])

    # and we should have gone through all chunks/subjs pairs
    testing_pairs = set(zip(testing_subjs, testing_chunks))
    assert_equal(len(testing_pairs), 9)
    assert_equal(testing_pairs, set(product(range(3), range(3))))
示例4: test_permute_superord
def test_permute_superord():
    """Check that 'superord' gets permuted after balanced partitioning.

    Chains an NFoldPartitioner on 'subord', a Sifter keeping balanced
    'superord' testing sets, and an AttributePermutator on 'superord',
    and asserts that every generated dataset differs from the original
    in at least one 'superord' value.
    """
    from mvpa2.base.node import ChainNode
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.generators.base import Sifter
    from mvpa2.generators.permutation import AttributePermutator

    ds = _get_superord_dataset()
    # mvpa2.seed(1)
    superord_values = ds.sa['superord'].unique

    # take as many 'subord' values out per fold as there are 'superord'
    # categories
    folds = NFoldPartitioner(len(superord_values), attr='subord')
    # keep only the folds whose testing portion is balanced across the
    # 'superord' categories
    balanced_only = Sifter([
        ('partitions', 2),
        ('superord', {'uvalues': superord_values, 'balanced': True}),
    ])
    shuffler = AttributePermutator(['superord'],
                                   limit=['partitions', 'chunks'])
    part = ChainNode([folds, balanced_only, shuffler], space='partitions')

    for ds_perm in part.generate(ds):
        # at least one label must have moved
        n_changed = np.sum(ds_perm.sa.superord != ds.sa.superord)
        assert n_changed != 0
示例5: test_splitter
def test_splitter():
    """Exercise Splitter on sample attributes, feature attributes and chains."""
    ds = give_data()

    # --- default configuration: one split per unique chunk -------------
    by_chunks = Splitter('chunks')
    # calling a splitter directly is expected to be unsupported
    assert_raises(NotImplementedError, by_chunks, ds)
    splits = list(by_chunks.generate(ds))
    assert_equal(len(splits), len(ds.sa['chunks'].unique))
    for piece in splits:
        # the split is expected to share the samples array of the source
        assert_true(piece.samples.base is ds.samples)
        assert_equal(len(piece.sa['chunks'].unique), 1)
        assert_true('lastsplit' in piece.a)
    assert_true(splits[-1].a.lastsplit)

    # --- customized: explicit attribute values, limited count, no slicing
    by_targets = Splitter('targets',
                          attr_values=[0, 1, 1, 2, 3, 3, 3],
                          count=4,
                          noslicing=True)
    splits = list(by_targets.generate(ds))
    assert_equal(len(splits), 4)
    for piece in splits:
        # with noslicing the samples array must NOT be shared
        assert_false(piece.samples.base is ds.samples)
        assert_equal(len(piece.sa['targets'].unique), 1)
        assert_equal(len(piece.sa['chunks'].unique), 10)
    assert_true(splits[-1].a.lastsplit)
    # value 1 was requested twice in a row, so those two splits match
    assert_array_equal(splits[1].samples, splits[2].samples)

    # --- splitting by a feature attribute ------------------------------
    ds.fa['roi'] = np.repeat([0, 1], 5)
    # only the attribute name is given; it exists as a feature attribute
    by_roi = Splitter('roi')
    splits = list(by_roi.generate(ds))
    assert_equal(len(splits), 2)
    for piece in splits:
        assert_true(piece.samples.base is ds.samples)
        assert_equal(len(piece.fa['roi'].unique), 1)
        assert_equal(piece.shape, (100, 5))

    # --- chained splitters ---------------------------------------------
    chained = ChainNode([by_targets, by_roi, by_chunks])
    splits = list(chained.generate(ds))
    # 4 target splits x 2 roi splits x 10 chunk splits
    assert_equal(len(splits), 80)
示例6: test_sifter
def test_sifter():
    """Check that Sifter keeps partitionings testing on both targets.

    Somewhat duplicates the doctest: 4 samples in 4 chunks, two 'c' and
    two 'p' targets; after sifting, 4 partitionings are expected.
    """
    ds = Dataset(samples=np.arange(8).reshape((4, 2)),
                 sa={'chunks': [0, 1, 2, 3],
                     'targets': ['c', 'c', 'p', 'p']})
    folds = NFoldPartitioner(cvtype=2, attr='chunks')
    both_targets = Sifter([('partitions', 2),
                           ('targets', ['c', 'p'])])
    par = ChainNode([folds, both_targets])

    dss = list(par.generate(ds))
    assert_equal(len(dss), 4)
    for partitioned in dss:
        labels = partitioned.sa.partitions
        testing = ds[labels == 2]
        assert_array_equal(np.unique(testing.sa.targets), ['c', 'p'])
        # both targets must also remain present in the training portion
        training = ds[labels == 1]
        assert_array_equal(np.unique(training.sa.targets), ['c', 'p'])
示例7: test_sifter
def test_sifter():
    """Check Sifter with list-style and dict-style target definitions.

    Somewhat duplicates the doctest.  Both ways of specifying the wanted
    testing targets are expected to yield the same 4 partitionings, each
    with both targets in the testing and the training portion.
    """
    ds = Dataset(
        samples=np.arange(8).reshape((4, 2)),
        sa={"chunks": [0, 1, 2, 3], "targets": ["c", "c", "p", "p"]},
    )
    target_definitions = (["c", "p"], dict(uvalues=["c", "p"]))
    for sift_targets_definition in target_definitions:
        folds = NFoldPartitioner(cvtype=2, attr="chunks")
        sifter = Sifter([("partitions", 2),
                         ("targets", sift_targets_definition)])
        par = ChainNode([folds, sifter])

        dss = list(par.generate(ds))
        assert_equal(len(dss), 4)
        for partitioned in dss:
            labels = partitioned.sa.partitions
            testing = ds[labels == 2]
            assert_array_equal(np.unique(testing.sa.targets), ["c", "p"])
            # both targets must also remain present in the training portion
            training = ds[labels == 1]
            assert_array_equal(np.unique(training.sa.targets), ["c", "p"])
示例8: test_discarded_boundaries
def test_discarded_boundaries(self):
    """Check that samples around chunk boundaries get stripped."""
    ds = datasets["hollow"]
    # four runs of ten samples each
    ds.sa["chunks"] = np.repeat(np.arange(4), 10)

    # odd/even splitting gives lots of boundaries in few splits
    stripper = StripBoundariesSamples("chunks", 1, 2)
    part = ChainNode([OddEvenPartitioner(), stripper])
    parts = [generated.samples.sid for generated in part.generate(ds)]

    # both generated datasets are expected to hold the same samples: the
    # boundaries are identical, so the same samples get stripped
    first, second = parts[0], parts[1]
    assert_array_equal(first, second)

    # three samples are stripped at each of the three boundaries
    assert_equal(len(parts[0]), len(ds) - (3 * 3))
    stripped_ids = [9, 10, 11, 19, 20, 21, 29, 30, 31]
    for sample_id in stripped_ids:
        assert_false(sample_id in parts[0])
示例9: test_exclude_targets_combinations
def test_exclude_targets_combinations():
    """Check NFold chained with exclusion of pairs of targets.

    On a dataset with 4 labels and 3 chunks, 3 * 6 partitionings are
    expected: 6 distinct target pairs in testing, each occurring once
    per chunk.
    """
    from mvpa2.misc.data_generators import normal_feature_dataset

    excluder = ExcludeTargetsCombinationsPartitioner(
        k=2, targets_attr="targets", space="partitions")
    partitioner = ChainNode([NFoldPartitioner(), excluder],
                            space="partitions")

    ds = normal_feature_dataset(
        snr=0.0,
        nlabels=4,
        perlabel=3,
        nchunks=3,
        nonbogus_features=[0, 1, 2, 3],
        nfeatures=4,
    )
    partitions = list(partitioner.generate(ds))
    assert_equal(len(partitions), 3 * 6)

    splitter = Splitter("partitions")
    combs = []
    comb_chunks = []
    for partitioned in partitions:
        # only the first two splits are used; the first one is not inspected
        trds, teds = list(splitter.generate(partitioned))[:2]
        test_targets = tuple(np.unique(teds.targets))
        combs.append(test_targets)
        comb_chunks.append(test_targets + tuple(np.unique(teds.chunks)))

    # just 6 possible combinations of 2 out of 4
    assert_equal(len(set(combs)), 6)
    # all (targets, chunks) combinations are unique
    assert_equal(len(set(comb_chunks)), 3 * 6)
示例10: test_split_clf_on_chainpartitioner
def test_split_clf_on_chainpartitioner(self):
    """Smoke-test SplitClassifier driven by a ChainNode partitioner (#156)."""
    from mvpa2.mappers.fx import FxMapper

    ds = datasets['uni2small']
    balancer = Balancer(attr='targets', count=2,
                        limit='partitions', apply_selection=True)
    part = ChainNode([NFoldPartitioner(cvtype=1), balancer])
    partitions = list(part.generate(ds))
    n_partitions = len(partitions)

    sclf = SplitClassifier(sample_clf_lin, part,
                           enable_ca=['stats', 'splits'])
    sclf.train(ds)
    pred = sclf.predict(ds)
    # rudimentary check: one prediction per sample
    assert_equal(len(pred), len(ds))
    # one split and one classifier per generated partitioning
    assert_equal(len(sclf.ca.splits), n_partitions)
    assert_equal(len(sclf.clfs), n_partitions)

    # now the sensitivity analyzer, just in case
    sclf.untrain()
    sensana = sclf.get_sensitivity_analyzer()
    sens = sensana(ds)
    # basic check that the sensitivities varied across splits
    spread = FxMapper('samples', np.std, uattrs=['targets'])
    sens_stds = spread(sens)
    assert_true(np.any(sens_stds != 0))
示例11: get_crossvalidation_instance
def get_crossvalidation_instance(learner, partitioner, errorfx,
                                 sampling_repetitions=1,
                                 learner_space='targets',
                                 balance_training=None,
                                 permutations=0,
                                 avg_datafold_results=True,
                                 prob_tail='left'):
    """Assemble a CrossValidation measure from its building blocks.

    Parameters
    ----------
    learner
      Object passed to CrossValidation; its space is set to
      `learner_space` via ``set_space()``.
    partitioner
      Generator of data folds; its ``get_space()`` names the partition
      attribute used to limit balancing and permutation to value 1.
    errorfx
      Passed to CrossValidation as ``errorfx``.
    sampling_repetitions : int
      ``count`` of the Balancer (only used when balancing is enabled).
    learner_space : str
      Attribute the learner, the Balancer and the permutator operate on.
    balance_training : None or value
      If not None, a Balancer is appended to the fold generator.  The
      value is converted to ``int`` if possible, otherwise to ``float``,
      otherwise passed unchanged as the Balancer's ``amount``.
    permutations : int
      If greater than 0, a Monte-Carlo null distribution with that many
      repetitions is estimated and p-values are passed on as feature
      attributes.
    avg_datafold_results : bool
      If true, ``mean_sample()`` is used as ``postproc``.
    prob_tail : str
      ``tail`` argument of the null-distribution estimator.

    Returns
    -------
    CrossValidation
    """
    from mvpa2.base.node import ChainNode
    from mvpa2.measures.base import CrossValidation

    balancer = None
    if balance_training is not None:
        # balance training data: interpret the amount as int, then as
        # float, and fall back to the raw value if neither conversion works
        amount = balance_training
        for convert in (int, float):
            try:
                amount = convert(balance_training)
            except ValueError:
                continue
            break
        from mvpa2.generators.resampling import Balancer
        balancer = Balancer(amount=amount,
                            attr=learner_space,
                            count=sampling_repetitions,
                            limit={partitioner.get_space(): 1},
                            apply_selection=True,
                            include_offlimit=True)

    # set learner space
    learner.set_space(learner_space)

    # generator for data folding -- kept in a chain node so further
    # steps can be appended easily
    gennode = ChainNode([partitioner], space=partitioner.get_space())

    postproc = None
    if avg_datafold_results:
        from mvpa2.mappers.fx import mean_sample
        postproc = mean_sample()

    if balancer is not None:
        # enable balancing step for each partitioning step
        gennode.append(balancer)

    distr_est = None
    pass_attr = None
    if permutations > 0:
        from mvpa2.generators.base import Repeater
        from mvpa2.generators.permutation import AttributePermutator
        from mvpa2.clfs.stats import MCNullDist
        # how often do we want to shuffle the data
        repeater = Repeater(count=permutations)
        # permute the training part of a dataset exactly ONCE
        permutator = AttributePermutator(
            learner_space,
            limit={partitioner.get_space(): 1},
            count=1)
        # CV with null-distribution estimation that permutes the training
        # data for each fold independently
        # NOTE(review): relies on a module-level `import copy` that is not
        # visible in this excerpt -- confirm it exists
        perm_gen_node = copy.deepcopy(gennode)
        perm_gen_node.append(permutator)
        null_cv = CrossValidation(learner,
                                  perm_gen_node,
                                  postproc=postproc,
                                  errorfx=errorfx)
        # Monte Carlo distribution estimator
        distr_est = MCNullDist(repeater,
                               tail=prob_tail,
                               measure=null_cv,
                               enable_ca=['dist_samples'])
        # pass the p-values as feature attributes on to the results
        pass_attr = [('ca.null_prob', 'fa', 1)]

    # final CV node
    return CrossValidation(learner,
                           gennode,
                           errorfx=errorfx,
                           null_dist=distr_est,
                           postproc=postproc,
                           enable_ca=['stats', 'null_prob'],
                           pass_attr=pass_attr)
示例12: _call
def _call(self, ds):
    """Delegate to ``ChainNode._call`` and return its result for `ds`."""
    result = ChainNode._call(self, ds)
    return result
示例13: plot_feature_hist
def plot_feature_hist(dataset, xlim=None, noticks=True,
                      targets_attr='targets', chunks_attr=None,
                      **kwargs):
    """Plot histograms of feature values for each label.

    Parameters
    ----------
    dataset : Dataset
    xlim : None or 2-tuple
      Common x-axis limits for all histograms.
    noticks : bool
      If True, no axis ticks will be plotted. This is useful to save
      space in large plots.
    targets_attr : string, optional
      Name of samples attribute to be used as targets
    chunks_attr : None or string
      If a string, a histogram will be plotted per each target and each
      chunk (as defined in sa named `chunks_attr`), resulting in a
      histogram grid (targets x chunks).  If None (default), one
      histogram per target is plotted in a single row.
    **kwargs
      Any additional arguments are passed to matplotlib's hist().
    """
    # one dataset per unique target value
    lsplit = ChainNode([NFoldPartitioner(1, attr=targets_attr),
                        Splitter('partitions', attr_values=[2])])
    nrows = len(dataset.sa[targets_attr].unique)

    # The chunk splitter and the column count are only needed (and only
    # well-defined) when a chunks attribute was given; with the default
    # chunks_attr=None there is no such sample attribute to look up.
    if chunks_attr:
        csplit = ChainNode([NFoldPartitioner(1, attr=chunks_attr),
                            Splitter('partitions', attr_values=[2])])
        ncols = len(dataset.sa[chunks_attr].unique)

    def doplot(data):
        """Just a little helper which plots the histogram and removes
        ticks etc"""
        pl.hist(data, **kwargs)

        if xlim is not None:
            pl.xlim(xlim)

        if noticks:
            pl.yticks([])
            pl.xticks([])

    # running subplot index (1-based)
    fig = 1

    # for all labels
    for row, ds in enumerate(lsplit.generate(dataset)):
        if chunks_attr:
            # grid layout: one row per target, one column per chunk
            for col, d in enumerate(csplit.generate(ds)):
                pl.subplot(nrows, ncols, fig)
                doplot(d.samples.ravel())

                if row == 0:
                    pl.title('C:' + str(d.sa[chunks_attr].unique[0]))
                if col == 0:
                    pl.ylabel('L:' + str(d.sa[targets_attr].unique[0]))

                fig += 1
        else:
            # single row: one subplot per target
            pl.subplot(1, nrows, fig)
            doplot(ds.samples)

            pl.title('L:' + str(ds.sa[targets_attr].unique[0]))

            fig += 1
示例14: test_factorialpartitioner
def test_factorialpartitioner():
    """Compare FactorialPartitioner against the Sifter-based chain.

    Also covers a dataset with a single superordinate class, an
    unbalanced dataset (expected to trigger a RuntimeWarning) and a small
    dummy dataset with known partitions.
    """
    # Test against sifter and chainmap implemented in test_usecases
    # -- code below copied from test_usecases --
    # Simulated data: 6 categories in total, grouped into 3 superordinate
    # ones, and without any 'superordinate' effect since the subordinate
    # categories are independent
    ds = normal_feature_dataset(
        nlabels=6,
        snr=100,  # pure signal! ;)
        perlabel=30,
        nfeatures=6,
        nonbogus_features=range(6),
        nchunks=5,
    )
    ds.sa["subord"] = ds.sa.targets.copy()
    # 3 superord categories
    ds.sa["superord"] = ["super%d" % (int(i[1]) % 3,) for i in ds.targets]
    # override the original targets to be sure nothing relies on them
    ds.targets[:] = 0

    # two more datasets for the later checks
    # (1) one superordinate category only
    ds_1super = ds.copy()
    ds_1super.sa["superord"] = ["super1" for _ in ds_1super.targets]
    # (2) one superordinate category has only one subordinate
    ds_unbalanced = Dataset(
        range(4), sa={"subord": [0, 0, 1, 2], "superord": [1, 1, 2, 2]}
    )

    superord_values = ds.sa["superord"].unique
    npart = ChainNode(
        [
            # take as many 'subord' values out per fold as there are
            # 'superord' categories
            NFoldPartitioner(len(superord_values), attr="subord"),
            # keep only the folds where one was taken from each of the
            # superord categories, leaving things in balance
            Sifter([
                ("partitions", 2),
                ("superord", {"uvalues": superord_values, "balanced": True}),
            ]),
        ],
        space="partitions",
    )

    # now the new implementation
    factpart = FactorialPartitioner(NFoldPartitioner(attr="subord"),
                                    attr="superord")

    def partitions_of(generator, dataset):
        """Collect the 'partitions' attribute of every generated dataset."""
        return [p.sa.partitions for p in generator.generate(dataset)]

    partitions_npart = partitions_of(npart, ds)
    partitions_factpart = partitions_of(factpart, ds)
    assert_array_equal(np.sort(partitions_npart),
                       np.sort(partitions_factpart))

    # with only one superord class it should match a plain NFold
    nfold = NFoldPartitioner(attr="subord")
    partitions_nfold = partitions_of(nfold, ds_1super)
    partitions_factpart = partitions_of(factpart, ds_1super)
    assert_array_equal(np.sort(partitions_nfold),
                       np.sort(partitions_factpart))

    # smoke test for unbalanced subord classes
    warning_msg = (
        "One or more superordinate attributes do not have the same "
        "number of subordinate attributes. This could yield to "
        "unbalanced partitions."
    )
    with assert_warnings([(RuntimeWarning, warning_msg)]):
        partitions_factpart = partitions_of(factpart, ds_unbalanced)

    partitions_unbalanced = [np.array([2, 2, 2, 1]), np.array([2, 2, 1, 2])]
    superord_unbalanced = [([2], [1, 1, 2]), ([2], [1, 1, 2])]
    subord_unbalanced = [([2], [0, 0, 1]), ([1], [0, 0, 2])]
    expectations = zip(partitions_factpart, partitions_unbalanced,
                       superord_unbalanced, subord_unbalanced)
    for out_part, true_part, super_out, sub_out in expectations:
        assert_array_equal(out_part, true_part)
        part1 = ds_unbalanced[out_part == 1]
        part2 = ds_unbalanced[out_part == 2]
        assert_array_equal(
            (part1.sa.superord.tolist(), part2.sa.superord.tolist()),
            super_out,
        )
        assert_array_equal(
            (part1.sa.subord.tolist(), part2.sa.subord.tolist()),
            sub_out,
        )

    # now let's test on a dummy dataset
    ds_dummy = Dataset(range(4),
                       sa={"subord": range(4), "superord": [1, 2] * 2})
    partitions_factpart = partitions_of(factpart, ds_dummy)
    assert_array_equal(
        partitions_factpart,
        [[2, 2, 1, 1], [2, 1, 1, 2], [1, 2, 2, 1], [1, 1, 2, 2]],
    )