This article collects typical usage examples of the Python class mvpa2.datasets.base.Dataset. If you have been wondering what the Dataset class is for, how to use it, or where to find usage examples, the curated class code examples below may help.
The following shows 15 code examples of the Dataset class, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
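The snippets below appear to be excerpted from PyMVPA's own source and test suite, so they omit their surrounding imports. As a rough, non-authoritative setup sketch (module paths follow the PyMVPA 2.x layout and should be verified against your installed version), the test-style examples roughly assume the following, plus a minimal Dataset construction for orientation:

# Hedged setup sketch: approximate imports assumed by the test-style examples below.
# Module paths are assumptions based on the PyMVPA 2.x layout, not taken from this page.
import numpy as np

from mvpa2.datasets.base import Dataset            # the class documented here
from mvpa2.base.dataset import vstack, hstack      # dataset stacking helpers
from mvpa2.testing.tools import (assert_equal, assert_array_equal,
                                 assert_raises, assert_true, assert_false, ok_)

# minimal construction: samples plus per-sample (sa) and per-feature (fa) attributes
ds = Dataset(np.arange(12).reshape((3, 4)),
             sa={'targets': [1, 1, 2]},
             fa={'roi': [0, 0, 1, 1]})
print(ds.shape, ds.sa.targets, ds.fa.roi)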
Example 1: test_unique_stack
def test_unique_stack():
    data = Dataset(np.reshape(np.arange(24), (4, 6)),
                   sa=dict(x=[0, 1, 0, 1]),
                   fa=dict(y=[x for x in 'abccba']))

    sa_stack = stack_by_unique_sample_attribute(data, 'x')
    assert_equal(sa_stack.shape, (2, 12))
    assert_array_equal(sa_stack.fa.x, [0] * 6 + [1] * 6)
    assert_array_equal(sa_stack.fa.y, [x for x in 'abccbaabccba'])

    fa_stack = stack_by_unique_feature_attribute(data, 'y')
    assert_equal(fa_stack.shape, (12, 2))
    assert_array_equal(fa_stack.sa.x, [0, 1] * 6)
    assert_array_equal(fa_stack.sa.y, [y for y in 'aaaabbbbcccc'])
    #assert_array_equal(fa_stack.fa.y,[''])

    # check values match the fa or sa
    for i in xrange(4):
        for j in xrange(6):
            d = data[i, j]

            for k, other in enumerate((sa_stack, fa_stack)):
                msk = other.samples == d.samples
                ii, jj = np.nonzero(msk)  # find matching indices in other

                o = other[ii, jj]
                coll = [o.fa, o.sa][k]

                assert_equal(coll.x, d.sa.x)
                assert_equal(coll.y, d.fa.y)

    ystacker = lambda y: lambda x: stack_by_unique_feature_attribute(x, y)
    assert_raises(KeyError, ystacker('z'), data)

    data.fa['z'] = [z for z in '123451']
    assert_raises(ValueError, ystacker('z'), data)
Example 2: _call
def _call(self, dataset=None):
    """Extract weights from SMLR classifier.

    SMLR always has weights available, so nothing has to be computed here.
    """
    clf = self.clf
    # transpose to have the number of features on the second axis
    # (as usual)
    weights = clf.weights.T

    if __debug__:
        debug('SMLR',
              "Extracting weights for %d-class SMLR" %
              (len(weights) + 1) +
              "Result: min=%f max=%f" %
              (np.min(weights), np.max(weights)))

    # limit the labels to the number of sensitivity sets, to deal
    # with the case of `fit_all_weights=False`
    ds = Dataset(weights,
                 sa={clf.get_space(): clf._ulabels[:len(weights)]})

    if clf.params.has_bias:
        ds.sa['biases'] = clf.biases

    return ds
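For context, a _call like this belongs to a sensitivity analyzer and is not usually invoked directly. A hedged usage sketch (not from this page; the SMLR module path and the generic get_sensitivity_analyzer() entry point are assumptions about PyMVPA's API):

# Hedged usage sketch: obtaining SMLR weights as a Dataset via a sensitivity analyzer.
from mvpa2.clfs.smlr import SMLR                  # assumed module path for SMLR
clf = SMLR()
sens_analyzer = clf.get_sensitivity_analyzer()    # wraps a _call like the one above
sens = sens_analyzer(training_ds)                 # `training_ds`: any labeled Dataset (placeholder)
print(sens.shape)                                 # one row of weights per sensitivity set
print(sens.sa[clf.get_space()].value)             # class labels attached as a sample attribute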
Example 3: test_stack_add_attributes
def test_stack_add_attributes():
    data0 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data1 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data0.fa['ok'] = data0.sa['ok'] = np.arange(5)
    data1.fa['ok'] = data1.sa['ok'] = np.arange(5)
    data0.fa['nok'] = data0.sa['nok'] = [0]
    data1.fa['nok'] = data1.sa['nok'] = np.arange(5)

    # function, collection name, the other collection name
    for xstack, colname, ocolname in ((vstack, 'fa', 'sa'),
                                      (hstack, 'sa', 'fa')):
        for add_param in None, 'update', 'drop_nonunique':
            kw = {colname: add_param} if add_param else {}
            r = xstack((data0, data1), **kw)
            COL = lambda x: getattr(x, colname)
            col = COL(r)
            ocol = getattr(r, ocolname)

            # in any scenario, the other collection should have got
            # both names and be just fine
            assert_array_equal(ocol['nok'].value, [0] * 5 + range(5))
            assert_array_equal(ocol['ok'].value, range(5) * 2)

            if add_param in ('update',):
                # will be of the last dataset
                assert_array_equal(col['nok'].value, COL(data1)['nok'].value)
                assert_array_equal(col['ok'].value, COL(data1)['ok'].value)
            elif add_param in (None, 'drop_nonunique'):
                assert('nok' not in col)  # must be dropped since not unique
                # both the same but let's check ;)
                assert_array_equal(col['ok'].value, COL(data0)['ok'].value)
                assert_array_equal(col['ok'].value, COL(data1)['ok'].value)
Example 4: test_query_engine
def test_query_engine():
    data = np.arange(54)
    # indices in 3D
    ind = np.transpose((np.ones((3, 3, 3)).nonzero()))
    # sphere generator for 3 elements diameter
    sphere = ne.Sphere(1)
    # dataset with just one "space"
    ds = Dataset([data, data], fa={'s_ind': np.concatenate((ind, ind))})
    # and the query engine attaching the generator to the "index-space"
    qe = ne.IndexQueryEngine(s_ind=sphere)
    # cannot train since the engine does not know about the second space
    assert_raises(ValueError, qe.train, ds)
    # now do it again with a full spec
    ds = Dataset([data, data], fa={'s_ind': np.concatenate((ind, ind)),
                                   't_ind': np.repeat([0, 1], 27)})
    qe = ne.IndexQueryEngine(s_ind=sphere, t_ind=None)
    qe.train(ds)
    # internal representation check
    # YOH: invalid for new implementation with lookup tables (dictionaries)
    #assert_array_equal(qe._searcharray,
    #                   np.arange(54).reshape(qe._searcharray.shape) + 1)
    # should give us one corner, collapsing the 't_ind'
    assert_array_equal(qe(s_ind=(0, 0, 0)),
                       [0, 1, 3, 9, 27, 28, 30, 36])
    # directly specifying an index for 't_ind' without having an ROI
    # generator, should give the same corner, but just once
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=0), [0, 1, 3, 9])
    # just out of the mask -- no match
    assert_array_equal(qe(s_ind=(3, 3, 3)), [])
    # also out of the mask -- but single match
    assert_array_equal(qe(s_ind=(2, 2, 3), t_ind=1), [53])
    # query by id
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=0), qe[0])
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=[0, 1]),
                       qe(s_ind=(0, 0, 0)))
    # should not fail if t_ind is outside
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=[0, 1, 10]),
                       qe(s_ind=(0, 0, 0)))

    # should fail if asked about some unknown thing
    assert_raises(ValueError, qe.__call__, s_ind=(0, 0, 0), buga=0)

    # Test by using some literal feature attribute
    ds.fa['lit'] = ['roi1', 'ro2', 'r3'] * 18
    # should work as well as before
    assert_array_equal(qe(s_ind=(0, 0, 0)), [0, 1, 3, 9, 27, 28, 30, 36])
    # should fail if asked about some unknown (yet) thing
    assert_raises(ValueError, qe.__call__, s_ind=(0, 0, 0), lit='roi1')

    # Create qe which can query literals as well
    qe_lit = ne.IndexQueryEngine(s_ind=sphere, t_ind=None, lit=None)
    qe_lit.train(ds)
    # should work as well as before
    assert_array_equal(qe_lit(s_ind=(0, 0, 0)), [0, 1, 3, 9, 27, 28, 30, 36])
    # and subselect nicely -- only /3 ones
    assert_array_equal(qe_lit(s_ind=(0, 0, 0), lit='roi1'),
                       [0, 3, 9, 27, 30, 36])
    assert_array_equal(qe_lit(s_ind=(0, 0, 0), lit=['roi1', 'ro2']),
                       [0, 1, 3, 9, 27, 28, 30, 36])
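A hedged, minimal sketch of the same machinery outside the test harness (assuming the `ne.` prefix above refers to mvpa2.misc.neighborhood, and that Sphere and IndexQueryEngine live there):

# Hedged sketch: build an IndexQueryEngine over a per-feature coordinate attribute.
import numpy as np
from mvpa2.datasets.base import Dataset
from mvpa2.misc.neighborhood import Sphere, IndexQueryEngine   # assumed path for `ne`

coords = np.transpose(np.ones((3, 3, 3)).nonzero())    # 27 voxel coordinates in a 3x3x3 grid
ds = Dataset(np.random.randn(2, 27), fa={'voxel_indices': coords})
qe = IndexQueryEngine(voxel_indices=Sphere(1))
qe.train(ds)
neighbors = qe(voxel_indices=(1, 1, 1))   # feature ids within radius 1 of the center voxel
print(sorted(neighbors))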
Example 5: test_mergeds
def test_mergeds():
    data0 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data0.fa['one'] = np.ones(5)
    data1 = Dataset.from_wizard(np.ones((5, 5)), targets=1, chunks=1)
    data1.fa['one'] = np.zeros(5)
    data2 = Dataset.from_wizard(np.ones((3, 5)), targets=2, chunks=1)
    data3 = Dataset.from_wizard(np.ones((4, 5)), targets=2)
    data4 = Dataset.from_wizard(np.ones((2, 5)), targets=3, chunks=2)
    data4.fa['test'] = np.arange(5)

    # cannot merge if there are attributes missing in one of the datasets
    assert_raises(DatasetError, data1.append, data0)

    merged = data1.copy()
    merged.append(data2)

    ok_(merged.nfeatures == 5)
    l12 = [1] * 5 + [2] * 3
    l1 = [1] * 8
    ok_((merged.targets == l12).all())
    ok_((merged.chunks == l1).all())

    data_append = data1.copy()
    data_append.append(data2)

    ok_(data_append.nfeatures == 5)
    ok_((data_append.targets == l12).all())
    ok_((data_append.chunks == l1).all())

    #
    # appending
    #

    # we need the same samples attributes in both datasets
    assert_raises(DatasetError, data2.append, data3)

    #
    # vstacking
    #
    if __debug__:
        # tested only in __debug__
        assert_raises(ValueError, vstack, (data0, data1, data2, data3))
    datasets = (data1, data2, data4)
    merged = vstack(datasets)
    assert_equal(merged.shape,
                 (np.sum([len(ds) for ds in datasets]), data1.nfeatures))
    assert_true('test' in merged.fa)
    assert_array_equal(merged.sa.targets, [1] * 5 + [2] * 3 + [3] * 2)

    #
    # hstacking
    #
    assert_raises(ValueError, hstack, datasets)
    datasets = (data0, data1)
    merged = hstack(datasets)
    assert_equal(merged.shape,
                 (len(data1), np.sum([ds.nfeatures for ds in datasets])))
    assert_true('chunks' in merged.sa)
    assert_array_equal(merged.fa.one, [1] * 5 + [0] * 5)
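The stacking pattern exercised above, reduced to a hedged two-dataset sketch (vstack concatenates along samples, hstack along features; the mvpa2.base.dataset import path is an assumption):

# Hedged sketch: stacking Datasets along samples (vstack) and features (hstack).
import numpy as np
from mvpa2.datasets.base import Dataset
from mvpa2.base.dataset import vstack, hstack   # assumed module path

run1 = Dataset.from_wizard(np.ones((5, 5)), targets=1, chunks=1)
run2 = Dataset.from_wizard(np.zeros((3, 5)), targets=2, chunks=2)
both_runs = vstack((run1, run2))     # (8, 5): samples concatenated, sample attributes merged
print(both_runs.shape, both_runs.sa.targets)

left = Dataset.from_wizard(np.ones((4, 2)), targets=1)
right = Dataset.from_wizard(np.zeros((4, 3)), targets=1)
wide = hstack((left, right))         # (4, 5): features concatenated, feature attributes merged
print(wide.shape)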
Example 6: _call
def _call(self, dataset):
    # XXX Hm... it might make sense to unify access functions
    # naming across our swig libsvm wrapper and sg access
    # functions for svm
    clf = self.clf
    sgsvm = clf.svm
    sens_labels = None
    if isinstance(sgsvm, shogun.Classifier.MultiClassSVM):
        sens, biases = [], []
        nsvms = sgsvm.get_num_svms()
        clabels = sorted(clf._attrmap.values())
        nclabels = len(clabels)
        sens_labels = []
        isvm = 0                        # index for svm among known

        for i in xrange(nclabels):
            for j in xrange(i + 1, nclabels):
                sgsvmi = sgsvm.get_svm(isvm)
                labels_tuple = (clabels[i], clabels[j])
                # Since we gave the labels in incremental order,
                # we always should be right - but it does not
                # hurt to check if set of labels is the same
                if __debug__ and _shogun_exposes_slavesvm_labels:
                    if not sgsvmi.get_labels():
                        # We need to call classify() so labels get assigned
                        # to the multiclass SVM
                        sgsvm.classify()
                    assert(set([sgsvmi.get_label(int(x))
                                for x in sgsvmi.get_support_vectors()])
                           == set(labels_tuple))
                sens1, bias = self.__sg_helper(sgsvmi)
                sens.append(sens1)
                biases.append(bias)
                sens_labels += [labels_tuple[::-1]]  # ??? positive first
                isvm += 1
        assert(len(sens) == nsvms)      # we should have covered all
    else:
        sens1, bias = self.__sg_helper(sgsvm)
        biases = np.atleast_1d(bias)
        sens = np.atleast_2d(sens1)
        if not clf.__is_regression__:
            assert(set(clf._attrmap.values()) == set([-1.0, 1.0]))
            assert(sens.shape[0] == 1)
            sens_labels = [(-1.0, 1.0)]

    ds = Dataset(np.atleast_2d(sens))
    if sens_labels is not None:
        if isinstance(sens_labels[0], tuple):
            # Need to have them in array of dtype object
            sens_labels = asobjarray(sens_labels)
        if len(clf._attrmap):
            sens_labels = clf._attrmap.to_literal(sens_labels, recurse=True)
        ds.sa[clf.get_space()] = sens_labels
    ds.sa['biases'] = biases

    return ds
Example 7: create_mvpa_dataset
def create_mvpa_dataset(aXData1, aXData2, chunks, labels):
    feat_list = []
    for x1, x2, chunk in zip(aXData1, aXData2, chunks):
        feat_list.append([x1, x2])

    data = Dataset(samples=feat_list)
    data.sa['id'] = range(0, len(labels))
    data.sa['chunks'] = chunks
    data.sa['targets'] = labels

    return data
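A hedged call sketch for the helper above; the input arrays are made-up toy data for illustration, not from the original source:

# Hypothetical toy inputs for create_mvpa_dataset(); names and values are illustrative only.
import numpy as np

aXData1 = np.random.randn(6)          # first scalar feature per sample
aXData2 = np.random.randn(6)          # second scalar feature per sample
chunks = [1, 1, 1, 2, 2, 2]           # acquisition run per sample
labels = [0, 0, 1, 1, 0, 1]           # class targets per sample

ds = create_mvpa_dataset(aXData1, aXData2, chunks, labels)
print(ds.shape)                       # expected (6, 2): two features per sample
print(ds.sa.targets, ds.sa.chunks)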
Example 8: _get_test_dataset
def _get_test_dataset(include_nodes=True):
    # returns test dataset matching the contents of _get_test_sample_node_data
    samples, nodes, _ = _get_test_sample_node_data()

    ds = Dataset(np.asarray(samples))
    if include_nodes:
        ds.fa['node_indices'] = np.asarray(nodes)

    nsamples = ds.nsamples
    ds.sa['intents'] = ['NIFTI_INTENT_NONE'] * nsamples

    return ds
Example 9: test_stack_add_dataset_attributes
def test_stack_add_dataset_attributes():
    data0 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data0.a['one'] = np.ones(2)
    data0.a['two'] = 2
    data0.a['three'] = 'three'
    data0.a['common'] = range(10)
    data0.a['array'] = np.arange(10)
    data1 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data1.a['one'] = np.ones(3)
    data1.a['two'] = 3
    data1.a['four'] = 'four'
    data1.a['common'] = range(10)
    data1.a['array'] = np.arange(10)

    vstacker = lambda x: vstack((data0, data1), a=x)
    hstacker = lambda x: hstack((data0, data1), a=x)

    add_params = (1, None, 'unique', 'uniques', 'all', 'drop_nonunique')

    for stacker in (vstacker, hstacker):
        for add_param in add_params:
            if add_param == 'unique':
                assert_raises(DatasetError, stacker, add_param)
                continue

            r = stacker(add_param)

            if add_param == 1:
                assert_array_equal(data1.a.one, r.a.one)
                assert_equal(r.a.two, 3)
                assert_equal(r.a.four, 'four')
                assert_true('three' not in r.a.keys())
                assert_true('array' in r.a.keys())
            elif add_param == 'uniques':
                assert_equal(set(r.a.keys()),
                             set(['one', 'two', 'three',
                                  'four', 'common', 'array']))
                assert_equal(r.a.two, (2, 3))
                assert_equal(r.a.four, ('four',))
            elif add_param == 'all':
                assert_equal(set(r.a.keys()),
                             set(['one', 'two', 'three',
                                  'four', 'common', 'array']))
                assert_equal(r.a.two, (2, 3))
                assert_equal(r.a.three, ('three', None))
            elif add_param == 'drop_nonunique':
                assert_equal(set(r.a.keys()),
                             set(['common', 'three', 'four', 'array']))
                assert_equal(r.a.three, 'three')
                assert_equal(r.a.four, 'four')
                assert_equal(r.a.common, range(10))
                assert_array_equal(r.a.array, np.arange(10))
Example 10: test_cosmo_do_not_store_unsupported_datatype
def test_cosmo_do_not_store_unsupported_datatype():
    ds = Dataset(np.zeros((0, 0)))

    class ArbitraryClass(object):
        pass

    ds.a['unused'] = ArbitraryClass()
    c = cosmo.map2cosmo(ds)
    assert_false('a' in c.keys())

    ds.a['foo'] = np.zeros((1,))
    c = cosmo.map2cosmo(ds)
    assert_true('a' in c.keys())
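map2cosmo converts a Dataset into a CoSMoMVPA-style dictionary of plain arrays. A hedged export sketch (assuming the `cosmo` module referenced above is mvpa2.datasets.cosmo, and that the resulting dict is directly compatible with scipy's savemat):

# Hedged sketch: convert a Dataset to a CoSMoMVPA-style dict and save it as a .mat file.
import numpy as np
from scipy.io import savemat
from mvpa2.datasets.base import Dataset
from mvpa2.datasets import cosmo          # assumed location of the cosmo module used above

ds = Dataset(np.random.randn(4, 6), sa={'targets': [1, 1, 2, 2]})
c = cosmo.map2cosmo(ds)                   # dict of numpy arrays, assumed savemat-compatible
savemat('ds_cosmo.mat', c)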
Example 11: test_assign_sa
def test_assign_sa():
    # https://github.com/PyMVPA/PyMVPA/issues/149
    ds = Dataset(np.arange(6).reshape((2, -1)), sa=dict(targets=range(2)))
    ds.sa['task'] = ds.sa['targets']
    # so it should be a new collectable now
    assert_equal(ds.sa['task'].name, 'task')
    assert_equal(ds.sa['targets'].name, 'targets')  # this led to the issue reported in 149
    assert('task' in ds.sa.keys())
    assert('targets' in ds.sa.keys())

    ds1 = ds[:, 1]
    assert('task' in ds1.sa.keys())
    assert('targets' in ds1.sa.keys())  # issue reported in 149
    assert_equal(ds1.sa['task'].name, 'task')
    assert_equal(ds1.sa['targets'].name, 'targets')
Example 12: test_labelpermutation_randomsampling
def test_labelpermutation_randomsampling():
    ds = Dataset.from_wizard(np.ones((5, 10)), targets=range(5), chunks=1)
    for i in xrange(1, 5):
        ds.append(Dataset.from_wizard(np.ones((5, 10)) + i,
                                      targets=range(5), chunks=i + 1))
    # assign some feature attributes
    ds.fa['roi'] = np.repeat(np.arange(5), 2)
    ds.fa['lucky'] = np.arange(10) % 2
    # use subclass for testing if it would survive
    ds.samples = ds.samples.view(myarray)

    ok_(ds.get_nsamples_per_attr('targets') == {0: 5, 1: 5, 2: 5, 3: 5, 4: 5})
    sample = ds.random_samples(2)
    ok_(sample.get_nsamples_per_attr('targets').values() == [2, 2, 2, 2, 2])
    ok_((ds.sa['chunks'].unique == range(1, 6)).all())
Example 13: _call
def _call(self, dataset):
    # just for the beauty of it
    X = self._design

    # precompute the transformation if it is not yet done
    if self._inv_design is None:
        self._inv_ip = (X.T * X).I
        self._inv_design = self._inv_ip * X.T

    # get parameter estimates for all features at once
    # (betas x features)
    betas = self._inv_design * dataset.samples

    # charge state
    self.ca.pe = pe = betas.T.A

    # if betas and no z-stats are desired return them right away
    if not self._voi == 'pe' or self.ca.is_enabled('zstat'):
        # compute residuals
        residuals = X * betas
        residuals -= dataset.samples

        # estimate the parameter variance and compute zstats
        # assumption of mean(E) == 0 and equal variance
        # XXX next lines ignore off-diagonal elements and hence covariance
        # between regressors. The humble being writing these lines asks the
        # god of statistics for forgiveness, because it knows not what it does
        diag_ip = np.diag(self._inv_ip)
        # (features x betas)
        beta_vars = np.array([r.var() * diag_ip for r in residuals.T])

        # (parameter x feature)
        zstat = pe / np.sqrt(beta_vars)

        # charge state
        self.ca.zstat = zstat

    if self._voi == 'pe':
        # return as (beta x feature)
        result = Dataset(pe.T)
    elif self._voi == 'zstat':
        # return as (zstat x feature)
        result = Dataset(zstat.T)
    else:
        # we shall never get to this point
        raise ValueError("Unknown variable of interest '%s'" % str(self._voi))

    result.sa['regressor'] = np.arange(len(result))
    return result
Example 14: test_h5py_io
def test_h5py_io(dsfile):
    skip_if_no_external('h5py')

    # store random dataset to file
    ds = datasets['3dlarge']
    ds.save(dsfile)

    # reload and check for identity
    ds2 = Dataset.from_hdf5(dsfile)
    assert_array_equal(ds.samples, ds2.samples)
    for attr in ds.sa:
        assert_array_equal(ds.sa[attr].value, ds2.sa[attr].value)
    for attr in ds.fa:
        assert_array_equal(ds.fa[attr].value, ds2.fa[attr].value)
    assert_true(len(ds.a.mapper), 2)
    # since we have no __equal__ do at least some comparison
    assert_equal(repr(ds.a.mapper), repr(ds2.a.mapper))

    if __debug__:
        # debug mode needs special test as it enhances the repr output
        # with module info and id() appendix for objects
        #
        # INCORRECT slicing (:-1) since without any hash it results in an
        # empty list -- moreover we seem not to report ids with '#'
        # any longer
        #
        #assert_equal('#'.join(repr(ds.a.mapper).split('#')[:-1]),
        #             '#'.join(repr(ds2.a.mapper).split('#')[:-1]))
        pass
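A hedged, self-contained version of the round trip exercised above, using ds.save() and Dataset.from_hdf5() as shown in the test (requires the h5py external; the claim that save() wraps PyMVPA's HDF5 helpers is an assumption):

# Hedged sketch: HDF5 round trip for a small Dataset (h5py must be installed).
import numpy as np
from mvpa2.datasets.base import Dataset

ds = Dataset(np.random.randn(3, 4), sa={'targets': [1, 2, 1]})
ds.save('ds.hdf5')                       # assumed to wrap PyMVPA's hdf5 helpers (h5save)
ds_back = Dataset.from_hdf5('ds.hdf5')
assert np.array_equal(ds.samples, ds_back.samples)
assert np.array_equal(ds.sa.targets, ds_back.sa.targets)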
Example 15: get_data
def get_data(self):
    data = np.random.standard_normal((100, 2, 2, 2))
    labels = np.concatenate((np.repeat(0, 50),
                             np.repeat(1, 50)))
    chunks = np.repeat(range(5), 10)
    chunks = np.concatenate((chunks, chunks))
    return Dataset.from_wizard(samples=data, targets=labels, chunks=chunks)