本文整理匯總了Python中mvpa2.datasets.Dataset類的典型用法代碼示例。如果您正苦於以下問題:Python Dataset類的具體用法?Python Dataset怎麼用?Python Dataset使用的例子?那麼,這裏精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了Dataset類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: test_surf_ring_queryengine
def test_surf_ring_queryengine(self):
    """Exercise SurfaceRingQueryEngine on a small two-layer plane surface.

    Covers: rejection of an inner radius that is not smaller than the
    radius, errors raised by an untrained engine, errors raised when
    training on unsuitable datasets, and agreement of query results with
    an independent computation based on ``circlearound_n2d``.
    """
    plane = surf.generate_plane((0, 0, 0), (0, 1, 0), (0, 0, 1), 4, 5)
    # stack a slightly shifted copy of the plane on top as a second layer
    s2 = surf.merge(plane, (plane + (.01, 0, 0)))
    ds = Dataset(samples=np.arange(20)[np.newaxis],
                 fa=dict(node_indices=np.arange(39, 0, -2)))
    # triple the features; the copies share their node indices
    ds3 = hstack((ds, ds, ds))
    radius = 2.5
    inner_radius = 1.0

    # construction must fail when inner_radius is >= radius
    def build_with_bad_inner_radius():
        return queryengine.SurfaceRingQueryEngine(surface=s2,
                                                  inner_radius=2.5,
                                                  radius=radius)

    assert_raises(ValueError, build_with_bad_inner_radius)

    distance_metrics = ('euclidean', 'dijkstra', 'euclidean', 'dijkstra')
    center_flags = [True, False] * 2
    for distance_metric, include_center in zip(distance_metrics,
                                               center_flags):
        qe = queryengine.SurfaceRingQueryEngine(
            surface=s2,
            radius=radius,
            inner_radius=inner_radius,
            distance_metric=distance_metric,
            include_center=include_center)

        # nothing can be queried before training
        assert_raises(ValueError, lambda: qe.ids)
        assert_raises(ValueError, lambda: qe.query_byid(0))

        # a node index beyond the surface must be refused
        ds_ = ds.copy()
        ds_.fa.node_indices[0] = 100
        assert_raises(ValueError, lambda: qe.train(ds_))

        # so must a dataset without any node indices
        ds_.fa.pop('node_indices')
        assert_raises(ValueError, lambda: qe.train(ds_))

        # proper training
        qe.train(ds3)

        for node in np.arange(-1, s2.nvertices + 1):
            out_of_range = node < 0 or node >= s2.nvertices
            if out_of_range:
                assert_raises(KeyError, lambda: qe.query_byid(node))
                continue

            feature_ids = np.asarray(qe.query_byid(node))

            # ids that refer to the first (untripled) copy of ds
            base_ids = feature_ids[feature_ids < 20]

            # every base id must show up once per copy (offsets of 20)
            replicated = (base_ids[np.newaxis].T + [0, 20, 40]).ravel()
            assert_equal(set(feature_ids), set(replicated))

            node_indices = s2.circlearound_n2d(
                node, radius, distance_metric or 'dijkstra')

            # independent computation of the expected ring members
            fa_indices = []
            for fa_index, inode in enumerate(ds3.fa.node_indices):
                if inode in node_indices \
                        and node_indices[inode] > inner_radius:
                    fa_indices.append(fa_index)
            if include_center and node in ds3.fa.node_indices:
                fa_indices += np.where(
                    ds3.fa.node_indices == node)[0].tolist()

            assert_equal(set(feature_ids), set(fa_indices))
示例2: test_zscore_withoutchunks
def test_zscore_withoutchunks():
    """Smoke test: z-scoring works on a dataset that has no chunks.

    Guards against a regression of the issues reported in
    https://github.com/PyMVPA/PyMVPA/issues/26
    """
    from mvpa2.datasets import Dataset

    shape = (8, -1)
    ds = Dataset(np.arange(32).reshape(shape), sa=dict(targets=range(8)))
    zscore(ds, chunks_attr=None)

    # the samples must no longer equal the untouched input values
    untouched = np.arange(32).reshape(shape)
    assert(np.any(ds.samples != untouched))

    # and a summary must still be obtainable
    assert(ds.summary() is not None)
示例3: _proc_block
def _proc_block(self, block, ds, measure):
    """Apply `measure` to the ROI around every center id in `block`.

    This is the part of the computation that can be parallelized.

    Parameters
    ----------
    block
      Sequence of center ids; each one is looked up in the query engine.
    ds
      Dataset that gets sliced feature-wise for every ROI.
    measure
      Callable invoked with each ROI dataset.

    Returns
    -------
    tuple
      ``(results, roi_sizes)``; ``roi_sizes`` is ``None`` unless the
      'roi_sizes' conditional attribute is enabled.
    """
    if __debug__:
        debug_slc_ = 'SLC_' in debug.active
        debug('SLC',
              "Starting computing block for %i elements" % len(block))

    roi_sizes = [] if self.ca.is_enabled('roi_sizes') else None
    results = []

    for position, center in enumerate(block):
        # ask the query engine which features make up this ROI
        roi_fids = self._queryengine[center]
        if __debug__ and debug_slc_:
            debug('SLC_',
                  'For %r query returned ids %r' % (center, roi_fids))

        # restrict the dataset to the ROI features
        roi = ds[:, roi_fids]

        if self.__add_center_fa:
            # flag the ROI seed in a boolean feature attribute
            roi_seed = np.zeros(roi.nfeatures, dtype='bool')
            roi_seed[roi_fids.index(center)] = True
            roi.fa[self.__add_center_fa] = roi_seed

        res = measure(roi)

        if self.ca.is_enabled('roi_feature_ids'):
            if not is_datasetlike(res):
                res = Dataset(np.atleast_1d(res))
            # keep the ROI feature ids with the result for later aggregation
            res.a['roi_feature_ids'] = roi_fids
        results.append(res)

        if roi_sizes is not None:
            roi_sizes.append(roi.nfeatures)

        if __debug__:
            progress = (len(block),
                        center + 1,
                        roi.nfeatures,
                        float(position + 1) / len(block) * 100,)
            debug('SLC',
                  "Doing %i ROIs: %i (%i features) [%i%%]" % progress,
                  cr=True)

    return results, roi_sizes
示例4: run
def run(args):
    """Build a dataset from the selected data source and save it as HDF5.

    Exactly one of ``args.txt_data``, ``args.npy_data`` and
    ``args.mri_data`` is used (checked in that order); if none is given
    the dataset is loaded from ``args.data``.

    Parameters
    ----------
    args
      Parsed command line arguments. Attributes read here: ``txt_data``,
      ``npy_data``, ``mri_data``, ``add_vol_attr``, ``mask``, ``data``,
      ``add_fsl_mcpar``, ``output`` and ``hdf5_compression``.

    Raises
    ------
    RuntimeError
      If no data source was specified at all.
    """
    from mvpa2.base.hdf5 import h5save
    ds = None
    if args.txt_data is not None:
        verbose(1, "Load data from TXT file '%s'" % args.txt_data)
        samples = _load_from_txt(args.txt_data)
        ds = Dataset(samples)
    elif args.npy_data is not None:
        verbose(1, "Load data from NPY file '%s'" % args.npy_data)
        samples = _load_from_npy(args.npy_data)
        ds = Dataset(samples)
    elif args.mri_data is not None:
        verbose(1, "Load data from MRI image(s) %s" % args.mri_data)
        from mvpa2.datasets.mri import fmri_dataset
        vol_attr = dict()
        if args.add_vol_attr is not None:
            # XXX add a way to use the mapper of an existing dataset to
            # add a volume attribute without having to load the entire
            # mri data again
            vol_attr = dict(args.add_vol_attr)
            # dict() silently collapses repeated names, so a length
            # mismatch reveals duplicates
            if len(args.add_vol_attr) != len(vol_attr):
                warning("--vol-attr option with duplicate attribute name: "
                        "check arguments!")
            verbose(2, "Add volumetric feature attributes: %s" % vol_attr)
        ds = fmri_dataset(args.mri_data, mask=args.mask, add_fa=vol_attr)

    if ds is None:
        if args.data is None:
            raise RuntimeError('no data source specified')
        ds = hdf2ds(args.data)[0]
    elif args.data is not None:
        verbose(1, 'ignoring dataset input in favor of other data source -- remove either one to disambiguate')

    # act on all attribute options
    ds = process_common_dsattr_opts(ds, args)

    if args.add_fsl_mcpar is not None:
        from mvpa2.misc.fsl.base import McFlirtParams
        mc_par = McFlirtParams(args.add_fsl_mcpar)
        for param in mc_par:
            verbose(2, "Add motion regressor as sample attribute '%s'"
                       % ('mc_' + param))
            ds.sa['mc_' + param] = mc_par[param]

    verbose(3, "Dataset summary %s" % (ds.summary()))

    # and store; make sure the output carries the HDF5 extension
    outfilename = args.output
    if not outfilename.endswith('.hdf5'):
        outfilename += '.hdf5'
    verbose(1, "Save dataset to '%s'" % outfilename)
    h5save(outfilename, ds, mkdir=True, compression=args.hdf5_compression)
示例5: eep_dataset
def eep_dataset(samples, targets=None, chunks=None):
    """Build a dataset from an EEP binary file.

    EEP files are used by *eeprobe*, a software for analysing event-related
    potentials (ERP), which was developed at the Max-Planck Institute for
    Cognitive Neuroscience in Leipzig, Germany.

      http://www.ant-neuro.com/products/eeprobe

    `samples` is either the filename of an EEP file or an EEPBin
    instance; anything else raises a ValueError. `targets` and `chunks`
    are handed to ``Dataset.from_channeltimeseries``.
    """
    given_filename = isinstance(samples, str)
    if not given_filename and not isinstance(samples, EEPBin):
        raise ValueError("eep_dataset takes the filename of an "
                         "EEP file or a EEPBin object as 'samples' argument.")

    # a filename has to be opened first, an EEPBin is used directly
    eb = EEPBin(samples) if given_filename else samples

    return Dataset.from_channeltimeseries(
        eb.data, targets=targets, chunks=chunks, t0=eb.t0, dt=eb.dt,
        channelids=eb.channels)
示例6: eep_dataset
def eep_dataset(samples, targets=None, chunks=None):
    """Create a dataset using an EEP binary file as source.

    EEP files are used by *eeprobe*, a software for analysing event-related
    potentials (ERP), which was developed at the Max-Planck Institute for
    Cognitive Neuroscience in Leipzig, Germany.

      http://www.ant-neuro.com/products/eeprobe

    Parameters
    ----------
    samples : str or EEPBin instance
      Either the filename of an EEP file, or an EEPBin instance providing
      the samples data in EEP format.
    targets, chunks : sequence or scalar or None
      Values are passed through to `Dataset.from_channeltimeseries()`.
      See its documentation for more information.

    Raises
    ------
    ValueError
      If `samples` is neither a string nor an EEPBin instance.
    """
    if isinstance(samples, str):
        # a filename: open the EEP file
        source = EEPBin(samples)
    elif isinstance(samples, EEPBin):
        # already opened, use as is
        source = samples
    else:
        raise ValueError("eep_dataset takes the filename of an "
                         "EEP file or a EEPBin object as 'samples' argument.")

    # hand data and timing/channel information over to the dataset factory
    dataset = Dataset.from_channeltimeseries(
        source.data,
        targets=targets,
        chunks=chunks,
        t0=source.t0,
        dt=source.dt,
        channelids=source.channels)
    return dataset
示例7: test_random_affine_transformation
def test_random_affine_transformation():
    """Undo a random affine distortion using the parameters it reports."""
    ds = Dataset.from_wizard(np.random.randn(8,3,2))
    distorted = random_affine_transformation(ds)

    # invert the distortion step by step: shift, scale, then rotation
    unshifted = distorted.samples - distorted.a.random_shift
    unscaled = unshifted / distorted.a.random_scale
    restored = np.dot(unscaled, distorted.a.random_rotation.T)

    assert_array_almost_equal(restored, ds.samples)
示例8: _fill_in_scattered_results
def _fill_in_scattered_results(sl, dataset, roi_ids, results):
    """Average scattered ROI results back onto the full feature space.

    This requires the searchlight conditional attribute 'roi_feature_ids'
    to be enabled.

    Parameters
    ----------
    sl
      Not used by this function.
    dataset
      Its ``nfeatures`` determines the width of the output; if it has a
      'mapper' dataset attribute, a shallow copy is attached to the result.
    roi_ids
      Not used by this function.
    results
      Iterable of result blocks; every block is an iterable of result
      datasets that carry ``a.roi_feature_ids`` (and optionally
      ``fa.null_prob``).

    Returns
    -------
    Dataset
      Per-feature averages, with the number of contributing results in the
      'observations' feature attribute (and averaged 'null_prob' if it was
      present in the first result).

    Raises
    ------
    ValueError
      If `results` does not contain a single result.
    """
    import numpy as np
    from mvpa2.datasets import Dataset

    resmap = None
    probmap = None
    observ_counter = None
    for resblock in results:
        for res in resblock:
            if resmap is None:
                # prepare the result containers based on the first result
                resmap = np.zeros((len(res), dataset.nfeatures),
                                  dtype=res.samples.dtype)
                if 'null_prob' in res.fa:
                    # initialize the prob map also with zeroes, as p=0 can
                    # never happen as an empirical result
                    probmap = np.zeros(
                        (dataset.nfeatures,) + res.fa.null_prob.shape[1:],
                        dtype=res.samples.dtype)
                observ_counter = np.zeros(dataset.nfeatures, dtype=int)
            # project the result onto all features of its ROI (broadcasting)
            resmap[:, res.a.roi_feature_ids] += res.samples
            if probmap is not None:
                probmap[res.a.roi_feature_ids] += res.fa.null_prob
            # increment observation counter for all relevant features
            observ_counter[res.a.roi_feature_ids] += 1

    if resmap is None:
        # without this guard the code below would fail with an obscure
        # unbound-variable error
        raise ValueError(
            "Cannot fill in scattered results: no results were provided")

    # when all results have been added up average them according to the
    # number of observations; never-observed features are left at zero
    observ_mask = observ_counter > 0
    resmap[:, observ_mask] /= observ_counter[observ_mask]
    result_ds = Dataset(resmap,
                        fa={'observations': observ_counter})
    if probmap is not None:
        # transpose to make broadcasting work -- creates a view, so in-place
        # modification still does the job
        probmap.T[:, observ_mask] /= observ_counter[observ_mask]
        result_ds.fa['null_prob'] = probmap.squeeze()
    if 'mapper' in dataset.a:
        import copy
        result_ds.a['mapper'] = copy.copy(dataset.a.mapper)
    return result_ds
示例9: test_resample
def test_resample():
    """Check FFTResampleMapper with and without chunk-wise resampling."""
    timepoints = np.linspace(0, 2*np.pi, 100)
    signals = np.vstack((np.sin(timepoints), np.cos(timepoints))).T
    ds = Dataset(signals,
                 sa={'time': timepoints,
                     'section': np.repeat(range(10), 10)})
    assert_equal(ds.shape, (100, 2))

    # downsample to ten samples
    num = 10
    resampler = FFTResampleMapper(num,
                                  window=('gauss', 50),
                                  position_attr='time',
                                  attr_strategy='sample')
    resampled = resampler.forward(ds)
    assert_equal(resampled.shape, (num, ds.nfeatures))
    # the input dataset is left alone
    assert_equal(len(ds), 100)

    # position-based resampling: feed only every tenth sample
    every_tenth = ds[0::10]
    resampled_partial = resampler.forward(every_tenth)
    # the output timepoints must not depend on the input sampling
    assert_array_almost_equal(resampled.sa.time, resampled_partial.sa.time)
    # the data should be very similar too, once the first points are
    # excluded to prevent edge effects
    assert_array_almost_equal(resampled.samples[2:],
                              resampled_partial.samples[2:],
                              decimal=2)
    # plain sampling of sample attributes gives meaningful values
    assert_array_equal(resampled.sa.section, range(10))

    # same exercise for a dataset consisting of two chunks
    chunked = vstack([ds.copy(), ds.copy()])
    chunked.sa['chunks'] = np.repeat([0,1], len(ds))
    resampler = FFTResampleMapper(num,
                                  attr_strategy='sample',
                                  chunks_attr='chunks',
                                  window=('gauss', 50))
    resampled_chunked = resampler.forward(chunked)
    assert_equal(resampled_chunked.shape, (20, 2))
    assert_array_equal(resampled_chunked.sa.section, np.tile(range(10),2))
    # every chunk on its own must reproduce the single-dataset result
    assert_array_almost_equal(resampled.samples,
                              resampled_chunked.samples[:10])
    assert_array_almost_equal(resampled.samples,
                              resampled_chunked.samples[10:])
示例10: test_1d_multispace_searchlight
def test_1d_multispace_searchlight(self):
    """Searchlight over two coordinate spaces of a six-feature dataset."""
    ds = Dataset([np.arange(6)])
    ds.fa['coord1'] = np.repeat(np.arange(3), 2)
    # a second space alongside the first one
    ds.fa['coord2'] = np.tile(np.arange(2), 3)

    # the "measure" simply spells out which feature values it was given
    measure = lambda roi: "+".join([str(v) for v in roi.samples[0]])

    # (radius in coord1, radius in coord2, expected searchlight output)
    cases = (
        # radius 0 in both spaces: each feature is selected on its own
        (0, 0, [['0', '1', '2', '3', '4', '5']]),
        (0, 1, [['0+1', '0+1', '2+3', '2+3', '4+5', '4+5']]),
        (1, 0, [['0+2', '1+3', '0+2+4', '1+3+5', '2+4', '3+5']]),
    )
    for radius1, radius2, expected in cases:
        engine = IndexQueryEngine(coord1=Sphere(radius1),
                                  coord2=Sphere(radius2))
        res = Searchlight(measure, engine, nproc=1)(ds)
        assert_array_equal(res.samples, expected)
示例11: _proc_block
def _proc_block(self, block, ds, measure, seed=None, iblock='main'):
    """Little helper to capture the parts of the computation that can be
    parallelized

    Parameters
    ----------
    block
      Sequence of ROI center ids; each one is looked up in the query
      engine.
    ds
      Dataset that is sliced feature-wise for every ROI.
    measure
      Callable invoked with each ROI dataset.
    seed
      RNG seed.  Should be provided e.g. in child process invocations
      to guarantee that they all seed differently to not keep generating
      the same sequences due to reusing the same copy of numpy's RNG
    iblock
      Critical for generating non-colliding temp filenames in case
      of hdf5 backend.  Otherwise RNGs of different processes might
      collide in their temporary file names leading to problems.
    """
    if seed is not None:
        mvpa2.seed(seed)
    if __debug__:
        debug_slc_ = 'SLC_' in debug.active
        debug('SLC',
              "Starting computing block for %i elements" % len(block))
    results = []
    # look up the enabled conditional attributes once, before the loop
    store_roi_feature_ids = self.ca.is_enabled('roi_feature_ids')
    store_roi_sizes = self.ca.is_enabled('roi_sizes')
    store_roi_center_ids = self.ca.is_enabled('roi_center_ids')
    # any of these gets stored on the result, hence the result has to be
    # a dataset
    assure_dataset = any([store_roi_feature_ids,
                          store_roi_sizes,
                          store_roi_center_ids])
    # put rois around all features in the dataset and compute the
    # measure within them
    for i, f in enumerate(block):
        # retrieve the feature ids of all features in the ROI from the query
        # engine
        roi_specs = self._queryengine[f]
        if __debug__ and debug_slc_:
            debug('SLC_', 'For %r query returned roi_specs %r'
                  % (f, roi_specs))
        if is_datasetlike(roi_specs):
            # TODO: unittest
            # a dataset-like spec carries the feature ids in its single sample
            assert(len(roi_specs) == 1)
            roi_fids = roi_specs.samples[0]
        else:
            roi_fids = roi_specs
        # slice the dataset
        roi = ds[:, roi_fids]
        if is_datasetlike(roi_specs):
            # copy the feature attributes of the spec onto the ROI
            # NOTE(review): iteritems() looks like a Python 2 style API --
            # confirm what the fa collection provides
            for n, v in roi_specs.fa.iteritems():
                roi.fa[n] = v
        if self.__add_center_fa:
            # add fa to indicate ROI seed if requested
            roi_seed = np.zeros(roi.nfeatures, dtype='bool')
            if f in roi_fids:
                roi_seed[roi_fids.index(f)] = True
            else:
                warning("Center feature attribute id %s not found" % f)
            roi.fa[self.__add_center_fa] = roi_seed
        # compute the datameasure and store in results
        res = measure(roi)
        if assure_dataset and not is_datasetlike(res):
            res = Dataset(np.atleast_1d(res))
        if store_roi_feature_ids:
            # add roi feature ids to intermediate result dataset for later
            # aggregation
            res.a['roi_feature_ids'] = roi_fids
        if store_roi_sizes:
            res.a['roi_sizes'] = roi.nfeatures
        if store_roi_center_ids:
            res.a['roi_center_ids'] = f
        results.append(res)
        if __debug__:
            debug('SLC', "Doing %i ROIs: %i (%i features) [%i%%]" \
                % (len(block),
                   f + 1,
                   roi.nfeatures,
                   float(i + 1) / len(block) * 100,), cr=True)
    if self.results_postproc_fx:
        if __debug__:
            debug('SLC', "Post-processing %d results in proc_block using %s"
                  % (len(results), self.results_postproc_fx))
        results = self.results_postproc_fx(results)
    if self.results_backend == 'native':
        pass # nothing special
    elif self.results_backend == 'hdf5':
        # store results in a temporary file and return a filename
        # NOTE(review): tempfile.mktemp only generates a name and is
        # documented as deprecated in favour of mkstemp -- iblock in the
        # suffix is what keeps names of different blocks apart here
        results_file = tempfile.mktemp(prefix=self.tmp_prefix,
                                       suffix='-%s.hdf5' % iblock)
        if __debug__:
            debug('SLC', "Storing results into %s" % results_file)
        h5save(results_file, results)
#......... part of the code is omitted here .........
示例12: test_surf_queryengine
def test_surf_queryengine(self, qefn):
    """Exercise SurfaceQueryEngine, including HDF5 round trips.

    Parameters
    ----------
    qefn
      File name supplied by the caller. When h5py is available it is
      rebound to a temporary file created here, which is removed again
      after each save/load round trip.
    """
    s = surf.generate_plane((0, 0, 0), (0, 1, 0), (0, 0, 1), 4, 5)
    # add second layer
    s2 = surf.merge(s, (s + (.01, 0, 0)))
    ds = Dataset(samples=np.arange(20)[np.newaxis],
                 fa=dict(node_indices=np.arange(39, 0, -2)))
    # add more features (with shared node indices)
    ds3 = hstack((ds, ds, ds))
    radius = 2.5
    # Note: sweepargs is not used to avoid re-generating the same
    #       surface and dataset multiple times.
    for distance_metric in ('euclidean', 'dijkstra', '<illegal>', None):
        # invoked right away within this iteration, so late binding of
        # distance_metric is not an issue
        builder = lambda: queryengine.SurfaceQueryEngine(s2, radius,
                                                         distance_metric)
        if distance_metric in ('<illegal>', None):
            assert_raises(ValueError, builder)
            continue
        qe = builder()

        # test i/o and ensure that the untrained instance is not trained
        if externals.exists('h5py'):
            fd, qefn = tempfile.mkstemp('qe.hdf5', 'test')
            os.close(fd)
            try:
                h5save(qefn, qe)
                qe = h5load(qefn)
            finally:
                os.remove(qefn)

        # untrained qe should give errors
        assert_raises(ValueError, lambda: qe.ids)
        assert_raises(ValueError, lambda: qe.query_byid(0))

        # node index out of bounds should give error
        ds_ = ds.copy()
        ds_.fa.node_indices[0] = 100
        assert_raises(ValueError, lambda: qe.train(ds_))

        # lack of node indices should give error
        ds_.fa.pop('node_indices')
        assert_raises(ValueError, lambda: qe.train(ds_))

        # train the qe
        qe.train(ds3)

        # test i/o and ensure that the loaded instance is trained
        if externals.exists('h5py'):
            h5save(qefn, qe)
            try:
                qe = h5load(qefn)
            finally:
                # h5save re-created the file that was removed after the
                # first round trip -- do not leave it behind
                os.remove(qefn)

        for node in np.arange(-1, s2.nvertices + 1):
            if node < 0 or node >= s2.nvertices:
                assert_raises(KeyError, lambda: qe.query_byid(node))
                continue

            feature_ids = np.asarray(qe.query_byid(node))

            # node indices relative to ds
            base_ids = feature_ids[feature_ids < 20]

            # should have multiples of 20
            assert_equal(set(feature_ids),
                         set((base_ids[np.newaxis].T +
                              [0, 20, 40]).ravel()))

            node_indices = list(s2.circlearound_n2d(
                node, radius, distance_metric or 'dijkstra'))

            # `inode` rather than `node`, so the loop variable is not
            # shadowed
            fa_indices = [fa_index for fa_index, inode in
                          enumerate(ds3.fa.node_indices)
                          if inode in node_indices]

            assert_equal(set(feature_ids), set(fa_indices))

        # smoke tests
        assert_true('SurfaceQueryEngine' in '%s' % qe)
        assert_true('SurfaceQueryEngine' in '%r' % qe)
示例13: _proc_block
def _proc_block(self, block, ds, measure, iblock='main'):
    """Compute `measure` for the ROIs of one block of center ids.

    This captures the parts of the computation that can be parallelized.

    Parameters
    ----------
    block
      Sequence of ROI center ids, each looked up in the query engine.
    ds
      Dataset that is sliced feature-wise for every ROI.
    measure
      Callable invoked with each ROI dataset.
    iblock
      Becomes part of the temporary file name in case of the hdf5
      backend.  Critical for generating non-colliding temp filenames,
      as RNGs of different processes might otherwise collide in their
      temporary file names.

    Returns
    -------
    tuple
      ``(results, roi_sizes)``.  With the 'hdf5' backend ``results`` is
      the name of the file the results were stored in.  ``roi_sizes`` is
      ``None`` unless the 'roi_sizes' conditional attribute is enabled.
    """
    if __debug__:
        debug_slc_ = 'SLC_' in debug.active
        debug('SLC',
              "Starting computing block for %i elements" % len(block))

    roi_sizes = None
    if self.ca.is_enabled('roi_sizes'):
        roi_sizes = []
    results = []

    for count, center in enumerate(block):
        # feature ids of the ROI as reported by the query engine
        roi_fids = self._queryengine[center]
        if __debug__ and debug_slc_:
            debug('SLC_',
                  'For %r query returned ids %r' % (center, roi_fids))

        roi = ds[:, roi_fids]

        if self.__add_center_fa:
            # mark the ROI seed in a boolean feature attribute
            roi_seed = np.zeros(roi.nfeatures, dtype='bool')
            roi_seed[roi_fids.index(center)] = True
            roi.fa[self.__add_center_fa] = roi_seed

        res = measure(roi)

        wants_feature_ids = self.ca.is_enabled('roi_feature_ids')
        if wants_feature_ids and not is_datasetlike(res):
            res = Dataset(np.atleast_1d(res))
        if wants_feature_ids:
            # needed later on when results get aggregated
            res.a['roi_feature_ids'] = roi_fids
        results.append(res)

        if roi_sizes is not None:
            roi_sizes.append(roi.nfeatures)

        if __debug__:
            status = (len(block),
                      center + 1,
                      roi.nfeatures,
                      float(count + 1) / len(block) * 100,)
            debug('SLC',
                  "Doing %i ROIs: %i (%i features) [%i%%]" % status,
                  cr=True)

    backend = self.results_backend
    if backend == 'hdf5':
        # dump the results into a temporary file and hand back its name
        # NOTE(review): tempfile.mktemp is documented as deprecated in
        # favour of mkstemp; kept because callers may rely on tmp_prefix
        # pointing into a directory that does not exist yet -- confirm
        results_file = tempfile.mktemp(prefix=self.tmp_prefix,
                                       suffix='-%s.hdf5' % iblock)
        if __debug__:
            debug('SLC', "Storing results into %s" % results_file)
        h5save(results_file, results)
        if __debug__:
            debug('SLC_', "Results stored")
        results = results_file
    elif backend != 'native':
        # the native backend returns the results list as is
        raise RuntimeError("Must not reach this point")

    return results, roi_sizes
示例14: test_polydetrend
def test_polydetrend():
    """Test PolyDetrendMapper on whole datasets and chunk-wise.

    Only part of the original test is visible here; its remainder was
    omitted by the source this snippet was taken from.
    """
    samples_forwhole = np.array( [[1.0, 2, 3, 4, 5, 6],
                                  [-2.0, -4, -6, -8, -10, -12]], ndmin=2 ).T
    samples_forchunks = np.array( [[1.0, 2, 3, 3, 2, 1],
                                   [-2.0, -4, -6, -6, -4, -2]], ndmin=2 ).T
    chunks = [0, 0, 0, 1, 1, 1]
    # NOTE(review): chunks_bad, target_whole and target_chunked are not used
    # in the visible part -- possibly used in the omitted remainder
    chunks_bad = [ 0, 0, 1, 1, 1, 0]
    target_whole = np.array( [[-3.0, -2, -1, 1, 2, 3],
                              [-6, -4, -2, 2, 4, 6]], ndmin=2 ).T
    target_chunked = np.array( [[-1.0, 0, 1, 1, 0, -1],
                                [2, 0, -2, -2, 0, 2]], ndmin=2 ).T
    ds = Dataset(samples_forwhole)
    # this one will auto-train the mapper on first use
    dm = PolyDetrendMapper(polyord=1, space='police')
    mds = dm.forward(ds)
    # features are linear trends, so detrending should remove all
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))
    # we get the information where each sample is assumed to be in the
    # space spanned by the polynomials
    assert_array_equal(mds.sa.police, np.arange(len(ds)))
    # hackish way to get the previous regressors into a dataset
    ds.sa['opt_reg_const'] = dm._regs[:,0]
    ds.sa['opt_reg_lin'] = dm._regs[:,1]
    # using these precomputed regressors, we should get the same result as
    # before even if we do not generate a regressor for linear
    dm_optreg = PolyDetrendMapper(polyord=0,
                                  opt_regs=['opt_reg_const', 'opt_reg_lin'])
    mds_optreg = dm_optreg.forward(ds)
    assert_array_almost_equal(mds_optreg, np.zeros(mds.shape))
    ds = Dataset(samples_forchunks)
    # 'constant' detrending removes the mean
    mds = PolyDetrendMapper(polyord=0).forward(ds)
    assert_array_almost_equal(
        mds.samples,
        samples_forchunks - np.mean(samples_forchunks, axis=0))
    # if there is no GLOBAL linear trend it should be identical to mean removal
    # even if trying to remove linear
    mds2 = PolyDetrendMapper(polyord=1).forward(ds)
    assert_array_almost_equal(mds, mds2)
    # chunk-wise detrending
    ds = dataset_wizard(samples_forchunks, chunks=chunks)
    dm = PolyDetrendMapper(chunks_attr='chunks', polyord=1, space='police')
    mds = dm.forward(ds)
    # features are chunkswise linear trends, so detrending should remove all
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))
    # we get the information where each sample is assumed to be in the
    # space spanned by the polynomials, which is the identical linspace in both
    # chunks
    # NOTE(review): `range(3) * 2` requires range() to return a list, which
    # only holds on Python 2 -- confirm the targeted interpreter
    assert_array_equal(mds.sa.police, range(3) * 2)
    # non-matching number of samples cannot be mapped
    assert_raises(ValueError, dm.forward, ds[:-1])
    # however, if the dataset knows about the space it is possible
    ds.sa['police'] = mds.sa.police
    # XXX this should be
    #mds2 = dm(ds[1:-1])
    #assert_array_equal(mds[1:-1], mds2)
    # XXX but right now is
    assert_raises(NotImplementedError, dm.forward, ds[1:-1])
    # Detrend must preserve the size of dataset
    assert_equal(mds.shape, ds.shape)
    # small additional test for break points
    # although they are no longer there
    ds = dataset_wizard(np.array([[1.0, 2, 3, 1, 2, 3]], ndmin=2).T,
                        targets=chunks, chunks=chunks)
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=1).forward(ds)
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))
    # test of different polyord on each chunk
    target_mixed = np.array( [[-1.0, 0, 1, 0, 0, 0],
                              [2.0, 0, -2, 0, 0, 0]], ndmin=2 ).T
    ds = dataset_wizard(samples_forchunks.copy(), targets=chunks, chunks=chunks)
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=[0,1]).forward(ds)
    assert_array_almost_equal(mds, target_mixed)
    # test irregular spacing of samples, but with corrective time info
    samples_forwhole = np.array( [[1.0, 4, 6, 8, 2, 9],
                                  [-2.0, -8, -12, -16, -4, -18]], ndmin=2 ).T
    # the first feature doubles as the time axis
    ds = Dataset(samples_forwhole, sa={'time': samples_forwhole[:,0]})
    # linear detrending that makes use of temporal info from dataset
    dm = PolyDetrendMapper(polyord=1, space='time')
    mds = dm.forward(ds)
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))
    # and now the same stuff, but with chunking and ordered by time
    samples_forchunks = np.array( [[1.0, 3, 3, 2, 2, 1],
                                   [-2.0, -6, -6, -4, -4, -2]], ndmin=2 ).T
    chunks = [0, 1, 0, 1, 0, 1]
    time = [4, 4, 12, 8, 8, 12]
    ds = Dataset(samples_forchunks.copy(), sa={'chunks': chunks, 'time': time})
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=1, space='time').forward(ds)
#......... part of the code is omitted here .........
示例15: test_datasetmapping
def test_datasetmapping():
    """Forward and reverse mapping of a whole dataset through a BoxcarMapper."""
    # 6 samples with 4x2 features each
    data = np.arange(48).reshape(6,4,2)
    ds = Dataset(data,
                 sa={'timepoints': np.arange(6),
                     'multidim': data.copy()},
                 fa={'fid': np.arange(4)})

    # boxcars: the first two overlap, the third stands apart
    startpoints = [0, 1, 4]
    boxlength = 2
    nboxes = len(startpoints)
    bm = BoxcarMapper(startpoints, boxlength, space='boxy')
    # training is essential before mapping
    bm.train(ds)
    boxed = bm.forward(ds)
    assert_equal(len(boxed), nboxes)
    assert_equal(boxed.nfeatures, boxlength)

    # every sample attribute survives, though possibly rotated/compressed
    # into a multidimensional attribute
    expected_sa_keys = ['boxy_onsetidx'] + sorted(ds.sa.keys())
    assert_equal(sorted(boxed.sa.keys()), expected_sa_keys)
    assert_equal(boxed.sa.multidim.shape,
                 (nboxes, boxlength) + ds.shape[1:])
    assert_equal(boxed.sa.timepoints.shape, (nboxes, boxlength))
    expected_timepoints = np.array(
        [(s, s+1) for s in startpoints]).flatten()
    assert_array_equal(boxed.sa.timepoints.flatten(), expected_timepoints)
    assert_array_equal(boxed.sa.boxy_onsetidx, startpoints)
    # feature attributes are rotated and broadcast as well
    assert_array_equal(boxed.fa.fid, [ds.fa.fid, ds.fa.fid])
    # plus one attribute that is new
    assert_array_equal(boxed.fa.boxy_offsetidx, range(boxlength))

    # the way back via reverse()
    unboxed = bm.reverse(boxed)
    # at least something of every original attribute must come back
    assert_equal(sorted(unboxed.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(unboxed.fa.keys()), sorted(ds.fa.keys()))
    # the full samples array cannot be reconstructed, and samples covered
    # by overlapping boxcars show up more than once
    recovered_rows = [0, 1, 1, 2, 4, 5]
    expected_samples = np.arange(48).reshape(6, 4, 2)[recovered_rows]
    assert_array_equal(unboxed.samples, expected_samples)
    assert_array_equal(unboxed.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(unboxed.sa.multidim,
                       ds.sa.multidim[unboxed.sa.timepoints])
    # feature attributes, in contrast, are recovered completely
    assert_array_equal(unboxed.fa.fid, ds.fa.fid)

    # a popular configuration: flatten, boxcar, flatten again
    cm = ChainMapper([FlattenMapper(), bm, FlattenMapper()])
    cm.train(ds)
    bflat = ds.get_mapped(cm)
    assert_equal(bflat.shape,
                 (nboxes, boxlength * np.prod(ds.shape[1:])))
    # attach extra attributes
    bflat.fa['testfa'] = np.arange(bflat.nfeatures)
    bflat.sa['testsa'] = np.arange(bflat.nsamples)
    # and map everything back
    bflatrev = bflat.mapper.reverse(bflat)
    # data is identical again wherever boxcars line up
    assert_array_equal(ds.samples[:2], bflatrev.samples[:2])
    assert_array_equal(ds.samples[-2:], bflatrev.samples[-2:])
    # the feature axes have to agree
    assert_equal(ds.shape[1:], bflatrev.shape[1:])