This article collects typical usage examples of the Python function mvpa2.base.types.is_datasetlike. If you are unsure what is_datasetlike does or how to call it, the curated code examples below should help.
Shown below are 15 code examples of the is_datasetlike function, taken from open-source projects and ordered by popularity by default.
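As a quick orientation before the examples, here is a minimal sketch of what is_datasetlike reports for a PyMVPA Dataset versus a plain NumPy array (the Dataset import path is the conventional PyMVPA one and is assumed here; adjust to your installation if needed):

import numpy as np
from mvpa2.base.types import is_datasetlike
from mvpa2.datasets import Dataset  # conventional import path; assumed here

ds = Dataset(np.zeros((4, 3)))   # a real Dataset: 4 samples x 3 features
arr = np.zeros((4, 3))           # a plain ndarray of the same shape

print(is_datasetlike(ds))        # True  -- exposes the dataset interface
print(is_datasetlike(arr))       # False -- treated as raw array data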
Example 1: hdf2ds
def hdf2ds(fnames):
    """Load dataset(s) from an HDF5 file

    Parameters
    ----------
    fnames : list(str)
      Names of the input HDF5 files

    Returns
    -------
    list(Dataset)
      All dataset-like elements in all given HDF5 files (in order of
      appearance). If any given HDF5 file contains non-Dataset elements
      they are silently ignored. If no given HDF5 file contains any
      dataset, an empty list is returned.
    """
    from mvpa2.base.hdf5 import h5load
    dss = []
    for fname in fnames:
        content = h5load(fname)
        if is_datasetlike(content):
            dss.append(content)
        else:
            for c in content:
                if is_datasetlike(c):
                    dss.append(c)
    return dss
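A usage sketch (the file names below are placeholders; any HDF5 files written with PyMVPA's h5save would do):

# hypothetical file names -- substitute real h5save() output files
dss = hdf2ds(['subj01_data.hdf5', 'subj02_data.hdf5'])
print('Loaded %i dataset(s)' % len(dss))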
Example 2: forward
def forward(self, data):
    """Map data from input to output space.

    Parameters
    ----------
    data : Dataset-like, (at least 2D)-array-like
      Typically this is a `Dataset`, but it might also be a plain data
      array, or even something completely different(TM) that is supported
      by a subclass' implementation. If such an object is Dataset-like it
      is handled by a dedicated method that also transforms dataset
      attributes if necessary. If an array-like is passed, it has to be
      at least two-dimensional, with the first axis separating samples
      or observations. For single samples `forward1()` might be more
      appropriate.
    """
    if is_datasetlike(data):
        if __debug__:
            debug('MAP', "Forward-map %s-shaped dataset through '%s'."
                  % (data.shape, self))
        return self._forward_dataset(data)
    else:
        if hasattr(data, 'ndim') and data.ndim < 2:
            raise ValueError(
                'Mapper.forward() only supports mapping of data with '
                'at least two dimensions, where the first axis '
                'separates samples/observations. Consider using '
                'Mapper.forward1() instead.')
        if __debug__:
            debug('MAP', "Forward-map data through '%s'." % (self))
        return self._forward_data(data)
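A short sketch of the two code paths; `mapper`, `ds` and `sample_vector` are placeholders for any trained PyMVPA mapper, a Dataset, and a 1D NumPy array (np imported as in the introductory sketch):

mapped_ds = mapper.forward(ds)                              # Dataset in, Dataset out;
                                                            # attributes are mapped along
mapped_arr = mapper.forward(np.atleast_2d(sample_vector))   # plain 2D array path
mapped_one = mapper.forward1(sample_vector)                 # convenience for a single sample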
Example 3: train
def train(self, ds):
    """
    The default implementation calls ``_pretrain()``, ``_train()``, and
    finally ``_posttrain()``.

    Parameters
    ----------
    ds : Dataset
      Training dataset.

    Returns
    -------
    None
    """
    got_ds = is_datasetlike(ds)

    # TODO remove first condition if all Learners get only datasets
    if got_ds and (ds.nfeatures == 0 or len(ds) == 0):
        raise DegenerateInputError(
            "Cannot train learner on degenerate data %s" % ds)
    if __debug__:
        debug(
            "LRN",
            "Training learner %(lrn)s on dataset %(dataset)s",
            msgargs={'lrn': self, 'dataset': ds})

    self._pretrain(ds)

    # remember the time when started training
    t0 = time.time()
    if got_ds:
        # things might have happened during pretraining
        if ds.nfeatures > 0:
            self._train(ds)
        else:
            warning("Trying to train on dataset with no features present")
            if __debug__:
                debug("LRN",
                      "No features present for training, no actual training "
                      "is called")
    else:
        # in this case we claim to have no idea and simply try to train
        self._train(ds)

    # store timing
    self.ca.training_time = time.time() - t0
    # and post-proc
    self._posttrain(ds)
    # finally flag as trained
    self._set_trained()
    if __debug__:
        debug(
            "LRN",
            "Finished training learner %(lrn)s on dataset %(dataset)s",
            msgargs={'lrn': self, 'dataset': ds})
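Usage amounts to passing a training Dataset to any Learner subclass; in the sketch below `clf` and `ds` are placeholders for a classifier instance and a dataset carrying a `targets` sample attribute:

clf.train(ds)                   # runs _pretrain, _train and _posttrain
print(clf.ca.training_time)     # seconds spent training (if that conditional
                                # attribute is enabled on the learner)
# a degenerate dataset (no samples or no features) raises DegenerateInputError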
Example 4: compute
def compute(self, ds1, ds2=None):
    """Generic computation of any kernel

    Assumptions:

    - ds1, ds2 are either datasets or arrays,
    - presumably 2D (neither checked nor enforced here),
    - _compute takes ndarrays. If your kernel needs datasets,
      override compute.
    """
    if is_datasetlike(ds1):
        ds1 = ds1.samples
    if ds2 is None:
        ds2 = ds1
    elif is_datasetlike(ds2):
        ds2 = ds2.samples
    # TODO: assure 2D shape
    self._compute(ds1, ds2)
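A sketch of mixing input types, where `k` stands for an instance of any concrete Kernel subclass and `train_ds` / `test_array` are a placeholder Dataset and 2D array:

k.compute(train_ds)              # dataset against itself (symmetric kernel)
k.compute(train_ds, test_array)  # dataset against a plain 2D array;
                                 # both paths end up in _compute() on ndarrays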
Example 5: _sl_call
def _sl_call(self, dataset, roi_ids, nproc):
    """Classical generic searchlight implementation
    """
    assert(self.results_backend in ('native', 'hdf5'))
    # compute
    if nproc is not None and nproc > 1:
        # split all target ROIs centers into `nproc` equally sized blocks
        nproc_needed = min(len(roi_ids), nproc)
        nblocks = nproc_needed \
                  if self.nblocks is None else self.nblocks
        roi_blocks = np.array_split(roi_ids, nblocks)

        # the next block sets up the infrastructure for parallel computing
        # this can easily be changed into a ParallelPython loop, if we
        # decide to have a PP job server in PyMVPA
        import pprocess
        p_results = pprocess.Map(limit=nproc_needed)
        if __debug__:
            debug('SLC', "Starting off %s child processes for nblocks=%i"
                  % (nproc_needed, nblocks))
        compute = p_results.manage(
                      pprocess.MakeParallel(self._proc_block))
        for iblock, block in enumerate(roi_blocks):
            # should we maybe deepcopy the measure to have a unique and
            # independent one per process?
            seed = mvpa2.get_random_seed()
            compute(block, dataset, copy.copy(self.__datameasure),
                    seed=seed, iblock=iblock)
    else:
        # otherwise collect the results in a 1-item list
        p_results = [
            self._proc_block(roi_ids, dataset, self.__datameasure)]

    # Finally collect and possibly process results
    # p_results here is either a generator from pprocess.Map or a list.
    # In case of a generator it allows to process results as they become
    # available
    result_ds = self.results_fx(sl=self,
                                dataset=dataset,
                                roi_ids=roi_ids,
                                results=self.__handle_all_results(p_results))

    # Assure having a dataset (for paranoid ones)
    if not is_datasetlike(result_ds):
        try:
            result_a = np.atleast_1d(result_ds)
        except ValueError as e:
            if 'setting an array element with a sequence' in str(e):
                # try forcing object array. Happens with
                # test_custom_results_fx_logic on numpy 1.4.1 on Debian
                # squeeze
                result_a = np.array(result_ds, dtype=object)
            else:
                raise
        result_ds = Dataset(result_a)

    return result_ds
Example 6: _proc_block
def _proc_block(self, block, ds, measure):
    """Little helper to capture the parts of the computation that can be
    parallelized
    """
    if __debug__:
        debug_slc_ = 'SLC_' in debug.active
        debug('SLC',
              "Starting computing block for %i elements" % len(block))
    if self.ca.is_enabled('roi_sizes'):
        roi_sizes = []
    else:
        roi_sizes = None
    results = []
    # put rois around all features in the dataset and compute the
    # measure within them
    for i, f in enumerate(block):
        # retrieve the feature ids of all features in the ROI from the query
        # engine
        roi_fids = self._queryengine[f]
        if __debug__ and debug_slc_:
            debug('SLC_', 'For %r query returned ids %r' % (f, roi_fids))
        # slice the dataset
        roi = ds[:, roi_fids]
        if self.__add_center_fa:
            # add fa to indicate ROI seed if requested
            roi_seed = np.zeros(roi.nfeatures, dtype='bool')
            roi_seed[roi_fids.index(f)] = True
            roi.fa[self.__add_center_fa] = roi_seed
        # compute the datameasure and store in results
        res = measure(roi)
        if self.ca.is_enabled('roi_feature_ids'):
            if not is_datasetlike(res):
                res = Dataset(np.atleast_1d(res))
            # add roi feature ids to intermediate result dataset for later
            # aggregation
            res.a['roi_feature_ids'] = roi_fids
        results.append(res)

        # store the size of the roi dataset
        if roi_sizes is not None:
            roi_sizes.append(roi.nfeatures)

        if __debug__:
            debug('SLC', "Doing %i ROIs: %i (%i features) [%i%%]" \
                  % (len(block),
                     f+1,
                     roi.nfeatures,
                     float(i+1)/len(block)*100,), cr=True)

    return results, roi_sizes
Example 7: _get_sl_connectomes
def _get_sl_connectomes(self, seed_means, qe_all, datasets, inode, connectivity_mapper):
    # For each SL, computing connectivity of features to seed means
    sl_connectomes = []
    # Looping over each subject
    for seed_mean, qe_, sd in zip(seed_means, qe_all, datasets):
        connectivity_mapper.train(seed_mean)
        sl_ids = qe_[inode]
        if is_datasetlike(sl_ids):
            assert (sl_ids.nsamples == 1)
            sl_ids = sl_ids.samples[0, :].tolist()
        sl_connectomes.append(connectivity_mapper.forward(sd[:, sl_ids]))
    return sl_connectomes
Example 8: _predict
def _predict(self, data):
    l = len(self._ulabels)

    # oh those lovely random estimates, for now just an estimate
    # per sample. Since we are random after all -- keep it random
    self.ca.estimates = np.random.normal(size=len(data))

    if is_datasetlike(data) and self.params.same:
        # decide on mapping between original labels
        labels_map = dict(
            (t, rt) for t, rt in zip(self._ulabels,
                                     self._ulabels[npr.randint(0, l, size=l)]))
        return [labels_map[t] for t in data.sa[self.get_space()].value]
    else:
        # random one per each
        return self._ulabels[npr.randint(0, l, size=len(data))]
Example 9: hstack
def hstack(datasets):
    """Stacks datasets horizontally (appending features).

    Sample attribute collections are merged incrementally, attributes with
    identical keys overwriting previous ones in the stacked dataset. All
    datasets must have an identical set of feature attributes (matching keys,
    not values), otherwise a ValueError will be raised.

    No dataset attributes from any source dataset will be transferred into the
    stacked dataset.

    Parameters
    ----------
    datasets : tuple
      Sequence of datasets to be stacked.

    Returns
    -------
    AttrDataset (or respective subclass)
    """
    #
    # XXX Use CombinedMapper in here whenever it comes back
    #

    # fall back to numpy if it is not a dataset
    if not is_datasetlike(datasets[0]):
        # we might get a list of 1Ds that would yield wrong results when
        # turned into a dict (would run along samples-axis)
        return AttrDataset(np.atleast_2d(np.hstack(datasets)))

    if __debug__:
        target = sorted(datasets[0].fa.keys())
        if not np.all([sorted(ds.fa.keys()) == target for ds in datasets]):
            raise ValueError("Feature attribute collections of the datasets "
                             "to be stacked have varying attributes.")
    # will puke if not equal number of samples
    stacked_samp = np.concatenate([ds.samples for ds in datasets], axis=1)

    stacked_fa = {}
    for attr in datasets[0].fa:
        stacked_fa[attr] = np.concatenate(
            [ds.fa[attr].value for ds in datasets], axis=0)
    # create the dataset
    merged = datasets[0].__class__(stacked_samp, fa=stacked_fa)

    for ds in datasets:
        merged.sa.update(ds.sa)

    return merged
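A minimal sketch of appending two feature sets for the same samples (Dataset and np imported as in the snippet after the introduction):

ds_a = Dataset(np.zeros((5, 3)))   # 5 samples x 3 features
ds_b = Dataset(np.ones((5, 2)))    # same 5 samples, 2 further features
merged = hstack((ds_a, ds_b))
print(merged.shape)                # (5, 5): features appended column-wise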
Example 10: vstack
def vstack(datasets):
    """Stacks datasets vertically (appending samples).

    Feature attribute collections are merged incrementally, attributes with
    identical keys overwriting previous ones in the stacked dataset. All
    datasets must have an identical set of sample attributes (matching keys,
    not values), otherwise a ValueError will be raised.

    No dataset attributes from any source dataset will be transferred into the
    stacked dataset. If all input datasets have common dataset attributes that
    are also valid for the stacked dataset, they can be moved into the output
    dataset like this::

      ds_merged = vstack((ds1, ds2, ds3))
      ds_merged.a.update(ds1.a)

    Parameters
    ----------
    datasets : tuple
      Sequence of datasets to be stacked.

    Returns
    -------
    AttrDataset (or respective subclass)
    """
    # fall back to numpy if it is not a dataset
    if not is_datasetlike(datasets[0]):
        return AttrDataset(np.vstack(datasets))

    if __debug__:
        target = sorted(datasets[0].sa.keys())
        if not np.all([sorted(ds.sa.keys()) == target for ds in datasets]):
            raise ValueError("Sample attribute collections of the datasets "
                             "to be stacked have varying attributes.")
    # will puke if not equal number of features
    stacked_samp = np.concatenate([ds.samples for ds in datasets], axis=0)

    stacked_sa = {}
    for attr in datasets[0].sa:
        stacked_sa[attr] = np.concatenate(
            [ds.sa[attr].value for ds in datasets], axis=0)
    # create the dataset
    merged = datasets[0].__class__(stacked_samp, sa=stacked_sa)

    for ds in datasets:
        merged.fa.update(ds.fa)

    return merged
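A minimal sketch of appending the samples of two runs with matching sample attributes (again reusing the Dataset import from the introductory sketch):

ds_run1 = Dataset(np.zeros((4, 10)), sa={'targets': ['a', 'a', 'b', 'b']})
ds_run2 = Dataset(np.ones((4, 10)), sa={'targets': ['a', 'b', 'a', 'b']})
ds_all = vstack((ds_run1, ds_run2))
print(ds_all.shape)                # (8, 10): samples appended row-wise
ds_all.a.update(ds_run1.a)         # carry common dataset attributes over manually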
Example 11: _posttrain
def _posttrain(self, dataset):
    """Functionality post training

    For instance -- computing confusion matrix.

    Parameters
    ----------
    dataset : Dataset
      Data which was used for training
    """
    super(Classifier, self)._posttrain(dataset)

    ca = self.ca

    # needs to be assigned first since below we use predict
    self.__trainednfeatures = dataset.nfeatures

    if __debug__ and 'CHECK_TRAINED' in debug.active:
        self.__trainedidhash = dataset.idhash

    if ca.is_enabled('training_stats') and \
            not ca.is_set('training_stats'):
        # we should not store predictions for training data,
        # it is confusing imho (yoh)
        ca.change_temporarily(
            disable_ca=["predictions"])
        if self.params.retrainable:
            # we would need to recheck if data is the same,
            # XXX think if there is a way to make this all
            # efficient. For now, probably, retrainable
            # classifiers have no chance but not to use
            # training_stats... sad
            self.__changedData_isset = False
        predictions = self.predict(dataset)
        ca.reset_changed_temporarily()
        targets = dataset.sa[self.get_space()].value
        if is_datasetlike(predictions) and (self.get_space() in predictions.fa):
            # e.g. in case of pair-wise uncombined results - provide
            # stats per each of the targets pairs
            prediction_targets = predictions.fa[self.get_space()].value
            ca.training_stats = dict(
                (t, self.__summary_class__(
                    targets=targets, predictions=predictions.samples[:, i]))
                for i, t in enumerate(prediction_targets))
        else:
            ca.training_stats = self.__summary_class__(
                targets=targets, predictions=predictions)
def reverse(self, data):
"""Reverse-map data from output back into input space.
Parameters
----------
data : Dataset-like, anything
Typically this is a `Dataset`, but it might also be a plain data
array, or even something completely different(TM) that is supported
by a subclass' implementation. If such an object is Dataset-like it
is handled by a dedicated method that also transforms dataset
attributes if necessary.
"""
if is_datasetlike(data):
if __debug__:
debug('MAP', "Reverse-map %s-shaped dataset through '%s'."
% (data.shape, self))
return self._reverse_dataset(data)
else:
if __debug__:
debug('MAP', "Reverse-map data through '%s'." % (self))
return self._reverse_data(data)
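The round trip with a trained mapper then looks like this; `mapper` and `ds` are placeholders as before, and for lossy mappers the reconstruction is only approximate:

mapped = mapper.forward(ds)        # into the mapper's output space
restored = mapper.reverse(mapped)  # back into the original input space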
Example 13: p
def p(self, x, return_tails=False, **kwargs):
    """Returns the p-value for values of `x`.

    Returned values are determined left, right, or from any tail
    depending on the constructor setting.

    In case a `FeaturewiseMeasure` was used to estimate the
    distribution the method returns an array. In that case `x` can be
    a scalar value or an array of a matching shape.
    """
    peas = _pvalue(x, self.cdf, self.__tail, return_tails=return_tails, **kwargs)

    if is_datasetlike(x):
        # return the p-values in a dataset as well and assign the input
        # dataset attributes to the return dataset too
        pds = x.copy(deep=False)
        if return_tails:
            pds.samples = peas[0]
            return pds, peas[1]
        else:
            pds.samples = peas
            return pds

    return peas
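The Dataset branch above follows a recurring PyMVPA pattern: shallow-copy the input and swap in the new values, so all attributes travel with the result. A standalone sketch of that pattern (the helper name is made up for illustration):

def wrap_like(x, values):
    # package `values` like `x`: reuse a shallow copy of a Dataset-like
    # input, otherwise hand the values back unchanged
    if is_datasetlike(x):
        out = x.copy(deep=False)
        out.samples = values
        return out
    return values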
Example 14: _get_hypesvs
def _get_hypesvs(self, sl_connectomes, local_common_model=None):
    '''
    Hyperalign connectomes and return mappers
    and trained SVDMapper of common space.

    Parameters
    ----------
    sl_connectomes: a list of connectomes to hyperalign
    local_common_model: a reference common model to be used.

    Returns
    -------
    a tuple (sl_hmappers, svm, local_common_model)
    sl_hmappers: a list of mappers corresponding to input list in that order.
    svm: an SVD mapper based on the input data. If given a common model, this is None.
    local_common_model: If local_common_model is provided as input, this will be None.
        Otherwise, local_common_model will be computed here and returned.
    '''
    # TODO Should we z-score sl_connectomes?
    return_model = False if self.params.save_model is None else True
    if local_common_model is not None:
        ha = Hyperalignment(level2_niter=0)
        if not is_datasetlike(local_common_model):
            local_common_model = Dataset(samples=local_common_model)
        ha.train([local_common_model])
        sl_hmappers = ha(sl_connectomes)
        return sl_hmappers, None, None
    ha = Hyperalignment()
    sl_hmappers = ha(sl_connectomes)
    sl_connectomes = [slhm.forward(slc) for slhm, slc in zip(sl_hmappers, sl_connectomes)]
    _ = [zscore(slc, chunks_attr=None) for slc in sl_connectomes]
    sl_connectomes = np.dstack(sl_connectomes).mean(axis=-1)
    svm = SVDMapper(force_train=True)
    svm.train(sl_connectomes)
    if return_model:
        local_common_model = svm.forward(sl_connectomes)
    else:
        local_common_model = None
    return sl_hmappers, svm, local_common_model
Example 15: hstack
def hstack(datasets, a=None):
    """Stacks datasets horizontally (appending features).

    Sample attribute collections are merged incrementally, attributes with
    identical keys overwriting previous ones in the stacked dataset. All
    datasets must have an identical set of feature attributes (matching keys,
    not values), otherwise a ValueError will be raised.

    No dataset attributes from any source dataset will be transferred into the
    stacked dataset.

    Parameters
    ----------
    datasets : tuple
      Sequence of datasets to be stacked.
    a : {'unique', 'drop_nonunique', 'uniques', 'all'} or True or False or None or int (default: None)
      Indicates which dataset attributes from datasets are stored
      in merged_dataset. If an int k, then the dataset attributes from
      datasets[k] are taken. If 'unique' then it is assumed that any
      attribute common to more than one dataset in datasets is unique;
      if not an exception is raised. If 'drop_nonunique' then as 'unique',
      except that exceptions are not raised. If 'uniques' then, for each
      attribute, any unique value across the datasets is stored in a tuple
      in merged_datasets. If 'all' then each attribute present in any
      dataset across datasets is stored as a tuple in merged_datasets;
      missing values are replaced by None. If None (the default) then no
      attributes are stored in merged_dataset. True is equivalent to
      'drop_nonunique'. False is equivalent to None.

    Returns
    -------
    AttrDataset (or respective subclass)
    """
    #
    # XXX Use CombinedMapper in here whenever it comes back
    #
    if not len(datasets):
        raise ValueError('concatenation of zero-length sequences is impossible')
    if not len(datasets) > 1:
        # trivial hstack
        return datasets[0]
    # fall back to numpy if it is not a dataset
    if not is_datasetlike(datasets[0]):
        # we might get a list of 1Ds that would yield wrong results when
        # turned into a dict (would run along samples-axis)
        return AttrDataset(np.atleast_2d(np.hstack(datasets)))

    if __debug__:
        target = sorted(datasets[0].fa.keys())
        if not np.all([sorted(ds.fa.keys()) == target for ds in datasets]):
            raise ValueError("Feature attribute collections of the datasets "
                             "to be stacked have varying attributes.")
    # will puke if not equal number of samples
    stacked_samp = np.concatenate([ds.samples for ds in datasets], axis=1)

    stacked_fa = {}
    for attr in datasets[0].fa:
        stacked_fa[attr] = np.concatenate(
            [ds.fa[attr].value for ds in datasets], axis=0)
    # create the dataset
    merged = datasets[0].__class__(stacked_samp, fa=stacked_fa)

    for ds in datasets:
        merged.sa.update(ds.sa)

    _stack_add_equal_dataset_attributes(merged, datasets, a)
    return merged
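A usage sketch of the `a` parameter in this later variant, based on the behavior described in its docstring (same placeholder datasets as in the earlier hstack sketch):

merged = hstack((ds_a, ds_b), a='drop_nonunique')  # keep dataset attributes that
                                                   # agree across the inputs
merged = hstack((ds_a, ds_b), a=0)                 # take all dataset attributes
                                                   # from the first dataset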