当前位置: 首页>>代码示例>>Python>>正文


Python datasets.Dataset类代码示例

本文整理汇总了Python中mvpa2.datasets.Dataset的典型用法代码示例。如果您正苦于以下问题:Python Dataset类的具体用法?Python Dataset怎么用?Python Dataset使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Dataset类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_surf_ring_queryengine

    def test_surf_ring_queryengine(self):
        """Exercise SurfaceRingQueryEngine on a small two-layer surface.

        Covers constructor validation, the errors raised by an untrained
        engine, the errors raised by invalid training datasets, and the
        feature ids returned by `query_byid` for every node -- for both
        distance metrics, with and without `include_center`.
        """
        s = surf.generate_plane((0, 0, 0), (0, 1, 0), (0, 0, 1), 4, 5)
        # add second layer
        s2 = surf.merge(s, (s + (.01, 0, 0)))
        # one sample, 20 features; features are assigned the odd node
        # indices 39, 37, ..., 1
        ds = Dataset(samples=np.arange(20)[np.newaxis],
                     fa=dict(node_indices=np.arange(39, 0, -2)))
        # add more features (with shared node indices)
        ds3 = hstack((ds, ds, ds))
        radius = 2.5
        inner_radius = 1.0
        # Makes sure it raises error if inner_radius is >= radius
        assert_raises(ValueError,
                      lambda: queryengine.SurfaceRingQueryEngine(surface=s2,
                                                         inner_radius=2.5,
                                                         radius=radius))
        # every metric is paired once with include_center=True and once with
        # include_center=False
        distance_metrics = ('euclidean', 'dijkstra', 'euclidean', 'dijkstra')
        for distance_metric, include_center in zip(distance_metrics, [True, False]*2):
            qe = queryengine.SurfaceRingQueryEngine(surface=s2, radius=radius,
                                inner_radius=inner_radius, distance_metric=distance_metric,
                                include_center=include_center)
            # untrained qe should give errors
            assert_raises(ValueError, lambda: qe.ids)
            assert_raises(ValueError, lambda: qe.query_byid(0))

            # node index out of bounds should give error
            ds_ = ds.copy()
            ds_.fa.node_indices[0] = 100
            assert_raises(ValueError, lambda: qe.train(ds_))

            # lack of node indices should give error
            ds_.fa.pop('node_indices')
            assert_raises(ValueError, lambda: qe.train(ds_))
            # train the qe
            qe.train(ds3)

            # also probe one id below and one id above the valid node range
            for node in np.arange(-1, s2.nvertices + 1):
                if node < 0 or node >= s2.nvertices:
                    assert_raises(KeyError, lambda: qe.query_byid(node))
                    continue

                feature_ids = np.asarray(qe.query_byid(node))
                # node indices relative to ds
                base_ids = feature_ids[feature_ids < 20]
                # should have multiples of 20
                # (ds3 is ds stacked three times, so every hit in the first
                # copy must reappear at offsets 20 and 40)
                assert_equal(set(feature_ids),
                             set((base_ids[np.newaxis].T + \
                                  [0, 20, 40]).ravel()))

                # reference computation straight from the surface
                # NOTE(review): presumably a mapping node -> distance, since it
                # is indexed by node and compared to inner_radius below --
                # confirm against surf
                node_indices = s2.circlearound_n2d(node,
                                    radius, distance_metric or 'dijkstra')

                # keep only features whose node lies strictly outside the
                # inner radius
                fa_indices = [fa_index for fa_index, inode in
                              enumerate(ds3.fa.node_indices)
                              if inode in node_indices and node_indices[inode] > inner_radius]
                # the center node is expected back only when explicitly
                # requested and actually present in the dataset
                if include_center and node in ds3.fa.node_indices:
                    fa_indices += np.where(ds3.fa.node_indices == node)[0].tolist()
                assert_equal(set(feature_ids), set(fa_indices))
开发者ID:PyMVPA,项目名称:PyMVPA,代码行数:57,代码来源:test_surfing.py

示例2: test_zscore_withoutchunks

def test_zscore_withoutchunks():
    """Smoke test: z-score a dataset that carries no chunks attribute.

    Checks that all issues of
    https://github.com/PyMVPA/PyMVPA/issues/26
    are fixed: the samples must have been modified and a summary must still
    be obtainable afterwards.
    """
    from mvpa2.datasets import Dataset

    def raw_samples():
        # fresh 8 x 4 integer ramp on every call
        return np.arange(32).reshape((8, -1))

    ds = Dataset(raw_samples(), sa={'targets': range(8)})
    zscore(ds, chunks_attr=None)

    differs = ds.samples != raw_samples()
    assert np.any(differs)

    ds_summary = ds.summary()
    assert ds_summary is not None
开发者ID:Anhmike,项目名称:PyMVPA,代码行数:10,代码来源:test_zscoremapper.py

示例3: _proc_block

    def _proc_block(self, block, ds, measure):
        """Apply `measure` to the ROI around every center id in `block`.

        This is the part of the computation that can be parallelized.

        Parameters
        ----------
        block
          Sequence of ROI center ids; each one is looked up in the query
          engine to obtain the feature ids of its ROI.
        ds
          Dataset that gets sliced feature-wise into ROI datasets.
        measure
          Callable that is invoked with each ROI dataset.

        Returns
        -------
        tuple
          ``(results, roi_sizes)`` -- the list of per-ROI results and the list
          of ROI feature counts (None if the 'roi_sizes' conditional attribute
          is disabled).
        """
        if __debug__:
            debug_slc_ = 'SLC_' in debug.active
            debug('SLC',
                  "Starting computing block for %i elements" % len(block))

        track_sizes = self.ca.is_enabled('roi_sizes')
        roi_sizes = [] if track_sizes else None
        results = []

        # build an ROI around every requested center and evaluate the measure
        for count, center in enumerate(block, 1):
            # ask the query engine which features belong to this ROI
            fids = self._queryengine[center]

            if __debug__ and debug_slc_:
                debug('SLC_', 'For %r query returned ids %r' % (center, fids))

            roi = ds[:, fids]

            if self.__add_center_fa:
                # flag the ROI seed in a boolean feature attribute
                seed_mask = np.zeros(roi.nfeatures, dtype='bool')
                seed_mask[fids.index(center)] = True
                roi.fa[self.__add_center_fa] = seed_mask

            outcome = measure(roi)
            if self.ca.is_enabled('roi_feature_ids'):
                # later aggregation needs the feature ids attached to a
                # dataset, so wrap plain results first
                if not is_datasetlike(outcome):
                    outcome = Dataset(np.atleast_1d(outcome))
                outcome.a['roi_feature_ids'] = fids
            results.append(outcome)

            if track_sizes:
                roi_sizes.append(roi.nfeatures)

            if __debug__:
                progress = float(count) / len(block) * 100
                debug('SLC', "Doing %i ROIs: %i (%i features) [%i%%]"
                      % (len(block), center + 1, roi.nfeatures, progress),
                      cr=True)

        return results, roi_sizes
开发者ID:jgors,项目名称:PyMVPA,代码行数:54,代码来源:searchlight.py

示例4: run

def run(args):
    """Assemble a dataset from the selected source and store it as HDF5.

    The data source is picked in this order: ``args.txt_data``,
    ``args.npy_data``, ``args.mri_data``.  If none of them is given the
    dataset is loaded from ``args.data`` instead; if one of them is given,
    ``args.data`` is ignored (with a verbose message).

    Afterwards the common dataset attribute options are applied, optional
    McFlirt motion parameters are added as sample attributes (prefixed with
    'mc_'), and the dataset is written to ``args.output`` (a '.hdf5' suffix
    is appended when missing).

    Parameters
    ----------
    args
      Parsed command line arguments.  The attributes read here are
      ``txt_data``, ``npy_data``, ``mri_data``, ``add_vol_attr``, ``mask``,
      ``data``, ``add_fsl_mcpar``, ``output`` and ``hdf5_compression``.

    Raises
    ------
    RuntimeError
      If no data source was specified at all.
    """
    from mvpa2.base.hdf5 import h5save
    ds = None
    if args.txt_data is not None:
        verbose(1, "Load data from TXT file '%s'" % args.txt_data)
        samples = _load_from_txt(args.txt_data)
        ds = Dataset(samples)
    elif args.npy_data is not None:
        verbose(1, "Load data from NPY file '%s'" % args.npy_data)
        samples = _load_from_npy(args.npy_data)
        ds = Dataset(samples)
    elif args.mri_data is not None:
        verbose(1, "Load data from MRI image(s) %s" % args.mri_data)
        from mvpa2.datasets.mri import fmri_dataset
        vol_attr = dict()
        if args.add_vol_attr is not None:
            # XXX add a way to use the mapper of an existing dataset to
            # add a volume attribute without having to load the entire
            # mri data again
            vol_attr = dict(args.add_vol_attr)
            # dict() silently collapses repeated names, so a length mismatch
            # reveals duplicates
            if len(args.add_vol_attr) != len(vol_attr):
                warning("--vol-attr option with duplicate attribute name: "
                        "check arguments!")
            verbose(2, "Add volumetric feature attributes: %s" % vol_attr)
        ds = fmri_dataset(args.mri_data, mask=args.mask, add_fa=vol_attr)

    if ds is None:
        if args.data is None:
            raise RuntimeError('no data source specified')
        # only the first dataset of the input is used
        ds = hdf2ds(args.data)[0]
    elif args.data is not None:
        verbose(1, 'ignoring dataset input in favor of other data source -- remove either one to disambiguate')

    # act on all attribute options
    ds = process_common_dsattr_opts(ds, args)

    if args.add_fsl_mcpar is not None:
        from mvpa2.misc.fsl.base import McFlirtParams
        mc_par = McFlirtParams(args.add_fsl_mcpar)
        for param in mc_par:
            verbose(2, "Add motion regressor as sample attribute '%s'"
                       % ('mc_' + param))
            ds.sa['mc_' + param] = mc_par[param]

    verbose(3, "Dataset summary %s" % (ds.summary()))
    # and store
    outfilename = args.output
    if not outfilename.endswith('.hdf5'):
        outfilename += '.hdf5'
    verbose(1, "Save dataset to '%s'" % outfilename)
    h5save(outfilename, ds, mkdir=True, compression=args.hdf5_compression)
开发者ID:liujiantong,项目名称:PyMVPA,代码行数:53,代码来源:cmd_mkds.py

示例5: eep_dataset

def eep_dataset(samples, targets=None, chunks=None):
    """Build a dataset from EEP binary data.

    EEP files are used by *eeprobe*, a software for analysing event-related
    potentials (ERP), which was developed at the Max-Planck Institute for
    Cognitive Neuroscience in Leipzig, Germany.

      http://www.ant-neuro.com/products/eeprobe

    Parameters
    ----------
    samples : str or EEPBin instance
      Either the filename of an EEP file (which is opened via `EEPBin`) or an
      already constructed EEPBin instance.
    targets, chunks
      Forwarded unchanged to `Dataset.from_channeltimeseries()`.

    Raises
    ------
    ValueError
      If `samples` is neither a string nor an EEPBin instance.
    """
    if not isinstance(samples, (str, EEPBin)):
        raise ValueError("eep_dataset takes the filename of an "
              "EEP file or a EEPBin object as 'samples' argument.")

    # a filename still has to be opened, an EEPBin is used as it is
    eb = EEPBin(samples) if isinstance(samples, str) else samples

    return Dataset.from_channeltimeseries(
        eb.data, targets=targets, chunks=chunks, t0=eb.t0, dt=eb.dt,
        channelids=eb.channels)
开发者ID:roshan-srin,项目名称:nidata,代码行数:25,代码来源:eeglab.py

示例6: eep_dataset

def eep_dataset(samples, targets=None, chunks=None):
    """Create a dataset using an EEP binary file as source.

    EEP files are used by *eeprobe*, a software for analysing event-related
    potentials (ERP), which was developed at the Max-Planck Institute for
    Cognitive Neuroscience in Leipzig, Germany.

      http://www.ant-neuro.com/products/eeprobe

    Parameters
    ----------
    samples : str or EEPBin instance
      This is either a filename of an EEP file, or an EEPBin instance,
      providing the samples data in EEP format.
    targets, chunks : sequence or scalar or None
      Values are passed through to `Dataset.from_channeltimeseries()`
      together with the data, `t0`, `dt` and channel names of the EEP source.

    Raises
    ------
    ValueError
      If `samples` is neither a filename nor an EEPBin instance.
    """
    given_filename = isinstance(samples, str)
    if not (given_filename or isinstance(samples, EEPBin)):
        raise ValueError("eep_dataset takes the filename of an "
              "EEP file or a EEPBin object as 'samples' argument.")

    # open the eep file when only its name was given
    reader = EEPBin(samples) if given_filename else samples

    data = reader.data
    timing_and_labels = dict(targets=targets, chunks=chunks,
                             t0=reader.t0, dt=reader.dt,
                             channelids=reader.channels)
    return Dataset.from_channeltimeseries(data, **timing_and_labels)
开发者ID:roshan-srin,项目名称:nidata,代码行数:33,代码来源:eep.py

示例7: test_random_affine_transformation

def test_random_affine_transformation():
    """Undoing the reported shift, scale and rotation restores the samples."""
    ds = Dataset.from_wizard(np.random.randn(8, 3, 2))
    distorted = random_affine_transformation(ds)

    # invert the distortion step by step using the parameters reported in
    # the dataset attributes
    unshifted = distorted.samples - distorted.a.random_shift
    unscaled = unshifted / distorted.a.random_scale
    restored = np.dot(unscaled, distorted.a.random_rotation.T)

    assert_array_almost_equal(restored, ds.samples)
开发者ID:Anhmike,项目名称:PyMVPA,代码行数:9,代码来源:test_misc.py

示例8: _fill_in_scattered_results

def _fill_in_scattered_results(sl, dataset, roi_ids, results):
    """Average scattered per-ROI results back onto all dataset features.

    This requires the searchlight conditional attribute 'roi_feature_ids'
    to be enabled, because every result is projected onto the features
    listed in its ``res.a.roi_feature_ids``.

    Parameters
    ----------
    sl
      Not referenced in this function body.
    dataset
      Dataset providing ``nfeatures`` and, if present, the ``mapper`` dataset
      attribute that is copied onto the output.
    roi_ids
      Not referenced in this function body.
    results
      Iterable of result blocks; each block is an iterable of result
      datasets.

    Returns
    -------
    Dataset
      Samples are the per-feature averages of all results that covered a
      feature.  The feature attribute 'observations' holds the number of
      contributing results, and 'null_prob' is added when the results carry
      such a feature attribute.
    """
    import numpy as np
    from mvpa2.datasets import Dataset

    resmap = None
    probmap = None
    # NOTE(review): the containers below are only created once the first
    # result is seen; with empty `results`, `observ_counter` is still unbound
    # when it is used after the loop.
    for resblock in results:
        for res in resblock:
            if resmap is None:
                # prepare the result container
                resmap = np.zeros((len(res), dataset.nfeatures),
                                  dtype=res.samples.dtype)
                if 'null_prob' in res.fa:
                    # initialize the prob map also with zeroes, as p=0 can never
                    # happen as an empirical result
                    probmap = np.zeros((dataset.nfeatures,) + res.fa.null_prob.shape[1:],
                                       dtype=res.samples.dtype)
                observ_counter = np.zeros(dataset.nfeatures, dtype=int)
            # project the result onto all features -- love broadcasting!
            resmap[:, res.a.roi_feature_ids] += res.samples
            if not probmap is None:
                probmap[res.a.roi_feature_ids] += res.fa.null_prob
            # increment observation counter for all relevant features
            observ_counter[res.a.roi_feature_ids] += 1
    # when all results have been added up average them according to the number
    # of observations
    # (features that were never covered are left at zero to avoid dividing
    # by zero)
    observ_mask = observ_counter > 0
    resmap[:, observ_mask] /= observ_counter[observ_mask]
    result_ds = Dataset(resmap,
                        fa={'observations': observ_counter})
    if not probmap is None:
        # transpose to make broadcasting work -- creates a view, so in-place
        # modification still does the job
        probmap.T[:, observ_mask] /= observ_counter[observ_mask]
        result_ds.fa['null_prob'] = probmap.squeeze()
    if 'mapper' in dataset.a:
        import copy
        # shallow copy of the input's mapper is attached to the output
        result_ds.a['mapper'] = copy.copy(dataset.a.mapper)
    return result_ds
开发者ID:ronimaimon,项目名称:mvpa_analysis,代码行数:42,代码来源:single_subject_sl.py

示例9: test_resample

def test_resample():
    """Check FFTResampleMapper on plain, sub-sampled and chunked datasets."""
    time = np.linspace(0, 2*np.pi, 100)
    waves = np.vstack((np.sin(time), np.cos(time))).T
    ds = Dataset(waves,
                 sa=dict(time=time,
                         section=np.repeat(range(10), 10)))
    assert_equal(ds.shape, (100, 2))

    # downsample to 10 samples
    num = 10
    gauss_window = ('gauss', 50)
    rm = FFTResampleMapper(num, window=gauss_window,
                           position_attr='time',
                           attr_strategy='sample')
    mds = rm.forward(ds)
    assert_equal(mds.shape, (num, ds.nfeatures))
    # the original dataset must be left untouched
    assert_equal(len(ds), 100)

    # position-based resampling: feed only every 10th input sample
    ds_partial = ds[0::10]
    mds_partial = rm.forward(ds_partial)
    # the output timepoints must not depend on the input sampling
    assert_array_almost_equal(mds.sa.time, mds_partial.sa.time)
    # the data should be very similar as well, once the first points are
    # left out to prevent edge effects
    assert_array_almost_equal(mds.samples[2:], mds_partial.samples[2:],
                              decimal=2)
    # simple sampling of sample attributes should give meaningful values
    assert_array_equal(mds.sa.section, range(10))

    # same procedure for a dataset made of two chunks
    cds = vstack([ds.copy(), ds.copy()])
    cds.sa['chunks'] = np.repeat([0, 1], len(ds))
    rm = FFTResampleMapper(num, attr_strategy='sample', chunks_attr='chunks',
                           window=gauss_window)
    mcds = rm.forward(cds)
    assert_equal(mcds.shape, (20, 2))
    assert_array_equal(mcds.sa.section, np.tile(range(10), 2))
    # every chunk on its own has to match the single-dataset result
    for chunk_samples in (mcds.samples[:10], mcds.samples[10:]):
        assert_array_almost_equal(mds.samples, chunk_samples)
开发者ID:Anhmike,项目名称:PyMVPA,代码行数:39,代码来源:test_filters.py

示例10: test_1d_multispace_searchlight

 def test_1d_multispace_searchlight(self):
     """Searchlight over two 1-D coordinate spaces picks the expected ROIs.

     Six features are laid out on a 3 x 2 grid (`coord1` x `coord2`).  The
     measure joins the sample values inside an ROI with '+', so the output
     spells out which features ended up in each ROI.
     """
     ds = Dataset([np.arange(6)])
     ds.fa['coord1'] = np.repeat(np.arange(3), 2)
     # add a second space to the dataset
     ds.fa['coord2'] = np.tile(np.arange(2), 3)

     def measure(roi):
         return "+".join([str(value) for value in roi.samples[0]])

     # (radius in coord1, radius in coord2) -> expected output samples
     cases = [
         # simply select each feature once
         ((0, 0), [['0', '1', '2', '3', '4', '5']]),
         ((0, 1), [['0+1', '0+1', '2+3', '2+3', '4+5', '4+5']]),
         ((1, 0), [['0+2', '1+3', '0+2+4', '1+3+5', '2+4', '3+5']]),
     ]
     for (radius1, radius2), expected in cases:
         qe = IndexQueryEngine(coord1=Sphere(radius1),
                               coord2=Sphere(radius2))
         res = Searchlight(measure, qe, nproc=1)(ds)
         assert_array_equal(res.samples, expected)
开发者ID:kirty,项目名称:PyMVPA,代码行数:24,代码来源:test_searchlight.py

示例11: _proc_block

    def _proc_block(self, block, ds, measure, seed=None, iblock='main'):
        """Little helper to capture the parts of the computation that can be
        parallelized

        NOTE(review): this listing is cut off after the hdf5 results are
        saved, so the remainder of the function (including what is returned)
        is not visible here.

        Parameters
        ----------
        block
          Sequence of ROI center ids; each one is looked up in the query
          engine.
        ds
          Dataset that gets sliced feature-wise into ROI datasets.
        measure
          Callable that is invoked with each ROI dataset.
        seed
          RNG seed.  Should be provided e.g. in child process invocations
          to guarantee that they all seed differently to not keep generating
          the same sequences due to reusing the same copy of numpy's RNG
        iblock
          Critical for generating non-colliding temp filenames in case
          of hdf5 backend.  Otherwise RNGs of different processes might
          collide in their temporary file names leading to problems.
        """
        if seed is not None:
            mvpa2.seed(seed)
        if __debug__:
            debug_slc_ = 'SLC_' in debug.active
            debug('SLC',
                  "Starting computing block for %i elements" % len(block))
        results = []
        store_roi_feature_ids = self.ca.is_enabled('roi_feature_ids')
        store_roi_sizes = self.ca.is_enabled('roi_sizes')
        store_roi_center_ids = self.ca.is_enabled('roi_center_ids')

        # any of the per-ROI annotations below needs a dataset to be attached
        # to, so plain results have to be wrapped in that case
        assure_dataset = any([store_roi_feature_ids,
                              store_roi_sizes,
                              store_roi_center_ids])

        # put rois around all features in the dataset and compute the
        # measure within them
        for i, f in enumerate(block):
            # retrieve the feature ids of all features in the ROI from the query
            # engine
            roi_specs = self._queryengine[f]

            if __debug__ and  debug_slc_:
                debug('SLC_', 'For %r query returned roi_specs %r'
                      % (f, roi_specs))

            # the query engine may return either plain feature ids or a
            # single-sample dataset whose samples are the feature ids
            if is_datasetlike(roi_specs):
                # TODO: unittest
                assert(len(roi_specs) == 1)
                roi_fids = roi_specs.samples[0]
            else:
                roi_fids = roi_specs

            # slice the dataset
            roi = ds[:, roi_fids]

            # carry over feature attributes supplied by the query engine
            if is_datasetlike(roi_specs):
                for n, v in roi_specs.fa.iteritems():
                    roi.fa[n] = v

            if self.__add_center_fa:
                # add fa to indicate ROI seed if requested
                roi_seed = np.zeros(roi.nfeatures, dtype='bool')
                if f in roi_fids:
                    roi_seed[roi_fids.index(f)] = True
                else:
                    # the seed mask stays all-False in this case
                    warning("Center feature attribute id %s not found" % f)
                roi.fa[self.__add_center_fa] = roi_seed

            # compute the datameasure and store in results
            res = measure(roi)

            if assure_dataset and not is_datasetlike(res):
                res = Dataset(np.atleast_1d(res))
            if store_roi_feature_ids:
                # add roi feature ids to intermediate result dataset for later
                # aggregation
                res.a['roi_feature_ids'] = roi_fids
            if store_roi_sizes:
                res.a['roi_sizes'] = roi.nfeatures
            if store_roi_center_ids:
                res.a['roi_center_ids'] = f
            results.append(res)

            if __debug__:
                debug('SLC', "Doing %i ROIs: %i (%i features) [%i%%]" \
                    % (len(block),
                       f + 1,
                       roi.nfeatures,
                       float(i + 1) / len(block) * 100,), cr=True)

        # optional post-processing is applied to the whole list of results
        # of this block
        if self.results_postproc_fx:
            if __debug__:
                debug('SLC', "Post-processing %d results in proc_block using %s"
                      % (len(results), self.results_postproc_fx))
            results = self.results_postproc_fx(results)
        if self.results_backend == 'native':
            pass                        # nothing special
        elif self.results_backend == 'hdf5':
            # store results in a temporary file and return a filename
            # NOTE(review): tempfile.mktemp only generates a name and is
            # documented as race-prone; consider tempfile.mkstemp
            results_file = tempfile.mktemp(prefix=self.tmp_prefix,
                                           suffix='-%s.hdf5' % iblock)
            if __debug__:
                debug('SLC', "Storing results into %s" % results_file)
            h5save(results_file, results)
#.........这里部分代码省略.........
开发者ID:andreirusu,项目名称:PyMVPA,代码行数:101,代码来源:searchlight.py

示例12: test_surf_queryengine

    def test_surf_queryengine(self, qefn):
        """Check SurfaceQueryEngine construction, training, I/O and queries.

        For every supported distance metric the engine is built, optionally
        round-tripped through HDF5 (untrained and trained), checked for the
        errors expected from an untrained engine and from invalid training
        data, and finally queried for every node of the surface.  Illegal
        metrics must be rejected at construction time.

        Parameters
        ----------
        qefn
          File name supplied by the caller.  When h5py is available it is
          replaced by a private temporary file that is removed again after
          each round trip.
        """
        s = surf.generate_plane((0, 0, 0), (0, 1, 0), (0, 0, 1), 4, 5)

        # add second layer
        s2 = surf.merge(s, (s + (.01, 0, 0)))

        ds = Dataset(samples=np.arange(20)[np.newaxis],
                     fa=dict(node_indices=np.arange(39, 0, -2)))

        # add more features (with shared node indices)
        ds3 = hstack((ds, ds, ds))

        radius = 2.5

        # Note: sweepargs is not used to avoid re-generating the same
        #       surface and dataset multiple times.
        for distance_metric in ('euclidean', 'dijkstra', '<illegal>', None):
            builder = lambda: queryengine.SurfaceQueryEngine(s2, radius,
                                                             distance_metric)
            if distance_metric in ('<illegal>', None):
                assert_raises(ValueError, builder)
                continue

            qe = builder()

            # test i/o and ensure that the untrained instance is not trained
            if externals.exists('h5py'):
                fd, qefn = tempfile.mkstemp('qe.hdf5', 'test')
                os.close(fd)
                h5save(qefn, qe)
                qe = h5load(qefn)
                os.remove(qefn)

            # untrained qe should give errors
            assert_raises(ValueError, lambda: qe.ids)
            assert_raises(ValueError, lambda: qe.query_byid(0))

            # node index out of bounds should give error
            ds_ = ds.copy()
            ds_.fa.node_indices[0] = 100
            assert_raises(ValueError, lambda: qe.train(ds_))

            # lack of node indices should give error
            ds_.fa.pop('node_indices')
            assert_raises(ValueError, lambda: qe.train(ds_))

            # train the qe
            qe.train(ds3)

            # test i/o and ensure that the loaded instance is trained
            if externals.exists('h5py'):
                h5save(qefn, qe)
                try:
                    qe = h5load(qefn)
                finally:
                    # h5save re-created the file that was removed above;
                    # clean it up so no temporary file is left behind
                    os.remove(qefn)

            # also probe one id below and one id above the valid node range
            for node in np.arange(-1, s2.nvertices + 1):
                if node < 0 or node >= s2.nvertices:
                    assert_raises(KeyError, lambda: qe.query_byid(node))
                    continue

                feature_ids = np.asarray(qe.query_byid(node))

                # node indices relative to ds
                base_ids = feature_ids[feature_ids < 20]

                # should have multiples of 20
                # (ds3 is ds stacked three times, so every hit in the first
                # copy must reappear at offsets 20 and 40)
                assert_equal(set(feature_ids),
                             set((base_ids[np.newaxis].T +
                                  [0, 20, 40]).ravel()))

                # reference computation straight from the surface
                node_indices = list(s2.circlearound_n2d(node,
                                    radius, distance_metric or 'dijkstra'))

                # `inode` (not `node`) so the loop variable of the enclosing
                # `for` is never shadowed or clobbered
                fa_indices = [fa_index for fa_index, inode in
                              enumerate(ds3.fa.node_indices)
                              if inode in node_indices]

                assert_equal(set(feature_ids), set(fa_indices))

            # smoke tests
            assert_true('SurfaceQueryEngine' in '%s' % qe)
            assert_true('SurfaceQueryEngine' in '%r' % qe)
开发者ID:Arthurkorn,项目名称:PyMVPA,代码行数:85,代码来源:test_surfing.py

示例13: _proc_block

    def _proc_block(self, block, ds, measure, iblock='main'):
        """Little helper to capture the parts of the computation that can be
        parallelized

        Parameters
        ----------
        block
          Sequence of ROI center ids; each one is looked up in the query
          engine to obtain the feature ids of its ROI.
        ds
          Dataset that gets sliced feature-wise into ROI datasets.
        measure
          Callable that is invoked with each ROI dataset.
        iblock
          Becomes part of the temporary file name in case of the hdf5
          backend, so files written for different blocks can be told apart.

        Returns
        -------
        tuple
          ``(results, roi_sizes)``.  `results` is the list of per-ROI results
          for the 'native' backend, or the name of the HDF5 file they were
          stored in for the 'hdf5' backend.  `roi_sizes` is the list of ROI
          feature counts, or None if the 'roi_sizes' conditional attribute is
          disabled.

        Raises
        ------
        RuntimeError
          If `results_backend` is neither 'native' nor 'hdf5'.
        """
        if __debug__:
            debug_slc_ = 'SLC_' in debug.active
            debug('SLC',
                  "Starting computing block for %i elements" % len(block))
        if self.ca.is_enabled('roi_sizes'):
            roi_sizes = []
        else:
            roi_sizes = None
        results = []
        # put rois around all features in the dataset and compute the
        # measure within them
        for i, f in enumerate(block):
            # retrieve the feature ids of all features in the ROI from the query
            # engine
            roi_fids = self._queryengine[f]

            if __debug__ and debug_slc_:
                debug('SLC_', 'For %r query returned ids %r' % (f, roi_fids))

            # slice the dataset
            roi = ds[:, roi_fids]

            if self.__add_center_fa:
                # add fa to indicate ROI seed if requested
                roi_seed = np.zeros(roi.nfeatures, dtype='bool')
                roi_seed[roi_fids.index(f)] = True
                roi.fa[self.__add_center_fa] = roi_seed

            # compute the datameasure and store in results
            res = measure(roi)
            if self.ca.is_enabled('roi_feature_ids'):
                if not is_datasetlike(res):
                    res = Dataset(np.atleast_1d(res))
                # add roi feature ids to intermediate result dataset for later
                # aggregation
                res.a['roi_feature_ids'] = roi_fids
            results.append(res)

            # store the size of the roi dataset
            if roi_sizes is not None:
                roi_sizes.append(roi.nfeatures)

            if __debug__:
                debug('SLC', "Doing %i ROIs: %i (%i features) [%i%%]"
                      % (len(block),
                         f + 1,
                         roi.nfeatures,
                         float(i + 1) / len(block) * 100,), cr=True)

        if self.results_backend == 'native':
            pass                        # nothing special
        elif self.results_backend == 'hdf5':
            # store results in a temporary file and return a filename
            import os
            # mkstemp atomically creates the file, so concurrently running
            # processes can never be handed the same name (mktemp only
            # generates a name and is race-prone)
            fd, results_file = tempfile.mkstemp(prefix=self.tmp_prefix,
                                                suffix='-%s.hdf5' % iblock)
            # only the reserved name is needed; h5save opens the file itself
            os.close(fd)
            if __debug__:
                debug('SLC', "Storing results into %s" % results_file)
            h5save(results_file, results)
            if __debug__:
                debug('SLC_', "Results stored")
            results = results_file
        else:
            raise RuntimeError("Must not reach this point")
        return results, roi_sizes
开发者ID:otizonaizit,项目名称:PyMVPA,代码行数:75,代码来源:searchlight.py

示例14: test_polydetrend

def test_polydetrend():
    """Check PolyDetrendMapper for whole-dataset and chunk-wise detrending.

    NOTE(review): this listing is cut off after the last chunked/time-ordered
    mapper call, so some of the fixtures defined at the top (e.g.
    `chunks_bad`, `target_whole`, `target_chunked`) are not used in the
    visible part.
    """
    # two features, six samples each (arrays are transposed to
    # samples x features)
    samples_forwhole = np.array( [[1.0, 2, 3, 4, 5, 6],
                                 [-2.0, -4, -6, -8, -10, -12]], ndmin=2 ).T
    samples_forchunks = np.array( [[1.0, 2, 3, 3, 2, 1],
                                  [-2.0, -4, -6, -6, -4, -2]], ndmin=2 ).T
    chunks = [0, 0, 0, 1, 1, 1]
    chunks_bad = [ 0, 0, 1, 1, 1, 0]
    target_whole = np.array( [[-3.0, -2, -1, 1, 2, 3],
                             [-6, -4, -2,  2, 4, 6]], ndmin=2 ).T
    target_chunked = np.array( [[-1.0, 0, 1, 1, 0, -1],
                               [2, 0, -2, -2, 0, 2]], ndmin=2 ).T


    ds = Dataset(samples_forwhole)

    # this one will auto-train the mapper on first use
    dm = PolyDetrendMapper(polyord=1, space='police')
    mds = dm.forward(ds)
    # features are linear trends, so detrending should remove all
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))
    # we get the information where each sample is assumed to be in the
    # space spanned by the polynomials
    assert_array_equal(mds.sa.police, np.arange(len(ds)))

    # hackish way to get the previous regressors into a dataset
    ds.sa['opt_reg_const'] = dm._regs[:,0]
    ds.sa['opt_reg_lin'] = dm._regs[:,1]
    # using these precomputed regressors, we should get the same result as
    # before even if we do not generate a regressor for linear
    dm_optreg = PolyDetrendMapper(polyord=0,
                                  opt_regs=['opt_reg_const', 'opt_reg_lin'])
    mds_optreg = dm_optreg.forward(ds)
    assert_array_almost_equal(mds_optreg, np.zeros(mds.shape))


    ds = Dataset(samples_forchunks)
    # 'constant' detrending removes the mean
    mds = PolyDetrendMapper(polyord=0).forward(ds)
    assert_array_almost_equal(
            mds.samples,
            samples_forchunks - np.mean(samples_forchunks, axis=0))
    # if there is no GLOBAL linear trend it should be identical to mean removal
    # even if trying to remove linear
    mds2 = PolyDetrendMapper(polyord=1).forward(ds)
    assert_array_almost_equal(mds, mds2)

    # chunk-wise detrending
    ds = dataset_wizard(samples_forchunks, chunks=chunks)
    dm = PolyDetrendMapper(chunks_attr='chunks', polyord=1, space='police')
    mds = dm.forward(ds)
    # features are chunkswise linear trends, so detrending should remove all
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))
    # we get the information where each sample is assumed to be in the
    # space spanned by the polynomials, which is the identical linspace in both
    # chunks
    # NOTE(review): `range(3) * 2` only works where range() returns a list
    # (Python 2); on Python 3 this expression raises TypeError
    assert_array_equal(mds.sa.police, range(3) * 2)
    # non-matching number of samples cannot be mapped
    assert_raises(ValueError, dm.forward, ds[:-1])
    # however, if the dataset knows about the space it is possible
    ds.sa['police'] = mds.sa.police
    # XXX this should be
    #mds2 = dm(ds[1:-1])
    #assert_array_equal(mds[1:-1], mds2)
    # XXX but right now is
    assert_raises(NotImplementedError, dm.forward, ds[1:-1])

    # Detrend must preserve the size of dataset
    assert_equal(mds.shape, ds.shape)

    # small additional test for break points
    # although they are no longer there
    ds = dataset_wizard(np.array([[1.0, 2, 3, 1, 2, 3]], ndmin=2).T,
                 targets=chunks, chunks=chunks)
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=1).forward(ds)
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))

    # test of different polyord on each chunk
    target_mixed = np.array( [[-1.0, 0, 1, 0, 0, 0],
                             [2.0, 0, -2, 0, 0, 0]], ndmin=2 ).T
    ds = dataset_wizard(samples_forchunks.copy(), targets=chunks, chunks=chunks)
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=[0,1]).forward(ds)
    assert_array_almost_equal(mds, target_mixed)

    # test irregular spacing of samples, but with corrective time info
    samples_forwhole = np.array( [[1.0, 4, 6, 8, 2, 9],
                                 [-2.0, -8, -12, -16, -4, -18]], ndmin=2 ).T
    # the first feature doubles as the time axis, so both features are exactly
    # linear in 'time'
    ds = Dataset(samples_forwhole, sa={'time': samples_forwhole[:,0]})
    # linear detrending that makes use of temporal info from dataset
    dm = PolyDetrendMapper(polyord=1, space='time')
    mds = dm.forward(ds)
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))

    # and now the same stuff, but with chunking and ordered by time
    samples_forchunks = np.array( [[1.0, 3, 3, 2, 2, 1],
                                  [-2.0, -6, -6, -4, -4, -2]], ndmin=2 ).T
    chunks = [0, 1, 0, 1, 0, 1]
    time = [4, 4, 12, 8, 8, 12]
    ds = Dataset(samples_forchunks.copy(), sa={'chunks': chunks, 'time': time})
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=1, space='time').forward(ds)

#.........这里部分代码省略.........
开发者ID:Anhmike,项目名称:PyMVPA,代码行数:101,代码来源:test_mapper_sp.py

示例15: test_datasetmapping

def test_datasetmapping():
    """Check BoxcarMapper forward/reverse mapping of a complete dataset.

    Verifies how samples, sample attributes and feature attributes are
    transformed by `forward`, what can be recovered by `reverse`, and that
    the mapper works inside a flatten-boxcar-flatten ChainMapper.
    """
    # 6 samples, 4X2 features
    data = np.arange(48).reshape(6,4,2)
    ds = Dataset(data,
                 sa={'timepoints': np.arange(6),
                     'multidim': data.copy()},
                 fa={'fid': np.arange(4)})
    # with overlapping and non-overlapping boxcars
    startpoints = [0, 1, 4]
    boxlength = 2
    bm = BoxcarMapper(startpoints, boxlength, space='boxy')
    # train is critical
    bm.train(ds)
    mds = bm.forward(ds)
    assert_equal(len(mds), len(startpoints))
    assert_equal(mds.nfeatures, boxlength)
    # all sample attributes remain, but they can be rotated/compressed into
    # multidimensional attributes
    assert_equal(sorted(mds.sa.keys()), ['boxy_onsetidx'] + sorted(ds.sa.keys()))
    assert_equal(mds.sa.multidim.shape,
            (len(startpoints), boxlength) + ds.shape[1:])
    assert_equal(mds.sa.timepoints.shape, (len(startpoints), boxlength))
    assert_array_equal(mds.sa.timepoints.flatten(),
                       np.array([(s, s+1) for s in startpoints]).flatten())
    assert_array_equal(mds.sa.boxy_onsetidx, startpoints)
    # feature attributes also get rotated and broadcasted
    assert_array_equal(mds.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    assert_array_equal(mds.fa.boxy_offsetidx, range(boxlength))

    # now see how it works on reverse()
    rds = bm.reverse(mds)
    # we got at least something of all original attributes back
    assert_equal(sorted(rds.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(rds.fa.keys()), sorted(ds.fa.keys()))
    # it is not possible to reconstruct the full samples array
    # some samples even might show up multiple times (when there are
    # overlapping boxcars)
    # here: original samples 0, 1, 1, 2, 4, 5 -- sample 1 is part of two
    # boxcars and sample 3 is part of none
    assert_array_equal(rds.samples,
                       np.array([[[ 0,  1], [ 2,  3], [ 4,  5], [ 6,  7]],
                                 [[ 8,  9], [10, 11], [12, 13], [14, 15]],
                                 [[ 8,  9], [10, 11], [12, 13], [14, 15]],
                                 [[16, 17], [18, 19], [20, 21], [22, 23]],
                                 [[32, 33], [34, 35], [36, 37], [38, 39]],
                                 [[40, 41], [42, 43], [44, 45], [46, 47]]]))
    assert_array_equal(rds.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(rds.sa.multidim, ds.sa.multidim[rds.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(rds.fa.fid, ds.fa.fid)

    # popular dataset configuration (double flatten + boxcar)
    cm= ChainMapper([FlattenMapper(), bm, FlattenMapper()])
    cm.train(ds)
    bflat = ds.get_mapped(cm)
    assert_equal(bflat.shape, (len(startpoints), boxlength * np.prod(ds.shape[1:])))
    # add attributes
    bflat.fa['testfa'] = np.arange(bflat.nfeatures)
    bflat.sa['testsa'] = np.arange(bflat.nsamples)
    # now try to go back
    bflatrev = bflat.mapper.reverse(bflat)
    # data should be same again, as far as the boxcars match
    assert_array_equal(ds.samples[:2], bflatrev.samples[:2])
    assert_array_equal(ds.samples[-2:], bflatrev.samples[-2:])
    # feature axis should match
    assert_equal(ds.shape[1:], bflatrev.shape[1:])
开发者ID:Anhmike,项目名称:PyMVPA,代码行数:65,代码来源:test_boxcarmapper.py


注:本文中的mvpa2.datasets.Dataset类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。