当前位置: 首页>>代码示例>>Python>>正文


Python DenseDesignMatrix.__init__方法代码示例

本文整理汇总了Python中pylearn2.datasets.dense_design_matrix.DenseDesignMatrix.__init__方法的典型用法代码示例。如果您正苦于以下问题:Python DenseDesignMatrix.__init__方法的具体用法?Python DenseDesignMatrix.__init__怎么用?Python DenseDesignMatrix.__init__使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pylearn2.datasets.dense_design_matrix.DenseDesignMatrix的用法示例。


在下文中一共展示了DenseDesignMatrix.__init__方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix [as 别名]
# 或者: from pylearn2.datasets.dense_design_matrix.DenseDesignMatrix import __init__ [as 别名]
 def __init__(self, which_set, data_path=None,
              term_range=None, target_type='cluster100'):
     """
     Load one split of the WebCluster data into a DenseDesignMatrix.

     which_set: a string specifying which portion of the dataset
         to load. Valid values are 'train', 'valid' or 'test'
     data_path: a string specifying the directory containing the
         webcluster data. If None (default), use environment
         variable WEBCLUSTER_DATA_PATH.
     term_range: a tuple for taking only a slice of the available
         terms. Default is to use all 6275. For example, an input
         range of (10,2000) will truncate the 10 most frequent terms
         and the 6275-2000=4275 less frequent terms, where by frequency
         we mean how many unique documents each term is in.
         When given, every row is re-normalised by its sum over the
         retained terms.
     target_type: the type of targets to use. Valid options are
         'cluster10', 'cluster100', 'cluster1000' or None (no targets).

     Raises NotImplementedError for any other target_type.
     """
     # Column of the targets file that holds each clustering granularity.
     target_columns = {'cluster10': 0, 'cluster100': 1, 'cluster1000': 2}
     if target_type is not None and target_type not in target_columns:
         # Fail before touching the disk, and say what was wrong.
         raise NotImplementedError(
             "unsupported target_type %r; expected one of %s or None"
             % (target_type, sorted(target_columns)))

     self.which_set = which_set
     self.data_path = data_path
     self.term_range = term_range
     self.target_type = target_type

     self.corpus_terms = None
     self.doc_info = None

     print("loading WebCluster DDM. which_set = %s" % (self.which_set,))

     if self.data_path is None:
         self.data_path \
             = string_utils.preprocess('${WEBCLUSTER_DATA_PATH}')

     fname = os.path.join(self.data_path, which_set+'_doc_inputs.npy')
     X = np.load(fname)
     if self.term_range is not None:
         X = X[:, self.term_range[0]:self.term_range[1]]
         # Re-normalise each document over the retained terms. A document
         # left with no terms has a zero sum; dividing by 1 instead keeps
         # its row at zero rather than turning it into NaN/inf.
         row_sums = X.sum(1).reshape(X.shape[0], 1)
         row_sums[row_sums == 0] = 1
         X = X / row_sums
     # Diagnostic: mean row sum of the inputs.
     print(X.sum(1).mean())

     fname = os.path.join(self.data_path, which_set+'_doc_targets.npy')
     # columns: 0:cluster10s, 1:cluster100s, 2:cluster1000s
     self.cluster_hierarchy = np.load(fname)

     y = None
     if self.target_type is not None:
         y = self.cluster_hierarchy[:, target_columns[self.target_type]]

     DenseDesignMatrix.__init__(self, X=X, y=y)

     print("... WebCluster ddm loaded")
开发者ID:nicholas-leonard,项目名称:delicious,代码行数:56,代码来源:webcluster.py

示例2: __init__

# 需要导入模块: from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix [as 别名]
# 或者: from pylearn2.datasets.dense_design_matrix.DenseDesignMatrix import __init__ [as 别名]
 def __init__(self, filename, X=None, topo_view=None, y=None,
              load_all=False, **kwargs):
     """
     Open an HDF5 file and initialise a DenseDesignMatrix from it.

     filename: path handed to h5py.File.
     X, topo_view, y: when not None, each is resolved through
         self.get_dataset(value, load_all) before being passed on
         (presumably the name of a dataset inside the file -- confirm
         against get_dataset).
     load_all: stored on self and forwarded to get_dataset.
     **kwargs: forwarded to DenseDesignMatrix.__init__. If it holds a
         'preprocessor' and 'fit_preprocessor' is absent or is False,
         the preprocessor is kept in self._preprocessor and None is
         forwarded in its place.

     Raises RuntimeError if h5py could not be imported.
     """
     # Check the dependency before mutating any state.
     if h5py is None:
         raise RuntimeError("Could not import h5py.")

     # Always define the attribute: previously it was left unset when a
     # preprocessor was supplied with fit_preprocessor other than False,
     # so any later read of self._preprocessor raised AttributeError.
     self._preprocessor = None
     if ('preprocessor' in kwargs and
             kwargs.get('fit_preprocessor', False) is False):
         # Hold the preprocessor back instead of letting the base class
         # receive it.
         self._preprocessor = kwargs['preprocessor']
         kwargs['preprocessor'] = None

     self.load_all = load_all
     self._file = h5py.File(filename)
     if X is not None:
         X = self.get_dataset(X, load_all)
     if topo_view is not None:
         topo_view = self.get_dataset(topo_view, load_all)
     if y is not None:
         y = self.get_dataset(y, load_all)
     DenseDesignMatrix.__init__(self, X=X, topo_view=topo_view, y=y,
                                **kwargs)
开发者ID:everglory99,项目名称:deepAutoController,代码行数:24,代码来源:icmc.py

示例3: __init__

# 需要导入模块: from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix [as 别名]
# 或者: from pylearn2.datasets.dense_design_matrix.DenseDesignMatrix import __init__ [as 别名]
    def __init__(self,
                 patient_id,
                 which_set,
                 list_features,
                 leave_out_seizure_idx_valid,
                 leave_out_seizure_idx_test,
                 data_dir,
                 preictal_sec,
                 use_all_nonictals,
                 preprocessor_dir,
                 n_selected_features=-1,
                 batch_size=None,
                 balance_class=True,
                 axes=('b', 0, 1, 'c'),
                 default_seed=0):
        """
        Load pre-computed features, optionally keep the top-ranked ones,
        and standardise them with a scaler that is fitted on 'train' and
        reloaded from disk for every other set.

        Only the first part of this method is visible in this excerpt;
        the notes below cover the visible part only.

        Parameters
        ----------
        patient_id : presumably a string (it is concatenated into file
            paths below) -- TODO confirm.
        which_set : compared against 'train' and 'valid_train' below.
        list_features : sequence whose items are resolved through
            FeatureList.get_info; the results are indexed with the keys
            'feature', 'param' and 'n_features'.
        leave_out_seizure_idx_valid, leave_out_seizure_idx_test :
            forwarded to EpilepsiaeFeatureLoader and used (via str()) in
            the ranking and scaler file names.
        data_dir : directory forwarded to the loader and searched for the
            per-feature ranking files.
        preictal_sec, use_all_nonictals : forwarded to
            EpilepsiaeFeatureLoader.__init__ unchanged.
        preprocessor_dir : directory where the fitted scaler is pickled
            ('train') or read back (any other set).
        n_selected_features : -1 keeps every feature column; any other
            value keeps that many rows from the head of the ranking.
        batch_size, balance_class : stored on self.
        axes, default_seed : not used in the visible part of the method.
        """

        self.balance_class = balance_class
        self.batch_size = batch_size

        # Replace every entry of list_features by its FeatureList.get_info
        # result, held in an object array.
        tmp_list_features = np.empty(len(list_features), dtype=object)
        for f_idx in range(len(list_features)):
            tmp_list_features[f_idx] = FeatureList.get_info(list_features[f_idx])
        list_features = tmp_list_features

        print 'List of features:'
        for f in list_features:
            print f['feature'] + '.' + f['param']
        print ''

        # preprocessor_dir, n_selected_features, batch_size, balance_class,
        # axes and default_seed are not passed to the loader.
        EpilepsiaeFeatureLoader.__init__(self,
                                         patient_id=patient_id,
                                         which_set=which_set,
                                         list_features=list_features,
                                         leave_out_seizure_idx_valid=leave_out_seizure_idx_valid,
                                         leave_out_seizure_idx_test=leave_out_seizure_idx_test,
                                         data_dir=data_dir,
                                         preictal_sec=preictal_sec,
                                         use_all_nonictals=use_all_nonictals)
        # Row: samples, Col: features
        raw_X, y = self.load_data()

        if n_selected_features != -1:
            # Build one ranking table across all features by reading each
            # feature's ranking file and concatenating them.
            all_rank_df = None
            for f_idx, feature in enumerate(self.list_features):
                rank_df = pd.read_csv(os.path.join(data_dir, patient_id +
                                                 '/rank_feature_idx_' + feature['param'] + '_' +
                                                 'leaveout_' + str(leave_out_seizure_idx_valid) + '_' +
                                                 str(leave_out_seizure_idx_test) + '.txt'))
                if f_idx == 0:
                    all_rank_df = rank_df
                else:
                    # Shift this feature's indices by the total n_features of
                    # all preceding features so they address columns of the
                    # combined matrix.
                    offset_f_idx = 0
                    for i in range(f_idx):
                        offset_f_idx = offset_f_idx + self.list_features[i]['n_features']
                    rank_df['feature_idx'] = rank_df['feature_idx'].values + offset_f_idx
                    all_rank_df = pd.concat([all_rank_df, rank_df])

            # Sort by the 'D_ADH' column, descending, and keep the first
            # n_selected_features indices.
            # NOTE(review): DataFrame.sort was removed in pandas 0.20;
            # sort_values is the replacement on newer pandas.
            sorted_feature_df = all_rank_df.sort(['D_ADH'], ascending=[0])
            self.selected_feature_idx = sorted_feature_df['feature_idx'][:n_selected_features]
            raw_X = raw_X[:, self.selected_feature_idx]
        else:
            self.selected_feature_idx = np.arange(raw_X.shape[1])

        # Print shape of input data
        # (preictal_samples, nonictal_samples and nan_non_flat_samples are
        # not assigned in the visible code -- presumably set by the loader.)
        print '------------------------------'
        print 'Dataset: {0}'.format(self.which_set)
        print 'Number of samples: {0}'.format(raw_X.shape[0])
        print ' Preictal samples: {0}'.format(self.preictal_samples)
        print ' Nonictal samples: {0}'.format(self.nonictal_samples)
        print ' NaN samples: {0}'.format(self.nan_non_flat_samples)
        print ' Note for ''train'' and ''valid_train'': number of samples will be equal without removing the nan samples.'
        print 'Number of features: {0}'.format(raw_X.shape[1])
        print '------------------------------'

        # Preprocessing
        if which_set == 'train':
            # Fit the scaler on the training data and persist it so the
            # other sets can reuse the same statistics.
            scaler = preprocessing.StandardScaler()
            # scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
            scaler = scaler.fit(raw_X)

            with open(os.path.join(preprocessor_dir, self.patient_id + '_scaler_feature_' +
                                                     str(self.leave_out_seizure_idx_valid) + '_' +
                                                     str(self.leave_out_seizure_idx_test) + '.pkl'), 'wb') as f:
                pickle.dump(scaler, f)

            preprocessed_X = scaler.transform(raw_X)
        else:
            # Any other set reuses the pickled scaler; the file must already
            # exist, i.e. a 'train' instance has to be built first.
            with open(os.path.join(preprocessor_dir, self.patient_id + '_scaler_feature_' +
                                                     str(self.leave_out_seizure_idx_valid) + '_' +
                                                     str(self.leave_out_seizure_idx_test) + '.pkl'), 'rb') as f:
                scaler = pickle.load(f)

            preprocessed_X = scaler.transform(raw_X)

        # Drop the local reference to the unscaled matrix.
        raw_X = None

        if self.which_set == 'train' or self.which_set == 'valid_train':
            # Shuffle the data
            print ''
            print '*** Shuffle data ***'
#......... part of the code is omitted here .........
开发者ID:akaraspt,项目名称:epilepsy-system,代码行数:103,代码来源:epilepsiae.py

示例4: __init__

# 需要导入模块: from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix [as 别名]
# 或者: from pylearn2.datasets.dense_design_matrix.DenseDesignMatrix import __init__ [as 别名]
    def __init__(self, patient_id, which_set, preprocessor_path, data_dir, transform, window_size, batch_size,
                 specified_files=None, leave_one_out_file=None, axes=('b', 0, 1, 'c'), default_seed=0):
        """
        The CHBMIT dataset customized for leave-one-file-out cross validation.

        Only the first part of this method is visible in this excerpt.

        Parameters
        ----------
        patient_id : int
            Patient ID.
        which_set : string
            Name used to specify which partition of the dataset is loaded (e.g., 'train', 'valid', or 'test').
            The scaler is fitted and saved only when this is 'train'; any other value loads the saved scaler.
        preprocessor_path : string
            File path used to store (and later reload) the scaler for pre-processing the EEG data.
        data_dir : string
            Directory that stores the source EEG data.
        transform : string
            Specify how to transform the data. ('multiple_channels' | 'single_channel')
        window_size : int
            Size of each sample.
        batch_size : int
            Size of the batch, used for zero-padding to make the number of samples divisible by the batch size.
        specified_files : dictionary
            Dictionary specifying which files are used for training, validation and testing.
        leave_one_out_file : int
            Index of the withheld file.
        axes : tuple
            Axes of the DenseDesignMatrix (not used in the visible part of the method).
        default_seed : int, optional
            Seed for random.

        For preprocessing, see more in
            https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/datasets/preprocessing.py

        For customizing dataset, see more in
            https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/scripts/icml_2013_wrepl/emotions/emotions_dataset.py

        """

        self.patient_id = patient_id
        self.data_dir = data_dir
        self.preprocessor_path = preprocessor_path
        self.window_size = window_size
        self.n_classes = 2
        self.default_seed = default_seed
        self.transform = transform
        self.specified_files = specified_files
        self.leave_one_out_file = leave_one_out_file
        self.batch_size = batch_size

        raw_X, raw_y = self._load_data(which_set=which_set)

        # Keep the unfiltered, unscaled data on the instance.
        self.raw_X = raw_X
        self.raw_y = raw_y

        # Filter representative channels
        # (rep_channel_matlab_idx and used_channel_matlab_idx are not
        # assigned in the visible code -- presumably a class attribute and
        # something set by _load_data; verify.)
        if not(self.rep_channel_matlab_idx.get(patient_id) is None):
            # Map the representative MATLAB index to python index
            # Also the raw_data read from the .mat file has already removed inactive channels
            # So we need to search for the match original index with MATLAB index
            # Then transfer to the python index
            self.rep_channel_python_idx = np.empty(0, dtype=int)
            for ch in self.rep_channel_matlab_idx[patient_id]:
                if ch in self.used_channel_matlab_idx:
                    ch_python_idx = np.where(ch == self.used_channel_matlab_idx)[0]
                    self.rep_channel_python_idx = np.append(self.rep_channel_python_idx, ch_python_idx)
                else:
                    raise Exception('There is no representative channel ' + str(ch) + ' in the input data.')
            # Sanity check: the mapped positions must select exactly the
            # requested representative channels, in the same order.
            assert np.all(self.used_channel_matlab_idx[self.rep_channel_python_idx] ==
                          self.rep_channel_matlab_idx[patient_id])

            # Rows of raw_X are indexed by channel here.
            raw_X = raw_X[self.rep_channel_python_idx, :]
            self.n_channels = self.rep_channel_python_idx.size

            print 'Used channel MATLAB index:', self.used_channel_matlab_idx
            print 'Representative channel MATLAB index:', self.rep_channel_matlab_idx[patient_id]
            print 'Representative channel Python index:', self.rep_channel_python_idx

        # NOTE(review): when the branch above is skipped, self.n_channels
        # must already have been set elsewhere -- confirm.
        self.sample_shape = [self.window_size, 1, self.n_channels]
        self.sample_size = np.prod(self.sample_shape)

        # Preprocessing
        # The scaler is applied to the transpose of raw_X and the result is
        # transposed back, so scaled_X has the same layout as raw_X.
        if which_set == 'train':
            scaler = preprocessing.StandardScaler()
            scaler = scaler.fit(raw_X.transpose())

            # NOTE(review): the pickle is written in text mode ('w'); binary
            # mode ('wb') is required on Python 3 and safer on Windows.
            with open(self.preprocessor_path, 'w') as f:
                pickle.dump(scaler, f)

            scaled_X = scaler.transform(raw_X.transpose()).transpose()
        else:
            # NOTE(review): read in default text mode; should be 'rb' to
            # match a binary-mode dump.
            with open(self.preprocessor_path) as f:
                scaler = pickle.load(f)

            scaled_X = scaler.transform(raw_X.transpose()).transpose()

        # Transform data into format usable by the network
        if self.transform == 'multiple_channels':
            X, y, view_converter = self._transform_multi_channel_data(X=scaled_X, y=raw_y)
        elif self.transform == 'single_channel':
#......... part of the code is omitted here .........
开发者ID:akaraspt,项目名称:epilepsy-system,代码行数:103,代码来源:chbmit.py

示例5: __init__

# 需要导入模块: from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix [as 别名]
# 或者: from pylearn2.datasets.dense_design_matrix.DenseDesignMatrix import __init__ [as 别名]
    def __init__(self, patient_id, which_set, preprocessor_path, data_dir,
                 leave_one_out_seizure, sample_size_second, batch_size,
                 default_seed=0):
        """
        Epilepsiae dataset set up for leave-one-seizure-out cross validation.

        Parameters
        ----------
        patient_id : int
            Patient ID. Not read by this method; the recording files are
            hard-coded below.
        which_set : string
            Partition to load (e.g., 'train', 'valid', or 'test');
            forwarded to load_data.
        preprocessor_path : string
            File path of the scaler used for pre-processing the EEG data;
            forwarded to load_data.
        data_dir : string
            Directory holding the source EEG data and the
            'RECORDS-WITH-SEIZURES.txt' index.
        leave_one_out_seizure : int
            Index of the withheld seizure.
        sample_size_second : int
            Number of seconds per sample; forwarded to load_data.
        batch_size : int
            Batch size; forwarded to load_data so the number of samples can
            be made divisible by it.
        default_seed : int, optional
            Seed for random.

        For preprocessing, see more in
            https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/datasets/preprocessing.py

        For customizing dataset, see more in
            https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/scripts/icml_2013_wrepl/emotions/emotions_dataset.py

        """

        # Seizure index: one row per recording; the listed '.data' names are
        # rewritten to the '.mat' names of the files actually loaded.
        records_path = os.path.join(data_dir, 'RECORDS-WITH-SEIZURES.txt')
        records = pd.read_table(records_path, sep='\t')
        records['filename'] = records['filename'].str.replace('.data', '.mat', case=False)

        # Plain configuration, stored before load_data runs.
        self.data_dir = data_dir
        self.default_seed = default_seed
        self.leave_one_out_seizure = leave_one_out_seizure
        self.batch_size = batch_size
        self.seizure_info = records

        # Recordings used by this dataset.
        self.files = [
            'rec_26402102/26402102_0003.mat',
            'rec_26402102/26402102_0007.mat',
            'rec_26402102/26402102_0008.mat',
            'rec_26402102/26402102_0017.mat',
        ]

        # Scalp electrodes to keep.
        self.filter_channels = np.asarray([
            u'FP1', u'FP2',
            u'F3', u'F4',
            u'C3', u'C4',
            u'P3', u'P4',
            u'O1', u'O2',
            u'F7', u'F8',
            u'T3', u'T4',
            u'T5', u'T6',
            u'FZ', u'CZ', u'PZ',
        ])

        loaded = self.load_data(which_set, sample_size_second, batch_size, preprocessor_path)
        X, y, self.n_channels, self.sample_size = loaded

        # Each design-matrix row is viewed as a (1, sample_size, 1) topology.
        axes_order = ('b', 0, 1, 'c')
        view_converter = DefaultViewConverter((1, self.sample_size, 1))
        view_converter.set_axes(axes=list(axes_order))

        DenseDesignMatrix.__init__(self,
                                   X=X,
                                   y=y,
                                   view_converter=view_converter,
                                   axes=list(axes_order))
开发者ID:akaraspt,项目名称:epilepsy-system,代码行数:82,代码来源:epilepsiae.py


注:本文中的pylearn2.datasets.dense_design_matrix.DenseDesignMatrix.__init__方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。