本文整理汇总了Python中pylearn2.datasets.dense_design_matrix.DenseDesignMatrix.__init__方法的典型用法代码示例。如果您正苦于以下问题：Python DenseDesignMatrix.__init__方法的具体用法？Python DenseDesignMatrix.__init__怎么用？Python DenseDesignMatrix.__init__使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pylearn2.datasets.dense_design_matrix.DenseDesignMatrix的用法示例。
在下文中一共展示了DenseDesignMatrix.__init__方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix [as 别名]
# 或者: from pylearn2.datasets.dense_design_matrix.DenseDesignMatrix import __init__ [as 别名]
def __init__(self, which_set, data_path=None,
             term_range=None, target_type='cluster100'):
    """
    Load one split of the WebCluster data into a DenseDesignMatrix.

    which_set: a string specifying which portion of the dataset
        to load. Valid values are 'train', 'valid' or 'test'
    data_path: a string specifying the directory containing the
        webcluster data. If None (default), use environment
        variable WEBCLUSTER_DATA_PATH.
    term_range: a tuple for taking only a slice of the available
        terms. Default is to use all 6275. For example, an input
        range of (10,2000) will truncate the 10 most frequent terms
        and the 6275-2000=4275 least frequent terms, where by frequency
        we mean how many unique documents each term is in.
    target_type: the type of targets to use. Valid options are
        'cluster[10,100,1000]', or None to build the matrix
        without targets.

    Raises NotImplementedError for any other target_type.
    """
    # The position of a name in this tuple is the column of the targets
    # file that holds it: 0:cluster10s, 1:cluster100s, 2:cluster1000s
    target_columns = ('cluster10', 'cluster100', 'cluster1000')
    if target_type is not None and target_type not in target_columns:
        # Reject an unsupported target type before loading anything.
        raise NotImplementedError(
            "unsupported target_type: %r" % (target_type,))

    # Keep the constructor arguments on the instance under their own names.
    self.which_set = which_set
    self.data_path = data_path
    self.term_range = term_range
    self.target_type = target_type
    self.corpus_terms = None
    self.doc_info = None

    print("loading WebCluster DDM. which_set = %s" % (self.which_set,))
    if self.data_path is None:
        self.data_path = string_utils.preprocess('${WEBCLUSTER_DATA_PATH}')

    fname = os.path.join(self.data_path, which_set + '_doc_inputs.npy')
    X = np.load(fname)
    if self.term_range is not None:
        X = X[:, self.term_range[0]:self.term_range[1]]

    # Scale every row to sum to one. np.true_divide is used instead of
    # `/` so that an integer-typed array is not floor-divided under
    # Python 2.
    # NOTE(review): a row whose sum is zero (possible after slicing with
    # term_range) still yields NaN here -- confirm whether that can occur.
    X = np.true_divide(X, X.sum(1).reshape(X.shape[0], 1))
    print(X.sum(1).mean())

    fname = os.path.join(self.data_path, which_set + '_doc_targets.npy')
    self.cluster_hierarchy = np.load(fname)

    y = None
    if self.target_type is not None:
        column = target_columns.index(self.target_type)
        y = self.cluster_hierarchy[:, column]

    DenseDesignMatrix.__init__(self, X=X, y=y)
    print("... WebCluster ddm loaded")
示例2: __init__
# 需要导入模块: from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix [as 别名]
# 或者: from pylearn2.datasets.dense_design_matrix.DenseDesignMatrix import __init__ [as 别名]
def __init__(self, filename, X=None, topo_view=None, y=None,
             load_all=False, **kwargs):
    """
    Open an HDF5 file and build the design matrix from its datasets.

    Parameters
    ----------
    filename : passed unchanged to h5py.File.
    X, topo_view, y : each value that is not None is resolved with
        self.get_dataset(value, load_all) and the result is handed to
        DenseDesignMatrix.__init__.
    load_all : stored as self.load_all and forwarded to get_dataset.
    kwargs : forwarded to DenseDesignMatrix.__init__. If it contains
        'preprocessor' and 'fit_preprocessor' is absent or is False,
        the preprocessor is moved to self._preprocessor and None is
        forwarded in its place.

    Raises
    ------
    RuntimeError
        If the module-level h5py name is None (import failed).
    """
    if h5py is None:
        raise RuntimeError("Could not import h5py.")

    # Always define the attribute so later reads cannot hit an
    # AttributeError, whichever combination of keyword arguments was given.
    self._preprocessor = None
    if ('preprocessor' in kwargs and
            kwargs.get('fit_preprocessor', False) is False):
        # Hold the preprocessor back from the base class constructor.
        # NOTE(review): presumably it is applied later by other methods
        # of this class -- confirm against the rest of the file.
        self._preprocessor = kwargs['preprocessor']
        kwargs['preprocessor'] = None

    self.load_all = load_all
    # The handle is kept on the instance and is not closed here.
    self._file = h5py.File(filename)

    if X is not None:
        X = self.get_dataset(X, load_all)
    if topo_view is not None:
        topo_view = self.get_dataset(topo_view, load_all)
    if y is not None:
        y = self.get_dataset(y, load_all)

    DenseDesignMatrix.__init__(self, X=X, topo_view=topo_view, y=y,
                               **kwargs)
示例3: __init__
# 需要导入模块: from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix [as 别名]
# 或者: from pylearn2.datasets.dense_design_matrix.DenseDesignMatrix import __init__ [as 别名]
def __init__(self,
patient_id,
which_set,
list_features,
leave_out_seizure_idx_valid,
leave_out_seizure_idx_test,
data_dir,
preictal_sec,
use_all_nonictals,
preprocessor_dir,
n_selected_features=-1,
batch_size=None,
balance_class=True,
axes=('b', 0, 1, 'c'),
default_seed=0):
self.balance_class = balance_class
self.batch_size = batch_size
tmp_list_features = np.empty(len(list_features), dtype=object)
for f_idx in range(len(list_features)):
tmp_list_features[f_idx] = FeatureList.get_info(list_features[f_idx])
list_features = tmp_list_features
print 'List of features:'
for f in list_features:
print f['feature'] + '.' + f['param']
print ''
EpilepsiaeFeatureLoader.__init__(self,
patient_id=patient_id,
which_set=which_set,
list_features=list_features,
leave_out_seizure_idx_valid=leave_out_seizure_idx_valid,
leave_out_seizure_idx_test=leave_out_seizure_idx_test,
data_dir=data_dir,
preictal_sec=preictal_sec,
use_all_nonictals=use_all_nonictals)
# Row: samples, Col: features
raw_X, y = self.load_data()
if n_selected_features != -1:
all_rank_df = None
for f_idx, feature in enumerate(self.list_features):
rank_df = pd.read_csv(os.path.join(data_dir, patient_id +
'/rank_feature_idx_' + feature['param'] + '_' +
'leaveout_' + str(leave_out_seizure_idx_valid) + '_' +
str(leave_out_seizure_idx_test) + '.txt'))
if f_idx == 0:
all_rank_df = rank_df
else:
offset_f_idx = 0
for i in range(f_idx):
offset_f_idx = offset_f_idx + self.list_features[i]['n_features']
rank_df['feature_idx'] = rank_df['feature_idx'].values + offset_f_idx
all_rank_df = pd.concat([all_rank_df, rank_df])
sorted_feature_df = all_rank_df.sort(['D_ADH'], ascending=[0])
self.selected_feature_idx = sorted_feature_df['feature_idx'][:n_selected_features]
raw_X = raw_X[:, self.selected_feature_idx]
else:
self.selected_feature_idx = np.arange(raw_X.shape[1])
# Print shape of input data
print '------------------------------'
print 'Dataset: {0}'.format(self.which_set)
print 'Number of samples: {0}'.format(raw_X.shape[0])
print ' Preictal samples: {0}'.format(self.preictal_samples)
print ' Nonictal samples: {0}'.format(self.nonictal_samples)
print ' NaN samples: {0}'.format(self.nan_non_flat_samples)
print ' Note for ''train'' and ''valid_train'': number of samples will be equal without removing the nan samples.'
print 'Number of features: {0}'.format(raw_X.shape[1])
print '------------------------------'
# Preprocessing
if which_set == 'train':
scaler = preprocessing.StandardScaler()
# scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
scaler = scaler.fit(raw_X)
with open(os.path.join(preprocessor_dir, self.patient_id + '_scaler_feature_' +
str(self.leave_out_seizure_idx_valid) + '_' +
str(self.leave_out_seizure_idx_test) + '.pkl'), 'wb') as f:
pickle.dump(scaler, f)
preprocessed_X = scaler.transform(raw_X)
else:
with open(os.path.join(preprocessor_dir, self.patient_id + '_scaler_feature_' +
str(self.leave_out_seizure_idx_valid) + '_' +
str(self.leave_out_seizure_idx_test) + '.pkl'), 'rb') as f:
scaler = pickle.load(f)
preprocessed_X = scaler.transform(raw_X)
raw_X = None
if self.which_set == 'train' or self.which_set == 'valid_train':
# Shuffle the data
print ''
print '*** Shuffle data ***'
#.........这里部分代码省略.........
示例4: __init__
# 需要导入模块: from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix [as 别名]
# 或者: from pylearn2.datasets.dense_design_matrix.DenseDesignMatrix import __init__ [as 别名]
def __init__(self, patient_id, which_set, preprocessor_path, data_dir, transform, window_size, batch_size,
specified_files=None, leave_one_out_file=None, axes=('b', 0, 1, 'c'), default_seed=0):
"""
The CHBMIT dataset customized for leave-one-file-out cross validation.
Parameters
----------
patient_id : int
Patient ID.
which_set : string
Name used to specify which partition of the dataset to be loaded (e.g., 'train', 'valid', or 'test').
If not specified, all data will be loaded.
preprocessor_path : string
File path to store the scaler for pre-processing the EEG data.
data_dir : string
Directory that store the source EEG data.
transform : string
Specify how to transform the data. ('multiple_channels' | 'single_channel')
window_size : int
Size of each sample.
batch_size : int
Size of the batch, used for zero-padding to make the the number samples dividable by the batch size.
specified_files : dictionary
Dictionary to specified which files are used for training, validation and testing.
leave_one_out_file : int
Index of the withheld file.
axes : tuple
axes of the DenseDesignMatrix.
default_seed : int, optional
Seed for random.
For preprocessing, see more in
https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/datasets/preprocessing.py
For customizing dataset, see more in
https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/scripts/icml_2013_wrepl/emotions/emotions_dataset.py
"""
self.patient_id = patient_id
self.data_dir = data_dir
self.preprocessor_path = preprocessor_path
self.window_size = window_size
self.n_classes = 2
self.default_seed = default_seed
self.transform = transform
self.specified_files = specified_files
self.leave_one_out_file = leave_one_out_file
self.batch_size = batch_size
raw_X, raw_y = self._load_data(which_set=which_set)
self.raw_X = raw_X
self.raw_y = raw_y
# Filter representative channels
if not(self.rep_channel_matlab_idx.get(patient_id) is None):
# Map the representative MATLAB index to python index
# Also the raw_data read from the .mat file has already removed inactive channels
# So we need to search for the match original index with MATLAB index
# Then transfer to the python index
self.rep_channel_python_idx = np.empty(0, dtype=int)
for ch in self.rep_channel_matlab_idx[patient_id]:
if ch in self.used_channel_matlab_idx:
ch_python_idx = np.where(ch == self.used_channel_matlab_idx)[0]
self.rep_channel_python_idx = np.append(self.rep_channel_python_idx, ch_python_idx)
else:
raise Exception('There is no representative channel ' + str(ch) + ' in the input data.')
assert np.all(self.used_channel_matlab_idx[self.rep_channel_python_idx] ==
self.rep_channel_matlab_idx[patient_id])
raw_X = raw_X[self.rep_channel_python_idx, :]
self.n_channels = self.rep_channel_python_idx.size
print 'Used channel MATLAB index:', self.used_channel_matlab_idx
print 'Representative channel MATLAB index:', self.rep_channel_matlab_idx[patient_id]
print 'Representative channel Python index:', self.rep_channel_python_idx
self.sample_shape = [self.window_size, 1, self.n_channels]
self.sample_size = np.prod(self.sample_shape)
# Preprocessing
if which_set == 'train':
scaler = preprocessing.StandardScaler()
scaler = scaler.fit(raw_X.transpose())
with open(self.preprocessor_path, 'w') as f:
pickle.dump(scaler, f)
scaled_X = scaler.transform(raw_X.transpose()).transpose()
else:
with open(self.preprocessor_path) as f:
scaler = pickle.load(f)
scaled_X = scaler.transform(raw_X.transpose()).transpose()
# Transform data into format usable by the network
if self.transform == 'multiple_channels':
X, y, view_converter = self._transform_multi_channel_data(X=scaled_X, y=raw_y)
elif self.transform == 'single_channel':
#.........这里部分代码省略.........
示例5: __init__
# 需要导入模块: from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix [as 别名]
# 或者: from pylearn2.datasets.dense_design_matrix.DenseDesignMatrix import __init__ [as 别名]
def __init__(self, patient_id, which_set, preprocessor_path, data_dir,
             leave_one_out_seizure, sample_size_second, batch_size,
             default_seed=0):
    """
    The Epilepsiae dataset customized for leave-one-seizure-out cross validation.

    Parameters
    ----------
    patient_id : int
        Patient ID. Not referenced in this constructor; the recording
        files are hard-coded below.
    which_set : string
        Name of the partition to load (e.g., 'train', 'valid', or 'test').
        Forwarded to load_data().
    preprocessor_path : string
        File path of the scaler used for pre-processing the EEG data.
        Forwarded to load_data().
    data_dir : string
        Directory that stores the source EEG data; it must contain
        'RECORDS-WITH-SEIZURES.txt'.
    leave_one_out_seizure : int
        Index of the withheld seizure.
    sample_size_second : int
        Number of seconds used to specify the sample size.
    batch_size : int
        Size of the batch. Stored on the instance and forwarded to
        load_data().
    default_seed : int, optional
        Seed for random.

    For preprocessing, see more in
    https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/datasets/preprocessing.py
    For customizing dataset, see more in
    https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/scripts/icml_2013_wrepl/emotions/emotions_dataset.py
    """
    # Recordings used by this dataset, relative to data_dir presumably.
    recording_files = [
        'rec_26402102/26402102_0003.mat',
        'rec_26402102/26402102_0007.mat',
        'rec_26402102/26402102_0008.mat',
        'rec_26402102/26402102_0017.mat',
    ]

    # Channel labels kept as the channel filter.
    channel_labels = np.asarray([
        u'FP1', u'FP2',
        u'F3', u'F4',
        u'C3', u'C4',
        u'P3', u'P4',
        u'O1', u'O2',
        u'F7', u'F8',
        u'T3', u'T4',
        u'T5', u'T6',
        u'FZ', u'CZ', u'PZ',
    ])

    # Seizure table; its file names are rewritten from '.data' to '.mat'.
    records_path = os.path.join(data_dir, 'RECORDS-WITH-SEIZURES.txt')
    seizure_table = pd.read_table(records_path, sep='\t')
    mat_names = seizure_table['filename'].str.replace('.data', '.mat', case=False)
    seizure_table['filename'] = mat_names

    # Everything below is stored before load_data() is called.
    self.data_dir = data_dir
    self.files = recording_files
    self.seizure_info = seizure_table
    self.filter_channels = channel_labels
    self.default_seed = default_seed
    self.leave_one_out_seizure = leave_one_out_seizure
    self.batch_size = batch_size

    loaded = self.load_data(which_set, sample_size_second, batch_size, preprocessor_path)
    X, y, n_channels, sample_size = loaded
    self.n_channels = n_channels
    self.sample_size = sample_size

    # Each consumer gets its own fresh list of axes.
    topo_axes = ('b', 0, 1, 'c')
    converter = DefaultViewConverter((1, sample_size, 1))
    converter.set_axes(axes=list(topo_axes))

    DenseDesignMatrix.__init__(self, X=X, y=y,
                               view_converter=converter,
                               axes=list(topo_axes))