本文整理汇总了Python中pylearn2.format.target_format.OneHotFormatter.format方法的典型用法代码示例。如果您正苦于以下问题：Python OneHotFormatter.format方法的具体用法？Python OneHotFormatter.format怎么用？Python OneHotFormatter.format使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pylearn2.format.target_format.OneHotFormatter的用法示例。
在下文中一共展示了OneHotFormatter.format方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_dtype_errors
# 需要导入模块: from pylearn2.format.target_format import OneHotFormatter [as 别名]
# 或者: from pylearn2.format.target_format.OneHotFormatter import format [as 别名]
def test_dtype_errors():
    """Check that a bad (floating-point) label dtype raises TypeError.

    Both entry points are exercised: the symbolic ``theano_expr`` and the
    numeric ``format``.
    """
    fmt = OneHotFormatter(max_labels=50)

    # Symbolic path: a floatX vector is not a valid label container.
    try:
        fmt.theano_expr(theano.tensor.vector(dtype=theano.config.floatX))
    except TypeError:
        symbolic_rejected = True
    else:
        symbolic_rejected = False
    assert symbolic_rejected

    # Numeric path: float64 labels must be refused as well.
    try:
        fmt.format(numpy.zeros(10, dtype='float64'))
    except TypeError:
        numeric_rejected = True
    else:
        numeric_rejected = False
    assert numeric_rejected
示例2: check_one_hot_formatter
# 需要导入模块: from pylearn2.format.target_format import OneHotFormatter [as 别名]
# 或者: from pylearn2.format.target_format.OneHotFormatter import format [as 别名]
def check_one_hot_formatter(seed, max_labels, dtype, ncases):
    """Check that random integer labels are one-hot encoded correctly.

    Parameters
    ----------
    seed : int
        Seed for the ``numpy.random.RandomState`` that draws the labels.
    max_labels : int
        Number of classes; labels are drawn from ``[0, max_labels - 1]``.
    dtype : str or dtype
        dtype passed on to ``OneHotFormatter``.
    ncases : int
        Number of labels to draw and encode.

    Raises
    ------
    AssertionError
        If the number of nonzero entries differs from ``ncases`` or a label's
        own column is not set to 1.
    """
    rng = numpy.random.RandomState(seed)
    fmt = OneHotFormatter(max_labels=max_labels, dtype=dtype)
    integer_labels = rng.random_integers(0, max_labels - 1, size=ncases)
    one_hot_labels = fmt.format(integer_labels)
    # zip() returns a lazy iterator on Python 3 and has no len(); materialize
    # it first so the check works on both Python 2 and Python 3.
    assert len(list(zip(*one_hot_labels.nonzero()))) == ncases
    for case, label in enumerate(integer_labels):
        assert one_hot_labels[case, label] == 1
示例3: test_bad_arguments
# 需要导入模块: from pylearn2.format.target_format import OneHotFormatter [as 别名]
# 或者: from pylearn2.format.target_format.OneHotFormatter import format [as 别名]
def test_bad_arguments():
    """Verify that invalid constructor and call arguments raise errors.

    Covers a negative ``max_labels``, a string ``max_labels``, an unknown
    dtype identifier, and 2-D input handed to ``format`` / ``theano_expr``.
    """
    # Make sure an invalid max_labels raises an error: negative value ...
    try:
        OneHotFormatter(max_labels=-10)
    except ValueError:
        negative_rejected = True
    else:
        negative_rejected = False
    assert negative_rejected

    # ... and a string where a number is expected.
    try:
        OneHotFormatter(max_labels='10')
    except ValueError:
        string_rejected = True
    else:
        string_rejected = False
    assert string_rejected

    # Make sure an invalid dtype identifier raises an error.
    try:
        OneHotFormatter(max_labels=10, dtype='invalid')
    except TypeError:
        dtype_rejected = True
    else:
        dtype_rejected = False
    assert dtype_rejected

    fmt = OneHotFormatter(max_labels=10)

    # Make sure an invalid ndim raises an error for format().
    try:
        fmt.format(numpy.zeros((2, 3), dtype='int32'))
    except ValueError:
        format_ndim_rejected = True
    else:
        format_ndim_rejected = False
    assert format_ndim_rejected

    # Make sure an invalid ndim raises an error for theano_expr().
    try:
        fmt.theano_expr(theano.tensor.imatrix())
    except ValueError:
        expr_ndim_rejected = True
    else:
        expr_ndim_rejected = False
    assert expr_ndim_rejected
示例4: generate_datasets
# 需要导入模块: from pylearn2.format.target_format import OneHotFormatter [as 别名]
# 或者: from pylearn2.format.target_format.OneHotFormatter import format [as 别名]
def generate_datasets(inputs):
    """Build train/valid/test volumetric datasets with alternating labels.

    Every second example (indices 0, 2, 4, ...) is labelled class 1 and has 1
    added to its input values; all other examples are class 0. ``inputs`` is
    modified in place. Examples 0-49 become the training set, 50-74 the
    validation set and 75-99 the test set.

    Returns
    -------
    tuple
        ``(train_set, valid_set, test_set)`` of VolumetricDenseDesignMatrix.
    """
    labels = np.zeros(inputs.shape[0]).astype('int')
    labels[::2] = 1  # every second target is class 1, the others class 0
    is_class_one = labels == 1
    inputs[is_class_one] = inputs[is_class_one] + 1

    one_hot = OneHotFormatter(2).format(labels)

    def make_set(start, stop):
        # One dataset over the half-open example range [start, stop).
        return VolumetricDenseDesignMatrix(topo_view=inputs[start:stop],
                                           y=one_hot[start:stop],
                                           axes=('b', 0, 1, 2, 'c'))

    train_set = make_set(0, 50)
    valid_set = make_set(50, 75)
    test_set = make_set(75, 100)
    return train_set, valid_set, test_set
示例5: test_one_hot_formatter_simple
# 需要导入模块: from pylearn2.format.target_format import OneHotFormatter [as 别名]
# 或者: from pylearn2.format.target_format.OneHotFormatter import format [as 别名]
def test_one_hot_formatter_simple():
    """Generator test: yield one randomized one-hot check per dtype.

    For every dtype in ``all_types`` a ``(check, seed, max_labels, dtype,
    ncases)`` tuple is yielded with randomly drawn ``max_labels`` (1-30) and
    ``ncases`` (1-100). After the loop, a (1, 1) uint8 input is checked to
    produce output of shape (1, 1, 10).
    """
    def check_one_hot_formatter(seed, max_labels, dtype, ncases):
        # Encode `ncases` random labels and verify exactly one hot entry per
        # case, located at the label's own column.
        label_rng = numpy.random.RandomState(seed)
        formatter = OneHotFormatter(max_labels=max_labels, dtype=dtype)
        labels = label_rng.random_integers(0, max_labels - 1, size=ncases)
        encoded = formatter.format(labels)
        hot_positions = list(zip(*encoded.nonzero()))
        assert len(hot_positions) == ncases
        for row, label in enumerate(labels):
            assert encoded[row, label] == 1

    rng = numpy.random.RandomState(0)
    for seed, dtype in enumerate(all_types):
        # Draw order matters for reproducibility: max_labels first, then ncases.
        max_labels = rng.random_integers(1, 30)
        ncases = rng.random_integers(1, 100)
        yield (check_one_hot_formatter, seed, max_labels, dtype, ncases)

    fmt = OneHotFormatter(max_labels=10)
    single_label = numpy.zeros((1, 1), dtype="uint8")
    assert fmt.format(single_label).shape == (1, 1, 10)
示例6: _transform_single_channel_data
# 需要导入模块: from pylearn2.format.target_format import OneHotFormatter [as 别名]
# 或者: from pylearn2.format.target_format.OneHotFormatter import format [as 别名]
def _transform_single_channel_data(self, X, y):
    """Window single-channel data and one-hot encode per-window labels.

    ``X`` and ``y`` are reshaped into rows of ``self.window_size`` samples.
    A window's label is 1 when the sum of its sample labels is positive.
    The window labels are tiled ``self.n_channels`` times before encoding.

    Returns
    -------
    tuple
        ``(windowed_X, hot_y, None)``; the third item is always ``None``.
    """
    window_shape = (-1, self.window_size)
    windowed_X = np.reshape(X, window_shape)
    windowed_y = np.reshape(y, window_shape)

    # Format the target into proper format: collapse each window to 0/1
    window_labels = np.sum(windowed_y, axis=1)
    window_labels[window_labels > 0] = 1

    # Duplicate the labels for all channels
    tiled_labels = np.tile(window_labels, self.n_channels)

    hot_y = OneHotFormatter(max_labels=self.n_classes).format(tiled_labels)
    return windowed_X, hot_y, None
示例7: ConditionalGeneratorTestCase
# 需要导入模块: from pylearn2.format.target_format import OneHotFormatter [as 别名]
# 或者: from pylearn2.format.target_format.OneHotFormatter import format [as 别名]
class ConditionalGeneratorTestCase(unittest.TestCase):
    """Unit tests for ``ConditionalGenerator`` wrapped around a one-layer MLP."""

    def setUp(self):
        """Create the condition space, formatter, distribution, MLP and generator."""
        self.noise_dim = 10
        self.num_labels = 10

        self.condition_dtype = 'uint8'
        self.condition_space = VectorSpace(dim=self.num_labels, dtype=self.condition_dtype)
        self.condition_formatter = OneHotFormatter(self.num_labels, dtype=self.condition_dtype)
        self.condition_distribution = OneHotDistribution(self.condition_space)

        # TODO this nvis stuff is dirty. The ConditionalGenerator should handle it
        self.mlp_nvis = self.noise_dim + self.num_labels
        self.mlp_nout = 1

        # Set up model
        self.mlp = MLP(nvis=self.mlp_nvis, layers=[Linear(self.mlp_nout, 'out', irange=0.1)])
        self.G = ConditionalGenerator(input_condition_space=self.condition_space,
                                      condition_distribution=self.condition_distribution,
                                      noise_dim=self.noise_dim,
                                      mlp=self.mlp)

    def test_conditional_generator_input_setup(self):
        """Check that conditional generator correctly sets up composite
        input layer."""

        # Feedforward: We want the net to ignore the noise and simply
        # convert the one-hot vector to a number.  The noise rows of the
        # weight matrix are zero; the condition rows hold 0..num_labels-1.
        weights = np.concatenate([np.zeros((self.mlp_nout, self.noise_dim)),
                                  np.array(range(self.num_labels)).reshape((1, -1)).repeat(self.mlp_nout, axis=0)],
                                 axis=1).T.astype(theano.config.floatX)
        self.mlp.layers[0].set_weights(weights)

        inp = (T.matrix(), T.matrix(dtype=self.condition_dtype))
        f = theano.function(inp, self.G.mlp.fprop(inp))

        # Feeding one-hot label i (plus random noise) must give back i.
        assert_array_equal(
            f(np.random.rand(self.num_labels, self.noise_dim).astype(theano.config.floatX),
              self.condition_formatter.format(np.array(range(self.num_labels)))),
            np.array(range(self.num_labels)).reshape(self.num_labels, 1))

    def test_sample_noise(self):
        """Test barebones noise sampling."""

        n = T.iscalar()
        cond_inp = self.condition_distribution.sample(n)
        sample_and_noise = theano.function([n], self.G.sample_and_noise(cond_inp, all_g_layers=True)[1])

        # Parenthesized single-argument form: a syntax error-free call on
        # Python 3 and identical output to the print statement on Python 2.
        print(sample_and_noise(15))
示例8: check_one_hot_formatter
# 需要导入模块: from pylearn2.format.target_format import OneHotFormatter [as 别名]
# 或者: from pylearn2.format.target_format.OneHotFormatter import format [as 别名]
def check_one_hot_formatter(seed, max_labels, dtype, ncases, nmultis):
    """Check the k-hot ("merge" mode) encoding of random multi-label cases.

    Draws ``ncases * nmultis`` random labels in ``[0, max_labels - 1]``,
    arranges them as an ``(ncases, nmultis)`` matrix and encodes it with
    ``mode="merge"``.

    Raises
    ------
    AssertionError
        If the number of nonzero outputs does not match the number of unique
        labels per case, or if a case's labels are not all set.
    """
    rng = numpy.random.RandomState(seed)
    formatter = OneHotFormatter(max_labels=max_labels, dtype=dtype)
    flat_labels = rng.random_integers(0, max_labels - 1, size=ncases * nmultis)
    label_matrix = flat_labels.reshape(ncases, nmultis)
    k_hot = formatter.format(label_matrix, mode="merge")

    # The number of ones would equal ncases * nmultis only if no case
    # contained a repeated label (e.g. [1, 2, 2, 3, 5, 6]). Repeated labels
    # are allowed, so that different cases can belong to different numbers of
    # classes, and a repeated label activates only one unit in the k-hot
    # representation. Hence the expected count of ones is the number of
    # unique labels per case, summed over all cases.
    unique_per_case = numpy.concatenate([numpy.unique(row) for row in label_matrix])
    active_positions = list(zip(*k_hot.nonzero()))
    assert len(active_positions) == len(unique_per_case)

    for case_index, case_labels in enumerate(label_matrix):
        assert numpy.sum(k_hot[case_index, case_labels]) == nmultis
示例9: _transform_multi_channel_data
# 需要导入模块: from pylearn2.format.target_format import OneHotFormatter [as 别名]
# 或者: from pylearn2.format.target_format.OneHotFormatter import format [as 别名]
def _transform_multi_channel_data(self, X, y):
    """Partition multi-channel data into windows and build a design matrix.

    The data is partitioned with ``self._partition_data`` using
    ``self.window_size``, its last two axes are swapped, a singleton axis is
    inserted before the last axis, and the result is converted to a design
    matrix. A window's label is 1 when the sum of its labels is positive.

    Returns
    -------
    tuple
        ``(design_matrix, one_hot_labels, view_converter)``.
    """
    # Data partitioning
    windows_X, windows_y = self._partition_data(X=X, y=y, partition_size=self.window_size)

    swapped = np.transpose(windows_X, [0, 2, 1])
    n_windows, dim_a, dim_b = swapped.shape
    topo_X = np.reshape(swapped, (n_windows, dim_a, 1, dim_b))

    # Create view converter
    converter = DefaultViewConverter(shape=self.sample_shape,
                                     axes=('b', 0, 1, 'c'))

    # Convert data into a design matrix and verify the round trip is lossless
    design_X = converter.topo_view_to_design_mat(topo_X)
    round_trip = converter.design_mat_to_topo_view(design_X)
    assert np.all(topo_X == round_trip)

    # Format the target into proper format
    window_labels = np.sum(windows_y, axis=1)
    window_labels[window_labels > 0] = 1
    hot_y = OneHotFormatter(max_labels=self.n_classes).format(window_labels)

    return design_X, hot_y, converter
示例10: __init__
# 需要导入模块: from pylearn2.format.target_format import OneHotFormatter [as 别名]
# 或者: from pylearn2.format.target_format.OneHotFormatter import format [as 别名]
# ......... (part of the code is omitted here) .........
# Fragment of a dataset constructor: the `def` line and the end of the
# per-channel loop are not part of this excerpt. Visible steps: filter the
# data files by metadata, copy their metadata, optionally let a partitioner
# pick the files, then load each file and preprocess it channel by channel.
# keep only files that match the metadata filters
self.datafiles = apply_filters([subjects,trial_types,trial_numbers,conditions], datafiles_metadata)
# copy metadata for retained files
self.metadb = {}
for datafile in self.datafiles:
self.metadb[datafile] = metadb[datafile]
# print self.datafiles
# print self.metadb
self.name = name
# when a partitioner is given, it replaces the filtered file list
if partitioner is not None:
self.datafiles = partitioner.get_partition(self.name, self.metadb)
self.include_phase = include_phase
self.spectrum_normalization_mode = spectrum_normalization_mode
self.spectrum_log_amplitude = spectrum_log_amplitude
self.sequence_partitions = [] # used to keep track of original sequences
# metadata: [subject, trial_no, stimulus, channel, start, ]
self.metadata = []
sequences = []
labels = []
n_sequences = 0
# derive the hop size from the frame size when only a fraction is given
# NOTE(review): np.ceil returns a float, and if frame_size and hop_fraction
# are both ints under Python 2 the `/` already truncates before the ceil --
# confirm the intended rounding and that a float hop_size is acceptable.
if frame_size > 0 and hop_size == -1 and hop_fraction is not None:
hop_size = np.ceil(frame_size / hop_fraction)
for i in xrange(len(self.datafiles)):
with log_timing(log, 'loading data from {}'.format(self.datafiles[i])):
# save start of next sequence
self.sequence_partitions.append(n_sequences)
data, metadata = load(os.path.join(path, self.datafiles[i]))
label = metadata['label']
# optionally translate the raw label through label_map
if label_map is not None:
label = label_map[label]
multi_channel_frames = []
# process 1 channel at a time
for channel in xrange(data.shape[1]):
# filter channels
if not channel_filter.keep_channel(channel):
continue
samples = data[:, channel]
# subtract channel mean
# NOTE(review): if `data` is a NumPy array, `samples` is a view and this
# in-place subtraction also changes `data` -- confirm this is intended.
if remove_dc_offset:
samples -= samples.mean()
# down-sample if requested
if resample is not None and resample[0] != resample[1]:
samples = librosa.resample(samples, resample[0], resample[1])
# apply optional signal filter after down-sampling -> requires lower order
if signal_filter is not None:
samples = signal_filter.process(samples)
示例11: __init__
# 需要导入模块: from pylearn2.format.target_format import OneHotFormatter [as 别名]
# 或者: from pylearn2.format.target_format.OneHotFormatter import format [as 别名]
# NOTE(review): `selectors`, `meta_sources`, `trial_processors` and
# `transformers` use mutable default values, and `channel_filter` defaults to
# a single NoChannelFilter() instance; all of these are created once at
# definition time and shared between calls -- confirm none is mutated.
def __init__(self,
db, # data source
name = '', # optional name
selectors = dict(),
partitioner = None,
meta_sources = [], # optional sources other than 'features' and 'targets' from metadata
channel_filter = NoChannelFilter(), # optional channel filter, default: keep all
channel_names = None, # optional channel names (for metadata)
label_attribute = 'label', # metadata attribute to be used as label
label_map = None, # optional conversion of labels
use_targets = True, # use targets if provided, otherwise labels are used
remove_dc_offset = False, # optional subtraction of channel mean, usually done already earlier
resample = None, # optional down-sampling
normalize = True, # normalize to max=1
# optional sub-sequences selection
start_sample = 0,
stop_sample = None, # optional for selection of sub-sequences
zero_padding = True, # if True (default), trials that are too short will be padded
# (presumably with zeros); otherwise they will be rejected.
# optional signal filter to be applied before splitting the signal
signal_filter = None,
trial_processors = [], # optional processing of the trials
target_processor = None, # optional processing of the targets, e.g. zero-padding
transformers = [], # optional transformations of the dataset
layout='tf', # (0,1)-axes layout tf=time x features or ft=features x time
debug=False,
):
'''
Constructor.

Selects trials from ``db`` with ``selectors`` and, for every selected
trial, fetches either its target (when ``use_targets`` is true) or its
label taken from the trial metadata. The rest of the body is not part
of this excerpt.
'''
# save params
# snapshot of all constructor arguments; `self` is removed right after
self.params = locals().copy()
del self.params['self']
# print self.params
self.name = name
self.debug = debug
metadb = DatasetMetaDB(db.metadata, selectors.keys())
# the partitioner argument is currently ignored (see FIXME)
if partitioner is not None:
pass # FIXME
selected_trial_ids = metadb.select(selectors)
log.info('selectors: {}'.format(selectors))
log.info('selected trials: {}'.format(selected_trial_ids))
if normalize:
log.info('Data will be normalized to max amplitude 1 per channel (normalize=True).')
trials = list()
labels = list()
targets = list()
meta = list()
# 'auto-min' / 'auto-max' resolve stop_sample to the shortest / longest
# selected trial, measured along the last axis of the trial data
if stop_sample == 'auto-min':
stop_sample = np.min([db.data[trial_i].shape[-1] for trial_i in selected_trial_ids])
log.info('Using minimum trial length. stop_sample={}'.format(stop_sample))
elif stop_sample == 'auto-max':
stop_sample = np.max([db.data[trial_i].shape[-1] for trial_i in selected_trial_ids])
log.info('Using maximum trial length. stop_sample={}'.format(stop_sample))
for trial_i in selected_trial_ids:
trial_meta = db.metadata[trial_i]
if use_targets:
# NOTE(review): `targets` is the local list created above, so this
# condition is never true; presumably `db.targets is None` was meant --
# confirm against the full source.
if targets is None:
target = None
else:
target = db.targets[trial_i]
# targets must not contain NaN, before and after processing
assert not np.isnan(np.sum(target))
if target_processor is not None:
target = target_processor.process(target, trial_meta)
assert not np.isnan(np.sum(target))
else:
# get and process label
label = db.metadata[trial_i][label_attribute]
if label_map is not None:
label = label_map[label]
processed_trial = []
trial = db.data[trial_i]
if np.isnan(np.sum(trial)):
# ......... (part of the code is omitted here) .........
示例12: OneHotFormatter
# 需要导入模块: from pylearn2.format.target_format import OneHotFormatter [as 别名]
# 或者: from pylearn2.format.target_format.OneHotFormatter import format [as 别名]
# Number of samples drawn for each condition (one grid row per condition)
sample_cols = 5

# Build the symbolic one-hot condition fed to the generator
condition_space = model.generator.condition_space
conditional_batch = condition_space.make_theano_batch()
formatter = OneHotFormatter(rows, dtype=condition_space.dtype)
conditional = formatter.theano_expr(conditional_batch, mode='concatenate')

# Now sample from generator
# For some reason format_as from VectorSpace is not working right
topo_samples_batch = model.generator.sample(conditional)
topo_sample_f = theano.function([conditional], topo_samples_batch)

# Numeric conditions: each label 0..rows-1 repeated sample_cols times, laid
# out as a single column and then one-hot encoded
repeated_labels = np.concatenate([np.repeat(i, sample_cols) for i in range(rows)])
label_column = repeated_labels.reshape((rows * sample_cols, 1))
conditional_data = formatter.format(label_column, mode='concatenate')

topo_samples = topo_sample_f(conditional_data)

# Round-trip through the design matrix so the samples use ('b', 0, 1, 'c') axes
samples = dataset.get_design_matrix(topo_samples)
dataset.axes = ['b', 0, 1, 'c']
dataset.view_converter.axes = ['b', 0, 1, 'c']
topo_samples = dataset.get_topological_view(samples)

# One extra column per row beyond the generated samples
pv = PatchViewer(grid_shape=(rows, sample_cols + 1), patch_shape=(32,32),
                 is_color=True)
scale = np.abs(samples).max()

X = dataset.X
topo = dataset.get_topological_view()
index = 0
示例13: load_data
# 需要导入模块: from pylearn2.format.target_format import OneHotFormatter [as 别名]
# 或者: from pylearn2.format.target_format.OneHotFormatter import format [as 别名]
def load_data(self):
# Load features and labels for one patient according to self.which_set.
# Only the start of the method is part of this excerpt: metadata loading
# and the 'train' / 'valid_train' branch (preictal samples are labelled 1,
# nonictal samples 0).
# Get the directory of the patient data
patient_dir = os.path.join(self.data_dir, self.patient_id)
# Load metadata about dataset from MAT file
metadata_fname = os.path.join(patient_dir, 'trainset_' + str(self.preictal_sec) + '.mat')
metadata_mat = loadmat(metadata_fname)
# Get number of seizures
self.n_seizures = metadata_mat.get('ictals').size
# Get detail of the segment
self.sampling_rate = metadata_mat['sampling_rate'][0][0]
self.segment_sec = metadata_mat['segment_sec'][0][0]
self.segment_samples = self.sampling_rate * self.segment_sec
# Get the number of blocks to extend from the withheld seizure
self.n_extended_blocks_test = metadata_mat['n_extended_blocks_test'][0][0]
# sample counters, reset on every load
self.preictal_samples = 0
self.nonictal_samples = 0
self.nan_non_flat_samples = 0
# Examples of indexing through MAT file
# mat['nonictals'][i][0]['filename'][0][0][0][j][0]
# mat['nonictals'][i][0]['idx'][0][0][0][j][0]
# mat['nonictals'][i][0]['n_segments'][0][0][0][0]
# Load shuffle data
if self.which_set == 'train' or self.which_set == 'valid_train':
# 'train' uses every seizure except the validation and test hold-outs;
# 'valid_train' uses only the validation hold-out seizure
if self.which_set == 'train':
select_idx = np.setdiff1d(range(metadata_mat['preictals'].size),
np.asarray([self.leave_out_seizure_idx_valid,
self.leave_out_seizure_idx_test]))
else:
select_idx = np.asarray([self.leave_out_seizure_idx_valid])
X = None
y = None
if self.use_all_nonictals:
# accumulate the preictal features of all selected seizures along axis 1
temp_preictal_X = None
for i in select_idx:
print '====== Seizure', i, '======'
# Pre-ictal
temp_X = self.load_feature(part='preictals',
list_features=self.list_features,
seizure_idx=i,
metadata_mat=metadata_mat,
patient_dir=patient_dir)
if not (temp_preictal_X is None):
temp_preictal_X = np.concatenate((temp_preictal_X, temp_X), axis=1)
else:
temp_preictal_X = temp_X
self.preictal_samples = temp_preictal_X.shape[1]
# Non-ictal data
temp_nonictal_X = self.load_feature(part='nonictals_all',
list_features=self.list_features,
seizure_idx=self.leave_out_seizure_idx_test,
metadata_mat=metadata_mat,
patient_dir=patient_dir)
# preictal columns come first, so the first preictal_samples labels are 1
X = np.concatenate((temp_preictal_X, temp_nonictal_X), axis=1)
y = np.zeros(X.shape[1], dtype=int)
y[range(self.preictal_samples)] = 1
self.nonictal_samples = temp_nonictal_X.shape[1]
print 'Preictal samples: {0}, Nonictal samples: {1}'.format(self.preictal_samples,
self.nonictal_samples)
# sanity check: the positive labels are exactly the leading preictal columns
if not np.all(np.arange(self.preictal_samples) == np.where(y)[0]):
raise Exception('There is a mismatch between the number of preictal data and labels.')
else:
# per-seizure loading: nonictal and preictal features for each selected seizure
for i in select_idx:
print '====== Seizure', i, '======'
# Non-ictal data
temp_nonictal_X = self.load_feature(part='nonictals',
list_features=self.list_features,
seizure_idx=i,
metadata_mat=metadata_mat,
patient_dir=patient_dir)
# Pre-ictal
temp_preictal_X = self.load_feature(part='preictals',
list_features=self.list_features,
seizure_idx=i,
metadata_mat=metadata_mat,
patient_dir=patient_dir)
# Concatenate preictal and nonictal data
temp_X = np.concatenate((temp_preictal_X, temp_nonictal_X), axis=1)
temp_y = np.zeros(temp_X.shape[1], dtype=int)
temp_y[range(temp_preictal_X.shape[1])] = 1
# ......... (part of the code is omitted here) .........
示例14: load_data
# 需要导入模块: from pylearn2.format.target_format import OneHotFormatter [as 别名]
# 或者: from pylearn2.format.target_format.OneHotFormatter import format [as 别名]
#.........这里部分代码省略.........
# Leave-one-out cross-validation - seizure
n_seizures = seizure_range_idx.shape[0]
rest_seizure_idx = np.setdiff1d(np.arange(n_seizures), test_seizure_idx)
perm_rest_seizure_idx = np.random.permutation(rest_seizure_idx)
train_seizure_idx = perm_rest_seizure_idx
cv_seizure_idx = perm_rest_seizure_idx
# Leave-one-out cross-validation - non-seizure
n_train_segments = int(n_segments * 0.6)
n_cv_segments = int(n_segments * 0.2)
non_seizure_segment_idx = np.arange(n_segments)
perm_non_seizure_segment_idx = np.random.permutation(non_seizure_segment_idx)
train_sample_segments = perm_non_seizure_segment_idx[:n_train_segments]
cv_sample_segments = perm_non_seizure_segment_idx[n_train_segments:n_train_segments+n_cv_segments]
test_sample_segments = perm_non_seizure_segment_idx[n_train_segments+n_cv_segments:]
train_sample_idx = np.empty(0, dtype=int)
for s in train_sample_segments:
train_sample_idx = np.append(train_sample_idx, segment_idx[s])
cv_sample_idx = np.empty(0, dtype=int)
for s in cv_sample_segments:
cv_sample_idx = np.append(cv_sample_idx, segment_idx[s])
test_sample_idx = np.empty(0, dtype=int)
for s in test_sample_segments:
test_sample_idx = np.append(test_sample_idx, segment_idx[s])
print 'Segment index for train, cv and test sets:', \
train_sample_segments, cv_sample_segments, test_sample_segments
print 'Seizure index for train, cv and test sets:', \
train_seizure_idx, cv_seizure_idx, [test_seizure_idx]
if which_set == 'train':
print("Loading training data...")
data = raw_data[:,non_seizure_round_sample_idx[train_sample_idx]]
labels = raw_labels[non_seizure_round_sample_idx[train_sample_idx]]
select_seizure = train_seizure_idx
elif which_set == 'valid':
print("Loading validation data...")
data = raw_data[:,non_seizure_round_sample_idx[cv_sample_idx]]
labels = raw_labels[non_seizure_round_sample_idx[cv_sample_idx]]
select_seizure = cv_seizure_idx
elif which_set == 'test':
print("Loading test data...")
data = raw_data[:,non_seizure_round_sample_idx[test_sample_idx]]
labels = raw_labels[non_seizure_round_sample_idx[test_sample_idx]]
select_seizure = [test_seizure_idx]
elif which_set == 'all':
print("Loading all data...")
data = raw_data
labels = raw_labels
select_seizure = []
else:
raise('Invalid set.')
# Add seizure data
for sz in select_seizure:
data = np.concatenate((data, raw_data[:, seizure_round_sample_idx[sz]]), axis=1)
labels = np.concatenate((labels, raw_labels[seizure_round_sample_idx[sz]]), axis=1)
# No filtering
# Preprocessing
if which_set == 'train':
scaler = preprocessing.StandardScaler()
scaler = scaler.fit(data.transpose())
with open(scaler_path, 'w') as f:
pickle.dump(scaler, f)
data = scaler.transform(data.transpose()).transpose()
else:
with open(scaler_path) as f:
scaler = pickle.load(f)
data = scaler.transform(data.transpose()).transpose()
# Input transformation
X = np.reshape(data, (-1, sample_size))
y = np.reshape(labels, (-1, sample_size))
y = np.sum(y, 1).transpose()
y[y > 0] = 1
print 'Seizure index after transform:', np.where(y)[0]
self.seizure_seconds = np.where(y)[0]
# Duplicate the labels for all channels
y = np.tile(y, n_channels)
# Format the target into proper format
n_classes = 2
one_hot_formatter = OneHotFormatter(max_labels=n_classes)
y = one_hot_formatter.format(y)
# Check batch size
cut_off = X.shape[0] % batch_size
if cut_off > 0:
X = X[:-cut_off,:]
y = y[:-cut_off,:]
return X, y, n_channels, sample_size
示例15: __init__
# 需要导入模块: from pylearn2.format.target_format import OneHotFormatter [as 别名]
# 或者: from pylearn2.format.target_format.OneHotFormatter import format [as 别名]
def __init__(self, which_set, onehot_dtype='uint8',
center=False, rescale=False, gcn=None,
start=None, stop=None, axes=('b', 0, 1, 'c'),
toronto_prepro=False, preprocessor=None):
"""Modified version of the CIFAR10 constructor which creates Y
as one-hot vectors rather than simple indexes. This is super
hacky. Sorry, Guido.."""
# Visible steps: locate the CIFAR-10 batch files, load the training and
# test batches, pick the subset named by `which_set`, one-hot encode the
# labels with dtype `onehot_dtype`, then apply the optional centering /
# rescaling / toronto preprocessing. The end of the body is not part of
# this excerpt.
# note: there is no such thing as the cifar10 validation set;
# pylearn1 defined one but really it should be user-configurable
# (as it is here)
self.axes = axes
# we define here:
dtype = 'uint8'
ntrain = 50000
nvalid = 0 # artefact, we won't use it
ntest = 10000
# we also expose the following details:
self.img_shape = (3, 32, 32)
self.img_size = numpy.prod(self.img_shape)
self.n_classes = 10
self.label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
'dog', 'frog', 'horse', 'ship', 'truck']
# prepare loading
fnames = ['data_batch_%i' % i for i in range(1, 6)]
datasets = {}
datapath = os.path.join(
string_utils.preprocess('${PYLEARN2_DATA_PATH}'),
'cifar10', 'cifar-10-batches-py')
# every batch file must exist; each is registered through the dataset cache
for name in fnames + ['test_batch']:
fname = os.path.join(datapath, name)
if not os.path.exists(fname):
raise IOError(fname + " was not found. You probably need to "
"download the CIFAR-10 dataset by using the "
"download script in "
"pylearn2/scripts/datasets/download_cifar10.sh "
"or manually from "
"http://www.cs.utoronto.ca/~kriz/cifar.html")
datasets[name] = cache.datasetCache.cache_file(fname)
# round the number of training rows up to a multiple of 10000
# NOTE(review): numpy.ceil returns a float and `lenx` is used as an array
# dimension below; recent NumPy versions reject non-integer shapes --
# confirm and convert to int if needed.
lenx = numpy.ceil((ntrain + nvalid) / 10000.) * 10000
x = numpy.zeros((lenx, self.img_size), dtype=dtype)
y = numpy.zeros((lenx, 1), dtype=dtype)
# load train data
nloaded = 0
for i, fname in enumerate(fnames):
_logger.info('loading file %s' % datasets[fname])
data = serial.load(datasets[fname])
# each batch file fills one block of 10000 rows
x[i * 10000:(i + 1) * 10000, :] = data['data']
y[i * 10000:(i + 1) * 10000, 0] = data['labels']
nloaded += 10000
if nloaded >= ntrain + nvalid + ntest:
break
# load test data
_logger.info('loading file %s' % datasets['test_batch'])
data = serial.load(datasets['test_batch'])
# process this data
Xs = {'train': x[0:ntrain],
'test': data['data'][0:ntest]}
Ys = {'train': y[0:ntrain],
'test': data['labels'][0:ntest]}
# NOTE(review): numpy.cast is presumably unavailable in NumPy 2.x --
# confirm the targeted NumPy version.
X = numpy.cast['float32'](Xs[which_set])
y = Ys[which_set]
# labels that are still a plain list are converted to a uint8 array here
if isinstance(y, list):
y = numpy.asarray(y).astype(dtype)
if which_set == 'test':
assert y.shape[0] == 10000
y = y.reshape((y.shape[0], 1))
# replace the integer label column by its one-hot encoding
formatter = OneHotFormatter(self.n_classes, dtype=onehot_dtype)
y = formatter.format(y, mode='concatenate')
if center:
X -= 127.5
self.center = center
if rescale:
X /= 127.5
self.rescale = rescale
# toronto_prepro is mutually exclusive with center and gcn; it scales to
# [0, 1] and, for the test set, subtracts the training-set mean
if toronto_prepro:
assert not center
assert not gcn
X = X / 255.
if which_set == 'test':
other = CIFAR10(which_set='train')
oX = other.X
oX /= 255.
X = X - oX.mean(axis=0)
else:
# ......... (part of the code is omitted here) .........