This page collects typical usage examples of the Python method utils.AttributeDict.valid. If you are unsure what AttributeDict.valid does or how to call it, the curated examples below may help; see also the documentation for the enclosing class utils.AttributeDict.
Below are 8 code examples of the AttributeDict.valid method, ordered by popularity by default. All snippets additionally assume that numpy, a module-level logger, and the relevant Fuel dataset classes (MNIST, CIFAR10, ...) are available in the enclosing module; helpers such as ContrastNorm and ZCA come from the surrounding repository.
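For context: every example below only relies on AttributeDict behaving like a dict whose keys can also be read and written as attributes (d.valid is d['valid']). A minimal sketch of such a class, as an assumption about what utils.AttributeDict provides (the real implementation may differ in detail):

class AttributeDict(dict):
    """A dict whose keys are also accessible as attributes."""
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__

d = AttributeDict()
d.valid = 'validation set'    # equivalent to d['valid'] = 'validation set'
assert d['valid'] == d.valid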
Example 1: get_mnist_data_dict
# Required import: from utils import AttributeDict [as alias]
# Or: from utils.AttributeDict import valid [as alias]
def get_mnist_data_dict(unlabeled_samples, valid_set_size, test_set=False):
    train_set = MNIST(("train",))
    # Make sure the MNIST data is in the right format (float32 in [0, 1])
    train_set.data_sources = (
        (train_set.data_sources[0] / 255.).astype(numpy.float32),
        train_set.data_sources[1])
    # Take all indices and permute them
    all_ind = numpy.arange(train_set.num_examples)
    rng = numpy.random.RandomState(seed=1)
    rng.shuffle(all_ind)
    data = AttributeDict()
    # Choose the training set
    data.train = train_set
    data.train_ind = all_ind[:unlabeled_samples]
    # Then choose the validation set from the remaining indices
    data.valid = train_set
    data.valid_ind = numpy.setdiff1d(all_ind, data.train_ind)[:valid_set_size]
    logger.info('Using %d examples for validation' % len(data.valid_ind))
    # Only touch the test data if requested
    if test_set:
        data.test = MNIST(("test",))
        data.test_ind = numpy.arange(data.test.num_examples)
    return data
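A hypothetical call to this helper (argument values chosen for illustration; MNIST's train split has 60000 examples):

data = get_mnist_data_dict(unlabeled_samples=50000, valid_set_size=10000,
                           test_set=True)
print(len(data.train_ind), len(data.valid_ind))  # 50000 10000
# data.valid is the same MNIST object as data.train; only the index
# arrays data.train_ind and data.valid_ind differ.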
Example 2: setup_data
# Required import: from utils import AttributeDict [as alias]
# Or: from utils.AttributeDict import valid [as alias]
def setup_data(p, test_set=False):
    dataset_class, training_set_size = {
        "cifar10": (CIFAR10, 40000),
        "mnist": (MNIST, 50000),
    }[p.dataset]
    # Allow overriding the default from the command line
    if p.get("unlabeled_samples") is not None:
        training_set_size = p.unlabeled_samples
    train_set = dataset_class("train")
    # Make sure the MNIST data is in the right format
    if p.dataset == "mnist":
        d = train_set.data_sources[train_set.sources.index("features")]
        assert numpy.all(d <= 1.0) and numpy.all(d >= 0.0), \
            "Make sure data is in float format and in range 0 to 1"
    # Take all indices and permute them
    all_ind = numpy.arange(train_set.num_examples)
    if p.get("dseed"):
        rng = numpy.random.RandomState(seed=p.dseed)
        rng.shuffle(all_ind)
    d = AttributeDict()
    # Choose the training set
    d.train = train_set
    d.train_ind = all_ind[:training_set_size]
    # Then choose the validation set from the remaining indices
    d.valid = train_set
    d.valid_ind = numpy.setdiff1d(all_ind, d.train_ind)[:p.valid_set_size]
    logger.info("Using %d examples for validation" % len(d.valid_ind))
    # Only touch the test data if requested
    if test_set:
        d.test = dataset_class("test")
        d.test_ind = numpy.arange(d.test.num_examples)
    # Set up optional whitening, only used for CIFAR-10
    in_dim = train_set.data_sources[train_set.sources.index("features")].shape[1:]
    if len(in_dim) > 1 and p.whiten_zca > 0:
        assert numpy.product(in_dim) == p.whiten_zca, \
            "Need %d whitening dimensions, not %d" % (numpy.product(in_dim),
                                                      p.whiten_zca)
    cnorm = ContrastNorm(p.contrast_norm) if p.contrast_norm != 0 else None

    def get_data(d, i):
        data = d.get_data(request=i)[d.sources.index("features")]
        # Fuel provides CIFAR in uint8, convert to float32
        data = numpy.require(data, dtype=numpy.float32)
        return data if cnorm is None else cnorm.apply(data)

    if p.whiten_zca > 0:
        logger.info("Whitening using %d ZCA components" % p.whiten_zca)
        whiten = ZCA()
        whiten.fit(p.whiten_zca, get_data(d.train, d.train_ind))
    else:
        whiten = None
    return in_dim, d, whiten, cnorm
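ZCA and ContrastNorm above come from the surrounding repository and are not shown here. As a rough guide to what the whitening step does, here is a self-contained numpy sketch with the same fit/apply shape; the class name SimpleZCA and the eps parameter are illustrative, not the repo's actual implementation:

import numpy

class SimpleZCA(object):
    """Minimal ZCA whitening: rotate to the PCA basis, rescale, rotate back."""

    def fit(self, n_components, data, eps=1e-5):
        # data: 2-D array, one flattened example per row
        self.mean = data.mean(axis=0)
        centered = data - self.mean
        cov = numpy.dot(centered.T, centered) / len(centered)
        eigval, eigvec = numpy.linalg.eigh(cov)
        # eigh sorts eigenvalues ascending; keep the n_components largest
        eigval = eigval[-n_components:]
        eigvec = eigvec[:, -n_components:]
        self.whiten_mat = eigvec.dot(
            numpy.diag(1.0 / numpy.sqrt(eigval + eps))).dot(eigvec.T)

    def apply(self, data):
        return numpy.dot(data - self.mean, self.whiten_mat)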
Example 3: setup_data
# Required import: from utils import AttributeDict [as alias]
# Or: from utils.AttributeDict import valid [as alias]
def setup_data(p, test_set=False):
    dataset_class, training_set_size = {
        'cifar10': (CIFAR10, 40000),
        'mnist': (MNIST, 50000),
        'reddit': (SubredditTopPhotosFeatures22, 20000)
    }[p.dataset]
    # Allow overriding the default from the command line
    if p.get('unlabeled_samples') is not None:
        training_set_size = p.unlabeled_samples
    train_set = dataset_class(("train",))
    # Take all indices and permute them
    all_ind = numpy.arange(train_set.num_examples)
    if p.get('dseed'):
        rng = numpy.random.RandomState(seed=p.dseed)
        rng.shuffle(all_ind)
    d = AttributeDict()
    # Choose the training set
    d.train = train_set
    d.train_ind = all_ind[:training_set_size]
    # Then choose the validation set from the remaining indices
    d.valid = train_set
    d.valid_ind = numpy.setdiff1d(all_ind, d.train_ind)[:p.valid_set_size]
    logger.info('Using %d examples for validation' % len(d.valid_ind))
    # Only touch the test data if requested
    if test_set:
        d.test = dataset_class(("test",))
        d.test_ind = numpy.arange(d.test.num_examples)
    # Set up optional whitening, only used for CIFAR-10
    in_dim = train_set.data_sources[train_set.sources.index('features')].shape[1:]
    if len(in_dim) > 1 and p.whiten_zca > 0:
        assert numpy.product(in_dim) == p.whiten_zca, \
            'Need %d whitening dimensions, not %d' % (numpy.product(in_dim),
                                                      p.whiten_zca)
    cnorm = ContrastNorm(p.contrast_norm) if p.contrast_norm != 0 else None

    def get_data(d, i):
        data = d.get_data(request=i)[d.sources.index('features')]
        # Fuel provides CIFAR in uint8, convert to float32
        data = numpy.require(data, dtype=numpy.float32)
        return data if cnorm is None else cnorm.apply(data)

    if p.whiten_zca > 0:
        logger.info('Whitening using %d ZCA components' % p.whiten_zca)
        whiten = ZCA()
        whiten.fit(p.whiten_zca, get_data(d.train, d.train_ind))
    else:
        whiten = None
    return in_dim, d, whiten, cnorm
Example 4: setup_data
# Required import: from utils import AttributeDict [as alias]
# Or: from utils.AttributeDict import valid [as alias]
def setup_data(p, test_set=False):
    dataset_class, training_set_size = {
        'cifar10': (CIFAR10, 40000),
        'mnist': (MNIST, 50000),
    }[p.dataset]
    # Allow overriding the default from the command line
    if p.get('unlabeled_samples') is not None:
        training_set_size = p.unlabeled_samples
    train_set = dataset_class("train")
    # Make sure the MNIST data is in the right format
    if p.dataset == 'mnist':
        d = train_set.data_sources[train_set.sources.index('features')]
        assert numpy.all(d <= 1.0) and numpy.all(d >= 0.0), \
            'Make sure data is in float format and in range 0 to 1'
    # Take all indices and permute them
    all_ind = numpy.arange(train_set.num_examples)
    if p.get('dseed'):
        rng = numpy.random.RandomState(seed=p.dseed)
        rng.shuffle(all_ind)
    d = AttributeDict()
    # Choose the training set
    d.train = train_set
    d.train_ind = all_ind[:training_set_size]
    # Then choose the validation set from the remaining indices
    d.valid = train_set
    d.valid_ind = numpy.setdiff1d(all_ind, d.train_ind)[:p.valid_set_size]
    logger.info('Using %d examples for validation' % len(d.valid_ind))
    # Only touch the test data if requested
    if test_set:
        d.test = dataset_class("test")
        d.test_ind = numpy.arange(d.test.num_examples)
    in_dim = train_set.data_sources[train_set.sources.index('features')].shape[1:]
    # This trimmed variant drops whitening/contrast normalization; define
    # cnorm so the leftover get_data closure below remains runnable.
    cnorm = None

    def get_data(d, i):
        data = d.get_data(request=i)[d.sources.index('features')]
        # Fuel provides CIFAR in uint8, convert to float32
        data = numpy.require(data, dtype=numpy.float32)
        return data if cnorm is None else cnorm.apply(data)

    return in_dim, d
Example 5: setup_data
# Required import: from utils import AttributeDict [as alias]
# Or: from utils.AttributeDict import valid [as alias]
# (This variant also uses os.path.join; assumes "import os" at module level.)
def setup_data(p, test_set=False):
    if p.dataset in ['cifar10', 'mnist']:
        dataset_class, training_set_size = {
            'cifar10': (CIFAR10, 40000),
            'mnist': (MNIST, 50000),
        }[p.dataset]
    else:
        # Generic branch: load any HDF5 dataset laid out for Fuel
        from fuel.datasets import H5PYDataset
        from fuel.utils import find_in_data_path
        from functools import partial
        fn = p.dataset
        fn = os.path.join(fn, fn + '.hdf5')

        def dataset_class(which_sets):
            return H5PYDataset(file_or_path=find_in_data_path(fn),
                               which_sets=which_sets,
                               load_in_memory=True)

        training_set_size = None
    train_set = dataset_class(["train"])
    # Allow overriding the default from the command line
    if p.get('unlabeled_samples') is not None and p.unlabeled_samples >= 0:
        training_set_size = p.unlabeled_samples
    elif training_set_size is None:
        training_set_size = train_set.num_examples
    # Make sure the MNIST data is in the right format
    if p.dataset == 'mnist':
        d = train_set.data_sources[train_set.sources.index('features')]
        assert numpy.all(d <= 1.0) and numpy.all(d >= 0.0), \
            'Make sure data is in float format and in range 0 to 1'
    # Take all indices and permute them
    all_ind = numpy.arange(train_set.num_examples)
    if p.get('dseed'):
        rng = numpy.random.RandomState(seed=p.dseed)
        rng.shuffle(all_ind)
    d = AttributeDict()
    # Choose the training set
    d.train = train_set
    d.train_ind = all_ind[:training_set_size]
    # Then choose the validation set from the remaining indices
    d.valid = train_set
    d.valid_ind = numpy.setdiff1d(all_ind, d.train_ind)[:p.valid_set_size]
    logger.info('Using %d examples for validation' % len(d.valid_ind))
    # Only touch the test data if requested
    if test_set:
        d.test = dataset_class(["test"])
        d.test_ind = numpy.arange(d.test.num_examples)
    # Set up optional whitening, only used for CIFAR-10
    in_dim = train_set.data_sources[train_set.sources.index('features')].shape[1:]
    if len(in_dim) > 1 and p.whiten_zca > 0:
        assert numpy.product(in_dim) == p.whiten_zca, \
            'Need %d whitening dimensions, not %d' % (numpy.product(in_dim),
                                                      p.whiten_zca)
    cnorm = ContrastNorm(p.contrast_norm) if p.contrast_norm != 0 else None

    def get_data(d, i):
        data = d.get_data(request=i)[d.sources.index('features')]
        # Fuel provides CIFAR in uint8, convert to float32
        data = numpy.require(data, dtype=numpy.float32)
        return data if cnorm is None else cnorm.apply(data)

    if p.whiten_zca > 0:
        logger.info('Whitening using %d ZCA components' % p.whiten_zca)
        whiten = ZCA()
        whiten.fit(p.whiten_zca, get_data(d.train, d.train_ind))
    else:
        whiten = None
    return in_dim, d, whiten, cnorm
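The generic branch above expects an HDF5 file carrying Fuel's split metadata at <dataset>/<dataset>.hdf5 inside Fuel's data path. A sketch of building such a file with Fuel's H5PYDataset.create_split_array helper (file name, shapes, and split sizes are illustrative):

import h5py
import numpy
from fuel.datasets.hdf5 import H5PYDataset

features = numpy.random.rand(120, 3, 32, 32).astype('float32')
with h5py.File('mydataset/mydataset.hdf5', 'w') as f:
    f.create_dataset('features', data=features)
    # Mark which row ranges belong to which split.
    split_dict = {'train': {'features': (0, 100)},
                  'test': {'features': (100, 120)}}
    f.attrs['split'] = H5PYDataset.create_split_array(split_dict)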
Example 6: setup_data
# Required import: from utils import AttributeDict [as alias]
# Or: from utils.AttributeDict import valid [as alias]
# (This variant also assumes H5PYDataset and find_in_data_path are imported from fuel.)
def setup_data(p, test_set=False):
    dataset_class = {
        'cifar10': CIFAR10,
        'jos': JOS,
        'mnist': MNIST,
    }[p.dataset]
    training_set_size = p.unlabeled_samples
    # Allow overriding the default from the command line
    if p.get('unlabeled_samples') is not None:
        training_set_size = p.unlabeled_samples
    train_set = dataset_class(["train"])
    # Make sure the MNIST data is in the right format
    if p.dataset == 'mnist':
        d = train_set.data_sources[train_set.sources.index('features')]
        assert numpy.all(d <= 1.0) and numpy.all(d >= 0.0), \
            'Make sure data is in float format and in range 0 to 1'
    # Take all indices and permute them
    all_ind = numpy.arange(train_set.num_examples)
    if p.get('dseed'):
        rng = numpy.random.RandomState(seed=p.dseed)
        rng.shuffle(all_ind)
    d = AttributeDict()
    # Choose the training set
    d.train = train_set
    d.train_ind = all_ind[:training_set_size]
    # Then choose the validation set from the remaining indices
    d.valid = train_set
    d.valid_ind = numpy.setdiff1d(all_ind, d.train_ind)[:p.valid_set_size]
    logger.info('Using %d examples for validation' % len(d.valid_ind))
    # Only touch the test data if requested
    if test_set:
        d.test = dataset_class(["test"])
        d.test_ind = numpy.arange(d.test.num_examples)
    # Set up optional whitening, only used for CIFAR-10.
    # Read the input dimensionality from the HDF5 file itself.
    fn = find_in_data_path(train_set.filename)
    s1 = H5PYDataset(fn, ("train",))
    handle = s1.open()
    in_dim = s1.get_data(handle, slice(0, 1))[0].shape[1:]
    s1.close(handle)
    # in_dim = train_set.data_sources[train_set.sources.index('features')].shape[1:]
    if len(in_dim) > 1 and p.whiten_zca > 0:
        assert numpy.product(in_dim) == p.whiten_zca, \
            'Need %d whitening dimensions, not %d' % (numpy.product(in_dim),
                                                      p.whiten_zca)
    cnorm = ContrastNorm(p.contrast_norm) if p.contrast_norm != 0 else None

    def get_data(d, i):
        data = d.get_data(request=list(i))[d.sources.index('features')]
        # Fuel provides CIFAR in uint8, convert to float32
        data = numpy.require(data, dtype=numpy.float32)
        return data if cnorm is None else cnorm.apply(data)

    if p.whiten_zca > 0:
        logger.info('Whitening using %d ZCA components' % p.whiten_zca)
        whiten = ZCA()
        whiten.fit(p.whiten_zca, get_data(d.train, d.train_ind))
    else:
        whiten = None
    return in_dim, d, whiten, cnorm
Example 7: setup_data
# Required import: from utils import AttributeDict [as alias]
# Or: from utils.AttributeDict import valid [as alias]
def setup_data(p, test_set=False):
    # CIFAR10 and MNIST are both HDF5-wrapped datasets.
    # p.dataset comes from the command line and selects cifar10 or mnist.
    dataset_class, training_set_size = {
        'cifar10': (CIFAR10, 40000),
        'mnist': (MNIST, 50000),
    }[p.dataset]
    # The number of unlabeled samples can be overridden from the command line
    if p.get('unlabeled_samples') is not None:
        training_set_size = p.unlabeled_samples
    # Select the 'train' split of the dataset
    train_set = dataset_class("train")
    # Make sure the MNIST data is in the right format:
    # every value must be a float in [0, 1].
    if p.dataset == 'mnist':
        # features has shape 60000*1*28*28 (num_examples*channels*height*width);
        # MNIST is grayscale, so channels = 1.
        d = train_set.data_sources[train_set.sources.index('features')]
        assert numpy.all(d <= 1.0) and numpy.all(d >= 0.0), \
            'Make sure data is in float format and in range 0 to 1'
    # Randomly shuffle the sample order:
    # take all indices and permute them
    all_ind = numpy.arange(train_set.num_examples)
    if p.get('dseed'):
        # Build an RNG from dseed to shuffle the sample indices
        rng = numpy.random.RandomState(seed=p.dseed)
        rng.shuffle(all_ind)
    d = AttributeDict()
    # Choose the training set
    d.train = train_set
    # The indices are shuffled at this point; take the first
    # training_set_size of them as the training set.
    d.train_ind = all_ind[:training_set_size]
    # Then choose the validation set from the remaining indices:
    d.valid = train_set
    # everything not used for training becomes the validation pool.
    d.valid_ind = numpy.setdiff1d(all_ind, d.train_ind)[:p.valid_set_size]
    logger.info('Using %d examples for validation' % len(d.valid_ind))
    # Only touch the test data if requested; generate its indices.
    if test_set:
        d.test = dataset_class("test")
        d.test_ind = numpy.arange(d.test.num_examples)
    # Set up optional whitening, only used for CIFAR-10.
    # shape[1:] is the shape of a single sample, i.e. the feature dimensions.
    in_dim = train_set.data_sources[train_set.sources.index('features')].shape[1:]
    if len(in_dim) > 1 and p.whiten_zca > 0:
        assert numpy.product(in_dim) == p.whiten_zca, \
            'Need %d whitening dimensions, not %d' % (numpy.product(in_dim),
                                                      p.whiten_zca)
    # If contrast normalization is enabled, create the normalizer
    cnorm = ContrastNorm(p.contrast_norm) if p.contrast_norm != 0 else None

    def get_data(d, i):
        data = d.get_data(request=i)[d.sources.index('features')]
        # Fuel provides CIFAR in uint8; coerce to float32
        data = numpy.require(data, dtype=numpy.float32)
        # TODO ContrastNorm.apply
        return data if cnorm is None else cnorm.apply(data)

    if p.whiten_zca > 0:
        logger.info('Whitening using %d ZCA components' % p.whiten_zca)
        # TODO ZCA
        whiten = ZCA()
        whiten.fit(p.whiten_zca, get_data(d.train, d.train_ind))
    else:
        whiten = None
    return in_dim, d, whiten, cnorm
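The train/validation split annotated above is plain index arithmetic; a worked toy example of the same numpy.setdiff1d pattern:

import numpy
all_ind = numpy.array([3, 0, 4, 1, 2])               # already shuffled
train_ind = all_ind[:3]                              # [3, 0, 4]
valid_ind = numpy.setdiff1d(all_ind, train_ind)[:2]  # sorted remainder: [1, 2]
print(train_ind, valid_ind)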
Example 8: setup_data
# Required import: from utils import AttributeDict [as alias]
# Or: from utils.AttributeDict import valid [as alias]
def setup_data(p, use_unlabeled=True, use_labeled=True):
    assert use_unlabeled or use_labeled, 'Cannot train without cost'
    dataset_class = DATASETS[p.dataset]
    dataset = dataset_class(p)
    train_ind = dataset.trn.ind
    if 'labeled_samples' not in p or p.labeled_samples == 0:
        n_labeled = len(train_ind)
    else:
        n_labeled = p.labeled_samples
    if 'unlabeled_samples' not in p:
        n_unlabeled = len(train_ind)
    else:
        n_unlabeled = p.unlabeled_samples
    assert p.batch_size <= n_labeled, "batch size too large"
    assert len(train_ind) >= n_labeled
    assert len(train_ind) >= n_unlabeled, "not enough training samples"
    assert n_labeled <= n_unlabeled, \
        "need at least as many unlabeled samples as labeled samples"
    # If not using all labels, balance the classes
    balance_classes = n_labeled < len(train_ind)
    if balance_classes and use_labeled:
        # Ensure each label is equally represented
        y = dataset.get_train_labels()
        n_classes = numpy.max(y) + 1
        n_from_each_class = n_labeled // n_classes  # integer division
        logger.info('n_sample_from_each_class {0}'.format(n_from_each_class))
        assert n_labeled % n_classes == 0
        i_labeled = []
        for c in range(n_classes):
            i = (train_ind[y[:, 0] == c])[:n_from_each_class]
            if len(i) < n_from_each_class:
                logger.warning('Class {0}: only got {1}'.format(c, len(i)))
            i_labeled += list(i)
    else:
        i_labeled = train_ind[:n_labeled]

    def make_unlabeled_set(train_ind, i_labeled, n_unlabeled):
        """Build the unlabeled index set.

        i_unused_labeled: the training indices not used in i_labeled.
        n_unlabeled_needed: how many indices are needed beyond len(i_labeled).
        """
        i_unused_labeled = list(set(train_ind) - set(i_labeled))
        n_unlabeled_needed = n_unlabeled - len(i_labeled)
        i_unlabeled = i_unused_labeled[:n_unlabeled_needed]
        i_unlabeled.extend(i_labeled)
        return i_unlabeled

    i_unlabeled = make_unlabeled_set(train_ind, i_labeled, n_unlabeled)
    logger.info('Creating data set with %d labeled and %d total samples' %
                (len(i_labeled), len(i_unlabeled)))
    streams = AttributeDict()

    def make(kind, ind_labeled, ind_unlabeled):
        ds_labeled, ds_unlabeled = None, None
        if use_labeled:
            ds_labeled = dataset.get_datastream(kind, ind_labeled)
        if use_unlabeled:
            ds_unlabeled = dataset.get_datastream(kind, ind_unlabeled)
        return combine_datastreams(ds_labeled, ds_unlabeled)

    streams.train = make('trn', i_labeled, i_unlabeled)
    streams.valid = make('val', None, None)  # use all indices
    streams.test = make('tst', None, None)   # use all indices
    return streams
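Assuming the repo-level pieces behave as the code above suggests (DATASETS maps names to dataset wrappers, combine_datastreams merges the labeled and unlabeled streams), a call might look like this; every parameter value here is illustrative:

p = AttributeDict(dataset='mnist', batch_size=100,
                  labeled_samples=1000, unlabeled_samples=50000)
streams = setup_data(p, use_unlabeled=True, use_labeled=True)
# streams is an AttributeDict, so the splits read as attributes:
print(streams.train, streams.valid, streams.test)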