This page collects typical usage examples of the Python method sklearn.model_selection._split._validate_shuffle_split. If you are unsure what _split._validate_shuffle_split does or how to call it, the curated examples below may help. You can also browse the containing module, sklearn.model_selection._split, for further usage examples.
Five code examples of the _split._validate_shuffle_split method are shown below, sorted by popularity by default.
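Before turning to the examples, note what the helper actually does: it is a private sklearn utility that resolves test_size and train_size (given as floats, ints, or None) against the total number of samples and returns the concrete pair (n_train, n_test). A minimal sketch, assuming a sklearn version where the private signature is _validate_shuffle_split(n_samples, test_size, train_size) — this signature has changed across releases, so treat it as illustrative rather than definitive:

from sklearn.model_selection._split import _validate_shuffle_split

# A 0.33 test fraction of 10 samples is rounded up (ceil), and the
# remainder becomes the training set.
n_train, n_test = _validate_shuffle_split(10, test_size=0.33, train_size=None)
print(n_train, n_test)  # expected: 6 4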
Example 1: test_stratified_shuffle_split_iter
# Required module import: from sklearn.model_selection import _split [as alias]
# or: from sklearn.model_selection._split import _validate_shuffle_split [as alias]
import numpy as np

from sklearn.model_selection import StratifiedShuffleSplit
# In the sklearn version this test was written against, the assert_* helpers
# live in sklearn.utils.testing (renamed to sklearn.utils._testing later)
from sklearn.utils.testing import (assert_array_equal,
                                   assert_array_almost_equal, assert_equal)


def test_stratified_shuffle_split_iter():
    ys = [np.array([1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]),
          np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]),
          np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2] * 2),
          np.array([1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]),
          np.array([-1] * 800 + [1] * 50),
          np.concatenate([[i] * (100 + i) for i in range(11)]),
          [1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3],
          ['1', '1', '1', '1', '2', '2', '2', '3', '3', '3', '3', '3'],
          ]

    for y in ys:
        sss = StratifiedShuffleSplit(6, test_size=0.33,
                                     random_state=0).split(np.ones(len(y)), y)
        y = np.asanyarray(y)  # To make it indexable for y[train]
        # This is how the test size is computed internally
        # in _validate_shuffle_split
        test_size = np.ceil(0.33 * len(y))
        train_size = len(y) - test_size
        for train, test in sss:
            assert_array_equal(np.unique(y[train]), np.unique(y[test]))
            # Check that the folds preserve the class proportions
            p_train = (np.bincount(np.unique(y[train],
                                   return_inverse=True)[1]) /
                       float(len(y[train])))
            p_test = (np.bincount(np.unique(y[test],
                                  return_inverse=True)[1]) /
                      float(len(y[test])))
            assert_array_almost_equal(p_train, p_test, 1)
            assert_equal(len(train) + len(test), y.size)
            assert_equal(len(train), train_size)
            assert_equal(len(test), test_size)
            assert_array_equal(np.lib.arraysetops.intersect1d(train, test), [])
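Example 1 hard-codes the same rounding rule the helper applies internally: for a float test_size, the number of test samples is the ceiling of test_size * len(y), and the training set takes the remainder. A quick check of that equivalence (same signature assumption as in the sketch above):

import numpy as np
from sklearn.model_selection._split import _validate_shuffle_split

n_samples = 12
n_train, n_test = _validate_shuffle_split(n_samples, test_size=0.33, train_size=None)
assert n_test == int(np.ceil(0.33 * n_samples))  # ceil(3.96) == 4
assert n_train == n_samples - n_test             # 12 - 4 == 8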
Example 2: getDataLoaders
# Required module import: from sklearn.model_selection import _split [as alias]
# or: from sklearn.model_selection._split import _validate_shuffle_split [as alias]
import torch
from torch.utils.data import DataLoader

from sklearn.model_selection._split import _validate_shuffle_split

# SyntheticDataset and self.data_size are defined elsewhere in this project.

def getDataLoaders(self, batch_size, shuffle, device, *args):
    kwargs = {'num_workers': 1, 'pin_memory': True} if device == "cuda" else {}
    print('Load training data...')
    dataset = SyntheticDataset(*self.data_size, *map(lambda x: float(x), args))
    # Resolve a 70/30 split into exact integer lengths for random_split
    n_train, n_test = _validate_shuffle_split(len(dataset), test_size=None, train_size=0.7)
    train_dataset, test_dataset = torch.utils.data.random_split(dataset, [n_train, n_test])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, drop_last=True, shuffle=shuffle, **kwargs)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, drop_last=True, shuffle=False, **kwargs)
    return train_loader, test_loader
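Here the helper is used outside of sklearn entirely: with test_size=None and a float train_size, it takes the floor of train_size * len(dataset) for training and assigns every remaining sample to the test split, so the two lengths always sum to len(dataset) — exactly the invariant torch.utils.data.random_split requires. A small sketch of that behavior (the floor/remainder rule matches current sklearn, but is worth re-checking on older versions):

from sklearn.model_selection._split import _validate_shuffle_split

n = 101  # an awkward length that does not split evenly
n_train, n_test = _validate_shuffle_split(n, test_size=None, train_size=0.7)
assert n_train + n_test == n  # safe to hand to random_split
print(n_train, n_test)  # expected: 70 31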
Example 3: _split_blockwise
# Required module import: from sklearn.model_selection import _split [as alias]
# or: from sklearn.model_selection._split import _validate_shuffle_split [as alias]
import dask
import dask.array as da
import numpy as np

from sklearn.model_selection._split import _validate_shuffle_split

# _maybe_normalize_split_sizes and _generate_idx are private helpers defined
# alongside this method in dask_ml.model_selection._split.

def _split_blockwise(self, X, seeds):
    chunks = X.chunks[0]
    train_pct, test_pct = _maybe_normalize_split_sizes(
        self.train_size, self.test_size
    )
    # Validate the split sizes independently for every chunk of the array
    sizes = [_validate_shuffle_split(c, test_pct, train_pct) for c in chunks]

    objs = [
        dask.delayed(_generate_idx, nout=2)(chunksize, seed, n_train, n_test)
        for chunksize, seed, (n_train, n_test) in zip(chunks, seeds, sizes)
    ]

    train_objs, test_objs = zip(*objs)
    # Per-chunk indices are local; shift them by each chunk's global offset
    offsets = np.hstack([0, np.cumsum(chunks)])
    train_idx = da.concatenate(
        [
            da.from_delayed(x + offset, (train_size,), np.dtype("int"))
            for x, chunksize, (train_size, _), offset in zip(
                train_objs, chunks, sizes, offsets
            )
        ]
    )
    test_idx = da.concatenate(
        [
            da.from_delayed(x + offset, (test_size,), np.dtype("int"))
            for x, chunksize, (_, test_size), offset in zip(
                test_objs, chunks, sizes, offsets
            )
        ]
    )
    return train_idx, test_idx
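Example 3 is from dask-ml: each chunk of the dask array is split independently, so _validate_shuffle_split runs once per chunk and the per-chunk indices are shifted by the chunk offsets afterwards. _generate_idx is private to dask-ml; the following is only a hypothetical sketch of its contract (my assumption, not the library's actual code) — a seeded permutation of one chunk, carved into train and test index arrays:

import numpy as np

def _generate_idx_sketch(n, seed, n_train, n_test):
    # Hypothetical stand-in for dask-ml's private _generate_idx:
    # shuffle one chunk's local indices and slice off train/test parts.
    idx = np.random.RandomState(seed).permutation(n)
    return idx[:n_train], idx[n_train:n_train + n_test]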
Example 4: test_stratified_shuffle_split_even
# Required module import: from sklearn.model_selection import _split [as alias]
# or: from sklearn.model_selection._split import _validate_shuffle_split [as alias]
import numpy as np
from scipy import stats

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection._split import _validate_shuffle_split
from sklearn.utils.testing import assert_equal  # sklearn's (old-style) test helper


def test_stratified_shuffle_split_even():
    # Test that StratifiedShuffleSplit draws indices with an equal chance
    n_folds = 5
    n_splits = 1000

    def assert_counts_are_ok(idx_counts, p):
        # Test that the distribution of the counts per index
        # is close enough to a binomial
        threshold = 0.05 / n_splits
        bf = stats.binom(n_splits, p)
        for count in idx_counts:
            prob = bf.pmf(count)
            assert prob > threshold, \
                "An index is not drawn with chance corresponding to even draws"

    for n_samples in (6, 22):
        groups = np.array((n_samples // 2) * [0, 1])
        splits = StratifiedShuffleSplit(n_splits=n_splits,
                                        test_size=1. / n_folds,
                                        random_state=0)

        train_counts = [0] * n_samples
        test_counts = [0] * n_samples
        n_splits_actual = 0
        for train, test in splits.split(X=np.ones(n_samples), y=groups):
            n_splits_actual += 1
            for counter, ids in [(train_counts, train), (test_counts, test)]:
                for id in ids:
                    counter[id] += 1
        assert_equal(n_splits_actual, n_splits)

        n_train, n_test = _validate_shuffle_split(
            n_samples, test_size=1. / n_folds, train_size=1. - (1. / n_folds))

        assert_equal(len(train), n_train)
        assert_equal(len(test), n_test)
        assert_equal(len(set(train).intersection(test)), 0)

        group_counts = np.unique(groups)
        assert_equal(splits.test_size, 1.0 / n_folds)
        assert_equal(n_train + n_test, len(groups))
        assert_equal(len(group_counts), 2)

        ex_test_p = float(n_test) / n_samples
        ex_train_p = float(n_train) / n_samples

        assert_counts_are_ok(train_counts, ex_train_p)
        assert_counts_are_ok(test_counts, ex_test_p)
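The assert_counts_are_ok check rests on a simple observation: across n_splits independent shuffles, the number of times a given index lands in the test set should follow a Binomial(n_splits, n_test / n_samples) distribution, so any count whose probability mass falls below the Bonferroni-style threshold 0.05 / n_splits signals a biased draw. A standalone illustration of the same arithmetic:

from scipy import stats

n_splits, p = 1000, 0.2        # 1000 splits, 20% test fraction
bf = stats.binom(n_splits, p)
print(0.05 / n_splits)         # threshold: 5e-05
print(bf.pmf(200))             # at the mode (~0.03): comfortably above it
print(bf.pmf(300))             # far from the mode: vanishingly small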
Example 5: test_stratified_shuffle_split_even
# Required module import: from sklearn.model_selection import _split [as alias]
# or: from sklearn.model_selection._split import _validate_shuffle_split [as alias]
import numpy as np
from scipy import stats

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection._split import _validate_shuffle_split
from sklearn.utils.testing import assert_equal, assert_true


def test_stratified_shuffle_split_even():
    # Test that StratifiedShuffleSplit draws indices with an equal chance
    n_folds = 5
    n_splits = 1000

    def assert_counts_are_ok(idx_counts, p):
        # Test that the distribution of the counts per index
        # is close enough to a binomial
        threshold = 0.05 / n_splits
        bf = stats.binom(n_splits, p)
        for count in idx_counts:
            prob = bf.pmf(count)
            assert_true(prob > threshold,
                        "An index is not drawn with chance corresponding "
                        "to even draws")

    for n_samples in (6, 22):
        groups = np.array((n_samples // 2) * [0, 1])
        splits = StratifiedShuffleSplit(n_splits=n_splits,
                                        test_size=1. / n_folds,
                                        random_state=0)

        train_counts = [0] * n_samples
        test_counts = [0] * n_samples
        n_splits_actual = 0
        for train, test in splits.split(X=np.ones(n_samples), y=groups):
            n_splits_actual += 1
            for counter, ids in [(train_counts, train), (test_counts, test)]:
                for id in ids:
                    counter[id] += 1
        assert_equal(n_splits_actual, n_splits)

        n_train, n_test = _validate_shuffle_split(
            n_samples, test_size=1. / n_folds, train_size=1. - (1. / n_folds))

        assert_equal(len(train), n_train)
        assert_equal(len(test), n_test)
        assert_equal(len(set(train).intersection(test)), 0)

        group_counts = np.unique(groups)
        assert_equal(splits.test_size, 1.0 / n_folds)
        assert_equal(n_train + n_test, len(groups))
        assert_equal(len(group_counts), 2)

        ex_test_p = float(n_test) / n_samples
        ex_train_p = float(n_train) / n_samples

        assert_counts_are_ok(train_counts, ex_train_p)
        assert_counts_are_ok(test_counts, ex_test_p)