本文整理汇总了Python中sklearn.model_selection.GroupShuffleSplit类的典型用法代码示例。如果您正苦于以下问题:Python model_selection.GroupShuffleSplit的具体用法?Python model_selection.GroupShuffleSplit怎么用?Python model_selection.GroupShuffleSplit使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块sklearn.model_selection的用法示例。
在下文中一共展示了model_selection.GroupShuffleSplit类的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_2d_y
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupShuffleSplit [as 别名]
def test_2d_y():
    """Smoke-test every splitter with 1-d, column-vector and multilabel ``y``.

    Splitting with a 1-d or ``(n, 1)`` target must succeed.  With a multilabel
    target a splitter may raise ``ValueError``, but only with the
    "Supported target types" message checked below.
    """
    sample_count = 30
    random_gen = np.random.RandomState(1)
    # The draws below are kept in this order so each array gets the same
    # values for the fixed seed.
    features = random_gen.randint(0, 3, size=(sample_count, 2))
    target = random_gen.randint(0, 3, size=(sample_count,))
    target_column = target.reshape(-1, 1)
    target_multilabel = random_gen.randint(0, 2, size=(sample_count, 3))
    group_labels = random_gen.randint(0, 3, size=(sample_count,))

    # The only error text a splitter is allowed to produce for multilabel y.
    expected_error = "Supported target types are: {}. Got 'multilabel".format(
        ('binary', 'multiclass'))

    cv_objects = [
        LeaveOneOut(), LeavePOut(p=2), KFold(), StratifiedKFold(),
        RepeatedKFold(), RepeatedStratifiedKFold(),
        ShuffleSplit(), StratifiedShuffleSplit(test_size=.5),
        GroupShuffleSplit(), LeaveOneGroupOut(),
        LeavePGroupsOut(n_groups=2), GroupKFold(), TimeSeriesSplit(),
        PredefinedSplit(test_fold=group_labels),
    ]

    for cv in cv_objects:
        # Exhaust the generators so that every split is actually computed.
        for plain_target in (target, target_column):
            list(cv.split(features, plain_target, group_labels))
        try:
            list(cv.split(features, target_multilabel, group_labels))
        except ValueError as err:
            assert expected_error in str(err)
示例2: test_group_shuffle_split_default_test_size
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupShuffleSplit [as 别名]
def test_group_shuffle_split_default_test_size(train_size, exp_train,
                                               exp_test):
    """Check the default ``test_size`` behaviour of GroupShuffleSplit.

    The default is expected to be 0.2 when neither size is given, or the
    complement of ``train_size`` when only that one is given.

    NOTE(review): the three arguments are presumably supplied by a
    parametrize decorator that is not visible here - confirm.
    """
    n_items = 10
    features = np.ones(n_items)
    target = np.ones(n_items)
    # One group per sample, so group counts and sample counts coincide.
    splitter = GroupShuffleSplit(train_size=train_size)
    train_idx, test_idx = next(splitter.split(features, target,
                                              range(n_items)))
    assert len(train_idx) == exp_train
    assert len(test_idx) == exp_test
示例3: test_group_shuffle_split
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupShuffleSplit [as 别名]
def test_group_shuffle_split():
    """Check the invariants of GroupShuffleSplit on each entry of ``test_groups``.

    For every generated split: no group appears on both sides, the train and
    test indices are disjoint and together cover all samples, and the number
    of distinct groups on each side matches ``test_size`` to within one group.
    """
    for groups_i in test_groups:
        X = y = np.ones(len(groups_i))
        n_splits = 6
        test_size = 1. / 3
        slo = GroupShuffleSplit(n_splits, test_size=test_size, random_state=0)

        # Make sure the repr works
        repr(slo)

        # Test that the length is correct
        assert_equal(slo.get_n_splits(X, y, groups=groups_i), n_splits)

        group_labels = np.asarray(groups_i)
        n_unique_groups = len(np.unique(groups_i))
        # Expected number of distinct groups per side; loop-invariant.
        expected_n_test = round(test_size * n_unique_groups)
        expected_n_train = round((1.0 - test_size) * n_unique_groups)

        for train, test in slo.split(X, y, groups=groups_i):
            # First test: no train group is in the test set and vice versa.
            # np.isin is used instead of np.in1d, which NumPy 2.0 deprecates.
            fitted_groups = np.unique(group_labels[train])
            held_out_groups = np.unique(group_labels[test])
            assert not np.any(np.isin(group_labels[train], held_out_groups))
            assert not np.any(np.isin(group_labels[test], fitted_groups))

            # Second test: train and test add up to all the data
            assert_equal(group_labels[train].size + group_labels[test].size,
                         group_labels.size)

            # Third test: train and test are disjoint
            assert_array_equal(np.intersect1d(train, test), [])

            # Fourth test:
            # unique train and test groups are correct, +- 1 for rounding error
            assert abs(len(held_out_groups) - expected_n_test) <= 1
            assert abs(len(fitted_groups) - expected_n_train) <= 1
示例4: train_test_split_groups
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupShuffleSplit [as 别名]
def train_test_split_groups(X, *, val_size, groups=None, **kwargs):
    """Split ``X`` once into a training part and a validation part.

    Uses ``ShuffleSplit`` when ``groups`` is None and ``GroupShuffleSplit``
    otherwise; ``val_size`` is passed on as the splitter's ``test_size`` and
    any extra keyword arguments go to the splitter constructor.

    Returns the pair ``(X[train_indices], X[val_indices])``.
    """
    if groups is None:
        splitter = ShuffleSplit(test_size=val_size, **kwargs)
    else:
        splitter = GroupShuffleSplit(test_size=val_size, **kwargs)

    # Only the first generated split is used.
    train_idx, val_idx = next(splitter.split(X=X, groups=groups))
    return X[train_idx], X[val_idx]
示例5: train_test_split_with_empty_fraction_with_groups
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupShuffleSplit [as 别名]
def train_test_split_with_empty_fraction_with_groups(df,
                                                     groups,
                                                     empty_fraction,
                                                     test_size,
                                                     shuffle=True, random_state=1234):
    """Group-aware train/test split whose test part is resampled by emptiness.

    The frame is first split with ``GroupShuffleSplit``.  The test part is
    then subsampled so that it holds ``int(test_size * empty_fraction)`` rows
    with ``is_not_empty == 0`` and ``int(test_size * (1 - empty_fraction))``
    rows with ``is_not_empty == 1``.  The train part keeps all of its rows
    whose ``is_not_empty`` is 0 or 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to split; must contain an ``is_not_empty`` column.
    groups : object exposing ``.values``
        Group label per row of ``df``, passed to the splitter.
    empty_fraction : float
        Share of the test rows that should have ``is_not_empty == 0``.
    test_size : int or float
        Passed to ``GroupShuffleSplit`` and also used to compute the row
        counts above.
        NOTE(review): a fractional value truncates both counts to 0, so an
        integer is presumably expected - confirm against callers.
    shuffle : bool, default=True
        Whether to shuffle the rows of the returned frames.
    random_state : int, default=1234
        Seed used for the splitter and for every sampling step.

    Returns
    -------
    train, test : pandas.DataFrame
    """
    cv = GroupShuffleSplit(n_splits=2, test_size=test_size, random_state=random_state)
    # Only the first split is ever used, so take it directly.
    train_inds, test_inds = next(cv.split(df.values, groups=groups.values))
    train, test = df.iloc[train_inds], df.iloc[test_inds]

    empty_train = train[train['is_not_empty'] == 0]
    non_empty_train = train[train['is_not_empty'] == 1]
    empty_test = test[test['is_not_empty'] == 0]
    non_empty_test = test[test['is_not_empty'] == 1]

    test_empty_size = int(test_size * empty_fraction)
    test_non_empty_size = int(test_size * (1.0 - empty_fraction))
    empty_test = empty_test.sample(test_empty_size, random_state=random_state)
    non_empty_test = non_empty_test.sample(test_non_empty_size, random_state=random_state)

    train = pd.concat([empty_train, non_empty_train], axis=0)
    test = pd.concat([empty_test, non_empty_test], axis=0)

    if shuffle:
        # The earlier code shuffled train unconditionally and then once more
        # here, so shuffle=False still returned shuffled rows.  Both passes
        # now sit behind the flag; keeping two passes preserves the exact row
        # order produced for existing seeds when shuffle=True.
        train = train.sample(frac=1, random_state=random_state)
        train = train.sample(frac=1, random_state=random_state)
        test = test.sample(frac=1, random_state=random_state)

    return train, test
示例6: test_objectmapper
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupShuffleSplit [as 别名]
def test_objectmapper(self):
    """``ModelFrame.model_selection`` must expose the very same objects as ``ms``."""
    df = pdml.ModelFrame([])

    # Splitter Classes
    # StratifiedShuffleSplit is left out on purpose: its check was disabled
    # in this test.
    splitter_names = (
        'KFold',
        'GroupKFold',
        'StratifiedKFold',
        'LeaveOneGroupOut',
        'LeavePGroupsOut',
        'LeaveOneOut',
        'LeavePOut',
        'ShuffleSplit',
        'GroupShuffleSplit',
        'PredefinedSplit',
        'TimeSeriesSplit',
    )

    # Splitter Functions

    # Hyper-parameter optimizers
    optimizer_names = (
        'GridSearchCV',
        'RandomizedSearchCV',
        'ParameterGrid',
        'ParameterSampler',
    )

    for attr_name in splitter_names + optimizer_names:
        self.assertIs(getattr(df.model_selection, attr_name),
                      getattr(ms, attr_name))

    # Model validation
示例7: test_objectmapper_abbr
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupShuffleSplit [as 别名]
def test_objectmapper_abbr(self):
    """The abbreviated ``ModelFrame.ms`` accessor must expose the same objects as ``ms``."""
    df = pdml.ModelFrame([])

    # Splitter Classes
    # StratifiedShuffleSplit is left out on purpose: its check was disabled
    # in this test.
    splitter_names = (
        'KFold',
        'GroupKFold',
        'StratifiedKFold',
        'LeaveOneGroupOut',
        'LeavePGroupsOut',
        'LeaveOneOut',
        'LeavePOut',
        'ShuffleSplit',
        'GroupShuffleSplit',
        'PredefinedSplit',
        'TimeSeriesSplit',
    )

    # Splitter Functions

    # Hyper-parameter optimizers
    optimizer_names = (
        'GridSearchCV',
        'RandomizedSearchCV',
        'ParameterGrid',
        'ParameterSampler',
    )

    for attr_name in splitter_names + optimizer_names:
        self.assertIs(getattr(df.ms, attr_name), getattr(ms, attr_name))

    # Model validation
示例8: test_group_shuffle_split
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupShuffleSplit [as 别名]
def test_group_shuffle_split():
    """Check the invariants of GroupShuffleSplit on each entry of ``test_groups``.

    For every generated split: no group appears on both sides, the train and
    test indices are disjoint and together cover all samples, and the number
    of distinct groups on each side matches ``test_size`` to within one group.
    """
    for groups_i in test_groups:
        X = y = np.ones(len(groups_i))
        n_splits = 6
        test_size = 1. / 3
        slo = GroupShuffleSplit(n_splits, test_size=test_size, random_state=0)

        # Make sure the repr works
        repr(slo)

        # Test that the length is correct
        assert_equal(slo.get_n_splits(X, y, groups=groups_i), n_splits)

        group_labels = np.asarray(groups_i)
        n_unique_groups = len(np.unique(groups_i))
        # Expected number of distinct groups per side; loop-invariant.
        expected_n_test = round(test_size * n_unique_groups)
        expected_n_train = round((1.0 - test_size) * n_unique_groups)

        for train, test in slo.split(X, y, groups=groups_i):
            # First test: no train group is in the test set and vice versa.
            # np.isin is used instead of np.in1d, which NumPy 2.0 deprecates,
            # and plain asserts replace the nose-style assert_true/assert_false
            # helpers.
            fitted_groups = np.unique(group_labels[train])
            held_out_groups = np.unique(group_labels[test])
            assert not np.any(np.isin(group_labels[train], held_out_groups))
            assert not np.any(np.isin(group_labels[test], fitted_groups))

            # Second test: train and test add up to all the data
            assert_equal(group_labels[train].size + group_labels[test].size,
                         group_labels.size)

            # Third test: train and test are disjoint
            assert_array_equal(np.intersect1d(train, test), [])

            # Fourth test:
            # unique train and test groups are correct, +- 1 for rounding error
            assert abs(len(held_out_groups) - expected_n_test) <= 1
            assert abs(len(fitted_groups) - expected_n_train) <= 1
示例9: test_train_test_default_warning
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupShuffleSplit [as 别名]
def test_train_test_default_warning():
    """Giving only ``train_size`` must emit a FutureWarning.

    Checked for the three shuffle-split classes and for ``train_test_split``.
    """
    shuffle_splitters = (ShuffleSplit, GroupShuffleSplit,
                         StratifiedShuffleSplit)
    for splitter_cls in shuffle_splitters:
        assert_warns(FutureWarning, splitter_cls, train_size=0.75)

    assert_warns(FutureWarning, train_test_split, range(3),
                 train_size=0.75)
示例10: temp
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupShuffleSplit [as 别名]
def temp(samples):
    """Print group-overlap and class-balance diagnostics for several splitters.

    Runs GroupShuffleSplit, GroupKFold, StratifiedKFold and the project's
    StratifiedGroupKFold (3 splits each) on ``samples`` and, for every split,
    prints how many groups are shared between train and test, the class
    frequency ratios on each side, and their squared difference.

    Parameters
    ----------
    samples : object
        Must support ``len()``, ``encoded_1d().values`` (the labels) and
        ``group_ids``.
        NOTE(review): labels are presumably non-negative integers in a numpy
        array, as ``np.bincount`` requires - confirm.
    """
    from sklearn import model_selection
    from ibeis.algo.verif import sklearn_utils

    X = np.empty((len(samples), 0))
    y = samples.encoded_1d().values
    groups = samples.group_ids
    n_splits = 3

    # A common length for all frequency vectors, so the train and test ratios
    # can be subtracted even when a class is missing from one side.
    n_classes = int(np.max(y)) + 1 if len(y) else 0

    def check_balance(idxs):
        """Print the diagnostics for a list of (train, test) index pairs."""
        print('-------')
        # split() yields (train, test); unpacking in the opposite order
        # used to swap the printed labels.
        for count, (train, test) in enumerate(idxs):
            print('split %r' % (count))
            groups_train = set(groups.take(train))
            groups_test = set(groups.take(test))
            n_group_isect = len(groups_train.intersection(groups_test))
            # np.bincount stands in for the bare `bincount`, which was never
            # imported.
            y_train_freq = np.bincount(y.take(train), minlength=n_classes)
            y_test_freq = np.bincount(y.take(test), minlength=n_classes)
            y_test_ratio = y_test_freq / y_test_freq.sum()
            y_train_ratio = y_train_freq / y_train_freq.sum()
            balance_error = np.sum((y_test_ratio - y_train_ratio) ** 2)
            print('n_group_isect = %r' % (n_group_isect,))
            print('y_test_ratio = %r' % (y_test_ratio,))
            print('y_train_ratio = %r' % (y_train_ratio,))
            print('balance_error = %r' % (balance_error,))

    splitter_classes = (
        model_selection.GroupShuffleSplit,
        model_selection.GroupKFold,
        model_selection.StratifiedKFold,
        sklearn_utils.StratifiedGroupKFold,
    )
    for splitter_cls in splitter_classes:
        splitter = splitter_cls(n_splits=n_splits)
        idxs = list(splitter.split(X=X, y=y, groups=groups))
        check_balance(idxs)
示例11: artist_conditional_split
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupShuffleSplit [as 别名]
def artist_conditional_split(trackid_list=None, test_size=0.15, num_splits=5,
                             random_state=None, artist_index=None):
    """Create artist-conditional train-test splits.

    The same artist (as defined by the artist_index) cannot appear
    in both the training and testing set.

    Parameters
    ----------
    trackid_list : list or None, default=None
        List of trackids to use in train-test split. If None, uses all tracks
    test_size : float, default=0.15
        Fraction of tracks to use in test set. The test set will be as close
        as possible in size to this value, but it may not be exact due to the
        artist-conditional constraint.
    num_splits : int, default=5
        Number of random splits to create
    random_state : int or None, default=None
        A random state to optionally reproduce the same random split.
    artist_index : dict or None, default=None
        Dictionary mapping each track id in trackid_list to a string that
        uniquely identifies each artist.
        If None, uses the predefined index ARTIST_INDEX.

    Returns
    -------
    splits : list of dicts
        List of length num_splits of train/test split dictionaries. Each
        dictionary has the keys 'train' and 'test', each which map to lists of
        trackids.

    """
    if trackid_list is None:
        trackid_list = TRACK_LIST_V1

    if artist_index is None:
        artist_index = ARTIST_INDEX

    # Look artists up in the resolved `artist_index`; reading the global
    # ARTIST_INDEX here would silently ignore a caller-supplied mapping.
    artists = np.asarray([artist_index[trackid] for trackid in trackid_list])

    splitter = GroupShuffleSplit(n_splits=num_splits,
                                 random_state=random_state,
                                 test_size=test_size)

    trackid_array = np.array(trackid_list)
    return [
        {
            'train': list(trackid_array[train]),
            'test': list(trackid_array[test]),
        }
        for train, test in splitter.split(trackid_array, groups=artists)
    ]