本文整理汇总了Python中sklearn.model_selection.GroupKFold类的典型用法代码示例。如果您正苦于以下问题：Python model_selection.GroupKFold的具体用法？Python model_selection.GroupKFold怎么用？Python model_selection.GroupKFold使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块sklearn.model_selection的用法示例。
在下文中一共展示了model_selection.GroupKFold类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_custom_cv
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupKFold [as 别名]
def test_custom_cv(self):
    """Check NestedCVWrapper when it is given a GroupKFold splitter.

    The same data is passed as both the training set and ``X_test``; the
    two encoded outputs are expected to differ.
    """
    # 12 rows of categorical features, written as repeated row patterns.
    x = np.array(
        [['a', 'b', 'c']] * 4
        + [['b', 'b', 'c']] * 2
        + [['b', 'b', 'b']] * 4
        + [['a', 'b', 'a']] * 2
    )
    # Group ids cycle 1, 2, 3 across the rows; targets come in runs of four.
    groups = [1, 2, 3] * 4
    y = [1] * 4 + [2] * 4 + [3] * 4

    splitter = GroupKFold(n_splits=3)
    nested = NestedCVWrapper(encoders.TargetEncoder(), cv=splitter)
    encoded_train, encoded_valid = nested.fit_transform(
        x, y, X_test=x, groups=groups
    )

    # The outputs should differ: the training result is produced with nested
    # folds, while the validation result comes from an encoder fitted on all
    # of the x & y data.
    outputs_match = np.allclose(encoded_train, encoded_valid)
    self.assertFalse(outputs_match)
示例2: test_rfe_cv_groups
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupKFold [as 别名]
def test_rfe_cv_groups():
    """Smoke-test RFECV fitted with a GroupKFold splitter and explicit groups."""
    rng = check_random_state(0)
    iris = load_iris()

    n_groups = 4
    # Spread the values 0..n_groups evenly over the samples and floor them to
    # obtain one group id per sample.
    group_ids = np.floor(np.linspace(0, n_groups, len(iris.target)))

    features = iris.data
    # Collapse the target to a binary problem: class 0 versus the rest.
    binary_target = (iris.target > 0).astype(int)

    base_estimator = RandomForestClassifier(random_state=rng)
    selector = RFECV(
        estimator=base_estimator,
        step=1,
        scoring='accuracy',
        cv=GroupKFold(n_splits=2),
    )
    selector.fit(features, binary_target, groups=group_ids)

    assert selector.n_features_ > 0
示例3: test_2d_y
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupKFold [as 别名]
def test_2d_y():
    """Smoke test: splitters accept 1-D and 2-D ``y``.

    For a multi-label ``y`` a splitter may either work or raise a
    ``ValueError`` carrying the "supported target types" message.
    """
    n_samples = 30
    rng = np.random.RandomState(1)
    # NOTE: the order of the draws below fixes the generated data.
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))

    splitters = [
        LeaveOneOut(),
        LeavePOut(p=2),
        KFold(),
        StratifiedKFold(),
        RepeatedKFold(),
        RepeatedStratifiedKFold(),
        ShuffleSplit(),
        StratifiedShuffleSplit(test_size=.5),
        GroupShuffleSplit(),
        LeaveOneGroupOut(),
        LeavePGroupsOut(n_groups=2),
        GroupKFold(),
        TimeSeriesSplit(),
        PredefinedSplit(test_fold=groups),
    ]

    # Message expected from splitters that reject multi-label targets.
    allowed_target_types = ('binary', 'multiclass')
    expected_msg = "Supported target types are: {}. Got 'multilabel".format(
        allowed_target_types)

    for splitter in splitters:
        # Plain and column-vector targets must always be accepted.
        for target in (y, y_2d):
            list(splitter.split(X, target, groups))

        try:
            list(splitter.split(X, y_multilabel, groups))
        except ValueError as err:
            assert expected_msg in str(err)
示例4: test_cross_val_score_predict_groups
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupKFold [as 别名]
def test_cross_val_score_predict_groups():
    """Group-based splitters must raise ValueError when ``groups`` is missing.

    Both ``cross_val_score`` and ``cross_val_predict`` are called without
    ``groups`` and are expected to propagate the splitter's error.
    """
    X, y = make_classification(n_samples=20, n_classes=2, random_state=0)
    clf = SVC(kernel="linear")

    group_cvs = [
        LeaveOneGroupOut(),
        LeavePGroupsOut(2),
        GroupKFold(),
        GroupShuffleSplit(),
    ]

    for cv in group_cvs:
        # Same expectation for both helpers, checked in the original order.
        for cv_helper in (cross_val_score, cross_val_predict):
            assert_raise_message(
                ValueError,
                "The 'groups' parameter should not be None.",
                cv_helper, estimator=clf, X=X, y=y, cv=cv,
            )
示例5: test_grid_search_groups
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupKFold [as 别名]
def test_grid_search_groups():
    """Check how GridSearchCV handles ``groups`` for different splitters.

    With a group-based splitter, fitting without ``groups`` must raise a
    ValueError and fitting with ``groups`` must succeed. With a splitter that
    does not use groups, a plain fit must succeed.
    """
    rng = np.random.RandomState(0)
    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 3, 15)

    clf = LinearSVC(random_state=0)
    grid = {'C': [1]}

    group_cvs = [
        LeaveOneGroupOut(),
        LeavePGroupsOut(2),
        GroupKFold(),
        GroupShuffleSplit(),
    ]
    for group_cv in group_cvs:
        search = GridSearchCV(clf, grid, cv=group_cv)
        assert_raise_message(
            ValueError,
            "The 'groups' parameter should not be None.",
            search.fit, X, y,
        )
        # Supplying the groups makes the same search fit without error.
        search.fit(X, y, groups=groups)

    non_group_cvs = [StratifiedKFold(), StratifiedShuffleSplit()]
    for plain_cv in non_group_cvs:
        search = GridSearchCV(clf, grid, cv=plain_cv)
        # Should not raise an error
        search.fit(X, y)
示例6: test_fit_GroupKFold
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupKFold [as 别名]
def test_fit_GroupKFold():
    """Check that TPOT handles the ``groups`` argument when cv is a GroupKFold."""
    # Split the samples into two groups around the median of their per-row
    # mean; per the original note, this tests whether the darker digit images
    # generalize to the lighter ones.
    row_means = np.mean(training_features, axis=1)
    groups = row_means >= np.median(row_means)

    tpot_settings = dict(
        random_state=42,
        population_size=2,
        offspring_size=4,
        generations=1,
        verbosity=0,
        config_dict='TPOT light',
        cv=model_selection.GroupKFold(n_splits=2),
    )
    tpot_obj = TPOTClassifier(**tpot_settings)
    tpot_obj.fit(training_features, training_target, groups=groups)

    test_score = tpot_obj.score(testing_features, testing_target)
    assert_greater_equal(test_score, 0.97)
示例7: __init__
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupKFold [as 别名]
def __init__(self, classes, hyperparams=None, groups=None):
    """Store the configuration and register the core hyperparameters.

    Args:
        classes: Stored unchanged on ``self._classes``.
        hyperparams: Optional hyperparameter mapping; a fresh empty dict is
            used when ``None`` is given.
        groups: Optional group labels, used by GroupKFold for splitting
            train/validation.
    """
    self._classes = classes

    # Parameters and the model start out empty.
    self._params = {}
    self._model = None

    # Used by GroupKFold for splitting train/validation.
    self._groups = groups

    # Hyperparameters: use the caller's mapping when one was supplied.
    if hyperparams is None:
        self._hyperparams = {}
    else:
        self._hyperparams = hyperparams
    self._hyperparam_search_space = {}

    # Register, in order: the algorithm, the random state and the CV strategy.
    for hyperparam_name in ('algorithm', 'random_state', 'hyperparam_strategy'):
        self._get_or_set_hyperparam(hyperparam_name)
示例8: test_sklearn_cv_with_groups
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupKFold [as 别名]
def test_sklearn_cv_with_groups(tmp_dir):
    """Run the Sklearn tuner with a GroupKFold splitter and group labels.

    Args:
        tmp_dir: Directory handed to the tuner as its ``directory``.
    """
    oracle = kt.oracles.BayesianOptimization(
        objective=kt.Objective('score', 'max'),
        max_trials=10)
    tuner = sklearn_tuner.Sklearn(
        oracle=oracle,
        hypermodel=build_model,
        cv=model_selection.GroupKFold(5),
        directory=tmp_dir)

    n_samples = 50
    x = np.random.uniform(size=(n_samples, 10))
    y = np.random.randint(0, 2, size=(n_samples,))
    groups = np.random.randint(0, 5, size=(n_samples,))

    tuner.search(x, y, groups=groups)

    # One trial is expected per allowed oracle trial.
    assert len(tuner.oracle.trials) == 10

    top_trial = tuner.oracle.get_best_trials()[0]
    assert top_trial.status == 'COMPLETED'
    assert top_trial.score is not None
    assert top_trial.best_step == 0
    assert top_trial.metrics.exists('score')

    # Make sure best model can be reloaded.
    reloaded = tuner.get_best_models()[0]
    reloaded.score(x, y)
示例9: test_experiment_manual_cv_group
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupKFold [as 别名]
def test_experiment_manual_cv_group(tmpdir_name):
    """Run an experiment with GroupKFold(2) on two groups with opposite labels.

    Group 0 (1000 rows) labels ``x > 5`` as positive and group 1 (100 rows)
    labels ``x <= 5`` as positive. The last reported metric is expected to
    stay below 0.7.

    Args:
        tmpdir_name: Passed to ``run_experiment`` as its output location
            (presumably a temporary directory - confirm against the fixture).
    """
    majority = pd.DataFrame({'x': np.random.randint(0, 10, size=1000)})
    majority['y'] = majority['x'] > 5
    majority['grp'] = 0

    minority = pd.DataFrame({'x': np.random.randint(0, 10, size=100)})
    minority['y'] = minority['x'] <= 5
    minority['grp'] = 1

    X = pd.concat([majority, minority]).reset_index(drop=True)
    y = X['y']

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=0)

    # Keep the group labels of the training rows before dropping the helper
    # columns from both feature frames.
    grp = X_train['grp']
    helper_columns = ['y', 'grp']
    X_train = X_train.drop(helper_columns, axis=1)
    X_test = X_test.drop(helper_columns, axis=1)

    params = {
        'objective': 'binary',
        'max_depth': 8
    }

    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name,
                            cv=GroupKFold(2), groups=grp)

    final_metric = result.metrics[-1]
    assert final_metric < 0.7
示例10: test_nested_cv
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupKFold [as 别名]
def test_nested_cv():
    """Run nested cross-validation for every inner/outer splitter pairing.

    The test passes when no combination raises; the inner grid search is
    built with ``error_score='raise'``.
    """
    rng = np.random.RandomState(0)
    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 5, 15)

    cvs = [
        LeaveOneGroupOut(),
        LeaveOneOut(),
        GroupKFold(),
        StratifiedKFold(),
        StratifiedShuffleSplit(n_splits=3, random_state=0),
    ]

    for inner_cv, outer_cv in combinations_with_replacement(cvs, 2):
        inner_search = GridSearchCV(
            Ridge(),
            param_grid={'alpha': [1, .1]},
            cv=inner_cv,
            error_score='raise',
            iid=False,
        )
        # The groups go to the outer splitter directly and to the inner
        # search through fit_params.
        cross_val_score(
            inner_search, X=X, y=y, groups=groups, cv=outer_cv,
            fit_params={'groups': groups},
        )
示例11: test_nsplit_default_warn
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupKFold [as 别名]
def test_nsplit_default_warn():
    """Check the FutureWarning for splitters built without ``n_splits``.

    Each splitter must warn with NSPLIT_WARNING when constructed with no
    arguments and must stay silent when ``n_splits=5`` is given.
    Per the original note, this test will be removed in 0.22.
    """
    splitter_classes = (KFold, GroupKFold, StratifiedKFold, TimeSeriesSplit)

    for splitter_cls in splitter_classes:
        assert_warns_message(FutureWarning, NSPLIT_WARNING, splitter_cls)

    for splitter_cls in splitter_classes:
        assert_no_warnings(splitter_cls, n_splits=5)
示例12: test_objectmapper
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupKFold [as 别名]
def test_objectmapper(self):
    """Check that ``ModelFrame.model_selection`` exposes the objects of ``ms``.

    Each listed attribute of the accessor must be the very same object as the
    attribute of that name on ``ms``.
    """
    df = pdml.ModelFrame([])

    # Splitter Classes
    # StratifiedShuffleSplit is intentionally left out: its assertion was
    # disabled in the original test.
    splitter_names = (
        'KFold',
        'GroupKFold',
        'StratifiedKFold',
        'LeaveOneGroupOut',
        'LeavePGroupsOut',
        'LeaveOneOut',
        'LeavePOut',
        'ShuffleSplit',
        'GroupShuffleSplit',
        'PredefinedSplit',
        'TimeSeriesSplit',
    )
    # Splitter Functions: nothing is checked in this section.

    # Hyper-parameter optimizers
    optimizer_names = (
        'GridSearchCV',
        'RandomizedSearchCV',
        'ParameterGrid',
        'ParameterSampler',
    )
    # Model validation: nothing is checked in this section.

    for attr_name in splitter_names + optimizer_names:
        self.assertIs(getattr(df.model_selection, attr_name),
                      getattr(ms, attr_name))
示例13: test_objectmapper_abbr
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupKFold [as 别名]
def test_objectmapper_abbr(self):
    """Check that the abbreviated ``ModelFrame.ms`` accessor mirrors ``ms``.

    Each listed attribute of ``df.ms`` must be the very same object as the
    attribute of that name on the ``ms`` module alias.
    """
    df = pdml.ModelFrame([])

    # Splitter Classes
    # StratifiedShuffleSplit is intentionally left out: its assertion was
    # disabled in the original test.
    checked_names = [
        'KFold',
        'GroupKFold',
        'StratifiedKFold',
        'LeaveOneGroupOut',
        'LeavePGroupsOut',
        'LeaveOneOut',
        'LeavePOut',
        'ShuffleSplit',
        'GroupShuffleSplit',
        'PredefinedSplit',
        'TimeSeriesSplit',
    ]
    # Splitter Functions: nothing is checked in this section.

    # Hyper-parameter optimizers
    checked_names += [
        'GridSearchCV',
        'RandomizedSearchCV',
        'ParameterGrid',
        'ParameterSampler',
    ]
    # Model validation: nothing is checked in this section.

    for attr_name in checked_names:
        self.assertIs(getattr(df.ms, attr_name), getattr(ms, attr_name))
示例14: test_nested_cv
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupKFold [as 别名]
def test_nested_cv():
    """Exercise nested cross-validation over all pairs of splitters.

    Every (inner, outer) combination, including a splitter paired with
    itself, is run once; the test passes when none of them raises.
    """
    rng = np.random.RandomState(0)
    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 5, 15)

    cvs = [LeaveOneGroupOut(),
           LeaveOneOut(),
           GroupKFold(),
           StratifiedKFold(),
           StratifiedShuffleSplit(n_splits=3, random_state=0)]

    splitter_pairs = combinations_with_replacement(cvs, 2)
    for inner_cv, outer_cv in splitter_pairs:
        alpha_grid = {'alpha': [1, .1]}
        gs = GridSearchCV(Ridge(), param_grid=alpha_grid, cv=inner_cv)
        # groups reaches the outer splitter via ``groups`` and the inner
        # grid search via ``fit_params``.
        cross_val_score(gs, X=X, y=y, groups=groups, cv=outer_cv,
                        fit_params={'groups': groups})
示例15: test_learning_curve_with_shuffle
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import GroupKFold [as 别名]
def test_learning_curve_with_shuffle():
    """Check ``learning_curve`` with ``shuffle=True`` on grouped data.

    Per the original note, this case was designed to verify the changes made
    in pull request #7506. It checks three things:

    * with shuffling, the batch run yields the expected mean scores;
    * without shuffling, the call raises ValueError;
    * the incremental-learning run matches the batch run.
    """
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [11, 12], [13, 14], [15, 16],
                  [17, 18], [19, 20], [7, 8], [9, 10], [11, 12], [13, 14],
                  [15, 16], [17, 18]])
    y = np.array([1, 1, 1, 2, 3, 4, 1, 1, 2, 3, 4, 1, 2, 3, 4])
    groups = np.array([1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 4, 4, 4, 4])
    # Splits on these groups fail without shuffle as the first iteration
    # of the learning curve doesn't contain label 4 in the training set.
    estimator = PassiveAggressiveClassifier(max_iter=5, tol=None,
                                            shuffle=False)
    cv = GroupKFold(n_splits=2)

    # Keyword arguments common to all three learning_curve invocations.
    common = dict(cv=cv, n_jobs=1,
                  train_sizes=np.linspace(0.3, 1.0, 3), groups=groups)

    # Batch mode with shuffling.
    batch_result = learning_curve(
        estimator, X, y, shuffle=True, random_state=2, **common)
    train_sizes_batch, train_scores_batch, test_scores_batch = batch_result
    assert_array_almost_equal(train_scores_batch.mean(axis=1),
                              np.array([0.75, 0.3, 0.36111111]))
    assert_array_almost_equal(test_scores_batch.mean(axis=1),
                              np.array([0.36111111, 0.25, 0.25]))

    # Without shuffling the same call is expected to fail.
    assert_raises(ValueError, learning_curve, estimator, X, y, **common)

    # Incremental mode with shuffling must reproduce the batch scores.
    incremental_result = learning_curve(
        estimator, X, y, shuffle=True, random_state=2,
        exploit_incremental_learning=True, **common)
    train_sizes_inc, train_scores_inc, test_scores_inc = incremental_result
    assert_array_almost_equal(train_scores_inc.mean(axis=1),
                              train_scores_batch.mean(axis=1))
    assert_array_almost_equal(test_scores_inc.mean(axis=1),
                              test_scores_batch.mean(axis=1))