

Python model_selection.GroupKFold Method Code Examples

This article collects typical usage examples of the sklearn.model_selection.GroupKFold method in Python. If you are wondering what model_selection.GroupKFold does, how to call it, or how it is used in real projects, the curated code examples below may help. You can also explore other usage examples from the sklearn.model_selection module.


The following presents 15 code examples of the model_selection.GroupKFold method, ordered by popularity by default.
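
Before diving into the examples, here is a minimal sketch of the basic API (the data below is synthetic and chosen purely for illustration): GroupKFold guarantees that all samples sharing the same group label land entirely in either the training split or the test split of a fold, which is what the examples that follow rely on when passing a groups array.

# Minimal GroupKFold usage sketch (illustrative synthetic data)
import numpy as np
from sklearn.model_selection import GroupKFold

X = np.arange(20).reshape(10, 2)                    # 10 samples, 2 features
y = np.array([0, 1] * 5)                            # binary target
groups = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])   # 5 groups of 2 samples each

gkf = GroupKFold(n_splits=5)
for fold, (train_idx, test_idx) in enumerate(gkf.split(X, y, groups)):
    # No group appears in both the train and the test indices of a fold.
    assert set(groups[train_idx]).isdisjoint(groups[test_idx])
    print(f"fold {fold}: test groups = {sorted(set(groups[test_idx]))}")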

Example 1: test_custom_cv

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import GroupKFold [as alias]
def test_custom_cv(self):
        x = np.array([
            ['a', 'b', 'c'],
            ['a', 'b', 'c'],
            ['a', 'b', 'c'],
            ['a', 'b', 'c'],
            ['b', 'b', 'c'],
            ['b', 'b', 'c'],
            ['b', 'b', 'b'],
            ['b', 'b', 'b'],
            ['b', 'b', 'b'],
            ['b', 'b', 'b'],
            ['a', 'b', 'a'],
            ['a', 'b', 'a'],
        ])
        groups = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]
        y = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]
        gkfold = GroupKFold(n_splits=3)
        wrapper = NestedCVWrapper(encoders.TargetEncoder(), cv=gkfold)
        result_train, result_valid = wrapper.fit_transform(x, y, X_test=x, groups=groups)

        # We would expect result_train != result_valid, since result_train is generated using nested
        # folds while result_valid is generated by fitting the encoder on all of the x & y data
        self.assertFalse(np.allclose(result_train, result_valid)) 
Developer: scikit-learn-contrib, Project: category_encoders, Lines: 26, Source: test_wrapper.py

Example 2: test_rfe_cv_groups

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import GroupKFold [as alias]
def test_rfe_cv_groups():
    generator = check_random_state(0)
    iris = load_iris()
    number_groups = 4
    groups = np.floor(np.linspace(0, number_groups, len(iris.target)))
    X = iris.data
    y = (iris.target > 0).astype(int)

    est_groups = RFECV(
        estimator=RandomForestClassifier(random_state=generator),
        step=1,
        scoring='accuracy',
        cv=GroupKFold(n_splits=2)
    )
    est_groups.fit(X, y, groups=groups)
    assert est_groups.n_features_ > 0 
Developer: PacktPublishing, Project: Mastering-Elasticsearch-7.0, Lines: 18, Source: test_rfe.py

Example 3: test_2d_y

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import GroupKFold [as alias]
def test_2d_y():
    # smoke test for 2d y and multi-label
    n_samples = 30
    rng = np.random.RandomState(1)
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))
    splitters = [LeaveOneOut(), LeavePOut(p=2), KFold(), StratifiedKFold(),
                 RepeatedKFold(), RepeatedStratifiedKFold(),
                 ShuffleSplit(), StratifiedShuffleSplit(test_size=.5),
                 GroupShuffleSplit(), LeaveOneGroupOut(),
                 LeavePGroupsOut(n_groups=2), GroupKFold(), TimeSeriesSplit(),
                 PredefinedSplit(test_fold=groups)]
    for splitter in splitters:
        list(splitter.split(X, y, groups))
        list(splitter.split(X, y_2d, groups))
        try:
            list(splitter.split(X, y_multilabel, groups))
        except ValueError as e:
            allowed_target_types = ('binary', 'multiclass')
            msg = "Supported target types are: {}. Got 'multilabel".format(
                allowed_target_types)
            assert msg in str(e) 
Developer: PacktPublishing, Project: Mastering-Elasticsearch-7.0, Lines: 27, Source: test_split.py

Example 4: test_cross_val_score_predict_groups

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import GroupKFold [as alias]
def test_cross_val_score_predict_groups():
    # Check if ValueError (when groups is None) propagates to cross_val_score
    # and cross_val_predict
    # And also check if groups is correctly passed to the cv object
    X, y = make_classification(n_samples=20, n_classes=2, random_state=0)

    clf = SVC(kernel="linear")

    group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(),
                 GroupShuffleSplit()]
    for cv in group_cvs:
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_score, estimator=clf, X=X, y=y, cv=cv)
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_predict, estimator=clf, X=X, y=y, cv=cv) 
Developer: PacktPublishing, Project: Mastering-Elasticsearch-7.0, Lines: 19, Source: test_validation.py

Example 5: test_grid_search_groups

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import GroupKFold [as alias]
def test_grid_search_groups():
    # Check if ValueError (when groups is None) propagates to GridSearchCV
    # And also check if groups is correctly passed to the cv object
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 3, 15)

    clf = LinearSVC(random_state=0)
    grid = {'C': [1]}

    group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(),
                 GroupShuffleSplit()]
    for cv in group_cvs:
        gs = GridSearchCV(clf, grid, cv=cv)
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             gs.fit, X, y)
        gs.fit(X, y, groups=groups)

    non_group_cvs = [StratifiedKFold(), StratifiedShuffleSplit()]
    for cv in non_group_cvs:
        gs = GridSearchCV(clf, grid, cv=cv)
        # Should not raise an error
        gs.fit(X, y) 
Developer: PacktPublishing, Project: Mastering-Elasticsearch-7.0, Lines: 27, Source: test_search.py

Example 6: test_fit_GroupKFold

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import GroupKFold [as alias]
def test_fit_GroupKFold():
    """Assert that TPOT properly handles the group parameter when using GroupKFold."""
    # This check tests whether the darker digit images would generalize to the lighter ones.
    means = np.mean(training_features, axis=1)
    groups = means >= np.median(means)

    tpot_obj = TPOTClassifier(
        random_state=42,
        population_size=2,
        offspring_size=4,
        generations=1,
        verbosity=0,
        config_dict='TPOT light',
        cv=model_selection.GroupKFold(n_splits=2),
    )
    tpot_obj.fit(training_features, training_target, groups=groups)

    assert_greater_equal(tpot_obj.score(testing_features, testing_target), 0.97) 
Developer: EpistasisLab, Project: tpot, Lines: 20, Source: tpot_tests.py

Example 7: __init__

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import GroupKFold [as alias]
def __init__(self, classes, hyperparams=None, groups=None):
        self._classes = classes

        # Initialize params.
        self._params = {}
        self._model = None

        # Used by GroupKFold for splitting train/validation.
        self._groups = groups

        # Initialize hyperparams.
        self._hyperparams = {} if hyperparams is None else hyperparams
        self._hyperparam_search_space = {}
        # Set algorithm.
        self._get_or_set_hyperparam('algorithm')
        # Set random state.
        self._get_or_set_hyperparam('random_state')
        # Set CV strategy.
        self._get_or_set_hyperparam('hyperparam_strategy') 
Developer: HealthRex, Project: CDSS, Lines: 23, Source: SupervisedClassifier.py

Example 8: test_sklearn_cv_with_groups

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import GroupKFold [as alias]
def test_sklearn_cv_with_groups(tmp_dir):
    tuner = sklearn_tuner.Sklearn(
        oracle=kt.oracles.BayesianOptimization(
            objective=kt.Objective('score', 'max'),
            max_trials=10),
        hypermodel=build_model,
        cv=model_selection.GroupKFold(5),
        directory=tmp_dir)

    x = np.random.uniform(size=(50, 10))
    y = np.random.randint(0, 2, size=(50,))
    groups = np.random.randint(0, 5, size=(50,))
    tuner.search(x, y, groups=groups)

    assert len(tuner.oracle.trials) == 10

    best_trial = tuner.oracle.get_best_trials()[0]
    assert best_trial.status == 'COMPLETED'
    assert best_trial.score is not None
    assert best_trial.best_step == 0
    assert best_trial.metrics.exists('score')

    # Make sure best model can be reloaded.
    best_model = tuner.get_best_models()[0]
    best_model.score(x, y) 
Developer: keras-team, Project: keras-tuner, Lines: 27, Source: sklearn_test.py

Example 9: test_experiment_manual_cv_group

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import GroupKFold [as alias]
def test_experiment_manual_cv_group(tmpdir_name):
    df1 = pd.DataFrame()
    df1['x'] = np.random.randint(0, 10, size=1000)
    df1['y'] = df1['x'] > 5
    df1['grp'] = 0

    df2 = pd.DataFrame()
    df2['x'] = np.random.randint(0, 10, size=100)
    df2['y'] = df2['x'] <= 5
    df2['grp'] = 1

    X = pd.concat([df1, df2]).reset_index(drop=True)
    y = X['y']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    grp = X_train['grp']
    X_train = X_train.drop(['y', 'grp'], axis=1)
    X_test = X_test.drop(['y', 'grp'], axis=1)

    params = {
        'objective': 'binary',
        'max_depth': 8
    }

    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name, cv=GroupKFold(2), groups=grp)
    assert result.metrics[-1] < 0.7 
Developer: nyanp, Project: nyaggle, Lines: 29, Source: test_run.py

Example 10: test_nested_cv

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import GroupKFold [as alias]
def test_nested_cv():
    # Test if nested cross validation works with different combinations of cv
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 5, 15)

    cvs = [LeaveOneGroupOut(), LeaveOneOut(), GroupKFold(), StratifiedKFold(),
           StratifiedShuffleSplit(n_splits=3, random_state=0)]

    for inner_cv, outer_cv in combinations_with_replacement(cvs, 2):
        gs = GridSearchCV(Ridge(), param_grid={'alpha': [1, .1]},
                          cv=inner_cv, error_score='raise', iid=False)
        cross_val_score(gs, X=X, y=y, groups=groups, cv=outer_cv,
                        fit_params={'groups': groups}) 
Developer: PacktPublishing, Project: Mastering-Elasticsearch-7.0, Lines: 17, Source: test_split.py

Example 11: test_nsplit_default_warn

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import GroupKFold [as alias]
def test_nsplit_default_warn():
    # Test that warnings are raised. Will be removed in 0.22
    assert_warns_message(FutureWarning, NSPLIT_WARNING, KFold)
    assert_warns_message(FutureWarning, NSPLIT_WARNING, GroupKFold)
    assert_warns_message(FutureWarning, NSPLIT_WARNING, StratifiedKFold)
    assert_warns_message(FutureWarning, NSPLIT_WARNING, TimeSeriesSplit)

    assert_no_warnings(KFold, n_splits=5)
    assert_no_warnings(GroupKFold, n_splits=5)
    assert_no_warnings(StratifiedKFold, n_splits=5)
    assert_no_warnings(TimeSeriesSplit, n_splits=5) 
Developer: PacktPublishing, Project: Mastering-Elasticsearch-7.0, Lines: 13, Source: test_split.py

Example 12: test_objectmapper

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import GroupKFold [as alias]
def test_objectmapper(self):
        df = pdml.ModelFrame([])

        # Splitter Classes
        self.assertIs(df.model_selection.KFold, ms.KFold)
        self.assertIs(df.model_selection.GroupKFold, ms.GroupKFold)
        self.assertIs(df.model_selection.StratifiedKFold, ms.StratifiedKFold)

        self.assertIs(df.model_selection.LeaveOneGroupOut, ms.LeaveOneGroupOut)
        self.assertIs(df.model_selection.LeavePGroupsOut, ms.LeavePGroupsOut)
        self.assertIs(df.model_selection.LeaveOneOut, ms.LeaveOneOut)
        self.assertIs(df.model_selection.LeavePOut, ms.LeavePOut)

        self.assertIs(df.model_selection.ShuffleSplit, ms.ShuffleSplit)
        self.assertIs(df.model_selection.GroupShuffleSplit,
                      ms.GroupShuffleSplit)
        # self.assertIs(df.model_selection.StratifiedShuffleSplit,
        #               ms.StratifiedShuffleSplit)
        self.assertIs(df.model_selection.PredefinedSplit, ms.PredefinedSplit)
        self.assertIs(df.model_selection.TimeSeriesSplit, ms.TimeSeriesSplit)

        # Splitter Functions

        # Hyper-parameter optimizers
        self.assertIs(df.model_selection.GridSearchCV, ms.GridSearchCV)
        self.assertIs(df.model_selection.RandomizedSearchCV, ms.RandomizedSearchCV)
        self.assertIs(df.model_selection.ParameterGrid, ms.ParameterGrid)
        self.assertIs(df.model_selection.ParameterSampler, ms.ParameterSampler)

        # Model validation 
Developer: pandas-ml, Project: pandas-ml, Lines: 32, Source: test_model_selection.py

Example 13: test_objectmapper_abbr

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import GroupKFold [as alias]
def test_objectmapper_abbr(self):
        df = pdml.ModelFrame([])

        # Splitter Classes
        self.assertIs(df.ms.KFold, ms.KFold)
        self.assertIs(df.ms.GroupKFold, ms.GroupKFold)
        self.assertIs(df.ms.StratifiedKFold, ms.StratifiedKFold)

        self.assertIs(df.ms.LeaveOneGroupOut, ms.LeaveOneGroupOut)
        self.assertIs(df.ms.LeavePGroupsOut, ms.LeavePGroupsOut)
        self.assertIs(df.ms.LeaveOneOut, ms.LeaveOneOut)
        self.assertIs(df.ms.LeavePOut, ms.LeavePOut)

        self.assertIs(df.ms.ShuffleSplit, ms.ShuffleSplit)
        self.assertIs(df.ms.GroupShuffleSplit,
                      ms.GroupShuffleSplit)
        # self.assertIs(df.ms.StratifiedShuffleSplit,
        #               ms.StratifiedShuffleSplit)
        self.assertIs(df.ms.PredefinedSplit, ms.PredefinedSplit)
        self.assertIs(df.ms.TimeSeriesSplit, ms.TimeSeriesSplit)

        # Splitter Functions

        # Hyper-parameter optimizers
        self.assertIs(df.ms.GridSearchCV, ms.GridSearchCV)
        self.assertIs(df.ms.RandomizedSearchCV, ms.RandomizedSearchCV)
        self.assertIs(df.ms.ParameterGrid, ms.ParameterGrid)
        self.assertIs(df.ms.ParameterSampler, ms.ParameterSampler)

        # Model validation 
Developer: pandas-ml, Project: pandas-ml, Lines: 32, Source: test_model_selection.py

Example 14: test_nested_cv

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import GroupKFold [as alias]
def test_nested_cv():
    # Test if nested cross validation works with different combinations of cv
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 5, 15)

    cvs = [LeaveOneGroupOut(), LeaveOneOut(), GroupKFold(), StratifiedKFold(),
           StratifiedShuffleSplit(n_splits=3, random_state=0)]

    for inner_cv, outer_cv in combinations_with_replacement(cvs, 2):
        gs = GridSearchCV(Ridge(), param_grid={'alpha': [1, .1]},
                          cv=inner_cv)
        cross_val_score(gs, X=X, y=y, groups=groups, cv=outer_cv,
                        fit_params={'groups': groups}) 
Developer: alvarobartt, Project: twitter-stock-recommendation, Lines: 17, Source: test_split.py

Example 15: test_learning_curve_with_shuffle

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import GroupKFold [as alias]
def test_learning_curve_with_shuffle():
    # Following test case was designed this way to verify the code
    # changes made in pull request: #7506.
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [11, 12], [13, 14], [15, 16],
                 [17, 18], [19, 20], [7, 8], [9, 10], [11, 12], [13, 14],
                 [15, 16], [17, 18]])
    y = np.array([1, 1, 1, 2, 3, 4, 1, 1, 2, 3, 4, 1, 2, 3, 4])
    groups = np.array([1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 4, 4, 4, 4])
    # Splits on these groups fail without shuffle as the first iteration
    # of the learning curve doesn't contain label 4 in the training set.
    estimator = PassiveAggressiveClassifier(max_iter=5, tol=None,
                                            shuffle=False)

    cv = GroupKFold(n_splits=2)
    train_sizes_batch, train_scores_batch, test_scores_batch = learning_curve(
        estimator, X, y, cv=cv, n_jobs=1, train_sizes=np.linspace(0.3, 1.0, 3),
        groups=groups, shuffle=True, random_state=2)
    assert_array_almost_equal(train_scores_batch.mean(axis=1),
                              np.array([0.75, 0.3, 0.36111111]))
    assert_array_almost_equal(test_scores_batch.mean(axis=1),
                              np.array([0.36111111, 0.25, 0.25]))
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=cv, n_jobs=1,
                  train_sizes=np.linspace(0.3, 1.0, 3), groups=groups)

    train_sizes_inc, train_scores_inc, test_scores_inc = learning_curve(
        estimator, X, y, cv=cv, n_jobs=1, train_sizes=np.linspace(0.3, 1.0, 3),
        groups=groups, shuffle=True, random_state=2,
        exploit_incremental_learning=True)
    assert_array_almost_equal(train_scores_inc.mean(axis=1),
                              train_scores_batch.mean(axis=1))
    assert_array_almost_equal(test_scores_inc.mean(axis=1),
                              test_scores_batch.mean(axis=1)) 
Developer: alvarobartt, Project: twitter-stock-recommendation, Lines: 34, Source: test_validation.py


Note: The sklearn.model_selection.GroupKFold examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and redistribution or use should follow each project's license. Please do not reproduce without permission.