当前位置: 首页>>代码示例>>Python>>正文

Python model_selection.StratifiedShuffleSplit方法代码示例

本文整理汇总了Python中sklearn.model_selection.StratifiedShuffleSplit方法的典型用法代码示例。如果您正苦于以下问题：Python model_selection.StratifiedShuffleSplit方法的具体用法？Python model_selection.StratifiedShuffleSplit怎么用？Python model_selection.StratifiedShuffleSplit使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.model_selection的用法示例。


示例1: get_full_rbf_svm_clf

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import StratifiedShuffleSplit [as 别名]
def get_full_rbf_svm_clf(train_x, train_y, c_range=None, gamma_range=None,
                         n_jobs=14):
    """Grid-search ``C`` and ``gamma`` for an SVC and return a fresh classifier.

    The search runs ``GridSearchCV`` over every ``(C, gamma)`` pair, scoring
    each on two stratified shuffle splits that hold out 20% of the data
    (fixed ``random_state=42``).  The best pair and the searched ranges are
    printed.

    Parameters
    ----------
    train_x, train_y
        Training samples and labels passed to ``GridSearchCV.fit``.
    c_range, gamma_range
        Candidate values for ``C`` and ``gamma``.  Both default to ``None``
        but are placed in the parameter grid as-is, so callers are expected
        to supply real sequences.
    n_jobs
        Number of parallel jobs for the grid search (default 14, the value
        that used to be hard-coded).

    Returns
    -------
    SVC
        A new, *unfitted* ``SVC`` configured with the best ``C`` and
        ``gamma``.  It is not the estimator refitted by the grid search, and
        it does not inherit ``cache_size=1024`` from the search estimator.
    """
    param_grid = dict(gamma=gamma_range, C=c_range)
    cv = StratifiedShuffleSplit(n_splits=2, test_size=0.2, random_state=42)
    grid = GridSearchCV(SVC(cache_size=1024), param_grid=param_grid, cv=cv,
                        n_jobs=n_jobs, verbose=10)
    grid.fit(train_x, train_y)

    print("The best parameters are %s with a score of %0.2f" % (grid.best_params_, grid.best_score_))
    print("c_range:", c_range)
    print("gamma_range:", gamma_range)

    best = grid.best_params_
    return SVC(C=best['C'], gamma=best['gamma'], verbose=True)


示例2: test_2d_y

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import StratifiedShuffleSplit [as 别名]
def test_2d_y():
    """Smoke-test every splitter with 1-D, column-vector and multilabel ``y``.

    The 1-D and ``(n_samples, 1)`` targets must always be accepted.  A
    multilabel target may be rejected, but only with a ``ValueError`` whose
    message names the supported target types.
    """
    n_samples = 30
    rng = np.random.RandomState(1)
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))
    # NOTE(review): the end of this list was missing from this copy of the
    # code; it is closed after the last visible entry.  Confirm whether more
    # splitters belonged here.
    splitters = [LeaveOneOut(), LeavePOut(p=2), KFold(), StratifiedKFold(),
                 RepeatedKFold(), RepeatedStratifiedKFold(),
                 ShuffleSplit(), StratifiedShuffleSplit(test_size=.5),
                 GroupShuffleSplit(), LeaveOneGroupOut(),
                 LeavePGroupsOut(n_groups=2), GroupKFold(), TimeSeriesSplit()]
    for splitter in splitters:
        list(splitter.split(X, y, groups))
        list(splitter.split(X, y_2d, groups))
        try:
            list(splitter.split(X, y_multilabel, groups))
        except ValueError as e:
            # Rejecting multilabel input is acceptable only with this message.
            allowed_target_types = ('binary', 'multiclass')
            msg = "Supported target types are: {}. Got 'multilabel".format(
                allowed_target_types)
            assert msg in str(e)

示例3: test_stratified_shuffle_split_init

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import StratifiedShuffleSplit [as 别名]
def test_stratified_shuffle_split_init():
    """Check that StratifiedShuffleSplit rejects infeasible size settings.

    ``test_size`` and ``train_size`` are passed by keyword: recent
    scikit-learn releases make them keyword-only, so passing them
    positionally would raise ``TypeError`` at construction time, outside
    ``assert_raises``, instead of exercising the intended ``ValueError``.
    """
    X = np.arange(7)
    y = np.asarray([0, 1, 1, 1, 2, 2, 2])
    # Check that error is raised if there is a class with only one sample
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(3, test_size=0.2).split(X, y))

    # Check that error is raised if the test set size is smaller than n_classes
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(3, test_size=2).split(X, y))
    # Check that error is raised if the train set size is smaller than
    # n_classes
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(3, test_size=3,
                                         train_size=2).split(X, y))

    X = np.arange(9)
    y = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2])

    # Train size or test size too small
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(train_size=2).split(X, y))
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(test_size=2).split(X, y))

示例4: test_stratified_shuffle_split_multilabel

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import StratifiedShuffleSplit [as 别名]
def test_stratified_shuffle_split_multilabel():
    """Regression test for issue 9037: stratification on multilabel ``y``.

    For each label matrix, one 50/50 split must be a disjoint, complete
    partition of the rows and must keep the row-type ratio in both halves.
    """
    label_matrices = (
        np.array([[0, 1], [1, 0], [1, 0], [0, 1]]),
        np.array([[0, 1], [1, 1], [1, 1], [0, 1]]),
    )
    for y in label_matrices:
        X = np.ones_like(y)
        splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.5,
                                          random_state=0)
        train, test = next(splitter.split(X=X, y=y))
        labels_train, labels_test = y[train], y[test]

        # The two index sets must not share any sample ...
        assert_array_equal(np.intersect1d(train, test), [])

        # ... and together they must cover every sample.
        assert_array_equal(np.union1d(train, test), np.arange(len(y)))

        # Whole rows are stratified.  By construction the first column
        # alone identifies the row, so its mean is the class ratio.
        expected_ratio = np.mean(y[:, 0])
        assert_equal(expected_ratio, np.mean(labels_train[:, 0]))
        assert_equal(expected_ratio, np.mean(labels_test[:, 0]))

示例5: test_stratified_shuffle_split_multilabel_many_labels

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import StratifiedShuffleSplit [as 别名]
def test_stratified_shuffle_split_multilabel_many_labels():
    """Regression test for PR #9922: multilabel rows with > 1000 labels.

    ``str(row)`` abbreviates long rows with an ellipsis (positions 4 through
    ``len(row) - 4``), so distinct rows used to collapse to the same key when
    the multilabel problem was mapped to a multiclass one via the powerset
    method.  This test checks that such rows are still stratified correctly.
    """
    edge = [1, 0, 1]
    mostly_zeros = edge + [0] * 1000 + edge
    mostly_ones = edge + [1] * 1000 + edge
    y = np.array([mostly_zeros] * 10 + [mostly_ones] * 100)
    X = np.ones_like(y)

    splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.5,
                                      random_state=0)
    train, test = next(splitter.split(X=X, y=y))
    labels_train, labels_test = y[train], y[test]

    # Whole rows are stratified.  Column 4 lies in the long middle run, so by
    # construction it alone tells the two row types apart.
    expected_ratio = np.mean(y[:, 4])
    assert_equal(expected_ratio, np.mean(labels_train[:, 4]))
    assert_equal(expected_ratio, np.mean(labels_test[:, 4]))

示例6: test_grid_search_groups

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import StratifiedShuffleSplit [as 别名]
def test_grid_search_groups():
    """Check how GridSearchCV handles ``groups`` for different cv objects.

    Group-based splitters must raise ``ValueError`` when ``groups`` is
    omitted and must work when it is supplied.  Splitters that do not use
    groups must fit without it.
    """
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 3, 15)

    clf = LinearSVC(random_state=0)
    grid = {'C': [1]}

    # NOTE(review): the end of this list was missing from this copy of the
    # code; it is closed after the last visible entry.  Confirm whether more
    # group splitters belonged here.
    group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold()]
    expected_msg = "The 'groups' parameter should not be None."
    for cv in group_cvs:
        gs = GridSearchCV(clf, grid, cv=cv)
        # Without groups the splitter's ValueError must propagate unchanged.
        try:
            gs.fit(X, y)
        except ValueError as exc:
            assert expected_msg in str(exc)
        else:
            raise AssertionError(
                "GridSearchCV.fit did not raise ValueError without groups")
        gs.fit(X, y, groups=groups)

    non_group_cvs = [StratifiedKFold(), StratifiedShuffleSplit()]
    for cv in non_group_cvs:
        gs = GridSearchCV(clf, grid, cv=cv)
        # Should not raise an error
        gs.fit(X, y)

示例7: divide_train_val

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import StratifiedShuffleSplit [as 别名]
def divide_train_val(zeroes, ones, val_size):
    """Randomly split two arrays into training and validation parts.

    Each array is shuffled independently with ``np.random.permutation``.
    The first ``val_size // 2`` shuffled entries of each array become its
    validation part and the remaining entries its training part.  Both
    inputs must support indexing with an integer array.

    Returns
    -------
    tuple
        ``(train_0, train_1, val_0, val_1)`` where the ``_0`` parts come from
        ``zeroes`` and the ``_1`` parts from ``ones``.
    """
    per_class_val = val_size // 2

    # Draw the permutation for ``zeroes`` before the one for ``ones`` so the
    # global RNG is consumed in a fixed order.
    order_0 = np.random.permutation(len(zeroes))
    val_indices_0 = zeroes[order_0[:per_class_val]]
    train_indices_0 = zeroes[order_0[per_class_val:]]

    order_1 = np.random.permutation(len(ones))
    val_indices_1 = ones[order_1[:per_class_val]]
    train_indices_1 = ones[order_1[per_class_val:]]

    return train_indices_0, train_indices_1, val_indices_0, val_indices_1

示例8: subset_indices

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import StratifiedShuffleSplit [as 别名]
def subset_indices(d_source, d_target, subsetsize, subsetseed):
    """Pick sample subsets of the source and target datasets.

    Parameters
    ----------
    d_source, d_target
        Dataset objects.  This function reads ``.y`` and ``.images`` from
        both, and ``.has_ground_truth`` from ``d_target``.
    subsetsize
        Passed as ``test_size`` to the splitters.  A value ``<= 0`` disables
        subsetting.
    subsetseed
        Seed for a dedicated ``RandomState``.  ``0`` means use the global
        ``np.random`` state instead.

    Returns
    -------
    tuple
        ``(source_indices, target_indices, n_src, n_tgt)``.  With subsetting
        disabled both index entries are ``None`` and the counts are the full
        ``len(.images)`` of each dataset.
    """
    if subsetsize <= 0:
        # No subsetting requested: use the full datasets.
        return None, None, len(d_source.images), len(d_target.images)

    if subsetseed != 0:
        subset_rng = np.random.RandomState(subsetseed)
    else:
        subset_rng = np.random

    strat = StratifiedShuffleSplit(n_splits=1, test_size=subsetsize, random_state=subset_rng)
    shuf = ShuffleSplit(n_splits=1, test_size=subsetsize, random_state=subset_rng)

    # The "test" side of the single split is the subset that gets kept.
    _, source_indices = next(strat.split(d_source.y, d_source.y))
    n_src = source_indices.shape[0]

    if d_target.has_ground_truth:
        _, target_indices = next(strat.split(d_target.y, d_target.y))
    else:
        # Without labels there is nothing to stratify on; shuffle instead.
        _, target_indices = next(shuf.split(np.arange(len(d_target.images))))
    n_tgt = target_indices.shape[0]

    return source_indices, target_indices, n_src, n_tgt

示例9: gen_samples

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import StratifiedShuffleSplit [as 别名]
def gen_samples(self, y, n_samples, problem_type):
    """Yield ``(train, cal)`` index pairs from repeated shuffle splits of ``y``.

    A stratified splitter is used when ``problem_type`` is
    ``'classification'``; any other value uses a plain shuffle splitter.
    """
    # NOTE(review): the splitter constructor arguments and the ``else:`` line
    # were missing from this copy of the code.  ``n_splits=n_samples`` is a
    # reconstruction (``n_samples`` is otherwise unused); the original may
    # also have passed a ``test_size`` -- confirm against the upstream source.
    # The feature matrix is a placeholder; ``y.size`` fixes its row count.
    placeholder_x = np.zeros((y.size, 1))

    if problem_type == 'classification':
        splits = StratifiedShuffleSplit(n_splits=n_samples)
        split_ = splits.split(placeholder_x, y)
    else:
        splits = ShuffleSplit(n_splits=n_samples)
        split_ = splits.split(placeholder_x)

    for train, cal in split_:
        yield train, cal

# -----------------------------------------------------------------------------
# Conformal ensemble
# ----------------------------------------------------------------------------- 

示例10: cv_clf

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import StratifiedShuffleSplit [as 别名]
def cv_clf(x, y, test_size=0.2, n_splits=5, random_state=None, doesUpsample=True):
    """Yield ``(train_inds, valid_inds)`` pairs from a stratified shuffle split.

    Parameters
    ----------
    x, y
        Samples and labels handed to the splitter.  ``y`` must support
        indexing with an index array (``y[train_inds]``).
    test_size, n_splits, random_state
        Forwarded to the splitter.
    doesUpsample
        When true, each training index set is passed through
        ``upsample_indices_clf`` together with its labels before being
        yielded.  When false, the splitter's pairs are yielded unchanged.

    Notes
    -----
    Previously ``doesUpsample=False`` yielded the splitter's generator object
    as a single item and then still yielded the upsampled pairs.  Now every
    item is a ``(train, valid)`` pair and upsampling happens only on request.
    """
    # NOTE(review): ``sss`` is presumably StratifiedShuffleSplit imported
    # under an alias -- confirm.  ``test_size`` is passed by keyword because
    # recent scikit-learn releases make it keyword-only.
    splits = sss(n_splits, test_size=test_size, random_state=random_state).split(x, y)
    for train_inds, valid_inds in splits:
        if doesUpsample:
            train_inds = upsample_indices_clf(train_inds, y[train_inds])
        yield train_inds, valid_inds

示例11: test_stratified_shuffle_split_respects_test_size

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import StratifiedShuffleSplit [as 别名]
def test_stratified_shuffle_split_respects_test_size():
    """Every split must have exactly the requested train and test sizes."""
    labels = np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2])
    test_size, train_size = 5, 10

    splitter = StratifiedShuffleSplit(6, test_size=test_size,
                                      train_size=train_size, random_state=0)
    placeholder_x = np.ones(len(labels))
    for train, test in splitter.split(placeholder_x, labels):
        assert_equal(len(train), train_size)
        assert_equal(len(test), test_size)

示例12: test_stratified_shuffle_split_iter

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import StratifiedShuffleSplit [as 别名]
def test_stratified_shuffle_split_iter():
    """Check class balance, sizes and disjointness of stratified splits.

    Runs six splits with ``test_size=0.33`` over several label layouts,
    including a plain list and a list of strings.  Each split must contain
    the same classes on both sides, keep class proportions equal to one
    decimal, have the expected sizes, and share no indices.
    """
    ys = [np.array([1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]),
          np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]),
          np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2] * 2),
          np.array([1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]),
          np.array([-1] * 800 + [1] * 50),
          np.concatenate([[i] * (100 + i) for i in range(11)]),
          [1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3],
          ['1', '1', '1', '1', '2', '2', '2', '3', '3', '3', '3', '3'],
          ]

    for y in ys:
        sss = StratifiedShuffleSplit(6, test_size=0.33,
                                     random_state=0).split(np.ones(len(y)), y)
        y = np.asanyarray(y)  # To make it indexable for y[train]
        # this is how test-size is computed internally
        # in _validate_shuffle_split
        test_size = np.ceil(0.33 * len(y))
        train_size = len(y) - test_size
        for train, test in sss:
            assert_array_equal(np.unique(y[train]), np.unique(y[test]))
            # Checks if folds keep classes proportions: per-class counts
            # divided by the fold size.
            p_train = (np.bincount(np.unique(y[train],
                                   return_inverse=True)[1]) /
                       float(len(y[train])))
            p_test = (np.bincount(np.unique(y[test],
                                  return_inverse=True)[1]) /
                      float(len(y[test])))
            assert_array_almost_equal(p_train, p_test, 1)
            assert_equal(len(train) + len(test), y.size)
            assert_equal(len(train), train_size)
            assert_equal(len(test), test_size)
            # Use the public np.intersect1d; the np.lib.arraysetops path is
            # not available in NumPy 2.x.
            assert_array_equal(np.intersect1d(train, test), [])

示例13: test_stratifiedshufflesplit_list_input

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import StratifiedShuffleSplit [as 别名]
def test_stratifiedshufflesplit_list_input():
    """Splits must be identical for list, list-of-string and array labels."""
    splitter = StratifiedShuffleSplit(test_size=2, random_state=42)
    X = np.ones(7)

    labels_array = np.hstack((np.ones(4), np.zeros(3)))
    labels_str_list = ['1'] * 4 + ['0'] * 3
    labels_float_list = labels_array.tolist()

    # Each list variant must yield the same splits as the array reference.
    for labels in (labels_str_list, labels_float_list):
        np.testing.assert_equal(list(splitter.split(X, labels)),
                                list(splitter.split(X, labels_array)))

示例14: test_nested_cv

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import StratifiedShuffleSplit [as 别名]
def test_nested_cv():
    """Nested cross-validation must run for every inner/outer cv pairing."""
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 5, 15)

    cv_choices = [
        LeaveOneGroupOut(),
        LeaveOneOut(),
        GroupKFold(),
        StratifiedKFold(),
        StratifiedShuffleSplit(n_splits=3, random_state=0),
    ]

    # Every unordered pair, including a splitter paired with itself.
    for inner_cv, outer_cv in combinations_with_replacement(cv_choices, 2):
        search = GridSearchCV(Ridge(), param_grid={'alpha': [1, .1]},
                              cv=inner_cv, error_score='raise', iid=False)
        # groups go to the outer splitter directly and to the inner search
        # through fit_params.
        cross_val_score(search, X=X, y=y, groups=groups, cv=outer_cv,
                        fit_params={'groups': groups})

示例15: _is_stratified

# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import StratifiedShuffleSplit [as 别名]
def _is_stratified(self, cv):
    """Return True when ``cv`` is a StratifiedKFold or StratifiedShuffleSplit."""
    stratified_types = (StratifiedKFold, StratifiedShuffleSplit)
    return isinstance(cv, stratified_types)
