This article collects typical usage examples of the Python method sklearn.model_selection.StratifiedShuffleSplit. If you are wondering what model_selection.StratifiedShuffleSplit does, how to call it, or what real-world usage looks like, the curated code samples below may help. You can also explore further usage examples from its containing module, sklearn.model_selection.
Below are 15 code examples of model_selection.StratifiedShuffleSplit, sorted by popularity by default.
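Before the collected examples, here is a minimal, self-contained sketch of the basic API (the toy data and variable names are illustrative only, not taken from any example below): construct the splitter with n_splits, test_size, and random_state, then iterate over split(X, y) to get (train_indices, test_indices) pairs in which each class keeps roughly the same proportion as in y.

import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

# Toy data: 10 samples, 6 of class 0 and 4 of class 1 (illustrative only).
X = np.arange(20).reshape(10, 2)
y = np.array([0] * 6 + [1] * 4)

# Three independent random splits, each holding out 30% of the samples.
sss = StratifiedShuffleSplit(n_splits=3, test_size=0.3, random_state=0)
for train_idx, test_idx in sss.split(X, y):
    # Both subsets preserve the 6:4 class ratio of y as closely as possible.
    print("train:", train_idx, "test:", test_idx)
    print("test class counts:", np.bincount(y[test_idx]))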
Example 1: get_full_rbf_svm_clf
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedShuffleSplit [as alias]
def get_full_rbf_svm_clf(train_x, train_y, c_range=None, gamma_range=None):
    param_grid = dict(gamma=gamma_range, C=c_range)

    cv = StratifiedShuffleSplit(n_splits=2, test_size=0.2, random_state=42)
    grid = GridSearchCV(SVC(cache_size=1024), param_grid=param_grid, cv=cv, n_jobs=14, verbose=10)
    grid.fit(train_x, train_y)

    print("The best parameters are %s with a score of %0.2f" % (grid.best_params_, grid.best_score_))

    scores = grid.cv_results_['mean_test_score'].reshape(len(c_range), len(gamma_range))
    print("Scores:")
    print(scores)
    print("c_range:", c_range)
    print("gamma_range:", gamma_range)

    c_best = grid.best_params_['C']
    gamma_best = grid.best_params_['gamma']
    clf = SVC(C=c_best, gamma=gamma_best, verbose=True)
    return clf
#----------------
Example 2: test_2d_y
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedShuffleSplit [as alias]
def test_2d_y():
    # smoke test for 2d y and multi-label
    n_samples = 30
    rng = np.random.RandomState(1)
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))
    splitters = [LeaveOneOut(), LeavePOut(p=2), KFold(), StratifiedKFold(),
                 RepeatedKFold(), RepeatedStratifiedKFold(),
                 ShuffleSplit(), StratifiedShuffleSplit(test_size=.5),
                 GroupShuffleSplit(), LeaveOneGroupOut(),
                 LeavePGroupsOut(n_groups=2), GroupKFold(), TimeSeriesSplit(),
                 PredefinedSplit(test_fold=groups)]
    for splitter in splitters:
        list(splitter.split(X, y, groups))
        list(splitter.split(X, y_2d, groups))
        try:
            list(splitter.split(X, y_multilabel, groups))
        except ValueError as e:
            allowed_target_types = ('binary', 'multiclass')
            msg = "Supported target types are: {}. Got 'multilabel".format(
                allowed_target_types)
            assert msg in str(e)
Example 3: test_stratified_shuffle_split_init
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedShuffleSplit [as alias]
def test_stratified_shuffle_split_init():
    X = np.arange(7)
    y = np.asarray([0, 1, 1, 1, 2, 2, 2])

    # Check that error is raised if there is a class with only one sample
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(3, 0.2).split(X, y))

    # Check that error is raised if the test set size is smaller than n_classes
    assert_raises(ValueError, next, StratifiedShuffleSplit(3, 2).split(X, y))

    # Check that error is raised if the train set size is smaller than
    # n_classes
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(3, 3, 2).split(X, y))

    X = np.arange(9)
    y = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2])

    # Train size or test size too small
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(train_size=2).split(X, y))
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(test_size=2).split(X, y))
Example 4: test_stratified_shuffle_split_multilabel
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedShuffleSplit [as alias]
def test_stratified_shuffle_split_multilabel():
    # fix for issue 9037
    for y in [np.array([[0, 1], [1, 0], [1, 0], [0, 1]]),
              np.array([[0, 1], [1, 1], [1, 1], [0, 1]])]:
        X = np.ones_like(y)
        sss = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=0)
        train, test = next(sss.split(X=X, y=y))
        y_train = y[train]
        y_test = y[test]

        # no overlap
        assert_array_equal(np.intersect1d(train, test), [])

        # complete partition
        assert_array_equal(np.union1d(train, test), np.arange(len(y)))

        # correct stratification of entire rows
        # (by design, here y[:, 0] uniquely determines the entire row of y)
        expected_ratio = np.mean(y[:, 0])
        assert_equal(expected_ratio, np.mean(y_train[:, 0]))
        assert_equal(expected_ratio, np.mean(y_test[:, 0]))
Example 5: test_stratified_shuffle_split_multilabel_many_labels
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedShuffleSplit [as alias]
def test_stratified_shuffle_split_multilabel_many_labels():
    # fix in PR #9922: for multilabel data with > 1000 labels, str(row)
    # truncates with an ellipsis for elements in positions 4 through
    # len(row) - 4, so labels were not being correctly split using the powerset
    # method for transforming a multilabel problem to a multiclass one; this
    # test checks that this problem is fixed.
    row_with_many_zeros = [1, 0, 1] + [0] * 1000 + [1, 0, 1]
    row_with_many_ones = [1, 0, 1] + [1] * 1000 + [1, 0, 1]
    y = np.array([row_with_many_zeros] * 10 + [row_with_many_ones] * 100)
    X = np.ones_like(y)

    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=0)
    train, test = next(sss.split(X=X, y=y))
    y_train = y[train]
    y_test = y[test]

    # correct stratification of entire rows
    # (by design, here y[:, 4] uniquely determines the entire row of y)
    expected_ratio = np.mean(y[:, 4])
    assert_equal(expected_ratio, np.mean(y_train[:, 4]))
    assert_equal(expected_ratio, np.mean(y_test[:, 4]))
Example 6: test_grid_search_groups
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedShuffleSplit [as alias]
def test_grid_search_groups():
    # Check if ValueError (when groups is None) propagates to GridSearchCV
    # And also check if groups is correctly passed to the cv object
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 3, 15)

    clf = LinearSVC(random_state=0)
    grid = {'C': [1]}

    group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(),
                 GroupShuffleSplit()]
    for cv in group_cvs:
        gs = GridSearchCV(clf, grid, cv=cv)
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             gs.fit, X, y)
        gs.fit(X, y, groups=groups)

    non_group_cvs = [StratifiedKFold(), StratifiedShuffleSplit()]
    for cv in non_group_cvs:
        gs = GridSearchCV(clf, grid, cv=cv)
        # Should not raise an error
        gs.fit(X, y)
Example 7: divide_train_val
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedShuffleSplit [as alias]
def divide_train_val(zeroes, ones, val_size):
    """ sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=val_size/2,
                                     random_state=7)
    indices_0 = sss.split(np.zeros(len(zeroes)), zeroes)
    indices_1 = sss.split(np.zeros(len(ones)), ones)
    train_indices_0, val_indices_0 = indices_0.next()
    train_indices_1, val_indices_1 = indices_1.next() """

    rand0 = np.random.permutation(len(zeroes))
    train_indices_0 = zeroes[rand0[val_size//2:]]
    val_indices_0 = zeroes[rand0[:val_size//2]]

    rand1 = np.random.permutation(len(ones))
    train_indices_1 = ones[rand1[val_size//2:]]
    val_indices_1 = ones[rand1[:val_size//2]]

    return (train_indices_0, train_indices_1,
            val_indices_0, val_indices_1)
Author: AdrianNunez, Project: Fall-Detection-with-CNNs-and-Optical-Flow, Lines of code: 20, Source file: temporalnet_combined.py
Example 8: subset_indices
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedShuffleSplit [as alias]
def subset_indices(d_source, d_target, subsetsize, subsetseed):
    if subsetsize > 0:
        if subsetseed != 0:
            subset_rng = np.random.RandomState(subsetseed)
        else:
            subset_rng = np.random
        strat = StratifiedShuffleSplit(n_splits=1, test_size=subsetsize, random_state=subset_rng)
        shuf = ShuffleSplit(n_splits=1, test_size=subsetsize, random_state=subset_rng)
        _, source_indices = next(strat.split(d_source.y, d_source.y))
        n_src = source_indices.shape[0]
        if d_target.has_ground_truth:
            _, target_indices = next(strat.split(d_target.y, d_target.y))
        else:
            _, target_indices = next(shuf.split(np.arange(len(d_target.images))))
        n_tgt = target_indices.shape[0]
    else:
        source_indices = None
        target_indices = None
        n_src = len(d_source.images)
        n_tgt = len(d_target.images)

    return source_indices, target_indices, n_src, n_tgt
Example 9: gen_samples
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedShuffleSplit [as alias]
def gen_samples(self, y, n_samples, problem_type):
    if problem_type == 'classification':
        splits = StratifiedShuffleSplit(
            n_splits=n_samples,
            test_size=self.cal_portion
        )
        split_ = splits.split(np.zeros((y.size, 1)), y)
    else:
        splits = ShuffleSplit(
            n_splits=n_samples,
            test_size=self.cal_portion
        )
        split_ = splits.split(np.zeros((y.size, 1)))

    for train, cal in split_:
        yield train, cal

# -----------------------------------------------------------------------------
# Conformal ensemble
# -----------------------------------------------------------------------------
Example 10: cv_clf
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedShuffleSplit [as alias]
def cv_clf(x, y, test_size=0.2, n_splits=5, random_state=None, doesUpsample=True):
    sss_obj = sss(n_splits, test_size, random_state=random_state).split(x, y)
    if not doesUpsample:
        yield sss_obj
    for train_inds, valid_inds in sss_obj:
        yield (upsample_indices_clf(train_inds, y[train_inds]), valid_inds)
Example 11: test_stratified_shuffle_split_respects_test_size
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedShuffleSplit [as alias]
def test_stratified_shuffle_split_respects_test_size():
    y = np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2])
    test_size = 5
    train_size = 10
    sss = StratifiedShuffleSplit(6, test_size=test_size, train_size=train_size,
                                 random_state=0).split(np.ones(len(y)), y)
    for train, test in sss:
        assert_equal(len(train), train_size)
        assert_equal(len(test), test_size)
Example 12: test_stratified_shuffle_split_iter
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedShuffleSplit [as alias]
def test_stratified_shuffle_split_iter():
    ys = [np.array([1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]),
          np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]),
          np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2] * 2),
          np.array([1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]),
          np.array([-1] * 800 + [1] * 50),
          np.concatenate([[i] * (100 + i) for i in range(11)]),
          [1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3],
          ['1', '1', '1', '1', '2', '2', '2', '3', '3', '3', '3', '3'],
          ]

    for y in ys:
        sss = StratifiedShuffleSplit(6, test_size=0.33,
                                     random_state=0).split(np.ones(len(y)), y)
        y = np.asanyarray(y)  # To make it indexable for y[train]
        # this is how test-size is computed internally
        # in _validate_shuffle_split
        test_size = np.ceil(0.33 * len(y))
        train_size = len(y) - test_size
        for train, test in sss:
            assert_array_equal(np.unique(y[train]), np.unique(y[test]))
            # Checks if folds keep classes proportions
            p_train = (np.bincount(np.unique(y[train],
                                             return_inverse=True)[1]) /
                       float(len(y[train])))
            p_test = (np.bincount(np.unique(y[test],
                                            return_inverse=True)[1]) /
                      float(len(y[test])))
            assert_array_almost_equal(p_train, p_test, 1)
            assert_equal(len(train) + len(test), y.size)
            assert_equal(len(train), train_size)
            assert_equal(len(test), test_size)
            assert_array_equal(np.lib.arraysetops.intersect1d(train, test), [])
Example 13: test_stratifiedshufflesplit_list_input
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedShuffleSplit [as alias]
def test_stratifiedshufflesplit_list_input():
    # Check that when y is a list / list of string labels, it works.
    sss = StratifiedShuffleSplit(test_size=2, random_state=42)
    X = np.ones(7)
    y1 = ['1'] * 4 + ['0'] * 3
    y2 = np.hstack((np.ones(4), np.zeros(3)))
    y3 = y2.tolist()

    np.testing.assert_equal(list(sss.split(X, y1)),
                            list(sss.split(X, y2)))
    np.testing.assert_equal(list(sss.split(X, y3)),
                            list(sss.split(X, y2)))
Example 14: test_nested_cv
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedShuffleSplit [as alias]
def test_nested_cv():
    # Test if nested cross validation works with different combinations of cv
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 5, 15)

    cvs = [LeaveOneGroupOut(), LeaveOneOut(), GroupKFold(), StratifiedKFold(),
           StratifiedShuffleSplit(n_splits=3, random_state=0)]

    for inner_cv, outer_cv in combinations_with_replacement(cvs, 2):
        gs = GridSearchCV(Ridge(), param_grid={'alpha': [1, .1]},
                          cv=inner_cv, error_score='raise', iid=False)
        cross_val_score(gs, X=X, y=y, groups=groups, cv=outer_cv,
                        fit_params={'groups': groups})
Example 15: _is_stratified
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedShuffleSplit [as alias]
def _is_stratified(self, cv):
    return isinstance(cv, (StratifiedKFold, StratifiedShuffleSplit))